feat: add cogVideo tool (#10456)

2024-11-16 11:42:29 +08:00 · 2024-11-08 17:04:05 +08:00 · 2024-11-08 17:04:05 +08:00 · 4fe5297e35
commit 4fe5297e35
parent 22dee4f6f3
4 changed files with 107 additions and 0 deletions
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo.py
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.py
@ -0,0 +1,24 @@
 from typing import Any, Union
 from zhipuai import ZhipuAI
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
 class CogVideoTool(BuiltinTool):
    """Builtin tool that submits a video generation request to ZhipuAI's ``cogvideox`` model."""
    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Send a ``cogvideox`` generation request built from the tool parameters.

        Args:
            user_id: Identifier of the invoking user; not used by this method.
            tool_parameters: Mapping that may hold ``prompt`` and/or ``image_url``.

        Returns:
            A text message when neither ``prompt`` nor ``image_url`` has a truthy
            value; otherwise a JSON message wrapping ``response.dict()`` of the
            generation call.
        """
        credentials = self.runtime.credentials
        client = ZhipuAI(
            base_url=credentials["zhipuai_base_url"],
            api_key=credentials["zhipuai_api_key"],
        )
        prompt = tool_parameters.get("prompt")
        image_url = tool_parameters.get("image_url")
        # The check runs after the client is built, so credential lookups happen first.
        if not (prompt or image_url):
            return self.create_text_message("require at least one of prompt and image_url")
        response = client.videos.generations(model="cogvideox", prompt=prompt, image_url=image_url)
        return self.create_json_message(response.dict())
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml
@ -0,0 +1,32 @@
 identity:
  name: cogvideo
  author: hjlarry
  label:
    en_US: CogVideo
    zh_Hans: CogVideo 视频生成
 description:
  human:
    en_US: Use the CogVideoX model provided by ZhipuAI to generate videos based on user prompts and images.
    zh_Hans: 使用智谱cogvideox模型，根据用户输入的提示词和图片，生成视频。
  llm: A tool for generating videos. The input is the user's prompt, an image URL, or both; the output is a task id. You can use another tool with this task id to check the status and get the video.
 parameters:
  - name: prompt
    type: string
    label:
      en_US: prompt
      zh_Hans: 提示词
    human_description:
      en_US: The prompt text used to generate video.
      zh_Hans: 用于生成视频的提示词。
    llm_description: The prompt text used to generate video. Optional.
    form: llm
  - name: image_url
    type: string
    label:
      en_US: image url
      zh_Hans: 图片链接
    human_description:
      en_US: The image url used to generate video.
      zh_Hans: 输入一个图片链接，生成的视频将基于该图片和提示词。
    llm_description: The image url used to generate video. Optional.
    form: llm
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py
@ -0,0 +1,30 @@
 from typing import Any, Union
 import httpx
 from zhipuai import ZhipuAI
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
 class CogVideoJobTool(BuiltinTool):
    """Builtin tool that retrieves the result of a CogVideo generation task from ZhipuAI."""
    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Look up a video generation task and, once it succeeded, fetch its outputs.

        Args:
            user_id: Identifier of the invoking user; not used by this method.
            tool_parameters: Mapping expected to hold the task ``id``.

        Returns:
            A text message when ``id`` is missing or empty. Otherwise a list that
            starts with a JSON message wrapping ``response.dict()``; when the task
            status is ``"SUCCESS"`` it is followed, for every entry of
            ``response.video_result``, by an image message for the cover image
            and a blob message holding the downloaded video bytes.

        Raises:
            httpx.HTTPStatusError: If a video download answers with a non-2xx
                status, so an error page is never stored as ``video/mp4``.
        """
        task_id = tool_parameters.get("id")
        # Reject a missing id up front instead of sending ``id=None`` to the API.
        if not task_id:
            return self.create_text_message("id is required")
        client = ZhipuAI(
            api_key=self.runtime.credentials["zhipuai_api_key"],
            base_url=self.runtime.credentials["zhipuai_base_url"],
        )
        response = client.videos.retrieve_videos_result(id=task_id)
        result = [self.create_json_message(response.dict())]
        if response.task_status != "SUCCESS":
            # Task still running or failed: only the raw status payload is returned.
            return result
        # ``or []`` tolerates a successful response whose video_result is None.
        for item in response.video_result or []:
            result.append(self.create_image_message(item.cover_image_url))
            # httpx does not follow redirects by default; without this a redirecting
            # download link would yield an empty body.
            download = httpx.get(item.url, follow_redirects=True)
            # Fail loudly rather than saving an HTTP error body as the video.
            download.raise_for_status()
            result.append(
                self.create_blob_message(
                    blob=download.content, meta={"mime_type": "video/mp4"}, save_as=self.VariableKey.VIDEO
                )
            )
        return result
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml
@ -0,0 +1,21 @@
 identity:
  name: cogvideo_job
  author: hjlarry
  label:
    en_US: CogVideo Result
    zh_Hans: CogVideo 结果获取
 description:
  human:
    en_US: Get the result of CogVideo tool generation.
    zh_Hans: 根据 CogVideo 工具返回的 id 获取视频生成结果。
  llm: Get the result of CogVideo tool generation. The input is the id returned by the CogVideo tool. The output is the URL of the video and of its cover image.
 parameters:
  - name: id
    type: string
    label:
      en_US: id
    human_description:
      en_US: The id returned by the CogVideo tool.
      zh_Hans: CogVideo 工具返回的 id。
    llm_description: The id returned by the CogVideo tool.
    form: llm