diff --git a/api/core/tools/provider/builtin/transcript/_assets/icon.svg b/api/core/tools/provider/builtin/transcript/_assets/icon.svg
new file mode 100644
index 0000000000..83b0700fec
--- /dev/null
+++ b/api/core/tools/provider/builtin/transcript/_assets/icon.svg
@@ -0,0 +1,11 @@
+
+
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/transcript/tools/transcript.py b/api/core/tools/provider/builtin/transcript/tools/transcript.py
new file mode 100644
index 0000000000..d4e8ea8eef
--- /dev/null
+++ b/api/core/tools/provider/builtin/transcript/tools/transcript.py
@@ -0,0 +1,84 @@
+from typing import Any, Dict, Union, List
+from urllib.parse import urlparse, parse_qs
+from youtube_transcript_api import YouTubeTranscriptApi
+from core.tools.tool.builtin_tool import BuiltinTool
+from core.tools.entities.tool_entities import ToolInvokeMessage
+
+class YouTubeTranscriptTool(BuiltinTool):
+ def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]:
+ """
+ Invoke the YouTube transcript tool
+ """
+ try:
+ # Extract parameters with defaults
+ video_input = tool_parameters['video_id']
+ language = tool_parameters.get('language')
+ output_format = tool_parameters.get('format', 'text')
+ preserve_formatting = tool_parameters.get('preserve_formatting', False)
+ proxy = tool_parameters.get('proxy')
+ cookies = tool_parameters.get('cookies')
+
+ # Extract video ID from URL if needed
+ video_id = self._extract_video_id(video_input)
+
+ # Common kwargs for API calls
+ kwargs = {
+ 'proxies': {"https": proxy} if proxy else None,
+ 'cookies': cookies
+ }
+
+ try:
+ if language:
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, **kwargs)
+ try:
+ transcript = transcript_list.find_transcript([language])
+ except:
+ # If requested language not found, try translating from English
+ transcript = transcript_list.find_transcript(['en']).translate(language)
+ transcript_data = transcript.fetch()
+ else:
+ transcript_data = YouTubeTranscriptApi.get_transcript(
+ video_id,
+ preserve_formatting=preserve_formatting,
+ **kwargs
+ )
+
+ # Format output
+ formatter_class = {
+ 'json': 'JSONFormatter',
+ 'pretty': 'PrettyPrintFormatter',
+ 'srt': 'SRTFormatter',
+ 'vtt': 'WebVTTFormatter'
+ }.get(output_format)
+
+ if formatter_class:
+ from youtube_transcript_api import formatters
+ formatter = getattr(formatters, formatter_class)()
+ formatted_transcript = formatter.format_transcript(transcript_data)
+ else:
+ formatted_transcript = ' '.join(entry['text'] for entry in transcript_data)
+
+ return self.create_text_message(text=formatted_transcript)
+
+ except Exception as e:
+ return self.create_text_message(
+ text=f"Error getting transcript: {str(e)}"
+ )
+
+ except Exception as e:
+ return self.create_text_message(
+ text=f"Error processing request: {str(e)}"
+ )
+
+ def _extract_video_id(self, video_input: str) -> str:
+ """
+ Extract video ID from URL or return as-is if already an ID
+ """
+ if 'youtube.com' in video_input or 'youtu.be' in video_input:
+ # Parse URL
+ parsed_url = urlparse(video_input)
+ if 'youtube.com' in parsed_url.netloc:
+ return parse_qs(parsed_url.query)['v'][0]
+ else: # youtu.be
+ return parsed_url.path[1:]
+ return video_input # Assume it's already a video ID
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/transcript/tools/transcript.yaml b/api/core/tools/provider/builtin/transcript/tools/transcript.yaml
new file mode 100644
index 0000000000..123b2f7673
--- /dev/null
+++ b/api/core/tools/provider/builtin/transcript/tools/transcript.yaml
@@ -0,0 +1,101 @@
+identity:
+ name: free_youtube_transcript
+ author: Tao Wang
+ label:
+ en_US: Free YouTube Transcript API
+ zh_Hans: 免费获取 YouTube 转录
+description:
+ human:
+ en_US: Get transcript from a YouTube video for free.
+ zh_Hans: 免费获取 YouTube 视频的转录文案。
+ llm: A tool for retrieving transcript from YouTube videos.
+parameters:
+ - name: video_id
+ type: string
+ required: true
+ label:
+ en_US: Video ID/URL
+ zh_Hans: 视频ID/URL
+ human_description:
+ en_US: Used to define the video from which the transcript will be fetched. You can find the id in the video url. For example - https://www.youtube.com/watch?v=video_id.
+ zh_Hans: 您要哪条视频的转录文案?您可以在视频链接中找到id。例如 - https://www.youtube.com/watch?v=video_id。
+ llm_description: Used to define the video from which the transcript will be fetched. For example - https://www.youtube.com/watch?v=video_id.
+ form: llm
+ - name: language
+ type: string
+ required: false
+ label:
+ en_US: Language Code
+ zh_Hans: 语言
+ human_description:
+ en_US: Language code (e.g. 'en', 'zh') for the transcript.
+ zh_Hans: 字幕语言代码(如'en'、'zh')。留空则自动选择。
+ llm_description: Used to set the language for transcripts.
+ form: form
+ - name: format
+ type: select
+ required: false
+ default: text
+ options:
+ - value: text
+ label:
+ en_US: Plain Text
+ zh_Hans: 纯文本
+ - value: json
+ label:
+ en_US: JSON Format
+ zh_Hans: JSON 格式
+ - value: pretty
+ label:
+ en_US: Pretty Print Format
+ zh_Hans: 美化格式
+ - value: srt
+ label:
+ en_US: SRT Format
+ zh_Hans: SRT 格式
+ - value: vtt
+ label:
+ en_US: WebVTT Format
+ zh_Hans: WebVTT 格式
+ label:
+ en_US: Output Format
+ zh_Hans: 输出格式
+ human_description:
+ en_US: Format of the transcript output
+ zh_Hans: 字幕输出格式
+ llm_description: The format to output the transcript in. Options are text (plain text), json (raw transcript data), pretty (pretty-printed transcript data), srt (SubRip format), or vtt (WebVTT format)
+ form: form
+ - name: preserve_formatting
+ type: boolean
+ required: false
+ default: false
+ label:
+ en_US: Preserve Formatting
+ zh_Hans: 保留格式
+ human_description:
+ en_US: Keep HTML formatting elements like <i> (italics) and <b> (bold)
+ zh_Hans: 保留HTML格式元素,如 <i>(斜体)和 <b>(粗体)
+ llm_description: Whether to preserve HTML formatting elements in the transcript text
+ form: form
+ - name: proxy
+ type: string
+ required: false
+ label:
+ en_US: HTTPS Proxy
+ zh_Hans: HTTPS 代理
+ human_description:
+ en_US: HTTPS proxy URL (e.g. https://user:pass@domain:port)
+ zh_Hans: HTTPS 代理地址(如 https://user:pass@domain:port)
+ llm_description: HTTPS proxy to use for the request. Format should be https://user:pass@domain:port
+ form: form
+ - name: cookies
+ type: string
+ required: false
+ label:
+ en_US: Cookies File Path
+ zh_Hans: Cookies 文件路径
+ human_description:
+ en_US: Path to cookies.txt file for accessing age-restricted videos
+ zh_Hans: 用于访问年龄限制视频的 cookies.txt 文件路径
+ llm_description: Path to a cookies.txt file containing YouTube cookies, needed for accessing age-restricted videos
+ form: form
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/transcript/transcript.py b/api/core/tools/provider/builtin/transcript/transcript.py
new file mode 100644
index 0000000000..b119bc5163
--- /dev/null
+++ b/api/core/tools/provider/builtin/transcript/transcript.py
@@ -0,0 +1,12 @@
+from typing import Any, Dict
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+from core.tools.errors import ToolProviderCredentialValidationError
+
+from core.tools.provider.builtin.transcript.tools.transcript import YouTubeTranscriptTool
+
+class YouTubeTranscriptProvider(BuiltinToolProviderController):
+ def _validate_credentials(self, credentials: Dict[str, Any]) -> None:
+ """
+ No credentials needed for YouTube Transcript API
+ """
+ pass
\ No newline at end of file
diff --git a/api/core/tools/provider/builtin/transcript/transcript.yaml b/api/core/tools/provider/builtin/transcript/transcript.yaml
new file mode 100644
index 0000000000..26924aacc3
--- /dev/null
+++ b/api/core/tools/provider/builtin/transcript/transcript.yaml
@@ -0,0 +1,14 @@
+identity:
+ author: Tao Wang
+ name: transcript
+ label:
+ en_US: Transcript
+ zh_Hans: Transcript
+ description:
+ en_US: Get transcripts from YouTube videos
+ zh_Hans: 获取 YouTube 视频的字幕/转录文本
+ icon: icon.svg
+ tags:
+ - videos
+credentials_for_provider:
+ # No credentials needed for this provider as the YouTube Transcript API is free
\ No newline at end of file
diff --git a/api/poetry.lock b/api/poetry.lock
index 74c2ef5dc6..f52ba234f2 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@@ -10854,6 +10854,20 @@ requests = ">=2.31"
nospam = ["requests-cache (>=1.0)", "requests-ratelimiter (>=0.3.1)"]
repair = ["scipy (>=1.6.3)"]
+[[package]]
+name = "youtube-transcript-api"
+version = "0.6.2"
+description = "This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!"
+optional = false
+python-versions = "*"
+files = [
+ {file = "youtube_transcript_api-0.6.2-py3-none-any.whl", hash = "sha256:019dbf265c6a68a0591c513fff25ed5a116ce6525832aefdfb34d4df5567121c"},
+ {file = "youtube_transcript_api-0.6.2.tar.gz", hash = "sha256:cad223d7620633cec44f657646bffc8bbc5598bd8e70b1ad2fa8277dec305eb7"},
+]
+
+[package.dependencies]
+requests = "*"
+
[[package]]
name = "zhipuai"
version = "2.1.5.20230904"
@@ -11078,4 +11092,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
-content-hash = "2ba4b464eebc26598f290fa94713acc44c588f902176e6efa80622911d40f0ac"
+content-hash = "69a3f471f85dce9e5fb889f739e148a4a6d95aaf94081414503867c7157dba69"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 8def7cd8f7..0d87c1b1c8 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -187,6 +187,7 @@ websocket-client = "~1.7.0"
werkzeug = "~3.0.1"
xinference-client = "0.15.2"
yarl = "~1.9.4"
+youtube-transcript-api = "~0.6.2"
zhipuai = "~2.1.5"
# Before adding new dependency, consider place it in alphabet order (a-z) and suitable group.