Compare commits

...

9 Commits

Author SHA1 Message Date
Qiuzg
e215cfb327
Merge e2e5e0e22e into 15f341b655 2024-11-14 10:12:00 +01:00
非法操作
15f341b655
feat: add the audio tool (#10695) 2024-11-14 16:37:15 +08:00
crazywoola
b358490607
chore: update issue template (#10693) 2024-11-14 16:12:27 +08:00
crazywoola
f9e4196fd5
Update pull_request_template.md (#10692) 2024-11-14 15:56:37 +08:00
crazywoola
751525802d
feat: update pr template (#10690) 2024-11-14 15:52:15 +08:00
lz
2abacd2a2d
export configuration 'CODE_EXECUTION_TIMEOUT' to .env (#10688)
Co-authored-by: liuzhu <liuzhu@fridaycloud.com.cn>
2024-11-14 15:34:34 +08:00
Nam Vu
a3155e0613
Update expat version (#10686) 2024-11-14 15:30:55 +08:00
Jyong
70b9e4caf5
check dataset is none (#10682) 2024-11-14 14:07:19 +08:00
root
e2e5e0e22e Submitting tasks in a loop 2024-10-29 14:28:57 +08:00
13 changed files with 266 additions and 34 deletions

View File

@ -1,34 +1,32 @@
# Checklist:
# Summary
Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
> [!Tip]
> Close issue syntax: `Fixes #<issue number>` or `Resolves #<issue number>`, see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) for more details.
# Screenshots
<table>
<tr>
<td>Before: </td>
<td>After: </td>
</tr>
<tr>
<td>...</td>
<td>...</td>
</tr>
</table>
# Checklist
> [!IMPORTANT]
> Please review the checklist below before submitting your pull request.
- [ ] Please open an issue before creating a PR or link to an existing issue
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I ran `dev/reformat`(backend) and `cd web && npx lint-staged`(frontend) to appease the lint gods
# Description
Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue. Close issue syntax: `Fixes #<issue number>`, see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) for more details.
Fixes
## Type of Change
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update, included: [Dify Document](https://github.com/langgenius/dify-docs)
- [ ] Improvement, including but not limited to code refactoring, performance optimization, and UI/UX improvement
- [ ] Dependency upgrade
# Testing Instructions
Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce them. Please also list any relevant details for your test configuration.
- [ ] Test A
- [ ] Test B
- [x] I understand that this PR may be closed in case there was no previous discussion or issues. (This doesn't apply to typos!)
- [x] I've added a test for each change that was introduced, and I tried as much as possible to make a single atomic change.
- [x] I've updated the documentation accordingly.
- [x] I ran `dev/reformat`(backend) and `cd web && npx lint-staged`(frontend) to appease the lint gods

View File

@ -55,7 +55,7 @@ RUN apt-get update \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-7 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
&& apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-7 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
# install a chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
&& apt-get autoremove -y \

View File

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="200" height="200" viewBox="0 0 200 200" fill="none">
<path d="M167.358 102.395C167.358 117.174 157.246 129.18 144.61 131.027H137.861C125.225 129.18 115.113 117.174 115.113 102.395H100.792C100.792 123.637 115.118 142.106 133.653 145.801V164.276H147.139V145.801C165.674 142.106 180 124.558 180 102.4H167.358V102.395ZM154.717 62.677C154.717 53.4397 147.979 46.9765 140.396 46.9765C138.523 46.9446 136.663 47.3273 134.924 48.1024C133.185 48.8775 131.603 50.0294 130.27 51.4909C128.936 52.9524 127.878 54.6943 127.157 56.6148C126.436 58.5354 126.066 60.5962 126.07 62.677V78.3775H154.717V70.4478V62.677ZM126.07 102.395C126.07 111.632 132.813 118.095 140.396 118.095C142.269 118.127 144.13 117.744 145.868 116.969C147.607 116.194 149.189 115.042 150.523 113.581C151.856 112.119 152.914 110.377 153.635 108.457C154.356 106.536 154.726 104.475 154.722 102.395V86.694H126.07V102.395ZM92.1297 45.8938L70.4796 21.7595L69.4235 20.5865L59.604 20L68.3674 20.5865L67.3113 21.7654L64.1429 25.2961L63.6149 25.8826L64.1429 27.0614L66.2552 29.4133L77.8723 42.3631H54.1099C35.1 43.5361 20.3146 61.1896 20.3146 81.7874V83.5527H28.2354V81.7932C28.2354 65.8992 39.8525 52.3628 54.1099 51.1899H77.8723L66.2552 64.1338L64.671 65.8992L64.1429 67.0722L63.6149 67.6645L64.1429 68.251L68.3674 72.9606L68.8954 73.5471L69.4235 72.9606L74.1759 67.6645L92.1297 47.6591L92.6578 47.0727L92.1297 45.8938ZM20 95.8496V118.213H30.033V107.034H50.099V168.821H40.066V180H70.165V168.821H60.132V107.034H80.198V118.213H90.231V95.8496H20Z" fill="#FF0099"/>
</svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@ -0,0 +1,6 @@
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class AudioToolProvider(BuiltinToolProviderController):
    """Tool provider controller for the audio tools."""

    def _validate_credentials(self, credentials: dict) -> None:
        """Accept any credentials; no validation is performed here.

        Args:
            credentials: Credential mapping supplied by the caller (ignored).
        """
        return None

View File

@ -0,0 +1,11 @@
identity:
author: hjlarry
name: audio
label:
en_US: Audio
description:
en_US: A tool for tts and asr.
zh_Hans: 一个用于文本转语音和语音转文本的工具。
icon: icon.svg
tags:
- utilities

View File

@ -0,0 +1,70 @@
import io
from typing import Any
from core.file.enums import FileType
from core.file.file_manager import download
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter, ToolParameterOption
from core.tools.tool.builtin_tool import BuiltinTool
from services.model_provider_service import ModelProviderService
class ASRTool(BuiltinTool):
    """Tool that converts an audio file to text with a speech-to-text model."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
        """Transcribe the ``audio_file`` parameter with the selected model.

        Args:
            user_id: Passed through to the model invocation as ``user``.
            tool_parameters: Expected to carry ``audio_file`` (the file to
                transcribe) and ``model`` (a ``"<provider>#<model>"`` string,
                the format produced by ``get_runtime_parameters``).

        Returns:
            A one-element list holding a text message: either the
            transcription or ``"not a valid audio file"``.
        """
        file = tool_parameters.get("audio_file")
        # A missing file is reported like a non-audio file instead of failing
        # with AttributeError on ``None.type``.
        if file is None or file.type != FileType.AUDIO:
            return [self.create_text_message("not a valid audio file")]

        audio_binary = io.BytesIO(download(file))
        # NOTE(review): presumably the speech-to-text backend needs a file
        # name on the stream — confirm before changing.
        audio_binary.name = "temp.mp3"

        # Split only on the first "#" so a model name that itself contains
        # "#" does not break the two-way unpacking.
        provider, model = tool_parameters.get("model").split("#", 1)
        model_instance = ModelManager().get_model_instance(
            tenant_id=self.runtime.tenant_id,
            provider=provider,
            model_type=ModelType.SPEECH2TEXT,
            model=model,
        )
        text = model_instance.invoke_speech2text(
            file=audio_binary,
            user=user_id,
        )
        return [self.create_text_message(text)]

    def get_available_models(self) -> list[tuple[str, str]]:
        """Return ``(provider, model)`` pairs for the tenant's speech2text models."""
        provider_models = ModelProviderService().get_models_by_model_type(
            tenant_id=self.runtime.tenant_id, model_type="speech2text"
        )
        return [
            (provider_model.provider, model.model)
            for provider_model in provider_models
            for model in provider_model.models
        ]

    def get_runtime_parameters(self) -> list[ToolParameter]:
        """Build the ``model`` select parameter from the available models.

        Returns:
            A one-element list with the ``model`` parameter. Its default is
            the first available option, or ``None`` when no speech2text model
            is available (previously this case raised ``IndexError``).
        """
        options = [
            ToolParameterOption(value=f"{provider}#{model}", label=I18nObject(en_US=f"{model}({provider})"))
            for provider, model in self.get_available_models()
        ]
        return [
            ToolParameter(
                name="model",
                label=I18nObject(en_US="Model", zh_Hans="Model"),
                human_description=I18nObject(
                    en_US="All available ASR models",
                    zh_Hans="所有可用的 ASR 模型",
                ),
                type=ToolParameter.ToolParameterType.SELECT,
                form=ToolParameter.ToolParameterForm.FORM,
                required=True,
                # Guard against an empty option list.
                default=options[0].value if options else None,
                options=options,
            )
        ]

View File

@ -0,0 +1,22 @@
identity:
name: asr
author: hjlarry
label:
en_US: Speech To Text
description:
human:
en_US: Convert audio file to text.
zh_Hans: 将音频文件转换为文本。
llm: Convert audio file to text.
parameters:
- name: audio_file
type: file
required: true
label:
en_US: Audio File
zh_Hans: 音频文件
human_description:
en_US: The audio file to be converted.
zh_Hans: 要转换的音频文件。
llm_description: The audio file to be converted.
form: llm

View File

@ -0,0 +1,90 @@
import io
from typing import Any
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter, ToolParameterOption
from core.tools.tool.builtin_tool import BuiltinTool
from services.model_provider_service import ModelProviderService
class TTSTool(BuiltinTool):
    """Tool that converts text to audio with a text-to-speech model."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
        """Synthesize speech for the ``text`` parameter with the selected model.

        Args:
            user_id: Passed through to the model invocation as ``user``.
            tool_parameters: Expected to carry ``text``, ``model`` (a
                ``"<provider>#<model>"`` string) and optionally
                ``voice#<provider>#<model>`` naming the voice to use.

        Returns:
            A text message confirming success followed by a blob message
            containing the generated audio bytes.
        """
        # Split only on the first "#" so a model name that itself contains
        # "#" does not break the two-way unpacking.
        provider, model = tool_parameters.get("model").split("#", 1)
        voice = tool_parameters.get(f"voice#{provider}#{model}")
        model_instance = ModelManager().get_model_instance(
            tenant_id=self.runtime.tenant_id,
            provider=provider,
            model_type=ModelType.TTS,
            model=model,
        )
        tts = model_instance.invoke_tts(
            content_text=tool_parameters.get("text"),
            user=user_id,
            tenant_id=self.runtime.tenant_id,
            voice=voice,
        )
        # invoke_tts is consumed as an iterable of byte chunks; concatenate
        # them into a single payload.
        wav_bytes = b"".join(tts)
        return [
            self.create_text_message("Audio generated successfully"),
            self.create_blob_message(
                blob=wav_bytes,
                meta={"mime_type": "audio/x-wav"},
                save_as=self.VariableKey.AUDIO,
            ),
        ]

    def get_available_models(self) -> list[tuple[str, str, list[Any]]]:
        """Return ``(provider, model, voices)`` triples for the tenant's TTS models.

        ``voices`` is read from the model's ``ModelPropertyKey.VOICES``
        property and defaults to an empty list when that property is absent.
        """
        provider_models = ModelProviderService().get_models_by_model_type(
            tenant_id=self.runtime.tenant_id, model_type="tts"
        )
        return [
            (
                provider_model.provider,
                model.model,
                model.model_properties.get(ModelPropertyKey.VOICES, []),
            )
            for provider_model in provider_models
            for model in provider_model.models
        ]

    def get_runtime_parameters(self) -> list[ToolParameter]:
        """Build the ``model`` select parameter plus one voice select per model.

        Returns:
            A list whose first element is the ``model`` parameter, followed by
            one ``voice#<provider>#<model>`` parameter per available model.
            The ``model`` default is the first option, or ``None`` when no TTS
            model is available (previously this case raised ``IndexError``).
        """
        options = []
        voice_parameters = []
        for provider, model, voices in self.get_available_models():
            options.append(
                ToolParameterOption(value=f"{provider}#{model}", label=I18nObject(en_US=f"{model}({provider})"))
            )
            voice_parameters.append(
                ToolParameter(
                    name=f"voice#{provider}#{model}",
                    label=I18nObject(en_US=f"Voice of {model}({provider})"),
                    type=ToolParameter.ToolParameterType.SELECT,
                    form=ToolParameter.ToolParameterForm.FORM,
                    options=[
                        ToolParameterOption(value=voice.get("mode"), label=I18nObject(en_US=voice.get("name")))
                        for voice in voices
                    ],
                )
            )

        model_parameter = ToolParameter(
            name="model",
            label=I18nObject(en_US="Model", zh_Hans="Model"),
            human_description=I18nObject(
                en_US="All available TTS models",
                zh_Hans="所有可用的 TTS 模型",
            ),
            type=ToolParameter.ToolParameterType.SELECT,
            form=ToolParameter.ToolParameterForm.FORM,
            required=True,
            # Guard against an empty option list.
            default=options[0].value if options else None,
            options=options,
        )
        # The model selector comes first, ahead of the per-model voice selectors.
        return [model_parameter, *voice_parameters]

View File

@ -0,0 +1,22 @@
identity:
name: tts
author: hjlarry
label:
en_US: Text To Speech
description:
human:
en_US: Convert text to audio file.
zh_Hans: 将文本转换为音频文件。
llm: Convert text to audio file.
parameters:
- name: text
type: string
required: true
label:
en_US: Text
zh_Hans: 文本
human_description:
en_US: The text to be converted.
zh_Hans: 要转换的文本。
llm_description: The text to be converted.
form: llm

View File

@ -916,10 +916,12 @@ class DocumentService:
db.session.commit()
# trigger async task
if document_ids:
document_indexing_task.delay(dataset.id, document_ids)
if duplicate_document_ids:
duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids)
if document_ids:
for dociment_id in document_ids:
document_indexing_task.delay(dataset.id, [dociment_id])
if duplicate_document_ids:
for duplicate_document_id in duplicate_document_ids:
duplicate_document_indexing_task.delay(dataset.id, [duplicate_document_id])
return documents, batch

View File

@ -25,7 +25,9 @@ def document_indexing_task(dataset_id: str, document_ids: list):
start_at = time.perf_counter()
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow"))
return
# check document limit
features = FeatureService.get_features(dataset.tenant_id)
try:

View File

@ -689,6 +689,9 @@ TEMPLATE_TRANSFORM_MAX_LENGTH=80000
CODE_MAX_STRING_ARRAY_LENGTH=30
CODE_MAX_OBJECT_ARRAY_LENGTH=30
CODE_MAX_NUMBER_ARRAY_LENGTH=1000
CODE_EXECUTION_CONNECT_TIMEOUT=10
CODE_EXECUTION_READ_TIMEOUT=60
CODE_EXECUTION_WRITE_TIMEOUT=10
# Workflow runtime configuration
WORKFLOW_MAX_EXECUTION_STEPS=500

View File

@ -244,6 +244,9 @@ x-shared-env: &shared-api-worker-env
RESET_PASSWORD_TOKEN_EXPIRY_MINUTES: ${RESET_PASSWORD_TOKEN_EXPIRY_MINUTES:-5}
CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194}
CODE_EXECUTION_API_KEY: ${SANDBOX_API_KEY:-dify-sandbox}
CODE_EXECUTION_CONNECT_TIMEOUT: ${CODE_EXECUTION_CONNECT_TIMEOUT:-10}
CODE_EXECUTION_READ_TIMEOUT: ${CODE_EXECUTION_READ_TIMEOUT:-60}
CODE_EXECUTION_WRITE_TIMEOUT: ${CODE_EXECUTION_WRITE_TIMEOUT:-10}
CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807}
CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808}
CODE_MAX_DEPTH: ${CODE_MAX_DEPTH:-5}