Mirror of https://github.com/langgenius/dify.git (synced 2024-11-16 11:42:29 +08:00)

commit 67b1190535: Merge branch 'main' into feat/support-extractor-tools
@@ -1,3 +1,3 @@
 #!/bin/bash

-poetry install -C api
+cd api && poetry install
.github/workflows/build-push.yml (4 changes)
@@ -49,7 +49,7 @@ jobs:
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

      - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ env.DOCKERHUB_USER }}
          password: ${{ env.DOCKERHUB_TOKEN }}

@@ -114,7 +114,7 @@ jobs:
          merge-multiple: true

      - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ env.DOCKERHUB_USER }}
          password: ${{ env.DOCKERHUB_TOKEN }}
.gitignore (1 change)
@@ -174,6 +174,7 @@ docker/volumes/unstructured/*
 docker/volumes/pgvector/data/*
 docker/volumes/pgvecto_rs/data/*
 docker/volumes/couchbase/*
+docker/volumes/oceanbase/*

 docker/nginx/conf.d/default.conf
 docker/nginx/ssl/*
README.md (153 changes)
@@ -46,6 +46,56 @@
 </p>


+## Table of Contents
+0. [Quick-Start🚀](https://github.com/langgenius/dify?tab=readme-ov-file#quick-start)
+
+1. [Intro📖](https://github.com/langgenius/dify?tab=readme-ov-file#intro)
+
+2. [How to use🔧](https://github.com/langgenius/dify?tab=readme-ov-file#using-dify)
+
+3. [Stay Ahead🏃](https://github.com/langgenius/dify?tab=readme-ov-file#staying-ahead)
+
+4. [Next Steps🏹](https://github.com/langgenius/dify?tab=readme-ov-file#next-steps)
+
+5. [Contributing💪](https://github.com/langgenius/dify?tab=readme-ov-file#contributing)
+
+6. [Community and Contact🏠](https://github.com/langgenius/dify?tab=readme-ov-file#community--contact)
+
+7. [Star-History📈](https://github.com/langgenius/dify?tab=readme-ov-file#star-history)
+
+8. [Security🔒](https://github.com/langgenius/dify?tab=readme-ov-file#security-disclosure)
+
+9. [License🤝](https://github.com/langgenius/dify?tab=readme-ov-file#license)
+
+> Make sure you read through this README before you start using Dify😊
+
+
+## Quick start
+The quickest way to deploy Dify locally is to run our [docker-compose.yml](https://github.com/langgenius/dify/blob/main/docker/docker-compose.yaml). Follow the instructions below to start it in 5 minutes.
+
+> Before installing Dify, make sure your machine meets the following minimum system requirements:
+>
+>- CPU >= 2 cores
+>- RAM >= 4 GiB
+>- Docker and Docker Compose installed
+</br>
+
+Run the following command in your terminal to clone the whole repo.
+```bash
+git clone https://github.com/langgenius/dify.git
+```
+After cloning, run the following commands one by one.
+```bash
+cd dify
+cd docker
+cp .env.example .env
+docker compose up -d
+```
+
+After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization process. You will be asked to set up an admin account.
+For more information on the quick setup, check [here](https://docs.dify.ai/getting-started/install-self-hosted/docker-compose).
+
+## Intro
 Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production. Here's a list of the core features:
 </br> </br>

@@ -79,73 +129,6 @@ Dify is an open-source LLM app development platform. Its intuitive interface com
 All of Dify's offerings come with corresponding APIs, so you could effortlessly integrate Dify into your own business logic.


-## Feature comparison
-<table style="width: 100%;">
-  <tr>
-    <th align="center">Feature</th>
-    <th align="center">Dify.AI</th>
-    <th align="center">LangChain</th>
-    <th align="center">Flowise</th>
-    <th align="center">OpenAI Assistants API</th>
-  </tr>
-  <tr>
-    <td align="center">Programming Approach</td>
-    <td align="center">API + App-oriented</td>
-    <td align="center">Python Code</td>
-    <td align="center">App-oriented</td>
-    <td align="center">API-oriented</td>
-  </tr>
-  <tr>
-    <td align="center">Supported LLMs</td>
-    <td align="center">Rich Variety</td>
-    <td align="center">Rich Variety</td>
-    <td align="center">Rich Variety</td>
-    <td align="center">OpenAI-only</td>
-  </tr>
-  <tr>
-    <td align="center">RAG Engine</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-  </tr>
-  <tr>
-    <td align="center">Agent</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-    <td align="center">✅</td>
-  </tr>
-  <tr>
-    <td align="center">Workflow</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-  </tr>
-  <tr>
-    <td align="center">Observability</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-    <td align="center">❌</td>
-  </tr>
-  <tr>
-    <td align="center">Enterprise Features (SSO/Access control)</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-    <td align="center">❌</td>
-    <td align="center">❌</td>
-  </tr>
-  <tr>
-    <td align="center">Local Deployment</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">✅</td>
-    <td align="center">❌</td>
-  </tr>
-</table>
-
 ## Using Dify

 - **Cloud </br>**

@@ -166,30 +149,21 @@ Star Dify on GitHub and be instantly notified of new releases.

 ![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4)


-## Quick start
-> Before installing Dify, make sure your machine meets the following minimum system requirements:
->
->- CPU >= 2 Core
->- RAM >= 4 GiB
-
-</br>
-
-The easiest way to start the Dify server is to run our [docker-compose.yml](docker/docker-compose.yaml) file. Before running the installation command, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
-
-```bash
-cd docker
-cp .env.example .env
-docker compose up -d
-```
-
-After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization process.
-
-> If you'd like to contribute to Dify or do additional development, refer to our [guide to deploying from source code](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code)
-
 ## Next steps

+Go to [quick-start](https://github.com/langgenius/dify?tab=readme-ov-file#quick-start) to set up your Dify instance, or set it up from source code.
+
+#### If you...
+If you forget your admin account, you can refer to this [guide](https://docs.dify.ai/getting-started/install-self-hosted/faqs#id-4.-how-to-reset-the-password-of-the-admin-account) to reset the password.
+
+> Use docker compose up without "-d" to print logs in your terminal. This can be useful if you run into unknown problems when using Dify.
+
+If you encounter a system error and would like help via GitHub issues, always paste the error logs into your report to accelerate the conversation. Go to [Community & contact](https://github.com/langgenius/dify?tab=readme-ov-file#community--contact) for more information.
+
+> Please read the [Dify Documentation](https://docs.dify.ai/) for detailed how-to-use guidance. Most potential problems are explained in the docs.
+
+> If you'd like to contribute to Dify or do additional development, refer to our [guide to deploying from source code](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code)
+
 If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker-compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments).

 If you'd like to configure a highly-available setup, there are community-contributed [Helm Charts](https://helm.sh/) and YAML files which allow Dify to be deployed on Kubernetes.

@@ -228,6 +202,7 @@ At the same time, please consider supporting Dify by sharing it on social media
 * [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
 * [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community.
 * [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
+* When reporting an error, attach a log if possible to maximize solution efficiency.

 ## Star history
README_PT.md (new file, 241 lines)
@@ -0,0 +1,241 @@
+![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab)
+
+<p align="center">
+  📌 <a href="https://dify.ai/blog/introducing-dify-workflow-file-upload-a-demo-on-ai-podcast">Introduzindo o Dify Workflow com Upload de Arquivo: Recrie o Podcast Google NotebookLM</a>
+</p>
+
+<p align="center">
+  <a href="https://cloud.dify.ai">Dify Cloud</a> ·
+  <a href="https://docs.dify.ai/getting-started/install-self-hosted">Auto-hospedagem</a> ·
+  <a href="https://docs.dify.ai">Documentação</a> ·
+  <a href="https://udify.app/chat/22L1zSxg6yW1cWQg">Consultas empresariais</a>
+</p>
+
+<p align="center">
+    <a href="https://dify.ai" target="_blank">
+        <img alt="Static Badge" src="https://img.shields.io/badge/Product-F04438"></a>
+    <a href="https://dify.ai/pricing" target="_blank">
+        <img alt="Static Badge" src="https://img.shields.io/badge/free-pricing?logo=free&color=%20%23155EEF&label=pricing&labelColor=%20%23528bff"></a>
+    <a href="https://discord.gg/FngNHpbcY7" target="_blank">
+        <img src="https://img.shields.io/discord/1082486657678311454?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb"
+            alt="chat on Discord"></a>
+    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
+        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
+            alt="follow on X(Twitter)"></a>
+    <a href="https://hub.docker.com/u/langgenius" target="_blank">
+        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
+    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
+        <img alt="Commits last month" src="https://img.shields.io/github/commit-activity/m/langgenius/dify?labelColor=%20%2332b583&color=%20%2312b76a"></a>
+    <a href="https://github.com/langgenius/dify/" target="_blank">
+        <img alt="Issues closed" src="https://img.shields.io/github/issues-search?query=repo%3Alanggenius%2Fdify%20is%3Aclosed&label=issues%20closed&labelColor=%20%237d89b0&color=%20%235d6b98"></a>
+    <a href="https://github.com/langgenius/dify/discussions/" target="_blank">
+        <img alt="Discussion posts" src="https://img.shields.io/github/discussions/langgenius/dify?labelColor=%20%239b8afb&color=%20%237a5af8"></a>
+</p>
+
+<p align="center">
+  <a href="./README.md"><img alt="README em Inglês" src="https://img.shields.io/badge/English-d9d9d9"></a>
+  <a href="./README_CN.md"><img alt="简体中文版自述文件" src="https://img.shields.io/badge/简体中文-d9d9d9"></a>
+  <a href="./README_JA.md"><img alt="日本語のREADME" src="https://img.shields.io/badge/日本語-d9d9d9"></a>
+  <a href="./README_ES.md"><img alt="README em Espanhol" src="https://img.shields.io/badge/Español-d9d9d9"></a>
+  <a href="./README_FR.md"><img alt="README em Francês" src="https://img.shields.io/badge/Français-d9d9d9"></a>
+  <a href="./README_KL.md"><img alt="README tlhIngan Hol" src="https://img.shields.io/badge/Klingon-d9d9d9"></a>
+  <a href="./README_KR.md"><img alt="README em Coreano" src="https://img.shields.io/badge/한국어-d9d9d9"></a>
+  <a href="./README_AR.md"><img alt="README em Árabe" src="https://img.shields.io/badge/العربية-d9d9d9"></a>
+  <a href="./README_TR.md"><img alt="README em Turco" src="https://img.shields.io/badge/Türkçe-d9d9d9"></a>
+  <a href="./README_VI.md"><img alt="README em Vietnamita" src="https://img.shields.io/badge/Ti%E1%BA%BFng%20Vi%E1%BB%87t-d9d9d9"></a>
+  <a href="./README_PT.md"><img alt="README em Português - BR" src="https://img.shields.io/badge/Portugu%C3%AAs-BR?style=flat&label=BR&color=d9d9d9"></a>
+</p>
+
+Dify é uma plataforma de desenvolvimento de aplicativos LLM de código aberto. Sua interface intuitiva combina workflow de IA, pipeline RAG, capacidades de agente, gerenciamento de modelos, recursos de observabilidade e muito mais, permitindo que você vá rapidamente do protótipo à produção. Aqui está uma lista das principais funcionalidades:
+</br> </br>
+
+**1. Workflow**:
+Construa e teste workflows poderosos de IA em uma interface visual, aproveitando todos os recursos a seguir e muito mais.
+
+https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa
+
+**2. Suporte abrangente a modelos**:
+Integração perfeita com centenas de LLMs proprietários e de código aberto de diversas provedoras e soluções auto-hospedadas, abrangendo GPT, Mistral, Llama3 e qualquer modelo compatível com a API da OpenAI. A lista completa de provedores suportados pode ser encontrada [aqui](https://docs.dify.ai/getting-started/readme/model-providers).
+
+![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)
+
+**3. IDE de Prompt**:
+Interface intuitiva para criação de prompts, comparação de desempenho de modelos e adição de recursos como conversão de texto para fala em um aplicativo baseado em chat.
+
+**4. Pipeline RAG**:
+Extensas capacidades de RAG que cobrem desde a ingestão de documentos até a recuperação, com suporte nativo para extração de texto de PDFs, PPTs e outros formatos de documentos comuns.
+
+**5. Capacidades de agente**:
+Você pode definir agentes com base em LLM Function Calling ou ReAct e adicionar ferramentas pré-construídas ou personalizadas para o agente. O Dify oferece mais de 50 ferramentas integradas para agentes de IA, como Google Search, DALL·E, Stable Diffusion e WolframAlpha.
+
+**6. LLMOps**:
+Monitore e analise os registros e o desempenho do aplicativo ao longo do tempo. É possível melhorar continuamente prompts, conjuntos de dados e modelos com base nos dados de produção e anotações.
+
+**7. Backend como Serviço**:
+Todos os recursos do Dify vêm com APIs correspondentes, permitindo que você integre o Dify sem esforço na lógica de negócios da sua empresa.
+
+## Comparação de recursos
+<table style="width: 100%;">
+  <tr>
+    <th align="center">Recurso</th>
+    <th align="center">Dify.AI</th>
+    <th align="center">LangChain</th>
+    <th align="center">Flowise</th>
+    <th align="center">OpenAI Assistants API</th>
+  </tr>
+  <tr>
+    <td align="center">Abordagem de Programação</td>
+    <td align="center">Orientada a API + Aplicativo</td>
+    <td align="center">Código Python</td>
+    <td align="center">Orientada a Aplicativo</td>
+    <td align="center">Orientada a API</td>
+  </tr>
+  <tr>
+    <td align="center">LLMs Suportados</td>
+    <td align="center">Variedade Rica</td>
+    <td align="center">Variedade Rica</td>
+    <td align="center">Variedade Rica</td>
+    <td align="center">Apenas OpenAI</td>
+  </tr>
+  <tr>
+    <td align="center">RAG Engine</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td align="center">Agente</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+    <td align="center">✅</td>
+  </tr>
+  <tr>
+    <td align="center">Workflow</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td align="center">Observabilidade</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td align="center">Recursos Empresariais (SSO/Controle de Acesso)</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+    <td align="center">❌</td>
+    <td align="center">❌</td>
+  </tr>
+  <tr>
+    <td align="center">Implantação Local</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">✅</td>
+    <td align="center">❌</td>
+  </tr>
+</table>
+
+## Usando o Dify
+
+- **Nuvem </br>**
+Oferecemos o serviço [Dify Cloud](https://dify.ai) para qualquer pessoa experimentar sem nenhuma configuração. Ele fornece todas as funcionalidades da versão auto-hospedada, incluindo 200 chamadas GPT-4 gratuitas no plano sandbox.
+
+- **Auto-hospedagem do Dify Community Edition</br>**
+Configure rapidamente o Dify no seu ambiente com este [guia inicial](#quick-start).
+Use nossa [documentação](https://docs.dify.ai) para referências adicionais e instruções mais detalhadas.
+
+- **Dify para empresas/organizações</br>**
+Oferecemos recursos adicionais voltados para empresas. [Envie suas perguntas através deste chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) ou [envie-nos um e-mail](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) para discutir necessidades empresariais. </br>
+> Para startups e pequenas empresas que utilizam AWS, confira o [Dify Premium no AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e implemente no seu próprio AWS VPC com um clique. É uma oferta AMI acessível com a opção de criar aplicativos com logotipo e marca personalizados.
+
+## Mantendo-se atualizado
+
+Dê uma estrela no Dify no GitHub e seja notificado imediatamente sobre novos lançamentos.
+
+![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4)
+
+## Início rápido
+> Antes de instalar o Dify, certifique-se de que sua máquina atenda aos seguintes requisitos mínimos de sistema:
+>
+>- CPU >= 2 Núcleos
+>- RAM >= 4 GiB
+
+</br>
+
+A maneira mais fácil de iniciar o servidor Dify é executar nosso arquivo [docker-compose.yml](docker/docker-compose.yaml). Antes de rodar o comando de instalação, certifique-se de que o [Docker](https://docs.docker.com/get-docker/) e o [Docker Compose](https://docs.docker.com/compose/install/) estão instalados na sua máquina:
+
+```bash
+cd docker
+cp .env.example .env
+docker compose up -d
+```
+
+Após a execução, você pode acessar o painel do Dify no navegador em [http://localhost/install](http://localhost/install) e iniciar o processo de inicialização.
+
+> Se você deseja contribuir com o Dify ou fazer desenvolvimento adicional, consulte nosso [guia para implantar a partir do código fonte](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code).
+
+## Próximos passos
+
+Se precisar personalizar a configuração, consulte os comentários no nosso arquivo [.env.example](docker/.env.example) e atualize os valores correspondentes no seu arquivo `.env`. Além disso, talvez seja necessário fazer ajustes no próprio arquivo `docker-compose.yaml`, como alterar versões de imagem, mapeamentos de portas ou montagens de volumes, com base no seu ambiente de implantação específico e nas suas necessidades. Após fazer quaisquer alterações, execute novamente `docker-compose up -d`. Você pode encontrar a lista completa de variáveis de ambiente disponíveis [aqui](https://docs.dify.ai/getting-started/install-self-hosted/environments).

+Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts](https://helm.sh/) e arquivos YAML contribuídos pela comunidade que permitem a implantação do Dify no Kubernetes.
+
+- [Helm Chart de @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
+- [Helm Chart de @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
+- [Arquivo YAML de @Winson-030](https://github.com/Winson-030/dify-kubernetes)
+
+#### Usando o Terraform para Implantação
+
+Implante o Dify na Plataforma Cloud com um único clique usando [terraform](https://www.terraform.io/)
+
+##### Azure Global
+- [Azure Terraform por @nikawang](https://github.com/nikawang/dify-azure-terraform)
+
+##### Google Cloud
+- [Google Cloud Terraform por @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
+
+## Contribuindo
+
+Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
+Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências.
+
+> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
+
+**Contribuidores**
+
+<a href="https://github.com/langgenius/dify/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=langgenius/dify" />
+</a>
+
+## Comunidade e contato
+
+* [Discussões no GitHub](https://github.com/langgenius/dify/discussions). Melhor para: compartilhar feedback e fazer perguntas.
+* [Problemas no GitHub](https://github.com/langgenius/dify/issues). Melhor para: relatar bugs encontrados no Dify.AI e propor novos recursos. Veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
+* [Discord](https://discord.gg/FngNHpbcY7). Melhor para: compartilhar suas aplicações e interagir com a comunidade.
+* [X(Twitter)](https://twitter.com/dify_ai). Melhor para: compartilhar suas aplicações e interagir com a comunidade.
+
+## Histórico de estrelas
+
+[![Gráfico de Histórico de Estrelas](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date)
+
+## Divulgação de segurança
+
+Para proteger sua privacidade, evite postar problemas de segurança no GitHub. Em vez disso, envie suas perguntas para security@dify.ai e forneceremos uma resposta mais detalhada.
+
+## Licença
+
+Este repositório está disponível sob a [Licença de Código Aberto Dify](LICENSE), que é essencialmente Apache 2.0 com algumas restrições adicionais.
@@ -202,6 +202,20 @@ TIDB_VECTOR_USER=xxx.root
 TIDB_VECTOR_PASSWORD=xxxxxx
 TIDB_VECTOR_DATABASE=dify

+# TiDB on Qdrant configuration
+TIDB_ON_QDRANT_URL=http://127.0.0.1
+TIDB_ON_QDRANT_API_KEY=dify
+TIDB_ON_QDRANT_CLIENT_TIMEOUT=20
+TIDB_ON_QDRANT_GRPC_ENABLED=false
+TIDB_ON_QDRANT_GRPC_PORT=6334
+TIDB_PUBLIC_KEY=dify
+TIDB_PRIVATE_KEY=dify
+TIDB_API_URL=http://127.0.0.1
+TIDB_IAM_API_URL=http://127.0.0.1
+TIDB_REGION=regions/aws-us-east-1
+TIDB_PROJECT_ID=dify
+TIDB_SPEND_LIMIT=100
+
 # Chroma configuration
 CHROMA_HOST=127.0.0.1
 CHROMA_PORT=8000

@@ -249,6 +263,14 @@ VIKINGDB_SCHEMA=http
 VIKINGDB_CONNECTION_TIMEOUT=30
 VIKINGDB_SOCKET_TIMEOUT=30

+# OceanBase Vector configuration
+OCEANBASE_VECTOR_HOST=127.0.0.1
+OCEANBASE_VECTOR_PORT=2881
+OCEANBASE_VECTOR_USER=root@test
+OCEANBASE_VECTOR_PASSWORD=
+OCEANBASE_VECTOR_DATABASE=test
+OCEANBASE_MEMORY_LIMIT=6G
+
 # Upload configuration
 UPLOAD_FILE_SIZE_LIMIT=15
 UPLOAD_FILE_BATCH_LIMIT=5
@@ -55,7 +55,12 @@ RUN apt-get update \
    && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
    && apt-get update \
    # For Security
-   && apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.3-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \
+   && apt-get install -y --no-install-recommends expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \
+   && if [ "$(dpkg --print-architecture)" = "amd64" ]; then \
+        apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1+b1; \
+      else \
+        apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1; \
+      fi \
    # install a chinese font to support the use of tools like matplotlib
    && apt-get install -y fonts-noto-cjk \
    && apt-get autoremove -y \
@@ -279,6 +279,7 @@ def migrate_knowledge_vector_database():
             VectorType.VIKINGDB,
             VectorType.UPSTASH,
             VectorType.COUCHBASE,
+            VectorType.OCEANBASE,
         }
         page = 1
         while True:
@@ -16,11 +16,13 @@ from configs.middleware.storage.supabase_storage_config import SupabaseStorageCo
 from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
 from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
 from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
+from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
 from configs.middleware.vdb.chroma_config import ChromaConfig
 from configs.middleware.vdb.couchbase_config import CouchbaseConfig
 from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
 from configs.middleware.vdb.milvus_config import MilvusConfig
 from configs.middleware.vdb.myscale_config import MyScaleConfig
+from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
 from configs.middleware.vdb.opensearch_config import OpenSearchConfig
 from configs.middleware.vdb.oracle_config import OracleConfig
 from configs.middleware.vdb.pgvector_config import PGVectorConfig

@@ -257,5 +259,7 @@ class MiddlewareConfig(
     VikingDBConfig,
     UpstashConfig,
     TidbOnQdrantConfig,
+    OceanBaseVectorConfig,
+    BaiduVectorDBConfig,
 ):
     pass
api/configs/middleware/vdb/oceanbase_config.py (new file, 35 lines)
@@ -0,0 +1,35 @@
+from typing import Optional
+
+from pydantic import Field, PositiveInt
+from pydantic_settings import BaseSettings
+
+
+class OceanBaseVectorConfig(BaseSettings):
+    """
+    Configuration settings for OceanBase Vector database
+    """
+
+    OCEANBASE_VECTOR_HOST: Optional[str] = Field(
+        description="Hostname or IP address of the OceanBase Vector server (e.g. 'localhost')",
+        default=None,
+    )
+
+    OCEANBASE_VECTOR_PORT: Optional[PositiveInt] = Field(
+        description="Port number on which the OceanBase Vector server is listening (default is 2881)",
+        default=2881,
+    )
+
+    OCEANBASE_VECTOR_USER: Optional[str] = Field(
+        description="Username for authenticating with the OceanBase Vector database",
+        default=None,
+    )
+
+    OCEANBASE_VECTOR_PASSWORD: Optional[str] = Field(
+        description="Password for authenticating with the OceanBase Vector database",
+        default=None,
+    )
+
+    OCEANBASE_VECTOR_DATABASE: Optional[str] = Field(
+        description="Name of the OceanBase Vector database to connect to",
+        default=None,
+    )
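Because `OceanBaseVectorConfig` extends pydantic-settings' `BaseSettings`, each field above is resolved from the process environment, which is how the `OCEANBASE_VECTOR_*` entries added to the env example earlier flow into the application config. A minimal standalone sketch of that mechanism (the two-field class below is a stripped-down stand-in, not the file's full definition):

```python
import os

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class OceanBaseVectorConfig(BaseSettings):
    # Stripped-down stand-in for the class above.
    OCEANBASE_VECTOR_HOST: str | None = Field(default=None)
    OCEANBASE_VECTOR_PORT: PositiveInt | None = Field(default=2881)


os.environ["OCEANBASE_VECTOR_HOST"] = "127.0.0.1"
config = OceanBaseVectorConfig()
print(config.OCEANBASE_VECTOR_HOST)  # 127.0.0.1 -- read from the environment
print(config.OCEANBASE_VECTOR_PORT)  # 2881 -- no env var set, default applies
```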
@@ -63,3 +63,8 @@ class TidbOnQdrantConfig(BaseSettings):
         description="Tidb project id",
         default=None,
     )
+
+    TIDB_SPEND_LIMIT: Optional[int] = Field(
+        description="Tidb spend limit",
+        default=100,
+    )
@@ -628,6 +628,7 @@ class DatasetRetrievalSettingApi(Resource):
                 | VectorType.BAIDU
                 | VectorType.VIKINGDB
                 | VectorType.UPSTASH
+                | VectorType.OCEANBASE
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (

@@ -669,6 +670,7 @@ class DatasetRetrievalSettingMockApi(Resource):
                 | VectorType.BAIDU
                 | VectorType.VIKINGDB
                 | VectorType.UPSTASH
+                | VectorType.OCEANBASE
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (
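Both endpoints select the allowed retrieval methods with structural pattern matching (Python 3.10+), OR-ing several enum members into a single `case` arm. A standalone sketch of that pattern; the three-member enum and the method names below are stand-ins for Dify's much larger `VectorType` and real return values:

```python
from enum import Enum


class VectorType(str, Enum):
    # Stand-in: the real enum has many more members.
    UPSTASH = "upstash"
    OCEANBASE = "oceanbase"
    WEAVIATE = "weaviate"


def retrieval_methods(vector_type: VectorType) -> list[str]:
    match vector_type:
        case VectorType.UPSTASH | VectorType.OCEANBASE:
            # Stores that only support semantic (vector) search.
            return ["semantic_search"]
        case _:
            return ["semantic_search", "full_text_search", "hybrid_search"]


print(retrieval_methods(VectorType.OCEANBASE))  # ['semantic_search']
```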
@@ -3,6 +3,7 @@ import logging

 import requests
 from flask_restful import Resource, reqparse
+from packaging import version

 from configs import dify_config

@@ -47,43 +48,15 @@ class VersionApi(Resource):


 def _has_new_version(*, latest_version: str, current_version: str) -> bool:
-    def parse_version(version: str) -> tuple:
-        # Split version into parts and pre-release suffix if any
-        parts = version.split("-")
-        version_parts = parts[0].split(".")
-        pre_release = parts[1] if len(parts) > 1 else None
-
-        # Validate version format
-        if len(version_parts) != 3:
-            raise ValueError(f"Invalid version format: {version}")
-
-        try:
-            # Convert version parts to integers
-            major, minor, patch = map(int, version_parts)
-            return (major, minor, patch, pre_release)
-        except ValueError:
-            raise ValueError(f"Invalid version format: {version}")
-
-    latest = parse_version(latest_version)
-    current = parse_version(current_version)
-
-    # Compare major, minor, and patch versions
-    for latest_part, current_part in zip(latest[:3], current[:3]):
-        if latest_part > current_part:
-            return True
-        elif latest_part < current_part:
-            return False
-
-    # If versions are equal, check pre-release suffixes
-    if latest[3] is None and current[3] is not None:
-        return True
-    elif latest[3] is not None and current[3] is None:
-        return False
-    elif latest[3] is not None and current[3] is not None:
-        # Simple string comparison for pre-release versions
-        return latest[3] > current[3]
-
-    return False
+    try:
+        latest = version.parse(latest_version)
+        current = version.parse(current_version)
+
+        # Compare versions
+        return latest > current
+    except version.InvalidVersion:
+        logging.warning(f"Invalid version format: latest={latest_version}, current={current_version}")
+        return False


 api.add_resource(VersionApi, "/version")
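The refactor swaps the hand-rolled tuple parser for `packaging.version`, which already implements PEP 440 ordering, including the rule that a pre-release sorts before its final release. A quick standalone illustration of the comparison semantics the new code relies on (assumes a recent `packaging` release, where invalid strings raise `InvalidVersion`):

```python
from packaging import version

# Numeric, not lexicographic: "0.10" is newer than "0.9".
assert version.parse("0.10.0") > version.parse("0.9.1")

# A pre-release sorts before the corresponding final release.
assert version.parse("1.0.0") > version.parse("1.0.0-beta1")

# Malformed strings raise InvalidVersion, which the new code logs
# and treats as "no new version available".
try:
    version.parse("not-a-version")
except version.InvalidVersion as exc:
    print(exc)
```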
@@ -230,7 +230,7 @@ class DocumentUpdateByFileApi(DatasetApiResource):
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)
         document = documents[0]
-        documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
+        documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
         return documents_and_batch_fields, 200
@@ -331,10 +331,26 @@ class DocumentIndexingStatusApi(DatasetApiResource):
         return data


-api.add_resource(DocumentAddByTextApi, "/datasets/<uuid:dataset_id>/document/create_by_text")
-api.add_resource(DocumentAddByFileApi, "/datasets/<uuid:dataset_id>/document/create_by_file")
-api.add_resource(DocumentUpdateByTextApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text")
-api.add_resource(DocumentUpdateByFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file")
+api.add_resource(
+    DocumentAddByTextApi,
+    "/datasets/<uuid:dataset_id>/document/create_by_text",
+    "/datasets/<uuid:dataset_id>/document/create-by-text",
+)
+api.add_resource(
+    DocumentAddByFileApi,
+    "/datasets/<uuid:dataset_id>/document/create_by_file",
+    "/datasets/<uuid:dataset_id>/document/create-by-file",
+)
+api.add_resource(
+    DocumentUpdateByTextApi,
+    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text",
+    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text",
+)
+api.add_resource(
+    DocumentUpdateByFileApi,
+    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
+    "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
+)
 api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
 api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
 api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
@@ -14,4 +14,4 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
         return self.perform_hit_testing(dataset, args)


-api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
+api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing", "/datasets/<uuid:dataset_id>/retrieve")
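This and the document-route change above both rely on `flask_restful.Api.add_resource` accepting several URL rules for one resource, so a new spelling can be introduced without breaking clients that use the old path. A minimal runnable sketch (hypothetical app and resource, not code from the commit):

```python
from flask import Flask
from flask_restful import Api, Resource

app = Flask(__name__)
api = Api(app)


class HitTesting(Resource):
    def post(self, dataset_id):
        return {"dataset_id": dataset_id}


# Both paths dispatch to the same resource, so old clients keep working.
api.add_resource(
    HitTesting,
    "/datasets/<string:dataset_id>/hit-testing",
    "/datasets/<string:dataset_id>/retrieve",
)
```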
@@ -37,6 +37,17 @@ def _get_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
     return rule


+def _get_o1_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
+    rule = ParameterRule(
+        name="max_completion_tokens",
+        **PARAMETER_RULE_TEMPLATE[DefaultParameterName.MAX_TOKENS],
+    )
+    rule.default = default
+    rule.min = min_val
+    rule.max = max_val
+    return rule
+
+
 class AzureBaseModel(BaseModel):
     base_model_name: str
     entity: AIModelEntity
@@ -1098,14 +1109,6 @@ LLM_BASE_MODELS = [
                 ModelPropertyKey.CONTEXT_SIZE: 128000,
             },
             parameter_rules=[
-                ParameterRule(
-                    name="temperature",
-                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
-                ),
-                ParameterRule(
-                    name="top_p",
-                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
-                ),
                 ParameterRule(
                     name="response_format",
                     label=I18nObject(zh_Hans="回复格式", en_US="response_format"),

@@ -1116,7 +1119,7 @@ LLM_BASE_MODELS = [
                     required=False,
                     options=["text", "json_object"],
                 ),
-                _get_max_tokens(default=512, min_val=1, max_val=32768),
+                _get_o1_max_tokens(default=512, min_val=1, max_val=32768),
             ],
             pricing=PriceConfig(
                 input=15.00,

@@ -1143,14 +1146,6 @@ LLM_BASE_MODELS = [
                 ModelPropertyKey.CONTEXT_SIZE: 128000,
             },
             parameter_rules=[
-                ParameterRule(
-                    name="temperature",
-                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
-                ),
-                ParameterRule(
-                    name="top_p",
-                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
-                ),
                 ParameterRule(
                     name="response_format",
                     label=I18nObject(zh_Hans="回复格式", en_US="response_format"),

@@ -1161,7 +1156,7 @@ LLM_BASE_MODELS = [
                     required=False,
                     options=["text", "json_object"],
                 ),
-                _get_max_tokens(default=512, min_val=1, max_val=65536),
+                _get_o1_max_tokens(default=512, min_val=1, max_val=65536),
             ],
             pricing=PriceConfig(
                 input=3.00,
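For context: the o1-series base models reject the classic `max_tokens` sampling parameter and expect `max_completion_tokens` instead, which is why these entries swap `_get_max_tokens` for `_get_o1_max_tokens` and drop the `temperature` and `top_p` rules that o1 models do not support. A standalone sketch of the rename-and-override pattern the helper uses (`ParameterRule` and `TEMPLATE` below are simplified stand-ins, not Dify's real classes or `PARAMETER_RULE_TEMPLATE`):

```python
from dataclasses import dataclass


@dataclass
class ParameterRule:  # simplified stand-in for Dify's ParameterRule
    name: str
    default: int = 0
    min: int = 0
    max: int = 0


TEMPLATE = {"max_tokens": {"default": 64, "min": 1, "max": 2048}}  # stand-in


def get_o1_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
    # Reuse the shared max_tokens template values, but expose the rule
    # under the parameter name the o1 models expect.
    rule = ParameterRule(name="max_completion_tokens", **TEMPLATE["max_tokens"])
    rule.default, rule.min, rule.max = default, min_val, max_val
    return rule


print(get_o1_max_tokens(512, 1, 32768))
```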
@@ -1,7 +1,7 @@
-model: hunyuan-standard-256k
+model: hunyuan-standard-256K
 label:
-  zh_Hans: hunyuan-standard-256k
-  en_US: hunyuan-standard-256k
+  zh_Hans: hunyuan-standard-256K
+  en_US: hunyuan-standard-256K
 model_type: llm
 features:
 - agent-thought
(new binary image file, 11 KiB; binary file not shown)

(new image file, 417 B)
@@ -0,0 +1,3 @@
+<svg width="1200" height="925" viewBox="0 0 1200 925" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M780.152 250.999L907.882 462.174C907.882 462.174 880.925 510.854 867.43 535.21C834.845 594.039 764.171 612.49 710.442 508.333L420.376 0H0L459.926 803.307C552.303 964.663 787.366 964.663 879.743 803.307C989.874 610.952 1089.87 441.97 1200 249.646L1052.28 0H639.519L780.152 250.999Z" fill="#3366FF"/>
+</svg>
api/core/model_runtime/model_providers/vessl_ai/llm/llm.py (new file, 83 lines)
@@ -0,0 +1,83 @@
+from decimal import Decimal
+
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.llm_entities import LLMMode
+from core.model_runtime.entities.model_entities import (
+    AIModelEntity,
+    DefaultParameterName,
+    FetchFrom,
+    ModelPropertyKey,
+    ModelType,
+    ParameterRule,
+    ParameterType,
+    PriceConfig,
+)
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+
+
+class VesslAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+        features = []
+
+        entity = AIModelEntity(
+            model=model,
+            label=I18nObject(en_US=model),
+            model_type=ModelType.LLM,
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            features=features,
+            model_properties={
+                ModelPropertyKey.MODE: credentials.get("mode"),
+            },
+            parameter_rules=[
+                ParameterRule(
+                    name=DefaultParameterName.TEMPERATURE.value,
+                    label=I18nObject(en_US="Temperature"),
+                    type=ParameterType.FLOAT,
+                    default=float(credentials.get("temperature", 0.7)),
+                    min=0,
+                    max=2,
+                    precision=2,
+                ),
+                ParameterRule(
+                    name=DefaultParameterName.TOP_P.value,
+                    label=I18nObject(en_US="Top P"),
+                    type=ParameterType.FLOAT,
+                    default=float(credentials.get("top_p", 1)),
+                    min=0,
+                    max=1,
+                    precision=2,
+                ),
+                ParameterRule(
+                    name=DefaultParameterName.TOP_K.value,
+                    label=I18nObject(en_US="Top K"),
+                    type=ParameterType.INT,
+                    default=int(credentials.get("top_k", 50)),
+                    min=-2147483647,
+                    max=2147483647,
+                    precision=0,
+                ),
+                ParameterRule(
+                    name=DefaultParameterName.MAX_TOKENS.value,
+                    label=I18nObject(en_US="Max Tokens"),
+                    type=ParameterType.INT,
+                    default=512,
+                    min=1,
+                    max=int(credentials.get("max_tokens_to_sample", 4096)),
+                ),
+            ],
+            pricing=PriceConfig(
+                input=Decimal(credentials.get("input_price", 0)),
+                output=Decimal(credentials.get("output_price", 0)),
+                unit=Decimal(credentials.get("unit", 0)),
+                currency=credentials.get("currency", "USD"),
+            ),
+        )
+
+        if credentials["mode"] == "chat":
+            entity.model_properties[ModelPropertyKey.MODE] = LLMMode.CHAT.value
+        elif credentials["mode"] == "completion":
+            entity.model_properties[ModelPropertyKey.MODE] = LLMMode.COMPLETION.value
+        else:
+            # Report the key that was actually checked ("mode", not "completion_type").
+            raise ValueError(f"Unknown completion type {credentials['mode']}")
+
+        return entity
api/core/model_runtime/model_providers/vessl_ai/vessl_ai.py (new file, 10 lines)
@@ -0,0 +1,10 @@
+import logging
+
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class VesslAIProvider(ModelProvider):
+    def validate_provider_credentials(self, credentials: dict) -> None:
+        pass
@@ -0,0 +1,56 @@
+provider: vessl_ai
+label:
+  en_US: vessl_ai
+icon_small:
+  en_US: icon_s_en.svg
+icon_large:
+  en_US: icon_l_en.png
+background: "#F1EFED"
+help:
+  title:
+    en_US: How to deploy VESSL AI LLM Model Endpoint
+  url:
+    en_US: https://docs.vessl.ai/guides/get-started/llama3-deployment
+supported_model_types:
+  - llm
+configurate_methods:
+  - customizable-model
+model_credential_schema:
+  model:
+    label:
+      en_US: Model Name
+    placeholder:
+      en_US: Enter your model name
+  credential_form_schemas:
+    - variable: endpoint_url
+      label:
+        en_US: endpoint url
+      type: text-input
+      required: true
+      placeholder:
+        en_US: Enter the URL of your endpoint
+    - variable: api_key
+      required: true
+      label:
+        en_US: API Key
+      type: secret-input
+      placeholder:
+        en_US: Enter your VESSL AI secret key
+    - variable: mode
+      show_on:
+        - variable: __model_type
+          value: llm
+      label:
+        en_US: Completion mode
+      type: select
+      required: false
+      default: chat
+      placeholder:
+        en_US: Select completion mode
+      options:
+        - value: completion
+          label:
+            en_US: Completion
+        - value: chat
+          label:
+            en_US: Chat
@@ -115,6 +115,7 @@ class _CommonWenxin:
     "ernie-character-8k-0321": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-char-8k",
     "ernie-4.0-turbo-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-8k",
     "ernie-4.0-turbo-8k-preview": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-8k-preview",
+    "ernie-4.0-turbo-128k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-128k",
     "yi_34b_chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/yi_34b_chat",
     "embedding-v1": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/embedding-v1",
     "bge-large-en": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_en",
@@ -0,0 +1,40 @@
+model: ernie-4.0-turbo-128k
+label:
+  en_US: Ernie-4.0-turbo-128K
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0.1
+    max: 1.0
+    default: 0.8
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 1024
+    min: 2
+    max: 4096
+  - name: presence_penalty
+    use_template: presence_penalty
+    default: 1.0
+    min: 1.0
+    max: 2.0
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    use_template: response_format
+  - name: disable_search
+    label:
+      zh_Hans: 禁用搜索
+      en_US: Disable Search
+    type: boolean
+    help:
+      zh_Hans: 禁用模型自行进行外部搜索。
+      en_US: Disable the model from performing external searches on its own.
+    required: false
@@ -34,6 +34,8 @@ class RetrievalService:
         reranking_mode: Optional[str] = "reranking_model",
         weights: Optional[dict] = None,
     ):
+        if not query:
+            return []
         dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
         if not dataset:
             return []
@ -3,11 +3,13 @@ import time
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from pydantic import BaseModel, model_validator
|
from pydantic import BaseModel, model_validator
|
||||||
from pymochow import MochowClient
|
from pymochow import MochowClient
|
||||||
from pymochow.auth.bce_credentials import BceCredentials
|
from pymochow.auth.bce_credentials import BceCredentials
|
||||||
from pymochow.configuration import Configuration
|
from pymochow.configuration import Configuration
|
||||||
from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, TableState
|
from pymochow.exception import ServerError
|
||||||
|
from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, ServerErrCode, TableState
|
||||||
from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex
|
from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex
|
||||||
from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row
|
from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row
|
||||||
|
|
||||||
|
@@ -116,6 +118,7 @@ class BaiduVector(BaseVector):
         self._db.table(self._collection_name).delete(filter=f"{key} = '{value}'")

     def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
+        query_vector = [float(val) if isinstance(val, np.float64) else val for val in query_vector]
         anns = AnnSearch(
             vector_field=self.field_vector,
             vector_floats=query_vector,
@@ -149,7 +152,13 @@ class BaiduVector(BaseVector):
         return docs

     def delete(self) -> None:
-        self._db.drop_table(table_name=self._collection_name)
+        try:
+            self._db.drop_table(table_name=self._collection_name)
+        except ServerError as e:
+            if e.code == ServerErrCode.TABLE_NOT_EXIST:
+                pass
+            else:
+                raise

     def _init_client(self, config) -> MochowClient:
         config = Configuration(credentials=BceCredentials(config.account, config.api_key), endpoint=config.endpoint)
@@ -166,7 +175,14 @@ class BaiduVector(BaseVector):
         if exists:
             return self._client.database(self._client_config.database)
         else:
-            return self._client.create_database(database_name=self._client_config.database)
+            try:
+                self._client.create_database(database_name=self._client_config.database)
+            except ServerError as e:
+                if e.code == ServerErrCode.DB_ALREADY_EXIST:
+                    pass
+                else:
+                    raise
+            return

     def _table_existed(self) -> bool:
         tables = self._db.list_table()
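Reviewer note: both hunks above apply the same idempotency pattern — the one server error that means "already in the desired state" is swallowed, anything else still propagates. A minimal standalone sketch of the pattern (FakeServerError and the code value are hypothetical stand-ins for pymochow's ServerError/ServerErrCode):

import types

class FakeServerError(Exception):
    def __init__(self, code: str):
        super().__init__(code)
        self.code = code

def run_ignoring(benign_code: str, action) -> None:
    try:
        action()
    except FakeServerError as e:
        if e.code != benign_code:
            raise  # unexpected failure: re-raise

def drop_table():
    # Simulates dropping a table that is already gone.
    raise FakeServerError("TABLE_NOT_EXIST")

run_ignoring("TABLE_NOT_EXIST", drop_table)  # returns silently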
@@ -175,7 +191,7 @@ class BaiduVector(BaseVector):
     def _create_table(self, dimension: int) -> None:
         # Try to grab distributed lock and create table
         lock_name = "vector_indexing_lock_{}".format(self._collection_name)
-        with redis_client.lock(lock_name, timeout=20):
+        with redis_client.lock(lock_name, timeout=60):
             table_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
             if redis_client.get(table_exist_cache_key):
                 return
@@ -238,15 +254,14 @@ class BaiduVector(BaseVector):
                 description="Table for Dify",
             )

+            # Wait for table created
+            while True:
+                time.sleep(1)
+                table = self._db.describe_table(self._collection_name)
+                if table.state == TableState.NORMAL:
+                    break
             redis_client.set(table_exist_cache_key, 1, ex=3600)

-        # Wait for table created
-        while True:
-            time.sleep(1)
-            table = self._db.describe_table(self._collection_name)
-            if table.state == TableState.NORMAL:
-                break
-

 class BaiduVectorFactory(AbstractVectorFactory):
     def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> BaiduVector:
0 api/core/rag/datasource/vdb/oceanbase/__init__.py Normal file
209 api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py Normal file
@@ -0,0 +1,209 @@
+import json
+import logging
+import math
+from typing import Any
+
+from pydantic import BaseModel, model_validator
+from pyobvector import VECTOR, ObVecClient
+from sqlalchemy import JSON, Column, String, func
+from sqlalchemy.dialects.mysql import LONGTEXT
+
+from configs import dify_config
+from core.rag.datasource.vdb.vector_base import BaseVector
+from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
+from core.rag.datasource.vdb.vector_type import VectorType
+from core.rag.embedding.embedding_base import Embeddings
+from core.rag.models.document import Document
+from extensions.ext_redis import redis_client
+from models.dataset import Dataset
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_OCEANBASE_HNSW_BUILD_PARAM = {"M": 16, "efConstruction": 256}
+DEFAULT_OCEANBASE_HNSW_SEARCH_PARAM = {"efSearch": 64}
+OCEANBASE_SUPPORTED_VECTOR_INDEX_TYPE = "HNSW"
+DEFAULT_OCEANBASE_VECTOR_METRIC_TYPE = "l2"
+
+
+class OceanBaseVectorConfig(BaseModel):
+    host: str
+    port: int
+    user: str
+    password: str
+    database: str
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_config(cls, values: dict) -> dict:
+        if not values["host"]:
+            raise ValueError("config OCEANBASE_VECTOR_HOST is required")
+        if not values["port"]:
+            raise ValueError("config OCEANBASE_VECTOR_PORT is required")
+        if not values["user"]:
+            raise ValueError("config OCEANBASE_VECTOR_USER is required")
+        if not values["database"]:
+            raise ValueError("config OCEANBASE_VECTOR_DATABASE is required")
+        return values
+
+
+class OceanBaseVector(BaseVector):
+    def __init__(self, collection_name: str, config: OceanBaseVectorConfig):
+        super().__init__(collection_name)
+        self._config = config
+        self._hnsw_ef_search = -1
+        self._client = ObVecClient(
+            uri=f"{self._config.host}:{self._config.port}",
+            user=self._config.user,
+            password=self._config.password,
+            db_name=self._config.database,
+        )
+
+    def get_type(self) -> str:
+        return VectorType.OCEANBASE
+
+    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
+        self._vec_dim = len(embeddings[0])
+        self._create_collection()
+        self.add_texts(texts, embeddings)
+
+    def _create_collection(self) -> None:
+        lock_name = "vector_indexing_lock_" + self._collection_name
+        with redis_client.lock(lock_name, timeout=20):
+            collection_exist_cache_key = "vector_indexing_" + self._collection_name
+            if redis_client.get(collection_exist_cache_key):
+                return
+
+            if self._client.check_table_exists(self._collection_name):
+                return
+
+            self.delete()
+
+            cols = [
+                Column("id", String(36), primary_key=True, autoincrement=False),
+                Column("vector", VECTOR(self._vec_dim)),
+                Column("text", LONGTEXT),
+                Column("metadata", JSON),
+            ]
+            vidx_params = self._client.prepare_index_params()
+            vidx_params.add_index(
+                field_name="vector",
+                index_type=OCEANBASE_SUPPORTED_VECTOR_INDEX_TYPE,
+                index_name="vector_index",
+                metric_type=DEFAULT_OCEANBASE_VECTOR_METRIC_TYPE,
+                params=DEFAULT_OCEANBASE_HNSW_BUILD_PARAM,
+            )
+
+            self._client.create_table_with_index_params(
+                table_name=self._collection_name,
+                columns=cols,
+                vidxs=vidx_params,
+            )
+            vals = []
+            params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'")
+            for row in params:
+                val = int(row[6])
+                vals.append(val)
+            if len(vals) == 0:
+                print("ob_vector_memory_limit_percentage not found in parameters.")
+                exit(1)
+            if any(val == 0 for val in vals):
+                try:
+                    self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
+                except Exception as e:
+                    raise Exception(
+                        "Failed to set ob_vector_memory_limit_percentage. "
+                        + "Maybe the database user has insufficient privilege.",
+                        e,
+                    )
+            redis_client.set(collection_exist_cache_key, 1, ex=3600)
+
+    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
+        ids = self._get_uuids(documents)
+        for id, doc, emb in zip(ids, documents, embeddings):
+            self._client.insert(
+                table_name=self._collection_name,
+                data={
+                    "id": id,
+                    "vector": emb,
+                    "text": doc.page_content,
+                    "metadata": doc.metadata,
+                },
+            )
+
+    def text_exists(self, id: str) -> bool:
+        cur = self._client.get(table_name=self._collection_name, id=id)
+        return cur.rowcount != 0
+
+    def delete_by_ids(self, ids: list[str]) -> None:
+        self._client.delete(table_name=self._collection_name, ids=ids)
+
+    def get_ids_by_metadata_field(self, key: str, value: str) -> list[str]:
+        cur = self._client.get(
+            table_name=self._collection_name,
+            where_clause=f"metadata->>'$.{key}' = '{value}'",
+            output_column_name=["id"],
+        )
+        return [row[0] for row in cur]
+
+    def delete_by_metadata_field(self, key: str, value: str) -> None:
+        ids = self.get_ids_by_metadata_field(key, value)
+        self.delete_by_ids(ids)
+
+    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
+        return []
+
+    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
+        ef_search = kwargs.get("ef_search", self._hnsw_ef_search)
+        if ef_search != self._hnsw_ef_search:
+            self._client.set_ob_hnsw_ef_search(ef_search)
+            self._hnsw_ef_search = ef_search
+        topk = kwargs.get("top_k", 10)
+        cur = self._client.ann_search(
+            table_name=self._collection_name,
+            vec_column_name="vector",
+            vec_data=query_vector,
+            topk=topk,
+            distance_func=func.l2_distance,
+            output_column_names=["text", "metadata"],
+            with_dist=True,
+        )
+        docs = []
+        for text, metadata, distance in cur:
+            metadata = json.loads(metadata)
+            metadata["score"] = 1 - distance / math.sqrt(2)
+            docs.append(
+                Document(
+                    page_content=text,
+                    metadata=metadata,
+                )
+            )
+        return docs
+
+    def delete(self) -> None:
+        self._client.drop_table_if_exist(self._collection_name)
+
+
+class OceanBaseVectorFactory(AbstractVectorFactory):
+    def init_vector(
+        self,
+        dataset: Dataset,
+        attributes: list,
+        embeddings: Embeddings,
+    ) -> BaseVector:
+        if dataset.index_struct_dict:
+            class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
+            collection_name = class_prefix.lower()
+        else:
+            dataset_id = dataset.id
+            collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
+            dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.OCEANBASE, collection_name))
+        return OceanBaseVector(
+            collection_name,
+            OceanBaseVectorConfig(
+                host=dify_config.OCEANBASE_VECTOR_HOST,
+                port=dify_config.OCEANBASE_VECTOR_PORT,
+                user=dify_config.OCEANBASE_VECTOR_USER,
+                password=(dify_config.OCEANBASE_VECTOR_PASSWORD or ""),
+                database=dify_config.OCEANBASE_VECTOR_DATABASE,
+            ),
+        )
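Reviewer note on the score conversion in search_by_vector: with the l2 metric, metadata["score"] = 1 - distance / sqrt(2) maps distance 0 to score 1.0 and distance sqrt(2) — the L2 distance between orthogonal unit vectors — to 0.0. A quick sketch, assuming embeddings are unit-normalized (the toy vectors below are made up):

import math

def l2_to_score(distance: float) -> float:
    # Same linear rescaling as in search_by_vector above.
    return 1 - distance / math.sqrt(2)

u, v = [1.0, 0.0], [0.0, 1.0]        # orthogonal unit vectors
print(l2_to_score(0.0))               # 1.0  (identical vectors)
print(l2_to_score(math.dist(u, v)))   # ~0.0 (orthogonal vectors)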
@@ -4,6 +4,7 @@ import uuid
 import requests
 from requests.auth import HTTPDigestAuth

+from configs import dify_config
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.dataset import TidbAuthBinding
@@ -36,7 +37,7 @@ class TidbService:
         }

         spending_limit = {
-            "monthly": 100,
+            "monthly": dify_config.TIDB_SPEND_LIMIT,
         }
         password = str(uuid.uuid4()).replace("-", "")[:16]
         display_name = str(uuid.uuid4()).replace("-", "")[:16]
@@ -208,7 +209,7 @@ class TidbService:
         }

         spending_limit = {
-            "monthly": 10,
+            "monthly": dify_config.TIDB_SPEND_LIMIT,
         }
         password = str(uuid.uuid4()).replace("-", "")[:16]
         display_name = str(uuid.uuid4()).replace("-", "")
@@ -134,6 +134,10 @@ class Vector:
                 from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import TidbOnQdrantVectorFactory

                 return TidbOnQdrantVectorFactory
+            case VectorType.OCEANBASE:
+                from core.rag.datasource.vdb.oceanbase.oceanbase_vector import OceanBaseVectorFactory
+
+                return OceanBaseVectorFactory
             case _:
                 raise ValueError(f"Vector store {vector_type} is not supported.")
@@ -21,3 +21,4 @@ class VectorType(str, Enum):
     VIKINGDB = "vikingdb"
     UPSTASH = "upstash"
     TIDB_ON_QDRANT = "tidb_on_qdrant"
+    OCEANBASE = "oceanbase"
@@ -27,18 +27,17 @@ class RerankModelRunner(BaseRerankRunner):
         :return:
         """
         docs = []
-        doc_id = []
+        doc_id = set()
         unique_documents = []
-        dify_documents = [item for item in documents if item.provider == "dify"]
-        external_documents = [item for item in documents if item.provider == "external"]
-        for document in dify_documents:
-            if document.metadata["doc_id"] not in doc_id:
-                doc_id.append(document.metadata["doc_id"])
-                docs.append(document.page_content)
-                unique_documents.append(document)
-        for document in external_documents:
-            docs.append(document.page_content)
-            unique_documents.append(document)
+        for document in documents:
+            if document.provider == "dify" and document.metadata["doc_id"] not in doc_id:
+                doc_id.add(document.metadata["doc_id"])
+                docs.append(document.page_content)
+                unique_documents.append(document)
+            elif document.provider == "external":
+                if document not in unique_documents:
+                    docs.append(document.page_content)
+                    unique_documents.append(document)

         documents = unique_documents
@@ -1,6 +1,6 @@
 from enum import Enum


-class RerankMode(Enum):
+class RerankMode(str, Enum):
     RERANKING_MODEL = "reranking_model"
     WEIGHTED_SCORE = "weighted_score"
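Reviewer note: the str mixin is what lets the new comparisons in dataset_retrieval.py below (e.g. reranking_mode != RerankMode.RERANKING_MODEL, where reranking_mode arrives as a plain string) work at all. A minimal illustration:

from enum import Enum

class PlainMode(Enum):
    RERANKING_MODEL = "reranking_model"

class StrMode(str, Enum):
    RERANKING_MODEL = "reranking_model"

print("reranking_model" == PlainMode.RERANKING_MODEL)  # False: a plain Enum member never equals its raw value
print("reranking_model" == StrMode.RERANKING_MODEL)    # True: the str mixin compares by value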
@@ -22,6 +22,7 @@ from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaK
 from core.rag.datasource.retrieval_service import RetrievalService
 from core.rag.entities.context_entities import DocumentContext
 from core.rag.models.document import Document
+from core.rag.rerank.rerank_type import RerankMode
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
 from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
|
@ -361,10 +362,39 @@ class DatasetRetrieval:
|
||||||
reranking_enable: bool = True,
|
reranking_enable: bool = True,
|
||||||
message_id: Optional[str] = None,
|
message_id: Optional[str] = None,
|
||||||
):
|
):
|
||||||
|
if not available_datasets:
|
||||||
|
return []
|
||||||
threads = []
|
threads = []
|
||||||
all_documents = []
|
all_documents = []
|
||||||
dataset_ids = [dataset.id for dataset in available_datasets]
|
dataset_ids = [dataset.id for dataset in available_datasets]
|
||||||
index_type = None
|
index_type_check = all(
|
||||||
|
item.indexing_technique == available_datasets[0].indexing_technique for item in available_datasets
|
||||||
|
)
|
||||||
|
if not index_type_check and (not reranking_enable or reranking_mode != RerankMode.RERANKING_MODEL):
|
||||||
|
raise ValueError(
|
||||||
|
"The configured knowledge base list have different indexing technique, please set reranking model."
|
||||||
|
)
|
||||||
|
index_type = available_datasets[0].indexing_technique
|
||||||
|
if index_type == "high_quality":
|
||||||
|
embedding_model_check = all(
|
||||||
|
item.embedding_model == available_datasets[0].embedding_model for item in available_datasets
|
||||||
|
)
|
||||||
|
embedding_model_provider_check = all(
|
||||||
|
item.embedding_model_provider == available_datasets[0].embedding_model_provider
|
||||||
|
for item in available_datasets
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
reranking_enable
|
||||||
|
and reranking_mode == "weighted_score"
|
||||||
|
and (not embedding_model_check or not embedding_model_provider_check)
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"The configured knowledge base list have different embedding model, please set reranking model."
|
||||||
|
)
|
||||||
|
if reranking_enable and reranking_mode == RerankMode.WEIGHTED_SCORE:
|
||||||
|
weights["vector_setting"]["embedding_provider_name"] = available_datasets[0].embedding_model_provider
|
||||||
|
weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model
|
||||||
|
|
||||||
for dataset in available_datasets:
|
for dataset in available_datasets:
|
||||||
index_type = dataset.indexing_technique
|
index_type = dataset.indexing_technique
|
||||||
retrieval_thread = threading.Thread(
|
retrieval_thread = threading.Thread(
|
||||||
|
|
|
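Reviewer note: the new guards reduce to "all selected datasets must agree on a property, otherwise a reranking model is mandatory". The same check in isolation (FakeDataset below is a hypothetical stand-in carrying only the fields being compared):

from dataclasses import dataclass

@dataclass
class FakeDataset:
    indexing_technique: str
    embedding_model: str

datasets = [FakeDataset("high_quality", "model-a"), FakeDataset("high_quality", "model-b")]

same_index = all(d.indexing_technique == datasets[0].indexing_technique for d in datasets)
same_model = all(d.embedding_model == datasets[0].embedding_model for d in datasets)
print(same_index, same_model)  # True False -> weighted_score reranking must be rejected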
@@ -33,7 +33,9 @@ class BarChartTool(BuiltinTool):
         if axis:
             axis = [label[:10] + "..." if len(label) > 10 else label for label in axis]
             ax.set_xticklabels(axis, rotation=45, ha="right")
-            ax.bar(axis, data)
+            # ensure all labels, including duplicates, are correctly displayed
+            ax.bar(range(len(data)), data)
+            ax.set_xticks(range(len(data)))
         else:
             ax.bar(range(len(data)), data)
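Reviewer note: with categorical x values, matplotlib assigns every occurrence of the same label to the same x position, so duplicate labels produced overlapping bars that hid data points. Plotting against range(len(data)) and applying the labels only as tick text keeps one visible bar per data point. A small repro sketch with made-up data:

import matplotlib

matplotlib.use("Agg")  # headless backend, just for this sketch
import matplotlib.pyplot as plt

data = [3, 5, 2]
axis = ["a", "a", "b"]  # note the duplicate label

fig, ax = plt.subplots()
ax.bar(axis, data)  # old call: both "a" bars share one x position and overlap
print(len({p.get_x() for p in ax.patches}))  # 2 distinct positions

fig, ax = plt.subplots()
ax.bar(range(len(data)), data)  # new call: one position per data point
ax.set_xticks(range(len(data)))
ax.set_xticklabels(axis, rotation=45, ha="right")
print(len({p.get_x() for p in ax.patches}))  # 3 distinct positions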
@@ -1,5 +1,3 @@
-import base64
-import io
 import json
 import random
 import uuid
|
||||||
from websocket import WebSocket
|
from websocket import WebSocket
|
||||||
from yarl import URL
|
from yarl import URL
|
||||||
|
|
||||||
from core.file.file_manager import _get_encoded_string
|
from core.file.file_manager import download
|
||||||
from core.file.models import File
|
from core.file.models import File
|
||||||
|
|
||||||
|
|
||||||
|
@@ -29,8 +27,7 @@ class ComfyUiClient:
         return response.content

     def upload_image(self, image_file: File) -> dict:
-        image_content = base64.b64decode(_get_encoded_string(image_file))
-        file = io.BytesIO(image_content)
+        file = download(image_file)
         files = {"image": (image_file.filename, file, image_file.mime_type), "overwrite": "true"}
         res = httpx.post(str(self.base_url / "upload/image"), files=files)
         return res.json()
@@ -47,12 +44,7 @@ class ComfyUiClient:
         ws.connect(ws_address)
         return ws, client_id

-    def set_prompt(
-        self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = "", image_name: str = ""
-    ) -> dict:
-        """
-        find the first KSampler, then can find the prompt node through it.
-        """
+    def set_prompt_by_ksampler(self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = "") -> dict:
         prompt = origin_prompt.copy()
         id_to_class_type = {id: details["class_type"] for id, details in prompt.items()}
         k_sampler = [key for key, value in id_to_class_type.items() if value == "KSampler"][0]

@@ -64,9 +56,20 @@ class ComfyUiClient:
         negative_input_id = prompt.get(k_sampler)["inputs"]["negative"][0]
         prompt.get(negative_input_id)["inputs"]["text"] = negative_prompt

-        if image_name != "":
-            image_loader = [key for key, value in id_to_class_type.items() if value == "LoadImage"][0]
-            prompt.get(image_loader)["inputs"]["image"] = image_name
+        return prompt
+
+    def set_prompt_images_by_ids(self, origin_prompt: dict, image_names: list[str], image_ids: list[str]) -> dict:
+        prompt = origin_prompt.copy()
+        for index, image_node_id in enumerate(image_ids):
+            prompt[image_node_id]["inputs"]["image"] = image_names[index]
+        return prompt
+
+    def set_prompt_images_by_default(self, origin_prompt: dict, image_names: list[str]) -> dict:
+        prompt = origin_prompt.copy()
+        id_to_class_type = {id: details["class_type"] for id, details in prompt.items()}
+        load_image_nodes = [key for key, value in id_to_class_type.items() if value == "LoadImage"]
+        for load_image, image_name in zip(load_image_nodes, image_names):
+            prompt.get(load_image)["inputs"]["image"] = image_name
         return prompt

     def track_progress(self, prompt: dict, ws: WebSocket, prompt_id: str):
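Reviewer note: all three helpers assume ComfyUI's workflow-API JSON shape — a dict of node id -> {"class_type", "inputs"}, where the KSampler's "positive"/"negative" inputs point at the ids of text-encode nodes and LoadImage nodes carry an "image" filename. A toy workflow of that shape (the ids and wiring below are illustrative only):

toy_workflow = {
    "3": {"class_type": "KSampler", "inputs": {"positive": ["6", 0], "negative": ["7", 0]}},
    "6": {"class_type": "CLIPTextEncode", "inputs": {"text": ""}},  # positive prompt node
    "7": {"class_type": "CLIPTextEncode", "inputs": {"text": ""}},  # negative prompt node
    "10": {"class_type": "LoadImage", "inputs": {"image": ""}},
}
# set_prompt_by_ksampler writes into nodes "6"/"7" via the KSampler links;
# set_prompt_images_by_default fills node "10" with an uploaded file name.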
@@ -1,7 +1,9 @@
 import json
 from typing import Any

+from core.file import FileType
 from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.errors import ToolParameterValidationError
 from core.tools.provider.builtin.comfyui.tools.comfyui_client import ComfyUiClient
 from core.tools.tool.builtin_tool import BuiltinTool
@@ -10,19 +12,46 @@ class ComfyUIWorkflowTool(BuiltinTool):
     def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
         comfyui = ComfyUiClient(self.runtime.credentials["base_url"])

-        positive_prompt = tool_parameters.get("positive_prompt")
-        negative_prompt = tool_parameters.get("negative_prompt")
+        positive_prompt = tool_parameters.get("positive_prompt", "")
+        negative_prompt = tool_parameters.get("negative_prompt", "")
+        images = tool_parameters.get("images") or []
         workflow = tool_parameters.get("workflow_json")
-        image_name = ""
-        if image := tool_parameters.get("image"):
+        image_names = []
+        for image in images:
+            if image.type != FileType.IMAGE:
+                continue
             image_name = comfyui.upload_image(image).get("name")
+            image_names.append(image_name)
+
+        set_prompt_with_ksampler = True
+        if "{{positive_prompt}}" in workflow:
+            set_prompt_with_ksampler = False
+            workflow = workflow.replace("{{positive_prompt}}", positive_prompt)
+            workflow = workflow.replace("{{negative_prompt}}", negative_prompt)

         try:
-            origin_prompt = json.loads(workflow)
+            prompt = json.loads(workflow)
         except:
             return self.create_text_message("the Workflow JSON is not correct")

-        prompt = comfyui.set_prompt(origin_prompt, positive_prompt, negative_prompt, image_name)
+        if set_prompt_with_ksampler:
+            try:
+                prompt = comfyui.set_prompt_by_ksampler(prompt, positive_prompt, negative_prompt)
+            except:
+                raise ToolParameterValidationError(
+                    "Failed set prompt with KSampler, try replace prompt to {{positive_prompt}} in your workflow json"
+                )
+
+        if image_names:
+            if image_ids := tool_parameters.get("image_ids"):
+                image_ids = image_ids.split(",")
+                try:
+                    prompt = comfyui.set_prompt_images_by_ids(prompt, image_names, image_ids)
+                except:
+                    raise ToolParameterValidationError("the Image Node ID List not match your upload image files.")
+            else:
+                prompt = comfyui.set_prompt_images_by_default(prompt, image_names)
+
         images = comfyui.generate_image_by_prompt(prompt)
         result = []
         for img in images:
@@ -24,12 +24,12 @@ parameters:
       zh_Hans: 负面提示词
     llm_description: Negative prompt, you should describe the image you don't want to generate as a list of words as possible as detailed, the prompt must be written in English.
     form: llm
-  - name: image
-    type: file
+  - name: images
+    type: files
     label:
-      en_US: Input Image
+      en_US: Input Images
       zh_Hans: 输入的图片
-    llm_description: The input image, used to transfer to the comfyui workflow to generate another image.
+    llm_description: The input images, used to transfer to the comfyui workflow to generate another image.
     form: llm
  - name: workflow_json
    type: string
@@ -40,3 +40,15 @@ parameters:
       en_US: exported from ComfyUI workflow
       zh_Hans: 从ComfyUI的工作流中导出
     form: form
+  - name: image_ids
+    type: string
+    label:
+      en_US: Image Node ID List
+      zh_Hans: 图片节点ID列表
+    placeholder:
+      en_US: Use commas to separate multiple node ID
+      zh_Hans: 多个节点ID时使用半角逗号分隔
+    human_description:
+      en_US: When the workflow has multiple image nodes, enter the ID list of these nodes, and the images will be passed to ComfyUI in the order of the list.
+      zh_Hans: 当工作流有多个图片节点时,输入这些节点的ID列表,图片将按列表顺序传给ComfyUI
+    form: form
@@ -3,7 +3,7 @@ import logging
 import mimetypes
 from collections.abc import Generator
 from os import listdir, path
-from threading import Lock
+from threading import Lock, Thread
 from typing import Any, Optional, Union

 from configs import dify_config
@@ -647,4 +647,5 @@ class ToolManager:
         raise ValueError(f"provider type {provider_type} not found")


-ToolManager.load_builtin_providers_cache()
+# preload builtin tool providers
+Thread(target=ToolManager.load_builtin_providers_cache, name="pre_load_builtin_providers_cache", daemon=True).start()
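Reviewer note: running the warm-up on a daemon thread keeps module import non-blocking, and the daemon flag means the thread cannot keep the process alive at shutdown. The pattern in isolation:

import threading
import time

def warm_cache():
    time.sleep(0.1)  # stands in for scanning the builtin providers
    print("cache ready")

threading.Thread(target=warm_cache, name="pre_load", daemon=True).start()
print("import continues immediately")
time.sleep(0.2)  # only so this sketch lives long enough for the worker to finish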
@@ -153,6 +153,7 @@ class AnswerStreamGeneratorRouter:
                 NodeType.IF_ELSE,
                 NodeType.QUESTION_CLASSIFIER,
                 NodeType.ITERATION,
+                NodeType.CONVERSATION_VARIABLE_ASSIGNER,
             }:
                 answer_dependencies[answer_node_id].append(source_node_id)
             else:
@@ -5,6 +5,7 @@ import json
 import docx
 import pandas as pd
 import pypdfium2
+import yaml
 from unstructured.partition.email import partition_email
 from unstructured.partition.epub import partition_epub
 from unstructured.partition.msg import partition_msg
@@ -101,6 +102,8 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
             return _extract_text_from_msg(file_content)
         case "application/json":
             return _extract_text_from_json(file_content)
+        case "application/x-yaml" | "text/yaml":
+            return _extract_text_from_yaml(file_content)
         case _:
             raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")
@@ -112,6 +115,8 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)
             return _extract_text_from_plain_text(file_content)
         case ".json":
             return _extract_text_from_json(file_content)
+        case ".yaml" | ".yml":
+            return _extract_text_from_yaml(file_content)
         case ".pdf":
             return _extract_text_from_pdf(file_content)
         case ".doc" | ".docx":
@@ -149,6 +154,15 @@ def _extract_text_from_json(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e


+def _extract_text_from_yaml(file_content: bytes) -> str:
+    """Extract the content from yaml file"""
+    try:
+        yaml_data = yaml.safe_load_all(file_content.decode("utf-8"))
+        return yaml.dump_all(yaml_data, allow_unicode=True, sort_keys=False)
+    except (UnicodeDecodeError, yaml.YAMLError) as e:
+        raise TextExtractionError(f"Failed to decode or parse YAML file: {e}") from e
+
+
 def _extract_text_from_pdf(file_content: bytes) -> str:
     try:
         pdf_file = io.BytesIO(file_content)
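Reviewer note: safe_load_all/dump_all handle multi-document YAML streams, so files with --- separators are normalized document by document. A quick round-trip sketch with made-up input:

import yaml

raw = b"name: demo\n---\nname: second\nvalues: [1, 2]\n"  # hypothetical file content
docs = yaml.safe_load_all(raw.decode("utf-8"))
print(yaml.dump_all(docs, allow_unicode=True, sort_keys=False))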
@@ -94,7 +94,7 @@ class Response:
     @property
     def is_file(self):
        content_type = self.content_type
-        content_disposition = self.response.headers.get("Content-Disposition", "")
+        content_disposition = self.response.headers.get("content-disposition", "")

        return "attachment" in content_disposition or (
            not any(non_file in content_type for non_file in NON_FILE_CONTENT_TYPES)
@@ -103,7 +103,7 @@ class Response:

     @property
     def content_type(self) -> str:
-        return self.headers.get("Content-Type", "")
+        return self.headers.get("content-type", "")

     @property
     def text(self) -> str:
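Reviewer note: the lowercase keys only matter when the headers end up in a plain dict, where lookups are case-sensitive; httpx's own Headers type normalizes case either way. Illustration:

import httpx

plain = {"content-type": "application/json"}  # e.g. headers copied into a dict
print(plain.get("Content-Type", ""))           # "" - case-sensitive miss
print(plain.get("content-type", ""))           # "application/json"

norm = httpx.Headers({"Content-Type": "application/json"})
print(norm.get("content-type", ""))            # "application/json" either way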
@@ -142,10 +142,11 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
         Extract files from response
         """
         files = []
+        is_file = response.is_file
         content_type = response.content_type
         content = response.content

-        if content_type:
+        if is_file and content_type:
             # extract filename from url
             filename = path.basename(url)
             # extract extension if possible
@@ -327,7 +327,7 @@ class LLMNode(BaseNode[LLMNodeData]):
             if variable is None:
                 raise ValueError(f"Variable {variable_selector.variable} not found")
             if isinstance(variable, NoneSegment):
-                continue
+                inputs[variable_selector.variable] = ""
             inputs[variable_selector.variable] = variable.to_object()

         memory = node_data.memory
@@ -349,13 +349,11 @@ class LLMNode(BaseNode[LLMNodeData]):
         variable = self.graph_runtime_state.variable_pool.get(selector)
         if variable is None:
             return []
-        if isinstance(variable, FileSegment):
+        elif isinstance(variable, FileSegment):
             return [variable.value]
-        if isinstance(variable, ArrayFileSegment):
+        elif isinstance(variable, ArrayFileSegment):
             return variable.value
-        # FIXME: Temporary fix for empty array,
-        # all variables added to variable pool should be a Segment instance.
-        if isinstance(variable, ArrayAnySegment) and len(variable.value) == 0:
+        elif isinstance(variable, NoneSegment | ArrayAnySegment):
             return []
         raise ValueError(f"Invalid variable type: {type(variable)}")
@@ -10,6 +10,7 @@ from core.variables import (
     ArrayNumberVariable,
     ArrayObjectSegment,
     ArrayObjectVariable,
+    ArraySegment,
     ArrayStringSegment,
     ArrayStringVariable,
     FileSegment,
@@ -79,7 +80,7 @@ def build_segment(value: Any, /) -> Segment:
     if isinstance(value, list):
         items = [build_segment(item) for item in value]
         types = {item.value_type for item in items}
-        if len(types) != 1:
+        if len(types) != 1 or all(isinstance(item, ArraySegment) for item in items):
             return ArrayAnySegment(value=value)
         match types.pop():
             case SegmentType.STRING:
@@ -121,6 +121,7 @@ conversation_fields = {
     "from_account_name": fields.String,
     "read_at": TimestampField,
     "created_at": TimestampField,
+    "updated_at": TimestampField,
     "annotation": fields.Nested(annotation_fields, allow_null=True),
     "model_config": fields.Nested(simple_model_config_fields),
     "user_feedback_stats": fields.Nested(feedback_stat_fields),
@@ -182,6 +183,7 @@ conversation_detail_fields = {
     "from_end_user_id": fields.String,
     "from_account_id": fields.String,
     "created_at": TimestampField,
+    "updated_at": TimestampField,
     "annotated": fields.Boolean,
     "introduction": fields.String,
     "model_config": fields.Nested(model_config_fields),
@@ -197,6 +199,7 @@ simple_conversation_fields = {
     "status": fields.String,
     "introduction": fields.String,
     "created_at": TimestampField,
+    "updated_at": TimestampField,
 }

 conversation_infinite_scroll_pagination_fields = {
@@ -396,7 +396,7 @@ class AppModelConfig(db.Model):
             "file_upload": self.file_upload_dict,
         }

-    def from_model_config_dict(self, model_config: dict):
+    def from_model_config_dict(self, model_config: Mapping[str, Any]):
         self.opening_statement = model_config.get("opening_statement")
         self.suggested_questions = (
             json.dumps(model_config["suggested_questions"]) if model_config.get("suggested_questions") else None
23 api/poetry.lock generated
@@ -7269,6 +7269,22 @@ files = [
 ed25519 = ["PyNaCl (>=1.4.0)"]
 rsa = ["cryptography"]

+[[package]]
+name = "pyobvector"
+version = "0.1.6"
+description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
+optional = false
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "pyobvector-0.1.6-py3-none-any.whl", hash = "sha256:0d700e865a85b4716b9a03384189e49288cd9d5f3cef88aed4740bc82d5fd136"},
+    {file = "pyobvector-0.1.6.tar.gz", hash = "sha256:05551addcac8c596992d5e38b480c83ca3481c6cfc6f56a1a1bddfb2e6ae037e"},
+]
+
+[package.dependencies]
+numpy = ">=1.26.0,<2.0.0"
+pymysql = ">=1.1.1,<2.0.0"
+sqlalchemy = ">=2.0.32,<3.0.0"
+
 [[package]]
 name = "pyopenssl"
 version = "24.2.1"
@@ -8677,6 +8693,11 @@ files = [
     {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
     {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
     {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
+    {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
+    {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
+    {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
+    {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
+    {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
     {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
     {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
     {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
@@ -10919,4 +10940,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "52552faf5f4823056eb48afe05349ab2f0e9a5bc42105211ccbbb54b59e27b59"
+content-hash = "ef927b98c33d704d680e08db0e5c7d9a4e05454c66fcd6a5f656a65eb08e886b"
@@ -247,6 +247,7 @@ pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
 pgvector = "0.2.5"
 pymilvus = "~2.4.4"
 pymochow = "1.3.1"
+pyobvector = "~0.1.6"
 qdrant-client = "1.7.3"
 tcvectordb = "1.3.2"
 tidb-vector = "0.0.9"
3 api/services/app_dsl_service/__init__.py Normal file
@@ -0,0 +1,3 @@
+from .service import AppDslService
+
+__all__ = ["AppDslService"]
34 api/services/app_dsl_service/exc.py Normal file
@@ -0,0 +1,34 @@
+class DSLVersionNotSupportedError(ValueError):
+    """Raised when the imported DSL version is not supported by the current Dify version."""
+
+
+class InvalidYAMLFormatError(ValueError):
+    """Raised when the provided YAML format is invalid."""
+
+
+class MissingAppDataError(ValueError):
+    """Raised when the app data is missing in the provided DSL."""
+
+
+class InvalidAppModeError(ValueError):
+    """Raised when the app mode is invalid."""
+
+
+class MissingWorkflowDataError(ValueError):
+    """Raised when the workflow data is missing in the provided DSL."""
+
+
+class MissingModelConfigError(ValueError):
+    """Raised when the model config data is missing in the provided DSL."""
+
+
+class FileSizeLimitExceededError(ValueError):
+    """Raised when the file size exceeds the allowed limit."""
+
+
+class EmptyContentError(ValueError):
+    """Raised when the content fetched from the URL is empty."""
+
+
+class ContentDecodingError(ValueError):
+    """Raised when there is an error decoding the content."""
@@ -1,8 +1,11 @@
 import logging
+from collections.abc import Mapping
+from typing import Any

-import httpx
-import yaml  # type: ignore
+import yaml
+from packaging import version

+from core.helper import ssrf_proxy
 from events.app_event import app_model_config_was_updated, app_was_created
 from extensions.ext_database import db
 from factories import variable_factory
@@ -11,6 +14,18 @@ from models.model import App, AppMode, AppModelConfig
 from models.workflow import Workflow
 from services.workflow_service import WorkflowService

+from .exc import (
+    ContentDecodingError,
+    DSLVersionNotSupportedError,
+    EmptyContentError,
+    FileSizeLimitExceededError,
+    InvalidAppModeError,
+    InvalidYAMLFormatError,
+    MissingAppDataError,
+    MissingModelConfigError,
+    MissingWorkflowDataError,
+)
+
 logger = logging.getLogger(__name__)

 current_dsl_version = "0.1.2"
@@ -30,32 +45,21 @@ class AppDslService:
         :param args: request args
         :param account: Account instance
         """
-        try:
-            max_size = 10 * 1024 * 1024  # 10MB
-            timeout = httpx.Timeout(10.0)
-            with httpx.stream("GET", url.strip(), follow_redirects=True, timeout=timeout) as response:
-                response.raise_for_status()
-                total_size = 0
-                content = b""
-                for chunk in response.iter_bytes():
-                    total_size += len(chunk)
-                    if total_size > max_size:
-                        raise ValueError("File size exceeds the limit of 10MB")
-                    content += chunk
-        except httpx.HTTPStatusError as http_err:
-            raise ValueError(f"HTTP error occurred: {http_err}")
-        except httpx.RequestError as req_err:
-            raise ValueError(f"Request error occurred: {req_err}")
-        except Exception as e:
-            raise ValueError(f"Failed to fetch DSL from URL: {e}")
+        max_size = 10 * 1024 * 1024  # 10MB
+        response = ssrf_proxy.get(url.strip(), follow_redirects=True, timeout=(10, 10))
+        response.raise_for_status()
+        content = response.content
+
+        if len(content) > max_size:
+            raise FileSizeLimitExceededError("File size exceeds the limit of 10MB")

         if not content:
-            raise ValueError("Empty content from url")
+            raise EmptyContentError("Empty content from url")

         try:
             data = content.decode("utf-8")
         except UnicodeDecodeError as e:
-            raise ValueError(f"Error decoding content: {e}")
+            raise ContentDecodingError(f"Error decoding content: {e}")

         return cls.import_and_create_new_app(tenant_id, data, args, account)
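Reviewer note: routing the download through the ssrf_proxy helper centralizes SSRF filtering, but the 10MB cap is now checked only after the whole body is in memory, whereas the removed httpx.stream() loop aborted mid-download. For comparison, a sketch of the old incremental cap (plain httpx; the URL is hypothetical):

import httpx

max_size = 10 * 1024 * 1024  # 10MB
content = b""
with httpx.stream("GET", "https://example.com/app.yaml", follow_redirects=True, timeout=10.0) as r:
    r.raise_for_status()
    for chunk in r.iter_bytes():
        content += chunk
        if len(content) > max_size:
            raise ValueError("File size exceeds the limit of 10MB")  # abort before reading more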
@@ -71,14 +75,14 @@ class AppDslService:
         try:
             import_data = yaml.safe_load(data)
         except yaml.YAMLError:
-            raise ValueError("Invalid YAML format in data argument.")
+            raise InvalidYAMLFormatError("Invalid YAML format in data argument.")

         # check or repair dsl version
-        import_data = cls._check_or_fix_dsl(import_data)
+        import_data = _check_or_fix_dsl(import_data)

         app_data = import_data.get("app")
         if not app_data:
-            raise ValueError("Missing app in data argument")
+            raise MissingAppDataError("Missing app in data argument")

         # get app basic info
         name = args.get("name") or app_data.get("name")
|
@ -90,11 +94,18 @@ class AppDslService:
|
||||||
|
|
||||||
# import dsl and create app
|
# import dsl and create app
|
||||||
app_mode = AppMode.value_of(app_data.get("mode"))
|
app_mode = AppMode.value_of(app_data.get("mode"))
|
||||||
|
|
||||||
if app_mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
|
if app_mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
|
||||||
|
workflow_data = import_data.get("workflow")
|
||||||
|
if not workflow_data or not isinstance(workflow_data, dict):
|
||||||
|
raise MissingWorkflowDataError(
|
||||||
|
"Missing workflow in data argument when app mode is advanced-chat or workflow"
|
||||||
|
)
|
||||||
|
|
||||||
app = cls._import_and_create_new_workflow_based_app(
|
app = cls._import_and_create_new_workflow_based_app(
|
||||||
tenant_id=tenant_id,
|
tenant_id=tenant_id,
|
||||||
app_mode=app_mode,
|
app_mode=app_mode,
|
||||||
workflow_data=import_data.get("workflow"),
|
workflow_data=workflow_data,
|
||||||
account=account,
|
account=account,
|
||||||
name=name,
|
name=name,
|
||||||
description=description,
|
description=description,
|
||||||
|
@ -104,10 +115,16 @@ class AppDslService:
|
||||||
use_icon_as_answer_icon=use_icon_as_answer_icon,
|
use_icon_as_answer_icon=use_icon_as_answer_icon,
|
||||||
)
|
)
|
||||||
elif app_mode in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION}:
|
elif app_mode in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION}:
|
||||||
|
model_config = import_data.get("model_config")
|
||||||
|
if not model_config or not isinstance(model_config, dict):
|
||||||
|
raise MissingModelConfigError(
|
||||||
|
"Missing model_config in data argument when app mode is chat, agent-chat or completion"
|
||||||
|
)
|
||||||
|
|
||||||
app = cls._import_and_create_new_model_config_based_app(
|
app = cls._import_and_create_new_model_config_based_app(
|
||||||
tenant_id=tenant_id,
|
tenant_id=tenant_id,
|
||||||
app_mode=app_mode,
|
app_mode=app_mode,
|
||||||
model_config_data=import_data.get("model_config"),
|
model_config_data=model_config,
|
||||||
account=account,
|
account=account,
|
||||||
name=name,
|
name=name,
|
||||||
description=description,
|
description=description,
|
||||||
|
@@ -117,7 +134,7 @@ class AppDslService:
                 use_icon_as_answer_icon=use_icon_as_answer_icon,
             )
         else:
-            raise ValueError("Invalid app mode")
+            raise InvalidAppModeError("Invalid app mode")

         return app
@@ -132,26 +149,32 @@ class AppDslService:
         try:
             import_data = yaml.safe_load(data)
         except yaml.YAMLError:
-            raise ValueError("Invalid YAML format in data argument.")
+            raise InvalidYAMLFormatError("Invalid YAML format in data argument.")

         # check or repair dsl version
-        import_data = cls._check_or_fix_dsl(import_data)
+        import_data = _check_or_fix_dsl(import_data)

         app_data = import_data.get("app")
         if not app_data:
-            raise ValueError("Missing app in data argument")
+            raise MissingAppDataError("Missing app in data argument")

         # import dsl and overwrite app
         app_mode = AppMode.value_of(app_data.get("mode"))
         if app_mode not in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
-            raise ValueError("Only support import workflow in advanced-chat or workflow app.")
+            raise InvalidAppModeError("Only support import workflow in advanced-chat or workflow app.")

         if app_data.get("mode") != app_model.mode:
             raise ValueError(f"App mode {app_data.get('mode')} is not matched with current app mode {app_mode.value}")

+        workflow_data = import_data.get("workflow")
+        if not workflow_data or not isinstance(workflow_data, dict):
+            raise MissingWorkflowDataError(
+                "Missing workflow in data argument when app mode is advanced-chat or workflow"
+            )
+
         return cls._import_and_overwrite_workflow_based_app(
             app_model=app_model,
-            workflow_data=import_data.get("workflow"),
+            workflow_data=workflow_data,
             account=account,
         )
@ -186,35 +209,12 @@ class AppDslService:
|
||||||
|
|
||||||
return yaml.dump(export_data, allow_unicode=True)
|
return yaml.dump(export_data, allow_unicode=True)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _check_or_fix_dsl(cls, import_data: dict) -> dict:
|
|
||||||
"""
|
|
||||||
Check or fix dsl
|
|
||||||
|
|
||||||
:param import_data: import data
|
|
||||||
"""
|
|
||||||
if not import_data.get("version"):
|
|
||||||
import_data["version"] = "0.1.0"
|
|
||||||
|
|
||||||
if not import_data.get("kind") or import_data.get("kind") != "app":
|
|
||||||
import_data["kind"] = "app"
|
|
||||||
|
|
||||||
if import_data.get("version") != current_dsl_version:
|
|
||||||
# Currently only one DSL version, so no difference checks or compatibility fixes will be performed.
|
|
||||||
logger.warning(
|
|
||||||
f"DSL version {import_data.get('version')} is not compatible "
|
|
||||||
f"with current version {current_dsl_version}, related to "
|
|
||||||
f"Dify version {dsl_to_dify_version_mapping.get(current_dsl_version)}."
|
|
||||||
)
|
|
||||||
|
|
||||||
return import_data
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _import_and_create_new_workflow_based_app(
|
def _import_and_create_new_workflow_based_app(
|
||||||
cls,
|
cls,
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
app_mode: AppMode,
|
app_mode: AppMode,
|
||||||
workflow_data: dict,
|
workflow_data: Mapping[str, Any],
|
||||||
account: Account,
|
account: Account,
|
||||||
name: str,
|
name: str,
|
||||||
description: str,
|
description: str,
|
||||||
|
@ -238,7 +238,9 @@ class AppDslService:
|
||||||
:param use_icon_as_answer_icon: use app icon as answer icon
|
:param use_icon_as_answer_icon: use app icon as answer icon
|
||||||
"""
|
"""
|
||||||
if not workflow_data:
|
if not workflow_data:
|
||||||
raise ValueError("Missing workflow in data argument when app mode is advanced-chat or workflow")
|
raise MissingWorkflowDataError(
|
||||||
|
"Missing workflow in data argument when app mode is advanced-chat or workflow"
|
||||||
|
)
|
||||||
|
|
||||||
app = cls._create_app(
|
app = cls._create_app(
|
||||||
tenant_id=tenant_id,
|
tenant_id=tenant_id,
|
||||||
|
@ -277,7 +279,7 @@ class AppDslService:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _import_and_overwrite_workflow_based_app(
|
def _import_and_overwrite_workflow_based_app(
|
||||||
cls, app_model: App, workflow_data: dict, account: Account
|
cls, app_model: App, workflow_data: Mapping[str, Any], account: Account
|
||||||
) -> Workflow:
|
) -> Workflow:
|
||||||
"""
|
"""
|
||||||
Import app dsl and overwrite workflow based app
|
Import app dsl and overwrite workflow based app
|
||||||
|
@ -287,7 +289,9 @@ class AppDslService:
|
||||||
:param account: Account instance
|
:param account: Account instance
|
||||||
"""
|
"""
|
||||||
if not workflow_data:
|
if not workflow_data:
|
||||||
raise ValueError("Missing workflow in data argument when app mode is advanced-chat or workflow")
|
raise MissingWorkflowDataError(
|
||||||
|
"Missing workflow in data argument when app mode is advanced-chat or workflow"
|
||||||
|
)
|
||||||
|
|
||||||
# fetch draft workflow by app_model
|
# fetch draft workflow by app_model
|
||||||
workflow_service = WorkflowService()
|
workflow_service = WorkflowService()
|
||||||
|
@ -323,7 +327,7 @@ class AppDslService:
|
||||||
cls,
|
cls,
|
||||||
tenant_id: str,
|
tenant_id: str,
|
||||||
app_mode: AppMode,
|
app_mode: AppMode,
|
||||||
model_config_data: dict,
|
model_config_data: Mapping[str, Any],
|
||||||
account: Account,
|
account: Account,
|
||||||
name: str,
|
name: str,
|
||||||
description: str,
|
description: str,
|
||||||
|
@ -345,7 +349,9 @@ class AppDslService:
|
||||||
:param icon_background: app icon background
|
:param icon_background: app icon background
|
||||||
"""
|
"""
|
||||||
if not model_config_data:
|
if not model_config_data:
|
||||||
raise ValueError("Missing model_config in data argument when app mode is chat, agent-chat or completion")
|
raise MissingModelConfigError(
|
||||||
|
"Missing model_config in data argument when app mode is chat, agent-chat or completion"
|
||||||
|
)
|
||||||
|
|
||||||
app = cls._create_app(
|
app = cls._create_app(
|
||||||
tenant_id=tenant_id,
|
tenant_id=tenant_id,
|
||||||
|
@ -448,3 +454,34 @@ class AppDslService:
|
||||||
raise ValueError("Missing app configuration, please check.")
|
raise ValueError("Missing app configuration, please check.")
|
||||||
|
|
||||||
export_data["model_config"] = app_model_config.to_dict()
|
export_data["model_config"] = app_model_config.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def _check_or_fix_dsl(import_data: dict[str, Any]) -> Mapping[str, Any]:
|
||||||
|
"""
|
||||||
|
Check or fix dsl
|
||||||
|
|
||||||
|
:param import_data: import data
|
||||||
|
:raises DSLVersionNotSupportedError: if the imported DSL version is newer than the current version
|
||||||
|
"""
|
||||||
|
if not import_data.get("version"):
|
||||||
|
import_data["version"] = "0.1.0"
|
||||||
|
|
||||||
|
if not import_data.get("kind") or import_data.get("kind") != "app":
|
||||||
|
import_data["kind"] = "app"
|
||||||
|
|
||||||
|
imported_version = import_data.get("version")
|
||||||
|
if imported_version != current_dsl_version:
|
||||||
|
if imported_version and version.parse(imported_version) > version.parse(current_dsl_version):
|
||||||
|
raise DSLVersionNotSupportedError(
|
||||||
|
f"The imported DSL version {imported_version} is newer than "
|
||||||
|
f"the current supported version {current_dsl_version}. "
|
||||||
|
f"Please upgrade your Dify instance to import this configuration."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"DSL version {imported_version} is older than "
|
||||||
|
f"the current version {current_dsl_version}. "
|
||||||
|
f"This may cause compatibility issues."
|
||||||
|
)
|
||||||
|
|
||||||
|
return import_data
|
|
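The new module-level `_check_or_fix_dsl` turns a version mismatch into a hard failure only when the imported DSL is newer than the running instance; older versions still pass with a warning. A minimal runnable sketch of that gate, assuming an illustrative `current_dsl_version` of "0.1.2" (the real constant lives in `services.app_dsl_service.service`):

```python
# Minimal sketch of the version gate above; current_dsl_version is an
# assumed value here, not necessarily what the service ships with.
from packaging import version

current_dsl_version = "0.1.2"  # illustrative only

def check_dsl_version(imported: str | None) -> None:
    imported = imported or "0.1.0"  # missing versions are defaulted upstream
    if version.parse(imported) > version.parse(current_dsl_version):
        raise ValueError(f"DSL {imported} is newer than {current_dsl_version}")

check_dsl_version("0.1.0")  # older: accepted (the real code logs a warning)
check_dsl_version(None)     # missing: defaulted, accepted
# check_dsl_version("0.2.0")  # would raise: upgrade the instance first
```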
@@ -736,11 +736,12 @@ class DocumentService:
         dataset.retrieval_model = document_data.get("retrieval_model") or default_retrieval_model

         documents = []
-        batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
         if document_data.get("original_document_id"):
             document = DocumentService.update_document_with_dataset_id(dataset, document_data, account)
             documents.append(document)
+            batch = document.batch
         else:
+            batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
             # save process rule
             if not dataset_process_rule:
                 process_rule = document_data["process_rule"]

@@ -921,7 +922,7 @@ class DocumentService:
         if duplicate_document_ids:
             duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids)

         return documents, batch

     @staticmethod
     def check_documents_upload_quota(count: int, features: FeatureModel):
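The `batch` change matters for re-uploads: updating an existing document now keeps that document's original batch id instead of always minting a new one, so only genuinely new uploads get a fresh identifier. A sketch of the id format used for new uploads:

```python
# Batch id format used above for new uploads: a wall-clock timestamp plus a
# six-digit random suffix, which keeps concurrent batches distinguishable.
import random
import time

def new_batch_id() -> str:
    return time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))

print(new_batch_id())  # e.g. "20241105101530123456"
```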
@@ -35,7 +35,7 @@ class FileService:
         filename = file.filename
         if not filename:
             raise FileNotExistsError
-        extension = filename.split(".")[-1]
+        extension = filename.split(".")[-1].lower()
         if len(filename) > 200:
             filename = filename.split(".")[0][:200] + "." + extension
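Lower-casing the extension makes every downstream check case-insensitive, so `photo.JPG` and `photo.jpg` are treated alike. A self-contained sketch (the allowlist is illustrative, not Dify's actual one):

```python
ALLOWED_EXTENSIONS = {"jpg", "png", "pdf"}  # illustrative allowlist only

def extension_of(filename: str) -> str:
    # Mirrors the change above: take the last dot-separated part, lowercased.
    return filename.split(".")[-1].lower()

assert extension_of("photo.JPG") == "jpg"
assert extension_of("photo.JPG") in ALLOWED_EXTENSIONS
```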
@@ -84,5 +84,10 @@ VOLC_EMBEDDING_ENDPOINT_ID=
 # 360 AI Credentials
 ZHINAO_API_KEY=

+# VESSL AI Credentials
+VESSL_AI_MODEL_NAME=
+VESSL_AI_API_KEY=
+VESSL_AI_ENDPOINT_URL=
+
 # Gitee AI Credentials
 GITEE_AI_API_KEY=
api/tests/integration_tests/model_runtime/vessl_ai/test_llm.py (new file, 131 lines)
@@ -0,0 +1,131 @@
import os
from collections.abc import Generator

import pytest

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    SystemPromptMessage,
    UserPromptMessage,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.vessl_ai.llm.llm import VesslAILargeLanguageModel


def test_validate_credentials():
    model = VesslAILargeLanguageModel()

    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model=os.environ.get("VESSL_AI_MODEL_NAME"),
            credentials={
                "api_key": "invalid_key",
                "endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
                "mode": "chat",
            },
        )

    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model=os.environ.get("VESSL_AI_MODEL_NAME"),
            credentials={
                "api_key": os.environ.get("VESSL_AI_API_KEY"),
                "endpoint_url": "http://invalid_url",
                "mode": "chat",
            },
        )

    model.validate_credentials(
        model=os.environ.get("VESSL_AI_MODEL_NAME"),
        credentials={
            "api_key": os.environ.get("VESSL_AI_API_KEY"),
            "endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
            "mode": "chat",
        },
    )


def test_invoke_model():
    model = VesslAILargeLanguageModel()

    response = model.invoke(
        model=os.environ.get("VESSL_AI_MODEL_NAME"),
        credentials={
            "api_key": os.environ.get("VESSL_AI_API_KEY"),
            "endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
            "mode": "chat",
        },
        prompt_messages=[
            SystemPromptMessage(
                content="You are a helpful AI assistant.",
            ),
            UserPromptMessage(content="Who are you?"),
        ],
        model_parameters={
            "temperature": 1.0,
            "top_k": 2,
            "top_p": 0.5,
        },
        stop=["How"],
        stream=False,
        user="abc-123",
    )

    assert isinstance(response, LLMResult)
    assert len(response.message.content) > 0


def test_invoke_stream_model():
    model = VesslAILargeLanguageModel()

    response = model.invoke(
        model=os.environ.get("VESSL_AI_MODEL_NAME"),
        credentials={
            "api_key": os.environ.get("VESSL_AI_API_KEY"),
            "endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
            "mode": "chat",
        },
        prompt_messages=[
            SystemPromptMessage(
                content="You are a helpful AI assistant.",
            ),
            UserPromptMessage(content="Who are you?"),
        ],
        model_parameters={
            "temperature": 1.0,
            "top_k": 2,
            "top_p": 0.5,
        },
        stop=["How"],
        stream=True,
        user="abc-123",
    )

    assert isinstance(response, Generator)

    for chunk in response:
        assert isinstance(chunk, LLMResultChunk)
        assert isinstance(chunk.delta, LLMResultChunkDelta)
        assert isinstance(chunk.delta.message, AssistantPromptMessage)


def test_get_num_tokens():
    model = VesslAILargeLanguageModel()

    num_tokens = model.get_num_tokens(
        model=os.environ.get("VESSL_AI_MODEL_NAME"),
        credentials={
            "api_key": os.environ.get("VESSL_AI_API_KEY"),
            "endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
        },
        prompt_messages=[
            SystemPromptMessage(
                content="You are a helpful AI assistant.",
            ),
            UserPromptMessage(content="Hello World!"),
        ],
    )

    assert isinstance(num_tokens, int)
    assert num_tokens == 21
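These integration tests read the deployment to target from the environment; the variable names match the `.env.example` entries added above. A minimal sketch of the expected setup, with placeholder values only:

```python
import os

# Placeholder values; point these at a real VESSL AI deployment before running.
os.environ.setdefault("VESSL_AI_MODEL_NAME", "my-model")
os.environ.setdefault("VESSL_AI_API_KEY", "sk-placeholder")
os.environ.setdefault("VESSL_AI_ENDPOINT_URL", "https://endpoint.example.com")
```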
api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py (new file, 71 lines)
@@ -0,0 +1,71 @@
from unittest.mock import MagicMock, patch

import pytest

from core.rag.datasource.vdb.oceanbase.oceanbase_vector import (
    OceanBaseVector,
    OceanBaseVectorConfig,
)
from tests.integration_tests.vdb.__mock.tcvectordb import setup_tcvectordb_mock
from tests.integration_tests.vdb.test_vector_store import (
    AbstractVectorTest,
    get_example_text,
    setup_mock_redis,
)


@pytest.fixture
def oceanbase_vector():
    return OceanBaseVector(
        "dify_test_collection",
        config=OceanBaseVectorConfig(
            host="127.0.0.1",
            port="2881",
            user="root@test",
            database="test",
            password="test",
        ),
    )


class OceanBaseVectorTest(AbstractVectorTest):
    def __init__(self, vector: OceanBaseVector):
        super().__init__()
        self.vector = vector

    def search_by_vector(self):
        hits_by_vector = self.vector.search_by_vector(query_vector=self.example_embedding)
        assert len(hits_by_vector) == 0

    def search_by_full_text(self):
        hits_by_full_text = self.vector.search_by_full_text(query=get_example_text())
        assert len(hits_by_full_text) == 0

    def text_exists(self):
        exist = self.vector.text_exists(self.example_doc_id)
        assert exist == True

    def get_ids_by_metadata_field(self):
        ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
        assert len(ids) == 0


@pytest.fixture
def setup_mock_oceanbase_client():
    with patch("core.rag.datasource.vdb.oceanbase.oceanbase_vector.ObVecClient", new_callable=MagicMock) as mock_client:
        yield mock_client


@pytest.fixture
def setup_mock_oceanbase_vector(oceanbase_vector):
    with patch.object(oceanbase_vector, "_client"):
        yield oceanbase_vector


def test_oceanbase_vector(
    setup_mock_redis,
    setup_mock_oceanbase_client,
    setup_mock_oceanbase_vector,
    oceanbase_vector,
):
    OceanBaseVectorTest(oceanbase_vector).run_all_tests()
@@ -430,3 +430,37 @@ def test_multi_colons_parse(setup_http_mock):
     assert urlencode({"Redirect": "http://example2.com"}) in result.process_data.get("request", "")
     assert 'form-data; name="Redirect"\r\n\r\nhttp://example6.com' in result.process_data.get("request", "")
     # assert "http://example3.com" == resp.get("headers", {}).get("referer")
+
+
+def test_image_file(monkeypatch):
+    from types import SimpleNamespace
+
+    monkeypatch.setattr(
+        "core.tools.tool_file_manager.ToolFileManager.create_file_by_raw",
+        lambda *args, **kwargs: SimpleNamespace(id="1"),
+    )
+
+    node = init_http_node(
+        config={
+            "id": "1",
+            "data": {
+                "title": "http",
+                "desc": "",
+                "method": "get",
+                "url": "https://cloud.dify.ai/logo/logo-site.png",
+                "authorization": {
+                    "type": "no-auth",
+                    "config": None,
+                },
+                "params": "",
+                "headers": "",
+                "body": None,
+            },
+        }
+    )
+
+    result = node._run()
+    assert result.process_data is not None
+    assert result.outputs is not None
+    resp = result.outputs
+    assert len(resp.get("files", [])) == 1
@@ -22,17 +22,3 @@ from controllers.console.version import _has_new_version
 )
 def test_has_new_version(latest_version, current_version, expected):
     assert _has_new_version(latest_version=latest_version, current_version=current_version) == expected
-
-
-def test_has_new_version_invalid_input():
-    with pytest.raises(ValueError):
-        _has_new_version(latest_version="1.0", current_version="1.0.0")
-
-    with pytest.raises(ValueError):
-        _has_new_version(latest_version="1.0.0", current_version="1.0")
-
-    with pytest.raises(ValueError):
-        _has_new_version(latest_version="invalid", current_version="1.0.0")
-
-    with pytest.raises(ValueError):
-        _has_new_version(latest_version="1.0.0", current_version="invalid")
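The deleted tests asserted that `_has_new_version` rejects anything that is not a full `x.y.z` version string. A simplified sketch of the behavior they exercised, assuming strict shape validation before comparison; this is an approximation, not the helper's actual implementation:

```python
import re
from packaging import version

def has_new_version(latest_version: str, current_version: str) -> bool:
    # Reject anything that is not a strict x.y.z string, as the removed
    # tests expected the real helper to do.
    for v in (latest_version, current_version):
        if not re.fullmatch(r"\d+\.\d+\.\d+", v):
            raise ValueError(f"Invalid version string: {v}")
    return version.parse(latest_version) > version.parse(current_version)

assert has_new_version("1.0.1", "1.0.0")
# has_new_version("1.0", "1.0.0") would raise ValueError, per the removed tests
```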
api/tests/unit_tests/core/workflow/nodes/llm/test_node.py (new file, 125 lines)
@@ -0,0 +1,125 @@
import pytest

from core.app.entities.app_invoke_entities import InvokeFrom
from core.file import File, FileTransferMethod, FileType
from core.model_runtime.entities.message_entities import ImagePromptMessageContent
from core.variables import ArrayAnySegment, ArrayFileSegment, NoneSegment
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine import Graph, GraphInitParams, GraphRuntimeState
from core.workflow.nodes.answer import AnswerStreamGenerateRoute
from core.workflow.nodes.end import EndStreamParam
from core.workflow.nodes.llm.entities import ContextConfig, LLMNodeData, ModelConfig, VisionConfig, VisionConfigOptions
from core.workflow.nodes.llm.node import LLMNode
from models.enums import UserFrom
from models.workflow import WorkflowType


class TestLLMNode:
    @pytest.fixture
    def llm_node(self):
        data = LLMNodeData(
            title="Test LLM",
            model=ModelConfig(provider="openai", name="gpt-3.5-turbo", mode="chat", completion_params={}),
            prompt_template=[],
            memory=None,
            context=ContextConfig(enabled=False),
            vision=VisionConfig(
                enabled=True,
                configs=VisionConfigOptions(
                    variable_selector=["sys", "files"],
                    detail=ImagePromptMessageContent.DETAIL.HIGH,
                ),
            ),
        )
        variable_pool = VariablePool(
            system_variables={},
            user_inputs={},
        )
        node = LLMNode(
            id="1",
            config={
                "id": "1",
                "data": data.model_dump(),
            },
            graph_init_params=GraphInitParams(
                tenant_id="1",
                app_id="1",
                workflow_type=WorkflowType.WORKFLOW,
                workflow_id="1",
                graph_config={},
                user_id="1",
                user_from=UserFrom.ACCOUNT,
                invoke_from=InvokeFrom.SERVICE_API,
                call_depth=0,
            ),
            graph=Graph(
                root_node_id="1",
                answer_stream_generate_routes=AnswerStreamGenerateRoute(
                    answer_dependencies={},
                    answer_generate_route={},
                ),
                end_stream_param=EndStreamParam(
                    end_dependencies={},
                    end_stream_variable_selector_mapping={},
                ),
            ),
            graph_runtime_state=GraphRuntimeState(
                variable_pool=variable_pool,
                start_at=0,
            ),
        )
        return node

    def test_fetch_files_with_file_segment(self, llm_node):
        file = File(
            id="1",
            tenant_id="test",
            type=FileType.IMAGE,
            filename="test.jpg",
            transfer_method=FileTransferMethod.LOCAL_FILE,
            related_id="1",
        )
        llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], file)

        result = llm_node._fetch_files(selector=["sys", "files"])
        assert result == [file]

    def test_fetch_files_with_array_file_segment(self, llm_node):
        files = [
            File(
                id="1",
                tenant_id="test",
                type=FileType.IMAGE,
                filename="test1.jpg",
                transfer_method=FileTransferMethod.LOCAL_FILE,
                related_id="1",
            ),
            File(
                id="2",
                tenant_id="test",
                type=FileType.IMAGE,
                filename="test2.jpg",
                transfer_method=FileTransferMethod.LOCAL_FILE,
                related_id="2",
            ),
        ]
        llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], ArrayFileSegment(value=files))

        result = llm_node._fetch_files(selector=["sys", "files"])
        assert result == files

    def test_fetch_files_with_none_segment(self, llm_node):
        llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], NoneSegment())

        result = llm_node._fetch_files(selector=["sys", "files"])
        assert result == []

    def test_fetch_files_with_array_any_segment(self, llm_node):
        llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], ArrayAnySegment(value=[]))

        result = llm_node._fetch_files(selector=["sys", "files"])
        assert result == []

    def test_fetch_files_with_non_existent_variable(self, llm_node):
        result = llm_node._fetch_files(selector=["sys", "files"])
        assert result == []
api/tests/unit_tests/oss/__mock/aliyun_oss.py (new file, 100 lines)
@@ -0,0 +1,100 @@
import os
import posixpath
from unittest.mock import MagicMock

import pytest
from _pytest.monkeypatch import MonkeyPatch
from oss2 import Bucket
from oss2.models import GetObjectResult, PutObjectResult

from tests.unit_tests.oss.__mock.base import (
    get_example_bucket,
    get_example_data,
    get_example_filename,
    get_example_filepath,
    get_example_folder,
)


class MockResponse:
    def __init__(self, status, headers, request_id):
        self.status = status
        self.headers = headers
        self.request_id = request_id


class MockAliyunOssClass:
    def __init__(
        self,
        auth,
        endpoint,
        bucket_name,
        is_cname=False,
        session=None,
        connect_timeout=None,
        app_name="",
        enable_crc=True,
        proxies=None,
        region=None,
        cloudbox_id=None,
        is_path_style=False,
        is_verify_object_strict=True,
    ):
        self.bucket_name = get_example_bucket()
        self.key = posixpath.join(get_example_folder(), get_example_filename())
        self.content = get_example_data()
        self.filepath = get_example_filepath()
        self.resp = MockResponse(
            200,
            {
                "etag": "ee8de918d05640145b18f70f4c3aa602",
                "x-oss-version-id": "CAEQNhiBgMDJgZCA0BYiIDc4MGZjZGI2OTBjOTRmNTE5NmU5NmFhZjhjYmY0****",
            },
            "request_id",
        )

    def put_object(self, key, data, headers=None, progress_callback=None):
        assert key == self.key
        assert data == self.content
        return PutObjectResult(self.resp)

    def get_object(self, key, byte_range=None, headers=None, progress_callback=None, process=None, params=None):
        assert key == self.key

        get_object_output = MagicMock(GetObjectResult)
        get_object_output.read.return_value = self.content
        return get_object_output

    def get_object_to_file(
        self, key, filename, byte_range=None, headers=None, progress_callback=None, process=None, params=None
    ):
        assert key == self.key
        assert filename == self.filepath

    def object_exists(self, key, headers=None):
        assert key == self.key
        return True

    def delete_object(self, key, params=None, headers=None):
        assert key == self.key
        self.resp.headers["x-oss-delete-marker"] = True
        return self.resp


MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"


@pytest.fixture
def setup_aliyun_oss_mock(monkeypatch: MonkeyPatch):
    if MOCK:
        monkeypatch.setattr(Bucket, "__init__", MockAliyunOssClass.__init__)
        monkeypatch.setattr(Bucket, "put_object", MockAliyunOssClass.put_object)
        monkeypatch.setattr(Bucket, "get_object", MockAliyunOssClass.get_object)
        monkeypatch.setattr(Bucket, "get_object_to_file", MockAliyunOssClass.get_object_to_file)
        monkeypatch.setattr(Bucket, "object_exists", MockAliyunOssClass.object_exists)
        monkeypatch.setattr(Bucket, "delete_object", MockAliyunOssClass.delete_object)

    yield

    if MOCK:
        monkeypatch.undo()
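The mock is opt-in: the fixture only monkeypatches `oss2.Bucket` when `MOCK_SWITCH=true`, so the same suite can also run against a real bucket. The gating pattern in isolation:

```python
import os

# Same switch the fixture above reads; unset or "false" means no mocking.
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
print("oss2 will be mocked" if MOCK else "oss2 calls hit the real backend")
```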
@@ -0,0 +1,22 @@ (new file)
from unittest.mock import MagicMock, patch

import pytest
from oss2 import Auth

from extensions.storage.aliyun_oss_storage import AliyunOssStorage
from tests.unit_tests.oss.__mock.aliyun_oss import setup_aliyun_oss_mock
from tests.unit_tests.oss.__mock.base import (
    BaseStorageTest,
    get_example_bucket,
    get_example_folder,
)


class TestAliyunOss(BaseStorageTest):
    @pytest.fixture(autouse=True)
    def setup_method(self, setup_aliyun_oss_mock):
        """Executed before each test method."""
        with patch.object(Auth, "__init__", return_value=None):
            self.storage = AliyunOssStorage()
            self.storage.bucket_name = get_example_bucket()
            self.storage.folder = get_example_folder()
@@ -0,0 +1,41 @@ (new file)
import pytest
from packaging import version

from services.app_dsl_service import AppDslService
from services.app_dsl_service.exc import DSLVersionNotSupportedError
from services.app_dsl_service.service import _check_or_fix_dsl, current_dsl_version


class TestAppDSLService:
    def test_check_or_fix_dsl_missing_version(self):
        import_data = {}
        result = _check_or_fix_dsl(import_data)
        assert result["version"] == "0.1.0"
        assert result["kind"] == "app"

    def test_check_or_fix_dsl_missing_kind(self):
        import_data = {"version": "0.1.0"}
        result = _check_or_fix_dsl(import_data)
        assert result["kind"] == "app"

    def test_check_or_fix_dsl_older_version(self):
        import_data = {"version": "0.0.9", "kind": "app"}
        result = _check_or_fix_dsl(import_data)
        assert result["version"] == "0.0.9"

    def test_check_or_fix_dsl_current_version(self):
        import_data = {"version": current_dsl_version, "kind": "app"}
        result = _check_or_fix_dsl(import_data)
        assert result["version"] == current_dsl_version

    def test_check_or_fix_dsl_newer_version(self):
        current_version = version.parse(current_dsl_version)
        newer_version = f"{current_version.major}.{current_version.minor + 1}.0"
        import_data = {"version": newer_version, "kind": "app"}
        with pytest.raises(DSLVersionNotSupportedError):
            _check_or_fix_dsl(import_data)

    def test_check_or_fix_dsl_invalid_kind(self):
        import_data = {"version": current_dsl_version, "kind": "invalid"}
        result = _check_or_fix_dsl(import_data)
        assert result["kind"] == "app"
@@ -13,3 +13,4 @@ pytest api/tests/integration_tests/vdb/chroma \
   api/tests/integration_tests/vdb/tcvectordb \
   api/tests/integration_tests/vdb/upstash \
   api/tests/integration_tests/vdb/couchbase \
+  api/tests/integration_tests/vdb/oceanbase \
@@ -455,6 +455,20 @@ TIDB_VECTOR_USER=xxx.root
 TIDB_VECTOR_PASSWORD=xxxxxx
 TIDB_VECTOR_DATABASE=dify

+# Tidb on qdrant configuration, only available when VECTOR_STORE is `tidb_on_qdrant`
+TIDB_ON_QDRANT_URL=http://127.0.0.1
+TIDB_ON_QDRANT_API_KEY=dify
+TIDB_ON_QDRANT_CLIENT_TIMEOUT=20
+TIDB_ON_QDRANT_GRPC_ENABLED=false
+TIDB_ON_QDRANT_GRPC_PORT=6334
+TIDB_PUBLIC_KEY=dify
+TIDB_PRIVATE_KEY=dify
+TIDB_API_URL=http://127.0.0.1
+TIDB_IAM_API_URL=http://127.0.0.1
+TIDB_REGION=regions/aws-us-east-1
+TIDB_PROJECT_ID=dify
+TIDB_SPEND_LIMIT=100
+
 # Chroma configuration, only available when VECTOR_STORE is `chroma`
 CHROMA_HOST=127.0.0.1
 CHROMA_PORT=8000

@@ -517,6 +531,14 @@ VIKINGDB_SCHEMA=http
 VIKINGDB_CONNECTION_TIMEOUT=30
 VIKINGDB_SOCKET_TIMEOUT=30

+# OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase`
+OCEANBASE_VECTOR_HOST=oceanbase-vector
+OCEANBASE_VECTOR_PORT=2881
+OCEANBASE_VECTOR_USER=root@test
+OCEANBASE_VECTOR_PASSWORD=
+OCEANBASE_VECTOR_DATABASE=test
+OCEANBASE_MEMORY_LIMIT=6G
+
 # ------------------------------
 # Knowledge Configuration
 # ------------------------------

@@ -536,6 +558,22 @@ ETL_TYPE=dify
 # For example: http://unstructured:8000/general/v0/general
 UNSTRUCTURED_API_URL=

+# ------------------------------
+# Model Configuration
+# ------------------------------
+
+# The maximum number of tokens allowed for prompt generation.
+# This setting controls the upper limit of tokens that can be used by the LLM
+# when generating a prompt in the prompt generation tool.
+# Default: 512 tokens.
+PROMPT_GENERATION_MAX_TOKENS=512
+
+# The maximum number of tokens allowed for code generation.
+# This setting controls the upper limit of tokens that can be used by the LLM
+# when generating code in the code generation tool.
+# Default: 1024 tokens.
+CODE_GENERATION_MAX_TOKENS=1024
+
 # ------------------------------
 # Multi-modal Configuration
 # ------------------------------

@@ -550,6 +588,12 @@ MULTIMODAL_SEND_IMAGE_FORMAT=base64
 # Upload image file size limit, default 10M.
 UPLOAD_IMAGE_FILE_SIZE_LIMIT=10

+# Upload video file size limit, default 100M.
+UPLOAD_VIDEO_FILE_SIZE_LIMIT=100
+
+# Upload audio file size limit, default 50M.
+UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
+
 # ------------------------------
 # Sentry Configuration
 # Used for application monitoring and error log tracking.
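Both new generation limits default to the values documented above. A sketch of how such settings are typically consumed on the API side; the variable names match the env file, while the parsing shown is an assumption, not Dify's actual settings loader:

```python
import os

# Defaults mirror the .env entries above.
PROMPT_GENERATION_MAX_TOKENS = int(os.environ.get("PROMPT_GENERATION_MAX_TOKENS", "512"))
CODE_GENERATION_MAX_TOKENS = int(os.environ.get("CODE_GENERATION_MAX_TOKENS", "1024"))

print(PROMPT_GENERATION_MAX_TOKENS, CODE_GENERATION_MAX_TOKENS)  # 512 1024
```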
@@ -56,6 +56,7 @@ services:
       SANDBOX_PORT: ${SANDBOX_PORT:-8194}
     volumes:
       - ./volumes/sandbox/dependencies:/dependencies
+      - ./volumes/sandbox/conf:/conf
     healthcheck:
       test: [ "CMD", "curl", "-f", "http://localhost:8194/health" ]
     networks:

@@ -140,6 +140,18 @@ x-shared-env: &shared-api-worker-env
  TIDB_VECTOR_USER: ${TIDB_VECTOR_USER:-}
  TIDB_VECTOR_PASSWORD: ${TIDB_VECTOR_PASSWORD:-}
  TIDB_VECTOR_DATABASE: ${TIDB_VECTOR_DATABASE:-dify}
+ TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-http://127.0.0.1}
+ TIDB_ON_QDRANT_API_KEY: ${TIDB_ON_QDRANT_API_KEY:-dify}
+ TIDB_ON_QDRANT_CLIENT_TIMEOUT: ${TIDB_ON_QDRANT_CLIENT_TIMEOUT:-20}
+ TIDB_ON_QDRANT_GRPC_ENABLED: ${TIDB_ON_QDRANT_GRPC_ENABLED:-false}
+ TIDB_ON_QDRANT_GRPC_PORT: ${TIDB_ON_QDRANT_GRPC_PORT:-6334}
+ TIDB_PUBLIC_KEY: ${TIDB_PUBLIC_KEY:-dify}
+ TIDB_PRIVATE_KEY: ${TIDB_PRIVATE_KEY:-dify}
+ TIDB_API_URL: ${TIDB_API_URL:-http://127.0.0.1}
+ TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-http://127.0.0.1}
+ TIDB_REGION: ${TIDB_REGION:-regions/aws-us-east-1}
+ TIDB_PROJECT_ID: ${TIDB_PROJECT_ID:-dify}
+ TIDB_SPEND_LIMIT: ${TIDB_SPEND_LIMIT:-100}
  ORACLE_HOST: ${ORACLE_HOST:-oracle}
  ORACLE_PORT: ${ORACLE_PORT:-1521}
  ORACLE_USER: ${ORACLE_USER:-dify}

@@ -195,8 +207,12 @@ x-shared-env: &shared-api-worker-env
  UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
  ETL_TYPE: ${ETL_TYPE:-dify}
  UNSTRUCTURED_API_URL: ${UNSTRUCTURED_API_URL:-}
+ PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
+ CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
  MULTIMODAL_SEND_IMAGE_FORMAT: ${MULTIMODAL_SEND_IMAGE_FORMAT:-base64}
  UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
+ UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}
+ UPLOAD_AUDIO_FILE_SIZE_LIMIT: ${UPLOAD_AUDIO_FILE_SIZE_LIMIT:-50}
  SENTRY_DSN: ${API_SENTRY_DSN:-}
  SENTRY_TRACES_SAMPLE_RATE: ${API_SENTRY_TRACES_SAMPLE_RATE:-1.0}
  SENTRY_PROFILES_SAMPLE_RATE: ${API_SENTRY_PROFILES_SAMPLE_RATE:-1.0}

@@ -243,6 +259,12 @@ x-shared-env: &shared-api-worker-env
  POSITION_PROVIDER_INCLUDES: ${POSITION_PROVIDER_INCLUDES:-}
  POSITION_PROVIDER_EXCLUDES: ${POSITION_PROVIDER_EXCLUDES:-}
  MAX_VARIABLE_SIZE: ${MAX_VARIABLE_SIZE:-204800}
+ OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-http://oceanbase-vector}
+ OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881}
+ OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test}
+ OCEANBASE_VECTOR_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-""}
+ OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test}
+ OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}

 services:
   # API service

@@ -570,6 +592,18 @@ services:
       CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider}
       IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE}

+  # OceanBase vector database
+  oceanbase-vector:
+    image: quay.io/oceanbase/oceanbase-ce:4.3.3.0-100000142024101215
+    profiles:
+      - oceanbase-vector
+    restart: always
+    volumes:
+      - ./volumes/oceanbase/data:/root/ob
+      - ./volumes/oceanbase/conf:/root/.obd/cluster
+    environment:
+      OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
+
   # Oracle vector database
   oracle:
     image: container-registry.oracle.com/database/free:latest
docker/volumes/sandbox/conf/config.yaml (new file, 14 lines)
@@ -0,0 +1,14 @@
app:
  port: 8194
  debug: True
  key: dify-sandbox
max_workers: 4
max_requests: 50
worker_timeout: 5
python_path: /usr/local/bin/python3
enable_network: True # please make sure there is no network risk in your environment
allowed_syscalls: # please leave it empty if you have no idea how seccomp works
proxy:
  socks5: ''
  http: ''
  https: ''

docker/volumes/sandbox/conf/config.yaml.example (new file, 35 lines)
@@ -0,0 +1,35 @@
app:
  port: 8194
  debug: True
  key: dify-sandbox
max_workers: 4
max_requests: 50
worker_timeout: 5
python_path: /usr/local/bin/python3
python_lib_path:
  - /usr/local/lib/python3.10
  - /usr/lib/python3.10
  - /usr/lib/python3
  - /usr/lib/x86_64-linux-gnu
  - /etc/ssl/certs/ca-certificates.crt
  - /etc/nsswitch.conf
  - /etc/hosts
  - /etc/resolv.conf
  - /run/systemd/resolve/stub-resolv.conf
  - /run/resolvconf/resolv.conf
  - /etc/localtime
  - /usr/share/zoneinfo
  - /etc/timezone
  # add more paths if needed
python_pip_mirror_url: https://pypi.tuna.tsinghua.edu.cn/simple
nodejs_path: /usr/local/bin/node
enable_network: True
allowed_syscalls:
  - 1
  - 2
  - 3
  # add all the syscalls which you require
proxy:
  socks5: ''
  http: ''
  https: ''
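With the sandbox service now mounting `./volumes/sandbox/conf:/conf`, the config.yaml above is what dify-sandbox reads at startup. A small sketch of inspecting it from the repo root; PyYAML is assumed available, and the key layout follows the file as shown:

```python
import yaml  # PyYAML, assumed installed

with open("docker/volumes/sandbox/conf/config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["app"]["port"])  # 8194
print(cfg["max_workers"])  # 4
```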
@@ -9,8 +9,8 @@ const DatasetFooter = () => {
     <footer className='px-12 py-6 grow-0 shrink-0'>
       <h3 className='text-xl font-semibold leading-tight text-gradient'>{t('dataset.didYouKnow')}</h3>
       <p className='mt-1 text-sm font-normal leading-tight text-gray-700'>
-        {t('dataset.intro1')}<a className='inline-flex items-center gap-1 link' target='_blank' rel='noopener noreferrer' href='/'>{t('dataset.intro2')}</a>{t('dataset.intro3')}<br />
-        {t('dataset.intro4')}<a className='inline-flex items-center gap-1 link' target='_blank' rel='noopener noreferrer' href='/'>{t('dataset.intro5')}</a>{t('dataset.intro6')}
+        {t('dataset.intro1')}<span className='inline-flex items-center gap-1 text-blue-600'>{t('dataset.intro2')}</span>{t('dataset.intro3')}<br />
+        {t('dataset.intro4')}<span className='inline-flex items-center gap-1 text-blue-600'>{t('dataset.intro5')}</span>{t('dataset.intro6')}
       </p>
     </footer>
   )
@@ -1,6 +1,6 @@
 'use client'

-import type { FC } from 'react'
+import { type FC, useEffect } from 'react'
 import { useContext } from 'use-context-selector'
 import TemplateEn from './template/template.en.mdx'
 import TemplateZh from './template/template.zh.mdx'

@@ -14,6 +14,13 @@ const Doc: FC<DocProps> = ({
   apiBaseUrl,
 }) => {
   const { locale } = useContext(I18n)

+  useEffect(() => {
+    const hash = location.hash
+    if (hash)
+      document.querySelector(hash)?.scrollIntoView()
+  }, [])
+
   return (
     <article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'>
       {
@ -20,17 +20,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
</CodeGroup>
|
</CodeGroup>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
---
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
<Heading
|
<Heading
|
||||||
url='/datasets/{dataset_id}/document/create_by_text'
|
url='/datasets/{dataset_id}/document/create-by-text'
|
||||||
method='POST'
|
method='POST'
|
||||||
title='Create a document from text'
|
title='Create a Document from Text'
|
||||||
name='#create_by_text'
|
name='#create-by-text'
|
||||||
/>
|
/>
|
||||||
<Row>
|
<Row>
|
||||||
<Col>
|
<Col>
|
||||||
This api is based on an existing Knowledge and creates a new document through text based on this Knowledge.
|
This API is based on an existing knowledge and creates a new document through text based on this knowledge.
|
||||||
|
|
||||||
### Params
|
### Params
|
||||||
<Properties>
|
<Properties>
|
||||||
|
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
<Property name='indexing_technique' type='string' key='indexing_technique'>
|
||||||
Index mode
|
Index mode
|
||||||
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
|
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
|
||||||
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
|
- <code>economy</code> Economy: Build using inverted index of keyword table index
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='process_rule' type='object' key='process_rule'>
|
<Property name='process_rule' type='object' key='process_rule'>
|
||||||
Processing rules
|
Processing rules
|
||||||
|
@ -62,7 +62,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
|
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
|
||||||
- <code>remove_urls_emails</code> Delete URL, email address
|
- <code>remove_urls_emails</code> Delete URL, email address
|
||||||
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
|
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
|
||||||
- <code>segmentation</code> (object) segmentation rules
|
- <code>segmentation</code> (object) Segmentation rules
|
||||||
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
|
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
|
||||||
- <code>max_tokens</code> Maximum length (token) defaults to 1000
|
- <code>max_tokens</code> Maximum length (token) defaults to 1000
|
||||||
</Property>
|
</Property>
|
||||||
|
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
<CodeGroup
|
<CodeGroup
|
||||||
title="Request"
|
title="Request"
|
||||||
tag="POST"
|
tag="POST"
|
||||||
label="/datasets/{dataset_id}/document/create_by_text"
|
label="/datasets/{dataset_id}/document/create-by-text"
|
||||||
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
|
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
|
||||||
>
|
>
|
||||||
```bash {{ title: 'cURL' }}
|
```bash {{ title: 'cURL' }}
|
||||||
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
|
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
|
||||||
--header 'Authorization: Bearer {api_key}' \
|
--header 'Authorization: Bearer {api_key}' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data-raw '{
|
--data-raw '{
|
||||||
|
@ -123,17 +123,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
---
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
<Heading
|
<Heading
|
||||||
url='/datasets/{dataset_id}/document/create_by_file'
|
url='/datasets/{dataset_id}/document/create-by-file'
|
||||||
method='POST'
|
method='POST'
|
||||||
title='Create documents from files'
|
title='Create a Document from a File'
|
||||||
name='#create_by_file'
|
name='#create-by-file'
|
||||||
/>
|
/>
|
||||||
<Row>
|
<Row>
|
||||||
<Col>
|
<Col>
|
||||||
This api is based on an existing Knowledge and creates a new document through a file based on this Knowledge.
|
This API is based on an existing knowledge and creates a new document through a file based on this knowledge.
|
||||||
|
|
||||||
### Params
|
### Params
|
||||||
<Properties>
|
<Properties>
|
||||||
|
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
### Request Body
|
### Request Body
|
||||||
<Properties>
|
<Properties>
|
||||||
<Property name='data' type='multipart/form-data json string' key='data'>
|
<Property name='data' type='multipart/form-data json string' key='data'>
|
||||||
- original_document_id Source document ID (optional)
|
- <code>original_document_id</code> Source document ID (optional)
|
||||||
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
|
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
|
||||||
- The source document cannot be an archived document
|
- The source document cannot be an archived document
|
||||||
- When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default
|
- When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default
|
||||||
- When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required
|
- When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required
|
||||||
|
|
||||||
- indexing_technique Index mode
|
- <code>indexing_technique</code> Index mode
|
||||||
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
|
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
|
||||||
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
|
- <code>economy</code> Economy: Build using inverted index of keyword table index
|
||||||
|
|
||||||
- process_rule Processing rules
|
- <code>process_rule</code> Processing rules
|
||||||
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
|
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
|
||||||
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
|
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
|
||||||
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
|
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
|
||||||
|
@ -164,7 +164,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
|
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
|
||||||
- <code>remove_urls_emails</code> Delete URL, email address
|
- <code>remove_urls_emails</code> Delete URL, email address
|
||||||
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
|
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
|
||||||
- <code>segmentation</code> (object) segmentation rules
|
- <code>segmentation</code> (object) Segmentation rules
|
||||||
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
|
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
|
||||||
- <code>max_tokens</code> Maximum length (token) defaults to 1000
|
- <code>max_tokens</code> Maximum length (token) defaults to 1000
|
||||||
</Property>
|
</Property>
|
||||||
|
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
||||||
<CodeGroup
|
<CodeGroup
|
||||||
title="Request"
|
title="Request"
|
||||||
tag="POST"
|
tag="POST"
|
||||||
label="/datasets/{dataset_id}/document/create_by_file"
|
label="/datasets/{dataset_id}/document/create-by-file"
|
||||||
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
|
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
|
||||||
>
|
>
|
||||||
```bash {{ title: 'cURL' }}
|
```bash {{ title: 'cURL' }}
|
||||||
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
|
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
|
||||||
--header 'Authorization: Bearer {api_key}' \
|
--header 'Authorization: Bearer {api_key}' \
|
||||||
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
|
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
|
||||||
--form 'file=@"/path/to/file"'
|
--form 'file=@"/path/to/file"'
|
||||||
|
@ -221,12 +221,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets'
method='POST'
title='Create an empty Knowledge'
title='Create an Empty Knowledge Base'
name='#create_empty_dataset'
/>
<Row>
@ -240,9 +240,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
Knowledge description (optional)
</Property>
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index Technique (optional)
Index technique (optional)
- <code>high_quality</code> high_quality
- <code>high_quality</code> High quality
- <code>economy</code> economy
- <code>economy</code> Economy
</Property>
<Property name='permission' type='string' key='permission'>
Permission
@ -252,21 +252,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Property>
<Property name='provider' type='string' key='provider'>
Provider (optional, default: vendor)
- <code>vendor</code> vendor
- <code>vendor</code> Vendor
- <code>external</code> external knowledge
- <code>external</code> External knowledge
</Property>
<Property name='external_knowledge_api_id' type='str' key='external_knowledge_api_id'>
External Knowledge api id (optional)
External knowledge API ID (optional)
</Property>
<Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
External Knowledge id (optional)
External knowledge ID (optional)
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
>
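The same call from Python, under the same assumed placeholders, and assuming the response carries the new knowledge base ID at the top level:

```python
import os

import requests

API_BASE = "https://api.dify.ai/v1"      # assumed base URL
API_KEY = os.environ["DIFY_API_KEY"]     # hypothetical env var

# Create an empty knowledge base visible only to the caller.
resp = requests.post(
    f"{API_BASE}/datasets",
    headers={
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    },
    json={"name": "name", "permission": "only_me"},
)
resp.raise_for_status()
dataset_id = resp.json()["id"]  # assumed top-level ID field
```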
@ -306,12 +306,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets'
method='GET'
title='Knowledge list'
title='Get Knowledge Base List'
name='#dataset_list'
/>
<Row>
@ -327,9 +327,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
>
@ -369,12 +369,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}'
method='DELETE'
title='Delete knowledge'
title='Delete a Knowledge Base'
name='#delete_dataset'
/>
<Row>
@ -406,17 +406,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='Update document via text'
title='Update a Document with Text'
name='#update_by_text'
name='#update-by-text'
/>
<Row>
<Col>
This api is based on an existing Knowledge and updates the document through text based on this Knowledge.
This API updates a document with text, based on an existing knowledge base.

### Params
<Properties>
@ -446,7 +446,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
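A minimal Python sketch of the renamed `update-by-text` route, under the same assumed placeholders:

```python
import os

import requests

API_BASE = "https://api.dify.ai/v1"      # assumed base URL
API_KEY = os.environ["DIFY_API_KEY"]     # hypothetical env var
DATASET_ID = "your-dataset-id"           # placeholder
DOCUMENT_ID = "your-document-id"         # placeholder

# Replace the document's name and text; fields left out keep their old values.
resp = requests.post(
    f"{API_BASE}/datasets/{DATASET_ID}/documents/{DOCUMENT_ID}/update-by-text",
    headers={
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    },
    json={"name": "name", "text": "text"},
)
resp.raise_for_status()
```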
@ -503,17 +503,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='Update a document from a file'
title='Update a Document with a File'
name='#update_by_file'
name='#update-by-file'
/>
<Row>
<Col>
This api is based on an existing Knowledge, and updates documents through files based on this Knowledge
This API updates a document with a file, based on an existing knowledge base.

### Params
<Properties>
@ -543,7 +543,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -597,12 +597,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
method='GET'
title='Get document embedding status (progress)'
title='Get Document Embedding Status (Progress)'
name='#indexing_status'
/>
<Row>
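Because embedding runs asynchronously, a client typically polls this status route until indexing finishes. A sketch under the same assumed placeholders, assuming the response carries a `data` list with an `indexing_status` per document:

```python
import os
import time

import requests

API_BASE = "https://api.dify.ai/v1"      # assumed base URL
API_KEY = os.environ["DIFY_API_KEY"]     # hypothetical env var
DATASET_ID = "your-dataset-id"           # placeholder
BATCH = "your-batch-id"                  # returned when the document was created

# Poll until every document in the batch has finished indexing.
while True:
    resp = requests.get(
        f"{API_BASE}/datasets/{DATASET_ID}/documents/{BATCH}/indexing-status",
        headers={"Authorization": f"Bearer {API_KEY}"},
    )
    resp.raise_for_status()
    statuses = [doc["indexing_status"] for doc in resp.json()["data"]]
    if all(s == "completed" for s in statuses):
        break
    time.sleep(2)  # avoid hammering the API
```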
@ -652,12 +652,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}'
method='DELETE'
title='Delete document'
title='Delete a Document'
name='#delete_document'
/>
<Row>
@ -694,12 +694,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents'
method='GET'
title='Knowledge document list'
title='Get the Document List of a Knowledge Base'
name='#dataset_document_list'
/>
<Row>
@ -714,13 +714,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
Search keywords, currently only search document names(optional)
Search keywords; currently only searches document names (optional)
</Property>
<Property name='page' type='string' key='page'>
Page number(optional)
Page number (optional)
</Property>
<Property name='limit' type='string' key='limit'>
Number of items returned, default 20, range 1-100(optional)
Number of items returned, default 20, range 1-100 (optional)
</Property>
</Properties>
</Col>
@ -769,12 +769,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST'
title='Add segment'
title='Add Chunks to a Document'
name='#create_new_segment'
/>
<Row>
@ -792,9 +792,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) Text content/question content, required
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, if the mode of the Knowledge is qa mode, pass the value(optional)
- <code>answer</code> (text) Answer content; pass the value if the knowledge base is in Q&A mode (optional)
- <code>keywords</code> (list) Keywords(optional)
- <code>keywords</code> (list) Keywords (optional)
</Property>
</Properties>
</Col>
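A minimal sketch of appending chunks from Python, same assumed placeholders; the `segments` payload mirrors the request body above:

```python
import os

import requests

API_BASE = "https://api.dify.ai/v1"      # assumed base URL
API_KEY = os.environ["DIFY_API_KEY"]     # hypothetical env var
DATASET_ID = "your-dataset-id"           # placeholder
DOCUMENT_ID = "your-document-id"         # placeholder

# Each segment carries its text plus optional keywords; an "answer" field
# would only be included for a knowledge base in Q&A mode.
resp = requests.post(
    f"{API_BASE}/datasets/{DATASET_ID}/documents/{DOCUMENT_ID}/segments",
    headers={
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "segments": [
            {"content": "Example chunk text", "keywords": ["example"]},
        ]
    },
)
resp.raise_for_status()
```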
@ -855,12 +855,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET'
title='get documents segments'
title='Get Chunks from a Document'
name='#get_segment'
/>
<Row>
@ -878,10 +878,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
keyword,choosable
Keyword (optional)
</Property>
<Property name='status' type='string' key='status'>
Search status,completed
Search status, completed
</Property>
</Properties>
</Col>
@ -933,12 +933,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE'
title='delete document segment'
title='Delete a Chunk in a Document'
name='#delete_segment'
/>
<Row>
@ -979,12 +979,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST'
title='update document segment'
title='Update a Chunk in a Document'
name='#update_segment'
/>
<Row>
@ -1005,10 +1005,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segment' type='object' key='segment'>
- <code>content</code> (text) text content/question content,required
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, not required, passed if the Knowledge is in qa mode
- <code>answer</code> (text) Answer content; pass the value if the knowledge base is in Q&A mode (optional)
- <code>keywords</code> (list) keyword, not required
- <code>keywords</code> (list) Keywords (optional)
- <code>enabled</code> (bool) false/true, not required
- <code>enabled</code> (bool) false/true (optional)
</Property>
</Properties>
</Col>
@ -1067,41 +1067,41 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='Dataset hit testing'
title='Retrieve Chunks from a Knowledge Base'
name='#dataset_hit_testing'
name='#dataset_retrieval'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
Knowledge ID
</Property>
</Properties>

### Request Body
<Properties>
<Property name='query' type='string' key='query'>
retrieval keywordc
Query keyword
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
retrieval keyword(Optional, if not filled, it will be recalled according to the default method)
Retrieval model (optional; if not set, retrieval falls back to the default method)
- <code>search_method</code> (text) Search method: One of the following four keywords is required
- <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking, optional, required if the search mode is semantic_search or hybrid_search
- <code>reranking_enable</code> (bool) Whether to enable reranking; required if the search method is semantic_search or hybrid_search, otherwise optional
- <code>reranking_mode</code> (object) Rerank model configuration, optional, required if reranking is enabled
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return, optional
- <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (double) Score threshold
</Property>
@ -1114,26 +1114,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
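A minimal Python sketch of the renamed retrieval route, same assumed placeholders; note the docs' cURL sample sends a POST, and the result list is assumed to live under a `records` key:

```python
import os

import requests

API_BASE = "https://api.dify.ai/v1"      # assumed base URL
API_KEY = os.environ["DIFY_API_KEY"]     # hypothetical env var
DATASET_ID = "your-dataset-id"           # placeholder

# Keyword search with reranking disabled; other search_method values are
# semantic_search, full_text_search, and hybrid_search.
resp = requests.post(
    f"{API_BASE}/datasets/{DATASET_ID}/retrieve",
    headers={
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "query": "test",
        "retrieval_model": {
            "search_method": "keyword_search",
            "reranking_enable": False,
            "top_k": 1,
            "score_threshold_enabled": False,
        },
    },
)
resp.raise_for_status()
for record in resp.json().get("records", []):  # assumed response key
    print(record)
```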
@ -1212,7 +1212,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Row>
<Col>
@ -20,13 +20,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</CodeGroup>
</div>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/document/create_by_text'
url='/datasets/{dataset_id}/document/create-by-text'
method='POST'
title='Create a Document from Text'
name='#create_by_text'
name='#create-by-text'
/>
<Row>
<Col>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'>
Indexing technique
- <code>high_quality</code> High quality: embed with an embedding model and build a vector database index
- <code>economy</code> Economy: build with the inverted index of Keyword Table Index
- <code>economy</code> Economy: build with the inverted index of keyword table index
</Property>
<Property name='process_rule' type='object' key='process_rule'>
Processing rules
@ -64,7 +64,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) Whether this rule is selected; when no document ID is passed in, it represents the default value
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier; currently only one delimiter can be set. Defaults to \n
- <code>max_tokens</code> Maximum length (tokens), defaults to 1000
</Property>
</Properties>
</Col>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -123,13 +123,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/document/create_by_file'
url='/datasets/{dataset_id}/document/create-by-file'
method='POST'
title='Create a Document from a File'
name='#create_by_file'
name='#create-by-file'
/>
<Row>
<Col>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id Source document ID (optional)
- <code>original_document_id</code> Source document ID (optional)
- Used to re-upload a document or change its cleaning/segmentation settings; missing information is copied from the source document
- The source document must not be archived
- When <code>original_document_id</code> is passed, the request updates the document; <code>process_rule</code> is optional and defaults to the source document's segmentation
- When <code>original_document_id</code> is not passed, the request creates a new document, and <code>process_rule</code> is required

- indexing_technique Indexing technique
- <code>indexing_technique</code> Indexing technique
- <code>high_quality</code> High quality: embed with an embedding model and build a vector database index
- <code>economy</code> Economy: build with the inverted index of Keyword Table Index
- <code>economy</code> Economy: build with the inverted index of keyword table index

- process_rule Processing rules
- <code>process_rule</code> Processing rules
- <code>mode</code> (string) Cleaning and segmentation mode: automatic or custom
- <code>rules</code> (object) Custom rules (empty in automatic mode)
- <code>pre_processing_rules</code> (array[object]) Pre-processing rules
@ -166,7 +166,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) Whether this rule is selected; when no document ID is passed in, it represents the default value
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier; currently only one delimiter can be set. Defaults to \n
- <code>max_tokens</code> Maximum length (tokens), defaults to 1000
</Property>
<Property name='file' type='multipart/form-data' key='file'>
The file to upload.
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -221,7 +221,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets'
@ -245,13 +245,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>economy</code> Economy
</Property>
<Property name='permission' type='string' key='permission'>
Permission (optional, defaults to only_me)
- <code>only_me</code> Only me
- <code>all_team_members</code> All team members
- <code>partial_members</code> Some team members
</Property>
<Property name='provider' type='string' key='provider'>
provider (optional, defaults to vendor)
Provider (optional, defaults to vendor)
- <code>vendor</code> Uploaded files
- <code>external</code> External knowledge base
</Property>
@ -264,9 +264,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
>
@ -306,7 +306,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets'
@ -369,7 +369,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}'
@ -406,13 +406,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='Update a Document with Text'
name='#update_by_text'
name='#update-by-text'
/>
<Row>
<Col>
@ -431,7 +431,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
Document name (optional)
</Property>
<Property name='text' type='string' key='text'>
Document content (optional)
@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) Whether this rule is selected; when no document ID is passed in, it represents the default value
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier; currently only one delimiter can be set. Defaults to \n
- <code>max_tokens</code> Maximum length (tokens), defaults to 1000
</Property>
</Properties>
</Col>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -503,13 +503,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='Update a Document with a File'
name='#update_by_file'
name='#update-by-file'
/>
<Row>
<Col>
@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
Document name (optional)
</Property>
<Property name='file' type='multipart/form-data' key='file'>
The file to upload
@ -545,7 +545,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) Whether this rule is selected; when no document ID is passed in, it represents the default value
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier; currently only one delimiter can be set. Defaults to \n
- <code>max_tokens</code> Maximum length (tokens), defaults to 1000
</Property>
</Properties>
</Col>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -597,7 +597,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
@ -652,7 +652,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}'
@ -694,7 +694,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents'
@ -769,7 +769,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -793,7 +793,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, optional; pass the value if the knowledge base is in qa mode
- <code>answer</code> (text) Answer content, optional; pass the value if the knowledge base is in Q&A mode
- <code>keywords</code> (list) Keywords, optional
</Property>
</Properties>
@ -855,7 +855,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -933,7 +933,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -979,7 +979,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -1006,7 +1006,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties>
<Property name='segment' type='object' key='segment'>
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, optional; pass the value if the knowledge base is in qa mode
- <code>answer</code> (text) Answer content, optional; pass the value if the knowledge base is in Q&A mode
- <code>keywords</code> (list) Keywords, optional
- <code>enabled</code> (bool) false/true, optional
</Property>
@ -1068,13 +1068,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>

---
<hr className='ml-0 mr-0' />

<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='Knowledge Base Hit Testing'
title='Retrieve from a Knowledge Base'
name='#dataset_hit_testing'
name='#dataset_retrieval'
/>
<Row>
<Col>
@ -1088,23 +1088,23 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='query' type='string' key='query'>
Recall keyword
Retrieval keyword
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
Recall parameters (optional; if omitted, recall uses the default method)
Retrieval parameters (optional; if omitted, recall uses the default method)
- <code>search_method</code> (text) Search method: one of the following four keywords, required
- <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking, optional; pass the value if the search method is semantic_search or hybrid_search
- <code>reranking_mode</code> (object) Rerank model configuration, optional; pass the value if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Weight of semantic search in hybrid search mode
- <code>top_k</code> (integer) Number of results to return, optional
- <code>score_threshold_enabled</code> (bool) Whether to enable the score threshold
- <code>score_threshold</code> (double) Score threshold
</Property>
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
Unused field
@ -1115,26 +1115,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -1214,7 +1214,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Row>


---
<hr className='ml-0 mr-0' />

<Row>
<Col>
@ -15,6 +15,7 @@ import { AppType } from '@/types/app'
import type { DataSet } from '@/models/datasets'
import {
  getMultipleRetrievalConfig,
  getSelectedDatasetsMode,
} from '@/app/components/workflow/nodes/knowledge-retrieval/utils'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
@ -38,6 +39,7 @@ const DatasetConfig: FC = () => {
    isAgent,
    datasetConfigs,
    setDatasetConfigs,
    setRerankSettingModalOpen,
  } = useContext(ConfigContext)
  const formattingChangedDispatcher = useFormattingChangedDispatcher()
@ -55,6 +57,20 @@ const DatasetConfig: FC = () => {
|
||||||
...(datasetConfigs as any),
|
...(datasetConfigs as any),
|
||||||
...retrievalConfig,
|
...retrievalConfig,
|
||||||
})
|
})
|
||||||
|
const {
|
||||||
|
allExternal,
|
||||||
|
allInternal,
|
||||||
|
mixtureInternalAndExternal,
|
||||||
|
mixtureHighQualityAndEconomic,
|
||||||
|
inconsistentEmbeddingModel,
|
||||||
|
} = getSelectedDatasetsMode(filteredDataSets)
|
||||||
|
|
||||||
|
if (
|
||||||
|
(allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
|
||||||
|
|| mixtureInternalAndExternal
|
||||||
|
|| allExternal
|
||||||
|
)
|
||||||
|
setRerankSettingModalOpen(true)
|
||||||
formattingChangedDispatcher()
|
formattingChangedDispatcher()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
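The new guard opens the rerank-setting modal whenever the selected datasets cannot share a single retrieval configuration. A reduced sketch of the decision (the flag names come from `getSelectedDatasetsMode`; the type is illustrative):

```ts
// Sketch of the guard above; flag names mirror getSelectedDatasetsMode,
// the DatasetsMode type is an illustrative stand-in.
type DatasetsMode = {
  allInternal: boolean
  allExternal: boolean
  mixtureInternalAndExternal: boolean
  mixtureHighQualityAndEconomic: boolean
  inconsistentEmbeddingModel: boolean
}

const needsRerankSetting = (m: DatasetsMode): boolean =>
  // internal datasets that mix indexing modes or embedding models...
  (m.allInternal && (m.mixtureHighQualityAndEconomic || m.inconsistentEmbeddingModel))
  // ...any mix of internal and external knowledge...
  || m.mixtureInternalAndExternal
  // ...or an all-external selection: all need explicit rerank settings.
  || m.allExternal
```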
@@ -266,7 +266,7 @@ const ConfigContent: FC<Props> = ({
           <div className='mt-2'>
             <div className='flex items-center'>
               {
-                selectedDatasetsMode.allEconomic && (
+                selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && (
                   <div
                     className='flex items-center'
                     onClick={handleDisabledSwitchClick}
@@ -12,6 +12,7 @@ import { RETRIEVE_TYPE } from '@/types/app'
 import Toast from '@/app/components/base/toast'
 import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
+import { RerankingModeEnum } from '@/models/datasets'
 import type { DataSet } from '@/models/datasets'
 import type { DatasetConfigs } from '@/models/debug'
 import {
@@ -47,7 +48,10 @@ const ParamsConfig = ({
   const isValid = () => {
     let errMsg = ''
     if (tempDataSetConfigs.retrieval_model === RETRIEVE_TYPE.multiWay) {
-      if (!tempDataSetConfigs.reranking_model?.reranking_model_name && (rerankDefaultModel && !isRerankDefaultModelValid))
+      if (tempDataSetConfigs.reranking_enable
+        && tempDataSetConfigs.reranking_mode === RerankingModeEnum.RerankingModel
+        && !isRerankDefaultModelValid
+      )
         errMsg = t('appDebug.datasetConfig.rerankModelRequired')
     }
     if (errMsg) {
@@ -62,7 +66,9 @@ const ParamsConfig = ({
     if (!isValid())
       return
     const config = { ...tempDataSetConfigs }
-    if (config.retrieval_model === RETRIEVE_TYPE.multiWay && !config.reranking_model) {
+    if (config.retrieval_model === RETRIEVE_TYPE.multiWay
+      && config.reranking_mode === RerankingModeEnum.RerankingModel
+      && !config.reranking_model) {
       config.reranking_model = {
         reranking_provider_name: rerankDefaultModel?.provider?.provider,
         reranking_model_name: rerankDefaultModel?.model,
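Read on its own, the reworked validation only demands a rerank model when reranking is actually enabled and set to model-based reranking. A hedged reduction of the rule to a pure predicate (field names follow the diff; the enum string literals are assumed):

```ts
// Illustrative reduction of isValid() above; 'multiple' and
// 'reranking_model' are assumed values of RETRIEVE_TYPE.multiWay and
// RerankingModeEnum.RerankingModel respectively.
type Cfg = {
  retrieval_model: string
  reranking_enable: boolean
  reranking_mode: string
}

const rerankModelRequired = (cfg: Cfg, isRerankDefaultModelValid: boolean): boolean =>
  cfg.retrieval_model === 'multiple'
  && cfg.reranking_enable
  && cfg.reranking_mode === 'reranking_model'
  && !isRerankDefaultModelValid
```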
@@ -253,12 +253,18 @@ const Configuration: FC = () => {
     }
     hideSelectDataSet()
     const {
-      allEconomic,
+      allExternal,
+      allInternal,
+      mixtureInternalAndExternal,
       mixtureHighQualityAndEconomic,
       inconsistentEmbeddingModel,
     } = getSelectedDatasetsMode(newDatasets)
 
-    if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel)
+    if (
+      (allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
+      || mixtureInternalAndExternal
+      || allExternal
+    )
       setRerankSettingModalOpen(true)
 
     const { datasets, retrieval_model, score_threshold_enabled, ...restConfigs } = datasetConfigs
@@ -36,6 +36,7 @@ import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import TextGeneration from '@/app/components/app/text-generate/item'
 import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils'
 import MessageLogModal from '@/app/components/base/message-log-modal'
+import PromptLogModal from '@/app/components/base/prompt-log-modal'
 import { useStore as useAppStore } from '@/app/components/app/store'
 import { useAppContext } from '@/context/app-context'
 import useTimestamp from '@/hooks/use-timestamp'
@@ -168,11 +169,13 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
   const { userProfile: { timezone } } = useAppContext()
   const { formatTime } = useTimestamp()
   const { onClose, appDetail } = useContext(DrawerContext)
-  const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, currentLogModalActiveTab } = useAppStore(useShallow(state => ({
+  const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, showPromptLogModal, setShowPromptLogModal, currentLogModalActiveTab } = useAppStore(useShallow(state => ({
     currentLogItem: state.currentLogItem,
     setCurrentLogItem: state.setCurrentLogItem,
     showMessageLogModal: state.showMessageLogModal,
     setShowMessageLogModal: state.setShowMessageLogModal,
+    showPromptLogModal: state.showPromptLogModal,
+    setShowPromptLogModal: state.setShowPromptLogModal,
     currentLogModalActiveTab: state.currentLogModalActiveTab,
   })))
   const { t } = useTranslation()
@@ -192,8 +195,8 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
       conversation_id: detail.id,
       limit: 10,
     }
-    if (allChatItems.at(-1)?.id)
-      params.first_id = allChatItems.at(-1)?.id.replace('question-', '')
+    if (allChatItems[0]?.id)
+      params.first_id = allChatItems[0]?.id.replace('question-', '')
     const messageRes = await fetchChatMessages({
       url: `/apps/${appDetail?.id}/chat-messages`,
       params,
@@ -557,6 +560,16 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
           defaultTab={currentLogModalActiveTab}
         />
       )}
+      {showPromptLogModal && (
+        <PromptLogModal
+          width={width}
+          currentLogItem={currentLogItem}
+          onCancel={() => {
+            setCurrentLogItem()
+            setShowPromptLogModal(false)
+          }}
+        />
+      )}
     </div>
   )
 }
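The `first_id` change fixes the pagination cursor for loading older history: assuming the loaded list is ordered oldest first, which is what the switch from `.at(-1)` to `[0]` implies, the cursor must be the oldest loaded item, not the newest. A hedged sketch of the corrected cursor (the `ChatItem` shape is reduced for illustration; `fetchChatMessages` and the `question-` id prefix come from the diff):

```ts
// Illustrative cursor helper; fields trimmed to what the fix touches.
type ChatItem = { id: string }

function olderHistoryCursor(allChatItems: ChatItem[]): string | undefined {
  // Items run oldest -> newest, so the oldest loaded message sits at
  // index 0; its id (minus the synthetic 'question-' prefix) becomes
  // first_id for the next page of older messages.
  return allChatItems[0]?.id.replace('question-', '')
}
```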
@@ -1804,6 +1804,280 @@ exports[`build chat item tree and get thread messages should get thread messages
 ]
 `;
 
+exports[`build chat item tree and get thread messages should work with partial messages 1`] = `
+[
+  {
+    "children": [
+      {
+        "agent_thoughts": [
+          {
+            "chain_id": null,
+            "created_at": 1726105809,
+            "files": [],
+            "id": "1019cd79-d141-4f9f-880a-fc1441cfd802",
+            "message_id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+            "observation": "",
+            "position": 1,
+            "thought": "Sure! My number is 54. Your turn!",
+            "tool": "",
+            "tool_input": "",
+            "tool_labels": {},
+          },
+        ],
+        "children": [
+          {
+            "children": [
+              {
+                "agent_thoughts": [
+                  {
+                    "chain_id": null,
+                    "created_at": 1726105822,
+                    "files": [],
+                    "id": "0773bec7-b992-4a53-92b2-20ebaeae8798",
+                    "message_id": "324bce32-c98c-435d-a66b-bac974ebb5ed",
+                    "observation": "",
+                    "position": 1,
+                    "thought": "My number is 4729. Your turn!",
+                    "tool": "",
+                    "tool_input": "",
+                    "tool_labels": {},
+                  },
+                ],
+                "children": [],
+                "content": "My number is 4729. Your turn!",
+                "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
+                "feedbackDisabled": false,
+                "id": "324bce32-c98c-435d-a66b-bac974ebb5ed",
+                "input": {
+                  "inputs": {},
+                  "query": "3306",
+                },
+                "isAnswer": true,
+                "log": [
+                  {
+                    "files": [],
+                    "role": "user",
+                    "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+                  },
+                  {
+                    "files": [],
+                    "role": "assistant",
+                    "text": "Sure! My number is 54. Your turn!",
+                  },
+                  {
+                    "files": [],
+                    "role": "user",
+                    "text": "3306",
+                  },
+                  {
+                    "files": [],
+                    "role": "assistant",
+                    "text": "My number is 4729. Your turn!",
+                  },
+                ],
+                "message_files": [],
+                "more": {
+                  "latency": "1.30",
+                  "time": "09/11/2024 09:50 PM",
+                  "tokens": 66,
+                },
+                "parentMessageId": "question-324bce32-c98c-435d-a66b-bac974ebb5ed",
+                "siblingIndex": 0,
+                "workflow_run_id": null,
+              },
+            ],
+            "content": "3306",
+            "id": "question-324bce32-c98c-435d-a66b-bac974ebb5ed",
+            "isAnswer": false,
+            "message_files": [],
+            "parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+          },
+          {
+            "children": [
+              {
+                "agent_thoughts": [
+                  {
+                    "chain_id": null,
+                    "created_at": 1726107812,
+                    "files": [],
+                    "id": "5ca650f3-982c-4399-8b95-9ea241c76707",
+                    "message_id": "684b5396-4e91-4043-88e9-aabe48b21acc",
+                    "observation": "",
+                    "position": 1,
+                    "thought": "My number is 4821. Your turn!",
+                    "tool": "",
+                    "tool_input": "",
+                    "tool_labels": {},
+                  },
+                ],
+                "children": [
+                  {
+                    "children": [
+                      {
+                        "agent_thoughts": [
+                          {
+                            "chain_id": null,
+                            "created_at": 1726111024,
+                            "files": [],
+                            "id": "095cacab-afad-4387-a41d-1662578b8b13",
+                            "message_id": "19904a7b-7494-4ed8-b72c-1d18668cea8c",
+                            "observation": "",
+                            "position": 1,
+                            "thought": "My number is 1456. Your turn!",
+                            "tool": "",
+                            "tool_input": "",
+                            "tool_labels": {},
+                          },
+                        ],
+                        "children": [],
+                        "content": "My number is 1456. Your turn!",
+                        "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
+                        "feedbackDisabled": false,
+                        "id": "19904a7b-7494-4ed8-b72c-1d18668cea8c",
+                        "input": {
+                          "inputs": {},
+                          "query": "1003",
+                        },
+                        "isAnswer": true,
+                        "log": [
+                          {
+                            "files": [],
+                            "role": "user",
+                            "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+                          },
+                          {
+                            "files": [],
+                            "role": "assistant",
+                            "text": "Sure! My number is 54. Your turn!",
+                          },
+                          {
+                            "files": [],
+                            "role": "user",
+                            "text": "3306",
+                          },
+                          {
+                            "files": [],
+                            "role": "assistant",
+                            "text": "My number is 4821. Your turn!",
+                          },
+                          {
+                            "files": [],
+                            "role": "user",
+                            "text": "1003",
+                          },
+                          {
+                            "files": [],
+                            "role": "assistant",
+                            "text": "My number is 1456. Your turn!",
+                          },
+                        ],
+                        "message_files": [],
+                        "more": {
+                          "latency": "1.38",
+                          "time": "09/11/2024 11:17 PM",
+                          "tokens": 86,
+                        },
+                        "parentMessageId": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c",
+                        "siblingIndex": 0,
+                        "workflow_run_id": null,
+                      },
+                    ],
+                    "content": "1003",
+                    "id": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c",
+                    "isAnswer": false,
+                    "message_files": [],
+                    "parentMessageId": "684b5396-4e91-4043-88e9-aabe48b21acc",
+                  },
+                ],
+                "content": "My number is 4821. Your turn!",
+                "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
+                "feedbackDisabled": false,
+                "id": "684b5396-4e91-4043-88e9-aabe48b21acc",
+                "input": {
+                  "inputs": {},
+                  "query": "3306",
+                },
+                "isAnswer": true,
+                "log": [
+                  {
+                    "files": [],
+                    "role": "user",
+                    "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+                  },
+                  {
+                    "files": [],
+                    "role": "assistant",
+                    "text": "Sure! My number is 54. Your turn!",
+                  },
+                  {
+                    "files": [],
+                    "role": "user",
+                    "text": "3306",
+                  },
+                  {
+                    "files": [],
+                    "role": "assistant",
+                    "text": "My number is 4821. Your turn!",
+                  },
+                ],
+                "message_files": [],
+                "more": {
+                  "latency": "1.48",
+                  "time": "09/11/2024 10:23 PM",
+                  "tokens": 66,
+                },
+                "parentMessageId": "question-684b5396-4e91-4043-88e9-aabe48b21acc",
+                "siblingIndex": 1,
+                "workflow_run_id": null,
+              },
+            ],
+            "content": "3306",
+            "id": "question-684b5396-4e91-4043-88e9-aabe48b21acc",
+            "isAnswer": false,
+            "message_files": [],
+            "parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+          },
+        ],
+        "content": "Sure! My number is 54. Your turn!",
+        "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
+        "feedbackDisabled": false,
+        "id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+        "input": {
+          "inputs": {},
+          "query": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+        },
+        "isAnswer": true,
+        "log": [
+          {
+            "files": [],
+            "role": "user",
+            "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+          },
+          {
+            "files": [],
+            "role": "assistant",
+            "text": "Sure! My number is 54. Your turn!",
+          },
+        ],
+        "message_files": [],
+        "more": {
+          "latency": "1.52",
+          "time": "09/11/2024 09:50 PM",
+          "tokens": 46,
+        },
+        "parentMessageId": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+        "siblingIndex": 0,
+        "workflow_run_id": null,
+      },
+    ],
+    "content": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
+    "id": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
+    "isAnswer": false,
+    "message_files": [],
+  },
+]
+`;
+
 exports[`build chat item tree and get thread messages should work with real world messages 1`] = `
 [
   {
@@ -255,4 +255,10 @@ describe('build chat item tree and get thread messages', () => {
     const threadMessages6_2 = getThreadMessages(tree6, 'ff4c2b43-48a5-47ad-9dc5-08b34ddba61b')
     expect(threadMessages6_2).toMatchSnapshot()
   })
+
+  const partialMessages = (realWorldMessages as ChatItemInTree[]).slice(-10)
+  const tree7 = buildChatItemTree(partialMessages)
+  it('should work with partial messages', () => {
+    expect(tree7).toMatchSnapshot()
+  })
 })
@@ -134,6 +134,12 @@ function buildChatItemTree(allMessages: IChatItem[]): ChatItemInTree[] {
     }
   }
 
+  // If no messages have parentMessageId=null (indicating a root node),
+  // then we likely have a partial chat history. In this case,
+  // use the first available message as the root node.
+  if (rootNodes.length === 0 && allMessages.length > 0)
+    rootNodes.push(map[allMessages[0]!.id]!)
+
   return rootNodes
 }
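In practice the fallback means a tree can now be built from any suffix of a conversation, which is exactly what the new `slice(-10)` test exercises. A hedged usage sketch (`buildChatItemTree` and `IChatItem` come from the diff; the message stubs are invented and trimmed for illustration):

```ts
// Hypothetical partial history: the true root (parentMessageId === null)
// was trimmed away by pagination; fields reduced to what the builder needs.
const partialHistory = [
  { id: 'question-2', parentMessageId: 'answer-1', isAnswer: false },
  { id: 'answer-2', parentMessageId: 'question-2', isAnswer: true },
] as unknown as IChatItem[]

// Without the fallback this returned [] (no node had a null parent);
// with it, the first available message, 'question-2', becomes the root.
const tree = buildChatItemTree(partialHistory)
```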
@@ -1,5 +1,5 @@
 import type { FC } from 'react'
-import { useState } from 'react'
+import { useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { RiSearchLine } from '@remixicon/react'
 import cn from '@/utils/classnames'
@@ -12,6 +12,7 @@ type SearchInputProps = {
   onChange: (v: string) => void
   white?: boolean
 }
 
 const SearchInput: FC<SearchInputProps> = ({
   placeholder,
   className,
@@ -21,6 +22,7 @@ const SearchInput: FC<SearchInputProps> = ({
 }) => {
   const { t } = useTranslation()
   const [focus, setFocus] = useState<boolean>(false)
+  const isComposing = useRef<boolean>(false)
 
   return (
     <div className={cn(
@@ -45,7 +47,14 @@ const SearchInput: FC<SearchInputProps> = ({
         placeholder={placeholder || t('common.operation.search')!}
         value={value}
         onChange={(e) => {
-          onChange(e.target.value)
+          if (!isComposing.current)
+            onChange(e.target.value)
+        }}
+        onCompositionStart={() => {
+          isComposing.current = true
+        }}
+        onCompositionEnd={() => {
+          isComposing.current = false
         }}
         onFocus={() => setFocus(true)}
         onBlur={() => setFocus(false)}
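The ref-based guard is the standard IME pattern: while a composition session is open (for example, typing pinyin or kana), intermediate keystrokes fire change events too, and suppressing them means the consumer only ever sees committed text. A minimal standalone sketch of the same idea outside React (`'#search'` and `onChange` are hypothetical stand-ins for the component's input and prop):

```ts
// Minimal DOM sketch of the composition guard used above.
const onChange = (v: string) => console.log(v)
const input = document.querySelector<HTMLInputElement>('#search')!

let isComposing = false
input.addEventListener('compositionstart', () => { isComposing = true })
input.addEventListener('compositionend', () => { isComposing = false })
input.addEventListener('input', () => {
  // Skip intermediate IME states; only committed text reaches onChange.
  if (!isComposing)
    onChange(input.value)
})
```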
@@ -39,6 +39,7 @@ export const Heading = function H2({
   }
   return (
     <>
+      <span id={name?.replace(/^#/, '')} className='relative -top-28' />
      <div className="flex items-center gap-x-3" >
        <span className={`font-mono text-[0.625rem] font-semibold leading-6 rounded-lg px-1.5 ring-1 ring-inset ${style}`}>{method}</span>
        {/* <span className="h-0.5 w-0.5 rounded-full bg-zinc-300 dark:bg-zinc-600"></span> */}
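The inserted span is the usual fixed-header anchor trick: it owns the URL fragment's id and is shifted upward (Tailwind's `-top-28`, i.e. 7rem) so a jump to `#section-name` scrolls the heading clear of the sticky header. A minimal sketch of the same pattern as a hypothetical standalone component:

```tsx
// Hypothetical anchor component using the same offset technique.
const OffsetAnchor = ({ id }: { id: string }) => (
  // relative + negative top moves the scroll target up without shifting
  // the visible layout, so the heading is not hidden under the header.
  <span id={id} className='relative -top-28' />
)
```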
@@ -656,6 +656,11 @@ Chat applications support session persistence, allowing previous chat history to
     <Property name='pinned' type='bool' key='pinned'>
       Return only pinned conversations as `true`, only non-pinned as `false`
     </Property>
+    <Property name='sort_by' type='string' key='sort_by'>
+      Sorting Field (Optional), Default: -updated_at (sorted in descending order by update time)
+      - Available Values: created_at, -created_at, updated_at, -updated_at
+      - The symbol before the field represents the order or reverse, "-" represents reverse order.
+    </Property>
   </Properties>
 
 ### Response
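A hedged example of the new parameter in use, following the cURL style used elsewhere in these docs (the `/conversations` path, `user` query value, and placeholders are assumed from context):

```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/conversations?user=abc-123&limit=20&sort_by=-created_at' \
--header 'Authorization: Bearer {api_key}'
```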
@@ -691,6 +691,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
     <Property name='pinned' type='bool' key='pinned'>
       Return only pinned conversations as `true`, only non-pinned as `false`
     </Property>
+    <Property name='sort_by' type='string' key='sort_by'>
+      Sorting field (optional), default -updated_at (sorted in descending order by update time)
+      - Available values: created_at, -created_at, updated_at, -updated_at
+      - The symbol before the field indicates the order; "-" indicates descending order
+    </Property>
   </Properties>
 
 ### Response
@@ -690,6 +690,11 @@ Chat applications support session persistence, allowing previous chat history to
     <Property name='pinned' type='bool' key='pinned'>
       Return only pinned conversations as `true`, only non-pinned as `false`
     </Property>
+    <Property name='sort_by' type='string' key='sort_by'>
+      Sorting Field (Optional), Default: -updated_at (sorted in descending order by update time)
+      - Available Values: created_at, -created_at, updated_at, -updated_at
+      - The symbol before the field represents the order or reverse, "-" represents reverse order.
+    </Property>
   </Properties>
 
 ### Response
@@ -705,6 +705,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
     <Property name='pinned' type='bool' key='pinned'>
       Return only pinned conversations as `true`, only non-pinned as `false`
     </Property>
+    <Property name='sort_by' type='string' key='sort_by'>
+      Sorting field (optional), default -updated_at (sorted in descending order by update time)
+      - Available values: created_at, -created_at, updated_at, -updated_at
+      - The symbol before the field indicates the order; "-" indicates descending order
+    </Property>
   </Properties>
 
 ### Response
@@ -26,7 +26,7 @@ type Props = {
   isFocus: boolean
   isInNode?: boolean
   onGenerated?: (prompt: string) => void
-  codeLanguages: CodeLanguage
+  codeLanguages?: CodeLanguage
   fileList?: FileEntity[]
   showFileList?: boolean
   showCodeGenerator?: boolean
@@ -78,7 +78,7 @@ const Base: FC<Props> = ({
           e.stopPropagation()
         }}>
           {headerRight}
-          {showCodeGenerator && (
+          {showCodeGenerator && codeLanguages && (
             <div className='ml-1'>
               <CodeGeneratorButton onGenerated={onGenerated} codeLanguages={codeLanguages}/>
             </div>
@@ -31,6 +31,7 @@ export type Props = {
   noWrapper?: boolean
   isExpand?: boolean
   showFileList?: boolean
+  onGenerated?: (value: string) => void
   showCodeGenerator?: boolean
 }
 
@@ -64,6 +65,7 @@ const CodeEditor: FC<Props> = ({
   noWrapper,
   isExpand,
   showFileList,
+  onGenerated,
   showCodeGenerator = false,
 }) => {
   const [isFocus, setIsFocus] = React.useState(false)
@@ -151,9 +153,6 @@ const CodeEditor: FC<Props> = ({
 
     return isFocus ? 'focus-theme' : 'blur-theme'
   })()
-  const handleGenerated = (code: string) => {
-    handleEditorChange(code)
-  }
 
   const main = (
     <>
@@ -205,7 +204,7 @@ const CodeEditor: FC<Props> = ({
           isFocus={isFocus && !readOnly}
           minHeight={minHeight}
           isInNode={isInNode}
-          onGenerated={handleGenerated}
+          onGenerated={onGenerated}
           codeLanguages={language}
           fileList={fileList}
           showFileList={showFileList}
web/app/components/workflow/nodes/code/code-parser.spec.ts (new file, 326 lines)
@@ -0,0 +1,326 @@
+import { VarType } from '../../types'
+import { extractFunctionParams, extractReturnType } from './code-parser'
+import { CodeLanguage } from './types'
+
+const SAMPLE_CODES = {
+  python3: {
+    noParams: 'def main():',
+    singleParam: 'def main(param1):',
+    multipleParams: `def main(param1, param2, param3):
+    return {"result": param1}`,
+    withTypes: `def main(param1: str, param2: int, param3: List[str]):
+    result = process_data(param1, param2)
+    return {"output": result}`,
+    withDefaults: `def main(param1: str = "default", param2: int = 0):
+    return {"data": param1}`,
+  },
+  javascript: {
+    noParams: 'function main() {',
+    singleParam: 'function main(param1) {',
+    multipleParams: `function main(param1, param2, param3) {
+      return { result: param1 }
+    }`,
+    withComments: `// Main function
+    function main(param1, param2) {
+      // Process data
+      return { output: process(param1, param2) }
+    }`,
+    withSpaces: 'function main( param1 , param2 ) {',
+  },
+}
+
+describe('extractFunctionParams', () => {
+  describe('Python3', () => {
+    test('handles no parameters', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.python3.noParams, CodeLanguage.python3)
+      expect(result).toEqual([])
+    })
+
+    test('extracts single parameter', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.python3.singleParam, CodeLanguage.python3)
+      expect(result).toEqual(['param1'])
+    })
+
+    test('extracts multiple parameters', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.python3.multipleParams, CodeLanguage.python3)
+      expect(result).toEqual(['param1', 'param2', 'param3'])
+    })
+
+    test('handles type hints', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.python3.withTypes, CodeLanguage.python3)
+      expect(result).toEqual(['param1', 'param2', 'param3'])
+    })
+
+    test('handles default values', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.python3.withDefaults, CodeLanguage.python3)
+      expect(result).toEqual(['param1', 'param2'])
+    })
+  })
+
+  // JavaScript test cases
+  describe('JavaScript', () => {
+    test('handles no parameters', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.javascript.noParams, CodeLanguage.javascript)
+      expect(result).toEqual([])
+    })
+
+    test('extracts single parameter', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.javascript.singleParam, CodeLanguage.javascript)
+      expect(result).toEqual(['param1'])
+    })
+
+    test('extracts multiple parameters', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.javascript.multipleParams, CodeLanguage.javascript)
+      expect(result).toEqual(['param1', 'param2', 'param3'])
+    })
+
+    test('handles comments in code', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.javascript.withComments, CodeLanguage.javascript)
+      expect(result).toEqual(['param1', 'param2'])
+    })
+
+    test('handles whitespace', () => {
+      const result = extractFunctionParams(SAMPLE_CODES.javascript.withSpaces, CodeLanguage.javascript)
+      expect(result).toEqual(['param1', 'param2'])
+    })
+  })
+})
+
+const RETURN_TYPE_SAMPLES = {
+  python3: {
+    singleReturn: `
+def main(param1):
+    return {"result": "value"}`,
+
+    multipleReturns: `
+def main(param1, param2):
+    return {"result": "value", "status": "success"}`,
+
+    noReturn: `
+def main():
+    print("Hello")`,
+
+    complexReturn: `
+def main():
+    data = process()
+    return {"result": data, "count": 42, "messages": ["hello"]}`,
+    nestedObject: `
+def main(name, age, city):
+    return {
+        'personal_info': {
+            'name': name,
+            'age': age,
+            'city': city
+        },
+        'timestamp': int(time.time()),
+        'status': 'active'
+    }`,
+  },
+
+  javascript: {
+    singleReturn: `
+function main(param1) {
+    return { result: "value" }
+}`,
+
+    multipleReturns: `
+function main(param1) {
+    return { result: "value", status: "success" }
+}`,
+
+    withParentheses: `
+function main() {
+    return ({ result: "value", status: "success" })
+}`,
+
+    noReturn: `
+function main() {
+    console.log("Hello")
+}`,
+
+    withQuotes: `
+function main() {
+    return { "result": 'value', 'status': "success" }
+}`,
+    nestedObject: `
+function main(name, age, city) {
+    return {
+        personal_info: {
+            name: name,
+            age: age,
+            city: city
+        },
+        timestamp: Date.now(),
+        status: 'active'
+    }
+}`,
+    withJSDoc: `
+/**
+ * Creates a user profile with personal information and metadata
+ * @param {string} name - The user's name
+ * @param {number} age - The user's age
+ * @param {string} city - The user's city of residence
+ * @returns {Object} An object containing the user profile
+ */
+function main(name, age, city) {
+    return {
+        result: {
+            personal_info: {
+                name: name,
+                age: age,
+                city: city
+            },
+            timestamp: Date.now(),
+            status: 'active'
+        }
+    };
+}`,
+
+  },
+}
+
+describe('extractReturnType', () => {
+  // Python3 tests
+  describe('Python3', () => {
+    test('extracts single return value', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.singleReturn, CodeLanguage.python3)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+
+    test('extracts multiple return values', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.multipleReturns, CodeLanguage.python3)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+
+    test('returns empty object when no return statement', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.noReturn, CodeLanguage.python3)
+      expect(result).toEqual({})
+    })
+
+    test('handles complex return statement', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.complexReturn, CodeLanguage.python3)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+        count: {
+          type: VarType.string,
+          children: null,
+        },
+        messages: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+    test('handles nested object structure', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.nestedObject, CodeLanguage.python3)
+      expect(result).toEqual({
+        personal_info: {
+          type: VarType.string,
+          children: null,
+        },
+        timestamp: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+  })
+
+  // JavaScript tests
+  describe('JavaScript', () => {
+    test('extracts single return value', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.singleReturn, CodeLanguage.javascript)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+
+    test('extracts multiple return values', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.multipleReturns, CodeLanguage.javascript)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+
+    test('handles return with parentheses', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.withParentheses, CodeLanguage.javascript)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+
+    test('returns empty object when no return statement', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.noReturn, CodeLanguage.javascript)
+      expect(result).toEqual({})
+    })
+
+    test('handles quoted keys', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.withQuotes, CodeLanguage.javascript)
+      expect(result).toEqual({
+        result: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+    test('handles nested object structure', () => {
+      const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.nestedObject, CodeLanguage.javascript)
+      expect(result).toEqual({
+        personal_info: {
+          type: VarType.string,
+          children: null,
+        },
+        timestamp: {
+          type: VarType.string,
+          children: null,
+        },
+        status: {
+          type: VarType.string,
+          children: null,
+        },
+      })
+    })
+  })
+})
web/app/components/workflow/nodes/code/code-parser.ts (new file, 86 lines)
@@ -0,0 +1,86 @@
+import { VarType } from '../../types'
+import type { OutputVar } from './types'
+import { CodeLanguage } from './types'
+
+export const extractFunctionParams = (code: string, language: CodeLanguage) => {
+  if (language === CodeLanguage.json)
+    return []
+
+  const patterns: Record<Exclude<CodeLanguage, CodeLanguage.json>, RegExp> = {
+    [CodeLanguage.python3]: /def\s+main\s*\((.*?)\)/,
+    [CodeLanguage.javascript]: /function\s+main\s*\((.*?)\)/,
+  }
+  const match = code.match(patterns[language])
+  const params: string[] = []
+
+  if (match?.[1]) {
+    params.push(...match[1].split(',')
+      .map(p => p.trim())
+      .filter(Boolean)
+      .map(p => p.split(':')[0].trim()),
+    )
+  }
+
+  return params
+}
+export const extractReturnType = (code: string, language: CodeLanguage): OutputVar => {
+  const codeWithoutComments = code.replace(/\/\*\*[\s\S]*?\*\//, '')
+  console.log(codeWithoutComments)
+
+  const returnIndex = codeWithoutComments.indexOf('return')
+  if (returnIndex === -1)
+    return {}
+
+  // Take the substring starting at the return statement
+  const codeAfterReturn = codeWithoutComments.slice(returnIndex)
+
+  let bracketCount = 0
+  let startIndex = codeAfterReturn.indexOf('{')
+
+  if (language === CodeLanguage.javascript && startIndex === -1) {
+    const parenStart = codeAfterReturn.indexOf('(')
+    if (parenStart !== -1)
+      startIndex = codeAfterReturn.indexOf('{', parenStart)
+  }
+
+  if (startIndex === -1)
+    return {}
+
+  let endIndex = -1
+
+  for (let i = startIndex; i < codeAfterReturn.length; i++) {
+    if (codeAfterReturn[i] === '{')
+      bracketCount++
+    if (codeAfterReturn[i] === '}') {
+      bracketCount--
+      if (bracketCount === 0) {
+        endIndex = i + 1
+        break
+      }
+    }
+  }
+
+  if (endIndex === -1)
+    return {}
+
+  const returnContent = codeAfterReturn.slice(startIndex + 1, endIndex - 1)
+  console.log(returnContent)
+
+  const result: OutputVar = {}
+
+  const keyRegex = /['"]?(\w+)['"]?\s*:(?![^{]*})/g
+  const matches = returnContent.matchAll(keyRegex)
+
+  for (const match of matches) {
+    console.log(`Found key: "${match[1]}" from match: "${match[0]}"`)
+    const key = match[1]
+    result[key] = {
+      type: VarType.string,
+      children: null,
+    }
+  }
+
+  console.log(result)
+
+  return result
+}
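Taken together, the two helpers turn a generated `main` function into the code node's I/O schema: a regex pulls the parameter names, and a bracket-matching pass over the first `return` statement pulls the top-level output keys (every value typed as string). A hedged usage sketch (the helpers and `CodeLanguage` come from the file above; the sample snippet is invented):

```ts
import { extractFunctionParams, extractReturnType } from './code-parser'
import { CodeLanguage } from './types'

// Hypothetical generated snippet.
const code = `def main(query: str, top_k: int = 3):
    return {"answer": query, "count": top_k}`

extractFunctionParams(code, CodeLanguage.python3)
// -> ['query', 'top_k']

extractReturnType(code, CodeLanguage.python3)
// -> { answer: { type: VarType.string, children: null },
//      count:  { type: VarType.string, children: null } }
```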
@@ -5,6 +5,7 @@ import RemoveEffectVarConfirm from '../_base/components/remove-effect-var-confir
 import useConfig from './use-config'
 import type { CodeNodeType } from './types'
 import { CodeLanguage } from './types'
+import { extractFunctionParams, extractReturnType } from './code-parser'
 import VarList from '@/app/components/workflow/nodes/_base/components/variable/var-list'
 import OutputVarList from '@/app/components/workflow/nodes/_base/components/variable/output-var-list'
 import AddButton from '@/app/components/base/button/add-button'
@@ -12,10 +13,9 @@ import Field from '@/app/components/workflow/nodes/_base/components/field'
 import Split from '@/app/components/workflow/nodes/_base/components/split'
 import CodeEditor from '@/app/components/workflow/nodes/_base/components/editor/code-editor'
 import TypeSelector from '@/app/components/workflow/nodes/_base/components/selector'
-import type { NodePanelProps } from '@/app/components/workflow/types'
+import { type NodePanelProps } from '@/app/components/workflow/types'
 import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form'
 import ResultPanel from '@/app/components/workflow/run/result-panel'
 
 const i18nPrefix = 'workflow.nodes.code'
 
 const codeLanguages = [
@@ -38,6 +38,7 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
   readOnly,
   inputs,
   outputKeyOrders,
+  handleCodeAndVarsChange,
   handleVarListChange,
   handleAddVariable,
   handleRemoveVariable,
@@ -61,6 +62,18 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
     setInputVarValues,
   } = useConfig(id, data)
 
+  const handleGeneratedCode = (value: string) => {
+    const params = extractFunctionParams(value, inputs.code_language)
+    const codeNewInput = params.map((p) => {
+      return {
+        variable: p,
+        value_selector: [],
+      }
+    })
+    const returnTypes = extractReturnType(value, inputs.code_language)
+    handleCodeAndVarsChange(value, codeNewInput, returnTypes)
+  }
+
   return (
     <div className='mt-2'>
       <div className='px-4 pb-4 space-y-4'>
@@ -92,6 +105,7 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
           language={inputs.code_language}
           value={inputs.code}
           onChange={handleCodeChange}
+          onGenerated={handleGeneratedCode}
           showCodeGenerator={true}
         />
       </div>
@@ -3,7 +3,7 @@ import produce from 'immer'
 import useVarList from '../_base/hooks/use-var-list'
 import useOutputVarList from '../_base/hooks/use-output-var-list'
 import { BlockEnum, VarType } from '../../types'
-import type { Var } from '../../types'
+import type { Var, Variable } from '../../types'
 import { useStore } from '../../store'
 import type { CodeNodeType, OutputVar } from './types'
 import { CodeLanguage } from './types'
@@ -136,7 +136,15 @@ const useConfig = (id: string, payload: CodeNodeType) => {
   const setInputVarValues = useCallback((newPayload: Record<string, any>) => {
     setRunInputData(newPayload)
   }, [setRunInputData])
+  const handleCodeAndVarsChange = useCallback((code: string, inputVariables: Variable[], outputVariables: OutputVar) => {
+    const newInputs = produce(inputs, (draft) => {
+      draft.code = code
+      draft.variables = inputVariables
+      draft.outputs = outputVariables
+    })
+    setInputs(newInputs)
+    syncOutputKeyOrders(outputVariables)
+  }, [inputs, setInputs, syncOutputKeyOrders])
   return {
     readOnly,
     inputs,
@@ -163,6 +171,7 @@ const useConfig = (id: string, payload: CodeNodeType) => {
     inputVarValues,
     setInputVarValues,
     runResult,
+    handleCodeAndVarsChange,
   }
 }
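With this hook in place, code generation flows through one state commit: the editor's `onGenerated` callback fires, the panel derives input variables and output schema via the parser, and `handleCodeAndVarsChange` updates all three fields in a single immer draft before re-syncing output key order. A hedged sketch of that wiring (the three functions exist in the diff; the inline shapes are simplified stand-ins):

```ts
// Illustrative wiring of the pieces added in this commit.
const onGenerated = (value: string) => {
  // Derive input variables from the generated main() signature.
  const params = extractFunctionParams(value, inputs.code_language)
  const variables = params.map(p => ({ variable: p, value_selector: [] }))
  // Derive the output schema from the return statement.
  const outputs = extractReturnType(value, inputs.code_language)
  // One commit keeps code text, inputs and outputs consistent.
  handleCodeAndVarsChange(value, variables, outputs)
}
```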
@@ -240,7 +240,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
     if (
       (allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
       || mixtureInternalAndExternal
-      || (allExternal && newDatasets.length > 1)
+      || allExternal
     )
       setRerankModelOpen(true)
   }, [inputs, setInputs, payload.retrieval_mode, selectedDatasets, currentRerankModel])