Merge branch 'main' into feat/support-extractor-tools

This commit is contained in:
jyong 2024-11-04 14:24:16 +08:00
commit 67b1190535
99 changed files with 2850 additions and 517 deletions

View File

@ -1,3 +1,3 @@
#!/bin/bash
poetry install -C api
cd api && poetry install

View File

@ -49,7 +49,7 @@ jobs:
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
- name: Login to Docker Hub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ env.DOCKERHUB_USER }}
password: ${{ env.DOCKERHUB_TOKEN }}
@ -114,7 +114,7 @@ jobs:
merge-multiple: true
- name: Login to Docker Hub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ env.DOCKERHUB_USER }}
password: ${{ env.DOCKERHUB_TOKEN }}

1
.gitignore vendored
View File

@ -174,6 +174,7 @@ docker/volumes/unstructured/*
docker/volumes/pgvector/data/*
docker/volumes/pgvecto_rs/data/*
docker/volumes/couchbase/*
docker/volumes/oceanbase/*
docker/nginx/conf.d/default.conf
docker/nginx/ssl/*

153
README.md
View File

@ -46,6 +46,56 @@
</p>
## Table of Contents
0. [Quick-Start🚀](https://github.com/langgenius/dify?tab=readme-ov-file#quick-start)
1. [Intro📖](https://github.com/langgenius/dify?tab=readme-ov-file#intro)
2. [How to use🔧](https://github.com/langgenius/dify?tab=readme-ov-file#using-dify)
3. [Stay Ahead🏃](https://github.com/langgenius/dify?tab=readme-ov-file#staying-ahead)
4. [Next Steps🏹](https://github.com/langgenius/dify?tab=readme-ov-file#next-steps)
5. [Contributing💪](https://github.com/langgenius/dify?tab=readme-ov-file#contributing)
6. [Community and Contact🏠](https://github.com/langgenius/dify?tab=readme-ov-file#community--contact)
7. [Star-History📈](https://github.com/langgenius/dify?tab=readme-ov-file#star-history)
8. [Security🔒](https://github.com/langgenius/dify?tab=readme-ov-file#security-disclosure)
9. [License🤝](https://github.com/langgenius/dify?tab=readme-ov-file#license)
> Make sure you read through this README before you start using Dify😊
## Quick start
The quickest way to deploy Dify locally is to run our [docker-compose.yml](https://github.com/langgenius/dify/blob/main/docker/docker-compose.yaml). Follow the instructions to start in 5 minutes.
> Before installing Dify, make sure your machine meets the following minimum system requirements:
>
>- CPU >= 2 Core
>- RAM >= 4 GiB
>- Docker and Docker Compose installed
</br>
Run the following command in your terminal to clone the whole repo.
```bash
git clone https://github.com/langgenius/dify.git
```
After cloning, run the following commands one by one.
```bash
cd dify
cd docker
cp .env.example .env
docker compose up -d
```
After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization process. You will be asked to set up an admin account.
For more information on the quick setup, see [here](https://docs.dify.ai/getting-started/install-self-hosted/docker-compose).
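As an optional sanity check, you can confirm that all containers came up before opening the dashboard:
```bash
# List the Dify services and their status; they should all be running
docker compose ps
```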
## Intro
Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production. Here's a list of the core features:
</br> </br>
@ -79,73 +129,6 @@ Dify is an open-source LLM app development platform. Its intuitive interface com
All of Dify's offerings come with corresponding APIs, so you can effortlessly integrate Dify into your own business logic.
## Feature comparison
<table style="width: 100%;">
<tr>
<th align="center">Feature</th>
<th align="center">Dify.AI</th>
<th align="center">LangChain</th>
<th align="center">Flowise</th>
<th align="center">OpenAI Assistants API</th>
</tr>
<tr>
<td align="center">Programming Approach</td>
<td align="center">API + App-oriented</td>
<td align="center">Python Code</td>
<td align="center">App-oriented</td>
<td align="center">API-oriented</td>
</tr>
<tr>
<td align="center">Supported LLMs</td>
<td align="center">Rich Variety</td>
<td align="center">Rich Variety</td>
<td align="center">Rich Variety</td>
<td align="center">OpenAI-only</td>
</tr>
<tr>
<td align="center">RAG Engine</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Agent</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Workflow</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Observability</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Enterprise Features (SSO/Access control)</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Local Deployment</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
</table>
## Using Dify
- **Cloud </br>**
@ -166,30 +149,21 @@ Star Dify on GitHub and be instantly notified of new releases.
![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4)
## Quick start
> Before installing Dify, make sure your machine meets the following minimum system requirements:
>
>- CPU >= 2 Core
>- RAM >= 4 GiB
</br>
The easiest way to start the Dify server is to run our [docker-compose.yml](docker/docker-compose.yaml) file. Before running the installation command, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
```bash
cd docker
cp .env.example .env
docker compose up -d
```
After running, you can access the Dify dashboard in your browser at [http://localhost/install](http://localhost/install) and start the initialization process.
> If you'd like to contribute to Dify or do additional development, refer to our [guide to deploying from source code](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code)
## Next steps
Go to [quick-start](https://github.com/langgenius/dify?tab=readme-ov-file#quick-start) to set up Dify, or set it up from source code.
#### If you...
If you forget your admin account, you can refer to this [guide](https://docs.dify.ai/getting-started/install-self-hosted/faqs#id-4.-how-to-reset-the-password-of-the-admin-account) to reset the password.
> Use `docker compose up` without `-d` to print logs to your terminal. This can be useful if you run into unknown problems when using Dify.
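For example, to watch the logs of a stack that is already running detached, you can follow them instead of restarting in the foreground:
```bash
# Stream logs from all services; append a service name (e.g. api) to narrow the output
docker compose logs -f
```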
If you encounter a system error and would like help via GitHub issues, always paste the error logs in your request to speed up the conversation. Go to [Community & contact](https://github.com/langgenius/dify?tab=readme-ov-file#community--contact) for more information.
> Please read the [Dify Documentation](https://docs.dify.ai/) for detailed how-to guidance. Most potential problems are explained in the docs.
> If you'd like to contribute to Dify or do further development, refer to our [guide to deploying from source code](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code).
If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker-compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments).
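For instance, a minimal customization flow might look like this (a sketch using GNU sed; `EXPOSE_NGINX_PORT` is one of the variables documented in `.env.example`):
```bash
# Change the exposed HTTP port, then recreate the stack so the change takes effect
sed -i 's/^EXPOSE_NGINX_PORT=80$/EXPOSE_NGINX_PORT=8080/' .env
docker compose up -d
```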
If you'd like to configure a highly available setup, there are community-contributed [Helm Charts](https://helm.sh/) and YAML files which allow Dify to be deployed on Kubernetes.
@ -228,6 +202,7 @@ At the same time, please consider supporting Dify by sharing it on social media
* [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
* [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community.
* [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community.
* When reporting an error, attach logs where possible so the issue can be resolved faster.
## Star history

241
README_PT.md Normal file
View File

@ -0,0 +1,241 @@
![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab)
<p align="center">
📌 <a href="https://dify.ai/blog/introducing-dify-workflow-file-upload-a-demo-on-ai-podcast">Introduzindo o Dify Workflow com Upload de Arquivo: Recrie o Podcast Google NotebookLM</a>
</p>
<p align="center">
<a href="https://cloud.dify.ai">Dify Cloud</a> ·
<a href="https://docs.dify.ai/getting-started/install-self-hosted">Auto-hospedagem</a> ·
<a href="https://docs.dify.ai">Documentação</a> ·
<a href="https://udify.app/chat/22L1zSxg6yW1cWQg">Consultas empresariais</a>
</p>
<p align="center">
<a href="https://dify.ai" target="_blank">
<img alt="Static Badge" src="https://img.shields.io/badge/Product-F04438"></a>
<a href="https://dify.ai/pricing" target="_blank">
<img alt="Static Badge" src="https://img.shields.io/badge/free-pricing?logo=free&color=%20%23155EEF&label=pricing&labelColor=%20%23528bff"></a>
<a href="https://discord.gg/FngNHpbcY7" target="_blank">
<img src="https://img.shields.io/discord/1082486657678311454?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb"
alt="chat on Discord"></a>
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
<img alt="Commits last month" src="https://img.shields.io/github/commit-activity/m/langgenius/dify?labelColor=%20%2332b583&color=%20%2312b76a"></a>
<a href="https://github.com/langgenius/dify/" target="_blank">
<img alt="Issues closed" src="https://img.shields.io/github/issues-search?query=repo%3Alanggenius%2Fdify%20is%3Aclosed&label=issues%20closed&labelColor=%20%237d89b0&color=%20%235d6b98"></a>
<a href="https://github.com/langgenius/dify/discussions/" target="_blank">
<img alt="Discussion posts" src="https://img.shields.io/github/discussions/langgenius/dify?labelColor=%20%239b8afb&color=%20%237a5af8"></a>
</p>
<p align="center">
<a href="./README.md"><img alt="README em Inglês" src="https://img.shields.io/badge/English-d9d9d9"></a>
<a href="./README_CN.md"><img alt="简体中文版自述文件" src="https://img.shields.io/badge/简体中文-d9d9d9"></a>
<a href="./README_JA.md"><img alt="日本語のREADME" src="https://img.shields.io/badge/日本語-d9d9d9"></a>
<a href="./README_ES.md"><img alt="README em Espanhol" src="https://img.shields.io/badge/Español-d9d9d9"></a>
<a href="./README_FR.md"><img alt="README em Francês" src="https://img.shields.io/badge/Français-d9d9d9"></a>
<a href="./README_KL.md"><img alt="README tlhIngan Hol" src="https://img.shields.io/badge/Klingon-d9d9d9"></a>
<a href="./README_KR.md"><img alt="README em Coreano" src="https://img.shields.io/badge/한국어-d9d9d9"></a>
<a href="./README_AR.md"><img alt="README em Árabe" src="https://img.shields.io/badge/العربية-d9d9d9"></a>
<a href="./README_TR.md"><img alt="README em Turco" src="https://img.shields.io/badge/Türkçe-d9d9d9"></a>
<a href="./README_VI.md"><img alt="README em Vietnamita" src="https://img.shields.io/badge/Ti%E1%BA%BFng%20Vi%E1%BB%87t-d9d9d9"></a>
<a href="./README_PT.md"><img alt="README em Português - BR" src="https://img.shields.io/badge/Portugu%C3%AAs-BR?style=flat&label=BR&color=d9d9d9"></a>
</p>
Dify é uma plataforma de desenvolvimento de aplicativos LLM de código aberto. Sua interface intuitiva combina workflow de IA, pipeline RAG, capacidades de agente, gerenciamento de modelos, recursos de observabilidade e muito mais, permitindo que você vá rapidamente do protótipo à produção. Aqui está uma lista das principais funcionalidades:
</br> </br>
**1. Workflow**:
Construa e teste workflows poderosos de IA em uma interface visual, aproveitando todos os recursos a seguir e muito mais.
https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa
**2. Suporte abrangente a modelos**:
Integração perfeita com centenas de LLMs proprietários e de código aberto de diversas provedoras e soluções auto-hospedadas, abrangendo GPT, Mistral, Llama3 e qualquer modelo compatível com a API da OpenAI. A lista completa de provedores suportados pode ser encontrada [aqui](https://docs.dify.ai/getting-started/readme/model-providers).
![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)
**3. IDE de Prompt**:
Interface intuitiva para criação de prompts, comparação de desempenho de modelos e adição de recursos como conversão de texto para fala em um aplicativo baseado em chat.
**4. Pipeline RAG**:
Extensas capacidades de RAG que cobrem desde a ingestão de documentos até a recuperação, com suporte nativo para extração de texto de PDFs, PPTs e outros formatos de documentos comuns.
**5. Capacidades de agente**:
Você pode definir agentes com base em LLM Function Calling ou ReAct e adicionar ferramentas pré-construídas ou personalizadas para o agente. O Dify oferece mais de 50 ferramentas integradas para agentes de IA, como Google Search, DALL·E, Stable Diffusion e WolframAlpha.
**6. LLMOps**:
Monitore e analise os registros e o desempenho do aplicativo ao longo do tempo. É possível melhorar continuamente prompts, conjuntos de dados e modelos com base nos dados de produção e anotações.
**7. Backend como Serviço**:
Todos os recursos do Dify vêm com APIs correspondentes, permitindo que você integre o Dify sem esforço na lógica de negócios da sua empresa.
## Comparação de recursos
<table style="width: 100%;">
<tr>
<th align="center">Recurso</th>
<th align="center">Dify.AI</th>
<th align="center">LangChain</th>
<th align="center">Flowise</th>
<th align="center">OpenAI Assistants API</th>
</tr>
<tr>
<td align="center">Abordagem de Programação</td>
<td align="center">Orientada a API + Aplicativo</td>
<td align="center">Código Python</td>
<td align="center">Orientada a Aplicativo</td>
<td align="center">Orientada a API</td>
</tr>
<tr>
<td align="center">LLMs Suportados</td>
<td align="center">Variedade Rica</td>
<td align="center">Variedade Rica</td>
<td align="center">Variedade Rica</td>
<td align="center">Apenas OpenAI</td>
</tr>
<tr>
<td align="center">RAG Engine</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Agente</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Workflow</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Observabilidade</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Recursos Empresariais (SSO/Controle de Acesso)</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
<tr>
<td align="center">Implantação Local</td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
<td align="center"></td>
</tr>
</table>
## Usando o Dify
- **Nuvem </br>**
Oferecemos o serviço [Dify Cloud](https://dify.ai) para qualquer pessoa experimentar sem nenhuma configuração. Ele fornece todas as funcionalidades da versão auto-hospedada, incluindo 200 chamadas GPT-4 gratuitas no plano sandbox.
- **Auto-hospedagem do Dify Community Edition</br>**
Configure rapidamente o Dify no seu ambiente com este [guia inicial](#quick-start).
Use nossa [documentação](https://docs.dify.ai) para referências adicionais e instruções mais detalhadas.
- **Dify para empresas/organizações</br>**
Oferecemos recursos adicionais voltados para empresas. [Envie suas perguntas através deste chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) ou [envie-nos um e-mail](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry) para discutir necessidades empresariais. </br>
> Para startups e pequenas empresas que utilizam AWS, confira o [Dify Premium no AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e implemente no seu próprio AWS VPC com um clique. É uma oferta AMI acessível com a opção de criar aplicativos com logotipo e marca personalizados.
## Mantendo-se atualizado
Dê uma estrela no Dify no GitHub e seja notificado imediatamente sobre novos lançamentos.
![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4)
## Início rápido
> Antes de instalar o Dify, certifique-se de que sua máquina atenda aos seguintes requisitos mínimos de sistema:
>
>- CPU >= 2 Núcleos
>- RAM >= 4 GiB
</br>
A maneira mais fácil de iniciar o servidor Dify é executar nosso arquivo [docker-compose.yml](docker/docker-compose.yaml). Antes de rodar o comando de instalação, certifique-se de que o [Docker](https://docs.docker.com/get-docker/) e o [Docker Compose](https://docs.docker.com/compose/install/) estão instalados na sua máquina:
```bash
cd docker
cp .env.example .env
docker compose up -d
```
Após a execução, você pode acessar o painel do Dify no navegador em [http://localhost/install](http://localhost/install) e iniciar o processo de inicialização.
> Se você deseja contribuir com o Dify ou fazer desenvolvimento adicional, consulte nosso [guia para implantar a partir do código fonte](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code).
## Próximos passos
Se precisar personalizar a configuração, consulte os comentários no nosso arquivo [.env.example](docker/.env.example) e atualize os valores correspondentes no seu arquivo `.env`. Além disso, talvez seja necessário fazer ajustes no próprio arquivo `docker-compose.yaml`, como alterar versões de imagem, mapeamentos de portas ou montagens de volumes, com base no seu ambiente de implantação específico e nas suas necessidades. Após fazer quaisquer alterações, execute novamente `docker-compose up -d`. Você pode encontrar a lista completa de variáveis de ambiente disponíveis [aqui](https://docs.dify.ai/getting-started/install-self-hosted/environments).
Se deseja configurar uma instalação de alta disponibilidade, há [Helm Charts](https://helm.sh/) e arquivos YAML contribuídos pela comunidade que permitem a implantação do Dify no Kubernetes.
- [Helm Chart de @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
- [Helm Chart de @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm)
- [Arquivo YAML de @Winson-030](https://github.com/Winson-030/dify-kubernetes)
#### Usando o Terraform para Implantação
Implante o Dify na Plataforma Cloud com um único clique usando [terraform](https://www.terraform.io/)
##### Azure Global
- [Azure Terraform por @nikawang](https://github.com/nikawang/dify-azure-terraform)
##### Google Cloud
- [Google Cloud Terraform por @sotazum](https://github.com/DeNA/dify-google-cloud-terraform)
## Contribuindo
Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências.
> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
**Contribuidores**
<a href="https://github.com/langgenius/dify/graphs/contributors">
<img src="https://contrib.rocks/image?repo=langgenius/dify" />
</a>
## Comunidade e contato
* [Discussões no GitHub](https://github.com/langgenius/dify/discussions). Melhor para: compartilhar feedback e fazer perguntas.
* [Problemas no GitHub](https://github.com/langgenius/dify/issues). Melhor para: relatar bugs encontrados no Dify.AI e propor novos recursos. Veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
* [Discord](https://discord.gg/FngNHpbcY7). Melhor para: compartilhar suas aplicações e interagir com a comunidade.
* [X(Twitter)](https://twitter.com/dify_ai). Melhor para: compartilhar suas aplicações e interagir com a comunidade.
## Histórico de estrelas
[![Gráfico de Histórico de Estrelas](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date)
## Divulgação de segurança
Para proteger sua privacidade, evite postar problemas de segurança no GitHub. Em vez disso, envie suas perguntas para security@dify.ai e forneceremos uma resposta mais detalhada.
## Licença
Este repositório está disponível sob a [Licença de Código Aberto Dify](LICENSE), que é essencialmente Apache 2.0 com algumas restrições adicionais.

View File

@ -202,6 +202,20 @@ TIDB_VECTOR_USER=xxx.root
TIDB_VECTOR_PASSWORD=xxxxxx
TIDB_VECTOR_DATABASE=dify
# Tidb on qdrant configuration
TIDB_ON_QDRANT_URL=http://127.0.0.1
TIDB_ON_QDRANT_API_KEY=dify
TIDB_ON_QDRANT_CLIENT_TIMEOUT=20
TIDB_ON_QDRANT_GRPC_ENABLED=false
TIDB_ON_QDRANT_GRPC_PORT=6334
TIDB_PUBLIC_KEY=dify
TIDB_PRIVATE_KEY=dify
TIDB_API_URL=http://127.0.0.1
TIDB_IAM_API_URL=http://127.0.0.1
TIDB_REGION=regions/aws-us-east-1
TIDB_PROJECT_ID=dify
TIDB_SPEND_LIMIT=100
# Chroma configuration
CHROMA_HOST=127.0.0.1
CHROMA_PORT=8000
@ -249,6 +263,14 @@ VIKINGDB_SCHEMA=http
VIKINGDB_CONNECTION_TIMEOUT=30
VIKINGDB_SOCKET_TIMEOUT=30
# OceanBase Vector configuration
OCEANBASE_VECTOR_HOST=127.0.0.1
OCEANBASE_VECTOR_PORT=2881
OCEANBASE_VECTOR_USER=root@test
OCEANBASE_VECTOR_PASSWORD=
OCEANBASE_VECTOR_DATABASE=test
OCEANBASE_MEMORY_LIMIT=6G
# Upload configuration
UPLOAD_FILE_SIZE_LIMIT=15
UPLOAD_FILE_BATCH_LIMIT=5

View File

@ -55,7 +55,12 @@ RUN apt-get update \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.3-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \
&& apt-get install -y --no-install-recommends expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \
&& if [ "$(dpkg --print-architecture)" = "amd64" ]; then \
apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1+b1; \
else \
apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1; \
fi \
# install a chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
&& apt-get autoremove -y \

View File

@ -279,6 +279,7 @@ def migrate_knowledge_vector_database():
VectorType.VIKINGDB,
VectorType.UPSTASH,
VectorType.COUCHBASE,
VectorType.OCEANBASE,
}
page = 1
while True:

View File

@ -16,11 +16,13 @@ from configs.middleware.storage.supabase_storage_config import SupabaseStorageCo
from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
from configs.middleware.vdb.chroma_config import ChromaConfig
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
from configs.middleware.vdb.milvus_config import MilvusConfig
from configs.middleware.vdb.myscale_config import MyScaleConfig
from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
from configs.middleware.vdb.opensearch_config import OpenSearchConfig
from configs.middleware.vdb.oracle_config import OracleConfig
from configs.middleware.vdb.pgvector_config import PGVectorConfig
@ -257,5 +259,7 @@ class MiddlewareConfig(
VikingDBConfig,
UpstashConfig,
TidbOnQdrantConfig,
OceanBaseVectorConfig,
BaiduVectorDBConfig,
):
pass

View File

@ -0,0 +1,35 @@
from typing import Optional
from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings
class OceanBaseVectorConfig(BaseSettings):
"""
Configuration settings for OceanBase Vector database
"""
OCEANBASE_VECTOR_HOST: Optional[str] = Field(
description="Hostname or IP address of the OceanBase Vector server (e.g. 'localhost')",
default=None,
)
OCEANBASE_VECTOR_PORT: Optional[PositiveInt] = Field(
description="Port number on which the OceanBase Vector server is listening (default is 2881)",
default=2881,
)
OCEANBASE_VECTOR_USER: Optional[str] = Field(
description="Username for authenticating with the OceanBase Vector database",
default=None,
)
OCEANBASE_VECTOR_PASSWORD: Optional[str] = Field(
description="Password for authenticating with the OceanBase Vector database",
default=None,
)
OCEANBASE_VECTOR_DATABASE: Optional[str] = Field(
description="Name of the OceanBase Vector database to connect to",
default=None,
)
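Because this class extends `BaseSettings`, each field is populated from the environment variable of the same name. A minimal sketch of how the `.env` additions above flow into typed configuration, assuming the class is importable as defined here:
```python
import os

# pydantic-settings reads each field from the matching environment variable
os.environ["OCEANBASE_VECTOR_HOST"] = "127.0.0.1"
os.environ["OCEANBASE_VECTOR_PORT"] = "2881"

config = OceanBaseVectorConfig()
assert config.OCEANBASE_VECTOR_HOST == "127.0.0.1"
assert config.OCEANBASE_VECTOR_PORT == 2881  # coerced from str to PositiveInt
```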

View File

@ -63,3 +63,8 @@ class TidbOnQdrantConfig(BaseSettings):
description="Tidb project id",
default=None,
)
TIDB_SPEND_LIMIT: Optional[int] = Field(
description="Tidb spend limit",
default=100,
)

View File

@ -628,6 +628,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.BAIDU
| VectorType.VIKINGDB
| VectorType.UPSTASH
| VectorType.OCEANBASE
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (
@ -669,6 +670,7 @@ class DatasetRetrievalSettingMockApi(Resource):
| VectorType.BAIDU
| VectorType.VIKINGDB
| VectorType.UPSTASH
| VectorType.OCEANBASE
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (

View File

@ -3,6 +3,7 @@ import logging
import requests
from flask_restful import Resource, reqparse
from packaging import version
from configs import dify_config
@ -47,43 +48,15 @@ class VersionApi(Resource):
def _has_new_version(*, latest_version: str, current_version: str) -> bool:
def parse_version(version: str) -> tuple:
# Split version into parts and pre-release suffix if any
parts = version.split("-")
version_parts = parts[0].split(".")
pre_release = parts[1] if len(parts) > 1 else None
try:
latest = version.parse(latest_version)
current = version.parse(current_version)
# Validate version format
if len(version_parts) != 3:
raise ValueError(f"Invalid version format: {version}")
try:
# Convert version parts to integers
major, minor, patch = map(int, version_parts)
return (major, minor, patch, pre_release)
except ValueError:
raise ValueError(f"Invalid version format: {version}")
latest = parse_version(latest_version)
current = parse_version(current_version)
# Compare major, minor, and patch versions
for latest_part, current_part in zip(latest[:3], current[:3]):
if latest_part > current_part:
return True
elif latest_part < current_part:
return False
# If versions are equal, check pre-release suffixes
if latest[3] is None and current[3] is not None:
return True
elif latest[3] is not None and current[3] is None:
# Compare versions
return latest > current
except version.InvalidVersion:
logging.warning(f"Invalid version format: latest={latest_version}, current={current_version}")
return False
elif latest[3] is not None and current[3] is not None:
# Simple string comparison for pre-release versions
return latest[3] > current[3]
return False
api.add_resource(VersionApi, "/version")
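The refactor above delegates version comparison to `packaging.version`, which already implements the pre-release ordering that the removed hand-rolled parser special-cased. A minimal sketch of the semantics being relied on:
```python
from packaging import version

# Pre-releases sort before their final release, so no manual suffix handling is needed
assert version.parse("0.10.1") > version.parse("0.10.0")
assert version.parse("0.10.0") > version.parse("0.10.0-rc1")
assert not version.parse("0.10.0") > version.parse("0.10.0")
```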

View File

@ -230,7 +230,7 @@ class DocumentUpdateByFileApi(DatasetApiResource):
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
return documents_and_batch_fields, 200
@ -331,10 +331,26 @@ class DocumentIndexingStatusApi(DatasetApiResource):
return data
api.add_resource(DocumentAddByTextApi, "/datasets/<uuid:dataset_id>/document/create_by_text")
api.add_resource(DocumentAddByFileApi, "/datasets/<uuid:dataset_id>/document/create_by_file")
api.add_resource(DocumentUpdateByTextApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text")
api.add_resource(DocumentUpdateByFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file")
api.add_resource(
DocumentAddByTextApi,
"/datasets/<uuid:dataset_id>/document/create_by_text",
"/datasets/<uuid:dataset_id>/document/create-by-text",
)
api.add_resource(
DocumentAddByFileApi,
"/datasets/<uuid:dataset_id>/document/create_by_file",
"/datasets/<uuid:dataset_id>/document/create-by-file",
)
api.add_resource(
DocumentUpdateByTextApi,
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text",
)
api.add_resource(
DocumentUpdateByFileApi,
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
)
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
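With the aliases above, the hyphenated paths resolve to the same resources as the original snake_case ones. A hedged sketch of a request against the new spelling (hypothetical host, dataset ID, API key, and payload):
```bash
# The hyphenated alias accepts the same request as the snake_case original
curl -X POST 'http://localhost/v1/datasets/<dataset_id>/document/create-by-text' \
  -H 'Authorization: Bearer <dataset_api_key>' \
  -H 'Content-Type: application/json' \
  -d '{"name": "doc", "text": "hello", "indexing_technique": "economy", "process_rule": {"mode": "automatic"}}'
```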

View File

@ -14,4 +14,4 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
return self.perform_hit_testing(dataset, args)
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing", "/datasets/<uuid:dataset_id>/retrieve")

View File

@ -37,6 +37,17 @@ def _get_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
return rule
def _get_o1_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
rule = ParameterRule(
name="max_completion_tokens",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.MAX_TOKENS],
)
rule.default = default
rule.min = min_val
rule.max = max_val
return rule
class AzureBaseModel(BaseModel):
base_model_name: str
entity: AIModelEntity
@ -1098,14 +1109,6 @@ LLM_BASE_MODELS = [
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name="temperature",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name="top_p",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name="response_format",
label=I18nObject(zh_Hans="回复格式", en_US="response_format"),
@ -1116,7 +1119,7 @@ LLM_BASE_MODELS = [
required=False,
options=["text", "json_object"],
),
_get_max_tokens(default=512, min_val=1, max_val=32768),
_get_o1_max_tokens(default=512, min_val=1, max_val=32768),
],
pricing=PriceConfig(
input=15.00,
@ -1143,14 +1146,6 @@ LLM_BASE_MODELS = [
ModelPropertyKey.CONTEXT_SIZE: 128000,
},
parameter_rules=[
ParameterRule(
name="temperature",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
),
ParameterRule(
name="top_p",
**PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
),
ParameterRule(
name="response_format",
label=I18nObject(zh_Hans="回复格式", en_US="response_format"),
@ -1161,7 +1156,7 @@ LLM_BASE_MODELS = [
required=False,
options=["text", "json_object"],
),
_get_max_tokens(default=512, min_val=1, max_val=65536),
_get_o1_max_tokens(default=512, min_val=1, max_val=65536),
],
pricing=PriceConfig(
input=3.00,
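For context: OpenAI's o1-series models take `max_completion_tokens` instead of the legacy `max_tokens` and do not support `temperature`/`top_p`, which is why the hunks above swap `_get_max_tokens` for `_get_o1_max_tokens` and drop those two parameter rules. A hedged sketch of the resulting request shape (hypothetical payload, not code from this commit):
```python
# Hypothetical chat-completions payload for an o1-series deployment:
# temperature/top_p are omitted and the token cap uses max_completion_tokens
payload = {
    "model": "o1-preview",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_completion_tokens": 512,
}
```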

View File

@ -1,7 +1,7 @@
model: hunyuan-standard-256k
model: hunyuan-standard-256K
label:
zh_Hans: hunyuan-standard-256k
en_US: hunyuan-standard-256k
zh_Hans: hunyuan-standard-256K
en_US: hunyuan-standard-256K
model_type: llm
features:
- agent-thought

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,3 @@
<svg width="1200" height="925" viewBox="0 0 1200 925" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M780.152 250.999L907.882 462.174C907.882 462.174 880.925 510.854 867.43 535.21C834.845 594.039 764.171 612.49 710.442 508.333L420.376 0H0L459.926 803.307C552.303 964.663 787.366 964.663 879.743 803.307C989.874 610.952 1089.87 441.97 1200 249.646L1052.28 0H639.519L780.152 250.999Z" fill="#3366FF"/>
</svg>

After

Width:  |  Height:  |  Size: 417 B

View File

@ -0,0 +1,83 @@
from decimal import Decimal
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.llm_entities import LLMMode
from core.model_runtime.entities.model_entities import (
AIModelEntity,
DefaultParameterName,
FetchFrom,
ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
PriceConfig,
)
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
class VesslAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
features = []
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.LLM,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
features=features,
model_properties={
ModelPropertyKey.MODE: credentials.get("mode"),
},
parameter_rules=[
ParameterRule(
name=DefaultParameterName.TEMPERATURE.value,
label=I18nObject(en_US="Temperature"),
type=ParameterType.FLOAT,
default=float(credentials.get("temperature", 0.7)),
min=0,
max=2,
precision=2,
),
ParameterRule(
name=DefaultParameterName.TOP_P.value,
label=I18nObject(en_US="Top P"),
type=ParameterType.FLOAT,
default=float(credentials.get("top_p", 1)),
min=0,
max=1,
precision=2,
),
ParameterRule(
name=DefaultParameterName.TOP_K.value,
label=I18nObject(en_US="Top K"),
type=ParameterType.INT,
default=int(credentials.get("top_k", 50)),
min=-2147483647,
max=2147483647,
precision=0,
),
ParameterRule(
name=DefaultParameterName.MAX_TOKENS.value,
label=I18nObject(en_US="Max Tokens"),
type=ParameterType.INT,
default=512,
min=1,
max=int(credentials.get("max_tokens_to_sample", 4096)),
),
],
pricing=PriceConfig(
input=Decimal(credentials.get("input_price", 0)),
output=Decimal(credentials.get("output_price", 0)),
unit=Decimal(credentials.get("unit", 0)),
currency=credentials.get("currency", "USD"),
),
)
if credentials["mode"] == "chat":
entity.model_properties[ModelPropertyKey.MODE] = LLMMode.CHAT.value
elif credentials["mode"] == "completion":
entity.model_properties[ModelPropertyKey.MODE] = LLMMode.COMPLETION.value
else:
raise ValueError(f"Unknown completion type {credentials['completion_type']}")
return entity

View File

@ -0,0 +1,10 @@
import logging
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class VesslAIProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
pass

View File

@ -0,0 +1,56 @@
provider: vessl_ai
label:
en_US: vessl_ai
icon_small:
en_US: icon_s_en.svg
icon_large:
en_US: icon_l_en.png
background: "#F1EFED"
help:
title:
en_US: How to deploy VESSL AI LLM Model Endpoint
url:
en_US: https://docs.vessl.ai/guides/get-started/llama3-deployment
supported_model_types:
- llm
configurate_methods:
- customizable-model
model_credential_schema:
model:
label:
en_US: Model Name
placeholder:
en_US: Enter your model name
credential_form_schemas:
- variable: endpoint_url
label:
en_US: endpoint url
type: text-input
required: true
placeholder:
en_US: Enter the url of your endpoint url
- variable: api_key
required: true
label:
en_US: API Key
type: secret-input
placeholder:
en_US: Enter your VESSL AI secret key
- variable: mode
show_on:
- variable: __model_type
value: llm
label:
en_US: Completion mode
type: select
required: false
default: chat
placeholder:
en_US: Select completion mode
options:
- value: completion
label:
en_US: Completion
- value: chat
label:
en_US: Chat

View File

@ -115,6 +115,7 @@ class _CommonWenxin:
"ernie-character-8k-0321": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-char-8k",
"ernie-4.0-turbo-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-8k",
"ernie-4.0-turbo-8k-preview": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-8k-preview",
"ernie-4.0-turbo-128k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-turbo-128k",
"yi_34b_chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/yi_34b_chat",
"embedding-v1": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/embedding-v1",
"bge-large-en": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_en",

View File

@ -0,0 +1,40 @@
model: ernie-4.0-turbo-128k
label:
en_US: Ernie-4.0-turbo-128K
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
min: 0.1
max: 1.0
default: 0.8
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 1024
min: 2
max: 4096
- name: presence_penalty
use_template: presence_penalty
default: 1.0
min: 1.0
max: 2.0
- name: frequency_penalty
use_template: frequency_penalty
- name: response_format
use_template: response_format
- name: disable_search
label:
zh_Hans: 禁用搜索
en_US: Disable Search
type: boolean
help:
zh_Hans: 禁用模型自行进行外部搜索。
en_US: Disable the model to perform external search.
required: false

View File

@ -34,6 +34,8 @@ class RetrievalService:
reranking_mode: Optional[str] = "reranking_model",
weights: Optional[dict] = None,
):
if not query:
return []
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
return []

View File

@ -3,11 +3,13 @@ import time
import uuid
from typing import Any
import numpy as np
from pydantic import BaseModel, model_validator
from pymochow import MochowClient
from pymochow.auth.bce_credentials import BceCredentials
from pymochow.configuration import Configuration
from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, TableState
from pymochow.exception import ServerError
from pymochow.model.enum import FieldType, IndexState, IndexType, MetricType, ServerErrCode, TableState
from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex
from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row
@ -116,6 +118,7 @@ class BaiduVector(BaseVector):
self._db.table(self._collection_name).delete(filter=f"{key} = '{value}'")
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
query_vector = [float(val) if isinstance(val, np.float64) else val for val in query_vector]
anns = AnnSearch(
vector_field=self.field_vector,
vector_floats=query_vector,
@ -149,7 +152,13 @@ class BaiduVector(BaseVector):
return docs
def delete(self) -> None:
self._db.drop_table(table_name=self._collection_name)
try:
self._db.drop_table(table_name=self._collection_name)
except ServerError as e:
if e.code == ServerErrCode.TABLE_NOT_EXIST:
pass
else:
raise
def _init_client(self, config) -> MochowClient:
config = Configuration(credentials=BceCredentials(config.account, config.api_key), endpoint=config.endpoint)
@ -166,7 +175,14 @@ class BaiduVector(BaseVector):
if exists:
return self._client.database(self._client_config.database)
else:
return self._client.create_database(database_name=self._client_config.database)
try:
self._client.create_database(database_name=self._client_config.database)
except ServerError as e:
if e.code == ServerErrCode.DB_ALREADY_EXIST:
pass
else:
raise
return
def _table_existed(self) -> bool:
tables = self._db.list_table()
@ -175,7 +191,7 @@ class BaiduVector(BaseVector):
def _create_table(self, dimension: int) -> None:
# Try to grab distributed lock and create table
lock_name = "vector_indexing_lock_{}".format(self._collection_name)
with redis_client.lock(lock_name, timeout=20):
with redis_client.lock(lock_name, timeout=60):
table_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
if redis_client.get(table_exist_cache_key):
return
@ -238,15 +254,14 @@ class BaiduVector(BaseVector):
description="Table for Dify",
)
# Wait for table created
while True:
time.sleep(1)
table = self._db.describe_table(self._collection_name)
if table.state == TableState.NORMAL:
break
redis_client.set(table_exist_cache_key, 1, ex=3600)
# Wait for table created
while True:
time.sleep(1)
table = self._db.describe_table(self._collection_name)
if table.state == TableState.NORMAL:
break
class BaiduVectorFactory(AbstractVectorFactory):
def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> BaiduVector:

View File

@ -0,0 +1,209 @@
import json
import logging
import math
from typing import Any
from pydantic import BaseModel, model_validator
from pyobvector import VECTOR, ObVecClient
from sqlalchemy import JSON, Column, String, func
from sqlalchemy.dialects.mysql import LONGTEXT
from configs import dify_config
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_redis import redis_client
from models.dataset import Dataset
logger = logging.getLogger(__name__)
DEFAULT_OCEANBASE_HNSW_BUILD_PARAM = {"M": 16, "efConstruction": 256}
DEFAULT_OCEANBASE_HNSW_SEARCH_PARAM = {"efSearch": 64}
OCEANBASE_SUPPORTED_VECTOR_INDEX_TYPE = "HNSW"
DEFAULT_OCEANBASE_VECTOR_METRIC_TYPE = "l2"
class OceanBaseVectorConfig(BaseModel):
host: str
port: int
user: str
password: str
database: str
@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
if not values["host"]:
raise ValueError("config OCEANBASE_VECTOR_HOST is required")
if not values["port"]:
raise ValueError("config OCEANBASE_VECTOR_PORT is required")
if not values["user"]:
raise ValueError("config OCEANBASE_VECTOR_USER is required")
if not values["database"]:
raise ValueError("config OCEANBASE_VECTOR_DATABASE is required")
return values
class OceanBaseVector(BaseVector):
def __init__(self, collection_name: str, config: OceanBaseVectorConfig):
super().__init__(collection_name)
self._config = config
self._hnsw_ef_search = -1
self._client = ObVecClient(
uri=f"{self._config.host}:{self._config.port}",
user=self._config.user,
password=self._config.password,
db_name=self._config.database,
)
def get_type(self) -> str:
return VectorType.OCEANBASE
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
self._vec_dim = len(embeddings[0])
self._create_collection()
self.add_texts(texts, embeddings)
def _create_collection(self) -> None:
lock_name = "vector_indexing_lock_" + self._collection_name
with redis_client.lock(lock_name, timeout=20):
collection_exist_cache_key = "vector_indexing_" + self._collection_name
if redis_client.get(collection_exist_cache_key):
return
if self._client.check_table_exists(self._collection_name):
return
self.delete()
cols = [
Column("id", String(36), primary_key=True, autoincrement=False),
Column("vector", VECTOR(self._vec_dim)),
Column("text", LONGTEXT),
Column("metadata", JSON),
]
vidx_params = self._client.prepare_index_params()
vidx_params.add_index(
field_name="vector",
index_type=OCEANBASE_SUPPORTED_VECTOR_INDEX_TYPE,
index_name="vector_index",
metric_type=DEFAULT_OCEANBASE_VECTOR_METRIC_TYPE,
params=DEFAULT_OCEANBASE_HNSW_BUILD_PARAM,
)
self._client.create_table_with_index_params(
table_name=self._collection_name,
columns=cols,
vidxs=vidx_params,
)
vals = []
params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'")
for row in params:
val = int(row[6])
vals.append(val)
if len(vals) == 0:
print("ob_vector_memory_limit_percentage not found in parameters.")
exit(1)
if any(val == 0 for val in vals):
try:
self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
except Exception as e:
raise Exception(
"Failed to set ob_vector_memory_limit_percentage. "
+ "Maybe the database user has insufficient privilege.",
e,
)
redis_client.set(collection_exist_cache_key, 1, ex=3600)
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
ids = self._get_uuids(documents)
for id, doc, emb in zip(ids, documents, embeddings):
self._client.insert(
table_name=self._collection_name,
data={
"id": id,
"vector": emb,
"text": doc.page_content,
"metadata": doc.metadata,
},
)
def text_exists(self, id: str) -> bool:
cur = self._client.get(table_name=self._collection_name, id=id)
return cur.rowcount != 0
def delete_by_ids(self, ids: list[str]) -> None:
self._client.delete(table_name=self._collection_name, ids=ids)
def get_ids_by_metadata_field(self, key: str, value: str) -> list[str]:
cur = self._client.get(
table_name=self._collection_name,
where_clause=f"metadata->>'$.{key}' = '{value}'",
output_column_name=["id"],
)
return [row[0] for row in cur]
def delete_by_metadata_field(self, key: str, value: str) -> None:
ids = self.get_ids_by_metadata_field(key, value)
self.delete_by_ids(ids)
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
return []
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
ef_search = kwargs.get("ef_search", self._hnsw_ef_search)
if ef_search != self._hnsw_ef_search:
self._client.set_ob_hnsw_ef_search(ef_search)
self._hnsw_ef_search = ef_search
topk = kwargs.get("top_k", 10)
cur = self._client.ann_search(
table_name=self._collection_name,
vec_column_name="vector",
vec_data=query_vector,
topk=topk,
distance_func=func.l2_distance,
output_column_names=["text", "metadata"],
with_dist=True,
)
docs = []
for text, metadata, distance in cur:
metadata = json.loads(metadata)
metadata["score"] = 1 - distance / math.sqrt(2)
docs.append(
Document(
page_content=text,
metadata=metadata,
)
)
return docs
def delete(self) -> None:
self._client.drop_table_if_exist(self._collection_name)
class OceanBaseVectorFactory(AbstractVectorFactory):
def init_vector(
self,
dataset: Dataset,
attributes: list,
embeddings: Embeddings,
) -> BaseVector:
if dataset.index_struct_dict:
class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
collection_name = class_prefix.lower()
else:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.OCEANBASE, collection_name))
return OceanBaseVector(
collection_name,
OceanBaseVectorConfig(
host=dify_config.OCEANBASE_VECTOR_HOST,
port=dify_config.OCEANBASE_VECTOR_PORT,
user=dify_config.OCEANBASE_VECTOR_USER,
password=(dify_config.OCEANBASE_VECTOR_PASSWORD or ""),
database=dify_config.OCEANBASE_VECTOR_DATABASE,
),
)
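A minimal usage sketch of the new store, assuming a reachable OceanBase instance configured with the defaults from the `.env` additions above (in Dify it is normally constructed through the factory rather than by hand):
```python
store = OceanBaseVector(
    "dify_demo_collection",
    OceanBaseVectorConfig(
        host="127.0.0.1",
        port=2881,
        user="root@test",
        password="",
        database="test",
    ),
)
```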

View File

@ -4,6 +4,7 @@ import uuid
import requests
from requests.auth import HTTPDigestAuth
from configs import dify_config
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.dataset import TidbAuthBinding
@ -36,7 +37,7 @@ class TidbService:
}
spending_limit = {
"monthly": 100,
"monthly": dify_config.TIDB_SPEND_LIMIT,
}
password = str(uuid.uuid4()).replace("-", "")[:16]
display_name = str(uuid.uuid4()).replace("-", "")[:16]
@ -208,7 +209,7 @@ class TidbService:
}
spending_limit = {
"monthly": 10,
"monthly": dify_config.TIDB_SPEND_LIMIT,
}
password = str(uuid.uuid4()).replace("-", "")[:16]
display_name = str(uuid.uuid4()).replace("-", "")

View File

@ -134,6 +134,10 @@ class Vector:
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import TidbOnQdrantVectorFactory
return TidbOnQdrantVectorFactory
case VectorType.OCEANBASE:
from core.rag.datasource.vdb.oceanbase.oceanbase_vector import OceanBaseVectorFactory
return OceanBaseVectorFactory
case _:
raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@ -21,3 +21,4 @@ class VectorType(str, Enum):
VIKINGDB = "vikingdb"
UPSTASH = "upstash"
TIDB_ON_QDRANT = "tidb_on_qdrant"
OCEANBASE = "oceanbase"

View File

@ -27,18 +27,17 @@ class RerankModelRunner(BaseRerankRunner):
:return:
"""
docs = []
doc_id = []
doc_id = set()
unique_documents = []
dify_documents = [item for item in documents if item.provider == "dify"]
external_documents = [item for item in documents if item.provider == "external"]
for document in dify_documents:
if document.metadata["doc_id"] not in doc_id:
doc_id.append(document.metadata["doc_id"])
for document in documents:
if document.provider == "dify" and document.metadata["doc_id"] not in doc_id:
doc_id.add(document.metadata["doc_id"])
docs.append(document.page_content)
unique_documents.append(document)
for document in external_documents:
docs.append(document.page_content)
unique_documents.append(document)
elif document.provider == "external":
if document not in unique_documents:
docs.append(document.page_content)
unique_documents.append(document)
documents = unique_documents
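The rewrite above folds the two loops into a single pass and tracks seen `doc_id`s in a set, making membership checks O(1) while preserving document order. A minimal illustration of the pattern:
```python
seen: set[str] = set()
unique: list[str] = []
for doc_id in ["a", "b", "a", "c"]:
    if doc_id not in seen:  # O(1) with a set, O(n) with the old list
        seen.add(doc_id)
        unique.append(doc_id)
assert unique == ["a", "b", "c"]
```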

View File

@ -1,6 +1,6 @@
from enum import Enum
class RerankMode(Enum):
class RerankMode(str, Enum):
RERANKING_MODEL = "reranking_model"
WEIGHTED_SCORE = "weighted_score"
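Inheriting from `str` lets enum members compare equal to raw strings, which the retrieval code below depends on when `reranking_mode` arrives as a plain string:
```python
# True only because RerankMode now subclasses str
assert RerankMode.WEIGHTED_SCORE == "weighted_score"
```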

View File

@ -22,6 +22,7 @@ from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaK
from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.entities.context_entities import DocumentContext
from core.rag.models.document import Document
from core.rag.rerank.rerank_type import RerankMode
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
@ -361,10 +362,39 @@ class DatasetRetrieval:
reranking_enable: bool = True,
message_id: Optional[str] = None,
):
if not available_datasets:
return []
threads = []
all_documents = []
dataset_ids = [dataset.id for dataset in available_datasets]
index_type = None
index_type_check = all(
item.indexing_technique == available_datasets[0].indexing_technique for item in available_datasets
)
if not index_type_check and (not reranking_enable or reranking_mode != RerankMode.RERANKING_MODEL):
raise ValueError(
"The configured knowledge base list have different indexing technique, please set reranking model."
)
index_type = available_datasets[0].indexing_technique
if index_type == "high_quality":
embedding_model_check = all(
item.embedding_model == available_datasets[0].embedding_model for item in available_datasets
)
embedding_model_provider_check = all(
item.embedding_model_provider == available_datasets[0].embedding_model_provider
for item in available_datasets
)
if (
reranking_enable
and reranking_mode == "weighted_score"
and (not embedding_model_check or not embedding_model_provider_check)
):
raise ValueError(
"The configured knowledge base list have different embedding model, please set reranking model."
)
if reranking_enable and reranking_mode == RerankMode.WEIGHTED_SCORE:
weights["vector_setting"]["embedding_provider_name"] = available_datasets[0].embedding_model_provider
weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model
for dataset in available_datasets:
index_type = dataset.indexing_technique
retrieval_thread = threading.Thread(

View File

@ -33,7 +33,9 @@ class BarChartTool(BuiltinTool):
if axis:
axis = [label[:10] + "..." if len(label) > 10 else label for label in axis]
ax.set_xticklabels(axis, rotation=45, ha="right")
ax.bar(axis, data)
# ensure all labels, including duplicates, are correctly displayed
ax.bar(range(len(data)), data)
ax.set_xticks(range(len(data)))
else:
ax.bar(range(len(data)), data)

View File

@ -1,5 +1,3 @@
import base64
import io
import json
import random
import uuid
@ -8,7 +6,7 @@ import httpx
from websocket import WebSocket
from yarl import URL
from core.file.file_manager import _get_encoded_string
from core.file.file_manager import download
from core.file.models import File
@ -29,8 +27,7 @@ class ComfyUiClient:
return response.content
def upload_image(self, image_file: File) -> dict:
image_content = base64.b64decode(_get_encoded_string(image_file))
file = io.BytesIO(image_content)
file = download(image_file)
files = {"image": (image_file.filename, file, image_file.mime_type), "overwrite": "true"}
res = httpx.post(str(self.base_url / "upload/image"), files=files)
return res.json()
@ -47,12 +44,7 @@ class ComfyUiClient:
ws.connect(ws_address)
return ws, client_id
def set_prompt(
self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = "", image_name: str = ""
) -> dict:
"""
find the first KSampler, then can find the prompt node through it.
"""
def set_prompt_by_ksampler(self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = "") -> dict:
prompt = origin_prompt.copy()
id_to_class_type = {id: details["class_type"] for id, details in prompt.items()}
k_sampler = [key for key, value in id_to_class_type.items() if value == "KSampler"][0]
@ -64,9 +56,20 @@ class ComfyUiClient:
negative_input_id = prompt.get(k_sampler)["inputs"]["negative"][0]
prompt.get(negative_input_id)["inputs"]["text"] = negative_prompt
if image_name != "":
image_loader = [key for key, value in id_to_class_type.items() if value == "LoadImage"][0]
prompt.get(image_loader)["inputs"]["image"] = image_name
return prompt
def set_prompt_images_by_ids(self, origin_prompt: dict, image_names: list[str], image_ids: list[str]) -> dict:
prompt = origin_prompt.copy()
for index, image_node_id in enumerate(image_ids):
prompt[image_node_id]["inputs"]["image"] = image_names[index]
return prompt
def set_prompt_images_by_default(self, origin_prompt: dict, image_names: list[str]) -> dict:
prompt = origin_prompt.copy()
id_to_class_type = {id: details["class_type"] for id, details in prompt.items()}
load_image_nodes = [key for key, value in id_to_class_type.items() if value == "LoadImage"]
for load_image, image_name in zip(load_image_nodes, image_names):
prompt.get(load_image)["inputs"]["image"] = image_name
return prompt
def track_progress(self, prompt: dict, ws: WebSocket, prompt_id: str):

View File

@ -1,7 +1,9 @@
import json
from typing import Any
from core.file import FileType
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.errors import ToolParameterValidationError
from core.tools.provider.builtin.comfyui.tools.comfyui_client import ComfyUiClient
from core.tools.tool.builtin_tool import BuiltinTool
@ -10,19 +12,46 @@ class ComfyUIWorkflowTool(BuiltinTool):
def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
comfyui = ComfyUiClient(self.runtime.credentials["base_url"])
positive_prompt = tool_parameters.get("positive_prompt")
negative_prompt = tool_parameters.get("negative_prompt")
positive_prompt = tool_parameters.get("positive_prompt", "")
negative_prompt = tool_parameters.get("negative_prompt", "")
images = tool_parameters.get("images") or []
workflow = tool_parameters.get("workflow_json")
image_name = ""
if image := tool_parameters.get("image"):
image_names = []
for image in images:
if image.type != FileType.IMAGE:
continue
image_name = comfyui.upload_image(image).get("name")
image_names.append(image_name)
set_prompt_with_ksampler = True
if "{{positive_prompt}}" in workflow:
set_prompt_with_ksampler = False
workflow = workflow.replace("{{positive_prompt}}", positive_prompt)
workflow = workflow.replace("{{negative_prompt}}", negative_prompt)
try:
origin_prompt = json.loads(workflow)
prompt = json.loads(workflow)
except:
return self.create_text_message("the Workflow JSON is not correct")
prompt = comfyui.set_prompt(origin_prompt, positive_prompt, negative_prompt, image_name)
if set_prompt_with_ksampler:
try:
prompt = comfyui.set_prompt_by_ksampler(prompt, positive_prompt, negative_prompt)
except:
raise ToolParameterValidationError(
"Failed set prompt with KSampler, try replace prompt to {{positive_prompt}} in your workflow json"
)
if image_names:
if image_ids := tool_parameters.get("image_ids"):
image_ids = image_ids.split(",")
try:
prompt = comfyui.set_prompt_images_by_ids(prompt, image_names, image_ids)
except:
raise ToolParameterValidationError("the Image Node ID List not match your upload image files.")
else:
prompt = comfyui.set_prompt_images_by_default(prompt, image_names)
images = comfyui.generate_image_by_prompt(prompt)
result = []
for img in images:
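
A short sketch of the template path this tool now prefers: when the exported workflow carries a {{positive_prompt}} placeholder, plain string replacement is used and the KSampler lookup is skipped entirely. The workflow JSON here is a hypothetical one-node example.

```python
import json

# Hypothetical workflow JSON containing the placeholder the tool detects.
workflow_json = '{"6": {"class_type": "CLIPTextEncode", "inputs": {"text": "{{positive_prompt}}"}}}'

positive_prompt = "a lighthouse at dusk"
negative_prompt = ""

# Placeholder present: substitute directly, mirroring set_prompt_with_ksampler = False above.
if "{{positive_prompt}}" in workflow_json:
    workflow_json = workflow_json.replace("{{positive_prompt}}", positive_prompt)
    workflow_json = workflow_json.replace("{{negative_prompt}}", negative_prompt)

prompt = json.loads(workflow_json)
assert prompt["6"]["inputs"]["text"] == "a lighthouse at dusk"
```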

View File

@ -24,12 +24,12 @@ parameters:
zh_Hans: 负面提示词
llm_description: Negative prompt, describe what you don't want in the generated image as a detailed list of words; the prompt must be written in English.
form: llm
- name: image
type: file
- name: images
type: files
label:
en_US: Input Image
en_US: Input Images
zh_Hans: 输入的图片
llm_description: The input image, used to transfer to the comfyui workflow to generate another image.
llm_description: The input images, passed to the ComfyUI workflow to generate new images.
form: llm
- name: workflow_json
type: string
@ -40,3 +40,15 @@ parameters:
en_US: exported from ComfyUI workflow
zh_Hans: 从ComfyUI的工作流中导出
form: form
- name: image_ids
type: string
label:
en_US: Image Node ID List
zh_Hans: 图片节点ID列表
placeholder:
en_US: Use commas to separate multiple node IDs
zh_Hans: 多个节点ID时使用半角逗号分隔
human_description:
en_US: When the workflow has multiple image nodes, enter the ID list of these nodes, and the images will be passed to ComfyUI in the order of the list.
zh_Hans: 当工作流有多个图片节点时输入这些节点的ID列表图片将按列表顺序传给ComfyUI
form: form

View File

@ -3,7 +3,7 @@ import logging
import mimetypes
from collections.abc import Generator
from os import listdir, path
from threading import Lock
from threading import Lock, Thread
from typing import Any, Optional, Union
from configs import dify_config
@ -647,4 +647,5 @@ class ToolManager:
raise ValueError(f"provider type {provider_type} not found")
ToolManager.load_builtin_providers_cache()
# preload builtin tool providers
Thread(target=ToolManager.load_builtin_providers_cache, name="pre_load_builtin_providers_cache", daemon=True).start()
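
The preload line above follows a common warm-up pattern: populate an in-process cache on a background daemon thread so module import does not block on the scan. A generic sketch of the idea, with an illustrative loader rather than the real ToolManager API:

```python
import threading
import time

_cache: dict[str, list[str]] = {}

def load_builtin_providers() -> None:
    # Stand-in for an expensive scan of builtin provider packages.
    time.sleep(0.1)
    _cache["providers"] = ["provider_a", "provider_b"]

# daemon=True keeps the thread from blocking interpreter shutdown.
threading.Thread(target=load_builtin_providers, name="pre_load_cache", daemon=True).start()
```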

View File

@ -153,6 +153,7 @@ class AnswerStreamGeneratorRouter:
NodeType.IF_ELSE,
NodeType.QUESTION_CLASSIFIER,
NodeType.ITERATION,
NodeType.CONVERSATION_VARIABLE_ASSIGNER,
}:
answer_dependencies[answer_node_id].append(source_node_id)
else:

View File

@ -5,6 +5,7 @@ import json
import docx
import pandas as pd
import pypdfium2
import yaml
from unstructured.partition.email import partition_email
from unstructured.partition.epub import partition_epub
from unstructured.partition.msg import partition_msg
@ -101,6 +102,8 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
return _extract_text_from_msg(file_content)
case "application/json":
return _extract_text_from_json(file_content)
case "application/x-yaml" | "text/yaml":
return _extract_text_from_yaml(file_content)
case _:
raise UnsupportedFileTypeError(f"Unsupported MIME type: {mime_type}")
@ -112,6 +115,8 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)
return _extract_text_from_plain_text(file_content)
case ".json":
return _extract_text_from_json(file_content)
case ".yaml" | ".yml":
return _extract_text_from_yaml(file_content)
case ".pdf":
return _extract_text_from_pdf(file_content)
case ".doc" | ".docx":
@ -149,6 +154,15 @@ def _extract_text_from_json(file_content: bytes) -> str:
raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e
def _extract_text_from_yaml(file_content: bytes) -> str:
"""Extract the content from yaml file"""
try:
yaml_data = yaml.safe_load_all(file_content.decode("utf-8"))
return yaml.dump_all(yaml_data, allow_unicode=True, sort_keys=False)
except (UnicodeDecodeError, yaml.YAMLError) as e:
raise TextExtractionError(f"Failed to decode or parse YAML file: {e}") from e
def _extract_text_from_pdf(file_content: bytes) -> str:
try:
pdf_file = io.BytesIO(file_content)

View File

@ -94,7 +94,7 @@ class Response:
@property
def is_file(self):
content_type = self.content_type
content_disposition = self.response.headers.get("Content-Disposition", "")
content_disposition = self.response.headers.get("content-disposition", "")
return "attachment" in content_disposition or (
not any(non_file in content_type for non_file in NON_FILE_CONTENT_TYPES)
@ -103,7 +103,7 @@ class Response:
@property
def content_type(self) -> str:
return self.headers.get("Content-Type", "")
return self.headers.get("content-type", "")
@property
def text(self) -> str:
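
Context for the lowercase keys above: header names are case-insensitive on the wire, but a plain dict lookup is not, so code reading headers from a raw dict has to match the stored casing exactly. A small illustration of the failure mode, assuming the headers arrive as a plain dict:

```python
# Raw response headers as a plain dict (no case-insensitive wrapper).
headers = {"content-type": "image/png", "content-disposition": "attachment"}

assert headers.get("Content-Type") is None          # case-sensitive miss
assert headers.get("content-type") == "image/png"   # matches the stored casing
```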

View File

@ -142,10 +142,11 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
Extract files from response
"""
files = []
is_file = response.is_file
content_type = response.content_type
content = response.content
if content_type:
if is_file and content_type:
# extract filename from url
filename = path.basename(url)
# extract extension if possible
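
A sketch of the gating added above: only responses that look like downloads (per the is_file check) become file objects, and the filename falls back to the basename of the URL path. The names here are illustrative stand-ins, not the node's real helpers.

```python
from os import path
from urllib.parse import urlparse

def filename_from_url(url: str) -> str:
    # basename of the URL path, e.g. ".../logo/logo-site.png" -> "logo-site.png"
    return path.basename(urlparse(url).path)

is_file = True            # stand-in for Response.is_file
content_type = "image/png"

files = []
if is_file and content_type:
    files.append(filename_from_url("https://cloud.dify.ai/logo/logo-site.png"))

assert files == ["logo-site.png"]
```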

View File

@ -327,7 +327,7 @@ class LLMNode(BaseNode[LLMNodeData]):
if variable is None:
raise ValueError(f"Variable {variable_selector.variable} not found")
if isinstance(variable, NoneSegment):
continue
inputs[variable_selector.variable] = ""
inputs[variable_selector.variable] = variable.to_object()
memory = node_data.memory
@ -349,13 +349,11 @@ class LLMNode(BaseNode[LLMNodeData]):
variable = self.graph_runtime_state.variable_pool.get(selector)
if variable is None:
return []
if isinstance(variable, FileSegment):
elif isinstance(variable, FileSegment):
return [variable.value]
if isinstance(variable, ArrayFileSegment):
elif isinstance(variable, ArrayFileSegment):
return variable.value
# FIXME: Temporary fix for empty array,
# all variables added to variable pool should be a Segment instance.
if isinstance(variable, ArrayAnySegment) and len(variable.value) == 0:
elif isinstance(variable, NoneSegment | ArrayAnySegment):
return []
raise ValueError(f"Invalid variable type: {type(variable)}")

View File

@ -10,6 +10,7 @@ from core.variables import (
ArrayNumberVariable,
ArrayObjectSegment,
ArrayObjectVariable,
ArraySegment,
ArrayStringSegment,
ArrayStringVariable,
FileSegment,
@ -79,7 +80,7 @@ def build_segment(value: Any, /) -> Segment:
if isinstance(value, list):
items = [build_segment(item) for item in value]
types = {item.value_type for item in items}
if len(types) != 1:
if len(types) != 1 or all(isinstance(item, ArraySegment) for item in items):
return ArrayAnySegment(value=value)
match types.pop():
case SegmentType.STRING:
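
The widened guard above means a list of mixed types, or a list whose items are themselves arrays, falls back to ArrayAnySegment instead of a typed array segment. A sketch of the decision with hypothetical value-type tags standing in for SegmentType:

```python
def value_type(item) -> str:
    # Hypothetical tags standing in for SegmentType members.
    if isinstance(item, str):
        return "string"
    if isinstance(item, (int, float)):
        return "number"
    if isinstance(item, list):
        return "array"
    return "object"

def pick_segment_kind(value: list) -> str:
    types = {value_type(item) for item in value}
    # Mixed types, or arrays of arrays, cannot become a typed array segment.
    if len(types) != 1 or all(value_type(item) == "array" for item in value):
        return "ArrayAnySegment"
    return f"Array{types.pop().capitalize()}Segment"

assert pick_segment_kind(["a", "b"]) == "ArrayStringSegment"
assert pick_segment_kind(["a", 1]) == "ArrayAnySegment"    # mixed types
assert pick_segment_kind([[1], [2]]) == "ArrayAnySegment"  # nested arrays
```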

View File

@ -121,6 +121,7 @@ conversation_fields = {
"from_account_name": fields.String,
"read_at": TimestampField,
"created_at": TimestampField,
"updated_at": TimestampField,
"annotation": fields.Nested(annotation_fields, allow_null=True),
"model_config": fields.Nested(simple_model_config_fields),
"user_feedback_stats": fields.Nested(feedback_stat_fields),
@ -182,6 +183,7 @@ conversation_detail_fields = {
"from_end_user_id": fields.String,
"from_account_id": fields.String,
"created_at": TimestampField,
"updated_at": TimestampField,
"annotated": fields.Boolean,
"introduction": fields.String,
"model_config": fields.Nested(model_config_fields),
@ -197,6 +199,7 @@ simple_conversation_fields = {
"status": fields.String,
"introduction": fields.String,
"created_at": TimestampField,
"updated_at": TimestampField,
}
conversation_infinite_scroll_pagination_fields = {

View File

@ -396,7 +396,7 @@ class AppModelConfig(db.Model):
"file_upload": self.file_upload_dict,
}
def from_model_config_dict(self, model_config: dict):
def from_model_config_dict(self, model_config: Mapping[str, Any]):
self.opening_statement = model_config.get("opening_statement")
self.suggested_questions = (
json.dumps(model_config["suggested_questions"]) if model_config.get("suggested_questions") else None

api/poetry.lock generated
View File

@ -7269,6 +7269,22 @@ files = [
ed25519 = ["PyNaCl (>=1.4.0)"]
rsa = ["cryptography"]
[[package]]
name = "pyobvector"
version = "0.1.6"
description = "A python SDK for OceanBase Vector Store, based on SQLAlchemy, compatible with Milvus API."
optional = false
python-versions = "<4.0,>=3.9"
files = [
{file = "pyobvector-0.1.6-py3-none-any.whl", hash = "sha256:0d700e865a85b4716b9a03384189e49288cd9d5f3cef88aed4740bc82d5fd136"},
{file = "pyobvector-0.1.6.tar.gz", hash = "sha256:05551addcac8c596992d5e38b480c83ca3481c6cfc6f56a1a1bddfb2e6ae037e"},
]
[package.dependencies]
numpy = ">=1.26.0,<2.0.0"
pymysql = ">=1.1.1,<2.0.0"
sqlalchemy = ">=2.0.32,<3.0.0"
[[package]]
name = "pyopenssl"
version = "24.2.1"
@ -8677,6 +8693,11 @@ files = [
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
@ -10919,4 +10940,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "52552faf5f4823056eb48afe05349ab2f0e9a5bc42105211ccbbb54b59e27b59"
content-hash = "ef927b98c33d704d680e08db0e5c7d9a4e05454c66fcd6a5f656a65eb08e886b"

View File

@ -247,6 +247,7 @@ pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
pgvector = "0.2.5"
pymilvus = "~2.4.4"
pymochow = "1.3.1"
pyobvector = "~0.1.6"
qdrant-client = "1.7.3"
tcvectordb = "1.3.2"
tidb-vector = "0.0.9"

View File

@ -0,0 +1,3 @@
from .service import AppDslService
__all__ = ["AppDslService"]

View File

@ -0,0 +1,34 @@
class DSLVersionNotSupportedError(ValueError):
"""Raised when the imported DSL version is not supported by the current Dify version."""
class InvalidYAMLFormatError(ValueError):
"""Raised when the provided YAML format is invalid."""
class MissingAppDataError(ValueError):
"""Raised when the app data is missing in the provided DSL."""
class InvalidAppModeError(ValueError):
"""Raised when the app mode is invalid."""
class MissingWorkflowDataError(ValueError):
"""Raised when the workflow data is missing in the provided DSL."""
class MissingModelConfigError(ValueError):
"""Raised when the model config data is missing in the provided DSL."""
class FileSizeLimitExceededError(ValueError):
"""Raised when the file size exceeds the allowed limit."""
class EmptyContentError(ValueError):
"""Raised when the content fetched from the URL is empty."""
class ContentDecodingError(ValueError):
"""Raised when there is an error decoding the content."""

View File

@ -1,8 +1,11 @@
import logging
from collections.abc import Mapping
from typing import Any
import httpx
import yaml # type: ignore
import yaml
from packaging import version
from core.helper import ssrf_proxy
from events.app_event import app_model_config_was_updated, app_was_created
from extensions.ext_database import db
from factories import variable_factory
@ -11,6 +14,18 @@ from models.model import App, AppMode, AppModelConfig
from models.workflow import Workflow
from services.workflow_service import WorkflowService
from .exc import (
ContentDecodingError,
DSLVersionNotSupportedError,
EmptyContentError,
FileSizeLimitExceededError,
InvalidAppModeError,
InvalidYAMLFormatError,
MissingAppDataError,
MissingModelConfigError,
MissingWorkflowDataError,
)
logger = logging.getLogger(__name__)
current_dsl_version = "0.1.2"
@ -30,32 +45,21 @@ class AppDslService:
:param args: request args
:param account: Account instance
"""
try:
max_size = 10 * 1024 * 1024 # 10MB
timeout = httpx.Timeout(10.0)
with httpx.stream("GET", url.strip(), follow_redirects=True, timeout=timeout) as response:
response.raise_for_status()
total_size = 0
content = b""
for chunk in response.iter_bytes():
total_size += len(chunk)
if total_size > max_size:
raise ValueError("File size exceeds the limit of 10MB")
content += chunk
except httpx.HTTPStatusError as http_err:
raise ValueError(f"HTTP error occurred: {http_err}")
except httpx.RequestError as req_err:
raise ValueError(f"Request error occurred: {req_err}")
except Exception as e:
raise ValueError(f"Failed to fetch DSL from URL: {e}")
max_size = 10 * 1024 * 1024 # 10MB
response = ssrf_proxy.get(url.strip(), follow_redirects=True, timeout=(10, 10))
response.raise_for_status()
content = response.content
if len(content) > max_size:
raise FileSizeLimitExceededError("File size exceeds the limit of 10MB")
if not content:
raise ValueError("Empty content from url")
raise EmptyContentError("Empty content from url")
try:
data = content.decode("utf-8")
except UnicodeDecodeError as e:
raise ValueError(f"Error decoding content: {e}")
raise ContentDecodingError(f"Error decoding content: {e}")
return cls.import_and_create_new_app(tenant_id, data, args, account)
@ -71,14 +75,14 @@ class AppDslService:
try:
import_data = yaml.safe_load(data)
except yaml.YAMLError:
raise ValueError("Invalid YAML format in data argument.")
raise InvalidYAMLFormatError("Invalid YAML format in data argument.")
# check or repair dsl version
import_data = cls._check_or_fix_dsl(import_data)
import_data = _check_or_fix_dsl(import_data)
app_data = import_data.get("app")
if not app_data:
raise ValueError("Missing app in data argument")
raise MissingAppDataError("Missing app in data argument")
# get app basic info
name = args.get("name") or app_data.get("name")
@ -90,11 +94,18 @@ class AppDslService:
# import dsl and create app
app_mode = AppMode.value_of(app_data.get("mode"))
if app_mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
workflow_data = import_data.get("workflow")
if not workflow_data or not isinstance(workflow_data, dict):
raise MissingWorkflowDataError(
"Missing workflow in data argument when app mode is advanced-chat or workflow"
)
app = cls._import_and_create_new_workflow_based_app(
tenant_id=tenant_id,
app_mode=app_mode,
workflow_data=import_data.get("workflow"),
workflow_data=workflow_data,
account=account,
name=name,
description=description,
@ -104,10 +115,16 @@ class AppDslService:
use_icon_as_answer_icon=use_icon_as_answer_icon,
)
elif app_mode in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION}:
model_config = import_data.get("model_config")
if not model_config or not isinstance(model_config, dict):
raise MissingModelConfigError(
"Missing model_config in data argument when app mode is chat, agent-chat or completion"
)
app = cls._import_and_create_new_model_config_based_app(
tenant_id=tenant_id,
app_mode=app_mode,
model_config_data=import_data.get("model_config"),
model_config_data=model_config,
account=account,
name=name,
description=description,
@ -117,7 +134,7 @@ class AppDslService:
use_icon_as_answer_icon=use_icon_as_answer_icon,
)
else:
raise ValueError("Invalid app mode")
raise InvalidAppModeError("Invalid app mode")
return app
@ -132,26 +149,32 @@ class AppDslService:
try:
import_data = yaml.safe_load(data)
except yaml.YAMLError:
raise ValueError("Invalid YAML format in data argument.")
raise InvalidYAMLFormatError("Invalid YAML format in data argument.")
# check or repair dsl version
import_data = cls._check_or_fix_dsl(import_data)
import_data = _check_or_fix_dsl(import_data)
app_data = import_data.get("app")
if not app_data:
raise ValueError("Missing app in data argument")
raise MissingAppDataError("Missing app in data argument")
# import dsl and overwrite app
app_mode = AppMode.value_of(app_data.get("mode"))
if app_mode not in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}:
raise ValueError("Only support import workflow in advanced-chat or workflow app.")
raise InvalidAppModeError("Only support import workflow in advanced-chat or workflow app.")
if app_data.get("mode") != app_model.mode:
raise ValueError(f"App mode {app_data.get('mode')} is not matched with current app mode {app_mode.value}")
workflow_data = import_data.get("workflow")
if not workflow_data or not isinstance(workflow_data, dict):
raise MissingWorkflowDataError(
"Missing workflow in data argument when app mode is advanced-chat or workflow"
)
return cls._import_and_overwrite_workflow_based_app(
app_model=app_model,
workflow_data=import_data.get("workflow"),
workflow_data=workflow_data,
account=account,
)
@ -186,35 +209,12 @@ class AppDslService:
return yaml.dump(export_data, allow_unicode=True)
@classmethod
def _check_or_fix_dsl(cls, import_data: dict) -> dict:
"""
Check or fix dsl
:param import_data: import data
"""
if not import_data.get("version"):
import_data["version"] = "0.1.0"
if not import_data.get("kind") or import_data.get("kind") != "app":
import_data["kind"] = "app"
if import_data.get("version") != current_dsl_version:
# Currently only one DSL version, so no difference checks or compatibility fixes will be performed.
logger.warning(
f"DSL version {import_data.get('version')} is not compatible "
f"with current version {current_dsl_version}, related to "
f"Dify version {dsl_to_dify_version_mapping.get(current_dsl_version)}."
)
return import_data
@classmethod
def _import_and_create_new_workflow_based_app(
cls,
tenant_id: str,
app_mode: AppMode,
workflow_data: dict,
workflow_data: Mapping[str, Any],
account: Account,
name: str,
description: str,
@ -238,7 +238,9 @@ class AppDslService:
:param use_icon_as_answer_icon: use app icon as answer icon
"""
if not workflow_data:
raise ValueError("Missing workflow in data argument when app mode is advanced-chat or workflow")
raise MissingWorkflowDataError(
"Missing workflow in data argument when app mode is advanced-chat or workflow"
)
app = cls._create_app(
tenant_id=tenant_id,
@ -277,7 +279,7 @@ class AppDslService:
@classmethod
def _import_and_overwrite_workflow_based_app(
cls, app_model: App, workflow_data: dict, account: Account
cls, app_model: App, workflow_data: Mapping[str, Any], account: Account
) -> Workflow:
"""
Import app dsl and overwrite workflow based app
@ -287,7 +289,9 @@ class AppDslService:
:param account: Account instance
"""
if not workflow_data:
raise ValueError("Missing workflow in data argument when app mode is advanced-chat or workflow")
raise MissingWorkflowDataError(
"Missing workflow in data argument when app mode is advanced-chat or workflow"
)
# fetch draft workflow by app_model
workflow_service = WorkflowService()
@ -323,7 +327,7 @@ class AppDslService:
cls,
tenant_id: str,
app_mode: AppMode,
model_config_data: dict,
model_config_data: Mapping[str, Any],
account: Account,
name: str,
description: str,
@ -345,7 +349,9 @@ class AppDslService:
:param icon_background: app icon background
"""
if not model_config_data:
raise ValueError("Missing model_config in data argument when app mode is chat, agent-chat or completion")
raise MissingModelConfigError(
"Missing model_config in data argument when app mode is chat, agent-chat or completion"
)
app = cls._create_app(
tenant_id=tenant_id,
@ -448,3 +454,34 @@ class AppDslService:
raise ValueError("Missing app configuration, please check.")
export_data["model_config"] = app_model_config.to_dict()
def _check_or_fix_dsl(import_data: dict[str, Any]) -> Mapping[str, Any]:
"""
Check or fix dsl
:param import_data: import data
:raises DSLVersionNotSupportedError: if the imported DSL version is newer than the current version
"""
if not import_data.get("version"):
import_data["version"] = "0.1.0"
if not import_data.get("kind") or import_data.get("kind") != "app":
import_data["kind"] = "app"
imported_version = import_data.get("version")
if imported_version != current_dsl_version:
if imported_version and version.parse(imported_version) > version.parse(current_dsl_version):
raise DSLVersionNotSupportedError(
f"The imported DSL version {imported_version} is newer than "
f"the current supported version {current_dsl_version}. "
f"Please upgrade your Dify instance to import this configuration."
)
else:
logger.warning(
f"DSL version {imported_version} is older than "
f"the current version {current_dsl_version}. "
f"This may cause compatibility issues."
)
return import_data
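
A quick illustration of the three paths above, using packaging.version the same way: an equal version passes through, an older one only warns, and a newer one is refused.

```python
from packaging import version

current_dsl_version = "0.1.2"

def check(imported: str) -> str:
    if version.parse(imported) > version.parse(current_dsl_version):
        raise ValueError(f"DSL version {imported} is newer than {current_dsl_version}")
    return "ok" if imported == current_dsl_version else "warn"

assert check("0.1.2") == "ok"
assert check("0.1.0") == "warn"   # older: logged as a warning, still imported
try:
    check("0.2.0")                # newer: import refused
except ValueError as e:
    print(e)
```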

View File

@ -736,11 +736,12 @@ class DocumentService:
dataset.retrieval_model = document_data.get("retrieval_model") or default_retrieval_model
documents = []
batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
if document_data.get("original_document_id"):
document = DocumentService.update_document_with_dataset_id(dataset, document_data, account)
documents.append(document)
batch = document.batch
else:
batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
# save process rule
if not dataset_process_rule:
process_rule = document_data["process_rule"]
@ -921,7 +922,7 @@ class DocumentService:
if duplicate_document_ids:
duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids)
return documents, batch
return documents, batch
@staticmethod
def check_documents_upload_quota(count: int, features: FeatureModel):
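
The batch handling above now reuses the batch identifier of the document being updated and only mints a fresh timestamp-plus-random value for new documents. A sketch of the two paths, using a stand-in object for the document:

```python
import random
import time
from types import SimpleNamespace

def resolve_batch(original_document: SimpleNamespace | None) -> str:
    if original_document is not None:
        # Updating: keep the batch the document was originally ingested under.
        return original_document.batch
    # Creating: timestamp plus a random 6-digit suffix, as in the service above.
    return time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))

existing = SimpleNamespace(batch="20241104120000123456")
assert resolve_batch(existing) == "20241104120000123456"
assert len(resolve_batch(None)) == 20  # 14 timestamp digits + 6 random digits
```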

View File

@ -35,7 +35,7 @@ class FileService:
filename = file.filename
if not filename:
raise FileNotExistsError
extension = filename.split(".")[-1]
extension = filename.split(".")[-1].lower()
if len(filename) > 200:
filename = filename.split(".")[0][:200] + "." + extension
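
Lowercasing the extension above keeps later checks (extension allow-lists, MIME lookups) from missing uppercase filenames; a one-line illustration:

```python
filename = "Report.PDF"
extension = filename.split(".")[-1].lower()
assert extension == "pdf"  # "PDF" would otherwise slip past a lowercase allow-list
```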

View File

@ -84,5 +84,10 @@ VOLC_EMBEDDING_ENDPOINT_ID=
# 360 AI Credentials
ZHINAO_API_KEY=
# VESSL AI Credentials
VESSL_AI_MODEL_NAME=
VESSL_AI_API_KEY=
VESSL_AI_ENDPOINT_URL=
# Gitee AI Credentials
GITEE_AI_API_KEY=
GITEE_AI_API_KEY=

View File

@ -0,0 +1,131 @@
import os
from collections.abc import Generator
import pytest
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
SystemPromptMessage,
UserPromptMessage,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.vessl_ai.llm.llm import VesslAILargeLanguageModel
def test_validate_credentials():
model = VesslAILargeLanguageModel()
with pytest.raises(CredentialsValidateFailedError):
model.validate_credentials(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": "invalid_key",
"endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
"mode": "chat",
},
)
with pytest.raises(CredentialsValidateFailedError):
model.validate_credentials(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": os.environ.get("VESSL_AI_API_KEY"),
"endpoint_url": "http://invalid_url",
"mode": "chat",
},
)
model.validate_credentials(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": os.environ.get("VESSL_AI_API_KEY"),
"endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
"mode": "chat",
},
)
def test_invoke_model():
model = VesslAILargeLanguageModel()
response = model.invoke(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": os.environ.get("VESSL_AI_API_KEY"),
"endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
"mode": "chat",
},
prompt_messages=[
SystemPromptMessage(
content="You are a helpful AI assistant.",
),
UserPromptMessage(content="Who are you?"),
],
model_parameters={
"temperature": 1.0,
"top_k": 2,
"top_p": 0.5,
},
stop=["How"],
stream=False,
user="abc-123",
)
assert isinstance(response, LLMResult)
assert len(response.message.content) > 0
def test_invoke_stream_model():
model = VesslAILargeLanguageModel()
response = model.invoke(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": os.environ.get("VESSL_AI_API_KEY"),
"endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
"mode": "chat",
},
prompt_messages=[
SystemPromptMessage(
content="You are a helpful AI assistant.",
),
UserPromptMessage(content="Who are you?"),
],
model_parameters={
"temperature": 1.0,
"top_k": 2,
"top_p": 0.5,
},
stop=["How"],
stream=True,
user="abc-123",
)
assert isinstance(response, Generator)
for chunk in response:
assert isinstance(chunk, LLMResultChunk)
assert isinstance(chunk.delta, LLMResultChunkDelta)
assert isinstance(chunk.delta.message, AssistantPromptMessage)
def test_get_num_tokens():
model = VesslAILargeLanguageModel()
num_tokens = model.get_num_tokens(
model=os.environ.get("VESSL_AI_MODEL_NAME"),
credentials={
"api_key": os.environ.get("VESSL_AI_API_KEY"),
"endpoint_url": os.environ.get("VESSL_AI_ENDPOINT_URL"),
},
prompt_messages=[
SystemPromptMessage(
content="You are a helpful AI assistant.",
),
UserPromptMessage(content="Hello World!"),
],
)
assert isinstance(num_tokens, int)
assert num_tokens == 21

View File

@ -0,0 +1,71 @@
from unittest.mock import MagicMock, patch
import pytest
from core.rag.datasource.vdb.oceanbase.oceanbase_vector import (
OceanBaseVector,
OceanBaseVectorConfig,
)
from tests.integration_tests.vdb.__mock.tcvectordb import setup_tcvectordb_mock
from tests.integration_tests.vdb.test_vector_store import (
AbstractVectorTest,
get_example_text,
setup_mock_redis,
)
@pytest.fixture
def oceanbase_vector():
return OceanBaseVector(
"dify_test_collection",
config=OceanBaseVectorConfig(
host="127.0.0.1",
port="2881",
user="root@test",
database="test",
password="test",
),
)
class OceanBaseVectorTest(AbstractVectorTest):
def __init__(self, vector: OceanBaseVector):
super().__init__()
self.vector = vector
def search_by_vector(self):
hits_by_vector = self.vector.search_by_vector(query_vector=self.example_embedding)
assert len(hits_by_vector) == 0
def search_by_full_text(self):
hits_by_full_text = self.vector.search_by_full_text(query=get_example_text())
assert len(hits_by_full_text) == 0
def text_exists(self):
exist = self.vector.text_exists(self.example_doc_id)
assert exist is True
def get_ids_by_metadata_field(self):
ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
assert len(ids) == 0
@pytest.fixture
def setup_mock_oceanbase_client():
with patch("core.rag.datasource.vdb.oceanbase.oceanbase_vector.ObVecClient", new_callable=MagicMock) as mock_client:
yield mock_client
@pytest.fixture
def setup_mock_oceanbase_vector(oceanbase_vector):
with patch.object(oceanbase_vector, "_client"):
yield oceanbase_vector
def test_oceanbase_vector(
setup_mock_redis,
setup_mock_oceanbase_client,
setup_mock_oceanbase_vector,
oceanbase_vector,
):
OceanBaseVectorTest(oceanbase_vector).run_all_tests()

View File

@ -430,3 +430,37 @@ def test_multi_colons_parse(setup_http_mock):
assert urlencode({"Redirect": "http://example2.com"}) in result.process_data.get("request", "")
assert 'form-data; name="Redirect"\r\n\r\nhttp://example6.com' in result.process_data.get("request", "")
# assert "http://example3.com" == resp.get("headers", {}).get("referer")
def test_image_file(monkeypatch):
from types import SimpleNamespace
monkeypatch.setattr(
"core.tools.tool_file_manager.ToolFileManager.create_file_by_raw",
lambda *args, **kwargs: SimpleNamespace(id="1"),
)
node = init_http_node(
config={
"id": "1",
"data": {
"title": "http",
"desc": "",
"method": "get",
"url": "https://cloud.dify.ai/logo/logo-site.png",
"authorization": {
"type": "no-auth",
"config": None,
},
"params": "",
"headers": "",
"body": None,
},
}
)
result = node._run()
assert result.process_data is not None
assert result.outputs is not None
resp = result.outputs
assert len(resp.get("files", [])) == 1

View File

@ -22,17 +22,3 @@ from controllers.console.version import _has_new_version
)
def test_has_new_version(latest_version, current_version, expected):
assert _has_new_version(latest_version=latest_version, current_version=current_version) == expected
def test_has_new_version_invalid_input():
with pytest.raises(ValueError):
_has_new_version(latest_version="1.0", current_version="1.0.0")
with pytest.raises(ValueError):
_has_new_version(latest_version="1.0.0", current_version="1.0")
with pytest.raises(ValueError):
_has_new_version(latest_version="invalid", current_version="1.0.0")
with pytest.raises(ValueError):
_has_new_version(latest_version="1.0.0", current_version="invalid")

View File

@ -0,0 +1,125 @@
import pytest
from core.app.entities.app_invoke_entities import InvokeFrom
from core.file import File, FileTransferMethod, FileType
from core.model_runtime.entities.message_entities import ImagePromptMessageContent
from core.variables import ArrayAnySegment, ArrayFileSegment, NoneSegment
from core.workflow.entities.variable_pool import VariablePool
from core.workflow.graph_engine import Graph, GraphInitParams, GraphRuntimeState
from core.workflow.nodes.answer import AnswerStreamGenerateRoute
from core.workflow.nodes.end import EndStreamParam
from core.workflow.nodes.llm.entities import ContextConfig, LLMNodeData, ModelConfig, VisionConfig, VisionConfigOptions
from core.workflow.nodes.llm.node import LLMNode
from models.enums import UserFrom
from models.workflow import WorkflowType
class TestLLMNode:
@pytest.fixture
def llm_node(self):
data = LLMNodeData(
title="Test LLM",
model=ModelConfig(provider="openai", name="gpt-3.5-turbo", mode="chat", completion_params={}),
prompt_template=[],
memory=None,
context=ContextConfig(enabled=False),
vision=VisionConfig(
enabled=True,
configs=VisionConfigOptions(
variable_selector=["sys", "files"],
detail=ImagePromptMessageContent.DETAIL.HIGH,
),
),
)
variable_pool = VariablePool(
system_variables={},
user_inputs={},
)
node = LLMNode(
id="1",
config={
"id": "1",
"data": data.model_dump(),
},
graph_init_params=GraphInitParams(
tenant_id="1",
app_id="1",
workflow_type=WorkflowType.WORKFLOW,
workflow_id="1",
graph_config={},
user_id="1",
user_from=UserFrom.ACCOUNT,
invoke_from=InvokeFrom.SERVICE_API,
call_depth=0,
),
graph=Graph(
root_node_id="1",
answer_stream_generate_routes=AnswerStreamGenerateRoute(
answer_dependencies={},
answer_generate_route={},
),
end_stream_param=EndStreamParam(
end_dependencies={},
end_stream_variable_selector_mapping={},
),
),
graph_runtime_state=GraphRuntimeState(
variable_pool=variable_pool,
start_at=0,
),
)
return node
def test_fetch_files_with_file_segment(self, llm_node):
file = File(
id="1",
tenant_id="test",
type=FileType.IMAGE,
filename="test.jpg",
transfer_method=FileTransferMethod.LOCAL_FILE,
related_id="1",
)
llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], file)
result = llm_node._fetch_files(selector=["sys", "files"])
assert result == [file]
def test_fetch_files_with_array_file_segment(self, llm_node):
files = [
File(
id="1",
tenant_id="test",
type=FileType.IMAGE,
filename="test1.jpg",
transfer_method=FileTransferMethod.LOCAL_FILE,
related_id="1",
),
File(
id="2",
tenant_id="test",
type=FileType.IMAGE,
filename="test2.jpg",
transfer_method=FileTransferMethod.LOCAL_FILE,
related_id="2",
),
]
llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], ArrayFileSegment(value=files))
result = llm_node._fetch_files(selector=["sys", "files"])
assert result == files
def test_fetch_files_with_none_segment(self, llm_node):
llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], NoneSegment())
result = llm_node._fetch_files(selector=["sys", "files"])
assert result == []
def test_fetch_files_with_array_any_segment(self, llm_node):
llm_node.graph_runtime_state.variable_pool.add(["sys", "files"], ArrayAnySegment(value=[]))
result = llm_node._fetch_files(selector=["sys", "files"])
assert result == []
def test_fetch_files_with_non_existent_variable(self, llm_node):
result = llm_node._fetch_files(selector=["sys", "files"])
assert result == []

View File

@ -0,0 +1,100 @@
import os
import posixpath
from unittest.mock import MagicMock
import pytest
from _pytest.monkeypatch import MonkeyPatch
from oss2 import Bucket
from oss2.models import GetObjectResult, PutObjectResult
from tests.unit_tests.oss.__mock.base import (
get_example_bucket,
get_example_data,
get_example_filename,
get_example_filepath,
get_example_folder,
)
class MockResponse:
def __init__(self, status, headers, request_id):
self.status = status
self.headers = headers
self.request_id = request_id
class MockAliyunOssClass:
def __init__(
self,
auth,
endpoint,
bucket_name,
is_cname=False,
session=None,
connect_timeout=None,
app_name="",
enable_crc=True,
proxies=None,
region=None,
cloudbox_id=None,
is_path_style=False,
is_verify_object_strict=True,
):
self.bucket_name = get_example_bucket()
self.key = posixpath.join(get_example_folder(), get_example_filename())
self.content = get_example_data()
self.filepath = get_example_filepath()
self.resp = MockResponse(
200,
{
"etag": "ee8de918d05640145b18f70f4c3aa602",
"x-oss-version-id": "CAEQNhiBgMDJgZCA0BYiIDc4MGZjZGI2OTBjOTRmNTE5NmU5NmFhZjhjYmY0****",
},
"request_id",
)
def put_object(self, key, data, headers=None, progress_callback=None):
assert key == self.key
assert data == self.content
return PutObjectResult(self.resp)
def get_object(self, key, byte_range=None, headers=None, progress_callback=None, process=None, params=None):
assert key == self.key
get_object_output = MagicMock(GetObjectResult)
get_object_output.read.return_value = self.content
return get_object_output
def get_object_to_file(
self, key, filename, byte_range=None, headers=None, progress_callback=None, process=None, params=None
):
assert key == self.key
assert filename == self.filepath
def object_exists(self, key, headers=None):
assert key == self.key
return True
def delete_object(self, key, params=None, headers=None):
assert key == self.key
self.resp.headers["x-oss-delete-marker"] = True
return self.resp
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
@pytest.fixture
def setup_aliyun_oss_mock(monkeypatch: MonkeyPatch):
if MOCK:
monkeypatch.setattr(Bucket, "__init__", MockAliyunOssClass.__init__)
monkeypatch.setattr(Bucket, "put_object", MockAliyunOssClass.put_object)
monkeypatch.setattr(Bucket, "get_object", MockAliyunOssClass.get_object)
monkeypatch.setattr(Bucket, "get_object_to_file", MockAliyunOssClass.get_object_to_file)
monkeypatch.setattr(Bucket, "object_exists", MockAliyunOssClass.object_exists)
monkeypatch.setattr(Bucket, "delete_object", MockAliyunOssClass.delete_object)
yield
if MOCK:
monkeypatch.undo()

View File

@ -0,0 +1,22 @@
from unittest.mock import MagicMock, patch
import pytest
from oss2 import Auth
from extensions.storage.aliyun_oss_storage import AliyunOssStorage
from tests.unit_tests.oss.__mock.aliyun_oss import setup_aliyun_oss_mock
from tests.unit_tests.oss.__mock.base import (
BaseStorageTest,
get_example_bucket,
get_example_folder,
)
class TestAliyunOss(BaseStorageTest):
@pytest.fixture(autouse=True)
def setup_method(self, setup_aliyun_oss_mock):
"""Executed before each test method."""
with patch.object(Auth, "__init__", return_value=None):
self.storage = AliyunOssStorage()
self.storage.bucket_name = get_example_bucket()
self.storage.folder = get_example_folder()

View File

@ -0,0 +1,41 @@
import pytest
from packaging import version
from services.app_dsl_service import AppDslService
from services.app_dsl_service.exc import DSLVersionNotSupportedError
from services.app_dsl_service.service import _check_or_fix_dsl, current_dsl_version
class TestAppDSLService:
def test_check_or_fix_dsl_missing_version(self):
import_data = {}
result = _check_or_fix_dsl(import_data)
assert result["version"] == "0.1.0"
assert result["kind"] == "app"
def test_check_or_fix_dsl_missing_kind(self):
import_data = {"version": "0.1.0"}
result = _check_or_fix_dsl(import_data)
assert result["kind"] == "app"
def test_check_or_fix_dsl_older_version(self):
import_data = {"version": "0.0.9", "kind": "app"}
result = _check_or_fix_dsl(import_data)
assert result["version"] == "0.0.9"
def test_check_or_fix_dsl_current_version(self):
import_data = {"version": current_dsl_version, "kind": "app"}
result = _check_or_fix_dsl(import_data)
assert result["version"] == current_dsl_version
def test_check_or_fix_dsl_newer_version(self):
current_version = version.parse(current_dsl_version)
newer_version = f"{current_version.major}.{current_version.minor + 1}.0"
import_data = {"version": newer_version, "kind": "app"}
with pytest.raises(DSLVersionNotSupportedError):
_check_or_fix_dsl(import_data)
def test_check_or_fix_dsl_invalid_kind(self):
import_data = {"version": current_dsl_version, "kind": "invalid"}
result = _check_or_fix_dsl(import_data)
assert result["kind"] == "app"

View File

@ -13,3 +13,4 @@ pytest api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/tcvectordb \
api/tests/integration_tests/vdb/upstash \
api/tests/integration_tests/vdb/couchbase \
api/tests/integration_tests/vdb/oceanbase \

View File

@ -455,6 +455,20 @@ TIDB_VECTOR_USER=xxx.root
TIDB_VECTOR_PASSWORD=xxxxxx
TIDB_VECTOR_DATABASE=dify
# TiDB on Qdrant configuration, only available when VECTOR_STORE is `tidb_on_qdrant`
TIDB_ON_QDRANT_URL=http://127.0.0.1
TIDB_ON_QDRANT_API_KEY=dify
TIDB_ON_QDRANT_CLIENT_TIMEOUT=20
TIDB_ON_QDRANT_GRPC_ENABLED=false
TIDB_ON_QDRANT_GRPC_PORT=6334
TIDB_PUBLIC_KEY=dify
TIDB_PRIVATE_KEY=dify
TIDB_API_URL=http://127.0.0.1
TIDB_IAM_API_URL=http://127.0.0.1
TIDB_REGION=regions/aws-us-east-1
TIDB_PROJECT_ID=dify
TIDB_SPEND_LIMIT=100
# Chroma configuration, only available when VECTOR_STORE is `chroma`
CHROMA_HOST=127.0.0.1
CHROMA_PORT=8000
@ -517,6 +531,14 @@ VIKINGDB_SCHEMA=http
VIKINGDB_CONNECTION_TIMEOUT=30
VIKINGDB_SOCKET_TIMEOUT=30
# OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase`
OCEANBASE_VECTOR_HOST=oceanbase-vector
OCEANBASE_VECTOR_PORT=2881
OCEANBASE_VECTOR_USER=root@test
OCEANBASE_VECTOR_PASSWORD=
OCEANBASE_VECTOR_DATABASE=test
OCEANBASE_MEMORY_LIMIT=6G
# ------------------------------
# Knowledge Configuration
# ------------------------------
@ -536,6 +558,22 @@ ETL_TYPE=dify
# For example: http://unstructured:8000/general/v0/general
UNSTRUCTURED_API_URL=
# ------------------------------
# Model Configuration
# ------------------------------
# The maximum number of tokens allowed for prompt generation.
# This setting controls the upper limit of tokens that can be used by the LLM
# when generating a prompt in the prompt generation tool.
# Default: 512 tokens.
PROMPT_GENERATION_MAX_TOKENS=512
# The maximum number of tokens allowed for code generation.
# This setting controls the upper limit of tokens that can be used by the LLM
# when generating code in the code generation tool.
# Default: 1024 tokens.
CODE_GENERATION_MAX_TOKENS=1024
# ------------------------------
# Multi-modal Configuration
# ------------------------------
@ -550,6 +588,12 @@ MULTIMODAL_SEND_IMAGE_FORMAT=base64
# Upload image file size limit, default 10M.
UPLOAD_IMAGE_FILE_SIZE_LIMIT=10
# Upload video file size limit, default 100M.
UPLOAD_VIDEO_FILE_SIZE_LIMIT=100
# Upload audio file size limit, default 50M.
UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
# ------------------------------
# Sentry Configuration
# Used for application monitoring and error log tracking.

View File

@ -56,6 +56,7 @@ services:
SANDBOX_PORT: ${SANDBOX_PORT:-8194}
volumes:
- ./volumes/sandbox/dependencies:/dependencies
- ./volumes/sandbox/conf:/conf
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8194/health" ]
networks:

View File

@ -140,6 +140,18 @@ x-shared-env: &shared-api-worker-env
TIDB_VECTOR_USER: ${TIDB_VECTOR_USER:-}
TIDB_VECTOR_PASSWORD: ${TIDB_VECTOR_PASSWORD:-}
TIDB_VECTOR_DATABASE: ${TIDB_VECTOR_DATABASE:-dify}
TIDB_ON_QDRANT_URL: ${TIDB_ON_QDRANT_URL:-http://127.0.0.1}
TIDB_ON_QDRANT_API_KEY: ${TIDB_ON_QDRANT_API_KEY:-dify}
TIDB_ON_QDRANT_CLIENT_TIMEOUT: ${TIDB_ON_QDRANT_CLIENT_TIMEOUT:-20}
TIDB_ON_QDRANT_GRPC_ENABLED: ${TIDB_ON_QDRANT_GRPC_ENABLED:-false}
TIDB_ON_QDRANT_GRPC_PORT: ${TIDB_ON_QDRANT_GRPC_PORT:-6334}
TIDB_PUBLIC_KEY: ${TIDB_PUBLIC_KEY:-dify}
TIDB_PRIVATE_KEY: ${TIDB_PRIVATE_KEY:-dify}
TIDB_API_URL: ${TIDB_API_URL:-http://127.0.0.1}
TIDB_IAM_API_URL: ${TIDB_IAM_API_URL:-http://127.0.0.1}
TIDB_REGION: ${TIDB_REGION:-regions/aws-us-east-1}
TIDB_PROJECT_ID: ${TIDB_PROJECT_ID:-dify}
TIDB_SPEND_LIMIT: ${TIDB_SPEND_LIMIT:-100}
ORACLE_HOST: ${ORACLE_HOST:-oracle}
ORACLE_PORT: ${ORACLE_PORT:-1521}
ORACLE_USER: ${ORACLE_USER:-dify}
@ -195,8 +207,12 @@ x-shared-env: &shared-api-worker-env
UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
ETL_TYPE: ${ETL_TYPE:-dify}
UNSTRUCTURED_API_URL: ${UNSTRUCTURED_API_URL:-}
PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
MULTIMODAL_SEND_IMAGE_FORMAT: ${MULTIMODAL_SEND_IMAGE_FORMAT:-base64}
UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}
UPLOAD_AUDIO_FILE_SIZE_LIMIT: ${UPLOAD_AUDIO_FILE_SIZE_LIMIT:-50}
SENTRY_DSN: ${API_SENTRY_DSN:-}
SENTRY_TRACES_SAMPLE_RATE: ${API_SENTRY_TRACES_SAMPLE_RATE:-1.0}
SENTRY_PROFILES_SAMPLE_RATE: ${API_SENTRY_PROFILES_SAMPLE_RATE:-1.0}
@ -243,6 +259,12 @@ x-shared-env: &shared-api-worker-env
POSITION_PROVIDER_INCLUDES: ${POSITION_PROVIDER_INCLUDES:-}
POSITION_PROVIDER_EXCLUDES: ${POSITION_PROVIDER_EXCLUDES:-}
MAX_VARIABLE_SIZE: ${MAX_VARIABLE_SIZE:-204800}
OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase-vector}
OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881}
OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test}
OCEANBASE_VECTOR_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-""}
OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test}
OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
services:
# API service
@ -570,6 +592,18 @@ services:
CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider}
IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE}
# OceanBase vector database
oceanbase-vector:
image: quay.io/oceanbase/oceanbase-ce:4.3.3.0-100000142024101215
profiles:
- oceanbase-vector
restart: always
volumes:
- ./volumes/oceanbase/data:/root/ob
- ./volumes/oceanbase/conf:/root/.obd/cluster
environment:
OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
# Oracle vector database
oracle:
image: container-registry.oracle.com/database/free:latest

View File

@ -0,0 +1,14 @@
app:
port: 8194
debug: True
key: dify-sandbox
max_workers: 4
max_requests: 50
worker_timeout: 5
python_path: /usr/local/bin/python3
enable_network: True # please make sure there is no network risk in your environment
allowed_syscalls: # please leave it empty if you are not sure how seccomp works
proxy:
socks5: ''
http: ''
https: ''

View File

@ -0,0 +1,35 @@
app:
port: 8194
debug: True
key: dify-sandbox
max_workers: 4
max_requests: 50
worker_timeout: 5
python_path: /usr/local/bin/python3
python_lib_path:
- /usr/local/lib/python3.10
- /usr/lib/python3.10
- /usr/lib/python3
- /usr/lib/x86_64-linux-gnu
- /etc/ssl/certs/ca-certificates.crt
- /etc/nsswitch.conf
- /etc/hosts
- /etc/resolv.conf
- /run/systemd/resolve/stub-resolv.conf
- /run/resolvconf/resolv.conf
- /etc/localtime
- /usr/share/zoneinfo
- /etc/timezone
# add more paths if needed
python_pip_mirror_url: https://pypi.tuna.tsinghua.edu.cn/simple
nodejs_path: /usr/local/bin/node
enable_network: True
allowed_syscalls:
- 1
- 2
- 3
# add all the syscalls which you require
proxy:
socks5: ''
http: ''
https: ''

View File

@ -9,8 +9,8 @@ const DatasetFooter = () => {
<footer className='px-12 py-6 grow-0 shrink-0'>
<h3 className='text-xl font-semibold leading-tight text-gradient'>{t('dataset.didYouKnow')}</h3>
<p className='mt-1 text-sm font-normal leading-tight text-gray-700'>
{t('dataset.intro1')}<a className='inline-flex items-center gap-1 link' target='_blank' rel='noopener noreferrer' href='/'>{t('dataset.intro2')}</a>{t('dataset.intro3')}<br />
{t('dataset.intro4')}<a className='inline-flex items-center gap-1 link' target='_blank' rel='noopener noreferrer' href='/'>{t('dataset.intro5')}</a>{t('dataset.intro6')}
{t('dataset.intro1')}<span className='inline-flex items-center gap-1 text-blue-600'>{t('dataset.intro2')}</span>{t('dataset.intro3')}<br />
{t('dataset.intro4')}<span className='inline-flex items-center gap-1 text-blue-600'>{t('dataset.intro5')}</span>{t('dataset.intro6')}
</p>
</footer>
)

View File

@ -1,6 +1,6 @@
'use client'
import type { FC } from 'react'
import { type FC, useEffect } from 'react'
import { useContext } from 'use-context-selector'
import TemplateEn from './template/template.en.mdx'
import TemplateZh from './template/template.zh.mdx'
@ -14,6 +14,13 @@ const Doc: FC<DocProps> = ({
apiBaseUrl,
}) => {
const { locale } = useContext(I18n)
useEffect(() => {
const hash = location.hash
if (hash)
document.querySelector(hash)?.scrollIntoView()
}, [])
return (
<article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'>
{

View File

@ -20,17 +20,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</CodeGroup>
</div>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/document/create_by_text'
url='/datasets/{dataset_id}/document/create-by-text'
method='POST'
title='Create a document from text'
name='#create_by_text'
title='Create a Document from Text'
name='#create-by-text'
/>
<Row>
<Col>
This api is based on an existing Knowledge and creates a new document through text based on this Knowledge.
This API creates a new document from text in an existing knowledge base.
### Params
<Properties>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
- <code>economy</code> Economy: Build using inverted index of keyword table index
</Property>
<Property name='process_rule' type='object' key='process_rule'>
Processing rules
@ -62,7 +62,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -123,17 +123,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/document/create_by_file'
url='/datasets/{dataset_id}/document/create-by-file'
method='POST'
title='Create documents from files'
name='#create_by_file'
title='Create a Document from a File'
name='#create-by-file'
/>
<Row>
<Col>
This api is based on an existing Knowledge and creates a new document through a file based on this Knowledge.
This API creates a new document from an uploaded file in an existing knowledge base.
### Params
<Properties>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id Source document ID (optional)
- <code>original_document_id</code> Source document ID (optional)
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
- The source document cannot be an archived document
- When original_document_id is passed in, the request updates the existing document; process_rule is optional, and if omitted the segmentation method of the source document is used by default
- When original_document_id is not passed in, the request creates a new document, and process_rule is required
- indexing_technique Index mode
- <code>indexing_technique</code> Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
- <code>economy</code> Economy: Build using inverted index of keyword table index
- process_rule Processing rules
- <code>process_rule</code> Processing rules
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
@ -164,7 +164,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -221,12 +221,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets'
method='POST'
title='Create an empty Knowledge'
title='Create an Empty Knowledge Base'
name='#create_empty_dataset'
/>
<Row>
@ -240,9 +240,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
Knowledge description (optional)
</Property>
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index Technique (optional)
- <code>high_quality</code> high_quality
- <code>economy</code> economy
Index technique (optional)
- <code>high_quality</code> High quality
- <code>economy</code> Economy
</Property>
<Property name='permission' type='string' key='permission'>
Permission
@ -252,21 +252,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Property>
<Property name='provider' type='string' key='provider'>
Provider (optional, default: vendor)
- <code>vendor</code> vendor
- <code>external</code> external knowledge
- <code>vendor</code> Vendor
- <code>external</code> External knowledge
</Property>
<Property name='external_knowledge_api_id' type='str' key='external_knowledge_api_id'>
External Knowledge api id (optional)
External knowledge API ID (optional)
</Property>
<Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
External Knowledge id (optional)
External knowledge ID (optional)
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
>
@ -306,12 +306,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets'
method='GET'
title='Knowledge list'
title='Get Knowledge Base List'
name='#dataset_list'
/>
<Row>
@ -327,9 +327,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
>
@ -369,12 +369,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}'
method='DELETE'
title='Delete knowledge'
title='Delete a Knowledge Base'
name='#delete_dataset'
/>
<Row>
@ -406,17 +406,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='Update document via text'
name='#update_by_text'
title='Update a Document with Text'
name='#update-by-text'
/>
<Row>
<Col>
This api is based on an existing Knowledge and updates the document through text based on this Knowledge.
This API updates a document from text in an existing knowledge base.
### Params
<Properties>
@ -446,7 +446,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -503,17 +503,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='Update a document from a file'
name='#update_by_file'
title='Update a Document with a File'
name='#update-by-file'
/>
<Row>
<Col>
This api is based on an existing Knowledge, and updates documents through files based on this Knowledge
  This API updates a document in an existing knowledge base using a file.
### Params
<Properties>
@ -543,7 +543,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -597,12 +597,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
method='GET'
title='Get document embedding status (progress)'
title='Get Document Embedding Status (Progress)'
name='#indexing_status'
/>
<Row>
@ -652,12 +652,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}'
method='DELETE'
title='Delete document'
title='Delete a Document'
name='#delete_document'
/>
<Row>
@ -694,12 +694,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents'
method='GET'
title='Knowledge document list'
title='Get the Document List of a Knowledge Base'
name='#dataset_document_list'
/>
<Row>
@ -714,13 +714,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
Search keywords, currently only search document names(optional)
        Search keyword; currently only document names are searched (optional)
</Property>
<Property name='page' type='string' key='page'>
Page number(optional)
Page number (optional)
</Property>
<Property name='limit' type='string' key='limit'>
Number of items returned, default 20, range 1-100(optional)
Number of items returned, default 20, range 1-100 (optional)
</Property>
</Properties>
</Col>
@ -769,12 +769,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST'
title='Add segment'
title='Add Chunks to a Document'
name='#create_new_segment'
/>
<Row>
@ -792,9 +792,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) Text content/question content, required
- <code>answer</code> (text) Answer content, if the mode of the Knowledge is qa mode, pass the value(optional)
- <code>keywords</code> (list) Keywords(optional)
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, if the mode of the knowledge is Q&A mode, pass the value (optional)
- <code>keywords</code> (list) Keywords (optional)
</Property>
</Properties>
</Col>
@ -855,12 +855,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET'
title='get documents segments'
title='Get Chunks from a Document'
name='#get_segment'
/>
<Row>
@ -878,10 +878,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
keywordchoosable
Keyword (optional)
</Property>
<Property name='status' type='string' key='status'>
Search statuscompleted
Search status, completed
</Property>
</Properties>
</Col>
@ -933,12 +933,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE'
title='delete document segment'
title='Delete a Chunk in a Document'
name='#delete_segment'
/>
<Row>
@ -979,12 +979,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST'
title='update document segment'
  title='Update a Chunk in a Document'
name='#update_segment'
/>
<Row>
@ -1005,10 +1005,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segment' type='object' key='segment'>
- <code>content</code> (text) text content/question contentrequired
- <code>answer</code> (text) Answer content, not required, passed if the Knowledge is in qa mode
- <code>keywords</code> (list) keyword, not required
- <code>enabled</code> (bool) false/true, not required
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
- <code>keywords</code> (list) Keyword (optional)
      - <code>enabled</code> (bool) false / true (optional)
</Property>
</Properties>
</Col>
@ -1067,41 +1067,41 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='Dataset hit testing'
name='#dataset_hit_testing'
title='Retrieve Chunks from a Knowledge Base'
name='#dataset_retrieval'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
Knowledge ID
</Property>
</Properties>
### Request Body
<Properties>
<Property name='query' type='string' key='query'>
retrieval keywordc
Query keyword
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
retrieval keyword(Optional, if not filled, it will be recalled according to the default method)
      Retrieval model (optional; if not provided, the default retrieval method is used)
- <code>search_method</code> (text) Search method: One of the following four keywords is required
- <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking, optional, required if the search mode is semantic_search or hybrid_search
- <code>reranking_mode</code> (object) Rerank model configuration, optional, required if reranking is enabled
- <code>reranking_enable</code> (bool) Whether to enable reranking, required if the search mode is semantic_search or hybrid_search (optional)
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return, optional
- <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (double) Score threshold
</Property>
@ -1114,26 +1114,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
label="/datasets/{dataset_id}/retrieve"
    targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -1212,7 +1212,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Row>
<Col>
View File
@ -20,13 +20,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</CodeGroup>
</div>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/document/create_by_text'
url='/datasets/{dataset_id}/document/create-by-text'
method='POST'
title='通过文本创建文档'
name='#create_by_text'
name='#create-by-text'
/>
<Row>
<Col>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'>
索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
- <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
</Property>
<Property name='process_rule' type='object' key='process_rule'>
处理规则
@ -64,7 +64,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度token默认为 1000
</Property>
</Properties>
</Col>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
```bash {{ title: 'cURL' }}
    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -123,13 +123,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/document/create_by_file'
url='/datasets/{dataset_id}/document/create-by-file'
method='POST'
title='通过文件创建文档 '
name='#create_by_file'
name='#create-by-file'
/>
<Row>
<Col>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id 源文档 ID (选填)
- <code>original_document_id</code> 源文档 ID选填
- 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
- 源文档不可为归档的文档
- 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
- 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
- indexing_technique 索引方式
- <code>indexing_technique</code> 索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
- <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
- process_rule 处理规则
- <code>process_rule</code> 处理规则
- <code>mode</code> (string) 清洗、分段模式 automatic 自动 / custom 自定义
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
@ -166,7 +166,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度token默认为 1000
</Property>
<Property name='file' type='multipart/form-data' key='file'>
需要上传的文件。
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -221,7 +221,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets'
@ -245,13 +245,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>economy</code> 经济
</Property>
<Property name='permission' type='string' key='permission'>
权限选填默认only_me
权限(选填,默认 only_me
- <code>only_me</code> 仅自己
- <code>all_team_members</code> 所有团队成员
- <code>partial_members</code> 部分团队成员
</Property>
<Property name='provider' type='string' key='provider'>
provider(选填,默认 vendor
Provider(选填,默认 vendor
- <code>vendor</code> 上传文件
- <code>external</code> 外部知识库
</Property>
@ -264,9 +264,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="POST"
<CodeGroup
title="Request"
tag="POST"
label="/datasets"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name", "permission": "only_me"}'`}
>
@ -306,7 +306,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets'
@ -369,7 +369,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}'
@ -406,13 +406,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='通过文本更新文档 '
name='#update_by_text'
name='#update-by-text'
/>
<Row>
<Col>
@ -431,7 +431,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
文档名称 (选填)
文档名称(选填)
</Property>
<Property name='text' type='string' key='text'>
文档内容(选填)
@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度token默认为 1000
</Property>
</Properties>
</Col>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -503,13 +503,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='通过文件更新文档 '
name='#update_by_file'
name='#update-by-file'
/>
<Row>
<Col>
@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
文档名称 (选填)
文档名称(选填)
</Property>
<Property name='file' type='multipart/form-data' key='file'>
需要上传的文件
@ -545,7 +545,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度token默认为 1000
</Property>
</Properties>
</Col>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -597,7 +597,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
@ -652,7 +652,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}'
@ -694,7 +694,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents'
@ -769,7 +769,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -793,7 +793,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) 文本内容/问题内容,必填
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为qa模式则传值
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
- <code>keywords</code> (list) 关键字,非必填
</Property>
</Properties>
@ -855,7 +855,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -933,7 +933,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -979,7 +979,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -1006,7 +1006,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties>
<Property name='segment' type='object' key='segment'>
- <code>content</code> (text) 文本内容/问题内容,必填
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为qa模式则传值
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
- <code>keywords</code> (list) 关键字,非必填
- <code>enabled</code> (bool) false/true非必填
</Property>
@ -1068,13 +1068,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col>
</Row>
---
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='知识库召回测试'
name='#dataset_hit_testing'
title='检索知识库'
name='#dataset_retrieval'
/>
<Row>
<Col>
@ -1088,23 +1088,23 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='query' type='string' key='query'>
召回关键词
检索关键词
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
召回参数(选填,如不填,按照默认方式召回)
检索参数(选填,如不填,按照默认方式召回)
      - <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填
- <code>keyword_search</code> 关键字检索
- <code>semantic_search</code> 语义检索
- <code>full_text_search</code> 全文检索
- <code>hybrid_search</code> 混合检索
- <code>reranking_enable</code> (bool) 是否启用 Reranking非必填如果检索模式为semantic_search模式或者hybrid_search则传值
- <code>reranking_enable</code> (bool) 是否启用 Reranking非必填如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
- <code>reranking_mode</code> (object) Rerank模型配置非必填如果启用了 reranking 则传值
- <code>reranking_provider_name</code> (string) Rerank 模型提供商
- <code>reranking_model_name</code> (string) Rerank 模型名称
- <code>weights</code> (double) 混合检索模式下语意检索的权重设置
- <code>top_k</code> (integer) 返回结果数量,非必填
- <code>score_threshold_enabled</code> (bool) 是否开启Score阈值
- <code>score_threshold</code> (double) Score阈值
- <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
- <code>score_threshold</code> (double) Score 阈值
</Property>
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
未启用字段
@ -1115,26 +1115,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
label="/datasets/{dataset_id}/retrieve"
    targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -1214,7 +1214,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Row>
---
<hr className='ml-0 mr-0' />
<Row>
<Col>
View File
@ -15,6 +15,7 @@ import { AppType } from '@/types/app'
import type { DataSet } from '@/models/datasets'
import {
getMultipleRetrievalConfig,
getSelectedDatasetsMode,
} from '@/app/components/workflow/nodes/knowledge-retrieval/utils'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
@ -38,6 +39,7 @@ const DatasetConfig: FC = () => {
isAgent,
datasetConfigs,
setDatasetConfigs,
setRerankSettingModalOpen,
} = useContext(ConfigContext)
const formattingChangedDispatcher = useFormattingChangedDispatcher()
@ -55,6 +57,20 @@ const DatasetConfig: FC = () => {
...(datasetConfigs as any),
...retrievalConfig,
})
const {
allExternal,
allInternal,
mixtureInternalAndExternal,
mixtureHighQualityAndEconomic,
inconsistentEmbeddingModel,
} = getSelectedDatasetsMode(filteredDataSets)
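    // Ask for explicit rerank settings when the selection can't share one retrieval config:
    // internal datasets mixing indexing modes or embedding models, or any external knowledge base.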
if (
(allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
|| mixtureInternalAndExternal
|| allExternal
)
setRerankSettingModalOpen(true)
formattingChangedDispatcher()
}
View File
@ -266,7 +266,7 @@ const ConfigContent: FC<Props> = ({
<div className='mt-2'>
<div className='flex items-center'>
{
selectedDatasetsMode.allEconomic && (
selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && (
<div
className='flex items-center'
onClick={handleDisabledSwitchClick}
View File
@ -12,6 +12,7 @@ import { RETRIEVE_TYPE } from '@/types/app'
import Toast from '@/app/components/base/toast'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { RerankingModeEnum } from '@/models/datasets'
import type { DataSet } from '@/models/datasets'
import type { DatasetConfigs } from '@/models/debug'
import {
@ -47,7 +48,10 @@ const ParamsConfig = ({
const isValid = () => {
let errMsg = ''
if (tempDataSetConfigs.retrieval_model === RETRIEVE_TYPE.multiWay) {
if (!tempDataSetConfigs.reranking_model?.reranking_model_name && (rerankDefaultModel && !isRerankDefaultModelValid))
if (tempDataSetConfigs.reranking_enable
&& tempDataSetConfigs.reranking_mode === RerankingModeEnum.RerankingModel
&& !isRerankDefaultModelValid
)
errMsg = t('appDebug.datasetConfig.rerankModelRequired')
}
if (errMsg) {
@ -62,7 +66,9 @@ const ParamsConfig = ({
if (!isValid())
return
const config = { ...tempDataSetConfigs }
if (config.retrieval_model === RETRIEVE_TYPE.multiWay && !config.reranking_model) {
if (config.retrieval_model === RETRIEVE_TYPE.multiWay
&& config.reranking_mode === RerankingModeEnum.RerankingModel
&& !config.reranking_model) {
config.reranking_model = {
reranking_provider_name: rerankDefaultModel?.provider?.provider,
reranking_model_name: rerankDefaultModel?.model,
View File
@ -253,12 +253,18 @@ const Configuration: FC = () => {
}
hideSelectDataSet()
const {
allEconomic,
allExternal,
allInternal,
mixtureInternalAndExternal,
mixtureHighQualityAndEconomic,
inconsistentEmbeddingModel,
} = getSelectedDatasetsMode(newDatasets)
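    // Same gating as in DatasetConfig: open the rerank settings modal when dataset modes conflict or external knowledge is selected.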
if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel)
if (
(allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
|| mixtureInternalAndExternal
|| allExternal
)
setRerankSettingModalOpen(true)
const { datasets, retrieval_model, score_threshold_enabled, ...restConfigs } = datasetConfigs
View File
@ -36,6 +36,7 @@ import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import TextGeneration from '@/app/components/app/text-generate/item'
import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils'
import MessageLogModal from '@/app/components/base/message-log-modal'
import PromptLogModal from '@/app/components/base/prompt-log-modal'
import { useStore as useAppStore } from '@/app/components/app/store'
import { useAppContext } from '@/context/app-context'
import useTimestamp from '@/hooks/use-timestamp'
@ -168,11 +169,13 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
const { userProfile: { timezone } } = useAppContext()
const { formatTime } = useTimestamp()
const { onClose, appDetail } = useContext(DrawerContext)
const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, currentLogModalActiveTab } = useAppStore(useShallow(state => ({
const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, showPromptLogModal, setShowPromptLogModal, currentLogModalActiveTab } = useAppStore(useShallow(state => ({
currentLogItem: state.currentLogItem,
setCurrentLogItem: state.setCurrentLogItem,
showMessageLogModal: state.showMessageLogModal,
setShowMessageLogModal: state.setShowMessageLogModal,
showPromptLogModal: state.showPromptLogModal,
setShowPromptLogModal: state.setShowPromptLogModal,
currentLogModalActiveTab: state.currentLogModalActiveTab,
})))
const { t } = useTranslation()
@ -192,8 +195,8 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
conversation_id: detail.id,
limit: 10,
}
if (allChatItems.at(-1)?.id)
params.first_id = allChatItems.at(-1)?.id.replace('question-', '')
if (allChatItems[0]?.id)
params.first_id = allChatItems[0]?.id.replace('question-', '')
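      // Page backwards from the oldest loaded item; strip the synthetic 'question-' prefix to get the server-side message ID.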
const messageRes = await fetchChatMessages({
url: `/apps/${appDetail?.id}/chat-messages`,
params,
@ -557,6 +560,16 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) {
defaultTab={currentLogModalActiveTab}
/>
)}
{showPromptLogModal && (
<PromptLogModal
width={width}
currentLogItem={currentLogItem}
onCancel={() => {
setCurrentLogItem()
setShowPromptLogModal(false)
}}
/>
)}
</div>
)
}
View File
@ -1804,6 +1804,280 @@ exports[`build chat item tree and get thread messages should get thread messages
]
`;
exports[`build chat item tree and get thread messages should work with partial messages 1`] = `
[
{
"children": [
{
"agent_thoughts": [
{
"chain_id": null,
"created_at": 1726105809,
"files": [],
"id": "1019cd79-d141-4f9f-880a-fc1441cfd802",
"message_id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
"observation": "",
"position": 1,
"thought": "Sure! My number is 54. Your turn!",
"tool": "",
"tool_input": "",
"tool_labels": {},
},
],
"children": [
{
"children": [
{
"agent_thoughts": [
{
"chain_id": null,
"created_at": 1726105822,
"files": [],
"id": "0773bec7-b992-4a53-92b2-20ebaeae8798",
"message_id": "324bce32-c98c-435d-a66b-bac974ebb5ed",
"observation": "",
"position": 1,
"thought": "My number is 4729. Your turn!",
"tool": "",
"tool_input": "",
"tool_labels": {},
},
],
"children": [],
"content": "My number is 4729. Your turn!",
"conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
"feedbackDisabled": false,
"id": "324bce32-c98c-435d-a66b-bac974ebb5ed",
"input": {
"inputs": {},
"query": "3306",
},
"isAnswer": true,
"log": [
{
"files": [],
"role": "user",
"text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
},
{
"files": [],
"role": "assistant",
"text": "Sure! My number is 54. Your turn!",
},
{
"files": [],
"role": "user",
"text": "3306",
},
{
"files": [],
"role": "assistant",
"text": "My number is 4729. Your turn!",
},
],
"message_files": [],
"more": {
"latency": "1.30",
"time": "09/11/2024 09:50 PM",
"tokens": 66,
},
"parentMessageId": "question-324bce32-c98c-435d-a66b-bac974ebb5ed",
"siblingIndex": 0,
"workflow_run_id": null,
},
],
"content": "3306",
"id": "question-324bce32-c98c-435d-a66b-bac974ebb5ed",
"isAnswer": false,
"message_files": [],
"parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
},
{
"children": [
{
"agent_thoughts": [
{
"chain_id": null,
"created_at": 1726107812,
"files": [],
"id": "5ca650f3-982c-4399-8b95-9ea241c76707",
"message_id": "684b5396-4e91-4043-88e9-aabe48b21acc",
"observation": "",
"position": 1,
"thought": "My number is 4821. Your turn!",
"tool": "",
"tool_input": "",
"tool_labels": {},
},
],
"children": [
{
"children": [
{
"agent_thoughts": [
{
"chain_id": null,
"created_at": 1726111024,
"files": [],
"id": "095cacab-afad-4387-a41d-1662578b8b13",
"message_id": "19904a7b-7494-4ed8-b72c-1d18668cea8c",
"observation": "",
"position": 1,
"thought": "My number is 1456. Your turn!",
"tool": "",
"tool_input": "",
"tool_labels": {},
},
],
"children": [],
"content": "My number is 1456. Your turn!",
"conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
"feedbackDisabled": false,
"id": "19904a7b-7494-4ed8-b72c-1d18668cea8c",
"input": {
"inputs": {},
"query": "1003",
},
"isAnswer": true,
"log": [
{
"files": [],
"role": "user",
"text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
},
{
"files": [],
"role": "assistant",
"text": "Sure! My number is 54. Your turn!",
},
{
"files": [],
"role": "user",
"text": "3306",
},
{
"files": [],
"role": "assistant",
"text": "My number is 4821. Your turn!",
},
{
"files": [],
"role": "user",
"text": "1003",
},
{
"files": [],
"role": "assistant",
"text": "My number is 1456. Your turn!",
},
],
"message_files": [],
"more": {
"latency": "1.38",
"time": "09/11/2024 11:17 PM",
"tokens": 86,
},
"parentMessageId": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c",
"siblingIndex": 0,
"workflow_run_id": null,
},
],
"content": "1003",
"id": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c",
"isAnswer": false,
"message_files": [],
"parentMessageId": "684b5396-4e91-4043-88e9-aabe48b21acc",
},
],
"content": "My number is 4821. Your turn!",
"conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
"feedbackDisabled": false,
"id": "684b5396-4e91-4043-88e9-aabe48b21acc",
"input": {
"inputs": {},
"query": "3306",
},
"isAnswer": true,
"log": [
{
"files": [],
"role": "user",
"text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
},
{
"files": [],
"role": "assistant",
"text": "Sure! My number is 54. Your turn!",
},
{
"files": [],
"role": "user",
"text": "3306",
},
{
"files": [],
"role": "assistant",
"text": "My number is 4821. Your turn!",
},
],
"message_files": [],
"more": {
"latency": "1.48",
"time": "09/11/2024 10:23 PM",
"tokens": 66,
},
"parentMessageId": "question-684b5396-4e91-4043-88e9-aabe48b21acc",
"siblingIndex": 1,
"workflow_run_id": null,
},
],
"content": "3306",
"id": "question-684b5396-4e91-4043-88e9-aabe48b21acc",
"isAnswer": false,
"message_files": [],
"parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
},
],
"content": "Sure! My number is 54. Your turn!",
"conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80",
"feedbackDisabled": false,
"id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
"input": {
"inputs": {},
"query": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
},
"isAnswer": true,
"log": [
{
"files": [],
"role": "user",
"text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
},
{
"files": [],
"role": "assistant",
"text": "Sure! My number is 54. Your turn!",
},
],
"message_files": [],
"more": {
"latency": "1.52",
"time": "09/11/2024 09:50 PM",
"tokens": 46,
},
"parentMessageId": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
"siblingIndex": 0,
"workflow_run_id": null,
},
],
"content": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38",
"id": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd",
"isAnswer": false,
"message_files": [],
},
]
`;
exports[`build chat item tree and get thread messages should work with real world messages 1`] = `
[
{
View File
@ -255,4 +255,10 @@ describe('build chat item tree and get thread messages', () => {
const threadMessages6_2 = getThreadMessages(tree6, 'ff4c2b43-48a5-47ad-9dc5-08b34ddba61b')
expect(threadMessages6_2).toMatchSnapshot()
})
const partialMessages = (realWorldMessages as ChatItemInTree[]).slice(-10)
const tree7 = buildChatItemTree(partialMessages)
it('should work with partial messages', () => {
expect(tree7).toMatchSnapshot()
})
})
View File
@ -134,6 +134,12 @@ function buildChatItemTree(allMessages: IChatItem[]): ChatItemInTree[] {
}
}
// If no messages have parentMessageId=null (indicating a root node),
// then we likely have a partial chat history. In this case,
// use the first available message as the root node.
if (rootNodes.length === 0 && allMessages.length > 0)
rootNodes.push(map[allMessages[0]!.id]!)
return rootNodes
}
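A minimal sketch of this fallback under simplified types, not the real `ChatItemInTree` shape: when every message in a sliced history still points at a truncated parent, the first message becomes the root.

```ts
// Simplified stand-ins for illustration only; the real ChatItemInTree carries more fields.
type Item = { id: string; parentMessageId?: string | null }
type TreeItem = Item & { children: TreeItem[] }

function toTree(all: Item[]): TreeItem[] {
  const map: Record<string, TreeItem> = {}
  all.forEach(m => (map[m.id] = { ...m, children: [] }))
  const roots: TreeItem[] = []
  for (const m of all) {
    if (m.parentMessageId == null)
      roots.push(map[m.id]) // an explicit root
    else
      map[m.parentMessageId]?.children.push(map[m.id]) // missing parent => sliced away
  }
  // The fallback from the change above: promote the first message to the root.
  if (roots.length === 0 && all.length > 0)
    roots.push(map[all[0].id])
  return roots
}

// 'b' points at the truncated 'a', so 'b' becomes the root, with 'c' as its child.
toTree([{ id: 'b', parentMessageId: 'a' }, { id: 'c', parentMessageId: 'b' }])
```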
View File
@ -1,5 +1,5 @@
import type { FC } from 'react'
import { useState } from 'react'
import { useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { RiSearchLine } from '@remixicon/react'
import cn from '@/utils/classnames'
@ -12,6 +12,7 @@ type SearchInputProps = {
onChange: (v: string) => void
white?: boolean
}
const SearchInput: FC<SearchInputProps> = ({
placeholder,
className,
@ -21,6 +22,7 @@ const SearchInput: FC<SearchInputProps> = ({
}) => {
const { t } = useTranslation()
const [focus, setFocus] = useState<boolean>(false)
const isComposing = useRef<boolean>(false)
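  // True while an IME composition session is in progress; intermediate keystrokes should not fire onChange.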
return (
<div className={cn(
@ -45,7 +47,14 @@ const SearchInput: FC<SearchInputProps> = ({
placeholder={placeholder || t('common.operation.search')!}
value={value}
onChange={(e) => {
onChange(e.target.value)
if (!isComposing.current)
onChange(e.target.value)
}}
onCompositionStart={() => {
isComposing.current = true
}}
onCompositionEnd={() => {
isComposing.current = false
}}
onFocus={() => setFocus(true)}
onBlur={() => setFocus(false)}
View File
@ -39,6 +39,7 @@ export const Heading = function H2({
}
return (
<>
<span id={name?.replace(/^#/, '')} className='relative -top-28' />
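      {/* Invisible anchor shifted upward so hash links scroll the heading below the sticky page header */}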
<div className="flex items-center gap-x-3" >
<span className={`font-mono text-[0.625rem] font-semibold leading-6 rounded-lg px-1.5 ring-1 ring-inset ${style}`}>{method}</span>
{/* <span className="h-0.5 w-0.5 rounded-full bg-zinc-300 dark:bg-zinc-600"></span> */}
View File
@ -656,6 +656,11 @@ Chat applications support session persistence, allowing previous chat history to
<Property name='pinned' type='bool' key='pinned'>
Return only pinned conversations as `true`, only non-pinned as `false`
</Property>
<Property name='sort_by' type='string' key='sort_by'>
      Sorting field (optional), default: -updated_at (sorted in descending order by update time)
      - Available values: created_at, -created_at, updated_at, -updated_at
      - A leading "-" sorts the field in descending order; omit it for ascending order.
</Property>
</Properties>
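  For illustration, a hypothetical request sorting by creation time, newest first (the `/conversations` path and the `user` value are assumptions based on the rest of this page):

  ```bash
  curl --location --request GET '${props.apiBaseUrl}/conversations?user=abc-123&sort_by=-created_at' \
  --header 'Authorization: Bearer {api_key}'
  ```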
### Response
View File
@ -691,6 +691,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
<Property name='pinned' type='bool' key='pinned'>
只返回置顶 true只返回非置顶 false
</Property>
<Property name='sort_by' type='string' key='sort_by'>
      排序字段(选填),默认 -updated_at(按更新时间倒序排列)
      - 可选值:created_at, -created_at, updated_at, -updated_at
      - 字段前面的 "-" 代表倒序,不带则为顺序
</Property>
</Properties>
### Response
View File
@ -690,6 +690,11 @@ Chat applications support session persistence, allowing previous chat history to
<Property name='pinned' type='bool' key='pinned'>
Return only pinned conversations as `true`, only non-pinned as `false`
</Property>
<Property name='sort_by' type='string' key='sort_by'>
      Sorting field (optional), default: -updated_at (sorted in descending order by update time)
      - Available values: created_at, -created_at, updated_at, -updated_at
      - A leading "-" sorts the field in descending order; omit it for ascending order.
</Property>
</Properties>
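  A hypothetical request mirroring the one above, here sorting by update time in ascending order (endpoint path assumed as before):

  ```bash
  curl --location --request GET '${props.apiBaseUrl}/conversations?user=abc-123&sort_by=updated_at' \
  --header 'Authorization: Bearer {api_key}'
  ```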
### Response
View File
@ -705,6 +705,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
<Property name='pinned' type='bool' key='pinned'>
只返回置顶 true只返回非置顶 false
</Property>
<Property name='sort_by' type='string' key='sort_by'>
      排序字段(选填),默认 -updated_at(按更新时间倒序排列)
      - 可选值:created_at, -created_at, updated_at, -updated_at
      - 字段前面的 "-" 代表倒序,不带则为顺序
</Property>
</Properties>
### Response
View File
@ -26,7 +26,7 @@ type Props = {
isFocus: boolean
isInNode?: boolean
onGenerated?: (prompt: string) => void
codeLanguages: CodeLanguage
codeLanguages?: CodeLanguage
fileList?: FileEntity[]
showFileList?: boolean
showCodeGenerator?: boolean
@ -78,7 +78,7 @@ const Base: FC<Props> = ({
e.stopPropagation()
}}>
{headerRight}
{showCodeGenerator && (
{showCodeGenerator && codeLanguages && (
<div className='ml-1'>
<CodeGeneratorButton onGenerated={onGenerated} codeLanguages={codeLanguages}/>
</div>
View File
@ -31,6 +31,7 @@ export type Props = {
noWrapper?: boolean
isExpand?: boolean
showFileList?: boolean
onGenerated?: (value: string) => void
showCodeGenerator?: boolean
}
@ -64,6 +65,7 @@ const CodeEditor: FC<Props> = ({
noWrapper,
isExpand,
showFileList,
onGenerated,
showCodeGenerator = false,
}) => {
const [isFocus, setIsFocus] = React.useState(false)
@ -151,9 +153,6 @@ const CodeEditor: FC<Props> = ({
return isFocus ? 'focus-theme' : 'blur-theme'
})()
const handleGenerated = (code: string) => {
handleEditorChange(code)
}
const main = (
<>
@ -205,7 +204,7 @@ const CodeEditor: FC<Props> = ({
isFocus={isFocus && !readOnly}
minHeight={minHeight}
isInNode={isInNode}
onGenerated={handleGenerated}
onGenerated={onGenerated}
codeLanguages={language}
fileList={fileList}
showFileList={showFileList}
View File
@ -0,0 +1,326 @@
import { VarType } from '../../types'
import { extractFunctionParams, extractReturnType } from './code-parser'
import { CodeLanguage } from './types'
const SAMPLE_CODES = {
python3: {
noParams: 'def main():',
singleParam: 'def main(param1):',
multipleParams: `def main(param1, param2, param3):
return {"result": param1}`,
withTypes: `def main(param1: str, param2: int, param3: List[str]):
result = process_data(param1, param2)
return {"output": result}`,
withDefaults: `def main(param1: str = "default", param2: int = 0):
return {"data": param1}`,
},
javascript: {
noParams: 'function main() {',
singleParam: 'function main(param1) {',
multipleParams: `function main(param1, param2, param3) {
return { result: param1 }
}`,
withComments: `// Main function
function main(param1, param2) {
// Process data
return { output: process(param1, param2) }
}`,
withSpaces: 'function main( param1 , param2 ) {',
},
}
describe('extractFunctionParams', () => {
describe('Python3', () => {
test('handles no parameters', () => {
const result = extractFunctionParams(SAMPLE_CODES.python3.noParams, CodeLanguage.python3)
expect(result).toEqual([])
})
test('extracts single parameter', () => {
const result = extractFunctionParams(SAMPLE_CODES.python3.singleParam, CodeLanguage.python3)
expect(result).toEqual(['param1'])
})
test('extracts multiple parameters', () => {
const result = extractFunctionParams(SAMPLE_CODES.python3.multipleParams, CodeLanguage.python3)
expect(result).toEqual(['param1', 'param2', 'param3'])
})
test('handles type hints', () => {
const result = extractFunctionParams(SAMPLE_CODES.python3.withTypes, CodeLanguage.python3)
expect(result).toEqual(['param1', 'param2', 'param3'])
})
test('handles default values', () => {
const result = extractFunctionParams(SAMPLE_CODES.python3.withDefaults, CodeLanguage.python3)
expect(result).toEqual(['param1', 'param2'])
})
})
// JavaScript test cases
describe('JavaScript', () => {
test('handles no parameters', () => {
const result = extractFunctionParams(SAMPLE_CODES.javascript.noParams, CodeLanguage.javascript)
expect(result).toEqual([])
})
test('extracts single parameter', () => {
const result = extractFunctionParams(SAMPLE_CODES.javascript.singleParam, CodeLanguage.javascript)
expect(result).toEqual(['param1'])
})
test('extracts multiple parameters', () => {
const result = extractFunctionParams(SAMPLE_CODES.javascript.multipleParams, CodeLanguage.javascript)
expect(result).toEqual(['param1', 'param2', 'param3'])
})
test('handles comments in code', () => {
const result = extractFunctionParams(SAMPLE_CODES.javascript.withComments, CodeLanguage.javascript)
expect(result).toEqual(['param1', 'param2'])
})
test('handles whitespace', () => {
const result = extractFunctionParams(SAMPLE_CODES.javascript.withSpaces, CodeLanguage.javascript)
expect(result).toEqual(['param1', 'param2'])
})
})
})
const RETURN_TYPE_SAMPLES = {
python3: {
singleReturn: `
def main(param1):
return {"result": "value"}`,
multipleReturns: `
def main(param1, param2):
return {"result": "value", "status": "success"}`,
noReturn: `
def main():
print("Hello")`,
complexReturn: `
def main():
data = process()
return {"result": data, "count": 42, "messages": ["hello"]}`,
nestedObject: `
def main(name, age, city):
return {
'personal_info': {
'name': name,
'age': age,
'city': city
},
'timestamp': int(time.time()),
'status': 'active'
}`,
},
javascript: {
singleReturn: `
function main(param1) {
return { result: "value" }
}`,
multipleReturns: `
function main(param1) {
return { result: "value", status: "success" }
}`,
withParentheses: `
function main() {
return ({ result: "value", status: "success" })
}`,
noReturn: `
function main() {
console.log("Hello")
}`,
withQuotes: `
function main() {
return { "result": 'value', 'status': "success" }
}`,
nestedObject: `
function main(name, age, city) {
return {
personal_info: {
name: name,
age: age,
city: city
},
timestamp: Date.now(),
status: 'active'
}
}`,
withJSDoc: `
/**
* Creates a user profile with personal information and metadata
* @param {string} name - The user's name
* @param {number} age - The user's age
* @param {string} city - The user's city of residence
* @returns {Object} An object containing the user profile
*/
function main(name, age, city) {
return {
result: {
personal_info: {
name: name,
age: age,
city: city
},
timestamp: Date.now(),
status: 'active'
}
};
}`,
},
}
describe('extractReturnType', () => {
// Python3 tests
describe('Python3', () => {
test('extracts single return value', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.singleReturn, CodeLanguage.python3)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
})
})
test('extracts multiple return values', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.multipleReturns, CodeLanguage.python3)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
test('returns empty object when no return statement', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.noReturn, CodeLanguage.python3)
expect(result).toEqual({})
})
test('handles complex return statement', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.complexReturn, CodeLanguage.python3)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
count: {
type: VarType.string,
children: null,
},
messages: {
type: VarType.string,
children: null,
},
})
})
test('handles nested object structure', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.python3.nestedObject, CodeLanguage.python3)
expect(result).toEqual({
personal_info: {
type: VarType.string,
children: null,
},
timestamp: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
})
// JavaScript tests
describe('JavaScript', () => {
test('extracts single return value', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.singleReturn, CodeLanguage.javascript)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
})
})
test('extracts multiple return values', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.multipleReturns, CodeLanguage.javascript)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
test('handles return with parentheses', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.withParentheses, CodeLanguage.javascript)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
test('returns empty object when no return statement', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.noReturn, CodeLanguage.javascript)
expect(result).toEqual({})
})
test('handles quoted keys', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.withQuotes, CodeLanguage.javascript)
expect(result).toEqual({
result: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
test('handles nested object structure', () => {
const result = extractReturnType(RETURN_TYPE_SAMPLES.javascript.nestedObject, CodeLanguage.javascript)
expect(result).toEqual({
personal_info: {
type: VarType.string,
children: null,
},
timestamp: {
type: VarType.string,
children: null,
},
status: {
type: VarType.string,
children: null,
},
})
})
})
})
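Note: in every case above the extractor reports VarType.string with children: null. The parser recovers key names only and does not infer value types, so numeric, boolean, and nested-object values all surface as string outputs.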

View File

@ -0,0 +1,86 @@
import { VarType } from '../../types'
import type { OutputVar } from './types'
import { CodeLanguage } from './types'
export const extractFunctionParams = (code: string, language: CodeLanguage) => {
if (language === CodeLanguage.json)
return []
const patterns: Record<Exclude<CodeLanguage, CodeLanguage.json>, RegExp> = {
[CodeLanguage.python3]: /def\s+main\s*\((.*?)\)/,
[CodeLanguage.javascript]: /function\s+main\s*\((.*?)\)/,
}
const match = code.match(patterns[language])
const params: string[] = []
if (match?.[1]) {
params.push(...match[1].split(',')
.map(p => p.trim())
.filter(Boolean)
.map(p => p.split(':')[0].trim()),
)
}
return params
}
export const extractReturnType = (code: string, language: CodeLanguage): OutputVar => {
  // Strip JSDoc blocks first so keys mentioned in comments are not parsed as return values
  const codeWithoutComments = code.replace(/\/\*\*[\s\S]*?\*\//g, '')
const returnIndex = codeWithoutComments.indexOf('return')
if (returnIndex === -1)
return {}
  // Take the substring starting at the first 'return'
const codeAfterReturn = codeWithoutComments.slice(returnIndex)
let bracketCount = 0
let startIndex = codeAfterReturn.indexOf('{')
if (language === CodeLanguage.javascript && startIndex === -1) {
const parenStart = codeAfterReturn.indexOf('(')
if (parenStart !== -1)
startIndex = codeAfterReturn.indexOf('{', parenStart)
}
if (startIndex === -1)
return {}
let endIndex = -1
for (let i = startIndex; i < codeAfterReturn.length; i++) {
if (codeAfterReturn[i] === '{')
bracketCount++
if (codeAfterReturn[i] === '}') {
bracketCount--
if (bracketCount === 0) {
endIndex = i + 1
break
}
}
}
if (endIndex === -1)
return {}
  const returnContent = codeAfterReturn.slice(startIndex + 1, endIndex - 1)
  const result: OutputVar = {}
  // Match only top-level keys: the lookahead rejects a key whose remaining text
  // reaches a '}' before any '{', i.e. keys nested inside an inner object literal
  const keyRegex = /['"]?(\w+)['"]?\s*:(?![^{]*})/g
  const matches = returnContent.matchAll(keyRegex)
  for (const match of matches) {
    const key = match[1]
    // Value types are not inferred; every extracted key defaults to string
    result[key] = {
      type: VarType.string,
      children: null,
    }
  }
return result
}
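
For reviewers, a minimal usage sketch of the two helpers above (the input is illustrative; the expected shapes follow the unit tests earlier in this diff):

const code = `function main(name, age) {
  return {
    greeting: 'hello',
    profile: { name: name, age: age },
  }
}`

// ['name', 'age']: parameter names only, with any type annotations stripped
extractFunctionParams(code, CodeLanguage.javascript)

// { greeting: { type: VarType.string, children: null },
//   profile: { type: VarType.string, children: null } }
// Only top-level keys are reported; the nested name and age keys are skipped.
extractReturnType(code, CodeLanguage.javascript)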

View File

@ -5,6 +5,7 @@ import RemoveEffectVarConfirm from '../_base/components/remove-effect-var-confir
import useConfig from './use-config'
import type { CodeNodeType } from './types'
import { CodeLanguage } from './types'
import { extractFunctionParams, extractReturnType } from './code-parser'
import VarList from '@/app/components/workflow/nodes/_base/components/variable/var-list'
import OutputVarList from '@/app/components/workflow/nodes/_base/components/variable/output-var-list'
import AddButton from '@/app/components/base/button/add-button'
@ -12,10 +13,9 @@ import Field from '@/app/components/workflow/nodes/_base/components/field'
import Split from '@/app/components/workflow/nodes/_base/components/split'
import CodeEditor from '@/app/components/workflow/nodes/_base/components/editor/code-editor'
import TypeSelector from '@/app/components/workflow/nodes/_base/components/selector'
import type { NodePanelProps } from '@/app/components/workflow/types'
import { type NodePanelProps } from '@/app/components/workflow/types'
import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form'
import ResultPanel from '@/app/components/workflow/run/result-panel'
const i18nPrefix = 'workflow.nodes.code'
const codeLanguages = [
@ -38,6 +38,7 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
readOnly,
inputs,
outputKeyOrders,
handleCodeAndVarsChange,
handleVarListChange,
handleAddVariable,
handleRemoveVariable,
@ -61,6 +62,18 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
setInputVarValues,
} = useConfig(id, data)
const handleGeneratedCode = (value: string) => {
const params = extractFunctionParams(value, inputs.code_language)
const codeNewInput = params.map((p) => {
return {
variable: p,
value_selector: [],
}
})
const returnTypes = extractReturnType(value, inputs.code_language)
handleCodeAndVarsChange(value, codeNewInput, returnTypes)
}
return (
<div className='mt-2'>
<div className='px-4 pb-4 space-y-4'>
@ -92,6 +105,7 @@ const Panel: FC<NodePanelProps<CodeNodeType>> = ({
language={inputs.code_language}
value={inputs.code}
onChange={handleCodeChange}
onGenerated={handleGeneratedCode}
showCodeGenerator={true}
/>
</div>
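
To make the new wiring concrete, a hedged sketch (values illustrative, not runtime output) of what handleGeneratedCode derives when the generator emits a def main(query: str) whose body returns a dict with an answer key:

// derived by extractFunctionParams: annotation stripped, name kept
const codeNewInput = [{ variable: 'query', value_selector: [] }]
// derived by extractReturnType: top-level keys only, each typed as string
const returnTypes = { answer: { type: VarType.string, children: null } }
handleCodeAndVarsChange(generatedCode, codeNewInput, returnTypes)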

View File

@ -3,7 +3,7 @@ import produce from 'immer'
import useVarList from '../_base/hooks/use-var-list'
import useOutputVarList from '../_base/hooks/use-output-var-list'
import { BlockEnum, VarType } from '../../types'
import type { Var } from '../../types'
import type { Var, Variable } from '../../types'
import { useStore } from '../../store'
import type { CodeNodeType, OutputVar } from './types'
import { CodeLanguage } from './types'
@ -136,7 +136,15 @@ const useConfig = (id: string, payload: CodeNodeType) => {
const setInputVarValues = useCallback((newPayload: Record<string, any>) => {
setRunInputData(newPayload)
}, [setRunInputData])
const handleCodeAndVarsChange = useCallback((code: string, inputVariables: Variable[], outputVariables: OutputVar) => {
const newInputs = produce(inputs, (draft) => {
draft.code = code
draft.variables = inputVariables
draft.outputs = outputVariables
})
setInputs(newInputs)
syncOutputKeyOrders(outputVariables)
}, [inputs, setInputs, syncOutputKeyOrders])
return {
readOnly,
inputs,
@ -163,6 +171,7 @@ const useConfig = (id: string, payload: CodeNodeType) => {
inputVarValues,
setInputVarValues,
runResult,
handleCodeAndVarsChange,
}
}
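
For context on the new callback: immer's produce updates the three fields through a mutable draft while returning a new immutable inputs object, so code, variables, and outputs land in a single state update. A standalone sketch of the same pattern, with plain string types for brevity:

import produce from 'immer'

const inputs = { code: '', variables: [] as any[], outputs: {} as Record<string, any> }
const next = produce(inputs, (draft) => {
  draft.code = 'function main() { return { result: 1 } }'
  draft.variables = [{ variable: 'x', value_selector: [] }]
  draft.outputs = { result: { type: 'string', children: null } }
})
// inputs is untouched; next is a new object with all three changes applied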

View File

@ -240,7 +240,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
if (
(allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
|| mixtureInternalAndExternal
|| (allExternal && newDatasets.length > 1)
|| allExternal
)
setRerankModelOpen(true)
}, [inputs, setInputs, payload.retrieval_mode, selectedDatasets, currentRerankModel])
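
Behavioral note on the last hunk: before this change the rerank-model prompt opened for all-external dataset selections only when more than one dataset was selected; it now opens for any all-external selection, single dataset included.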