From cee1c4f63d27861706f8b3d98e54f31b582cb445 Mon Sep 17 00:00:00 2001 From: Nam Vu Date: Thu, 31 Oct 2024 14:49:28 +0700 Subject: [PATCH 1/6] fix: Version '1:1.3.dfsg+really1.3.1-1' for 'zlib1g' was not found (#10096) --- api/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/Dockerfile b/api/Dockerfile index f078181264..1d13be8bf3 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -55,7 +55,7 @@ RUN apt-get update \ && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \ && apt-get update \ # For Security - && apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.3-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \ + && apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1+b1 expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \ # install a chinese font to support the use of tools like matplotlib && apt-get install -y fonts-noto-cjk \ && apt-get autoremove -y \ From 0154a26e0b1b66971e5a20df208b7bcc37b01531 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:51:33 +0800 Subject: [PATCH 2/6] fix issue: update document segment setting failed (#10107) --- api/services/dataset_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 414ef0224a..9d70357515 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -736,11 +736,12 @@ class DocumentService: dataset.retrieval_model = document_data.get("retrieval_model") or default_retrieval_model documents = [] - batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999)) if document_data.get("original_document_id"): document = DocumentService.update_document_with_dataset_id(dataset, document_data, account) documents.append(document) + batch = document.batch else: + batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999)) # save process rule if not dataset_process_rule: process_rule = document_data["process_rule"] @@ -921,7 +922,7 @@ class DocumentService: if duplicate_document_ids: duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids) - return documents, batch + return documents, batch @staticmethod def check_documents_upload_quota(count: int, features: FeatureModel): From 73f29484e7d035bbfce782e95db0d813ac2dc454 Mon Sep 17 00:00:00 2001 From: Hash Brown Date: Thu, 31 Oct 2024 16:02:20 +0800 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20log=20detail=20panel=20not=20showing?= =?UTF-8?q?=20any=20message=20when=20total=20count=20greate=E2=80=A6=20(#1?= =?UTF-8?q?0119)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web/app/components/app/log/list.tsx | 4 +- .../__snapshots__/utils.spec.ts.snap | 274 ++++++++++++++++++ .../base/chat/__tests__/utils.spec.ts | 6 + web/app/components/base/chat/utils.ts | 6 + 4 files changed, 288 insertions(+), 2 deletions(-) diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index 754d18b49d..4c12cab581 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -195,8 +195,8 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) { conversation_id: detail.id, limit: 10, } - if (allChatItems.at(-1)?.id) - params.first_id = allChatItems.at(-1)?.id.replace('question-', '') + if (allChatItems[0]?.id) + params.first_id = allChatItems[0]?.id.replace('question-', '') const messageRes = await fetchChatMessages({ url: `/apps/${appDetail?.id}/chat-messages`, params, diff --git a/web/app/components/base/chat/__tests__/__snapshots__/utils.spec.ts.snap b/web/app/components/base/chat/__tests__/__snapshots__/utils.spec.ts.snap index 070975bfa7..7da09c4529 100644 --- a/web/app/components/base/chat/__tests__/__snapshots__/utils.spec.ts.snap +++ b/web/app/components/base/chat/__tests__/__snapshots__/utils.spec.ts.snap @@ -1804,6 +1804,280 @@ exports[`build chat item tree and get thread messages should get thread messages ] `; +exports[`build chat item tree and get thread messages should work with partial messages 1`] = ` +[ + { + "children": [ + { + "agent_thoughts": [ + { + "chain_id": null, + "created_at": 1726105809, + "files": [], + "id": "1019cd79-d141-4f9f-880a-fc1441cfd802", + "message_id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + "observation": "", + "position": 1, + "thought": "Sure! My number is 54. Your turn!", + "tool": "", + "tool_input": "", + "tool_labels": {}, + }, + ], + "children": [ + { + "children": [ + { + "agent_thoughts": [ + { + "chain_id": null, + "created_at": 1726105822, + "files": [], + "id": "0773bec7-b992-4a53-92b2-20ebaeae8798", + "message_id": "324bce32-c98c-435d-a66b-bac974ebb5ed", + "observation": "", + "position": 1, + "thought": "My number is 4729. Your turn!", + "tool": "", + "tool_input": "", + "tool_labels": {}, + }, + ], + "children": [], + "content": "My number is 4729. Your turn!", + "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80", + "feedbackDisabled": false, + "id": "324bce32-c98c-435d-a66b-bac974ebb5ed", + "input": { + "inputs": {}, + "query": "3306", + }, + "isAnswer": true, + "log": [ + { + "files": [], + "role": "user", + "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + }, + { + "files": [], + "role": "assistant", + "text": "Sure! My number is 54. Your turn!", + }, + { + "files": [], + "role": "user", + "text": "3306", + }, + { + "files": [], + "role": "assistant", + "text": "My number is 4729. Your turn!", + }, + ], + "message_files": [], + "more": { + "latency": "1.30", + "time": "09/11/2024 09:50 PM", + "tokens": 66, + }, + "parentMessageId": "question-324bce32-c98c-435d-a66b-bac974ebb5ed", + "siblingIndex": 0, + "workflow_run_id": null, + }, + ], + "content": "3306", + "id": "question-324bce32-c98c-435d-a66b-bac974ebb5ed", + "isAnswer": false, + "message_files": [], + "parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + }, + { + "children": [ + { + "agent_thoughts": [ + { + "chain_id": null, + "created_at": 1726107812, + "files": [], + "id": "5ca650f3-982c-4399-8b95-9ea241c76707", + "message_id": "684b5396-4e91-4043-88e9-aabe48b21acc", + "observation": "", + "position": 1, + "thought": "My number is 4821. Your turn!", + "tool": "", + "tool_input": "", + "tool_labels": {}, + }, + ], + "children": [ + { + "children": [ + { + "agent_thoughts": [ + { + "chain_id": null, + "created_at": 1726111024, + "files": [], + "id": "095cacab-afad-4387-a41d-1662578b8b13", + "message_id": "19904a7b-7494-4ed8-b72c-1d18668cea8c", + "observation": "", + "position": 1, + "thought": "My number is 1456. Your turn!", + "tool": "", + "tool_input": "", + "tool_labels": {}, + }, + ], + "children": [], + "content": "My number is 1456. Your turn!", + "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80", + "feedbackDisabled": false, + "id": "19904a7b-7494-4ed8-b72c-1d18668cea8c", + "input": { + "inputs": {}, + "query": "1003", + }, + "isAnswer": true, + "log": [ + { + "files": [], + "role": "user", + "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + }, + { + "files": [], + "role": "assistant", + "text": "Sure! My number is 54. Your turn!", + }, + { + "files": [], + "role": "user", + "text": "3306", + }, + { + "files": [], + "role": "assistant", + "text": "My number is 4821. Your turn!", + }, + { + "files": [], + "role": "user", + "text": "1003", + }, + { + "files": [], + "role": "assistant", + "text": "My number is 1456. Your turn!", + }, + ], + "message_files": [], + "more": { + "latency": "1.38", + "time": "09/11/2024 11:17 PM", + "tokens": 86, + }, + "parentMessageId": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c", + "siblingIndex": 0, + "workflow_run_id": null, + }, + ], + "content": "1003", + "id": "question-19904a7b-7494-4ed8-b72c-1d18668cea8c", + "isAnswer": false, + "message_files": [], + "parentMessageId": "684b5396-4e91-4043-88e9-aabe48b21acc", + }, + ], + "content": "My number is 4821. Your turn!", + "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80", + "feedbackDisabled": false, + "id": "684b5396-4e91-4043-88e9-aabe48b21acc", + "input": { + "inputs": {}, + "query": "3306", + }, + "isAnswer": true, + "log": [ + { + "files": [], + "role": "user", + "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + }, + { + "files": [], + "role": "assistant", + "text": "Sure! My number is 54. Your turn!", + }, + { + "files": [], + "role": "user", + "text": "3306", + }, + { + "files": [], + "role": "assistant", + "text": "My number is 4821. Your turn!", + }, + ], + "message_files": [], + "more": { + "latency": "1.48", + "time": "09/11/2024 10:23 PM", + "tokens": 66, + }, + "parentMessageId": "question-684b5396-4e91-4043-88e9-aabe48b21acc", + "siblingIndex": 1, + "workflow_run_id": null, + }, + ], + "content": "3306", + "id": "question-684b5396-4e91-4043-88e9-aabe48b21acc", + "isAnswer": false, + "message_files": [], + "parentMessageId": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + }, + ], + "content": "Sure! My number is 54. Your turn!", + "conversationId": "dd6c9cfd-2656-48ec-bd51-2139c1790d80", + "feedbackDisabled": false, + "id": "cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + "input": { + "inputs": {}, + "query": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + }, + "isAnswer": true, + "log": [ + { + "files": [], + "role": "user", + "text": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + }, + { + "files": [], + "role": "assistant", + "text": "Sure! My number is 54. Your turn!", + }, + ], + "message_files": [], + "more": { + "latency": "1.52", + "time": "09/11/2024 09:50 PM", + "tokens": 46, + }, + "parentMessageId": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + "siblingIndex": 0, + "workflow_run_id": null, + }, + ], + "content": "Let's play a game, I say a number , and you response me with another bigger, yet randomly number. I'll start first, 38", + "id": "question-cd5affb0-7bc2-4a6f-be7e-25e74595c9dd", + "isAnswer": false, + "message_files": [], + }, +] +`; + exports[`build chat item tree and get thread messages should work with real world messages 1`] = ` [ { diff --git a/web/app/components/base/chat/__tests__/utils.spec.ts b/web/app/components/base/chat/__tests__/utils.spec.ts index c602ac8a99..1dead1c949 100644 --- a/web/app/components/base/chat/__tests__/utils.spec.ts +++ b/web/app/components/base/chat/__tests__/utils.spec.ts @@ -255,4 +255,10 @@ describe('build chat item tree and get thread messages', () => { const threadMessages6_2 = getThreadMessages(tree6, 'ff4c2b43-48a5-47ad-9dc5-08b34ddba61b') expect(threadMessages6_2).toMatchSnapshot() }) + + const partialMessages = (realWorldMessages as ChatItemInTree[]).slice(-10) + const tree7 = buildChatItemTree(partialMessages) + it('should work with partial messages', () => { + expect(tree7).toMatchSnapshot() + }) }) diff --git a/web/app/components/base/chat/utils.ts b/web/app/components/base/chat/utils.ts index 16357361cf..61dfaecffc 100644 --- a/web/app/components/base/chat/utils.ts +++ b/web/app/components/base/chat/utils.ts @@ -134,6 +134,12 @@ function buildChatItemTree(allMessages: IChatItem[]): ChatItemInTree[] { } } + // If no messages have parentMessageId=null (indicating a root node), + // then we likely have a partial chat history. In this case, + // use the first available message as the root node. + if (rootNodes.length === 0 && allMessages.length > 0) + rootNodes.push(map[allMessages[0]!.id]!) + return rootNodes } From 05d9adeb99b566b1f9110098b80d8c59fe0394f3 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Thu, 31 Oct 2024 16:07:39 +0800 Subject: [PATCH 4/6] fix(Dockerfile): conditionally install zlib1g based on architecture (#10118) --- api/Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/Dockerfile b/api/Dockerfile index 1d13be8bf3..1f84fab657 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -55,7 +55,12 @@ RUN apt-get update \ && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \ && apt-get update \ # For Security - && apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1+b1 expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \ + && apt-get install -y --no-install-recommends expat=2.6.3-2 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \ + && if [ "$(dpkg --print-architecture)" = "amd64" ]; then \ + apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1+b1; \ + else \ + apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1; \ + fi \ # install a chinese font to support the use of tools like matplotlib && apt-get install -y fonts-noto-cjk \ && apt-get autoremove -y \ From 11ca1bec0bcbc9c9eb75303ec4cacf8c89ff96b2 Mon Sep 17 00:00:00 2001 From: omr <145922434+y-omr@users.noreply.github.com> Date: Thu, 31 Oct 2024 17:32:58 +0900 Subject: [PATCH 5/6] fix: optimize unique document filtering with set (#10082) --- api/core/rag/rerank/rerank_model.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/api/core/rag/rerank/rerank_model.py b/api/core/rag/rerank/rerank_model.py index 40ebf0befd..fc82b2080b 100644 --- a/api/core/rag/rerank/rerank_model.py +++ b/api/core/rag/rerank/rerank_model.py @@ -27,18 +27,17 @@ class RerankModelRunner(BaseRerankRunner): :return: """ docs = [] - doc_id = [] + doc_id = set() unique_documents = [] - dify_documents = [item for item in documents if item.provider == "dify"] - external_documents = [item for item in documents if item.provider == "external"] - for document in dify_documents: - if document.metadata["doc_id"] not in doc_id: - doc_id.append(document.metadata["doc_id"]) + for document in documents: + if document.provider == "dify" and document.metadata["doc_id"] not in doc_id: + doc_id.add(document.metadata["doc_id"]) docs.append(document.page_content) unique_documents.append(document) - for document in external_documents: - docs.append(document.page_content) - unique_documents.append(document) + elif document.provider == "external": + if document not in unique_documents: + docs.append(document.page_content) + unique_documents.append(document) documents = unique_documents From ce260f79d20a4184a5eba98915822a0fc1c4a61c Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Thu, 31 Oct 2024 18:29:12 +0800 Subject: [PATCH 6/6] Feat/update knowledge api url (#10102) Co-authored-by: nite-knite --- .../service_api/dataset/document.py | 24 +- .../service_api/dataset/hit_testing.py | 2 +- web/app/(commonLayout)/datasets/Doc.tsx | 9 +- .../datasets/template/template.en.mdx | 230 +++++++++--------- .../datasets/template/template.zh.mdx | 160 ++++++------ web/app/components/develop/md.tsx | 1 + 6 files changed, 225 insertions(+), 201 deletions(-) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 0a0a38c4c6..9da8bbd3ba 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -331,10 +331,26 @@ class DocumentIndexingStatusApi(DatasetApiResource): return data -api.add_resource(DocumentAddByTextApi, "/datasets//document/create_by_text") -api.add_resource(DocumentAddByFileApi, "/datasets//document/create_by_file") -api.add_resource(DocumentUpdateByTextApi, "/datasets//documents//update_by_text") -api.add_resource(DocumentUpdateByFileApi, "/datasets//documents//update_by_file") +api.add_resource( + DocumentAddByTextApi, + "/datasets//document/create_by_text", + "/datasets//document/create-by-text", +) +api.add_resource( + DocumentAddByFileApi, + "/datasets//document/create_by_file", + "/datasets//document/create-by-file", +) +api.add_resource( + DocumentUpdateByTextApi, + "/datasets//documents//update_by_text", + "/datasets//documents//update-by-text", +) +api.add_resource( + DocumentUpdateByFileApi, + "/datasets//documents//update_by_file", + "/datasets//documents//update-by-file", +) api.add_resource(DocumentDeleteApi, "/datasets//documents/") api.add_resource(DocumentListApi, "/datasets//documents") api.add_resource(DocumentIndexingStatusApi, "/datasets//documents//indexing-status") diff --git a/api/controllers/service_api/dataset/hit_testing.py b/api/controllers/service_api/dataset/hit_testing.py index 9c9a4302c9..465f71bf03 100644 --- a/api/controllers/service_api/dataset/hit_testing.py +++ b/api/controllers/service_api/dataset/hit_testing.py @@ -14,4 +14,4 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase): return self.perform_hit_testing(dataset, args) -api.add_resource(HitTestingApi, "/datasets//hit-testing") +api.add_resource(HitTestingApi, "/datasets//hit-testing", "/datasets//retrieve") diff --git a/web/app/(commonLayout)/datasets/Doc.tsx b/web/app/(commonLayout)/datasets/Doc.tsx index a6dd8c23ef..553dca5008 100644 --- a/web/app/(commonLayout)/datasets/Doc.tsx +++ b/web/app/(commonLayout)/datasets/Doc.tsx @@ -1,6 +1,6 @@ 'use client' -import type { FC } from 'react' +import { type FC, useEffect } from 'react' import { useContext } from 'use-context-selector' import TemplateEn from './template/template.en.mdx' import TemplateZh from './template/template.zh.mdx' @@ -14,6 +14,13 @@ const Doc: FC = ({ apiBaseUrl, }) => { const { locale } = useContext(I18n) + + useEffect(() => { + const hash = location.hash + if (hash) + document.querySelector(hash)?.scrollIntoView() + }, []) + return (
{ diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 3c9385f8bc..263230d049 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -20,17 +20,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
- This api is based on an existing Knowledge and creates a new document through text based on this Knowledge. + This API is based on an existing knowledge and creates a new document through text based on this knowledge. ### Params @@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Index mode - high_quality High quality: embedding using embedding model, built as vector database index - - economy Economy: Build using inverted index of Keyword Table Index + - economy Economy: Build using inverted index of keyword table index Processing rules @@ -62,7 +62,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules + - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 @@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -123,17 +123,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
- This api is based on an existing Knowledge and creates a new document through a file based on this Knowledge. + This API is based on an existing knowledge and creates a new document through a file based on this knowledge. ### Params @@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - - original_document_id Source document ID (optional) + - original_document_id Source document ID (optional) - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document - The source document cannot be an archived document - When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default - When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required - - indexing_technique Index mode + - indexing_technique Index mode - high_quality High quality: embedding using embedding model, built as vector database index - - economy Economy: Build using inverted index of Keyword Table Index + - economy Economy: Build using inverted index of keyword table index - - process_rule Processing rules + - process_rule Processing rules - mode (string) Cleaning, segmentation mode, automatic / custom - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules @@ -164,7 +164,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules + - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 @@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -221,12 +221,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -240,9 +240,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Knowledge description (optional) - Index Technique (optional) - - high_quality high_quality - - economy economy + Index technique (optional) + - high_quality High quality + - economy Economy Permission @@ -252,21 +252,21 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Provider (optional, default: vendor) - - vendor vendor - - external external knowledge + - vendor Vendor + - external External knowledge - External Knowledge api id (optional) + External knowledge API ID (optional) - External Knowledge id (optional) + External knowledge ID (optional) - @@ -306,12 +306,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -327,9 +327,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - @@ -369,12 +369,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -406,17 +406,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
- This api is based on an existing Knowledge and updates the document through text based on this Knowledge. + This API is based on an existing knowledge and updates the document through text based on this knowledge. ### Params @@ -446,7 +446,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules + - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 @@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -503,17 +503,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
- This api is based on an existing Knowledge, and updates documents through files based on this Knowledge + This API is based on an existing knowledge, and updates documents through files based on this knowledge ### Params @@ -543,7 +543,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules + - segmentation (object) Segmentation rules - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 @@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -597,12 +597,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -652,12 +652,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -694,12 +694,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -714,13 +714,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Query - Search keywords, currently only search document names(optional) + Search keywords, currently only search document names (optional) - Page number(optional) + Page number (optional) - Number of items returned, default 20, range 1-100(optional) + Number of items returned, default 20, range 1-100 (optional) @@ -769,12 +769,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -792,9 +792,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - - content (text) Text content/question content, required - - answer (text) Answer content, if the mode of the Knowledge is qa mode, pass the value(optional) - - keywords (list) Keywords(optional) + - content (text) Text content / question content, required + - answer (text) Answer content, if the mode of the knowledge is Q&A mode, pass the value (optional) + - keywords (list) Keywords (optional) @@ -855,12 +855,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -878,10 +878,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Query - keyword,choosable + Keyword (optional) - Search status,completed + Search status, completed @@ -933,12 +933,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -979,12 +979,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -1005,10 +1005,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - - content (text) text content/question content,required - - answer (text) Answer content, not required, passed if the Knowledge is in qa mode - - keywords (list) keyword, not required - - enabled (bool) false/true, not required + - content (text) Text content / question content, required + - answer (text) Answer content, passed if the knowledge is in Q&A mode (optional) + - keywords (list) Keyword (optional) + - enabled (bool) False / true (optional) @@ -1067,41 +1067,41 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
### Path - Dataset ID + Knowledge ID ### Request Body - retrieval keywordc + Query keyword - retrieval keyword(Optional, if not filled, it will be recalled according to the default method) + Retrieval model (optional, if not filled, it will be recalled according to the default method) - search_method (text) Search method: One of the following four keywords is required - keyword_search Keyword search - semantic_search Semantic search - full_text_search Full-text search - hybrid_search Hybrid search - - reranking_enable (bool) Whether to enable reranking, optional, required if the search mode is semantic_search or hybrid_search - - reranking_mode (object) Rerank model configuration, optional, required if reranking is enabled + - reranking_enable (bool) Whether to enable reranking, required if the search mode is semantic_search or hybrid_search (optional) + - reranking_mode (object) Rerank model configuration, required if reranking is enabled - reranking_provider_name (string) Rerank model provider - reranking_model_name (string) Rerank model name - weights (double) Semantic search weight setting in hybrid search mode - - top_k (integer) Number of results to return, optional + - top_k (integer) Number of results to return (optional) - score_threshold_enabled (bool) Whether to enable score threshold - score_threshold (double) Score threshold @@ -1114,26 +1114,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -1212,7 +1212,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 9f477aa605..9c25d1e7bb 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -20,13 +20,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 索引方式 - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 + - economy 经济:使用 keyword table index 的倒排索引进行构建 处理规则 @@ -64,7 +64,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - max_tokens 最大长度(token)默认为 1000 @@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ + curl --location --request --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -123,13 +123,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - - original_document_id 源文档 ID (选填) + - original_document_id 源文档 ID(选填) - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制 - 源文档不可为归档的文档 - 当传入 original_document_id 时,代表文档进行更新操作,process_rule 为可填项目,不填默认使用源文档的分段方式 - 未传入 original_document_id 时,代表文档进行新增操作,process_rule 为必填 - - indexing_technique 索引方式 + - indexing_technique 索引方式 - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 + - economy 经济:使用 keyword table index 的倒排索引进行构建 - - process_rule 处理规则 + - process_rule 处理规则 - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 @@ -166,7 +166,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - max_tokens 最大长度(token)默认为 1000 需要上传的文件。 @@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -221,7 +221,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
economy 经济 - 权限(选填,默认only_me) + 权限(选填,默认 only_me) - only_me 仅自己 - all_team_members 所有团队成员 - partial_members 部分团队成员 - provider,(选填,默认 vendor) + Provider(选填,默认 vendor) - vendor 上传文件 - external 外部知识库 @@ -264,9 +264,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - @@ -306,7 +306,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---- +
---- +
---- +
@@ -431,7 +431,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - 文档名称 (选填) + 文档名称(选填) 文档内容(选填) @@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - max_tokens 最大长度(token)默认为 1000 @@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -503,13 +503,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - 文档名称 (选填) + 文档名称(选填) 需要上传的文件 @@ -545,7 +545,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - segmentation (object) 分段规则 - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - max_tokens 最大长度(token)默认为 1000 @@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -597,7 +597,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
---- +
---- +
---- +
- content (text) 文本内容/问题内容,必填 - - answer (text) 答案内容,非必填,如果知识库的模式为qa模式则传值 + - answer (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值 - keywords (list) 关键字,非必填 @@ -855,7 +855,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
---- +
---- +
- content (text) 文本内容/问题内容,必填 - - answer (text) 答案内容,非必填,如果知识库的模式为qa模式则传值 + - answer (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值 - keywords (list) 关键字,非必填 - enabled (bool) false/true,非必填 @@ -1068,13 +1068,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
@@ -1088,23 +1088,23 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - 召回关键词 + 检索关键词 - 召回参数(选填,如不填,按照默认方式召回) + 检索参数(选填,如不填,按照默认方式召回) - search_method (text) 检索方法:以下三个关键字之一,必填 - keyword_search 关键字检索 - semantic_search 语义检索 - full_text_search 全文检索 - hybrid_search 混合检索 - - reranking_enable (bool) 是否启用 Reranking,非必填,如果检索模式为semantic_search模式或者hybrid_search则传值 + - reranking_enable (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值 - reranking_mode (object) Rerank模型配置,非必填,如果启用了 reranking 则传值 - reranking_provider_name (string) Rerank 模型提供商 - reranking_model_name (string) Rerank 模型名称 - weights (double) 混合检索模式下语意检索的权重设置 - top_k (integer) 返回结果数量,非必填 - - score_threshold_enabled (bool) 是否开启Score阈值 - - score_threshold (double) Score阈值 + - score_threshold_enabled (bool) 是否开启 score 阈值 + - score_threshold (double) Score 阈值 未启用字段 @@ -1115,26 +1115,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -1214,7 +1214,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ---- +
diff --git a/web/app/components/develop/md.tsx b/web/app/components/develop/md.tsx index 87f7b35aaf..26b4007c87 100644 --- a/web/app/components/develop/md.tsx +++ b/web/app/components/develop/md.tsx @@ -39,6 +39,7 @@ export const Heading = function H2({ } return ( <> +
{method} {/* */}