Merge branch 'feat/update-knowledge-api-url' into deploy/dev
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions

This commit is contained in:
nite-knite 2024-10-30 13:31:44 +08:00
commit f32c1ff9ce
4 changed files with 138 additions and 138 deletions

View File

@ -359,10 +359,10 @@ class DocumentIndexingStatusApi(DatasetApiResource):
return data
api.add_resource(DocumentAddByTextApi, "/datasets/<uuid:dataset_id>/document/create_by_text")
api.add_resource(DocumentAddByFileApi, "/datasets/<uuid:dataset_id>/document/create_by_file")
api.add_resource(DocumentUpdateByTextApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text")
api.add_resource(DocumentUpdateByFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file")
api.add_resource(DocumentAddByTextApi, "/datasets/<uuid:dataset_id>/document/create_by_text", "/datasets/<uuid:dataset_id>/document/create-by-text")
api.add_resource(DocumentAddByFileApi, "/datasets/<uuid:dataset_id>/document/create_by_file", "/datasets/<uuid:dataset_id>/document/create-by-file")
api.add_resource(DocumentUpdateByTextApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text", "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text")
api.add_resource(DocumentUpdateByFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file", "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file")
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")

View File

@ -14,4 +14,4 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
return self.perform_hit_testing(dataset, args)
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing", "/datasets/<uuid:dataset_id>/retrieve")

View File

@ -23,14 +23,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/document/create_by_text'
url='/datasets/{dataset_id}/document/create-by-text'
method='POST'
title='Create a document from text'
name='#create_by_text'
name='#create-by-text'
/>
<Row>
<Col>
This api is based on an existing Knowledge and creates a new document through text based on this Knowledge.
This API is based on an existing knowledge and creates a new document through text based on this knowledge.
### Params
<Properties>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
- <code>economy</code> Economy: Build using inverted index of keyword table index
</Property>
<Property name='process_rule' type='object' key='process_rule'>
Processing rules
@ -62,7 +62,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -126,14 +126,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/document/create_by_file'
url='/datasets/{dataset_id}/document/create-by-file'
method='POST'
title='Create documents from files'
name='#create_by_file'
name='#create-by-file'
/>
<Row>
<Col>
This api is based on an existing Knowledge and creates a new document through a file based on this Knowledge.
This API is based on an existing knowledge and creates a new document through a file based on this knowledge.
### Params
<Properties>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id Source document ID (optional)
- <code>original_document_id</code> Source document ID (optional)
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
- The source document cannot be an archived document
- When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default
- When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required
- indexing_technique Index mode
- <code>indexing_technique</code> Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index
- <code>economy</code> Economy: Build using inverted index of keyword table index
- process_rule Processing rules
- <code>process_rule</code> Processing rules
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules
@ -164,7 +164,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -226,7 +226,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Heading
url='/datasets'
method='POST'
title='Create an empty Knowledge'
title='Create an empty knowledge'
name='#create_empty_dataset'
/>
<Row>
@ -240,9 +240,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
Knowledge description (optional)
</Property>
<Property name='indexing_technique' type='string' key='indexing_technique'>
Index Technique (optional)
- <code>high_quality</code> high_quality
- <code>economy</code> economy
Index technique (optional)
- <code>high_quality</code> High quality
- <code>economy</code> Economy
</Property>
<Property name='permission' type='string' key='permission'>
Permission
@ -252,14 +252,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Property>
<Property name='provider' type='string' key='provider'>
Provider (optional, default: vendor)
- <code>vendor</code> vendor
- <code>external</code> external knowledge
- <code>vendor</code> Vendor
- <code>external</code> External knowledge
</Property>
<Property name='external_knowledge_api_id' type='str' key='external_knowledge_api_id'>
External Knowledge api id (optional)
External knowledge API ID (optional)
</Property>
<Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
External Knowledge id (optional)
External knowledge ID (optional)
</Property>
</Properties>
</Col>
@ -409,14 +409,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='Update document via text'
name='#update_by_text'
name='#update-by-text'
/>
<Row>
<Col>
This api is based on an existing Knowledge and updates the document through text based on this Knowledge.
This API is based on an existing knowledge and updates the document through text based on this knowledge.
### Params
<Properties>
@ -446,7 +446,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -506,14 +506,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='Update a document from a file'
name='#update_by_file'
name='#update-by-file'
/>
<Row>
<Col>
This api is based on an existing Knowledge, and updates documents through files based on this Knowledge
This API is based on an existing knowledge, and updates documents through files based on this knowledge.
### Params
<Properties>
@ -543,7 +543,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules
- <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -714,13 +714,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
Search keywords, currently only search document names(optional)
Search keywords, currently only search document names (optional)
</Property>
<Property name='page' type='string' key='page'>
Page number(optional)
Page number (optional)
</Property>
<Property name='limit' type='string' key='limit'>
Number of items returned, default 20, range 1-100(optional)
Number of items returned, default 20, range 1-100 (optional)
</Property>
</Properties>
</Col>
@ -792,9 +792,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) Text content/question content, required
- <code>answer</code> (text) Answer content, if the mode of the Knowledge is qa mode, pass the value(optional)
- <code>keywords</code> (list) Keywords(optional)
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, if the mode of the knowledge is Q&A mode, pass the value (optional)
- <code>keywords</code> (list) Keywords (optional)
</Property>
</Properties>
</Col>
@ -860,7 +860,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET'
title='get documents segments'
title='Get documents segments'
name='#get_segment'
/>
<Row>
@ -878,10 +878,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query
<Properties>
<Property name='keyword' type='string' key='keyword'>
keywordchoosable
Keyword (optional)
</Property>
<Property name='status' type='string' key='status'>
Search statuscompleted
Search status, completed
</Property>
</Properties>
</Col>
@ -938,7 +938,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE'
title='delete document segment'
title='Delete document segment'
name='#delete_segment'
/>
<Row>
@ -984,7 +984,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST'
title='update document segment'
title='Update document segment'
name='#update_segment'
/>
<Row>
@ -1005,10 +1005,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='segment' type='object' key='segment'>
- <code>content</code> (text) text content/question contentrequired
- <code>answer</code> (text) Answer content, not required, passed if the Knowledge is in qa mode
- <code>keywords</code> (list) keyword, not required
- <code>enabled</code> (bool) false/true, not required
- <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
- <code>keywords</code> (list) Keyword (optional)
- <code>enabled</code> (bool) False / true (optional)
</Property>
</Properties>
</Col>
@ -1070,38 +1070,38 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='Dataset hit testing'
name='#dataset_hit_testing'
title='Retrieve knowledge'
name='#dataset_retrieval'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID
Knowledge ID
</Property>
</Properties>
### Request Body
<Properties>
<Property name='query' type='string' key='query'>
retrieval keywordc
Query keyword
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
retrieval keyword(Optional, if not filled, it will be recalled according to the default method)
Retrieval model (optional, if not filled, it will be recalled according to the default method)
- <code>search_method</code> (text) Search method: One of the following four keywords is required
- <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking, optional, required if the search mode is semantic_search or hybrid_search
- <code>reranking_mode</code> (object) Rerank model configuration, optional, required if reranking is enabled
- <code>reranking_enable</code> (bool) Whether to enable reranking (optional; required if the search mode is semantic_search or hybrid_search)
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return, optional
- <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (double) Score threshold
</Property>
@ -1114,26 +1114,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
"reranking_enable": false,
"reranking_mode": null,
"reranking_model": {
"reranking_provider_name": "",
"reranking_model_name": ""
},
"weights": null,
"top_k": 1,
"score_threshold_enabled": false,
"score_threshold": null
}
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{

View File

@ -23,10 +23,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/document/create_by_text'
url='/datasets/{dataset_id}/document/create-by-text'
method='POST'
title='通过文本创建文档'
name='#create_by_text'
name='#create-by-text'
/>
<Row>
<Col>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'>
索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
- <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
</Property>
<Property name='process_rule' type='object' key='process_rule'>
处理规则
@ -64,7 +64,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度(token)默认为 1000
</Property>
</Properties>
</Col>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -126,10 +126,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/document/create_by_file'
url='/datasets/{dataset_id}/document/create-by-file'
method='POST'
title='通过文件创建文档 '
name='#create_by_file'
name='#create-by-file'
/>
<Row>
<Col>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id 源文档 ID (选填)
- <code>original_document_id</code> 源文档 ID(选填)
- 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
- 源文档不可为归档的文档
- 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
- 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
- indexing_technique 索引方式
- <code>indexing_technique</code> 索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建
- <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
- process_rule 处理规则
- <code>process_rule</code> 处理规则
- <code>mode</code> (string) 清洗、分段模式 automatic 自动 / custom 自定义
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则
@ -166,7 +166,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度(token)默认为 1000
</Property>
<Property name='file' type='multipart/form-data' key='file'>
需要上传的文件。
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -245,13 +245,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>economy</code> 经济
</Property>
<Property name='permission' type='string' key='permission'>
权限选填默认only_me
权限(选填,默认 only_me)
- <code>only_me</code> 仅自己
- <code>all_team_members</code> 所有团队成员
- <code>partial_members</code> 部分团队成员
</Property>
<Property name='provider' type='string' key='provider'>
provider(选填,默认 vendor
Provider(选填,默认 vendor)
- <code>vendor</code> 上传文件
- <code>external</code> 外部知识库
</Property>
@ -409,10 +409,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST'
title='通过文本更新文档 '
name='#update_by_text'
name='#update-by-text'
/>
<Row>
<Col>
@ -431,7 +431,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
文档名称 (选填)
文档名称(选填)
</Property>
<Property name='text' type='string' key='text'>
文档内容(选填)
@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度(token)默认为 1000
</Property>
</Properties>
</Col>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
@ -506,10 +506,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file'
url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST'
title='通过文件更新文档'
name='#update_by_file'
name='#update-by-file'
/>
<Row>
<Col>
@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='name' type='string' key='name'>
文档名称 (选填)
文档名称(选填)
</Property>
<Property name='file' type='multipart/form-data' key='file'>
需要上传的文件
@ -545,7 +545,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000
- <code>max_tokens</code> 最大长度(token)默认为 1000
</Property>
</Properties>
</Col>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{\\"name\\":\\"Dify\\",\\"indexing_technique\\":\\"high_quality\\",\\"process_rule\\":{\\"rules\\":{\\"pre_processing_rules\\":[{\\"id\\":\\"remove_extra_spaces\\",\\"enabled\\":true},{\\"id\\":\\"remove_urls_emails\\",\\"enabled\\":true}],\\"segmentation\\":{\\"separator\\":\\"###\\",\\"max_tokens\\":500}},\\"mode\\":\\"custom\\"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"'
@ -1071,10 +1071,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/hit_testing'
url='/datasets/{dataset_id}/retrieve'
method='POST'
title='知识库召回测试'
name='#dataset_hit_testing'
title='检索知识库'
name='#dataset_retrieval'
/>
<Row>
<Col>
@ -1088,10 +1088,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
<Properties>
<Property name='query' type='string' key='query'>
召回关键词
检索关键词
</Property>
<Property name='retrieval_model' type='object' key='retrieval_model'>
召回参数(选填,如不填,按照默认方式召回)
检索参数(选填,如不填,按照默认方式检索)
- <code>search_method</code> (text) 检索方法:以下三个关键字之一,必填
- <code>keyword_search</code> 关键字检索
- <code>semantic_search</code> 语义检索
@ -1115,8 +1115,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="POST"
label="/datasets/{dataset_id}/hit_testing"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{
label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"query": "test",
"retrieval_model": {
"search_method": "keyword_search",
@ -1134,7 +1134,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit_testing' \
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{