Feat/update knowledge api url (#10102)

Co-authored-by: nite-knite <nkCoding@gmail.com>
This commit is contained in:
Jyong 2024-10-31 18:29:12 +08:00 committed by GitHub
parent 11ca1bec0b
commit ce260f79d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 225 additions and 201 deletions

View File

@ -331,10 +331,26 @@ class DocumentIndexingStatusApi(DatasetApiResource):
return data return data
api.add_resource(DocumentAddByTextApi, "/datasets/<uuid:dataset_id>/document/create_by_text") api.add_resource(
api.add_resource(DocumentAddByFileApi, "/datasets/<uuid:dataset_id>/document/create_by_file") DocumentAddByTextApi,
api.add_resource(DocumentUpdateByTextApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text") "/datasets/<uuid:dataset_id>/document/create_by_text",
api.add_resource(DocumentUpdateByFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file") "/datasets/<uuid:dataset_id>/document/create-by-text",
)
api.add_resource(
DocumentAddByFileApi,
"/datasets/<uuid:dataset_id>/document/create_by_file",
"/datasets/<uuid:dataset_id>/document/create-by-file",
)
api.add_resource(
DocumentUpdateByTextApi,
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_text",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-text",
)
api.add_resource(
DocumentUpdateByFileApi,
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
)
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>") api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents") api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status") api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")

View File

@ -14,4 +14,4 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
return self.perform_hit_testing(dataset, args) return self.perform_hit_testing(dataset, args)
api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing") api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing", "/datasets/<uuid:dataset_id>/retrieve")

View File

@ -1,6 +1,6 @@
'use client' 'use client'
import type { FC } from 'react' import { type FC, useEffect } from 'react'
import { useContext } from 'use-context-selector' import { useContext } from 'use-context-selector'
import TemplateEn from './template/template.en.mdx' import TemplateEn from './template/template.en.mdx'
import TemplateZh from './template/template.zh.mdx' import TemplateZh from './template/template.zh.mdx'
@ -14,6 +14,13 @@ const Doc: FC<DocProps> = ({
apiBaseUrl, apiBaseUrl,
}) => { }) => {
const { locale } = useContext(I18n) const { locale } = useContext(I18n)
// On mount, scroll to the element referenced by the URL fragment, if there is one.
// The element is looked up by id rather than via querySelector(location.hash):
// a fragment that is not a valid CSS selector (e.g. "#123" or "#a/b") makes
// querySelector throw a SyntaxError, whereas getElementById simply returns null.
useEffect(() => {
  // location.hash is either '' or starts with '#'; drop the leading '#'.
  const anchorId = location.hash.slice(1)
  if (anchorId)
    document.getElementById(anchorId)?.scrollIntoView()
}, []) // empty dependency list: run once after the first render only
return ( return (
<article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'> <article className='mx-1 px-4 sm:mx-12 pt-16 bg-white rounded-t-xl prose prose-xl'>
{ {

View File

@ -20,17 +20,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</CodeGroup> </CodeGroup>
</div> </div>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/document/create_by_text' url='/datasets/{dataset_id}/document/create-by-text'
method='POST' method='POST'
title='Create a document from text' title='Create a Document from Text'
name='#create_by_text' name='#create-by-text'
/> />
<Row> <Row>
<Col> <Col>
This api is based on an existing Knowledge and creates a new document through text based on this Knowledge. This API is based on an existing knowledge and creates a new document through text based on this knowledge.
### Params ### Params
<Properties> <Properties>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'> <Property name='indexing_technique' type='string' key='indexing_technique'>
Index mode Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index - <code>economy</code> Economy: Build using inverted index of keyword table index
</Property> </Property>
<Property name='process_rule' type='object' key='process_rule'> <Property name='process_rule' type='object' key='process_rule'>
Processing rules Processing rules
@ -62,7 +62,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address - <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules - <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000 - <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property> </Property>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/document/create_by_text" label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -123,17 +123,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/document/create_by_file' url='/datasets/{dataset_id}/document/create-by-file'
method='POST' method='POST'
title='Create documents from files' title='Create a Document from a File'
name='#create_by_file' name='#create-by-file'
/> />
<Row> <Row>
<Col> <Col>
This api is based on an existing Knowledge and creates a new document through a file based on this Knowledge. This API is based on an existing knowledge and creates a new document through a file based on this knowledge.
### Params ### Params
<Properties> <Properties>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='data' type='multipart/form-data json string' key='data'> <Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id Source document ID (optional) - <code>original_document_id</code> Source document ID (optional)
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
- The source document cannot be an archived document - The source document cannot be an archived document
- When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default - When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default
- When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required - When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required
- indexing_technique Index mode - <code>indexing_technique</code> Index mode
- <code>high_quality</code> High quality: embedding using embedding model, built as vector database index - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
- <code>economy</code> Economy: Build using inverted index of Keyword Table Index - <code>economy</code> Economy: Build using inverted index of keyword table index
- process_rule Processing rules - <code>process_rule</code> Processing rules
- <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
- <code>rules</code> (object) Custom rules (in automatic mode, this field is empty) - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
- <code>pre_processing_rules</code> (array[object]) Preprocessing rules - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
@ -164,7 +164,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address - <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules - <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000 - <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property> </Property>
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/document/create_by_file" label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"' --form 'file=@"/path/to/file"'
@ -221,12 +221,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets' url='/datasets'
method='POST' method='POST'
title='Create an empty Knowledge' title='Create an Empty Knowledge Base'
name='#create_empty_dataset' name='#create_empty_dataset'
/> />
<Row> <Row>
@ -240,9 +240,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
Knowledge description (optional) Knowledge description (optional)
</Property> </Property>
<Property name='indexing_technique' type='string' key='indexing_technique'> <Property name='indexing_technique' type='string' key='indexing_technique'>
Index Technique (optional) Index technique (optional)
- <code>high_quality</code> high_quality - <code>high_quality</code> High quality
- <code>economy</code> economy - <code>economy</code> Economy
</Property> </Property>
<Property name='permission' type='string' key='permission'> <Property name='permission' type='string' key='permission'>
Permission Permission
@ -252,14 +252,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Property> </Property>
<Property name='provider' type='string' key='provider'> <Property name='provider' type='string' key='provider'>
Provider (optional, default: vendor) Provider (optional, default: vendor)
- <code>vendor</code> vendor - <code>vendor</code> Vendor
- <code>external</code> external knowledge - <code>external</code> External knowledge
</Property> </Property>
<Property name='external_knowledge_api_id' type='str' key='external_knowledge_api_id'> <Property name='external_knowledge_api_id' type='str' key='external_knowledge_api_id'>
External Knowledge api id (optional) External knowledge API ID (optional)
</Property> </Property>
<Property name='external_knowledge_id' type='str' key='external_knowledge_id'> <Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
External Knowledge id (optional) External knowledge ID (optional)
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -306,12 +306,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets' url='/datasets'
method='GET' method='GET'
title='Knowledge list' title='Get Knowledge Base List'
name='#dataset_list' name='#dataset_list'
/> />
<Row> <Row>
@ -369,12 +369,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}' url='/datasets/{dataset_id}'
method='DELETE' method='DELETE'
title='Delete knowledge' title='Delete a Knowledge Base'
name='#delete_dataset' name='#delete_dataset'
/> />
<Row> <Row>
@ -406,17 +406,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text' url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST' method='POST'
title='Update document via text' title='Update a Document with Text'
name='#update_by_text' name='#update-by-text'
/> />
<Row> <Row>
<Col> <Col>
This api is based on an existing Knowledge and updates the document through text based on this Knowledge. This API is based on an existing knowledge and updates the document through text based on this knowledge.
### Params ### Params
<Properties> <Properties>
@ -446,7 +446,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address - <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules - <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000 - <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property> </Property>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text" label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -503,17 +503,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file' url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST' method='POST'
title='Update a document from a file' title='Update a Document with a File'
name='#update_by_file' name='#update-by-file'
/> />
<Row> <Row>
<Col> <Col>
This api is based on an existing Knowledge, and updates documents through files based on this Knowledge This API is based on an existing knowledge, and updates documents through files based on this knowledge.
### Params ### Params
<Properties> <Properties>
@ -543,7 +543,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
- <code>remove_urls_emails</code> Delete URL, email address - <code>remove_urls_emails</code> Delete URL, email address
- <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
- <code>segmentation</code> (object) segmentation rules - <code>segmentation</code> (object) Segmentation rules
- <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- <code>max_tokens</code> Maximum length (token) defaults to 1000 - <code>max_tokens</code> Maximum length (token) defaults to 1000
</Property> </Property>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file" label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"' --form 'file=@"/path/to/file"'
@ -597,12 +597,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status' url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
method='GET' method='GET'
title='Get document embedding status (progress)' title='Get Document Embedding Status (Progress)'
name='#indexing_status' name='#indexing_status'
/> />
<Row> <Row>
@ -652,12 +652,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}' url='/datasets/{dataset_id}/documents/{document_id}'
method='DELETE' method='DELETE'
title='Delete document' title='Delete a Document'
name='#delete_document' name='#delete_document'
/> />
<Row> <Row>
@ -694,12 +694,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents' url='/datasets/{dataset_id}/documents'
method='GET' method='GET'
title='Knowledge document list' title='Get the Document List of a Knowledge Base'
name='#dataset_document_list' name='#dataset_document_list'
/> />
<Row> <Row>
@ -714,13 +714,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query ### Query
<Properties> <Properties>
<Property name='keyword' type='string' key='keyword'> <Property name='keyword' type='string' key='keyword'>
Search keywords, currently only search document names(optional) Search keywords, currently only search document names (optional)
</Property> </Property>
<Property name='page' type='string' key='page'> <Property name='page' type='string' key='page'>
Page number(optional) Page number (optional)
</Property> </Property>
<Property name='limit' type='string' key='limit'> <Property name='limit' type='string' key='limit'>
Number of items returned, default 20, range 1-100(optional) Number of items returned, default 20, range 1-100 (optional)
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -769,12 +769,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments' url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST' method='POST'
title='Add segment' title='Add Chunks to a Document'
name='#create_new_segment' name='#create_new_segment'
/> />
<Row> <Row>
@ -792,9 +792,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='segments' type='object list' key='segments'> <Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) Text content/question content, required - <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, if the mode of the Knowledge is qa mode, pass the value(optional) - <code>answer</code> (text) Answer content, if the mode of the knowledge is Q&A mode, pass the value (optional)
- <code>keywords</code> (list) Keywords(optional) - <code>keywords</code> (list) Keywords (optional)
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -855,12 +855,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments' url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='GET' method='GET'
title='get documents segments' title='Get Chunks from a Document'
name='#get_segment' name='#get_segment'
/> />
<Row> <Row>
@ -878,10 +878,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Query ### Query
<Properties> <Properties>
<Property name='keyword' type='string' key='keyword'> <Property name='keyword' type='string' key='keyword'>
keywordchoosable Keyword (optional)
</Property> </Property>
<Property name='status' type='string' key='status'> <Property name='status' type='string' key='status'>
Search statuscompleted Search status, completed
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -933,12 +933,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE' method='DELETE'
title='delete document segment' title='Delete a Chunk in a Document'
name='#delete_segment' name='#delete_segment'
/> />
<Row> <Row>
@ -979,12 +979,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST' method='POST'
title='update document segment' title='Update a Chunk in a Document'
name='#update_segment' name='#update_segment'
/> />
<Row> <Row>
@ -1005,10 +1005,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='segment' type='object' key='segment'> <Property name='segment' type='object' key='segment'>
- <code>content</code> (text) text content/question contentrequired - <code>content</code> (text) Text content / question content, required
- <code>answer</code> (text) Answer content, not required, passed if the Knowledge is in qa mode - <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
- <code>keywords</code> (list) keyword, not required - <code>keywords</code> (list) Keyword (optional)
- <code>enabled</code> (bool) false/true, not required - <code>enabled</code> (bool) False / true (optional)
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -1067,41 +1067,41 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/hit-testing' url='/datasets/{dataset_id}/retrieve'
method='POST' method='POST'
title='Dataset hit testing' title='Retrieve Chunks from a Knowledge Base'
name='#dataset_hit_testing' name='#dataset_retrieval'
/> />
<Row> <Row>
<Col> <Col>
### Path ### Path
<Properties> <Properties>
<Property name='dataset_id' type='string' key='dataset_id'> <Property name='dataset_id' type='string' key='dataset_id'>
Dataset ID Knowledge ID
</Property> </Property>
</Properties> </Properties>
### Request Body ### Request Body
<Properties> <Properties>
<Property name='query' type='string' key='query'> <Property name='query' type='string' key='query'>
retrieval keywordc Query keyword
</Property> </Property>
<Property name='retrieval_model' type='object' key='retrieval_model'> <Property name='retrieval_model' type='object' key='retrieval_model'>
retrieval keyword(Optional, if not filled, it will be recalled according to the default method) Retrieval model (optional, if not filled, it will be recalled according to the default method)
- <code>search_method</code> (text) Search method: One of the following four keywords is required - <code>search_method</code> (text) Search method: One of the following four keywords is required
- <code>keyword_search</code> Keyword search - <code>keyword_search</code> Keyword search
- <code>semantic_search</code> Semantic search - <code>semantic_search</code> Semantic search
- <code>full_text_search</code> Full-text search - <code>full_text_search</code> Full-text search
- <code>hybrid_search</code> Hybrid search - <code>hybrid_search</code> Hybrid search
- <code>reranking_enable</code> (bool) Whether to enable reranking, optional, required if the search mode is semantic_search or hybrid_search - <code>reranking_enable</code> (bool) Whether to enable reranking (optional; required if the search mode is semantic_search or hybrid_search)
- <code>reranking_mode</code> (object) Rerank model configuration, optional, required if reranking is enabled - <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
- <code>reranking_provider_name</code> (string) Rerank model provider - <code>reranking_provider_name</code> (string) Rerank model provider
- <code>reranking_model_name</code> (string) Rerank model name - <code>reranking_model_name</code> (string) Rerank model name
- <code>weights</code> (double) Semantic search weight setting in hybrid search mode - <code>weights</code> (double) Semantic search weight setting in hybrid search mode
- <code>top_k</code> (integer) Number of results to return, optional - <code>top_k</code> (integer) Number of results to return (optional)
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
- <code>score_threshold</code> (double) Score threshold - <code>score_threshold</code> (double) Score threshold
</Property> </Property>
@ -1114,8 +1114,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/hit-testing" label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{ targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"query": "test", "query": "test",
"retrieval_model": { "retrieval_model": {
"search_method": "keyword_search", "search_method": "keyword_search",
@ -1130,10 +1130,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
"score_threshold_enabled": false, "score_threshold_enabled": false,
"score_threshold": null "score_threshold": null
} }
}'`} }'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -1212,7 +1212,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Row> <Row>
<Col> <Col>

View File

@ -20,13 +20,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</CodeGroup> </CodeGroup>
</div> </div>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/document/create_by_text' url='/datasets/{dataset_id}/document/create-by-text'
method='POST' method='POST'
title='通过文本创建文档' title='通过文本创建文档'
name='#create_by_text' name='#create-by-text'
/> />
<Row> <Row>
<Col> <Col>
@ -50,7 +50,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Property name='indexing_technique' type='string' key='indexing_technique'> <Property name='indexing_technique' type='string' key='indexing_technique'>
索引方式 索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建 - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
</Property> </Property>
<Property name='process_rule' type='object' key='process_rule'> <Property name='process_rule' type='object' key='process_rule'>
处理规则 处理规则
@ -64,7 +64,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则 - <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000 - <code>max_tokens</code> 最大长度（token）默认为 1000
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -72,11 +72,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/document/create_by_text" label="/datasets/{dataset_id}/document/create-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-text' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -123,13 +123,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/document/create_by_file' url='/datasets/{dataset_id}/document/create-by-file'
method='POST' method='POST'
title='通过文件创建文档 ' title='通过文件创建文档 '
name='#create_by_file' name='#create-by-file'
/> />
<Row> <Row>
<Col> <Col>
@ -145,17 +145,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='data' type='multipart/form-data json string' key='data'> <Property name='data' type='multipart/form-data json string' key='data'>
- original_document_id 源文档 ID (选填) - <code>original_document_id</code> 源文档 ID（选填）
- 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制 - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
- 源文档不可为归档的文档 - 源文档不可为归档的文档
- 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式 - 当传入 <code>original_document_id</code> 时,代表文档进行更新操作,<code>process_rule</code> 为可填项目,不填默认使用源文档的分段方式
- 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填 - 未传入 <code>original_document_id</code> 时,代表文档进行新增操作,<code>process_rule</code> 为必填
- indexing_technique 索引方式 - <code>indexing_technique</code> 索引方式
- <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - <code>high_quality</code> 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- <code>economy</code> 经济:使用 Keyword Table Index 的倒排索引进行构建 - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
- process_rule 处理规则 - <code>process_rule</code> 处理规则
- <code>mode</code> (string) 清洗、分段模式 automatic 自动 / custom 自定义 - <code>mode</code> (string) 清洗、分段模式 automatic 自动 / custom 自定义
- <code>rules</code> (object) 自定义规则(自动模式下,该字段为空) - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
- <code>pre_processing_rules</code> (array[object]) 预处理规则 - <code>pre_processing_rules</code> (array[object]) 预处理规则
@ -166,7 +166,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则 - <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000 - <code>max_tokens</code> 最大长度（token）默认为 1000
</Property> </Property>
<Property name='file' type='multipart/form-data' key='file'> <Property name='file' type='multipart/form-data' key='file'>
需要上传的文件。 需要上传的文件。
@ -177,11 +177,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/document/create_by_file" label="/datasets/{dataset_id}/document/create-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create-by-file' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"' --form 'file=@"/path/to/file"'
@ -221,7 +221,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets' url='/datasets'
@ -245,13 +245,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>economy</code> 经济 - <code>economy</code> 经济
</Property> </Property>
<Property name='permission' type='string' key='permission'> <Property name='permission' type='string' key='permission'>
权限（选填，默认only_me） 权限（选填，默认 only_me）
- <code>only_me</code> 仅自己 - <code>only_me</code> 仅自己
- <code>all_team_members</code> 所有团队成员 - <code>all_team_members</code> 所有团队成员
- <code>partial_members</code> 部分团队成员 - <code>partial_members</code> 部分团队成员
</Property> </Property>
<Property name='provider' type='string' key='provider'> <Property name='provider' type='string' key='provider'>
provider（选填，默认 vendor） Provider（选填，默认 vendor）
- <code>vendor</code> 上传文件 - <code>vendor</code> 上传文件
- <code>external</code> 外部知识库 - <code>external</code> 外部知识库
</Property> </Property>
@ -306,7 +306,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets' url='/datasets'
@ -369,7 +369,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}' url='/datasets/{dataset_id}'
@ -406,13 +406,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_text' url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
method='POST' method='POST'
title='通过文本更新文档 ' title='通过文本更新文档 '
name='#update_by_text' name='#update-by-text'
/> />
<Row> <Row>
<Col> <Col>
@ -431,7 +431,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='name' type='string' key='name'> <Property name='name' type='string' key='name'>
文档名称 (选填) 文档名称(选填)
</Property> </Property>
<Property name='text' type='string' key='text'> <Property name='text' type='string' key='text'>
文档内容(选填) 文档内容(选填)
@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则 - <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000 - <code>max_tokens</code> 最大长度（token）默认为 1000
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -456,11 +456,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_text" label="/datasets/{dataset_id}/documents/{document_id}/update-by-text"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-text' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -503,13 +503,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/update_by_file' url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
method='POST' method='POST'
title='通过文件更新文档 ' title='通过文件更新文档 '
name='#update_by_file' name='#update-by-file'
/> />
<Row> <Row>
<Col> <Col>
@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='name' type='string' key='name'> <Property name='name' type='string' key='name'>
文档名称 (选填) 文档名称(选填)
</Property> </Property>
<Property name='file' type='multipart/form-data' key='file'> <Property name='file' type='multipart/form-data' key='file'>
需要上传的文件 需要上传的文件
@ -545,7 +545,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
- <code>segmentation</code> (object) 分段规则 - <code>segmentation</code> (object) 分段规则
- <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- <code>max_tokens</code> 最大长度 (token) 默认为 1000 - <code>max_tokens</code> 最大长度（token）默认为 1000
</Property> </Property>
</Properties> </Properties>
</Col> </Col>
@ -553,11 +553,11 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/documents/{document_id}/update_by_file" label="/datasets/{dataset_id}/documents/{document_id}/update-by-file"
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_file' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update-by-file' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
--form 'file=@"/path/to/file"' --form 'file=@"/path/to/file"'
@ -597,7 +597,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{batch}/indexing-status' url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
@ -652,7 +652,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}' url='/datasets/{dataset_id}/documents/{document_id}'
@ -694,7 +694,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents' url='/datasets/{dataset_id}/documents'
@ -769,7 +769,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments' url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -793,7 +793,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties> <Properties>
<Property name='segments' type='object list' key='segments'> <Property name='segments' type='object list' key='segments'>
- <code>content</code> (text) 文本内容/问题内容,必填 - <code>content</code> (text) 文本内容/问题内容,必填
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为qa模式则传值 - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
- <code>keywords</code> (list) 关键字,非必填 - <code>keywords</code> (list) 关键字,非必填
</Property> </Property>
</Properties> </Properties>
@ -855,7 +855,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments' url='/datasets/{dataset_id}/documents/{document_id}/segments'
@ -933,7 +933,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -979,7 +979,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
@ -1006,7 +1006,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<Properties> <Properties>
<Property name='segment' type='object' key='segment'> <Property name='segment' type='object' key='segment'>
- <code>content</code> (text) 文本内容/问题内容,必填 - <code>content</code> (text) 文本内容/问题内容,必填
- <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为qa模式则传值 - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
- <code>keywords</code> (list) 关键字,非必填 - <code>keywords</code> (list) 关键字,非必填
- <code>enabled</code> (bool) false/true，非必填 - <code>enabled</code> (bool) false/true，非必填
</Property> </Property>
@ -1068,13 +1068,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Col> </Col>
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/hit-testing' url='/datasets/{dataset_id}/retrieve'
method='POST' method='POST'
title='知识库召回测试' title='检索知识库'
name='#dataset_hit_testing' name='#dataset_retrieval'
/> />
<Row> <Row>
<Col> <Col>
@ -1088,23 +1088,23 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body ### Request Body
<Properties> <Properties>
<Property name='query' type='string' key='query'> <Property name='query' type='string' key='query'>
召回关键词 检索关键词
</Property> </Property>
<Property name='retrieval_model' type='object' key='retrieval_model'> <Property name='retrieval_model' type='object' key='retrieval_model'>
召回参数(选填,如不填,按照默认方式召回) 检索参数(选填,如不填,按照默认方式召回)
- <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填 - <code>search_method</code> (text) 检索方法:以下四个关键字之一,必填
- <code>keyword_search</code> 关键字检索 - <code>keyword_search</code> 关键字检索
- <code>semantic_search</code> 语义检索 - <code>semantic_search</code> 语义检索
- <code>full_text_search</code> 全文检索 - <code>full_text_search</code> 全文检索
- <code>hybrid_search</code> 混合检索 - <code>hybrid_search</code> 混合检索
- <code>reranking_enable</code> (bool) 是否启用 Reranking，非必填，如果检索模式为semantic_search模式或者hybrid_search则传值 - <code>reranking_enable</code> (bool) 是否启用 Reranking，非必填，如果检索模式为 semantic_search 模式或者 hybrid_search 则传值
- <code>reranking_mode</code> (object) Rerank 模型配置，非必填，如果启用了 reranking 则传值 - <code>reranking_mode</code> (object) Rerank 模型配置，非必填，如果启用了 reranking 则传值
- <code>reranking_provider_name</code> (string) Rerank 模型提供商 - <code>reranking_provider_name</code> (string) Rerank 模型提供商
- <code>reranking_model_name</code> (string) Rerank 模型名称 - <code>reranking_model_name</code> (string) Rerank 模型名称
- <code>weights</code> (double) 混合检索模式下语义检索的权重设置 - <code>weights</code> (double) 混合检索模式下语义检索的权重设置
- <code>top_k</code> (integer) 返回结果数量,非必填 - <code>top_k</code> (integer) 返回结果数量,非必填
- <code>score_threshold_enabled</code> (bool) 是否开启Score阈值 - <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
- <code>score_threshold</code> (double) Score阈值 - <code>score_threshold</code> (double) Score 阈值
</Property> </Property>
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'> <Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
未启用字段 未启用字段
@ -1115,8 +1115,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup <CodeGroup
title="Request" title="Request"
tag="POST" tag="POST"
label="/datasets/{dataset_id}/hit-testing" label="/datasets/{dataset_id}/retrieve"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \\\n--header 'Authorization: Bearer {api_key}'\\\n--header 'Content-Type: application/json'\\\n--data-raw '{ targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{
"query": "test", "query": "test",
"retrieval_model": { "retrieval_model": {
"search_method": "keyword_search", "search_method": "keyword_search",
@ -1131,10 +1131,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
"score_threshold_enabled": false, "score_threshold_enabled": false,
"score_threshold": null "score_threshold": null
} }
}'`} }'`}
> >
```bash {{ title: 'cURL' }} ```bash {{ title: 'cURL' }}
curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/hit-testing' \ curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/retrieve' \
--header 'Authorization: Bearer {api_key}' \ --header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \ --header 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
@ -1214,7 +1214,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
</Row> </Row>
--- <hr className='ml-0 mr-0' />
<Row> <Row>
<Col> <Col>

View File

@ -39,6 +39,7 @@ export const Heading = function H2({
} }
return ( return (
<> <>
<span id={name?.replace(/^#/, '')} className='relative -top-28' />
<div className="flex items-center gap-x-3" > <div className="flex items-center gap-x-3" >
<span className={`font-mono text-[0.625rem] font-semibold leading-6 rounded-lg px-1.5 ring-1 ring-inset ${style}`}>{method}</span> <span className={`font-mono text-[0.625rem] font-semibold leading-6 rounded-lg px-1.5 ring-1 ring-inset ${style}`}>{method}</span>
{/* <span className="h-0.5 w-0.5 rounded-full bg-zinc-300 dark:bg-zinc-600"></span> */} {/* <span className="h-0.5 w-0.5 rounded-full bg-zinc-300 dark:bg-zinc-600"></span> */}