mirror of
https://github.com/langgenius/dify.git
synced 2024-11-16 03:32:23 +08:00
feat: couchbase integration (#6165)
Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: Elliot Scribner <elliot.scribner@couchbase.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Bowen Liang <bowenliang@apache.org>
This commit is contained in:
parent
fc37e654fc
commit
c8ef9223e5
3
.github/workflows/api-tests.yml
vendored
3
.github/workflows/api-tests.yml
vendored
|
@ -78,7 +78,7 @@ jobs:
|
||||||
- name: Run Workflow
|
- name: Run Workflow
|
||||||
run: poetry run -C api bash dev/pytest/pytest_workflow.sh
|
run: poetry run -C api bash dev/pytest/pytest_workflow.sh
|
||||||
|
|
||||||
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch)
|
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
|
||||||
uses: hoverkraft-tech/compose-action@v2.0.0
|
uses: hoverkraft-tech/compose-action@v2.0.0
|
||||||
with:
|
with:
|
||||||
compose-file: |
|
compose-file: |
|
||||||
|
@ -86,6 +86,7 @@ jobs:
|
||||||
services: |
|
services: |
|
||||||
weaviate
|
weaviate
|
||||||
qdrant
|
qdrant
|
||||||
|
couchbase-server
|
||||||
etcd
|
etcd
|
||||||
minio
|
minio
|
||||||
milvus-standalone
|
milvus-standalone
|
||||||
|
|
4
.github/workflows/expose_service_ports.sh
vendored
4
.github/workflows/expose_service_ports.sh
vendored
|
@ -7,5 +7,7 @@ yq eval '.services["milvus-standalone"].ports += ["19530:19530"]' -i docker/dock
|
||||||
yq eval '.services.pgvector.ports += ["5433:5432"]' -i docker/docker-compose.yaml
|
yq eval '.services.pgvector.ports += ["5433:5432"]' -i docker/docker-compose.yaml
|
||||||
yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compose.yaml
|
yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compose.yaml
|
||||||
yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml
|
yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml
|
||||||
|
yq eval '.services.couchbase-server.ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml
|
||||||
|
yq eval '.services.couchbase-server.ports += ["11210:11210"]' -i docker/docker-compose.yaml
|
||||||
|
|
||||||
echo "Ports exposed for sandbox, weaviate, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch"
|
echo "Ports exposed for sandbox, weaviate, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase"
|
||||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -173,6 +173,7 @@ docker/volumes/myscale/log/*
|
||||||
docker/volumes/unstructured/*
|
docker/volumes/unstructured/*
|
||||||
docker/volumes/pgvector/data/*
|
docker/volumes/pgvector/data/*
|
||||||
docker/volumes/pgvecto_rs/data/*
|
docker/volumes/pgvecto_rs/data/*
|
||||||
|
docker/volumes/couchbase/*
|
||||||
|
|
||||||
docker/nginx/conf.d/default.conf
|
docker/nginx/conf.d/default.conf
|
||||||
docker/nginx/ssl/*
|
docker/nginx/ssl/*
|
||||||
|
@ -189,4 +190,4 @@ pyrightconfig.json
|
||||||
api/.vscode
|
api/.vscode
|
||||||
|
|
||||||
.idea/
|
.idea/
|
||||||
.vscode
|
.vscode
|
||||||
|
|
|
@ -120,7 +120,7 @@ SUPABASE_URL=your-server-url
|
||||||
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||||
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
|
||||||
|
|
||||||
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, vikingdb, upstash
|
# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash
|
||||||
VECTOR_STORE=weaviate
|
VECTOR_STORE=weaviate
|
||||||
|
|
||||||
# Weaviate configuration
|
# Weaviate configuration
|
||||||
|
@ -136,6 +136,13 @@ QDRANT_CLIENT_TIMEOUT=20
|
||||||
QDRANT_GRPC_ENABLED=false
|
QDRANT_GRPC_ENABLED=false
|
||||||
QDRANT_GRPC_PORT=6334
|
QDRANT_GRPC_PORT=6334
|
||||||
|
|
||||||
|
# Couchbase configuration
|
||||||
|
COUCHBASE_CONNECTION_STRING=127.0.0.1
|
||||||
|
COUCHBASE_USER=Administrator
|
||||||
|
COUCHBASE_PASSWORD=password
|
||||||
|
COUCHBASE_BUCKET_NAME=Embeddings
|
||||||
|
COUCHBASE_SCOPE_NAME=_default
|
||||||
|
|
||||||
# Milvus configuration
|
# Milvus configuration
|
||||||
MILVUS_URI=http://127.0.0.1:19530
|
MILVUS_URI=http://127.0.0.1:19530
|
||||||
MILVUS_TOKEN=
|
MILVUS_TOKEN=
|
||||||
|
|
|
@ -278,6 +278,7 @@ def migrate_knowledge_vector_database():
|
||||||
VectorType.BAIDU,
|
VectorType.BAIDU,
|
||||||
VectorType.VIKINGDB,
|
VectorType.VIKINGDB,
|
||||||
VectorType.UPSTASH,
|
VectorType.UPSTASH,
|
||||||
|
VectorType.COUCHBASE,
|
||||||
}
|
}
|
||||||
page = 1
|
page = 1
|
||||||
while True:
|
while True:
|
||||||
|
|
|
@ -17,6 +17,7 @@ from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCO
|
||||||
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
|
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
|
||||||
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
|
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
|
||||||
from configs.middleware.vdb.chroma_config import ChromaConfig
|
from configs.middleware.vdb.chroma_config import ChromaConfig
|
||||||
|
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
|
||||||
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
|
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
|
||||||
from configs.middleware.vdb.milvus_config import MilvusConfig
|
from configs.middleware.vdb.milvus_config import MilvusConfig
|
||||||
from configs.middleware.vdb.myscale_config import MyScaleConfig
|
from configs.middleware.vdb.myscale_config import MyScaleConfig
|
||||||
|
@ -251,6 +252,7 @@ class MiddlewareConfig(
|
||||||
TiDBVectorConfig,
|
TiDBVectorConfig,
|
||||||
WeaviateConfig,
|
WeaviateConfig,
|
||||||
ElasticsearchConfig,
|
ElasticsearchConfig,
|
||||||
|
CouchbaseConfig,
|
||||||
InternalTestConfig,
|
InternalTestConfig,
|
||||||
VikingDBConfig,
|
VikingDBConfig,
|
||||||
UpstashConfig,
|
UpstashConfig,
|
||||||
|
|
34
api/configs/middleware/vdb/couchbase_config.py
Normal file
34
api/configs/middleware/vdb/couchbase_config.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class CouchbaseConfig(BaseModel):
    """
    Couchbase vector store settings.

    Every field is optional and defaults to ``None``.
    """

    # Connection string passed to the Couchbase cluster client.
    COUCHBASE_CONNECTION_STRING: Optional[str] = Field(
        default=None,
        description="COUCHBASE connection string",
    )

    # User name used to authenticate against the cluster.
    COUCHBASE_USER: Optional[str] = Field(
        default=None,
        description="COUCHBASE user",
    )

    # Password used to authenticate against the cluster.
    COUCHBASE_PASSWORD: Optional[str] = Field(
        default=None,
        description="COUCHBASE password",
    )

    # Bucket that holds the vector collections.
    COUCHBASE_BUCKET_NAME: Optional[str] = Field(
        default=None,
        description="COUCHBASE bucket name",
    )

    # Scope inside the bucket that holds the vector collections.
    COUCHBASE_SCOPE_NAME: Optional[str] = Field(
        default=None,
        description="COUCHBASE scope name",
    )
|
|
@ -640,6 +640,7 @@ class DatasetRetrievalSettingApi(Resource):
|
||||||
| VectorType.ELASTICSEARCH
|
| VectorType.ELASTICSEARCH
|
||||||
| VectorType.PGVECTOR
|
| VectorType.PGVECTOR
|
||||||
| VectorType.TIDB_ON_QDRANT
|
| VectorType.TIDB_ON_QDRANT
|
||||||
|
| VectorType.COUCHBASE
|
||||||
):
|
):
|
||||||
return {
|
return {
|
||||||
"retrieval_method": [
|
"retrieval_method": [
|
||||||
|
@ -678,6 +679,7 @@ class DatasetRetrievalSettingMockApi(Resource):
|
||||||
| VectorType.MYSCALE
|
| VectorType.MYSCALE
|
||||||
| VectorType.ORACLE
|
| VectorType.ORACLE
|
||||||
| VectorType.ELASTICSEARCH
|
| VectorType.ELASTICSEARCH
|
||||||
|
| VectorType.COUCHBASE
|
||||||
| VectorType.PGVECTOR
|
| VectorType.PGVECTOR
|
||||||
):
|
):
|
||||||
return {
|
return {
|
||||||
|
|
0
api/core/rag/datasource/vdb/couchbase/__init__.py
Normal file
0
api/core/rag/datasource/vdb/couchbase/__init__.py
Normal file
378
api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
Normal file
378
api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
Normal file
|
@ -0,0 +1,378 @@
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from couchbase import search
|
||||||
|
from couchbase.auth import PasswordAuthenticator
|
||||||
|
from couchbase.cluster import Cluster
|
||||||
|
from couchbase.management.search import SearchIndex
|
||||||
|
|
||||||
|
# needed for options -- cluster, timeout, SQL++ (N1QL) query, etc.
|
||||||
|
from couchbase.options import ClusterOptions, SearchOptions
|
||||||
|
from couchbase.vector_search import VectorQuery, VectorSearch
|
||||||
|
from flask import current_app
|
||||||
|
from pydantic import BaseModel, model_validator
|
||||||
|
|
||||||
|
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||||
|
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
|
||||||
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
|
from core.rag.embedding.embedding_base import Embeddings
|
||||||
|
from core.rag.models.document import Document
|
||||||
|
from extensions.ext_redis import redis_client
|
||||||
|
from models.dataset import Dataset
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CouchbaseConfig(BaseModel):
    """Connection settings needed to open a Couchbase-backed vector store.

    All five values are mandatory; a missing or empty value is rejected
    before field validation with an error naming the corresponding
    ``COUCHBASE_*`` configuration key.
    """

    connection_string: str
    user: str
    password: str
    bucket_name: str
    scope_name: str

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict) -> dict:
        """Ensure every required setting is present and non-empty.

        Args:
            values: Raw keyword values passed to the model constructor.

        Returns:
            The unchanged ``values`` mapping when all settings are present.

        Raises:
            ValueError: If a setting is missing or falsy; the message names
                the configuration key that has to be provided.
        """
        # (model field, configuration key reported to the user), checked in order.
        required_settings = (
            ("connection_string", "COUCHBASE_CONNECTION_STRING"),
            ("user", "COUCHBASE_USER"),
            ("password", "COUCHBASE_PASSWORD"),
            ("bucket_name", "COUCHBASE_BUCKET_NAME"),
            ("scope_name", "COUCHBASE_SCOPE_NAME"),
        )
        for field_name, config_key in required_settings:
            if not values.get(field_name):
                raise ValueError(f"config {config_key} is required")
        return values
|
||||||
|
|
||||||
|
|
||||||
|
class CouchbaseVector(BaseVector):
    """Vector store backed by one Couchbase collection and a scoped search index.

    Documents are stored as ``{"text", "embedding", "metadata"}`` in the
    collection named ``collection_name`` inside the configured bucket and
    scope, and are queried through the search index
    ``<collection_name>_search``.
    """

    # Template of the search index definition. The placeholder type mapping
    # "collection_name" is re-keyed to "<scope>.<collection>" and the
    # embedding "dims" value is overwritten before the index is upserted.
    _INDEX_DEFINITION_TEMPLATE = """
    {
        "type": "fulltext-index",
        "name": "Embeddings._default.Vector_Search",
        "uuid": "26d4db528e78b716",
        "sourceType": "gocbcore",
        "sourceName": "Embeddings",
        "sourceUUID": "2242e4a25b4decd6650c9c7b3afa1dbf",
        "planParams": {
            "maxPartitionsPerPIndex": 1024,
            "indexPartitions": 1
        },
        "params": {
            "doc_config": {
                "docid_prefix_delim": "",
                "docid_regexp": "",
                "mode": "scope.collection.type_field",
                "type_field": "type"
            },
            "mapping": {
                "analysis": { },
                "default_analyzer": "standard",
                "default_datetime_parser": "dateTimeOptional",
                "default_field": "_all",
                "default_mapping": {
                    "dynamic": true,
                    "enabled": true
                },
                "default_type": "_default",
                "docvalues_dynamic": false,
                "index_dynamic": true,
                "store_dynamic": true,
                "type_field": "_type",
                "types": {
                    "collection_name": {
                        "dynamic": true,
                        "enabled": true,
                        "properties": {
                            "embedding": {
                                "dynamic": false,
                                "enabled": true,
                                "fields": [
                                    {
                                        "dims": 1536,
                                        "index": true,
                                        "name": "embedding",
                                        "similarity": "dot_product",
                                        "type": "vector",
                                        "vector_index_optimized_for": "recall"
                                    }
                                ]
                            },
                            "metadata": {
                                "dynamic": true,
                                "enabled": true
                            },
                            "text": {
                                "dynamic": false,
                                "enabled": true,
                                "fields": [
                                    {
                                        "index": true,
                                        "name": "text",
                                        "store": true,
                                        "type": "text"
                                    }
                                ]
                            }
                        }
                    }
                }
            },
            "store": {
                "indexType": "scorch",
                "segmentVersion": 16
            }
        },
        "sourceParams": { }
    }
    """

    def __init__(self, collection_name: str, config: CouchbaseConfig):
        """Connect to the cluster and resolve the configured bucket and scope.

        Args:
            collection_name: Name of the collection this store operates on.
            config: Connection string, credentials, bucket and scope names.
        """
        super().__init__(collection_name)
        self._client_config = config

        # Connect to Couchbase.
        auth = PasswordAuthenticator(config.user, config.password)
        options = ClusterOptions(auth)
        self._cluster = Cluster(config.connection_string, options)
        self._bucket = self._cluster.bucket(config.bucket_name)
        self._scope = self._bucket.scope(config.scope_name)
        self._bucket_name = config.bucket_name
        self._scope_name = config.scope_name

        # Wait until the cluster is ready for use.
        self._cluster.wait_until_ready(timedelta(seconds=5))

    @property
    def _keyspace(self) -> str:
        """Return the ``bucket.scope.collection`` path used in SQL++ statements."""
        return f"`{self._bucket_name}`.{self._scope_name}.{self._collection_name}"

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        """Create the collection and search index if needed, then store the documents.

        Args:
            texts: Documents to store.
            embeddings: One vector per document; the length of the first
                vector is used as the index dimension.
        """
        index_id = uuid.uuid4().hex
        self._create_collection(uuid=index_id, vector_length=len(embeddings[0]))
        self.add_texts(texts, embeddings)

    def _create_collection(self, vector_length: int, uuid: str):
        """Create the collection and its search index unless they already exist.

        The work is done under a Redis lock; a Redis key caches the fact that
        the collection has been created for one hour.

        Args:
            vector_length: Dimension written into the index's embedding field.
            uuid: Identifier written into the index definition.
        """
        lock_name = f"vector_indexing_lock_{self._collection_name}"
        with redis_client.lock(lock_name, timeout=20):
            collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
            if redis_client.get(collection_exist_cache_key):
                return
            if self._collection_exists(self._collection_name):
                return

            manager = self._bucket.collections()
            manager.create_collection(self._client_config.scope_name, self._collection_name)

            index_manager = self._scope.search_indexes()

            # Parse a fresh copy of the template so each call mutates its own dict.
            index_definition = json.loads(self._INDEX_DEFINITION_TEMPLATE)
            index_definition["name"] = self._collection_name + "_search"
            index_definition["uuid"] = uuid

            # Re-key the placeholder type mapping to "<scope>.<collection>" and
            # set the real embedding dimension.
            type_mappings = index_definition["params"]["mapping"]["types"]
            collection_mapping = type_mappings.pop("collection_name")
            collection_mapping["properties"]["embedding"]["fields"][0]["dims"] = vector_length
            type_mappings[self._scope_name + "." + self._collection_name] = collection_mapping

            # NOTE(review): presumably gives the server time to register the new
            # collection before the index is created -- confirm.
            time.sleep(2)
            index_manager.upsert_index(
                SearchIndex(
                    index_definition["name"],
                    params=index_definition["params"],
                    source_name=self._bucket_name,
                ),
            )
            # NOTE(review): presumably lets the new index settle before use -- confirm.
            time.sleep(1)

            redis_client.set(collection_exist_cache_key, 1, ex=3600)

    def _collection_exists(self, name: str) -> bool:
        """Return whether collection ``name`` exists in the configured scope.

        Returns ``False`` (instead of raising ``KeyError``) when the configured
        scope itself is not present in the bucket.
        """
        for scope in self._bucket.collections().get_all_scopes():
            if scope.name == self._scope_name:
                return any(collection.name == name for collection in scope.collections)
        return False

    def get_type(self) -> str:
        """Return the vector store type identifier for Couchbase."""
        return VectorType.COUCHBASE

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        """Upsert the documents with their embeddings into the collection.

        Args:
            documents: Documents whose text and metadata are stored.
            embeddings: One vector per document, in the same order.

        Returns:
            The list of document ids produced by ``_get_uuids``.
        """
        uuids = self._get_uuids(documents)
        collection = self._scope.collection(self._collection_name)

        for doc_id, document, vector in zip(uuids, documents, embeddings):
            collection.upsert(
                doc_id,
                {"text": document.page_content, "embedding": vector, "metadata": document.metadata},
            )

        return list(uuids)

    def text_exists(self, id: str) -> bool:
        """Return whether a stored document with key ``id`` exists."""
        # The id is passed as a named parameter rather than interpolated.
        query = f"""
            SELECT COUNT(1) AS count FROM
            {self._keyspace}
            WHERE META().id = $doc_id
            """
        result = self._cluster.query(query, named_parameters={"doc_id": id}).execute()
        for row in result:
            return row["count"] > 0
        return False  # no rows returned

    def delete_by_ids(self, ids: list[str]) -> None:
        """Delete the stored documents whose keys are in ``ids``.

        Failures are logged and not re-raised.
        """
        query = f"""
            DELETE FROM {self._keyspace}
            WHERE META().id IN $doc_ids;
            """
        try:
            self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
        except Exception:
            logger.exception("Failed to delete documents by ids from Couchbase")

    def delete_by_document_id(self, document_id: str):
        """Delete the stored document whose key equals ``document_id``."""
        query = f"""
            DELETE FROM
            {self._keyspace}
            WHERE META().id = $doc_id;
            """
        self._cluster.query(query, named_parameters={"doc_id": document_id}).execute()

    def delete_by_metadata_field(self, key: str, value: str) -> None:
        """Delete every stored document whose ``metadata.<key>`` equals ``value``.

        ``key`` is interpolated into the statement; ``value`` is passed as a
        named parameter.
        """
        query = f"""
            DELETE FROM {self._keyspace}
            WHERE metadata.{key} = $value;
            """
        self._cluster.query(query, named_parameters={"value": value}).execute()

    def _row_to_document(self, row: Any) -> Document:
        """Convert one search result row into a ``Document`` carrying its score."""
        text = row.fields.pop("text")
        metadata = self._format_metadata(row.fields)
        metadata["score"] = row.score
        return Document(page_content=text, metadata=metadata)

    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
        """Run a vector search against the ``embedding`` field.

        Keyword Args:
            top_k: Maximum number of hits to request (default 5).
            score_threshold: Minimum score a hit needs to be returned
                (default 0.0).

        Returns:
            Matching documents with the hit score stored in ``metadata["score"]``.

        Raises:
            ValueError: If the search or result parsing fails.
        """
        top_k = kwargs.get("top_k", 5)
        score_threshold = kwargs.get("score_threshold") or 0.0

        search_req = search.SearchRequest.create(
            VectorSearch.from_vector_query(VectorQuery("embedding", query_vector, top_k))
        )
        docs = []
        try:
            search_iter = self._scope.search(
                self._collection_name + "_search",
                search_req,
                SearchOptions(limit=top_k, collections=[self._collection_name], fields=["*"]),
            )
            for row in search_iter.rows():
                doc = self._row_to_document(row)
                if row.score >= score_threshold:
                    docs.append(doc)
        except Exception as e:
            raise ValueError(f"Search failed with error: {e}") from e

        return docs

    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
        """Run a query-string search against the ``text`` field.

        Keyword Args:
            top_k: Maximum number of hits to request (default 2).

        Returns:
            Matching documents with the hit score stored in ``metadata["score"]``.

        Raises:
            ValueError: If the search or result parsing fails.
        """
        top_k = kwargs.get("top_k", 2)
        docs = []
        try:
            search_req = search.SearchRequest.create(search.QueryStringQuery("text:" + query))
            search_iter = self._scope.search(
                self._collection_name + "_search", search_req, SearchOptions(limit=top_k, fields=["*"])
            )
            for row in search_iter.rows():
                docs.append(self._row_to_document(row))
        except Exception as e:
            raise ValueError(f"Search failed with error: {e}") from e

        return docs

    def delete(self):
        """Drop this store's collection from the configured scope, if it exists."""
        if self._collection_exists(self._collection_name):
            # Drop from the scope the collection was created in, not a fixed "_default".
            self._bucket.collections().drop_collection(self._scope_name, self._collection_name)

    def _format_metadata(self, row_fields: dict[str, Any]) -> dict[str, Any]:
        """Helper method to format the metadata from the Couchbase Search API.

        Args:
            row_fields (Dict[str, Any]): The fields to format.

        Returns:
            Dict[str, Any]: The formatted metadata.
        """
        # Couchbase Search returns metadata keys with a `metadata.` prefix.
        # Strip only that leading prefix so the rest of the key stays intact.
        return {key.removeprefix("metadata."): value for key, value in row_fields.items()}
|
||||||
|
|
||||||
|
|
||||||
|
class CouchbaseVectorFactory(AbstractVectorFactory):
    """Factory that builds a ``CouchbaseVector`` for a dataset."""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> CouchbaseVector:
        """Create the Couchbase vector store for ``dataset``.

        The collection name is taken from the dataset's stored index struct
        when there is one; otherwise it is generated from the dataset id and
        a new index struct is written back to ``dataset.index_struct``.
        Connection settings are read from the Flask application config.
        ``attributes`` and ``embeddings`` are not used here.
        """
        index_struct = dataset.index_struct_dict
        if not index_struct:
            collection_name = Dataset.gen_collection_name_by_id(dataset.id)
            dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.COUCHBASE, collection_name))
        else:
            collection_name = index_struct["vector_store"]["class_prefix"]

        app_config = current_app.config
        couchbase_config = CouchbaseConfig(
            connection_string=app_config.get("COUCHBASE_CONNECTION_STRING"),
            user=app_config.get("COUCHBASE_USER"),
            password=app_config.get("COUCHBASE_PASSWORD"),
            bucket_name=app_config.get("COUCHBASE_BUCKET_NAME"),
            scope_name=app_config.get("COUCHBASE_SCOPE_NAME"),
        )
        return CouchbaseVector(collection_name=collection_name, config=couchbase_config)
|
|
@ -114,6 +114,10 @@ class Vector:
|
||||||
from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
|
from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
|
||||||
|
|
||||||
return AnalyticdbVectorFactory
|
return AnalyticdbVectorFactory
|
||||||
|
case VectorType.COUCHBASE:
|
||||||
|
from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory
|
||||||
|
|
||||||
|
return CouchbaseVectorFactory
|
||||||
case VectorType.BAIDU:
|
case VectorType.BAIDU:
|
||||||
from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
|
from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ class VectorType(str, Enum):
|
||||||
TENCENT = "tencent"
|
TENCENT = "tencent"
|
||||||
ORACLE = "oracle"
|
ORACLE = "oracle"
|
||||||
ELASTICSEARCH = "elasticsearch"
|
ELASTICSEARCH = "elasticsearch"
|
||||||
|
COUCHBASE = "couchbase"
|
||||||
BAIDU = "baidu"
|
BAIDU = "baidu"
|
||||||
VIKINGDB = "vikingdb"
|
VIKINGDB = "vikingdb"
|
||||||
UPSTASH = "upstash"
|
UPSTASH = "upstash"
|
||||||
|
|
55
api/poetry.lock
generated
55
api/poetry.lock
generated
|
@ -1801,6 +1801,46 @@ requests = ">=2.8"
|
||||||
six = "*"
|
six = "*"
|
||||||
xmltodict = "*"
|
xmltodict = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "couchbase"
|
||||||
|
version = "4.3.3"
|
||||||
|
description = "Python Client for Couchbase"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:d8069e4f01332859d56cca597874645c914699162b3979d1b432f0dfc186b124"},
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1caa6cfef49c785b35b1702102f718227f351df87bba2694b9334520c41e9eb5"},
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f4a9a65c44935249fa078fb90a3c28ea71da9d2d5889fcd514b12d0538010ae0"},
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f144b8c482c18283d8e419b844630d41f3249b07d43d40b5e3535444e57d0fb"},
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c534fba6fdc7cf47eed9dee8a57d1e9eb867bf008574e321fa380a77cebf32f"},
|
||||||
|
{file = "couchbase-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b841be06e0e4370b69ebef6bca3409c378186f7d6e964cd645ba18e97216c022"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:eee7a73b3acbdc78ae314fddf7f975b3c9e05df07df255f4dcc878939a2abae0"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:53417cafcf90ff4e2fd81ebba2a08b7ad56f17160d1c5019ad3b09c758aeb363"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0cefd13bea8b0f150f1b9d27fd7614f971f77419b31817781d26ba315ed658bb"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78fa1054d7740e2fe38fce0a2aab4e9a2d30263d894e0615ee5df297f02f59a3"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb093899cfad5a7472258a9b6a57775dbf23a6e0180241507ba89ce3ab241e41"},
|
||||||
|
{file = "couchbase-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f7cfbdc699af5715f49365ffbb05a6a7366a534c0d7161edf270ad3e735a6c5d"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:58352cae9b8affdaa2ac012e0a03c8c2632ee6297a878232888b4e0360d0d5df"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:728e7e3b5e1682706cb9d63993d289226d02a25089527b8ecb4e3889dabc38cf"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:73014bf098cf14187a39cc13453e0d859c1d54568df28f69cc308a9a5f24feb2"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a743375804068ae01b73c916bfca738764c8c12f381bb399ef04e784935856a1"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:394c122cfe02a76a99e7d5178e64129f6da49843225e78d8629abcab556c24af"},
|
||||||
|
{file = "couchbase-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:bf85d7a5cda548d9801614651206068b4445fa37972e62b14d7521a958198693"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:92d23c9cedd571631070791f2afee0e3d7d8c9ce1bf2ea6e9a4f2fdbc37a0f1e"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:38c42eb29a73cce2998ae5df45bd61b16dce9765d3bff968ec5cf6a622faa291"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:afed137bf0edc642d7b201b6ab7b1e7117bb4c8eac6b2f253cc6e106f334a2a1"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:954d991377d47883aaf903934c5d0f19577680a2abf80d3ce5bb9b3c80991fc7"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5552b9fa684630698dc98d6f3b1082540634c1b7ad5bf53b843b5da57b0169c"},
|
||||||
|
{file = "couchbase-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:f88f2b7e0c894f7237d9f3fb5c46abc44b8151a97b3ca8e75f57d23ebf59f9da"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:769e1e2367ea1d4de181fcd4b4e353e9abef97d15b581a6c5aea49ece3dc7d59"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:47f59a0b35ffce060583fd11f98f049f3b70701cf14aab9ac092594aca486aeb"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:440bb93d611827ba0ea2403c6f204fe931467a6cb5811f0e03bf1779204ef843"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cdb4dde62e1d41c0b8707121ab68fa78b7a1508541bd48fc850be396f91bc8d9"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f8cf45f317b39cc19db5c67b565662f08d6c90305b3aa14e04bc22707258213"},
|
||||||
|
{file = "couchbase-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:c97d48ad486c8f201b4482d5594258f949369cb44792ed148d5159a3d12ae21b"},
|
||||||
|
{file = "couchbase-4.3.3.tar.gz", hash = "sha256:27808500551564b39b46943cf3daab572694889c1eb638425d363edb48b20da7"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "coverage"
|
name = "coverage"
|
||||||
version = "7.2.7"
|
version = "7.2.7"
|
||||||
|
@ -6850,6 +6890,19 @@ files = [
|
||||||
{file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
|
{file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
|
||||||
{file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
|
{file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
|
||||||
{file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
|
{file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
|
||||||
|
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"},
|
||||||
|
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"},
|
||||||
|
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"},
|
||||||
|
{file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"},
|
||||||
|
{file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"},
|
||||||
|
{file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"},
|
||||||
|
{file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
@ -10866,4 +10919,4 @@ cffi = ["cffi (>=1.11)"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.10,<3.13"
|
python-versions = ">=3.10,<3.13"
|
||||||
content-hash = "1b268122d3d4771ba219f0e983322e0454b7b8644dba35da38d7d950d489e1ba"
|
content-hash = "52552faf5f4823056eb48afe05349ab2f0e9a5bc42105211ccbbb54b59e27b59"
|
||||||
|
|
|
@ -239,6 +239,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
|
||||||
alibabacloud_tea_openapi = "~0.3.9"
|
alibabacloud_tea_openapi = "~0.3.9"
|
||||||
chromadb = "0.5.1"
|
chromadb = "0.5.1"
|
||||||
clickhouse-connect = "~0.7.16"
|
clickhouse-connect = "~0.7.16"
|
||||||
|
couchbase = "~4.3.0"
|
||||||
elasticsearch = "8.14.0"
|
elasticsearch = "8.14.0"
|
||||||
opensearch-py = "2.4.0"
|
opensearch-py = "2.4.0"
|
||||||
oracledb = "~2.2.1"
|
oracledb = "~2.2.1"
|
||||||
|
|
50
api/tests/integration_tests/vdb/couchbase/test_couchbase.py
Normal file
50
api/tests/integration_tests/vdb/couchbase/test_couchbase.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
|
||||||
|
from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseConfig, CouchbaseVector
|
||||||
|
from tests.integration_tests.vdb.test_vector_store import (
|
||||||
|
AbstractVectorTest,
|
||||||
|
get_example_text,
|
||||||
|
setup_mock_redis,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_healthy_container(service_name="couchbase-server", timeout=300, interval=10):
    """Block until a Docker container reports a ``healthy`` health status.

    Repeatedly runs ``docker inspect`` and reads ``.State.Health.Status`` for
    the named container.

    Args:
        service_name: Container name handed to ``docker inspect``.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to pause between polls. The last pause is shortened
            so the call does not overrun ``timeout``.

    Returns:
        True as soon as the container reports ``healthy``.

    Raises:
        TimeoutError: If the container is not healthy within ``timeout`` seconds.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    last_status = "unknown"

    while time.monotonic() < deadline:
        result = subprocess.run(
            ["docker", "inspect", "--format", "{{.State.Health.Status}}", service_name],
            capture_output=True,
            text=True,
        )
        status = result.stdout.strip()
        if status == "healthy":
            print(f"{service_name} is healthy!")
            return True

        # When docker itself fails stdout is empty; keep stderr for the final report.
        last_status = status or result.stderr.strip() or "unknown"
        print(f"Waiting for {service_name} to be healthy...")

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep beyond the deadline.
        time.sleep(min(interval, remaining))

    raise TimeoutError(f"{service_name} did not become healthy in time (last status: {last_status})")
|
||||||
|
|
||||||
|
|
||||||
|
class CouchbaseTest(AbstractVectorTest):
    """Runs the shared vector-store test flow against a Couchbase instance on 127.0.0.1."""

    def __init__(self):
        super().__init__()
        collection = self.collection_name
        couchbase_config = CouchbaseConfig(
            connection_string="couchbase://127.0.0.1",
            user="Administrator",
            password="password",
            bucket_name="Embeddings",
            scope_name="_default",
        )
        self.vector = CouchbaseVector(collection_name=collection, config=couchbase_config)

    def search_by_vector(self):
        """Query with the example embedding and expect exactly one hit."""
        # Pause briefly so the document has been indexed before it is queried.
        time.sleep(5)
        matches = self.vector.search_by_vector(query_vector=self.example_embedding)
        assert len(matches) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_couchbase(setup_mock_redis):
    """Wait for the Couchbase container to be healthy, then run the full vector test suite.

    ``setup_mock_redis`` is imported from the shared vector-store test module;
    presumably a pytest fixture that is requested only for its side effects.
    """
    wait_for_healthy_container(service_name="couchbase-server", timeout=60)
    suite = CouchbaseTest()
    suite.run_all_tests()
|
|
@ -11,4 +11,5 @@ pytest api/tests/integration_tests/vdb/chroma \
|
||||||
api/tests/integration_tests/vdb/vikingdb \
|
api/tests/integration_tests/vdb/vikingdb \
|
||||||
api/tests/integration_tests/vdb/baidu \
|
api/tests/integration_tests/vdb/baidu \
|
||||||
api/tests/integration_tests/vdb/tcvectordb \
|
api/tests/integration_tests/vdb/tcvectordb \
|
||||||
api/tests/integration_tests/vdb/upstash
|
api/tests/integration_tests/vdb/upstash \
|
||||||
|
api/tests/integration_tests/vdb/couchbase \
|
||||||
|
|
|
@ -375,7 +375,7 @@ SUPABASE_URL=your-server-url
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
|
|
||||||
# The type of vector store to use.
|
# The type of vector store to use.
|
||||||
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `vikingdb`.
|
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `couchbase`, `vikingdb`.
|
||||||
VECTOR_STORE=weaviate
|
VECTOR_STORE=weaviate
|
||||||
|
|
||||||
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
|
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
|
||||||
|
@ -414,6 +414,14 @@ MYSCALE_PASSWORD=
|
||||||
MYSCALE_DATABASE=dify
|
MYSCALE_DATABASE=dify
|
||||||
MYSCALE_FTS_PARAMS=
|
MYSCALE_FTS_PARAMS=
|
||||||
|
|
||||||
|
# Couchbase configurations, only available when VECTOR_STORE is `couchbase`
|
||||||
|
# The connection string must include the hostname defined in the docker-compose file (couchbase-server in this case).
|
||||||
|
COUCHBASE_CONNECTION_STRING=couchbase://couchbase-server
|
||||||
|
COUCHBASE_USER=Administrator
|
||||||
|
COUCHBASE_PASSWORD=password
|
||||||
|
COUCHBASE_BUCKET_NAME=Embeddings
|
||||||
|
COUCHBASE_SCOPE_NAME=_default
|
||||||
|
|
||||||
# pgvector configurations, only available when VECTOR_STORE is `pgvector`
|
# pgvector configurations, only available when VECTOR_STORE is `pgvector`
|
||||||
PGVECTOR_HOST=pgvector
|
PGVECTOR_HOST=pgvector
|
||||||
PGVECTOR_PORT=5432
|
PGVECTOR_PORT=5432
|
||||||
|
|
4
docker/couchbase-server/Dockerfile
Normal file
4
docker/couchbase-server/Dockerfile
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Couchbase Server image with the init script added.
FROM couchbase/server:latest AS stage_base
# Alternative base image, currently disabled:
# FROM couchbase:latest AS stage_base
# Copy the init script into the image; docker-compose runs it as the container command.
COPY init-cbserver.sh /opt/couchbase/init/
# Make the init script executable.
RUN chmod +x /opt/couchbase/init/init-cbserver.sh
|
44
docker/couchbase-server/init-cbserver.sh
Executable file
44
docker/couchbase-server/init-cbserver.sh
Executable file
|
@ -0,0 +1,44 @@
|
||||||
|
#!/bin/bash
# Starts Couchbase Server and, on the first run, creates the cluster and the
# bucket from environment variables.
#
# Docker Compose only allows a single command, so Couchbase is started here the
# same way the standard Couchbase Dockerfile would start it:
# https://github.com/couchbase/docker/blob/master/enterprise/couchbase-server/7.2.0/Dockerfile#L88

/entrypoint.sh couchbase-server &

# Marker file that records whether setup already ran, so it is not repeated.
FILE=/opt/couchbase/init/setupComplete.txt

if ! [ -f "$FILE" ]; then
  # Automatically create the cluster based on environment variables.
  # https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-cluster-init.html

  # Log the username only: the password must not end up in the container logs.
  echo "Initializing Couchbase cluster as user: $COUCHBASE_ADMINISTRATOR_USERNAME"

  # Presumably gives the server started above time to come up before it is configured.
  sleep 20s
  /opt/couchbase/bin/couchbase-cli cluster-init -c 127.0.0.1 \
    --cluster-username "$COUCHBASE_ADMINISTRATOR_USERNAME" \
    --cluster-password "$COUCHBASE_ADMINISTRATOR_PASSWORD" \
    --services data,index,query,fts \
    --cluster-ramsize "$COUCHBASE_RAM_SIZE" \
    --cluster-index-ramsize "$COUCHBASE_INDEX_RAM_SIZE" \
    --cluster-eventing-ramsize "$COUCHBASE_EVENTING_RAM_SIZE" \
    --cluster-fts-ramsize "$COUCHBASE_FTS_RAM_SIZE" \
    --index-storage-setting default

  sleep 2s

  # Automatically create the bucket based on environment variables.
  # https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-bucket-create.html

  /opt/couchbase/bin/couchbase-cli bucket-create -c localhost:8091 \
    --username "$COUCHBASE_ADMINISTRATOR_USERNAME" \
    --password "$COUCHBASE_ADMINISTRATOR_PASSWORD" \
    --bucket "$COUCHBASE_BUCKET" \
    --bucket-ramsize "$COUCHBASE_BUCKET_RAMSIZE" \
    --bucket-type couchbase

  # Create the marker file so the setup is not run again.
  touch "$FILE"
fi

# Docker Compose would stop the container unless a foreground process keeps
# running; this is a known issue and workaround.
tail -f /dev/null
|
|
@ -110,6 +110,11 @@ x-shared-env: &shared-api-worker-env
|
||||||
QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20}
|
QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20}
|
||||||
QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false}
|
QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false}
|
||||||
QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334}
|
QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334}
|
||||||
|
# Default matches the .env value: couchbase:// scheme plus the compose service hostname.
COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-couchbase://couchbase-server}
|
||||||
|
COUCHBASE_USER: ${COUCHBASE_USER:-Administrator}
|
||||||
|
COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password}
|
||||||
|
COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings}
|
||||||
|
COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default}
|
||||||
MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530}
|
MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530}
|
||||||
MILVUS_TOKEN: ${MILVUS_TOKEN:-}
|
MILVUS_TOKEN: ${MILVUS_TOKEN:-}
|
||||||
MILVUS_USER: ${MILVUS_USER:-root}
|
MILVUS_USER: ${MILVUS_USER:-root}
|
||||||
|
@ -475,6 +480,39 @@ services:
|
||||||
environment:
|
environment:
|
||||||
QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456}
|
QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456}
|
||||||
|
|
||||||
|
# The Couchbase vector store.
|
||||||
|
couchbase-server:
  build: ./couchbase-server
  profiles:
    - couchbase
  restart: always
  environment:
    - CLUSTER_NAME=dify_search
    - COUCHBASE_ADMINISTRATOR_USERNAME=${COUCHBASE_USER:-Administrator}
    - COUCHBASE_ADMINISTRATOR_PASSWORD=${COUCHBASE_PASSWORD:-password}
    - COUCHBASE_BUCKET=${COUCHBASE_BUCKET_NAME:-Embeddings}
    - COUCHBASE_BUCKET_RAMSIZE=512
    - COUCHBASE_RAM_SIZE=2048
    - COUCHBASE_EVENTING_RAM_SIZE=512
    - COUCHBASE_INDEX_RAM_SIZE=512
    - COUCHBASE_FTS_RAM_SIZE=1024
  hostname: couchbase-server
  container_name: couchbase-server
  working_dir: /opt/couchbase
  stdin_open: true
  tty: true
  # The image entrypoint is cleared; the init script starts the server itself.
  entrypoint: [""]
  command: sh -c "/opt/couchbase/init/init-cbserver.sh"
  volumes:
    - ./volumes/couchbase/data:/opt/couchbase/var/lib/couchbase/data
  healthcheck:
    # Ensure the bucket was created before proceeding.
    # Credentials are read from the container environment set above, so the check
    # keeps working when COUCHBASE_USER / COUCHBASE_PASSWORD are overridden.
    # `$$` stops Compose from interpolating; the shell in the container expands them.
    test: [ "CMD-SHELL", "curl -s -f -u \"$$COUCHBASE_ADMINISTRATOR_USERNAME:$$COUCHBASE_ADMINISTRATOR_PASSWORD\" http://localhost:8091/pools/default/buckets | grep -q '\\[{' || exit 1" ]
    interval: 10s
    retries: 10
    start_period: 30s
    timeout: 10s
|
||||||
|
|
||||||
# The pgvector vector database.
|
# The pgvector vector database.
|
||||||
pgvector:
|
pgvector:
|
||||||
image: pgvector/pgvector:pg16
|
image: pgvector/pgvector:pg16
|
||||||
|
|
Loading…
Reference in New Issue
Block a user