mirror of
https://github.com/langgenius/dify.git
synced 2024-11-16 11:42:29 +08:00
test: add unit tests for vector stores of Milvus, Qdrant and Weaviate (#3688)
This commit is contained in:
parent
8ca5aa1190
commit
9cec8c1750
28
.github/workflows/api-tests.yml
vendored
28
.github/workflows/api-tests.yml
vendored
|
@ -37,6 +37,27 @@ jobs:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Weaviate
|
||||||
|
uses: hoverkraft-tech/compose-action@v2.0.0
|
||||||
|
with:
|
||||||
|
compose-file: docker/docker-compose.middleware.yaml
|
||||||
|
services: weaviate
|
||||||
|
|
||||||
|
- name: Set up Qdrant
|
||||||
|
uses: hoverkraft-tech/compose-action@v2.0.0
|
||||||
|
with:
|
||||||
|
compose-file: docker/docker-compose.qdrant.yaml
|
||||||
|
services: qdrant
|
||||||
|
|
||||||
|
- name: Set up Milvus
|
||||||
|
uses: hoverkraft-tech/compose-action@v2.0.0
|
||||||
|
with:
|
||||||
|
compose-file: docker/docker-compose.milvus.yaml
|
||||||
|
services: |
|
||||||
|
etcd
|
||||||
|
minio
|
||||||
|
milvus-standalone
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
|
@ -49,6 +70,9 @@ jobs:
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pip install -r ./api/requirements.txt -r ./api/requirements-dev.txt
|
run: pip install -r ./api/requirements.txt -r ./api/requirements-dev.txt
|
||||||
|
|
||||||
|
- name: Run Unit tests
|
||||||
|
run: dev/pytest/pytest_unit_tests.sh
|
||||||
|
|
||||||
- name: Run ModelRuntime
|
- name: Run ModelRuntime
|
||||||
run: dev/pytest/pytest_model_runtime.sh
|
run: dev/pytest/pytest_model_runtime.sh
|
||||||
|
|
||||||
|
@ -58,5 +82,5 @@ jobs:
|
||||||
- name: Run Workflow
|
- name: Run Workflow
|
||||||
run: dev/pytest/pytest_workflow.sh
|
run: dev/pytest/pytest_workflow.sh
|
||||||
|
|
||||||
- name: Run Unit tests
|
- name: Run Vector Stores
|
||||||
run: dev/pytest/pytest_unit_tests.sh
|
run: dev/pytest/pytest_vdb.sh
|
||||||
|
|
|
@ -250,7 +250,7 @@ class MilvusVector(BaseVector):
|
||||||
|
|
||||||
# Create the collection
|
# Create the collection
|
||||||
collection_name = self._collection_name
|
collection_name = self._collection_name
|
||||||
self._client.create_collection(collection_name=collection_name,
|
self._client.create_collection_with_schema(collection_name=collection_name,
|
||||||
schema=schema, index_param=index_params,
|
schema=schema, index_param=index_params,
|
||||||
consistency_level=self._consistency_level)
|
consistency_level=self._consistency_level)
|
||||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||||
|
|
|
@ -55,7 +55,7 @@ xinference-client==0.9.4
|
||||||
safetensors~=0.4.3
|
safetensors~=0.4.3
|
||||||
zhipuai==1.0.7
|
zhipuai==1.0.7
|
||||||
werkzeug~=3.0.1
|
werkzeug~=3.0.1
|
||||||
pymilvus~=2.3.7
|
pymilvus==2.3.1
|
||||||
qdrant-client==1.7.3
|
qdrant-client==1.7.3
|
||||||
cohere~=5.2.4
|
cohere~=5.2.4
|
||||||
pyyaml~=6.0.1
|
pyyaml~=6.0.1
|
||||||
|
|
0
api/tests/integration_tests/vdb/milvus/__init__.py
Normal file
0
api/tests/integration_tests/vdb/milvus/__init__.py
Normal file
38
api/tests/integration_tests/vdb/milvus/test_milvus.py
Normal file
38
api/tests/integration_tests/vdb/milvus/test_milvus.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector
|
||||||
|
from models.dataset import Dataset
|
||||||
|
from tests.integration_tests.vdb.test_vector_store import (
|
||||||
|
get_sample_document,
|
||||||
|
get_sample_embedding,
|
||||||
|
get_sample_query_vector,
|
||||||
|
setup_mock_redis,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_milvus_vector(setup_mock_redis) -> None:
    """Exercise create, vector search and delete against a local Milvus server.

    Connects to ``localhost:19530`` as ``root`` / ``Milvus`` and works on a
    collection named after a freshly generated dataset id. The
    ``setup_mock_redis`` fixture is requested so the Redis client calls are
    mocked for the duration of the test.
    """
    dataset_id = str(uuid.uuid4())
    vector = MilvusVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        config=MilvusConfig(
            host='localhost',
            port=19530,
            user='root',
            password='Milvus',
        ),
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    # Clean up in ``finally`` so a failed assertion does not leave the
    # collection behind on the server.
    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # Milvus does not support full-text searching yet in < 2.3.x
    finally:
        # delete vector
        vector.delete()
|
0
api/tests/integration_tests/vdb/qdrant/__init__.py
Normal file
0
api/tests/integration_tests/vdb/qdrant/__init__.py
Normal file
40
api/tests/integration_tests/vdb/qdrant/test_qdrant.py
Normal file
40
api/tests/integration_tests/vdb/qdrant/test_qdrant.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
|
||||||
|
from models.dataset import Dataset
|
||||||
|
from tests.integration_tests.vdb.test_vector_store import (
|
||||||
|
get_sample_document,
|
||||||
|
get_sample_embedding,
|
||||||
|
get_sample_query_vector,
|
||||||
|
get_sample_text,
|
||||||
|
setup_mock_redis,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_qdrant_vector(setup_mock_redis) -> None:
    """Exercise create, vector search, full-text search and delete on Qdrant.

    Connects to ``http://localhost:6333`` with the API key ``difyai123456`` and
    works on a collection named after a freshly generated dataset id, which is
    also passed as the ``group_id``. The ``setup_mock_redis`` fixture is
    requested so the Redis client calls are mocked for the duration of the test.
    """
    dataset_id = str(uuid.uuid4())
    vector = QdrantVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        group_id=dataset_id,
        config=QdrantConfig(
            endpoint='http://localhost:6333',
            api_key='difyai123456',
        ),
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    # Clean up in ``finally`` so a failed assertion does not leave the
    # collection behind on the server.
    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # search by full text
        hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
        assert len(hits_by_full_text) >= 1
    finally:
        # delete vector
        vector.delete()
|
46
api/tests/integration_tests/vdb/test_vector_store.py
Normal file
46
api/tests/integration_tests/vdb/test_vector_store.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from core.rag.models.document import Document
|
||||||
|
from extensions import ext_redis
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_text() -> str:
    """Return the fixed text shared by the vector-store tests.

    The same string is used as the sample document's page content and as the
    query for the full-text search checks.
    """
    return 'test_text'
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_embedding() -> list[float]:
    """Return the fixed three-dimensional embedding stored with the sample document.

    A new list is built on every call, so callers may mutate the result.
    """
    return [1.1, 2.2, 3.3]
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_query_vector() -> list[float]:
    """Return the query vector for the search-by-vector checks.

    It is identical to the sample embedding, so the stored sample document is
    an exact match for the query.
    """
    return get_sample_embedding()
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_document(sample_dataset_id: str) -> Document:
    """Build the sample ``Document`` inserted by the vector-store tests.

    Args:
        sample_dataset_id: Identifier reused for every metadata field
            (``doc_id``, ``doc_hash``, ``document_id`` and ``dataset_id``).

    Returns:
        A ``Document`` whose page content is the shared sample text.
    """
    return Document(
        page_content=get_sample_text(),
        metadata={
            "doc_id": sample_dataset_id,
            "doc_hash": sample_dataset_id,
            "document_id": sample_dataset_id,
            "dataset_id": sample_dataset_id,
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def setup_mock_redis(monkeypatch) -> None:
    """Replace ``get``, ``set`` and ``lock`` on the shared Redis client with mocks.

    The attributes are patched through pytest's ``monkeypatch`` fixture, so the
    original methods are restored when the requesting test finishes instead of
    staying mocked for every test that runs afterwards.
    """
    redis_client = ext_redis.redis_client

    # get: always report a cache miss
    monkeypatch.setattr(redis_client, 'get', MagicMock(return_value=None), raising=False)

    # set: accept any write and do nothing
    monkeypatch.setattr(redis_client, 'set', MagicMock(return_value=None), raising=False)

    # lock: usable as a context manager
    mock_redis_lock = MagicMock()
    mock_redis_lock.__enter__ = MagicMock()
    # A bare MagicMock() returns another (truthy) mock from __exit__, which
    # would make a ``with`` block silently swallow exceptions; return False so
    # errors raised while the "lock" is held still propagate.
    mock_redis_lock.__exit__ = MagicMock(return_value=False)
    monkeypatch.setattr(redis_client, 'lock', mock_redis_lock, raising=False)
|
41
api/tests/integration_tests/vdb/weaviate/test_weaviate.py
Normal file
41
api/tests/integration_tests/vdb/weaviate/test_weaviate.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector
|
||||||
|
from models.dataset import Dataset
|
||||||
|
from tests.integration_tests.vdb.test_vector_store import (
|
||||||
|
get_sample_document,
|
||||||
|
get_sample_embedding,
|
||||||
|
get_sample_query_vector,
|
||||||
|
get_sample_text,
|
||||||
|
setup_mock_redis,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_weaviate_vector(setup_mock_redis) -> None:
    """Exercise create, vector search, full-text search and delete on Weaviate.

    Connects to ``http://localhost:8080`` with the API key given below and works
    on a collection named after a freshly generated dataset id. The
    ``setup_mock_redis`` fixture is requested so the Redis client calls are
    mocked for the duration of the test.
    """
    # Metadata attribute names handed to WeaviateVector; they match the keys
    # of the sample document's metadata.
    attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
    dataset_id = str(uuid.uuid4())
    vector = WeaviateVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        config=WeaviateConfig(
            endpoint='http://localhost:8080',
            api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih',
        ),
        attributes=attributes,
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    # Clean up in ``finally`` so a failed assertion does not leave the
    # collection behind on the server.
    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # search by full text
        hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
        assert len(hits_by_full_text) >= 1
    finally:
        # delete vector
        vector.delete()
|
|
@ -18,7 +18,7 @@ def test_default_value():
|
||||||
with pytest.raises(ValidationError) as e:
|
with pytest.raises(ValidationError) as e:
|
||||||
MilvusConfig(**config)
|
MilvusConfig(**config)
|
||||||
assert e.value.errors()[1]['msg'] == f'config MILVUS_{key.upper()} is required'
|
assert e.value.errors()[1]['msg'] == f'config MILVUS_{key.upper()} is required'
|
||||||
|
|
||||||
config = MilvusConfig(**valid_config)
|
config = MilvusConfig(**valid_config)
|
||||||
assert config.secure is False
|
assert config.secure is False
|
||||||
assert config.database == 'default'
|
assert config.database == 'default'
|
||||||
|
|
4
dev/pytest/pytest_vdb.sh
Executable file
4
dev/pytest/pytest_vdb.sh
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/bin/bash
# Run the vector-store integration tests.
# The test path is relative, so invoke this script from the repository root.

# Echo each command before it is executed.
set -x

pytest api/tests/integration_tests/vdb/
|
|
@ -36,7 +36,7 @@ services:
|
||||||
timeout: 20s
|
timeout: 20s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|
||||||
standalone:
|
milvus-standalone:
|
||||||
container_name: milvus-standalone
|
container_name: milvus-standalone
|
||||||
image: milvusdb/milvus:v2.3.1
|
image: milvusdb/milvus:v2.3.1
|
||||||
command: ["milvus", "run", "standalone"]
|
command: ["milvus", "run", "standalone"]
|
12
docker/docker-compose.qdrant.yaml
Normal file
12
docker/docker-compose.qdrant.yaml
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
version: '3'
|
||||||
|
services:
|
||||||
|
# Qdrant vector store.
|
||||||
|
qdrant:
|
||||||
|
image: langgenius/qdrant:v1.7.3
|
||||||
|
restart: always
|
||||||
|
volumes:
|
||||||
|
- ./volumes/qdrant:/qdrant/storage
|
||||||
|
environment:
|
||||||
|
QDRANT_API_KEY: 'difyai123456'
|
||||||
|
ports:
|
||||||
|
- "6333:6333"
|
Loading…
Reference in New Issue
Block a user