test: add unit tests for vector stores of Milvus, Qdrant and Weaviate (#3688)

This commit is contained in:
Bowen Liang 2024-04-24 21:52:42 +08:00 committed by GitHub
parent 8ca5aa1190
commit 9cec8c1750
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 211 additions and 6 deletions

View File

@ -37,6 +37,27 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
# Start the vector database services with Docker Compose before the test steps run.
# Weaviate comes from docker/docker-compose.middleware.yaml.
- name: Set up Weaviate
uses: hoverkraft-tech/compose-action@v2.0.0
with:
compose-file: docker/docker-compose.middleware.yaml
services: weaviate
# Qdrant has its own compose file.
- name: Set up Qdrant
uses: hoverkraft-tech/compose-action@v2.0.0
with:
compose-file: docker/docker-compose.qdrant.yaml
services: qdrant
# Milvus is started together with the etcd and minio services from the same compose file.
- name: Set up Milvus
uses: hoverkraft-tech/compose-action@v2.0.0
with:
compose-file: docker/docker-compose.milvus.yaml
services: |
etcd
minio
milvus-standalone
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
@ -49,6 +70,9 @@ jobs:
- name: Install dependencies
run: pip install -r ./api/requirements.txt -r ./api/requirements-dev.txt
- name: Run Unit tests
run: dev/pytest/pytest_unit_tests.sh
- name: Run ModelRuntime
run: dev/pytest/pytest_model_runtime.sh
@ -58,5 +82,5 @@ jobs:
- name: Run Workflow
run: dev/pytest/pytest_workflow.sh
- name: Run Unit tests
run: dev/pytest/pytest_unit_tests.sh
- name: Run Vector Stores
run: dev/pytest/pytest_vdb.sh

View File

@ -250,7 +250,7 @@ class MilvusVector(BaseVector):
# Create the collection
collection_name = self._collection_name
self._client.create_collection(collection_name=collection_name,
self._client.create_collection_with_schema(collection_name=collection_name,
schema=schema, index_param=index_params,
consistency_level=self._consistency_level)
redis_client.set(collection_exist_cache_key, 1, ex=3600)

View File

@ -55,7 +55,7 @@ xinference-client==0.9.4
safetensors~=0.4.3
zhipuai==1.0.7
werkzeug~=3.0.1
pymilvus~=2.3.7
pymilvus==2.3.1
qdrant-client==1.7.3
cohere~=5.2.4
pyyaml~=6.0.1

View File

@ -0,0 +1,38 @@
import uuid
from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector
from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import (
get_sample_document,
get_sample_embedding,
get_sample_query_vector,
setup_mock_redis,
)
def test_milvus_vector(setup_mock_redis) -> None:
    """Round-trip one sample document through a Milvus server on localhost:19530.

    A collection named after a random dataset id is created with a single
    sample document and embedding, then queried by vector. The test requires
    a reachable Milvus instance and the ``setup_mock_redis`` fixture, which
    stubs the Redis client.
    """
    dataset_id = str(uuid.uuid4())
    vector = MilvusVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        config=MilvusConfig(
            host='localhost',
            port=19530,
            user='root',
            password='Milvus',
        )
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # Milvus does not support full-text search yet in versions < 2.3.x,
        # so search_by_full_text is not exercised here.
    finally:
        # delete vector -- runs even when a search or assertion above fails,
        # so data created by this test is not left behind.
        vector.delete()

View File

@ -0,0 +1,40 @@
import uuid
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import (
get_sample_document,
get_sample_embedding,
get_sample_query_vector,
get_sample_text,
setup_mock_redis,
)
def test_qdrant_vector(setup_mock_redis) -> None:
    """Round-trip one sample document through a Qdrant server on localhost:6333.

    A collection named after a random dataset id is created with a single
    sample document and embedding, then queried both by vector and by full
    text. The test requires a reachable Qdrant instance and the
    ``setup_mock_redis`` fixture, which stubs the Redis client.
    """
    dataset_id = str(uuid.uuid4())
    vector = QdrantVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        group_id=dataset_id,
        config=QdrantConfig(
            endpoint='http://localhost:6333',
            api_key='difyai123456',
        )
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # search by full text
        hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
        assert len(hits_by_full_text) >= 1
    finally:
        # delete vector -- runs even when a search or assertion above fails,
        # so data created by this test is not left behind.
        vector.delete()

View File

@ -0,0 +1,46 @@
from unittest.mock import MagicMock
import pytest
from core.rag.models.document import Document
from extensions import ext_redis
def get_sample_text() -> str:
    """Return the fixed text used as sample document content and full-text query."""
    sample_text = 'test_text'
    return sample_text
def get_sample_embedding() -> list[float]:
    """Return a fresh three-component embedding for the sample document."""
    embedding: list[float] = [1.1, 2.2, 3.3]
    return embedding
def get_sample_query_vector() -> list[float]:
    """Return the query vector for searches; it equals the sample embedding."""
    query_vector = get_sample_embedding()
    return query_vector
def get_sample_document(sample_dataset_id: str) -> Document:
    """Build the sample Document whose metadata ids all equal ``sample_dataset_id``.

    The page content is the sample text; ``doc_id``, ``doc_hash``,
    ``document_id`` and ``dataset_id`` are all set to the given id.
    """
    metadata_keys = ("doc_id", "doc_hash", "document_id", "dataset_id")
    return Document(
        page_content=get_sample_text(),
        metadata=dict.fromkeys(metadata_keys, sample_dataset_id),
    )
@pytest.fixture
def setup_mock_redis(monkeypatch) -> None:
    """Stub ``get``, ``set`` and ``lock`` on the shared Redis client for one test.

    The replacements are installed through pytest's ``monkeypatch`` fixture,
    so the original attributes are restored when the test finishes instead of
    leaking mocks into tests that run later in the same session.
    """
    redis_client = ext_redis.redis_client

    # raising=False keeps the behaviour of plain assignment: the attribute is
    # set even if the client object does not currently define it.
    # get
    monkeypatch.setattr(redis_client, 'get', MagicMock(return_value=None), raising=False)

    # set
    monkeypatch.setattr(redis_client, 'set', MagicMock(return_value=None), raising=False)

    # lock -- calling the mock returns another MagicMock, which already
    # supports the context-manager protocol, so `with redis_client.lock(...)`
    # works without configuring __enter__/__exit__ by hand.
    monkeypatch.setattr(redis_client, 'lock', MagicMock(), raising=False)

View File

@ -0,0 +1,41 @@
import uuid
from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector
from models.dataset import Dataset
from tests.integration_tests.vdb.test_vector_store import (
get_sample_document,
get_sample_embedding,
get_sample_query_vector,
get_sample_text,
setup_mock_redis,
)
def test_weaviate_vector(setup_mock_redis) -> None:
    """Round-trip one sample document through a Weaviate server on localhost:8080.

    A collection named after a random dataset id is created with a single
    sample document and embedding, then queried both by vector and by full
    text. The test requires a reachable Weaviate instance and the
    ``setup_mock_redis`` fixture, which stubs the Redis client.
    """
    attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
    dataset_id = str(uuid.uuid4())
    vector = WeaviateVector(
        collection_name=Dataset.gen_collection_name_by_id(dataset_id),
        config=WeaviateConfig(
            endpoint='http://localhost:8080',
            api_key='WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih',
        ),
        attributes=attributes
    )

    # create vector
    vector.create(
        texts=[get_sample_document(dataset_id)],
        embeddings=[get_sample_embedding()],
    )

    try:
        # search by vector
        hits_by_vector = vector.search_by_vector(query_vector=get_sample_query_vector())
        assert len(hits_by_vector) >= 1

        # search by full text
        hits_by_full_text = vector.search_by_full_text(query=get_sample_text())
        assert len(hits_by_full_text) >= 1
    finally:
        # delete vector -- runs even when a search or assertion above fails,
        # so data created by this test is not left behind.
        vector.delete()

4
dev/pytest/pytest_vdb.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Runs the vector database tests under api/tests/integration_tests/vdb/.
# The test path is relative, so run this script from the repository root.
# Echo each command before it is executed.
set -x
pytest api/tests/integration_tests/vdb/

View File

@ -36,7 +36,7 @@ services:
timeout: 20s
retries: 3
standalone:
milvus-standalone:
container_name: milvus-standalone
image: milvusdb/milvus:v2.3.1
command: ["milvus", "run", "standalone"]

View File

@ -0,0 +1,12 @@
# Compose definition with a single Qdrant service.
# NOTE(review): presumably this is docker/docker-compose.qdrant.yaml used by the CI workflow -- confirm.
version: '3'
services:
# Qdrant vector store.
qdrant:
image: langgenius/qdrant:v1.7.3
restart: always
volumes:
# Keep Qdrant's storage directory on the host under ./volumes/qdrant.
- ./volumes/qdrant:/qdrant/storage
environment:
# The Qdrant test in this change passes the same value as api_key.
QDRANT_API_KEY: 'difyai123456'
ports:
# Publish port 6333; the Qdrant test connects to http://localhost:6333.
- "6333:6333"