mirror of
https://github.com/langgenius/dify.git
synced 2024-11-16 11:42:29 +08:00
feat: add WEAVIATE_BATCH_SIZE (#349)
This commit is contained in:
parent
6a3ab36101
commit
cd136fb293
|
@ -72,6 +72,7 @@ VECTOR_STORE=weaviate
|
||||||
WEAVIATE_ENDPOINT=http://localhost:8080
|
WEAVIATE_ENDPOINT=http://localhost:8080
|
||||||
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
|
||||||
WEAVIATE_GRPC_ENABLED=false
|
WEAVIATE_GRPC_ENABLED=false
|
||||||
|
WEAVIATE_BATCH_SIZE=100
|
||||||
|
|
||||||
# Qdrant configuration, use `path:` prefix for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
|
# Qdrant configuration, use `path:` prefix for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
|
||||||
QDRANT_URL=path:storage/qdrant
|
QDRANT_URL=path:storage/qdrant
|
||||||
|
|
|
@ -43,6 +43,7 @@ DEFAULTS = {
|
||||||
'SENTRY_TRACES_SAMPLE_RATE': 1.0,
|
'SENTRY_TRACES_SAMPLE_RATE': 1.0,
|
||||||
'SENTRY_PROFILES_SAMPLE_RATE': 1.0,
|
'SENTRY_PROFILES_SAMPLE_RATE': 1.0,
|
||||||
'WEAVIATE_GRPC_ENABLED': 'True',
|
'WEAVIATE_GRPC_ENABLED': 'True',
|
||||||
|
'WEAVIATE_BATCH_SIZE': 100,
|
||||||
'CELERY_BACKEND': 'database',
|
'CELERY_BACKEND': 'database',
|
||||||
'PDF_PREVIEW': 'True',
|
'PDF_PREVIEW': 'True',
|
||||||
'LOG_LEVEL': 'INFO',
|
'LOG_LEVEL': 'INFO',
|
||||||
|
@ -138,6 +139,7 @@ class Config:
|
||||||
self.WEAVIATE_ENDPOINT = get_env('WEAVIATE_ENDPOINT')
|
self.WEAVIATE_ENDPOINT = get_env('WEAVIATE_ENDPOINT')
|
||||||
self.WEAVIATE_API_KEY = get_env('WEAVIATE_API_KEY')
|
self.WEAVIATE_API_KEY = get_env('WEAVIATE_API_KEY')
|
||||||
self.WEAVIATE_GRPC_ENABLED = get_bool_env('WEAVIATE_GRPC_ENABLED')
|
self.WEAVIATE_GRPC_ENABLED = get_bool_env('WEAVIATE_GRPC_ENABLED')
|
||||||
|
self.WEAVIATE_BATCH_SIZE = int(get_env('WEAVIATE_BATCH_SIZE'))
|
||||||
|
|
||||||
# qdrant settings
|
# qdrant settings
|
||||||
self.QDRANT_URL = get_env('QDRANT_URL')
|
self.QDRANT_URL = get_env('QDRANT_URL')
|
||||||
|
|
|
@ -27,7 +27,8 @@ class VectorStore:
|
||||||
self._client = WeaviateVectorStoreClient(
|
self._client = WeaviateVectorStoreClient(
|
||||||
endpoint=app.config['WEAVIATE_ENDPOINT'],
|
endpoint=app.config['WEAVIATE_ENDPOINT'],
|
||||||
api_key=app.config['WEAVIATE_API_KEY'],
|
api_key=app.config['WEAVIATE_API_KEY'],
|
||||||
grpc_enabled=app.config['WEAVIATE_GRPC_ENABLED']
|
grpc_enabled=app.config['WEAVIATE_GRPC_ENABLED'],
|
||||||
|
batch_size=app.config['WEAVIATE_BATCH_SIZE']
|
||||||
)
|
)
|
||||||
elif self._vector_store == 'qdrant':
|
elif self._vector_store == 'qdrant':
|
||||||
self._client = QdrantVectorStoreClient(
|
self._client = QdrantVectorStoreClient(
|
||||||
|
|
|
@ -18,10 +18,10 @@ from llama_index.readers.weaviate.utils import (
|
||||||
|
|
||||||
class WeaviateVectorStoreClient(BaseVectorStoreClient):
|
class WeaviateVectorStoreClient(BaseVectorStoreClient):
|
||||||
|
|
||||||
def __init__(self, endpoint: str, api_key: str, grpc_enabled: bool):
|
def __init__(self, endpoint: str, api_key: str, grpc_enabled: bool, batch_size: int):
|
||||||
self._client = self.init_from_config(endpoint, api_key, grpc_enabled)
|
self._client = self.init_from_config(endpoint, api_key, grpc_enabled, batch_size)
|
||||||
|
|
||||||
def init_from_config(self, endpoint: str, api_key: str, grpc_enabled: bool):
|
def init_from_config(self, endpoint: str, api_key: str, grpc_enabled: bool, batch_size: int):
|
||||||
auth_config = weaviate.auth.AuthApiKey(api_key=api_key)
|
auth_config = weaviate.auth.AuthApiKey(api_key=api_key)
|
||||||
|
|
||||||
weaviate.connect.connection.has_grpc = grpc_enabled
|
weaviate.connect.connection.has_grpc = grpc_enabled
|
||||||
|
@ -36,7 +36,7 @@ class WeaviateVectorStoreClient(BaseVectorStoreClient):
|
||||||
client.batch.configure(
|
client.batch.configure(
|
||||||
# `batch_size` takes an `int` value to enable auto-batching
|
# `batch_size` takes an `int` value to enable auto-batching
|
||||||
# (`None` is used for manual batching)
|
# (`None` is used for manual batching)
|
||||||
batch_size=100,
|
batch_size=batch_size,
|
||||||
# dynamically update the `batch_size` based on import speed
|
# dynamically update the `batch_size` based on import speed
|
||||||
dynamic=True,
|
dynamic=True,
|
||||||
# `timeout_retries` takes an `int` value to retry on timeouts
|
# `timeout_retries` takes an `int` value to retry on timeouts
|
||||||
|
|
Loading…
Reference in New Issue
Block a user