From 7ae728a9a3bd44baa365c89a0d86f5dee743a286 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Mon, 26 Aug 2024 15:14:05 +0800 Subject: [PATCH] fix nltk averaged_perceptron_tagger download and fix score limit is none (#7582) Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> --- api/.idea/vcs.xml | 1 + api/Dockerfile | 2 +- api/core/rag/retrieval/dataset_retrieval.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/api/.idea/vcs.xml b/api/.idea/vcs.xml index eaa7c25c60..b7af618884 100644 --- a/api/.idea/vcs.xml +++ b/api/.idea/vcs.xml @@ -12,5 +12,6 @@ + diff --git a/api/Dockerfile b/api/Dockerfile index 10a3dc2eed..cca6488679 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -65,7 +65,7 @@ COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV} ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" # Download nltk data -RUN python -c "import nltk; nltk.download('punkt')" +RUN python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')" # Copy source code COPY . /app/api/ diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index fc6d231f8e..c970e3dafa 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -616,6 +616,7 @@ class DatasetRetrieval: for document in all_documents: if score_threshold is None or document.metadata['score'] >= score_threshold: filter_documents.append(document) + if not filter_documents: return [] filter_documents = sorted(filter_documents, key=lambda x: x.metadata['score'], reverse=True)