Skip to content

Commit

Permalink
feat: add retrieval_top_n to config in env
Browse files Browse the repository at this point in the history
  • Loading branch information
ProseGuys committed Nov 26, 2024
1 parent 208d6d6 commit c26ed3e
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 4 deletions.
4 changes: 3 additions & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -410,4 +410,6 @@ POSITION_PROVIDER_EXCLUDES=
# Reset password token expiry minutes
RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5

CREATE_TIDB_SERVICE_JOB_ENABLED=false
CREATE_TIDB_SERVICE_JOB_ENABLED=false

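# Optional override for the top_n passed to the retrieval post-processor;
# leave empty to keep the per-request value
RETRIEVAL_TOP_N=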
5 changes: 5 additions & 0 deletions api/configs/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,11 @@ class DataSetConfig(BaseSettings):
PLAN_SANDBOX_CLEAN_MESSAGE_DAY_SETTING: PositiveInt = Field(
description="Interval in days for message cleanup operations - plan: sandbox",
default=30,
)

RETRIEVAL_TOP_N: Optional[PositiveInt] = Field(
description="number of retrieval top_n",
default=None
)


Expand Down
18 changes: 15 additions & 3 deletions api/core/rag/datasource/retrieval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from flask import Flask, current_app

from configs import DifyConfig
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector
Expand Down Expand Up @@ -105,13 +106,18 @@ def retrieve(
exception_message = ";\n".join(exceptions)
raise Exception(exception_message)


if retrieval_method == RetrievalMethod.HYBRID_SEARCH.value:
data_post_processor = DataPostProcessor(
str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
)
all_documents = data_post_processor.invoke(
query=query, documents=all_documents, score_threshold=score_threshold, top_n=top_k
query=query,
documents=all_documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N if DifyConfig.RETRIEVAL_TOP_N else top_k
)

return all_documents

@classmethod
Expand Down Expand Up @@ -178,7 +184,10 @@ def embedding_search(
)
all_documents.extend(
data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
query=query,
documents=documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N if DifyConfig.RETRIEVAL_TOP_N else len(documents)
)
)
else:
Expand Down Expand Up @@ -220,7 +229,10 @@ def full_text_index_search(
)
all_documents.extend(
data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
query=query,
documents=documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N if DifyConfig.RETRIEVAL_TOP_N else len(documents)
)
)
else:
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ x-shared-env: &shared-api-worker-env
OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false}
RETRIEVAL_TOP_N: ${RETRIEVAL_TOP_N:-}

services:
# API service
Expand Down

0 comments on commit c26ed3e

Please sign in to comment.