diff --git a/api/commands.py b/api/commands.py
index a68ccf63993ea3..d5a0e47858ae5b 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -534,7 +534,7 @@ def deal_dataset_vector(flask_app: Flask, dataset: Dataset, normalization_count:
                 embeddings=embeddings
             )
             if index:
-                index.delete_by_group_id(dataset.id)
+                # index.delete_by_group_id(dataset.id)
                 index.restore_dataset_in_one(dataset, dataset_collection_binding)
             else:
                 click.echo('passed.')
diff --git a/api/core/index/vector_index/base.py b/api/core/index/vector_index/base.py
index 1e59135f37ed13..8fd1dee79a2099 100644
--- a/api/core/index/vector_index/base.py
+++ b/api/core/index/vector_index/base.py
@@ -113,8 +113,10 @@ def delete_by_ids(self, ids: list[str]) -> None:
     def delete_by_group_id(self, group_id: str) -> None:
         vector_store = self._get_vector_store()
         vector_store = cast(self._get_vector_store_class(), vector_store)
-
-        vector_store.delete()
+        if self.dataset.collection_binding_id:
+            vector_store.delete_by_group_id(group_id)
+        else:
+            vector_store.delete()
 
     def delete(self) -> None:
         vector_store = self._get_vector_store()
diff --git a/api/events/event_handlers/clean_when_dataset_deleted.py b/api/events/event_handlers/clean_when_dataset_deleted.py
index e9975c92bc47d1..93181ea1617671 100644
--- a/api/events/event_handlers/clean_when_dataset_deleted.py
+++ b/api/events/event_handlers/clean_when_dataset_deleted.py
@@ -5,4 +5,5 @@
 @dataset_was_deleted.connect
 def handle(sender, **kwargs):
     dataset = sender
-    clean_dataset_task.delay(dataset.id, dataset.tenant_id, dataset.indexing_technique, dataset.index_struct)
+    clean_dataset_task.delay(dataset.id, dataset.tenant_id, dataset.indexing_technique,
+                             dataset.index_struct, dataset.collection_binding_id)
diff --git a/api/tasks/clean_dataset_task.py b/api/tasks/clean_dataset_task.py
index 43217a30b269d3..8f5e37f49bc115 100644
--- a/api/tasks/clean_dataset_task.py
+++ b/api/tasks/clean_dataset_task.py
@@ -13,13 +13,15 @@
 
 
 @shared_task(queue='dataset')
-def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str, index_struct: str):
+def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
+                       index_struct: str, collection_binding_id: str):
     """
     Clean dataset when dataset deleted.
     :param dataset_id: dataset id
     :param tenant_id: tenant id
     :param indexing_technique: indexing technique
     :param index_struct: index struct dict
+    :param collection_binding_id: collection binding id
 
     Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
     """
@@ -27,8 +29,13 @@ def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
     start_at = time.perf_counter()
 
     try:
-        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
-
+        dataset = Dataset(
+            id=dataset_id,
+            tenant_id=tenant_id,
+            indexing_technique=indexing_technique,
+            index_struct=index_struct,
+            collection_binding_id=collection_binding_id
+        )
         documents = db.session.query(Document).filter(Document.dataset_id == dataset_id).all()
         segments = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all()
 
diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py
index 7a45bf7475978e..6a3b52a40b5d90 100644
--- a/api/tasks/deal_dataset_vector_index_task.py
+++ b/api/tasks/deal_dataset_vector_index_task.py
@@ -31,7 +31,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
             raise Exception('Dataset not found')
 
         if action == "remove":
-            index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=False)
+            index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
            index.delete_by_group_id(dataset.id)
         elif action == "add":
             dataset_documents = db.session.query(DatasetDocument).filter(