Skip to content

Commit

Permalink
Fix Qdrant dataset migration issue
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnJyong committed Sep 20, 2023
1 parent b247335 commit d53d941
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 8 deletions.
2 changes: 1 addition & 1 deletion api/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ def deal_dataset_vector(flask_app: Flask, dataset: Dataset, normalization_count:
embeddings=embeddings
)
if index:
index.delete_by_group_id(dataset.id)
# index.delete_by_group_id(dataset.id)
index.restore_dataset_in_one(dataset, dataset_collection_binding)
else:
click.echo('passed.')
Expand Down
6 changes: 4 additions & 2 deletions api/core/index/vector_index/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,10 @@ def delete_by_ids(self, ids: list[str]) -> None:
def delete_by_group_id(self, group_id: str) -> None:
    """Delete the vectors associated with *group_id* from the vector store.

    If the dataset has a ``collection_binding_id`` — presumably meaning its
    vectors live in a shared collection alongside other datasets (TODO:
    confirm against the binding model) — only this group's vectors are
    removed. Otherwise the dataset owns its collection outright and the
    whole collection is deleted.

    :param group_id: identifier of the vector group to remove (callers in
        this commit pass ``dataset.id``)
    """
    vector_store = self._get_vector_store()
    vector_store = cast(self._get_vector_store_class(), vector_store)

    # NOTE(review): the pre-change unconditional ``vector_store.delete()``
    # was left interleaved in this span (diff residue); it is removed here
    # so deletion is scoped by the collection binding, as intended.
    if self.dataset.collection_binding_id:
        vector_store.delete_by_group_id(group_id)
    else:
        vector_store.delete()

def delete(self) -> None:
vector_store = self._get_vector_store()
Expand Down
3 changes: 2 additions & 1 deletion api/events/event_handlers/clean_when_dataset_deleted.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
@dataset_was_deleted.connect
def handle(sender, **kwargs):
    """Signal handler fired when a dataset is deleted.

    Enqueues the asynchronous ``clean_dataset_task`` with the fields the
    task needs, since the Dataset row itself is gone by the time the task
    runs. ``collection_binding_id`` is forwarded so the task can scope
    vector deletion appropriately.
    """
    dataset = sender
    # NOTE(review): the diff residue showed both the old 4-argument call and
    # the new 5-argument call; only the updated call is kept — enqueuing the
    # task twice would double-run the cleanup.
    clean_dataset_task.delay(dataset.id, dataset.tenant_id, dataset.indexing_technique,
                             dataset.index_struct, dataset.collection_binding_id)
13 changes: 10 additions & 3 deletions api/tasks/clean_dataset_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,29 @@


@shared_task(queue='dataset')
def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str, index_struct: str):
def clean_dataset_task(dataset_id: str, tenant_id: str, indexing_technique: str,
index_struct: str, collection_binding_id: str):
"""
Clean dataset when dataset deleted.
:param dataset_id: dataset id
:param tenant_id: tenant id
:param indexing_technique: indexing technique
:param index_struct: index struct dict
:param collection_binding_id: collection binding id
Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
"""
logging.info(click.style('Start clean dataset when dataset deleted: {}'.format(dataset_id), fg='green'))
start_at = time.perf_counter()

try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()

dataset = Dataset(
id=dataset_id,
tenant_id=tenant_id,
indexing_technique=indexing_technique,
index_struct=index_struct,
collection_binding_id=collection_binding_id
)
documents = db.session.query(Document).filter(Document.dataset_id == dataset_id).all()
segments = db.session.query(DocumentSegment).filter(DocumentSegment.dataset_id == dataset_id).all()

Expand Down
2 changes: 1 addition & 1 deletion api/tasks/deal_dataset_vector_index_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
raise Exception('Dataset not found')

if action == "remove":
index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=False)
index = IndexBuilder.get_index(dataset, 'high_quality', ignore_high_quality_check=True)
index.delete_by_group_id(dataset.id)
elif action == "add":
dataset_documents = db.session.query(DatasetDocument).filter(
Expand Down

0 comments on commit d53d941

Please sign in to comment.