From 648f641f1f546dff19c5cb7310cafdd708bf52e5 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Fri, 19 Jul 2024 19:03:33 +0800 Subject: [PATCH 1/5] update clean embedding cache query logic --- api/schedule/clean_embedding_cache_task.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/api/schedule/clean_embedding_cache_task.py b/api/schedule/clean_embedding_cache_task.py index f68c54600a6012..8af18a1a205327 100644 --- a/api/schedule/clean_embedding_cache_task.py +++ b/api/schedule/clean_embedding_cache_task.py @@ -8,6 +8,7 @@ from configs import dify_config from extensions.ext_database import db from models.dataset import Embedding +from sqlalchemy import text @app.celery.task(queue='dataset') @@ -18,12 +19,18 @@ def clean_embedding_cache_task(): thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days) while True: try: - embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \ + embedding_ids = db.session.query(Embedding.id).filter(Embedding.created_at < thirty_days_ago) \ .order_by(Embedding.created_at.desc()).limit(100).all() + embedding_ids = [embedding_id[0] for embedding_id in embedding_ids] except NotFound: break - for embedding in embeddings: - db.session.delete(embedding) - db.session.commit() + if embedding_ids: + db.session.execute(text( + "DELETE FROM embeddings WHERE id in :embedding_ids" + ), {'embedding_ids': tuple(embedding_ids)}) + + db.session.commit() + else: + break end_at = time.perf_counter() click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green')) From ac7a7118dd0cf3c70f5360b264e7ad1eca0bd67c Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Fri, 19 Jul 2024 19:04:27 +0800 Subject: [PATCH 2/5] update clean embedding cache query logic --- api/schedule/clean_embedding_cache_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/schedule/clean_embedding_cache_task.py b/api/schedule/clean_embedding_cache_task.py index 8af18a1a205327..49b1be3a339b8a 100644 --- a/api/schedule/clean_embedding_cache_task.py +++ b/api/schedule/clean_embedding_cache_task.py @@ -2,13 +2,13 @@ import time import click +from sqlalchemy import text from werkzeug.exceptions import NotFound import app from configs import dify_config from extensions.ext_database import db from models.dataset import Embedding -from sqlalchemy import text @app.celery.task(queue='dataset') From 5b0de41a9e9e526a2c9314bb473b4b04396fee51 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Fri, 19 Jul 2024 19:10:28 +0800 Subject: [PATCH 3/5] update clean embedding cache query logic --- api/models/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/models/dataset.py b/api/models/dataset.py index d0be005a154878..34dde2dcef737f 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -630,7 +630,8 @@ class Embedding(db.Model): __tablename__ = 'embeddings' __table_args__ = ( db.PrimaryKeyConstraint('id', name='embedding_pkey'), - db.UniqueConstraint('model_name', 'hash', 'provider_name', name='embedding_hash_idx') + db.UniqueConstraint('model_name', 'hash', 'provider_name', name='embedding_hash_idx'), + db.Index('created_at_idx', 'created_at') ) id = db.Column(StringUUID, primary_key=True, server_default=db.text('uuid_generate_v4()')) From 049a7ac5e0c210fe8281a8c3dff5efce22744705 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Sat, 20 Jul 2024 01:22:08 +0800 Subject: [PATCH 4/5] add embeddings index created_at --- ...5b_add_embedding_cache_created_at_index.py | 33 +++++++++++++++++++ api/models/model.py | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py diff --git a/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py b/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py new file mode 100644 index 00000000000000..5ce8806fe49501 --- /dev/null +++ b/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py @@ -0,0 +1,33 @@ +"""add-embedding-cache-created_at_index + +Revision ID: 6e957a32015b +Revises: fecff1c3da27 +Create Date: 2024-07-19 17:21:34.414705 + +""" +from alembic import op +import models as models +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '6e957a32015b' +down_revision = 'fecff1c3da27' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.create_index('created_at_idx', ['created_at'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.drop_index('created_at_idx') + + # ### end Alembic commands ### diff --git a/api/models/model.py b/api/models/model.py index 331bb91c29819f..396cd7ec6382c3 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -1383,7 +1383,7 @@ class TraceAppConfig(db.Model): __tablename__ = 'trace_app_config' __table_args__ = ( db.PrimaryKeyConstraint('id', name='tracing_app_config_pkey'), - db.Index('tracing_app_config_app_id_idx', 'app_id'), + db.Index('trace_app_config_app_id_idx', 'app_id'), ) id = db.Column(StringUUID, server_default=db.text('uuid_generate_v4()')) From 0b6f2698fe2514a19915b4d4aaaa1ec6081b52ac Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Sat, 20 Jul 2024 01:23:36 +0800 Subject: [PATCH 5/5] add embeddings index created_at --- .../6e957a32015b_add_embedding_cache_created_at_index.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py b/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py index 5ce8806fe49501..7445f664cd75a1 100644 --- a/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py +++ b/api/migrations/versions/6e957a32015b_add_embedding_cache_created_at_index.py @@ -6,9 +6,8 @@ """ from alembic import op -import models as models -import sqlalchemy as sa +import models as models # revision identifiers, used by Alembic. revision = '6e957a32015b'