diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh index d3146cd90dc02b..ae85305a747b69 100755 --- a/.github/workflows/expose_service_ports.sh +++ b/.github/workflows/expose_service_ports.sh @@ -9,6 +9,5 @@ yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compos yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml yq eval '.services.couchbase-server.ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml yq eval '.services.couchbase-server.ports += ["11210:11210"]' -i docker/docker-compose.yaml -yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/docker-compose.yaml echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase" diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index 146bee95f21c5f..4800b6e88ac020 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -51,7 +51,15 @@ jobs: - name: Expose Service Ports run: sh .github/workflows/expose_service_ports.sh - - name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase) + - name: Set up Vector Store (TiDB) + uses: hoverkraft-tech/compose-action@v2.0.2 + with: + compose-file: docker/tidb/docker-compose.yaml + services: | + tidb + tiflash + + - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase) uses: hoverkraft-tech/compose-action@v2.0.2 with: compose-file: | @@ -67,7 +75,9 @@ jobs: pgvector chroma elasticsearch - tidb + + - name: Check TiDB Ready + run: poetry run -C api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py - name: Test Vector Stores run: poetry run -C api bash dev/pytest/pytest_vdb.sh diff --git a/.gitignore b/.gitignore index 1423bfee56e922..60dd2cbeb33ff2 100644 --- a/.gitignore +++ b/.gitignore @@ 
-163,6 +163,8 @@ docker/volumes/db/data/* docker/volumes/redis/data/* docker/volumes/weaviate/* docker/volumes/qdrant/* +docker/tidb/data/* +docker/tidb/logs/* docker/volumes/etcd/* docker/volumes/minio/* docker/volumes/milvus/* diff --git a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py index be3a417390e802..3ecb34e2f4f680 100644 --- a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py +++ b/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py @@ -104,14 +104,14 @@ def _create_collection(self, dimension: int): text TEXT NOT NULL, meta JSON NOT NULL, doc_id VARCHAR(64) AS (JSON_UNQUOTE(JSON_EXTRACT(meta, '$.doc_id'))) STORED, - KEY (doc_id), vector VECTOR({dimension}) NOT NULL COMMENT "hnsw(distance={self._distance_func})", create_time DATETIME DEFAULT CURRENT_TIMESTAMP, - update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP + update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + KEY (doc_id), + VECTOR INDEX idx_vector ((VEC_COSINE_DISTANCE(vector))) USING HNSW ); """) session.execute(create_statement) - # tidb vector not support 'CREATE/ADD INDEX' now session.commit() redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git a/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py b/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py new file mode 100644 index 00000000000000..84e8feb6db37ec --- /dev/null +++ b/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py @@ -0,0 +1,48 @@ +import time + +import pymysql + + +def create_table(): + try: + connection = pymysql.connect( + host="localhost", + port=4000, + user="root", + password="", + ) + + with connection.cursor() as cursor: + table_name = "test.foo_vector_table" + create_table_query = f""" + CREATE TABLE {table_name} ( + id INT PRIMARY KEY, + embedding VECTOR(5), + VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(embedding))) + ); + """ + 
drop_table_query = f"DROP TABLE {table_name};" + cursor.execute(create_table_query) + cursor.execute(drop_table_query) + connection.commit() + print("TiFlash is ready in TiDB.") + finally: + if connection: + connection.close() + + +def main(): + attempts = 30 + retry_wait_seconds = 2 + for attempt in range(attempts): + try: + create_table() + break + except Exception as e: + print(f"TiFlash is not ready. Exception: {e}") + print(f"Attempt {attempt + 1} failed, retry in {retry_wait_seconds} seconds...") + time.sleep(retry_wait_seconds) + + +if __name__ == "__main__": + main() diff --git a/docker/tidb/config/pd-nightly-tiflash.toml b/docker/tidb/config/pd-nightly-tiflash.toml new file mode 100644 index 00000000000000..511b69d8e3b1a8 --- /dev/null +++ b/docker/tidb/config/pd-nightly-tiflash.toml @@ -0,0 +1,2 @@ +[replication] +max-replicas = 1 \ No newline at end of file diff --git a/docker/tidb/config/tiflash-learner-nightly.toml b/docker/tidb/config/tiflash-learner-nightly.toml new file mode 100644 index 00000000000000..9bbb1569a4613c --- /dev/null +++ b/docker/tidb/config/tiflash-learner-nightly.toml @@ -0,0 +1,10 @@ +log-file = "/logs/tiflash_tikv.log" + +[server] +engine-addr = "tiflash:4030" +addr = "0.0.0.0:20280" +advertise-addr = "tiflash:20280" +status-addr = "tiflash:20292" + +[storage] +data-dir = "/data/flash" diff --git a/docker/tidb/config/tiflash-nightly.toml b/docker/tidb/config/tiflash-nightly.toml new file mode 100644 index 00000000000000..195dc7d78981bd --- /dev/null +++ b/docker/tidb/config/tiflash-nightly.toml @@ -0,0 +1,63 @@ +default_profile = "default" +display_name = "TiFlash" +listen_host = "0.0.0.0" +mark_cache_size = 5368709120 +tmp_path = "/data/tmp" +path = "/data" +tcp_port = 9110 +http_port = 8223 + +[flash] +tidb_status_addr = "tidb:10080" +service_addr = "tiflash:4030" + +[flash.flash_cluster] +cluster_manager_path = "/tiflash/flash_cluster_manager" +log = "/logs/tiflash_cluster_manager.log" +master_ttl = 60 +refresh_interval = 20
+update_rule_interval = 5 + +[flash.proxy] +config = "/tiflash-learner.toml" + +[status] +metrics_port = 8234 + +[logger] +errorlog = "/logs/tiflash_error.log" +log = "/logs/tiflash.log" +count = 20 +level = "debug" +size = "1000M" + +[raft] +pd_addr = "pd0:2379" +storage_engine = "tmt" + + +[users] + +[users.default] +password = "" +profile = "default" +quota = "default" + +[users.default.networks] +ip = "::/0" + +[users.readonly] +password = "" +profile = "readonly" +quota = "default" + +[users.readonly.networks] +ip = "::/0" + +[profiles] + +[profiles.default] +load_balancing = "random" + +[profiles.readonly] +readonly = 1 diff --git a/docker/tidb/docker-compose.yaml b/docker/tidb/docker-compose.yaml new file mode 100644 index 00000000000000..6ccc74cb7c9a7b --- /dev/null +++ b/docker/tidb/docker-compose.yaml @@ -0,0 +1,66 @@ +version: '2.1' + +services: + pd0: + image: pingcap/pd:v8.5.0 + ports: + - "2379" + volumes: + - ./config/pd-nightly-tiflash.toml:/pd.toml:ro + - ./data:/data + - ./logs:/logs + command: + - --name=pd0 + - --client-urls=http://0.0.0.0:2379 + - --peer-urls=http://0.0.0.0:2380 + - --advertise-client-urls=http://pd0:2379 + - --advertise-peer-urls=http://pd0:2380 + - --initial-cluster=pd0=http://pd0:2380 + - --data-dir=/data/pd + - --config=/pd.toml + - --log-file=/logs/pd.log + restart: on-failure + tikv: + image: pingcap/tikv:v8.5.0 + volumes: + - ./data:/data + - ./logs:/logs + command: + - --addr=0.0.0.0:20160 + - --advertise-addr=tikv:20160 + - --status-addr=tikv:20180 + - --data-dir=/data/tikv + - --pd=pd0:2379 + - --log-file=/logs/tikv.log + depends_on: + - "pd0" + restart: on-failure + tidb: + image: pingcap/tidb:v8.5.0 + ports: + - "4000:4000" + - "10080:10080" + volumes: + - ./logs:/logs + command: + - --status=10080 + - --advertise-address=tidb + - --store=tikv + - --path=pd0:2379 + - --log-file=/logs/tidb.log + depends_on: + - "tikv" + restart: on-failure + tiflash: + image: pingcap/tiflash:v8.5.0 + volumes: + - 
./config/tiflash-nightly.toml:/tiflash.toml:ro + - ./config/tiflash-learner-nightly.toml:/tiflash-learner.toml:ro + - ./data:/data + - ./logs:/logs + command: + - --config=/tiflash.toml + depends_on: + - "tikv" + - "tidb" + restart: on-failure