From 0e6c10682119a813aac4aae12e70011bb916c1b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Fri, 6 Dec 2024 17:45:19 +0100 Subject: [PATCH] finops: Limit cost of vector store in AI Search --- app/persistence/ai_search.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/app/persistence/ai_search.py b/app/persistence/ai_search.py index 3c83dd0a..85ba0490 100644 --- a/app/persistence/ai_search.py +++ b/app/persistence/ai_search.py @@ -12,6 +12,8 @@ AzureOpenAIVectorizerParameters, HnswAlgorithmConfiguration, LexicalAnalyzerName, + RescoringOptions, + ScalarQuantizationCompression, SearchableField, SearchField, SearchFieldDataType, @@ -22,6 +24,7 @@ SemanticSearch, SimpleField, VectorSearch, + VectorSearchCompressionRescoreStorageMethod, VectorSearchProfile, ) from azure.search.documents.models import ( @@ -203,6 +206,7 @@ async def _use_client(self) -> SearchClient: SearchField( name="vectors", searchable=True, + stored=False, type=SearchFieldDataType.Collection(SearchFieldDataType.Single), vector_search_dimensions=self._config.embedding_dimensions, vector_search_profile_name="profile-default", @@ -212,6 +216,7 @@ async def _use_client(self) -> SearchClient: profiles=[ VectorSearchProfile( algorithm_configuration_name="algorithm-default", + compression_name="compression-scalar", name="profile-default", vectorizer_name="vectorizer-default", ), @@ -232,6 +237,17 @@ async def _use_client(self) -> SearchClient: ), ) ], + # Eliminate redundant vectors + # See: https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-storage-options + compressions=[ + ScalarQuantizationCompression( + compression_name="compression-scalar", + rescoring_options=RescoringOptions( + default_oversampling=10, + rescore_storage_method=VectorSearchCompressionRescoreStorageMethod.PRESERVE_ORIGINALS, + ), + ), + ], ) semantic_search = SemanticSearch( default_configuration_name=self._config.semantic_configuration,