finops: Limit cost of vector store in AI Search

microsoft · Dec 6, 2024 · 0e6c106 · 0e6c106
1 parent 02e2393
commit 0e6c106
Showing 1 changed file with 16 additions and 0 deletions.
diff --git a/app/persistence/ai_search.py b/app/persistence/ai_search.py
@@ -12,6 +12,8 @@
     AzureOpenAIVectorizerParameters,
     HnswAlgorithmConfiguration,
     LexicalAnalyzerName,
+    RescoringOptions,
+    ScalarQuantizationCompression,
     SearchableField,
     SearchField,
     SearchFieldDataType,
@@ -22,6 +24,7 @@
     SemanticSearch,
     SimpleField,
     VectorSearch,
+    VectorSearchCompressionRescoreStorageMethod,
     VectorSearchProfile,
 )
 from azure.search.documents.models import (
@@ -203,6 +206,7 @@ async def _use_client(self) -> SearchClient:
             SearchField(
                 name="vectors",
                 searchable=True,
+                stored=False,
                 type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                 vector_search_dimensions=self._config.embedding_dimensions,
                 vector_search_profile_name="profile-default",
@@ -212,6 +216,7 @@ async def _use_client(self) -> SearchClient:
             profiles=[
                 VectorSearchProfile(
                     algorithm_configuration_name="algorithm-default",
+                    compression_name="compression-scalar",
                     name="profile-default",
                     vectorizer_name="vectorizer-default",
                 ),
@@ -232,6 +237,17 @@ async def _use_client(self) -> SearchClient:
                     ),
                 )
             ],
+            # Limit vector storage cost: scalar-quantize vectors, keeping the originals for rescoring
+            # See: https://learn.microsoft.com/en-us/azure/search/vector-search-how-to-storage-options
+            compressions=[
+                ScalarQuantizationCompression(
+                    compression_name="compression-scalar",
+                    rescoring_options=RescoringOptions(
+                        default_oversampling=10,
+                        rescore_storage_method=VectorSearchCompressionRescoreStorageMethod.PRESERVE_ORIGINALS,
+                    ),
+                ),
+            ],
         )
         semantic_search = SemanticSearch(
             default_configuration_name=self._config.semantic_configuration,