Skip to content

Commit

Permalink
Revert "feat: hugging face supports embeddings."
Browse files Browse the repository at this point in the history
This reverts commit b8db580.
  • Loading branch information
GarfieldDai committed Sep 19, 2023
1 parent abb96e1 commit f711f1f
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 199 deletions.
42 changes: 12 additions & 30 deletions api/core/model_providers/providers/huggingface_hub_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

from core.model_providers.models.base import BaseProviderModel
from core.third_party.langchain.llms.huggingface_endpoint_llm import HuggingFaceEndpointLLM
from core.third_party.langchain.embeddings.huggingface_hub_embedding import HuggingfaceHubEmbeddings
from core.model_providers.models.embedding.huggingface_embedding import HuggingfaceEmbedding
from models.provider import ProviderType


Expand Down Expand Up @@ -90,15 +88,19 @@ def is_model_credentials_valid_or_raise(cls, model_name: str, model_type: ModelT
if 'task_type' not in credentials:
raise CredentialsValidateFailedError('Task Type must be provided.')

if credentials['task_type'] not in ("text2text-generation", "text-generation", "summarization", 'feature-extraction'):
if credentials['task_type'] not in ("text2text-generation", "text-generation", "summarization"):
raise CredentialsValidateFailedError('Task Type must be one of text2text-generation, '
'text-generation, summarization, feature-extraction.')
'text-generation, summarization.')

try:
if credentials['task_type'] == 'feature-extraction':
cls.check_embedding_valid(credentials, model_name)
else:
cls.check_llm_valid(credentials)
llm = HuggingFaceEndpointLLM(
endpoint_url=credentials['huggingfacehub_endpoint_url'],
task=credentials['task_type'],
model_kwargs={"temperature": 0.5, "max_new_tokens": 200},
huggingfacehub_api_token=credentials['huggingfacehub_api_token']
)

llm("ping")
except Exception as e:
raise CredentialsValidateFailedError(f"{e.__class__.__name__}:{str(e)}")
else:
Expand All @@ -110,33 +112,13 @@ def is_model_credentials_valid_or_raise(cls, model_name: str, model_type: ModelT
if 'inference' in model_info.cardData and not model_info.cardData['inference']:
raise ValueError(f'Inference API has been turned off for this model {model_name}.')

VALID_TASKS = ("text2text-generation", "text-generation", "summarization", "feature-extraction")
VALID_TASKS = ("text2text-generation", "text-generation", "summarization")
if model_info.pipeline_tag not in VALID_TASKS:
raise ValueError(f"Model {model_name} is not a valid task, "
f"must be one of {VALID_TASKS}.")
except Exception as e:
raise CredentialsValidateFailedError(f"{e.__class__.__name__}:{str(e)}")

@classmethod
def check_llm_valid(cls, credentials: dict):
    """Validate LLM credentials by issuing a minimal generation call.

    Builds a ``HuggingFaceEndpointLLM`` from the supplied credentials and
    sends a tiny prompt; any connectivity/auth failure surfaces as an
    exception for the caller to translate.

    :param credentials: dict holding 'huggingfacehub_endpoint_url',
        'task_type' and 'huggingfacehub_api_token'.
    """
    generation_kwargs = {"temperature": 0.5, "max_new_tokens": 200}
    endpoint_llm = HuggingFaceEndpointLLM(
        endpoint_url=credentials['huggingfacehub_endpoint_url'],
        task=credentials['task_type'],
        model_kwargs=generation_kwargs,
        huggingfacehub_api_token=credentials['huggingfacehub_api_token'],
    )

    # A single short completion is enough to prove the endpoint is
    # reachable and the token is authorized.
    endpoint_llm("ping")

@classmethod
def check_embedding_valid(cls, credentials: dict, model_name: str):
    """Validate embedding credentials with a minimal embed call.

    Instantiates ``HuggingfaceHubEmbeddings`` for *model_name* using the
    given credentials and embeds a tiny probe string; failures propagate
    to the caller as exceptions.

    :param credentials: provider credential fields, expanded as keyword
        arguments to the embeddings client.
    :param model_name: Hugging Face Hub model identifier to validate.
    """
    client = HuggingfaceHubEmbeddings(model=model_name, **credentials)
    # One short query exercises the full request path (auth + inference).
    client.embed_query("ping")

@classmethod
def encrypt_model_credentials(cls, tenant_id: str, model_name: str, model_type: ModelType,
credentials: dict) -> dict:
Expand Down Expand Up @@ -209,4 +191,4 @@ def encrypt_provider_credentials(cls, tenant_id: str, credentials: dict) -> dict
return {}

def get_provider_credentials(self, obfuscated: bool = False) -> dict:
    """Return provider-level credentials for this provider.

    Hugging Face Hub holds its secrets per-model rather than per-provider,
    so there are no provider-scoped credentials to expose.

    :param obfuscated: ignored here; kept for interface compatibility with
        other providers that mask secrets when True.
    :return: always an empty dict.
    """
    # NOTE(review): the original showed two consecutive `return {}` lines
    # (an unreachable duplicate, likely a diff/no-newline artifact); only
    # one is kept.
    return {}

This file was deleted.

This file was deleted.

0 comments on commit f711f1f

Please sign in to comment.