diff --git a/api/app.py b/api/app.py
index 0ba13f054354a6..4e5fd1f24bd368 100644
--- a/api/app.py
+++ b/api/app.py
@@ -6,9 +6,9 @@
 if not os.environ.get("DEBUG") or os.environ.get("DEBUG").lower() != 'true':
     from gevent import monkey
     monkey.patch_all()
-    if os.environ.get("VECTOR_STORE") == 'milvus':
-        import grpc.experimental.gevent
-        grpc.experimental.gevent.init_gevent()
+    # if os.environ.get("VECTOR_STORE") == 'milvus':
+    import grpc.experimental.gevent
+    grpc.experimental.gevent.init_gevent()
 
 import langchain
 langchain.verbose = True
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index cce740b53b7378..a0070f6d0d0214 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -161,6 +161,7 @@ def _handle_generate_response(self, model: str, credentials: dict, response: Das
         result = LLMResult(
             model=model,
             message=assistant_prompt_message,
+            prompt_messages=prompt_messages,
             usage=usage,
         )
 
diff --git a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
index fbaeaf6bcfb99b..603840a4a86ae0 100644
--- a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
@@ -37,7 +37,7 @@ def _invoke(self, model: str, credentials: dict,
 
         return TextEmbeddingResult(
             embeddings=embeddings,
-            usage=self._calc_response_usage(model, embedding_used_tokens),
+            usage=self._calc_response_usage(model, credentials_kwargs, embedding_used_tokens),
             model=model
         )
 
@@ -115,7 +115,7 @@ def embed_query(self, text: str) -> List[float]:
         """
         return self.embed_documents([text])[0]
 
-    def _calc_response_usage(self, model: str, tokens: int) -> EmbeddingUsage:
+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
         """
         Calculate response usage
 
@@ -126,6 +126,7 @@ def _calc_response_usage(self, model: str, tokens: int) -> EmbeddingUsage:
         # get input price info
         input_price_info = self.get_price(
             model=model,
+            credentials=credentials,
             price_type=PriceType.INPUT,
             tokens=tokens
         )