From 2e941bb91c662e9b7b100c08254dcb09ebbca031 Mon Sep 17 00:00:00 2001 From: JuHyung Son Date: Fri, 2 Aug 2024 21:48:09 +0900 Subject: [PATCH] add new provider Solar (#6884) --- .../model_providers/_position.yaml | 1 + .../model_providers/upstage/__init__.py | 0 .../upstage/_assets/icon_l_en.svg | 14 + .../upstage/_assets/icon_s_en.svg | 3 + .../model_providers/upstage/_common.py | 57 ++ .../model_providers/upstage/llm/__init__.py | 0 .../upstage/llm/_position.yaml | 1 + .../model_providers/upstage/llm/llm.py | 575 ++++++++++++++++++ .../upstage/llm/solar-1-mini-chat.yaml | 43 ++ .../upstage/text_embedding/__init__.py | 0 .../solar-embedding-1-large-passage.yaml | 9 + .../solar-embedding-1-large-query.yaml | 9 + .../upstage/text_embedding/text_embedding.py | 195 ++++++ .../model_providers/upstage/upstage.py | 32 + .../model_providers/upstage/upstage.yaml | 49 ++ api/docker/entrypoint.sh | 2 +- api/pyproject.toml | 1 + .../model_runtime/upstage/__init__.py | 0 .../model_runtime/upstage/test_llm.py | 245 ++++++++ .../model_runtime/upstage/test_provider.py | 23 + .../upstage/test_text_embedding.py | 67 ++ dev/pytest/pytest_model_runtime.sh | 4 +- 22 files changed, 1328 insertions(+), 2 deletions(-) create mode 100644 api/core/model_runtime/model_providers/upstage/__init__.py create mode 100644 api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg create mode 100644 api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg create mode 100644 api/core/model_runtime/model_providers/upstage/_common.py create mode 100644 api/core/model_runtime/model_providers/upstage/llm/__init__.py create mode 100644 api/core/model_runtime/model_providers/upstage/llm/_position.yaml create mode 100644 api/core/model_runtime/model_providers/upstage/llm/llm.py create mode 100644 api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml create mode 100644 api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py create mode 100644 api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml create mode 100644 api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml create mode 100644 api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py create mode 100644 api/core/model_runtime/model_providers/upstage/upstage.py create mode 100644 api/core/model_runtime/model_providers/upstage/upstage.yaml create mode 100644 api/tests/integration_tests/model_runtime/upstage/__init__.py create mode 100644 api/tests/integration_tests/model_runtime/upstage/test_llm.py create mode 100644 api/tests/integration_tests/model_runtime/upstage/test_provider.py create mode 100644 api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml index c2fa0e5a6ef216..b4e024a81ec7bb 100644 --- a/api/core/model_runtime/model_providers/_position.yaml +++ b/api/core/model_runtime/model_providers/_position.yaml @@ -6,6 +6,7 @@ - nvidia - nvidia_nim - cohere +- upstage - bedrock - togetherai - openrouter diff --git a/api/core/model_runtime/model_providers/upstage/__init__.py b/api/core/model_runtime/model_providers/upstage/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg new file mode 100644 
index 00000000000000..0761f85ba64958 --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg new file mode 100644 index 00000000000000..44ef12b7303098 --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg @@ -0,0 +1,3 @@ + + + diff --git a/api/core/model_runtime/model_providers/upstage/_common.py b/api/core/model_runtime/model_providers/upstage/_common.py new file mode 100644 index 00000000000000..13b73181e95ffb --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/_common.py @@ -0,0 +1,57 @@ + +from collections.abc import Mapping + +import openai +from httpx import Timeout + +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class _CommonUpstage: + def _to_credential_kwargs(self, credentials: Mapping) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = { + "api_key": credentials['upstage_api_key'], + "base_url": "https://api.upstage.ai/v1/solar", + "timeout": Timeout(315.0, read=300.0, write=20.0, connect=10.0), + "max_retries": 1 + } + + return credentials_kwargs + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. 
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+            InvokeServerUnavailableError: [openai.InternalServerError],
+            InvokeRateLimitError: [openai.RateLimitError],
+            InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
+            InvokeBadRequestError: [
+                openai.BadRequestError,
+                openai.NotFoundError,
+                openai.UnprocessableEntityError,
+                openai.APIError,
+            ],
+        }
+
+
diff --git a/api/core/model_runtime/model_providers/upstage/llm/__init__.py b/api/core/model_runtime/model_providers/upstage/llm/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/core/model_runtime/model_providers/upstage/llm/_position.yaml b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml
new file mode 100644
index 00000000000000..d4f03e1988f8b8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml
@@ -0,0 +1 @@
+- solar-1-mini-chat
diff --git a/api/core/model_runtime/model_providers/upstage/llm/llm.py b/api/core/model_runtime/model_providers/upstage/llm/llm.py
new file mode 100644
index 00000000000000..d1ed4619d6bbbf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/llm/llm.py
@@ -0,0 +1,575 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+from tokenizers import Tokenizer
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    ImagePromptMessageContent,
+    PromptMessage,
+    PromptMessageContentType,
+    PromptMessageTool,
+    SystemPromptMessage,
+    TextPromptMessageContent,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+logger = logging.getLogger(__name__)
+
+UPSTAGE_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+
+<instructions>
+{{instructions}}
+</instructions>
+"""
+
+class UpstageLargeLanguageModel(_CommonUpstage, LargeLanguageModel):
+    """
+    Model class for Upstage large language model.
+ """ + + def _invoke(self, model: str, credentials: dict, + prompt_messages: list[PromptMessage], model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, + stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + + return self._chat_generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user + ) + + def _code_block_mode_wrapper(self, + model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None, callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]: + """ + Code block mode wrapper for invoking large language model + """ + if 'response_format' in model_parameters and model_parameters['response_format'] in ['JSON', 'XML']: + stop = stop or [] + self._transform_chat_json_prompts( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + response_format=model_parameters['response_format'] + ) + model_parameters.pop('response_format') + + return self._invoke( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user + ) + + def _transform_chat_json_prompts(self, model: str, credentials: dict, + prompt_messages: list[PromptMessage], model_parameters: dict, + tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, + stream: bool = True, user: str | None = None, response_format: str = 'JSON') -> None: + """ + Transform json prompts + """ + if stop is None: + stop = [] + if "```\n" not in stop: + stop.append("```\n") + if "\n```" not in stop: + stop.append("\n```") + + if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage): + prompt_messages[0] = SystemPromptMessage( + content=UPSTAGE_BLOCK_MODE_PROMPT + .replace("{{instructions}}", prompt_messages[0].content) + .replace("{{block}}", response_format) + ) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n")) + else: + prompt_messages.insert(0, SystemPromptMessage( + content=UPSTAGE_BLOCK_MODE_PROMPT + .replace("{{instructions}}", f"Please output a valid {response_format} object.") + .replace("{{block}}", response_format) + )) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}")) + + def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: + """ + return self._num_tokens_from_messages(model, prompt_messages, tools) + + def 
validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + client.chat.completions.create( + messages=[{"role": "user", "content": "ping"}], + model=model, + temperature=0, + max_tokens=10, + stream=False + ) + except Exception as e: + raise CredentialsValidateFailedError(str(e)) + + def _chat_generate(self, model: str, credentials: dict, + prompt_messages: list[PromptMessage], model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, + stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + + if tools: + extra_model_kwargs["functions"] = [{ + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters + } for tool in tools] + + if stop: + extra_model_kwargs["stop"] = stop + + if user: + extra_model_kwargs["user"] = user + + # chat model + response = client.chat.completions.create( + messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages], + model=model, + stream=stream, + **model_parameters, + **extra_model_kwargs, + ) + + if stream: + return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools) + return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools) + + def _handle_chat_generate_response(self, model: str, credentials: dict, response: ChatCompletion, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None) -> LLMResult: + """ + Handle llm chat response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response + """ + assistant_message = response.choices[0].message + # assistant_message_tool_calls = assistant_message.tool_calls + assistant_message_function_call = assistant_message.function_call + + # extract tool calls from response + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage( + content=assistant_message.content, + tool_calls=tool_calls + ) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.prompt_tokens + completion_tokens = response.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools) + completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + + # transform response + response = LLMResult( + model=response.model, + prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage, + system_fingerprint=response.system_fingerprint, + ) + + return response + + def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: Stream[ChatCompletionChunk], + 
prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None) -> Generator: + """ + Handle llm chat stream response + + :param model: model name + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response chunk generator + """ + full_assistant_content = '' + delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None + prompt_tokens = 0 + completion_tokens = 0 + final_tool_calls = [] + final_chunk = LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=''), + ) + ) + + for chunk in response: + if len(chunk.choices) == 0: + if chunk.usage: + # calculate num tokens + prompt_tokens = chunk.usage.prompt_tokens + completion_tokens = chunk.usage.completion_tokens + continue + + delta = chunk.choices[0] + has_finish_reason = delta.finish_reason is not None + + if not has_finish_reason and (delta.delta.content is None or delta.delta.content == '') and \ + delta.delta.function_call is None: + continue + + # assistant_message_tool_calls = delta.delta.tool_calls + assistant_message_function_call = delta.delta.function_call + + # extract tool calls from response + if delta_assistant_message_function_call_storage is not None: + # handle process of stream function call + if assistant_message_function_call: + # message has not ended ever + delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments + continue + else: + # message has ended + assistant_message_function_call = delta_assistant_message_function_call_storage + delta_assistant_message_function_call_storage = None + else: + if assistant_message_function_call: + # start of stream function call + delta_assistant_message_function_call_storage = assistant_message_function_call + if delta_assistant_message_function_call_storage.arguments is None: + delta_assistant_message_function_call_storage.arguments = '' + if not has_finish_reason: + continue + + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + if tool_calls: + final_tool_calls.extend(tool_calls) + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage( + content=delta.delta.content if delta.delta.content else '', + tool_calls=tool_calls + ) + + full_assistant_content += delta.delta.content if delta.delta.content else '' + + if has_finish_reason: + final_chunk = LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + finish_reason=delta.finish_reason, + ) + ) + else: + yield LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + ) + ) + + if not prompt_tokens: + prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools) + + if not completion_tokens: + full_assistant_prompt_message = AssistantPromptMessage( + content=full_assistant_content, + tool_calls=final_tool_calls + ) + completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message]) + + # transform usage + usage = 
self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + final_chunk.delta.usage = usage + + yield final_chunk + + def _extract_response_tool_calls(self, + response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \ + -> list[AssistantPromptMessage.ToolCall]: + """ + Extract tool calls from response + + :param response_tool_calls: response tool calls + :return: list of tool calls + """ + tool_calls = [] + if response_tool_calls: + for response_tool_call in response_tool_calls: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_tool_call.function.name, + arguments=response_tool_call.function.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_tool_call.id, + type=response_tool_call.type, + function=function + ) + tool_calls.append(tool_call) + + return tool_calls + + def _extract_response_function_call(self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall) \ + -> AssistantPromptMessage.ToolCall: + """ + Extract function call from response + + :param response_function_call: response function call + :return: tool call + """ + tool_call = None + if response_function_call: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_function_call.name, + arguments=response_function_call.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_function_call.name, + type="function", + function=function + ) + + return tool_call + + def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + """ + Convert PromptMessage to dict for Upstage API + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = { + "type": "text", + "text": message_content.data + } + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + sub_message_dict = { + "type": "image_url", + "image_url": { + "url": message_content.data, + "detail": message_content.detail.value + } + } + sub_messages.append(sub_message_dict) + + message_dict = {"role": "user", "content": sub_messages} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + # message_dict["tool_calls"] = [tool_call.dict() for tool_call in + # message.tool_calls] + function_call = message.tool_calls[0] + message_dict["function_call"] = { + "name": function_call.function.name, + "arguments": function_call.function.arguments, + } + elif isinstance(message, SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, ToolPromptMessage): + message = cast(ToolPromptMessage, message) + # message_dict = { + # "role": "tool", + # "content": message.content, + # "tool_call_id": message.tool_call_id + # } + message_dict = { + "role": "function", + "content": message.content, + "name": message.tool_call_id + } + else: + raise ValueError(f"Got unknown type {message}") + + if message.name: + message_dict["name"] = 
message.name + + return message_dict + + def _get_tokenizer(self) -> Tokenizer: + return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer") + + def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None) -> int: + """ + Calculate num tokens for solar with Huggingface Solar tokenizer. + Solar tokenizer is opened in huggingface https://huggingface.co/upstage/solar-1-mini-tokenizer + """ + tokenizer = self._get_tokenizer() + tokens_per_message = 5 # <|im_start|>{role}\n{message}<|im_end|> + tokens_prefix = 1 # <|startoftext|> + tokens_suffix = 3 # <|im_start|>assistant\n + + num_tokens = 0 + num_tokens += tokens_prefix + + messages_dict = [self._convert_prompt_message_to_dict(message) for message in messages] + for message in messages_dict: + num_tokens += tokens_per_message + for key, value in message.items(): + if isinstance(value, list): + text = '' + for item in value: + if isinstance(item, dict) and item['type'] == 'text': + text += item['text'] + value = text + + if key == "tool_calls": + for tool_call in value: + for t_key, t_value in tool_call.items(): + num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False)) + if t_key == "function": + for f_key, f_value in t_value.items(): + num_tokens += len(tokenizer.encode(f_key, add_special_tokens=False)) + num_tokens += len(tokenizer.encode(f_value, add_special_tokens=False)) + else: + num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False)) + num_tokens += len(tokenizer.encode(t_value, add_special_tokens=False)) + else: + num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False)) + num_tokens += tokens_suffix + + if tools: + num_tokens += self._num_tokens_for_tools(tokenizer, tools) + + return num_tokens + + def _num_tokens_for_tools(self, tokenizer: Tokenizer, tools: list[PromptMessageTool]) -> int: + """ + Calculate num tokens for tool calling with upstage tokenizer. 
+ + :param tokenizer: huggingface tokenizer + :param tools: tools for tool calling + :return: number of tokens + """ + num_tokens = 0 + for tool in tools: + num_tokens += len(tokenizer.encode('type')) + num_tokens += len(tokenizer.encode('function')) + + # calculate num tokens for function object + num_tokens += len(tokenizer.encode('name')) + num_tokens += len(tokenizer.encode(tool.name)) + num_tokens += len(tokenizer.encode('description')) + num_tokens += len(tokenizer.encode(tool.description)) + parameters = tool.parameters + num_tokens += len(tokenizer.encode('parameters')) + if 'title' in parameters: + num_tokens += len(tokenizer.encode('title')) + num_tokens += len(tokenizer.encode(parameters.get("title"))) + num_tokens += len(tokenizer.encode('type')) + num_tokens += len(tokenizer.encode(parameters.get("type"))) + if 'properties' in parameters: + num_tokens += len(tokenizer.encode('properties')) + for key, value in parameters.get('properties').items(): + num_tokens += len(tokenizer.encode(key)) + for field_key, field_value in value.items(): + num_tokens += len(tokenizer.encode(field_key)) + if field_key == 'enum': + for enum_field in field_value: + num_tokens += 3 + num_tokens += len(tokenizer.encode(enum_field)) + else: + num_tokens += len(tokenizer.encode(field_key)) + num_tokens += len(tokenizer.encode(str(field_value))) + if 'required' in parameters: + num_tokens += len(tokenizer.encode('required')) + for required_field in parameters['required']: + num_tokens += 3 + num_tokens += len(tokenizer.encode(required_field)) + + return num_tokens diff --git a/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml b/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml new file mode 100644 index 00000000000000..787ac83f8ad92d --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml @@ -0,0 +1,43 @@ +model: solar-1-mini-chat +label: + zh_Hans: solar-1-mini-chat + en_US: solar-1-mini-chat + ko_KR: solar-1-mini-chat +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 32768 + - name: seed + label: + zh_Hans: 种子 + en_US: Seed + type: int + help: + zh_Hans: + 如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint + 响应参数来监视变化。 + en_US: + If specified, model will make a best effort to sample deterministically, + such that repeated requests with the same seed and parameters should return + the same result. Determinism is not guaranteed, and you should refer to the + system_fingerprint response parameter to monitor changes in the backend. 
+ required: false +pricing: + input: "0.5" + output: "0.5" + unit: "0.000001" + currency: USD diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py b/api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml new file mode 100644 index 00000000000000..d838a5bbb1bbfd --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml @@ -0,0 +1,9 @@ +model: solar-embedding-1-large-passage +model_type: text-embedding +model_properties: + context_size: 4000 + max_chunks: 32 +pricing: + input: '0.1' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml new file mode 100644 index 00000000000000..c77645cffdd8f4 --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml @@ -0,0 +1,9 @@ +model: solar-embedding-1-large-query +model_type: text-embedding +model_properties: + context_size: 4000 + max_chunks: 32 +pricing: + input: '0.1' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py new file mode 100644 index 00000000000000..05ae8665d65bdd --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py @@ -0,0 +1,195 @@ +import base64 +import time +from collections.abc import Mapping +from typing import Union + +import numpy as np +from openai import OpenAI +from tokenizers import Tokenizer + +from core.model_runtime.entities.model_entities import PriceType +from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel +from core.model_runtime.model_providers.upstage._common import _CommonUpstage + + +class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel): + """ + Model class for Upstage text embedding model. 
+ """ + def _get_tokenizer(self) -> Tokenizer: + return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer") + + def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :return: embeddings result + """ + + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + if user: + extra_model_kwargs["user"] = user + extra_model_kwargs["encoding_format"] = "base64" + + context_size = self._get_context_size(model, credentials) + max_chunks = self._get_max_chunks(model, credentials) + + embeddings: list[list[float]] = [[] for _ in range(len(texts))] + tokens = [] + indices = [] + used_tokens = 0 + + tokenizer = self._get_tokenizer() + + for i, text in enumerate(texts): + token = tokenizer.encode(text, add_special_tokens=False).tokens + for j in range(0, len(token), context_size): + tokens += [token[j:j+context_size]] + indices += [i] + + batched_embeddings = [] + _iter = range(0, len(tokens), max_chunks) + + for i in _iter: + embeddings_batch, embedding_used_tokens = self._embedding_invoke( + model=model, + client=client, + texts=tokens[i:i+max_chunks], + extra_model_kwargs=extra_model_kwargs, + ) + + used_tokens += embedding_used_tokens + batched_embeddings += embeddings_batch + + results: list[list[list[float]]] = [[] for _ in range(len(texts))] + num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))] + + for i in range(len(indices)): + results[indices[i]].append(batched_embeddings[i]) + num_tokens_in_batch[indices[i]].append(len(tokens[i])) + + for i in range(len(texts)): + _result = results[i] + if len(_result) == 0: + embeddings_batch, embedding_used_tokens = self._embedding_invoke( + model=model, + client=client, + texts=[texts[i]], + extra_model_kwargs=extra_model_kwargs, + ) + used_tokens += embedding_used_tokens + average = embeddings_batch[0] + else: + average = np.average(_result, axis=0, weights=num_tokens_in_batch[i]) + embeddings[i] = (average / np.linalg.norm(average)).tolist() + + usage = self._calc_response_usage( + model=model, + credentials=credentials, + tokens=used_tokens + ) + + return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + tokenizer = self._get_tokenizer() + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + if len(texts) == 0: + return 0 + + tokenizer = self._get_tokenizer() + + total_num_tokens = 0 + for text in texts: + # calculate the number of tokens in the encoded text + tokenized_text = tokenizer.encode(text) + total_num_tokens += len(tokenized_text) + + return total_num_tokens + + def validate_credentials(self, model: str, credentials: Mapping) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + # transform credentials to kwargs for model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + # call embedding model + self._embedding_invoke( + model=model, + client=client, + texts=['ping'], + extra_model_kwargs={} + ) + except Exception as ex: + raise 
CredentialsValidateFailedError(str(ex)) + + def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict) -> tuple[list[list[float]], int]: + """ + Invoke embedding model + :param model: model name + :param client: model client + :param texts: texts to embed + :param extra_model_kwargs: extra model kwargs + :return: embeddings and used tokens + """ + response = client.embeddings.create( + model=model, + input=texts, + **extra_model_kwargs + ) + + if 'encoding_format' in extra_model_kwargs and extra_model_kwargs['encoding_format'] == 'base64': + return ([list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32)) for embedding in response.data], response.usage.total_tokens) + + return [data.embedding for data in response.data], response.usage.total_tokens + + def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param tokens: input tokens + :return: usage + """ + input_price_info = self.get_price( + model=model, + credentials=credentials, + tokens=tokens, + price_type=PriceType.INPUT + ) + + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=tokens, + unit_price=input_price_info.unit_price, + price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at + ) + + return usage diff --git a/api/core/model_runtime/model_providers/upstage/upstage.py b/api/core/model_runtime/model_providers/upstage/upstage.py new file mode 100644 index 00000000000000..56c91c00618922 --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/upstage.py @@ -0,0 +1,32 @@ +import logging + +from core.model_runtime.entities.model_entities import ModelType +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.model_provider import ModelProvider + +logger = logging.getLogger(__name__) + + +class UpstageProvider(ModelProvider): + + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials from defined in `provider_credential_schema`. + """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + + model_instance.validate_credentials( + model="solar-1-mini-chat", + credentials=credentials + ) + except CredentialsValidateFailedError as e: + logger.exception(f'{self.get_provider_schema().provider} credentials validate failed') + raise e + except Exception as e: + logger.exception(f'{self.get_provider_schema().provider} credentials validate failed') + raise e + diff --git a/api/core/model_runtime/model_providers/upstage/upstage.yaml b/api/core/model_runtime/model_providers/upstage/upstage.yaml new file mode 100644 index 00000000000000..837667cfa9b41f --- /dev/null +++ b/api/core/model_runtime/model_providers/upstage/upstage.yaml @@ -0,0 +1,49 @@ +provider: upstage +label: + en_US: Upstage +description: + en_US: Models provided by Upstage, such as Solar-1-mini-chat. + zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat. 
+icon_small:
+  en_US: icon_s_en.svg
+icon_large:
+  en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+  title:
+    en_US: Get your API Key from Upstage
+    zh_Hans: 从 Upstage 获取 API Key
+  url:
+    en_US: https://console.upstage.ai/api-keys
+supported_model_types:
+  - llm
+  - text-embedding
+configurate_methods:
+  - predefined-model
+model_credential_schema:
+  model:
+    label:
+      en_US: Model Name
+      zh_Hans: 模型名称
+    placeholder:
+      en_US: Enter your model name
+      zh_Hans: 输入模型名称
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
+provider_credential_schema:
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh
index 9cf5c505d138af..64e4e719ab9be8 100755
--- a/api/docker/entrypoint.sh
+++ b/api/docker/entrypoint.sh
@@ -4,7 +4,7 @@ set -e
 
 if [[ "${MIGRATION_ENABLED}" == "true" ]]; then
   echo "Running migrations"
-  flask upgrade-db
+  flask db upgrade
 fi
 
 if [[ "${MODE}" == "worker" ]]; then
diff --git a/api/pyproject.toml b/api/pyproject.toml
index c2c1d56403acd0..567d2677897fcb 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -73,6 +73,7 @@ quote-style = "single"
 
 [tool.pytest_env]
 OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
+UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
 AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
 AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
 ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"
diff --git a/api/tests/integration_tests/model_runtime/upstage/__init__.py b/api/tests/integration_tests/model_runtime/upstage/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/tests/integration_tests/model_runtime/upstage/test_llm.py b/api/tests/integration_tests/model_runtime/upstage/test_llm.py
new file mode 100644
index 00000000000000..c35580a8b1ec00
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/upstage/test_llm.py
@@ -0,0 +1,245 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage.llm.llm import UpstageLargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+def test_predefined_models():
+    model = UpstageLargeLanguageModel()
+    model_schemas = model.predefined_models()
+
+    assert len(model_schemas) >= 1
+    assert isinstance(model_schemas[0], AIModelEntity)
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        # model name to gpt-3.5-turbo because of mocking
model.validate_credentials( + model='gpt-3.5-turbo', + credentials={ + 'upstage_api_key': 'invalid_key' + } + ) + + model.validate_credentials( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + } + ) + +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model(setup_openai_mock): + model = UpstageLargeLanguageModel() + + result = model.invoke( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + }, + prompt_messages=[ + SystemPromptMessage( + content='You are a helpful AI assistant.', + ), + UserPromptMessage( + content='Hello World!' + ) + ], + model_parameters={ + 'temperature': 0.0, + 'top_p': 1.0, + 'presence_penalty': 0.0, + 'frequency_penalty': 0.0, + 'max_tokens': 10 + }, + stop=['How'], + stream=False, + user="abc-123" + ) + + assert isinstance(result, LLMResult) + assert len(result.message.content) > 0 + +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model_with_tools(setup_openai_mock): + model = UpstageLargeLanguageModel() + + result = model.invoke( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + }, + prompt_messages=[ + SystemPromptMessage( + content='You are a helpful AI assistant.', + ), + UserPromptMessage( + content="what's the weather today in London?", + ) + ], + model_parameters={ + 'temperature': 0.0, + 'max_tokens': 100 + }, + tools=[ + PromptMessageTool( + name='get_weather', + description='Determine weather in my location', + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ + "c", + "f" + ] + } + }, + "required": [ + "location" + ] + } + ), + PromptMessageTool( + name='get_stock_price', + description='Get the current stock price', + parameters={ + "type": "object", + "properties": { + "symbol": { + "type": "string", + "description": "The stock symbol" + } + }, + "required": [ + "symbol" + ] + } + ) + ], + stream=False, + user="abc-123" + ) + + assert isinstance(result, LLMResult) + assert isinstance(result.message, AssistantPromptMessage) + assert len(result.message.tool_calls) > 0 + +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_stream_chat_model(setup_openai_mock): + model = UpstageLargeLanguageModel() + + result = model.invoke( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + }, + prompt_messages=[ + SystemPromptMessage( + content='You are a helpful AI assistant.', + ), + UserPromptMessage( + content='Hello World!' 
+ ) + ], + model_parameters={ + 'temperature': 0.0, + 'max_tokens': 100 + }, + stream=True, + user="abc-123" + ) + + assert isinstance(result, Generator) + + for chunk in result: + assert isinstance(chunk, LLMResultChunk) + assert isinstance(chunk.delta, LLMResultChunkDelta) + assert isinstance(chunk.delta.message, AssistantPromptMessage) + assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True + if chunk.delta.finish_reason is not None: + assert chunk.delta.usage is not None + assert chunk.delta.usage.completion_tokens > 0 + + +def test_get_num_tokens(): + model = UpstageLargeLanguageModel() + + num_tokens = model.get_num_tokens( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + }, + prompt_messages=[ + UserPromptMessage( + content='Hello World!' + ) + ] + ) + + assert num_tokens == 13 + + num_tokens = model.get_num_tokens( + model='solar-1-mini-chat', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + }, + prompt_messages=[ + SystemPromptMessage( + content='You are a helpful AI assistant.', + ), + UserPromptMessage( + content='Hello World!' + ) + ], + tools=[ + PromptMessageTool( + name='get_weather', + description='Determine weather in my location', + parameters={ + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ + "c", + "f" + ] + } + }, + "required": [ + "location" + ] + } + ), + ] + ) + + assert num_tokens == 106 diff --git a/api/tests/integration_tests/model_runtime/upstage/test_provider.py b/api/tests/integration_tests/model_runtime/upstage/test_provider.py new file mode 100644 index 00000000000000..c33eef49b2a79e --- /dev/null +++ b/api/tests/integration_tests/model_runtime/upstage/test_provider.py @@ -0,0 +1,23 @@ +import os + +import pytest + +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.upstage.upstage import UpstageProvider +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_validate_provider_credentials(setup_openai_mock): + provider = UpstageProvider() + + with pytest.raises(CredentialsValidateFailedError): + provider.validate_provider_credentials( + credentials={} + ) + + provider.validate_provider_credentials( + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + } + ) diff --git a/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py b/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py new file mode 100644 index 00000000000000..54135a0e748d40 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py @@ -0,0 +1,67 @@ +import os + +import pytest + +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.upstage.text_embedding.text_embedding import UpstageTextEmbeddingModel +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True) +def test_validate_credentials(setup_openai_mock): + model = UpstageTextEmbeddingModel() + + with pytest.raises(CredentialsValidateFailedError): + 
model.validate_credentials( + model='solar-embedding-1-large-passage', + credentials={ + 'upstage_api_key': 'invalid_key' + } + ) + + model.validate_credentials( + model='solar-embedding-1-large-passage', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY') + } + ) + +@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True) +def test_invoke_model(setup_openai_mock): + model = UpstageTextEmbeddingModel() + + result = model.invoke( + model='solar-embedding-1-large-passage', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'), + }, + texts=[ + "hello", + "world", + " ".join(["long_text"] * 100), + " ".join(["another_long_text"] * 100) + ], + user="abc-123" + ) + + assert isinstance(result, TextEmbeddingResult) + assert len(result.embeddings) == 4 + assert result.usage.total_tokens == 2 + + +def test_get_num_tokens(): + model = UpstageTextEmbeddingModel() + + num_tokens = model.get_num_tokens( + model='solar-embedding-1-large-passage', + credentials={ + 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'), + }, + texts=[ + "hello", + "world" + ] + ) + + assert num_tokens == 5 diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh index 2e113346c728b4..aba13292ab8315 100755 --- a/dev/pytest/pytest_model_runtime.sh +++ b/dev/pytest/pytest_model_runtime.sh @@ -5,4 +5,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \ api/tests/integration_tests/model_runtime/azure_openai \ api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \ api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \ - api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py + api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \ + api/tests/integration_tests/model_runtime/upstage +
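
For reviewers who want to smoke-test the credential and endpoint wiring outside the Dify test harness, below is a minimal sketch (not part of the patch) of the same OpenAI-compatible calls that `validate_credentials` and `_embedding_invoke` issue. It assumes the `openai` 1.x SDK, `httpx`, and a real `UPSTAGE_API_KEY` in the environment; the base URL, timeout, and model names are taken from `_common.py` and the YAML files added above.

```python
# Illustrative smoke test only; mirrors _CommonUpstage._to_credential_kwargs.
import os

from httpx import Timeout
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["UPSTAGE_API_KEY"],          # same credential key the provider reads
    base_url="https://api.upstage.ai/v1/solar",      # Solar endpoint hard-coded in _common.py
    timeout=Timeout(315.0, read=300.0, write=20.0, connect=10.0),
    max_retries=1,
)

# Chat completion against the model defined in solar-1-mini-chat.yaml,
# equivalent to the "ping" request in UpstageLargeLanguageModel.validate_credentials.
chat = client.chat.completions.create(
    model="solar-1-mini-chat",
    messages=[{"role": "user", "content": "ping"}],
    temperature=0,
    max_tokens=10,
)
print(chat.choices[0].message.content)

# Embedding call against one of the two embedding model configs.
emb = client.embeddings.create(
    model="solar-embedding-1-large-query",
    input=["hello", "world"],
)
print(len(emb.data[0].embedding))
```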