diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index c2fa0e5a6ef216..b4e024a81ec7bb 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -6,6 +6,7 @@
- nvidia
- nvidia_nim
- cohere
+- upstage
- bedrock
- togetherai
- openrouter
diff --git a/api/core/model_runtime/model_providers/upstage/__init__.py b/api/core/model_runtime/model_providers/upstage/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg
new file mode 100644
index 00000000000000..0761f85ba64958
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg
@@ -0,0 +1,14 @@
+
diff --git a/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg
new file mode 100644
index 00000000000000..44ef12b7303098
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/api/core/model_runtime/model_providers/upstage/_common.py b/api/core/model_runtime/model_providers/upstage/_common.py
new file mode 100644
index 00000000000000..13b73181e95ffb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/_common.py
@@ -0,0 +1,57 @@
+
+from collections.abc import Mapping
+
+import openai
+from httpx import Timeout
+
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonUpstage:
+ def _to_credential_kwargs(self, credentials: Mapping) -> dict:
+ """
+ Transform credentials to kwargs for model instance
+
+ :param credentials:
+ :return:
+ """
+ credentials_kwargs = {
+ "api_key": credentials['upstage_api_key'],
+ "base_url": "https://api.upstage.ai/v1/solar",
+ "timeout": Timeout(315.0, read=300.0, write=20.0, connect=10.0),
+ "max_retries": 1
+ }
+
+ return credentials_kwargs
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+ InvokeServerUnavailableError: [openai.InternalServerError],
+ InvokeRateLimitError: [openai.RateLimitError],
+ InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
+ InvokeBadRequestError: [
+ openai.BadRequestError,
+ openai.NotFoundError,
+ openai.UnprocessableEntityError,
+ openai.APIError,
+ ],
+ }
+
+
diff --git a/api/core/model_runtime/model_providers/upstage/llm/__init__.py b/api/core/model_runtime/model_providers/upstage/llm/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/core/model_runtime/model_providers/upstage/llm/_position.yaml b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml
new file mode 100644
index 00000000000000..d4f03e1988f8b8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml
@@ -0,0 +1 @@
+- solar-1-mini-chat
diff --git a/api/core/model_runtime/model_providers/upstage/llm/llm.py b/api/core/model_runtime/model_providers/upstage/llm/llm.py
new file mode 100644
index 00000000000000..d1ed4619d6bbbf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/llm/llm.py
@@ -0,0 +1,575 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+from tokenizers import Tokenizer
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+ AssistantPromptMessage,
+ ImagePromptMessageContent,
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageTool,
+ SystemPromptMessage,
+ TextPromptMessageContent,
+ ToolPromptMessage,
+ UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+logger = logging.getLogger(__name__)
+
+UPSTAGE_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+You can find the structure of the {{block}} object in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about it.
+
+
+{{instructions}}
+
+"""
+
+class UpstageLargeLanguageModel(_CommonUpstage, LargeLanguageModel):
+ """
+ Model class for Upstage large language model.
+ """
+
+ def _invoke(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+ """
+ Invoke large language model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param tools: tools for tool calling
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+
+ return self._chat_generate(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user
+ )
+
+ def _code_block_mode_wrapper(self,
+ model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None, callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]:
+ """
+ Code block mode wrapper for invoking large language model
+ """
+ if 'response_format' in model_parameters and model_parameters['response_format'] in ['JSON', 'XML']:
+ stop = stop or []
+ self._transform_chat_json_prompts(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user,
+ response_format=model_parameters['response_format']
+ )
+ model_parameters.pop('response_format')
+
+ return self._invoke(
+ model=model,
+ credentials=credentials,
+ prompt_messages=prompt_messages,
+ model_parameters=model_parameters,
+ tools=tools,
+ stop=stop,
+ stream=stream,
+ user=user
+ )
+
+ def _transform_chat_json_prompts(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
+ stream: bool = True, user: str | None = None, response_format: str = 'JSON') -> None:
+ """
+ Transform json prompts
+ """
+ if stop is None:
+ stop = []
+ if "```\n" not in stop:
+ stop.append("```\n")
+ if "\n```" not in stop:
+ stop.append("\n```")
+
+ if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+ prompt_messages[0] = SystemPromptMessage(
+ content=UPSTAGE_BLOCK_MODE_PROMPT
+ .replace("{{instructions}}", prompt_messages[0].content)
+ .replace("{{block}}", response_format)
+ )
+ prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n"))
+ else:
+ prompt_messages.insert(0, SystemPromptMessage(
+ content=UPSTAGE_BLOCK_MODE_PROMPT
+ .replace("{{instructions}}", f"Please output a valid {response_format} object.")
+ .replace("{{block}}", response_format)
+ ))
+ prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
+
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return:
+ """
+ return self._num_tokens_from_messages(model, prompt_messages, tools)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ client.chat.completions.create(
+ messages=[{"role": "user", "content": "ping"}],
+ model=model,
+ temperature=0,
+ max_tokens=10,
+ stream=False
+ )
+ except Exception as e:
+ raise CredentialsValidateFailedError(str(e))
+
+ def _chat_generate(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+
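+        # tool definitions are passed through the legacy `functions` parameter
+        # of the OpenAI-compatible API rather than `tools`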
+ if tools:
+ extra_model_kwargs["functions"] = [{
+ "name": tool.name,
+ "description": tool.description,
+ "parameters": tool.parameters
+ } for tool in tools]
+
+ if stop:
+ extra_model_kwargs["stop"] = stop
+
+ if user:
+ extra_model_kwargs["user"] = user
+
+ # chat model
+ response = client.chat.completions.create(
+ messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
+ model=model,
+ stream=stream,
+ **model_parameters,
+ **extra_model_kwargs,
+ )
+
+ if stream:
+ return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
+ return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+
+ def _handle_chat_generate_response(self, model: str, credentials: dict, response: ChatCompletion,
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None) -> LLMResult:
+ """
+ Handle llm chat response
+
+ :param model: model name
+ :param credentials: credentials
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response
+ """
+ assistant_message = response.choices[0].message
+ # assistant_message_tool_calls = assistant_message.tool_calls
+ assistant_message_function_call = assistant_message.function_call
+
+ # extract tool calls from response
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(assistant_message_function_call)
+ tool_calls = [function_call] if function_call else []
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(
+ content=assistant_message.content,
+ tool_calls=tool_calls
+ )
+
+ # calculate num tokens
+ if response.usage:
+ # transform usage
+ prompt_tokens = response.usage.prompt_tokens
+ completion_tokens = response.usage.completion_tokens
+ else:
+ # calculate num tokens
+ prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+ completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message])
+
+ # transform usage
+ usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+ # transform response
+ response = LLMResult(
+ model=response.model,
+ prompt_messages=prompt_messages,
+ message=assistant_prompt_message,
+ usage=usage,
+ system_fingerprint=response.system_fingerprint,
+ )
+
+ return response
+
+ def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: Stream[ChatCompletionChunk],
+ prompt_messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None) -> Generator:
+ """
+ Handle llm chat stream response
+
+ :param model: model name
+ :param response: response
+ :param prompt_messages: prompt messages
+ :param tools: tools for tool calling
+ :return: llm response chunk generator
+ """
+ full_assistant_content = ''
+ delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
+ prompt_tokens = 0
+ completion_tokens = 0
+ final_tool_calls = []
+ final_chunk = LLMResultChunk(
+ model=model,
+ prompt_messages=prompt_messages,
+ delta=LLMResultChunkDelta(
+ index=0,
+ message=AssistantPromptMessage(content=''),
+ )
+ )
+
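+        # iterate over streamed chunks, buffering function call arguments that
+        # arrive across multiple deltas and yielding text deltas as they come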
+ for chunk in response:
+ if len(chunk.choices) == 0:
+ if chunk.usage:
+ # calculate num tokens
+ prompt_tokens = chunk.usage.prompt_tokens
+ completion_tokens = chunk.usage.completion_tokens
+ continue
+
+ delta = chunk.choices[0]
+ has_finish_reason = delta.finish_reason is not None
+
+ if not has_finish_reason and (delta.delta.content is None or delta.delta.content == '') and \
+ delta.delta.function_call is None:
+ continue
+
+ # assistant_message_tool_calls = delta.delta.tool_calls
+ assistant_message_function_call = delta.delta.function_call
+
+            # reassemble function call arguments streamed across multiple deltas
+            if delta_assistant_message_function_call_storage is not None:
+                # a function call is already being buffered
+                if assistant_message_function_call:
+                    # the function call has not finished yet, keep accumulating arguments
+ delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments
+ continue
+ else:
+ # message has ended
+ assistant_message_function_call = delta_assistant_message_function_call_storage
+ delta_assistant_message_function_call_storage = None
+ else:
+ if assistant_message_function_call:
+ # start of stream function call
+ delta_assistant_message_function_call_storage = assistant_message_function_call
+ if delta_assistant_message_function_call_storage.arguments is None:
+ delta_assistant_message_function_call_storage.arguments = ''
+ if not has_finish_reason:
+ continue
+
+ # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+ function_call = self._extract_response_function_call(assistant_message_function_call)
+ tool_calls = [function_call] if function_call else []
+ if tool_calls:
+ final_tool_calls.extend(tool_calls)
+
+ # transform assistant message to prompt message
+ assistant_prompt_message = AssistantPromptMessage(
+ content=delta.delta.content if delta.delta.content else '',
+ tool_calls=tool_calls
+ )
+
+ full_assistant_content += delta.delta.content if delta.delta.content else ''
+
+ if has_finish_reason:
+ final_chunk = LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ finish_reason=delta.finish_reason,
+ )
+ )
+ else:
+ yield LLMResultChunk(
+ model=chunk.model,
+ prompt_messages=prompt_messages,
+ system_fingerprint=chunk.system_fingerprint,
+ delta=LLMResultChunkDelta(
+ index=delta.index,
+ message=assistant_prompt_message,
+ )
+ )
+
+ if not prompt_tokens:
+ prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+
+ if not completion_tokens:
+ full_assistant_prompt_message = AssistantPromptMessage(
+ content=full_assistant_content,
+ tool_calls=final_tool_calls
+ )
+ completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message])
+
+ # transform usage
+ usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+ final_chunk.delta.usage = usage
+
+ yield final_chunk
+
+ def _extract_response_tool_calls(self,
+ response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \
+ -> list[AssistantPromptMessage.ToolCall]:
+ """
+ Extract tool calls from response
+
+ :param response_tool_calls: response tool calls
+ :return: list of tool calls
+ """
+ tool_calls = []
+ if response_tool_calls:
+ for response_tool_call in response_tool_calls:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_tool_call.function.name,
+ arguments=response_tool_call.function.arguments
+ )
+
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_tool_call.id,
+ type=response_tool_call.type,
+ function=function
+ )
+ tool_calls.append(tool_call)
+
+ return tool_calls
+
+ def _extract_response_function_call(self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall) \
+ -> AssistantPromptMessage.ToolCall:
+ """
+ Extract function call from response
+
+ :param response_function_call: response function call
+ :return: tool call
+ """
+ tool_call = None
+ if response_function_call:
+ function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+ name=response_function_call.name,
+ arguments=response_function_call.arguments
+ )
+
+ tool_call = AssistantPromptMessage.ToolCall(
+ id=response_function_call.name,
+ type="function",
+ function=function
+ )
+
+ return tool_call
+
+ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+ """
+ Convert PromptMessage to dict for Upstage API
+ """
+ if isinstance(message, UserPromptMessage):
+ message = cast(UserPromptMessage, message)
+ if isinstance(message.content, str):
+ message_dict = {"role": "user", "content": message.content}
+ else:
+ sub_messages = []
+ for message_content in message.content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ sub_message_dict = {
+ "type": "text",
+ "text": message_content.data
+ }
+ sub_messages.append(sub_message_dict)
+ elif message_content.type == PromptMessageContentType.IMAGE:
+ message_content = cast(ImagePromptMessageContent, message_content)
+ sub_message_dict = {
+ "type": "image_url",
+ "image_url": {
+ "url": message_content.data,
+ "detail": message_content.detail.value
+ }
+ }
+ sub_messages.append(sub_message_dict)
+
+ message_dict = {"role": "user", "content": sub_messages}
+ elif isinstance(message, AssistantPromptMessage):
+ message = cast(AssistantPromptMessage, message)
+ message_dict = {"role": "assistant", "content": message.content}
+ if message.tool_calls:
+ # message_dict["tool_calls"] = [tool_call.dict() for tool_call in
+ # message.tool_calls]
+ function_call = message.tool_calls[0]
+ message_dict["function_call"] = {
+ "name": function_call.function.name,
+ "arguments": function_call.function.arguments,
+ }
+ elif isinstance(message, SystemPromptMessage):
+ message = cast(SystemPromptMessage, message)
+ message_dict = {"role": "system", "content": message.content}
+ elif isinstance(message, ToolPromptMessage):
+ message = cast(ToolPromptMessage, message)
+ # message_dict = {
+ # "role": "tool",
+ # "content": message.content,
+ # "tool_call_id": message.tool_call_id
+ # }
+ message_dict = {
+ "role": "function",
+ "content": message.content,
+ "name": message.tool_call_id
+ }
+ else:
+ raise ValueError(f"Got unknown type {message}")
+
+ if message.name:
+ message_dict["name"] = message.name
+
+ return message_dict
+
+ def _get_tokenizer(self) -> Tokenizer:
+ return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+ def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
+ """
+ Calculate num tokens for solar with Huggingface Solar tokenizer.
+ Solar tokenizer is opened in huggingface https://huggingface.co/upstage/solar-1-mini-tokenizer
+ """
+ tokenizer = self._get_tokenizer()
+ tokens_per_message = 5 # <|im_start|>{role}\n{message}<|im_end|>
+ tokens_prefix = 1 # <|startoftext|>
+ tokens_suffix = 3 # <|im_start|>assistant\n
+
+ num_tokens = 0
+ num_tokens += tokens_prefix
+
+ messages_dict = [self._convert_prompt_message_to_dict(message) for message in messages]
+ for message in messages_dict:
+ num_tokens += tokens_per_message
+ for key, value in message.items():
+ if isinstance(value, list):
+ text = ''
+ for item in value:
+ if isinstance(item, dict) and item['type'] == 'text':
+ text += item['text']
+ value = text
+
+ if key == "tool_calls":
+ for tool_call in value:
+ for t_key, t_value in tool_call.items():
+ num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+ if t_key == "function":
+ for f_key, f_value in t_value.items():
+ num_tokens += len(tokenizer.encode(f_key, add_special_tokens=False))
+ num_tokens += len(tokenizer.encode(f_value, add_special_tokens=False))
+ else:
+ num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+ num_tokens += len(tokenizer.encode(t_value, add_special_tokens=False))
+ else:
+ num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))
+ num_tokens += tokens_suffix
+
+ if tools:
+ num_tokens += self._num_tokens_for_tools(tokenizer, tools)
+
+ return num_tokens
+
+ def _num_tokens_for_tools(self, tokenizer: Tokenizer, tools: list[PromptMessageTool]) -> int:
+ """
+ Calculate num tokens for tool calling with upstage tokenizer.
+
+ :param tokenizer: huggingface tokenizer
+ :param tools: tools for tool calling
+ :return: number of tokens
+ """
+ num_tokens = 0
+ for tool in tools:
+ num_tokens += len(tokenizer.encode('type'))
+ num_tokens += len(tokenizer.encode('function'))
+
+ # calculate num tokens for function object
+ num_tokens += len(tokenizer.encode('name'))
+ num_tokens += len(tokenizer.encode(tool.name))
+ num_tokens += len(tokenizer.encode('description'))
+ num_tokens += len(tokenizer.encode(tool.description))
+ parameters = tool.parameters
+ num_tokens += len(tokenizer.encode('parameters'))
+ if 'title' in parameters:
+ num_tokens += len(tokenizer.encode('title'))
+ num_tokens += len(tokenizer.encode(parameters.get("title")))
+ num_tokens += len(tokenizer.encode('type'))
+ num_tokens += len(tokenizer.encode(parameters.get("type")))
+ if 'properties' in parameters:
+ num_tokens += len(tokenizer.encode('properties'))
+ for key, value in parameters.get('properties').items():
+ num_tokens += len(tokenizer.encode(key))
+ for field_key, field_value in value.items():
+ num_tokens += len(tokenizer.encode(field_key))
+ if field_key == 'enum':
+ for enum_field in field_value:
+ num_tokens += 3
+ num_tokens += len(tokenizer.encode(enum_field))
+ else:
+ num_tokens += len(tokenizer.encode(field_key))
+ num_tokens += len(tokenizer.encode(str(field_value)))
+ if 'required' in parameters:
+ num_tokens += len(tokenizer.encode('required'))
+ for required_field in parameters['required']:
+ num_tokens += 3
+ num_tokens += len(tokenizer.encode(required_field))
+
+ return num_tokens
diff --git a/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml b/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml
new file mode 100644
index 00000000000000..787ac83f8ad92d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml
@@ -0,0 +1,43 @@
+model: solar-1-mini-chat
+label:
+ zh_Hans: solar-1-mini-chat
+ en_US: solar-1-mini-chat
+ ko_KR: solar-1-mini-chat
+model_type: llm
+features:
+ - multi-tool-call
+ - agent-thought
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 32768
+ - name: seed
+ label:
+ zh_Hans: 种子
+ en_US: Seed
+ type: int
+ help:
+ zh_Hans:
+ 如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+ 响应参数来监视变化。
+ en_US:
+ If specified, model will make a best effort to sample deterministically,
+ such that repeated requests with the same seed and parameters should return
+ the same result. Determinism is not guaranteed, and you should refer to the
+ system_fingerprint response parameter to monitor changes in the backend.
+ required: false
+pricing:
+ input: "0.5"
+ output: "0.5"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py b/api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml
new file mode 100644
index 00000000000000..d838a5bbb1bbfd
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml
@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-passage
+model_type: text-embedding
+model_properties:
+ context_size: 4000
+ max_chunks: 32
+pricing:
+ input: '0.1'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml
new file mode 100644
index 00000000000000..c77645cffdd8f4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml
@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-query
+model_type: text-embedding
+model_properties:
+ context_size: 4000
+ max_chunks: 32
+pricing:
+ input: '0.1'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
new file mode 100644
index 00000000000000..05ae8665d65bdd
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
@@ -0,0 +1,195 @@
+import base64
+import time
+from collections.abc import Mapping
+from typing import Union
+
+import numpy as np
+from openai import OpenAI
+from tokenizers import Tokenizer
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+
+class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
+ """
+ Model class for Upstage text embedding model.
+ """
+ def _get_tokenizer(self) -> Tokenizer:
+ return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+ def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :return: embeddings result
+ """
+
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+ if user:
+ extra_model_kwargs["user"] = user
+ extra_model_kwargs["encoding_format"] = "base64"
+
+ context_size = self._get_context_size(model, credentials)
+ max_chunks = self._get_max_chunks(model, credentials)
+
+ embeddings: list[list[float]] = [[] for _ in range(len(texts))]
+ tokens = []
+ indices = []
+ used_tokens = 0
+
+ tokenizer = self._get_tokenizer()
+
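+        # split each input text into chunks of at most `context_size` tokens,
+        # remembering which original text each chunk came from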
+ for i, text in enumerate(texts):
+ token = tokenizer.encode(text, add_special_tokens=False).tokens
+ for j in range(0, len(token), context_size):
+ tokens += [token[j:j+context_size]]
+ indices += [i]
+
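+        # embed the chunks in batches of at most `max_chunks` per request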
+ batched_embeddings = []
+ _iter = range(0, len(tokens), max_chunks)
+
+ for i in _iter:
+ embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=tokens[i:i+max_chunks],
+ extra_model_kwargs=extra_model_kwargs,
+ )
+
+ used_tokens += embedding_used_tokens
+ batched_embeddings += embeddings_batch
+
+ results: list[list[list[float]]] = [[] for _ in range(len(texts))]
+ num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
+
+ for i in range(len(indices)):
+ results[indices[i]].append(batched_embeddings[i])
+ num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+
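+        # combine chunk embeddings per text with a token-count-weighted average
+        # and L2-normalize; texts that produced no chunks are embedded directly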
+ for i in range(len(texts)):
+ _result = results[i]
+ if len(_result) == 0:
+ embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=[texts[i]],
+ extra_model_kwargs=extra_model_kwargs,
+ )
+ used_tokens += embedding_used_tokens
+ average = embeddings_batch[0]
+ else:
+ average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+ embeddings[i] = (average / np.linalg.norm(average)).tolist()
+
+ usage = self._calc_response_usage(
+ model=model,
+ credentials=credentials,
+ tokens=used_tokens
+ )
+
+ return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ if len(texts) == 0:
+ return 0
+
+ tokenizer = self._get_tokenizer()
+
+ total_num_tokens = 0
+ for text in texts:
+ # calculate the number of tokens in the encoded text
+ tokenized_text = tokenizer.encode(text)
+ total_num_tokens += len(tokenized_text)
+
+ return total_num_tokens
+
+ def validate_credentials(self, model: str, credentials: Mapping) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ # call embedding model
+ self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=['ping'],
+ extra_model_kwargs={}
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict) -> tuple[list[list[float]], int]:
+ """
+ Invoke embedding model
+ :param model: model name
+ :param client: model client
+ :param texts: texts to embed
+ :param extra_model_kwargs: extra model kwargs
+ :return: embeddings and used tokens
+ """
+ response = client.embeddings.create(
+ model=model,
+ input=texts,
+ **extra_model_kwargs
+ )
+
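+        # base64-encoded embeddings are decoded back into float32 vectors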
+ if 'encoding_format' in extra_model_kwargs and extra_model_kwargs['encoding_format'] == 'base64':
+ return ([list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32)) for embedding in response.data], response.usage.total_tokens)
+
+ return [data.embedding for data in response.data], response.usage.total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ tokens=tokens,
+ price_type=PriceType.INPUT
+ )
+
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/upstage/upstage.py b/api/core/model_runtime/model_providers/upstage/upstage.py
new file mode 100644
index 00000000000000..56c91c00618922
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/upstage.py
@@ -0,0 +1,32 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class UpstageProvider(ModelProvider):
+
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+ if validate failed, raise exception
+
+        :param credentials: provider credentials, as defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.LLM)
+
+ model_instance.validate_credentials(
+ model="solar-1-mini-chat",
+ credentials=credentials
+ )
+        except CredentialsValidateFailedError as e:
+            raise e
+        except Exception as e:
+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+            raise e
+
diff --git a/api/core/model_runtime/model_providers/upstage/upstage.yaml b/api/core/model_runtime/model_providers/upstage/upstage.yaml
new file mode 100644
index 00000000000000..837667cfa9b41f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/upstage/upstage.yaml
@@ -0,0 +1,49 @@
+provider: upstage
+label:
+ en_US: Upstage
+description:
+ en_US: Models provided by Upstage, such as Solar-1-mini-chat.
+ zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat.
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.svg
+background: "#FFFFF"
+help:
+ title:
+ en_US: Get your API Key from Upstage
+ zh_Hans: 从 Upstage 获取 API Key
+ url:
+ en_US: https://console.upstage.ai/api-keys
+supported_model_types:
+ - llm
+ - text-embedding
+configurate_methods:
+ - predefined-model
+model_credential_schema:
+ model:
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ placeholder:
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
+ credential_form_schemas:
+ - variable: upstage_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: upstage_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh
index 9cf5c505d138af..64e4e719ab9be8 100755
--- a/api/docker/entrypoint.sh
+++ b/api/docker/entrypoint.sh
@@ -4,7 +4,7 @@ set -e
if [[ "${MIGRATION_ENABLED}" == "true" ]]; then
echo "Running migrations"
- flask upgrade-db
+ flask db upgrade
fi
if [[ "${MODE}" == "worker" ]]; then
diff --git a/api/pyproject.toml b/api/pyproject.toml
index c2c1d56403acd0..567d2677897fcb 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -73,6 +73,7 @@ quote-style = "single"
[tool.pytest_env]
OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
+UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"
diff --git a/api/tests/integration_tests/model_runtime/upstage/__init__.py b/api/tests/integration_tests/model_runtime/upstage/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/api/tests/integration_tests/model_runtime/upstage/test_llm.py b/api/tests/integration_tests/model_runtime/upstage/test_llm.py
new file mode 100644
index 00000000000000..c35580a8b1ec00
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/upstage/test_llm.py
@@ -0,0 +1,245 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+ AssistantPromptMessage,
+ PromptMessageTool,
+ SystemPromptMessage,
+ UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage.llm.llm import UpstageLargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+def test_predefined_models():
+ model = UpstageLargeLanguageModel()
+ model_schemas = model.predefined_models()
+
+ assert len(model_schemas) >= 1
+ assert isinstance(model_schemas[0], AIModelEntity)
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock):
+ model = UpstageLargeLanguageModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+        # the model name is set to gpt-3.5-turbo because of the shared OpenAI mock
+ model.validate_credentials(
+ model='gpt-3.5-turbo',
+ credentials={
+ 'upstage_api_key': 'invalid_key'
+ }
+ )
+
+ model.validate_credentials(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ }
+ )
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model(setup_openai_mock):
+ model = UpstageLargeLanguageModel()
+
+ result = model.invoke(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ },
+ prompt_messages=[
+ SystemPromptMessage(
+ content='You are a helpful AI assistant.',
+ ),
+ UserPromptMessage(
+ content='Hello World!'
+ )
+ ],
+ model_parameters={
+ 'temperature': 0.0,
+ 'top_p': 1.0,
+ 'presence_penalty': 0.0,
+ 'frequency_penalty': 0.0,
+ 'max_tokens': 10
+ },
+ stop=['How'],
+ stream=False,
+ user="abc-123"
+ )
+
+ assert isinstance(result, LLMResult)
+ assert len(result.message.content) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model_with_tools(setup_openai_mock):
+ model = UpstageLargeLanguageModel()
+
+ result = model.invoke(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ },
+ prompt_messages=[
+ SystemPromptMessage(
+ content='You are a helpful AI assistant.',
+ ),
+ UserPromptMessage(
+ content="what's the weather today in London?",
+ )
+ ],
+ model_parameters={
+ 'temperature': 0.0,
+ 'max_tokens': 100
+ },
+ tools=[
+ PromptMessageTool(
+ name='get_weather',
+ description='Determine weather in my location',
+ parameters={
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "enum": [
+ "c",
+ "f"
+ ]
+ }
+ },
+ "required": [
+ "location"
+ ]
+ }
+ ),
+ PromptMessageTool(
+ name='get_stock_price',
+ description='Get the current stock price',
+ parameters={
+ "type": "object",
+ "properties": {
+ "symbol": {
+ "type": "string",
+ "description": "The stock symbol"
+ }
+ },
+ "required": [
+ "symbol"
+ ]
+ }
+ )
+ ],
+ stream=False,
+ user="abc-123"
+ )
+
+ assert isinstance(result, LLMResult)
+ assert isinstance(result.message, AssistantPromptMessage)
+ assert len(result.message.tool_calls) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock):
+ model = UpstageLargeLanguageModel()
+
+ result = model.invoke(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ },
+ prompt_messages=[
+ SystemPromptMessage(
+ content='You are a helpful AI assistant.',
+ ),
+ UserPromptMessage(
+ content='Hello World!'
+ )
+ ],
+ model_parameters={
+ 'temperature': 0.0,
+ 'max_tokens': 100
+ },
+ stream=True,
+ user="abc-123"
+ )
+
+ assert isinstance(result, Generator)
+
+ for chunk in result:
+ assert isinstance(chunk, LLMResultChunk)
+ assert isinstance(chunk.delta, LLMResultChunkDelta)
+ assert isinstance(chunk.delta.message, AssistantPromptMessage)
+ assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+ if chunk.delta.finish_reason is not None:
+ assert chunk.delta.usage is not None
+ assert chunk.delta.usage.completion_tokens > 0
+
+
+def test_get_num_tokens():
+ model = UpstageLargeLanguageModel()
+
+ num_tokens = model.get_num_tokens(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ },
+ prompt_messages=[
+ UserPromptMessage(
+ content='Hello World!'
+ )
+ ]
+ )
+
+ assert num_tokens == 13
+
+ num_tokens = model.get_num_tokens(
+ model='solar-1-mini-chat',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ },
+ prompt_messages=[
+ SystemPromptMessage(
+ content='You are a helpful AI assistant.',
+ ),
+ UserPromptMessage(
+ content='Hello World!'
+ )
+ ],
+ tools=[
+ PromptMessageTool(
+ name='get_weather',
+ description='Determine weather in my location',
+ parameters={
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "enum": [
+ "c",
+ "f"
+ ]
+ }
+ },
+ "required": [
+ "location"
+ ]
+ }
+ ),
+ ]
+ )
+
+ assert num_tokens == 106
diff --git a/api/tests/integration_tests/model_runtime/upstage/test_provider.py b/api/tests/integration_tests/model_runtime/upstage/test_provider.py
new file mode 100644
index 00000000000000..c33eef49b2a79e
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/upstage/test_provider.py
@@ -0,0 +1,23 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.upstage import UpstageProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
+ provider = UpstageProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(
+ credentials={}
+ )
+
+ provider.validate_provider_credentials(
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ }
+ )
diff --git a/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py b/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py
new file mode 100644
index 00000000000000..54135a0e748d40
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py
@@ -0,0 +1,67 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.text_embedding.text_embedding import UpstageTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+ model = UpstageTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model='solar-embedding-1-large-passage',
+ credentials={
+ 'upstage_api_key': 'invalid_key'
+ }
+ )
+
+ model.validate_credentials(
+ model='solar-embedding-1-large-passage',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+ }
+ )
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_invoke_model(setup_openai_mock):
+ model = UpstageTextEmbeddingModel()
+
+ result = model.invoke(
+ model='solar-embedding-1-large-passage',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+ },
+ texts=[
+ "hello",
+ "world",
+ " ".join(["long_text"] * 100),
+ " ".join(["another_long_text"] * 100)
+ ],
+ user="abc-123"
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 4
+ assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+ model = UpstageTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model='solar-embedding-1-large-passage',
+ credentials={
+ 'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+ },
+ texts=[
+ "hello",
+ "world"
+ ]
+ )
+
+ assert num_tokens == 5
diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh
index 2e113346c728b4..aba13292ab8315 100755
--- a/dev/pytest/pytest_model_runtime.sh
+++ b/dev/pytest/pytest_model_runtime.sh
@@ -5,4 +5,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/azure_openai \
api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
- api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py
+ api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
+ api/tests/integration_tests/model_runtime/upstage
+