diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index b52df3e4e3fdee..8703a97edd1133 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,4 +1,5 @@
 - Tencent/Hunyuan-A52B-Instruct
+- Qwen/QwQ-32B-Preview
 - Qwen/Qwen2.5-72B-Instruct
 - Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-14B-Instruct
@@ -19,6 +20,7 @@
 - 01-ai/Yi-1.5-6B-Chat
 - internlm/internlm2_5-20b-chat
 - internlm/internlm2_5-7b-chat
+- meta-llama/Llama-3.3-70B-Instruct
 - meta-llama/Meta-Llama-3.1-405B-Instruct
 - meta-llama/Meta-Llama-3.1-70B-Instruct
 - meta-llama/Meta-Llama-3.1-8B-Instruct
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-llama-3.3-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-llama-3.3-70b-instruct.yaml
new file mode 100644
index 00000000000000..9373a8f4ca9f4a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-llama-3.3-70b-instruct.yaml
@@ -0,0 +1,53 @@
+model: meta-llama/Llama-3.3-70B-Instruct
+label:
+  en_US: meta-llama/Llama-3.3-70B-Instruct
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output.
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '4.13'
+  output: '4.13'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen-qwq-32B-preview.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen-qwq-32B-preview.yaml
new file mode 100644
index 00000000000000..c949de4d75604c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen-qwq-32B-preview.yaml
@@ -0,0 +1,53 @@
+model: Qwen/QwQ-32B-Preview
+label:
+  en_US: Qwen/QwQ-32B-Preview
+model_type: llm
+features:
+  - agent-thought
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output.
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
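
For reviewers, a quick standalone sanity check of the two new YAMLs (not part of this diff; a minimal sketch assuming PyYAML is installed and the script runs from the repo root, and not a stand-in for Dify's actual schema validation):

    # sanity_check.py -- illustrative only, not Dify's model loader.
    import yaml

    BASE = "api/core/model_runtime/model_providers/siliconflow/llm/"

    def check_model_yaml(filename: str) -> dict:
        with open(BASE + filename, encoding="utf-8") as f:
            spec = yaml.safe_load(f)
        # Every parameter rule carries either a use_template reference or
        # an explicit type; a rule with neither cannot be rendered.
        for rule in spec["parameter_rules"]:
            assert "use_template" in rule or "type" in rule, rule["name"]
        # pricing.unit is a per-token multiplier: input '4.13' with unit
        # '0.000001' reads as 4.13 RMB per 1,000,000 tokens.
        per_token = float(spec["pricing"]["input"]) * float(spec["pricing"]["unit"])
        print(spec["model"], spec["model_properties"]["context_size"], per_token)
        return spec

    # _position.yaml is a flat list of model ids; both new models must
    # appear in it (first hunk above) to show up in the provider's ordering.
    with open(BASE + "_position.yaml", encoding="utf-8") as f:
        positions = yaml.safe_load(f)
    for name in ("meta-llama-3.3-70b-instruct.yaml", "qwen-qwq-32B-preview.yaml"):
        assert check_model_yaml(name)["model"] in positions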