From 56b43f62d1a7d677fa5d1200efbc43f0535a3f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E7=BE=BD?= <821760648@qq.com> Date: Wed, 31 Jul 2024 21:24:02 +0800 Subject: [PATCH] feat: nvidia add llama3.1 model (#6844) --- .../model_providers/nvidia/llm/_position.yaml | 3 ++ .../nvidia/llm/llama-3.1-405b.yaml | 36 +++++++++++++++++++ .../nvidia/llm/llama-3.1-70b.yaml | 36 +++++++++++++++++++ .../nvidia/llm/llama-3.1-8b.yaml | 36 +++++++++++++++++++ .../model_providers/nvidia/llm/llm.py | 3 ++ 5 files changed, 114 insertions(+) create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml index 2401f2a890378e..6cc197b70b7031 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml @@ -2,6 +2,9 @@ - google/codegemma-7b - google/recurrentgemma-2b - meta/llama2-70b +- meta/llama-3.1-8b-instruct +- meta/llama-3.1-70b-instruct +- meta/llama-3.1-405b-instruct - meta/llama3-8b-instruct - meta/llama3-70b-instruct - mistralai/mistral-large diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml new file mode 100644 index 00000000000000..5472de99027643 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-405b-instruct +label: + zh_Hans: meta/llama-3.1-405b-instruct + en_US: meta/llama-3.1-405b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + 
min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml new file mode 100644 index 00000000000000..16af0554a1ef41 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-70b-instruct +label: + zh_Hans: meta/llama-3.1-70b-instruct + en_US: meta/llama-3.1-70b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml new file mode 100644 index 00000000000000..f2d43dc30edf89 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-8b-instruct +label: + zh_Hans: meta/llama-3.1-8b-instruct + en_US: meta/llama-3.1-8b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + 
use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py index 11252b92115df7..494b7374f5ae28 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -31,6 +31,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel): 'meta/llama2-70b': '', 'meta/llama3-8b-instruct': '', 'meta/llama3-70b-instruct': '', + 'meta/llama-3.1-8b-instruct': '', + 'meta/llama-3.1-70b-instruct': '', + 'meta/llama-3.1-405b-instruct': '', 'google/recurrentgemma-2b': '' }