From 56b43f62d1a7d677fa5d1200efbc43f0535a3f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E7=BE=BD?= <821760648@qq.com> Date: Wed, 31 Jul 2024 21:24:02 +0800 Subject: [PATCH] feat: nvidia add llama3.1 model (#6844) --- .../model_providers/nvidia/llm/_position.yaml | 3 ++ .../nvidia/llm/llama-3.1-405b.yaml | 36 +++++++++++++++++++ .../nvidia/llm/llama-3.1-70b.yaml | 36 +++++++++++++++++++ .../nvidia/llm/llama-3.1-8b.yaml | 36 +++++++++++++++++++ .../model_providers/nvidia/llm/llm.py | 3 ++ 5 files changed, 114 insertions(+) create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml index 2401f2a890378e..6cc197b70b7031 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml @@ -2,6 +2,9 @@ - google/codegemma-7b - google/recurrentgemma-2b - meta/llama2-70b +- meta/llama-3.1-8b-instruct +- meta/llama-3.1-70b-instruct +- meta/llama-3.1-405b-instruct - meta/llama3-8b-instruct - meta/llama3-70b-instruct - mistralai/mistral-large diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml new file mode 100644 index 00000000000000..5472de99027643 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-405b-instruct +label: + zh_Hans: meta/llama-3.1-405b-instruct + en_US: meta/llama-3.1-405b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + 
min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml new file mode 100644 index 00000000000000..16af0554a1ef41 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-70b-instruct +label: + zh_Hans: meta/llama-3.1-70b-instruct + en_US: meta/llama-3.1-70b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml new file mode 100644 index 00000000000000..f2d43dc30edf89 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-8b-instruct +label: + zh_Hans: meta/llama-3.1-8b-instruct + en_US: meta/llama-3.1-8b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + 
use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py index 11252b92115df7..494b7374f5ae28 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -31,6 +31,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel): 'meta/llama2-70b': '', 'meta/llama3-8b-instruct': '', 'meta/llama3-70b-instruct': '', + 'meta/llama-3.1-8b-instruct': '', + 'meta/llama-3.1-70b-instruct': '', + 'meta/llama-3.1-405b-instruct': '', 'google/recurrentgemma-2b': '' }