From 23ed15d19f138fd0bf555f17303fdafb1d74c3f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E7=BE=BD?= <xiaoyu@sulg.top>
Date: Tue, 6 Aug 2024 02:16:41 +0000
Subject: [PATCH] feat:nvidia add nemotron4-340b and microsoft/phi-3 (#6973)

---
 .../model_providers/nvidia/llm/_position.yaml |  3 ++
 .../model_providers/nvidia/llm/llm.py         |  6 ++--
 .../nvidia/llm/nemotron-4-340b-instruct.yaml  | 36 +++++++++++++++++++
 .../llm/phi-3-medium-128k-instruct.yaml       | 36 +++++++++++++++++++
 .../nvidia/llm/phi-3-mini-128k-instruct.yaml  | 36 +++++++++++++++++++
 5 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml

diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
index 6cc197b70b7031..ad01d430d61c79 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -10,5 +10,8 @@
 - mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
 - mistralai/mixtral-8x22b-instruct-v0.1
+- nvidia/nemotron-4-340b-instruct
+- microsoft/phi-3-medium-128k-instruct
+- microsoft/phi-3-mini-128k-instruct
 - fuyu-8b
 - snowflake/arctic
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
index 494b7374f5ae28..bc42eaca658bac 100644
--- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -34,8 +34,10 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
         'meta/llama-3.1-8b-instruct': '',
         'meta/llama-3.1-70b-instruct': '',
         'meta/llama-3.1-405b-instruct': '',
-        'google/recurrentgemma-2b': ''
-        
+        'google/recurrentgemma-2b': '',
+        'nvidia/nemotron-4-340b-instruct': '',
+        'microsoft/phi-3-medium-128k-instruct':'',
+        'microsoft/phi-3-mini-128k-instruct':''
     }
 
     def _invoke(self, model: str, credentials: dict,
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml b/api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml
new file mode 100644
index 00000000000000..e5537cd2fd9dc8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml
@@ -0,0 +1,36 @@
+model: nvidia/nemotron-4-340b-instruct  # same id appears in _position.yaml and llm.py
+label:  # both locales display the raw model id
+  zh_Hans: nvidia/nemotron-4-340b-instruct
+  en_US: nvidia/nemotron-4-340b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096  # Nemotron-4 340B has a 4,096-token context window (not 128K)
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096  # upper bound equals context_size
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
new file mode 100644
index 00000000000000..0c5538d1350613
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
@@ -0,0 +1,36 @@
+model: microsoft/phi-3-medium-128k-instruct  # same id appears in _position.yaml and llm.py
+label:  # both locales display the raw model id
+  zh_Hans: microsoft/phi-3-medium-128k-instruct
+  en_US: microsoft/phi-3-medium-128k-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072  # 128 * 1024 tokens, i.e. the "128k" in the model name
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096  # completion cap, well below context_size
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
new file mode 100644
index 00000000000000..1eb1c51d01157c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
@@ -0,0 +1,36 @@
+model: microsoft/phi-3-mini-128k-instruct  # same id appears in _position.yaml and llm.py
+label:  # both locales display the raw model id
+  zh_Hans: microsoft/phi-3-mini-128k-instruct
+  en_US: microsoft/phi-3-mini-128k-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072  # 128 * 1024 tokens, i.e. the "128k" in the model name
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096  # completion cap, well below context_size
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0