diff --git a/api/.vscode/launch.json b/api/.vscode/launch.json index e3c1f797c61601..cb718f9f60abcf 100644 --- a/api/.vscode/launch.json +++ b/api/.vscode/launch.json @@ -4,6 +4,21 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Celery", + "type": "python", + "request": "launch", + "module": "celery", + "justMyCode": true, + "args": ["-A", "app.celery", "worker", "-P", "gevent", "-c", "1", "--loglevel", "info", "-Q", "dataset,generation,mail"], + "envFile": "${workspaceFolder}/.env", + "env": { + "FLASK_APP": "app.py", + "FLASK_DEBUG": "1", + "GEVENT_SUPPORT": "True" + }, + "console": "integratedTerminal" + }, { "name": "Python: Flask", "type": "python", diff --git a/api/core/model_runtime/model_providers/chatglm/llm/llm.py b/api/core/model_runtime/model_providers/chatglm/llm/llm.py index 5c03829b0b4fa6..6884ede2bc692d 100644 --- a/api/core/model_runtime/model_providers/chatglm/llm/llm.py +++ b/api/core/model_runtime/model_providers/chatglm/llm/llm.py @@ -81,21 +81,11 @@ def validate_credentials(self, model: str, credentials: dict) -> None: :return: """ try: - response = post(join(credentials['api_base'], "v1/chat/completions"), data=dumps({ - "model": model, - "messages": [ - { - "role": "user", - "content": "ping" - } - ], - }), - headers={ - "Content-Type": "application/json", - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0" + self._invoke(model=model, credentials=credentials, prompt_messages=[ + UserPromptMessage(content="ping"), + ], model_parameters={ + "max_tokens": 16, }) - if response.status_code != 200: - raise CredentialsValidateFailedError("Invalid credentials") except Exception as e: raise CredentialsValidateFailedError(str(e)) diff --git a/api/tests/integration_tests/.env.example b/api/tests/integration_tests/.env.example index 5f19663eb42fa4..89080b07881b38 100644 --- a/api/tests/integration_tests/.env.example +++ b/api/tests/integration_tests/.env.example @@ -60,4 +60,7 @@ LOCALAI_SERVER_URL= COHERE_API_KEY= # Jina Credentials -JINA_API_KEY= \ No newline at end of file +JINA_API_KEY= + +# Mock Switch +MOCK_SWITCH=false \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/anthropic.py b/api/tests/integration_tests/model_runtime/__mock/anthropic.py new file mode 100644 index 00000000000000..34127515a01776 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/anthropic.py @@ -0,0 +1,68 @@ +import anthropic +from anthropic import Anthropic +from anthropic.resources.completions import Completions +from anthropic.types import completion_create_params, Completion +from anthropic._types import NOT_GIVEN, NotGiven, Headers, Query, Body + +from _pytest.monkeypatch import MonkeyPatch + +from typing import List, Union, Literal, Any, Generator +from time import sleep + +import pytest +import os + +MOCK = os.getenv('MOCK_SWITCH', 'false') == 'true' + +class MockAnthropicClass(object): + @staticmethod + def mocked_anthropic_chat_create_sync(model: str) -> Completion: + return Completion( + completion='hello, I\'m a chatbot from anthropic', + model=model, + stop_reason='stop_sequence' + ) + + @staticmethod + def mocked_anthropic_chat_create_stream(model: str) -> Generator[Completion, None, None]: + full_response_text = "hello, I'm a chatbot from anthropic" + + for i in range(0, len(full_response_text) + 1): + sleep(0.1) + if i == 
len(full_response_text): + yield Completion( + completion='', + model=model, + stop_reason='stop_sequence' + ) + else: + yield Completion( + completion=full_response_text[i], + model=model, + stop_reason='' + ) + + def mocked_anthropic(self: Completions, *, + max_tokens_to_sample: int, + model: Union[str, Literal["claude-2.1", "claude-instant-1"]], + prompt: str, + stream: Literal[True], + **kwargs: Any + ) -> Union[Completion, Generator[Completion, None, None]]: + if len(self._client.api_key) < 18: + raise anthropic.AuthenticationError('Invalid API key') + + if stream: + return MockAnthropicClass.mocked_anthropic_chat_create_stream(model=model) + else: + return MockAnthropicClass.mocked_anthropic_chat_create_sync(model=model) + +@pytest.fixture +def setup_anthropic_mock(request, monkeypatch: MonkeyPatch): + if MOCK: + monkeypatch.setattr(Completions, 'create', MockAnthropicClass.mocked_anthropic) + + yield + + if MOCK: + monkeypatch.undo() \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/google.py b/api/tests/integration_tests/model_runtime/__mock/google.py new file mode 100644 index 00000000000000..6a16586c83e5d9 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/google.py @@ -0,0 +1,127 @@ +from google.generativeai import GenerativeModel +from google.generativeai.types import GenerateContentResponse +from google.generativeai.types.generation_types import BaseGenerateContentResponse +import google.generativeai.types.generation_types as generation_config_types +import google.generativeai.types.content_types as content_types +import google.generativeai.types.safety_types as safety_types +from google.generativeai.client import _ClientManager, configure + +from google.ai import generativelanguage as glm + +from typing import Generator, List +from _pytest.monkeypatch import MonkeyPatch + +import pytest + +current_api_key = '' + +class MockGoogleResponseClass(object): + _done = False + + def __iter__(self): + full_response_text = 'it\'s google!' + + for i in range(0, len(full_response_text) + 1, 1): + if i == len(full_response_text): + self._done = True + yield GenerateContentResponse( + done=True, + iterator=None, + result=glm.GenerateContentResponse({ + + }), + chunks=[] + ) + else: + yield GenerateContentResponse( + done=False, + iterator=None, + result=glm.GenerateContentResponse({ + + }), + chunks=[] + ) + +class MockGoogleResponseCandidateClass(object): + finish_reason = 'stop' + +class MockGoogleClass(object): + @staticmethod + def generate_content_sync() -> GenerateContentResponse: + return GenerateContentResponse( + done=True, + iterator=None, + result=glm.GenerateContentResponse({ + + }), + chunks=[] + ) + + @staticmethod + def generate_content_stream() -> Generator[GenerateContentResponse, None, None]: + return MockGoogleResponseClass() + + def generate_content(self: GenerativeModel, + contents: content_types.ContentsType, + *, + generation_config: generation_config_types.GenerationConfigType | None = None, + safety_settings: safety_types.SafetySettingOptions | None = None, + stream: bool = False, + **kwargs, + ) -> GenerateContentResponse: + global current_api_key + + if len(current_api_key) < 16: + raise Exception('Invalid API key') + + if stream: + return MockGoogleClass.generate_content_stream() + + return MockGoogleClass.generate_content_sync() + + @property + def generative_response_text(self) -> str: + return 'it\'s google!' 
+ + @property + def generative_response_candidates(self) -> List[MockGoogleResponseCandidateClass]: + return [MockGoogleResponseCandidateClass()] + + def make_client(self: _ClientManager, name: str): + global current_api_key + + if name.endswith("_async"): + name = name.split("_")[0] + cls = getattr(glm, name.title() + "ServiceAsyncClient") + else: + cls = getattr(glm, name.title() + "ServiceClient") + + # Attempt to configure using defaults. + if not self.client_config: + configure() + + client_options = self.client_config.get("client_options", None) + if client_options: + current_api_key = client_options.api_key + + def nop(self, *args, **kwargs): + pass + + original_init = cls.__init__ + cls.__init__ = nop + client: glm.GenerativeServiceClient = cls(**self.client_config) + cls.__init__ = original_init + + if not self.default_metadata: + return client + +@pytest.fixture +def setup_google_mock(request, monkeypatch: MonkeyPatch): + monkeypatch.setattr(BaseGenerateContentResponse, "text", MockGoogleClass.generative_response_text) + monkeypatch.setattr(BaseGenerateContentResponse, "candidates", MockGoogleClass.generative_response_candidates) + monkeypatch.setattr(GenerativeModel, "generate_content", MockGoogleClass.generate_content) + monkeypatch.setattr(_ClientManager, "make_client", MockGoogleClass.make_client) + + yield + + monkeypatch.undo() \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai.py b/api/tests/integration_tests/model_runtime/__mock/openai.py new file mode 100644 index 00000000000000..d4b9de5c510147 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai.py @@ -0,0 +1,63 @@ +from tests.integration_tests.model_runtime.__mock.openai_completion import MockCompletionsClass +from tests.integration_tests.model_runtime.__mock.openai_chat import MockChatClass +from tests.integration_tests.model_runtime.__mock.openai_remote import MockModelClass +from tests.integration_tests.model_runtime.__mock.openai_moderation import MockModerationClass +from tests.integration_tests.model_runtime.__mock.openai_speech2text import MockSpeech2TextClass +from tests.integration_tests.model_runtime.__mock.openai_embeddings import MockEmbeddingsClass +from openai.resources.completions import Completions +from openai.resources.chat import Completions as ChatCompletions +from openai.resources.models import Models +from openai.resources.moderations import Moderations +from openai.resources.audio.transcriptions import Transcriptions +from openai.resources.embeddings import Embeddings + +# import monkeypatch +from _pytest.monkeypatch import MonkeyPatch +from typing import Literal, Callable, List + +import os +import pytest + +def mock_openai(monkeypatch: MonkeyPatch, methods: List[Literal["completion", "chat", "remote", "moderation", "speech2text", "text_embedding"]]) -> Callable[[], None]: + """ + mock openai module + + :param monkeypatch: pytest monkeypatch fixture + :return: unpatch function + """ + def unpatch() -> None: + monkeypatch.undo() + + if "completion" in methods: + monkeypatch.setattr(Completions, "create", MockCompletionsClass.completion_create) + + if "chat" in methods: + monkeypatch.setattr(ChatCompletions, "create", MockChatClass.chat_create) + + if "remote" in methods: + monkeypatch.setattr(Models, "list", MockModelClass.list) + + if "moderation" in methods: + monkeypatch.setattr(Moderations, "create", MockModerationClass.moderation_create) + + if "speech2text" in methods: + monkeypatch.setattr(Transcriptions, "create", 
MockSpeech2TextClass.speech2text_create) + + if "text_embedding" in methods: + monkeypatch.setattr(Embeddings, "create", MockEmbeddingsClass.create_embeddings) + + return unpatch + + +MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true' + +@pytest.fixture +def setup_openai_mock(request, monkeypatch): + methods = request.param if hasattr(request, 'param') else [] + if MOCK: + unpatch = mock_openai(monkeypatch, methods=methods) + + yield + + if MOCK: + unpatch() \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_chat.py b/api/tests/integration_tests/model_runtime/__mock/openai_chat.py new file mode 100644 index 00000000000000..03e4c14ed5da90 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_chat.py @@ -0,0 +1,235 @@ +from openai import OpenAI +from openai.types import Completion as CompletionMessage +from openai._types import NotGiven, NOT_GIVEN +from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam, \ + ChatCompletionToolChoiceOptionParam, ChatCompletionToolParam, ChatCompletionMessageToolCall +from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall, ChoiceDeltaFunctionCall,\ + Choice, ChoiceDelta, ChoiceDeltaToolCallFunction +from openai.types.chat.chat_completion import Choice as _ChatCompletionChoice, ChatCompletion as _ChatCompletion +from openai.types.chat.chat_completion_message import FunctionCall, ChatCompletionMessage +from openai.types.chat.chat_completion_message_tool_call import Function +from openai.types.completion_usage import CompletionUsage +from openai.resources.chat.completions import Completions +from openai import AzureOpenAI + +import openai.types.chat.completion_create_params as completion_create_params + +# import monkeypatch +from typing import List, Any, Generator, Union, Optional, Literal +from time import time, sleep +from json import dumps, loads + +from core.model_runtime.errors.invoke import InvokeAuthorizationError + +import re + +class MockChatClass(object): + @staticmethod + def generate_function_call( + functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + ) -> Optional[FunctionCall]: + if not functions or len(functions) == 0: + return None + function: completion_create_params.Function = functions[0] + function_name = function['name'] + function_description = function['description'] + function_parameters = function['parameters'] + function_parameters_type = function_parameters['type'] + if function_parameters_type != 'object': + return None + function_parameters_properties = function_parameters['properties'] + function_parameters_required = function_parameters['required'] + parameters = {} + for parameter_name, parameter in function_parameters_properties.items(): + if parameter_name not in function_parameters_required: + continue + parameter_type = parameter['type'] + if parameter_type == 'string': + if 'enum' in parameter: + if len(parameter['enum']) == 0: + continue + parameters[parameter_name] = parameter['enum'][0] + else: + parameters[parameter_name] = 'kawaii' + elif parameter_type == 'integer': + parameters[parameter_name] = 114514 + elif parameter_type == 'number': + parameters[parameter_name] = 1919810.0 + elif parameter_type == 'boolean': + parameters[parameter_name] = True + + return FunctionCall(name=function_name, arguments=dumps(parameters)) + + @staticmethod + def generate_tool_calls( + tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + ) -> 
Optional[List[ChatCompletionMessageToolCall]]:
+        list_tool_calls = []
+        if not tools or len(tools) == 0:
+            return None
+        tool: ChatCompletionToolParam = tools[0]
+
+        if tool['type'] != 'function':
+            return None
+
+        function = tool['function']
+
+        function_call = MockChatClass.generate_function_call(functions=[function])
+        if function_call is None:
+            return None
+
+        list_tool_calls.append(ChatCompletionMessageToolCall(
+            id='sakurajima-mai',
+            function=Function(
+                name=function_call.name,
+                arguments=function_call.arguments,
+            ),
+            type='function'
+        ))
+
+        return list_tool_calls
+
+    @staticmethod
+    def mocked_openai_chat_create_sync(
+        model: str,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+    ) -> CompletionMessage:
+        tool_calls = []
+        function_call = MockChatClass.generate_function_call(functions=functions)
+        if not function_call:
+            tool_calls = MockChatClass.generate_tool_calls(tools=tools)
+
+        sleep(1)
+        return _ChatCompletion(
+            id='cmpl-3QJQa5jXJ5Z5X',
+            choices=[
+                _ChatCompletionChoice(
+                    finish_reason='content_filter',
+                    index=0,
+                    message=ChatCompletionMessage(
+                        content='elaina',
+                        role='assistant',
+                        function_call=function_call,
+                        tool_calls=tool_calls
+                    )
+                )
+            ],
+            created=int(time()),
+            model=model,
+            object='chat.completion',
+            system_fingerprint='',
+            usage=CompletionUsage(
+                prompt_tokens=2,
+                completion_tokens=1,
+                total_tokens=3,
+            )
+        )
+
+    @staticmethod
+    def mocked_openai_chat_create_stream(
+        model: str,
+        functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN,
+        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
+    ) -> Generator[ChatCompletionChunk, None, None]:
+        tool_calls = []
+        function_call = MockChatClass.generate_function_call(functions=functions)
+        if not function_call:
+            tool_calls = MockChatClass.generate_tool_calls(tools=tools)
+
+        full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```"
+        for i in range(0, len(full_text) + 1):
+            sleep(0.1)
+            if i == len(full_text):
+                yield ChatCompletionChunk(
+                    id='cmpl-3QJQa5jXJ5Z5X',
+                    choices=[
+                        Choice(
+                            delta=ChoiceDelta(
+                                content='',
+                                function_call=ChoiceDeltaFunctionCall(
+                                    name=function_call.name,
+                                    arguments=function_call.arguments,
+                                ) if function_call else None,
+                                role='assistant',
+                                tool_calls=[
+                                    ChoiceDeltaToolCall(
+                                        index=0,
+                                        id='misaka-mikoto',
+                                        function=ChoiceDeltaToolCallFunction(
+                                            name=tool_calls[0].function.name,
+                                            arguments=tool_calls[0].function.arguments,
+                                        ),
+                                        type='function'
+                                    )
+                                ] if tool_calls and len(tool_calls) > 0 else None
+                            ),
+                            finish_reason='function_call',
+                            index=0,
+                        )
+                    ],
+                    created=int(time()),
+                    model=model,
+                    object='chat.completion.chunk',
+                    system_fingerprint='',
+                    usage=CompletionUsage(
+                        prompt_tokens=2,
+                        completion_tokens=17,
+                        total_tokens=19,
+                    ),
+                )
+            else:
+                yield ChatCompletionChunk(
+                    id='cmpl-3QJQa5jXJ5Z5X',
+                    choices=[
+                        Choice(
+                            delta=ChoiceDelta(
+                                content=full_text[i],
+                                role='assistant',
+                            ),
+                            finish_reason='content_filter',
+                            index=0,
+                        )
+                    ],
+                    created=int(time()),
+                    model=model,
+                    object='chat.completion.chunk',
+                    system_fingerprint='',
+                )
+
+    def chat_create(self: Completions, *,
+        messages: List[ChatCompletionMessageParam],
+        model: Union[str,Literal[
+            "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4", "gpt-4-0314", "gpt-4-0613",
+            "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613",
+            "gpt-3.5-turbo-1106", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0301",
+            "gpt-3.5-turbo-0613", 
"gpt-3.5-turbo-16k-0613"], + ], + functions: List[completion_create_params.Function] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + **kwargs: Any, + ): + openai_models = [ + "gpt-4-1106-preview", "gpt-4-vision-preview", "gpt-4", "gpt-4-0314", "gpt-4-0613", + "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", + "gpt-3.5-turbo-1106", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", + ] + azure_openai_models = [ + "gpt35", "gpt-4v", "gpt-35-turbo" + ] + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()): + raise InvokeAuthorizationError('Invalid base url') + if model in openai_models + azure_openai_models: + if not re.match(r'sk-[a-zA-Z0-9]{24,}$', self._client.api_key) and type(self._client) == OpenAI: + # sometime, provider use OpenAI compatible API will not have api key or have different api key format + # so we only check if model is in openai_models + raise InvokeAuthorizationError('Invalid api key') + if len(self._client.api_key) < 18 and type(self._client) == AzureOpenAI: + raise InvokeAuthorizationError('Invalid api key') + if stream: + return MockChatClass.mocked_openai_chat_create_stream(model=model, functions=functions, tools=tools) + + return MockChatClass.mocked_openai_chat_create_sync(model=model, functions=functions, tools=tools) \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_completion.py b/api/tests/integration_tests/model_runtime/__mock/openai_completion.py new file mode 100644 index 00000000000000..526e7b1b393687 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_completion.py @@ -0,0 +1,121 @@ +from openai import BadRequestError, OpenAI, AzureOpenAI +from openai.types import Completion as CompletionMessage +from openai._types import NotGiven, NOT_GIVEN +from openai.types.completion import CompletionChoice +from openai.types.completion_usage import CompletionUsage +from openai.resources.completions import Completions + +# import monkeypatch +from typing import List, Any, Generator, Union, Optional, Literal +from time import time, sleep + +from core.model_runtime.errors.invoke import InvokeAuthorizationError + +import re + +class MockCompletionsClass(object): + @staticmethod + def mocked_openai_completion_create_sync( + model: str + ) -> CompletionMessage: + sleep(1) + return CompletionMessage( + id="cmpl-3QJQa5jXJ5Z5X", + object="text_completion", + created=int(time()), + model=model, + system_fingerprint="", + choices=[ + CompletionChoice( + text="mock", + index=0, + logprobs=None, + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=2, + completion_tokens=1, + total_tokens=3, + ) + ) + + @staticmethod + def mocked_openai_completion_create_stream( + model: str + ) -> Generator[CompletionMessage, None, None]: + full_text = "Hello, world!\n\n```python\nprint('Hello, world!')\n```" + for i in range(0, len(full_text) + 1): + sleep(0.1) + if i == len(full_text): + yield CompletionMessage( + id="cmpl-3QJQa5jXJ5Z5X", + object="text_completion", + created=int(time()), + model=model, + system_fingerprint="", + choices=[ + CompletionChoice( + text="", + index=0, + logprobs=None, + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=2, + completion_tokens=17, + total_tokens=19, + ), + ) + else: + 
yield CompletionMessage( + id="cmpl-3QJQa5jXJ5Z5X", + object="text_completion", + created=int(time()), + model=model, + system_fingerprint="", + choices=[ + CompletionChoice( + text=full_text[i], + index=0, + logprobs=None, + finish_reason="content_filter" + ) + ], + ) + + def completion_create(self: Completions, *, model: Union[ + str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo-instruct", + "text-davinci-003", "text-davinci-002", "text-davinci-001", + "code-davinci-002", "text-curie-001", "text-babbage-001", + "text-ada-001"], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + **kwargs: Any + ): + openai_models = [ + "babbage-002", "davinci-002", "gpt-3.5-turbo-instruct", "text-davinci-003", "text-davinci-002", "text-davinci-001", + "code-davinci-002", "text-curie-001", "text-babbage-001", "text-ada-001", + ] + azure_openai_models = [ + "gpt-35-turbo-instruct" + ] + + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()): + raise InvokeAuthorizationError('Invalid base url') + if model in openai_models + azure_openai_models: + if not re.match(r'sk-[a-zA-Z0-9]{24,}$', self._client.api_key) and type(self._client) == OpenAI: + # sometime, provider use OpenAI compatible API will not have api key or have different api key format + # so we only check if model is in openai_models + raise InvokeAuthorizationError('Invalid api key') + if len(self._client.api_key) < 18 and type(self._client) == AzureOpenAI: + raise InvokeAuthorizationError('Invalid api key') + + if not prompt: + raise BadRequestError('Invalid prompt') + if stream: + return MockCompletionsClass.mocked_openai_completion_create_stream(model=model) + + return MockCompletionsClass.mocked_openai_completion_create_sync(model=model) \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_embeddings.py b/api/tests/integration_tests/model_runtime/__mock/openai_embeddings.py new file mode 100644 index 00000000000000..291357173932a6 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_embeddings.py @@ -0,0 +1,70 @@ +from openai.resources.embeddings import Embeddings +from openai._types import NotGiven, NOT_GIVEN +from openai.types.create_embedding_response import CreateEmbeddingResponse, Usage +from openai.types.embedding import Embedding +from openai import OpenAI + +from typing import Union, List, Literal, Any + +from core.model_runtime.errors.invoke import InvokeAuthorizationError + +import re + +class MockEmbeddingsClass(object): + def create_embeddings( + self: Embeddings, *, + input: Union[str, List[str], List[int], List[List[int]]], + model: Union[str, Literal["text-embedding-ada-002"]], + encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, + **kwargs: Any + ) -> CreateEmbeddingResponse: + if isinstance(input, str): + input = [input] + + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()): + raise InvokeAuthorizationError('Invalid base url') + + if len(self._client.api_key) < 18: + raise InvokeAuthorizationError('Invalid API key') + + if encoding_format == 'float': + return CreateEmbeddingResponse( + data=[ + Embedding( + embedding=[0.23333 for _ in range(233)], + index=i, + object='embedding' + ) for i in range(len(input)) + ], + model=model, + object='list', + # marked: usage of embeddings should equal the number of testcase + usage=Usage( + prompt_tokens=2, + total_tokens=2 + ) + ) + + embeddings = 
'VEfNvMLUnrwFleO8hcj9vEE/yrzyjOA84E1MvNfoCrxjrI+8sZUKvNgrBT17uY07gJ/IvNvhHLrUemc8KXXGumalIT3YKwU7ZsnbPMhATrwTt6u8JEwRPNMmCjxGREW7TRKvu6/MG7zAyDU8wXLkuuMDZDsXsL28zHzaOw0IArzOiMO8LtASvPKM4Dul5l+80V0bPGVDZ7wYNrI89ucsvJZdYztzRm+8P8ysOyGbc7zrdgK9sdiEPKQ8sbulKdq7KIgdvKIMDj25dNc8k0AXPBn/oLzrdgK8IXe5uz0Dvrt50V68tTjLO4ZOcjoG9x29oGfZufiwmzwMDXy8EL6ZPHvdx7nKjzE8+LCbPG22hTs3EZq7TM+0POrRzTxVZo084wPkO8Nak7z8cpw8pDwxvA2T8LvBC7C72fltvC8Atjp3fYE8JHDLvEYgC7xAdls8YiabPPkEeTzPUbK8gOLCPEBSIbyt5Oy8CpreusNakzywUhA824vLPHRlr7zAhTs7IZtzvHd9AT2xY/O6ok8IvOihqrql5l88K4EvuknWorvYKwW9iXkbvGMTRLw5qPG7onPCPLgNIzwAbK67ftbZPMxYILvAyDW9TLB0vIid1buzCKi7u+d0u8iDSLxNVam8PZyJPNxnETvVANw8Oi5mu9nVszzl65I7DIKNvLGVirxsMJE7tPXQu2PvCT1zRm87p1l9uyRMkbsdfqe8U52ePHRlr7wt9Mw8/C8ivTu02rwJFGq8tpoFPWnC7blWumq7sfy+vG1zCzy9Nlg8iv+PuvxT3DuLU228kVhoOkmTqDrv1kg8ocmTu1WpBzsKml48DzglvI8ECzxwTd27I+pWvIWkQ7xUR007GqlPPBFEDrzGECu865q8PI7BkDwNxYc8tgG6ullMSLsIajs84lk1PNLjD70mv648ZmInO2tnIjzvb5Q8o5KCPLo9xrwKMyq9QqGEvI8ECzxO2508ATUdPRAlTry5kxc8KVGMPJyBHjxIUC476KGqvIU9DzwX87c88PUIParrWrzdlzS/G3K+uzEw2TxB2BU86AhfPAMiRj2dK808a85WPPCft7xU4Bg95Q9NPDxZjzwrpek7yNkZvHa0EjyQ0nM6Nq9fuyjvUbsRq8I7CAMHO3VSWLyuauE7U1qkvPkEeTxs7ZY7B6FMO48Eizy75/S7ieBPvB07rTxmyVu8onPCO5rc6Tu7XIa7oEMfPYngT7u24vk7/+W5PE8eGDxJ1iI9t4cuvBGHiLyH1GY7jfghu+oUSDwa7Mk7iXmbuut2grrq8I2563v8uyofdTxRTrs44lm1vMeWnzukf6s7r4khvEKhhDyhyZO8G5Z4Oy56wTz4sBs81Zknuz3fg7wnJuO74n1vvASEADu98128gUl3vBtyvrtZCU47yep8u5FYaDx2G0e8a85WO5cmUjz3kds8qgqbPCUaerx50d67WKIZPI7BkDua3Om74vKAvL3zXbzXpRA9CI51vLo9xryKzXg7tXtFO9RWLTwnJuM854LqPEIs8zuO5cq8d8V1u9P0cjrQ++C8cGwdPDdUlLoOGeW8auEtu8Z337nlzFK8aRg/vFCkDD0nRSM879bIvKUFID1iStU8EL6ZvLufgLtKgNE7KVEMvJOnSzwahRU895HbvJiIjLvc8n88bmC0PPLP2rywM9C7jTscOoS3mjy/Znu7dhvHuu5Q1Dyq61o6CI71u09hkry0jhw8gb6IPI8EC7uoVAM8gs9rvGM3fjx2G8e81FYtu/ojubyYRRK72Riuu83elDtNNmk70/TyuzUFsbvgKZI7onNCvAehzLumr8679R6+urr6SztX2So8Bl5SOwSEgLv5NpA8LwC2PGPvibzJ6vw7H2tQvOtXwrzXpRC8j0z/uxwcbTy2vr+8VWYNu+t2ArwKmt68NKN2O3XrIzw9A747UU47vaavzjwU+qW8YBqyvE02aTyEt5o8cCmjOxtyPrxs7ZY775NOu+SJWLxMJQY8/bWWu6IMDrzSSsQ7GSPbPLlQnbpVzcE7Pka4PJ96sLycxJg8v/9GPO2HZTyeW3C8Vpawtx2iYTwWBg87/qI/OviwGzxyWcY7M9WNPIA4FD32C2e8tNGWPJ43trxCoYS8FGHavItTbbu7n4C80NemPLm30Ty1OMu7vG1pvG3aPztBP0o75Q/NPJhFEj2V9i683PL/O97+aLz6iu27cdPRum/mKLwvVgc89fqDu3LA+jvm2Ls8mVZ1PIuFBD3ZGK47Cpreut7+aLziWTU8XSEgPMvSKzzO73e5040+vBlmVTxS1K+8mQ4BPZZ8o7w8FpW6OR0DPSSPCz21Vwu99fqDOjMYiDy7XAY8oYaZO+aVwTyX49c84OaXOqdZfTunEQk7B8AMvMDs7zo/D6e8OP5CvN9gIzwNCII8FefOPE026TpzIjU8XsvOO+J9b7rkIiQ8is34O+e0AbxBpv67hcj9uiPq1jtCoQQ8JfY/u86nAz0Wkf28LnrBPJlW9Tt8P4K7BbSjO9grhbyAOJS8G3K+vJLe3LzXpZA7NQUxPJs+JDz6vAS8QHZbvYNVYDrj3yk88PWIPOJ97zuSIVc8ZUPnPMqPsbx2cZi7QfzPOxYGDz2hqtO6H2tQO543NjyFPY+7JRUAOt0wgDyJeZu8MpKTu6AApTtg1ze82JI5vKllZjvrV0I7HX6nu7vndDxg1ze8jwQLu1ZTNjuJvBU7BXGpvAP+C7xJk6g8j2u/vBABlLzlqBi8M9WNutRWLTx0zGM9sHbKPLoZDDtmyVu8tpqFOvPumjyuRqe87lBUvFU0drxs7Za8ejMZOzJPGbyC7qu863v8PDPVjTxJ1iI7Ca01PLuAQLuNHFy7At9LOwP+i7tYxlO80NemO9elkDx45LU8h9TmuzxZjzz/5bk8p84OurvndLwAkGi7XL9luCSzRTwMgg08vrxMPKIwyDwdomG8K6VpPGPvCTxkmTi7M/lHPGxUSzxwKSM8wQuwvOqtkzrLFSa8SbdivAMixjw2r9+7xWt2vAyCDT1NEi87B8CMvG1zi7xpwm27MrbNO9R6Z7xJt+K7jNnhu9ZiFrve/ug55CKkvCwHJLqsOr47+ortvPwvIr2v8NW8YmmVOE+FTLywUhA8MTBZvMiDyLtx8hG8OEE9vMDsbzroCF88DelBOobnPbx+b6U8sbnEOywr3ro93wO9dMzjup2xwbwnRaO7cRZMu8Z337vS44+7VpYwvFWphzxKgNE8L1aHPLPFLbunzo66zFggPN+jHbs7tFo8nW7HO9JKRLyoeD28Fm1DPGZip7u5dNe7KMsXvFnlkzxQpAw7MrZNPHpX0zwSyoK7ayQovPR0Dz3gClK8/juLPDjaCLvqrZO7a4vcO9HEzzvife88KKzXvDmocbwpMkw7t2huvaIMjjznguo7Gy/EOzxZjzoLuZ48qi5VvCjLFzuDmNo654LquyrXgDy7XAa8e7mNvJ7QAb0Rq8K7ojBIvBN0MTuOfha8GoUVveb89bxMsHS8jV9WPPKM4LyAOJS8me9AvZv7qbsbcr47tu
L5uaXmXzweKNa7rkYnPINV4Lxcv+W8tVcLvI8oxbzvbxS7oYaZu9+jHT0cHO08c7uAPCSzRTywUhA85xu2u+wBcTuJvJU8PBYVusTghzsnAim8acJtPFQE0zzFIwI9C7meO1DIRry7XAY8MKpkPJZd47suN0e5JTm6u6BDn7zfx1e8AJDoOr9CQbwaQps7x/1TPLTRFryqLtU8JybjPIXI/Tz6I7k6mVb1PMWKNryd1fs8Ok0mPHt2kzy9Ep48TTZpvPS3ibwGOpi8Ns4fPBqFlbr3Kqc8+QR5vHLA+rt7uY289YXyPI6iULxL4gu8Tv/XuycCKbwCnFG8C7kevVG1b7zIXw68GoWVO4rNeDnrM4i8MxgIPUNLs7zSoJW86ScfO+rRzbs6Cqw8NxGautP0cjw0wjY8CGq7vAkU6rxKgNG5+uA+vJXXbrwKM6o86vCNOu+yjjoQAZS8xATCOQVxKbynzo68wxcZvMhATjzS4488ArsRvNEaobwRh4i7t4euvAvd2DwnAik8UtQvvBFEDrz4sJs79gtnvOknnzy+vEy8D3sfPLH8vjzmLo28KVGMvOtXwjvpapm8HBxtPH3K8Lu753Q8/l9FvLvn9DomoG48fET8u9zy/7wMpke8zmQJu3oU2TzlD828KteAPAwNfLu+mBI5ldduPNZDVjq+vEy8eEvqvDHJpLwUPaC6qi7VPABsLjwFcSm72sJcu+bYO7v41NW8RiALvYB7DjzL0is7qLs3us1FSbzaf2K8MnNTuxABFDzF8Wo838fXvOBNzDzre3w8afQEvQE1nbulBaC78zEVvG5B9LzH/VM82Riuuwu5nrwsByQ8Y6yPvHXro7yQ0nM8nStNPJkyOzwnJmM80m7+O1VmjTzqrZM8dhvHOyAQBbz3baG8KTJMPOlqmbxsVEs8Pq3suy56QbzUVq08X3CDvAE1nTwUHuA7hue9vF8tCbvwOAO6F7A9ugd9kryqLtW7auEtu9ONPryPa7+8o9r2O570OzyFpEO8ntCBPOqtk7sykhO7lC1AOw2TcLswhiq6vx4HvP5fRbwuesG7Mk8ZvA4Z5TlfcAM9DrIwPL//xrzMm5q8JEwRPHBsnbxL4gu8jyjFu99gozrkZZ483GeRPLuAwDuYiIw8iv8PvK5Gpzx+b6W87Yflu3NGbzyE+hQ8a4tcPItT7bsoy5e8L1YHvWQyBDwrga86kPEzvBQ9oDxtl0W8lwKYvGpIYrxQ5wY8AJDovOLyALyw3f489JjJvMdTpTkKMyo8V9mqvH3K8LpyNYy8JHDLOixu2LpQ54Y8Q0uzu8LUnrs0wrY84vIAveihqjwfihA8DIKNvLDd/jywM1C7FB7gOxsLirxAUqE7sulnvH3K8DkAkGg8jsGQvO+TzrynWf287CCxvK4Drbwg8UQ8JRr6vFEqAbskjwu76q2TPNP0cjopDhK8dVJYvFIXKrxLn5G8AK8oPAb3HbxbOXE8Bvedun5Q5ThHyjk8QdiVvBXDlLw0o/Y7aLGKupkOgTxKPdc81kNWPtUAXLxUR827X1FDPf47izxsEVE8akhiPIhaWzxYX5+7hT0PPSrXgLxQC0E8i4WEvKUp2jtCLHM8DcWHO768zLxnK5a89R6+vH9czrorpem73h0pvAnwr7yKzXi8gDgUPf47Czq9zyO8728UOf34EDy6PUY76OSkvKZIGr2ZDgE8gzEmPG3av7v77Ce7/oP/O3MiNTtas/w8x1OlO/D1CDvDfs27ll1jO2Ufrbv1hXK8WINZuxN0sbuxlYq8OYS3uia/rjyiTwi9O7TaO+/WyDyiDA49E7erO3fF9bj6I7k7qHi9O3SoKbyBSfc7drSSvGPvCT2pQay7t2huPGnC7byUCQY8CEaBu6rHoDhx8hE8/fgQvCjLl7zdeHS8x/3TO0Isc7tas3y8jwQLvUKhhDz+foU8fCDCPC+ZgTywD5Y7ZR8tOla66rtCCLm8gWg3vDoKrLxbWDE76SefPBkj2zrlqJi7pebfuv6Df7zWQ9a7lHA6PGDXtzzMv1Q8mtxpOwJ4lzxKGZ28mGnMPDw6z7yxY/O7m2Leu7juYjwvVge8zFigPGpIYjtWumo5xs2wOgyCjbxrZ6K8bbaFvKzTCbsks8W7C7mePIU9DzxQyEY8posUvAW0ozrHlh88CyBTPJRwursxySQ757SBuqcRCbwNCIK8EL6ZvIG+iLsIRgE8rF74vOJZtbuUcDq8r/DVPMpMt7sL3Vi8eWqquww/kzqj2vY5auGtu85kiTwMPxM66KGqvBIxNzuwUpA8v2b7u09C0rx7ms08NUirvFYQPLxKPdc68mimvP5fRTtoPPm7XuqOOgOJ+jxfLYm7u58AvXz8B72PR4W6ldfuuys+tbvYKwW7pkiaPLB2SjvKj7G875POvA6yML7qFEg9Eu68O6Up2rz77Kc84CmSPP6ivzz4sJu6/C+iOaUpWjwq14A84E3MOYB7Dr2d1Xu775NOvC6e+7spUYw8PzPhO5TGizt29ww9yNkZPY7lyrz020M7QRsQu3z8BzwkCZe79YXyO8jZmTzvGUM8HgQcO9kYrrzxBmy8hLeaPLYBOjz+oj88flBlO6GqUzuiMMi8fxlUvCr7ujz41NU8DA38PBeMAzx7uY28TTZpvFG1bzxtc4s89ucsPEereTwfipC82p4iPKtNFbzo5KQ7pcKlOW5gtDzO73c7B6FMOzRbgjxCXoo8v0JBOSl1RrwxDJ+7XWSaPD3Aw7sOsjA8tuJ5vKw6Pry5k5c8ZUNnvG/H6DyVTAA8Shkdvd7+aDvtpiW9qUGsPFTgmDwbcr68TTbpO1DnhryNX9a7mrivvIqpPjxsqhy81HrnOzv31Dvth+U6UtQvPBz4MrvtpqW84OYXvRz4sjxwkFe8zSGPuycCqbyFPY8818nKOw84JTy8bWk8USqBvBGHiLtosQo8BOs0u9skl7xQ54Y8uvrLPOknn7w705o8Jny0PAd9EjxhoKa8Iv2tu2M3/jtsVEs8DcUHPQSEADs3eE48GkKbupRR+rvdeHQ7Xy2JvO1jKz0xMFm8sWPzux07LbyrTZW7bdq/O6Pa9r0ahRW9CyDTOjSjdjyQ8bO8yaIIPfupLTz/CfQ7xndfvJs+JD0zPEK8KO/RvMpw8bwObzY7fm+lPJtiXrz5BHm8WmsIvKlBrLuDdKA7hWHJOgd9Ers0o/Y7nlvwu5NAl7u8BrW6utYRO2SZuDxyNYw8CppevAY6GDxVqQe9oGdZPFa6ary3RLS70NcmO2PQSb36ZrM86q2TPML42LwewaE8k2RRPDmocTsi/S29o/k2PHRlr7zjnC+8gHsOPUpcFzxtl8W6tuL5vHw/gry/2wy9yaIIvINV4Dx3fQG7ISFoPO7pnzwGXlK8HPiyPGAaMjzBC7A7MQyfu+eC6jyV1+67pDyxvBWkVLxrJKg754LqOScCKbwpUQy8KIgdOJDSc7zDfk08tLLWvNZDVjyh7c28ShmdvMnlgjs2NdS8ISHovP5+hbxGIIs8ayQouyKnXDzBcmS6zw44u
86IQ7yl5l+7cngGvWvOVrsEhIC7yNkZPJODkbuAn0g8XN6lPOaVwbuTgxG8OR2DPAb3HTzlqJi8nUoNvCAVf73Mmxo9afSEu4FotzveHSk8c0ZvOMFOqjwP9Sq87iwavIEBg7xIUK68IbozuozZ4btg17c7vx4Hvarr2rtp9IQ8Rt0QO+1jqzyeNzY8kNLzO8sVpry98108OCL9uyisV7vhr4Y8FgaPvLFjczw42og8gWg3vPX6gzsNk/C83GeRPCUVgDy0jpw7yNkZu2VD5zvh93o81h+cuw3Fhzyl5t+86Y7TvHa0EjyzCCi7WmsIPIy1Jzy00Ra6NUiru50rTTx50d47/HKcO2wwETw0f7y8sFIQvNxnkbzS4w855pVBu9FdGzx9yvC6TM80vFQjkzy/Zvs7BhtYPLjKKLqPa787A/6LOyiInbzooSq8728UPIFJ97wq+7q8R6v5u1tYMbwdomG6iSPKPAb3HTx3oTu7fGO8POqtk7ze/ug84wNkPMnq/DsB8iK9ogwOu6lBrDznguo8NQUxvHKcwDo28tm7yNmZPN1UurxCoYS80m7+Oy+9OzzGzTC836MdvCDNCrtaawi7dVLYPEfKuTxzRm88cCmjOyXSBbwGOpi879ZIO8dTJbtqnrO8NMI2vR1+J7xwTV087umfPFG17zsC30s8oYaZPKllZrzZGK47zss9vP21FryZywa9bbYFPVNapDt2G0e7E3SxPMUjgry5dNc895Hbu0H8z7ueN7a7OccxPFhfH7vC1B48n3owvEhQLrzu6Z+8HTutvEBSITw6Taa5g1XgPCzEqbxfLYk9OYQ3vBlm1bvPUTI8wIU7PIy1pzyFyP07gzGmO3NGb7yS3ty7O5CguyEhaLyWoF28pmxUOaZImrz+g/87mnU1vFbsgTxvo668PFmPO2KNTzy09VC8LG5YPHhL6rsvJPC7kTQuvEGCxDlhB9s6u58AvfCAd7z0t4k7kVjoOCkOkrxMjDq8iPOmPL0SnrxsMJG7OEG9vCUa+rvx4rE7cpxAPDCGqjukf6u8TEnAvNn57TweBBw7JdKFvIy1p7vIg8i7' + + data = [] + for i, text in enumerate(input): + obj = Embedding( + embedding=[], + index=i, + object='embedding' + ) + obj.embedding = embeddings + + data.append(obj) + + return CreateEmbeddingResponse( + data=data, + model=model, + object='list', + # marked: usage of embeddings should equal the number of testcase + usage=Usage( + prompt_tokens=2, + total_tokens=2 + ) + ) \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_moderation.py b/api/tests/integration_tests/model_runtime/__mock/openai_moderation.py new file mode 100644 index 00000000000000..81fe9e99f41221 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_moderation.py @@ -0,0 +1,67 @@ +from openai.resources.moderations import Moderations +from openai.types import ModerationCreateResponse +from openai.types.moderation import Moderation, Categories, CategoryScores +from openai._types import NotGiven, NOT_GIVEN + +from typing import Union, List, Literal, Any + +from core.model_runtime.errors.invoke import InvokeAuthorizationError + +import re + +class MockModerationClass(object): + def moderation_create(self: Moderations,*, + input: Union[str, List[str]], + model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + **kwargs: Any + ) -> ModerationCreateResponse: + if isinstance(input, str): + input = [input] + + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()): + raise InvokeAuthorizationError('Invalid base url') + + if len(self._client.api_key) < 18: + raise InvokeAuthorizationError('Invalid API key') + + for text in input: + result = [] + if 'kill' in text: + moderation_categories = { + 'harassment': False, 'harassment/threatening': False, 'hate': False, 'hate/threatening': False, + 'self-harm': False, 'self-harm/instructions': False, 'self-harm/intent': False, 'sexual': False, + 'sexual/minors': False, 'violence': False, 'violence/graphic': False + } + moderation_categories_scores = { + 'harassment': 1.0, 'harassment/threatening': 1.0, 'hate': 1.0, 'hate/threatening': 1.0, + 'self-harm': 1.0, 'self-harm/instructions': 1.0, 'self-harm/intent': 1.0, 'sexual': 1.0, + 'sexual/minors': 1.0, 'violence': 1.0, 'violence/graphic': 1.0 + } + + result.append(Moderation( + flagged=True, + categories=Categories(**moderation_categories), + category_scores=CategoryScores(**moderation_categories_scores) + )) + else: + 
moderation_categories = { + 'harassment': False, 'harassment/threatening': False, 'hate': False, 'hate/threatening': False, + 'self-harm': False, 'self-harm/instructions': False, 'self-harm/intent': False, 'sexual': False, + 'sexual/minors': False, 'violence': False, 'violence/graphic': False + } + moderation_categories_scores = { + 'harassment': 0.0, 'harassment/threatening': 0.0, 'hate': 0.0, 'hate/threatening': 0.0, + 'self-harm': 0.0, 'self-harm/instructions': 0.0, 'self-harm/intent': 0.0, 'sexual': 0.0, + 'sexual/minors': 0.0, 'violence': 0.0, 'violence/graphic': 0.0 + } + result.append(Moderation( + flagged=False, + categories=Categories(**moderation_categories), + category_scores=CategoryScores(**moderation_categories_scores) + )) + + return ModerationCreateResponse( + id='shiroii kuloko', + model=model, + results=result + ) \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_remote.py b/api/tests/integration_tests/model_runtime/__mock/openai_remote.py new file mode 100644 index 00000000000000..5fc14d038be057 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_remote.py @@ -0,0 +1,22 @@ +from openai.resources.models import Models +from openai.types.model import Model + +from typing import List +from time import time + +class MockModelClass(object): + """ + mock class for openai.models.Models + """ + def list( + self, + **kwargs, + ) -> List[Model]: + return [ + Model( + id='ft:gpt-3.5-turbo-0613:personal::8GYJLPDQ', + created=int(time()), + object='model', + owned_by='organization:org-123', + ) + ] \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/openai_speech2text.py b/api/tests/integration_tests/model_runtime/__mock/openai_speech2text.py new file mode 100644 index 00000000000000..ae9692f3630a6b --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/openai_speech2text.py @@ -0,0 +1,30 @@ +from openai.resources.audio.transcriptions import Transcriptions +from openai._types import NotGiven, NOT_GIVEN, FileTypes +from openai.types.audio.transcription import Transcription + +from typing import Union, List, Literal, Any + +from core.model_runtime.errors.invoke import InvokeAuthorizationError + +import re + +class MockSpeech2TextClass(object): + def speech2text_create(self: Transcriptions, + *, + file: FileTypes, + model: Union[str, Literal["whisper-1"]], + language: str | NotGiven = NOT_GIVEN, + prompt: str | NotGiven = NOT_GIVEN, + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + **kwargs: Any + ) -> Transcription: + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._client.base_url.__str__()): + raise InvokeAuthorizationError('Invalid base url') + + if len(self._client.api_key) < 18: + raise InvokeAuthorizationError('Invalid API key') + + return Transcription( + text='1, 2, 3, 4, 5, 6, 7, 8, 9, 10' + ) \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/__mock/xinference.py b/api/tests/integration_tests/model_runtime/__mock/xinference.py new file mode 100644 index 00000000000000..d0eeeffd0658b2 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py @@ -0,0 +1,142 @@ +from xinference_client.client.restful.restful_client import Client, \ + RESTfulChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatglmCppChatModelHandle, \ + RESTfulEmbeddingModelHandle, RESTfulRerankModelHandle +from 
xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage + +from requests.sessions import Session +from requests import Response +from requests.exceptions import ConnectionError +from typing import Union, List + +from _pytest.monkeypatch import MonkeyPatch +import pytest +import os +import re + +class MockXinferenceClass(object): + def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]: + if not re.match(r'https?:\/\/[^\s\/$.?#].[^\s]*$', self.base_url): + raise RuntimeError('404 Not Found') + + if 'generate' == model_uid: + return RESTfulGenerateModelHandle(model_uid, base_url=self.base_url) + if 'chat' == model_uid: + return RESTfulChatModelHandle(model_uid, base_url=self.base_url) + if 'embedding' == model_uid: + return RESTfulEmbeddingModelHandle(model_uid, base_url=self.base_url) + if 'rerank' == model_uid: + return RESTfulRerankModelHandle(model_uid, base_url=self.base_url) + raise RuntimeError('404 Not Found') + + def get(self: Session, url: str, **kwargs): + if '/v1/models/' in url: + response = Response() + + # get model uid + model_uid = url.split('/')[-1] + if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \ + model_uid not in ['generate', 'chat', 'embedding', 'rerank']: + response.status_code = 404 + raise ConnectionError('404 Not Found') + + # check if url is valid + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url): + response.status_code = 404 + raise ConnectionError('404 Not Found') + + response.status_code = 200 + response._content = b'''{ + "model_type": "LLM", + "address": "127.0.0.1:43877", + "accelerators": [ + "0", + "1" + ], + "model_name": "chatglm3-6b", + "model_lang": [ + "en" + ], + "model_ability": [ + "generate", + "chat" + ], + "model_description": "latest chatglm3", + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantization": "none", + "model_hub": "huggingface", + "revision": null, + "context_length": 2048, + "replica": 1 +}''' + return response + + def rerank(self: RESTfulRerankModelHandle, documents: List[str], query: str, top_n: int) -> dict: + # check if self._model_uid is a valid uuid + if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', self._model_uid) and \ + self._model_uid != 'rerank': + raise RuntimeError('404 Not Found') + + if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', self._base_url): + raise RuntimeError('404 Not Found') + + if top_n is None: + top_n = 1 + + return { + 'results': [ + { + 'index': i, + 'document': doc, + 'relevance_score': 0.9 + } + for i, doc in enumerate(documents[:top_n]) + ] + } + + def create_embedding( + self: RESTfulGenerateModelHandle, + input: Union[str, List[str]], + **kwargs + ) -> dict: + # check if self._model_uid is a valid uuid + if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', self._model_uid) and \ + self._model_uid != 'embedding': + raise RuntimeError('404 Not Found') + + if isinstance(input, str): + input = [input] + ipt_len = len(input) + + embedding = Embedding( + object="list", + model=self._model_uid, + data=[ + EmbeddingData( + index=i, + object="embedding", + embedding=[1919.810 for _ in range(768)] + ) + for i in range(ipt_len) + ], + usage=EmbeddingUsage( + prompt_tokens=ipt_len, + total_tokens=ipt_len + ) + ) + + return embedding + +MOCK = os.getenv('MOCK_SWITCH', 'false').lower() == 'true' + +@pytest.fixture +def setup_xinference_mock(request, monkeypatch: 
MonkeyPatch): + if MOCK: + monkeypatch.setattr(Client, 'get_model', MockXinferenceClass.get_chat_model) + monkeypatch.setattr(Session, 'get', MockXinferenceClass.get) + monkeypatch.setattr(RESTfulEmbeddingModelHandle, 'create_embedding', MockXinferenceClass.create_embedding) + monkeypatch.setattr(RESTfulRerankModelHandle, 'rerank', MockXinferenceClass.rerank) + yield + + if MOCK: + monkeypatch.undo() \ No newline at end of file diff --git a/api/tests/integration_tests/model_runtime/anthropic/test_llm.py b/api/tests/integration_tests/model_runtime/anthropic/test_llm.py index c321b2fdf14094..276d76bed4ac40 100644 --- a/api/tests/integration_tests/model_runtime/anthropic/test_llm.py +++ b/api/tests/integration_tests/model_runtime/anthropic/test_llm.py @@ -9,8 +9,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.anthropic.llm.llm import AnthropicLargeLanguageModel +from tests.integration_tests.model_runtime.__mock.anthropic import setup_anthropic_mock -def test_validate_credentials(): +@pytest.mark.parametrize('setup_anthropic_mock', [['none']], indirect=True) +def test_validate_credentials(setup_anthropic_mock): model = AnthropicLargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -28,8 +30,8 @@ def test_validate_credentials(): } ) - -def test_invoke_model(): +@pytest.mark.parametrize('setup_anthropic_mock', [['none']], indirect=True) +def test_invoke_model(setup_anthropic_mock): model = AnthropicLargeLanguageModel() response = model.invoke( @@ -59,8 +61,8 @@ def test_invoke_model(): assert isinstance(response, LLMResult) assert len(response.message.content) > 0 - -def test_invoke_stream_model(): +@pytest.mark.parametrize('setup_anthropic_mock', [['none']], indirect=True) +def test_invoke_stream_model(setup_anthropic_mock): model = AnthropicLargeLanguageModel() response = model.invoke( diff --git a/api/tests/integration_tests/model_runtime/anthropic/test_provider.py b/api/tests/integration_tests/model_runtime/anthropic/test_provider.py index b973b7b18cf623..16af2427633e25 100644 --- a/api/tests/integration_tests/model_runtime/anthropic/test_provider.py +++ b/api/tests/integration_tests/model_runtime/anthropic/test_provider.py @@ -5,8 +5,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.anthropic.anthropic import AnthropicProvider +from tests.integration_tests.model_runtime.__mock.anthropic import setup_anthropic_mock -def test_validate_provider_credentials(): +@pytest.mark.parametrize('setup_anthropic_mock', [['none']], indirect=True) +def test_validate_provider_credentials(setup_anthropic_mock): provider = AnthropicProvider() with pytest.raises(CredentialsValidateFailedError): diff --git a/api/tests/integration_tests/model_runtime/azure_openai/test_llm.py b/api/tests/integration_tests/model_runtime/azure_openai/test_llm.py index a1443eaf6329b1..e74465283e453f 100644 --- a/api/tests/integration_tests/model_runtime/azure_openai/test_llm.py +++ b/api/tests/integration_tests/model_runtime/azure_openai/test_llm.py @@ -10,8 +10,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.azure_openai.llm.llm import AzureOpenAILargeLanguageModel +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock -def test_validate_credentials_for_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def 
test_validate_credentials_for_chat_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -33,8 +35,8 @@ def test_validate_credentials_for_chat_model(): } ) - -def test_validate_credentials_for_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_validate_credentials_for_completion_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -56,8 +58,8 @@ def test_validate_credentials_for_completion_model(): } ) - -def test_invoke_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_invoke_completion_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( @@ -83,8 +85,8 @@ def test_invoke_completion_model(): assert isinstance(result, LLMResult) assert len(result.message.content) > 0 - -def test_invoke_stream_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_invoke_stream_completion_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( @@ -115,8 +117,8 @@ def test_invoke_stream_completion_model(): assert isinstance(chunk.delta.message, AssistantPromptMessage) assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True - -def test_invoke_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( @@ -155,8 +157,8 @@ def test_invoke_chat_model(): assert isinstance(chunk.delta.message, AssistantPromptMessage) assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True - -def test_invoke_stream_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_stream_chat_model(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( @@ -191,10 +193,10 @@ def test_invoke_stream_chat_model(): assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True if chunk.delta.finish_reason is not None: assert chunk.delta.usage is not None - assert chunk.delta.usage.completion_tokens == 16 - + assert chunk.delta.usage.completion_tokens > 0 -def test_invoke_chat_model_with_vision(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model_with_vision(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( @@ -230,8 +232,8 @@ def test_invoke_chat_model_with_vision(): assert isinstance(result, LLMResult) assert len(result.message.content) > 0 - -def test_invoke_chat_model_with_tools(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model_with_tools(setup_openai_mock): model = AzureOpenAILargeLanguageModel() result = model.invoke( diff --git a/api/tests/integration_tests/model_runtime/azure_openai/test_text_embedding.py b/api/tests/integration_tests/model_runtime/azure_openai/test_text_embedding.py index c666bee9aebf0e..797f6996880116 100644 --- a/api/tests/integration_tests/model_runtime/azure_openai/test_text_embedding.py +++ b/api/tests/integration_tests/model_runtime/azure_openai/test_text_embedding.py @@ -6,8 +6,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from 
core.model_runtime.model_providers.azure_openai.text_embedding.text_embedding import AzureOpenAITextEmbeddingModel +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock -def test_validate_credentials(): +@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True) +def test_validate_credentials(setup_openai_mock): model = AzureOpenAITextEmbeddingModel() with pytest.raises(CredentialsValidateFailedError): @@ -29,8 +31,8 @@ def test_validate_credentials(): } ) - -def test_invoke_model(): +@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True) +def test_invoke_model(setup_openai_mock): model = AzureOpenAITextEmbeddingModel() result = model.invoke( diff --git a/api/tests/integration_tests/model_runtime/chatglm/test_llm.py b/api/tests/integration_tests/model_runtime/chatglm/test_llm.py index 183a25c182d5a6..0b139c9ee236b5 100644 --- a/api/tests/integration_tests/model_runtime/chatglm/test_llm.py +++ b/api/tests/integration_tests/model_runtime/chatglm/test_llm.py @@ -11,13 +11,16 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.chatglm.llm.llm import ChatGLMLargeLanguageModel +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + def test_predefined_models(): model = ChatGLMLargeLanguageModel() model_schemas = model.predefined_models() assert len(model_schemas) >= 1 assert isinstance(model_schemas[0], AIModelEntity) -def test_validate_credentials_for_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_validate_credentials_for_chat_model(setup_openai_mock): model = ChatGLMLargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -35,7 +38,8 @@ def test_validate_credentials_for_chat_model(): } ) -def test_invoke_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_model(setup_openai_mock): model = ChatGLMLargeLanguageModel() response = model.invoke( @@ -64,7 +68,8 @@ def test_invoke_model(): assert len(response.message.content) > 0 assert response.usage.total_tokens > 0 -def test_invoke_stream_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_stream_model(setup_openai_mock): model = ChatGLMLargeLanguageModel() response = model.invoke( @@ -96,7 +101,8 @@ def test_invoke_stream_model(): assert isinstance(chunk.delta.message, AssistantPromptMessage) assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True -def test_invoke_stream_model_with_functions(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_stream_model_with_functions(setup_openai_mock): model = ChatGLMLargeLanguageModel() response = model.invoke( @@ -162,7 +168,8 @@ def test_invoke_stream_model_with_functions(): assert call is not None assert call.delta.message.tool_calls[0].function.name == 'get_current_weather' -def test_invoke_model_with_functions(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_model_with_functions(setup_openai_mock): model = ChatGLMLargeLanguageModel() response = model.invoke( diff --git a/api/tests/integration_tests/model_runtime/chatglm/test_provider.py b/api/tests/integration_tests/model_runtime/chatglm/test_provider.py index 65f0095e4b7348..3cfcf77403a6bf 100644 --- a/api/tests/integration_tests/model_runtime/chatglm/test_provider.py +++ 
b/api/tests/integration_tests/model_runtime/chatglm/test_provider.py @@ -5,8 +5,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.chatglm.chatglm import ChatGLMProvider +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock -def test_validate_provider_credentials(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_validate_provider_credentials(setup_openai_mock): provider = ChatGLMProvider() with pytest.raises(CredentialsValidateFailedError): diff --git a/api/tests/integration_tests/model_runtime/google/test_llm.py b/api/tests/integration_tests/model_runtime/google/test_llm.py index 2c0420b59cbc63..8e3eb755495591 100644 --- a/api/tests/integration_tests/model_runtime/google/test_llm.py +++ b/api/tests/integration_tests/model_runtime/google/test_llm.py @@ -9,8 +9,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.google.llm.llm import GoogleLargeLanguageModel +from tests.integration_tests.model_runtime.__mock.google import setup_google_mock -def test_validate_credentials(): +@pytest.mark.parametrize('setup_google_mock', [['none']], indirect=True) +def test_validate_credentials(setup_google_mock): model = GoogleLargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -28,8 +30,8 @@ def test_validate_credentials(): } ) - -def test_invoke_model(): +@pytest.mark.parametrize('setup_google_mock', [['none']], indirect=True) +def test_invoke_model(setup_google_mock): model = GoogleLargeLanguageModel() response = model.invoke( @@ -70,8 +72,8 @@ def test_invoke_model(): assert isinstance(response, LLMResult) assert len(response.message.content) > 0 - -def test_invoke_stream_model(): +@pytest.mark.parametrize('setup_google_mock', [['none']], indirect=True) +def test_invoke_stream_model(setup_google_mock): model = GoogleLargeLanguageModel() response = model.invoke( @@ -152,8 +154,8 @@ def test_invoke_chat_model_with_vision(): assert isinstance(result, LLMResult) assert len(result.message.content) > 0 - -def test_invoke_chat_model_with_vision_multi_pics(): +@pytest.mark.parametrize('setup_google_mock', [['none']], indirect=True) +def test_invoke_chat_model_with_vision_multi_pics(setup_google_mock): model = GoogleLargeLanguageModel() result = model.invoke( diff --git a/api/tests/integration_tests/model_runtime/google/test_provider.py b/api/tests/integration_tests/model_runtime/google/test_provider.py index 8baf68607b6a59..0478b6c409ce34 100644 --- a/api/tests/integration_tests/model_runtime/google/test_provider.py +++ b/api/tests/integration_tests/model_runtime/google/test_provider.py @@ -5,7 +5,10 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.google.google import GoogleProvider -def test_validate_provider_credentials(): +from tests.integration_tests.model_runtime.__mock.google import setup_google_mock + +@pytest.mark.parametrize('setup_google_mock', [['none']], indirect=True) +def test_validate_provider_credentials(setup_google_mock): provider = GoogleProvider() with pytest.raises(CredentialsValidateFailedError): diff --git a/api/tests/integration_tests/model_runtime/openai/test_llm.py b/api/tests/integration_tests/model_runtime/openai/test_llm.py index ea0693e5f864e8..b379758e55aac2 100644 --- a/api/tests/integration_tests/model_runtime/openai/test_llm.py +++ 
b/api/tests/integration_tests/model_runtime/openai/test_llm.py @@ -12,6 +12,9 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.openai.llm.llm import OpenAILargeLanguageModel +"""FOR MOCK FIXTURES, DO NOT REMOVE""" +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + def test_predefined_models(): model = OpenAILargeLanguageModel() model_schemas = model.predefined_models() @@ -19,8 +22,8 @@ def test_predefined_models(): assert len(model_schemas) >= 1 assert isinstance(model_schemas[0], AIModelEntity) - -def test_validate_credentials_for_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_validate_credentials_for_chat_model(setup_openai_mock): model = OpenAILargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -38,8 +41,8 @@ def test_validate_credentials_for_chat_model(): } ) - -def test_validate_credentials_for_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_validate_credentials_for_completion_model(setup_openai_mock): model = OpenAILargeLanguageModel() with pytest.raises(CredentialsValidateFailedError): @@ -57,7 +60,8 @@ def test_validate_credentials_for_completion_model(): } ) -def test_invoke_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_invoke_completion_model(setup_openai_mock): model = OpenAILargeLanguageModel() result = model.invoke( @@ -83,8 +87,8 @@ def test_invoke_completion_model(): assert len(result.message.content) > 0 assert model._num_tokens_from_string('gpt-3.5-turbo-instruct', result.message.content) == 1 - -def test_invoke_stream_completion_model(): +@pytest.mark.parametrize('setup_openai_mock', [['completion']], indirect=True) +def test_invoke_stream_completion_model(setup_openai_mock): model = OpenAILargeLanguageModel() result = model.invoke( @@ -114,8 +118,8 @@ def test_invoke_stream_completion_model(): assert isinstance(chunk.delta.message, AssistantPromptMessage) assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True - -def test_invoke_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model(setup_openai_mock): model = OpenAILargeLanguageModel() result = model.invoke( @@ -152,8 +156,8 @@ def test_invoke_chat_model(): assert isinstance(chunk.delta.message, AssistantPromptMessage) assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True - -def test_invoke_chat_model_with_vision(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model_with_vision(setup_openai_mock): model = OpenAILargeLanguageModel() result = model.invoke( @@ -187,8 +191,8 @@ def test_invoke_chat_model_with_vision(): assert isinstance(result, LLMResult) assert len(result.message.content) > 0 - -def test_invoke_chat_model_with_tools(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def test_invoke_chat_model_with_tools(setup_openai_mock): model = OpenAILargeLanguageModel() result = model.invoke( @@ -257,8 +261,8 @@ def test_invoke_chat_model_with_tools(): assert isinstance(result.message, AssistantPromptMessage) assert len(result.message.tool_calls) > 0 - -def test_invoke_stream_chat_model(): +@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True) +def 
diff --git a/api/tests/integration_tests/model_runtime/openai/test_moderation.py b/api/tests/integration_tests/model_runtime/openai/test_moderation.py
index fe4fbb001db074..1a1c9431450520 100644
--- a/api/tests/integration_tests/model_runtime/openai/test_moderation.py
+++ b/api/tests/integration_tests/model_runtime/openai/test_moderation.py
@@ -5,8 +5,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.openai.moderation.moderation import OpenAIModerationModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
 
 
-def test_validate_credentials():
+@pytest.mark.parametrize('setup_openai_mock', [['moderation']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
     model = OpenAIModerationModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -24,8 +26,8 @@ def test_validate_credentials():
         }
     )
 
-
-def test_invoke_model():
+@pytest.mark.parametrize('setup_openai_mock', [['moderation']], indirect=True)
+def test_invoke_model(setup_openai_mock):
     model = OpenAIModerationModel()
 
     result = model.invoke(
diff --git a/api/tests/integration_tests/model_runtime/openai/test_provider.py b/api/tests/integration_tests/model_runtime/openai/test_provider.py
index f0780bfc874f15..d667364e5cfc2e 100644
--- a/api/tests/integration_tests/model_runtime/openai/test_provider.py
+++ b/api/tests/integration_tests/model_runtime/openai/test_provider.py
@@ -5,8 +5,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.openai.openai import OpenAIProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
 
 
-def test_validate_provider_credentials():
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
     provider = OpenAIProvider()
 
     with pytest.raises(CredentialsValidateFailedError):
diff --git a/api/tests/integration_tests/model_runtime/openai/test_speech2text.py b/api/tests/integration_tests/model_runtime/openai/test_speech2text.py
index 138487bfae6bc5..6353743d6aed7c 100644
--- a/api/tests/integration_tests/model_runtime/openai/test_speech2text.py
+++ b/api/tests/integration_tests/model_runtime/openai/test_speech2text.py
@@ -5,8 +5,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.openai.speech2text.speech2text import OpenAISpeech2TextModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
 
 
-def test_validate_credentials():
+@pytest.mark.parametrize('setup_openai_mock', [['speech2text']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
     model = OpenAISpeech2TextModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -24,8 +26,8 @@ def test_validate_credentials():
         }
     )
 
-
-def test_invoke_model():
+@pytest.mark.parametrize('setup_openai_mock', [['speech2text']], indirect=True)
+def test_invoke_model(setup_openai_mock):
     model = OpenAISpeech2TextModel()
 
     # Get the directory of the current file
diff --git a/api/tests/integration_tests/model_runtime/openai/test_text_embedding.py b/api/tests/integration_tests/model_runtime/openai/test_text_embedding.py
index b3ef0f2ef12690..b86ee682f11a81 100644
--- a/api/tests/integration_tests/model_runtime/openai/test_text_embedding.py
+++ b/api/tests/integration_tests/model_runtime/openai/test_text_embedding.py
@@ -6,8 +6,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.openai.text_embedding.text_embedding import OpenAITextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
 
 
-def test_validate_credentials():
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
     model = OpenAITextEmbeddingModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -25,8 +27,8 @@ def test_validate_credentials():
         }
     )
 
-
-def test_invoke_model():
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_invoke_model(setup_openai_mock):
     model = OpenAITextEmbeddingModel()
 
     result = model.invoke(
diff --git a/api/tests/integration_tests/model_runtime/xinference/test_embeddings.py b/api/tests/integration_tests/model_runtime/xinference/test_embeddings.py
index a618e49084cbb6..f0ee893f75f45d 100644
--- a/api/tests/integration_tests/model_runtime/xinference/test_embeddings.py
+++ b/api/tests/integration_tests/model_runtime/xinference/test_embeddings.py
@@ -6,7 +6,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.xinference.text_embedding.text_embedding import XinferenceTextEmbeddingModel
 
-def test_validate_credentials():
+from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock, MOCK
+
+@pytest.mark.parametrize('setup_xinference_mock', [['none']], indirect=True)
+def test_validate_credentials(setup_xinference_mock):
     model = XinferenceTextEmbeddingModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -14,8 +17,6 @@ def test_validate_credentials():
         model='bge-base-en',
         credentials={
             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
-            'model_type': 'embeddings',
-            'model_name': 'NOT IMPORTANT',
             'model_uid': 'www ' + os.environ.get('XINFERENCE_EMBEDDINGS_MODEL_UID')
         }
     )
@@ -24,22 +25,18 @@ def test_validate_credentials():
         model='bge-base-en',
         credentials={
             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
-            'model_type': 'embeddings',
-            'model_name': 'NOT IMPORTANT',
             'model_uid': os.environ.get('XINFERENCE_EMBEDDINGS_MODEL_UID')
         }
     )
 
-
-def test_invoke_model():
+@pytest.mark.parametrize('setup_xinference_mock', [['none']], indirect=True)
+def test_invoke_model(setup_xinference_mock):
     model = XinferenceTextEmbeddingModel()
 
     result = model.invoke(
         model='bge-base-en',
         credentials={
             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
-            'model_type': 'embeddings',
-            'model_name': 'NOT IMPORTANT',
             'model_uid': os.environ.get('XINFERENCE_EMBEDDINGS_MODEL_UID')
         },
         texts=[
@@ -60,8 +57,6 @@ def test_get_num_tokens():
         model='bge-base-en',
         credentials={
             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
-            'model_type': 'embeddings',
-            'model_name': 'NOT IMPORTANT',
             'model_uid': os.environ.get('XINFERENCE_EMBEDDINGS_MODEL_UID')
         },
         texts=[
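
The xinference test modules import the module-level MOCK flag alongside the fixture, which makes it possible for a test body to branch between lenient assertions under the mock and stricter ones against a live server. This is only a sketch of how that flag could be used; the helper below and the 768-dimension figure for bge-base-en are assumptions, not part of this changeset.

from tests.integration_tests.model_runtime.__mock.xinference import MOCK

def _check_embedding_result(result):
    # Holds for both the mocked client and a real server.
    assert len(result.embeddings) > 0
    if not MOCK:
        # Stricter check that only makes sense against a live server;
        # 768 is the assumed vector width for bge-base-en.
        assert len(result.embeddings[0]) == 768
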
diff --git a/api/tests/integration_tests/model_runtime/xinference/test_llm.py b/api/tests/integration_tests/model_runtime/xinference/test_llm.py
index cac313b88ccce1..2974e86466027e 100644
--- a/api/tests/integration_tests/model_runtime/xinference/test_llm.py
+++ b/api/tests/integration_tests/model_runtime/xinference/test_llm.py
@@ -11,7 +11,12 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
 
-def test_validate_credentials_for_chat_model():
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -40,7 +45,8 @@ def test_validate_credentials_for_chat_model():
         }
     )
 
-def test_invoke_chat_model():
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     response = model.invoke(
@@ -70,7 +76,8 @@ def test_invoke_chat_model():
     assert len(response.message.content) > 0
     assert response.usage.total_tokens > 0
 
-def test_invoke_stream_chat_model():
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     response = model.invoke(
@@ -229,7 +236,8 @@ def test_invoke_stream_chat_model():
 #     assert response.usage.total_tokens > 0
 #     assert response.message.tool_calls[0].function.name == 'get_current_weather'
 
-def test_validate_credentials_for_generation_model():
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -258,7 +266,8 @@ def test_validate_credentials_for_generation_model():
         }
     )
 
-def test_invoke_generation_model():
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     response = model.invoke(
@@ -285,35 +294,8 @@ def test_invoke_generation_model():
     assert len(response.message.content) > 0
     assert response.usage.total_tokens > 0
 
-def test_invoke_generation_model_with_max_tokens():
-    model = XinferenceAILargeLanguageModel()
-
-    response = model.invoke(
-        model='alapaca',
-        credentials={
-            'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
-            'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
-        },
-        prompt_messages=[
-            UserPromptMessage(
-                content='the United States is'
-            )
-        ],
-        model_parameters={
-            'temperature': 0.7,
-            'top_p': 1.0,
-            'max_tokens': 3
-        },
-        stop=['you'],
-        user="abc-123",
-        stream=False
-    )
-
-    assert isinstance(response, LLMResult)
-    assert len(response.message.content) > 0
-    assert response.usage.completion_tokens == 3
-
-def test_invoke_stream_generation_model():
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
     model = XinferenceAILargeLanguageModel()
 
     response = model.invoke(
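
The xinference LLM tests above stack two indirect fixtures in a single parametrize call. When several argnames are parametrized together with indirect=True, pytest splits each value pair positionally, so setup_openai_mock receives 'chat' (or 'completion') and setup_xinference_mock receives 'none' via request.param. The sketch below demonstrates only that splitting behaviour; the fixture names are illustrative, not the ones under review.

import pytest

@pytest.fixture
def first_mock(request):
    return request.param    # receives 'chat'

@pytest.fixture
def second_mock(request):
    return request.param    # receives 'none'

@pytest.mark.parametrize('first_mock, second_mock', [['chat', 'none']], indirect=True)
def test_param_splitting(first_mock, second_mock):
    assert (first_mock, second_mock) == ('chat', 'none')
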
diff --git a/api/tests/integration_tests/model_runtime/xinference/test_rerank.py b/api/tests/integration_tests/model_runtime/xinference/test_rerank.py
index ab803ab61b1503..b1197aa6ae2c86 100644
--- a/api/tests/integration_tests/model_runtime/xinference/test_rerank.py
+++ b/api/tests/integration_tests/model_runtime/xinference/test_rerank.py
@@ -5,8 +5,10 @@
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.xinference.rerank.rerank import XinferenceRerankModel
+from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock, MOCK
 
 
-def test_validate_credentials():
+@pytest.mark.parametrize('setup_xinference_mock', [['none']], indirect=True)
+def test_validate_credentials(setup_xinference_mock):
     model = XinferenceRerankModel()
 
     with pytest.raises(CredentialsValidateFailedError):
@@ -26,8 +28,8 @@ def test_validate_credentials():
         }
     )
 
-
-def test_invoke_model():
+@pytest.mark.parametrize('setup_xinference_mock', [['none']], indirect=True)
+def test_invoke_model(setup_xinference_mock):
     model = XinferenceRerankModel()
 
     result = model.invoke(