diff --git a/package.json b/package.json index b2990a27d581..7f83cf4c6495 100644 --- a/package.json +++ b/package.json @@ -116,6 +116,7 @@ "@clerk/themes": "^2.1.37", "@codesandbox/sandpack-react": "^2.19.9", "@cyntler/react-doc-viewer": "^1.17.0", + "@google-cloud/vertexai": "^1.9.0", "@google/generative-ai": "^0.21.0", "@huggingface/inference": "^2.8.1", "@icons-pack/react-simple-icons": "9.6.0", diff --git a/src/app/(backend)/webapi/chat/vertexai/route.ts b/src/app/(backend)/webapi/chat/vertexai/route.ts new file mode 100644 index 000000000000..be20106760ec --- /dev/null +++ b/src/app/(backend)/webapi/chat/vertexai/route.ts @@ -0,0 +1,38 @@ +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +import { getLLMConfig } from '@/config/llm'; +import { AgentRuntime, ModelProvider } from '@/libs/agent-runtime'; +import { LobeVertexAI } from '@/libs/agent-runtime/vertexai'; +import { safeParseJSON } from '@/utils/safeParseJSON'; + +import { POST as UniverseRoute } from '../[provider]/route'; + +export const POST = async (req: Request) => + UniverseRoute(req, { + createRuntime: () => { + const { + VERTEXAI_PROJECT, + VERTEXAI_LOCATION, + VERTEXAI_CREDENTIALS, + VERTEXAI_CREDENTIALS_PATH, + } = getLLMConfig(); + + const credentialsContent = + VERTEXAI_CREDENTIALS ?? + (VERTEXAI_CREDENTIALS_PATH + ? readFileSync(resolve(process.cwd(), VERTEXAI_CREDENTIALS_PATH), 'utf8') + : undefined); + + const googleAuthOptions = credentialsContent ? 
safeParseJSON(credentialsContent) : undefined; + + const instance = LobeVertexAI.initFromVertexAI({ + googleAuthOptions: googleAuthOptions, + location: VERTEXAI_LOCATION, + project: VERTEXAI_PROJECT, + }); + + return new AgentRuntime(instance); + }, + params: { provider: ModelProvider.VertexAI }, + }); diff --git a/src/app/(main)/settings/llm/ProviderList/providers.tsx b/src/app/(main)/settings/llm/ProviderList/providers.tsx index a2e24524f98e..7f41ceb89500 100644 --- a/src/app/(main)/settings/llm/ProviderList/providers.tsx +++ b/src/app/(main)/settings/llm/ProviderList/providers.tsx @@ -23,6 +23,7 @@ import { TaichuProviderCard, TogetherAIProviderCard, UpstageProviderCard, + VertexAIProviderCard, ZeroOneProviderCard, ZhiPuProviderCard, } from '@/config/modelProviders'; @@ -34,8 +35,8 @@ import { useGithubProvider } from './Github'; import { useHuggingFaceProvider } from './HuggingFace'; import { useOllamaProvider } from './Ollama'; import { useOpenAIProvider } from './OpenAI'; -import { useWenxinProvider } from './Wenxin'; import { useSenseNovaProvider } from './SenseNova'; +import { useWenxinProvider } from './Wenxin'; export const useProviderList = (): ProviderItem[] => { const AzureProvider = useAzureProvider(); @@ -55,6 +56,7 @@ export const useProviderList = (): ProviderItem[] => { AnthropicProviderCard, BedrockProvider, GoogleProviderCard, + VertexAIProviderCard, DeepSeekProviderCard, HuggingFaceProvider, OpenRouterProviderCard, diff --git a/src/config/llm.ts b/src/config/llm.ts index 8060a708198f..c5e1744f560f 100644 --- a/src/config/llm.ts +++ b/src/config/llm.ts @@ -31,6 +31,13 @@ export const getLLMConfig = () => { GOOGLE_PROXY_URL: z.string().optional(), GOOGLE_MODEL_LIST: z.string().optional(), + ENABLED_VERTEXAI: z.boolean(), + VERTEXAI_CREDENTIALS: z.string().optional(), + VERTEXAI_CREDENTIALS_PATH: z.string().optional(), + VERTEXAI_PROJECT: z.string().optional(), + VERTEXAI_LOCATION: z.string().optional(), + VERTEXAI_MODEL_LIST: 
z.string().optional(), + ENABLED_MOONSHOT: z.boolean(), MOONSHOT_API_KEY: z.string().optional(), MOONSHOT_MODEL_LIST: z.string().optional(), @@ -177,6 +184,14 @@ export const getLLMConfig = () => { GOOGLE_PROXY_URL: process.env.GOOGLE_PROXY_URL, GOOGLE_MODEL_LIST: process.env.GOOGLE_MODEL_LIST, + ENABLED_VERTEXAI: + !!process.env.VERTEXAI_CREDENTIALS_PATH || !!process.env.VERTEXAI_CREDENTIALS, + VERTEXAI_CREDENTIALS_PATH: process.env.VERTEXAI_CREDENTIALS_PATH, + VERTEXAI_CREDENTIALS: process.env.VERTEXAI_CREDENTIALS, + VERTEXAI_LOCATION: process.env.VERTEXAI_LOCATION, + VERTEXAI_PROJECT: process.env.VERTEXAI_PROJECT, + VERTEXAI_MODEL_LIST: process.env.VERTEXAI_MODEL_LIST, + ENABLED_PERPLEXITY: !!process.env.PERPLEXITY_API_KEY, PERPLEXITY_API_KEY: process.env.PERPLEXITY_API_KEY, PERPLEXITY_MODEL_LIST: process.env.PERPLEXITY_MODEL_LIST, @@ -291,7 +306,8 @@ export const getLLMConfig = () => { HUGGINGFACE_PROXY_URL: process.env.HUGGINGFACE_PROXY_URL, HUGGINGFACE_MODEL_LIST: process.env.HUGGINGFACE_MODEL_LIST, - ENABLED_SENSENOVA: !!process.env.SENSENOVA_ACCESS_KEY_ID && !!process.env.SENSENOVA_ACCESS_KEY_SECRET, + ENABLED_SENSENOVA: + !!process.env.SENSENOVA_ACCESS_KEY_ID && !!process.env.SENSENOVA_ACCESS_KEY_SECRET, SENSENOVA_ACCESS_KEY_ID: process.env.SENSENOVA_ACCESS_KEY_ID, SENSENOVA_ACCESS_KEY_SECRET: process.env.SENSENOVA_ACCESS_KEY_SECRET, SENSENOVA_MODEL_LIST: process.env.SENSENOVA_MODEL_LIST, diff --git a/src/config/modelProviders/index.ts b/src/config/modelProviders/index.ts index 2237ef877b7c..40ed40906424 100644 --- a/src/config/modelProviders/index.ts +++ b/src/config/modelProviders/index.ts @@ -29,6 +29,7 @@ import StepfunProvider from './stepfun'; import TaichuProvider from './taichu'; import TogetherAIProvider from './togetherai'; import UpstageProvider from './upstage'; +import VertexAIProvider from './vertexai'; import WenxinProvider from './wenxin'; import ZeroOneProvider from './zeroone'; import ZhiPuProvider from './zhipu'; @@ -61,6 +62,7 @@ export 
const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [ SiliconCloudProvider.chatModels, UpstageProvider.chatModels, SparkProvider.chatModels, + VertexAIProvider.chatModels, Ai21Provider.chatModels, HunyuanProvider.chatModels, WenxinProvider.chatModels, @@ -100,6 +102,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [ Ai360Provider, TaichuProvider, SiliconCloudProvider, + VertexAIProvider, ]; export const filterEnabledModels = (provider: ModelProviderCard) => { @@ -140,6 +143,7 @@ export { default as StepfunProviderCard } from './stepfun'; export { default as TaichuProviderCard } from './taichu'; export { default as TogetherAIProviderCard } from './togetherai'; export { default as UpstageProviderCard } from './upstage'; +export { default as VertexAIProviderCard } from './vertexai'; export { default as WenxinProviderCard } from './wenxin'; export { default as ZeroOneProviderCard } from './zeroone'; export { default as ZhiPuProviderCard } from './zhipu'; diff --git a/src/config/modelProviders/vertexai.ts b/src/config/modelProviders/vertexai.ts new file mode 100644 index 000000000000..da632c7bf12e --- /dev/null +++ b/src/config/modelProviders/vertexai.ts @@ -0,0 +1,217 @@ +import { ModelProviderCard } from '@/types/llm'; + +// ref: https://ai.google.dev/gemini-api/docs/models/gemini +const VertexAI: ModelProviderCard = { + chatModels: [ + { + description: + 'Gemini 1.5 Flash 是Google最新的多模态AI模型,具备快速处理能力,支持文本、图像和视频输入,适用于多种任务的高效扩展。', + displayName: 'Gemini 1.5 Flash', + enabled: true, + functionCall: true, + id: 'gemini-1.5-flash-latest', + maxOutput: 8192, + pricing: { + cachedInput: 0.018_75, + input: 0.075, + output: 0.3, + }, + tokens: 1_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Flash 002 是一款高效的多模态模型,支持广泛应用的扩展。', + displayName: 'Gemini 1.5 Flash 002', + enabled: true, + functionCall: true, + id: 'gemini-1.5-flash-002', + maxOutput: 8192, + pricing: { + cachedInput: 0.018_75, + input: 0.075, + output: 0.3, + }, + releasedAt: '2024-09-25', + tokens: 
1_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Flash 001 是一款高效的多模态模型,支持广泛应用的扩展。', + displayName: 'Gemini 1.5 Flash 001', + functionCall: true, + id: 'gemini-1.5-flash-001', + maxOutput: 8192, + pricing: { + cachedInput: 0.018_75, + input: 0.075, + output: 0.3, + }, + tokens: 1_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Flash 0827 提供了优化后的多模态处理能力,适用多种复杂任务场景。', + displayName: 'Gemini 1.5 Flash 0827', + functionCall: true, + id: 'gemini-1.5-flash-exp-0827', + maxOutput: 8192, + pricing: { + cachedInput: 0.018_75, + input: 0.075, + output: 0.3, + }, + releasedAt: '2024-08-27', + tokens: 1_000_000 + 8192, + vision: true, + }, + + { + description: + 'Gemini 1.5 Flash 8B 0924 是最新的实验性模型,在文本和多模态用例中都有显著的性能提升。', + displayName: 'Gemini 1.5 Flash 8B 0924', + functionCall: true, + id: 'gemini-1.5-flash-8b-exp-0924', + maxOutput: 8192, + pricing: { + cachedInput: 0.018_75, + input: 0.075, + output: 0.3, + }, + releasedAt: '2024-09-24', + tokens: 1_000_000 + 8192, + vision: true, + }, + { + description: + 'Gemini 1.5 Pro 支持高达200万个tokens,是中型多模态模型的理想选择,适用于复杂任务的多方面支持。', + displayName: 'Gemini 1.5 Pro', + enabled: true, + functionCall: true, + id: 'gemini-1.5-pro-latest', + maxOutput: 8192, + pricing: { + cachedInput: 0.875, + input: 3.5, + output: 10.5, + }, + releasedAt: '2024-02-15', + tokens: 2_000_000 + 8192, + vision: true, + }, + { + description: + 'Gemini 1.5 Pro 002 是最新的生产就绪模型,提供更高质量的输出,特别在数学、长上下文和视觉任务方面有显著提升。', + displayName: 'Gemini 1.5 Pro 002', + enabled: true, + functionCall: true, + id: 'gemini-1.5-pro-002', + maxOutput: 8192, + pricing: { + cachedInput: 0.315, + input: 1.25, + output: 2.5, + }, + releasedAt: '2024-09-24', + tokens: 2_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Pro 001 是可扩展的多模态AI解决方案,支持广泛的复杂任务。', + displayName: 'Gemini 1.5 Pro 001', + functionCall: true, + id: 'gemini-1.5-pro-001', + maxOutput: 8192, + pricing: { + cachedInput: 0.875, + input: 3.5, + output: 10.5, + }, + releasedAt: 
'2024-02-15', + tokens: 2_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Pro 0827 结合最新优化技术,带来更高效的多模态数据处理能力。', + displayName: 'Gemini 1.5 Pro 0827', + functionCall: true, + id: 'gemini-1.5-pro-exp-0827', + maxOutput: 8192, + pricing: { + cachedInput: 0.875, + input: 3.5, + output: 10.5, + }, + releasedAt: '2024-08-27', + tokens: 2_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.5 Pro 0801 提供出色的多模态处理能力,为应用开发带来更大灵活性。', + displayName: 'Gemini 1.5 Pro 0801', + functionCall: true, + id: 'gemini-1.5-pro-exp-0801', + maxOutput: 8192, + pricing: { + cachedInput: 0.875, + input: 3.5, + output: 10.5, + }, + releasedAt: '2024-08-01', + tokens: 2_000_000 + 8192, + vision: true, + }, + { + description: 'Gemini 1.0 Pro 是Google的高性能AI模型,专为广泛任务扩展而设计。', + displayName: 'Gemini 1.0 Pro', + id: 'gemini-1.0-pro-latest', + maxOutput: 2048, + pricing: { + input: 0.5, + output: 1.5, + }, + releasedAt: '2023-12-06', + tokens: 30_720 + 2048, + }, + { + description: + 'Gemini 1.0 Pro 001 (Tuning) 提供稳定并可调优的性能,是复杂任务解决方案的理想选择。', + displayName: 'Gemini 1.0 Pro 001 (Tuning)', + functionCall: true, + id: 'gemini-1.0-pro-001', + maxOutput: 2048, + pricing: { + input: 0.5, + output: 1.5, + }, + releasedAt: '2023-12-06', + tokens: 30_720 + 2048, + }, + { + description: 'Gemini 1.0 Pro 002 (Tuning) 提供出色的多模态支持,专注于复杂任务的有效解决。', + displayName: 'Gemini 1.0 Pro 002 (Tuning)', + id: 'gemini-1.0-pro-002', + maxOutput: 2048, + pricing: { + input: 0.5, + output: 1.5, + }, + releasedAt: '2023-12-06', + tokens: 30_720 + 2048, + }, + ], + checkModel: 'gemini-1.5-flash-latest', + description: + 'Google 的 Gemini 系列是其最先进、通用的 AI模型,由 Google DeepMind 打造,专为多模态设计,支持文本、代码、图像、音频和视频的无缝理解与处理。适用于从数据中心到移动设备的多种环境,极大提升了AI模型的效率与应用广泛性。', + id: 'vertexai', + modelsUrl: 'https://cloud.google.com/vertex-ai/generative-ai/docs/learn/model-versioning', + name: 'VertexAI', + showApiKey: false, + showChecker: false, + smoothing: { + speed: 2, + text: true, + }, + url: 
'https://cloud.google.com/vertex-ai', +}; + +export default VertexAI; diff --git a/src/const/settings/llm.ts b/src/const/settings/llm.ts index 9c478db2e95b..24309ae6b5f6 100644 --- a/src/const/settings/llm.ts +++ b/src/const/settings/llm.ts @@ -152,6 +152,10 @@ export const DEFAULT_LLM_CONFIG: UserModelProviderConfig = { enabled: false, enabledModels: filterEnabledModels(UpstageProviderCard), }, + vertexai: { + enabled: false, + enabledModels: filterEnabledModels(GoogleProviderCard), // TODO(review): should be VertexAIProviderCard (add it to this file's '@/config/modelProviders' import) so vertexai defaults come from its own card, matching globalConfig + }, wenxin: { enabled: false, enabledModels: filterEnabledModels(WenxinProviderCard), diff --git a/src/libs/agent-runtime/error.ts b/src/libs/agent-runtime/error.ts index f0f736e3bbf1..9554ba84fa09 100644 --- a/src/libs/agent-runtime/error.ts +++ b/src/libs/agent-runtime/error.ts @@ -13,6 +13,7 @@ export const AgentRuntimeErrorType = { OllamaBizError: 'OllamaBizError', InvalidBedrockCredentials: 'InvalidBedrockCredentials', + InvalidVertexCredentials: 'InvalidVertexCredentials', StreamChunkError: 'StreamChunkError', InvalidGithubToken: 'InvalidGithubToken', diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index 24fc6852b878..1347617d1469 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -1,9 +1,12 @@ +import type { VertexAI } from '@google-cloud/vertexai'; import { Content, FunctionCallPart, FunctionDeclaration, Tool as GoogleFunctionCallTool, GoogleGenerativeAI, + HarmBlockThreshold, + HarmCategory, Part, SchemaType, } from '@google/generative-ai'; @@ -27,26 +30,21 @@ import { StreamingResponse } from '../utils/response'; import { GoogleGenerativeAIStream, convertIterableToStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; -enum HarmCategory { - HARM_CATEGORY_DANGEROUS_CONTENT = 'HARM_CATEGORY_DANGEROUS_CONTENT', - HARM_CATEGORY_HARASSMENT = 'HARM_CATEGORY_HARASSMENT', - HARM_CATEGORY_HATE_SPEECH = 'HARM_CATEGORY_HATE_SPEECH', - 
HARM_CATEGORY_SEXUALLY_EXPLICIT = 'HARM_CATEGORY_SEXUALLY_EXPLICIT', -} - -enum HarmBlockThreshold { - BLOCK_NONE = 'BLOCK_NONE', +interface LobeGoogleAIParams { + apiKey?: string; + baseURL?: string; + client?: GoogleGenerativeAI | VertexAI; } export class LobeGoogleAI implements LobeRuntimeAI { private client: GoogleGenerativeAI; baseURL?: string; - constructor({ apiKey, baseURL }: { apiKey?: string; baseURL?: string } = {}) { + constructor({ apiKey, baseURL, client }: LobeGoogleAIParams = {}) { if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey); - this.client = new GoogleGenerativeAI(apiKey); - this.baseURL = baseURL; + this.client = client ? (client as GoogleGenerativeAI) : new GoogleGenerativeAI(apiKey); + this.baseURL = client ? undefined : baseURL; } async chat(rawPayload: ChatStreamPayload, options?: ChatCompetitionOptions) { diff --git a/src/libs/agent-runtime/types/type.ts b/src/libs/agent-runtime/types/type.ts index db64c94f23fb..d8e656c2f54f 100644 --- a/src/libs/agent-runtime/types/type.ts +++ b/src/libs/agent-runtime/types/type.ts @@ -51,6 +51,7 @@ export enum ModelProvider { Taichu = 'taichu', TogetherAI = 'togetherai', Upstage = 'upstage', + VertexAI = 'vertexai', Wenxin = 'wenxin', ZeroOne = 'zeroone', ZhiPu = 'zhipu', diff --git a/src/libs/agent-runtime/vertexai/index.ts b/src/libs/agent-runtime/vertexai/index.ts new file mode 100644 index 000000000000..d2cd98cdbe4a --- /dev/null +++ b/src/libs/agent-runtime/vertexai/index.ts @@ -0,0 +1,23 @@ +import { VertexAI, VertexInit } from '@google-cloud/vertexai'; + +import { AgentRuntimeError, AgentRuntimeErrorType, LobeGoogleAI } from '@/libs/agent-runtime'; + +export class LobeVertexAI extends LobeGoogleAI { + static initFromVertexAI(params?: VertexInit) { + try { + const client = new VertexAI({ ...params }); + + return new LobeGoogleAI({ apiKey: 'avoid-error', client }); + } catch (e) { + const err = e as Error; + + if (err.name === 'IllegalArgumentError') { + 
throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidVertexCredentials, { + message: err.message, + }); + } + + throw e; + } + } +} diff --git a/src/server/globalConfig/index.ts b/src/server/globalConfig/index.ts index 7c4704cda103..922f9287ef3d 100644 --- a/src/server/globalConfig/index.ts +++ b/src/server/globalConfig/index.ts @@ -32,6 +32,7 @@ import { TaichuProviderCard, TogetherAIProviderCard, UpstageProviderCard, + VertexAIProviderCard, WenxinProviderCard, ZeroOneProviderCard, ZhiPuProviderCard, @@ -143,6 +144,9 @@ export const getServerGlobalConfig = () => { ENABLED_HUGGINGFACE, HUGGINGFACE_MODEL_LIST, + + ENABLED_VERTEXAI, + VERTEXAI_MODEL_LIST, } = getLLMConfig(); const config: GlobalServerConfig = { @@ -387,6 +391,14 @@ export const getServerGlobalConfig = () => { modelString: UPSTAGE_MODEL_LIST, }), }, + vertexai: { + enabled: ENABLED_VERTEXAI, + enabledModels: extractEnabledModels(VERTEXAI_MODEL_LIST), + serverModelCards: transformToChatModelCards({ + defaultChatModels: VertexAIProviderCard.chatModels, + modelString: VERTEXAI_MODEL_LIST, + }), + }, wenxin: { enabled: ENABLED_WENXIN, enabledModels: extractEnabledModels(WENXIN_MODEL_LIST), diff --git a/src/types/user/settings/keyVaults.ts b/src/types/user/settings/keyVaults.ts index 8ff980fa055f..517b198e2234 100644 --- a/src/types/user/settings/keyVaults.ts +++ b/src/types/user/settings/keyVaults.ts @@ -58,6 +58,7 @@ export interface UserKeyVaults { taichu?: OpenAICompatibleKeyVault; togetherai?: OpenAICompatibleKeyVault; upstage?: OpenAICompatibleKeyVault; + vertexai?: undefined; wenxin?: WenxinKeyVault; zeroone?: OpenAICompatibleKeyVault; zhipu?: OpenAICompatibleKeyVault;