Skip to content

Commit

Permalink
feat: add baidu obs storage (#9024)
Browse files Browse the repository at this point in the history
  • Loading branch information
hwzhuhao authored Oct 7, 2024
1 parent 959a81a commit 2571b0c
Show file tree
Hide file tree
Showing 7 changed files with 304 additions and 147 deletions.
8 changes: 7 additions & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ DB_DATABASE=dify

# Storage configuration
# used to store uploaded files, private keys, etc.
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos, baidu-obs
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_USE_AWS_MANAGED_IAM=false
Expand Down Expand Up @@ -79,6 +79,12 @@ HUAWEI_OBS_SECRET_KEY=your-secret-key
HUAWEI_OBS_ACCESS_KEY=your-access-key
HUAWEI_OBS_SERVER=your-server-url

# Baidu OBS Storage Configuration
BAIDU_OBS_BUCKET_NAME=your-bucket-name
BAIDU_OBS_SECRET_KEY=your-secret-key
BAIDU_OBS_ACCESS_KEY=your-access-key
BAIDU_OBS_ENDPOINT=your-server-url

# OCI Storage configuration
OCI_ENDPOINT=your-endpoint
OCI_BUCKET_NAME=your-bucket-name
Expand Down
8 changes: 5 additions & 3 deletions api/configs/middleware/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
Expand Down Expand Up @@ -200,12 +201,13 @@ class MiddlewareConfig(
StorageConfig,
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
GoogleCloudStorageConfig,
TencentCloudCOSStorageConfig,
HuaweiCloudOBSStorageConfig,
VolcengineTOSStorageConfig,
S3StorageConfig,
OCIStorageConfig,
S3StorageConfig,
TencentCloudCOSStorageConfig,
VolcengineTOSStorageConfig,
# configs of vdb and vdb providers
VectorStoreConfig,
AnalyticdbConfig,
Expand Down
29 changes: 29 additions & 0 deletions api/configs/middleware/storage/baidu_obs_storage_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import Optional

from pydantic import BaseModel, Field


class BaiduOBSStorageConfig(BaseModel):
    """
    Configuration settings for Baidu Object Storage Service (OBS)

    Every field is optional and defaults to None, so the model can be
    instantiated even when Baidu OBS is not the selected storage backend.
    """

    # Bucket that holds the stored objects.
    BAIDU_OBS_BUCKET_NAME: Optional[str] = Field(
        description="Name of the Baidu OBS bucket to store and retrieve objects (e.g., 'my-obs-bucket')",
        default=None,
    )

    # Access key half of the credential pair.
    BAIDU_OBS_ACCESS_KEY: Optional[str] = Field(
        description="Access Key ID for authenticating with Baidu OBS",
        default=None,
    )

    # Secret key half of the credential pair.
    BAIDU_OBS_SECRET_KEY: Optional[str] = Field(
        description="Secret Access Key for authenticating with Baidu OBS",
        default=None,
    )

    # Regional service endpoint. The description previously said "OSS" and
    # showed a malformed example URL with an empty host label.
    BAIDU_OBS_ENDPOINT: Optional[str] = Field(
        description="URL of the Baidu OBS endpoint for your chosen region (e.g., 'https://bj.bcebos.com')",
        default=None,
    )
3 changes: 3 additions & 0 deletions api/extensions/ext_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from extensions.storage.aliyun_storage import AliyunStorage
from extensions.storage.azure_storage import AzureStorage
from extensions.storage.baidu_storage import BaiduStorage
from extensions.storage.google_storage import GoogleStorage
from extensions.storage.huawei_storage import HuaweiStorage
from extensions.storage.local_storage import LocalStorage
Expand Down Expand Up @@ -35,6 +36,8 @@ def init_app(self, app: Flask):
self.storage_runner = OCIStorage(app=app)
elif storage_type == "huawei-obs":
self.storage_runner = HuaweiStorage(app=app)
elif storage_type == "baidu-obs":
self.storage_runner = BaiduStorage(app=app)
elif storage_type == "volcengine-tos":
self.storage_runner = VolcengineStorage(app=app)
else:
Expand Down
60 changes: 60 additions & 0 deletions api/extensions/storage/baidu_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import base64
import hashlib
from collections.abc import Generator

from baidubce.auth.bce_credentials import BceCredentials
from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.services.bos.bos_client import BosClient
from flask import Flask

from extensions.storage.base_storage import BaseStorage


class BaiduStorage(BaseStorage):
    """Implementation for baidu obs storage.

    Wraps a ``BosClient`` configured from the Flask app config keys
    ``BAIDU_OBS_BUCKET_NAME``, ``BAIDU_OBS_ACCESS_KEY``,
    ``BAIDU_OBS_SECRET_KEY`` and ``BAIDU_OBS_ENDPOINT``.
    """

    def __init__(self, app: Flask):
        """Build the BOS client from the application's configuration.

        Args:
            app: Flask application whose ``config`` supplies the bucket name,
                credentials and endpoint.
        """
        super().__init__(app)
        app_config = self.app.config
        self.bucket_name = app_config.get("BAIDU_OBS_BUCKET_NAME")
        client_config = BceClientConfiguration(
            credentials=BceCredentials(
                access_key_id=app_config.get("BAIDU_OBS_ACCESS_KEY"),
                secret_access_key=app_config.get("BAIDU_OBS_SECRET_KEY"),
            ),
            endpoint=app_config.get("BAIDU_OBS_ENDPOINT"),
        )

        self.client = BosClient(config=client_config)

    def save(self, filename, data):
        """Upload ``data`` to the bucket under the key ``filename``.

        Args:
            filename: Object key to write.
            data: Bytes-like payload; it is hashed and measured with ``len``.
        """
        # The base64-encoded MD5 digest is sent along with the upload as content_md5.
        content_md5 = base64.standard_b64encode(hashlib.md5(data).digest())
        self.client.put_object(
            bucket_name=self.bucket_name,
            key=filename,
            data=data,
            content_length=len(data),
            content_md5=content_md5,
        )

    def load_once(self, filename: str) -> bytes:
        """Return the full content of the object stored under ``filename``."""
        body = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
        try:
            return body.read()
        finally:
            # Release the underlying HTTP response even if the read fails.
            body.close()

    def load_stream(self, filename: str) -> Generator:
        """Return a generator yielding the object in chunks of up to 4096 bytes.

        The request is issued lazily, on the first iteration of the generator.
        """

        def generate(filename: str = filename) -> Generator:
            body = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
            try:
                while chunk := body.read(4096):
                    yield chunk
            finally:
                # Also runs when the consumer abandons the generator early,
                # so the response is not left open.
                body.close()

        return generate()

    def download(self, filename, target_filepath):
        """Download the object ``filename`` into the local file ``target_filepath``."""
        self.client.get_object_to_file(bucket_name=self.bucket_name, key=filename, file_name=target_filepath)

    def exists(self, filename):
        """Return True if an object is stored under ``filename``, else False.

        Raises:
            BceHttpClientError: for any failure other than "object not found".
        """
        # Imported locally so this fix does not depend on the module's import block.
        from baidubce.exception import BceHttpClientError, BceServerError

        try:
            res = self.client.get_object_meta_data(bucket_name=self.bucket_name, key=filename)
        except BceHttpClientError as e:
            # NOTE(review): the SDK is understood to report a missing key by
            # raising BceHttpClientError wrapping a BceServerError with HTTP
            # status 404, rather than returning None — confirm against the
            # installed SDK version.
            last_error = getattr(e, "last_error", None)
            if isinstance(last_error, BceServerError) and getattr(last_error, "status_code", None) == 404:
                return False
            raise
        return res is not None

    def delete(self, filename):
        """Delete the object stored under ``filename``."""
        self.client.delete_object(bucket_name=self.bucket_name, key=filename)
Loading

0 comments on commit 2571b0c

Please sign in to comment.