diff --git a/api/core/app/features/rate_limiting/rate_limit.py b/api/core/app/features/rate_limiting/rate_limit.py index d2d2f0e3b82f90..a0f96df9ce53d8 100644 --- a/api/core/app/features/rate_limiting/rate_limit.py +++ b/api/core/app/features/rate_limiting/rate_limit.py @@ -1,8 +1,9 @@ import logging import time import uuid +from collections.abc import Generator from datetime import timedelta -from typing import Optional +from typing import Optional, Union from core.errors.error import AppInvokeQuotaExceededError from extensions.ext_redis import redis_client @@ -81,3 +82,39 @@ def exit(self, request_id: str): @staticmethod def gen_request_key() -> str: return str(uuid.uuid4()) + + def generate(self, generator: Union[Generator, callable, dict], request_id: str): + if isinstance(generator, dict): + return generator + else: + return RateLimitGenerator(self, generator, request_id) + + +class RateLimitGenerator: + def __init__(self, rate_limit: RateLimit, generator: Union[Generator, callable], request_id: str): + self.rate_limit = rate_limit + if callable(generator): + self.generator = generator() + else: + self.generator = generator + self.request_id = request_id + self.closed = False + + def __iter__(self): + return self + + def __next__(self): + if self.closed: + raise StopIteration + try: + return next(self.generator) + except StopIteration: + self.close() + raise + + def close(self): + if not self.closed: + self.closed = True + self.rate_limit.exit(self.request_id) + if self.generator is not None and hasattr(self.generator, 'close'): + self.generator.close() diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py index 123a432f6b368a..94d804a919869f 100644 --- a/api/fields/app_fields.py +++ b/api/fields/app_fields.py @@ -72,7 +72,7 @@ app_partial_fields = { 'id': fields.String, 'name': fields.String, - 'max_active_requests': fields.Integer, + 'max_active_requests': fields.Raw(), 'description': fields.String(attribute='desc_or_prompt'), 'mode': fields.String(attribute='mode_compatible_with_agent'), 'icon': fields.String, diff --git a/api/models/model.py b/api/models/model.py index d16f89edee7c01..4d67272c1a393f 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -74,7 +74,7 @@ class App(db.Model): is_public = db.Column(db.Boolean, nullable=False, server_default=db.text('false')) is_universal = db.Column(db.Boolean, nullable=False, server_default=db.text('false')) tracing = db.Column(db.Text, nullable=True) - max_active_requests = db.Column(db.Integer, nullable=False, server_default=db.text('0')) + max_active_requests = db.Column(db.Integer, nullable=True) created_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)')) updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)')) diff --git a/api/services/app_generate_service.py b/api/services/app_generate_service.py index 09a50dd702739a..3acd3becdb2deb 100644 --- a/api/services/app_generate_service.py +++ b/api/services/app_generate_service.py @@ -36,58 +36,59 @@ def generate(cls, app_model: App, try: request_id = rate_limit.enter(request_id) if app_model.mode == AppMode.COMPLETION.value: - return CompletionAppGenerator().generate( + return rate_limit.generate(CompletionAppGenerator().generate( app_model=app_model, user=user, args=args, invoke_from=invoke_from, stream=streaming - ) + ), request_id) elif app_model.mode == AppMode.AGENT_CHAT.value or app_model.is_agent: - return AgentChatAppGenerator().generate( + return rate_limit.generate(AgentChatAppGenerator().generate( app_model=app_model, user=user, args=args, invoke_from=invoke_from, stream=streaming - ) + ), request_id) elif app_model.mode == AppMode.CHAT.value: - return ChatAppGenerator().generate( + return rate_limit.generate(ChatAppGenerator().generate( app_model=app_model, user=user, args=args, invoke_from=invoke_from, stream=streaming - ) + ), request_id) elif app_model.mode == AppMode.ADVANCED_CHAT.value: workflow = cls._get_workflow(app_model, invoke_from) - return AdvancedChatAppGenerator().generate( + return rate_limit.generate(AdvancedChatAppGenerator().generate( app_model=app_model, workflow=workflow, user=user, args=args, invoke_from=invoke_from, stream=streaming - ) + ), request_id) elif app_model.mode == AppMode.WORKFLOW.value: workflow = cls._get_workflow(app_model, invoke_from) - return WorkflowAppGenerator().generate( + return rate_limit.generate(WorkflowAppGenerator().generate( app_model=app_model, workflow=workflow, user=user, args=args, invoke_from=invoke_from, stream=streaming - ) + ), request_id) else: raise ValueError(f'Invalid app mode {app_model.mode}') finally: - rate_limit.exit(request_id) + if not streaming: + rate_limit.exit(request_id) @staticmethod def _get_max_active_requests(app_model: App) -> int: max_active_requests = app_model.max_active_requests - if app_model.max_active_requests == 0: + if app_model.max_active_requests is None: from flask import current_app max_active_requests = int(current_app.config['APP_MAX_ACTIVE_REQUESTS']) return max_active_requests diff --git a/api/services/app_service.py b/api/services/app_service.py index c4c63e6d066d0b..03986db2aea1ee 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -325,16 +325,15 @@ def update_app(self, app: App, args: dict) -> App: """ app.name = args.get('name') app.description = args.get('description', '') - app.max_active_requests = args.get('max_active_requests', 0) or 0 - if app.max_active_requests < 0: - app.max_active_requests = 0 + app.max_active_requests = args.get('max_active_requests') app.icon = args.get('icon') app.icon_background = args.get('icon_background') app.updated_at = datetime.now(timezone.utc).replace(tzinfo=None) db.session.commit() - rate_limit = RateLimit(app.id, app.max_active_requests) - rate_limit.flush_cache(use_local_value=True) + if app.max_active_requests is not None: + rate_limit = RateLimit(app.id, app.max_active_requests) + rate_limit.flush_cache(use_local_value=True) return app def update_app_name(self, app: App, name: str) -> App: diff --git a/web/app/components/explore/create-app-modal/index.tsx b/web/app/components/explore/create-app-modal/index.tsx index 714123c5722d39..9624baa07cc2b4 100644 --- a/web/app/components/explore/create-app-modal/index.tsx +++ b/web/app/components/explore/create-app-modal/index.tsx @@ -99,6 +99,16 @@ const CreateAppModal = ({ onChange={e => setDescription(e.target.value)} /> + {/* description */} +
+
{t('app.newApp.captionDescription')}
+