Merge branch 'feat/workflow-backend' into deploy/dev

# Conflicts:
#	api/commands.py
langgenius · Mar 4, 2024 · 91e56dd · 91e56dd
2 parents 7d78646 + 13c5d82
commit 91e56dd
Show file tree

Hide file tree

Showing 55 changed files with 2,770 additions and 84 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,24 +1,26 @@
-# Dify Open Source License
+# Open Source License
 
-The Dify project is licensed under the Apache License 2.0, with the following additional conditions:
+Dify is licensed under the Apache License 2.0, with the following additional conditions:
 
-1. Dify is permitted to be used for commercialization, such as using Dify as a "backend-as-a-service" for your other applications, or delivering it to enterprises as an application development platform. However, when the following conditions are met, you must contact the producer to obtain a commercial license:
+1. Dify may be utilized commercially, including as a backend service for other applications or as an application development platform for enterprises. Should the conditions below be met, a commercial license must be obtained from the producer:
 
-a. Multi-tenant SaaS service: Unless explicitly authorized by Dify in writing, you may not use the Dify.AI source code to operate a multi-tenant SaaS service that is similar to the Dify.AI service edition.
-b. LOGO and copyright information: In the process of using Dify, you may not remove or modify the LOGO or copyright information in the Dify console.
+a. Multi-tenant SaaS service: Unless explicitly authorized by Dify in writing, you may not use the Dify source code to operate a multi-tenant environment. 
+    - Tenant Definition: Within the context of Dify, one tenant corresponds to one workspace. The workspace provides a separated area for each tenant's data and configurations.
+
+b. LOGO and copyright information: In the process of using Dify's frontend components, you may not remove or modify the LOGO or copyright information in the Dify console or applications. This restriction is inapplicable to uses of Dify that do not involve its frontend components.
 
 Please contact [email protected] by email to inquire about licensing matters.
 
-2. As a contributor, you should agree that your contributed code:
+2. As a contributor, you should agree that:
 
-a. The producer can adjust the open-source agreement to be more strict or relaxed.
-b. Can be used for commercial purposes, such as Dify's cloud business.
+a. The producer can adjust the open-source agreement to be more strict or relaxed as deemed necessary.
+b. Your contributed code may be used for commercial purposes, including but not limited to its cloud business operations.
 
-Apart from this, all other rights and restrictions follow the Apache License 2.0. If you need more detailed information, you can refer to the full version of Apache License 2.0.
+Apart from the specific conditions mentioned above, all other rights and restrictions follow the Apache License 2.0. Detailed information about the Apache License 2.0 can be found at http://www.apache.org/licenses/LICENSE-2.0.
 
 The interactive design of this product is protected by appearance patent.
 
-© 2023 LangGenius, Inc.
+© 2024 LangGenius, Inc.
 
 
 ----------

diff --git a/api/commands.py b/api/commands.py
@@ -15,7 +15,7 @@
 from models.account import Tenant
 from models.dataset import Dataset, DatasetCollectionBinding, DocumentSegment
 from models.dataset import Document as DatasetDocument
-from models.model import Account, App, AppMode, Conversation
+from models.model import Account, App, AppMode, AppModelConfig, AppAnnotationSetting, Conversation, MessageAnnotation
 from models.provider import Provider, ProviderModel
 
 
@@ -125,7 +125,114 @@ def reset_encrypt_key_pair():
 
 
 @click.command('vdb-migrate', help='migrate vector db.')
-def vdb_migrate():
+@click.option('--scope', default='all', prompt=False,
+              type=click.Choice(['knowledge', 'annotation', 'all']),
+              help='The scope of vector database to migrate, Default is All.')
+def vdb_migrate(scope: str):
+    """
+    Migrate vector database data for the selected scope.
+
+    Args:
+        scope (str): 'knowledge' migrates knowledge data, 'annotation'
+            migrates app annotation data, 'all' (the default) migrates both.
+            Any other value is rejected by click instead of silently doing
+            nothing.
+    """
+    if scope in ['knowledge', 'all']:
+        migrate_knowledge_vector_database()
+    if scope in ['annotation', 'all']:
+        migrate_annotation_vector_database()
+
+
+def migrate_annotation_vector_database():
+    """
+    Migrate annotation data to the target vector database.
+
+    Pages through every app whose status is 'normal' (50 per page, newest
+    first). For each app that has an annotation setting and a matching
+    collection binding, the app's existing vector index is deleted and, when
+    the app has annotations, re-created from them. Apps without a setting or
+    without a binding are counted as skipped. A failure on one app is
+    reported and does not stop the remaining apps.
+    """
+    click.echo(click.style('Start migrate annotation data.', fg='green'))
+    create_count = 0
+    skipped_count = 0
+    total_count = 0
+    page = 1
+    while True:
+        try:
+            # get apps info
+            apps = db.session.query(App).filter(
+                App.status == 'normal'
+            ).order_by(App.created_at.desc()).paginate(page=page, per_page=50)
+        except NotFound:
+            # NotFound from paginate is treated as the end of the app list
+            break
+
+        page += 1
+        for app in apps:
+            total_count = total_count + 1
+            click.echo(f'Processing the {total_count} app {app.id}. '
+                       + f'{create_count} created, {skipped_count} skipped.')
+            try:
+                click.echo('Create app annotation index: {}'.format(app.id))
+                app_annotation_setting = db.session.query(AppAnnotationSetting).filter(
+                    AppAnnotationSetting.app_id == app.id
+                ).first()
+
+                if not app_annotation_setting:
+                    skipped_count = skipped_count + 1
+                    click.echo('App annotation setting is disabled: {}'.format(app.id))
+                    continue
+                # get dataset_collection_binding info
+                dataset_collection_binding = db.session.query(DatasetCollectionBinding).filter(
+                    DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id
+                ).first()
+                if not dataset_collection_binding:
+                    # this app is skipped as well; count it so the final summary is accurate
+                    skipped_count = skipped_count + 1
+                    click.echo('App annotation collection binding is not exist: {}'.format(app.id))
+                    continue
+                annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app.id).all()
+                # in-memory Dataset built from the app and its binding; it is only handed to Vector below
+                dataset = Dataset(
+                    id=app.id,
+                    tenant_id=app.tenant_id,
+                    indexing_technique='high_quality',
+                    embedding_model_provider=dataset_collection_binding.provider_name,
+                    embedding_model=dataset_collection_binding.model_name,
+                    collection_binding_id=dataset_collection_binding.id
+                )
+                # one document per annotation, keyed by the annotation id
+                documents = [
+                    Document(
+                        page_content=annotation.question,
+                        metadata={
+                            "annotation_id": annotation.id,
+                            "app_id": app.id,
+                            "doc_id": annotation.id
+                        }
+                    )
+                    for annotation in annotations
+                ]
+
+                vector = Vector(dataset, attributes=['doc_id', 'annotation_id', 'app_id'])
+                click.echo(f"Start to migrate annotation, app_id: {app.id}.")
+
+                try:
+                    vector.delete()
+                    click.echo(
+                        click.style(f'Successfully delete vector index for app: {app.id}.',
+                                    fg='green'))
+                except Exception:
+                    click.echo(
+                        click.style(f'Failed to delete vector index for app {app.id}.',
+                                    fg='red'))
+                    # bare raise keeps the original traceback for the outer handler
+                    raise
+                if documents:
+                    try:
+                        click.echo(click.style(
+                            f'Start to created vector index with {len(documents)} annotations for app {app.id}.',
+                            fg='green'))
+                        vector.create(documents)
+                        click.echo(
+                            click.style(f'Successfully created vector index for app {app.id}.', fg='green'))
+                    except Exception:
+                        click.echo(click.style(f'Failed to created vector index for app {app.id}.', fg='red'))
+                        raise
+                click.echo(f'Successfully migrated app annotation {app.id}.')
+                create_count += 1
+            except Exception as e:
+                # report the failure and move on to the next app
+                click.echo(
+                    click.style('Create app annotation index error: {} {}'.format(e.__class__.__name__, str(e)),
+                                fg='red'))
+
+    click.echo(
+        click.style(f'Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.',
+                    fg='green'))
+
+
+def migrate_knowledge_vector_database():
     """
     Migrate vector database datas to target vector database .
     """

diff --git a/api/controllers/console/workspace/tool_providers.py b/api/controllers/console/workspace/tool_providers.py
@@ -259,6 +259,7 @@ def post(self):
         parser = reqparse.RequestParser()
 
         parser.add_argument('tool_name', type=str, required=True, nullable=False, location='json')
+        parser.add_argument('provider_name', type=str, required=False, nullable=False, location='json')
         parser.add_argument('credentials', type=dict, required=True, nullable=False, location='json')
         parser.add_argument('parameters', type=dict, required=True, nullable=False, location='json')
         parser.add_argument('schema_type', type=str, required=True, nullable=False, location='json')
@@ -268,6 +269,7 @@ def post(self):
 
         return ToolManageService.test_api_tool_preview(
             current_user.current_tenant_id,
+            args['provider_name'] if args['provider_name'] else '',
             args['tool_name'],
             args['credentials'],
             args['parameters'],

diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/core/rag/datasource/vdb/milvus/milvus_vector.py
@@ -140,7 +140,8 @@ def delete(self) -> None:
         connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
 
         from pymilvus import utility
-        utility.drop_collection(self._collection_name, None, using=alias)
+        if utility.has_collection(self._collection_name, using=alias):
+            utility.drop_collection(self._collection_name, None, using=alias)
 
     def text_exists(self, id: str) -> bool:
 

diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py
@@ -231,21 +231,30 @@ def delete_by_metadata_field(self, key: str, value: str):
 
     def delete(self):
+        """
+        Delete this instance's points from the collection.
+
+        Issues a filtered delete on `self._collection_name` that selects the
+        points whose `group_id` field equals `self._group_id`. An
+        `UnexpectedResponse` with status code 404 is taken to mean the
+        collection does not exist and is ignored; any other
+        `UnexpectedResponse` is re-raised.
+        """
         from qdrant_client.http import models
-        filter = models.Filter(
-            must=[
-                models.FieldCondition(
-                    key="group_id",
-                    match=models.MatchValue(value=self._group_id),
+        from qdrant_client.http.exceptions import UnexpectedResponse
+
+        try:
+            filter = models.Filter(
+                must=[
+                    models.FieldCondition(
+                        key="group_id",
+                        match=models.MatchValue(value=self._group_id),
+                    ),
+                ],
+            )
+            self._client.delete(
+                collection_name=self._collection_name,
+                points_selector=FilterSelector(
+                    filter=filter
                 ),
-            ],
-        )
-        self._client.delete(
-            collection_name=self._collection_name,
-            points_selector=FilterSelector(
-                filter=filter
-            ),
-        )
-
+            )
+        except UnexpectedResponse as e:
+            # Collection does not exist, so return
+            if e.status_code == 404:                
+                return
+            # Some other error occurred, so re-raise the exception
+            else:
+                raise e
     def delete_by_ids(self, ids: list[str]) -> None:
 
         from qdrant_client.http import models

diff --git a/api/core/tools/provider/_position.yaml b/api/core/tools/provider/_position.yaml
@@ -1,16 +1,20 @@
 - google
 - bing
+- duckduckgo
+- yahoo
 - wikipedia
+- arxiv
+- pubmed
 - dalle
 - azuredalle
+- stablediffusion
 - webscraper
+- youtube
 - wolframalpha
+- maths
 - github
 - chart
 - time
-- yahoo
-- stablediffusion
 - vectorizer
-- youtube
 - gaode
-- maths
+- wecom
diff --git a/api/core/tools/provider/builtin/bing/bing.py b/api/core/tools/provider/builtin/bing/bing.py
@@ -16,7 +16,8 @@ def _validate_credentials(self, credentials: dict[str, Any]) -> None:
                 user_id='',
                 tool_parameters={
                     "query": "test",
-                    "result_type": "link"
+                    "result_type": "link",
+                    "enable_webpages": True,
                 },
             )
         except Exception as e:

diff --git a/api/core/tools/provider/builtin/pubmed/_assets/icon.svg b/api/core/tools/provider/builtin/pubmed/_assets/icon.svg
diff --git a/api/core/tools/provider/builtin/pubmed/pubmed.py b/api/core/tools/provider/builtin/pubmed/pubmed.py
@@ -0,0 +1,20 @@
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin.pubmed.tools.pubmed_search import PubMedSearchTool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class PubMedProvider(BuiltinToolProviderController):
+    """Built-in tool provider for PubMed."""
+
+    def _validate_credentials(self, credentials: dict) -> None:
+        """
+        Validate the credentials by running a sample PubMed search.
+
+        Args:
+            credentials (dict): Credentials forwarded to the tool runtime.
+
+        Raises:
+            ToolProviderCredentialValidationError: If the sample search
+                raises any exception; the original exception is attached as
+                the cause.
+        """
+        try:
+            PubMedSearchTool().fork_tool_runtime(
+                meta={
+                    "credentials": credentials,
+                }
+            ).invoke(
+                user_id='',
+                tool_parameters={
+                    "query": "John Doe",
+                },
+            )
+        except Exception as e:
+            # chain the cause so the underlying traceback is not lost
+            raise ToolProviderCredentialValidationError(str(e)) from e
diff --git a/api/core/tools/provider/builtin/pubmed/pubmed.yaml b/api/core/tools/provider/builtin/pubmed/pubmed.yaml
@@ -0,0 +1,10 @@
+identity:
+  author: Pink Banana
+  name: pubmed
+  label:
+    en_US: PubMed
+    zh_Hans: PubMed
+  description:
+    en_US: A search engine for biomedical literature.
+    zh_Hans: 一款生物医学文献搜索引擎。
+  icon: icon.svg
diff --git a/api/core/tools/provider/builtin/pubmed/tools/pubmed_search.py b/api/core/tools/provider/builtin/pubmed/tools/pubmed_search.py
@@ -0,0 +1,40 @@
+from typing import Any
+
+from langchain.tools import PubmedQueryRun
+from pydantic import BaseModel, Field
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class PubMedInput(BaseModel):
+    """Schema with a single `query` string field, used as the PubMed tool's argument schema."""
+    query: str = Field(..., description="Search query.")
+
+
+class PubMedSearchTool(BuiltinTool):
+    """
+    Built-in tool that runs a query against the PubMed search engine.
+    """
+
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
+        """
+        Run a PubMed search and return the summarized result as a text message.
+
+        Args:
+            user_id (str): The ID of the user invoking the tool.
+            tool_parameters (dict[str, Any]): Invocation parameters; the
+                search text is read from the 'query' key.
+
+        Returns:
+            ToolInvokeMessage | list[ToolInvokeMessage]: A prompt asking for a
+            query when none was given, otherwise the summarized search result.
+        """
+        search_query = tool_parameters.get('query', '')
+
+        # guard clause: nothing to search for
+        if not search_query:
+            return self.create_text_message('Please input query')
+
+        pubmed_runner = PubmedQueryRun(args_schema=PubMedInput)
+        raw_result = pubmed_runner.run(search_query)
+
+        summarized = self.summary(user_id=user_id, content=raw_result)
+        return self.create_text_message(summarized)
+
diff --git a/api/core/tools/provider/builtin/pubmed/tools/pubmed_search.yaml b/api/core/tools/provider/builtin/pubmed/tools/pubmed_search.yaml
@@ -0,0 +1,23 @@
+identity:
+  name: pubmed_search
+  author: Pink Banana
+  label:
+    en_US: PubMed Search
+    zh_Hans: PubMed 搜索
+description:
+  human:
+    en_US: PubMed® comprises more than 35 million citations for biomedical literature from MEDLINE, life science journals, and online books. Citations may include links to full text content from PubMed Central and publisher web sites.
+    zh_Hans: PubMed® 包含来自 MEDLINE、生命科学期刊和在线书籍的超过 3500 万篇生物医学文献引用。引用可能包括来自 PubMed Central 和出版商网站的全文内容链接。
+  llm: Perform searches on PubMed and get results.
+parameters:
+  - name: query
+    type: string
+    required: true
+    label:
+      en_US: Query string
+      zh_Hans: 查询语句
+    human_description:
+      en_US: The search query.
+      zh_Hans: 搜索查询语句。
+    llm_description: Key words for searching
+    form: llm
diff --git a/api/core/tools/provider/builtin/stablediffusion/tools/stable_diffusion.py b/api/core/tools/provider/builtin/stablediffusion/tools/stable_diffusion.py
@@ -70,7 +70,7 @@ def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) \
         if not base_url:
             return self.create_text_message('Please input base_url')
 
-        if 'model' in tool_parameters:
+        if 'model' in tool_parameters and tool_parameters['model']:
             self.runtime.credentials['model'] = tool_parameters['model']
 
         model = self.runtime.credentials.get('model', None)

diff --git a/api/core/tools/provider/builtin/wecom/_assets/icon.png b/api/core/tools/provider/builtin/wecom/_assets/icon.png