Add 2 firecrawl tools : Scrape and Search (#6016)

Co-authored-by: -LAN- <[email protected]>
langgenius · Jul 6, 2024 · ab847c8 · ab847c8
1 parent b217ee4
commit ab847c8
Show file tree

Hide file tree

Showing 4 changed files with 98 additions and 0 deletions.
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
@@ -0,0 +1,26 @@
+import json
+from typing import Any, Union
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class ScrapeTool(BuiltinTool):
+    """Built-in tool that scrapes a single URL through FirecrawlApp and reports the result as text."""
+
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """Scrape ``tool_parameters['url']`` and return the outcome as a text message."""
+        credentials = self.runtime.credentials
+        app = FirecrawlApp(api_key=credentials['firecrawl_api_key'], base_url=credentials['base_url'])
+        scraped = app.scrape_url(url=tool_parameters['url'], wait=True)
+
+        # Any falsy result (None, empty dict, empty string, ...) is reported as a failure.
+        if not scraped:
+            return self.create_text_message("Scrape request failed.")
+
+        # Dict results are pretty-printed as JSON; every other type falls back to str().
+        if isinstance(scraped, dict):
+            return self.create_text_message(json.dumps(scraped, ensure_ascii=False, indent=4))
+
+        return self.create_text_message(str(scraped))
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/scrape.yaml b/api/core/tools/provider/builtin/firecrawl/tools/scrape.yaml
@@ -0,0 +1,23 @@
+identity:
+  name: scrape
+  author: ahasasjeb
+  label:
+    en_US: Scrape
+    zh_Hans: 抓取
+description:
+  human:
+    en_US: Extract data from a single URL.
+    zh_Hans: 从单个URL抓取数据。
+  llm: This tool is designed to scrape a URL and output the content in Markdown format.
+parameters:
+  - name: url
+    type: string
+    required: true
+    label:
+      en_US: URL to scrape
+      zh_Hans: 要抓取的URL
+    human_description:
+      en_US: The URL of the website to scrape and extract data from.
+      zh_Hans: 要抓取并提取数据的网站URL。
+    llm_description: The URL of the website that needs to be scraped. This is a required parameter.
+    form: llm
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/search.py b/api/core/tools/provider/builtin/firecrawl/tools/search.py
@@ -0,0 +1,26 @@
+import json
+from typing import Any, Union
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class SearchTool(BuiltinTool):
+    """Built-in tool that runs a keyword search through FirecrawlApp and reports the result as text."""
+
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """Search for ``tool_parameters['keyword']`` and return the outcome as a text message."""
+        credentials = self.runtime.credentials
+        app = FirecrawlApp(api_key=credentials['firecrawl_api_key'], base_url=credentials['base_url'])
+        found = app.search(query=tool_parameters['keyword'], wait=True)
+
+        # Any falsy result (None, empty dict, empty string, ...) is reported as a failure.
+        if not found:
+            return self.create_text_message("Search request failed.")
+
+        # Dict results are pretty-printed as JSON; every other type falls back to str().
+        if isinstance(found, dict):
+            return self.create_text_message(json.dumps(found, ensure_ascii=False, indent=4))
+
+        return self.create_text_message(str(found))
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/search.yaml b/api/core/tools/provider/builtin/firecrawl/tools/search.yaml
@@ -0,0 +1,23 @@
+identity:
+  name: search
+  author: ahasasjeb
+  label:
+    en_US: Search
+    zh_Hans: 搜索
+description:
+  human:
+    en_US: Search and output the results in Markdown format.
+    zh_Hans: 搜索，并且以Markdown格式输出
+  llm: This tool can perform online searches and convert the results to Markdown format.
+parameters:
+  - name: keyword
+    type: string
+    required: true
+    label:
+      en_US: keyword
+      zh_Hans: 关键词
+    human_description:
+      en_US: Input keywords to use Firecrawl API for search.
+      zh_Hans: 输入关键词即可使用Firecrawl API进行搜索。
+    llm_description: The keywords to search for, extracted efficiently from the user's text.
+    form: llm