bottom&top of a page tools

h2oai · Nov 5, 2024 · ce23343 · ce23343
1 parent 2cea515
commit ce23343
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 2 deletions.
diff --git a/openai_server/agent_tools/web_agent_tool.py b/openai_server/agent_tools/web_agent_tool.py
@@ -175,6 +175,16 @@ def page_down(self) -> str:
         header, content = self.browser_state()
         return header.strip() + "\n=======================\n" + content
 
+    def top_of_page(self) -> str:
+        self.browser.top_of_page()
+        header, content = self.browser_state()
+        return header.strip() + "\n=======================\n" + content
+
+    def bottom_of_page(self) -> str:
+        self.browser.bottom_of_page()
+        header, content = self.browser_state()
+        return header.strip() + "\n=======================\n" + content
+
     def download_file(self, url: str) -> str:
         self.browser.visit_page(url)
         header, content = self.browser_state()
@@ -258,6 +268,10 @@ def ask(self, raw_question: str, attachment_file_path: str = None) -> str:
                 tool_result = self.page_up()
             elif tool == "page_down":
                 tool_result = self.page_down()
+            elif tool == "top_of_page":
+                tool_result = self.top_of_page()
+            elif tool == "bottom_of_page":
+                tool_result = self.bottom_of_page()
             elif tool == "download_file":
                 tool_result = self.download_file(**args)
             elif tool == "find_on_page_ctrl_f":

diff --git a/openai_server/browser/prompts/choose_tool.txt b/openai_server/browser/prompts/choose_tool.txt
@@ -31,6 +31,10 @@ When the page is very long, content truncation may occur due to the limited disp
     Scroll the viewport UP one page-length in the current webpage and return the new viewport content.
 - page_down() -> str:
     Scroll the viewport DOWN one page-length in the current webpage and return the new viewport content.
+- bottom_of_page() -> str:
+    Scroll the viewport to the BOTTOM of the page. This can be useful for long pages when you want to quickly get to the bottom of the current page.
+- top_of_page() -> str:
+    Scroll the viewport to the TOP of the page. This can be useful for long pages when you want to quickly get to the top of the current page.
 - download_file(url: str) -> str:
     Download a file at a given URL and, if possible, return its text. File types that will be returned as text: .pdf, .docx, .xlsx, .pptx, .wav, .mp3, .jpg, .jpeg, .png (you can read the text content of files with these extensions).
 - find_on_page_ctrl_f(search_string: str) -> str:

diff --git a/openai_server/browser/prompts/summarize_step.txt b/openai_server/browser/prompts/summarize_step.txt
@@ -5,7 +5,6 @@ Your ultimate goal is to find the answer to the question below.
 
 # Tools
 
-## Browser
 The functions of the browser share the same session, which means the viewport will persist between calls.
 Every function will return the text of the current viewport after the action is performed. For long pages (longer than 1 viewport), you can use the page_up() and page_down() functions to scroll the viewport.
 Since the page has been converted from HTML to Markdown, you cannot submit information using a form, nor can you enter information in any text boxes. If you want to use the form inside the page, try using the computer_terminal below to read the html content.
@@ -20,6 +19,10 @@ When the page is very long, content truncation may occur due to the limited disp
     Scroll the viewport UP one page-length in the current webpage and return the new viewport content.
 - page_down() -> str:
     Scroll the viewport DOWN one page-length in the current webpage and return the new viewport content.
+- bottom_of_page() -> str:
+    Scroll the viewport to the BOTTOM of the page. This can be useful for long pages when you want to quickly get to the bottom of the current page.
+- top_of_page() -> str:
+    Scroll the viewport to the TOP of the page. This can be useful for long pages when you want to quickly get to the top of the current page.
 - download_file(url: str) -> str:
     Download a file at a given URL and, if possible, return its text. File types that will be returned as text: .pdf, .docx, .xlsx, .pptx, .wav, .mp3, .jpg, .jpeg, .png (you can read the text content of files with these extensions).
 - find_on_page_ctrl_f(search_string: str) -> str:
@@ -66,4 +69,10 @@ Explanation:
   xxxx 
 Plan:
   xxxx 
-```
+```
+
+# Plan Tips:
+- Some web pages may contain links written like this: [TEXT](https://some.url.com). This means you can click 'TEXT', or see its content, by visiting the URL next to it, which here is https://some.url.com.
+
+Important: The user is never able to see the current viewport; the user will only ever see your response. That is why you must always provide the expected answers under the 'Facts' section of your response.
+Don't assume that the user can see the current viewport as you do.
diff --git a/openai_server/browser/utils.py b/openai_server/browser/utils.py
@@ -100,6 +100,14 @@ def page_down(self) -> None:
 
     def page_up(self) -> None:
         self.viewport_current_page = max(self.viewport_current_page - 1, 0)
+
+    def bottom_of_page(self) -> None:
+        "Scroll the viewport to the bottom of the page. This can be useful for long pages when you want to quickly get to the bottom.  For example, sections like References, External Links, or See Also can be at the bottom of a page."
+        self.viewport_current_page = len(self.viewport_pages) - 1
+
+    def top_of_page(self) -> None:
+        "Scroll the viewport to the top of the page. This can be useful for long pages when you want to quickly get to the top. For example, the Table of Contents, Search Box, or Introduction can be at the top of a page."
+        self.viewport_current_page = 0
 
     def find_on_page(self, query: str) -> Union[str, None]:
         """Searches for the query from the current viewport forward, looping back to the start if necessary."""