diff --git a/gpt_engineer/applications/cli/cli_agent.py b/gpt_engineer/applications/cli/cli_agent.py index 85933378f3..daae9addaf 100644 --- a/gpt_engineer/applications/cli/cli_agent.py +++ b/gpt_engineer/applications/cli/cli_agent.py @@ -109,6 +109,7 @@ def with_default_config( improve_fn: ImproveType = improve_fn, process_code_fn: CodeProcessor = execute_entrypoint, preprompts_holder: PrepromptsHolder = None, + diff_timeout=3, ): """ Creates a new instance of CliAgent with default configurations for memory, execution environment, @@ -186,6 +187,7 @@ def improve( files_dict: FilesDict, prompt: Prompt, execution_command: Optional[str] = None, + diff_timeout=3, ) -> FilesDict: """ Improves an existing piece of code using the AI and step bundle based on the provided prompt. @@ -206,7 +208,12 @@ def improve( """ files_dict = self.improve_fn( - self.ai, prompt, files_dict, self.memory, self.preprompts_holder + self.ai, + prompt, + files_dict, + self.memory, + self.preprompts_holder, + diff_timeout=diff_timeout, ) # entrypoint = gen_entrypoint( # self.ai, prompt, files_dict, self.memory, self.preprompts_holder diff --git a/gpt_engineer/applications/cli/file_selector.py b/gpt_engineer/applications/cli/file_selector.py index a80608620c..e64764059e 100644 --- a/gpt_engineer/applications/cli/file_selector.py +++ b/gpt_engineer/applications/cli/file_selector.py @@ -76,7 +76,7 @@ def __init__(self, project_path: Union[str, Path]): self.metadata_db = DiskMemory(metadata_path(self.project_path)) self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME - def ask_for_files(self) -> tuple[FilesDict, bool]: + def ask_for_files(self, skip_file_selection=False) -> tuple[FilesDict, bool]: """ Prompts the user to select files for context improvement. @@ -89,8 +89,9 @@ def ask_for_files(self) -> tuple[FilesDict, bool]: A dictionary with file paths as keys and file contents as values. """ - if os.getenv("GPTE_TEST_MODE"): + if os.getenv("GPTE_TEST_MODE") or skip_file_selection: # In test mode, retrieve files from a predefined TOML configuration + # also get from toml if skip_file_selector is active assert self.FILE_LIST_NAME in self.metadata_db selected_files = self.get_files_from_toml(self.project_path, self.toml_path) else: @@ -412,7 +413,7 @@ def get_current_files(self, project_path: Union[str, Path]) -> List[str]: if is_git_repo(project_path) and "projects" not in project_path.parts: all_files = filter_by_gitignore(project_path, all_files) - return all_files + return sorted(all_files, key=lambda x: Path(x).as_posix()) class DisplayablePath(object): diff --git a/gpt_engineer/applications/cli/main.py b/gpt_engineer/applications/cli/main.py index 51b88b10c3..5a0c4135b7 100644 --- a/gpt_engineer/applications/cli/main.py +++ b/gpt_engineer/applications/cli/main.py @@ -357,6 +357,12 @@ def main( "--use_cache", help="Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response.", ), + skip_file_selection: bool = typer.Option( + False, + "--skip-file-selection", + "-s", + help="Skip interactive file selection in improve mode and use the generated TOML file directly.", + ), no_execution: bool = typer.Option( False, "--no_execution", @@ -367,6 +373,11 @@ def main( "--sysinfo", help="Output system information for debugging", ), + diff_timeout: int = typer.Option( + 3, + "--diff_timeout", + help="Diff regexp timeout. Default: 3. Increase if regexp search timeouts.", + ), ): """ The main entry point for the CLI tool that generates or improves a project. @@ -405,6 +416,8 @@ def main( Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response. verbose : bool Flag indicating whether to enable verbose logging. + skip_file_selection: bool + Skip interactive file selection in improve mode and use the generated TOML file directly no_execution: bool Run setup but to not call LLM or write any code. For testing purposes. sysinfo: bool @@ -501,13 +514,17 @@ def main( files = FileStore(project_path) if not no_execution: if improve_mode: - files_dict_before, is_linting = FileSelector(project_path).ask_for_files() + files_dict_before, is_linting = FileSelector(project_path).ask_for_files( + skip_file_selection=skip_file_selection + ) # lint the code if is_linting: files_dict_before = files.linting(files_dict_before) - files_dict = handle_improve_mode(prompt, agent, memory, files_dict_before) + files_dict = handle_improve_mode( + prompt, agent, memory, files_dict_before, diff_timeout=diff_timeout + ) if not files_dict or files_dict_before == files_dict: print( f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path}/logs folder in a github issue?" diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py index e9a5bc7443..35eec6100e 100644 --- a/gpt_engineer/core/chat_to_files.py +++ b/gpt_engineer/core/chat_to_files.py @@ -120,7 +120,7 @@ def apply_diffs(diffs: Dict[str, Diff], files: FilesDict) -> FilesDict: return files -def parse_diffs(diff_string: str) -> dict: +def parse_diffs(diff_string: str, diff_timeout=3) -> dict: """ Parses a diff string in the unified git diff format. @@ -138,7 +138,7 @@ def parse_diffs(diff_string: str) -> dict: diffs = {} try: - for block in diff_block_pattern.finditer(diff_string, timeout=1): + for block in diff_block_pattern.finditer(diff_string, timeout=diff_timeout): diff_block = block.group() # Parse individual diff blocks and update the diffs dictionary diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py index d778948b65..6b46263424 100644 --- a/gpt_engineer/core/default/steps.py +++ b/gpt_engineer/core/default/steps.py @@ -274,6 +274,7 @@ def improve_fn( files_dict: FilesDict, memory: BaseMemory, preprompts_holder: PrepromptsHolder, + diff_timeout=3, ) -> FilesDict: """ Improves the code based on user input and returns the updated files. @@ -308,14 +309,16 @@ def improve_fn( DEBUG_LOG_FILE, "UPLOADED FILES:\n" + files_dict.to_log() + "\nPROMPT:\n" + prompt.text, ) - return _improve_loop(ai, files_dict, memory, messages) + return _improve_loop(ai, files_dict, memory, messages, diff_timeout=diff_timeout) def _improve_loop( - ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List + ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List, diff_timeout=3 ) -> FilesDict: messages = ai.next(messages, step_name=curr_fn()) - files_dict, errors = salvage_correct_hunks(messages, files_dict, memory) + files_dict, errors = salvage_correct_hunks( + messages, files_dict, memory, diff_timeout=diff_timeout + ) retries = 0 while errors and retries < MAX_EDIT_REFINEMENT_STEPS: @@ -327,21 +330,21 @@ def _improve_loop( ) ) messages = ai.next(messages, step_name=curr_fn()) - files_dict, errors = salvage_correct_hunks(messages, files_dict, memory) + files_dict, errors = salvage_correct_hunks( + messages, files_dict, memory, diff_timeout + ) retries += 1 return files_dict def salvage_correct_hunks( - messages: List, - files_dict: FilesDict, - memory: BaseMemory, + messages: List, files_dict: FilesDict, memory: BaseMemory, diff_timeout=3 ) -> tuple[FilesDict, List[str]]: error_messages = [] ai_response = messages[-1].content.strip() - diffs = parse_diffs(ai_response) + diffs = parse_diffs(ai_response, diff_timeout=diff_timeout) # validate and correct diffs for _, diff in diffs.items(): @@ -370,13 +373,13 @@ def flush(self): file.flush() -def handle_improve_mode(prompt, agent, memory, files_dict): +def handle_improve_mode(prompt, agent, memory, files_dict, diff_timeout=3): captured_output = io.StringIO() old_stdout = sys.stdout sys.stdout = Tee(sys.stdout, captured_output) try: - files_dict = agent.improve(files_dict, prompt) + files_dict = agent.improve(files_dict, prompt, diff_timeout=diff_timeout) except Exception as e: print( f"Error while improving the project: {e}\nCould you please upload the debug_log_file.txt in {memory.path}/logs folder to github?\nFULL STACK TRACE:\n" diff --git a/gpt_engineer/tools/custom_steps.py b/gpt_engineer/tools/custom_steps.py index 827fbb6850..8e4f7cb930 100644 --- a/gpt_engineer/tools/custom_steps.py +++ b/gpt_engineer/tools/custom_steps.py @@ -44,6 +44,7 @@ def self_heal( prompt: Prompt = None, preprompts_holder: PrepromptsHolder = None, memory: BaseMemory = None, + diff_timeout=3, ) -> FilesDict: """ Attempts to execute the code from the entrypoint and if it fails, sends the error output back to the AI with instructions to fix. @@ -111,7 +112,7 @@ def self_heal( f"A program with this specification was requested:\n{prompt}\n, but running it produced the following output:\n{stdout_full}\n and the following errors:\n{stderr_full}. Please change it so that it fulfills the requirements." ) files_dict = improve_fn( - ai, new_prompt, files_dict, memory, preprompts_holder + ai, new_prompt, files_dict, memory, preprompts_holder, diff_timeout ) else: break diff --git a/gpt_engineer/tools/supported_languages.py b/gpt_engineer/tools/supported_languages.py index f17536c852..7644540ca2 100644 --- a/gpt_engineer/tools/supported_languages.py +++ b/gpt_engineer/tools/supported_languages.py @@ -46,7 +46,9 @@ "extensions": [".cpp", ".cc", ".cxx", ".h", ".hpp", ".hxx"], "tree_sitter_name": "cpp", }, - {"name": "C", "extensions": [".c", ".h"], "tree_sitter_name": "c"} + {"name": "C", "extensions": [".c", ".h"], "tree_sitter_name": "c"}, + {"name": "Markdown", "extensions": [".md"], "tree_sitter_name": "md"}, + {"name": "Arduino C", "extensions": [".ino"], "tree_sitter_name": "ino"} # ---- the following are not supported by the current code chunker implementation ---- # { # "name": "Swift", diff --git a/tests/applications/cli/test_main.py b/tests/applications/cli/test_main.py index 1c33e7f3c9..161a84052d 100644 --- a/tests/applications/cli/test_main.py +++ b/tests/applications/cli/test_main.py @@ -92,6 +92,36 @@ def test_improve_existing_project(self, tmp_path, monkeypatch): ) args() + # Runs gpt-engineer with improve mode and improves an existing project in the specified path, with skip_file_selection + def test_improve_existing_project_skip_file_selection(self, tmp_path, monkeypatch): + p = tmp_path / "projects/example" + p.mkdir(parents=True) + (p / "prompt").write_text(prompt_text) + args = DefaultArgumentsMain( + str(p), + improve_mode=True, + llm_via_clipboard=True, + no_execution=True, + skip_file_selection=True, + ) + args() + assert args.skip_file_selection, "Skip_file_selection not set" + + # Runs gpt-engineer with improve mode and improves an existing project in the specified path, with skip_file_selection + def test_improve_existing_project_diff_timeout(self, tmp_path, monkeypatch): + p = tmp_path / "projects/example" + p.mkdir(parents=True) + (p / "prompt").write_text(prompt_text) + args = DefaultArgumentsMain( + str(p), + improve_mode=True, + llm_via_clipboard=True, + no_execution=True, + diff_timeout=99, + ) + args() + assert args.diff_timeout == 99, "Diff timeout not set" + # def improve_generator(): # yield "y" # while True: diff --git a/tests/core/test_file_selector_enhancements.py b/tests/core/test_file_selector_enhancements.py new file mode 100644 index 0000000000..a5de0a6f38 --- /dev/null +++ b/tests/core/test_file_selector_enhancements.py @@ -0,0 +1,59 @@ +import os + +from pathlib import Path +from typing import List, Union + +from gpt_engineer.applications.cli.file_selector import FileSelector + +editorcalled = False + + +def set_editor_called( + self, input_path: Union[str, Path], init: bool = True +) -> List[str]: + global editorcalled + editorcalled = True + return [] + + +def set_file_selector_tmpproject(tmp_path): + project_path = tmp_path / "project/" + os.mkdir(project_path) + os.mkdir(project_path / "x") + os.mkdir(project_path / "a") + + gpteng_path = project_path / ".gpteng" + os.mkdir(gpteng_path) + + with open(gpteng_path / "file_selection.toml", "w") as file: + file.write("[files]\n") + file.write(' "x/xxtest.py" = "selected"\n') + file.write(' "a/aatest.py" = "selected"\n') + + with open(project_path / "x/xxtest.py", "w") as file: + file.write('print("Hello")') + + with open(project_path / "a/aatest.py", "w") as file: + file.write('print("Hello")') + + return project_path + + +def test_file_selector_enhancement_skip_file_selector(tmp_path): + project_path = set_file_selector_tmpproject(tmp_path) + fileSelector = FileSelector(project_path=project_path) + fileSelector.editor_file_selector = set_editor_called + fileSelector.ask_for_files(skip_file_selection=True) + + assert editorcalled is False, "FileSelector.skip_file_selector is not working" + + +def test_file_selector_enhancement_sort(tmp_path): + project_path = set_file_selector_tmpproject(tmp_path) + fileSelector = FileSelector(project_path=project_path) + + sortedFiles = fileSelector.get_current_files(project_path) + assert sortedFiles == [ + "a/aatest.py", + "x/xxtest.py", + ], "FileSelector.get_current_files is unsorted!"