
Commit

Merge pull request #124 from Pythagora-io/fix/123-recover-from-invalid-json-response

recover from invalid json response
LeonOstrez authored Oct 2, 2023
2 parents 6c571f0 + 70a7a61 commit 140330e
Showing 19 changed files with 208 additions and 26 deletions.
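The change set adds retry handling for LLM replies that parse as JSON but fail schema validation: the validation failure is fed back to the model via the new `pilot/prompts/utils/invalid_json.prompt` and the request is repeated, as exercised by `test_test_code_changes_invalid_json` below. A minimal sketch of that pattern, assuming a `jsonschema`-style validator and an `ask_llm` callable as illustrative stand-ins rather than the project's actual helpers:

```python
import json
from jsonschema import validate, ValidationError  # assumed validation dependency


def ask_until_valid_json(ask_llm, schema, max_retries=3):
    """Ask the LLM for JSON; when a reply parses but fails schema validation,
    re-prompt with the reason it was invalid (cf. invalid_json.prompt)."""
    retry_message = None
    for _ in range(max_retries):
        reply = ask_llm(retry_message)  # retry_message is None on the first attempt
        try:
            data = json.loads(reply)
            validate(instance=data, schema=schema)
            return data
        except (json.JSONDecodeError, ValidationError) as err:
            # Feed the failure reason back to the model, e.g.
            # "The JSON is invalid at $.type - 'command' is not one of [...]"
            retry_message = f"The JSON is invalid: {err}"
    raise ValueError('No valid JSON response after retries')
```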
1 change: 1 addition & 0 deletions pilot/database/__init__.py
@@ -0,0 +1 @@
from .database import database_exists, create_database, save_app
8 changes: 5 additions & 3 deletions pilot/database/database.py
@@ -50,6 +50,7 @@
File,
]


def get_created_apps():
return [model_to_dict(app) for app in App.select().where((App.name.is_null(False)) & (App.status.is_null(False)))]

@@ -264,7 +265,7 @@ def hash_and_save_step(Model, app_id, unique_data_fields, data_fields, message):
record = Model.get_by_id(inserted_id)
logger.debug(yellow(f"{message} with id {record.id}"))
except IntegrityError as e:
print(f"A record with data {unique_data_fields} already exists for {Model.__name__}.")
logger.warn(f"A record with data {unique_data_fields} already exists for {Model.__name__}.")
return None
return record

@@ -288,9 +289,10 @@ def save_development_step(project, prompt_path, prompt_data, messages, llm_respo

development_step = hash_and_save_step(DevelopmentSteps, project.args['app_id'], unique_data, data_fields,
"Saved Development Step")
project.checkpoints['last_development_step'] = development_step
if development_step is not None:
project.checkpoints['last_development_step'] = development_step

project.save_files_snapshot(development_step.id)
project.save_files_snapshot(development_step.id)

return development_step

10 changes: 8 additions & 2 deletions pilot/helpers/Debugger.py
@@ -1,3 +1,4 @@
import platform
import uuid

from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MAX_RECUSION_LAYER
@@ -6,7 +7,7 @@
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError


class Debugger():
class Debugger:
def __init__(self, agent):
self.agent = agent
self.recursion_layer = 0
@@ -41,7 +42,12 @@ def debug(self, convo, command=None, user_input=None, issue_description=None, is
convo.load_branch(function_uuid)

debugging_plan = convo.send_message('dev_ops/debug.prompt',
{ 'command': command['command'] if command is not None else None, 'user_input': user_input, 'issue_description': issue_description },
{
'command': command['command'] if command is not None else None,
'user_input': user_input,
'issue_description': issue_description,
'os': platform.system()
},
DEBUG_STEPS_BREAKDOWN)

try:
2 changes: 2 additions & 0 deletions pilot/helpers/__init__.py
@@ -0,0 +1,2 @@
from .AgentConvo import AgentConvo
from .Project import Project
18 changes: 14 additions & 4 deletions pilot/helpers/agents/Developer.py
@@ -96,9 +96,16 @@ def step_command_run(self, convo, step, i):
additional_message = 'Let\'s start with the step #0:\n\n' if i == 0 else f'So far, steps { ", ".join(f"#{j}" for j in range(i)) } are finished so let\'s do step #{i + 1} now.\n\n'
return run_command_until_success(data['command'], data['timeout'], convo, additional_message=additional_message)

def step_human_intervention(self, convo, step):
def step_human_intervention(self, convo, step: dict):
"""
:param convo:
:param step: {'human_intervention_description': 'some description'}
:return:
"""
while True:
human_intervention_description = step['human_intervention_description'] + yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`') if self.run_command is not None else step['human_intervention_description']
human_intervention_description = step['human_intervention_description'] + \
yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`') \
if self.run_command is not None else step['human_intervention_description']
response = self.project.ask_for_human_intervention('I need human intervention:',
human_intervention_description,
cbs={ 'r': lambda conv: run_command_until_success(self.run_command, None, conv, force=True, return_cli_response=True) },
@@ -260,8 +267,11 @@ def execute_task(self, convo, task_steps, test_command=None, reset_convo=True,
def continue_development(self, iteration_convo, last_branch_name, continue_description=''):
while True:
iteration_convo.load_branch(last_branch_name)
user_description = ('Here is a description of what should be working: \n\n' + blue_bold(continue_description) + '\n') if continue_description != '' else ''
user_description = 'Can you check if the app works please? ' + user_description + '\nIf you want to run the app, ' + yellow_bold('just type "r" and press ENTER and that will run `' + self.run_command + '`')
user_description = ('Here is a description of what should be working: \n\n' + blue_bold(continue_description) + '\n') \
if continue_description != '' else ''
user_description = 'Can you check if the app works please? ' + user_description + \
'\nIf you want to run the app, ' + \
yellow_bold('just type "r" and press ENTER and that will run `' + self.run_command + '`')
# continue_description = ''
response = self.project.ask_for_human_intervention(
user_description,
2 changes: 2 additions & 0 deletions pilot/helpers/agents/ProductOwner.py
@@ -20,6 +20,8 @@ def __init__(self, project):
super().__init__('product_owner', project)

def get_project_description(self):
# TODO: why save the project before user has even committed to a name & description?
# The UI saves a record as soon as they click the "Create Project" button
self.project.app = save_app(self.project)
self.project.current_step = PROJECT_DESCRIPTION_STEP

5 changes: 4 additions & 1 deletion pilot/helpers/agents/__init__.py
@@ -1 +1,4 @@

from .Architect import Architect, ARCHITECTURE_STEP
from .CodeMonkey import CodeMonkey, IMPLEMENT_CHANGES, GET_FILES
from .Developer import Developer, ENVIRONMENT_SETUP_STEP
from .TechLead import TechLead
58 changes: 57 additions & 1 deletion pilot/helpers/agents/test_Developer.py
@@ -1,8 +1,11 @@
import builtins
import json
import os
import pytest
from unittest.mock import patch

import requests

from helpers.AgentConvo import AgentConvo
from dotenv import load_dotenv
load_dotenv()
@@ -122,4 +125,57 @@ def test_code_changes_manual_test_no(self, mock_get_saved_user_input, mock_chat_
result = self.developer.test_code_changes(monkey, convo)

# Then
assert result == {'success': True, 'user_input': 'continue'}
assert result == {'success': True, 'user_input': 'no'}

@patch('helpers.cli.execute_command', return_value=('stdout:\n```\n\n```', 'DONE'))
@patch('helpers.AgentConvo.get_saved_development_step')
@patch('helpers.AgentConvo.save_development_step')
@patch('utils.llm_connection.requests.post')
@patch('utils.questionary.get_saved_user_input')
def test_test_code_changes_invalid_json(self, mock_get_saved_user_input,
mock_requests_post,
mock_save,
mock_get_saved_step,
mock_execute):
# Given
monkey = None
convo = AgentConvo(self.developer)
convo.save_branch = lambda branch_name=None: branch_name
convo.load_branch = lambda function_uuid=None: function_uuid
self.project.developer = self.developer

# we send a GET_TEST_TYPE spec, but the 1st response is invalid
types_in_response = ['command', 'command_test']
json_received = []

def generate_response(*args, **kwargs):
json_received.append(kwargs['json'])

gpt_response = json.dumps({
'type': types_in_response.pop(0),
'command': {
'command': 'node server.js',
'timeout': 3000
}
})
choice = json.dumps({'delta': {'content': gpt_response}})
line = json.dumps({'choices': [json.loads(choice)]}).encode('utf-8')

response = requests.Response()
response.status_code = 200
response.iter_lines = lambda: [line]
return response

mock_requests_post.side_effect = generate_response

mock_questionary = MockQuestionary([''])

with patch('utils.questionary.questionary', mock_questionary):
# When
result = self.developer.test_code_changes(monkey, convo)

# Then
assert result == {'success': True, 'cli_response': 'stdout:\n```\n\n```'}
assert mock_requests_post.call_count == 2
assert "The JSON is invalid at $.type - 'command' is not one of ['automated_test', 'command_test', 'manual_test', 'no_test']" in json_received[1]['messages'][3]['content']
assert mock_execute.call_count == 1
31 changes: 22 additions & 9 deletions pilot/helpers/cli.py
@@ -6,6 +6,7 @@
import time
import platform

from logger.logger import logger
from utils.style import yellow, green, red, yellow_bold, white_bold
from database.database import get_saved_command_run, save_command_run
from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
@@ -15,13 +16,15 @@

interrupted = False


def enqueue_output(out, q):
for line in iter(out.readline, ''):
if interrupted: # Check if the flag is set
break
q.put(line)
out.close()


def run_command(command, root_path, q_stdout, q_stderr, pid_container):
"""
Execute a command in a subprocess.
@@ -36,6 +39,7 @@ def run_command(command, root_path, q_stdout, q_stderr, pid_container):
Returns:
subprocess.Popen: The subprocess object.
"""
logger.info(f'Running `{command}`')
if platform.system() == 'Windows': # Check the operating system
process = subprocess.Popen(
command,
@@ -65,19 +69,19 @@ def run_command(command, root_path, q_stdout, q_stderr, pid_container):
t_stderr.start()
return process


def terminate_process(pid):
if platform.system() == "Windows":
try:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)])
except subprocess.CalledProcessError:
# Handle any potential errors here
pass
except subprocess.CalledProcessError as e:
logger.error(f'Error while terminating process: {e}')
else: # Unix-like systems
try:
os.killpg(pid, signal.SIGKILL)
except OSError:
# Handle any potential errors here
pass
except OSError as e:
logger.error(f'Error while terminating process: {e}')


def execute_command(project, command, timeout=None, force=False):
"""
@@ -112,13 +116,15 @@
# TODO: I think AutoGPT allows other feedback here, like:
# "That's not going to work, let's do X instead"
# We don't explicitly make "no" or "skip" options to the user
# see https://github.com/Pythagora-io/gpt-pilot/issues/122
if answer == 'no':
return '', 'DONE'
elif answer == 'skip':
return '', 'DONE'


# TODO when a shell built-in command (like cd or source) is executed, the output is not captured properly - this will need to be changed at some point
# TODO: Windows support
if "cd " in command or "source " in command:
command = "bash -c '" + command + "'"

@@ -157,6 +163,7 @@ def execute_command(project, command, timeout=None, force=False):
output_line = q.get_nowait()
if output_line not in output:
print(green('CLI OUTPUT:') + output_line, end='')
logger.info('CLI OUTPUT: ' + output_line)
output += output_line
break

@@ -174,6 +181,7 @@ def execute_command(project, command, timeout=None, force=False):
if line:
output += line
print(green('CLI OUTPUT:') + line, end='')
logger.info('CLI OUTPUT: ' + line)

# Read stderr
try:
@@ -184,13 +192,16 @@ def execute_command(project, command, timeout=None, force=False):
if stderr_line:
stderr_output += stderr_line
print(red('CLI ERROR:') + stderr_line, end='') # Print with different color for distinction
logger.error('CLI ERROR: ' + stderr_line)

except (KeyboardInterrupt, TimeoutError) as e:
interrupted = True
if isinstance(e, KeyboardInterrupt):
print("\nCTRL+C detected. Stopping command execution...")
print('\nCTRL+C detected. Stopping command execution...')
logger.info('CTRL+C detected. Stopping command execution...')
else:
print("\nTimeout detected. Stopping command execution...")
print('\nTimeout detected. Stopping command execution...')
logger.warn('Timeout detected. Stopping command execution...')

terminate_process(pid_container[0])

@@ -267,7 +278,9 @@ def execute_command_and_check_cli_response(command, timeout, convo):
{ 'cli_response': cli_response, 'command': command })
return cli_response, llm_response

def run_command_until_success(command, timeout, convo, additional_message=None, force=False, return_cli_response=False, is_root_task=False):

def run_command_until_success(command, timeout, convo, additional_message=None, force=False,
return_cli_response=False, is_root_task=False):
"""
Run a command until it succeeds or reaches a timeout.
4 changes: 4 additions & 0 deletions pilot/logger/logger.py
@@ -1,4 +1,5 @@
import os
import re
import logging


@@ -31,6 +32,7 @@ def setup_logger():


def filter_sensitive_fields(record):
# TODO: also remove escape sequences for colors, bold etc
if isinstance(record.args, dict): # check if args is a dictionary
args = record.args.copy()
for field in sensitive_fields:
@@ -44,6 +46,8 @@ def filter_sensitive_fields(record):
args_list = ['*****' if arg in sensitive_fields else arg for arg in args_list]
record.args = tuple(args_list)

# Remove ANSI escape sequences - colours & bold
record.msg = re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', record.msg)
return record.levelno <= logging.INFO


8 changes: 7 additions & 1 deletion pilot/main.py
@@ -70,14 +70,20 @@ def local_print(*args, **kwargs):
else:
return local_print, ipc_client_instance


if __name__ == "__main__":
try:
# sys.argv.append('--ux-test=' + 'run_command_until_success')
args = init()

builtins.print, ipc_client_instance = get_custom_print(args)
if '--api-key' in args:
os.environ["OPENAI_API_KEY"] = args['--api-key']
os.environ["OPENAI_API_KEY"] = args['--api-key']
if '--get-created-apps-with-steps' in args:
print({ 'db_data': get_created_apps_with_steps() }, type='info')
elif '--ux-test' in args:
from test.ux_tests import run_test
run_test(args['--ux-test'])
else:
# TODO get checkpoint from database and fill the project with it
project = Project(args, ipc_client_instance=ipc_client_instance)
2 changes: 1 addition & 1 deletion pilot/prompts/dev_ops/debug.prompt
@@ -5,7 +5,7 @@ You wanted me to check this - `{{ issue_description }}` but there was a problem{
```
{% endif %}I want you to debug this issue by yourself and I will give you 2 functions that you can use - `run_command` and `implement_code_changes`.

`run_command` function will run a command on the machine and will return the CLI output to you so you can see what to do next.
`run_command` function will run a command on the machine and will return the CLI output to you so you can see what to do next. Note that the command will run on a {{ os }} machine.

`implement_code_changes` function will change the code where you just need to thoroughly describe what needs to be implemented, I will implement the requested changes and let you know.

6 changes: 6 additions & 0 deletions pilot/prompts/utils/invalid_json.prompt
@@ -0,0 +1,6 @@
[INST]I received an invalid JSON response. The response was a parseable JSON object, but it is not valid against the schema I provided. The JSON is invalid {{ invalid_reason }}

Please try again with a valid JSON object, referring to the previous JSON schema I provided above.

A response which starts with "I'm sorry for the confusion" would be an example of an invalid response, a preamble must NOT be included.
[/INST]
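
For orientation, here is one way the `{{ invalid_reason }}` placeholder above might be filled before the conversation is retried. The Jinja-based rendering and the example reason string (taken from the assertion in `test_test_code_changes_invalid_json`) are illustrative, not necessarily how the project loads its prompts:

```python
from jinja2 import Template  # assumed templating engine for .prompt files

# Hypothetical direct rendering of the prompt file shown above
invalid_json_prompt = Template(open('pilot/prompts/utils/invalid_json.prompt').read())

# Example reason, mirroring the new test's assertion
reason = "at $.type - 'command' is not one of ['automated_test', 'command_test', 'manual_test', 'no_test']"
retry_message = invalid_json_prompt.render(invalid_reason=reason)
```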
1 change: 1 addition & 0 deletions pilot/test/ux_tests/README.md
@@ -0,0 +1 @@
The functions in this directory are used to test specific scenarios of the user experience.
10 changes: 10 additions & 0 deletions pilot/test/ux_tests/__init__.py
@@ -0,0 +1,10 @@
from .run_command_until_success import run_command_until_success


def run_test(test_name: str):
print(f'Running UX test "{test_name}"...')

if test_name == 'run_command_until_success':
return run_command_until_success()

print(f'UX test "{test_name}" not found')