Skip to content

Commit

Permalink
make stricter completion condition for extract (#151)
Browse files Browse the repository at this point in the history
* make stricter completion condition for extract

* small prompt update

* add github stars eval

* small eval change

* remove unnecessary vars

* fix nits
  • Loading branch information
anishk23733 authored Nov 1, 2024
1 parent b3ba2ef commit 3e37495
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 4 deletions.
59 changes: 59 additions & 0 deletions evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,63 @@ const homedepot = async () => {
}
};

/**
 * Converts the star-counter text read from a GitHub repository page into a
 * number.
 *
 * Accepts plain numbers ("987"), comma-grouped numbers ("1,234") and
 * abbreviated values with a `k` or `m` suffix ("22.8k", "1.2m"), ignoring
 * case and surrounding whitespace.
 *
 * @param text - Raw text of the star counter element.
 * @returns The parsed count, or `NaN` when the text is not numeric.
 */
const parseGithubStarCount = (text: string): number => {
  const normalized = text.trim().toLowerCase().replace(/,/g, "");

  let multiplier = 1;
  let digits = normalized;
  if (normalized.endsWith("k")) {
    multiplier = 1000;
    digits = normalized.slice(0, -1);
  } else if (normalized.endsWith("m")) {
    multiplier = 1000000;
    digits = normalized.slice(0, -1);
  }

  // Round so that binary floating-point noise (e.g. 1.1 * 1000 ===
  // 1100.0000000000002) cannot break a strict equality comparison.
  return Math.round(parseFloat(digits) * multiplier);
};

/**
 * Eval: asks Stagehand to extract the star count of the facebook/react
 * repository and compares it with the value displayed by the page's own
 * star counter element (`#repo-stars-counter-star`).
 *
 * @returns An eval result object. `_success` is true only when the extracted
 * number strictly equals the number parsed from the page counter. On failure
 * or exception, `_success` is false and `error` carries the serialized error.
 */
const extract_github_stars = async () => {
  const logger = new EvalLogger();

  const stagehand = new Stagehand({
    env,
    verbose: 2,
    headless: process.env.HEADLESS !== "false",
    logger: (message: { category?: string; message: string }) => {
      logger.log(message.message);
    },
  });

  logger.init(stagehand);

  const { debugUrl, sessionUrl } = await stagehand.init();

  try {
    await stagehand.page.goto("https://github.com/facebook/react");

    const { stars } = await stagehand.extract({
      instruction: "Extract the number of stars for the project",
      schema: z.object({
        stars: z.number().describe("the number of stars for the project"),
      }),
      modelName: "gpt-4o-2024-08-06",
    });

    // Ground truth: the (possibly abbreviated) counter rendered by the page.
    const expectedStarsString = await stagehand.page
      .locator("#repo-stars-counter-star")
      .first()
      .innerHTML();

    const expectedStars = parseGithubStarCount(expectedStarsString);

    await stagehand.context.close().catch(() => {});
    return {
      // NaN (unparseable counter) never equals anything, so it yields false.
      _success: stars === expectedStars,
      stars,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    console.error("Error or timeout occurred:", error);
    // Best-effort cleanup; a failure to close must not mask the eval result.
    await stagehand.context.close().catch(() => {});
    return {
      _success: false,
      // NOTE(review): plain Error instances have no enumerable own
      // properties, so this round-trip may yield `{}` — the full error is
      // logged via console.error above.
      error: JSON.parse(JSON.stringify(error, null, 2)),
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }
};

const extract_collaborators_from_github_repository = async () => {
const logger = new EvalLogger();

Expand Down Expand Up @@ -1145,6 +1202,7 @@ const tasks = {
peeler_complex,
wikipedia,
simple_google_search,
extract_github_stars,
extract_collaborators_from_github_repository,
extract_last_twenty_github_commits,
costar,
Expand Down Expand Up @@ -1194,6 +1252,7 @@ const testcases = [
},
{ input: { name: "peeler_complex" } },
{ input: { name: "simple_google_search" } },
{ input: { name: "extract_github_stars" } },
{
input: {
name: "extract_collaborators_from_github_repository",
Expand Down
10 changes: 6 additions & 4 deletions lib/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,10 @@ const metadataSystemPrompt = `You are an AI assistant tasked with evaluating the
Analyze the extraction response and determine if the task is completed or if more information is needed.
Strictly abide by the following criteria:
1. If you are certain that the instruction is completed, set the completion status to true, even if there are still chunks left.
2. If there could still be more information to extract and there are still chunks left, set the completion status to false.`;
1. Once the instruction has been satisfied by the current extraction response, ALWAYS set completion status to true and stop processing, regardless of remaining chunks.
2. Only set completion status to false if BOTH of these conditions are true:
- The instruction has not been satisfied yet
- There are still chunks left to process (chunksTotal > chunksSeen)`;

export function buildMetadataSystemPrompt() {
return {
Expand All @@ -272,8 +274,8 @@ export function buildMetadataPrompt(
role: "user",
content: `Instruction: ${instruction}
Extracted content: ${JSON.stringify(extractionResponse, null, 2)}
Chunks seen: ${chunksSeen}
Chunks total: ${chunksTotal}`,
chunksSeen: ${chunksSeen}
chunksTotal: ${chunksTotal}`,
};
}

Expand Down

0 comments on commit 3e37495

Please sign in to comment.