Skip to content

Commit

Permalink
🧪 more evals (#306)
Browse files Browse the repository at this point in the history
  • Loading branch information
seanmcguire12 authored Dec 15, 2024
1 parent d2b591d commit fecd42e
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 0 deletions.
75 changes: 75 additions & 0 deletions evals/combination/wichita.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { initStagehand } from "../utils";
import { EvalFunction } from "../../types/evals";
import { z } from "zod";

/**
 * Eval: open the Wichita Falls bids page, click "Show Closed/Awarded/Cancelled
 * bids", extract the total number of bids reported, and check that it is
 * within a small tolerance of the expected count.
 *
 * @param modelName - Model forwarded to `initStagehand` and to `extract`.
 * @param logger - Logger forwarded to `initStagehand`; also used to record a
 *   failure and to supply the `logs` field of the result.
 * @param useTextExtract - Forwarded unchanged to `stagehand.extract`.
 * @returns An object with `_success`, the parsed `extractedNumber`, the
 *   session `debugUrl` / `sessionUrl`, and `logs`; on failure it also carries
 *   an `error` message.
 */
export const wichita: EvalFunction = async ({
  modelName,
  logger,
  useTextExtract,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // All browser work lives in one try/finally so the session is closed even
  // when navigation, the click, or the extraction rejects.
  const extractTotalResults = async () => {
    try {
      await stagehand.page.goto("https://www.wichitafallstx.gov/Bids.aspx");

      await stagehand.act({
        action: 'Click on "Show Closed/Awarded/Cancelled bids"',
      });

      // `return await` (not a bare `return`) so the extraction settles before
      // the `finally` block closes the session.
      return await stagehand.extract({
        instruction:
          "Extract the total number of bids that the search produced.",
        schema: z.object({
          total_results: z.string(),
        }),
        modelName,
        useTextExtract,
      });
    } finally {
      await stagehand.close();
    }
  };

  const { total_results } = await extractTotalResults();

  const expectedNumber = 405;
  const tolerance = 10;

  // Strip every non-digit (thousands separators, labels) before parsing. If no
  // digits remain the result is NaN, which fails the range check below.
  const extractedNumber = parseInt(total_results.replace(/[^\d]/g, ""), 10);

  const isWithinRange =
    Math.abs(extractedNumber - expectedNumber) <= tolerance;

  if (!isWithinRange) {
    const errorMessage =
      "Total number of results is not within the expected range";

    logger.error({
      message: errorMessage,
      level: 0,
      auxiliary: {
        expected: {
          value: `${expectedNumber} ± ${tolerance}`,
          type: "string",
        },
        actual: {
          value: extractedNumber.toString(),
          type: "integer",
        },
      },
    });

    return {
      _success: false,
      error: errorMessage,
      extractedNumber,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }

  return {
    _success: true,
    extractedNumber,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
65 changes: 65 additions & 0 deletions evals/observe/ionwave_observe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { initStagehand } from "../utils";
import { EvalFunction } from "../../types/evals";

/**
 * Eval: run `stagehand.observe()` on the El Paso IonWave login page and check
 * whether any returned observation points at an element whose inner text
 * equals that of a known, hard-coded reference element.
 *
 * @param modelName - Model forwarded to `initStagehand`.
 * @param logger - Logger forwarded to `initStagehand`; supplies the `logs`
 *   field of the result.
 * @returns An object with `_success`, the raw `observations`, the session
 *   `debugUrl` / `sessionUrl`, and `logs`. When at least one observation was
 *   returned it also carries `expected`, the reference element's inner text.
 */
export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  const expectedLocator = `div.rowLinks:nth-child(27) > div:nth-child(1) > a:nth-child(1)`;

  // Inner text of the first element matching `selector`.
  const readInnerText = (selector: string) =>
    stagehand.page.locator(selector).first().innerText();

  // All browser work lives in one try/finally so the session is closed even
  // when navigation, observe(), or the reference lookup rejects.
  const observeAndMatch = async () => {
    try {
      await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");

      const observations = await stagehand.observe();

      if (observations.length === 0) {
        return { observations, match: null };
      }

      const expected = await readInnerText(expectedLocator);

      let found = false;
      for (const observation of observations) {
        try {
          if ((await readInnerText(observation.selector)) === expected) {
            found = true;
            break;
          }
        } catch (error) {
          // Best effort: one unusable selector must not abort the eval, so
          // warn and move on to the next observation.
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            error instanceof Error ? error.message : String(error),
          );
        }
      }

      return { observations, match: { expected, found } };
    } finally {
      await stagehand.close();
    }
  };

  const { observations, match } = await observeAndMatch();

  if (match === null) {
    return {
      _success: false,
      observations,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }

  return {
    _success: match.found,
    expected: match.expected,
    observations,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
65 changes: 65 additions & 0 deletions evals/observe/panamcs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { initStagehand } from "../utils";
import { EvalFunction } from "../../types/evals";

/**
 * Eval: run `stagehand.observe()` on the panamcs.org staff page and check
 * whether any returned observation points at an element whose inner text
 * equals that of a known, hard-coded reference element.
 *
 * @param modelName - Model forwarded to `initStagehand`.
 * @param logger - Logger forwarded to `initStagehand`; supplies the `logs`
 *   field of the result.
 * @returns An object with `_success`, the raw `observations`, the session
 *   `debugUrl` / `sessionUrl`, and `logs`. When at least one observation was
 *   returned it also carries `expected`, the reference element's inner text.
 */
export const panamcs: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  const expectedLocator = `a.btn:nth-child(3)`;

  // Inner text of the first element matching `selector`.
  const readInnerText = (selector: string) =>
    stagehand.page.locator(selector).first().innerText();

  // All browser work lives in one try/finally so the session is closed even
  // when navigation, observe(), or the reference lookup rejects.
  const observeAndMatch = async () => {
    try {
      await stagehand.page.goto("https://panamcs.org/about/staff/");

      const observations = await stagehand.observe();

      if (observations.length === 0) {
        return { observations, match: null };
      }

      const expected = await readInnerText(expectedLocator);

      let found = false;
      for (const observation of observations) {
        try {
          if ((await readInnerText(observation.selector)) === expected) {
            found = true;
            break;
          }
        } catch (error) {
          // Best effort: one unusable selector must not abort the eval, so
          // warn and move on to the next observation.
          console.warn(
            `Failed to check observation with selector ${observation.selector}:`,
            error instanceof Error ? error.message : String(error),
          );
        }
      }

      return { observations, match: { expected, found } };
    } finally {
      await stagehand.close();
    }
  };

  const { observations, match } = await observeAndMatch();

  if (match === null) {
    return {
      _success: false,
      observations,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }

  return {
    _success: match.found,
    expected: match.expected,
    observations,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

0 comments on commit fecd42e

Please sign in to comment.