Skip to content

Commit

Permalink
add caching to actions (by sub-action) + add variables to act (#171)
Browse files Browse the repository at this point in the history
* add caching on the llm provider level

* add sub action caching

* action cache hit

* remove pnpm lock

* revert readme

* beep boop

* add caching on the llm provider level

* remove all of playground

* default back to env for browserbase args

* leftover

* leftover

* fix eval

* fix the bug in prev commit

* update how eval looks

* move act to handlers folder

* rename

* add new file

* update vision to reduce errors (increase timeout + change error to warning)

* clean up the bundle npm script
  • Loading branch information
navidkpr authored Nov 11, 2024
1 parent 703636b commit d7d4f59
Show file tree
Hide file tree
Showing 21 changed files with 2,156 additions and 1,145 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ evals/**/public
lib/dom/bundle.js
evals/public
*.tgz
evals/playground.ts
5 changes: 4 additions & 1 deletion evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,10 @@ const peeler_complex = async () => {
await stagehand.page.goto(`https://chefstoys.com/`, { timeout: 60000 });

await stagehand.act({
action: "search for peelers",
action: "search for %search_query%",
variables: {
search_query: "peeler",
},
});

await stagehand.act({
Expand Down
126 changes: 0 additions & 126 deletions evals/playground.ts
Original file line number Diff line number Diff line change
@@ -1,126 +0,0 @@
import { Stagehand } from "../lib";
import { z } from "zod";
import { EvalLogger } from "./utils";

// eval failing
/**
 * Eval: searches homedepot.com for gas grills, opens the first result, and
 * extracts the Primary Burner BTU from the product details.
 *
 * @returns `true` when the extraction yields at least one product spec entry;
 *   `false` when nothing was extracted or any step inside the `try` threw.
 */
const homedepot = async () => {
  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    debugDom: true,
    // Headless unless HEADLESS is explicitly set to "false".
    headless: process.env.HEADLESS !== "false",
  });

  await stagehand.init();

  try {
    await stagehand.page.goto("https://www.homedepot.com/");

    await stagehand.act({ action: "search for gas grills" });

    await stagehand.act({ action: "click on the first gas grill" });

    await stagehand.act({ action: "click on the Product Details" });

    await stagehand.act({ action: "find the Primary Burner BTU" });

    const productSpecs = await stagehand.extract({
      instruction: "Extract the Primary Burner BTU of the product",
      schema: z.object({
        productSpecs: z
          .array(
            z.object({
              burnerBTU: z.string().describe("Primary Burner BTU"),
            }),
          )
          .describe("Gas grill Primary Burner BTU"),
      }),
      modelName: "gpt-4o-2024-08-06",
    });
    console.log("The gas grill primary burner BTU is:", productSpecs);

    // Success only when the result and its list exist and the list is non-empty.
    return (productSpecs?.productSpecs?.length ?? 0) > 0;
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message` instead of logging "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error in homedepot function: ${reason}`);
    return false;
  } finally {
    // Always release the browser context, on success and on failure.
    await stagehand.context.close();
  }
};

/**
 * Eval: asks Stagehand to observe the "request demo" button on vanta.com and
 * compares the inner HTML of the first observed element with the inner HTML
 * found at a known, hard-coded selector.
 *
 * The browser context is closed before the result object is assembled, and it
 * is also closed when any page step throws (the error then propagates).
 *
 * @returns An object with `_success`, `debugUrl`, `sessionUrl` and `logs`,
 *   plus either `observations` (when nothing was observed) or
 *   `expected` / `actual` (when the comparison ran).
 */
const vanta = async () => {
  const logger = new EvalLogger();

  const stagehand = new Stagehand({
    env: "LOCAL",
    headless: process.env.HEADLESS !== "false",
    // `any` is kept: the logger message type is declared by Stagehand and is
    // not visible from this file.
    logger: (message: any) => {
      logger.log(message);
    },
    verbose: 2,
  });

  logger.init(stagehand);

  const { debugUrl, sessionUrl } = await stagehand.init();

  // Runs the page interaction and guarantees the context is closed afterwards,
  // whether the steps succeed, return early, or throw.
  const compareDemoButton = async () => {
    try {
      await stagehand.page.goto("https://www.vanta.com/");

      const observations = await stagehand.observe({
        instruction: "find the text for the request demo button",
      });

      console.log("Observations:", observations);

      if (observations.length === 0) {
        return { _success: false, observations };
      }

      const observationResult = await stagehand.page
        .locator(observations[0].selector)
        .first()
        .innerHTML();

      const expectedLocator = `body > div.page-wrapper > div.nav_component > div.nav_element.w-nav > div.padding-global > div > div > nav > div.nav_cta-wrapper.is-new > a.nav_cta-button-desktop.is-smaller.w-button`;

      const expectedResult = await stagehand.page
        .locator(expectedLocator)
        .first()
        .innerHTML();

      return {
        // Both sides are strings, so strict equality is the right comparison.
        _success: observationResult === expectedResult,
        expected: expectedResult,
        actual: observationResult,
      };
    } finally {
      await stagehand.context.close();
    }
  };

  const outcome = await compareDemoButton();

  // Logs are read after the context has been closed, as in the original flow.
  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

/**
 * Playground entry point: runs the currently enabled eval and prints its result.
 */
async function main() {
  // Disabled eval, kept for quick switching:
  // const homedepotResult = await homedepot();
  // console.log("Result:", homedepotResult);

  console.log("Result:", await vanta());
}

// A rejection from main is reported through console.error.
main().catch((err) => console.error(err));
1 change: 1 addition & 0 deletions examples/example.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ async function example() {
});
console.log(`Our favorite contributor is ${contributor.username}`);
}

// Script entry point: invoke the example immediately and await its completion.
(async function runExample() {
  await example();
})();
137 changes: 137 additions & 0 deletions lib/cache/ActionCache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { BaseCache, CacheEntry } from "./BaseCache";

/**
 * A Playwright call captured as plain data: a method name plus string arguments.
 */
export interface PlaywrightCommand {
// Name of the method to invoke.
// NOTE(review): presumably a Playwright page/locator method — confirm against the consumer.
method: string;
// Arguments for the method, all carried as strings.
args: string[];
}

/**
 * Cache entry for a single action step.
 * The `data` fields mirror the values that ActionCache.addActionStep stores.
 */
export interface ActionEntry extends CacheEntry {
data: {
// The Playwright call recorded for this step.
playwrightCommand: PlaywrightCommand;
// NOTE(review): presumably a text rendering of the targeted component — confirm with the producer.
componentString: string;
// NOTE(review): presumably XPath selectors for the targeted element — confirm with the producer.
xpaths: string[];
// NOTE(review): presumably a description of this step — confirm with the producer.
newStepString: string;
// NOTE(review): presumably marks whether the overall action finished with this step — confirm.
completed: boolean;
// Also part of the cache key used by ActionCache (together with url and action).
previousSelectors: string[];
// The action text; also part of the cache key used by ActionCache.
action: string;
};
}

/**
 * ActionCache handles logging and retrieving action steps along with their
 * Playwright commands.
 *
 * Entries are keyed by the combination of `url`, `action` and
 * `previousSelectors`. The `requestId` is handed to the base cache next to
 * that key but is never part of it.
 */
export class ActionCache extends BaseCache<ActionEntry> {
  /**
   * @param logger - Sink for the log messages emitted by the cache.
   * @param cacheDir - Optional cache directory, forwarded unchanged to BaseCache.
   * @param cacheFile - Optional cache file name; "action_cache.json" is used
   *   when it is omitted or empty.
   */
  constructor(
    logger: (message: {
      category?: string;
      message: string;
      level?: number;
    }) => void,
    cacheDir?: string,
    cacheFile?: string,
  ) {
    super(logger, cacheDir, cacheFile || "action_cache.json");
  }

  /**
   * Stores one action step under the key `{ url, action, previousSelectors }`.
   *
   * @param url - Part of the cache key.
   * @param action - Part of the cache key; also stored in the entry data.
   * @param previousSelectors - Part of the cache key; also stored in the entry data.
   * @param playwrightCommand - The Playwright call to store for this step.
   * @param componentString - Stored as-is in the entry data.
   * @param xpaths - Stored as-is in the entry data.
   * @param newStepString - Stored as-is in the entry data.
   * @param completed - Stored as-is in the entry data.
   * @param requestId - The identifier for the current request, passed to the base cache.
   */
  public async addActionStep({
    url,
    action,
    previousSelectors,
    playwrightCommand,
    componentString,
    xpaths,
    newStepString,
    completed,
    requestId,
  }: {
    url: string;
    action: string;
    previousSelectors: string[];
    playwrightCommand: PlaywrightCommand;
    componentString: string;
    requestId: string;
    xpaths: string[];
    newStepString: string;
    completed: boolean;
  }): Promise<void> {
    this.logger({
      category: "action_cache",
      message: `Adding action step to cache: ${action}, requestId: ${requestId}, url: ${url}, previousSelectors: ${previousSelectors}`,
      level: 1,
    });

    await this.set(
      { url, action, previousSelectors },
      {
        playwrightCommand,
        componentString,
        xpaths,
        newStepString,
        completed,
        previousSelectors,
        action,
      },
      requestId,
    );
  }

  /**
   * Retrieves the cached step stored under `{ url, action, previousSelectors }`.
   *
   * @param url - Part of the cache key.
   * @param action - Part of the cache key.
   * @param previousSelectors - Part of the cache key.
   * @param requestId - The identifier for the current request.
   * @returns The cached step data, or null when the base cache returns nothing.
   */
  public async getActionStep({
    url,
    action,
    previousSelectors,
    requestId,
  }: {
    url: string;
    action: string;
    previousSelectors: string[];
    requestId: string;
  }): Promise<ActionEntry["data"] | null> {
    const data = await super.get({ url, action, previousSelectors }, requestId);
    if (!data) {
      return null;
    }

    return data;
  }

  /**
   * Removes the cached step stored under `{ url, action, previousSelectors }`.
   *
   * @param cacheHashObj - The fields identifying the step. `requestId` is
   *   accepted for call-site compatibility but is not part of the key.
   */
  public async removeActionStep(cacheHashObj: {
    url: string;
    action: string;
    previousSelectors: string[];
    requestId: string;
  }): Promise<void> {
    // Build the key from exactly the fields addActionStep/getActionStep use.
    // Passing `requestId` along would make the delete key differ from the
    // key the entry was stored under.
    const { url, action, previousSelectors } = cacheHashObj;
    await super.delete({ url, action, previousSelectors });
  }

  /**
   * Clears the cached actions associated with a request.
   *
   * @param requestId - The identifier of the request whose entries are removed.
   */
  public async clearAction(requestId: string): Promise<void> {
    await super.deleteCacheForRequestId(requestId);
    this.logger({
      category: "action_cache",
      message: `Cleared action for ID: ${requestId}`,
      level: 1,
    });
  }

  /**
   * Resets the entire action cache.
   */
  public async resetCache(): Promise<void> {
    await super.resetCache();
    this.logger({
      category: "action_cache",
      message: "Action cache has been reset.",
      level: 1,
    });
  }
}
Loading

0 comments on commit d7d4f59

Please sign in to comment.