-
Notifications
You must be signed in to change notification settings - Fork 133
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add caching to actions (by sub-action) + add variables to act (#171)
* add caching on the llm provider level * add sub action caching * action cache hit * remove pnpm lock * revert readme * beep boop * add caching on the llm provider level * remove all of playground * default back to env for browserbase args * leftover * leftover * fix eval * fix the bug in prev commit * update how eval looks * move act to handlers folder * rename * add new file * update vision for less error (increase timeout + change error to warning) * clean up the bundle script npm script
- Loading branch information
Showing
21 changed files
with
2,156 additions
and
1,145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ evals/**/public | |
lib/dom/bundle.js | ||
evals/public | ||
*.tgz | ||
evals/playground.ts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,126 +0,0 @@ | ||
import { Stagehand } from "../lib"; | ||
import { z } from "zod"; | ||
import { EvalLogger } from "./utils"; | ||
|
||
// eval failing | ||
/**
 * Playground eval: on homedepot.com, searches for gas grills, opens the first
 * result's product details, and extracts the Primary Burner BTU.
 *
 * @returns `true` when the extraction yields at least one `productSpecs`
 * entry; `false` when it yields none or when any step throws.
 */
const homedepot = async () => {
  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    debugDom: true,
    headless: process.env.HEADLESS !== "false",
  });

  await stagehand.init();

  try {
    await stagehand.page.goto("https://www.homedepot.com/");

    // Drive the page to the spec table one natural-language action at a time.
    await stagehand.act({ action: "search for gas grills" });
    await stagehand.act({ action: "click on the first gas grill" });
    await stagehand.act({ action: "click on the Product Details" });
    await stagehand.act({ action: "find the Primary Burner BTU" });

    const productSpecs = await stagehand.extract({
      instruction: "Extract the Primary Burner BTU of the product",
      schema: z.object({
        productSpecs: z
          .array(
            z.object({
              burnerBTU: z.string().describe("Primary Burner BTU"),
            }),
          )
          .describe("Gas grill Primary Burner BTU"),
      }),
      modelName: "gpt-4o-2024-08-06",
    });
    console.log("The gas grill primary burner BTU is:", productSpecs);

    const hasSpecs =
      Boolean(productSpecs) &&
      Boolean(productSpecs.productSpecs) &&
      productSpecs.productSpecs.length > 0;

    return hasSpecs;
  } catch (error) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message` instead of logging "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error in homedepot function: ${reason}`);
    return false;
  } finally {
    // Always release the browser context, whether the eval passed or failed.
    await stagehand.context.close();
  }
};
|
||
/**
 * Playground eval: on vanta.com, asks Stagehand to observe the "request demo"
 * button and compares the inner HTML of the first observed selector with the
 * inner HTML of a hard-coded expected locator.
 *
 * @returns An object with `_success`, the compared values (or the empty
 * `observations` list when nothing was observed), `debugUrl`, `sessionUrl`,
 * and the logs collected by the `EvalLogger`.
 */
const vanta = async () => {
  const logger = new EvalLogger();

  const stagehand = new Stagehand({
    env: "LOCAL",
    headless: process.env.HEADLESS !== "false",
    logger: (message: any) => {
      logger.log(message);
    },
    verbose: 2,
  });

  logger.init(stagehand);

  const { debugUrl, sessionUrl } = await stagehand.init();

  // Runs the page interaction and guarantees the browser context is closed
  // even when navigation, observation, or a locator lookup throws.
  const compareObservation = async () => {
    try {
      await stagehand.page.goto("https://www.vanta.com/");

      const observations = await stagehand.observe({
        instruction: "find the text for the request demo button",
      });

      console.log("Observations:", observations);

      if (observations.length === 0) {
        return { _success: false, observations };
      }

      const observationResult = await stagehand.page
        .locator(observations[0].selector)
        .first()
        .innerHTML();

      const expectedLocator = `body > div.page-wrapper > div.nav_component > div.nav_element.w-nav > div.padding-global > div > div > nav > div.nav_cta-wrapper.is-new > a.nav_cta-button-desktop.is-smaller.w-button`;

      const expectedResult = await stagehand.page
        .locator(expectedLocator)
        .first()
        .innerHTML();

      return {
        _success: observationResult === expectedResult,
        expected: expectedResult,
        actual: observationResult,
      };
    } finally {
      await stagehand.context.close();
    }
  };

  const comparison = await compareObservation();

  // Logs are read after the context has been closed, as before.
  return {
    ...comparison,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};
|
||
/**
 * Playground entry point: runs the vanta eval and prints its result.
 * The homedepot eval is left disabled.
 */
async function main() {
  // const homedepotResult = await homedepot();
  // console.log("Result:", homedepotResult);

  const outcome = await vanta();
  console.log("Result:", outcome);
}

// Report any unhandled failure from the run on stderr.
main().catch((err) => console.error(err));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
import { BaseCache, CacheEntry } from "./BaseCache"; | ||
|
||
/**
 * A method name together with its string arguments, stored alongside a cached
 * action step.
 */
export interface PlaywrightCommand {
  /** Name of the method to invoke. */
  method: string;
  /** Arguments for the method, as strings. */
  args: Array<string>;
}
|
||
/**
 * Cache entry whose `data` payload describes one recorded action step.
 * These are the fields written by `ActionCache.addActionStep`.
 */
export interface ActionEntry extends CacheEntry {
  data: {
    /** The action string the step was recorded for. */
    action: string;
    /** Selectors supplied with the step; also used as part of the cache lookup. */
    previousSelectors: string[];
    /** The Playwright command recorded for the step. */
    playwrightCommand: PlaywrightCommand;
    /** XPaths recorded with the step. */
    xpaths: string[];
    componentString: string;
    newStepString: string;
    /** NOTE(review): presumably whether the overall action finished at this step — confirm. */
    completed: boolean;
  };
}
|
||
/**
 * ActionCache stores and retrieves individual action steps together with the
 * Playwright command recorded for each of them.
 *
 * Every step is stored and looked up through the lookup object
 * `{ url, action, previousSelectors }`. The `requestId` is handed to the base
 * cache as a separate argument and is not part of that object.
 */
export class ActionCache extends BaseCache<ActionEntry> {
  /**
   * @param logger - Sink for cache log messages.
   * @param cacheDir - Optional cache directory, forwarded to `BaseCache`.
   * @param cacheFile - Optional cache file name; falls back to
   * `"action_cache.json"` when omitted or empty.
   */
  constructor(
    logger: (message: {
      category?: string;
      message: string;
      level?: number;
    }) => void,
    cacheDir?: string,
    cacheFile?: string,
  ) {
    super(logger, cacheDir, cacheFile || "action_cache.json");
  }

  /**
   * Logs and stores one action step under `{ url, action, previousSelectors }`.
   *
   * @param requestId - Identifier of the current request, passed to the base
   * cache alongside the entry.
   */
  public async addActionStep({
    url,
    action,
    previousSelectors,
    playwrightCommand,
    componentString,
    xpaths,
    newStepString,
    completed,
    requestId,
  }: {
    url: string;
    action: string;
    previousSelectors: string[];
    playwrightCommand: PlaywrightCommand;
    componentString: string;
    requestId: string;
    xpaths: string[];
    newStepString: string;
    completed: boolean;
  }): Promise<void> {
    this.logger({
      category: "action_cache",
      message: `Adding action step to cache: ${action}, requestId: ${requestId}, url: ${url}, previousSelectors: ${previousSelectors}`,
      level: 1,
    });

    await this.set(
      { url, action, previousSelectors },
      {
        playwrightCommand,
        componentString,
        xpaths,
        newStepString,
        completed,
        previousSelectors,
        action,
      },
      requestId,
    );
  }

  /**
   * Retrieves the cached step stored under `{ url, action, previousSelectors }`.
   *
   * @param requestId - Identifier of the current request.
   * @returns The cached step data, or `null` when the base cache returns
   * nothing for that lookup.
   */
  public async getActionStep({
    url,
    action,
    previousSelectors,
    requestId,
  }: {
    url: string;
    action: string;
    previousSelectors: string[];
    requestId: string;
  }): Promise<ActionEntry["data"] | null> {
    const data = await super.get({ url, action, previousSelectors }, requestId);
    return data ? data : null;
  }

  /**
   * Removes the cached step stored under `{ url, action, previousSelectors }`.
   *
   * `requestId` is accepted for call-site symmetry but deliberately left out
   * of the lookup object: entries are stored without it, so forwarding it
   * would describe a different entry than the one that was written.
   * NOTE(review): assumes `BaseCache.delete` derives its key from the whole
   * object it receives, as `set`/`get` appear to — confirm.
   */
  public async removeActionStep(cacheHashObj: {
    url: string;
    action: string;
    previousSelectors: string[];
    requestId: string;
  }): Promise<void> {
    const { url, action, previousSelectors } = cacheHashObj;
    await super.delete({ url, action, previousSelectors });
  }

  /**
   * Clears the cached actions associated with a request.
   *
   * @param requestId - The identifier of the request whose entries are removed.
   */
  public async clearAction(requestId: string): Promise<void> {
    await super.deleteCacheForRequestId(requestId);
    this.logger({
      category: "action_cache",
      message: `Cleared action for ID: ${requestId}`,
      level: 1,
    });
  }

  /**
   * Resets the entire action cache and logs that it did so.
   */
  public async resetCache(): Promise<void> {
    await super.resetCache();
    this.logger({
      category: "action_cache",
      message: "Action cache has been reset.",
      level: 1,
    });
  }
}
Oops, something went wrong.