Develop AI Agents that can browse the web
Install AgentKit
npm install @inngest/agent-kit inngest @browserbasehq/sdk playwright-core
Don't have an existing project?
mkdir my-agent-kit-project && npm init
2. Setup an AgentKit Newtork with an Agent
import {
anthropic,
createAgent,
createNetwork,
} from "@inngest/agent-kit";
const searchAgent = createAgent({
name: "reddit_searcher",
description: "An agent that searches Reddit for relevant information",
system:
"You are a helpful assistant that searches Reddit for relevant information.",
});
// Create the network
const redditSearchNetwork = createNetwork({
name: "reddit_search_network",
description: "A network that searches Reddit using Browserbase",
agents: [searchAgent],
maxIter: 2,
defaultModel: anthropic({
model: "claude-3-5-sonnet-latest",
max_tokens: 4096,
});
Create a Browserbase tool
import {
anthropic,
createAgent,
createNetwork,
createTool,
} from "@inngest/agent-kit";
import { z } from "zod";
import { chromium } from "playwright-core";
import Browserbase from "@browserbasehq/sdk";
const bb = new Browserbase({
apiKey: process.env.BROWSERBASE_API_KEY as string,
});
// Create a tool to search Reddit using Browserbase
const searchReddit = createTool({
name: "search_reddit",
description: "Search Reddit posts and comments",
parameters: z.object({
query: z.string().describe("The search query for Reddit"),
}),
handler: async ({ query }, { step }) => {
return await step?.run("search-on-reddit", async () => {
// Create a new session
const session = await bb.sessions.create({
projectId: process.env.BROWSERBASE_PROJECT_ID as string,
});
// Connect to the session
const browser = await chromium.connectOverCDP(session.connectUrl);
try {
const page = await browser.newPage();
// Construct the search URL
const searchUrl = `https://search-new.pullpush.io/?type=submission&q=${query}`;
console.log(searchUrl);
await page.goto(searchUrl);
// Wait for results to load
await page.waitForSelector("div.results", { timeout: 10000 });
// Extract search results
const results = await page.evaluate(() => {
const posts = document.querySelectorAll("div.results div:has(h1)");
return Array.from(posts).map((post) => ({
title: post.querySelector("h1")?.textContent?.trim(),
content: post.querySelector("div")?.textContent?.trim(),
}));
});
console.log("results", JSON.stringify(results, null, 2));
return results.slice(0, 5); // Return top 5 results
} finally {
await browser.close();
}
});
},
});
BROWSERBASE_API_KEY
and BROWSERBASE_PROJECT_ID
in the
.env
file. You can find your API key and project ID from the Browserbase
dashboard.step.run()
function. This ensures that the tool will only run once across multiple runs.More information about using step.run()
can be found in the Multi steps tools page.goto()
: navigate to a specific URLobserve()
: observe the current pageextract()
: extract data from the current pageact()
: take action on the current pageimport { createAgent, createTool } from "@inngest/agent-kit";
import { z } from "zod";
import { getStagehand, stringToZodSchema } from "./utils.js";
const webSearchAgent = createAgent({
name: "web_search_agent",
description: "I am a web search agent.",
system: `You are a web search agent.
`,
tools: [
createTool({
name: "navigate",
description: "Navigate to a given URL",
parameters: z.object({
url: z.string().describe("the URL to navigate to"),
}),
handler: async ({ url }, { step, network }) => {
return await step?.run("navigate", async () => {
const stagehand = await getStagehand(
network?.state.kv.get("browserbaseSessionID")!
);
await stagehand.page.goto(url);
return `Navigated to ${url}.`;
});
},
}),
createTool({
name: "extract",
description: "Extract data from the page",
parameters: z.object({
instruction: z
.string()
.describe("Instructions for what data to extract from the page"),
schema: z
.string()
.describe(
"A string representing the properties and types of data to extract, for example: '{ name: string, age: number }'"
),
}),
handler: async ({ instruction, schema }, { step, network }) => {
return await step?.run("extract", async () => {
const stagehand = await getStagehand(
network?.state.kv.get("browserbaseSessionID")!
);
const zodSchema = stringToZodSchema(schema);
return await stagehand.page.extract({
instruction,
schema: zodSchema,
});
});
},
}),
createTool({
name: "act",
description: "Perform an action on the page",
parameters: z.object({
action: z
.string()
.describe("The action to perform (e.g. 'click the login button')"),
}),
handler: async ({ action }, { step, network }) => {
return await step?.run("act", async () => {
const stagehand = await getStagehand(
network?.state.kv.get("browserbaseSessionID")!
);
return await stagehand.page.act({ action });
});
},
}),
createTool({
name: "observe",
description: "Observe the page",
parameters: z.object({
instruction: z
.string()
.describe("Specific instruction for what to observe on the page"),
}),
handler: async ({ instruction }, { step, network }) => {
return await step?.run("observe", async () => {
const stagehand = await getStagehand(
network?.state.kv.get("browserbaseSessionID")!
);
return await stagehand.page.observe({ instruction });
});
},
}),
],
});
getStagehand()
helper function is used to retrieve the persisted instance created for the network execution (see full code below).