How to Give a CrewAI Agent Browser Tools

CrewAI agents collaborate in crews, each agent taking a role such as researcher, writer, QA engineer, or analyst. What none of them can do by default is actually look at a web page, take a screenshot, or verify that a UI element exists.

Here's how to add browser tools to any CrewAI agent using the PageBolt API.

Install

pip install crewai crewai-tools requests

Define the tools

CrewAI tools extend BaseTool and implement a _run method. One class per capability:

import os
import base64
import requests
from crewai.tools import BaseTool
from pydantic import Field
from typing import Optional

# PageBolt connection settings shared by the tool classes in this file.
# The key is looked up when the module is imported, so a missing
# PAGEBOLT_API_KEY environment variable raises KeyError right away.
PAGEBOLT_API_KEY = os.environ["PAGEBOLT_API_KEY"]
BASE_URL = "https://pagebolt.dev/api/v1"
HEADERS = {
    "x-api-key": PAGEBOLT_API_KEY,
    "Content-Type": "application/json",
}


class ScreenshotTool(BaseTool):
    """CrewAI tool that requests a full-page screenshot from PageBolt.

    The agent receives a short text confirmation (byte count plus a
    truncated base64 preview), not the image itself.
    """

    name: str = "take_screenshot"
    description: str = (
        "Take a screenshot of any web page. "
        "Input: a full URL (e.g. https://example.com). "
        "Returns a confirmation with the image size. "
        "Use this to visually inspect a page, check a layout, "
        "or verify rendered content."
    )

    def _run(self, url: str) -> str:
        """Capture ``url`` and return a one-line confirmation string.

        Args:
            url: Address of the page to capture.

        Returns:
            A message with the response size in bytes and the first 80
            base64 characters of the response body.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        res = requests.post(
            f"{BASE_URL}/screenshot",
            headers=HEADERS,
            json={"url": url, "blockBanners": True, "fullPage": True},
            timeout=30,
        )
        res.raise_for_status()
        image = res.content
        # Only 80 base64 characters are shown. 60 input bytes encode to
        # exactly 80 characters with no padding, so encoding just that prefix
        # yields the same preview without encoding the entire image.
        preview = base64.b64encode(image[:60]).decode()
        return (
            f"Screenshot captured from {url} ({len(image):,} bytes). "
            f"Base64 preview: data:image/png;base64,{preview}..."
        )


class InspectPageTool(BaseTool):
    """CrewAI tool that lists a page's elements via PageBolt's inspect endpoint."""

    name: str = "inspect_page"
    description: str = (
        "Get all interactive elements on a page with their CSS selectors. "
        "Input: a full URL. "
        "Returns buttons, inputs, links, and forms with unique selectors. "
        "Always use this before attempting to automate a page."
    )

    def _run(self, url: str) -> str:
        """Inspect ``url`` and return a text listing of its elements.

        The header line reports the total number of elements in the
        response; at most the first 30 are listed, one per line, with the
        element text cut to 70 characters.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.post(
            f"{BASE_URL}/inspect",
            headers=HEADERS,
            json={"url": url},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        found = payload.get("elements", [])

        summary = []
        for element in found[:30]:
            tag = element.get('tag')
            role = element.get('role', '')
            label = (element.get('text') or '')[:70]
            selector = element.get('selector', '')
            summary.append(f"{tag}[{role}] '{label}' -> {selector}")

        header = f"Found {len(found)} elements on {url}:\n"
        return header + "\n".join(summary)


class GeneratePDFTool(BaseTool):
    """CrewAI tool that renders a page to PDF via PageBolt and saves it locally."""

    name: str = "generate_pdf"
    description: str = (
        "Generate a PDF of any web page or HTML content. "
        "Input: a full URL. "
        "Use for invoices, reports, documentation, or any printable page."
    )

    def _run(self, url: str) -> str:
        """Render ``url`` to a PDF file in the current working directory.

        Args:
            url: Address of the page to render.

        Returns:
            A message with the output path and the file size in bytes.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        res = requests.post(
            f"{BASE_URL}/pdf",
            headers=HEADERS,
            json={"url": url},
            timeout=30,
        )
        res.raise_for_status()

        # Derive the file name from the last path segment only: drop the
        # fragment and query string first, so "?id=5" or a "/" inside a query
        # value cannot end up in (or choose) the name.
        tail = url.split("#", 1)[0].split("?", 1)[0].split("/")[-1]
        # Replace anything that is not a letter, digit, dot, dash, or
        # underscore; characters such as ":" or "*" are invalid in file names
        # on some platforms.
        safe_tail = "".join(
            ch if ch.isalnum() or ch in "._-" else "_" for ch in tail
        )
        output_path = f"output_{safe_tail or 'page'}.pdf"

        with open(output_path, "wb") as f:
            f.write(res.content)
        return f"PDF saved to {output_path} ({len(res.content):,} bytes) from {url}"


class RunSequenceTool(BaseTool):
    """CrewAI tool that runs a multi-step browser sequence through PageBolt.

    Problems with the input or the request are reported to the agent as
    "Error: ..." / "Sequence failed: ..." strings rather than raised, so
    the agent can correct its input and retry.
    """

    name: str = "run_browser_sequence"
    description: str = (
        "Run a multi-step browser automation and screenshot the final state. "
        "Input: a JSON string with keys: url (starting page) and steps (list of actions). "
        "Each step has: action (navigate/click/fill/wait/screenshot), "
        "selector (for click/fill), value (for fill), url (for navigate), ms (for wait). "
        "Example: {\"url\": \"https://example.com\", \"steps\": [{\"action\": \"click\", \"selector\": \"#btn\"}, {\"action\": \"screenshot\"}]}"
    )

    def _run(self, input_json: str) -> str:
        """Validate the JSON input, submit the sequence, and summarise the result.

        Args:
            input_json: JSON object with a ``url`` string and an optional
                ``steps`` list of step objects.

        Returns:
            A completion summary, or a message starting with ``Error:`` or
            ``Sequence failed:`` describing what went wrong.
        """
        import json

        try:
            data = json.loads(input_json)
        except (TypeError, json.JSONDecodeError):
            return "Error: input must be valid JSON with 'url' and 'steps' keys"
        # Valid JSON is not necessarily an object (e.g. a list or a string).
        if not isinstance(data, dict):
            return "Error: input must be valid JSON with 'url' and 'steps' keys"

        url = data.get("url")
        if not isinstance(url, str) or not url:
            return "Error: 'url' must be a non-empty string"

        steps = data.get("steps") or []
        if not isinstance(steps, list) or not all(
            isinstance(step, dict) for step in steps
        ):
            return "Error: 'steps' must be a list of step objects"

        # Ensure there's a screenshot step at the end
        if not steps or steps[-1].get("action") != "screenshot":
            steps = [*steps, {"action": "screenshot"}]

        # Every sequence starts by navigating to the requested page.
        all_steps = [{"action": "navigate", "url": url}, *steps]

        try:
            res = requests.post(
                f"{BASE_URL}/sequence",
                headers=HEADERS,
                json={"steps": all_steps},
                timeout=60,
            )
        except requests.RequestException as exc:
            # Timeouts and connection errors are reported in the same way as
            # HTTP errors below.
            return f"Sequence failed: {exc}"
        if not res.ok:
            return f"Sequence failed: {res.status_code} {res.text[:200]}"

        try:
            result = res.json()
        except ValueError:
            return "Sequence failed: response body was not valid JSON"

        outputs = (result.get("outputs") or []) if isinstance(result, dict) else []
        return (
            f"Sequence completed: {len(outputs)} output(s). "
            f"Steps executed: {len(all_steps)}. "
            f"Final state captured."
        )

Build a crew

A research crew that can visually inspect websites before writing reports:

from crewai import Agent, Task, Crew, Process

# One instance of each browser tool; the instances are shared between agents.
screenshot_tool, inspect_tool = ScreenshotTool(), InspectPageTool()
pdf_tool, sequence_tool = GeneratePDFTool(), RunSequenceTool()

# Long agent and task texts are named first so the constructor calls below
# stay short.
researcher_backstory = (
    "You are a meticulous researcher who takes screenshots to verify claims, "
    "inspects page structure before drawing conclusions, and documents findings "
    "with visual evidence."
)
qa_backstory = (
    "You are a QA engineer who checks pages visually and structurally. "
    "You inspect pages to find their elements, then verify they look and function correctly."
)
documentation_backstory = (
    "You capture web content as PDFs for compliance, archiving, and distribution."
)

research_description = (
    "Research the homepage of https://example.com. "
    "1. Take a screenshot and describe the visual layout. "
    "2. Inspect the page and list all navigation links and CTA buttons with their selectors. "
    "3. Summarize what the page is about based on visual and structural inspection."
)
research_expected = (
    "A structured report with: visual description, list of navigation elements "
    "with selectors, and a 2-3 sentence summary of page purpose."
)
qa_description = (
    "QA check https://example.com. "
    "1. Inspect the page to find all form inputs and buttons. "
    "2. Take a full-page screenshot. "
    "3. Report any elements that look broken, missing, or unexpected."
)
qa_expected = (
    "QA report with: list of interactive elements found, screenshot confirmation, "
    "and pass/fail assessment of page integrity."
)

# Agents: each role gets only the tools it needs.
web_researcher = Agent(
    role="Web Research Specialist",
    goal="Gather accurate information from web pages using visual inspection",
    backstory=researcher_backstory,
    tools=[screenshot_tool, inspect_tool],
    verbose=True,
)
qa_engineer = Agent(
    role="QA Engineer",
    goal="Verify that web pages render correctly and interactive elements work as expected",
    backstory=qa_backstory,
    tools=[screenshot_tool, inspect_tool, sequence_tool],
    verbose=True,
)
# Defined as an example of a PDF-capable agent; it is not added to the crew
# assembled below.
document_specialist = Agent(
    role="Documentation Specialist",
    goal="Generate PDF documentation and reports from web pages",
    backstory=documentation_backstory,
    tools=[screenshot_tool, pdf_tool],
    verbose=True,
)

# Tasks: one per agent in the crew.
research_task = Task(
    description=research_description,
    agent=web_researcher,
    expected_output=research_expected,
)
qa_task = Task(
    description=qa_description,
    agent=qa_engineer,
    expected_output=qa_expected,
)

# Assemble the crew and run the tasks in the order they are listed.
crew = Crew(
    agents=[web_researcher, qa_engineer],
    tasks=[research_task, qa_task],
    process=Process.sequential,
    verbose=True,
)

result = crew.kickoff()
print(result)

Visual QA crew for deploy checks

from crewai import Agent, Task, Crew, Process

# A single agent that checks every page in the list, one task per page.
visual_checker = Agent(
    role="Visual QA Specialist",
    goal="Check that key pages render correctly after a deployment",
    backstory="You catch visual regressions and layout issues before users do.",
    tools=[screenshot_tool, inspect_tool],
    verbose=True,
)

pages_to_check = [
    "https://staging.yourapp.com",
    "https://staging.yourapp.com/pricing",
    "https://staging.yourapp.com/login",
]


def _page_check_task(page):
    """Build the screenshot-and-inspect task for a single page URL."""
    return Task(
        description=(
            f"Check {page}: take a screenshot and inspect for missing elements. "
            "Report any visual issues, broken layouts, or missing navigation."
        ),
        agent=visual_checker,
        expected_output=f"Pass/fail report for {page} with details of any issues found.",
    )


tasks = list(map(_page_check_task, pages_to_check))

# Tasks run in list order, i.e. in the order of pages_to_check.
crew = Crew(
    agents=[visual_checker],
    tasks=tasks,
    process=Process.sequential,
    verbose=True,
)

result = crew.kickoff()
print(result)

Parallel crew with roles

from crewai import Agent, Task, Crew, Process

# Three specialists, each with a single tool. CrewAI's Process enum offers
# only `sequential` and `hierarchical` (there is no `Process.parallel`), so
# concurrency is requested per task with `async_execution=True`.
# `backstory` is a required Agent field alongside `role` and `goal`.
screenshotter = Agent(
    role="Visual Capture Specialist",
    goal="Take accurate screenshots for documentation and verification",
    backstory="You capture complete screenshots and describe exactly what they show.",
    tools=[screenshot_tool],
    verbose=False,
)

inspector = Agent(
    role="Page Structure Analyst",
    goal="Map all interactive elements on a page for automation and testing",
    backstory="You map page structure precisely so that others can automate against it.",
    tools=[inspect_tool],
    verbose=False,
)

archivist = Agent(
    role="PDF Archivist",
    goal="Generate PDF archives of important pages",
    backstory="You archive important pages as PDFs and confirm where each file was saved.",
    tools=[pdf_tool],
    verbose=False,
)

url = "https://example.com"

crew = Crew(
    agents=[screenshotter, inspector, archivist],
    tasks=[
        # The first two tasks are independent and run concurrently.
        Task(
            description=f"Take a full-page screenshot of {url} and describe what you see.",
            agent=screenshotter,
            expected_output="Visual description of the page.",
            async_execution=True,
        ),
        Task(
            description=f"Inspect {url} and list all forms and buttons.",
            agent=inspector,
            expected_output="Structured list of interactive elements with CSS selectors.",
            async_execution=True,
        ),
        # CrewAI accepts at most one async task at the end of the task list,
        # so the last task stays synchronous; it starts once the async tasks
        # above have finished.
        Task(
            description=f"Generate a PDF of {url} and confirm it was saved.",
            agent=archivist,
            expected_output="Confirmation with file path and size.",
        ),
    ],
    process=Process.sequential,
    verbose=True,
)

result = crew.kickoff()
print(result)

The agent framework picture

All three major Python agent frameworks now have working PageBolt integrations:

Framework	Integration	Best for
LangChain	`langchain-mcp-adapters` or manual `@tool`	Flexible agent chains, custom logic
LlamaIndex	`BasicMCPClient` + `McpToolSpec`	RAG + web capture pipelines
CrewAI	`BaseTool` subclasses	Multi-agent crews with defined roles

All three can also use the MCP approach (see "How to use PageBolt MCP tools in a LangChain or LlamaIndex agent") if you prefer automatic tool discovery over manual wrappers.