{
  "schema_version": "openagent.agent_resource_packet.v1",
  "id": "res_crawl4ai",
  "slug": "crawl4ai",
  "name": "crawl4ai",
  "canonical_url": "https://www.openagent.bot/agents/crawl4ai",
  "category": "agents",
  "resource_type": "agent",
  "summary": "crawl4ai is an open-source web crawling and scraping framework designed specifically for LLM data pipelines. It extracts clean, structured content from websites — handling JavaScript rendering, pagination, and complex selectors — and outputs data ready for RAG systems, AI training datasets, and agent research workflows.",
  "capabilities": [
    "workflow-orchestration"
  ],
  "constraints": [
    "open-source"
  ],
  "scenarios": [
    "developer-workflow"
  ],
  "deployment_modes": [
    "cloud"
  ],
  "interfaces": [
    "repo"
  ],
  "integrations": [
    "agents"
  ],
  "permission_surface": [
    "browser",
    "memory",
    "external services"
  ],
  "risk_level": "elevated",
  "source_confidence": "high",
  "recommended_workflows": [
    "Browser automation",
    "Coding agent workflow",
    "Evaluation and observability"
  ],
  "avoid_when": [
    "Users who need a general-purpose browser automation framework (use Playwright or Puppeteer instead)",
    "Teams looking for a managed, cloud-hosted scraping API"
  ],
  "primary_actions": [
    "Inspect repository",
    "Open Homepage"
  ],
  "evidence_urls": [
    "https://github.com/unclecode/crawl4ai",
    "https://crawl4ai.com",
    "https://github.com/unclecode/crawl4ai/blob/main/README.md"
  ],
  "last_verified_at": "2026-06-03",
  "machine_readable": {
    "json_url": "https://www.openagent.bot/agents/crawl4ai.json",
    "markdown_url": "https://www.openagent.bot/agents/crawl4ai.md",
    "agent_json_url": "https://www.openagent.bot/agents/crawl4ai.agent.json"
  }
}