import modal

# Install the necessary dependencies as a custom container image, which we will
# pass to our functions
crawler = (
    modal.Image.debian_slim(python_version="3.10")
    .pip_install_from_requirements("requirements.txt")
    .run_commands(
        "apt-get update",
        "apt-get install -y software-properties-common",
        "apt-add-repository non-free",
        "apt-add-repository contrib",
        "playwright install-deps chromium",
        "playwright install chromium",
        "playwright install",
    )
)

import asyncio
from typing import List, Optional, Union

import playwright
from crawl4ai import AsyncWebCrawler
from fastapi import Header, HTTPException
from pydantic import BaseModel, Field

# jwt and os are only needed if you implement a JWT-based authorization check
# in the endpoint below
from jwt import PyJWTError, decode
import os

app = modal.App("crawler")


class CrawlRequest(BaseModel):
    url: str
    bypass_cache: bool = Field(default=False)
    # other kwargs


# Define the function that will be executed in the container
@app.function(image=crawler)
@modal.web_endpoint(method="POST", docs=True)
async def crawl(request: CrawlRequest, authorization: str = Header(...)):
    # You will want to have your own authorization strategy here to protect your endpoint
    print(f"Crawling URL: {request}")

    # Create an instance of AsyncWebCrawler
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Run the crawler on the given URL, forwarding every field that was
        # explicitly set on the request
        crawl_kwargs = request.dict(exclude_unset=True)
        try:
            result = await crawler.arun(**crawl_kwargs)
            print(result)
            return result
        except Exception as e:
            error_message = f"Error during crawling: {str(e)}"
            print(error_message)
            return {"error": error_message}


# Entrypoint that will be used to trigger the crawler when testing locally
@app.local_entrypoint()
async def main(url: str):
    result = crawl.remote(CrawlRequest(url=url))
    print(result)
    return result
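

# --- Usage sketch (kept as comments so it does not run at import time) ---
# Assuming this file is saved as crawler.py (hypothetical name), the local
# entrypoint can be exercised with the Modal CLI:
#
#   modal run crawler.py --url https://example.com
#
# and the web endpoint is deployed with:
#
#   modal deploy crawler.py
#
# A minimal sketch of calling the deployed endpoint over HTTP. The endpoint URL
# below is an assumption: Modal prints the real URL when you deploy (it follows
# a <workspace>--<app>-<function>.modal.run pattern), and the Authorization
# header must match whatever auth strategy you add inside `crawl`.
#
#   import requests
#
#   ENDPOINT_URL = "https://your-workspace--crawler-crawl.modal.run"  # hypothetical
#   response = requests.post(
#       ENDPOINT_URL,
#       json={"url": "https://example.com", "bypass_cache": True},
#       headers={"Authorization": "Bearer <your-token>"},  # placeholder token
#   )
#   print(response.json())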