# Modal app that runs crawl4ai's AsyncWebCrawler inside a Playwright-enabled
# container image. (Removed "Spaces: / Build error" web-page scrape artifacts
# that were not part of the program.)
import modal

# Build the container image our Modal functions run in: start from Debian slim
# with Python 3.10, install the project's Python deps, then install Chromium
# and its system libraries for Playwright (which crawl4ai drives).
# NOTE(review): "playwright install" after "playwright install chromium" pulls
# ALL browsers and is likely redundant — confirm before trimming.
crawler = (
    modal.Image.debian_slim(python_version="3.10")
    .pip_install_from_requirements("requirements.txt")
    .run_commands(
        "apt-get update",
        "apt-get install -y software-properties-common",
        "apt-add-repository non-free",
        "apt-add-repository contrib",
        "playwright install-deps chromium",
        "playwright install chromium",
        "playwright install",
    )
)
# Runtime imports for the crawling function itself.
import asyncio
from crawl4ai import AsyncWebCrawler
# presumably imported so the Playwright dependency is exercised/verified in the
# image at import time — it is not referenced below; TODO confirm
import playwright
from typing import Optional, Union, List
from pydantic import BaseModel, Field
from fastapi import Header, HTTPException
# jwt/os are available for an auth strategy; not used in the visible code.
from jwt import decode, PyJWTError
import os
# The Modal application object that functions are registered against.
app = modal.App("crawler")
class CrawlRequest(BaseModel):
    """Validated crawl parameters; set fields are forwarded to `arun`."""

    # The page to crawl.
    url: str
    # Passed through to crawl4ai as a keyword argument when explicitly set.
    bypass_cache: bool = False
    # other kwargs
# Define the function that will be executed in the container. The decorator is
# required: `main` below invokes this via `crawl.remote(...)`, which only
# exists on Modal-registered functions.
@app.function(image=crawler)
async def crawl(request: CrawlRequest, authorization: str = Header(...)):
    """Crawl one URL with crawl4ai and return the crawl result.

    Only the request fields the caller explicitly set are forwarded as
    keyword arguments to ``AsyncWebCrawler.arun``, so crawl4ai's own
    defaults apply to everything else.

    NOTE(review): `authorization` is accepted but never verified — wire in a
    real strategy (e.g. the imported ``jwt.decode``) before exposing this.

    Returns the crawl4ai result on success, or ``{"error": <message>}``
    on failure.
    """
    print(f"Crawling URL: {request}")
    crawl_kwargs = request.dict(exclude_unset=True)
    # Renamed from `crawler` to avoid shadowing the module-level container
    # image, which is also named `crawler`.
    async with AsyncWebCrawler(verbose=True) as web_crawler:
        try:
            result = await web_crawler.arun(**crawl_kwargs)
            print(result)
            return result
        except Exception as e:
            # Top-level boundary: return a structured error payload instead
            # of letting the container call crash.
            error_message = f"Error during crawling: {str(e)}"
            print(error_message)
            return {"error": error_message}
# Entrypoint that will be used to trigger the crawler when testing locally,
# e.g. `modal run thisfile.py --url https://example.com`. The decorator is
# required for `modal run` to find it.
@app.local_entrypoint()
async def main(url: str):
    """Invoke the remote `crawl` function for `url` and return its result."""
    # Inside an async entrypoint Modal requires the async interface:
    # the blocking `.remote()` cannot be called from a running event loop.
    result = await crawl.remote.aio(CrawlRequest(url=url))
    print(result)
    return result