Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import requests | |
import os | |
# Request configuration for the Hugging Face dataset viewer API.
#
# Only send an Authorization header when HF_TOKEN is actually set: formatting
# a missing token into the f-string would produce the literal credential
# "Bearer None" instead of an anonymous request.
_hf_token = os.getenv("HF_TOKEN")
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Dataset coordinates used in the query string built by search().
dataset = "mozilla-foundation/common_voice_17_0"
config = "en"
split = "validation"
def search(rows: list[dict]) -> list[dict]:
    """Look up dataset rows whose ``path`` matches the ``path`` of any input row.

    Builds a ``"path" IN (...)`` filter from the ``"path"`` value of each
    element of *rows* and sends it to the datasets-server ``/filter``
    endpoint for the module-level ``dataset`` / ``config`` / ``split``.

    Args:
        rows: Dicts that each contain a ``"path"`` key.

    Returns:
        The ``"rows"`` list from the JSON response, or an empty list when the
        response has no ``"rows"`` key or when *rows* is empty.

    Raises:
        KeyError: If an element of *rows* has no ``"path"`` key.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # An empty IN () list is not a valid filter expression; there is nothing
    # to look up, so skip the network round-trip entirely.
    if not rows:
        return []

    # Double embedded single quotes so a path cannot terminate its own string
    # literal inside the where clause (standard SQL-style escaping).
    quoted_paths = [
        "'{}'".format(str(row["path"]).replace("'", "''")) for row in rows
    ]
    where_clause = '"path" IN ({})'.format(", ".join(quoted_paths))

    # Pass the query through ``params`` so requests URL-encodes every value;
    # interpolating into the URL by hand breaks on '&', '#', '+', '%', etc.
    response = requests.get(
        "https://datasets-server.huggingface.co/filter",
        params={
            "dataset": dataset,
            "config": config,
            "split": split,
            "where": where_clause,
            "offset": 0,
        },
        headers=headers,
        timeout=30,  # never block forever on an unresponsive server
    )
    response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
    return response.json().get("rows", [])