Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 738 Bytes
1e5834c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import requests
import os
# Authorization header sent with every datasets-server request. The token is
# read from the HF_TOKEN environment variable once, at import time; when the
# variable is unset the header value is the literal string "Bearer None".
headers = {"Authorization": "Bearer {}".format(os.environ.get("HF_TOKEN"))}

# Dataset coordinates (repository id, configuration name, split) that
# search() queries.
dataset = "mozilla-foundation/common_voice_17_0"
config = "en"
split = "validation"
def search(rows: list[dict]):
    """Fetch the dataset rows whose ``path`` matches any of the given rows.

    Sends one GET request to the Hugging Face datasets-server ``/filter``
    endpoint for the module-level ``dataset`` / ``config`` / ``split``,
    using a ``"path" IN (...)`` condition built from ``row["path"]``.

    Args:
        rows: Dicts that each carry a ``"path"`` key.

    Returns:
        The ``"rows"`` list from the JSON response, or ``[]`` when the
        response has no such key or when ``rows`` is empty.

    Raises:
        KeyError: If a row has no ``"path"`` key.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    wanted_paths = [row["path"] for row in rows]
    if not wanted_paths:
        # Nothing to look up; avoid sending a request with an empty IN () list.
        return []

    # Double any embedded single quote so a path cannot end the quoted
    # string literal early inside the where condition.
    quoted_paths = ", ".join(
        "'" + str(path).replace("'", "''") + "'" for path in wanted_paths
    )
    where_clause = f'"path" IN ({quoted_paths})'

    # Pass the query through ``params`` so requests percent-encodes every
    # value; characters such as '&', '#', '+' or '%' in a path would
    # otherwise be interpreted as URL syntax and corrupt the query.
    response = requests.get(
        "https://datasets-server.huggingface.co/filter",
        params={
            "dataset": dataset,
            "config": config,
            "split": split,
            "where": where_clause,
            "offset": 0,
        },
        headers=headers,
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    response.raise_for_status()  # surface 4xx/5xx answers as exceptions
    return response.json().get("rows", [])
|