Spaces:

freddyaboulton
/

voice-match

Running on CPU Upgrade

voice-match / search.py

commit

1e5834c 22 days ago

738 Bytes

	import requests
	import os

	# Access token for the Hugging Face API, read once at import time.
	# When HF_TOKEN is unset, os.getenv returns None; formatting that into the
	# header would send the literal string "Bearer None", so the Authorization
	# header is only included when a token is actually configured.
	_hf_token = os.getenv("HF_TOKEN")
	headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

	# Dataset coordinates sent as the dataset/config/split query parameters.
	dataset = "mozilla-foundation/common_voice_17_0"
	config = "en"
	split = "validation"


	def search(rows: list[dict], *, timeout: float = 30.0):
	    """Fetch the dataset rows whose "path" column matches the given rows.

	    Builds a ``"path" IN (...)`` filter from the "path" value of every
	    entry in *rows* and sends it to the datasets-server ``/filter``
	    endpoint for the module-level ``dataset`` / ``config`` / ``split``.

	    Args:
	        rows: Dicts that each contain a "path" key.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The value of the "rows" field of the JSON response, or an empty
	        list when that field is absent or when *rows* is empty.

	    Raises:
	        KeyError: If an entry in *rows* has no "path" key.
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection failures or timeouts.
	    """
	    file_paths_to_find = [row["path"] for row in rows]
	    if not file_paths_to_find:
	        # An empty IN () list is not a valid filter; nothing was asked for,
	        # so answer without making a request.
	        return []

	    # Passing the query through ``params`` lets requests percent-encode each
	    # value, so characters such as '&', '#' or '+' inside a path cannot
	    # corrupt the query string.
	    # NOTE(review): only the page starting at offset 0 is requested; if the
	    # endpoint caps the page size, long path lists may be truncated - confirm.
	    response = requests.get(
	        "https://datasets-server.huggingface.co/filter",
	        params={
	            "dataset": dataset,
	            "config": config,
	            "split": split,
	            "where": _build_where_clause(file_paths_to_find),
	            "offset": 0,
	        },
	        headers=headers,
	        timeout=timeout,
	    )
	    response.raise_for_status()  # surface 4xx/5xx answers as exceptions
	    return response.json().get("rows", [])


	def _build_where_clause(paths):
	    """Return a ``"path" IN (...)`` clause with every path safely quoted."""
	    # Inside a single-quoted SQL string literal a quote is escaped by
	    # doubling it.
	    quoted = ", ".join("'" + str(path).replace("'", "''") + "'" for path in paths)
	    return f'"path" IN ({quoted})'