import gradio as gr from huggingface_hub import hf_hub_download from audio_index import AudioEmbeddingSystem from search import search import pandas as pd import numpy as np db_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_db.sqlite", repo_type="dataset", ) index_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_faiss.index", repo_type="dataset", ) audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file) def audio_search(audio_tuple): sample_rate, array = audio_tuple array = array[: int(sample_rate * 10)] array = array.astype(np.float32) / 32768.0 rows = audio_embedding_system.search((sample_rate, array)) orig_rows = search(rows) for row in rows: path = row["path"] for orig_row in orig_rows: if orig_row["path"] == path: row["sentence"] = orig_row["sentence"] row["audio"] = [ "