import gradio as gr from huggingface_hub import hf_hub_download from audio_index import AudioEmbeddingSystem from search import search, get_prompt import pandas as pd import numpy as np db_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_db_full.sqlite", repo_type="dataset", ) index_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_faiss_full.index", repo_type="dataset", ) audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file) def audio_search(audio_tuple, prompt: str): if audio_tuple is None: return gr.skip() sample_rate, array = audio_tuple if array.dtype == np.int16: array = array.astype(np.float32) / 32768.0 rows = audio_embedding_system.search((sample_rate, array)) least_similar = audio_embedding_system.search((sample_rate, array), least_similar=True) rows += least_similar orig_rows = search(rows) for i, row in enumerate(rows): path = row["path"] for orig in orig_rows: orig_row = orig["row"] if orig_row["path"] == path: row["sentence"] = orig_row["sentence"] row["audio"] = [ "