import gradio as gr from huggingface_hub import hf_hub_download from audio_index import AudioEmbeddingSystem from search import search, get_prompt import pandas as pd import numpy as np db_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_db_full.sqlite", repo_type="dataset", ) index_file = hf_hub_download( repo_id="freddyaboulton/common-voice-english-audio", filename="audio_faiss_full.index", repo_type="dataset", ) audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file) def audio_search(audio_tuple, prompt: str): if audio_tuple is None: return gr.skip() sample_rate, array = audio_tuple if array.dtype == np.int16: array = array.astype(np.float32) / 32768.0 rows = audio_embedding_system.search((sample_rate, array)) least_similar = audio_embedding_system.search((sample_rate, array), least_similar=True) rows += least_similar orig_rows = search(rows) for i, row in enumerate(rows): path = row["path"] for orig in orig_rows: orig_row = orig["row"] if orig_row["path"] == path: row["sentence"] = orig_row["sentence"] row["audio"] = [ "" ] df = pd.DataFrame(rows)[["path", "audio", "sentence", "distance"]].sort_values( by="distance", ascending=True ) # Define the styling function def style_path_column(col): n = len(col) # Default empty styles styles = [''] * n for i in range(n): # First 5 rows: green background with opacity if i < 5: styles[i] = 'background-color: rgba(0, 255, 0, 0.3)' # Last 3 rows: red background with opacity elif i >= n - 3: styles[i] = 'background-color: rgba(255, 0, 0, 0.3)' return styles # Apply the styling to the 'path' column and return the Styler object return df.style.apply(style_path_column, subset=['path']) sample_text = gr.Textbox( label="Prompt", info="Hit Enter to get a prompt from the common voice dataset", value=get_prompt(), ) iface = gr.Interface( fn=audio_search, inputs=[gr.Audio( label="Record or upload a clip of your voice", sources=["microphone", "upload"] ), sample_text], outputs=gr.Dataframe( show_label=False, headers=["path", "audio", "sentence", "distance"], datatype=["str", "html", "str", "number"], ), ) with gr.Blocks() as demo: gr.HTML( f"""