# Hugging Face Space status banner (web-page residue, not part of the program): Running on CPU Upgrade
import gradio as gr | |
from huggingface_hub import hf_hub_download | |
from audio_index import AudioEmbeddingSystem | |
from search import search, get_prompt | |
import pandas as pd | |
import numpy as np | |
# Both search assets are fetched from the same Hugging Face dataset repository.
_DATASET_REPO = "freddyaboulton/common-voice-english-audio"


def _fetch_dataset_file(filename: str) -> str:
    """Download *filename* from the dataset repo and return its local path."""
    return hf_hub_download(
        repo_id=_DATASET_REPO,
        filename=filename,
        repo_type="dataset",
    )


# Judging by the file names, these are a SQLite database and a FAISS index;
# they are handed to the embedding system as its db_path and index_path.
db_file = _fetch_dataset_file("audio_db_full.sqlite")
index_file = _fetch_dataset_file("audio_faiss_full.index")

# Built once at import time and shared by every search request.
audio_embedding_system = AudioEmbeddingSystem(
    db_path=db_file,
    index_path=index_file,
)
def audio_search(audio_tuple, prompt: str):
    """Find the indexed clips whose voices are closest to the given audio.

    Args:
        audio_tuple: ``(sample_rate, samples)`` pair coming from the audio
            input, or ``None`` when nothing was recorded or uploaded.
            ``samples`` is expected to be a NumPy array (its ``dtype`` is
            inspected).
        prompt: Current text of the prompt box. It is not used by the search;
            it is accepted because the interface passes both of its inputs.

    Returns:
        ``gr.skip()`` when no audio was supplied. Otherwise a DataFrame with
        the columns ``path``, ``audio``, ``sentence`` and ``distance``, sorted
        by ascending ``distance``. Results that have no matching original row
        get an empty ``sentence`` and an empty ``audio`` cell.
    """
    import html  # local import so this function stays self-contained

    if audio_tuple is None:
        return gr.skip()

    sample_rate, array = audio_tuple
    if array.dtype == np.int16:
        # Rescale 16-bit PCM samples to floats in [-1.0, 1.0).
        array = array.astype(np.float32) / 32768.0

    rows = audio_embedding_system.search((sample_rate, array))
    print(rows)

    # Index the original rows by path once instead of rescanning the whole
    # list for every result. Later duplicates overwrite earlier ones, which
    # matches the "last match wins" outcome of a full scan without a break.
    originals_by_path = {}
    for hit in search(rows):
        record = hit["row"]
        originals_by_path[record["path"]] = record

    for row in rows:
        record = originals_by_path.get(row["path"])
        if record is None:
            # No original row for this result: keep the table well-formed
            # without overwriting anything the row may already carry.
            row.setdefault("sentence", "")
            row.setdefault("audio", [""])
            continue
        row["sentence"] = record["sentence"]
        # Quote and escape the URL so characters such as '&', '=', quotes or
        # spaces cannot break (or inject into) the generated markup.
        src = html.escape(record["audio"][0]["src"], quote=True)
        # The cell stays a one-element list, as the table has always received.
        row["audio"] = [f'<audio src="{src}" controls />']

    columns = ["path", "audio", "sentence", "distance"]
    # reindex (rather than [[...]]) also copes with an empty result list.
    table = pd.DataFrame(rows).reindex(columns=columns)
    return table.sort_values(by="distance", ascending=True)
# Prompt box, pre-filled with a prompt from get_prompt().
sample_text = gr.Textbox(
    label="Prompt",
    info="Hit Enter to get a prompt from the common voice dataset",
    value=get_prompt(),
)

# Voice clip input: either recorded from the microphone or uploaded.
_voice_input = gr.Audio(
    label="Record or upload a clip of your voice",
    sources=["microphone", "upload"],
)

# Result table; the "audio" column is declared as HTML so that the markup
# produced by audio_search is rendered rather than shown as text.
_results_table = gr.Dataframe(
    show_label=False,
    headers=["path", "audio", "sentence", "distance"],
    datatype=["str", "html", "str", "number"],
)

# audio_search receives (audio, prompt) in this order.
iface = gr.Interface(
    fn=audio_search,
    inputs=[_voice_input, sample_text],
    outputs=_results_table,
)
# Static page copy, kept apart from the layout code below.
_TITLE_HTML = """
<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
<img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="height: 100px; margin-right: 10px"> Voice Match
</h1>
"""

_CREDIT_HTML = """
<h2 style='text-align: center'>
Powered by <a href="https://huggingface.co/rimelabs/rimecaster">RimeCaster</a>
</h2>
"""

_INTRO_MARKDOWN = """
<div style='text-align: center'>
Record or upload an English clip of your voice and we'll find the most similar voices in the <a href="https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0">Common Voice dataset</a>.
</div>
"""

# Page layout: title, credit line, intro text, then the search interface.
with gr.Blocks() as demo:
    gr.HTML(_TITLE_HTML)
    gr.HTML(_CREDIT_HTML)
    gr.Markdown(_INTRO_MARKDOWN)
    iface.render()
    # Submitting the prompt box (Enter) replaces its text with a new prompt.
    sample_text.submit(fn=get_prompt, inputs=None, outputs=sample_text)

# The title image is referenced by file path in the markup above, so that
# path is listed in allowed_paths.
demo.launch(allowed_paths=["Karaoke_Huggy.png"])