Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import gradio as gr | |
from huggingface_hub import hf_hub_download | |
from audio_index import AudioEmbeddingSystem | |
from search import search, get_prompt | |
import pandas as pd | |
import numpy as np | |
# Hub dataset repository that hosts both the metadata database and the index.
_AUDIO_DATASET_REPO = "freddyaboulton/common-voice-english-audio"

# Fetch the SQLite database file from the dataset repo; hf_hub_download
# returns the path that is handed to AudioEmbeddingSystem below.
db_file = hf_hub_download(
    repo_id=_AUDIO_DATASET_REPO,
    repo_type="dataset",
    filename="audio_db_full.sqlite",
)

# Fetch the matching index file from the same repo.
index_file = hf_hub_download(
    repo_id=_AUDIO_DATASET_REPO,
    repo_type="dataset",
    filename="audio_faiss_full.index",
)

# Single shared search backend, built once at import time and used by
# audio_search for every request.
audio_embedding_system = AudioEmbeddingSystem(
    db_path=db_file,
    index_path=index_file,
)
def audio_search(audio_tuple, prompt: str):
    """Look up the indexed clips most and least similar to a recording.

    Args:
        audio_tuple: ``(sample_rate, samples)`` pair, or ``None`` when no
            clip was provided. ``samples`` must expose ``dtype``/``astype``
            (a NumPy array); int16 samples are rescaled to float32.
        prompt: Value of the second input component. It is not used by the
            search itself and is accepted only so the signature matches the
            interface inputs.

    Returns:
        ``gr.skip()`` when ``audio_tuple`` is ``None``. Otherwise a pandas
        ``Styler`` over a frame with the columns ``path``, ``audio``,
        ``sentence`` and ``distance``, sorted by ascending distance, whose
        ``path`` cells are tinted green for the most-similar results and red
        for the remaining (least-similar) ones.
    """
    if audio_tuple is None:
        return gr.skip()

    sample_rate, array = audio_tuple
    if array.dtype == np.int16:
        # Scale 16-bit integer samples into the [-1.0, 1.0) float range.
        array = array.astype(np.float32) / 32768.0

    query = (sample_rate, array)
    most_similar = audio_embedding_system.search(query)
    least_similar = audio_embedding_system.search(query, least_similar=True)

    # Remember how many results belong to the "most similar" group so the
    # colouring below does not depend on a hard-coded result count.
    num_similar = len(most_similar)
    # Build a fresh list instead of extending the search result in place.
    rows = [*most_similar, *least_similar]

    # Index the looked-up dataset rows by path once, so every result is
    # enriched with a single dict lookup. When a path occurs more than once
    # the later entry wins, same as scanning the whole list without a break.
    details_by_path = {item["row"]["path"]: item["row"] for item in search(rows)}
    for row in rows:
        details = details_by_path.get(row["path"])
        if details is None:
            # No dataset row for this path: leave the result un-enriched.
            continue
        row["sentence"] = details["sentence"]
        row["audio"] = [
            "<audio src=" + details["audio"][0]["src"] + " controls />"
        ]

    # columns= guarantees all four columns exist (NaN-filled when missing),
    # so an empty result set or un-enriched rows cannot raise KeyError.
    df = pd.DataFrame(
        rows, columns=["path", "audio", "sentence", "distance"]
    ).sort_values(by="distance", ascending=True)

    similar_style = 'background-color: rgba(0, 255, 0, 0.3)'
    dissimilar_style = 'background-color: rgba(255, 0, 0, 0.3)'

    def style_path_column(col):
        """Return one CSS string per cell: green for the nearest group, red after."""
        # The frame is sorted by ascending distance, so the first
        # ``num_similar`` positions are the most-similar results.
        return [
            similar_style if position < num_similar else dissimilar_style
            for position in range(len(col))
        ]

    # Apply the styling to the 'path' column and return the Styler object.
    return df.style.apply(style_path_column, subset=['path'])
# Textbox pre-filled with a prompt from get_prompt(); it doubles as the
# second input of the interface below.
sample_text = gr.Textbox(
    value=get_prompt(),
    label="Suggested Prompt",
    info="Unsure what to record? Use this prompt. Hit Enter to get a new one from the common voice dataset",
)

# Audio input accepting either a microphone recording or an uploaded file.
_voice_input = gr.Audio(
    sources=["microphone", "upload"],
    label="Record or upload a clip of your voice",
)

# Results table; the "audio" column is declared as HTML so the <audio>
# markup produced by audio_search is rendered rather than shown as text.
_results_table = gr.Dataframe(
    headers=["path", "audio", "sentence", "distance"],
    datatype=["str", "html", "str", "number"],
    show_label=False,
)

# Wire the inputs to audio_search; input order matches its parameters
# (audio_tuple, prompt).
iface = gr.Interface(
    fn=audio_search,
    inputs=[_voice_input, sample_text],
    outputs=_results_table,
)
# Static page fragments. They contain no placeholders, so plain string
# literals are used; the text is kept flush-left inside the literals.
_TITLE_HTML = """
<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
<img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="height: 100px; margin-right: 10px"> Voice Match
</h1>
"""

_POWERED_BY_HTML = """
<h2 style='text-align: center'>
Powered by <a href="https://huggingface.co/rimelabs/rimecaster">RimeCaster</a>
</h2>
"""

_DESCRIPTION_MD = """
<div style='text-align: center'>
Record or upload an English clip of your voice and we'll find the most similar (and dissimilar) voices in the <a href="https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0">Common Voice dataset</a>.
</div>
"""

# Page layout: title, attribution, description, then the search interface.
with gr.Blocks() as demo:
    gr.HTML(_TITLE_HTML)
    gr.HTML(_POWERED_BY_HTML)
    gr.Markdown(_DESCRIPTION_MD)
    iface.render()
    # Submitting the prompt textbox replaces its content with a new prompt.
    sample_text.submit(fn=get_prompt, inputs=None, outputs=sample_text)

# The logo referenced by the title markup is served from a local file, so it
# is passed in allowed_paths.
demo.launch(allowed_paths=["Karaoke_Huggy.png"])