Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,530 Bytes
b29e61c 1e5834c 5906e88 1e5834c 4d71280 b29e61c 1e5834c b3948c4 1e5834c b3948c4 1e5834c b29e61c 1e5834c 4d71280 c3277a4 1e5834c b29e61c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import gradio as gr
from huggingface_hub import hf_hub_download
from audio_index import AudioEmbeddingSystem
from search import search
import pandas as pd
import numpy as np
# Fetch the two search artifacts from the same Hub dataset repository, the
# database first and the index second. hf_hub_download hands back the local
# path of each fetched file.
db_file, index_file = (
    hf_hub_download(
        filename=artifact_name,
        repo_type="dataset",
        repo_id="freddyaboulton/common-voice-english-audio",
    )
    for artifact_name in ("audio_db.sqlite", "audio_faiss.index")
)

# Module-level search backend built from the two downloaded files; it is the
# object audio_search() queries on every request.
audio_embedding_system = AudioEmbeddingSystem(
    index_path=index_file,
    db_path=db_file,
)
def audio_search(audio_tuple):
    """Find the indexed clips closest to the submitted audio.

    Args:
        audio_tuple: ``(sample_rate, samples)`` pair coming from the audio
            input, or ``None`` when nothing was recorded or uploaded.
            ``samples`` is presumably a NumPy array of 16-bit PCM values
            (the 32768 divisor below suggests so) -- TODO confirm.

    Returns:
        A ``pandas.DataFrame`` of the matches ordered by ascending
        ``distance``. Matches whose ``path`` also appears in the rows
        returned by ``search(rows)`` additionally carry ``sentence`` and
        ``audio`` values. An empty frame with the result columns is returned
        when there is no input or when nothing matched.
    """
    # Mirrors the headers of the results table declared for the interface, so
    # an empty result still renders with the expected columns.
    result_columns = ["path", "audio", "sentence", "distance", "vector_id"]

    # Submitting without a clip delivers None; unpacking it would raise.
    if audio_tuple is None:
        return pd.DataFrame(columns=result_columns)

    sample_rate, array = audio_tuple
    # Keep at most the first ten seconds of samples.
    array = array[: int(sample_rate * 10)]
    # Convert to float32 and scale by 1/32768.
    array = array.astype(np.float32) / 32768.0

    rows = audio_embedding_system.search((sample_rate, array))

    # Index the looked-up rows by path once instead of rescanning them for
    # every match; on duplicate paths the last entry wins, as before.
    orig_by_path = {orig_row["path"]: orig_row for orig_row in search(rows)}
    for row in rows:
        orig_row = orig_by_path.get(row["path"])
        if orig_row is None:
            # No counterpart: the row keeps only the fields it already had.
            continue
        row["sentence"] = orig_row["sentence"]
        row["audio"] = [
            "<audio src=" + orig_row["audio"]["src"] + " controls />"
        ]

    frame = pd.DataFrame(rows)
    # With no matches there is no "distance" column to sort on.
    if frame.empty:
        return pd.DataFrame(columns=result_columns)
    return frame.sort_values(by="distance", ascending=True)
# Input widget: a voice clip that can be uploaded or recorded from the microphone.
_voice_clip_input = gr.Audio(
    sources=["upload", "microphone"],
    label="Record or upload a clip of your voice",
)

# Output table: one row per match. The "audio" column is typed as markdown,
# presumably so the <audio> tag built in audio_search() renders as a player.
_matches_table = gr.Dataframe(
    datatype=["str", "markdown", "str", "number", "str"],
    headers=["path", "audio", "sentence", "distance", "vector_id"],
)

# Wire the search function between the two components and start the app.
demo = gr.Interface(
    outputs=_matches_table,
    inputs=_voice_clip_input,
    fn=audio_search,
)
demo.launch()
|