# voice-match — app.py
# Hugging Face Space by freddyaboulton (revision 4d71280, 1.53 kB)
import gradio as gr
from huggingface_hub import hf_hub_download
from audio_index import AudioEmbeddingSystem
from search import search
import pandas as pd
import numpy as np
# Both artifacts live in the same dataset repo; fetch each from the Hub cache.
_AUDIO_REPO = "freddyaboulton/common-voice-english-audio"
# SQLite database holding the original row metadata (path, sentence, ...).
db_file = hf_hub_download(
    repo_id=_AUDIO_REPO,
    filename="audio_db.sqlite",
    repo_type="dataset",
)
# Pre-built FAISS index of audio embeddings for nearest-neighbour search.
index_file = hf_hub_download(
    repo_id=_AUDIO_REPO,
    filename="audio_faiss.index",
    repo_type="dataset",
)
# Search backend wired to the downloaded database and index files.
audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)
def audio_search(audio_tuple):
    """Find the voices in the index most similar to a recorded clip.

    Parameters
    ----------
    audio_tuple : tuple[int, np.ndarray]
        ``(sample_rate, samples)`` as produced by ``gr.Audio``. Samples are
        int16 PCM by default (gradio's numpy format) but already-normalized
        float arrays are accepted too.

    Returns
    -------
    pd.DataFrame
        One row per match with ``path``, ``audio`` (embedded player markup),
        ``sentence``, ``distance`` and ``vector_id``, sorted by ascending
        distance (closest match first).
    """
    sample_rate, array = audio_tuple
    # Keep at most the first 10 seconds so embedding stays fast.
    array = array[: int(sample_rate * 10)]
    # Stereo uploads arrive as (samples, channels); downmix to mono —
    # NOTE(review): assumes the embedding system expects 1-D audio, confirm.
    if array.ndim > 1:
        array = array.mean(axis=1)
    # Normalize int16 PCM to [-1, 1); float input is assumed normalized
    # already, so only cast it (the original divided unconditionally, which
    # would silently shrink float input by 32768x).
    if np.issubdtype(array.dtype, np.integer):
        array = array.astype(np.float32) / 32768.0
    else:
        array = array.astype(np.float32)
    rows = audio_embedding_system.search((sample_rate, array))
    orig_rows = search(rows)
    # Index the metadata rows by path once: O(n) dict lookups instead of the
    # O(n^2) nested scan.
    by_path = {orig_row["path"]: orig_row for orig_row in orig_rows}
    for row in rows:
        match = by_path.get(row["path"])
        if match is not None:
            row["sentence"] = match["sentence"]
            row["audio"] = [
                "<audio src=" + match["audio"]["src"] + " controls />"
            ]
    return pd.DataFrame(rows).sort_values(by="distance", ascending=True)
# Input: a voice clip, recorded live or uploaded as a file.
voice_input = gr.Audio(
    label="Record or upload a clip of your voice", sources=["upload", "microphone"]
)
# Output: one table row per match; the "audio" column renders as markdown so
# the embedded <audio> player is interactive.
results_table = gr.Dataframe(
    headers=["path", "audio", "sentence", "distance", "vector_id"],
    datatype=["str", "markdown", "str", "number", "str"],
)
demo = gr.Interface(fn=audio_search, inputs=voice_input, outputs=results_table)
demo.launch()