Spaces:

freddyaboulton
/

voice-match

Running on CPU Upgrade

App Files Files Community

voice-match / app.py

freddyaboulton HF Staff

edits

2d57870 3 months ago

raw

history blame contribute delete

3.6 kB

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from audio_index import AudioEmbeddingSystem
	from search import search, get_prompt
	import pandas as pd
	import numpy as np

	# Both search artifacts are stored in the same Hugging Face dataset repository.
	_dataset_repo = "freddyaboulton/common-voice-english-audio"


	def _download_artifact(artifact_name):
	    """Download one file from the dataset repo and return hf_hub_download's result."""
	    return hf_hub_download(
	        repo_id=_dataset_repo,
	        filename=artifact_name,
	        repo_type="dataset",
	    )


	# Fetched in this order at import time: the .sqlite file first, then the .index file.
	db_file = _download_artifact("audio_db_full.sqlite")
	index_file = _download_artifact("audio_faiss_full.index")

	# Shared search backend used by audio_search(); built once when the module loads.
	audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)


	def audio_search(audio_tuple, prompt: str):
	    """Look up the voices closest to and farthest from a clip and return a styled table.

	    Args:
	        audio_tuple: ``(sample_rate, array)`` pair, or ``None`` when no clip
	            was supplied. ``int16`` arrays are rescaled to ``float32``.
	        prompt: Not used by the search. It is accepted because the interface
	            passes the suggested-prompt textbox as a second input.

	    Returns:
	        ``gr.skip()`` when ``audio_tuple`` is ``None``. Otherwise a pandas
	        ``Styler`` over the columns ``path``, ``audio``, ``sentence`` and
	        ``distance``, sorted by ascending distance, with the ``path`` cell
	        tinted green for rows from the most-similar query and red for the
	        rest.
	    """
	    if audio_tuple is None:
	        return gr.skip()

	    sample_rate, array = audio_tuple
	    if array.dtype == np.int16:
	        # Scale 16-bit samples by 1/32768 so they become float32 values.
	        array = array.astype(np.float32) / 32768.0

	    query = (sample_rate, array)
	    most_similar = audio_embedding_system.search(query)
	    least_similar = audio_embedding_system.search(query, least_similar=True)

	    # Build a fresh list instead of extending the search result in place.
	    rows = [*most_similar, *least_similar]
	    # Remember which paths came from the most-similar query so the colouring
	    # does not depend on a hard-coded row count.
	    similar_paths = {row["path"] for row in most_similar}

	    # Index the looked-up dataset rows by path for O(1) matching. When a path
	    # occurs more than once the last entry wins, as in a nested scan
	    # without a break.
	    by_path = {orig["row"]["path"]: orig["row"] for orig in search(rows)}
	    for row in rows:
	        orig_row = by_path.get(row["path"])
	        if orig_row is None:
	            # No dataset row for this path: leave the result row untouched.
	            continue
	        row["sentence"] = orig_row["sentence"]
	        row["audio"] = [
	            "<audio src=" + orig_row["audio"][0]["src"] + " controls />"
	        ]

	    # reindex() rather than df[[...]]: an empty result, or results where no
	    # row found a dataset match, yield empty/NaN columns instead of KeyError.
	    columns = ["path", "audio", "sentence", "distance"]
	    df = (
	        pd.DataFrame(rows)
	        .reindex(columns=columns)
	        .sort_values(by="distance", ascending=True)
	    )

	    def style_path_column(col):
	        """Return one CSS declaration per cell of the ``path`` column."""
	        return [
	            "background-color: rgba(0, 255, 0, 0.3)"
	            if path in similar_paths
	            else "background-color: rgba(255, 0, 0, 0.3)"
	            for path in col
	        ]

	    # Apply the styling to the 'path' column and return the Styler object.
	    return df.style.apply(style_path_column, subset=["path"])

	# Textbox pre-filled with a prompt from get_prompt(); it is also wired up
	# below so that submitting it replaces its value with a new prompt.
	sample_text = gr.Textbox(
	    label="Suggested Prompt",
	    info="Unsure what to record? Use this prompt. Hit Enter to get a new one from the common voice dataset",
	    value=get_prompt(),
	)

	# Interface that feeds the audio clip and the prompt textbox to audio_search
	# and shows the returned table. The "audio" column is declared as html.
	iface = gr.Interface(
	    fn=audio_search,
	    inputs=[
	        gr.Audio(
	            label="Record or upload a clip of your voice",
	            sources=["microphone", "upload"],
	        ),
	        sample_text,
	    ],
	    outputs=gr.Dataframe(
	        show_label=False,
	        headers=["path", "audio", "sentence", "distance"],
	        datatype=["str", "html", "str", "number"],
	    ),
	)

	with gr.Blocks() as demo:
	    # The markup literals below are plain strings (no f-prefix): they contain
	    # no placeholders. Their lines are deliberately left unindented because
	    # whitespace inside a triple-quoted literal is part of the string.
	    gr.HTML(
	"""
	<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
	<img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="height: 100px; margin-right: 10px"> Voice Match
	</h1>
	"""
	    )
	    gr.HTML(
	"""
	<h2 style='text-align: center'>
	Powered by <a href="https://huggingface.co/rimelabs/rimecaster">RimeCaster</a>
	</h2>
	"""
	    )
	    gr.Markdown(
	"""
	<div style='text-align: center'>
	Record or upload an English clip of your voice and we'll find the most similar (and dissimilar) voices in the <a href="https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0">Common Voice dataset</a>.
	</div>
	"""
	    )
	    # Place the prebuilt interface inside this Blocks layout.
	    iface.render()
	    # Submitting the prompt textbox swaps in a new prompt from get_prompt().
	    sample_text.submit(fn=get_prompt, inputs=None, outputs=sample_text)

	# allowed_paths lists the logo file that the header <img> requests.
	demo.launch(allowed_paths=["Karaoke_Huggy.png"])