Spaces:

freddyaboulton
/

voice-match

Running on CPU Upgrade

App Files Files Community

freddyaboulton HF Staff committed on Apr 11

Commit

e1ef382

1 Parent(s): 76339ca

Add Prompts

Browse files

Files changed (2) hide show

app.py +15 -6
search.py +11 -12

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from audio_index import AudioEmbeddingSystem
-from search import search
 import pandas as pd
 import numpy as np
@@ -19,7 +19,10 @@ index_file = hf_hub_download(
 audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)
-def audio_search(audio_tuple):
     sample_rate, array = audio_tuple
     if array.dtype == np.int16:
         array = array.astype(np.float32) / 32768.0
@@ -41,13 +44,18 @@ def audio_search(audio_tuple):
         by="distance", ascending=True
     )
 iface = gr.Interface(
     fn=audio_search,
-    inputs=gr.Audio(
         label="Record or upload a clip of your voice", sources=["microphone", "upload"]
-    ),
     outputs=gr.Dataframe(
         headers=["path", "audio", "sentence", "distance"],
         datatype=["str", "html", "str", "number"],
     ),
@@ -56,7 +64,7 @@ with gr.Blocks() as demo:
     gr.HTML(
         f"""
         <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
-        <img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="width: 95px; height: 30px; margin-right: 10px"> Voice Match
         </h1>
         """
     )
@@ -75,5 +83,6 @@ with gr.Blocks() as demo:
         """
     )
     iface.render()
 demo.launch(allowed_paths=["Karaoke_Huggy.png"])

 import gradio as gr
 from huggingface_hub import hf_hub_download
 from audio_index import AudioEmbeddingSystem
+from search import search, get_prompt
 import pandas as pd
 import numpy as np
 audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)
+def audio_search(audio_tuple, prompt: str):
+    if audio_tuple is None:
+        return gr.skip()
     sample_rate, array = audio_tuple
     if array.dtype == np.int16:
         array = array.astype(np.float32) / 32768.0
         by="distance", ascending=True
     )
+sample_text = gr.Textbox(
+    label="Prompt",
+    info="Hit Enter to get a prompt from the common voice dataset",
+    value=get_prompt(),
+)
 iface = gr.Interface(
     fn=audio_search,
+    inputs=[gr.Audio(
         label="Record or upload a clip of your voice", sources=["microphone", "upload"]
+    ), sample_text],
     outputs=gr.Dataframe(
+        show_label=False,
         headers=["path", "audio", "sentence", "distance"],
         datatype=["str", "html", "str", "number"],
     ),
     gr.HTML(
         f"""
         <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
+        <img src="/gradio_api/file=Karaoke_Huggy.png" alt="Voice Match" style="height: 100px; margin-right: 10px"> Voice Match
         </h1>
         """
     )
         """
     )
     iface.render()
+    sample_text.submit(fn=get_prompt, inputs=None, outputs=sample_text)
 demo.launch(allowed_paths=["Karaoke_Huggy.png"])

search.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import requests
 import os
 headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
 dataset = "mozilla-foundation/common_voice_17_0"
@@ -27,6 +27,16 @@ def _search(paths: list[str]):
     return data.get("rows", [])
 def search(rows: list[dict]):
     file_paths_to_find = [row["path"] for row in rows]
     train_paths = []
@@ -41,14 +51,3 @@ def search(rows: list[dict]):
     validation_rows = _search(validation_paths)
     return train_rows + validation_rows
-    paths_in_clause = ", ".join([f"'{path}'" for path in file_paths_to_find])
-    where_clause = f'"path" IN ({paths_in_clause})'
-    api_url = f"https://datasets-server.huggingface.co/filter?dataset={dataset}&config={config}&split={split}&where={where_clause}&offset=0"
-    response = requests.get(api_url, headers=headers)
-    response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
-    data = response.json()
-    return data.get("rows", [])

 import requests
 import os
+import random
 headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
 dataset = "mozilla-foundation/common_voice_17_0"
     return data.get("rows", [])
+def get_prompt():
+    """Get a random sentence from the Common Voice dataset"""
+    offset = random.randint(0, 100_000)
+    api_url = f"https://datasets-server.huggingface.co/rows?dataset={dataset}&config={config}&split=train&offset={offset}&length=1"
+    response = requests.get(api_url, headers=headers)
+    response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
+    data = response.json()
+    return data.get("rows", [])[0]["row"]["sentence"]
 def search(rows: list[dict]):
     file_paths_to_find = [row["path"] for row in rows]
     train_paths = []
     validation_rows = _search(validation_paths)
     return train_rows + validation_rows