import gradio as gr
from huggingface_hub import hf_hub_download
from audio_index import AudioEmbeddingSystem
from search import search
import pandas as pd
import numpy as np

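# Download the prebuilt SQLite metadata database and FAISS index from the dataset repo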
db_file = hf_hub_download(
    repo_id="freddyaboulton/common-voice-english-audio",
    filename="audio_db.sqlite",
    repo_type="dataset",
)
index_file = hf_hub_download(
    repo_id="freddyaboulton/common-voice-english-audio",
    filename="audio_faiss.index",
    repo_type="dataset",
)

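# Embedding/search system backed by the downloaded database and FAISS index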
audio_embedding_system = AudioEmbeddingSystem(db_path=db_file, index_path=index_file)


def audio_search(audio_tuple):
    sample_rate, array = audio_tuple
    # Keep at most the first 10 seconds of the recording
    array = array[: int(sample_rate * 10)]
    # Convert 16-bit PCM samples from gr.Audio to float32 in [-1, 1)
    array = array.astype(np.float32) / 32768.0

    # Find the nearest neighbours of the clip's embedding in the FAISS index
    rows = audio_embedding_system.search((sample_rate, array))
    # Fetch the original dataset rows so we can attach transcripts and audio URLs
    orig_rows = search(rows)
    # Join the search hits with the original dataset rows by path
    for row in rows:
        path = row["path"]
        for orig_row in orig_rows:
            if orig_row["path"] == path:
                row["sentence"] = orig_row["sentence"]
                # Render the match as an inline HTML audio player in the results table
                row["audio"] = (
                    '<audio src="' + orig_row["audio"]["src"] + '" controls />'
                )
    # Closest matches (smallest distance) come first
    return pd.DataFrame(rows).sort_values(by="distance", ascending=True)


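# Simple UI: record or upload a clip, show the nearest matches in a table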
demo = gr.Interface(
    fn=audio_search,
    inputs=gr.Audio(
        label="Record or upload a clip of your voice", sources=["upload", "microphone"]
    ),
    outputs=gr.Dataframe(
        headers=["path", "audio", "sentence", "distance", "vector_id"],
        datatype=["str", "markdown", "str", "number", "str"],
    ),
)
demo.launch()