# Spaces: Running on CPU Upgrade
import os | |
from pathlib import Path | |
from httpx import AsyncClient | |
import gradio as gr | |
import numpy as np | |
from dotenv import load_dotenv | |
from fastrtc import ( | |
AdditionalOutputs, | |
ReplyOnPause, | |
Stream, | |
audio_to_bytes, | |
get_turn_credentials_async, | |
get_turn_credentials, | |
) | |
from gradio.utils import get_space | |
from languages import LANGUAGES | |
# Load variables from a local .env file into the process environment before
# anything else is configured; transcribe_file reads HF_TOKEN via os.getenv.
load_dotenv()

# Directory that contains this script.
cur_dir = Path(__file__).parent

# Single shared async HTTP client, reused by every transcription request.
client = AsyncClient(timeout=30)
async def transcribe_file(audio: tuple[int, np.ndarray], language: str):
    """Transcribe an audio clip with the hosted Whisper inference endpoint.

    Args:
        audio: Audio as an ``(int, np.ndarray)`` tuple (presumably sample
            rate and samples, as the Gradio/FastRTC audio inputs deliver it).
            ``None`` is tolerated because the upload form can be submitted
            without any audio attached.
        language: Value forwarded unchanged as the ``language`` form field.

    Returns:
        The response body as text, or an empty string when ``audio`` is
        ``None``.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
    """
    # Nothing to send: skip the request instead of failing inside
    # audio_to_bytes.
    if audio is None:
        return ""

    response = await client.post(
        url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text", "language": language},
    )
    # Surface HTTP failures rather than returning the error body as if it
    # were a transcript.
    response.raise_for_status()
    return response.text
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Transcribe one audio segment and yield the extended transcript.

    Args:
        audio: Audio tuple passed straight through to ``transcribe_file``.
        transcript: Transcript text accumulated so far; may be empty or
            ``None`` before anything has been transcribed.
        language: Language value passed straight through to
            ``transcribe_file``.

    Yields:
        A single ``AdditionalOutputs`` wrapping the previous transcript and
        the newly transcribed text joined by one space.
    """
    text = await transcribe_file(audio, language)
    # Join only the non-empty pieces so the first update has no leading
    # space, an empty result adds no trailing space, and a missing (None)
    # transcript does not raise.
    combined = " ".join(piece for piece in (transcript, text) if piece)
    yield AdditionalOutputs(combined)
# Textbox used both as an extra handler input (the running transcript) and as
# the component that receives each new AdditionalOutputs value.
transcript = gr.Textbox(label="Transcript")


def _take_latest(_current, incoming):
    """Return the second argument, discarding the first."""
    return incoming


# NOTE(review): 48_100 is an unusual sample rate (48_000 and 44_100 are the
# common ones) -- confirm this value is intentional.
_pause_handler = ReplyOnPause(transcribe, input_sample_rate=48_100)
_stream_language = gr.Dropdown(choices=LANGUAGES, label="Language")

# Fetched once at import time; ttl presumably in seconds (604_800 s = 7 days).
_server_rtc_configuration = get_turn_credentials(ttl=604_800)

# Concurrency is capped at 20 only when get_space() reports a Space.
_concurrency_limit = 20 if get_space() else None

stream = Stream(
    _pause_handler,
    modality="audio",
    mode="send",
    additional_inputs=[transcript, _stream_language],
    additional_outputs=[transcript],
    additional_outputs_handler=_take_latest,
    rtc_configuration=get_turn_credentials_async,
    server_rtc_configuration=_server_rtc_configuration,
    concurrency_limit=_concurrency_limit,
    time_limit=300,
    ui_args={"title": ""},
)
# File-upload flow: an audio clip plus a language choice go to
# transcribe_file, and the returned text is shown in a textbox.
_upload_inputs = [
    gr.Audio(label="Upload Audio", sources=["upload", "microphone"]),
    gr.Dropdown(choices=LANGUAGES, label="Language"),
]
_upload_output = gr.Textbox(label="Transcript")

iface = gr.Interface(
    fn=transcribe_file,
    inputs=_upload_inputs,
    outputs=_upload_output,
)
# Page header: logo image plus the app title.
_TITLE_HTML = """
<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
<img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
</h1>
"""

# Sub-header crediting the backing services.
_CREDITS_HTML = """
<h2 style='text-align: center'>
Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
</h2>
"""

_STREAMING_HINT = "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."

# Top-level layout: two header blocks, then one tab per transcription mode.
with gr.Blocks() as demo:
    gr.HTML(_TITLE_HTML)
    gr.HTML(_CREDITS_HTML)
    with gr.Tabs():
        # Live microphone streaming, rendered from the FastRTC stream UI.
        with gr.Tab("Streaming"):
            gr.Markdown(_STREAMING_HINT)
            stream.ui.render()
        # One-shot transcription of an uploaded or recorded clip.
        with gr.Tab("File Upload"):
            iface.render()
if __name__ == "__main__":
    # The page header loads AV_Huggy.png through /gradio_api/file=..., so the
    # file is listed explicitly among the paths the server may serve.
    served_files = ["AV_Huggy.png"]
    demo.launch(allowed_paths=served_files)