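# "Really Fast Whisper": speech-to-text demo combining a FastRTC streaming tab
# (transcribe-on-pause over WebRTC) with a classic Gradio file-upload tab, both
# backed by a Whisper large-v3 Hugging Face Inference Endpoint.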
import os
from pathlib import Path
from httpx import AsyncClient
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastrtc import (
    AdditionalOutputs,
    ReplyOnPause,
    Stream,
    audio_to_bytes,
    get_turn_credentials_async,
    get_turn_credentials,
)
from gradio.utils import get_space
from languages import LANGUAGES
cur_dir = Path(__file__).parent
load_dotenv()
client = AsyncClient(timeout=30)
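
# POST one utterance (sample rate + numpy samples) to the Whisper Inference
# Endpoint and return the plain-text transcription.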
async def transcribe_file(audio: tuple[int, np.ndarray], language: str):
    response = await client.post(
        url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text", "language": language},
    )
    return response.text
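
# Streaming handler invoked by ReplyOnPause after each pause in speech:
# transcribe the buffered audio and yield the updated transcript back to the
# UI via AdditionalOutputs.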
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    text = await transcribe_file(audio, language)
    yield AdditionalOutputs(transcript + " " + text)
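
# Shared transcript textbox: passed in as an additional input (the current
# text) and updated as an additional output.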
transcript = gr.Textbox(label="Transcript")
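
# Send-only WebRTC audio stream. Client TURN credentials are fetched
# asynchronously per connection; server credentials are issued with a
# 7-day TTL (604_800 seconds).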
stream = Stream(
    ReplyOnPause(transcribe, input_sample_rate=48_000),  # WebRTC audio arrives at 48 kHz
    modality="audio",
    mode="send",
    additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
    additional_outputs=[transcript],
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,
    server_rtc_configuration=get_turn_credentials(ttl=604_800),
    concurrency_limit=20 if get_space() else None,
    time_limit=300,
    ui_args={"title": ""},
)
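
# File-upload / microphone-recording alternative that calls the same endpoint.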
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[
        gr.Audio(label="Upload Audio", sources=["upload", "microphone"]),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
    ],
    outputs=gr.Textbox(label="Transcript"),
)
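
# Page layout: header with logo, then two tabs (live streaming and file upload).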
with gr.Blocks() as demo:
    gr.HTML(
        """
        <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
            <img src="/gradio_api/file=AV_Huggy.png" alt="Streaming Huggy" style="height: 50px; margin-right: 10px"> Really Fast Whisper
        </h1>
        """
    )
    gr.HTML(
        """
        <h2 style='text-align: center'>
            Powered by <a href="https://huggingface.co/hfendpoints/whisper-large-v3">HF Inference Endpoints</a> and <a href="https://fastrtc.org/">FastRTC</a>
        </h2>
        """
    )
    with gr.Tabs():
        with gr.Tab("Streaming"):
            gr.Markdown(
                "Grant access to the microphone and speak naturally. The transcript will be updated as you pause."
            )
            stream.ui.render()
        with gr.Tab("File Upload"):
            iface.render()
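
# AV_Huggy.png must be listed in allowed_paths so the header <img> can be served.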
if __name__ == "__main__":
    demo.launch(allowed_paths=["AV_Huggy.png"])