import os from pathlib import Path from httpx import AsyncClient import gradio as gr import numpy as np from dotenv import load_dotenv from fastrtc import ( AdditionalOutputs, ReplyOnPause, Stream, audio_to_bytes, get_turn_credentials_async, get_turn_credentials, ) from gradio.utils import get_space from languages import LANGUAGES cur_dir = Path(__file__).parent load_dotenv() client = AsyncClient(timeout=30) async def transcribe_file(audio: tuple[int, np.ndarray], language: str): response = await client.post( url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions", headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}, files={"file": audio_to_bytes(audio)}, data={"response_format": "text", "language": language}, ) return response.text async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str): text = await transcribe_file(audio, language) yield AdditionalOutputs(transcript + " " + text) transcript = gr.Textbox(label="Transcript") stream = Stream( ReplyOnPause(transcribe, input_sample_rate=48_100), modality="audio", mode="send", additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")], additional_outputs=[transcript], additional_outputs_handler=lambda a, b: b, rtc_configuration=get_turn_credentials_async, server_rtc_configuration=get_turn_credentials(ttl=604_800), concurrency_limit=20 if get_space() else None, time_limit=300, ui_args={"title": ""}, ) iface = gr.Interface( fn=transcribe_file, inputs=[gr.Audio(label="Upload Audio", sources=["upload", "microphone"]), gr.Dropdown(choices=LANGUAGES, label="Language")], outputs=gr.Textbox(label="Transcript"), ) with gr.Blocks() as demo: gr.HTML( """