# Tamil_S2T / app.py
# Author: shourya-goenka — "Update app.py" (commit cdbca09, verified)
import torch
from transformers import pipeline
import json
import requests
from io import BytesIO
# Build the Tamil speech-to-text pipeline once at import time (CPU inference,
# 30-second chunking for long audio).
transcribe = pipeline(
    task="automatic-speech-recognition",
    model="vasista22/whisper-tamil-medium",
    chunk_length_s=30,
    device="cpu",
)
# Pin the decoder prompt to Tamil transcription so Whisper never auto-detects
# the language or falls back to translation.
transcribe.model.config.forced_decoder_ids = transcribe.tokenizer.get_decoder_prompt_ids(
    language="ta",
    task="transcribe",
)
def process_audio(params):
    """Download and transcribe a batch of Tamil audio files.

    Args:
        params: JSON string with keys:
            "urls": list of audio file URLs to transcribe.
            "normalfileID" (optional): list of file IDs parallel to "urls";
                when absent or empty, None is used for every file.

    Returns:
        JSON string of the form
        {"solutions": [{"audioURL": ..., "solutions": <transcript>,
                        "qcUserId": None, "normalfileID": ...}, ...]}
        with one entry per input URL.

    Raises:
        json.JSONDecodeError: if `params` is not valid JSON.
        requests.HTTPError / requests.Timeout: if an audio download fails.
    """
    params = json.loads(params)
    audio_urls = params.get("urls", [])
    # Pair each URL with its file ID; default to None per file when the
    # caller supplies no (or an empty) "normalfileID" list.
    file_ids = params.get("normalfileID") or [None] * len(audio_urls)
    print(params)

    solutions = []
    for audio_url, file_id in zip(audio_urls, file_ids):
        print("API hit")
        print(audio_url)
        # Download the audio and transcribe it in-memory (no temp files).
        # Timeout + status check so a dead URL fails loudly instead of
        # feeding an HTML error page to the model as "audio".
        response = requests.get(audio_url, timeout=60)
        response.raise_for_status()
        text = transcribe(BytesIO(response.content))["text"]
        solutions.append(
            {
                "audioURL": audio_url,
                "solutions": text,
                "qcUserId": None,
                "normalfileID": file_id,
            }
        )
    return json.dumps({"solutions": solutions})
import gradio as gr

# Example JSON now matches the keys process_audio actually reads
# ("urls" / "normalfileID"), instead of the old misleading
# audio_files/api/job_id example.
inputt = gr.Textbox(
    label=(
        "Parameters in JSON format, e.g. "
        '{"urls": ["file1.mp3", "file2.wav"], "normalfileID": ["id1", "id2"]}'
    )
)
outputt = gr.JSON()
# Title corrected: the model (vasista22/whisper-tamil-medium) is Tamil-only,
# not Hindi/English; also fixes the "Intergration" typo.
application = gr.Interface(
    fn=process_audio,
    inputs=inputt,
    outputs=outputt,
    title="Tamil Audio Transcription with API Integration",
)
application.launch()