# Hugging Face Space (page-scrape header residue: "Spaces: Sleeping")
import json
from io import BytesIO

import requests
import torch
from transformers import pipeline
# Tamil speech-to-text pipeline on CPU; 30-second chunking lets the model
# handle audio longer than Whisper's native input window.
transcribe = pipeline(
    task="automatic-speech-recognition",
    model="vasista22/whisper-tamil-medium",
    chunk_length_s=30,
    device="cpu",
)
# Pin the decoder prompt to Tamil transcription (no language auto-detect,
# no translation).
transcribe.model.config.forced_decoder_ids = transcribe.tokenizer.get_decoder_prompt_ids(
    language="ta", task="transcribe"
)
def process_audio(params):
    """Download each audio URL and transcribe it with the Tamil ASR pipeline.

    Args:
        params: JSON-encoded string with keys:
            - "urls": list of audio file URLs to transcribe.
            - "normalfileID" (optional): list of file ids parallel to "urls";
              when absent or empty, ids default to None.

    Returns:
        JSON-encoded string of the form
        {"solutions": [{"audioURL": ..., "solutions": <transcript text>,
                        "qcUserId": None, "normalfileID": ...}, ...]}.

    Raises:
        json.JSONDecodeError: if ``params`` is not valid JSON.
        requests.HTTPError / requests.Timeout: if an audio download fails.
    """
    # NOTE(review): a hardcoded Deepgram API key previously lived here in a
    # comment — removed; that credential should be rotated and, if needed
    # again, supplied via an environment variable.
    params = json.loads(params)
    audio_files = params.get("urls", [])
    # Pair every URL with a file id; pad with None when ids were not supplied.
    file_ids = params.get("normalfileID") or [None] * len(audio_files)
    print(params)

    solutions = []
    for audio, file_id in zip(audio_files, file_ids):
        print("API hit")
        print(audio)
        # Fail fast on HTTP errors so an error page is never fed to the ASR
        # model; the timeout guards against a hung remote server.
        response = requests.get(audio, timeout=60)
        response.raise_for_status()
        text = transcribe(BytesIO(response.content))["text"]
        solutions.append(
            {
                "audioURL": audio,
                "solutions": text,
                "qcUserId": None,
                "normalfileID": file_id,
            }
        )
    return json.dumps({"solutions": solutions})
import gradio as gr

# The example now matches the keys process_audio actually reads
# ("urls", optional "normalfileID") — the old example showed keys the
# function ignores.
inputt = gr.Textbox(
    label=(
        "Parameters in JSON format, e.g. "
        '{"urls": ["file1.mp3", "file2.wav"], "normalfileID": ["1001", "1002"]}'
    )
)
outputt = gr.JSON()
# Title corrected: the model is Tamil (whisper-tamil-medium, language="ta"),
# not Hindi/English; "Intergration" typo fixed.
application = gr.Interface(
    fn=process_audio,
    inputs=inputt,
    outputs=outputt,
    title="Tamil Audio Transcription with API Integration",
)
application.launch()