"""Tamil speech-to-text Gradio app.

Downloads each audio URL from the request payload, transcribes it with a
fine-tuned Whisper model, and returns the transcripts as a JSON string.
"""
import json
from io import BytesIO

import gradio as gr
import requests
import torch  # noqa: F401  -- backend for the transformers pipeline
from transformers import pipeline

# Build the ASR pipeline once at startup; 30 s chunking lets the model
# handle audio longer than Whisper's native receptive window.
transcribe = pipeline(
    task="automatic-speech-recognition",
    model="vasista22/whisper-tamil-medium",
    chunk_length_s=30,
    device="cpu",
)
# Force Tamil transcription regardless of the model's language detection.
transcribe.model.config.forced_decoder_ids = (
    transcribe.tokenizer.get_decoder_prompt_ids(language="ta", task="transcribe")
)


def process_audio(params):
    """Transcribe every audio URL contained in *params*.

    Args:
        params: JSON string of the form
            ``{"urls": [...], "normalfileID": [...]}``. ``normalfileID``
            is optional; when present it is zipped pairwise with ``urls``.

    Returns:
        JSON string: ``{"solutions": [{"audioURL": ..., "solutions": <text>,
        "qcUserId": None, "normalfileID": ...}, ...]}``.

    Raises:
        requests.HTTPError: if any audio URL does not return a 2xx status.
    """
    params = json.loads(params)
    audio_files = params.get("urls", [])
    # Pad missing file IDs with None so zip() still yields every URL.
    file_ids = params.get("normalfileID") or [None] * len(audio_files)
    print(params)

    solutions = []
    for audio, file_id in zip(audio_files, file_ids):
        print("API hit")
        print(audio)
        # Fetch the raw audio bytes; fail fast on HTTP errors instead of
        # feeding an error page's bytes to the model, and bound the wait.
        response = requests.get(audio, timeout=60)
        response.raise_for_status()
        text = transcribe(BytesIO(response.content))["text"]
        solutions.append(
            {
                "audioURL": audio,
                "solutions": text,
                "qcUserId": None,
                "normalfileID": file_id,
            }
        )
    return json.dumps({"solutions": solutions})


# NOTE(review): the previous label advertised keys ('audio_files', 'api',
# 'job_id') that process_audio never reads; it reads 'urls' and
# 'normalfileID'. Label and title updated to describe the real behavior
# (the model is Tamil, not Hindi/English). A hard-coded Deepgram API key
# that lived in commented-out code has been removed -- rotate that key.
inputt = gr.Textbox(
    label=(
        "Parameter in json format "
        'Eg. {"urls": ["file1.mp3", "file2.wav"], "normalfileID": [1, 2]}'
    )
)
outputt = gr.JSON()
application = gr.Interface(
    fn=process_audio,
    inputs=inputt,
    outputs=outputt,
    title="Tamil Audio Transcription with API Integration",
)
application.launch()