"""Gradio app that translates text or speech between English and Telugu.

The pipeline is: optional speech recognition -> translation (local
seq2seq model, with GoogleTranslator as a fallback) -> transliteration
lookup -> text-to-speech.
"""

import os
import subprocess
import tempfile

import gradio as gr
import speech_recognition as sr
import torch
from deep_translator import GoogleTranslator
from gtts import gTTS
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Language mapping: translation direction -> language codes and model id.
LANG_MAP = {
    'en-te': {
        'source': 'en',
        'target': 'te',
        'model': 'Helsinki-NLP/opus-mt-en-te'
    },
    'te-en': {
        'source': 'te',
        'target': 'en',
        # NOTE(review): confirm this checkpoint is actually fine-tuned for
        # Telugu -> English translation. If it fails to load,
        # TranslationHandler.translate falls back to GoogleTranslator.
        'model': 'Google/byt5-base',
        'target_language': 'en'
    }
}

# Locale passed to the speech recognizer for each source language, so that
# audio is transcribed in the language it is spoken in.
SPEECH_LOCALES = {
    'en': 'en-US',
    'te': 'te-IN',
}


class TranslationHandler:
    """Load one seq2seq model per direction in LANG_MAP and translate text.

    Directions whose model could not be loaded, and translations that fail
    at inference time, are served by GoogleTranslator instead.
    """

    def __init__(self):
        """Preload the tokenizer and model for every direction in LANG_MAP.

        A load failure is printed and the direction is simply left out of
        ``self.models`` / ``self.tokenizers``; it does not abort start-up.
        """
        self.models = {}
        self.tokenizers = {}

        for direction, config in LANG_MAP.items():
            try:
                tokenizer = AutoTokenizer.from_pretrained(config['model'])
                model = AutoModelForSeq2SeqLM.from_pretrained(config['model'])
            except Exception as e:
                print(f"Error loading model for {direction}: {e}")
            else:
                # Register both together so the two dicts never disagree.
                self.models[direction] = model
                self.tokenizers[direction] = tokenizer

    @staticmethod
    def _fallback_translate(text, direction):
        """Translate ``text`` with GoogleTranslator for ``direction``."""
        config = LANG_MAP[direction]
        return GoogleTranslator(
            source=config['source'],
            target=config['target']
        ).translate(text)

    def translate(self, text, direction):
        """Translate ``text`` along ``direction`` (a key of LANG_MAP).

        Args:
            text: The text to translate.
            direction: Translation direction, e.g. ``'en-te'``.

        Returns:
            The translated text, from the local model when it is loaded and
            succeeds, otherwise from GoogleTranslator.

        Raises:
            KeyError: If ``direction`` is not a key of LANG_MAP.
        """
        # No local model for this direction: go straight to the fallback.
        if direction not in self.models:
            return self._fallback_translate(text, direction)

        try:
            tokenizer = self.tokenizers[direction]
            model = self.models[direction]

            if direction == 'te-en':
                # The te-en model is given an explicit task prefix.
                input_text = f"translate Telugu to English: {text}"
            else:
                input_text = text

            inputs = tokenizer(
                input_text,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )

            # Inference only; no gradients needed.
            with torch.no_grad():
                outputs = model.generate(**inputs)

            return tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            # Fall back to deep_translator if model translation fails.
            print(f"Model translation failed: {e}")
            return self._fallback_translate(text, direction)


def convert_to_wav(input_file):
    """Return a path to a WAV version of ``input_file``.

    Files that already have a ``.wav`` extension are returned unchanged.
    Anything else is converted with ffmpeg to 16 kHz 16-bit PCM, written
    next to the input with the extension replaced by ``.wav``.

    Args:
        input_file: Path of the audio file, or a falsy value.

    Returns:
        The WAV file path, or ``None`` if ``input_file`` is falsy or the
        conversion failed.
    """
    if not input_file:
        return None

    base, file_ext = os.path.splitext(input_file)
    if file_ext.lower() == '.wav':
        return input_file

    wav_file = base + '.wav'
    command = [
        'ffmpeg',
        '-y',  # overwrite a stale output instead of prompting on stdin
        '-i', input_file,
        '-acodec', 'pcm_s16le',
        '-ar', '16000',
        wav_file,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError, ValueError) as e:
        # CalledProcessError: ffmpeg exited non-zero; OSError: ffmpeg missing.
        print(f"Audio conversion error: {e}")
        return None
    return wav_file


# Initialize translation handler (loads the models at import time).
translation_handler = TranslationHandler()


def get_transliteration(text, lang):
    """Return a romanized form of ``text`` when one is known.

    Only Telugu (``lang == 'te'``) is handled, through a small table of
    exact phrases. Any other text or language is returned unchanged.
    """
    if lang == 'te':
        transliterations = {
            "జాగ్రత్తగా ఉండండి": "jāgrattagā uṇḍaṇḍi",
            "నమస్కారం": "namaskāraṁ",
            "ధన్యవాదాలు": "dhanyavādālu"
        }
        return transliterations.get(text, text)
    return text


def text_to_speech(text, lang):
    """Synthesize ``text`` with gTTS and return the path of the MP3 file.

    Args:
        text: The text to speak.
        lang: ``'en'`` for English; any other value is spoken as Telugu.

    Returns:
        Path of a newly created MP3 file in the system temp directory, or
        ``None`` if synthesis failed. ``None`` rather than an error string
        is returned because the result is fed to an audio output component
        that expects a file path.
    """
    tts_lang = 'en' if lang == 'en' else 'te'
    audio_file = None
    try:
        tts = gTTS(text, lang=tts_lang)
        # A unique file per call keeps concurrent requests from overwriting
        # each other's audio.
        fd, audio_file = tempfile.mkstemp(
            prefix="translated_audio_", suffix=".mp3"
        )
        os.close(fd)
        tts.save(audio_file)
        return audio_file
    except Exception as e:
        print(f"TTS error: {str(e)}")
        # Do not leave an empty or partial file behind.
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
        return None


def process_text_or_audio(input_text, audio_file, direction):
    """Translate typed text or recorded speech and synthesize the result.

    When ``audio_file`` is given it takes precedence: it is converted to
    WAV if needed, transcribed in the source language of ``direction``, and
    the transcript replaces ``input_text``.

    Args:
        input_text: Text typed by the user (may be empty or ``None``).
        audio_file: Path of an uploaded audio file, or a falsy value.
        direction: Translation direction, a key of LANG_MAP.

    Returns:
        A ``(translated_text, transliteration, audio_path)`` tuple. On any
        failure the first element is an error message and the other two are
        ``None``.
    """
    if direction not in LANG_MAP:
        return f"Unsupported translation direction: {direction}", None, None

    src_lang = LANG_MAP[direction]['source']
    tgt_lang = LANG_MAP[direction]['target']

    # If audio is provided, attempt speech recognition.
    if audio_file:
        try:
            wav_file = convert_to_wav(audio_file)
            if not wav_file:
                return "Audio conversion failed", None, None

            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_file) as source:
                audio = recognizer.record(source)
            # Recognize in the *source* language; the recognizer's default
            # locale is English, which mis-transcribes Telugu speech.
            input_text = recognizer.recognize_google(
                audio,
                language=SPEECH_LOCALES.get(src_lang, 'en-US')
            )
        except Exception as e:
            return f"Audio Recognition Error: {str(e)}", None, None

    # Nothing to translate: report it instead of calling the translator.
    if not input_text or not input_text.strip():
        return "Please enter text or upload an audio file", None, None

    try:
        translated_text = translation_handler.translate(input_text, direction)
        transliteration = get_transliteration(translated_text, tgt_lang)
        audio_output = text_to_speech(translated_text, tgt_lang)
        return translated_text, transliteration, audio_output
    except Exception as e:
        return f"Processing Error: {str(e)}", None, None


# Create Gradio Interface
demo = gr.Interface(
    fn=process_text_or_audio,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Audio(type="filepath", label="Upload Audio File (Optional)"),
        gr.Radio(["en-te", "te-en"], value="en-te", label="Translation Direction")
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
        gr.Textbox(label="Transliteration"),
        gr.Audio(label="Text-to-Speech Output")
    ],
    title="Language Translator and Speech Tool",
    description="Translate text/audio between English and Telugu with transliteration and TTS support."
)

# Launch the app
if __name__ == "__main__":
    demo.launch()