mshanker1 committed on
Commit
1af98f2
·
verified ·
1 Parent(s): bc7c179

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -1,4 +1,3 @@
1
- from huggingface_hub import HfApi, HfFolder
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
@@ -8,10 +7,19 @@ from deep_translator import GoogleTranslator
8
  import os
9
  import subprocess
10
 
11
- # Language Mapping
12
  LANG_MAP = {
13
- 'en-te': {'source': 'en', 'target': 'te', 'model': 'Helsinki-NLP/opus-mt-en-te'},
14
- 'te-en': {'source': 'te', 'target': 'en', 'model': 'Helsinki-NLP/opus-mt-te-en'}
 
 
 
 
 
 
 
 
 
15
  }
16
 
17
  # Load translation models and tokenizers
@@ -44,8 +52,14 @@ class TranslationHandler:
44
  tokenizer = self.tokenizers[direction]
45
  model = self.models[direction]
46
 
47
- # Tokenize input
48
- inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
 
 
 
 
 
 
49
 
50
  # Generate translation
51
  with torch.no_grad():
@@ -100,7 +114,9 @@ def get_transliteration(text, lang):
100
  # Text-to-Speech Function
101
  def text_to_speech(text, lang):
102
  try:
103
- tts = gTTS(text, lang=lang)
 
 
104
  audio_file = "translated_audio.mp3"
105
  tts.save(audio_file)
106
  return audio_file
 
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import torch
 
7
  import os
8
  import subprocess
9
 
10
+ # Language Mapping with more robust models
11
  LANG_MAP = {
12
+ 'en-te': {
13
+ 'source': 'en',
14
+ 'target': 'te',
15
+ 'model': 'Helsinki-NLP/opus-mt-en-te'
16
+ },
17
+ 'te-en': {
18
+ 'source': 'te',
19
+ 'target': 'en',
20
+ 'model': 'Google/byt5-base', # More robust for te-en translation
21
+ 'target_language': 'en'
22
+ }
23
  }
24
 
25
  # Load translation models and tokenizers
 
52
  tokenizer = self.tokenizers[direction]
53
  model = self.models[direction]
54
 
55
+ # Special handling for Telugu to English
56
+ if direction == 'te-en':
57
+ # Prefix the input for better translation
58
+ input_text = f"translate Telugu to English: {text}"
59
+ inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
60
+ else:
61
+ # Standard translation for other directions
62
+ inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
63
 
64
  # Generate translation
65
  with torch.no_grad():
 
114
  # Text-to-Speech Function
115
  def text_to_speech(text, lang):
116
  try:
117
+ # Use 'en' for English output; every other language code falls back to 'te' (Telugu)
118
+ tts_lang = 'en' if lang == 'en' else 'te'
119
+ tts = gTTS(text, lang=tts_lang)
120
  audio_file = "translated_audio.mp3"
121
  tts.save(audio_file)
122
  return audio_file