Upload app.py with huggingface_hub
app.py CHANGED
@@ -1,4 +1,3 @@
-from huggingface_hub import HfApi, HfFolder
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
@@ -8,10 +7,19 @@ from deep_translator import GoogleTranslator
 import os
 import subprocess

-# Language Mapping
+# Language Mapping with more robust models
 LANG_MAP = {
-    'en-te': {
-
+    'en-te': {
+        'source': 'en',
+        'target': 'te',
+        'model': 'Helsinki-NLP/opus-mt-en-te'
+    },
+    'te-en': {
+        'source': 'te',
+        'target': 'en',
+        'model': 'Google/byt5-base',  # More robust for te-en translation
+        'target_language': 'en'
+    }
 }

 # Load translation models and tokenizers
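The loader referenced by the "# Load translation models and tokenizers" context line is not part of this hunk. Below is a minimal sketch of what it plausibly looks like, assuming a TranslationHandler class that fills the self.tokenizers and self.models dicts used later in the diff; the eval() call is an assumption, and note that the ByT5 checkpoint is published on the Hub in lowercase as google/byt5-base.

# Sketch (not from this commit): build per-direction tokenizer/model caches from LANG_MAP.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class TranslationHandler:
    def __init__(self):
        self.tokenizers = {}
        self.models = {}
        for direction, cfg in LANG_MAP.items():
            # cfg['model'] is the Hub id, e.g. 'Helsinki-NLP/opus-mt-en-te'
            self.tokenizers[direction] = AutoTokenizer.from_pretrained(cfg['model'])
            self.models[direction] = AutoModelForSeq2SeqLM.from_pretrained(cfg['model'])
            self.models[direction].eval()  # inference only, no gradient updates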
@@ -44,8 +52,14 @@ class TranslationHandler:
         tokenizer = self.tokenizers[direction]
         model = self.models[direction]

-        #
-
+        # Special handling for Telugu to English
+        if direction == 'te-en':
+            # Prefix the input for better translation
+            input_text = f"translate Telugu to English: {text}"
+            inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
+        else:
+            # Standard translation for other directions
+            inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

         # Generate translation
         with torch.no_grad():
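This hunk stops at the "with torch.no_grad():" context line, so the generation and decoding steps are not visible. A minimal sketch of how the prepared inputs are typically turned into text with these seq2seq models follows; the max_length and num_beams values are illustrative, not taken from the commit.

        # Generate translation (sketch of the continuation, not shown in the hunk)
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_length=512,  # illustrative; mirrors the tokenizer limit above
                num_beams=4,     # illustrative beam-search width
            )
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)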
@@ -100,7 +114,9 @@ def get_transliteration(text, lang):
 # Text-to-Speech Function
 def text_to_speech(text, lang):
     try:
-
+        # Use 'en' for English output
+        tts_lang = 'en' if lang == 'en' else 'te'
+        tts = gTTS(text, lang=tts_lang)
         audio_file = "translated_audio.mp3"
         tts.save(audio_file)
         return audio_file
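The try: block in this last hunk has no visible except branch in the diff. For reference, here is a self-contained sketch of the whole function as it would read after this change, with an assumed fallback that returns None on failure.

from gtts import gTTS

def text_to_speech(text, lang):
    try:
        # Use 'en' for English output, otherwise fall back to Telugu
        tts_lang = 'en' if lang == 'en' else 'te'
        tts = gTTS(text, lang=tts_lang)
        audio_file = "translated_audio.mp3"
        tts.save(audio_file)
        return audio_file
    except Exception as err:
        # Assumed error handling; the original except branch is not part of the diff
        print(f"Text-to-speech failed: {err}")
        return None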