Moustafa1111111111 committed on
Commit
1d5d75a
·
1 Parent(s): fb28d52

create TTS

Browse files
Files changed (16) hide show
  1. .gitattributes +1 -0
  2. .gitignore +8 -0
  3. .gitmodules +3 -0
  4. Dockerfile +65 -0
  5. README.md +5 -4
  6. index.html +19 -0
  7. local_server_new.py +136 -0
  8. requirements.txt +140 -0
  9. runtime.txt +1 -0
  10. script.js +38 -0
  11. speaker_reference.wav +3 -0
  12. start.sh +13 -0
  13. style.css +76 -0
  14. web/index.html +19 -0
  15. web/script.js +38 -0
  16. web/style.css +76 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ speaker_reference.wav filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ TTS/**/*.wav
2
+ TTS/**/*.png
3
+ TTS/**/*.gif
4
+ TTS/**/*.exe
5
+ TTS/new_venv1/
6
+ __pycache__/
7
+ *.pyc
8
+ *.log
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "TTS"]
2
+ path = TTS
3
+ url = https://github.com/coqui-ai/TTS.git
Dockerfile ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Debian buster is end-of-life and its apt repositories have been archived, so
# build on the supported bookworm variant of the same Python version.
FROM python:3.9-slim-bookworm

# System packages: git, the native build toolchain needed to compile some of the
# pinned Python dependencies, and wget for the model download further down.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        build-essential \
        cmake \
        pkg-config \
        libblis-dev \
        python3-venv \
        python3-dev \
        wget \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user that owns /app: the server writes output.wav into
# its working directory and the model libraries cache files under $HOME.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user
ENV HOME=/home/user

# Set the working directory in the container
WORKDIR /app

# Create the virtual environment and put it first on PATH.
# A standalone `RUN . venv/bin/activate` only affects that single build layer,
# so without this ENV line pip would install into the system interpreter while
# the start command runs the (empty) venv interpreter.
ENV VIRTUAL_ENV=/app/venv
RUN python3 -m venv "$VIRTUAL_ENV"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Install dependencies before copying the application code so this expensive
# layer stays cached when only the code changes.
COPY --chown=user:user requirements.txt /app/
RUN pip install --no-cache-dir --timeout=300 -r /app/requirements.txt \
    # local_server_new.py imports fastapi and langdetect and is launched with
    # uvicorn, none of which are listed in requirements.txt.
    && pip install --no-cache-dir --timeout=300 fastapi uvicorn langdetect

# Download the XTTS v2 model files (URLs are quoted so the shell never treats
# the `?` as a glob character) and create the audio directory.
RUN mkdir -p /app/models/xtts_v2 /app/audio \
    && for f in config.json model.pth vocab.json dvae.pth speakers_xtts.pth; do \
         wget -nv -O "/app/models/xtts_v2/$f" \
           "https://huggingface.co/coqui/XTTS-v2/resolve/main/$f?download=true" || exit 1; \
       done

# Copy the repository content (application code, web page files, scripts)
COPY --chown=user:user . /app/

# Place the speaker reference where local_server_new.py expects it
COPY --chown=user:user speaker_reference.wav /app/audio/speaker_reference.wav

# Hugging Face Docker Spaces route traffic to port 7860 unless `app_port` is set
# in the README front matter (it is not), and an unprivileged user should not
# rely on binding port 80.
EXPOSE 7860

# Run the app directly from the virtual environment (already first on PATH)
CMD ["uvicorn", "local_server_new:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,12 @@
1
  ---
2
- title: TTS3
3
- emoji: 📈
4
- colorFrom: purple
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: TTS
3
+ emoji: 😻
4
+ colorFrom: pink
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
+ short_description: XTTS v2 multilingual text-to-speech demo
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Declare the encoding explicitly: the page accepts Arabic input -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Text to Speech</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <h1>Text to Speech</h1>
    <!-- dir="auto" lets right-to-left text (e.g. Arabic) render naturally -->
    <textarea id="inputText" rows="5" cols="50" dir="auto" aria-label="Text to convert to speech"></textarea><br><br>
    <button id="convertButton">Convert to Speech</button>
    <!-- aria-live makes status changes audible to screen readers -->
    <div id="status" aria-live="polite"></div>
    <div id="audioOutput" style="margin-top: 20px;">
        <a id="downloadLink" href="#" download="output.wav" style="display: none;">Download Audio</a>
        <audio id="audioPlayer" controls style="display: none;"></audio>
    </div>

    <script src="script.js"></script>
</body>
</html>
local_server_new.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from fastapi.responses import FileResponse
5
+ from fastapi.staticfiles import StaticFiles
6
+ import logging
7
+ import torch
8
+ import os
9
+ from TTS.api import TTS
10
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
11
+ from langdetect import detect
12
+
13
+ # Allowlist XttsConfig so torch.load doesn't raise UnpicklingError
14
+ from torch.serialization import add_safe_globals
15
+ from TTS.tts.configs.xtts_config import XttsConfig
16
+ add_safe_globals([XttsConfig])
17
+
18
# Monkey-patch torch.load so that calls which do not specify `weights_only`
# fall back to weights_only=False (full unpickling), which the XTTS checkpoint
# loader relies on.
# SECURITY NOTE: weights_only=False unpickles arbitrary objects; only load
# checkpoints from sources you trust.
_original_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` defaulting to False.

    An explicit ``weights_only`` passed by the caller is respected, so code that
    deliberately asks for the safer ``weights_only=True`` is not silently
    downgraded. All other arguments are forwarded unchanged.
    """
    kwargs.setdefault("weights_only", False)
    return _original_torch_load(*args, **kwargs)


torch.load = patched_torch_load
24
+
25
+ logging.basicConfig(level=logging.DEBUG)
26
+
27
+ # Initialize FastAPI
28
+ app = FastAPI()
29
+ app.add_middleware(
30
+ CORSMiddleware,
31
+ allow_origins=["*"],
32
+ allow_methods=["*"],
33
+ allow_headers=["*"],
34
+ )
35
+
36
# Load the XTTS v2 model, preferring the checkpoint files stored on disk and
# falling back to resolving the model by its registry name.
tts_device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    model_dir = "/app/models/xtts_v2"
    config_path = os.path.join(model_dir, "config.json")
    # With an explicit config_path, model_path is given as the checkpoint directory
    tts = TTS(model_path=model_dir, config_path=config_path)
    tts = tts.to(tts_device)
    print("XTTS v2 model loaded successfully from local files.")
except Exception as e:
    print(f"Error loading XTTS v2 model from local files: {e}")
    print("Falling back to loading by model name (license might be required).")
    tts = TTS("tts_models/multilingual/multi-dataset-xtts_v2")
    tts = tts.to(tts_device)
47
+
48
# Load sentiment models.
# The Arabic tokenizer and classifier must come from the SAME checkpoint:
# pairing one model's tokenizer with another model's weights feeds token ids
# from the wrong vocabulary into the embedding table.
arabic_model_name = "aubmindlab/bert-base-arabertv02-twitter"
sentiment_tokenizer = AutoTokenizer.from_pretrained(arabic_model_name)
# num_labels=3 matches the ["negative", "neutral", "positive"] list that
# arabic_sentiment_analysis indexes with the predicted class.
# NOTE(review): the checkpoint name does not indicate a sentiment fine-tune, so
# the classification head is presumably untrained - confirm, or swap in a
# checkpoint fine-tuned for Arabic sentiment.
sentiment_model = AutoModelForSequenceClassification.from_pretrained(arabic_model_name, num_labels=3)
sentiment_model.eval()

# English sentiment classifier (labels are consumed by map_sentiment_to_emotion)
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
53
+
54
+ # Input class for POST body
55
+ class Message(BaseModel):
56
+ text: str
57
+
58
+ # Language detection
59
def detect_language_safely(text):
    """Return a language code for ``text`` without ever raising on bad input.

    Args:
        text: The string to classify.

    Returns:
        ``"ar"`` if the text contains any character in the Arabic Unicode block
        (U+0600-U+06FF); otherwise whatever ``langdetect.detect`` reports;
        ``"en"`` when the text is empty/whitespace or detection fails.
    """
    # Arabic is decided by script alone, so the detector is never consulted.
    if any('\u0600' <= c <= '\u06FF' for c in text):
        return "ar"

    # Nothing to analyse: skip the detector (it raises on featureless input).
    if not text.strip():
        return "en"

    try:
        return detect(text)
    except Exception:
        # Detection failed on non-Arabic text; default to English.
        return "en"
66
+
67
+ # Sentiment to emotion mapping
68
def map_sentiment_to_emotion(sentiment, language="en"):
    """Translate a sentiment label into one of "happy", "sad" or "neutral".

    Args:
        sentiment: Sentiment label (e.g. "positive", "NEGATIVE"). ``None`` or an
            empty value is treated as neutral.
        language: ``"ar"`` requires the label to equal "positive"/"negative";
            any other value matches those words as substrings of the label.

    Returns:
        ``"happy"`` for positive, ``"sad"`` for negative, otherwise ``"neutral"``.
    """
    # Normalise once so both branches are case-insensitive and None-safe.
    label = str(sentiment or "").strip().lower()

    if language == "ar":
        if label == "positive":
            return "happy"
        if label == "negative":
            return "sad"
        return "neutral"

    if "positive" in label:
        return "happy"
    if "negative" in label:
        return "sad"
    return "neutral"
72
+
73
+ # Simple Arabic sentiment analysis
74
def arabic_sentiment_analysis(text):
    """Classify Arabic text as "positive", "negative" or "neutral".

    A keyword count is tried first; only when positive and negative keyword
    counts tie is the transformer classifier consulted.

    Args:
        text: Arabic text to classify.

    Returns:
        ``"positive"``, ``"negative"`` or ``"neutral"``. ``"neutral"`` is also
        returned when the model fallback fails for any reason.
    """
    pos_words = ["سعيد", "فرح", "ممتاز", "رائع", "جيد", "حب", "جميل", "نجاح", "أحسنت", "شكرا"]
    neg_words = ["حزين", "غاضب", "سيء", "فشل", "خطأ", "مشكلة", "صعب", "لا أحب", "سخيف", "مؤسف"]

    neg_count = sum(1 for phrase in neg_words if phrase in text)

    # Blank out matched negative phrases before counting positives: the negative
    # phrase "لا أحب" contains the positive keyword "حب", which would otherwise
    # be counted on both sides and cancel out.
    remainder = text
    for phrase in neg_words:
        remainder = remainder.replace(phrase, " ")
    pos_count = sum(1 for word in pos_words if word in remainder)

    if pos_count > neg_count:
        return "positive"
    if neg_count > pos_count:
        return "negative"

    # Tie (including no keyword hits): fall back to the classifier.
    try:
        inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
        with torch.no_grad():  # inference only; no gradients needed
            outputs = sentiment_model(**inputs)
        sentiment_class = torch.argmax(outputs.logits).item()
        return ["negative", "neutral", "positive"][sentiment_class]
    except Exception:
        # Best-effort: keep serving requests, but leave a trace of the failure.
        logging.exception("Arabic sentiment model failed; defaulting to neutral")
        return "neutral"
92
+
93
+ # Main TTS endpoint
94
@app.post("/text-to-speech/")
def text_to_speech(msg: Message):
    """Synthesize speech for the posted text and report where to fetch it.

    The language is detected from the text, a coarse emotion is derived from
    sentiment analysis (English pipeline for "en", Arabic analysis otherwise),
    and the audio is written to ``output.wav``.

    Args:
        msg: Request body carrying the ``text`` to speak.

    Returns:
        ``{"status": "success", "audio_file": ..., "url": "/audio"}`` on success,
        or ``{"status": "error", "message": ...}`` when the text is empty or
        synthesis fails.
    """
    text = msg.text
    if not text or not text.strip():
        # Nothing to speak; answer in the same error shape the client handles.
        return {"status": "error", "message": "No text provided."}

    language = detect_language_safely(text)
    emotion = "neutral"

    # Sentiment is best-effort: a failure keeps the neutral default, but is
    # logged instead of being silently discarded.
    try:
        if language == "en":
            sentiment_result = sentiment_analyzer(text)[0]
            emotion = map_sentiment_to_emotion(sentiment_result["label"])
        else:
            sentiment_result = arabic_sentiment_analysis(text)
            emotion = map_sentiment_to_emotion(sentiment_result, language="ar")
    except Exception:
        logging.exception("Sentiment analysis failed; using neutral emotion")

    # NOTE(review): every request writes the same file, so concurrent requests
    # can overwrite each other's audio - consider per-request file names.
    output_filename = "output.wav"
    try:
        tts.tts_to_file(
            text=text,
            file_path=output_filename,
            emotion=emotion,
            speaker_wav="/app/audio/speaker_reference.wav",
            language=language
        )
        return {
            "status": "success",
            "audio_file": output_filename,
            "url": "/audio"
        }
    except Exception as e:
        logging.exception("Speech synthesis failed")
        return {"status": "error", "message": str(e)}
129
+
130
+ # ✅ Serve the audio file
131
@app.get("/audio")
def get_audio():
    """Serve the most recently generated ``output.wav``.

    Returns:
        A WAV ``FileResponse`` for ``output.wav``.

    Raises:
        HTTPException: 404 when no audio file has been generated yet.
    """
    from fastapi import HTTPException  # local import: only this handler needs it

    if not os.path.isfile("output.wav"):
        raise HTTPException(status_code=404, detail="No audio has been generated yet.")

    return FileResponse(
        "output.wav",
        media_type="audio/wav",
        filename="output.wav",
        # The file changes on every synthesis, so it must not be cached.
        headers={"Cache-Control": "no-store"},
    )
134
+
135
+ # Serve static files (your web page) from the 'web' directory
136
+ app.mount("/", StaticFiles(directory="web", html=True), name="static")
requirements.txt ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TTS==0.22.0
2
+ numpy==1.22.0
3
+ transformers==4.51.3
4
+ torch==2.7.0
5
+ torchaudio==2.7.0
6
+ scipy==1.11.4 # Or >=1.11.2 as specified during install
7
+ numba==0.60.0
8
+ soundfile==0.13.1
9
+ pypinyin==0.54.0
10
+ flask==3.1.0
11
+ bangla==0.0.5
12
+ scikit-learn==1.6.1 # Or >=1.3.0
13
+ fsspec==2025.3.2 # Or >=2023.6.0
14
+ umap-learn==0.5.7 # Or >=0.5.1
15
+ gruut[de,es,fr]==2.2.3
16
+ nltk==3.9.1
17
+ einops==0.8.1 # Or >=0.6.0
18
+ num2words==0.5.14
19
+ pandas==1.5.3 # Or <2.0,>=1.4
20
+ trainer==0.0.36 # Or >=0.0.32
21
+ coqpit==0.0.17 # Or >=0.0.16
22
+ unidecode==1.4.0 # Or >=1.3.2
23
+ pysbd==0.3.4 # Or >=0.3.4
24
+ inflect==7.5.0 # Or >=5.6.0
25
+ bnunicodenormalizer==0.1.7
26
+ anyascii==0.3.2 # Or >=0.3.0
27
+ packaging==25.0 # Or >=23.1
28
+ librosa==0.10.0 # Or >=0.10.0 (multiple versions might have been tried)
29
+ matplotlib==3.8.4 # Or >=3.7.0 (multiple versions might have been tried)
30
+ tqdm==4.67.1 # Or >=4.64.1
31
+ encodec==0.1.1
32
+ pyyaml==6.0.2 # Or >=6.0
33
+ jieba==0.42.1
34
+ hangul-romanize==0.1.0
35
+ jamo==0.4.1
36
+ bnnumerizer==0.0.2
37
+ aiohttp==3.11.18 # Or >=3.8.1
38
+ spacy[ja]==3.8.5 # Or >=3
39
+ Babel==2.17.0 # Or <3.0.0,>=2.8.0
40
+ jsonlines==1.2.0 # Or ~=1.2.0
41
+ dateparser==1.1.8 # Or ~=1.1.0
42
+ gruut-lang-en==2.0.1 # Or ~=2.0.0
43
+ python-crfsuite==0.9.11 # Or ~=0.9.7
44
+ networkx==2.8.8 # Or <3.0.0,>=2.5.0
45
+ gruut-ipa==0.13.0 # Or <1.0,>=0.12.0
46
+ gruut-lang-es==2.0.1 # Or ~=2.0.0
47
+ gruut-lang-fr==2.0.2 # Or ~=2.0.0
48
+ gruut-lang-de==2.0.1 # Or ~=2.0.0
49
+ attrs==25.3.0 # Or >=17.3.0
50
+ aiohappyeyeballs==2.6.1 # Or >=2.3.0
51
+ aiosignal==1.3.2 # Or >=1.1.2
52
+ propcache==0.3.1 # Or >=0.2.0
53
+ async-timeout==5.0.1 # Or <6.0,>=4.0
54
+ frozenlist==1.6.0 # Or >=1.1.1
55
+ multidict==6.4.3 # Or <7.0,>=4.5
56
+ yarl==1.20.0 # Or <2.0,>=1.17.0
57
+ Werkzeug==3.1.3 # Or >=3.1
58
+ blinker==1.9.0 # Or >=1.9
59
+ click==8.1.8 # Or >=8.1.3
60
+ Jinja2==3.1.6 # Or >=3.1.2
61
+ importlib-metadata==8.7.0 # Or >=3.6
62
+ itsdangerous==2.2.0 # Or >=2.2
63
+ more-itertools==10.7.0 # Or >=8.5.0
64
+ typeguard==4.4.2 # Or >=4.0.1
65
+ typing-extensions==4.13.2 # Or >=4.1.1
66
+ joblib==1.5.0 # Or >=0.14
67
+ decorator==5.2.1 # Or >=4.3.0
68
+ pooch==1.8.2 # Or >=1.1
69
+ lazy-loader==0.4 # Or >=0.1
70
+ msgpack==1.1.0 # Or >=1.0
71
+ soxr==0.5.0.post1 # Or >=0.3.2
72
+ pyparsing==3.2.3 # Or >=2.3.1
73
+ fonttools==4.57.0 # Or >=4.22.0
74
+ cycler==0.12.1 # Or >=0.10
75
+ python-dateutil==2.9.0.post0 # Or >=2.7
76
+ contourpy==1.2.1 # Or >=1.0.1 (multiple versions might have been tried)
77
+ importlib-resources==6.5.2 # Or >=3.2.0
78
+ pillow==11.2.1 # Or >=8
79
+ kiwisolver==1.4.7 # Or >=1.3.1
80
+ llvmlite==0.43.0 # Or <0.44,>=0.43.0dev0
81
+ pytz==2025.2 # Or >=2020.1
82
+ threadpoolctl==3.6.0 # Or >=3.1.0
83
+ cffi==1.17.1 # Or >=1.0
84
+ typer==0.15.3 # Or <1.0.0,>=0.3.0
85
+ requests==2.32.3 # Or <3.0.0,>=2.13.0
86
+ cymem==2.0.11 # Or <2.1.0,>=2.0.2
87
+ srsly==2.5.1 # Or <3.0.0,>=2.4.3
88
+ pydantic==2.11.4 # Or !=1.8,!=1.8.1,<3.0.0,>=1.7.4
89
+ murmurhash==1.0.12 # Or <1.1.0,>=0.28.0
90
+ catalogue==2.0.10 # Or <2.1.0,>=2.0.6
91
+ wasabi==1.1.3 # Or <1.2.0,>=0.9.1
92
+ weasel==0.4.1 # Or <0.5.0,>=0.1.0
93
+ spacy-legacy==3.0.12 # Or <3.1.0,>=3.0.11
94
+ langcodes==3.5.0 # Or <4.0.0,>=3.2.0
95
+ spacy-loggers==1.0.5 # Or <2.0.0,>=1.0.0
96
+ thinc==8.3.4 # Or <8.4.0,>=8.3.4 (multiple versions might have been tried)
97
+ preshed==3.0.9 # Or <3.1.0,>=3.0.2
98
+ sudachidict-core==20250129 # Or >=20211220
99
+ sudachipy==0.6.10 # Or !=0.6.1,>=0.5.2
100
+ filelock==3.18.0
101
+ sympy==1.14.0 # Or >=1.13.3
102
+ colorama==0.4.6
103
+ tensorboard==2.19.0
104
+ psutil==7.0.0
105
+ safetensors==0.5.3 # Or >=0.4.3
106
+ tokenizers==0.21.1 # Or <0.22,>=0.21
107
+ regex==2024.11.6 # Or !=2019.12.17
108
+ huggingface-hub==0.31.1 # Or <1.0,>=0.30.0
109
+ pynndescent==0.5.13 # Or >=0.5
110
+ pycparser==2.22
111
+ tzlocal==5.3.1
112
+ zipp==3.21.0
113
+ MarkupSafe==3.0.2 # Or >=2.0
114
+ six==1.17.0
115
+ language-data==1.3.0 # Or >=1.2
116
+ platformdirs==4.3.7 # Or >=2.5.0
117
+ annotated-types==0.7.0 # Or >=0.6.0
118
+ typing-inspection==0.4.0 # Or >=0.4.0
119
+ pydantic-core==2.33.2
120
+ urllib3==2.4.0 # Or <3,>=1.21.1
121
+ idna==3.10 # Or <4,>=2.5
122
+ charset-normalizer==3.4.2 # Or <4,>=2
123
+ certifi==2025.4.26 # Or >=2017.4.17
124
+ mpmath==1.3.0 # Or <1.4,>=1.1.0
125
+ confection==0.1.5 # Or <1.0.0,>=0.0.1
126
+ shellingham==1.5.4 # Or >=1.3.0
127
+ rich==14.0.0 # Or >=10.11.0
128
+ smart-open==7.1.0 # Or <8.0.0,>=5.2.1
129
+ cloudpathlib==0.21.0 # Or <1.0.0,>=0.7.0
130
+ markdown==3.8 # Or >=2.6.8
131
+ protobuf==6.30.2 # Or !=4.24.0,>=3.19.6
132
+ tensorboard-data-server==0.7.2 # Or <0.8.0,>=0.7.0
133
+ grpcio==1.71.0 # Or >=1.48.2
134
+ absl-py==2.2.2 # Or >=0.4
135
+ marisa-trie==1.2.1 # Or >=1.1.0
136
+ pygments==2.19.1 # Or <3.0.0,>=2.13.0
137
+ markdown-it-py==3.0.0 # Or >=2.2.0
138
+ wrapt==1.17.2
139
+ tzdata==2025.2
140
+ mdurl==0.1.2 # Or ~=0.1
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.9
script.js ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Wires the "Convert to Speech" button to the text-to-speech API and shows the
// resulting audio in the player and download link.
document.addEventListener('DOMContentLoaded', () => {
    const convertButton = document.getElementById('convertButton');
    const inputText = document.getElementById('inputText');
    const statusDiv = document.getElementById('status');
    const downloadLink = document.getElementById('downloadLink');
    const audioPlayer = document.getElementById('audioPlayer');

    // When the page is served over HTTP(S), call the API on the same origin.
    // Only a page opened straight from disk (file://) still targets the local
    // development server.
    const apiBase = window.location.protocol === 'file:' ? 'http://localhost:5000' : '';

    convertButton.addEventListener('click', async () => {
        const text = inputText.value;
        downloadLink.style.display = 'none';
        audioPlayer.style.display = 'none';

        if (!text.trim()) {
            statusDiv.textContent = 'Please enter some text first.';
            return;
        }

        statusDiv.textContent = 'Processing...';
        convertButton.disabled = true; // avoid overlapping requests

        try {
            const response = await fetch(`${apiBase}/text-to-speech/`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({ text: text }),
            });

            // Non-2xx responses do not carry the {status, message} payload.
            if (!response.ok) {
                statusDiv.textContent = `Error: server responded with HTTP ${response.status}`;
                return;
            }

            const data = await response.json();

            if (data.status === 'success') {
                // The audio URL is the same for every request; a timestamp
                // stops the browser from replaying a cached older result.
                const audioUrl = `${apiBase}${data.url}?t=${Date.now()}`;
                statusDiv.textContent = 'Speech generated successfully!';
                downloadLink.href = audioUrl;
                downloadLink.style.display = 'block';
                audioPlayer.src = audioUrl;
                audioPlayer.style.display = 'block';
            } else {
                statusDiv.textContent = `Error: ${data.message}`;
            }
        } catch (error) {
            statusDiv.textContent = `Network error: ${error}`;
        } finally {
            convertButton.disabled = false;
        }
    });
});
speaker_reference.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7fa2e3b58516ba6057ab93fa819eca83097c31a90d24e53cc6593ef384ce1f1
3
+ size 188476
start.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Starts the FastAPI text-to-speech server with Uvicorn.

# Stop immediately if any step fails (e.g. the virtual environment is missing)
set -e

# 1. Navigate to the application directory (where local_server_new.py is)
cd /app

# 2. Activate the Python virtual environment
source venv/bin/activate

# 3. Run the FastAPI application using Uvicorn
#    - Bind to all interfaces (0.0.0.0)
#    - Listen on $PORT, defaulting to 7860 (the port Hugging Face Docker Spaces
#      route to unless `app_port` is set in the README front matter)
#    - `exec` replaces the shell so Uvicorn receives container stop signals
exec uvicorn local_server_new:app --host 0.0.0.0 --port "${PORT:-7860}"
style.css ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
3
+ background-color: #f4f7f6;
4
+ margin: 40px;
5
+ display: flex;
6
+ flex-direction: column;
7
+ align-items: center;
8
+ color: #333;
9
+ }
10
+
11
+ h1 {
12
+ color: #2c3e50;
13
+ margin-bottom: 30px;
14
+ text-align: center;
15
+ font-size: 2.5em;
16
+ }
17
+
18
+ textarea {
19
+ padding: 15px;
20
+ border: 1px solid #ccc;
21
+ border-radius: 8px;
22
+ font-size: 1em;
23
+ margin-bottom: 20px;
24
+ width: 80%;
25
+ max-width: 600px;
26
+ box-sizing: border-box;
27
+ resize: vertical; /* Allows vertical resizing */
28
+ box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
29
+ }
30
+
31
+ button {
32
+ background-color: #3498db;
33
+ color: white;
34
+ padding: 12px 25px;
35
+ border: none;
36
+ border-radius: 8px;
37
+ cursor: pointer;
38
+ font-size: 1.1em;
39
+ transition: background-color 0.3s ease;
40
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
41
+ }
42
+
43
+ button:hover {
44
+ background-color: #2980b9;
45
+ }
46
+
47
+ #status {
48
+ margin-top: 20px;
49
+ font-weight: bold;
50
+ color: #27ae60; /* Green for success, you can change for errors */
51
+ }
52
+
53
+ #audioOutput {
54
+ margin-top: 30px;
55
+ text-align: center;
56
+ }
57
+
58
+ #downloadLink {
59
+ display: inline-block;
60
+ background-color: #2ecc71;
61
+ color: white;
62
+ padding: 10px 20px;
63
+ border-radius: 5px;
64
+ text-decoration: none;
65
+ font-size: 1em;
66
+ transition: background-color 0.3s ease;
67
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
68
+ }
69
+
70
+ #downloadLink:hover {
71
+ background-color: #27ae60;
72
+ }
73
+
74
+ #audioPlayer {
75
+ margin-top: 10px;
76
+ }
web/index.html ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Declare the encoding explicitly: the page accepts Arabic input -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Text to Speech</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <h1>Text to Speech</h1>
    <!-- dir="auto" lets right-to-left text (e.g. Arabic) render naturally -->
    <textarea id="inputText" rows="5" cols="50" dir="auto" aria-label="Text to convert to speech"></textarea><br><br>
    <button id="convertButton">Convert to Speech</button>
    <!-- aria-live makes status changes audible to screen readers -->
    <div id="status" aria-live="polite"></div>
    <div id="audioOutput" style="margin-top: 20px;">
        <a id="downloadLink" href="#" download="output.wav" style="display: none;">Download Audio</a>
        <audio id="audioPlayer" controls style="display: none;"></audio>
    </div>

    <script src="script.js"></script>
</body>
</html>
web/script.js ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Wires the "Convert to Speech" button to the text-to-speech API and shows the
// resulting audio in the player and download link.
document.addEventListener('DOMContentLoaded', () => {
    const convertButton = document.getElementById('convertButton');
    const inputText = document.getElementById('inputText');
    const statusDiv = document.getElementById('status');
    const downloadLink = document.getElementById('downloadLink');
    const audioPlayer = document.getElementById('audioPlayer');

    // This page is served by the same FastAPI app that exposes the API, so
    // requests go to the page's own origin. Only a page opened straight from
    // disk (file://) still targets the local development server.
    const apiBase = window.location.protocol === 'file:' ? 'http://localhost:5000' : '';

    convertButton.addEventListener('click', async () => {
        const text = inputText.value;
        downloadLink.style.display = 'none';
        audioPlayer.style.display = 'none';

        if (!text.trim()) {
            statusDiv.textContent = 'Please enter some text first.';
            return;
        }

        statusDiv.textContent = 'Processing...';
        convertButton.disabled = true; // avoid overlapping requests

        try {
            const response = await fetch(`${apiBase}/text-to-speech/`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({ text: text }),
            });

            // Non-2xx responses do not carry the {status, message} payload.
            if (!response.ok) {
                statusDiv.textContent = `Error: server responded with HTTP ${response.status}`;
                return;
            }

            const data = await response.json();

            if (data.status === 'success') {
                // The audio URL is the same for every request; a timestamp
                // stops the browser from replaying a cached older result.
                const audioUrl = `${apiBase}${data.url}?t=${Date.now()}`;
                statusDiv.textContent = 'Speech generated successfully!';
                downloadLink.href = audioUrl;
                downloadLink.style.display = 'block';
                audioPlayer.src = audioUrl;
                audioPlayer.style.display = 'block';
            } else {
                statusDiv.textContent = `Error: ${data.message}`;
            }
        } catch (error) {
            statusDiv.textContent = `Network error: ${error}`;
        } finally {
            convertButton.disabled = false;
        }
    });
});
web/style.css ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
3
+ background-color: #f4f7f6;
4
+ margin: 40px;
5
+ display: flex;
6
+ flex-direction: column;
7
+ align-items: center;
8
+ color: #333;
9
+ }
10
+
11
+ h1 {
12
+ color: #2c3e50;
13
+ margin-bottom: 30px;
14
+ text-align: center;
15
+ font-size: 2.5em;
16
+ }
17
+
18
+ textarea {
19
+ padding: 15px;
20
+ border: 1px solid #ccc;
21
+ border-radius: 8px;
22
+ font-size: 1em;
23
+ margin-bottom: 20px;
24
+ width: 80%;
25
+ max-width: 600px;
26
+ box-sizing: border-box;
27
+ resize: vertical; /* Allows vertical resizing */
28
+ box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
29
+ }
30
+
31
+ button {
32
+ background-color: #3498db;
33
+ color: white;
34
+ padding: 12px 25px;
35
+ border: none;
36
+ border-radius: 8px;
37
+ cursor: pointer;
38
+ font-size: 1.1em;
39
+ transition: background-color 0.3s ease;
40
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
41
+ }
42
+
43
+ button:hover {
44
+ background-color: #2980b9;
45
+ }
46
+
47
+ #status {
48
+ margin-top: 20px;
49
+ font-weight: bold;
50
+ color: #27ae60; /* Green for success, you can change for errors */
51
+ }
52
+
53
+ #audioOutput {
54
+ margin-top: 30px;
55
+ text-align: center;
56
+ }
57
+
58
+ #downloadLink {
59
+ display: inline-block;
60
+ background-color: #2ecc71;
61
+ color: white;
62
+ padding: 10px 20px;
63
+ border-radius: 5px;
64
+ text-decoration: none;
65
+ font-size: 1em;
66
+ transition: background-color 0.3s ease;
67
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.15);
68
+ }
69
+
70
+ #downloadLink:hover {
71
+ background-color: #27ae60;
72
+ }
73
+
74
+ #audioPlayer {
75
+ margin-top: 10px;
76
+ }