Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,11 @@ model = None
|
|
34 |
news_generator = None
|
35 |
whisper_model = None
|
36 |
|
|
|
|
|
|
|
|
|
|
|
37 |
@spaces.GPU(duration=60)
|
38 |
def initialize_models():
|
39 |
"""Initialize models with Zero GPU optimizations"""
|
@@ -49,6 +54,8 @@ def initialize_models():
|
|
49 |
model_name,
|
50 |
token=HUGGINGFACE_TOKEN
|
51 |
)
|
|
|
|
|
52 |
tokenizer.pad_token = tokenizer.eos_token
|
53 |
|
54 |
# Load model
|
@@ -60,6 +67,8 @@ def initialize_models():
|
|
60 |
device_map="auto",
|
61 |
low_cpu_mem_usage=True
|
62 |
)
|
|
|
|
|
63 |
|
64 |
# Create pipeline
|
65 |
logger.info("Creating pipeline...")
|
@@ -75,20 +84,26 @@ def initialize_models():
|
|
75 |
top_p=0.95,
|
76 |
repetition_penalty=1.2
|
77 |
)
|
|
|
|
|
78 |
|
79 |
# Load Whisper model
|
80 |
logger.info("Loading Whisper model...")
|
81 |
whisper_model = whisper.load_model("base")
|
|
|
|
|
82 |
|
83 |
logger.info("All models initialized successfully")
|
84 |
return True
|
85 |
except Exception as e:
|
86 |
logger.error(f"Error during model initialization: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
87 |
raise
|
88 |
|
89 |
-
# Inicializar los modelos
|
90 |
-
initialize_models()
|
91 |
-
|
92 |
def download_social_media_video(url):
|
93 |
"""Download a video from social media."""
|
94 |
ydl_opts = {
|
@@ -139,6 +154,9 @@ def preprocess_audio(audio_file):
|
|
139 |
def transcribe_audio(file):
|
140 |
"""Transcribe an audio or video file."""
|
141 |
try:
|
|
|
|
|
|
|
142 |
if isinstance(file, str) and file.startswith('http'):
|
143 |
file_path = download_social_media_video(file)
|
144 |
elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
@@ -147,8 +165,14 @@ def transcribe_audio(file):
|
|
147 |
file_path = preprocess_audio(file)
|
148 |
|
149 |
logger.info(f"Transcribing audio: {file_path}")
|
|
|
|
|
|
|
150 |
with torch.inference_mode():
|
151 |
result = whisper_model.transcribe(file_path)
|
|
|
|
|
|
|
152 |
transcription = result.get("text", "Error in transcription")
|
153 |
logger.info(f"Transcription completed: {transcription[:50]}...")
|
154 |
return transcription
|
@@ -172,6 +196,7 @@ def read_document(document_path):
|
|
172 |
else:
|
173 |
return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
|
174 |
except Exception as e:
|
|
|
175 |
return f"Error reading document: {str(e)}"
|
176 |
|
177 |
def read_url(url):
|
@@ -182,6 +207,7 @@ def read_url(url):
|
|
182 |
soup = BeautifulSoup(response.content, 'html.parser')
|
183 |
return soup.get_text()
|
184 |
except Exception as e:
|
|
|
185 |
return f"Error reading URL: {str(e)}"
|
186 |
|
187 |
def process_social_content(url):
|
@@ -190,7 +216,8 @@ def process_social_content(url):
|
|
190 |
text_content = read_url(url)
|
191 |
try:
|
192 |
video_content = transcribe_audio(url)
|
193 |
-
except Exception:
|
|
|
194 |
video_content = None
|
195 |
|
196 |
return {
|
@@ -204,6 +231,9 @@ def process_social_content(url):
|
|
204 |
@spaces.GPU(duration=60)
|
205 |
def generate_news(instructions, facts, size, tone, *args):
|
206 |
try:
|
|
|
|
|
|
|
207 |
# Initialize knowledge base
|
208 |
knowledge_base = {
|
209 |
"instructions": instructions,
|
@@ -227,12 +257,16 @@ def generate_news(instructions, facts, size, tone, *args):
|
|
227 |
# Process URLs
|
228 |
for url in urls:
|
229 |
if url:
|
230 |
-
|
|
|
|
|
231 |
|
232 |
# Process documents
|
233 |
for document in documents:
|
234 |
if document is not None:
|
235 |
-
|
|
|
|
|
236 |
|
237 |
# Process audio files
|
238 |
for i in range(0, len(audios), 3):
|
@@ -265,14 +299,15 @@ def generate_news(instructions, facts, size, tone, *args):
|
|
265 |
for idx, data in enumerate(knowledge_base["audio_data"]):
|
266 |
if data["audio"] is not None:
|
267 |
transcription = transcribe_audio(data["audio"])
|
268 |
-
|
269 |
-
|
|
|
270 |
|
271 |
for data in knowledge_base["social_content"]:
|
272 |
-
if data["text"]:
|
273 |
transcriptions_text += f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}\n'
|
274 |
raw_transcriptions += transcriptions_text + "\n\n"
|
275 |
-
if data["video"]:
|
276 |
video_transcription = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}\n'
|
277 |
transcriptions_text += video_transcription
|
278 |
raw_transcriptions += video_transcription + "\n\n"
|
@@ -323,9 +358,14 @@ Follow these requirements:
|
|
323 |
|
324 |
except Exception as e:
|
325 |
logger.error(f"Error generating news: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
return f"Error generating the news article: {str(e)}", ""
|
327 |
|
328 |
-
# Create Gradio interface
|
329 |
def create_demo():
|
330 |
with gr.Blocks() as demo:
|
331 |
gr.Markdown("## Generador de noticias todo en uno")
|
|
|
34 |
news_generator = None
|
35 |
whisper_model = None
|
36 |
|
37 |
+
def check_models_initialized():
    """Verify that every model handle used by the app has been set.

    Reads the global names ``tokenizer``, ``model``, ``news_generator`` and
    ``whisper_model`` and fails if any of them is still ``None``.

    Raises:
        RuntimeError: If at least one of the four handles is ``None``.
    """
    handles = (tokenizer, model, news_generator, whisper_model)
    # Compare by identity: a membership test (`None in handles`) falls back to
    # `==`, which an object with a custom `__eq__` may answer arbitrarily
    # (or refuse to answer with a plain bool).
    if any(handle is None for handle in handles):
        raise RuntimeError("Models not properly initialized. Please ensure initialization was successful.")
|
41 |
+
|
42 |
@spaces.GPU(duration=60)
|
43 |
def initialize_models():
|
44 |
"""Initialize models with Zero GPU optimizations"""
|
|
|
54 |
model_name,
|
55 |
token=HUGGINGFACE_TOKEN
|
56 |
)
|
57 |
+
if tokenizer is None:
|
58 |
+
raise RuntimeError("Failed to initialize tokenizer")
|
59 |
tokenizer.pad_token = tokenizer.eos_token
|
60 |
|
61 |
# Load model
|
|
|
67 |
device_map="auto",
|
68 |
low_cpu_mem_usage=True
|
69 |
)
|
70 |
+
if model is None:
|
71 |
+
raise RuntimeError("Failed to initialize model")
|
72 |
|
73 |
# Create pipeline
|
74 |
logger.info("Creating pipeline...")
|
|
|
84 |
top_p=0.95,
|
85 |
repetition_penalty=1.2
|
86 |
)
|
87 |
+
if news_generator is None:
|
88 |
+
raise RuntimeError("Failed to initialize news generator pipeline")
|
89 |
|
90 |
# Load Whisper model
|
91 |
logger.info("Loading Whisper model...")
|
92 |
whisper_model = whisper.load_model("base")
|
93 |
+
if whisper_model is None:
|
94 |
+
raise RuntimeError("Failed to initialize Whisper model")
|
95 |
|
96 |
logger.info("All models initialized successfully")
|
97 |
return True
|
98 |
except Exception as e:
|
99 |
logger.error(f"Error during model initialization: {str(e)}")
|
100 |
+
# Reset all models to None if initialization fails
|
101 |
+
tokenizer = None
|
102 |
+
model = None
|
103 |
+
news_generator = None
|
104 |
+
whisper_model = None
|
105 |
raise
|
106 |
|
|
|
|
|
|
|
107 |
def download_social_media_video(url):
|
108 |
"""Download a video from social media."""
|
109 |
ydl_opts = {
|
|
|
154 |
def transcribe_audio(file):
|
155 |
"""Transcribe an audio or video file."""
|
156 |
try:
|
157 |
+
# Check if models are initialized
|
158 |
+
check_models_initialized()
|
159 |
+
|
160 |
if isinstance(file, str) and file.startswith('http'):
|
161 |
file_path = download_social_media_video(file)
|
162 |
elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
|
|
165 |
file_path = preprocess_audio(file)
|
166 |
|
167 |
logger.info(f"Transcribing audio: {file_path}")
|
168 |
+
if not os.path.exists(file_path):
|
169 |
+
raise FileNotFoundError(f"Audio file not found: {file_path}")
|
170 |
+
|
171 |
with torch.inference_mode():
|
172 |
result = whisper_model.transcribe(file_path)
|
173 |
+
if not result:
|
174 |
+
raise RuntimeError("Transcription failed to produce results")
|
175 |
+
|
176 |
transcription = result.get("text", "Error in transcription")
|
177 |
logger.info(f"Transcription completed: {transcription[:50]}...")
|
178 |
return transcription
|
|
|
196 |
else:
|
197 |
return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
|
198 |
except Exception as e:
|
199 |
+
logger.error(f"Error reading document: {str(e)}")
|
200 |
return f"Error reading document: {str(e)}"
|
201 |
|
202 |
def read_url(url):
|
|
|
207 |
soup = BeautifulSoup(response.content, 'html.parser')
|
208 |
return soup.get_text()
|
209 |
except Exception as e:
|
210 |
+
logger.error(f"Error reading URL: {str(e)}")
|
211 |
return f"Error reading URL: {str(e)}"
|
212 |
|
213 |
def process_social_content(url):
|
|
|
216 |
text_content = read_url(url)
|
217 |
try:
|
218 |
video_content = transcribe_audio(url)
|
219 |
+
except Exception as e:
|
220 |
+
logger.error(f"Error processing video content: {str(e)}")
|
221 |
video_content = None
|
222 |
|
223 |
return {
|
|
|
231 |
@spaces.GPU(duration=60)
|
232 |
def generate_news(instructions, facts, size, tone, *args):
|
233 |
try:
|
234 |
+
# Check if models are initialized
|
235 |
+
check_models_initialized()
|
236 |
+
|
237 |
# Initialize knowledge base
|
238 |
knowledge_base = {
|
239 |
"instructions": instructions,
|
|
|
257 |
# Process URLs
|
258 |
for url in urls:
|
259 |
if url:
|
260 |
+
content = read_url(url)
|
261 |
+
if content and not content.startswith("Error"):
|
262 |
+
knowledge_base["url_content"].append(content)
|
263 |
|
264 |
# Process documents
|
265 |
for document in documents:
|
266 |
if document is not None:
|
267 |
+
content = read_document(document.name)
|
268 |
+
if content and not content.startswith("Error"):
|
269 |
+
knowledge_base["document_content"].append(content)
|
270 |
|
271 |
# Process audio files
|
272 |
for i in range(0, len(audios), 3):
|
|
|
299 |
for idx, data in enumerate(knowledge_base["audio_data"]):
|
300 |
if data["audio"] is not None:
|
301 |
transcription = transcribe_audio(data["audio"])
|
302 |
+
if not transcription.startswith("Error"):
|
303 |
+
transcriptions_text += f'"{transcription}" - {data["name"]}, {data["position"]}\n'
|
304 |
+
raw_transcriptions += f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}\n\n'
|
305 |
|
306 |
for data in knowledge_base["social_content"]:
|
307 |
+
if data["text"] and not str(data["text"]).startswith("Error"):
|
308 |
transcriptions_text += f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}\n'
|
309 |
raw_transcriptions += transcriptions_text + "\n\n"
|
310 |
+
if data["video"] and not str(data["video"]).startswith("Error"):
|
311 |
video_transcription = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}\n'
|
312 |
transcriptions_text += video_transcription
|
313 |
raw_transcriptions += video_transcription + "\n\n"
|
|
|
358 |
|
359 |
except Exception as e:
|
360 |
logger.error(f"Error generating news: {str(e)}")
|
361 |
+
# Try to reinitialize models if they're not working
|
362 |
+
try:
|
363 |
+
initialize_models()
|
364 |
+
logger.info("Models reinitialized successfully")
|
365 |
+
except Exception as reinit_error:
|
366 |
+
logger.error(f"Failed to reinitialize models: {str(reinit_error)}")
|
367 |
return f"Error generating the news article: {str(e)}", ""
|
368 |
|
|
|
369 |
def create_demo():
|
370 |
with gr.Blocks() as demo:
|
371 |
gr.Markdown("## Generador de noticias todo en uno")
|