CamiloVega committed
Commit 9b37297 · verified · 1 Parent(s): 24a5257

Update app.py

Files changed (1): app.py (+51 -11)
app.py CHANGED

@@ -34,6 +34,11 @@ model = None
 news_generator = None
 whisper_model = None
 
+def check_models_initialized():
+    """Check if all models are properly initialized"""
+    if None in (tokenizer, model, news_generator, whisper_model):
+        raise RuntimeError("Models not properly initialized. Please ensure initialization was successful.")
+
 @spaces.GPU(duration=60)
 def initialize_models():
     """Initialize models with Zero GPU optimizations"""
@@ -49,6 +54,8 @@ def initialize_models():
             model_name,
             token=HUGGINGFACE_TOKEN
         )
+        if tokenizer is None:
+            raise RuntimeError("Failed to initialize tokenizer")
         tokenizer.pad_token = tokenizer.eos_token
 
         # Load model
@@ -60,6 +67,8 @@ def initialize_models():
             device_map="auto",
             low_cpu_mem_usage=True
         )
+        if model is None:
+            raise RuntimeError("Failed to initialize model")
 
         # Create pipeline
         logger.info("Creating pipeline...")
@@ -75,20 +84,26 @@ def initialize_models():
             top_p=0.95,
             repetition_penalty=1.2
         )
+        if news_generator is None:
+            raise RuntimeError("Failed to initialize news generator pipeline")
 
         # Load Whisper model
         logger.info("Loading Whisper model...")
         whisper_model = whisper.load_model("base")
+        if whisper_model is None:
+            raise RuntimeError("Failed to initialize Whisper model")
 
         logger.info("All models initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Error during model initialization: {str(e)}")
+        # Reset all models to None if initialization fails
+        tokenizer = None
+        model = None
+        news_generator = None
+        whisper_model = None
         raise
 
-# Inicializar los modelos
-initialize_models()
-
 def download_social_media_video(url):
     """Download a video from social media."""
     ydl_opts = {
@@ -139,6 +154,9 @@ def preprocess_audio(audio_file):
 def transcribe_audio(file):
     """Transcribe an audio or video file."""
     try:
+        # Check if models are initialized
+        check_models_initialized()
+
         if isinstance(file, str) and file.startswith('http'):
             file_path = download_social_media_video(file)
         elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
@@ -147,8 +165,14 @@ def transcribe_audio(file):
             file_path = preprocess_audio(file)
 
         logger.info(f"Transcribing audio: {file_path}")
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Audio file not found: {file_path}")
+
         with torch.inference_mode():
             result = whisper_model.transcribe(file_path)
+            if not result:
+                raise RuntimeError("Transcription failed to produce results")
+
         transcription = result.get("text", "Error in transcription")
         logger.info(f"Transcription completed: {transcription[:50]}...")
         return transcription
@@ -172,6 +196,7 @@ def read_document(document_path):
         else:
             return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
     except Exception as e:
+        logger.error(f"Error reading document: {str(e)}")
         return f"Error reading document: {str(e)}"
 
 def read_url(url):
@@ -182,6 +207,7 @@ def read_url(url):
         soup = BeautifulSoup(response.content, 'html.parser')
         return soup.get_text()
     except Exception as e:
+        logger.error(f"Error reading URL: {str(e)}")
         return f"Error reading URL: {str(e)}"
 
 def process_social_content(url):
@@ -190,7 +216,8 @@ def process_social_content(url):
         text_content = read_url(url)
         try:
             video_content = transcribe_audio(url)
-        except Exception:
+        except Exception as e:
+            logger.error(f"Error processing video content: {str(e)}")
             video_content = None
 
         return {
@@ -204,6 +231,9 @@ def process_social_content(url):
 @spaces.GPU(duration=60)
 def generate_news(instructions, facts, size, tone, *args):
     try:
+        # Check if models are initialized
+        check_models_initialized()
+
         # Initialize knowledge base
         knowledge_base = {
             "instructions": instructions,
@@ -227,12 +257,16 @@ def generate_news(instructions, facts, size, tone, *args):
         # Process URLs
         for url in urls:
             if url:
-                knowledge_base["url_content"].append(read_url(url))
+                content = read_url(url)
+                if content and not content.startswith("Error"):
+                    knowledge_base["url_content"].append(content)
 
         # Process documents
         for document in documents:
             if document is not None:
-                knowledge_base["document_content"].append(read_document(document.name))
+                content = read_document(document.name)
+                if content and not content.startswith("Error"):
+                    knowledge_base["document_content"].append(content)
 
         # Process audio files
         for i in range(0, len(audios), 3):
@@ -265,14 +299,15 @@ def generate_news(instructions, facts, size, tone, *args):
         for idx, data in enumerate(knowledge_base["audio_data"]):
             if data["audio"] is not None:
                 transcription = transcribe_audio(data["audio"])
-                transcriptions_text += f'"{transcription}" - {data["name"]}, {data["position"]}\n'
-                raw_transcriptions += f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}\n\n'
+                if not transcription.startswith("Error"):
+                    transcriptions_text += f'"{transcription}" - {data["name"]}, {data["position"]}\n'
+                    raw_transcriptions += f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}\n\n'
 
         for data in knowledge_base["social_content"]:
-            if data["text"]:
+            if data["text"] and not str(data["text"]).startswith("Error"):
                 transcriptions_text += f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}\n'
                 raw_transcriptions += transcriptions_text + "\n\n"
-            if data["video"]:
+            if data["video"] and not str(data["video"]).startswith("Error"):
                 video_transcription = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}\n'
                 transcriptions_text += video_transcription
                 raw_transcriptions += video_transcription + "\n\n"
@@ -323,9 +358,14 @@ Follow these requirements:
 
     except Exception as e:
         logger.error(f"Error generating news: {str(e)}")
+        # Try to reinitialize models if they're not working
+        try:
+            initialize_models()
+            logger.info("Models reinitialized successfully")
+        except Exception as reinit_error:
+            logger.error(f"Failed to reinitialize models: {str(reinit_error)}")
         return f"Error generating the news article: {str(e)}", ""
 
-# Create Gradio interface
 def create_demo():
     with gr.Blocks() as demo:
         gr.Markdown("## Generador de noticias todo en uno")