Spaces:

CamiloVega
/

NewsIA

Sleeping

App Files Files Community

CamiloVega committed on Nov 2, 2024

Commit

9b37297

verified ·

1 Parent(s): 24a5257

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -11

app.py CHANGED Viewed

@@ -34,6 +34,11 @@ model = None
 news_generator = None
 whisper_model = None
 @spaces.GPU(duration=60)
 def initialize_models():
     """Initialize models with Zero GPU optimizations"""
@@ -49,6 +54,8 @@ def initialize_models():
             model_name,
             token=HUGGINGFACE_TOKEN
         )
         tokenizer.pad_token = tokenizer.eos_token
         # Load model
@@ -60,6 +67,8 @@ def initialize_models():
             device_map="auto",
             low_cpu_mem_usage=True
         )
         # Create pipeline
         logger.info("Creating pipeline...")
@@ -75,20 +84,26 @@ def initialize_models():
             top_p=0.95,
             repetition_penalty=1.2
         )
         # Load Whisper model
         logger.info("Loading Whisper model...")
         whisper_model = whisper.load_model("base")
         logger.info("All models initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Error during model initialization: {str(e)}")
         raise
-# Inicializar los modelos
-initialize_models()
 def download_social_media_video(url):
     """Download a video from social media."""
     ydl_opts = {
@@ -139,6 +154,9 @@ def preprocess_audio(audio_file):
 def transcribe_audio(file):
     """Transcribe an audio or video file."""
     try:
         if isinstance(file, str) and file.startswith('http'):
             file_path = download_social_media_video(file)
         elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
@@ -147,8 +165,14 @@ def transcribe_audio(file):
             file_path = preprocess_audio(file)
         logger.info(f"Transcribing audio: {file_path}")
         with torch.inference_mode():
             result = whisper_model.transcribe(file_path)
         transcription = result.get("text", "Error in transcription")
         logger.info(f"Transcription completed: {transcription[:50]}...")
         return transcription
@@ -172,6 +196,7 @@ def read_document(document_path):
         else:
             return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
     except Exception as e:
         return f"Error reading document: {str(e)}"
 def read_url(url):
@@ -182,6 +207,7 @@ def read_url(url):
         soup = BeautifulSoup(response.content, 'html.parser')
         return soup.get_text()
     except Exception as e:
         return f"Error reading URL: {str(e)}"
 def process_social_content(url):
@@ -190,7 +216,8 @@ def process_social_content(url):
         text_content = read_url(url)
         try:
             video_content = transcribe_audio(url)
-        except Exception:
             video_content = None
         return {
@@ -204,6 +231,9 @@ def process_social_content(url):
 @spaces.GPU(duration=60)
 def generate_news(instructions, facts, size, tone, *args):
     try:
         # Initialize knowledge base
         knowledge_base = {
             "instructions": instructions,
@@ -227,12 +257,16 @@ def generate_news(instructions, facts, size, tone, *args):
         # Process URLs
         for url in urls:
             if url:
-                knowledge_base["url_content"].append(read_url(url))
         # Process documents
         for document in documents:
             if document is not None:
-                knowledge_base["document_content"].append(read_document(document.name))
         # Process audio files
         for i in range(0, len(audios), 3):
@@ -265,14 +299,15 @@ def generate_news(instructions, facts, size, tone, *args):
         for idx, data in enumerate(knowledge_base["audio_data"]):
             if data["audio"] is not None:
                 transcription = transcribe_audio(data["audio"])
-                transcriptions_text += f'"{transcription}" - {data["name"]}, {data["position"]}\n'
-                raw_transcriptions += f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}\n\n'
         for data in knowledge_base["social_content"]:
-            if data["text"]:
                 transcriptions_text += f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}\n'
                 raw_transcriptions += transcriptions_text + "\n\n"
-            if data["video"]:
                 video_transcription = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}\n'
                 transcriptions_text += video_transcription
                 raw_transcriptions += video_transcription + "\n\n"
@@ -323,9 +358,14 @@ Follow these requirements:
     except Exception as e:
         logger.error(f"Error generating news: {str(e)}")
         return f"Error generating the news article: {str(e)}", ""
-# Create Gradio interface
 def create_demo():
     with gr.Blocks() as demo:
         gr.Markdown("## Generador de noticias todo en uno")

 news_generator = None
 whisper_model = None
+def check_models_initialized():
+    """Check if all models are properly initialized"""
+    if None in (tokenizer, model, news_generator, whisper_model):
+        raise RuntimeError("Models not properly initialized. Please ensure initialization was successful.")
 @spaces.GPU(duration=60)
 def initialize_models():
     """Initialize models with Zero GPU optimizations"""
             model_name,
             token=HUGGINGFACE_TOKEN
         )
+        if tokenizer is None:
+            raise RuntimeError("Failed to initialize tokenizer")
         tokenizer.pad_token = tokenizer.eos_token
         # Load model
             device_map="auto",
             low_cpu_mem_usage=True
         )
+        if model is None:
+            raise RuntimeError("Failed to initialize model")
         # Create pipeline
         logger.info("Creating pipeline...")
             top_p=0.95,
             repetition_penalty=1.2
         )
+        if news_generator is None:
+            raise RuntimeError("Failed to initialize news generator pipeline")
         # Load Whisper model
         logger.info("Loading Whisper model...")
         whisper_model = whisper.load_model("base")
+        if whisper_model is None:
+            raise RuntimeError("Failed to initialize Whisper model")
         logger.info("All models initialized successfully")
         return True
     except Exception as e:
         logger.error(f"Error during model initialization: {str(e)}")
+        # Reset all models to None if initialization fails
+        tokenizer = None
+        model = None
+        news_generator = None
+        whisper_model = None
         raise
 def download_social_media_video(url):
     """Download a video from social media."""
     ydl_opts = {
 def transcribe_audio(file):
     """Transcribe an audio or video file."""
     try:
+        # Check if models are initialized
+        check_models_initialized()
         if isinstance(file, str) and file.startswith('http'):
             file_path = download_social_media_video(file)
         elif isinstance(file, str) and file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
             file_path = preprocess_audio(file)
         logger.info(f"Transcribing audio: {file_path}")
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Audio file not found: {file_path}")
         with torch.inference_mode():
             result = whisper_model.transcribe(file_path)
+            if not result:
+                raise RuntimeError("Transcription failed to produce results")
         transcription = result.get("text", "Error in transcription")
         logger.info(f"Transcription completed: {transcription[:50]}...")
         return transcription
         else:
             return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
     except Exception as e:
+        logger.error(f"Error reading document: {str(e)}")
         return f"Error reading document: {str(e)}"
 def read_url(url):
         soup = BeautifulSoup(response.content, 'html.parser')
         return soup.get_text()
     except Exception as e:
+        logger.error(f"Error reading URL: {str(e)}")
         return f"Error reading URL: {str(e)}"
 def process_social_content(url):
         text_content = read_url(url)
         try:
             video_content = transcribe_audio(url)
+        except Exception as e:
+            logger.error(f"Error processing video content: {str(e)}")
             video_content = None
         return {
 @spaces.GPU(duration=60)
 def generate_news(instructions, facts, size, tone, *args):
     try:
+        # Check if models are initialized
+        check_models_initialized()
         # Initialize knowledge base
         knowledge_base = {
             "instructions": instructions,
         # Process URLs
         for url in urls:
             if url:
+                content = read_url(url)
+                if content and not content.startswith("Error"):
+                    knowledge_base["url_content"].append(content)
         # Process documents
         for document in documents:
             if document is not None:
+                content = read_document(document.name)
+                if content and not content.startswith("Error"):
+                    knowledge_base["document_content"].append(content)
         # Process audio files
         for i in range(0, len(audios), 3):
         for idx, data in enumerate(knowledge_base["audio_data"]):
             if data["audio"] is not None:
                 transcription = transcribe_audio(data["audio"])
+                if not transcription.startswith("Error"):
+                    transcriptions_text += f'"{transcription}" - {data["name"]}, {data["position"]}\n'
+                    raw_transcriptions += f'[Audio/Video {idx + 1}]: "{transcription}" - {data["name"]}, {data["position"]}\n\n'
         for data in knowledge_base["social_content"]:
+            if data["text"] and not str(data["text"]).startswith("Error"):
                 transcriptions_text += f'[Social media text]: "{data["text"][:200]}..." - {data["name"]}, {data["context"]}\n'
                 raw_transcriptions += transcriptions_text + "\n\n"
+            if data["video"] and not str(data["video"]).startswith("Error"):
                 video_transcription = f'[Social media video]: "{data["video"]}" - {data["name"]}, {data["context"]}\n'
                 transcriptions_text += video_transcription
                 raw_transcriptions += video_transcription + "\n\n"
     except Exception as e:
         logger.error(f"Error generating news: {str(e)}")
+        # Try to reinitialize models if they're not working
+        try:
+            initialize_models()
+            logger.info("Models reinitialized successfully")
+        except Exception as reinit_error:
+            logger.error(f"Failed to reinitialize models: {str(reinit_error)}")
         return f"Error generating the news article: {str(e)}", ""
 def create_demo():
     with gr.Blocks() as demo:
         gr.Markdown("## Generador de noticias todo en uno")