Spaces:

CamiloVega
/

NewsIA

Sleeping

App Files Community

CamiloVega committed on Mar 31

Commit

48d2a37

verified ·

1 Parent(s): 76536cf

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -727

app.py CHANGED Viewed

@@ -54,18 +54,18 @@ class ModelManager:
         return cls._instance
     def __init__(self):
-        if not self._initialized:
             logger.info("Initializing ModelManager attributes.")
             self.tokenizer = None
             self.model = None
             self.text_pipeline = None
             self.whisper_model = None
-            # self._initialized remains False until a model is successfully loaded
             self.llm_loaded = False
             self.whisper_loaded = False
             self.last_used = time.time()
             self.llm_loading = False
             self.whisper_loading = False
     def _cleanup_memory(self):
         """Utility function to force memory cleanup"""
@@ -81,6 +81,7 @@ class ModelManager:
         """Explicitly resets the LLM components."""
         logger.info("--- Attempting to reset LLM ---")
         try:
             if hasattr(self, 'model') and self.model is not None:
                 del self.model
                 logger.info("LLM model deleted.")
@@ -91,10 +92,11 @@ class ModelManager:
                 del self.text_pipeline
                 logger.info("LLM pipeline deleted.")
             self.model = None
             self.tokenizer = None
             self.text_pipeline = None
-            self.llm_loaded = False
             self._cleanup_memory()
             logger.info("LLM components reset successfully.")
         except Exception as e:
@@ -110,7 +112,7 @@ class ModelManager:
                 logger.info("Whisper model deleted.")
             self.whisper_model = None
-            self.whisper_loaded = False
             self._cleanup_memory()
             logger.info("Whisper component reset successfully.")
         except Exception as e:
@@ -130,7 +132,7 @@ class ModelManager:
             return True
         # Explicitly try to free Whisper memory before loading LLM
-        self.reset_whisper()
         self.llm_loading = True
         logger.info("Starting LLM initialization...")
@@ -186,7 +188,7 @@ class ModelManager:
              return True
         # Explicitly try to free LLM memory before loading Whisper
-        self.reset_llm()
         self.whisper_loading = True
         logger.info("Starting Whisper initialization...")
@@ -219,8 +221,7 @@ class ModelManager:
                  self.initialize_llm() # This will raise error if it fails
                  logger.info("LLM initialization completed by check_llm_initialized.")
             else:
-                 # This state should ideally be avoided by sequential logic, but handle anyway
-                 logger.info("LLM initialization is already in progress by another request. Waiting briefly.")
                  time.sleep(10)
                  if not self.llm_loaded:
                      logger.error("LLM initialization timed out or failed after waiting.")
@@ -241,7 +242,7 @@ class ModelManager:
                 self.initialize_whisper() # This will raise error if it fails
                 logger.info("Whisper initialization completed by check_whisper_initialized.")
             else:
-                logger.info("Whisper initialization is already in progress by another request. Waiting briefly.")
                 time.sleep(10)
                 if not self.whisper_loaded:
                     logger.error("Whisper initialization timed out or failed after waiting.")
@@ -254,180 +255,118 @@ class ModelManager:
     def reset_models(self, force=False):
         """Reset models if idle or forced."""
-        # This function now just calls the specific resets.
-        # Idle logic could be added back if needed, but explicit resets might be better for ZeroGPU.
         if force:
             logger.info("Forcing reset of all models.")
             self.reset_llm()
             self.reset_whisper()
-        # else: # Optional: Add idle check back if desired
-            # current_time = time.time()
-            # if current_time - self.last_used > 600:
-            #    logger.info("Resetting models due to inactivity.")
-            #    self.reset_llm()
-            #    self.reset_whisper()
-# --- Rest of the functions (download_social_media_video, convert_video_to_audio, etc.) remain the same as the previous version with detailed logging ---
-# --- Paste the functions from the previous answer here, starting from @lru_cache...download_social_media_video down to the end of process_social_media_url ---
-@lru_cache(maxsize=16) # Reduced cache size slightly
 def download_social_media_video(url):
     """Download audio from a social media video URL."""
     logger.info(f"Attempting to download audio from social media URL: {url}")
     temp_dir = tempfile.mkdtemp()
-    # Note: Using filename from info_dict can be unreliable. Let yt-dlp decide final name.
     output_template = os.path.join(temp_dir, '%(id)s.%(ext)s')
-    final_audio_file_path = None # Will store the path of the actual downloaded mp3
     ydl_opts = {
-        'format': 'bestaudio/best',
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192', # Standard quality
-        }],
-        'outtmpl': output_template,
-        'quiet': True,
-        'no_warnings': True,
-        'nocheckcertificate': True, # Sometimes needed for tricky sites
-        'retries': 3, # Add retries
-        'socket_timeout': 15, # Timeout
-        'cachedir': False, # Avoid caching issues in temp envs
     }
     try:
-        logger.info(f"yt-dlp options: {ydl_opts}") # Log options for debugging
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            logger.info("Extracting info and downloading...")
-            # Download should happen here and postprocessor rename to .mp3
             info_dict = ydl.extract_info(url, download=True)
-            logger.info(f"yt-dlp extraction complete for {url}. ID: {info_dict.get('id')}")
-            # Find the downloaded MP3 file (name might not exactly match ID if title had weird chars)
             found_files = [f for f in os.listdir(temp_dir) if f.endswith('.mp3')]
             if found_files:
                 final_audio_file_path = os.path.join(temp_dir, found_files[0])
-                logger.info(f"Found downloaded MP3: {final_audio_file_path}")
             else:
                  logger.error(f"Could not find downloaded MP3 file in {temp_dir} for URL {url}")
                  raise FileNotFoundError(f"Downloaded MP3 not found in {temp_dir}")
-        # Read the file content to return, as the temp dir might be cleaned up
-        logger.info(f"Reading content of {final_audio_file_path}")
-        with open(final_audio_file_path, 'rb') as f:
-            audio_content = f.read()
-        # Save the content to a new temporary file that Gradio can handle better
-        logger.info("Saving audio content to a new temporary file...")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output_file:
             temp_output_file.write(audio_content)
             final_path_for_gradio = temp_output_file.name
         logger.info(f"Audio content saved to temporary file for processing: {final_path_for_gradio}")
         return final_path_for_gradio
     except yt_dlp.utils.DownloadError as e:
         logger.error(f"!!! yt-dlp download error for {url}: {str(e)}")
-        # Don't log full traceback here as DownloadError is often informative enough
-        return None # Return None to indicate failure
     except Exception as e:
         logger.error(f"!!! Unexpected error downloading video from {url}: {str(e)}")
         logger.error(traceback.format_exc())
-        return None # Return None
     finally:
-         # Clean up the temporary directory and its contents
         if os.path.exists(temp_dir):
-            logger.info(f"Cleaning up temporary download directory: {temp_dir}")
             try:
                 import shutil
                 shutil.rmtree(temp_dir)
-                logger.info("Temporary download directory cleaned up.")
-            except Exception as cleanup_e:
-                logger.warning(f"Could not completely clean up temp download directory {temp_dir}: {cleanup_e}")
 def convert_video_to_audio(video_file_path):
     """Convert a video file to audio using ffmpeg directly."""
     logger.info(f"Attempting to convert video to audio: {video_file_path}")
-    output_file_path = None # Initialize
     try:
-        # Create a temporary file path for the output MP3
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-            output_file_path = temp_file.name
-        logger.info(f"Output audio path will be: {output_file_path}")
-        command = [
-            "ffmpeg",
-            "-i", video_file_path,
-            "-vn",          # No video
-            "-acodec", "libmp3lame", # Specify MP3 codec
-            "-ab", "192k",   # Audio bitrate
-            "-ar", "44100",  # Audio sample rate
-            "-ac", "2",      # Stereo audio
-            output_file_path,
-            "-y",           # Overwrite output file if it exists
-            "-loglevel", "error" # Suppress verbose ffmpeg output, show only errors
-        ]
-        logger.info(f"Executing ffmpeg command: {' '.join(command)}")
-        process = subprocess.run(command, check=True, capture_output=True, text=True, timeout=120) # Added timeout
-        logger.info(f"ffmpeg conversion successful for {video_file_path}.")
-        # Log stdout/stderr only if needed for debugging, can be verbose
-        # logger.debug(f"ffmpeg stdout: {process.stdout}")
-        # logger.debug(f"ffmpeg stderr: {process.stderr}")
-        # Verify output file exists and has size
         if not os.path.exists(output_file_path) or os.path.getsize(output_file_path) == 0:
             logger.error(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
             raise RuntimeError(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
         logger.info(f"Video successfully converted to audio: {output_file_path}")
         return output_file_path
     except subprocess.CalledProcessError as e:
          logger.error(f"!!! ffmpeg command failed with exit code {e.returncode} for video: {video_file_path}")
          logger.error(f"ffmpeg stderr: {e.stderr}")
-         # Clean up potentially empty/invalid output file
          if output_file_path and os.path.exists(output_file_path):
-             logger.info(f"Cleaning up failed ffmpeg output file: {output_file_path}")
-             os.remove(output_file_path)
          raise RuntimeError(f"ffmpeg conversion failed: {e.stderr}") from e
     except subprocess.TimeoutExpired as e:
         logger.error(f"!!! ffmpeg command timed out after {e.timeout} seconds for video: {video_file_path}")
         if output_file_path and os.path.exists(output_file_path):
-             logger.info(f"Cleaning up potentially incomplete ffmpeg output file: {output_file_path}")
-             os.remove(output_file_path)
         raise RuntimeError(f"ffmpeg conversion timed out after {e.timeout} seconds.") from e
     except Exception as e:
         logger.error(f"!!! Error converting video '{video_file_path}': {str(e)}")
         logger.error(traceback.format_exc())
-        # Clean up potentially created output file
         if output_file_path and os.path.exists(output_file_path):
-             logger.info(f"Cleaning up ffmpeg output file due to exception: {output_file_path}")
-             os.remove(output_file_path)
-        raise # Re-raise the exception
 def preprocess_audio(input_audio_path):
     """Preprocess the audio file (e.g., normalize volume)."""
     logger.info(f"Attempting to preprocess audio file: {input_audio_path}")
     output_path = None
     try:
-        # Check if file exists before trying to load
         if not os.path.exists(input_audio_path):
              logger.error(f"Input audio file for preprocessing not found: {input_audio_path}")
              raise FileNotFoundError(f"Input audio file not found: {input_audio_path}")
-        logger.info("Loading audio with pydub...")
         audio = AudioSegment.from_file(input_audio_path)
-        logger.info("Audio loaded.")
-        # Example: Normalize volume (optional, uncomment if needed)
-        # logger.info(f"Original dBFS: {audio.dBFS}. Normalizing target: -20 dBFS.")
-        # change_in_dBFS = -20.0 - audio.dBFS
-        # audio = audio.apply_gain(change_in_dBFS)
-        # logger.info("Volume normalization applied.")
-        # Export to a new temporary file
-        logger.info("Exporting preprocessed audio...")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             output_path = temp_file.name
             audio.export(output_path, format="mp3")
@@ -435,397 +374,208 @@ def preprocess_audio(input_audio_path):
         return output_path
     except FileNotFoundError as e:
         logger.error(f"!!! File not found during audio preprocessing: {e}")
-        raise # Reraise specific error
     except Exception as e:
         logger.error(f"!!! Error preprocessing audio '{input_audio_path}': {str(e)}")
         logger.error(traceback.format_exc())
-        # Clean up potentially created output file if error occurred during export
         if output_path and os.path.exists(output_path):
-             logger.info(f"Cleaning up preprocessing output file due to exception: {output_path}")
-             os.remove(output_path)
-        raise # Re-raise the exception
-@spaces.GPU(duration=300) # Allow more time for transcription
 def transcribe_audio_or_video(file_input):
     """Transcribe an audio or video file (local path or Gradio File object)."""
     logger.info(f"--- Starting transcription process for input: {type(file_input)} ---")
-    audio_file_to_transcribe = None
-    original_input_path = None
-    temp_files_to_clean = []
-    processing_step = "Initialization"
-    transcription = "" # Default value
     try:
         processing_step = "Whisper Model Check"
         logger.info("Checking/Initializing Whisper model for transcription...")
-        # *** Crucial Change: Reset LLM before ensuring Whisper is ready ***
-        # model_manager.reset_llm()
-        # *** Let's try NOT resetting LLM here, maybe both can fit? Check logs if fails ***
-        model_manager.check_whisper_initialized() # Will raise error if fails
         logger.info("Whisper model is ready for transcription.")
-        if file_input is None:
-            logger.info("No file input provided for transcription. Returning empty string.")
-            return ""
-        # ... (rest of the input type handling, conversion, preprocessing - same as before) ...
         processing_step = "Input Type Handling"
-        if isinstance(file_input, str): # Input is a path
             original_input_path = file_input
-            logger.info(f"Processing path input: {original_input_path}")
-            if not os.path.exists(original_input_path):
-                 logger.error(f"Input file path does not exist: {original_input_path}")
-                 raise FileNotFoundError(f"Input file not found: {original_input_path}")
             input_path = original_input_path
-        elif hasattr(file_input, 'name') and file_input.name: # Input is a Gradio File object
             original_input_path = file_input.name
-            logger.info(f"Processing Gradio file input. Temp path: {original_input_path}")
-            if not os.path.exists(original_input_path):
-                logger.error(f"Gradio temporary file path does not exist: {original_input_path}")
-                raise FileNotFoundError(f"Gradio temporary file not found: {original_input_path}")
-            input_path = original_input_path # Gradio usually provides a temp path
-        else:
-            logger.error(f"Unsupported input type for transcription: {type(file_input)}")
-            raise TypeError("Invalid input type for transcription. Expected file path or Gradio File object.")
-        logger.info(f"Input path identified: {input_path}")
         file_extension = os.path.splitext(input_path)[1].lower()
-        logger.info(f"File extension: {file_extension}")
         processing_step = "Video Conversion Check"
         if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
-            logger.info(f"Detected video file ({file_extension}), attempting conversion to audio...")
-            converted_audio_path = convert_video_to_audio(input_path) # Raises error on failure
-            logger.info(f"Video converted to audio: {converted_audio_path}")
-            temp_files_to_clean.append(converted_audio_path)
-            audio_file_to_process = converted_audio_path
-        elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac']: # Added more audio types
              logger.info(f"Detected audio file ({file_extension}).")
              audio_file_to_process = input_path
-        else:
-            logger.error(f"Unsupported file extension for transcription: {file_extension}")
-            raise ValueError(f"Unsupported file type: {file_extension}")
         processing_step = "Audio Preprocessing"
         try:
-            logger.info(f"Attempting to preprocess audio file: {audio_file_to_process}")
             preprocessed_audio_path = preprocess_audio(audio_file_to_process)
-            if preprocessed_audio_path != audio_file_to_process:
-                 logger.info("Preprocessing created a new file, adding to cleanup list.")
-                 temp_files_to_clean.append(preprocessed_audio_path)
             audio_file_to_transcribe = preprocessed_audio_path
-            logger.info(f"Audio preprocessing successful. File to transcribe: {audio_file_to_transcribe}")
         except Exception as preprocess_err:
-            logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio for transcription.")
-            logger.warning(traceback.format_exc())
             audio_file_to_transcribe = audio_file_to_process
         processing_step = "Transcription Execution"
         logger.info(f"Starting transcription execution for: {audio_file_to_transcribe}")
-        if not os.path.exists(audio_file_to_transcribe):
-            logger.error(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
-            raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
-        logger.info("Calling Whisper model transcribe method...")
         with torch.inference_mode():
-            use_fp16 = torch.cuda.is_available()
-            logger.info(f"Using fp16 for transcription: {use_fp16}")
-            # Add language='en' if most input is English, might improve speed/accuracy
-            result = model_manager.whisper_model.transcribe(
-                audio_file_to_transcribe, fp16=use_fp16 #, language="en"
-            )
-            logger.info("Whisper model transcribe method finished.")
-            if not result or "text" not in result:
-                logger.error("Transcription failed to produce results or 'text' key missing.")
-                raise RuntimeError("Transcription failed to produce results")
         transcription = result.get("text", "Error: Transcription result empty")
-        log_transcription = (transcription[:100] + '...') if len(transcription) > 100 else transcription
-        logger.info(f"Transcription completed successfully: '{log_transcription}'")
         processing_step = "Success"
-        # *** Optional: Reset Whisper immediately after use if memory is tight ***
-        # logger.info("Resetting Whisper model after successful transcription.")
-        # model_manager.reset_whisper()
-    # ... (keep the except blocks same as before) ...
     except FileNotFoundError as e:
-        logger.error(f"!!! File not found error during transcription (Step: {processing_step}): {e}")
-        logger.error(traceback.format_exc())
-        transcription = f"Error: Input file not found ({e})"
     except ValueError as e:
-         logger.error(f"!!! Value error during transcription (Step: {processing_step}): {e}")
-         logger.error(traceback.format_exc())
-         transcription = f"Error: Unsupported file type ({e})"
     except TypeError as e:
-         logger.error(f"!!! Type error during transcription setup (Step: {processing_step}): {e}")
-         logger.error(traceback.format_exc())
-         transcription = f"Error: Invalid input provided ({e})"
     except RuntimeError as e:
-         logger.error(f"!!! Runtime error during transcription (Step: {processing_step}): {e}")
-         logger.error(traceback.format_exc())
-         transcription = f"Error during processing: {e}"
     except Exception as e:
-        logger.error(f"!!! Unexpected error during transcription (Step: {processing_step}): {str(e)}")
-        logger.error(traceback.format_exc())
-        transcription = f"Error processing the file: An unexpected error occurred."
     finally:
-        # Clean up temporary files
-        logger.info(f"--- Cleaning up temporary files for transcription process ({len(temp_files_to_clean)} files) ---")
         for temp_file in temp_files_to_clean:
             try:
-                if os.path.exists(temp_file):
-                    os.remove(temp_file)
-                    logger.info(f"Cleaned up temporary file: {temp_file}")
-                # else:
-                #     logger.info(f"Temporary file already removed or never created: {temp_file}")
-            except Exception as e:
-                logger.warning(f"Could not remove temporary file {temp_file}: {str(e)}")
-        logger.info("--- Finished transcription process cleanup ---")
-        # Return the result (could be transcription or error message)
         return transcription
 @lru_cache(maxsize=16)
 def read_document(document_path):
     """Read the content of a document (PDF, DOCX, XLSX, CSV)."""
     logger.info(f"Attempting to read document: {document_path}")
     try:
-        if not os.path.exists(document_path):
-            logger.error(f"Document not found at path: {document_path}")
-            raise FileNotFoundError(f"Document not found: {document_path}")
-        file_extension = os.path.splitext(document_path)[1].lower()
-        logger.info(f"Document type detected: {file_extension}")
         content = ""
         if file_extension == ".pdf":
-            logger.info("Reading PDF document using PyMuPDF (fitz)...")
             doc = fitz.open(document_path)
-            # Check for encryption first
             if doc.is_encrypted:
-                logger.warning(f"PDF document {document_path} is encrypted. Attempting to decrypt with empty password.")
-                if not doc.authenticate(""):
-                    logger.error(f"Failed to decrypt PDF {document_path} with empty password.")
-                    doc.close()
-                    raise ValueError("Encrypted PDF cannot be read without password.")
-            content = "\n".join([page.get_text() for page in doc])
-            doc.close()
-            logger.info(f"PDF read successfully. Length: {len(content)} chars.")
         elif file_extension == ".docx":
-            logger.info("Reading DOCX document using python-docx...")
-            doc = docx.Document(document_path)
-            content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
-            logger.info(f"DOCX read successfully. Length: {len(content)} chars.")
         elif file_extension in (".xlsx", ".xls"):
-            logger.info("Reading Excel document using pandas...")
-            xls = pd.ExcelFile(document_path)
-            text_parts = []
             for sheet_name in xls.sheet_names:
-                logger.info(f"Reading sheet: {sheet_name}")
-                df = pd.read_excel(xls, sheet_name=sheet_name)
-                text_parts.append(f"--- Sheet: {sheet_name} ---\n{df.to_string()}")
             content = "\n\n".join(text_parts).strip()
-            logger.info(f"Excel read successfully. Length: {len(content)} chars.")
         elif file_extension == ".csv":
-            logger.info("Reading CSV document using pandas...")
             try:
-                logger.info("Attempting CSV read with comma separator...")
-                # Try to sniff encoding
-                with open(document_path, 'rb') as f:
-                    import chardet
-                    encoding = chardet.detect(f.read())['encoding']
-                    logger.info(f"Detected CSV encoding: {encoding}")
                 df = pd.read_csv(document_path, encoding=encoding)
-            except (pd.errors.ParserError, UnicodeDecodeError) as e1:
-                 logger.warning(f"Could not parse CSV {document_path} with comma/detected encoding ({e1}), trying semicolon.")
-                 try:
-                     df = pd.read_csv(document_path, sep=';', encoding=encoding)
                  except Exception as e2:
-                     logger.error(f"Also failed with semicolon separator: {e2}. Trying latin1 encoding.")
-                     try:
-                        df = pd.read_csv(document_path, encoding='latin1')
-                     except Exception as e3:
-                         logger.error(f"Also failed with latin1: {e3}. Giving up.")
-                         raise ValueError(f"Failed to parse CSV: {e1}, {e2}, {e3}")
             content = df.to_string()
-            logger.info(f"CSV read successfully. Length: {len(content)} chars.")
-        else:
-            logger.warning(f"Unsupported document type for reading: {file_extension}")
-            return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
         return content
-    except FileNotFoundError as e:
-        logger.error(f"!!! File not found error while reading document: {e}")
-        return f"Error: Document file not found at {document_path}"
-    except ValueError as e: # Catch specific errors like encryption or CSV parsing
-        logger.error(f"!!! Value error reading document {document_path}: {e}")
-        logger.error(traceback.format_exc())
-        return f"Error reading document: {e}"
-    except Exception as e:
-        logger.error(f"!!! Error reading document {document_path}: {str(e)}")
-        logger.error(traceback.format_exc())
-        return f"Error reading document: {str(e)}"
 @lru_cache(maxsize=16)
 def read_url(url):
     """Read the main textual content of a URL."""
     logger.info(f"Attempting to read URL: {url}")
-    if not url or not url.strip().startswith('http'):
-        logger.warning(f"Invalid or empty URL provided: '{url}'")
-        return ""
     try:
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.9',
-            'Connection': 'keep-alive'
-        }
-        logger.info(f"Sending GET request to {url} with headers: {headers}")
         response = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
-        logger.info(f"Received response from {url}. Status code: {response.status_code}, Content-Type: {response.headers.get('content-type')}")
         response.raise_for_status()
         content_type = response.headers.get('content-type', '').lower()
-        if not ('html' in content_type or 'text' in content_type):
-             logger.warning(f"URL {url} has non-text content type: {content_type}. Skipping.")
-             return f"Error: URL content type ({content_type}) is not text/html."
-        # Decode content carefully
         detected_encoding = response.encoding if response.encoding else response.apparent_encoding
-        logger.info(f"Decoding response content with encoding: {detected_encoding}")
         html_content = response.content.decode(detected_encoding or 'utf-8', errors='ignore')
-        logger.info(f"Parsing HTML content from {url} using BeautifulSoup...")
         soup = BeautifulSoup(html_content, 'html.parser')
-        logger.info("HTML parsed.")
-        logger.info("Removing script, style, and other non-content tags...")
         tags_to_remove = ["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button", "link", "head"]
         for tag_name in tags_to_remove:
-            for element in soup.find_all(tag_name):
-                element.extract()
-        logger.info("Non-content tags removed.")
-        logger.info("Attempting to find main content container...")
-        main_content = (
-            soup.find("main") or
-            soup.find("article") or
-            soup.find("div", class_=["content", "main", "post-content", "entry-content", "article-body", "story-content"]) or # Added more common classes
-            soup.find("div", id=["content", "main", "article", "story"]) # Added more common IDs
-        )
         text = ""
-        if main_content:
-            logger.info("Main content container found. Extracting text.")
-            text = main_content.get_text(separator='\n', strip=True)
         else:
-            logger.warning(f"No specific main content container found for {url}. Falling back to body text.")
             body = soup.find("body")
-            if body:
-                logger.info("Extracting text from body.")
-                text = body.get_text(separator='\n', strip=True)
-            else:
-                 logger.warning(f"No body tag found for {url}. Falling back to all text.")
-                 text = soup.get_text(separator='\n', strip=True)
-        logger.info("Cleaning extracted text whitespace...")
-        lines = [line.strip() for line in text.split('\n') if line.strip()]
-        cleaned_text = "\n".join(lines)
-        logger.info(f"Text cleaning complete. Initial length: {len(text)}, Cleaned length: {len(cleaned_text)}")
-        if not cleaned_text:
-            logger.warning(f"Could not extract meaningful text from URL: {url}")
-            return "Error: Could not extract text content from URL."
         max_chars = 15000
-        if len(cleaned_text) > max_chars:
-            logger.info(f"URL content is long ({len(cleaned_text)} chars), truncating to {max_chars} characters.")
-            final_text = cleaned_text[:max_chars] + "... [content truncated]"
-        else:
-            final_text = cleaned_text
-        logger.info(f"Successfully read and processed URL {url}. Final text length: {len(final_text)}")
         return final_text
-    except requests.exceptions.RequestException as e:
-        logger.error(f"!!! Error fetching URL {url}: {str(e)}")
-        return f"Error reading URL: Could not fetch content ({e})"
-    except Exception as e:
-        logger.error(f"!!! Error parsing URL {url}: {str(e)}")
-        logger.error(traceback.format_exc())
-        return f"Error reading URL: Could not parse content ({e})"
 def process_social_media_url(url):
     """Process a social media URL, attempting to get text and transcribe video/audio."""
     logger.info(f"--- Starting processing for social media URL: {url} ---")
-    if not url or not url.strip().startswith('http'):
-        logger.warning(f"Invalid or empty social media URL provided: '{url}'")
-        return None
-    text_content = None
-    video_transcription = None
-    temp_audio_file = None
-    # 1. Try extracting text content using read_url
-    logger.info(f"Attempting to read text content from social URL: {url}")
     try:
         text_content_result = read_url(url)
-        if text_content_result and not text_content_result.startswith("Error:"):
-             text_content = text_content_result
-             logger.info(f"Successfully read text content from {url}. Length: {len(text_content)}")
-        elif text_content_result:
-             logger.warning(f"read_url returned an error for {url}: {text_content_result}")
-        else:
-            logger.info(f"No text content extracted by read_url for {url}.")
-    except Exception as e:
-        logger.error(f"!!! Exception during text content extraction from social URL {url}: {e}")
-        logger.error(traceback.format_exc())
-    # 2. Try downloading and transcribing potential video/audio content
-    logger.info(f"Attempting to download audio/video content from social URL: {url}")
     try:
-        temp_audio_file = download_social_media_video(url) # Returns path or None
         if temp_audio_file:
-            logger.info(f"Audio downloaded from {url} to {temp_audio_file}. Proceeding to transcription.")
-            transcription_result = transcribe_audio_or_video(temp_audio_file) # Handles errors internally
-            if transcription_result and not transcription_result.startswith("Error"):
-                video_transcription = transcription_result
-                logger.info(f"Successfully transcribed audio from {url}. Length: {len(video_transcription)}")
-            elif transcription_result:
-                 logger.warning(f"Transcription returned an error for audio from {url}: {transcription_result}")
-            else:
-                 logger.warning(f"Transcription returned empty result for audio from {url}.")
-        else:
-             logger.info(f"No downloadable audio/video found or download failed for URL: {url}")
-    except Exception as e:
-        logger.error(f"!!! Exception during video/audio processing for social URL {url}: {e}")
-        logger.error(traceback.format_exc())
     finally:
-         # Clean up downloaded file if it exists
-        if temp_audio_file and os.path.exists(temp_audio_file):
-            logger.info(f"Cleaning up downloaded social media audio file: {temp_audio_file}")
-            try:
-                os.remove(temp_audio_file)
-                logger.info("Downloaded audio file removed.")
-            except Exception as e:
-                logger.warning(f"Failed to cleanup downloaded audio {temp_audio_file}: {e}")
-    # Return results
-    logger.info(f"--- Finished processing social media URL: {url} ---")
-    if text_content or video_transcription:
-        return {"text": text_content or "", "video": video_transcription or ""}
-    else:
-        # Return None only if BOTH failed and no content was retrieved
-        logger.info(f"No usable content retrieved for social URL: {url}")
-        return None
-# Create global model manager instance
-logger.info("Creating global ModelManager instance.")
-model_manager = ModelManager()
 @spaces.GPU(duration=10) # Duración corta solo para prueba
 def generate_news(instructions, facts, size, tone, *args):
@@ -843,6 +593,7 @@ def generate_news(instructions, facts, size, tone, *args):
         # --- NO CONSTRUYAS EL PROMPT ---
         # --- NO LLAMES A text_pipeline ---
         pass # Simplemente no hacemos nada
     except Exception as e:
         total_time = time.time() - request_start_time
@@ -857,319 +608,9 @@ def generate_news(instructions, facts, size, tone, *args):
     # Asegúrate de devolver dos strings
     return generated_article, raw_transcriptions
-        # --- Argument Parsing ---
-        # (Same as before)
-        logger.info("Parsing dynamic arguments...")
-        num_docs = 5
-        num_audio_sources = 5
-        num_audio_inputs_per_source = 3
-        num_urls = 5
-        num_social_sources = 3
-        num_social_inputs_per_source = 3
-        total_expected_args = num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls + (num_social_sources * num_social_inputs_per_source)
-        args_list = list(args)
-        if len(args_list) < total_expected_args:
-             logger.warning(f"Received fewer arguments ({len(args_list)}) than expected ({total_expected_args}). Padding with None.")
-             args_list.extend([None] * (total_expected_args - len(args_list)))
-        elif len(args_list) > total_expected_args:
-             logger.warning(f"Received more arguments ({len(args_list)}) than expected ({total_expected_args}). Truncating.")
-             args_list = args_list[:total_expected_args]
-        doc_files = args_list[0:num_docs]
-        audio_inputs_flat = args_list[num_docs : num_docs + (num_audio_sources * num_audio_inputs_per_source)]
-        url_inputs = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) : num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls]
-        social_inputs_flat = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls : total_expected_args]
-        logger.info(f"Argument parsing complete. Docs: {len(doc_files)}, Audio sets: {len(audio_inputs_flat)//3}, URLs: {len(url_inputs)}, Social sets: {len(social_inputs_flat)//3}")
-        knowledge_base = {
-            "instructions": instructions or "No specific instructions provided.",
-            "facts": facts or "No specific facts provided.",
-            "document_content": [], "audio_data": [], "url_content": [], "social_content": []
-        }
-        # --- Process Inputs (Documents, URLs, Collect Audio Info, Social Media) ---
-        # (Keep the processing loops same as previous version with detailed logging)
-        # --- Processing document inputs ---
-        logger.info("--- Processing document inputs ---")
-        doc_counter = 0
-        for i, doc_file in enumerate(doc_files):
-            if doc_file and hasattr(doc_file, 'name') and doc_file.name:
-                doc_filename = os.path.basename(doc_file.name)
-                logger.info(f"Attempting to read document {i+1}: {doc_filename} (Path: {doc_file.name})")
-                try:
-                    content = read_document(doc_file.name)
-                    if content and content.startswith("Error:"):
-                         logger.warning(f"Skipping document {i+1} ({doc_filename}) due to read error: {content}")
-                         raw_transcriptions += f"[Document {i+1}: {doc_filename}] Error reading: {content}\n\n"
-                    elif content:
-                        doc_excerpt = (content[:1000] + "... [document truncated]") if len(content) > 1000 else content
-                        knowledge_base["document_content"].append(f"[Document {i+1} Source: {doc_filename}]\n{doc_excerpt}")
-                        logger.info(f"Successfully processed document {i+1}. Added excerpt.")
-                        doc_counter += 1
-                    else:
-                         logger.warning(f"Skipping document {i+1} ({doc_filename}) because content is empty.")
-                         raw_transcriptions += f"[Document {i+1}: {doc_filename}] Read successfully but content is empty.\n\n"
-                except Exception as e:
-                    logger.error(f"!!! FAILED to process document {i+1} ({doc_filename}): {e}")
-                    logger.error(traceback.format_exc())
-                    raw_transcriptions += f"[Document {i+1}: {doc_filename}] CRITICAL Error during processing: {e}\n\n"
-            # else: logger.info(f"Skipping document slot {i+1}: No file.")
-        logger.info(f"--- Finished processing {doc_counter} documents. ---")
-        # --- Processing URL inputs ---
-        logger.info("--- Processing URL inputs ---")
-        url_counter = 0
-        for i, url in enumerate(url_inputs):
-             if url and isinstance(url, str) and url.strip().startswith('http'):
-                logger.info(f"Attempting to read URL {i+1}: {url}")
-                try:
-                    content = read_url(url)
-                    if content and content.startswith("Error:"):
-                         logger.warning(f"Skipping URL {i+1} ({url}) due to read error: {content}")
-                         raw_transcriptions += f"[URL {i+1}: {url}] Error reading: {content}\n\n"
-                    elif content:
-                        knowledge_base["url_content"].append(f"[URL {i+1} Source: {url}]\n{content}")
-                        logger.info(f"Successfully processed URL {i+1}. Added content.")
-                        url_counter += 1
-                    else:
-                         logger.warning(f"Skipping URL {i+1} ({url}) because content is empty.")
-                         raw_transcriptions += f"[URL {i+1}: {url}] Read successfully but content is empty.\n\n"
-                except Exception as e:
-                     logger.error(f"!!! FAILED to process URL {i+1} ({url}): {e}")
-                     logger.error(traceback.format_exc())
-                     raw_transcriptions += f"[URL {i+1}: {url}] CRITICAL Error during processing: {e}\n\n"
-             # elif url: logger.warning(f"Skipping URL slot {i+1}: Invalid URL '{url}'.")
-             # else: logger.info(f"Skipping URL slot {i+1}: No URL.")
-        logger.info(f"--- Finished processing {url_counter} URLs. ---")
-        # --- Processing audio/video inputs (collecting info) ---
-        logger.info("--- Processing audio/video inputs (collecting info) ---")
-        has_audio_source = False
-        audio_counter = 0
-        for i in range(num_audio_sources):
-            start_idx = i * num_audio_inputs_per_source
-            if start_idx + 2 < len(audio_inputs_flat):
-                audio_file = audio_inputs_flat[start_idx]
-                name = audio_inputs_flat[start_idx + 1] or f"Unnamed Audio Source {i+1}"
-                position = audio_inputs_flat[start_idx + 2] or "Role N/A"
-                if audio_file and hasattr(audio_file, 'name') and audio_file.name:
-                    audio_filename = os.path.basename(audio_file.name)
-                    logger.info(f"Found audio/video source {i+1}: {name} ({position}) - File: {audio_filename} (Path: {audio_file.name})")
-                    knowledge_base["audio_data"].append({"file_path": audio_file.name, "name": name, "position": position, "original_filename": audio_filename})
-                    has_audio_source = True
-                    audio_counter += 1
-                # else: logger.info(f"Skipping audio source slot {i+1}: No file.")
-            else: logger.warning(f"Index out of bounds for audio source {i+1}."); break
-        logger.info(f"--- Finished collecting audio/video info. {audio_counter} sources found. Transcription needed: {has_audio_source} ---")
-        # --- Processing social media inputs ---
-        logger.info("--- Processing social media inputs ---")
-        social_counter = 0
-        for i in range(num_social_sources):
-             start_idx = i * num_social_inputs_per_source
-             if start_idx + 2 < len(social_inputs_flat):
-                 social_url = social_inputs_flat[start_idx]
-                 social_name = social_inputs_flat[start_idx + 1] or f"Unnamed Social Source {i+1}"
-                 social_context = social_inputs_flat[start_idx + 2] or "Context N/A"
-                 if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
-                     logger.info(f"Attempting to process social media URL {i+1}: {social_url} ({social_name}, {social_context})")
-                     try:
-                         social_data = process_social_media_url(social_url)
-                         if social_data: # process_social_media_url now returns dict even if empty
-                             if social_data.get("text") or social_data.get("video"):
-                                 logger.info(f"Successfully processed social URL {i+1}. Text: {bool(social_data.get('text'))}, Video: {bool(social_data.get('video'))}")
-                                 knowledge_base["social_content"].append({"url": social_url, "name": social_name, "context": social_context, "text": social_data.get("text", ""), "video_transcription": social_data.get("video", "")})
-                                 social_counter += 1
-                             else:
-                                logger.warning(f"Processed social URL {i+1} ({social_url}) but found no text or video content.")
-                                raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Processed but no content found.\n\n"
-                         # No 'else' needed as process_social_media_url handles internal errors and returns dict
-                     except Exception as e:
-                         logger.error(f"!!! FAILED to process social URL {i+1} ({social_url}): {e}")
-                         logger.error(traceback.format_exc())
-                         raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] CRITICAL Error during processing: {e}\n\n"
-                 # elif social_url: logger.warning(f"Skipping social slot {i+1}: Invalid URL '{social_url}'.")
-                 # else: logger.info(f"Skipping social slot {i+1}: No URL.")
-             else: logger.warning(f"Index out of bounds for social source {i+1}."); break
-        logger.info(f"--- Finished processing {social_counter} social media sources. ---")
-        # --- Transcribe Audio/Video (Conditional) ---
-        transcriptions_for_prompt = ""
-        if has_audio_source:
-            logger.info("--- Starting Audio Transcription Phase ---")
-            # Whisper check/initialization happens INSIDE transcribe_audio_or_video now
-            for idx, data in enumerate(knowledge_base["audio_data"]):
-                 audio_filename = data['original_filename']
-                 logger.info(f"Attempting transcription for audio source {idx+1}: {audio_filename} ({data['name']}, {data['position']})")
-                 try:
-                     # transcribe_audio_or_video now includes model check and returns error string on failure
-                     transcription = transcribe_audio_or_video(data["file_path"])
-                     if transcription and not transcription.startswith("Error"):
-                         logger.info(f"Transcription successful for audio {idx+1}. Length: {len(transcription)}")
-                         quote = f'"{transcription}" - {data["name"]}, {data["position"]}'
-                         transcriptions_for_prompt += f"{quote}\n\n"
-                         raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n"{transcription}"\n\n'
-                     else:
-                         # Log the error message returned by the function
-                         logger.warning(f"Transcription failed or returned error for audio source {idx+1} ({audio_filename}): {transcription}")
-                         raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Transcription Error: {transcription}]\n\n'
-                 except Exception as e:
-                     # Catch unexpected errors during the call itself
-                     logger.error(f"!!! CRITICAL Error during transcription call for audio source {idx+1} ({audio_filename}): {e}")
-                     logger.error(traceback.format_exc())
-                     raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[CRITICAL Error during transcription call: {e}]\n\n'
-            logger.info("--- Finished Audio Transcription Phase ---")
-        else:
-            logger.info("--- Skipping Audio Transcription Phase (no audio sources found) ---")
-        # --- Add Social Media Content to Prompt Data ---
-        # (Same as before)
-        logger.info("--- Adding social media content to prompt data ---")
-        social_content_added_to_prompt = False
-        for idx, data in enumerate(knowledge_base["social_content"]):
-            source_id_log = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]'
-            source_id_prompt = f'Social Media Post ({data["name"]}, {data["context"]} at {data["url"]}):'
-            content_added_this_source = False
-            if data["text"]:
-                text_excerpt = (data["text"][:500] + "...[text truncated]") if len(data["text"]) > 500 else data["text"]
-                social_text_prompt = f'{source_id_prompt}\nText Content:\n"{text_excerpt}"\n\n'
-                transcriptions_for_prompt += social_text_prompt
-                raw_transcriptions += f"{source_id_log}\nText Content:\n{data['text']}\n\n"
-                content_added_this_source = True; social_content_added_to_prompt = True
-            if data["video_transcription"]:
-                social_video_prompt = f'{source_id_prompt}\nVideo Transcription:\n"{data["video_transcription"]}"\n\n'
-                transcriptions_for_prompt += social_video_prompt
-                raw_transcriptions += f"{source_id_log}\nVideo Transcription:\n{data['video_transcription']}\n\n"
-                content_added_this_source = True; social_content_added_to_prompt = True
-            if content_added_this_source: logger.info(f"Added content from social source {idx+1} to prompt data.")
-            # else: logger.info(f"No usable content found for social source {idx+1} ({data['url']}).")
-        if not social_content_added_to_prompt: logger.info("No content from social media sources was added to the prompt data.")
-        logger.info("--- Finished adding social media content ---")
-        # --- Prepare Final Prompt ---
-        # (Same as before)
-        logger.info("--- Preparing final prompt for LLM ---")
-        document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided or processed successfully."
-        url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided or processed successfully."
-        transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions or social media content available."
-        prompt = f"""<s>[INST] You are a professional news writer... [SAME PROMPT AS BEFORE] ...Begin the article now. [/INST]\nArticle Draft:\n""" # Keep prompt structure
-        prompt_words = len(prompt.split()); prompt_chars = len(prompt)
-        logger.info(f"Generated prompt length: {prompt_words} words / {prompt_chars} characters.")
-        logger.debug(f"Prompt Start: {prompt[:200]}...")
-        logger.debug(f"...Prompt End: {prompt[-200:]}")
-        logger.info("--- Finished preparing final prompt ---")
-        # --- Generate News Article ---
-        logger.info("--- Starting LLM Generation Phase ---")
-        generation_start_time = time.time()
-        # Ensure LLM is ready (will also reset Whisper if loaded)
-        logger.info("Ensuring LLM is initialized for generation...")
-        try:
-            # *** Crucial Change: Reset Whisper before ensuring LLM is ready ***
-            # model_manager.reset_whisper()
-            # *** Let's try NOT resetting whisper, check logs if fails ***
-            model_manager.check_llm_initialized() # Raises error if fails
-            logger.info("LLM confirmed ready for generation.")
-        except Exception as llm_init_err:
-            logger.error(f"!!! FATAL: LLM could not be initialized. Cannot generate article.")
-            logger.error(traceback.format_exc())
-            raise RuntimeError(f"LLM failed to initialize, cannot generate article: {llm_init_err}")
-        # Estimate max_new_tokens
-        # (Same as before)
-        estimated_tokens_per_word = 1.5
-        max_new_tokens = int(size * estimated_tokens_per_word + 150)
-        model_max_length = 2048
-        prompt_tokens_estimate = prompt_chars // 3
-        available_tokens = model_max_length - prompt_tokens_estimate - 50
-        max_new_tokens = min(max_new_tokens, available_tokens)
-        max_new_tokens = max(max_new_tokens, 100)
-        logger.info(f"Estimated prompt tokens: ~{prompt_tokens_estimate}. Model max length: {model_max_length}. Requesting max_new_tokens: {max_new_tokens}")
-        try:
-            # Generate text
-            # (Same pipeline call as before)
-            logger.info("Calling LLM text generation pipeline...")
-            outputs = model_manager.text_pipeline(
-                prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7,
-                top_p=0.95, top_k=50, repetition_penalty=1.15,
-                pad_token_id=model_manager.tokenizer.eos_token_id, num_return_sequences=1
-            )
-            logger.info("LLM pipeline call finished.")
-            if not outputs or not isinstance(outputs, list) or not outputs[0].get('generated_text'):
-                 logger.error("LLM pipeline returned invalid or empty output.")
-                 raise RuntimeError("LLM generation failed: Pipeline returned empty or invalid output.")
-            full_generated_text = outputs[0]['generated_text']
-            logger.info(f"Raw generated text length: {len(full_generated_text)} chars.")
-            # Clean output
-            # (Same cleaning logic as before)
-            logger.info("Cleaning LLM output (removing prompt)...")
-            inst_marker = "[/INST]"
-            marker_pos = full_generated_text.find(inst_marker)
-            if marker_pos != -1:
-                generated_article = full_generated_text[marker_pos + len(inst_marker):].strip()
-                if generated_article.startswith("Article Draft:"):
-                    generated_article = generated_article[len("Article Draft:"):].strip()
-                logger.info("Prompt removed successfully using '[/INST]' marker.")
-            else:
-                 generated_article = full_generated_text
-                 logger.warning("Prompt marker '[/INST]' not found in LLM output. Returning full generated text.")
-            generation_time = time.time() - generation_start_time
-            logger.info(f"News generation completed in {generation_time:.2f} seconds.")
-            logger.info(f"Final article length: {len(generated_article)} characters.")
-            logger.info("--- Finished LLM Generation Phase ---")
-            # *** Optional: Reset LLM immediately after generation ***
-            # logger.info("Resetting LLM model after successful generation.")
-            # model_manager.reset_llm()
-        # ... (keep OOM and general Exception handling for generation same as before) ...
-        except torch.cuda.OutOfMemoryError as oom_error:
-             logger.error(f"!!! CUDA Out of Memory error during LLM generation: {oom_error}")
-             logger.error(traceback.format_exc())
-             logger.info("Attempting to reset models after OOM error...")
-             model_manager.reset_models(force=True)
-             raise RuntimeError("Generation failed due to insufficient GPU memory.") from oom_error
-        except Exception as gen_error:
-            logger.error(f"!!! Error during text generation pipeline: {str(gen_error)}")
-            logger.error(traceback.format_exc())
-            raise RuntimeError(f"LLM generation failed: {gen_error}") from gen_error
-        total_time = time.time() - request_start_time
-        logger.info(f"--- generate_news function completed successfully in {total_time:.2f} seconds. ---")
-        return generated_article.strip(), raw_transcriptions.strip()
-    except Exception as e:
-        # Catch-all for any unexpected error during the entire generate_news flow
-        # (Same as before)
-        total_time = time.time() - request_start_time
-        logger.error(f"!!! UNHANDLED Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
-        logger.error(traceback.format_exc())
-        try:
-            logger.info("Attempting model reset due to unhandled error in generate_news.")
-            model_manager.reset_models(force=True)
-        except Exception as reset_error:
-            logger.error(f"Failed to reset models after error: {str(reset_error)}")
-        error_message = f"Error generating the news article: An unexpected error occurred. Please check logs. ({str(e)})"
-        transcription_log = raw_transcriptions.strip() + f"\n\n[CRITICAL ERROR] News generation failed unexpectedly: {str(e)}"
-        return error_message, transcription_log
-    finally:
-        # Final cleanup/logging
-        logger.info("--- generate_news function finished execution (either success or error) ---")
-        # Force cleanup after every run attempt on ZeroGPU
-        logger.info("Forcing model reset at the end of generate_news call.")
-        model_manager.reset_models(force=True)
 # --- create_demo function remains the same as the previous version ---
@@ -1259,6 +700,7 @@ def create_demo():
         outputs_list = [news_output, transcriptions_output]
         logger.info("Setting up event handlers.")
         generate_button.click(fn=generate_news, inputs=all_inputs, outputs=outputs_list)
         logger.info("Generate button click handler set.")

         return cls._instance
     def __init__(self):
         """Put every model slot into its initial, unloaded state.

         The body runs only while ``_initialized`` is missing or falsy on the
         instance; once it has completed, later calls return immediately and
         leave the existing attributes untouched.
         """
         # Guard clause: nothing to do when a previous call already finished.
         if getattr(self, '_initialized', False):
             return

         logger.info("Initializing ModelManager attributes.")

         # LLM-related handles.
         self.tokenizer = None
         self.model = None
         self.text_pipeline = None

         # Speech-to-text handle.
         self.whisper_model = None

         # Load-state flags and bookkeeping.
         self.llm_loaded = False
         self.whisper_loaded = False
         self.last_used = time.time()
         self.llm_loading = False
         self.whisper_loading = False

         # Mark as initialized so the guard above short-circuits next time.
         self._initialized = True
     def _cleanup_memory(self):
         """Utility function to force memory cleanup"""
         """Explicitly resets the LLM components."""
         logger.info("--- Attempting to reset LLM ---")
         try:
+            # Check attributes before deleting
             if hasattr(self, 'model') and self.model is not None:
                 del self.model
                 logger.info("LLM model deleted.")
                 del self.text_pipeline
                 logger.info("LLM pipeline deleted.")
+            # Reset attributes
             self.model = None
             self.tokenizer = None
             self.text_pipeline = None
+            self.llm_loaded = False # Mark as not loaded
             self._cleanup_memory()
             logger.info("LLM components reset successfully.")
         except Exception as e:
                 logger.info("Whisper model deleted.")
             self.whisper_model = None
+            self.whisper_loaded = False # Mark as not loaded
             self._cleanup_memory()
             logger.info("Whisper component reset successfully.")
         except Exception as e:
             return True
         # Explicitly try to free Whisper memory before loading LLM
+        # self.reset_whisper() # Optional: Uncomment if severe memory pressure
         self.llm_loading = True
         logger.info("Starting LLM initialization...")
              return True
         # Explicitly try to free LLM memory before loading Whisper
+        # self.reset_llm() # Optional: Uncomment if severe memory pressure
         self.whisper_loading = True
         logger.info("Starting Whisper initialization...")
                  self.initialize_llm() # This will raise error if it fails
                  logger.info("LLM initialization completed by check_llm_initialized.")
             else:
+                 logger.info("LLM initialization is already in progress. Waiting briefly.")
                  time.sleep(10)
                  if not self.llm_loaded:
                      logger.error("LLM initialization timed out or failed after waiting.")
                 self.initialize_whisper() # This will raise error if it fails
                 logger.info("Whisper initialization completed by check_whisper_initialized.")
             else:
+                logger.info("Whisper initialization is already in progress. Waiting briefly.")
                 time.sleep(10)
                 if not self.whisper_loaded:
                     logger.error("Whisper initialization timed out or failed after waiting.")
     def reset_models(self, force=False):
         """Reset models if idle or forced.

         Args:
             force: When truthy, both the LLM and the Whisper components are
                 reset via ``reset_llm`` and ``reset_whisper``.

         NOTE(review): only the forced path is visible in this view; no
         idle-based reset is shown here. Confirm against the full file.
         """
         if force:
             logger.info("Forcing reset of all models.")
             # LLM first, then Whisper.
             self.reset_llm()
             self.reset_whisper()
+# Create global model manager instance
+logger.info("Creating global ModelManager instance.")
+model_manager = ModelManager()
+# --- Functions: download_social_media_video, convert_video_to_audio, etc. ---
+# --- These functions are kept exactly the same as the previous full version ---
+# --- with detailed logging. Paste them here.                       ---
def download_social_media_video(url):
    """Download the audio track of a social media video and return an MP3 path.

    The audio is fetched with yt-dlp into a scratch directory, copied to a
    standalone temporary ``.mp3`` file, and the scratch directory is removed
    again before returning.

    This function is deliberately NOT memoized. It returns the path of a
    freshly created temporary file that the caller owns; a cached result
    could hand back a path that has since been deleted, and a cached failure
    (``None``) would make a transient download error permanent for that URL.

    Args:
        url: Address of the post/video whose audio should be downloaded.

    Returns:
        Path of a temporary MP3 file created with ``delete=False`` (the caller
        is responsible for removing it), or ``None`` when the download or the
        audio extraction failed.
    """
    import shutil  # function-scope import, as the original cleanup code did

    logger.info(f"Attempting to download audio from social media URL: {url}")
    temp_dir = tempfile.mkdtemp()
    output_template = os.path.join(temp_dir, '%(id)s.%(ext)s')
    final_audio_file_path = None
    final_path_for_gradio = None
    result_path = None  # stays None on every failure path
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'nocheckcertificate': True,
        'retries': 3,
        'socket_timeout': 15,
        'cachedir': False,
    }
    try:
        logger.debug(f"yt-dlp options: {ydl_opts}")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            logger.debug("Extracting info and downloading...")
            info_dict = ydl.extract_info(url, download=True)
            # Tolerate a falsy info dict so the debug line itself cannot raise.
            video_id = info_dict.get('id') if info_dict else None
            logger.debug(f"yt-dlp extraction complete for {url}. ID: {video_id}")
            found_files = [f for f in os.listdir(temp_dir) if f.endswith('.mp3')]
            if not found_files:
                logger.error(f"Could not find downloaded MP3 file in {temp_dir} for URL {url}")
                raise FileNotFoundError(f"Downloaded MP3 not found in {temp_dir}")
            final_audio_file_path = os.path.join(temp_dir, found_files[0])
            logger.debug(f"Found downloaded MP3: {final_audio_file_path}")

        # Copy the MP3 out of the scratch directory (which is deleted below)
        # without loading the whole file into memory.
        logger.debug("Saving audio content to a new temporary file...")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output_file:
            final_path_for_gradio = temp_output_file.name
        shutil.copyfile(final_audio_file_path, final_path_for_gradio)
        result_path = final_path_for_gradio
        logger.info(f"Audio content saved to temporary file for processing: {final_path_for_gradio}")
        return result_path
    except yt_dlp.utils.DownloadError as e:
        logger.error(f"!!! yt-dlp download error for {url}: {str(e)}")
        return None
    except Exception as e:
        logger.error(f"!!! Unexpected error downloading video from {url}: {str(e)}")
        logger.error(traceback.format_exc())
        return None
    finally:
        # On failure, do not leave a half-written output file behind.
        if result_path is None and final_path_for_gradio and os.path.exists(final_path_for_gradio):
            try:
                os.remove(final_path_for_gradio)
            except OSError as cleanup_e:
                logger.warning(f"Could not clean up {final_path_for_gradio}: {cleanup_e}")
        if os.path.exists(temp_dir):
            logger.debug(f"Cleaning up temporary download directory: {temp_dir}")
            try:
                shutil.rmtree(temp_dir)
            except Exception as cleanup_e:
                logger.warning(f"Could not clean up {temp_dir}: {cleanup_e}")
 def convert_video_to_audio(video_file_path):
     """Convert a video file to audio using ffmpeg directly."""
     logger.info(f"Attempting to convert video to audio: {video_file_path}")
+    output_file_path = None
     try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file: output_file_path = temp_file.name
+        logger.debug(f"Output audio path will be: {output_file_path}")
+        command = ["ffmpeg", "-i", video_file_path, "-vn", "-acodec", "libmp3lame", "-ab", "192k", "-ar", "44100", "-ac", "2", output_file_path, "-y", "-loglevel", "error"]
+        logger.debug(f"Executing ffmpeg command: {' '.join(command)}")
+        process = subprocess.run(command, check=True, capture_output=True, text=True, timeout=120)
+        logger.debug(f"ffmpeg conversion successful for {video_file_path}.")
         if not os.path.exists(output_file_path) or os.path.getsize(output_file_path) == 0:
             logger.error(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
             raise RuntimeError(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
         logger.info(f"Video successfully converted to audio: {output_file_path}")
         return output_file_path
     except subprocess.CalledProcessError as e:
          logger.error(f"!!! ffmpeg command failed with exit code {e.returncode} for video: {video_file_path}")
          logger.error(f"ffmpeg stderr: {e.stderr}")
          if output_file_path and os.path.exists(output_file_path):
+             try: os.remove(output_file_path)
+             except: pass
          raise RuntimeError(f"ffmpeg conversion failed: {e.stderr}") from e
     except subprocess.TimeoutExpired as e:
         logger.error(f"!!! ffmpeg command timed out after {e.timeout} seconds for video: {video_file_path}")
         if output_file_path and os.path.exists(output_file_path):
+             try: os.remove(output_file_path)
+             except: pass
         raise RuntimeError(f"ffmpeg conversion timed out after {e.timeout} seconds.") from e
     except Exception as e:
         logger.error(f"!!! Error converting video '{video_file_path}': {str(e)}")
         logger.error(traceback.format_exc())
         if output_file_path and os.path.exists(output_file_path):
+             try: os.remove(output_file_path)
+             except: pass
+        raise
 def preprocess_audio(input_audio_path):
     """Re-encode an audio file to a temporary MP3 and return its path.

     The audio is loaded with pydub and exported unchanged (no normalization
     is applied yet) to a new temporary ``.mp3`` file. The caller owns the
     returned file and is responsible for deleting it.

     Args:
         input_audio_path: Path of the audio file to load.

     Returns:
         Path of the newly created temporary MP3 file.

     Raises:
         FileNotFoundError: If ``input_audio_path`` does not exist (or a
             lower-level call reports a missing file).
         Exception: Any other error raised while loading or exporting is
             logged and re-raised unchanged.
     """
     logger.info(f"Attempting to preprocess audio file: {input_audio_path}")
     output_path = None
     try:
         if not os.path.exists(input_audio_path):
             logger.error(f"Input audio file for preprocessing not found: {input_audio_path}")
             raise FileNotFoundError(f"Input audio file not found: {input_audio_path}")
         logger.debug("Loading audio with pydub...")
         audio = AudioSegment.from_file(input_audio_path)
         logger.debug("Audio loaded.")
         # Optional normalization can be added here
         logger.debug("Exporting preprocessed audio...")
         # Only reserve a unique name here; the handle is closed before the
         # export so the path can be reopened on every platform.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
             output_path = temp_file.name
         audio.export(output_path, format="mp3")
         return output_path
     except Exception as e:
         if isinstance(e, FileNotFoundError):
             logger.error(f"!!! File not found during audio preprocessing: {e}")
         else:
             logger.error(f"!!! Error preprocessing audio '{input_audio_path}': {str(e)}")
             logger.error(traceback.format_exc())
         # Remove the partially written temp file on *every* failure path,
         # including a FileNotFoundError raised during the export itself.
         if output_path and os.path.exists(output_path):
             try:
                 os.remove(output_path)
             except OSError as cleanup_err:
                 logger.warning(f"Could not remove temp file {output_path}: {cleanup_err}")
         raise
+@spaces.GPU(duration=300)
 def transcribe_audio_or_video(file_input):
     """Transcribe an audio or video file (local path or Gradio File object)."""
     logger.info(f"--- Starting transcription process for input: {type(file_input)} ---")
+    audio_file_to_transcribe = None; original_input_path = None
+    temp_files_to_clean = []; processing_step = "Initialization"; transcription = ""
     try:
         processing_step = "Whisper Model Check"
         logger.info("Checking/Initializing Whisper model for transcription...")
+        model_manager.check_whisper_initialized()
         logger.info("Whisper model is ready for transcription.")
+        if file_input is None: return ""
         processing_step = "Input Type Handling"
+        if isinstance(file_input, str):
             original_input_path = file_input
+            if not os.path.exists(original_input_path): raise FileNotFoundError(f"Input file not found: {original_input_path}")
             input_path = original_input_path
+        elif hasattr(file_input, 'name') and file_input.name:
             original_input_path = file_input.name
+            if not os.path.exists(original_input_path): raise FileNotFoundError(f"Gradio temporary file not found: {original_input_path}")
+            input_path = original_input_path
+        else: raise TypeError("Invalid input type for transcription.")
+        logger.debug(f"Input path identified: {input_path}")
         file_extension = os.path.splitext(input_path)[1].lower()
+        logger.debug(f"File extension: {file_extension}")
         processing_step = "Video Conversion Check"
         if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
+            logger.info(f"Detected video file ({file_extension}), converting...")
+            converted_audio_path = convert_video_to_audio(input_path)
+            temp_files_to_clean.append(converted_audio_path); audio_file_to_process = converted_audio_path
+        elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a', '.aac']:
              logger.info(f"Detected audio file ({file_extension}).")
              audio_file_to_process = input_path
+        else: raise ValueError(f"Unsupported file type: {file_extension}")
         processing_step = "Audio Preprocessing"
         try:
+            logger.debug(f"Attempting to preprocess audio file: {audio_file_to_process}")
             preprocessed_audio_path = preprocess_audio(audio_file_to_process)
+            if preprocessed_audio_path != audio_file_to_process: temp_files_to_clean.append(preprocessed_audio_path)
             audio_file_to_transcribe = preprocessed_audio_path
+            logger.debug(f"Using preprocessed audio: {audio_file_to_transcribe}")
         except Exception as preprocess_err:
+            logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio.")
             audio_file_to_transcribe = audio_file_to_process
         processing_step = "Transcription Execution"
         logger.info(f"Starting transcription execution for: {audio_file_to_transcribe}")
+        if not os.path.exists(audio_file_to_transcribe): raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
+        logger.debug("Calling Whisper model transcribe method...")
         with torch.inference_mode():
+            use_fp16 = torch.cuda.is_available(); logger.debug(f"Using fp16: {use_fp16}")
+            result = model_manager.whisper_model.transcribe(audio_file_to_transcribe, fp16=use_fp16)
+        logger.debug("Whisper transcribe method finished.")
+        if not result or "text" not in result: raise RuntimeError("Transcription failed to produce results")
         transcription = result.get("text", "Error: Transcription result empty")
+        logger.info(f"Transcription completed successfully: '{transcription[:100]}...'")
         processing_step = "Success"
     except FileNotFoundError as e:
+        logger.error(f"!!! File not found error (Step: {processing_step}): {e}"); transcription = f"Error: Input file not found ({e})"
     except ValueError as e:
+         logger.error(f"!!! Value error (Step: {processing_step}): {e}"); transcription = f"Error: Unsupported file type ({e})"
     except TypeError as e:
+         logger.error(f"!!! Type error (Step: {processing_step}): {e}"); transcription = f"Error: Invalid input provided ({e})"
     except RuntimeError as e:
+         logger.error(f"!!! Runtime error (Step: {processing_step}): {e}"); logger.error(traceback.format_exc()); transcription = f"Error during processing: {e}"
     except Exception as e:
+        logger.error(f"!!! Unexpected error (Step: {processing_step}): {str(e)}"); logger.error(traceback.format_exc()); transcription = f"Error processing the file: An unexpected error occurred."
     finally:
+        logger.debug(f"--- Cleaning up {len(temp_files_to_clean)} temp files for transcription ---")
         for temp_file in temp_files_to_clean:
             try:
+                if os.path.exists(temp_file): os.remove(temp_file); logger.debug(f"Cleaned: {temp_file}")
+            except Exception as e: logger.warning(f"Could not remove temp file {temp_file}: {e}")
+        logger.debug("--- Finished transcription cleanup ---")
         return transcription
 @lru_cache(maxsize=16)
 def read_document(document_path):
     """Read the text content of a document (PDF, DOCX, XLSX/XLS, CSV).

     Args:
         document_path: Filesystem path of the document to read.

     Returns:
         The extracted text. For unsupported extensions a fixed explanatory
         message is returned, and on failure a message starting with
         ``"Error"`` is returned instead of raising.

     Note:
         Results are memoized per path by ``lru_cache``; this includes the
         error strings, so a path that failed once keeps returning the same
         message while it stays in the cache.
     """
     logger.info(f"Attempting to read document: {document_path}")
     try:
         if not os.path.exists(document_path):
             raise FileNotFoundError(f"Document not found: {document_path}")
         file_extension = os.path.splitext(document_path)[1].lower()
         logger.debug(f"Doc type: {file_extension}")
         content = ""
         if file_extension == ".pdf":
             logger.debug("Reading PDF using PyMuPDF...")
             doc = fitz.open(document_path)
             try:
                 if doc.is_encrypted:
                     logger.warning(f"PDF {document_path} encrypted. Trying empty password.")
                     if not doc.authenticate(""):
                         raise ValueError("Encrypted PDF cannot be read.")
                 content = "\n".join([page.get_text() for page in doc])
             finally:
                 # Release the file handle even when authentication or text
                 # extraction fails.
                 doc.close()
         elif file_extension == ".docx":
             logger.debug("Reading DOCX using python-docx...")
             doc = docx.Document(document_path)
             content = "\n".join([p.text for p in doc.paragraphs])
         elif file_extension in (".xlsx", ".xls"):
             logger.debug("Reading Excel using pandas...")
             text_parts = []
             # The context manager closes the workbook once all sheets are read.
             with pd.ExcelFile(document_path) as xls:
                 for sheet_name in xls.sheet_names:
                     logger.debug(f"Reading sheet: {sheet_name}")
                     df = pd.read_excel(xls, sheet_name=sheet_name)
                     text_parts.append(f"--- Sheet: {sheet_name} ---\n{df.to_string()}")
             content = "\n\n".join(text_parts).strip()
         elif file_extension == ".csv":
             logger.debug("Reading CSV using pandas...")
             import chardet
             # Detect the encoding up front so every fallback below can rely
             # on ``encoding`` being bound.
             with open(document_path, 'rb') as f:
                 encoding = chardet.detect(f.read())['encoding']
             logger.debug(f"Detected CSV encoding: {encoding}")
             try:
                 df = pd.read_csv(document_path, encoding=encoding)
             except (pd.errors.ParserError, UnicodeDecodeError, LookupError) as e1:
                 logger.warning(f"CSV parse failed ({e1}), trying semicolon.")
                 try:
                     df = pd.read_csv(document_path, sep=';', encoding=encoding)
                 except Exception as e2:
                     logger.error(f"Also failed with semicolon ({e2}). Trying latin1.")
                     try:
                         df = pd.read_csv(document_path, encoding='latin1')
                     except Exception as e3:
                         raise ValueError(f"Failed to parse CSV: {e1}, {e2}, {e3}") from e3
             content = df.to_string()
         else:
             return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
         logger.info(f"Document read successfully. Length: {len(content)} chars.")
         return content
     except FileNotFoundError as e:
         logger.error(f"!!! File not found reading doc: {e}")
         return f"Error: Document file not found: {e}"
     except ValueError as e:
         logger.error(f"!!! Value error reading doc: {e}")
         return f"Error reading document: {e}"
     except Exception as e:
         logger.error(f"!!! Error reading doc: {str(e)}")
         logger.error(traceback.format_exc())
         return f"Error reading document: {str(e)}"
 @lru_cache(maxsize=16)
 def read_url(url):
     """Fetch a URL and return its main textual content.

     The page is downloaded, boilerplate tags (scripts, navigation, forms,
     ...) are dropped, and the text of the first matching main-content
     container is returned, falling back to ``<body>`` or the whole document.
     Output longer than 15000 characters is truncated.

     Args:
         url: The address to fetch; must start with ``http`` after stripping.

     Returns:
         The cleaned text, ``""`` for an empty or non-http ``url``, or a
         message starting with ``"Error"`` when fetching or parsing fails.

     Note:
         Results are memoized per URL by ``lru_cache``; this includes the
         error strings.
     """
     logger.info(f"Attempting to read URL: {url}")
     if not url or not url.strip().startswith('http'):
         return ""
     # Request the same stripped form that was validated above.
     target_url = url.strip()
     try:
         headers = {'User-Agent': 'Mozilla/5.0 ... Chrome/91...', 'Accept': 'text/html...', 'Accept-Language': 'en-US,en;q=0.9', 'Connection': 'keep-alive'}
         logger.debug(f"Sending GET to {url}")
         response = requests.get(target_url, headers=headers, timeout=20, allow_redirects=True)
         logger.debug(f"Response from {url}: {response.status_code}, CT: {response.headers.get('content-type')}")
         response.raise_for_status()
         content_type = response.headers.get('content-type', '').lower()
         if not ('html' in content_type or 'text' in content_type):
             return f"Error: URL content type ({content_type}) is not text/html."
         # requests reports ISO-8859-1 for text/* responses whose header has
         # no charset, which garbles UTF-8 pages. Only trust the header-derived
         # encoding when a charset was actually declared; otherwise sniff it.
         if 'charset' in content_type and response.encoding:
             detected_encoding = response.encoding
         else:
             detected_encoding = response.apparent_encoding or response.encoding
         try:
             html_content = response.content.decode(detected_encoding or 'utf-8', errors='ignore')
         except LookupError:
             # Unknown codec name reported by the server or the detector.
             html_content = response.content.decode('utf-8', errors='ignore')
         logger.debug(f"Parsing HTML ({len(html_content)} bytes) from {url}...")
         soup = BeautifulSoup(html_content, 'html.parser')
         tags_to_remove = ["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button", "link", "head"]
         for tag_name in tags_to_remove:
             for element in soup.find_all(tag_name):
                 element.extract()
         logger.debug("Finding main content container...")
         main_content = (
             soup.find("main")
             or soup.find("article")
             or soup.find("div", class_=["content", "main", "post-content", "entry-content", "article-body", "story-content"])
             or soup.find("div", id=["content", "main", "article", "story"])
         )
         if main_content:
             text = main_content.get_text(separator='\n', strip=True)
         else:
             body = soup.find("body")
             if body:
                 text = body.get_text(separator='\n', strip=True)
             else:
                 text = soup.get_text(separator='\n', strip=True)
         # Drop blank lines and surrounding whitespace.
         lines = [line.strip() for line in text.split('\n') if line.strip()]
         cleaned_text = "\n".join(lines)
         if not cleaned_text:
             return "Error: Could not extract text content from URL."
         max_chars = 15000
         if len(cleaned_text) > max_chars:
             final_text = cleaned_text[:max_chars] + "... [content truncated]"
         else:
             final_text = cleaned_text
         logger.info(f"Successfully read URL {url}. Final length: {len(final_text)}")
         return final_text
     except requests.exceptions.RequestException as e:
         logger.error(f"!!! Error fetching URL {url}: {e}")
         return f"Error reading URL: Could not fetch content ({e})"
     except Exception as e:
         logger.error(f"!!! Error parsing URL {url}: {e}")
         logger.error(traceback.format_exc())
         return f"Error reading URL: Could not parse content ({e})"
 def process_social_media_url(url):
     """Collect page text and an audio transcription for a social media URL.

     Two independent best-effort steps are run: reading the page text with
     ``read_url`` and downloading the media with
     ``download_social_media_video`` followed by transcription. A failure in
     either step is logged and does not prevent the other.

     Args:
         url: The address to process; must start with ``http`` after
             stripping.

     Returns:
         ``{"text": ..., "video": ...}`` (missing parts as ``""``) when at
         least one step produced content, otherwise ``None``.
     """
     logger.info(f"--- Starting processing for social media URL: {url} ---")
     if not url or not url.strip().startswith('http'):
         return None
     text_content = None
     video_transcription = None
     temp_audio_file = None

     try:
         logger.debug(f"Attempting text read from social URL: {url}")
         text_content_result = read_url(url)
         # read_url reports failures both as "Error: ..." and as
         # "Error reading URL: ...", so match on the shared "Error" prefix
         # (the same rule used for transcription results below).
         if text_content_result and not text_content_result.startswith("Error"):
             text_content = text_content_result
             logger.debug("Text read success.")
         elif text_content_result:
             logger.warning(f"read_url error for {url}: {text_content_result}")
         else:
             logger.debug("No text via read_url.")
     except Exception as e:
         logger.error(f"!!! Exception text reading social URL {url}: {e}")
         logger.error(traceback.format_exc())

     try:
         logger.debug(f"Attempting audio download from social URL: {url}")
         temp_audio_file = download_social_media_video(url)
         if temp_audio_file:
             logger.info(f"Audio downloaded from {url} to {temp_audio_file}. Transcribing...")
             transcription_result = transcribe_audio_or_video(temp_audio_file)
             if transcription_result and not transcription_result.startswith("Error"):
                 video_transcription = transcription_result
                 logger.info("Transcription success.")
             elif transcription_result:
                 logger.warning(f"Transcription error for {url}: {transcription_result}")
             else:
                 logger.warning(f"Empty transcription for {url}.")
         else:
             logger.debug("No downloadable audio found.")
     except Exception as e:
         logger.error(f"!!! Exception audio processing social URL {url}: {e}")
         logger.error(traceback.format_exc())
     finally:
         # The downloaded audio is only needed for the transcription above.
         if temp_audio_file and os.path.exists(temp_audio_file):
             logger.debug(f"Cleaning up social temp audio: {temp_audio_file}")
             try:
                 os.remove(temp_audio_file)
             except Exception as e:
                 logger.warning(f"Failed cleanup {temp_audio_file}: {e}")

     logger.debug(f"--- Finished processing social URL: {url} ---")
     if text_content or video_transcription:
         return {"text": text_content or "", "video": video_transcription or ""}
     logger.info(f"No usable content retrieved for social URL: {url}")
     return None
+# ==============================================================
+# ========= SIMPLIFIED generate_news FOR DEBUGGING =============
+# ==============================================================
 @spaces.GPU(duration=10) # Short duration, for testing only
 def generate_news(instructions, facts, size, tone, *args):
     # NOTE(review): this listing is a diff excerpt; the `try:` that pairs with
     # the `except` below and the assignments of the returned variables are not
     # visible here — confirm against the full file.
         # --- DO NOT BUILD THE PROMPT ---
         # --- DO NOT CALL text_pipeline ---
         pass # Simply do nothing
+        logger.info("Simplified version: Reached end of try block.")
     except Exception as e:
         total_time = time.time() - request_start_time
     # Make sure to return two strings
     return generated_article, raw_transcriptions
+# ==============================================================
+# ================= END OF SIMPLIFIED VERSION ==================
+# ==============================================================
 # --- create_demo function remains the same as the previous version ---
         outputs_list = [news_output, transcriptions_output]
         logger.info("Setting up event handlers.")
+        # Make sure the button calls the generate_news function (even though it is simplified for now)
         generate_button.click(fn=generate_news, inputs=all_inputs, outputs=outputs_list)
         logger.info("Generate button click handler set.")