Spaces:

CamiloVega
/

NewsIA

Sleeping

App Files Community

CamiloVega committed on Mar 31

Commit

e71af4a

verified ·

1 Parent(s): 67e305d

Update app.py

Browse files

Files changed (1) hide show

app.py +325 -537

app.py CHANGED Viewed

@@ -58,25 +58,80 @@ class ModelManager:
             logger.info("Initializing ModelManager attributes.")
             self.tokenizer = None
             self.model = None
-            self.text_pipeline = None # Renamed for clarity
             self.whisper_model = None
-            self._initialized = True
             self.last_used = time.time()
             self.llm_loading = False
             self.whisper_loading = False
-    @spaces.GPU(duration=120) # Increased duration for potentially long loads
     def initialize_llm(self):
         """Initialize LLM model with standard transformers"""
         logger.info("Attempting to initialize LLM.")
         if self.llm_loading:
             logger.info("LLM initialization already in progress. Skipping.")
-            return True # Assume it will succeed or fail elsewhere
-        if self.tokenizer and self.model and self.text_pipeline:
             logger.info("LLM already initialized.")
             self.last_used = time.time()
             return True
         self.llm_loading = True
         logger.info("Starting LLM initialization...")
         try:
@@ -84,114 +139,90 @@ class ModelManager:
             logger.info(f"Using LLM model: {MODEL_NAME}")
             logger.info("Loading LLM tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                MODEL_NAME,
-                token=HUGGINGFACE_TOKEN,
-                use_fast=True
-            )
             logger.info("LLM tokenizer loaded.")
             if self.tokenizer.pad_token is None:
-                logger.info("Setting pad_token to eos_token for LLM tokenizer.")
                 self.tokenizer.pad_token = self.tokenizer.eos_token
             logger.info("Loading LLM model...")
             self.model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME,
-                token=HUGGINGFACE_TOKEN,
-                device_map="auto",
-                torch_dtype=torch.float16,
-                low_cpu_mem_usage=True,
-                offload_folder="offload",
-                offload_state_dict=True
             )
             logger.info("LLM model loaded.")
             logger.info("Creating LLM text generation pipeline...")
             self.text_pipeline = pipeline(
-                "text-generation",
-                model=self.model,
-                tokenizer=self.tokenizer,
-                torch_dtype=torch.float16,
-                device_map="auto",
-                max_length=1024 # Default max length
             )
             logger.info("LLM text generation pipeline created.")
             logger.info("LLM initialized successfully.")
             self.last_used = time.time()
             self.llm_loading = False
             return True
         except Exception as e:
             logger.error(f"!!! ERROR during LLM initialization: {str(e)}")
-            logger.error(traceback.format_exc()) # Log full traceback
             logger.error("Resetting potentially partially loaded LLM components due to error.")
-            self.tokenizer = None
-            self.model = None
-            self.text_pipeline = None
-            if torch.cuda.is_available():
-                logger.info("Clearing CUDA cache after LLM init error.")
-                torch.cuda.empty_cache()
-            gc.collect()
             self.llm_loading = False
-            raise # Re-raise the exception to signal failure
-    @spaces.GPU(duration=120) # Increased duration
     def initialize_whisper(self):
         """Initialize Whisper model for audio transcription"""
         logger.info("Attempting to initialize Whisper.")
         if self.whisper_loading:
             logger.info("Whisper initialization already in progress. Skipping.")
             return True
-        if self.whisper_model:
              logger.info("Whisper already initialized.")
              self.last_used = time.time()
              return True
         self.whisper_loading = True
         logger.info("Starting Whisper initialization...")
         try:
-            WHISPER_MODEL_NAME = "tiny" # Consider "base" for better accuracy if "tiny" struggles
             logger.info(f"Loading Whisper model: {WHISPER_MODEL_NAME}")
-            # Specify weights_only=True to address the FutureWarning
-            # Note: Whisper's load_model might not directly support weights_only yet.
-            # If it errors, remove the weights_only=True. The warning is mainly informative.
-            # Let's attempt without weights_only first as whisper might handle it internally
             self.whisper_model = whisper.load_model(
-                WHISPER_MODEL_NAME,
-                device="cuda" if torch.cuda.is_available() else "cpu",
-                download_root="/tmp/whisper" # Use persistent storage if available/needed
             )
             logger.info(f"Whisper model '{WHISPER_MODEL_NAME}' loaded successfully.")
             self.last_used = time.time()
             self.whisper_loading = False
             return True
         except Exception as e:
             logger.error(f"!!! ERROR during Whisper initialization: {str(e)}")
             logger.error(traceback.format_exc())
             logger.error("Resetting potentially partially loaded Whisper components due to error.")
-            self.whisper_model = None
-            if torch.cuda.is_available():
-                logger.info("Clearing CUDA cache after Whisper init error.")
-                torch.cuda.empty_cache()
-            gc.collect()
             self.whisper_loading = False
             raise
     def check_llm_initialized(self):
         """Check if LLM is initialized and initialize if needed"""
         logger.info("Checking if LLM is initialized.")
-        if self.tokenizer is None or self.model is None or self.text_pipeline is None:
             logger.info("LLM not initialized, attempting initialization...")
-            if not self.llm_loading: # Prevent re-entry if already loading
                  self.initialize_llm() # This will raise error if it fails
                  logger.info("LLM initialization completed by check_llm_initialized.")
             else:
                  logger.info("LLM initialization is already in progress by another request. Waiting briefly.")
-                 # Optional: Wait a bit for the other process to finish
-                 time.sleep(10) # Increased wait time
-                 if self.tokenizer is None or self.model is None or self.text_pipeline is None:
                      logger.error("LLM initialization timed out or failed after waiting.")
                      raise RuntimeError("LLM initialization timed out or failed.")
                  else:
@@ -200,18 +231,19 @@ class ModelManager:
             logger.info("LLM was already initialized.")
         self.last_used = time.time()
     def check_whisper_initialized(self):
         """Check if Whisper model is initialized and initialize if needed"""
         logger.info("Checking if Whisper is initialized.")
-        if self.whisper_model is None:
             logger.info("Whisper model not initialized, attempting initialization...")
-            if not self.whisper_loading: # Prevent re-entry
                 self.initialize_whisper() # This will raise error if it fails
                 logger.info("Whisper initialization completed by check_whisper_initialized.")
             else:
                 logger.info("Whisper initialization is already in progress by another request. Waiting briefly.")
-                time.sleep(10) # Increased wait time
-                if self.whisper_model is None:
                     logger.error("Whisper initialization timed out or failed after waiting.")
                     raise RuntimeError("Whisper initialization timed out or failed.")
                 else:
@@ -221,62 +253,23 @@ class ModelManager:
         self.last_used = time.time()
     def reset_models(self, force=False):
-        """Reset models to free memory if they haven't been used recently"""
-        current_time = time.time()
-        should_reset = force or (current_time - self.last_used > 600) # 10 minutes idle threshold
-        logger.info(f"Checking if models should be reset. Force: {force}, Idle time: {current_time - self.last_used:.0f}s, Should reset: {should_reset}")
-        if should_reset:
-            try:
-                logger.info("--- Resetting models to free memory ---")
-                if hasattr(self, 'model') and self.model is not None:
-                    del self.model
-                    self.model = None
-                    logger.info("LLM model deleted.")
-                else: logger.info("LLM model was None or not found.")
-                if hasattr(self, 'tokenizer') and self.tokenizer is not None:
-                    del self.tokenizer
-                    self.tokenizer = None
-                    logger.info("LLM tokenizer deleted.")
-                else: logger.info("LLM tokenizer was None or not found.")
-                if hasattr(self, 'text_pipeline') and self.text_pipeline is not None:
-                    del self.text_pipeline
-                    self.text_pipeline = None
-                    logger.info("LLM pipeline deleted.")
-                else: logger.info("LLM pipeline was None or not found.")
-                if hasattr(self, 'whisper_model') and self.whisper_model is not None:
-                    del self.whisper_model
-                    self.whisper_model = None
-                    logger.info("Whisper model deleted.")
-                else: logger.info("Whisper model was None or not found.")
-                # Explicitly clear CUDA cache and collect garbage
-                if torch.cuda.is_available():
-                    logger.info("Clearing CUDA cache...")
-                    torch.cuda.empty_cache()
-                    logger.info("CUDA cache cleared.")
-                else:
-                    logger.info("CUDA not available, skipping cache clear.")
-                logger.info("Running garbage collection...")
-                collected_count = gc.collect()
-                logger.info(f"Garbage collected ({collected_count} objects). Models reset successfully.")
-                self._initialized = False # Mark as uninitialized so they reload on next use
-            except Exception as e:
-                logger.error(f"!!! ERROR during model reset: {str(e)}")
-                logger.error(traceback.format_exc())
-        else:
-            logger.info("Skipping model reset (not forced and not idle long enough).")
-# Create global model manager instance
-logger.info("Creating global ModelManager instance.")
-model_manager = ModelManager()
 @lru_cache(maxsize=16) # Reduced cache size slightly
 def download_social_media_video(url):
@@ -460,18 +453,22 @@ def transcribe_audio_or_video(file_input):
     original_input_path = None
     temp_files_to_clean = []
     processing_step = "Initialization"
     try:
         processing_step = "Whisper Model Check"
-        logger.info("Checking/Initializing Whisper model...")
         model_manager.check_whisper_initialized() # Will raise error if fails
-        logger.info("Whisper model is ready.")
         if file_input is None:
             logger.info("No file input provided for transcription. Returning empty string.")
-            return "" # Return empty string for None input
-        # Determine input type and get file path
         processing_step = "Input Type Handling"
         if isinstance(file_input, str): # Input is a path
             original_input_path = file_input
@@ -495,7 +492,6 @@ def transcribe_audio_or_video(file_input):
         file_extension = os.path.splitext(input_path)[1].lower()
         logger.info(f"File extension: {file_extension}")
-        # Check if it's a video file that needs conversion
         processing_step = "Video Conversion Check"
         if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
             logger.info(f"Detected video file ({file_extension}), attempting conversion to audio...")
@@ -510,12 +506,10 @@ def transcribe_audio_or_video(file_input):
             logger.error(f"Unsupported file extension for transcription: {file_extension}")
             raise ValueError(f"Unsupported file type: {file_extension}")
-        # Preprocess the audio (optional)
         processing_step = "Audio Preprocessing"
         try:
             logger.info(f"Attempting to preprocess audio file: {audio_file_to_process}")
             preprocessed_audio_path = preprocess_audio(audio_file_to_process)
-            # If preprocessing creates a new file different from the input, add it to cleanup
             if preprocessed_audio_path != audio_file_to_process:
                  logger.info("Preprocessing created a new file, adding to cleanup list.")
                  temp_files_to_clean.append(preprocessed_audio_path)
@@ -523,25 +517,22 @@ def transcribe_audio_or_video(file_input):
             logger.info(f"Audio preprocessing successful. File to transcribe: {audio_file_to_transcribe}")
         except Exception as preprocess_err:
             logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio for transcription.")
-            logger.warning(traceback.format_exc()) # Log warning traceback
-            audio_file_to_transcribe = audio_file_to_process # Fallback
-        processing_step = "Transcription"
-        logger.info(f"Starting transcription for: {audio_file_to_transcribe}")
         if not os.path.exists(audio_file_to_transcribe):
             logger.error(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
             raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
-        # Perform transcription
         logger.info("Calling Whisper model transcribe method...")
-        with torch.inference_mode(): # Ensure inference mode for efficiency
-            # Use fp16 if available on CUDA
             use_fp16 = torch.cuda.is_available()
             logger.info(f"Using fp16 for transcription: {use_fp16}")
             result = model_manager.whisper_model.transcribe(
-                audio_file_to_transcribe,
-                fp16=use_fp16
-                # language="en" # Optional: specify language if known
             )
             logger.info("Whisper model transcribe method finished.")
             if not result or "text" not in result:
@@ -553,42 +544,46 @@ def transcribe_audio_or_video(file_input):
         logger.info(f"Transcription completed successfully: '{log_transcription}'")
         processing_step = "Success"
-        return transcription
     except FileNotFoundError as e:
         logger.error(f"!!! File not found error during transcription (Step: {processing_step}): {e}")
         logger.error(traceback.format_exc())
-        return f"Error: Input file not found ({e})"
     except ValueError as e:
          logger.error(f"!!! Value error during transcription (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
-         return f"Error: Unsupported file type ({e})"
     except TypeError as e:
          logger.error(f"!!! Type error during transcription setup (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
-         return f"Error: Invalid input provided ({e})"
     except RuntimeError as e:
          logger.error(f"!!! Runtime error during transcription (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
-         return f"Error during processing: {e}"
     except Exception as e:
         logger.error(f"!!! Unexpected error during transcription (Step: {processing_step}): {str(e)}")
         logger.error(traceback.format_exc())
-        return f"Error processing the file: An unexpected error occurred."
     finally:
-        # Clean up all temporary files created during the process
         logger.info(f"--- Cleaning up temporary files for transcription process ({len(temp_files_to_clean)} files) ---")
         for temp_file in temp_files_to_clean:
             try:
                 if os.path.exists(temp_file):
                     os.remove(temp_file)
                     logger.info(f"Cleaned up temporary file: {temp_file}")
-                else:
-                    logger.info(f"Temporary file already removed or never created: {temp_file}")
             except Exception as e:
                 logger.warning(f"Could not remove temporary file {temp_file}: {str(e)}")
         logger.info("--- Finished transcription process cleanup ---")
 @lru_cache(maxsize=16)
@@ -607,6 +602,13 @@ def read_document(document_path):
         if file_extension == ".pdf":
             logger.info("Reading PDF document using PyMuPDF (fitz)...")
             doc = fitz.open(document_path)
             content = "\n".join([page.get_text() for page in doc])
             doc.close()
             logger.info(f"PDF read successfully. Length: {len(content)} chars.")
@@ -627,16 +629,26 @@ def read_document(document_path):
             logger.info(f"Excel read successfully. Length: {len(content)} chars.")
         elif file_extension == ".csv":
             logger.info("Reading CSV document using pandas...")
-            # Try detecting separator
             try:
                 logger.info("Attempting CSV read with comma separator...")
-                df = pd.read_csv(document_path)
-            except pd.errors.ParserError:
-                 logger.warning(f"Could not parse CSV {document_path} with comma separator, trying semicolon.")
-                 df = pd.read_csv(document_path, sep=';')
-            except Exception as csv_err: # Catch other potential pandas errors
-                 logger.error(f"Error reading CSV {document_path}: {csv_err}")
-                 raise
             content = df.to_string()
             logger.info(f"CSV read successfully. Length: {len(content)} chars.")
         else:
@@ -647,8 +659,11 @@ def read_document(document_path):
     except FileNotFoundError as e:
         logger.error(f"!!! File not found error while reading document: {e}")
-        # logger.error(traceback.format_exc()) # Traceback might be less useful here
         return f"Error: Document file not found at {document_path}"
     except Exception as e:
         logger.error(f"!!! Error reading document {document_path}: {str(e)}")
         logger.error(traceback.format_exc())
@@ -660,38 +675,41 @@ def read_url(url):
     logger.info(f"Attempting to read URL: {url}")
     if not url or not url.strip().startswith('http'):
         logger.warning(f"Invalid or empty URL provided: '{url}'")
-        return "" # Return empty for invalid or empty URLs
     try:
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
         logger.info(f"Sending GET request to {url} with headers: {headers}")
-        # Increased timeout
         response = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
-        logger.info(f"Received response from {url}. Status code: {response.status_code}")
-        response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
-        # Check content type - proceed only if likely HTML/text
         content_type = response.headers.get('content-type', '').lower()
-        logger.info(f"URL content type: {content_type}")
         if not ('html' in content_type or 'text' in content_type):
              logger.warning(f"URL {url} has non-text content type: {content_type}. Skipping.")
              return f"Error: URL content type ({content_type}) is not text/html."
         logger.info(f"Parsing HTML content from {url} using BeautifulSoup...")
-        soup = BeautifulSoup(response.content, 'html.parser')
         logger.info("HTML parsed.")
-        # Remove non-content elements like scripts, styles, nav, footers etc.
         logger.info("Removing script, style, and other non-content tags...")
-        tags_to_remove = ["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button"]
         for tag_name in tags_to_remove:
             for element in soup.find_all(tag_name):
                 element.extract()
         logger.info("Non-content tags removed.")
-        # Attempt to find main content area (common tags/attributes)
         logger.info("Attempting to find main content container...")
         main_content = (
             soup.find("main") or
@@ -710,23 +728,19 @@ def read_url(url):
             if body:
                 logger.info("Extracting text from body.")
                 text = body.get_text(separator='\n', strip=True)
-            else: # Very basic fallback
                  logger.warning(f"No body tag found for {url}. Falling back to all text.")
                  text = soup.get_text(separator='\n', strip=True)
-        # Clean up whitespace: replace multiple newlines/spaces with single ones
         logger.info("Cleaning extracted text whitespace...")
         lines = [line.strip() for line in text.split('\n') if line.strip()]
         cleaned_text = "\n".join(lines)
-        # cleaned_text = ' '.join(cleaned_text.split()) # Consolidate spaces - might merge paragraphs inappropriately, use newline join instead
         logger.info(f"Text cleaning complete. Initial length: {len(text)}, Cleaned length: {len(cleaned_text)}")
         if not cleaned_text:
             logger.warning(f"Could not extract meaningful text from URL: {url}")
             return "Error: Could not extract text content from URL."
-        # Limit content size to avoid overwhelming the LLM
         max_chars = 15000
         if len(cleaned_text) > max_chars:
             logger.info(f"URL content is long ({len(cleaned_text)} chars), truncating to {max_chars} characters.")
@@ -738,7 +752,6 @@ def read_url(url):
         return final_text
     except requests.exceptions.RequestException as e:
         logger.error(f"!!! Error fetching URL {url}: {str(e)}")
-        # logger.error(traceback.format_exc()) # Traceback might not be needed for RequestException
         return f"Error reading URL: Could not fetch content ({e})"
     except Exception as e:
         logger.error(f"!!! Error parsing URL {url}: {str(e)}")
@@ -754,7 +767,6 @@ def process_social_media_url(url):
     text_content = None
     video_transcription = None
-    error_occurred = False
     temp_audio_file = None
     # 1. Try extracting text content using read_url
@@ -766,13 +778,11 @@ def process_social_media_url(url):
              logger.info(f"Successfully read text content from {url}. Length: {len(text_content)}")
         elif text_content_result:
              logger.warning(f"read_url returned an error for {url}: {text_content_result}")
-             error_occurred = True # Mark as error but continue
         else:
             logger.info(f"No text content extracted by read_url for {url}.")
     except Exception as e:
         logger.error(f"!!! Exception during text content extraction from social URL {url}: {e}")
         logger.error(traceback.format_exc())
-        error_occurred = True
     # 2. Try downloading and transcribing potential video/audio content
     logger.info(f"Attempting to download audio/video content from social URL: {url}")
@@ -780,14 +790,12 @@ def process_social_media_url(url):
         temp_audio_file = download_social_media_video(url) # Returns path or None
         if temp_audio_file:
             logger.info(f"Audio downloaded from {url} to {temp_audio_file}. Proceeding to transcription.")
-            # Transcribe the downloaded audio file
             transcription_result = transcribe_audio_or_video(temp_audio_file) # Handles errors internally
             if transcription_result and not transcription_result.startswith("Error"):
                 video_transcription = transcription_result
                 logger.info(f"Successfully transcribed audio from {url}. Length: {len(video_transcription)}")
             elif transcription_result:
                  logger.warning(f"Transcription returned an error for audio from {url}: {transcription_result}")
-                 error_occurred = True # Mark as error but maybe text content worked
             else:
                  logger.warning(f"Transcription returned empty result for audio from {url}.")
         else:
@@ -795,7 +803,6 @@ def process_social_media_url(url):
     except Exception as e:
         logger.error(f"!!! Exception during video/audio processing for social URL {url}: {e}")
         logger.error(traceback.format_exc())
-        error_occurred = True
     finally:
          # Clean up downloaded file if it exists
         if temp_audio_file and os.path.exists(temp_audio_file):
@@ -808,11 +815,16 @@ def process_social_media_url(url):
     # Return results
     logger.info(f"--- Finished processing social media URL: {url} ---")
-    # Return dict even if empty, let caller decide if it's useful
-    return {
-        "text": text_content or "", # Ensure string type
-        "video": video_transcription or "" # Ensure string type
-    }
 @spaces.GPU(duration=300) # Allow more time for generation
@@ -825,6 +837,7 @@ def generate_news(instructions, facts, size, tone, *args):
     try:
         # --- Parameter Logging & Basic Validation ---
         logger.info(f"Received Instructions: {'Yes' if instructions else 'No'}")
         logger.info(f"Received Facts: {'Yes' if facts else 'No'}")
         logger.info(f"Requested Size: {size}, Tone: {tone}")
@@ -836,8 +849,8 @@ def generate_news(instructions, facts, size, tone, *args):
             size = 250
         logger.info(f"Using Size: {size}")
         # --- Argument Parsing ---
         logger.info("Parsing dynamic arguments...")
         num_docs = 5
         num_audio_sources = 5
@@ -855,7 +868,6 @@ def generate_news(instructions, facts, size, tone, *args):
              logger.warning(f"Received more arguments ({len(args_list)}) than expected ({total_expected_args}). Truncating.")
              args_list = args_list[:total_expected_args]
-        # Slice arguments based on the expected order
         doc_files = args_list[0:num_docs]
         audio_inputs_flat = args_list[num_docs : num_docs + (num_audio_sources * num_audio_inputs_per_source)]
         url_inputs = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) : num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls]
@@ -865,14 +877,12 @@ def generate_news(instructions, facts, size, tone, *args):
         knowledge_base = {
             "instructions": instructions or "No specific instructions provided.",
             "facts": facts or "No specific facts provided.",
-            "document_content": [],
-            "audio_data": [], # Will store dicts: {file_path, name, position, original_filename}
-            "url_content": [],
-            "social_content": [] # Will store dicts from process_social_media_url
         }
-        # --- Process Document Inputs ---
         logger.info("--- Processing document inputs ---")
         doc_counter = 0
         for i, doc_file in enumerate(doc_files):
@@ -880,31 +890,26 @@ def generate_news(instructions, facts, size, tone, *args):
                 doc_filename = os.path.basename(doc_file.name)
                 logger.info(f"Attempting to read document {i+1}: {doc_filename} (Path: {doc_file.name})")
                 try:
-                    content = read_document(doc_file.name) # doc_file.name is the temp path
                     if content and content.startswith("Error:"):
                          logger.warning(f"Skipping document {i+1} ({doc_filename}) due to read error: {content}")
                          raw_transcriptions += f"[Document {i+1}: {doc_filename}] Error reading: {content}\n\n"
                     elif content:
                         doc_excerpt = (content[:1000] + "... [document truncated]") if len(content) > 1000 else content
                         knowledge_base["document_content"].append(f"[Document {i+1} Source: {doc_filename}]\n{doc_excerpt}")
-                        logger.info(f"Successfully processed document {i+1}. Added excerpt to knowledge base.")
                         doc_counter += 1
-                        # Add full content to raw_transcriptions log? Might be too verbose.
-                        # raw_transcriptions += f"[Document {i+1}: {doc_filename}]\n{content}\n\n"
                     else:
-                         logger.warning(f"Skipping document {i+1} ({doc_filename}) because content is empty after reading.")
                          raw_transcriptions += f"[Document {i+1}: {doc_filename}] Read successfully but content is empty.\n\n"
                 except Exception as e:
                     logger.error(f"!!! FAILED to process document {i+1} ({doc_filename}): {e}")
                     logger.error(traceback.format_exc())
                     raw_transcriptions += f"[Document {i+1}: {doc_filename}] CRITICAL Error during processing: {e}\n\n"
-            else:
-                 logger.info(f"Skipping document slot {i+1}: No file provided or invalid file object.")
-        logger.info(f"--- Finished processing document inputs. {doc_counter} documents added. ---")
-        # Gradio handles cleanup of the uploaded temp file doc_file.name
-        # --- Process URL Inputs ---
         logger.info("--- Processing URL inputs ---")
         url_counter = 0
         for i, url in enumerate(url_inputs):
@@ -916,59 +921,42 @@ def generate_news(instructions, facts, size, tone, *args):
                          logger.warning(f"Skipping URL {i+1} ({url}) due to read error: {content}")
                          raw_transcriptions += f"[URL {i+1}: {url}] Error reading: {content}\n\n"
                     elif content:
-                        # Content is already truncated in read_url if needed
                         knowledge_base["url_content"].append(f"[URL {i+1} Source: {url}]\n{content}")
-                        logger.info(f"Successfully processed URL {i+1}. Added content to knowledge base.")
                         url_counter += 1
                     else:
-                         logger.warning(f"Skipping URL {i+1} ({url}) because content is empty after reading.")
                          raw_transcriptions += f"[URL {i+1}: {url}] Read successfully but content is empty.\n\n"
                 except Exception as e:
                      logger.error(f"!!! FAILED to process URL {i+1} ({url}): {e}")
                      logger.error(traceback.format_exc())
                      raw_transcriptions += f"[URL {i+1}: {url}] CRITICAL Error during processing: {e}\n\n"
-             elif url and isinstance(url, str) and url.strip():
-                 logger.warning(f"Skipping URL slot {i+1}: Input '{url}' is not a valid HTTP/HTTPS URL.")
-             else:
-                 logger.info(f"Skipping URL slot {i+1}: No URL provided.")
-        logger.info(f"--- Finished processing URL inputs. {url_counter} URLs added. ---")
-        # --- Process Audio/Video Inputs ---
         logger.info("--- Processing audio/video inputs (collecting info) ---")
         has_audio_source = False
         audio_counter = 0
         for i in range(num_audio_sources):
             start_idx = i * num_audio_inputs_per_source
-            # Check if indices are valid before accessing
             if start_idx + 2 < len(audio_inputs_flat):
                 audio_file = audio_inputs_flat[start_idx]
                 name = audio_inputs_flat[start_idx + 1] or f"Unnamed Audio Source {i+1}"
                 position = audio_inputs_flat[start_idx + 2] or "Role N/A"
                 if audio_file and hasattr(audio_file, 'name') and audio_file.name:
                     audio_filename = os.path.basename(audio_file.name)
                     logger.info(f"Found audio/video source {i+1}: {name} ({position}) - File: {audio_filename} (Path: {audio_file.name})")
-                    # Store info for transcription later
-                    knowledge_base["audio_data"].append({
-                        "file_path": audio_file.name, # Use the temp path
-                        "name": name,
-                        "position": position,
-                        "original_filename": audio_filename
-                    })
                     has_audio_source = True
                     audio_counter += 1
-                else:
-                     logger.info(f"Skipping audio source slot {i+1}: No file provided or invalid file object.")
-            else:
-                logger.warning(f"Index out of bounds when processing audio source {i+1}. Check argument parsing logic.")
-                break # Stop processing further audio if indexing is wrong
-        logger.info(f"--- Finished collecting audio/video input info. {audio_counter} sources found. Transcription needed: {has_audio_source} ---")
-        # --- Process Social Media Inputs ---
         logger.info("--- Processing social media inputs ---")
-        has_social_source = False
         social_counter = 0
         for i in range(num_social_sources):
              start_idx = i * num_social_inputs_per_source
@@ -976,168 +964,93 @@ def generate_news(instructions, facts, size, tone, *args):
                  social_url = social_inputs_flat[start_idx]
                  social_name = social_inputs_flat[start_idx + 1] or f"Unnamed Social Source {i+1}"
                  social_context = social_inputs_flat[start_idx + 2] or "Context N/A"
                  if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
                      logger.info(f"Attempting to process social media URL {i+1}: {social_url} ({social_name}, {social_context})")
                      try:
-                         social_data = process_social_media_url(social_url) # Returns dict or None
-                         if social_data and (social_data.get("text") or social_data.get("video")):
-                             logger.info(f"Successfully processed social URL {i+1}. Text found: {bool(social_data.get('text'))}, Video transcription found: {bool(social_data.get('video'))}")
-                             knowledge_base["social_content"].append({
-                                "url": social_url,
-                                "name": social_name,
-                                "context": social_context,
-                                "text": social_data.get("text", ""),
-                                "video_transcription": social_data.get("video", "") # Store potential transcription
-                            })
-                             has_social_source = True # Mark even if only text is found
-                             social_counter += 1
-                         elif social_data:
-                             logger.warning(f"Processed social URL {i+1} ({social_url}) but found no text or video content.")
-                             raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Processed but no content found.\n\n"
-                         else:
-                             # process_social_media_url returning None implies an error occurred during processing
-                             logger.error(f"Processing failed for social URL {i+1} ({social_url}). See previous logs.")
-                             raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Error during processing.\n\n"
                      except Exception as e:
                          logger.error(f"!!! FAILED to process social URL {i+1} ({social_url}): {e}")
                          logger.error(traceback.format_exc())
                          raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] CRITICAL Error during processing: {e}\n\n"
-                 elif social_url and isinstance(social_url, str) and social_url.strip():
-                     logger.warning(f"Skipping social media slot {i+1}: Input '{social_url}' is not a valid HTTP/HTTPS URL.")
-                 else:
-                    logger.info(f"Skipping social media slot {i+1}: No URL provided.")
-             else:
-                  logger.warning(f"Index out of bounds when processing social source {i+1}. Check argument parsing logic.")
-                  break
-        logger.info(f"--- Finished processing social media inputs. {social_counter} sources added. ---")
         # --- Transcribe Audio/Video (Conditional) ---
         transcriptions_for_prompt = ""
         if has_audio_source:
             logger.info("--- Starting Audio Transcription Phase ---")
-            try:
-                # Ensure Whisper is ready (check_whisper_initialized raises error if fails)
-                logger.info("Ensuring Whisper model is initialized for transcription...")
-                model_manager.check_whisper_initialized()
-                logger.info("Whisper model confirmed ready.")
-                for idx, data in enumerate(knowledge_base["audio_data"]):
-                     audio_filename = data['original_filename']
-                     logger.info(f"Attempting transcription for audio source {idx+1}: {audio_filename} ({data['name']}, {data['position']})")
-                     try:
-                         # Call the robust transcription function
-                         transcription = transcribe_audio_or_video(data["file_path"])
-                         if transcription and not transcription.startswith("Error"):
-                             logger.info(f"Transcription successful for audio {idx+1}. Length: {len(transcription)}")
-                             quote = f'"{transcription}" - {data["name"]}, {data["position"]}'
-                             transcriptions_for_prompt += f"{quote}\n\n"
-                             raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n"{transcription}"\n\n'
-                         elif transcription:
-                             logger.warning(f"Transcription failed or returned error for audio source {idx+1} ({audio_filename}): {transcription}")
-                             raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Error during transcription: {transcription}]\n\n'
-                         else:
-                             logger.warning(f"Transcription returned empty result for audio source {idx+1} ({audio_filename}).")
-                             raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Transcription result was empty.]\n\n'
-                     except Exception as e:
-                         logger.error(f"!!! CRITICAL Error during transcription call for audio source {idx+1} ({audio_filename}): {e}")
-                         logger.error(traceback.format_exc())
-                         raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[CRITICAL Error during transcription: {e}]\n\n'
-                     # Gradio handles cleanup of the uploaded temp file audio_file.name based on the path stored
-            except Exception as whisper_init_err:
-                 # This catches errors from check_whisper_initialized if it failed
-                 logger.error(f"!!! FATAL: Whisper model could not be initialized. Skipping all audio transcriptions.")
-                 logger.error(traceback.format_exc())
-                 raw_transcriptions += f"\n\n[CRITICAL ERROR] Whisper model failed to load. Audio sources could not be transcribed: {whisper_init_err}\n\n"
-                 # Decide whether to continue without audio or return error immediately
-                 # For now, we continue and log the error.
             logger.info("--- Finished Audio Transcription Phase ---")
         else:
             logger.info("--- Skipping Audio Transcription Phase (no audio sources found) ---")
         # --- Add Social Media Content to Prompt Data ---
         logger.info("--- Adding social media content to prompt data ---")
         social_content_added_to_prompt = False
         for idx, data in enumerate(knowledge_base["social_content"]):
             source_id_log = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]'
             source_id_prompt = f'Social Media Post ({data["name"]}, {data["context"]} at {data["url"]}):'
             content_added_this_source = False
-            # Add text content if available
             if data["text"]:
                 text_excerpt = (data["text"][:500] + "...[text truncated]") if len(data["text"]) > 500 else data["text"]
                 social_text_prompt = f'{source_id_prompt}\nText Content:\n"{text_excerpt}"\n\n'
                 transcriptions_for_prompt += social_text_prompt
-                raw_transcriptions += f"{source_id_log}\nText Content:\n{data['text']}\n\n" # Log full text
-                logger.info(f"Added text excerpt from social source {idx+1} to prompt data.")
-                content_added_this_source = True
-                social_content_added_to_prompt = True
-            # Add video transcription if available
             if data["video_transcription"]:
                 social_video_prompt = f'{source_id_prompt}\nVideo Transcription:\n"{data["video_transcription"]}"\n\n'
                 transcriptions_for_prompt += social_video_prompt
                 raw_transcriptions += f"{source_id_log}\nVideo Transcription:\n{data['video_transcription']}\n\n"
-                logger.info(f"Added video transcription from social source {idx+1} to prompt data.")
-                content_added_this_source = True
-                social_content_added_to_prompt = True
-            if not content_added_this_source:
-                 logger.info(f"No usable text or video transcription found for social source {idx+1} ({data['url']}).")
-                 # No need to add error to raw_transcriptions here, lack of content is logged earlier
-        if not social_content_added_to_prompt:
-             logger.info("No content from social media sources was added to the prompt data.")
-        logger.info("--- Finished adding social media content to prompt data ---")
         # --- Prepare Final Prompt ---
         logger.info("--- Preparing final prompt for LLM ---")
         document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided or processed successfully."
         url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided or processed successfully."
         transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions or social media content available."
-        # Construct the prompt for the LLM
-        prompt = f"""<s>[INST] You are a professional news writer. Your task is to synthesize information from various sources into a coherent news article.
-Primary Instructions: {knowledge_base["instructions"]}
-Key Facts to Include: {knowledge_base["facts"]}
-Supporting Information:
-Document Content Summary:
-{document_summary}
-Web Content Summary (from URLs):
-{url_summary}
-Transcribed Quotes/Content (Use these directly or indirectly):
-{transcription_summary}
-Article Requirements:
-- Title: Create a concise and informative title for the article.
-- Hook: Write a compelling 15-word (approx.) hook sentence that complements the title.
-- Body: Write the main news article body, aiming for approximately {size} words.
-- Tone: Adopt a {tone} tone throughout the article.
-- 5 Ws: Ensure the first paragraph addresses the core questions (Who, What, When, Where, Why).
-- Quotes: Incorporate relevant information from the 'Transcribed Quotes/Content' section. Aim to use quotes where appropriate, but synthesize information rather than just listing quotes. Use quotation marks (" ") for direct quotes attributed correctly (e.g., based on name/position provided).
-- Style: Adhere to a professional journalistic style. Be objective and factual.
-- Accuracy: Do NOT invent information. Stick strictly to the provided facts, instructions, and source materials. If information is contradictory or missing, state that or omit the detail.
-- Structure: Organize the article logically with clear paragraphs.
-Begin the article now. [/INST]
-Article Draft:
-"""
-        # Log prompt length details
-        prompt_words = len(prompt.split())
-        prompt_chars = len(prompt)
         logger.info(f"Generated prompt length: {prompt_words} words / {prompt_chars} characters.")
-        # Log first/last few chars for verification, avoid logging full potentially huge prompt
         logger.debug(f"Prompt Start: {prompt[:200]}...")
         logger.debug(f"...Prompt End: {prompt[-200:]}")
         logger.info("--- Finished preparing final prompt ---")
@@ -1147,11 +1060,14 @@ Article Draft:
         logger.info("--- Starting LLM Generation Phase ---")
         generation_start_time = time.time()
-        # Ensure LLM is ready
         logger.info("Ensuring LLM is initialized for generation...")
         try:
             model_manager.check_llm_initialized() # Raises error if fails
-            logger.info("LLM confirmed ready.")
         except Exception as llm_init_err:
             logger.error(f"!!! FATAL: LLM could not be initialized. Cannot generate article.")
             logger.error(traceback.format_exc())
@@ -1159,29 +1075,24 @@ Article Draft:
         # Estimate max_new_tokens
         estimated_tokens_per_word = 1.5
-        max_new_tokens = int(size * estimated_tokens_per_word + 150) # size words + buffer for title/hook/etc.
-        model_max_length = 2048 # Check model card if different
-        # Simple length check for prompt tokens (more accurate requires tokenizer)
-        prompt_tokens_estimate = prompt_chars // 3 # Very rough estimate
-        available_tokens = model_max_length - prompt_tokens_estimate - 50 # Leave buffer
         max_new_tokens = min(max_new_tokens, available_tokens)
-        max_new_tokens = max(max_new_tokens, 100) # Ensure at least minimum generation length
         logger.info(f"Estimated prompt tokens: ~{prompt_tokens_estimate}. Model max length: {model_max_length}. Requesting max_new_tokens: {max_new_tokens}")
         try:
             logger.info("Calling LLM text generation pipeline...")
             outputs = model_manager.text_pipeline(
-                prompt,
-                max_new_tokens=max_new_tokens,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.95,
-                top_k=50,
-                repetition_penalty=1.15,
-                pad_token_id=model_manager.tokenizer.eos_token_id,
-                num_return_sequences=1
             )
             logger.info("LLM pipeline call finished.")
@@ -1189,40 +1100,39 @@ Article Draft:
                  logger.error("LLM pipeline returned invalid or empty output.")
                  raise RuntimeError("LLM generation failed: Pipeline returned empty or invalid output.")
-            # Extract generated text
             full_generated_text = outputs[0]['generated_text']
             logger.info(f"Raw generated text length: {len(full_generated_text)} chars.")
-            # logger.debug(f"Raw LLM Output:\n{full_generated_text}") # Careful logging full output
-            # Clean up the result by removing the prompt
             logger.info("Cleaning LLM output (removing prompt)...")
             inst_marker = "[/INST]"
             marker_pos = full_generated_text.find(inst_marker)
             if marker_pos != -1:
                 generated_article = full_generated_text[marker_pos + len(inst_marker):].strip()
-                # Further clean potentially leading "Article Draft:" if model included it
                 if generated_article.startswith("Article Draft:"):
                     generated_article = generated_article[len("Article Draft:"):].strip()
                 logger.info("Prompt removed successfully using '[/INST]' marker.")
             else:
-                 logger.warning("Prompt marker '[/INST]' not found in LLM output. Attempting fallback cleaning.")
-                 # Fallback: Try removing the input prompt string itself (less reliable for long prompts)
-                 # This is risky and might remove actual generated content if prompt is somehow repeated.
-                 # Let's just return the full output with a warning if marker not found.
-                 generated_article = full_generated_text # Keep full output
-                 logger.warning("Could not reliably remove prompt. Returning full generated text.")
             generation_time = time.time() - generation_start_time
             logger.info(f"News generation completed in {generation_time:.2f} seconds.")
             logger.info(f"Final article length: {len(generated_article)} characters.")
             logger.info("--- Finished LLM Generation Phase ---")
         except torch.cuda.OutOfMemoryError as oom_error:
              logger.error(f"!!! CUDA Out of Memory error during LLM generation: {oom_error}")
              logger.error(traceback.format_exc())
              logger.info("Attempting to reset models after OOM error...")
-             model_manager.reset_models(force=True) # Attempt to recover
-             raise RuntimeError("Generation failed due to insufficient GPU memory. Please try reducing article size or complexity.") from oom_error
         except Exception as gen_error:
             logger.error(f"!!! Error during text generation pipeline: {str(gen_error)}")
             logger.error(traceback.format_exc())
@@ -1230,79 +1140,53 @@ Article Draft:
         total_time = time.time() - request_start_time
         logger.info(f"--- generate_news function completed successfully in {total_time:.2f} seconds. ---")
-        # Return the generated article and the log of raw transcriptions
         return generated_article.strip(), raw_transcriptions.strip()
     except Exception as e:
         # Catch-all for any unexpected error during the entire generate_news flow
         total_time = time.time() - request_start_time
         logger.error(f"!!! UNHANDLED Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
         logger.error(traceback.format_exc())
-        # Attempt to reset models to recover state if possible
         try:
             logger.info("Attempting model reset due to unhandled error in generate_news.")
             model_manager.reset_models(force=True)
         except Exception as reset_error:
             logger.error(f"Failed to reset models after error: {str(reset_error)}")
-        # Return error messages to the UI
         error_message = f"Error generating the news article: An unexpected error occurred. Please check logs. ({str(e)})"
         transcription_log = raw_transcriptions.strip() + f"\n\n[CRITICAL ERROR] News generation failed unexpectedly: {str(e)}"
         return error_message, transcription_log
     finally:
-        # Optional: Log resource usage here if possible/needed
         logger.info("--- generate_news function finished execution (either success or error) ---")
 def create_demo():
     """Creates the Gradio interface"""
     # Builds the whole UI inside a single gr.Blocks context and returns that Blocks object.
     # Every input component is appended to `all_inputs` in a fixed order:
     #   instructions, facts, size slider, tone dropdown,
     #   5 document files,
     #   5 audio/video sources x (file, speaker name, speaker role),
     #   5 URL textboxes,
     #   3 social sources x (post URL, account name, context).
     # That list is passed as `inputs` to generate_news(instructions, facts, size, tone, *args),
     # so the append order here is the positional argument order the handler receives.
     logger.info("--- Creating Gradio interface ---")
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 📰 NewsIA - AI News Generator")
         gr.Markdown("Create professional news articles from multiple information sources.")
-        # Store all input components for easy access/reset
         all_inputs = []
         with gr.Row():
             with gr.Column(scale=2):
                 logger.info("Creating instruction input.")
-                instructions = gr.Textbox(
-                    label="Instructions for the News Article",
-                    placeholder="Enter specific instructions for generating your news article (e.g., focus on the economic impact)",
-                    lines=2,
-                    value=""
-                )
                 all_inputs.append(instructions)
                 logger.info("Creating facts input.")
-                facts = gr.Textbox(
-                    label="Main Facts",
-                    placeholder="Describe the most important facts the news should include (e.g., Event name, date, location, key people involved)",
-                    lines=4,
-                    value=""
-                )
                 all_inputs.append(facts)
                 with gr.Row():
                     logger.info("Creating size slider.")
-                    size_slider = gr.Slider(
-                        label="Approximate Length (words)",
-                        minimum=100,
-                        maximum=700, # Increased max size
-                        value=250,
-                        step=50
-                    )
                     all_inputs.append(size_slider)
                     logger.info("Creating tone dropdown.")
-                    tone_dropdown = gr.Dropdown(
-                        label="Tone of the News Article",
-                        choices=["neutral", "serious", "formal", "urgent", "investigative", "human-interest", "lighthearted"],
-                        value="neutral"
-                    )
                     all_inputs.append(tone_dropdown)
             with gr.Column(scale=3):
                 with gr.Tabs():
                     with gr.TabItem("📝 Documents"):
@@ -1310,201 +1194,105 @@ def create_demo():
                         gr.Markdown("Upload relevant documents (PDF, DOCX, XLSX, CSV). Max 5.")
                         doc_inputs = []
                         for i in range(1, 6):
-                            doc_file = gr.File(
-                                label=f"Document {i}",
-                                file_types=["pdf", ".docx", ".xlsx", ".csv"], # Explicit extensions for clarity
-                                file_count="single" # Ensure single file per component
-                            )
                             doc_inputs.append(doc_file)
                         all_inputs.extend(doc_inputs)
                         logger.info(f"{len(doc_inputs)} document inputs created.")
                     with gr.TabItem("🔊 Audio/Video"):
                          logger.info("Creating audio/video input tabs.")
-                         gr.Markdown("Upload audio or video files for transcription (MP3, WAV, MP4, MOV, etc.). Max 5 sources.")
                          audio_video_inputs = []
                          for i in range(1, 6):
                             with gr.Group():
                                 gr.Markdown(f"**Source {i}**")
-                                audio_file = gr.File(
-                                    label=f"Audio/Video File {i}",
-                                    file_types=["audio", "video"]
-                                )
                                 with gr.Row():
-                                    speaker_name = gr.Textbox(
-                                        label="Speaker Name",
-                                        placeholder="Name of the interviewee or speaker",
-                                        value=""
-                                    )
-                                    speaker_role = gr.Textbox(
-                                        label="Role/Position",
-                                        placeholder="Speaker's title or role",
-                                        value=""
-                                    )
-                                audio_video_inputs.append(audio_file)
-                                audio_video_inputs.append(speaker_name)
-                                audio_video_inputs.append(speaker_role)
                          all_inputs.extend(audio_video_inputs)
-                         logger.info(f"{len(audio_video_inputs)} audio/video inputs created (file + 2 textboxes per source).")
                     with gr.TabItem("🌐 URLs"):
                          logger.info("Creating URL input tabs.")
-                         gr.Markdown("Add URLs to relevant web pages or articles. Max 5.")
                          url_inputs = []
                          for i in range(1, 6):
-                            url_textbox = gr.Textbox(
-                                label=f"URL {i}",
-                                placeholder="https://example.com/article",
-                                value=""
-                            )
                             url_inputs.append(url_textbox)
                          all_inputs.extend(url_inputs)
                          logger.info(f"{len(url_inputs)} URL inputs created.")
                     with gr.TabItem("📱 Social Media"):
                          logger.info("Creating social media input tabs.")
-                         gr.Markdown("Add URLs to social media posts (e.g., Twitter, YouTube, TikTok). Max 3.")
                          social_inputs = []
                          for i in range(1, 4):
                             with gr.Group():
                                 gr.Markdown(f"**Social Media Source {i}**")
-                                social_url_textbox = gr.Textbox(
-                                    label=f"Post URL",
-                                    placeholder="https://twitter.com/user/status/...",
-                                    value=""
-                                )
                                 with gr.Row():
-                                    social_name_textbox = gr.Textbox(
-                                        label=f"Account Name/User",
-                                        placeholder="Name or handle (e.g., @username)",
-                                        value=""
-                                    )
-                                    social_context_textbox = gr.Textbox(
-                                        label=f"Context",
-                                        placeholder="Brief context (e.g., statement on event X)",
-                                        value=""
-                                    )
-                                social_inputs.append(social_url_textbox)
-                                social_inputs.append(social_name_textbox)
-                                social_inputs.append(social_context_textbox)
                          all_inputs.extend(social_inputs)
-                         logger.info(f"{len(social_inputs)} social media inputs created (URL + 2 textboxes per source).")
         logger.info(f"Total number of input components collected: {len(all_inputs)}")
         with gr.Row():
             logger.info("Creating generate and clear buttons.")
             generate_button = gr.Button("✨ Generate News Article", variant="primary")
             clear_button = gr.Button("🔄 Clear All Inputs")
         with gr.Tabs():
             with gr.TabItem("📄 Generated News Article"):
                 logger.info("Creating news output textbox.")
-                news_output = gr.Textbox(
-                    label="Draft News Article",
-                    lines=20, # Increased lines
-                    show_copy_button=True,
-                    value="",
-                    interactive=False # Make non-editable initially
-                )
             with gr.TabItem("🎙️ Source Transcriptions & Logs"):
                 logger.info("Creating transcriptions/log output textbox.")
-                transcriptions_output = gr.Textbox(
-                    label="Transcriptions and Processing Log",
-                    lines=15, # Increased lines
-                    show_copy_button=True,
-                    value="",
-                    interactive=False # Make non-editable initially
-                )
-        # --- Event Handlers ---
         outputs_list = [news_output, transcriptions_output]
         logger.info("Setting up event handlers.")
-        # Generate button click
-        generate_button.click(
-            fn=generate_news,
-            inputs=all_inputs, # Pass the consolidated list
-            outputs=outputs_list
-        )
         logger.info("Generate button click handler set.")
-        # Clear button click
         def clear_all_inputs_and_outputs():
             logger.info("--- Clear All button clicked ---")
             reset_values = []
-            # Generate default values based on input component types
             # One reset value is produced per component in `all_inputs`, in the same order,
             # followed by two empty strings for the two output textboxes; this must line up
             # with `outputs=all_inputs + outputs_list` on the clear button.
             # NOTE(review): the Dropdown is reset to "" although it was created with
             # value="neutral" and "" is not one of its choices - confirm this is intended.
             for input_comp in all_inputs:
-                if isinstance(input_comp, (gr.Textbox, gr.Dropdown)):
-                    reset_values.append("")
-                elif isinstance(input_comp, gr.Slider):
-                    reset_values.append(250) # Reset slider to default
-                elif isinstance(input_comp, gr.File):
-                    reset_values.append(None)
-                else:
-                    logger.warning(f"Unhandled input type for reset: {type(input_comp)}. Resetting to None.")
-                    reset_values.append(None)
-            # Add default values for the output fields (empty strings for textboxes)
             reset_values.extend(["", ""])
             logger.info(f"Generated {len(reset_values)} reset values for UI components.")
-            # Also reset the models in the background (optional, but good for freeing resources)
             # A failure while resetting models is logged only; the UI reset values are still returned.
             try:
                  logger.info("Calling model reset from clear button handler.")
                  model_manager.reset_models(force=True)
             except Exception as e:
                  logger.error(f"Error resetting models during clear operation: {e}")
                  logger.error(traceback.format_exc())
             logger.info("--- Clear All operation finished ---")
             return reset_values
-        clear_button.click(
-            fn=clear_all_inputs_and_outputs,
-            inputs=None, # No inputs needed for the clear function itself
-            outputs=all_inputs + outputs_list # The list of components to clear
-        )
         logger.info("Clear button click handler set.")
         logger.info("--- Gradio interface creation complete ---")
     return demo
 if __name__ == "__main__":
     # Script entry point: build the Gradio demo, enable its request queue, then launch
     # the server. Each step is bracketed by log messages.
     logger.info("--- Running main execution block ---")
-    # Optional: Pre-initialize Whisper on startup (consider trade-offs)
-    # try:
-    #     logger.info("Attempting to pre-initialize Whisper model on startup...")
-    #     model_manager.initialize_whisper()
-    #     logger.info("Whisper pre-initialization successful.")
-    # except Exception as e:
-    #     logger.warning(f"Pre-initialization of Whisper model failed (will load on demand): {str(e)}")
-    #     logger.warning(traceback.format_exc())
-    # Create the Gradio Demo
     logger.info("Creating Gradio demo instance...")
     news_demo = create_demo()
     logger.info("Gradio demo instance created.")
-    # Configure the queue
     logger.info("Configuring Gradio queue...")
-    news_demo.queue() # Use default queue settings
     logger.info("Gradio queue configured.")
-    # Launch the Gradio app
     logger.info("Launching Gradio interface...")
     # Any exception raised by launch() is logged with its traceback and not re-raised,
     # so the script falls through to the end of the block instead of crashing.
     try:
-        news_demo.launch(
-            server_name="0.0.0.0", # Necessary for Docker/Spaces
-            server_port=7860,
-            # share=False, # Usually set by Spaces automatically
-            # debug=True # Enable for more Gradio-specific logs if needed
-        )
         logger.info("Gradio launch called. Application running.")
     except Exception as launch_err:
          logger.error(f"!!! CRITICAL Error during Gradio launch: {launch_err}")
          logger.error(traceback.format_exc())
-    logger.info("--- Main execution block finished ---") # May not be reached if launch blocks

             logger.info("Initializing ModelManager attributes.")
             self.tokenizer = None
             self.model = None
+            self.text_pipeline = None
             self.whisper_model = None
+            # self._initialized remains False until a model is successfully loaded
+            self.llm_loaded = False
+            self.whisper_loaded = False
             self.last_used = time.time()
             self.llm_loading = False
             self.whisper_loading = False
+    def _cleanup_memory(self):
+        """Utility function to force memory cleanup"""
         # Runs a full garbage-collection pass and logs the count gc.collect() returns.
         # Takes no arguments besides self and returns None.
+        logger.info("Running garbage collection...")
+        collected_count = gc.collect()
+        logger.info(f"Garbage collected ({collected_count} objects).")
         # The CUDA cache is only touched when torch reports that CUDA is available.
+        if torch.cuda.is_available():
+            logger.info("Clearing CUDA cache...")
+            torch.cuda.empty_cache()
+            logger.info("CUDA cache cleared.")
+    def reset_llm(self):
+        """Explicitly resets the LLM components."""
         # Drops the model, tokenizer and text pipeline held on this instance, marks the
         # LLM as not loaded, then calls _cleanup_memory(). Returns None.
         # Any exception raised along the way is logged with its traceback and not re-raised.
+        logger.info("--- Attempting to reset LLM ---")
+        try:
             # Each attribute is deleted only if it exists and is not None.
+            if hasattr(self, 'model') and self.model is not None:
+                del self.model
+                logger.info("LLM model deleted.")
+            if hasattr(self, 'tokenizer') and self.tokenizer is not None:
+                del self.tokenizer
+                logger.info("LLM tokenizer deleted.")
+            if hasattr(self, 'text_pipeline') and self.text_pipeline is not None:
+                del self.text_pipeline
+                logger.info("LLM pipeline deleted.")
             # Re-create the attributes as None so later attribute reads still succeed after `del`.
+            self.model = None
+            self.tokenizer = None
+            self.text_pipeline = None
+            self.llm_loaded = False
+            self._cleanup_memory()
+            logger.info("LLM components reset successfully.")
+        except Exception as e:
+            logger.error(f"!!! ERROR during LLM reset: {e}")
+            logger.error(traceback.format_exc())
+    def reset_whisper(self):
+        """Explicitly resets the Whisper model."""
+        logger.info("--- Attempting to reset Whisper ---")
+        try:
+            if hasattr(self, 'whisper_model') and self.whisper_model is not None:
+                del self.whisper_model
+                logger.info("Whisper model deleted.")
+            self.whisper_model = None
+            self.whisper_loaded = False
+            self._cleanup_memory()
+            logger.info("Whisper component reset successfully.")
+        except Exception as e:
+            logger.error(f"!!! ERROR during Whisper reset: {e}")
+            logger.error(traceback.format_exc())
+    @spaces.GPU(duration=120)
     def initialize_llm(self):
         """Initialize LLM model with standard transformers"""
         logger.info("Attempting to initialize LLM.")
         if self.llm_loading:
             logger.info("LLM initialization already in progress. Skipping.")
+            return True
+        if self.llm_loaded:
             logger.info("LLM already initialized.")
             self.last_used = time.time()
             return True
+        # Explicitly try to free Whisper memory before loading LLM
+        self.reset_whisper()
         self.llm_loading = True
         logger.info("Starting LLM initialization...")
         try:
             logger.info(f"Using LLM model: {MODEL_NAME}")
             logger.info("Loading LLM tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HUGGINGFACE_TOKEN, use_fast=True)
             logger.info("LLM tokenizer loaded.")
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token
             logger.info("Loading LLM model...")
             self.model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME, token=HUGGINGFACE_TOKEN, device_map="auto",
+                torch_dtype=torch.float16, low_cpu_mem_usage=True,
+                offload_folder="offload", offload_state_dict=True
             )
             logger.info("LLM model loaded.")
             logger.info("Creating LLM text generation pipeline...")
             self.text_pipeline = pipeline(
+                "text-generation", model=self.model, tokenizer=self.tokenizer,
+                torch_dtype=torch.float16, device_map="auto", max_length=1024
             )
             logger.info("LLM text generation pipeline created.")
             logger.info("LLM initialized successfully.")
             self.last_used = time.time()
+            self.llm_loaded = True
             self.llm_loading = False
             return True
         except Exception as e:
             logger.error(f"!!! ERROR during LLM initialization: {str(e)}")
+            logger.error(traceback.format_exc())
             logger.error("Resetting potentially partially loaded LLM components due to error.")
+            self.reset_llm() # Use the specific reset function
             self.llm_loading = False
+            raise
+    @spaces.GPU(duration=120)
     def initialize_whisper(self):
         """Initialize Whisper model for audio transcription"""
         logger.info("Attempting to initialize Whisper.")
         if self.whisper_loading:
             logger.info("Whisper initialization already in progress. Skipping.")
             return True
+        if self.whisper_loaded:
              logger.info("Whisper already initialized.")
              self.last_used = time.time()
              return True
+        # Explicitly try to free LLM memory before loading Whisper
+        self.reset_llm()
         self.whisper_loading = True
         logger.info("Starting Whisper initialization...")
         try:
+            WHISPER_MODEL_NAME = "tiny"
             logger.info(f"Loading Whisper model: {WHISPER_MODEL_NAME}")
             self.whisper_model = whisper.load_model(
+                WHISPER_MODEL_NAME, device="cuda" if torch.cuda.is_available() else "cpu",
+                download_root="/tmp/whisper"
             )
             logger.info(f"Whisper model '{WHISPER_MODEL_NAME}' loaded successfully.")
             self.last_used = time.time()
+            self.whisper_loaded = True
             self.whisper_loading = False
             return True
         except Exception as e:
             logger.error(f"!!! ERROR during Whisper initialization: {str(e)}")
             logger.error(traceback.format_exc())
             logger.error("Resetting potentially partially loaded Whisper components due to error.")
+            self.reset_whisper() # Use the specific reset function
             self.whisper_loading = False
             raise
     def check_llm_initialized(self):
         """Check if LLM is initialized and initialize if needed"""
         logger.info("Checking if LLM is initialized.")
+        if not self.llm_loaded:
             logger.info("LLM not initialized, attempting initialization...")
+            if not self.llm_loading:
                  self.initialize_llm() # This will raise error if it fails
                  logger.info("LLM initialization completed by check_llm_initialized.")
             else:
+                 # This state should ideally be avoided by sequential logic, but handle anyway
                  logger.info("LLM initialization is already in progress by another request. Waiting briefly.")
+                 time.sleep(10)
+                 if not self.llm_loaded:
                      logger.error("LLM initialization timed out or failed after waiting.")
                      raise RuntimeError("LLM initialization timed out or failed.")
                  else:
             logger.info("LLM was already initialized.")
         self.last_used = time.time()
     def check_whisper_initialized(self):
         """Check if Whisper model is initialized and initialize if needed"""
         logger.info("Checking if Whisper is initialized.")
+        if not self.whisper_loaded:
             logger.info("Whisper model not initialized, attempting initialization...")
+            if not self.whisper_loading:
                 self.initialize_whisper() # This will raise error if it fails
                 logger.info("Whisper initialization completed by check_whisper_initialized.")
             else:
                 logger.info("Whisper initialization is already in progress by another request. Waiting briefly.")
+                time.sleep(10)
+                if not self.whisper_loaded:
                     logger.error("Whisper initialization timed out or failed after waiting.")
                     raise RuntimeError("Whisper initialization timed out or failed.")
                 else:
         self.last_used = time.time()
     def reset_models(self, force=False):
+        """Reset models if idle or forced."""
+        # This function now just calls the specific resets.
+        # Idle logic could be added back if needed, but explicit resets might be better for ZeroGPU.
+        if force:
+            logger.info("Forcing reset of all models.")
+            self.reset_llm()
+            self.reset_whisper()
+        # else: # Optional: Add idle check back if desired
+            # current_time = time.time()
+            # if current_time - self.last_used > 600:
+            #    logger.info("Resetting models due to inactivity.")
+            #    self.reset_llm()
+            #    self.reset_whisper()
+# --- Rest of the functions (download_social_media_video, convert_video_to_audio, etc.) remain the same as the previous version with detailed logging ---
+# --- Paste the functions from the previous answer here, starting from @lru_cache...download_social_media_video down to the end of process_social_media_url ---
 @lru_cache(maxsize=16) # Reduced cache size slightly
 def download_social_media_video(url):
     original_input_path = None
     temp_files_to_clean = []
     processing_step = "Initialization"
+    transcription = "" # Default value
     try:
         processing_step = "Whisper Model Check"
+        logger.info("Checking/Initializing Whisper model for transcription...")
+        # *** Crucial Change: Reset LLM before ensuring Whisper is ready ***
+        # model_manager.reset_llm()
+        # *** Let's try NOT resetting LLM here, maybe both can fit? Check logs if fails ***
         model_manager.check_whisper_initialized() # Will raise error if fails
+        logger.info("Whisper model is ready for transcription.")
         if file_input is None:
             logger.info("No file input provided for transcription. Returning empty string.")
+            return ""
+        # ... (rest of the input type handling, conversion, preprocessing - same as before) ...
         processing_step = "Input Type Handling"
         if isinstance(file_input, str): # Input is a path
             original_input_path = file_input
         file_extension = os.path.splitext(input_path)[1].lower()
         logger.info(f"File extension: {file_extension}")
         processing_step = "Video Conversion Check"
         if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
             logger.info(f"Detected video file ({file_extension}), attempting conversion to audio...")
             logger.error(f"Unsupported file extension for transcription: {file_extension}")
             raise ValueError(f"Unsupported file type: {file_extension}")
         processing_step = "Audio Preprocessing"
         try:
             logger.info(f"Attempting to preprocess audio file: {audio_file_to_process}")
             preprocessed_audio_path = preprocess_audio(audio_file_to_process)
             if preprocessed_audio_path != audio_file_to_process:
                  logger.info("Preprocessing created a new file, adding to cleanup list.")
                  temp_files_to_clean.append(preprocessed_audio_path)
             logger.info(f"Audio preprocessing successful. File to transcribe: {audio_file_to_transcribe}")
         except Exception as preprocess_err:
             logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio for transcription.")
+            logger.warning(traceback.format_exc())
+            audio_file_to_transcribe = audio_file_to_process
+        processing_step = "Transcription Execution"
+        logger.info(f"Starting transcription execution for: {audio_file_to_transcribe}")
         if not os.path.exists(audio_file_to_transcribe):
             logger.error(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
             raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")
         logger.info("Calling Whisper model transcribe method...")
+        with torch.inference_mode():
             use_fp16 = torch.cuda.is_available()
             logger.info(f"Using fp16 for transcription: {use_fp16}")
+            # Add language='en' if most input is English, might improve speed/accuracy
             result = model_manager.whisper_model.transcribe(
+                audio_file_to_transcribe, fp16=use_fp16 #, language="en"
             )
             logger.info("Whisper model transcribe method finished.")
             if not result or "text" not in result:
         logger.info(f"Transcription completed successfully: '{log_transcription}'")
         processing_step = "Success"
+        # *** Optional: Reset Whisper immediately after use if memory is tight ***
+        # logger.info("Resetting Whisper model after successful transcription.")
+        # model_manager.reset_whisper()
+    # ... (keep the except blocks same as before) ...
     except FileNotFoundError as e:
         logger.error(f"!!! File not found error during transcription (Step: {processing_step}): {e}")
         logger.error(traceback.format_exc())
+        transcription = f"Error: Input file not found ({e})"
     except ValueError as e:
          logger.error(f"!!! Value error during transcription (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
+         transcription = f"Error: Unsupported file type ({e})"
     except TypeError as e:
          logger.error(f"!!! Type error during transcription setup (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
+         transcription = f"Error: Invalid input provided ({e})"
     except RuntimeError as e:
          logger.error(f"!!! Runtime error during transcription (Step: {processing_step}): {e}")
          logger.error(traceback.format_exc())
+         transcription = f"Error during processing: {e}"
     except Exception as e:
         logger.error(f"!!! Unexpected error during transcription (Step: {processing_step}): {str(e)}")
         logger.error(traceback.format_exc())
+        transcription = f"Error processing the file: An unexpected error occurred."
     finally:
+        # Clean up temporary files
         logger.info(f"--- Cleaning up temporary files for transcription process ({len(temp_files_to_clean)} files) ---")
         for temp_file in temp_files_to_clean:
             try:
                 if os.path.exists(temp_file):
                     os.remove(temp_file)
                     logger.info(f"Cleaned up temporary file: {temp_file}")
+                # else:
+                #     logger.info(f"Temporary file already removed or never created: {temp_file}")
             except Exception as e:
                 logger.warning(f"Could not remove temporary file {temp_file}: {str(e)}")
         logger.info("--- Finished transcription process cleanup ---")
+        # Return the result (could be transcription or error message)
+        return transcription
 @lru_cache(maxsize=16)
         if file_extension == ".pdf":
             logger.info("Reading PDF document using PyMuPDF (fitz)...")
             doc = fitz.open(document_path)
+            # Check for encryption first
+            if doc.is_encrypted:
+                logger.warning(f"PDF document {document_path} is encrypted. Attempting to decrypt with empty password.")
+                if not doc.authenticate(""):
+                    logger.error(f"Failed to decrypt PDF {document_path} with empty password.")
+                    doc.close()
+                    raise ValueError("Encrypted PDF cannot be read without password.")
             content = "\n".join([page.get_text() for page in doc])
             doc.close()
             logger.info(f"PDF read successfully. Length: {len(content)} chars.")
             logger.info(f"Excel read successfully. Length: {len(content)} chars.")
         elif file_extension == ".csv":
             logger.info("Reading CSV document using pandas...")
             try:
                 logger.info("Attempting CSV read with comma separator...")
+                # Try to sniff encoding
+                with open(document_path, 'rb') as f:
+                    import chardet
+                    encoding = chardet.detect(f.read())['encoding']
+                    logger.info(f"Detected CSV encoding: {encoding}")
+                df = pd.read_csv(document_path, encoding=encoding)
+            except (pd.errors.ParserError, UnicodeDecodeError) as e1:
+                 logger.warning(f"Could not parse CSV {document_path} with comma/detected encoding ({e1}), trying semicolon.")
+                 try:
+                     df = pd.read_csv(document_path, sep=';', encoding=encoding)
+                 except Exception as e2:
+                     logger.error(f"Also failed with semicolon separator: {e2}. Trying latin1 encoding.")
+                     try:
+                        df = pd.read_csv(document_path, encoding='latin1')
+                     except Exception as e3:
+                         logger.error(f"Also failed with latin1: {e3}. Giving up.")
+                         raise ValueError(f"Failed to parse CSV: {e1}, {e2}, {e3}")
             content = df.to_string()
             logger.info(f"CSV read successfully. Length: {len(content)} chars.")
         else:
     except FileNotFoundError as e:
         logger.error(f"!!! File not found error while reading document: {e}")
         return f"Error: Document file not found at {document_path}"
+    except ValueError as e: # Catch specific errors like encryption or CSV parsing
+        logger.error(f"!!! Value error reading document {document_path}: {e}")
+        logger.error(traceback.format_exc())
+        return f"Error reading document: {e}"
     except Exception as e:
         logger.error(f"!!! Error reading document {document_path}: {str(e)}")
         logger.error(traceback.format_exc())
     logger.info(f"Attempting to read URL: {url}")
     if not url or not url.strip().startswith('http'):
         logger.warning(f"Invalid or empty URL provided: '{url}'")
+        return ""
     try:
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Connection': 'keep-alive'
         }
         logger.info(f"Sending GET request to {url} with headers: {headers}")
         response = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
+        logger.info(f"Received response from {url}. Status code: {response.status_code}, Content-Type: {response.headers.get('content-type')}")
+        response.raise_for_status()
         content_type = response.headers.get('content-type', '').lower()
         if not ('html' in content_type or 'text' in content_type):
              logger.warning(f"URL {url} has non-text content type: {content_type}. Skipping.")
              return f"Error: URL content type ({content_type}) is not text/html."
+        # Decode content carefully
+        detected_encoding = response.encoding if response.encoding else response.apparent_encoding
+        logger.info(f"Decoding response content with encoding: {detected_encoding}")
+        html_content = response.content.decode(detected_encoding or 'utf-8', errors='ignore')
         logger.info(f"Parsing HTML content from {url} using BeautifulSoup...")
+        soup = BeautifulSoup(html_content, 'html.parser')
         logger.info("HTML parsed.")
         logger.info("Removing script, style, and other non-content tags...")
+        tags_to_remove = ["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button", "link", "head"]
         for tag_name in tags_to_remove:
             for element in soup.find_all(tag_name):
                 element.extract()
         logger.info("Non-content tags removed.")
         logger.info("Attempting to find main content container...")
         main_content = (
             soup.find("main") or
             if body:
                 logger.info("Extracting text from body.")
                 text = body.get_text(separator='\n', strip=True)
+            else:
                  logger.warning(f"No body tag found for {url}. Falling back to all text.")
                  text = soup.get_text(separator='\n', strip=True)
         logger.info("Cleaning extracted text whitespace...")
         lines = [line.strip() for line in text.split('\n') if line.strip()]
         cleaned_text = "\n".join(lines)
         logger.info(f"Text cleaning complete. Initial length: {len(text)}, Cleaned length: {len(cleaned_text)}")
         if not cleaned_text:
             logger.warning(f"Could not extract meaningful text from URL: {url}")
             return "Error: Could not extract text content from URL."
         max_chars = 15000
         if len(cleaned_text) > max_chars:
             logger.info(f"URL content is long ({len(cleaned_text)} chars), truncating to {max_chars} characters.")
         return final_text
     except requests.exceptions.RequestException as e:
         logger.error(f"!!! Error fetching URL {url}: {str(e)}")
         return f"Error reading URL: Could not fetch content ({e})"
     except Exception as e:
         logger.error(f"!!! Error parsing URL {url}: {str(e)}")
     text_content = None
     video_transcription = None
     temp_audio_file = None
     # 1. Try extracting text content using read_url
              logger.info(f"Successfully read text content from {url}. Length: {len(text_content)}")
         elif text_content_result:
              logger.warning(f"read_url returned an error for {url}: {text_content_result}")
         else:
             logger.info(f"No text content extracted by read_url for {url}.")
     except Exception as e:
         logger.error(f"!!! Exception during text content extraction from social URL {url}: {e}")
         logger.error(traceback.format_exc())
     # 2. Try downloading and transcribing potential video/audio content
     logger.info(f"Attempting to download audio/video content from social URL: {url}")
         temp_audio_file = download_social_media_video(url) # Returns path or None
         if temp_audio_file:
             logger.info(f"Audio downloaded from {url} to {temp_audio_file}. Proceeding to transcription.")
             transcription_result = transcribe_audio_or_video(temp_audio_file) # Handles errors internally
             if transcription_result and not transcription_result.startswith("Error"):
                 video_transcription = transcription_result
                 logger.info(f"Successfully transcribed audio from {url}. Length: {len(video_transcription)}")
             elif transcription_result:
                  logger.warning(f"Transcription returned an error for audio from {url}: {transcription_result}")
             else:
                  logger.warning(f"Transcription returned empty result for audio from {url}.")
         else:
     except Exception as e:
         logger.error(f"!!! Exception during video/audio processing for social URL {url}: {e}")
         logger.error(traceback.format_exc())
     finally:
          # Clean up downloaded file if it exists
         if temp_audio_file and os.path.exists(temp_audio_file):
     # Return results
     logger.info(f"--- Finished processing social media URL: {url} ---")
+    if text_content or video_transcription:
+        return {"text": text_content or "", "video": video_transcription or ""}
+    else:
+        # Return None only if BOTH failed and no content was retrieved
+        logger.info(f"No usable content retrieved for social URL: {url}")
+        return None
+# Create global model manager instance
+logger.info("Creating global ModelManager instance.")
+model_manager = ModelManager()
 @spaces.GPU(duration=300) # Allow more time for generation
     try:
         # --- Parameter Logging & Basic Validation ---
+        # (Same as before)
         logger.info(f"Received Instructions: {'Yes' if instructions else 'No'}")
         logger.info(f"Received Facts: {'Yes' if facts else 'No'}")
         logger.info(f"Requested Size: {size}, Tone: {tone}")
             size = 250
         logger.info(f"Using Size: {size}")
         # --- Argument Parsing ---
+        # (Same as before)
         logger.info("Parsing dynamic arguments...")
         num_docs = 5
         num_audio_sources = 5
              logger.warning(f"Received more arguments ({len(args_list)}) than expected ({total_expected_args}). Truncating.")
              args_list = args_list[:total_expected_args]
         doc_files = args_list[0:num_docs]
         audio_inputs_flat = args_list[num_docs : num_docs + (num_audio_sources * num_audio_inputs_per_source)]
         url_inputs = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) : num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls]
         knowledge_base = {
             "instructions": instructions or "No specific instructions provided.",
             "facts": facts or "No specific facts provided.",
+            "document_content": [], "audio_data": [], "url_content": [], "social_content": []
         }
+        # --- Process Inputs (Documents, URLs, Collect Audio Info, Social Media) ---
+        # (Keep the processing loops same as previous version with detailed logging)
+        # --- Processing document inputs ---
         logger.info("--- Processing document inputs ---")
         doc_counter = 0
         for i, doc_file in enumerate(doc_files):
                 doc_filename = os.path.basename(doc_file.name)
                 logger.info(f"Attempting to read document {i+1}: {doc_filename} (Path: {doc_file.name})")
                 try:
+                    content = read_document(doc_file.name)
                     if content and content.startswith("Error:"):
                          logger.warning(f"Skipping document {i+1} ({doc_filename}) due to read error: {content}")
                          raw_transcriptions += f"[Document {i+1}: {doc_filename}] Error reading: {content}\n\n"
                     elif content:
                         doc_excerpt = (content[:1000] + "... [document truncated]") if len(content) > 1000 else content
                         knowledge_base["document_content"].append(f"[Document {i+1} Source: {doc_filename}]\n{doc_excerpt}")
+                        logger.info(f"Successfully processed document {i+1}. Added excerpt.")
                         doc_counter += 1
                     else:
+                         logger.warning(f"Skipping document {i+1} ({doc_filename}) because content is empty.")
                          raw_transcriptions += f"[Document {i+1}: {doc_filename}] Read successfully but content is empty.\n\n"
                 except Exception as e:
                     logger.error(f"!!! FAILED to process document {i+1} ({doc_filename}): {e}")
                     logger.error(traceback.format_exc())
                     raw_transcriptions += f"[Document {i+1}: {doc_filename}] CRITICAL Error during processing: {e}\n\n"
+            # else: logger.info(f"Skipping document slot {i+1}: No file.")
+        logger.info(f"--- Finished processing {doc_counter} documents. ---")
+        # --- Processing URL inputs ---
         logger.info("--- Processing URL inputs ---")
         url_counter = 0
         for i, url in enumerate(url_inputs):
                          logger.warning(f"Skipping URL {i+1} ({url}) due to read error: {content}")
                          raw_transcriptions += f"[URL {i+1}: {url}] Error reading: {content}\n\n"
                     elif content:
                         knowledge_base["url_content"].append(f"[URL {i+1} Source: {url}]\n{content}")
+                        logger.info(f"Successfully processed URL {i+1}. Added content.")
                         url_counter += 1
                     else:
+                         logger.warning(f"Skipping URL {i+1} ({url}) because content is empty.")
                          raw_transcriptions += f"[URL {i+1}: {url}] Read successfully but content is empty.\n\n"
                 except Exception as e:
                      logger.error(f"!!! FAILED to process URL {i+1} ({url}): {e}")
                      logger.error(traceback.format_exc())
                      raw_transcriptions += f"[URL {i+1}: {url}] CRITICAL Error during processing: {e}\n\n"
+             # elif url: logger.warning(f"Skipping URL slot {i+1}: Invalid URL '{url}'.")
+             # else: logger.info(f"Skipping URL slot {i+1}: No URL.")
+        logger.info(f"--- Finished processing {url_counter} URLs. ---")
+        # --- Processing audio/video inputs (collecting info) ---
         logger.info("--- Processing audio/video inputs (collecting info) ---")
         has_audio_source = False
         audio_counter = 0
         for i in range(num_audio_sources):
             start_idx = i * num_audio_inputs_per_source
             if start_idx + 2 < len(audio_inputs_flat):
                 audio_file = audio_inputs_flat[start_idx]
                 name = audio_inputs_flat[start_idx + 1] or f"Unnamed Audio Source {i+1}"
                 position = audio_inputs_flat[start_idx + 2] or "Role N/A"
                 if audio_file and hasattr(audio_file, 'name') and audio_file.name:
                     audio_filename = os.path.basename(audio_file.name)
                     logger.info(f"Found audio/video source {i+1}: {name} ({position}) - File: {audio_filename} (Path: {audio_file.name})")
+                    knowledge_base["audio_data"].append({"file_path": audio_file.name, "name": name, "position": position, "original_filename": audio_filename})
                     has_audio_source = True
                     audio_counter += 1
+                # else: logger.info(f"Skipping audio source slot {i+1}: No file.")
+            else: logger.warning(f"Index out of bounds for audio source {i+1}."); break
+        logger.info(f"--- Finished collecting audio/video info. {audio_counter} sources found. Transcription needed: {has_audio_source} ---")
+        # --- Processing social media inputs ---
         logger.info("--- Processing social media inputs ---")
         social_counter = 0
         for i in range(num_social_sources):
              start_idx = i * num_social_inputs_per_source
                  social_url = social_inputs_flat[start_idx]
                  social_name = social_inputs_flat[start_idx + 1] or f"Unnamed Social Source {i+1}"
                  social_context = social_inputs_flat[start_idx + 2] or "Context N/A"
                  if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
                      logger.info(f"Attempting to process social media URL {i+1}: {social_url} ({social_name}, {social_context})")
                      try:
+                         social_data = process_social_media_url(social_url)
+                         if social_data: # process_social_media_url now returns dict even if empty
+                             if social_data.get("text") or social_data.get("video"):
+                                 logger.info(f"Successfully processed social URL {i+1}. Text: {bool(social_data.get('text'))}, Video: {bool(social_data.get('video'))}")
+                                 knowledge_base["social_content"].append({"url": social_url, "name": social_name, "context": social_context, "text": social_data.get("text", ""), "video_transcription": social_data.get("video", "")})
+                                 social_counter += 1
+                             else:
+                                logger.warning(f"Processed social URL {i+1} ({social_url}) but found no text or video content.")
+                                raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] Processed but no content found.\n\n"
+                         # No 'else' needed as process_social_media_url handles internal errors and returns dict
                      except Exception as e:
                          logger.error(f"!!! FAILED to process social URL {i+1} ({social_url}): {e}")
                          logger.error(traceback.format_exc())
                          raw_transcriptions += f"[Social Media {i+1}: {social_url} ({social_name})] CRITICAL Error during processing: {e}\n\n"
+                 # elif social_url: logger.warning(f"Skipping social slot {i+1}: Invalid URL '{social_url}'.")
+                 # else: logger.info(f"Skipping social slot {i+1}: No URL.")
+             else: logger.warning(f"Index out of bounds for social source {i+1}."); break
+        logger.info(f"--- Finished processing {social_counter} social media sources. ---")
         # --- Transcribe Audio/Video (Conditional) ---
         transcriptions_for_prompt = ""
         if has_audio_source:
             logger.info("--- Starting Audio Transcription Phase ---")
+            # Whisper check/initialization happens INSIDE transcribe_audio_or_video now
+            for idx, data in enumerate(knowledge_base["audio_data"]):
+                 audio_filename = data['original_filename']
+                 logger.info(f"Attempting transcription for audio source {idx+1}: {audio_filename} ({data['name']}, {data['position']})")
+                 try:
+                     # transcribe_audio_or_video now includes model check and returns error string on failure
+                     transcription = transcribe_audio_or_video(data["file_path"])
+                     if transcription and not transcription.startswith("Error"):
+                         logger.info(f"Transcription successful for audio {idx+1}. Length: {len(transcription)}")
+                         quote = f'"{transcription}" - {data["name"]}, {data["position"]}'
+                         transcriptions_for_prompt += f"{quote}\n\n"
+                         raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n"{transcription}"\n\n'
+                     else:
+                         # Log the error message returned by the function
+                         logger.warning(f"Transcription failed or returned error for audio source {idx+1} ({audio_filename}): {transcription}")
+                         raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[Transcription Error: {transcription}]\n\n'
+                 except Exception as e:
+                     # Catch unexpected errors during the call itself
+                     logger.error(f"!!! CRITICAL Error during transcription call for audio source {idx+1} ({audio_filename}): {e}")
+                     logger.error(traceback.format_exc())
+                     raw_transcriptions += f'[Audio/Video {idx + 1}: {audio_filename} ({data["name"]}, {data["position"]})]\n[CRITICAL Error during transcription call: {e}]\n\n'
             logger.info("--- Finished Audio Transcription Phase ---")
         else:
             logger.info("--- Skipping Audio Transcription Phase (no audio sources found) ---")
         # --- Add Social Media Content to Prompt Data ---
+        # (Same as before)
         logger.info("--- Adding social media content to prompt data ---")
         social_content_added_to_prompt = False
         for idx, data in enumerate(knowledge_base["social_content"]):
             source_id_log = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]'
             source_id_prompt = f'Social Media Post ({data["name"]}, {data["context"]} at {data["url"]}):'
             content_added_this_source = False
             if data["text"]:
                 text_excerpt = (data["text"][:500] + "...[text truncated]") if len(data["text"]) > 500 else data["text"]
                 social_text_prompt = f'{source_id_prompt}\nText Content:\n"{text_excerpt}"\n\n'
                 transcriptions_for_prompt += social_text_prompt
+                raw_transcriptions += f"{source_id_log}\nText Content:\n{data['text']}\n\n"
+                content_added_this_source = True; social_content_added_to_prompt = True
             if data["video_transcription"]:
                 social_video_prompt = f'{source_id_prompt}\nVideo Transcription:\n"{data["video_transcription"]}"\n\n'
                 transcriptions_for_prompt += social_video_prompt
                 raw_transcriptions += f"{source_id_log}\nVideo Transcription:\n{data['video_transcription']}\n\n"
+                content_added_this_source = True; social_content_added_to_prompt = True
+            if content_added_this_source: logger.info(f"Added content from social source {idx+1} to prompt data.")
+            # else: logger.info(f"No usable content found for social source {idx+1} ({data['url']}).")
+        if not social_content_added_to_prompt: logger.info("No content from social media sources was added to the prompt data.")
+        logger.info("--- Finished adding social media content ---")
         # --- Prepare Final Prompt ---
+        # (Same as before)
         logger.info("--- Preparing final prompt for LLM ---")
         document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided or processed successfully."
         url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided or processed successfully."
         transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions or social media content available."
+        prompt = f"""<s>[INST] You are a professional news writer... [SAME PROMPT AS BEFORE] ...Begin the article now. [/INST]\nArticle Draft:\n""" # Keep prompt structure
+        prompt_words = len(prompt.split()); prompt_chars = len(prompt)
         logger.info(f"Generated prompt length: {prompt_words} words / {prompt_chars} characters.")
         logger.debug(f"Prompt Start: {prompt[:200]}...")
         logger.debug(f"...Prompt End: {prompt[-200:]}")
         logger.info("--- Finished preparing final prompt ---")
         logger.info("--- Starting LLM Generation Phase ---")
         generation_start_time = time.time()
+        # Ensure LLM is ready (will also reset Whisper if loaded)
         logger.info("Ensuring LLM is initialized for generation...")
         try:
+            # *** Crucial Change: Reset Whisper before ensuring LLM is ready ***
+            # model_manager.reset_whisper()
+            # *** Let's try NOT resetting whisper, check logs if fails ***
             model_manager.check_llm_initialized() # Raises error if fails
+            logger.info("LLM confirmed ready for generation.")
         except Exception as llm_init_err:
             logger.error(f"!!! FATAL: LLM could not be initialized. Cannot generate article.")
             logger.error(traceback.format_exc())
         # Estimate max_new_tokens
+        # (Same as before)
         estimated_tokens_per_word = 1.5
+        max_new_tokens = int(size * estimated_tokens_per_word + 150)
+        model_max_length = 2048
+        prompt_tokens_estimate = prompt_chars // 3
+        available_tokens = model_max_length - prompt_tokens_estimate - 50
         max_new_tokens = min(max_new_tokens, available_tokens)
+        max_new_tokens = max(max_new_tokens, 100)
         logger.info(f"Estimated prompt tokens: ~{prompt_tokens_estimate}. Model max length: {model_max_length}. Requesting max_new_tokens: {max_new_tokens}")
         try:
+            # Generate text
+            # (Same pipeline call as before)
             logger.info("Calling LLM text generation pipeline...")
             outputs = model_manager.text_pipeline(
+                prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.7,
+                top_p=0.95, top_k=50, repetition_penalty=1.15,
+                pad_token_id=model_manager.tokenizer.eos_token_id, num_return_sequences=1
             )
             logger.info("LLM pipeline call finished.")
                  logger.error("LLM pipeline returned invalid or empty output.")
                  raise RuntimeError("LLM generation failed: Pipeline returned empty or invalid output.")
             full_generated_text = outputs[0]['generated_text']
             logger.info(f"Raw generated text length: {len(full_generated_text)} chars.")
+            # Clean output
+            # (Same cleaning logic as before)
             logger.info("Cleaning LLM output (removing prompt)...")
             inst_marker = "[/INST]"
             marker_pos = full_generated_text.find(inst_marker)
             if marker_pos != -1:
                 generated_article = full_generated_text[marker_pos + len(inst_marker):].strip()
                 if generated_article.startswith("Article Draft:"):
                     generated_article = generated_article[len("Article Draft:"):].strip()
                 logger.info("Prompt removed successfully using '[/INST]' marker.")
             else:
+                 generated_article = full_generated_text
+                 logger.warning("Prompt marker '[/INST]' not found in LLM output. Returning full generated text.")
             generation_time = time.time() - generation_start_time
             logger.info(f"News generation completed in {generation_time:.2f} seconds.")
             logger.info(f"Final article length: {len(generated_article)} characters.")
             logger.info("--- Finished LLM Generation Phase ---")
+            # *** Optional: Reset LLM immediately after generation ***
+            # logger.info("Resetting LLM model after successful generation.")
+            # model_manager.reset_llm()
+        # ... (keep OOM and general Exception handling for generation same as before) ...
         except torch.cuda.OutOfMemoryError as oom_error:
              logger.error(f"!!! CUDA Out of Memory error during LLM generation: {oom_error}")
              logger.error(traceback.format_exc())
              logger.info("Attempting to reset models after OOM error...")
+             model_manager.reset_models(force=True)
+             raise RuntimeError("Generation failed due to insufficient GPU memory.") from oom_error
         except Exception as gen_error:
             logger.error(f"!!! Error during text generation pipeline: {str(gen_error)}")
             logger.error(traceback.format_exc())
         total_time = time.time() - request_start_time
         logger.info(f"--- generate_news function completed successfully in {total_time:.2f} seconds. ---")
         return generated_article.strip(), raw_transcriptions.strip()
     except Exception as e:
         # Catch-all for any unexpected error during the entire generate_news flow
+        # (Same as before)
         total_time = time.time() - request_start_time
         logger.error(f"!!! UNHANDLED Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
         logger.error(traceback.format_exc())
         try:
             logger.info("Attempting model reset due to unhandled error in generate_news.")
             model_manager.reset_models(force=True)
         except Exception as reset_error:
             logger.error(f"Failed to reset models after error: {str(reset_error)}")
         error_message = f"Error generating the news article: An unexpected error occurred. Please check logs. ({str(e)})"
         transcription_log = raw_transcriptions.strip() + f"\n\n[CRITICAL ERROR] News generation failed unexpectedly: {str(e)}"
         return error_message, transcription_log
     finally:
+        # Final cleanup/logging
         logger.info("--- generate_news function finished execution (either success or error) ---")
+        # Force cleanup after every run attempt on ZeroGPU
+        logger.info("Forcing model reset at the end of generate_news call.")
+        model_manager.reset_models(force=True)
+# --- Gradio interface construction ---
 def create_demo():
     """Creates the Gradio interface.

     Builds the NewsIA ``gr.Blocks`` layout (article settings, source tabs for
     documents, audio/video, URLs and social media, plus two output textboxes)
     and wires the "Generate" and "Clear All" buttons.

     Input components are collected in ``all_inputs`` in creation order and
     passed as the ``inputs`` of the ``generate_news`` click handler, so that
     order defines the positional arguments the handler receives.

     Returns:
         The configured ``gr.Blocks`` instance. It is neither queued nor
         launched here.
     """
     logger.info("--- Creating Gradio interface ---")
     # Widget defaults are named once so the "Clear All" handler restores
     # exactly the values the widgets start with.
     default_size = 250
     default_tone = "neutral"
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 📰 NewsIA - AI News Generator")
         gr.Markdown("Create professional news articles from multiple information sources.")
         all_inputs = []
         with gr.Row():
             with gr.Column(scale=2):
                 logger.info("Creating instruction input.")
                 instructions = gr.Textbox(
                     label="Instructions for the News Article",
                     placeholder="Enter specific instructions...",
                     lines=2,
                 )
                 all_inputs.append(instructions)
                 logger.info("Creating facts input.")
                 facts = gr.Textbox(
                     label="Main Facts",
                     placeholder="Describe the most important facts...",
                     lines=4,
                 )
                 all_inputs.append(facts)
                 with gr.Row():
                     logger.info("Creating size slider.")
                     size_slider = gr.Slider(
                         label="Approximate Length (words)",
                         minimum=100,
                         maximum=700,
                         value=default_size,
                         step=50,
                     )
                     all_inputs.append(size_slider)
                     logger.info("Creating tone dropdown.")
                     tone_dropdown = gr.Dropdown(
                         label="Tone of the News Article",
                         choices=[
                             "neutral",
                             "serious",
                             "formal",
                             "urgent",
                             "investigative",
                             "human-interest",
                             "lighthearted",
                         ],
                         value=default_tone,
                     )
                     all_inputs.append(tone_dropdown)
             with gr.Column(scale=3):
                 with gr.Tabs():
                     with gr.TabItem("📝 Documents"):
                         gr.Markdown("Upload relevant documents (PDF, DOCX, XLSX, CSV). Max 5.")
                         doc_inputs = []
                         for i in range(1, 6):
                             # Extensions need a leading dot; a bare "pdf" is
                             # not a valid file type for gr.File.
                             doc_file = gr.File(
                                 label=f"Document {i}",
                                 file_types=[".pdf", ".docx", ".xlsx", ".csv"],
                                 file_count="single",
                             )
                             doc_inputs.append(doc_file)
                         all_inputs.extend(doc_inputs)
                         logger.info(f"{len(doc_inputs)} document inputs created.")
                     with gr.TabItem("🔊 Audio/Video"):
                         logger.info("Creating audio/video input tabs.")
                         gr.Markdown("Upload audio or video files... Max 5 sources.")
                         audio_video_inputs = []
                         for i in range(1, 6):
                             with gr.Group():
                                 gr.Markdown(f"**Source {i}**")
                                 audio_file = gr.File(
                                     label=f"Audio/Video File {i}",
                                     file_types=["audio", "video"],
                                 )
                                 with gr.Row():
                                     speaker_name = gr.Textbox(label="Speaker Name", placeholder="Name...")
                                     speaker_role = gr.Textbox(label="Role/Position", placeholder="Role...")
                                 # Each source contributes a (file, name, role) triple.
                                 audio_video_inputs.extend([audio_file, speaker_name, speaker_role])
                         all_inputs.extend(audio_video_inputs)
                         logger.info(f"{len(audio_video_inputs)} audio/video inputs created.")
                     with gr.TabItem("🌐 URLs"):
                         logger.info("Creating URL input tabs.")
                         gr.Markdown("Add URLs to relevant web pages... Max 5.")
                         url_inputs = []
                         for i in range(1, 6):
                             url_textbox = gr.Textbox(label=f"URL {i}", placeholder="https://...")
                             url_inputs.append(url_textbox)
                         all_inputs.extend(url_inputs)
                         logger.info(f"{len(url_inputs)} URL inputs created.")
                     with gr.TabItem("📱 Social Media"):
                         logger.info("Creating social media input tabs.")
                         gr.Markdown("Add URLs to social media posts... Max 3.")
                         social_inputs = []
                         for i in range(1, 4):
                             with gr.Group():
                                 gr.Markdown(f"**Social Media Source {i}**")
                                 social_url_textbox = gr.Textbox(label="Post URL", placeholder="https://...")
                                 with gr.Row():
                                     social_name_textbox = gr.Textbox(label="Account Name/User", placeholder="@username")
                                     social_context_textbox = gr.Textbox(label="Context", placeholder="Context...")
                                 # Each source contributes a (url, name, context) triple.
                                 social_inputs.extend(
                                     [social_url_textbox, social_name_textbox, social_context_textbox]
                                 )
                         all_inputs.extend(social_inputs)
                         logger.info(f"{len(social_inputs)} social media inputs created.")
         logger.info(f"Total number of input components collected: {len(all_inputs)}")
         with gr.Row():
             logger.info("Creating generate and clear buttons.")
             generate_button = gr.Button("✨ Generate News Article", variant="primary")
             clear_button = gr.Button("🔄 Clear All Inputs")
         with gr.Tabs():
             with gr.TabItem("📄 Generated News Article"):
                 logger.info("Creating news output textbox.")
                 news_output = gr.Textbox(
                     label="Draft News Article",
                     lines=20,
                     show_copy_button=True,
                     interactive=False,
                 )
             with gr.TabItem("🎙️ Source Transcriptions & Logs"):
                 logger.info("Creating transcriptions/log output textbox.")
                 transcriptions_output = gr.Textbox(
                     label="Transcriptions and Processing Log",
                     lines=15,
                     show_copy_button=True,
                     interactive=False,
                 )
         outputs_list = [news_output, transcriptions_output]
         logger.info("Setting up event handlers.")
         generate_button.click(fn=generate_news, inputs=all_inputs, outputs=outputs_list)
         logger.info("Generate button click handler set.")

         def clear_all_inputs_and_outputs():
             """Return one reset value per input component, then two for the outputs.

             Also asks the model manager to drop any loaded models; a failure
             there is logged and does not prevent the UI reset.
             """
             logger.info("--- Clear All button clicked ---")
             reset_values = []
             for input_comp in all_inputs:
                 if isinstance(input_comp, gr.Dropdown):
                     # Reset to the default choice: an empty string is not in
                     # the dropdown's choices and would be an invalid value.
                     reset_values.append(default_tone)
                 elif isinstance(input_comp, gr.Textbox):
                     reset_values.append("")
                 elif isinstance(input_comp, gr.Slider):
                     reset_values.append(default_size)
                 else:
                     # gr.File and anything unrecognised are cleared with None.
                     reset_values.append(None)
             # The two output textboxes come last, matching the outputs wiring below.
             reset_values.extend(["", ""])
             logger.info(f"Generated {len(reset_values)} reset values for UI components.")
             try:
                 logger.info("Calling model reset from clear button handler.")
                 model_manager.reset_models(force=True)
             except Exception as e:
                 logger.error(f"Error resetting models during clear operation: {e}")
                 logger.error(traceback.format_exc())
             logger.info("--- Clear All operation finished ---")
             return reset_values

         clear_button.click(
             fn=clear_all_inputs_and_outputs,
             inputs=None,
             outputs=all_inputs + outputs_list,
         )
         logger.info("Clear button click handler set.")
         logger.info("--- Gradio interface creation complete ---")
     return demo
+# --- Script entry point: build the demo, enable the queue, and launch the server ---
 if __name__ == "__main__":
     # Entry point when the file is run as a script: build the UI, enable
     # Gradio's request queue, then start the web server.
     logger.info("--- Running main execution block ---")

     logger.info("Creating Gradio demo instance...")
     news_demo = create_demo()
     logger.info("Gradio demo instance created.")

     logger.info("Configuring Gradio queue...")
     news_demo.queue()
     logger.info("Gradio queue configured.")

     logger.info("Launching Gradio interface...")
     try:
         # Bind to all interfaces on port 7860.
         news_demo.launch(server_name="0.0.0.0", server_port=7860)
         logger.info("Gradio launch called. Application running.")
     except Exception as exc:
         # A launch failure is logged with its traceback and not re-raised,
         # so execution falls through to the final log line.
         logger.error(f"!!! CRITICAL Error during Gradio launch: {exc}")
         logger.error(traceback.format_exc())

     logger.info("--- Main execution block potentially finished (if launch doesn't block indefinitely) ---")