import spaces
import gradio as gr
import logging
import os
import tempfile
import pandas as pd
import requests
from bs4 import BeautifulSoup
import torch
import whisper
import subprocess
from pydub import AudioSegment
import fitz  # PyMuPDF
import docx
import yt_dlp
from functools import lru_cache
import gc
import time
import shutil
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import traceback  # For detailed error logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Login to Hugging Face Hub if a token is available
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
if HUGGINGFACE_TOKEN:
    try:
        login(token=HUGGINGFACE_TOKEN)
        logger.info("Successfully logged in to Hugging Face Hub.")
    except Exception as e:
        logger.error(f"Failed to login to Hugging Face Hub: {e}")
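
# Note: in a Hugging Face Space this token would typically be configured as a
# repository secret named HUGGINGFACE_TOKEN, matching the os.environ.get()
# call above. Without a token the login step is simply skipped and only
# public models are accessible.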

class ModelManager:
    """Singleton that lazily loads and shares the LLM and Whisper models."""
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ModelManager, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if not self._initialized:
            self.tokenizer = None
            self.model = None
            self.text_pipeline = None  # Renamed for clarity
            self.whisper_model = None
            self._initialized = True
            self.last_used = time.time()
            self.llm_loading = False
            self.whisper_loading = False

    # Increased duration for potentially long loads
    def initialize_llm(self):
        """Initialize the LLM with standard transformers."""
        if self.llm_loading:
            logger.info("LLM initialization already in progress.")
            return True  # Assume it will succeed or fail elsewhere
        if self.tokenizer and self.model and self.text_pipeline:
            logger.info("LLM already initialized.")
            self.last_used = time.time()
            return True
        self.llm_loading = True
        try:
            # Use a small model for ZeroGPU compatibility
            MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

            logger.info("Loading LLM tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                token=HUGGINGFACE_TOKEN,
                use_fast=True
            )
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            # Basic memory settings for ZeroGPU
            logger.info("Loading LLM model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                token=HUGGINGFACE_TOKEN,
                device_map="auto",
                torch_dtype=torch.float16,
                low_cpu_mem_usage=True,
                # max_memory={0: "4GB"},  # Removed; let device_map="auto" handle placement
                offload_folder="offload",
                offload_state_dict=True
            )

            # Create the text generation pipeline
            logger.info("Creating LLM text generation pipeline...")
            self.text_pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.float16,
                device_map="auto",
                max_length=1024  # Default max length
            )
            logger.info("LLM initialized successfully.")
            self.last_used = time.time()
            self.llm_loading = False
            return True
        except Exception as e:
            logger.error(f"Error initializing LLM: {str(e)}")
            logger.error(traceback.format_exc())  # Log the full traceback
            # Reset partially loaded components
            self.tokenizer = None
            self.model = None
            self.text_pipeline = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
            self.llm_loading = False
            raise  # Re-raise the exception to signal failure

    # Increased duration
    def initialize_whisper(self):
        """Initialize the Whisper model for audio transcription."""
        if self.whisper_loading:
            logger.info("Whisper initialization already in progress.")
            return True
        if self.whisper_model:
            logger.info("Whisper already initialized.")
            self.last_used = time.time()
            return True
        self.whisper_loading = True
        try:
            logger.info("Loading Whisper model...")
            # The "tiny" model is used for efficiency; consider "base" if
            # accuracy matters more than load time. Note: whisper.load_model
            # does not currently accept weights_only, so the related torch
            # FutureWarning is informational and can be ignored here.
            self.whisper_model = whisper.load_model(
                "tiny",
                device="cuda" if torch.cuda.is_available() else "cpu",
                download_root="/tmp/whisper"  # Use persistent storage if available/needed
            )
            logger.info("Whisper model initialized successfully.")
            self.last_used = time.time()
            self.whisper_loading = False
            return True
        except Exception as e:
            logger.error(f"Error initializing Whisper: {str(e)}")
            logger.error(traceback.format_exc())
            self.whisper_model = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
            self.whisper_loading = False
            raise

    def check_llm_initialized(self):
        """Ensure the LLM is initialized, loading it if needed."""
        if self.tokenizer is None or self.model is None or self.text_pipeline is None:
            logger.info("LLM not initialized, initializing...")
            if not self.llm_loading:  # Prevent re-entry if already loading
                self.initialize_llm()
            else:
                logger.info("LLM initialization is already in progress by another request.")
                # Wait briefly for the other request to finish
                time.sleep(5)
                if self.tokenizer is None or self.model is None or self.text_pipeline is None:
                    raise RuntimeError("LLM initialization timed out or failed.")
        self.last_used = time.time()

    def check_whisper_initialized(self):
        """Ensure the Whisper model is initialized, loading it if needed."""
        if self.whisper_model is None:
            logger.info("Whisper model not initialized, initializing...")
            if not self.whisper_loading:  # Prevent re-entry
                self.initialize_whisper()
            else:
                logger.info("Whisper initialization is already in progress by another request.")
                time.sleep(5)
                if self.whisper_model is None:
                    raise RuntimeError("Whisper initialization timed out or failed.")
        self.last_used = time.time()

    def reset_models(self, force=False):
        """Reset models to free memory if they haven't been used recently."""
        current_time = time.time()
        # Only reset if forced or if the models have been idle for 10 minutes (600 seconds)
        if force or (current_time - self.last_used > 600):
            try:
                logger.info("Resetting models to free memory...")
                # Check and delete attributes safely
                if hasattr(self, 'model') and self.model is not None:
                    del self.model
                    self.model = None
                    logger.info("LLM model deleted.")
                if hasattr(self, 'tokenizer') and self.tokenizer is not None:
                    del self.tokenizer
                    self.tokenizer = None
                    logger.info("LLM tokenizer deleted.")
                if hasattr(self, 'text_pipeline') and self.text_pipeline is not None:
                    del self.text_pipeline
                    self.text_pipeline = None
                    logger.info("LLM pipeline deleted.")
                if hasattr(self, 'whisper_model') and self.whisper_model is not None:
                    del self.whisper_model
                    self.whisper_model = None
                    logger.info("Whisper model deleted.")
                # Explicitly clear the CUDA cache and collect garbage
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    # torch.cuda.synchronize()  # Usually unnecessary and can slow things down
                    logger.info("CUDA cache cleared.")
                gc.collect()
                logger.info("Garbage collected. Models reset successfully.")
                self._initialized = False  # Mark as uninitialized so models reload on next use
            except Exception as e:
                logger.error(f"Error resetting models: {str(e)}")
                logger.error(traceback.format_exc())


# Create global model manager instance
model_manager = ModelManager()
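
# Illustrative note (not part of the original flow): because ModelManager is a
# singleton, constructing it again anywhere in this module returns the same
# object, so loaded-model state is shared across all request handlers:
#   assert ModelManager() is model_manager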


# Reduced cache size slightly
def download_social_media_video(url):
    """Download the audio track of a social media video URL."""
    temp_dir = tempfile.mkdtemp()
    output_template = os.path.join(temp_dir, '%(id)s.%(ext)s')
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',  # Standard quality
        }],
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'nocheckcertificate': True,  # Sometimes needed for tricky sites
        'retries': 3,  # Add retries
        'socket_timeout': 15,  # Timeout
    }
    try:
        logger.info(f"Attempting to download audio from: {url}")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            # Construct the expected final filename after postprocessing
            audio_file = os.path.join(temp_dir, f"{info_dict['id']}.mp3")
            if not os.path.exists(audio_file):
                # Fallback if the filename doesn't match exactly (e.g., webm -> mp3)
                found_files = [f for f in os.listdir(temp_dir) if f.endswith('.mp3')]
                if found_files:
                    audio_file = os.path.join(temp_dir, found_files[0])
                else:
                    raise FileNotFoundError(f"Could not find downloaded MP3 in {temp_dir}")
            logger.info(f"Audio downloaded successfully: {audio_file}")
            # Read the file content to return, as the temp dir might be cleaned up
            with open(audio_file, 'rb') as f:
                audio_content = f.read()
            # Clean up the temporary directory and file
            try:
                os.remove(audio_file)
                os.rmdir(temp_dir)
            except OSError as e:
                logger.warning(f"Could not completely clean up temp download files: {e}")
            # Save the content to a new temporary file that Gradio can handle
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output_file:
                temp_output_file.write(audio_content)
                final_path = temp_output_file.name
            logger.info(f"Audio saved to temporary file: {final_path}")
            return final_path
    except yt_dlp.utils.DownloadError as e:
        logger.error(f"yt-dlp download error for {url}: {str(e)}")
        # Clean up the temp dir on error
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception as cleanup_e:
            logger.warning(f"Error during cleanup after download failure: {cleanup_e}")
        return None  # Return None to indicate failure
    except Exception as e:
        logger.error(f"Unexpected error downloading video from {url}: {str(e)}")
        logger.error(traceback.format_exc())
        # Clean up the temp dir on error
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
        except Exception as cleanup_e:
            logger.warning(f"Error during cleanup after download failure: {cleanup_e}")
        return None


def convert_video_to_audio(video_file_path):
    """Convert a video file to audio using ffmpeg directly."""
    output_file_path = None
    try:
        # Create a temporary file path for the output MP3
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_file_path = temp_file.name
        logger.info(f"Converting video '{video_file_path}' to audio '{output_file_path}'")
        # Use ffmpeg directly via subprocess. The global options (-y, -loglevel)
        # must precede the output path, or ffmpeg rejects them as trailing options.
        command = [
            "ffmpeg",
            "-y",                     # Overwrite output file if it exists
            "-loglevel", "error",     # Suppress verbose ffmpeg output
            "-i", video_file_path,
            "-vn",                    # No video
            "-acodec", "libmp3lame",  # MP3 codec
            "-ab", "192k",            # Audio bitrate
            "-ar", "44100",           # Audio sample rate
            "-ac", "2",               # Stereo audio
            output_file_path,
        ]
        process = subprocess.run(command, check=True, capture_output=True, text=True)
        logger.info(f"ffmpeg conversion successful for {video_file_path}.")
        logger.debug(f"ffmpeg stdout: {process.stdout}")
        logger.debug(f"ffmpeg stderr: {process.stderr}")
        # Verify the output file exists and is non-empty
        if not os.path.exists(output_file_path) or os.path.getsize(output_file_path) == 0:
            raise RuntimeError(f"ffmpeg conversion failed: Output file '{output_file_path}' not created or is empty.")
        logger.info(f"Video converted to audio: {output_file_path}")
        return output_file_path
    except subprocess.CalledProcessError as e:
        logger.error(f"ffmpeg command failed with exit code {e.returncode}")
        logger.error(f"ffmpeg stderr: {e.stderr}")
        logger.error(f"ffmpeg stdout: {e.stdout}")
        # Clean up the potentially empty output file
        if output_file_path and os.path.exists(output_file_path):
            os.remove(output_file_path)
        raise RuntimeError(f"ffmpeg conversion failed: {e.stderr}") from e
    except Exception as e:
        logger.error(f"Error converting video '{video_file_path}': {str(e)}")
        logger.error(traceback.format_exc())
        # Clean up the potentially created output file
        if output_file_path and os.path.exists(output_file_path):
            os.remove(output_file_path)
        raise  # Re-raise the exception
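
# For reference, the subprocess call above corresponds to the shell command
# (with a hypothetical input file):
#   ffmpeg -y -loglevel error -i input.mp4 -vn -acodec libmp3lame \
#          -ab 192k -ar 44100 -ac 2 output.mp3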


def preprocess_audio(input_audio_path):
    """Preprocess the audio file (e.g., normalize volume)."""
    try:
        logger.info(f"Preprocessing audio file: {input_audio_path}")
        audio = AudioSegment.from_file(input_audio_path)
        # Optional normalization to a target loudness of -20 dBFS:
        # change_in_dBFS = -20.0 - audio.dBFS
        # audio = audio.apply_gain(change_in_dBFS)
        # Export to a new temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_path = temp_file.name
        audio.export(output_path, format="mp3")
        logger.info(f"Audio preprocessed and saved to: {output_path}")
        return output_path
    except Exception as e:
        logger.error(f"Error preprocessing audio '{input_audio_path}': {str(e)}")
        logger.error(traceback.format_exc())
        # Raise rather than returning the original path so failures are explicit
        raise


# Allow more time for transcription
def transcribe_audio_or_video(file_input):
    """Transcribe an audio or video file (local path or Gradio File object)."""
    audio_file_to_transcribe = None
    original_input_path = None
    temp_files_to_clean = []
    try:
        model_manager.check_whisper_initialized()

        if file_input is None:
            logger.info("No file input provided for transcription.")
            return ""  # Return an empty string for None input

        # Determine the input type and get the file path
        if isinstance(file_input, str):  # Input is a path
            original_input_path = file_input
            logger.info(f"Processing path input: {original_input_path}")
            if not os.path.exists(original_input_path):
                logger.error(f"Input file path does not exist: {original_input_path}")
                raise FileNotFoundError(f"Input file not found: {original_input_path}")
            input_path = original_input_path
        elif hasattr(file_input, 'name'):  # Input is a Gradio File object
            original_input_path = file_input.name
            logger.info(f"Processing Gradio file input: {original_input_path}")
            input_path = original_input_path  # Gradio usually provides a temp path
        else:
            logger.error(f"Unsupported input type for transcription: {type(file_input)}")
            raise TypeError("Invalid input type for transcription. Expected file path or Gradio File object.")

        file_extension = os.path.splitext(input_path)[1].lower()

        # Check whether it is a video file that needs conversion
        if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.webm']:
            logger.info(f"Detected video file ({file_extension}), converting to audio...")
            converted_audio_path = convert_video_to_audio(input_path)
            temp_files_to_clean.append(converted_audio_path)
            audio_file_to_process = converted_audio_path
        elif file_extension in ['.mp3', '.wav', '.ogg', '.flac', '.m4a']:
            logger.info(f"Detected audio file ({file_extension}).")
            audio_file_to_process = input_path
        else:
            logger.error(f"Unsupported file extension for transcription: {file_extension}")
            raise ValueError(f"Unsupported file type: {file_extension}")

        # Preprocess the audio (optional; falls back to the raw audio on failure)
        try:
            preprocessed_audio_path = preprocess_audio(audio_file_to_process)
            # If preprocessing created a new file, add it to the cleanup list
            if preprocessed_audio_path != audio_file_to_process:
                temp_files_to_clean.append(preprocessed_audio_path)
            audio_file_to_transcribe = preprocessed_audio_path
        except Exception as preprocess_err:
            logger.warning(f"Audio preprocessing failed: {preprocess_err}. Using original/converted audio.")
            audio_file_to_transcribe = audio_file_to_process  # Fallback

        logger.info(f"Transcribing audio file: {audio_file_to_transcribe}")
        if not os.path.exists(audio_file_to_transcribe):
            raise FileNotFoundError(f"Audio file to transcribe not found: {audio_file_to_transcribe}")

        # Perform the transcription
        with torch.inference_mode():  # Inference mode for efficiency
            # Use fp16 when CUDA is available
            use_fp16 = torch.cuda.is_available()
            result = model_manager.whisper_model.transcribe(
                audio_file_to_transcribe,
                fp16=use_fp16
            )
            if not result:
                raise RuntimeError("Transcription failed to produce results")
            transcription = result.get("text", "Error: Transcription result empty")
            # Limit the transcription length shown in logs
            log_transcription = (transcription[:100] + '...') if len(transcription) > 100 else transcription
            logger.info(f"Transcription completed: {log_transcription}")
        return transcription
    except FileNotFoundError as e:
        logger.error(f"File not found error during transcription: {e}")
        return f"Error: Input file not found ({e})"
    except ValueError as e:
        logger.error(f"Value error during transcription: {e}")
        return f"Error: Unsupported file type ({e})"
    except TypeError as e:
        logger.error(f"Type error during transcription setup: {e}")
        return f"Error: Invalid input provided ({e})"
    except RuntimeError as e:
        logger.error(f"Runtime error during transcription: {e}")
        logger.error(traceback.format_exc())
        return f"Error during processing: {e}"
    except Exception as e:
        logger.error(f"Unexpected error during transcription: {str(e)}")
        logger.error(traceback.format_exc())
        return "Error processing the file: An unexpected error occurred."
    finally:
        # Clean up all temporary files created during the process
        for temp_file in temp_files_to_clean:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    logger.info(f"Cleaned up temporary file: {temp_file}")
            except Exception as e:
                logger.warning(f"Could not remove temporary file {temp_file}: {str(e)}")
        # Resetting models here would be too aggressive; the idle reset happens elsewhere
        # model_manager.reset_models()
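
# Illustrative call (hypothetical path): this function returns the
# transcription text on success and a human-readable "Error..." string on failure:
#   text = transcribe_audio_or_video("/tmp/interview.mp3")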


def read_document(document_path):
    """Read the content of a document (PDF, DOCX, XLSX/XLS, or CSV)."""
    try:
        logger.info(f"Reading document: {document_path}")
        if not os.path.exists(document_path):
            raise FileNotFoundError(f"Document not found: {document_path}")

        file_extension = os.path.splitext(document_path)[1].lower()
        if file_extension == ".pdf":
            doc = fitz.open(document_path)
            text = "\n".join([page.get_text() for page in doc])
            doc.close()
            return text
        elif file_extension == ".docx":
            doc = docx.Document(document_path)
            return "\n".join([paragraph.text for paragraph in doc.paragraphs])
        elif file_extension in (".xlsx", ".xls"):
            # Read all sheets and combine them
            xls = pd.ExcelFile(document_path)
            text = ""
            for sheet_name in xls.sheet_names:
                df = pd.read_excel(xls, sheet_name=sheet_name)
                text += f"--- Sheet: {sheet_name} ---\n{df.to_string()}\n\n"
            return text.strip()
        elif file_extension == ".csv":
            # Try detecting the separator
            try:
                df = pd.read_csv(document_path)
            except pd.errors.ParserError:
                logger.warning(f"Could not parse CSV {document_path} with default comma separator, trying semicolon.")
                df = pd.read_csv(document_path, sep=';')
            return df.to_string()
        else:
            logger.warning(f"Unsupported document type: {file_extension}")
            return "Unsupported file type. Please upload a PDF, DOCX, XLSX or CSV document."
    except FileNotFoundError as e:
        logger.error(f"Error reading document: {e}")
        return f"Error: Document file not found at {document_path}"
    except Exception as e:
        logger.error(f"Error reading document {document_path}: {str(e)}")
        logger.error(traceback.format_exc())
        return f"Error reading document: {str(e)}"
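
# Note: pandas can also sniff the delimiter itself via
# pd.read_csv(path, sep=None, engine="python"); the explicit comma-then-semicolon
# fallback above is kept because its behavior is predictable.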


def read_url(url):
    """Read the main textual content of a URL."""
    if not url or not url.strip().startswith('http'):
        logger.info(f"Invalid or empty URL provided: '{url}'")
        return ""  # Return empty for invalid or empty URLs
    try:
        logger.info(f"Reading URL: {url}")
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Increased timeout
        response = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        # Proceed only if the content is likely HTML/text
        content_type = response.headers.get('content-type', '').lower()
        if not ('html' in content_type or 'text' in content_type):
            logger.warning(f"URL {url} has non-text content type: {content_type}. Skipping.")
            return f"Error: URL content type ({content_type}) is not text/html."

        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove non-content elements such as scripts, styles, navigation, and footers
        for element in soup(["script", "style", "meta", "noscript", "iframe", "header", "footer", "nav", "aside", "form", "button"]):
            element.extract()

        # Attempt to find the main content area via common tags/attributes
        main_content = (
            soup.find("main") or
            soup.find("article") or
            soup.find("div", class_=["content", "main", "post-content", "entry-content", "article-body"]) or
            soup.find("div", id=["content", "main", "article"])
        )
        if main_content:
            text = main_content.get_text(separator='\n', strip=True)
        else:
            # Fall back to the body if no specific main content is found
            body = soup.find("body")
            if body:
                text = body.get_text(separator='\n', strip=True)
            else:  # Very basic fallback
                text = soup.get_text(separator='\n', strip=True)

        # Clean up whitespace: collapse repeated newlines and spaces
        text = '\n'.join([line.strip() for line in text.split('\n') if line.strip()])
        text = ' '.join(text.split())  # Consolidate spaces within lines

        if not text:
            logger.warning(f"Could not extract meaningful text from URL: {url}")
            return "Error: Could not extract text content from URL."

        # Limit the content size to avoid overwhelming the LLM
        max_chars = 15000
        if len(text) > max_chars:
            logger.info(f"URL content truncated to {max_chars} characters.")
            text = text[:max_chars] + "... [content truncated]"
        return text
    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching URL {url}: {str(e)}")
        return f"Error reading URL: Could not fetch content ({e})"
    except Exception as e:
        logger.error(f"Error parsing URL {url}: {str(e)}")
        logger.error(traceback.format_exc())
        return f"Error reading URL: Could not parse content ({e})"


def process_social_media_url(url):
    """Process a social media URL, attempting to get text and transcribe video/audio."""
    if not url or not url.strip().startswith('http'):
        logger.info(f"Invalid or empty social media URL: '{url}'")
        return None

    logger.info(f"Processing social media URL: {url}")
    text_content = None
    video_transcription = None
    error_occurred = False

    # 1. Try extracting text content using read_url (works for some platforms/posts).
    # Note: read_url error strings do not all start with "Error:", so match on "Error".
    try:
        text_content = read_url(url)
        if text_content and text_content.startswith("Error"):
            logger.warning(f"Failed to read text content from social URL {url}: {text_content}")
            text_content = None  # Reset if it was an error message
    except Exception as e:
        logger.error(f"Error reading text content from social URL {url}: {e}")
        error_occurred = True

    # 2. Try downloading and transcribing potential video/audio content
    downloaded_audio_path = None
    try:
        downloaded_audio_path = download_social_media_video(url)
        if downloaded_audio_path:
            logger.info(f"Audio downloaded from {url}, proceeding to transcription.")
            video_transcription = transcribe_audio_or_video(downloaded_audio_path)
            if video_transcription and video_transcription.startswith("Error"):
                logger.warning(f"Transcription failed for audio from {url}: {video_transcription}")
                video_transcription = None  # Reset if it was an error
        else:
            logger.info(f"No downloadable audio/video found or download failed for URL: {url}")
    except Exception as e:
        logger.error(f"Error processing video content from social URL {url}: {e}")
        logger.error(traceback.format_exc())
        error_occurred = True
    finally:
        # Clean up the downloaded file if it exists
        if downloaded_audio_path and os.path.exists(downloaded_audio_path):
            try:
                os.remove(downloaded_audio_path)
                logger.info(f"Cleaned up downloaded audio: {downloaded_audio_path}")
            except Exception as e:
                logger.warning(f"Failed to cleanup downloaded audio {downloaded_audio_path}: {e}")

    # Return results only if some content was found or no critical error occurred
    if text_content or video_transcription or not error_occurred:
        return {
            "text": text_content or "",  # Ensure string type
            "video": video_transcription or ""  # Ensure string type
        }
    else:
        logger.error(f"Failed to process social media URL {url} completely.")
        return None  # Indicate failure


# Allow more time for generation
def generate_news(instructions, facts, size, tone, *args):
    """Generate a news article from the provided data using an LLM."""
    request_start_time = time.time()
    logger.info("Received request to generate news.")
    raw_transcriptions = ""  # Initialized before the try block so the error path can always reference it
    try:
        # Ensure size is an integer
        try:
            size = int(size) if size else 250  # Default size if None/empty
        except ValueError:
            logger.warning(f"Invalid size value '{size}', defaulting to 250.")
            size = 250

        # Check that the LLM is initialized; load it if necessary.
        # Whisper is checked/loaded later, only if audio sources exist.
        model_manager.check_llm_initialized()

        # --- Argument Parsing ---
        # The order *must* match the order components are added to all_inputs in create_demo.
        # Fixed inputs: instructions, facts, size, tone (passed directly).
        # Dynamic inputs from *args, in order:
        #   5 documents, 15 audio-related, 5 URLs, 9 social-related
        num_docs = 5
        num_audio_sources = 5
        num_audio_inputs_per_source = 3
        num_urls = 5
        num_social_sources = 3
        num_social_inputs_per_source = 3
        total_expected_args = num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls + (num_social_sources * num_social_inputs_per_source)

        args_list = list(args)
        # Pad args_list with None if fewer arguments were received than expected
        args_list.extend([None] * (total_expected_args - len(args_list)))

        # Slice the arguments according to the expected order
        doc_files = args_list[0:num_docs]
        audio_inputs_flat = args_list[num_docs : num_docs + (num_audio_sources * num_audio_inputs_per_source)]
        url_inputs = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) : num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls]
        social_inputs_flat = args_list[num_docs + (num_audio_sources * num_audio_inputs_per_source) + num_urls : total_expected_args]
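
        # With the counts above, the concrete slices work out to:
        #   doc_files          = args_list[0:5]
        #   audio_inputs_flat  = args_list[5:20]   # (file, name, position) x 5
        #   url_inputs         = args_list[20:25]
        #   social_inputs_flat = args_list[25:34]  # (url, name, context) x 3
        # for a total of 34 expected dynamic arguments.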

        knowledge_base = {
            "instructions": instructions or "No specific instructions provided.",
            "facts": facts or "No specific facts provided.",
            "document_content": [],
            "audio_data": [],
            "url_content": [],
            "social_content": []
        }

        # --- Process Inputs ---
        logger.info("Processing document inputs...")
        for i, doc_file in enumerate(doc_files):
            if doc_file and hasattr(doc_file, 'name'):
                try:
                    content = read_document(doc_file.name)  # doc_file.name is the temp path
                    if content and not content.startswith("Error"):
                        # Truncate long documents for the knowledge base summary
                        doc_excerpt = (content[:1000] + "... [document truncated]") if len(content) > 1000 else content
                        knowledge_base["document_content"].append(f"[Document {i+1} Source: {os.path.basename(doc_file.name)}]\n{doc_excerpt}")
                    else:
                        logger.warning(f"Skipping document {i+1} due to read error or empty content: {content}")
                except Exception as e:
                    logger.error(f"Failed to process document {i+1} ({doc_file.name}): {e}")
                # No cleanup needed here; Gradio handles temp file uploads

        logger.info("Processing URL inputs...")
        for i, url in enumerate(url_inputs):
            if url and isinstance(url, str) and url.strip().startswith('http'):
                try:
                    content = read_url(url)
                    if content and not content.startswith("Error"):
                        # Content is already truncated in read_url if needed
                        knowledge_base["url_content"].append(f"[URL {i+1} Source: {url}]\n{content}")
                    else:
                        logger.warning(f"Skipping URL {i+1} ({url}) due to read error or empty content: {content}")
                except Exception as e:
                    logger.error(f"Failed to process URL {i+1} ({url}): {e}")

        logger.info("Processing audio/video inputs...")
        has_audio_source = False
        for i in range(num_audio_sources):
            start_idx = i * num_audio_inputs_per_source
            audio_file = audio_inputs_flat[start_idx]
            name = audio_inputs_flat[start_idx + 1] or f"Source {i+1}"
            position = audio_inputs_flat[start_idx + 2] or "N/A"
            if audio_file and hasattr(audio_file, 'name'):
                # Store the info for transcription later
                knowledge_base["audio_data"].append({
                    "file_path": audio_file.name,  # Use the temp path
                    "name": name,
                    "position": position,
                    "original_filename": os.path.basename(audio_file.name)  # Keep the original name for logs
                })
                has_audio_source = True
                logger.info(f"Added audio source {i+1}: {name} ({position}) - File: {knowledge_base['audio_data'][-1]['original_filename']}")

        logger.info("Processing social media inputs...")
        has_social_source = False
        for i in range(num_social_sources):
            start_idx = i * num_social_inputs_per_source
            social_url = social_inputs_flat[start_idx]
            social_name = social_inputs_flat[start_idx + 1] or f"Social Source {i+1}"
            social_context = social_inputs_flat[start_idx + 2] or "N/A"
            if social_url and isinstance(social_url, str) and social_url.strip().startswith('http'):
                try:
                    logger.info(f"Processing social media URL {i+1}: {social_url}")
                    social_data = process_social_media_url(social_url)
                    if social_data:
                        knowledge_base["social_content"].append({
                            "url": social_url,
                            "name": social_name,
                            "context": social_context,
                            "text": social_data.get("text", ""),
                            "video_transcription": social_data.get("video", "")  # Store the potential transcription
                        })
                        has_social_source = True
                        logger.info(f"Added social source {i+1}: {social_name} ({social_context}) from {social_url}")
                    else:
                        logger.warning(f"Could not retrieve any content for social URL {i+1}: {social_url}")
                except Exception as e:
                    logger.error(f"Failed to process social URL {i+1} ({social_url}): {e}")

        # --- Transcribe Audio/Video ---
        # Whisper is only initialized when there is something to transcribe.
        # (Social media audio is already transcribed inside process_social_media_url.)
        transcriptions_for_prompt = ""
        if has_audio_source:
            logger.info("Audio sources detected, ensuring Whisper model is ready...")
            try:
                model_manager.check_whisper_initialized()
            except Exception as whisper_init_err:
                logger.error(f"FATAL: Whisper model initialization failed: {whisper_init_err}. Cannot transcribe.")
                # Log the error in the transcription log and continue without transcriptions
                raw_transcriptions += f"[ERROR] Whisper model failed to load. Audio sources could not be transcribed: {whisper_init_err}\n\n"

            if model_manager.whisper_model:  # Proceed only if Whisper loaded successfully
                logger.info("Transcribing collected audio sources...")
                for idx, data in enumerate(knowledge_base["audio_data"]):
                    try:
                        logger.info(f"Transcribing audio source {idx+1}: {data['original_filename']} ({data['name']}, {data['position']})")
                        transcription = transcribe_audio_or_video(data["file_path"])
                        if transcription and not transcription.startswith("Error"):
                            quote = f'"{transcription}" - {data["name"]}, {data["position"]}'
                            transcriptions_for_prompt += f"{quote}\n\n"
                            raw_transcriptions += f'[Audio/Video {idx + 1}: {data["original_filename"]} ({data["name"]}, {data["position"]})]\n"{transcription}"\n\n'
                        else:
                            logger.warning(f"Transcription failed or returned an error for audio source {idx+1}: {transcription}")
                            raw_transcriptions += f'[Audio/Video {idx + 1}: {data["original_filename"]} ({data["name"]}, {data["position"]})]\n[Error during transcription: {transcription}]\n\n'
                    except Exception as e:
                        logger.error(f"Error during transcription for audio source {idx+1} ({data['original_filename']}): {e}")
                        logger.error(traceback.format_exc())
                        raw_transcriptions += f'[Audio/Video {idx + 1}: {data["original_filename"]} ({data["name"]}, {data["position"]})]\n[Error during transcription: {e}]\n\n'
                    # Gradio cleans up the uploaded temp file (audio_file.name) itself

        logger.info("Adding social media content to prompt data...")
        for idx, data in enumerate(knowledge_base["social_content"]):
            source_id = f'[Social Media {idx+1}: {data["url"]} ({data["name"]}, {data["context"]})]'
            has_content = False
            if data["text"] and not data["text"].startswith("Error"):
                # Truncate long text for the prompt; the full text still goes to the log
                text_excerpt = (data["text"][:500] + "...[text truncated]") if len(data["text"]) > 500 else data["text"]
                social_text_prompt = f'{source_id} - Text Content:\n"{text_excerpt}"\n\n'
                transcriptions_for_prompt += social_text_prompt  # Treat text content as a quotable source
                raw_transcriptions += f"{source_id}\nText Content:\n{data['text']}\n\n"  # Log the full text
                has_content = True
            if data["video_transcription"] and not data["video_transcription"].startswith("Error"):
                social_video_prompt = f'{source_id} - Video Transcription:\n"{data["video_transcription"]}"\n\n'
                transcriptions_for_prompt += social_video_prompt
                raw_transcriptions += f"{source_id}\nVideo Transcription:\n{data['video_transcription']}\n\n"
                has_content = True
            if not has_content:
                raw_transcriptions += f"{source_id}\n[No usable text or video transcription found]\n\n"

        # --- Prepare the Final Prompt ---
        # Combine the document and URL summaries
        document_summary = "\n\n".join(knowledge_base["document_content"]) if knowledge_base["document_content"] else "No document content provided."
        url_summary = "\n\n".join(knowledge_base["url_content"]) if knowledge_base["url_content"] else "No URL content provided."
        transcription_summary = transcriptions_for_prompt if transcriptions_for_prompt else "No usable transcriptions available."

        # Construct the prompt for the LLM
        prompt = f"""<s>[INST] You are a professional news writer. Your task is to synthesize information from various sources into a coherent news article.

Primary Instructions: {knowledge_base["instructions"]}

Key Facts to Include: {knowledge_base["facts"]}

Supporting Information:

Document Content Summary:
{document_summary}

Web Content Summary (from URLs):
{url_summary}

Transcribed Quotes/Content (Use these directly or indirectly):
{transcription_summary}

Article Requirements:
- Title: Create a concise and informative title for the article.
- Hook: Write a compelling hook sentence of roughly 15 words that complements the title.
- Body: Write the main news article body, aiming for approximately {size} words.
- Tone: Adopt a {tone} tone throughout the article.
- 5 Ws: Ensure the first paragraph addresses the core questions (Who, What, When, Where, Why).
- Quotes: Incorporate relevant information from the 'Transcribed Quotes/Content' section. Aim to use quotes where appropriate, but synthesize information rather than just listing quotes. Use quotation marks (" ") for direct quotes attributed correctly (e.g., based on the name/position provided).
- Style: Adhere to a professional journalistic style. Be objective and factual.
- Accuracy: Do NOT invent information. Stick strictly to the provided facts, instructions, and source materials. If information is contradictory or missing, state that or omit the detail.
- Structure: Organize the article logically with clear paragraphs.

Begin the article now. [/INST]
Article Draft:
"""

        # Log the prompt length (useful for debugging context limits)
        logger.info(f"Generated prompt length: {len(prompt.split())} words / {len(prompt)} characters.")
        # Avoid logging the full prompt if it is long or contains sensitive info
        # logger.debug(f"Generated Prompt:\n{prompt}")

        # --- Generate News Article ---
        logger.info("Generating news article with LLM...")
        generation_start_time = time.time()

        # Estimate max_new_tokens from the requested size, plus a buffer
        # for the title, hook, and potential verbosity
        estimated_tokens_per_word = 1.5
        max_new_tokens = int(size * estimated_tokens_per_word + 150)
        # Keep max_new_tokens within the model's context limit
        model_max_length = 2048  # Typical for TinyLlama, but check the specific model card
        # The prompt token count below is approximate; for accuracy use the tokenizer:
        # prompt_tokens = len(model_manager.tokenizer.encode(prompt))  # More accurate but slower
        prompt_tokens_estimate = len(prompt) // 3  # Rough estimate
        max_new_tokens = min(max_new_tokens, model_max_length - prompt_tokens_estimate - 50)  # Leave a buffer
        max_new_tokens = max(max_new_tokens, 100)  # Ensure a minimum generation length
        logger.info(f"Requesting max_new_tokens: {max_new_tokens}")
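
        # Worked example of the budget above: for the default size of 250 words,
        # max_new_tokens starts at int(250 * 1.5 + 150) = 525; a 3000-character
        # prompt estimates to 1000 tokens, so min(525, 2048 - 1000 - 50) = 525.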

        try:
            # Generate using the pipeline
            outputs = model_manager.text_pipeline(
                prompt,
                max_new_tokens=max_new_tokens,  # Use max_new_tokens instead of max_length
                do_sample=True,
                temperature=0.7,  # Standard temperature for creative but factual output
                top_p=0.95,
                top_k=50,
                repetition_penalty=1.15,
                pad_token_id=model_manager.tokenizer.eos_token_id,
                num_return_sequences=1
            )
            # Extract the generated text
            generated_text = outputs[0]['generated_text']

            # Clean up the result by removing the prompt:
            # find the end-of-prompt marker [/INST] and take the text after it
            inst_marker = "[/INST]"
            marker_pos = generated_text.find(inst_marker)
            if marker_pos != -1:
                news_article = generated_text[marker_pos + len(inst_marker):].strip()
                # Also strip a leading "Article Draft:" if the model echoed it
                if news_article.startswith("Article Draft:"):
                    news_article = news_article[len("Article Draft:"):].strip()
            else:
                # Fallback: try removing the input prompt string itself (less reliable)
                if prompt in generated_text:
                    news_article = generated_text.replace(prompt, "", 1).strip()
                else:
                    # If the prompt is not found exactly, assume the output is only the
                    # generation; some pipelines strip the prompt internally.
                    news_article = generated_text
                    logger.warning("Prompt marker '[/INST]' not found in LLM output. Returning full output.")

            generation_time = time.time() - generation_start_time
            logger.info(f"News generation completed in {generation_time:.2f} seconds. Output length: {len(news_article)} characters.")
        except torch.cuda.OutOfMemoryError as oom_error:
            logger.error(f"CUDA Out of Memory error during LLM generation: {oom_error}")
            logger.error(traceback.format_exc())
            model_manager.reset_models(force=True)  # Attempt to recover
            raise RuntimeError("Generation failed due to insufficient GPU memory. Please try reducing article size or complexity.") from oom_error
        except Exception as gen_error:
            logger.error(f"Error during text generation pipeline: {str(gen_error)}")
            logger.error(traceback.format_exc())
            raise RuntimeError(f"LLM generation failed: {gen_error}") from gen_error

        total_time = time.time() - request_start_time
        logger.info(f"Total request processing time: {total_time:.2f} seconds.")
        # Return the generated article and the log of raw transcriptions
        return news_article, raw_transcriptions.strip()
    except Exception as e:
        total_time = time.time() - request_start_time
        logger.error(f"Error in generate_news function after {total_time:.2f} seconds: {str(e)}")
        logger.error(traceback.format_exc())
        # Attempt to reset the models to recover a clean state
        try:
            model_manager.reset_models(force=True)
        except Exception as reset_error:
            logger.error(f"Failed to reset models after error: {str(reset_error)}")
        # Return error messages to the UI; raw_transcriptions is always defined
        # because it is initialized before the try block.
        error_message = f"Error generating the news article: {str(e)}"
        transcription_log = raw_transcriptions.strip() + f"\n\n[ERROR] News generation failed: {str(e)}"
        return error_message, transcription_log


def create_demo():
    """Create the Gradio interface."""
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📰 NewsIA - AI News Generator")
        gr.Markdown("Create professional news articles from multiple information sources.")

        # Store all input components for easy access/reset
        all_inputs = []

        with gr.Row():
            with gr.Column(scale=2):
                instructions = gr.Textbox(
                    label="Instructions for the News Article",
                    placeholder="Enter specific instructions for generating your news article (e.g., focus on the economic impact)",
                    lines=2,
                    value=""
                )
                all_inputs.append(instructions)
                facts = gr.Textbox(
                    label="Main Facts",
                    placeholder="Describe the most important facts the news should include (e.g., event name, date, location, key people involved)",
                    lines=4,
                    value=""
                )
                all_inputs.append(facts)
                with gr.Row():
                    size_slider = gr.Slider(
                        label="Approximate Length (words)",
                        minimum=100,
                        maximum=700,  # Increased max size
                        value=250,
                        step=50
                    )
                    all_inputs.append(size_slider)
                    tone_dropdown = gr.Dropdown(
                        label="Tone of the News Article",
                        choices=["neutral", "serious", "formal", "urgent", "investigative", "human-interest", "lighthearted"],
                        value="neutral"
                    )
                    all_inputs.append(tone_dropdown)
            with gr.Column(scale=3):
                with gr.Tabs():
                    with gr.TabItem("📄 Documents"):
                        gr.Markdown("Upload relevant documents (PDF, DOCX, XLSX, CSV). Max 5.")
                        doc_inputs = []
                        for i in range(1, 6):
                            doc_file = gr.File(
                                label=f"Document {i}",
                                file_types=[".pdf", ".docx", ".xlsx", ".csv"],  # Explicit extensions for clarity
                                file_count="single"  # Ensure a single file per component
                            )
                            doc_inputs.append(doc_file)
                        all_inputs.extend(doc_inputs)
                    with gr.TabItem("🎙️ Audio/Video"):
                        gr.Markdown("Upload audio or video files for transcription (MP3, WAV, MP4, MOV, etc.). Max 5 sources.")
                        audio_video_inputs = []
                        for i in range(1, 6):
                            with gr.Group():
                                gr.Markdown(f"**Source {i}**")
                                audio_file = gr.File(
                                    label=f"Audio/Video File {i}",
                                    file_types=["audio", "video"]
                                )
                                with gr.Row():
                                    speaker_name = gr.Textbox(
                                        label="Speaker Name",
                                        placeholder="Name of the interviewee or speaker",
                                        value=""
                                    )
                                    speaker_role = gr.Textbox(
                                        label="Role/Position",
                                        placeholder="Speaker's title or role",
                                        value=""
                                    )
                                audio_video_inputs.append(audio_file)
                                audio_video_inputs.append(speaker_name)
                                audio_video_inputs.append(speaker_role)
                        all_inputs.extend(audio_video_inputs)
                    with gr.TabItem("🌐 URLs"):
                        gr.Markdown("Add URLs to relevant web pages or articles. Max 5.")
                        url_inputs = []
                        for i in range(1, 6):
                            url_textbox = gr.Textbox(
                                label=f"URL {i}",
                                placeholder="https://example.com/article",
                                value=""
                            )
                            url_inputs.append(url_textbox)
                        all_inputs.extend(url_inputs)
                    with gr.TabItem("📱 Social Media"):
                        gr.Markdown("Add URLs to social media posts (e.g., Twitter, YouTube, TikTok). Max 3.")
                        social_inputs = []
                        for i in range(1, 4):
                            with gr.Group():
                                gr.Markdown(f"**Social Media Source {i}**")
                                social_url_textbox = gr.Textbox(
                                    label="Post URL",
                                    placeholder="https://twitter.com/user/status/...",
                                    value=""
                                )
                                with gr.Row():
                                    social_name_textbox = gr.Textbox(
                                        label="Account Name/User",
                                        placeholder="Name or handle (e.g., @username)",
                                        value=""
                                    )
                                    social_context_textbox = gr.Textbox(
                                        label="Context",
                                        placeholder="Brief context (e.g., statement on event X)",
                                        value=""
                                    )
                                social_inputs.append(social_url_textbox)
                                social_inputs.append(social_name_textbox)
                                social_inputs.append(social_context_textbox)
                        all_inputs.extend(social_inputs)
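
        # The order of all_inputs is the contract that generate_news relies on:
        # [instructions, facts, size, tone] + 5 documents + 15 audio fields
        # + 5 URLs + 9 social fields = 38 components in total.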

        with gr.Row():
            generate_button = gr.Button("✨ Generate News Article", variant="primary")
            clear_button = gr.Button("🔄 Clear All Inputs")

        with gr.Tabs():
            with gr.TabItem("📝 Generated News Article"):
                news_output = gr.Textbox(
                    label="Draft News Article",
                    lines=20,  # Increased lines
                    show_copy_button=True,
                    value=""
                )
            with gr.TabItem("🎙️ Source Transcriptions & Logs"):
                transcriptions_output = gr.Textbox(
                    label="Transcriptions and Processing Log",
                    lines=15,  # Increased lines
                    show_copy_button=True,
                    value=""
                )

        # --- Event Handlers ---
        outputs_list = [news_output, transcriptions_output]

        # Generate button click
        generate_button.click(
            fn=generate_news,
            inputs=all_inputs,  # Pass the consolidated list
            outputs=outputs_list
        )

        # Clear button click
        def clear_all_inputs_and_outputs():
            # Return a list of default values matching the number and type of inputs + outputs
            reset_values = []
            for input_comp in all_inputs:
                if isinstance(input_comp, gr.Dropdown):
                    reset_values.append("neutral")  # Restore the dropdown's original default
                elif isinstance(input_comp, gr.Textbox):
                    reset_values.append("")
                elif isinstance(input_comp, gr.Slider):
                    reset_values.append(250)  # Reset the slider to its default
                elif isinstance(input_comp, gr.File):
                    reset_values.append(None)
                else:
                    reset_values.append(None)  # Default for unknown/other types
            # Add default values for the output fields
            reset_values.extend(["", ""])  # Two Textbox outputs
            # Also reset the models in the background
            model_manager.reset_models(force=True)
            logger.info("UI cleared and models reset.")
            return reset_values

        clear_button.click(
            fn=clear_all_inputs_and_outputs,
            inputs=None,  # The clear function takes no inputs
            outputs=all_inputs + outputs_list  # The components to clear
        )

        # Resetting models on app unload is unreliable in Spaces, so it stays disabled:
        # demo.unload(model_manager.reset_models, inputs=None, outputs=None)

    return demo


if __name__ == "__main__":
    logger.info("Starting NewsIA application...")

    # Optional: pre-initialize Whisper on startup if resources allow.
    # This makes the first transcription faster but uses GPU resources immediately;
    # enable it only if transcriptions are very common.
    # try:
    #     logger.info("Attempting to pre-initialize Whisper model...")
    #     model_manager.initialize_whisper()
    # except Exception as e:
    #     logger.warning(f"Pre-initialization of Whisper model failed (will load on demand): {str(e)}")

    # Create the Gradio demo
    news_demo = create_demo()

    # Use default queue settings, suitable for most Spaces environments
    # (concurrency_count and max_size were removed from newer Gradio queue APIs)
    news_demo.queue()

    # Launch the Gradio app
    logger.info("Launching Gradio interface...")
    news_demo.launch(
        server_name="0.0.0.0",  # Necessary for Docker/Spaces
        server_port=7860,
        # share=True  # Spaces handles sharing automatically
        # debug=True  # Enable for more detailed Gradio logs if needed
    )
    logger.info("NewsIA application finished.")