Update app.py

app.py CHANGED
@@ -60,7 +60,7 @@ class ModelManager:
         self.tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             token=HUGGINGFACE_TOKEN,
-            use_fast=True
+            use_fast=True
         )
 
         if self.tokenizer.pad_token is None:
@@ -73,7 +73,11 @@ class ModelManager:
             token=HUGGINGFACE_TOKEN,
             device_map="auto",
             torch_dtype=torch.float16,
-            low_cpu_mem_usage=True
+            low_cpu_mem_usage=True,
+            # Optimizations for ZeroGPU
+            max_memory={0: "4GB"},
+            offload_folder="offload",
+            offload_state_dict=True
         )
 
         # Create text generation pipeline
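Note: the options added here are standard transformers/accelerate big-model loading arguments. A minimal sketch of how the full call would look with them in place; MODEL_NAME and HUGGINGFACE_TOKEN are defined elsewhere in app.py, and the 4GB cap is simply the value chosen in this commit:

    import torch
    from transformers import AutoModelForCausalLM

    # Sketch only: mirrors the kwargs introduced in the hunk above.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        token=HUGGINGFACE_TOKEN,
        device_map="auto",             # let accelerate place layers on devices
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        max_memory={0: "4GB"},         # cap GPU 0; overflow is offloaded
        offload_folder="offload",      # disk location for offloaded weights
        offload_state_dict=True,       # stage the state dict on disk while loading
    )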
@@ -84,7 +88,7 @@ class ModelManager:
             tokenizer=self.tokenizer,
             torch_dtype=torch.float16,
             device_map="auto",
-            max_length=
+            max_length=1024
         )
 
         logger.info("LLM initialized successfully")
@@ -341,8 +345,8 @@ def generate_news(instructions, facts, size, tone, *args):
 
     # Prepare data structure for inputs
     knowledge_base = {
-        "instructions": instructions,
-        "facts": facts,
+        "instructions": instructions or "",
+        "facts": facts or "",
         "document_content": [],
         "audio_data": [],
         "url_content": [],
@@ -355,6 +359,12 @@ def generate_news(instructions, facts, size, tone, *args):
     num_urls = 5
 
     # Parse arguments
+    args = list(args)  # Convert tuple to list for easier manipulation
+
+    # Ensure we have enough arguments
+    while len(args) < (num_audios + num_social_urls + num_urls + 5):
+        args.append("")
+
     audios = args[:num_audios]
     social_urls = args[num_audios:num_audios+num_social_urls]
     urls = args[num_audios+num_social_urls:num_audios+num_social_urls+num_urls]
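Note: the padding loop added above guards against Gradio passing fewer positional values than the slicing expects. A self-contained sketch of the same pattern with made-up counts; the real num_audios and num_social_urls are set just above this hunk and are not visible in the diff:

    def split_flat_args(args, num_audios, num_social_urls, num_urls, num_docs):
        """Pad a flat *args tuple from Gradio, then slice it into groups (sketch only)."""
        args = list(args)
        expected = num_audios + num_social_urls + num_urls + num_docs
        while len(args) < expected:
            args.append("")  # missing inputs become empty strings
        audios = args[:num_audios]
        social = args[num_audios:num_audios + num_social_urls]
        urls = args[num_audios + num_social_urls:num_audios + num_social_urls + num_urls]
        return audios, social, urls

    # Example: 6 values supplied where 9 are expected; the last slice gets padded.
    print(split_flat_args(("a", "n", "p", "s1", "s2", "u1"), 3, 2, 2, 2))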
@@ -371,7 +381,7 @@ def generate_news(instructions, facts, size, tone, *args):
     # Process documents
     logger.info("Processing documents...")
     for document in documents:
-        if document
+        if document and hasattr(document, 'name'):
             content = read_document(document.name)
             if content and not content.startswith("Error"):
                 knowledge_base["document_content"].append(content)
@@ -381,7 +391,7 @@ def generate_news(instructions, facts, size, tone, *args):
     for i in range(0, len(audios), 3):
         if i+2 < len(audios):  # Ensure we have complete set of 3 elements
             audio_file, name, position = audios[i:i+3]
-            if audio_file
+            if audio_file and hasattr(audio_file, 'name'):
                 knowledge_base["audio_data"].append({
                     "audio": audio_file,
                     "name": name or "Unknown",
@@ -542,12 +552,14 @@ def create_demo():
                 instrucciones = gr.Textbox(
                     label="Instrucciones para la noticia",
                     placeholder="Escribe instrucciones específicas para la generación de tu noticia",
-                    lines=2
+                    lines=2,
+                    value=""
                 )
                 hechos = gr.Textbox(
                     label="Hechos principales",
                     placeholder="Describe los hechos más importantes que debe incluir la noticia",
-                    lines=4
+                    lines=4,
+                    value=""
                 )
 
                 with gr.Row():
@@ -565,80 +577,82 @@ def create_demo():
                 )
 
             with gr.Column(scale=3):
-
+                # Inicializamos la lista de inputs con valores conocidos
+                inputs_list = []
+                inputs_list.append(instrucciones)
+                inputs_list.append(hechos)
+                inputs_list.append(tamaño)
+                inputs_list.append(tono)
 
                 with gr.Tabs():
                     with gr.TabItem("📝 Documentos"):
-
-
-
-
-
-
-
-
-
-
-                        for i in range(4, 6):
-                            inputs_list.append(None)
+                        documentos = []
+                        for i in range(1, 6):  # Mantenemos 5 documentos como en el original
+                            documento = gr.File(
+                                label=f"Documento {i}",
+                                file_types=["pdf", "docx", "xlsx", "csv"],
+                                file_count="single",
+                                value=None
+                            )
+                            documentos.append(documento)
+                            inputs_list.append(documento)
 
                     with gr.TabItem("🔊 Audio/Video"):
-                        for i in range(1,
+                        for i in range(1, 6):  # Mantenemos 5 fuentes como en el original
                             with gr.Group():
                                 gr.Markdown(f"**Fuente {i}**")
                                 file = gr.File(
                                     label=f"Audio/Video {i}",
-                                    file_types=["audio", "video"]
+                                    file_types=["audio", "video"],
+                                    value=None
                                 )
                                 with gr.Row():
                                     nombre = gr.Textbox(
                                         label="Nombre",
-                                        placeholder="Nombre del entrevistado"
+                                        placeholder="Nombre del entrevistado",
+                                        value=""
                                     )
                                     cargo = gr.Textbox(
                                         label="Cargo/Rol",
-                                        placeholder="Cargo o rol"
+                                        placeholder="Cargo o rol",
+                                        value=""
                                     )
-                            inputs_list.
-
-
-                        for i in range(4, 6):
-                            inputs_list.extend([None, None, None])
+                            inputs_list.append(file)
+                            inputs_list.append(nombre)
+                            inputs_list.append(cargo)
 
                     with gr.TabItem("🌐 URLs"):
-                        for i in range(1,
+                        for i in range(1, 6):  # Mantenemos 5 URLs como en el original
                             url = gr.Textbox(
                                 label=f"URL {i}",
-                                placeholder="https://..."
+                                placeholder="https://...",
+                                value=""
                             )
                             inputs_list.append(url)
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(4, 6):
-                            inputs_list.append(None)
 
                     with gr.TabItem("📱 Redes Sociales"):
-                        for i in range(1,
+                        for i in range(1, 4):  # Mantenemos 3 redes sociales como en el original
                             with gr.Group():
                                 gr.Markdown(f"**Red Social {i}**")
                                 social_url = gr.Textbox(
                                     label=f"URL",
-                                    placeholder="https://..."
+                                    placeholder="https://...",
+                                    value=""
                                 )
                                 with gr.Row():
                                     social_nombre = gr.Textbox(
                                         label=f"Nombre/Cuenta",
-                                        placeholder="Nombre de la persona o cuenta"
+                                        placeholder="Nombre de la persona o cuenta",
+                                        value=""
                                     )
                                     social_contexto = gr.Textbox(
                                         label=f"Contexto",
-                                        placeholder="Contexto relevante"
+                                        placeholder="Contexto relevante",
+                                        value=""
                                     )
-                            inputs_list.
-
-
-                        for i in range(3, 4):
-                            inputs_list.extend([None, None, None])
+                            inputs_list.append(social_url)
+                            inputs_list.append(social_nombre)
+                            inputs_list.append(social_contexto)
 
         with gr.Row():
             generar = gr.Button("✨ Generar Noticia", variant="primary")
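Note: inputs_list is filled in the same pass that creates the components, so the append order is what decides which positional argument each value becomes when the list is later passed as inputs= to the generate button. A minimal sketch of that wiring, not the app's real handler:

    import gradio as gr

    def handler(instructions, facts, *rest):
        # Components listed in `inputs` arrive positionally, in append order.
        return f"{len(rest)} extra inputs received"

    with gr.Blocks() as demo:
        instrucciones = gr.Textbox(label="Instrucciones")
        hechos = gr.Textbox(label="Hechos")
        extras = [gr.Textbox(label=f"URL {i}") for i in range(1, 4)]
        salida = gr.Textbox()
        gr.Button("Generar").click(
            fn=handler,
            inputs=[instrucciones, hechos] + extras,  # order defines *rest
            outputs=salida,
        )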
@@ -649,14 +663,16 @@ def create_demo():
                     noticia_output = gr.Textbox(
                         label="Borrador de la noticia",
                         lines=15,
-                        show_copy_button=True
+                        show_copy_button=True,
+                        value=""
                     )
 
                 with gr.TabItem("🎙️ Transcripciones"):
                     transcripciones_output = gr.Textbox(
                         label="Transcripciones de fuentes",
                         lines=10,
-                        show_copy_button=True
+                        show_copy_button=True,
+                        value=""
                     )
 
         # Set up event handlers
@@ -668,16 +684,11 @@ def create_demo():
 
         # Reset functionality to clear all inputs
        def reset_all():
-
-            for _ in range(len(inputs_list)):
-                output.append(None)
-            output.append("")
-            output.append("")
-            return output
+            return [""] * len(inputs_list) + ["", ""]
 
         reset.click(
             fn=reset_all,
-            inputs=
+            inputs=None,
             outputs=inputs_list + [noticia_output, transcripciones_output]
         )
 
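Note: reset_all returns one value per entry in the outputs list, in the same order. Empty strings clear the textboxes; for the gr.File components in inputs_list, None is the usual clearing value, so a variant of the same idea could look like the sketch below (the isinstance check is an assumption, not something this commit does):

    import gradio as gr

    def reset_all():
        # One value per output component, matching the outputs= list order.
        cleared = [None if isinstance(c, gr.File) else "" for c in inputs_list]
        return cleared + ["", ""]  # plus the two result textboxes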