Update app.py

app.py CHANGED
@@ -60,7 +60,7 @@ class ModelManager:
         self.tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             token=HUGGINGFACE_TOKEN,
-            use_fast=True
+            use_fast=True
         )
 
         if self.tokenizer.pad_token is None:
@@ -73,7 +73,11 @@ class ModelManager:
             token=HUGGINGFACE_TOKEN,
             device_map="auto",
             torch_dtype=torch.float16,
-            low_cpu_mem_usage=True
+            low_cpu_mem_usage=True,
+            # Optimizations for ZeroGPU
+            max_memory={0: "4GB"},
+            offload_folder="offload",
+            offload_state_dict=True
         )
 
         # Create text generation pipeline
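Note: the options added here are standard transformers/accelerate big-model loading arguments. A minimal sketch of how the full call would look with them in place; MODEL_NAME and HUGGINGFACE_TOKEN are defined elsewhere in app.py, and the 4GB cap is simply the value chosen in this commit:

    import torch
    from transformers import AutoModelForCausalLM

    # Sketch only: mirrors the kwargs introduced in the hunk above.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        token=HUGGINGFACE_TOKEN,
        device_map="auto",             # let accelerate place layers on devices
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        max_memory={0: "4GB"},         # cap GPU 0; overflow is offloaded
        offload_folder="offload",      # disk location for offloaded weights
        offload_state_dict=True,       # stage the state dict on disk while loading
    )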
@@ -84,7 +88,7 @@ class ModelManager:
             tokenizer=self.tokenizer,
             torch_dtype=torch.float16,
             device_map="auto",
-            max_length=
+            max_length=1024
         )
 
         logger.info("LLM initialized successfully")
@@ -341,8 +345,8 @@ def generate_news(instructions, facts, size, tone, *args):
 
     # Prepare data structure for inputs
     knowledge_base = {
-        "instructions": instructions,
-        "facts": facts,
+        "instructions": instructions or "",
+        "facts": facts or "",
         "document_content": [],
         "audio_data": [],
         "url_content": [],
@@ -355,6 +359,12 @@ def generate_news(instructions, facts, size, tone, *args):
     num_urls = 5
 
     # Parse arguments
+    args = list(args)  # Convert tuple to list for easier manipulation
+
+    # Ensure we have enough arguments
+    while len(args) < (num_audios + num_social_urls + num_urls + 5):
+        args.append("")
+
     audios = args[:num_audios]
     social_urls = args[num_audios:num_audios+num_social_urls]
     urls = args[num_audios+num_social_urls:num_audios+num_social_urls+num_urls]
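Note: the padding loop added above guards against Gradio passing fewer positional values than the slicing expects. A self-contained sketch of the same pattern with made-up counts; the real num_audios and num_social_urls are set just above this hunk and are not visible in the diff:

    def split_flat_args(args, num_audios, num_social_urls, num_urls, num_docs):
        """Pad a flat *args tuple from Gradio, then slice it into groups (sketch only)."""
        args = list(args)
        expected = num_audios + num_social_urls + num_urls + num_docs
        while len(args) < expected:
            args.append("")  # missing inputs become empty strings
        audios = args[:num_audios]
        social = args[num_audios:num_audios + num_social_urls]
        urls = args[num_audios + num_social_urls:num_audios + num_social_urls + num_urls]
        return audios, social, urls

    # Example: 6 values supplied where 9 are expected; the last slice gets padded.
    print(split_flat_args(("a", "n", "p", "s1", "s2", "u1"), 3, 2, 2, 2))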
@@ -371,7 +381,7 @@ def generate_news(instructions, facts, size, tone, *args):
     # Process documents
     logger.info("Processing documents...")
     for document in documents:
-        if document
+        if document and hasattr(document, 'name'):
             content = read_document(document.name)
             if content and not content.startswith("Error"):
                 knowledge_base["document_content"].append(content)
@@ -381,7 +391,7 @@ def generate_news(instructions, facts, size, tone, *args):
     for i in range(0, len(audios), 3):
         if i+2 < len(audios):  # Ensure we have complete set of 3 elements
             audio_file, name, position = audios[i:i+3]
-            if audio_file
+            if audio_file and hasattr(audio_file, 'name'):
                 knowledge_base["audio_data"].append({
                     "audio": audio_file,
                     "name": name or "Unknown",
@@ -542,12 +552,14 @@ def create_demo():
                 instrucciones = gr.Textbox(
                     label="Instrucciones para la noticia",
                     placeholder="Escribe instrucciones específicas para la generación de tu noticia",
-                    lines=2
+                    lines=2,
+                    value=""
                 )
                 hechos = gr.Textbox(
                     label="Hechos principales",
                     placeholder="Describe los hechos más importantes que debe incluir la noticia",
-                    lines=4
+                    lines=4,
+                    value=""
                 )
 
                 with gr.Row():
@@ -565,80 +577,82 @@ def create_demo():
                 )
 
             with gr.Column(scale=3):
-
+                # Inicializamos la lista de inputs con valores conocidos
+                inputs_list = []
+                inputs_list.append(instrucciones)
+                inputs_list.append(hechos)
+                inputs_list.append(tamaño)
+                inputs_list.append(tono)
 
                 with gr.Tabs():
                     with gr.TabItem("📝 Documentos"):
-
-
-
-
-
-
-
-
-
-
-                        for i in range(4, 6):
-                            inputs_list.append(None)
+                        documentos = []
+                        for i in range(1, 6):  # Mantenemos 5 documentos como en el original
+                            documento = gr.File(
+                                label=f"Documento {i}",
+                                file_types=["pdf", "docx", "xlsx", "csv"],
+                                file_count="single",
+                                value=None
+                            )
+                            documentos.append(documento)
+                            inputs_list.append(documento)
 
                     with gr.TabItem("🔊 Audio/Video"):
-                        for i in range(1,
+                        for i in range(1, 6):  # Mantenemos 5 fuentes como en el original
                             with gr.Group():
                                 gr.Markdown(f"**Fuente {i}**")
                                 file = gr.File(
                                     label=f"Audio/Video {i}",
-                                    file_types=["audio", "video"]
+                                    file_types=["audio", "video"],
+                                    value=None
                                 )
                                 with gr.Row():
                                     nombre = gr.Textbox(
                                         label="Nombre",
-                                        placeholder="Nombre del entrevistado"
+                                        placeholder="Nombre del entrevistado",
+                                        value=""
                                     )
                                     cargo = gr.Textbox(
                                         label="Cargo/Rol",
-                                        placeholder="Cargo o rol"
+                                        placeholder="Cargo o rol",
+                                        value=""
                                     )
-                            inputs_list.
-
-
-                        for i in range(4, 6):
-                            inputs_list.extend([None, None, None])
+                            inputs_list.append(file)
+                            inputs_list.append(nombre)
+                            inputs_list.append(cargo)
 
                     with gr.TabItem("🌐 URLs"):
-                        for i in range(1,
+                        for i in range(1, 6):  # Mantenemos 5 URLs como en el original
                             url = gr.Textbox(
                                 label=f"URL {i}",
-                                placeholder="https://..."
+                                placeholder="https://...",
+                                value=""
                             )
                             inputs_list.append(url)
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(4, 6):
-                            inputs_list.append(None)
 
                     with gr.TabItem("📱 Redes Sociales"):
-                        for i in range(1,
+                        for i in range(1, 4):  # Mantenemos 3 redes sociales como en el original
                             with gr.Group():
                                 gr.Markdown(f"**Red Social {i}**")
                                 social_url = gr.Textbox(
                                     label=f"URL",
-                                    placeholder="https://..."
+                                    placeholder="https://...",
+                                    value=""
                                 )
                                 with gr.Row():
                                     social_nombre = gr.Textbox(
                                         label=f"Nombre/Cuenta",
-                                        placeholder="Nombre de la persona o cuenta"
+                                        placeholder="Nombre de la persona o cuenta",
+                                        value=""
                                     )
                                     social_contexto = gr.Textbox(
                                         label=f"Contexto",
-                                        placeholder="Contexto relevante"
+                                        placeholder="Contexto relevante",
+                                        value=""
                                     )
-                            inputs_list.
-
-
-                        for i in range(3, 4):
-                            inputs_list.extend([None, None, None])
+                            inputs_list.append(social_url)
+                            inputs_list.append(social_nombre)
+                            inputs_list.append(social_contexto)
 
         with gr.Row():
             generar = gr.Button("✨ Generar Noticia", variant="primary")
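Note: inputs_list is filled in the same pass that creates the components, so the append order is what decides which positional argument each value becomes when the list is later passed as inputs= to the generate button. A minimal sketch of that wiring, not the app's real handler:

    import gradio as gr

    def handler(instructions, facts, *rest):
        # Components listed in `inputs` arrive positionally, in append order.
        return f"{len(rest)} extra inputs received"

    with gr.Blocks() as demo:
        instrucciones = gr.Textbox(label="Instrucciones")
        hechos = gr.Textbox(label="Hechos")
        extras = [gr.Textbox(label=f"URL {i}") for i in range(1, 4)]
        salida = gr.Textbox()
        gr.Button("Generar").click(
            fn=handler,
            inputs=[instrucciones, hechos] + extras,  # order defines *rest
            outputs=salida,
        )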
@@ -649,14 +663,16 @@ def create_demo():
                     noticia_output = gr.Textbox(
                         label="Borrador de la noticia",
                         lines=15,
-                        show_copy_button=True
+                        show_copy_button=True,
+                        value=""
                     )
 
                 with gr.TabItem("🎙️ Transcripciones"):
                     transcripciones_output = gr.Textbox(
                         label="Transcripciones de fuentes",
                         lines=10,
-                        show_copy_button=True
+                        show_copy_button=True,
+                        value=""
                     )
 
         # Set up event handlers
@@ -668,16 +684,11 @@ def create_demo():
 
         # Reset functionality to clear all inputs
        def reset_all():
-
-            for _ in range(len(inputs_list)):
-                output.append(None)
-            output.append("")
-            output.append("")
-            return output
+            return [""] * len(inputs_list) + ["", ""]
 
         reset.click(
             fn=reset_all,
-            inputs=
+            inputs=None,
             outputs=inputs_list + [noticia_output, transcripciones_output]
         )
 
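Note: reset_all returns one value per entry in the outputs list, in the same order. Empty strings clear the textboxes; for the gr.File components in inputs_list, None is the usual clearing value, so a variant of the same idea could look like the sketch below (the isinstance check is an assumption, not something this commit does):

    import gradio as gr

    def reset_all():
        # One value per output component, matching the outputs= list order.
        cleared = [None if isinstance(c, gr.File) else "" for c in inputs_list]
        return cleared + ["", ""]  # plus the two result textboxes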