CamiloVega committed on
Commit f7aec95 · verified · 1 Parent(s): a077e39

Update app.py

Files changed (1)
  1. app.py +66 -55
app.py CHANGED
@@ -60,7 +60,7 @@ class ModelManager:
             self.tokenizer = AutoTokenizer.from_pretrained(
                 MODEL_NAME,
                 token=HUGGINGFACE_TOKEN,
-                use_fast=True,
+                use_fast=True
             )

             if self.tokenizer.pad_token is None:
@@ -73,7 +73,11 @@ class ModelManager:
                 token=HUGGINGFACE_TOKEN,
                 device_map="auto",
                 torch_dtype=torch.float16,
-                low_cpu_mem_usage=True
+                low_cpu_mem_usage=True,
+                # Optimizations for ZeroGPU
+                max_memory={0: "4GB"},
+                offload_folder="offload",
+                offload_state_dict=True
             )

             # Create text generation pipeline
@@ -84,7 +88,7 @@ class ModelManager:
                 tokenizer=self.tokenizer,
                 torch_dtype=torch.float16,
                 device_map="auto",
-                max_length=2048
+                max_length=1024
             )

             logger.info("LLM initialized successfully")
@@ -341,8 +345,8 @@ def generate_news(instructions, facts, size, tone, *args):

     # Prepare data structure for inputs
     knowledge_base = {
-        "instructions": instructions,
-        "facts": facts,
+        "instructions": instructions or "",
+        "facts": facts or "",
         "document_content": [],
         "audio_data": [],
         "url_content": [],
@@ -355,6 +359,12 @@ def generate_news(instructions, facts, size, tone, *args):
     num_urls = 5

     # Parse arguments
+    args = list(args)  # Convert tuple to list for easier manipulation
+
+    # Ensure we have enough arguments
+    while len(args) < (num_audios + num_social_urls + num_urls + 5):
+        args.append("")
+
     audios = args[:num_audios]
     social_urls = args[num_audios:num_audios+num_social_urls]
     urls = args[num_audios+num_social_urls:num_audios+num_social_urls+num_urls]
@@ -371,7 +381,7 @@ def generate_news(instructions, facts, size, tone, *args):
     # Process documents
     logger.info("Processing documents...")
     for document in documents:
-        if document is not None:
+        if document and hasattr(document, 'name'):
             content = read_document(document.name)
             if content and not content.startswith("Error"):
                 knowledge_base["document_content"].append(content)
@@ -381,7 +391,7 @@ def generate_news(instructions, facts, size, tone, *args):
     for i in range(0, len(audios), 3):
         if i+2 < len(audios):  # Ensure we have complete set of 3 elements
             audio_file, name, position = audios[i:i+3]
-            if audio_file is not None:
+            if audio_file and hasattr(audio_file, 'name'):
                 knowledge_base["audio_data"].append({
                     "audio": audio_file,
                     "name": name or "Unknown",
@@ -542,12 +552,14 @@ def create_demo():
                 instrucciones = gr.Textbox(
                     label="Instrucciones para la noticia",
                     placeholder="Escribe instrucciones específicas para la generación de tu noticia",
-                    lines=2
+                    lines=2,
+                    value=""
                 )
                 hechos = gr.Textbox(
                     label="Hechos principales",
                     placeholder="Describe los hechos más importantes que debe incluir la noticia",
-                    lines=4
+                    lines=4,
+                    value=""
                 )

                 with gr.Row():
@@ -565,80 +577,82 @@ def create_demo():
                     )

             with gr.Column(scale=3):
-                inputs_list = [instrucciones, hechos, tamaño, tono]
+                # Initialize the inputs list with known values
+                inputs_list = []
+                inputs_list.append(instrucciones)
+                inputs_list.append(hechos)
+                inputs_list.append(tamaño)
+                inputs_list.append(tono)

                 with gr.Tabs():
                     with gr.TabItem("📝 Documentos"):
-                        for i in range(1, 4):  # Reduced to 3 for better UX
-                            with gr.Row():
-                                documento = gr.File(
-                                    label=f"Documento {i}",
-                                    file_types=["pdf", "docx", "xlsx", "csv"],
-                                    file_count="single"
-                                )
-                                inputs_list.append(documento)
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(4, 6):
-                            inputs_list.append(None)
+                        documentos = []
+                        for i in range(1, 6):  # Keep 5 documents, as in the original
+                            documento = gr.File(
+                                label=f"Documento {i}",
+                                file_types=["pdf", "docx", "xlsx", "csv"],
+                                file_count="single",
+                                value=None
+                            )
+                            documentos.append(documento)
+                            inputs_list.append(documento)

                     with gr.TabItem("🔊 Audio/Video"):
-                        for i in range(1, 4):  # Reduced to 3 for better UX
+                        for i in range(1, 6):  # Keep 5 sources, as in the original
                             with gr.Group():
                                 gr.Markdown(f"**Fuente {i}**")
                                 file = gr.File(
                                     label=f"Audio/Video {i}",
-                                    file_types=["audio", "video"]
+                                    file_types=["audio", "video"],
+                                    value=None
                                 )
                                 with gr.Row():
                                     nombre = gr.Textbox(
                                         label="Nombre",
-                                        placeholder="Nombre del entrevistado"
+                                        placeholder="Nombre del entrevistado",
+                                        value=""
                                     )
                                     cargo = gr.Textbox(
                                         label="Cargo/Rol",
-                                        placeholder="Cargo o rol"
+                                        placeholder="Cargo o rol",
+                                        value=""
                                     )
-                                inputs_list.extend([file, nombre, cargo])
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(4, 6):
-                            inputs_list.extend([None, None, None])
+                                inputs_list.append(file)
+                                inputs_list.append(nombre)
+                                inputs_list.append(cargo)

                     with gr.TabItem("🌐 URLs"):
-                        for i in range(1, 4):  # Reduced to 3 for better UX
+                        for i in range(1, 6):  # Keep 5 URLs, as in the original
                             url = gr.Textbox(
                                 label=f"URL {i}",
-                                placeholder="https://..."
+                                placeholder="https://...",
+                                value=""
                             )
                             inputs_list.append(url)
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(4, 6):
-                            inputs_list.append(None)

                     with gr.TabItem("📱 Redes Sociales"):
-                        for i in range(1, 3):  # Reduced to 2 for better UX
+                        for i in range(1, 4):  # Keep 3 social networks, as in the original
                             with gr.Group():
                                 gr.Markdown(f"**Red Social {i}**")
                                 social_url = gr.Textbox(
                                     label=f"URL",
-                                    placeholder="https://..."
+                                    placeholder="https://...",
+                                    value=""
                                 )
                                 with gr.Row():
                                     social_nombre = gr.Textbox(
                                         label=f"Nombre/Cuenta",
-                                        placeholder="Nombre de la persona o cuenta"
+                                        placeholder="Nombre de la persona o cuenta",
+                                        value=""
                                     )
                                     social_contexto = gr.Textbox(
                                         label=f"Contexto",
-                                        placeholder="Contexto relevante"
+                                        placeholder="Contexto relevante",
+                                        value=""
                                     )
-                                inputs_list.extend([social_url, social_nombre, social_contexto])
-
-                        # Add empty inputs to match the original expected array length
-                        for i in range(3, 4):
-                            inputs_list.extend([None, None, None])
+                                inputs_list.append(social_url)
+                                inputs_list.append(social_nombre)
+                                inputs_list.append(social_contexto)

             with gr.Row():
                 generar = gr.Button("✨ Generar Noticia", variant="primary")
@@ -649,14 +663,16 @@ def create_demo():
                 noticia_output = gr.Textbox(
                     label="Borrador de la noticia",
                     lines=15,
-                    show_copy_button=True
+                    show_copy_button=True,
+                    value=""
                 )

             with gr.TabItem("🎙️ Transcripciones"):
                 transcripciones_output = gr.Textbox(
                     label="Transcripciones de fuentes",
                     lines=10,
-                    show_copy_button=True
+                    show_copy_button=True,
+                    value=""
                 )

         # Set up event handlers
@@ -668,16 +684,11 @@ def create_demo():

         # Reset functionality to clear all inputs
         def reset_all():
-            output = []
-            for _ in range(len(inputs_list)):
-                output.append(None)
-            output.append("")
-            output.append("")
-            return output
+            return [""] * len(inputs_list) + ["", ""]

         reset.click(
             fn=reset_all,
-            inputs=[],
+            inputs=None,
             outputs=inputs_list + [noticia_output, transcripciones_output]
         )

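A note on the new argument handling in generate_news: Gradio passes one positional value per component in inputs_list, so the commit converts *args to a list and pads it with empty strings before slicing it into fixed-width groups. The sketch below is illustrative only; the function name split_inputs and the counts are hypothetical and not taken from app.py (which also reserves extra slots for the document inputs, hence the "+ 5" in its while condition). It shows why the padding has to happen before the slices are taken.

```python
# Illustrative sketch (not part of app.py): pad *args before fixed-width slicing.
# If the argument tuple is ever shorter than expected, slicing without padding
# would silently shift the audio/social/URL groups out of alignment.

def split_inputs(*args):
    # Hypothetical counts for this sketch; app.py derives its own
    # num_audios / num_social_urls / num_urls values.
    num_audios = 6        # e.g. 2 sources x (file, name, role)
    num_social_urls = 3   # e.g. 1 source x (url, account, context)
    num_urls = 2

    args = list(args)
    # Pad with empty strings so every slice below has its expected width.
    while len(args) < num_audios + num_social_urls + num_urls:
        args.append("")

    audios = args[:num_audios]
    social = args[num_audios:num_audios + num_social_urls]
    urls = args[num_audios + num_social_urls:num_audios + num_social_urls + num_urls]
    return audios, social, urls

# Missing trailing inputs are padded with "" rather than shifting the groups.
print(split_inputs("a.mp3", "Ana", "Mayor"))
```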
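Similarly, the reset wiring relies on a Gradio contract: a handler registered with inputs=None is called with no arguments and must return exactly one value per component listed in outputs, in the same order, which is why reset_all now returns [""] * len(inputs_list) + ["", ""]. Below is a minimal, self-contained sketch of that contract; the component names and button label are hypothetical stand-ins, not the app's actual ones.

```python
# Illustrative sketch (not from app.py): click-to-reset wiring in Gradio Blocks.
import gradio as gr

with gr.Blocks() as demo:
    texto_a = gr.Textbox(label="Campo A")   # stand-ins for entries of inputs_list
    texto_b = gr.Textbox(label="Campo B")
    salida = gr.Textbox(label="Salida")     # stand-in for noticia_output
    reset = gr.Button("🗑️ Borrar")

    def reset_all():
        # One empty value per component in `outputs`, in the same order,
        # mirroring [""] * len(inputs_list) + ["", ""] in the commit.
        return "", "", ""

    # inputs=None: reset_all receives no arguments; outputs are cleared positionally.
    reset.click(fn=reset_all, inputs=None, outputs=[texto_a, texto_b, salida])

if __name__ == "__main__":
    demo.launch()
```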