hymarog1 committed on
Commit
812a461
·
verified ·
1 Parent(s): 7c51ba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -36
app.py CHANGED
@@ -707,7 +707,7 @@ if prompt:
707
  # 📚 Imports
708
  import evaluate
709
  from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
710
- from sklearn.metrics import f1_score
711
 
712
  # 📌 Load Evaluators Once
713
  @st.cache_resource
@@ -725,41 +725,13 @@ def evaluate_summary(generated_summary, ground_truth_summary):
725
  bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
726
  return rouge_result, bert_result
727
 
728
- def exact_match(prediction, ground_truth):
729
- return int(prediction.strip().lower() == ground_truth.strip().lower())
730
-
731
  def compute_bleu(prediction, ground_truth):
 
732
  reference = [ground_truth.strip().split()]
733
  candidate = prediction.strip().split()
734
  smoothie = SmoothingFunction().method4
735
  return sentence_bleu(reference, candidate, smoothing_function=smoothie)
736
 
737
- def compute_f1(prediction, ground_truth):
738
- """Compute F1 score based on token overlap, like in QA evaluation."""
739
- pred_tokens = prediction.strip().lower().split()
740
- gt_tokens = ground_truth.strip().lower().split()
741
-
742
- common_tokens = set(pred_tokens) & set(gt_tokens)
743
- num_common = len(common_tokens)
744
-
745
- if num_common == 0:
746
- return 0.0
747
-
748
- precision = num_common / len(pred_tokens)
749
- recall = num_common / len(gt_tokens)
750
- f1 = 2 * (precision * recall) / (precision + recall)
751
- return f1
752
-
753
- def evaluate_additional_metrics(prediction, ground_truth):
754
- em = exact_match(prediction, ground_truth)
755
- bleu = compute_bleu(prediction, ground_truth)
756
- f1 = compute_f1(prediction, ground_truth)
757
- return {
758
- "Exact Match": em,
759
- "BLEU Score": bleu,
760
- "F1 Score": f1
761
- }
762
-
763
  # 📥 Upload and Evaluate
764
  ground_truth_summary_file = st.file_uploader("πŸ“„ Upload Ground Truth Summary (.txt)", type=["txt"])
765
 
@@ -779,18 +751,16 @@ if ground_truth_summary_file:
779
  st.write("πŸ”Ή BERTScore:")
780
  st.json(bert_result)
781
 
782
- # Evaluate and Display Exact Match, BLEU, F1
783
- additional_metrics = evaluate_additional_metrics(prediction, ground_truth_summary)
784
- st.subheader("πŸ”Ž Additional Evaluation Metrics")
785
- st.json(additional_metrics)
786
 
787
  else:
788
  st.warning("⚠️ Please generate a summary first by uploading a document.")
789
 
790
 
791
 
792
-
793
-
794
  ######################################################################################################################
795
 
796
 
 
707
  # 📚 Imports
708
  import evaluate
709
  from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
710
+ import streamlit as st
711
 
712
  # 📌 Load Evaluators Once
713
  @st.cache_resource
 
725
  bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
726
  return rouge_result, bert_result
727
 
 
 
 
728
  def compute_bleu(prediction, ground_truth):
729
+ """Compute BLEU score for summaries."""
730
  reference = [ground_truth.strip().split()]
731
  candidate = prediction.strip().split()
732
  smoothie = SmoothingFunction().method4
733
  return sentence_bleu(reference, candidate, smoothing_function=smoothie)
734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  # 📥 Upload and Evaluate
736
  ground_truth_summary_file = st.file_uploader("πŸ“„ Upload Ground Truth Summary (.txt)", type=["txt"])
737
 
 
751
  st.write("πŸ”Ή BERTScore:")
752
  st.json(bert_result)
753
 
754
+ # Compute and Display BLEU Score
755
+ bleu = compute_bleu(prediction, ground_truth_summary)
756
+ st.subheader("πŸ”΅ BLEU Score")
757
+ st.write(f"BLEU Score: {bleu:.4f}")
758
 
759
  else:
760
  st.warning("⚠️ Please generate a summary first by uploading a document.")
761
 
762
 
763
 
 
 
764
  ######################################################################################################################
765
 
766