Update app.py
Browse files
app.py
CHANGED
@@ -707,7 +707,7 @@ if prompt:
|
|
707 |
# π Imports
|
708 |
import evaluate
|
709 |
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
|
710 |
-
|
711 |
|
712 |
# π Load Evaluators Once
|
713 |
@st.cache_resource
|
@@ -725,41 +725,13 @@ def evaluate_summary(generated_summary, ground_truth_summary):
|
|
725 |
bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
|
726 |
return rouge_result, bert_result
|
727 |
|
728 |
-
def exact_match(prediction, ground_truth):
|
729 |
-
return int(prediction.strip().lower() == ground_truth.strip().lower())
|
730 |
-
|
731 |
def compute_bleu(prediction, ground_truth):
|
|
|
732 |
reference = [ground_truth.strip().split()]
|
733 |
candidate = prediction.strip().split()
|
734 |
smoothie = SmoothingFunction().method4
|
735 |
return sentence_bleu(reference, candidate, smoothing_function=smoothie)
|
736 |
|
737 |
-
def compute_f1(prediction, ground_truth):
|
738 |
-
"""Compute F1 score based on token overlap, like in QA evaluation."""
|
739 |
-
pred_tokens = prediction.strip().lower().split()
|
740 |
-
gt_tokens = ground_truth.strip().lower().split()
|
741 |
-
|
742 |
-
common_tokens = set(pred_tokens) & set(gt_tokens)
|
743 |
-
num_common = len(common_tokens)
|
744 |
-
|
745 |
-
if num_common == 0:
|
746 |
-
return 0.0
|
747 |
-
|
748 |
-
precision = num_common / len(pred_tokens)
|
749 |
-
recall = num_common / len(gt_tokens)
|
750 |
-
f1 = 2 * (precision * recall) / (precision + recall)
|
751 |
-
return f1
|
752 |
-
|
753 |
-
def evaluate_additional_metrics(prediction, ground_truth):
|
754 |
-
em = exact_match(prediction, ground_truth)
|
755 |
-
bleu = compute_bleu(prediction, ground_truth)
|
756 |
-
f1 = compute_f1(prediction, ground_truth)
|
757 |
-
return {
|
758 |
-
"Exact Match": em,
|
759 |
-
"BLEU Score": bleu,
|
760 |
-
"F1 Score": f1
|
761 |
-
}
|
762 |
-
|
763 |
# 📥 Upload and Evaluate
|
764 |
ground_truth_summary_file = st.file_uploader("π Upload Ground Truth Summary (.txt)", type=["txt"])
|
765 |
|
@@ -779,18 +751,16 @@ if ground_truth_summary_file:
|
|
779 |
st.write("🔹 BERTScore:")
|
780 |
st.json(bert_result)
|
781 |
|
782 |
-
#
|
783 |
-
|
784 |
-
st.subheader("
|
785 |
-
st.
|
786 |
|
787 |
else:
|
788 |
st.warning("⚠️ Please generate a summary first by uploading a document.")
|
789 |
|
790 |
|
791 |
|
792 |
-
|
793 |
-
|
794 |
######################################################################################################################
|
795 |
|
796 |
|
|
|
707 |
# π Imports
|
708 |
import evaluate
|
709 |
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
|
710 |
+
import streamlit as st
|
711 |
|
712 |
# π Load Evaluators Once
|
713 |
@st.cache_resource
|
|
|
725 |
bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
|
726 |
return rouge_result, bert_result
|
727 |
|
|
|
|
|
|
|
728 |
def compute_bleu(prediction, ground_truth):
|
729 |
+
"""Compute BLEU score for summaries."""
|
730 |
reference = [ground_truth.strip().split()]
|
731 |
candidate = prediction.strip().split()
|
732 |
smoothie = SmoothingFunction().method4
|
733 |
return sentence_bleu(reference, candidate, smoothing_function=smoothie)
|
734 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
735 |
# 📥 Upload and Evaluate
|
736 |
ground_truth_summary_file = st.file_uploader("π Upload Ground Truth Summary (.txt)", type=["txt"])
|
737 |
|
|
|
751 |
st.write("🔹 BERTScore:")
|
752 |
st.json(bert_result)
|
753 |
|
754 |
+
# Compute and Display BLEU Score
|
755 |
+
bleu = compute_bleu(prediction, ground_truth_summary)
|
756 |
+
st.subheader("🔵 BLEU Score")
|
757 |
+
st.write(f"BLEU Score: {bleu:.4f}")
|
758 |
|
759 |
else:
|
760 |
st.warning("⚠️ Please generate a summary first by uploading a document.")
|
761 |
|
762 |
|
763 |
|
|
|
|
|
764 |
######################################################################################################################
|
765 |
|
766 |
|