Spaces:

hymarog1
/

LegalDoc

Sleeping

App Files Files Community

hymarog1 committed on Apr 30

Commit

812a461

verified ·

1 Parent(s): 7c51ba0

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -36

app.py CHANGED Viewed

@@ -707,7 +707,7 @@ if prompt:
 # 📚 Imports
 import evaluate
 from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
-from sklearn.metrics import f1_score
 # 📌 Load Evaluators Once
 @st.cache_resource
@@ -725,41 +725,13 @@ def evaluate_summary(generated_summary, ground_truth_summary):
     bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
     return rouge_result, bert_result
-def exact_match(prediction, ground_truth):
-    return int(prediction.strip().lower() == ground_truth.strip().lower())
 def compute_bleu(prediction, ground_truth):
     # Sentence-level BLEU of `prediction` against a single reference text.
     # Both strings are stripped and split on whitespace; case is not normalized.
     # sentence_bleu expects a list of references, each a token list; one is supplied.
     reference = [ground_truth.strip().split()]
     candidate = prediction.strip().split()
     # NLTK smoothing variant 4; smoothing keeps the score from collapsing to zero
     # when some n-gram order has no matches (see the NLTK docs for the exact formula).
     smoothie = SmoothingFunction().method4
     # Returns whatever sentence_bleu computes for this reference/candidate pair.
     return sentence_bleu(reference, candidate, smoothing_function=smoothie)
-def compute_f1(prediction, ground_truth):
-    """Compute F1 score based on token overlap, like in QA evaluation."""
-    pred_tokens = prediction.strip().lower().split()
-    gt_tokens = ground_truth.strip().lower().split()
-    common_tokens = set(pred_tokens) & set(gt_tokens)
-    num_common = len(common_tokens)
-    if num_common == 0:
-        return 0.0
-    precision = num_common / len(pred_tokens)
-    recall = num_common / len(gt_tokens)
-    f1 = 2 * (precision * recall) / (precision + recall)
-    return f1
-def evaluate_additional_metrics(prediction, ground_truth):
-    em = exact_match(prediction, ground_truth)
-    bleu = compute_bleu(prediction, ground_truth)
-    f1 = compute_f1(prediction, ground_truth)
-    return {
-        "Exact Match": em,
-        "BLEU Score": bleu,
-        "F1 Score": f1
-    }
 # 📥 Upload and Evaluate
 ground_truth_summary_file = st.file_uploader("📄 Upload Ground Truth Summary (.txt)", type=["txt"])
@@ -779,18 +751,16 @@ if ground_truth_summary_file:
         st.write("🔹 BERTScore:")
         st.json(bert_result)
-        # Evaluate and Display Exact Match, BLEU, F1
-        additional_metrics = evaluate_additional_metrics(prediction, ground_truth_summary)
-        st.subheader("🔎 Additional Evaluation Metrics")
-        st.json(additional_metrics)
     else:
         st.warning("⚠️ Please generate a summary first by uploading a document.")
 ######################################################################################################################

 # 📚 Imports
 import evaluate
 from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+import streamlit as st
 # 📌 Load Evaluators Once
 @st.cache_resource
     bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
     return rouge_result, bert_result
 def compute_bleu(prediction, ground_truth):
+    """Compute BLEU score for summaries."""
     # Tokenization is a plain strip + whitespace split; case is left as-is.
     # The reference is wrapped in a list because sentence_bleu takes a list of references.
     reference = [ground_truth.strip().split()]
     candidate = prediction.strip().split()
     # NLTK smoothing variant 4; smoothing keeps the score from collapsing to zero
     # when some n-gram order has no matches (see the NLTK docs for the exact formula).
     smoothie = SmoothingFunction().method4
     # Returns whatever sentence_bleu computes for this reference/candidate pair.
     return sentence_bleu(reference, candidate, smoothing_function=smoothie)
 # 📥 Upload and Evaluate
 ground_truth_summary_file = st.file_uploader("📄 Upload Ground Truth Summary (.txt)", type=["txt"])
         st.write("🔹 BERTScore:")
         st.json(bert_result)
+        # Compute and Display BLEU Score
+        bleu = compute_bleu(prediction, ground_truth_summary)
+        st.subheader("🔵 BLEU Score")
+        st.write(f"BLEU Score: {bleu:.4f}")
     else:
         st.warning("⚠️ Please generate a summary first by uploading a document.")
 ######################################################################################################################