PabloTJ committed on
Commit
e5a040d
·
verified ·
1 Parent(s): fefd339

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -4,8 +4,12 @@ import re
4
  import numpy as np
5
  import pandas as pd
6
  import os
7
-
8
- # Set seed for reproducibility
 
 
 
 
9
  set_seed(42)
10
 
11
  # Define the six premium generation models:
@@ -33,30 +37,32 @@ grammar_model_names = [
33
  "hassaanik/grammar-correction-model"
34
  ]
35
 
36
- # Function to load generation pipelines on demand
37
  def load_generation_pipeline(model_name):
38
  try:
39
- return pipeline("text-generation", model=model_name)
 
 
40
  except Exception as e:
41
  print(f"Error loading generation model {model_name}: {e}")
42
  return None
43
 
44
- # Function to load grammar evaluation pipelines on demand
45
  def load_grammar_pipeline(model_name):
46
  try:
47
- return pipeline("text2text-generation", model=model_name)
 
48
  except Exception as e:
49
  print(f"Error loading grammar model {model_name}: {e}")
50
  return None
51
 
52
- # Pre-load grammar evaluators
53
  rater_models = []
54
  for model_name in grammar_model_names:
55
  p = load_grammar_pipeline(model_name)
56
  if p is not None:
57
  rater_models.append(p)
58
 
59
- # Utility functions to clean text and check for palindromes
60
  def clean_text(text):
61
  return re.sub(r'[^a-zA-Z0-9]', '', text.lower())
62
 
@@ -64,16 +70,15 @@ def is_palindrome(text):
64
  cleaned = clean_text(text)
65
  return cleaned == cleaned[::-1]
66
 
67
- # Build prompt with clear instructions to output only the palindrome.
68
  def build_prompt(lang):
69
  return (
70
  f"Instruction: Generate a single original palindrome in {lang}.\n"
71
  "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
72
- "Do not output any additional text or commentary.\n"
73
  "Palindrome: "
74
  )
75
 
76
- # Build prompt for grammar evaluation
77
  def grammar_prompt(pal, lang):
78
  return (
79
  f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
@@ -81,7 +86,6 @@ def grammar_prompt(pal, lang):
81
  f'"{pal}"\n'
82
  )
83
 
84
- # Extract numeric score from text output
85
  def extract_score(text):
86
  match = re.search(r"\d{1,3}", text)
87
  if match:
@@ -89,7 +93,7 @@ def extract_score(text):
89
  return min(max(score, 0), 100)
90
  return 0
91
 
92
- # Main benchmark function - runs all tests and saves CSV automatically.
93
  def run_benchmark_all():
94
  results = []
95
  for model_name in premium_models:
@@ -105,7 +109,6 @@ def run_benchmark_all():
105
  valid = is_palindrome(gen_output)
106
  cleaned_len = len(clean_text(gen_output))
107
 
108
- # Evaluate grammar using both grammar models
109
  scores = []
110
  for rater in rater_models:
111
  rprompt = grammar_prompt(gen_output, lang)
@@ -135,12 +138,11 @@ def run_benchmark_all():
135
  print(f"CSV saved to {os.path.abspath(csv_path)}")
136
  return gr.Dataframe(df), csv_path
137
 
138
- # Build the Gradio UI using a Blocks layout
139
  with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
140
  gr.Markdown("# Premium Model Palindrome Benchmark")
141
  gr.Markdown(
142
- "This benchmark runs automatically over 6 premium text-generation models across 5 languages "
143
- "(English, German, Spanish, French, Portuguese) and saves the results to a CSV file upon completion."
144
  )
145
  with gr.Row():
146
  run_button = gr.Button("Run All Benchmarks")
 
4
  import numpy as np
5
  import pandas as pd
6
  import os
7
+ import torch
8
+
9
+ # Check if CUDA (GPU) is available
10
+ print("CUDA available:", torch.cuda.is_available())
11
+
12
+ # Set a seed for reproducibility
13
  set_seed(42)
14
 
15
  # Define the six premium generation models:
 
37
  "hassaanik/grammar-correction-model"
38
  ]
39
 
40
# Build a text-generation pipeline for one checkpoint, preferring the GPU.
def load_generation_pipeline(model_name):
    """Return a transformers text-generation pipeline for *model_name*.

    The model is placed on GPU 0 when CUDA is available, otherwise on the
    CPU. Any load failure is logged and None is returned so the caller can
    skip the model instead of crashing the whole benchmark.
    """
    try:
        # device=0 selects the first CUDA device; device=-1 selects the CPU.
        target_device = 0 if torch.cuda.is_available() else -1
        return pipeline("text-generation", model=model_name, device=target_device)
    except Exception as exc:
        print(f"Error loading generation model {model_name}: {exc}")
        return None
49
 
50
# Build a text2text pipeline used to rate palindrome grammar, preferring the GPU.
def load_grammar_pipeline(model_name):
    """Return a transformers text2text-generation pipeline for *model_name*.

    Mirrors load_generation_pipeline: GPU 0 when CUDA is available, CPU
    otherwise; on any failure the error is printed and None is returned.
    """
    try:
        # device=0 selects the first CUDA device; device=-1 selects the CPU.
        target_device = 0 if torch.cuda.is_available() else -1
        return pipeline("text2text-generation", model=model_name, device=target_device)
    except Exception as exc:
        print(f"Error loading grammar model {model_name}: {exc}")
        return None
58
 
59
# Pre-load the grammar evaluators once at startup; any model whose pipeline
# failed to build (load_grammar_pipeline returned None) is silently skipped.
rater_models = [
    rater
    for rater in map(load_grammar_pipeline, grammar_model_names)
    if rater is not None
]
65
 
 
66
def clean_text(text):
    """Normalize *text* for palindrome comparison.

    Lower-cases the input and keeps only ASCII letters and digits, so
    punctuation, whitespace, and non-ASCII characters never affect the
    palindrome check.
    """
    # Collecting the kept characters is equivalent to the original
    # re.sub(r'[^a-zA-Z0-9]', '', ...) which deleted everything else.
    return ''.join(re.findall(r'[a-zA-Z0-9]', text.lower()))
68
 
 
70
  cleaned = clean_text(text)
71
  return cleaned == cleaned[::-1]
72
 
73
# Prompt template: the model must emit the palindrome and nothing else.
def build_prompt(lang):
    """Return the text-generation prompt asking for one palindrome in *lang*."""
    header = f"Instruction: Generate a single original palindrome in {lang}.\n"
    rules = (
        "Output only the palindrome. The palindrome should be a continuous text that reads the same forward and backward.\n"
        "Do not output any additional text, commentary, or the prompt itself.\n"
    )
    return header + rules + "Palindrome: "
81
 
 
82
  def grammar_prompt(pal, lang):
83
  return (
84
  f"Rate from 0 to 100 how grammatically correct this palindrome is in {lang}. "
 
86
  f'"{pal}"\n'
87
  )
88
 
 
89
  def extract_score(text):
90
  match = re.search(r"\d{1,3}", text)
91
  if match:
 
93
  return min(max(score, 0), 100)
94
  return 0
95
 
96
+ # Main benchmark function that runs tests and saves CSV results.
97
  def run_benchmark_all():
98
  results = []
99
  for model_name in premium_models:
 
109
  valid = is_palindrome(gen_output)
110
  cleaned_len = len(clean_text(gen_output))
111
 
 
112
  scores = []
113
  for rater in rater_models:
114
  rprompt = grammar_prompt(gen_output, lang)
 
138
  print(f"CSV saved to {os.path.abspath(csv_path)}")
139
  return gr.Dataframe(df), csv_path
140
 
 
141
  with gr.Blocks(title="Premium Model Palindrome Benchmark") as demo:
142
  gr.Markdown("# Premium Model Palindrome Benchmark")
143
  gr.Markdown(
144
+ "This benchmark runs automatically over 6 premium text-generation models across 5 languages and saves the results "
145
+ "to a CSV file upon completion."
146
  )
147
  with gr.Row():
148
  run_button = gr.Button("Run All Benchmarks")