mihalykiss committed on
Commit
9c9a516
·
verified ·
1 Parent(s): a1ff36a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -55
app.py CHANGED
@@ -68,7 +68,6 @@ if tokenizer: # Only set normalizer if tokenizer loaded successfully
68
  newline_to_space,
69
  Strip()
70
  ])
71
- # --- End Model & Tokenizer Configuration ---
72
 
73
 
74
  title_md = """
@@ -78,19 +77,19 @@ title_md = """
78
 
79
  description = """
80
  <div class="app-description">
81
- <p>This tool utilizes the <b>ModernBERT</b> model to discern whether a given text is human-authored or AI-generated. It employs a soft voting ensemble of <b>three</b> models, amalgamating their outputs to enhance detection accuracy.</p>
82
  <ul class="features-list">
83
- <li><span class="icon">✅</span> <strong>Human Verification:</strong> Clearly identifies human-written content.</li>
84
- <li><span class="icon">🔍</span> <strong>Model Detection:</strong> Capable of identifying content from over 40 AI models.</li>
85
- <li><span class="icon">📈</span> <strong>Accuracy:</strong> Performs optimally with more extensive text inputs.</li>
86
- <li><span class="icon">📄</span> <strong>Read more:</strong> Our methodology is detailed in our research paper:
87
- <a href="https://aclanthology.org/2025.genaidetect-1.15/" target="_blank" class="learn-more-link"><b> LINK</b></a>.
88
  </li>
89
  </ul>
90
  <p class="instruction-text">Paste your text into the field below to analyze its origin.</p>
91
  </div>
92
  """
93
- bottom_text = "<p class='footer-text'>SzegedAI</p>" # Simplified footer, as requested
94
 
95
  AI_texts = [
96
  "Camels are remarkable desert animals known for their unique adaptations to harsh, arid environments. Native to the Middle East, North Africa, and parts of Asia, camels have been essential to human life for centuries, serving as a mode of transportation, a source of food, and even a symbol of endurance and survival. There are two primary species of camels: the dromedary camel, which has a single hump and is commonly found in the Middle East and North Africa, and the Bactrian camel, which has two humps and is native to Central Asia. Their humps store fat, not water, as commonly believed, allowing them to survive long periods without food by metabolizing the stored fat for energy. Camels are highly adapted to desert life. They can go for weeks without water, and when they do drink, they can consume up to 40 gallons in one sitting. Their thick eyelashes, sealable nostrils, and wide, padded feet protect them from sand and help them walk easily on loose desert terrain.",
@@ -105,7 +104,7 @@ def classify_text_interface(text):
105
  return "<p style='text-align: center; color: var(--ai-color);'><strong>Error: Models not loaded. Please check the console.</strong></p>"
106
 
107
  cleaned_text = clean_text(text)
108
- if not cleaned_text.strip(): # Check cleaned_text here
109
  result_message = "<p style='text-align: center; color: var(--text-secondary);'>Please enter some text to analyze.</p>"
110
  return result_message
111
 
@@ -125,29 +124,25 @@ def classify_text_interface(text):
125
 
126
  ai_probs = probabilities.clone()
127
  human_label_index = -1
128
- for k, v in label_mapping.items(): # Find the human label index dynamically
129
  if v.lower() == 'human':
130
  human_label_index = k
131
  break
132
 
133
  if human_label_index != -1:
134
- ai_probs[human_label_index] = 0 # Zero out human probability for AI sum
135
  human_prob_value = probabilities[human_label_index].item() * 100
136
- else: # Fallback if 'human' not in label_mapping (should not happen with current map)
137
  human_prob_value = 0
138
  print("Warning: 'human' label not found in label_mapping.")
139
 
140
  ai_total_prob = ai_probs.sum().item() * 100
141
 
142
- # Recalculate human_prob based on ai_total_prob if necessary,
143
- # or ensure the logic correctly identifies human vs AI majority.
144
- # The original logic: human_prob = 100 - ai_total_prob might be confusing if ai_total_prob already excluded human.
145
- # Let's use the direct human probability from the model.
146
 
147
- ai_argmax_index = torch.argmax(ai_probs).item() # Argmax over non-human probabilities
148
  ai_argmax_model = label_mapping.get(ai_argmax_index, "Unknown AI")
149
 
150
- if human_prob_value > ai_total_prob : # Compare direct human probability with sum of AI probabilities
151
  result_message = (
152
  f"<p><strong>The text is</strong> <span class='highlight-human'><strong>{human_prob_value:.2f}%</strong> likely <b>Human written</b>.</span></p>"
153
  )
@@ -171,12 +166,12 @@ modern_css = """
171
  --border-color: #E0E0E0;
172
  --input-bg: #FFFFFF;
173
  --input-focus-border: var(--accent-color);
174
- --human-color: #2ECC71; /* Green */
175
  --human-bg: rgba(46, 204, 113, 0.1);
176
- --ai-color: #E74C3C; /* Red */
177
  --ai-bg: rgba(231, 76, 60, 0.1);
178
  --shadow-color: rgba(44, 62, 80, 0.1);
179
- --container-max-width: 800px; /* Increased width */
180
  --border-radius-md: 8px;
181
  --border-radius-lg: 12px;
182
  }
@@ -206,20 +201,19 @@ body {
206
  border: none;
207
  }
208
 
209
- .form.svelte-633qhp, .block.svelte-11xb1hd, .gradio-html .block { /* More generic selector for Gradio HTML block */
210
  background: none !important;
211
  border: none !important;
212
  box-shadow: none !important;
213
  padding: 0 !important;
214
  }
215
 
216
- /* Title and subtitle are now handled by Markdown with inline styles, h1 here is a fallback or for other h1s */
217
  h1 {
218
  color: var(--text-primary);
219
  font-size: clamp(24px, 5vw, 30px);
220
  font-weight: 700;
221
  text-align: center;
222
- margin-bottom: 20px; /* Adjusted default h1 margin */
223
  letter-spacing: -0.5px;
224
  }
225
 
@@ -293,7 +287,7 @@ h1 {
293
  }
294
 
295
  #result_output_box {
296
- background-color: var(--input-bg); /* Ensure background for the box */
297
  border: 1px solid var(--border-color);
298
  border-radius: var(--border-radius-md);
299
  padding: 20px;
@@ -301,15 +295,15 @@ h1 {
301
  width: 100%;
302
  box-sizing: border-box;
303
  text-align: center;
304
- font-size: clamp(16px, 3vw, 17px); /* Slightly adjusted font size for results */
305
  box-shadow: 0 4px 8px rgba(0,0,0,0.05);
306
- min-height: 80px; /* Give it some min height */
307
- display: flex; /* For centering content if needed */
308
  flex-direction: column;
309
  justify-content: center;
310
  }
311
- #result_output_box p { /* Style paragraphs inside the result box */
312
- margin-bottom: 8px; /* Space between lines in result */
313
  line-height: 1.6;
314
  }
315
  #result_output_box p:last-child {
@@ -319,22 +313,22 @@ h1 {
319
 
320
  .highlight-human, .highlight-ai {
321
  font-weight: 600;
322
- padding: 5px 10px; /* Adjusted padding */
323
  border-radius: var(--border-radius-md);
324
  display: inline-block;
325
- font-size: 1.05em; /* Adjusted size */
326
  }
327
 
328
  .highlight-human {
329
  color: var(--human-color);
330
  background-color: var(--human-bg);
331
- /* border: 1px solid var(--human-color); Removed border for cleaner look */
332
  }
333
 
334
  .highlight-ai {
335
  color: var(--ai-color);
336
  background-color: var(--ai-bg);
337
- /* border: 1px solid var(--ai-color); Removed border for cleaner look */
338
  }
339
 
340
  .tabs > div:first-child button {
@@ -359,14 +353,14 @@ h1 {
359
  border: 1px solid var(--border-color) !important;
360
  border-radius: var(--border-radius-md) !important;
361
  background-color: #fdfdfd !important;
362
- margin-top: 10px; /* Add some space above examples */
363
  }
364
  .gr-sample-textbox {
365
  border: 1px solid var(--border-color) !important;
366
  border-radius: var(--border-radius-md) !important;
367
  font-size: 14px !important;
368
  }
369
- .gr-accordion > .label-wrap button { /* Style accordion label */
370
  font-weight: 500 !important;
371
  color: var(--text-primary) !important;
372
  }
@@ -401,40 +395,36 @@ h1 {
401
  iface = gr.Blocks(css=modern_css, theme=gr.themes.Base(font=[gr.themes.GoogleFont("Inter"), "sans-serif"]))
402
 
403
  with iface:
404
- gr.Markdown(title_md) # Using combined Markdown for title and subtitle
405
  gr.Markdown(description)
406
 
407
  text_input = gr.Textbox(
408
  label="",
409
  placeholder="Type or paste your content here...",
410
  elem_id="text_input_box",
411
- lines=7 # Adjusted lines
412
  )
413
  result_output = gr.HTML(elem_id="result_output_box")
414
 
415
- # Only set up the change function if models are loaded
416
  if all([tokenizer, model_1, model_2, model_3]):
417
  text_input.change(classify_text_interface, inputs=text_input, outputs=result_output)
418
  else:
419
- # Display a persistent error if models couldn't load
420
  gr.HTML("<div id='result_output_box'><p style='color: var(--ai-color); text-align: center;'><strong>Application Error: Models could not be loaded. Please check the server console for details.</strong></p></div>")
421
 
 
 
 
 
 
 
 
422
 
423
- with gr.Row():
424
- with gr.Column(scale=1):
425
- with gr.Accordion("AI Text Examples", open=False):
426
- gr.Examples(
427
- examples=AI_texts,
428
- inputs=text_input,
429
- label="", # Label removed as accordion title is enough
430
- )
431
- with gr.Column(scale=1):
432
- with gr.Accordion("Human Text Examples", open=False):
433
- gr.Examples(
434
- examples=Human_texts,
435
- inputs=text_input,
436
- label="", # Label removed
437
- )
438
 
439
  gr.Markdown(bottom_text, elem_id="bottom_text")
440
 
 
68
  newline_to_space,
69
  Strip()
70
  ])
 
71
 
72
 
73
  title_md = """
 
77
 
78
  description = """
79
  <div class="app-description">
80
+ <p>This tool utilizes the <b>ModernBERT</b> model to decide whether a given text is human-authored or AI-generated. It employs a soft voting ensemble of <b>three</b> models to improve detection accuracy.</p>
81
  <ul class="features-list">
82
+ <li><span class="icon">✅</span> <strong>Human Verification: </strong> Clearly identifies human-written content.</li>
83
+ <li><span class="icon">🔍</span> <strong>Model Detection: </strong> Capable of identifying content from over 40 AI models.</li>
84
+ <li><span class="icon">📈</span> <strong>Accuracy: </strong> Performs optimally with more extensive text inputs.</li>
85
+ <li><span class="icon">📄</span> <strong>Read more: </strong> Our methodology is detailed in our research paper:
86
+ <a href="https://aclanthology.org/2025.genaidetect-1.15/" target="_blank" class="learn-more-link"> <b> LINK </b></a>.
87
  </li>
88
  </ul>
89
  <p class="instruction-text">Paste your text into the field below to analyze its origin.</p>
90
  </div>
91
  """
92
+ bottom_text = "<p class='footer-text'>SzegedAI - Mihaly Kiss</p>"
93
 
94
  AI_texts = [
95
  "Camels are remarkable desert animals known for their unique adaptations to harsh, arid environments. Native to the Middle East, North Africa, and parts of Asia, camels have been essential to human life for centuries, serving as a mode of transportation, a source of food, and even a symbol of endurance and survival. There are two primary species of camels: the dromedary camel, which has a single hump and is commonly found in the Middle East and North Africa, and the Bactrian camel, which has two humps and is native to Central Asia. Their humps store fat, not water, as commonly believed, allowing them to survive long periods without food by metabolizing the stored fat for energy. Camels are highly adapted to desert life. They can go for weeks without water, and when they do drink, they can consume up to 40 gallons in one sitting. Their thick eyelashes, sealable nostrils, and wide, padded feet protect them from sand and help them walk easily on loose desert terrain.",
 
104
  return "<p style='text-align: center; color: var(--ai-color);'><strong>Error: Models not loaded. Please check the console.</strong></p>"
105
 
106
  cleaned_text = clean_text(text)
107
+ if not cleaned_text.strip():
108
  result_message = "<p style='text-align: center; color: var(--text-secondary);'>Please enter some text to analyze.</p>"
109
  return result_message
110
 
 
124
 
125
  ai_probs = probabilities.clone()
126
  human_label_index = -1
127
+ for k, v in label_mapping.items():
128
  if v.lower() == 'human':
129
  human_label_index = k
130
  break
131
 
132
  if human_label_index != -1:
133
+ ai_probs[human_label_index] = 0
134
  human_prob_value = probabilities[human_label_index].item() * 100
135
+ else:
136
  human_prob_value = 0
137
  print("Warning: 'human' label not found in label_mapping.")
138
 
139
  ai_total_prob = ai_probs.sum().item() * 100
140
 
 
 
 
 
141
 
142
+ ai_argmax_index = torch.argmax(ai_probs).item()
143
  ai_argmax_model = label_mapping.get(ai_argmax_index, "Unknown AI")
144
 
145
+ if human_prob_value > ai_total_prob :
146
  result_message = (
147
  f"<p><strong>The text is</strong> <span class='highlight-human'><strong>{human_prob_value:.2f}%</strong> likely <b>Human written</b>.</span></p>"
148
  )
 
166
  --border-color: #E0E0E0;
167
  --input-bg: #FFFFFF;
168
  --input-focus-border: var(--accent-color);
169
+ --human-color: #2ECC71;
170
  --human-bg: rgba(46, 204, 113, 0.1);
171
+ --ai-color: #E74C3C;
172
  --ai-bg: rgba(231, 76, 60, 0.1);
173
  --shadow-color: rgba(44, 62, 80, 0.1);
174
+ --container-max-width: 800px;
175
  --border-radius-md: 8px;
176
  --border-radius-lg: 12px;
177
  }
 
201
  border: none;
202
  }
203
 
204
+ .form.svelte-633qhp, .block.svelte-11xb1hd, .gradio-html .block {
205
  background: none !important;
206
  border: none !important;
207
  box-shadow: none !important;
208
  padding: 0 !important;
209
  }
210
 
 
211
  h1 {
212
  color: var(--text-primary);
213
  font-size: clamp(24px, 5vw, 30px);
214
  font-weight: 700;
215
  text-align: center;
216
+ margin-bottom: 20px;
217
  letter-spacing: -0.5px;
218
  }
219
 
 
287
  }
288
 
289
  #result_output_box {
290
+ background-color: var(--input-bg);
291
  border: 1px solid var(--border-color);
292
  border-radius: var(--border-radius-md);
293
  padding: 20px;
 
295
  width: 100%;
296
  box-sizing: border-box;
297
  text-align: center;
298
+ font-size: clamp(16px, 3vw, 17px);
299
  box-shadow: 0 4px 8px rgba(0,0,0,0.05);
300
+ min-height: 80px;
301
+ display: flex;
302
  flex-direction: column;
303
  justify-content: center;
304
  }
305
+ #result_output_box p {
306
+ margin-bottom: 8px;
307
  line-height: 1.6;
308
  }
309
  #result_output_box p:last-child {
 
313
 
314
  .highlight-human, .highlight-ai {
315
  font-weight: 600;
316
+ padding: 5px 10px;
317
  border-radius: var(--border-radius-md);
318
  display: inline-block;
319
+ font-size: 1.05em;
320
  }
321
 
322
  .highlight-human {
323
  color: var(--human-color);
324
  background-color: var(--human-bg);
325
+ /* border: 1px solid var(--human-color);
326
  }
327
 
328
  .highlight-ai {
329
  color: var(--ai-color);
330
  background-color: var(--ai-bg);
331
+ /* border: 1px solid var(--ai-color);
332
  }
333
 
334
  .tabs > div:first-child button {
 
353
  border: 1px solid var(--border-color) !important;
354
  border-radius: var(--border-radius-md) !important;
355
  background-color: #fdfdfd !important;
356
+ margin-top: 10px;
357
  }
358
  .gr-sample-textbox {
359
  border: 1px solid var(--border-color) !important;
360
  border-radius: var(--border-radius-md) !important;
361
  font-size: 14px !important;
362
  }
363
+ .gr-accordion > .label-wrap button {
364
  font-weight: 500 !important;
365
  color: var(--text-primary) !important;
366
  }
 
395
  iface = gr.Blocks(css=modern_css, theme=gr.themes.Base(font=[gr.themes.GoogleFont("Inter"), "sans-serif"]))
396
 
397
  with iface:
398
+ gr.Markdown(title_md)
399
  gr.Markdown(description)
400
 
401
  text_input = gr.Textbox(
402
  label="",
403
  placeholder="Type or paste your content here...",
404
  elem_id="text_input_box",
405
+ lines=10
406
  )
407
  result_output = gr.HTML(elem_id="result_output_box")
408
 
 
409
  if all([tokenizer, model_1, model_2, model_3]):
410
  text_input.change(classify_text_interface, inputs=text_input, outputs=result_output)
411
  else:
 
412
  gr.HTML("<div id='result_output_box'><p style='color: var(--ai-color); text-align: center;'><strong>Application Error: Models could not be loaded. Please check the server console for details.</strong></p></div>")
413
 
414
+ with gr.Accordion("AI Text Examples", open=False):
415
+ gr.Examples(
416
+ examples=AI_texts,
417
+ inputs=text_input,
418
+ label="",
419
+ elem_classes="gr-examples"
420
+ )
421
 
422
+ with gr.Accordion("Human Text Examples", open=False):
423
+ gr.Examples(
424
+ examples=Human_texts,
425
+ inputs=text_input,
426
+ label="",
427
+ )
 
 
 
 
 
 
 
 
 
428
 
429
  gr.Markdown(bottom_text, elem_id="bottom_text")
430