Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -68,7 +68,6 @@ if tokenizer: # Only set normalizer if tokenizer loaded successfully
|
|
68 |
newline_to_space,
|
69 |
Strip()
|
70 |
])
|
71 |
-
# --- End Model & Tokenizer Configuration ---
|
72 |
|
73 |
|
74 |
title_md = """
|
@@ -78,19 +77,19 @@ title_md = """
|
|
78 |
|
79 |
description = """
|
80 |
<div class="app-description">
|
81 |
-
<p>This tool utilizes the <b>ModernBERT</b> model to
|
82 |
<ul class="features-list">
|
83 |
-
<li><span class="icon">✅</span> <strong>Human Verification
|
84 |
-
<li><span class="icon">π</span> <strong>Model Detection
|
85 |
-
<li><span class="icon">π</span> <strong>Accuracy
|
86 |
-
<li><span class="icon">π</span> <strong>Read more
|
87 |
-
<a href="https://aclanthology.org/2025.genaidetect-1.15/" target="_blank" class="learn-more-link"
|
88 |
</li>
|
89 |
</ul>
|
90 |
<p class="instruction-text">Paste your text into the field below to analyze its origin.</p>
|
91 |
</div>
|
92 |
"""
|
93 |
-
bottom_text = "<p class='footer-text'>SzegedAI</p>"
|
94 |
|
95 |
AI_texts = [
|
96 |
"Camels are remarkable desert animals known for their unique adaptations to harsh, arid environments. Native to the Middle East, North Africa, and parts of Asia, camels have been essential to human life for centuries, serving as a mode of transportation, a source of food, and even a symbol of endurance and survival. There are two primary species of camels: the dromedary camel, which has a single hump and is commonly found in the Middle East and North Africa, and the Bactrian camel, which has two humps and is native to Central Asia. Their humps store fat, not water, as commonly believed, allowing them to survive long periods without food by metabolizing the stored fat for energy. Camels are highly adapted to desert life. They can go for weeks without water, and when they do drink, they can consume up to 40 gallons in one sitting. Their thick eyelashes, sealable nostrils, and wide, padded feet protect them from sand and help them walk easily on loose desert terrain.",
|
@@ -105,7 +104,7 @@ def classify_text_interface(text):
|
|
105 |
return "<p style='text-align: center; color: var(--ai-color);'><strong>Error: Models not loaded. Please check the console.</strong></p>"
|
106 |
|
107 |
cleaned_text = clean_text(text)
|
108 |
-
if not cleaned_text.strip():
|
109 |
result_message = "<p style='text-align: center; color: var(--text-secondary);'>Please enter some text to analyze.</p>"
|
110 |
return result_message
|
111 |
|
@@ -125,29 +124,25 @@ def classify_text_interface(text):
|
|
125 |
|
126 |
ai_probs = probabilities.clone()
|
127 |
human_label_index = -1
|
128 |
-
for k, v in label_mapping.items():
|
129 |
if v.lower() == 'human':
|
130 |
human_label_index = k
|
131 |
break
|
132 |
|
133 |
if human_label_index != -1:
|
134 |
-
ai_probs[human_label_index] = 0
|
135 |
human_prob_value = probabilities[human_label_index].item() * 100
|
136 |
-
else:
|
137 |
human_prob_value = 0
|
138 |
print("Warning: 'human' label not found in label_mapping.")
|
139 |
|
140 |
ai_total_prob = ai_probs.sum().item() * 100
|
141 |
|
142 |
-
# Recalculate human_prob based on ai_total_prob if necessary,
|
143 |
-
# or ensure the logic correctly identifies human vs AI majority.
|
144 |
-
# The original logic: human_prob = 100 - ai_total_prob might be confusing if ai_total_prob already excluded human.
|
145 |
-
# Let's use the direct human probability from the model.
|
146 |
|
147 |
-
ai_argmax_index = torch.argmax(ai_probs).item()
|
148 |
ai_argmax_model = label_mapping.get(ai_argmax_index, "Unknown AI")
|
149 |
|
150 |
-
if human_prob_value > ai_total_prob :
|
151 |
result_message = (
|
152 |
f"<p><strong>The text is</strong> <span class='highlight-human'><strong>{human_prob_value:.2f}%</strong> likely <b>Human written</b>.</span></p>"
|
153 |
)
|
@@ -171,12 +166,12 @@ modern_css = """
|
|
171 |
--border-color: #E0E0E0;
|
172 |
--input-bg: #FFFFFF;
|
173 |
--input-focus-border: var(--accent-color);
|
174 |
-
--human-color: #2ECC71;
|
175 |
--human-bg: rgba(46, 204, 113, 0.1);
|
176 |
-
--ai-color: #E74C3C;
|
177 |
--ai-bg: rgba(231, 76, 60, 0.1);
|
178 |
--shadow-color: rgba(44, 62, 80, 0.1);
|
179 |
-
--container-max-width: 800px;
|
180 |
--border-radius-md: 8px;
|
181 |
--border-radius-lg: 12px;
|
182 |
}
|
@@ -206,20 +201,19 @@ body {
|
|
206 |
border: none;
|
207 |
}
|
208 |
|
209 |
-
.form.svelte-633qhp, .block.svelte-11xb1hd, .gradio-html .block {
|
210 |
background: none !important;
|
211 |
border: none !important;
|
212 |
box-shadow: none !important;
|
213 |
padding: 0 !important;
|
214 |
}
|
215 |
|
216 |
-
/* Title and subtitle are now handled by Markdown with inline styles, h1 here is a fallback or for other h1s */
|
217 |
h1 {
|
218 |
color: var(--text-primary);
|
219 |
font-size: clamp(24px, 5vw, 30px);
|
220 |
font-weight: 700;
|
221 |
text-align: center;
|
222 |
-
margin-bottom: 20px;
|
223 |
letter-spacing: -0.5px;
|
224 |
}
|
225 |
|
@@ -293,7 +287,7 @@ h1 {
|
|
293 |
}
|
294 |
|
295 |
#result_output_box {
|
296 |
-
background-color: var(--input-bg);
|
297 |
border: 1px solid var(--border-color);
|
298 |
border-radius: var(--border-radius-md);
|
299 |
padding: 20px;
|
@@ -301,15 +295,15 @@ h1 {
|
|
301 |
width: 100%;
|
302 |
box-sizing: border-box;
|
303 |
text-align: center;
|
304 |
-
font-size: clamp(16px, 3vw, 17px);
|
305 |
box-shadow: 0 4px 8px rgba(0,0,0,0.05);
|
306 |
-
min-height: 80px;
|
307 |
-
display: flex;
|
308 |
flex-direction: column;
|
309 |
justify-content: center;
|
310 |
}
|
311 |
-
#result_output_box p {
|
312 |
-
margin-bottom: 8px;
|
313 |
line-height: 1.6;
|
314 |
}
|
315 |
#result_output_box p:last-child {
|
@@ -319,22 +313,22 @@ h1 {
|
|
319 |
|
320 |
.highlight-human, .highlight-ai {
|
321 |
font-weight: 600;
|
322 |
-
padding: 5px 10px;
|
323 |
border-radius: var(--border-radius-md);
|
324 |
display: inline-block;
|
325 |
-
font-size: 1.05em;
|
326 |
}
|
327 |
|
328 |
.highlight-human {
|
329 |
color: var(--human-color);
|
330 |
background-color: var(--human-bg);
|
331 |
-
/* border: 1px solid var(--human-color);
|
332 |
}
|
333 |
|
334 |
.highlight-ai {
|
335 |
color: var(--ai-color);
|
336 |
background-color: var(--ai-bg);
|
337 |
-
/* border: 1px solid var(--ai-color);
|
338 |
}
|
339 |
|
340 |
.tabs > div:first-child button {
|
@@ -359,14 +353,14 @@ h1 {
|
|
359 |
border: 1px solid var(--border-color) !important;
|
360 |
border-radius: var(--border-radius-md) !important;
|
361 |
background-color: #fdfdfd !important;
|
362 |
-
margin-top: 10px;
|
363 |
}
|
364 |
.gr-sample-textbox {
|
365 |
border: 1px solid var(--border-color) !important;
|
366 |
border-radius: var(--border-radius-md) !important;
|
367 |
font-size: 14px !important;
|
368 |
}
|
369 |
-
.gr-accordion > .label-wrap button {
|
370 |
font-weight: 500 !important;
|
371 |
color: var(--text-primary) !important;
|
372 |
}
|
@@ -401,40 +395,36 @@ h1 {
|
|
401 |
iface = gr.Blocks(css=modern_css, theme=gr.themes.Base(font=[gr.themes.GoogleFont("Inter"), "sans-serif"]))
|
402 |
|
403 |
with iface:
|
404 |
-
gr.Markdown(title_md)
|
405 |
gr.Markdown(description)
|
406 |
|
407 |
text_input = gr.Textbox(
|
408 |
label="",
|
409 |
placeholder="Type or paste your content here...",
|
410 |
elem_id="text_input_box",
|
411 |
-
lines=
|
412 |
)
|
413 |
result_output = gr.HTML(elem_id="result_output_box")
|
414 |
|
415 |
-
# Only set up the change function if models are loaded
|
416 |
if all([tokenizer, model_1, model_2, model_3]):
|
417 |
text_input.change(classify_text_interface, inputs=text_input, outputs=result_output)
|
418 |
else:
|
419 |
-
# Display a persistent error if models couldn't load
|
420 |
gr.HTML("<div id='result_output_box'><p style='color: var(--ai-color); text-align: center;'><strong>Application Error: Models could not be loaded. Please check the server console for details.</strong></p></div>")
|
421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
|
423 |
-
with gr.
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
label="", # Label removed as accordion title is enough
|
430 |
-
)
|
431 |
-
with gr.Column(scale=1):
|
432 |
-
with gr.Accordion("Human Text Examples", open=False):
|
433 |
-
gr.Examples(
|
434 |
-
examples=Human_texts,
|
435 |
-
inputs=text_input,
|
436 |
-
label="", # Label removed
|
437 |
-
)
|
438 |
|
439 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
440 |
|
|
|
68 |
newline_to_space,
|
69 |
Strip()
|
70 |
])
|
|
|
71 |
|
72 |
|
73 |
title_md = """
|
|
|
77 |
|
78 |
description = """
|
79 |
<div class="app-description">
|
80 |
+
<p>This tool utilizes the <b>ModernBERT</b> model to decide whether a given text is human-authored or AI-generated. It employs a soft voting ensemble of <b>three</b> models to improve detection accuracy.</p>
|
81 |
<ul class="features-list">
|
82 |
+
<li><span class="icon">✅</span> <strong>Human Verification: </strong> Clearly identifies human-written content.</li>
|
83 |
+
<li><span class="icon">π</span> <strong>Model Detection: </strong> Capable of identifying content from over 40 AI models.</li>
|
84 |
+
<li><span class="icon">π</span> <strong>Accuracy: </strong> Performs optimally with more extensive text inputs.</li>
|
85 |
+
<li><span class="icon">π</span> <strong>Read more: </strong> Our methodology is detailed in our research paper:
|
86 |
+
<a href="https://aclanthology.org/2025.genaidetect-1.15/" target="_blank" class="learn-more-link"> <b> LINK </b></a>.
|
87 |
</li>
|
88 |
</ul>
|
89 |
<p class="instruction-text">Paste your text into the field below to analyze its origin.</p>
|
90 |
</div>
|
91 |
"""
|
92 |
+
bottom_text = "<p class='footer-text'>SzegedAI - Mihaly Kiss</p>"
|
93 |
|
94 |
AI_texts = [
|
95 |
"Camels are remarkable desert animals known for their unique adaptations to harsh, arid environments. Native to the Middle East, North Africa, and parts of Asia, camels have been essential to human life for centuries, serving as a mode of transportation, a source of food, and even a symbol of endurance and survival. There are two primary species of camels: the dromedary camel, which has a single hump and is commonly found in the Middle East and North Africa, and the Bactrian camel, which has two humps and is native to Central Asia. Their humps store fat, not water, as commonly believed, allowing them to survive long periods without food by metabolizing the stored fat for energy. Camels are highly adapted to desert life. They can go for weeks without water, and when they do drink, they can consume up to 40 gallons in one sitting. Their thick eyelashes, sealable nostrils, and wide, padded feet protect them from sand and help them walk easily on loose desert terrain.",
|
|
|
104 |
return "<p style='text-align: center; color: var(--ai-color);'><strong>Error: Models not loaded. Please check the console.</strong></p>"
|
105 |
|
106 |
cleaned_text = clean_text(text)
|
107 |
+
if not cleaned_text.strip():
|
108 |
result_message = "<p style='text-align: center; color: var(--text-secondary);'>Please enter some text to analyze.</p>"
|
109 |
return result_message
|
110 |
|
|
|
124 |
|
125 |
ai_probs = probabilities.clone()
|
126 |
human_label_index = -1
|
127 |
+
for k, v in label_mapping.items():
|
128 |
if v.lower() == 'human':
|
129 |
human_label_index = k
|
130 |
break
|
131 |
|
132 |
if human_label_index != -1:
|
133 |
+
ai_probs[human_label_index] = 0
|
134 |
human_prob_value = probabilities[human_label_index].item() * 100
|
135 |
+
else:
|
136 |
human_prob_value = 0
|
137 |
print("Warning: 'human' label not found in label_mapping.")
|
138 |
|
139 |
ai_total_prob = ai_probs.sum().item() * 100
|
140 |
|
|
|
|
|
|
|
|
|
141 |
|
142 |
+
ai_argmax_index = torch.argmax(ai_probs).item()
|
143 |
ai_argmax_model = label_mapping.get(ai_argmax_index, "Unknown AI")
|
144 |
|
145 |
+
if human_prob_value > ai_total_prob :
|
146 |
result_message = (
|
147 |
f"<p><strong>The text is</strong> <span class='highlight-human'><strong>{human_prob_value:.2f}%</strong> likely <b>Human written</b>.</span></p>"
|
148 |
)
|
|
|
166 |
--border-color: #E0E0E0;
|
167 |
--input-bg: #FFFFFF;
|
168 |
--input-focus-border: var(--accent-color);
|
169 |
+
--human-color: #2ECC71;
|
170 |
--human-bg: rgba(46, 204, 113, 0.1);
|
171 |
+
--ai-color: #E74C3C;
|
172 |
--ai-bg: rgba(231, 76, 60, 0.1);
|
173 |
--shadow-color: rgba(44, 62, 80, 0.1);
|
174 |
+
--container-max-width: 800px;
|
175 |
--border-radius-md: 8px;
|
176 |
--border-radius-lg: 12px;
|
177 |
}
|
|
|
201 |
border: none;
|
202 |
}
|
203 |
|
204 |
+
.form.svelte-633qhp, .block.svelte-11xb1hd, .gradio-html .block {
|
205 |
background: none !important;
|
206 |
border: none !important;
|
207 |
box-shadow: none !important;
|
208 |
padding: 0 !important;
|
209 |
}
|
210 |
|
|
|
211 |
h1 {
|
212 |
color: var(--text-primary);
|
213 |
font-size: clamp(24px, 5vw, 30px);
|
214 |
font-weight: 700;
|
215 |
text-align: center;
|
216 |
+
margin-bottom: 20px;
|
217 |
letter-spacing: -0.5px;
|
218 |
}
|
219 |
|
|
|
287 |
}
|
288 |
|
289 |
#result_output_box {
|
290 |
+
background-color: var(--input-bg);
|
291 |
border: 1px solid var(--border-color);
|
292 |
border-radius: var(--border-radius-md);
|
293 |
padding: 20px;
|
|
|
295 |
width: 100%;
|
296 |
box-sizing: border-box;
|
297 |
text-align: center;
|
298 |
+
font-size: clamp(16px, 3vw, 17px);
|
299 |
box-shadow: 0 4px 8px rgba(0,0,0,0.05);
|
300 |
+
min-height: 80px;
|
301 |
+
display: flex;
|
302 |
flex-direction: column;
|
303 |
justify-content: center;
|
304 |
}
|
305 |
+
#result_output_box p {
|
306 |
+
margin-bottom: 8px;
|
307 |
line-height: 1.6;
|
308 |
}
|
309 |
#result_output_box p:last-child {
|
|
|
313 |
|
314 |
.highlight-human, .highlight-ai {
|
315 |
font-weight: 600;
|
316 |
+
padding: 5px 10px;
|
317 |
border-radius: var(--border-radius-md);
|
318 |
display: inline-block;
|
319 |
+
font-size: 1.05em;
|
320 |
}
|
321 |
|
322 |
.highlight-human {
|
323 |
color: var(--human-color);
|
324 |
background-color: var(--human-bg);
|
325 |
+
/* border: 1px solid var(--human-color);
|
326 |
}
|
327 |
|
328 |
.highlight-ai {
|
329 |
color: var(--ai-color);
|
330 |
background-color: var(--ai-bg);
|
331 |
+
/* border: 1px solid var(--ai-color);
|
332 |
}
|
333 |
|
334 |
.tabs > div:first-child button {
|
|
|
353 |
border: 1px solid var(--border-color) !important;
|
354 |
border-radius: var(--border-radius-md) !important;
|
355 |
background-color: #fdfdfd !important;
|
356 |
+
margin-top: 10px;
|
357 |
}
|
358 |
.gr-sample-textbox {
|
359 |
border: 1px solid var(--border-color) !important;
|
360 |
border-radius: var(--border-radius-md) !important;
|
361 |
font-size: 14px !important;
|
362 |
}
|
363 |
+
.gr-accordion > .label-wrap button {
|
364 |
font-weight: 500 !important;
|
365 |
color: var(--text-primary) !important;
|
366 |
}
|
|
|
395 |
iface = gr.Blocks(css=modern_css, theme=gr.themes.Base(font=[gr.themes.GoogleFont("Inter"), "sans-serif"]))
|
396 |
|
397 |
with iface:
|
398 |
+
gr.Markdown(title_md)
|
399 |
gr.Markdown(description)
|
400 |
|
401 |
text_input = gr.Textbox(
|
402 |
label="",
|
403 |
placeholder="Type or paste your content here...",
|
404 |
elem_id="text_input_box",
|
405 |
+
lines=10
|
406 |
)
|
407 |
result_output = gr.HTML(elem_id="result_output_box")
|
408 |
|
|
|
409 |
if all([tokenizer, model_1, model_2, model_3]):
|
410 |
text_input.change(classify_text_interface, inputs=text_input, outputs=result_output)
|
411 |
else:
|
|
|
412 |
gr.HTML("<div id='result_output_box'><p style='color: var(--ai-color); text-align: center;'><strong>Application Error: Models could not be loaded. Please check the server console for details.</strong></p></div>")
|
413 |
|
414 |
+
with gr.Accordion("AI Text Examples", open=False):
|
415 |
+
gr.Examples(
|
416 |
+
examples=AI_texts,
|
417 |
+
inputs=text_input,
|
418 |
+
label="",
|
419 |
+
elem_classes="gr-examples"
|
420 |
+
)
|
421 |
|
422 |
+
with gr.Accordion("Human Text Examples", open=False):
|
423 |
+
gr.Examples(
|
424 |
+
examples=Human_texts,
|
425 |
+
inputs=text_input,
|
426 |
+
label="",
|
427 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
|
429 |
gr.Markdown(bottom_text, elem_id="bottom_text")
|
430 |
|