import os
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from tokenizers import normalizers  # For isinstance check
from tokenizers import Regex
from tokenizers.normalizers import Sequence, Replace, Strip

# --- Model & Tokenizer Configuration ---
model1_path = "https://huggingface.co/spaces/SzegedAI/AI_Detector/resolve/main/modernbert.bin"
model2_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed12"
model3_path = "https://huggingface.co/mihalykiss/modernbert_2/resolve/main/Model_groups_3class_seed22"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

try:
    tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")

    model_1 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
    model_1.load_state_dict(torch.hub.load_state_dict_from_url(model1_path, map_location=device, progress=True))
    model_1.to(device).eval()

    model_2 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
    model_2.load_state_dict(torch.hub.load_state_dict_from_url(model2_path, map_location=device, progress=True))
    model_2.to(device).eval()

    model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=41)
    model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device, progress=True))
    model_3.to(device).eval()
except Exception as e:
    print(f"Error during model loading: {e}")
    tokenizer = None
    model_1, model_2, model_3 = None, None, None

# 41 output classes: 40 AI text generators plus the 'human' class (index 24).
label_mapping = {
    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b', 6: 'bloomz',
    7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b', 11: 'flan_t5_base',
    12: 'flan_t5_large', 13: 'flan_t5_small', 14: 'flan_t5_xl', 15: 'flan_t5_xxl',
    16: 'gemma-7b-it', 17: 'gemma2-9b-it', 18: 'gpt-3.5-turbo', 19: 'gpt-35',
    20: 'gpt4', 21: 'gpt4o', 22: 'gpt_j', 23: 'gpt_neox', 24: 'human',
    25: 'llama3-70b', 26: 'llama3-8b', 27: 'mixtral-8x7b', 28: 'opt_1.3b',
    29: 'opt_125m', 30: 'opt_13b', 31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m',
    34: 'opt_6.7b', 35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b',
    38: 't0_3b', 39: 'text-davinci-002', 40: 'text-davinci-003'
}


def clean_text(text: str) -> str:
    # Collapse runs of whitespace and drop spaces that precede punctuation.
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'\s+([,.;:?!])', r'\1', text)
    return text


if tokenizer:
    custom_normalizers_to_add = [
        Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2"),  # rejoin words hyphenated across line breaks
        Replace(Regex(r'\s*\n\s*'), " "),  # fold remaining line breaks into single spaces
        Strip()
    ]

    current_backend_normalizer = tokenizer.backend_tokenizer.normalizer
    if current_backend_normalizer is None:
        tokenizer.backend_tokenizer.normalizer = Sequence(custom_normalizers_to_add)
    elif isinstance(current_backend_normalizer, normalizers.Sequence):
        # Extend the existing list of normalizers within the Sequence object. For
        # `tokenizers.normalizers.Sequence`, `normalizers` is a list and `extend` modifies it
        # in place, so no re-assignment of tokenizer.backend_tokenizer.normalizer is needed.
        current_backend_normalizer.normalizers.extend(custom_normalizers_to_add)
        # To be safe, one might re-create it instead:
        # tokenizer.backend_tokenizer.normalizer = Sequence(current_backend_normalizer.normalizers)
    else:
        # It's a single normalizer object, not a Sequence.
        tokenizer.backend_tokenizer.normalizer = Sequence([current_backend_normalizer] + custom_normalizers_to_add)
# --- End Model & Tokenizer Configuration ---

title_md = """
Developed by SzegedAI
"""

description = """
This tool utilizes the ModernBERT model to decide whether a given text is human-authored or AI-generated.
It employs a soft voting ensemble of three models to improve detection accuracy.
Paste your text into the field below to analyze its origin.
"""


def classify_text_interface(text):
    """Run the three-model soft-voting ensemble on `text` and return a verdict string."""
    if not all([tokenizer, model_1, model_2, model_3]):
        return "Error: Models not loaded. Please check the console."

    cleaned_text = clean_text(text)
    if not cleaned_text.strip():
        result_message = "Please enter some text to analyze."
        return result_message

    inputs = tokenizer(cleaned_text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)

    with torch.no_grad():
        logits_1 = model_1(**inputs).logits
        logits_2 = model_2(**inputs).logits
        logits_3 = model_3(**inputs).logits

        # Soft voting: average the per-model class probabilities.
        softmax_1 = torch.softmax(logits_1, dim=1)
        softmax_2 = torch.softmax(logits_2, dim=1)
        softmax_3 = torch.softmax(logits_3, dim=1)
        averaged_probabilities = (softmax_1 + softmax_2 + softmax_3) / 3
        probabilities = averaged_probabilities[0]

    # Split the probability mass into the 'human' class and the sum over all AI classes.
    ai_probs = probabilities.clone()

    human_label_index = -1
    for k, v in label_mapping.items():
        if v.lower() == 'human':
            human_label_index = k
            break

    if human_label_index != -1:
        ai_probs[human_label_index] = 0
        human_prob_value = probabilities[human_label_index].item() * 100
    else:
        human_prob_value = 0
        print("Warning: 'human' label not found in label_mapping.")

    ai_total_prob = ai_probs.sum().item() * 100
    ai_argmax_index = torch.argmax(ai_probs).item()
    ai_argmax_model = label_mapping.get(ai_argmax_index, "Unknown AI")

    if human_prob_value > ai_total_prob:
        result_message = f"The text is {human_prob_value:.2f}% likely Human written."
    else:
        result_message = (
            f"The text is {ai_total_prob:.2f}% likely AI generated. "
            f"Most Likely AI Source: {ai_argmax_model} "
            f"(with {probabilities[ai_argmax_index].item()*100:.2f}% confidence among AI models)"
        )
    return result_message


modern_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

/* Define values for light and dark themes */
:root {
    --primary-bg-light: #F4F7FC;
    --app-bg-light: #FFFFFF;
    --text-primary-light: #2C3E50;
    --text-secondary-light: #7F8C8D;
    --accent-color-light: #1ABC9C;
    --accent-color-darker-light: #16A085;
    --border-color-light: #E0E6ED;
    --input-bg-light: #FFFFFF;
    --human-color-light: #2ECC71;
    --human-bg-light: rgba(46, 204, 113, 0.1);
    --ai-color-light: #E74C3C;
    --ai-bg-light: rgba(231, 76, 60, 0.1);
    --shadow-color-light: rgba(44, 62, 80, 0.1);
    --examples-bg-light: #F8F9FA;
    --placeholder-color-light: #B0BEC5;
    --accordion-label-color-light: var(--text-primary-light);
    --accordion-bg-light: var(--app-bg-light);
    --accordion-border-light: var(--border-color-light);
    --sample-textbox-bg-light: var(--input-bg-light);

    --primary-bg-dark: #121828;     /* Even darker body for more contrast */
    --app-bg-dark: #1B2134;         /* Darker app container */
    --text-primary-dark: #E0E7FF;   /* Lighter text for dark mode */
    --text-secondary-dark: #98A0B3; /* Softer secondary text */
    --accent-color-dark: #2CE1C7;   /* Brighter accent */
    --accent-color-darker-dark: #15B8A5;
    --border-color-dark: #2F364D;   /* Subtle borders */
    --input-bg-dark: #22283E;       /* Dark input fields */
    --human-color-dark: #50FA7B;    /* Brighter lime green */
    --human-bg-dark: rgba(80, 250, 123, 0.15);
    --ai-color-dark: #FF79C6;       /* Bright pink/magenta for AI */
    --ai-bg-dark: rgba(255, 121, 198, 0.15);
    --shadow-color-dark: rgba(0, 0, 0, 0.3); /* Shadow for dark mode */
    --examples-bg-dark: #22283E;    /* Examples bg same as input */
    --placeholder-color-dark: #687083;
    --accordion-label-color-dark: var(--text-primary-dark);
    --accordion-bg-dark: var(--app-bg-dark);
    --accordion-border-dark: var(--border-color-dark);
    --sample-textbox-bg-dark: var(--input-bg-dark);

    /* Default to light theme variables */
    --primary-bg: var(--primary-bg-light);
    --app-bg: var(--app-bg-light);
    --text-primary: var(--text-primary-light);
    --text-secondary: var(--text-secondary-light);
    --accent-color: var(--accent-color-light);
    --accent-color-darker: var(--accent-color-darker-light);
    --border-color: var(--border-color-light);
    --input-bg: var(--input-bg-light);
    --input-focus-border: var(--accent-color-light); /* Default focus for light */
    --human-color: var(--human-color-light);
    --human-bg: var(--human-bg-light);
    --ai-color: var(--ai-color-light);
    --ai-bg: var(--ai-bg-light);
    --shadow-color: var(--shadow-color-light);
    --examples-bg: var(--examples-bg-light);
    --placeholder-color: var(--placeholder-color-light);
    --accordion-label-color: var(--accordion-label-color-light);
    --accordion-bg: var(--accordion-bg-light);
    --accordion-border: var(--accordion-border-light);
    --sample-textbox-bg: var(--sample-textbox-bg-light);

    --container-max-width: 800px;
    --border-radius-md: 8px;
    --border-radius-lg: 12px;
}

/* Apply Dark Theme when html.dark class is present (Hugging Face Spaces) */
html.dark {
    --primary-bg: var(--primary-bg-dark); --app-bg: var(--app-bg-dark);
    --text-primary: var(--text-primary-dark); --text-secondary: var(--text-secondary-dark);
    --accent-color: var(--accent-color-dark); --accent-color-darker: var(--accent-color-darker-dark);
    --border-color: var(--border-color-dark); --input-bg: var(--input-bg-dark);
    --input-focus-border: var(--accent-color-dark); /* Focus for dark */
    --human-color: var(--human-color-dark); --human-bg: var(--human-bg-dark);
    --ai-color: var(--ai-color-dark); --ai-bg: var(--ai-bg-dark);
    --shadow-color: var(--shadow-color-dark); --examples-bg: var(--examples-bg-dark);
    --placeholder-color: var(--placeholder-color-dark);
    --accordion-label-color: var(--accordion-label-color-dark); --accordion-bg: var(--accordion-bg-dark);
    --accordion-border: var(--accordion-border-dark); --sample-textbox-bg: var(--sample-textbox-bg-dark);
}

/* Fallback for system preference if html.dark is not set */
@media (prefers-color-scheme: dark) {
    html:not(.dark) :root { /* Apply only if HF class is not already active */
        --primary-bg: var(--primary-bg-dark); --app-bg: var(--app-bg-dark);
        --text-primary: var(--text-primary-dark); --text-secondary: var(--text-secondary-dark);
        --accent-color: var(--accent-color-dark); --accent-color-darker: var(--accent-color-darker-dark);
        --border-color: var(--border-color-dark); --input-bg: var(--input-bg-dark);
        --input-focus-border: var(--accent-color-dark);
        --human-color: var(--human-color-dark); --human-bg: var(--human-bg-dark);
        --ai-color: var(--ai-color-dark); --ai-bg: var(--ai-bg-dark);
        --shadow-color: var(--shadow-color-dark); --examples-bg: var(--examples-bg-dark);
        --placeholder-color: var(--placeholder-color-dark);
        --accordion-label-color: var(--accordion-label-color-dark); --accordion-bg: var(--accordion-bg-dark);
        --accordion-border: var(--accordion-border-dark); --sample-textbox-bg: var(--sample-textbox-bg-dark);
    }
}

.features-list strong::after { content: " "; display: inline-block; width: 0.2em; }

body {
    font-family: 'Inter', sans-serif;
    background: var(--primary-bg);
    color: var(--text-primary);
    margin: 0;
    padding: 20px;
    display: flex;
    justify-content: center;
    align-items: flex-start;
    min-height: 100vh;
    box-sizing: border-box;
    overflow-y: auto;
    transition: background-color 0.2s ease-out, color 0.2s ease-out;
}

.gradio-container {
    background-color: var(--app-bg);
    border-radius: var(--border-radius-lg);
    padding: clamp(25px, 5vw, 40px);
    box-shadow: 0 8px 25px var(--shadow-color);
    max-width: var(--container-max-width);
    width: 100%;
    margin: 20px auto;
    border: 1px solid var(--border-color); /* Add subtle border consistent with theme */
    transition: background-color 0.2s ease-out, box-shadow 0.2s ease-out, border-color 0.2s ease-out;
}

/* Reset Gradio default styles that might interfere */
.form.svelte-633qhp, .block.svelte-11xb1hd, .gradio-html .block, .gradio-markdown > *:first-child {
    background: none !important;
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important; /* Reset padding if it causes issues */
    margin: 0 !important;  /* Reset margin for Markdown wrapper */
}

/* Ensure Markdown text color inherits correctly */
.gradio-markdown p, .gradio-markdown ul, .gradio-markdown li, .gradio-markdown h1, .gradio-markdown h2 { color: inherit !important; }
.gradio-markdown a { color: var(--accent-color) !important; }
.gradio-markdown a:hover { color: var(--accent-color-darker) !important; }

.app-description p {
    color: var(--text-secondary);
    font-size: clamp(14px, 2.5vw, 16px);
    line-height: 1.7;
    margin-bottom: 15px !important; /* Override Gradio's specific p margin */
}
.app-description .instruction-text {
    font-weight: 500;
    color: var(--text-primary);
    margin-top: 20px !important;
    text-align: center;
}

.features-list { list-style: none; padding-left: 0; margin: 20px 0 !important; }
.features-list li {
    display: flex;
    align-items: center;
    font-size: clamp(14px, 2.5vw, 16px);
    color: var(--text-secondary);
    margin-bottom: 12px !important;
    line-height: 1.6;
}
.features-list .icon {
    margin-right: 12px;
    font-size: 1.2em;
    color: var(--accent-color);
    flex-shrink: 0;
}
#text_input_box textarea {
    background-color: var(--input-bg);
    border: 1px solid var(--border-color);
    border-radius: var(--border-radius-md);
    font-size: clamp(15px, 2.5vw, 16px);
    padding: 15px;
    width: 100%;
    box-sizing: border-box;
    color: var(--text-primary);
    transition: background-color 0.2s ease-out, border-color 0.2s ease-out, box-shadow 0.2s ease-out, color 0.2s ease-out;
    min-height: 120px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.03); /* Softer shadow */
}
#text_input_box textarea::placeholder { color: var(--placeholder-color); transition: color 0.2s ease-out; }
#text_input_box textarea:focus {
    border-color: var(--input-focus-border);
    box-shadow: 0 0 0 3px color-mix(in srgb, var(--input-focus-border) 20%, transparent);
    outline: none;
}

#result_output_box {
    background-color: var(--input-bg);
    border: 1px solid var(--border-color);
    border-radius: var(--border-radius-md);
    padding: 20px;
    margin-top: 25px !important; /* Override Gradio */
    width: 100%;
    box-sizing: border-box;
    text-align: center;
    font-size: clamp(16px, 3vw, 17px);
    box-shadow: 0 1px 3px rgba(0,0,0,0.03);
    min-height: 80px;
    display: flex;
    flex-direction: column;
    justify-content: center;
    transition: background-color 0.2s ease-out, border-color 0.2s ease-out, color 0.2s ease-out;
}
#result_output_box p { margin-bottom: 8px !important; line-height: 1.6; color: var(--text-primary) !important; }
#result_output_box p:last-child { margin-bottom: 0 !important; }
#result_output_box strong { color: var(--text-primary) !important; }

.highlight-human, .highlight-ai {
    font-weight: 600;
    padding: 5px 10px;
    border-radius: var(--border-radius-md);
    display: inline-block;
    font-size: 1.05em;
    transition: background-color 0.2s ease-out, color 0.2s ease-out;
}
.highlight-human { color: var(--human-color); background-color: var(--human-bg); }
.highlight-ai { color: var(--ai-color); background-color: var(--ai-bg); }

.gr-accordion {
    border: 1px solid var(--accordion-border) !important;
    border-radius: var(--border-radius-lg) !important;
    box-shadow: none !important;
    padding: 0 15px 15px 15px !important;
    margin-bottom: 20px !important;
    background-color: var(--accordion-bg) !important;
    transition: background-color 0.2s ease-out, border-color 0.2s ease-out;
}
.gr-accordion > .label-wrap button {
    font-weight: 600 !important;
    color: var(--accordion-label-color) !important;
    padding: 15px 0px !important;
    font-size: 1.05em !important;
    transition: color 0.2s ease-out;
}
.gr-accordion > .label-wrap { border-bottom: none !important; }

.gr-examples {
    padding: 15px 0px 0px 0px !important;
    border: none !important;
    border-radius: 0 !important;
    background-color: transparent !important;
    margin-top: 0px !important;
}
.gr-sample-textbox {
    border: 1px solid var(--border-color) !important;
    border-radius: var(--border-radius-md) !important;
    font-size: 14px !important;
    background-color: var(--sample-textbox-bg) !important;
    color: var(--text-primary) !important;
    transition: background-color 0.2s ease-out, border-color 0.2s ease-out, color 0.2s ease-out;
}
.gr-sample-textbox:hover { border-color: var(--accent-color) !important; }

.footer-text, #bottom_text {
    text-align: center;
    margin-top: 40px !important;
    font-size: clamp(13px, 2vw, 14px);
    color: var(--text-secondary);
}
#bottom_text p { margin: 0 !important; }

@media (max-width: 768px) {
    body { padding: 10px; align-items: flex-start; }
    .gradio-container { padding: 20px; margin: 10px; }
    /* h1 { font-size: 22px; } Handled by Markdown inline style which uses clamp */
    .app-description p, .features-list li { font-size: 14px; }
    #text_input_box textarea { font-size: 15px; min-height: 100px; }
    #result_output_box { font-size: 15px; padding: 15px; }
    .gr-accordion > .label-wrap button { padding: 12px 0 !important; }
}
"""

iface = gr.Blocks(css=modern_css, theme=gr.themes.Base(font=[gr.themes.GoogleFont("Inter"), "sans-serif"]))

with iface:
    gr.Markdown(title_md)
    gr.Markdown(description)

    text_input = gr.Textbox(
        label="",
        placeholder="Type or paste your content here...",
        elem_id="text_input_box",
        lines=10
    )

    result_output = gr.HTML(elem_id="result_output_box")

    if all([tokenizer, model_1, model_2, model_3]):
        text_input.change(classify_text_interface, inputs=text_input, outputs=result_output)
    else:
        gr.HTML("Application Error: Models could not be loaded. Please check the server console for details.")
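
# Entry point (assumed sketch, following the conventional Gradio pattern): launch the UI when
# this script is run directly. For a quick check without the UI, classify_text_interface()
# can also be called with any string, provided the tokenizer and all three models loaded.
if __name__ == "__main__":
    iface.launch()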