Spaces:

Sleepyriizi
/

Orify-Text-Detection

Running on Zero

App Files Community

Sleepyriizi committed 16 days ago

Commit

f1ccd02

verified ·

1 Parent(s): 919951a

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -141

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 """
-  Orify Text Detector  – Hugging Face Space (Zero-GPU ready)
-  • Three ModernBERT-base checkpoints (soft-vote ensemble)
-  • Per-line colour coding, probability tool-tips, top-3 AI model hints
-  • Weights auto-downloaded once and cached on the Zero-GPU T4
 """
-# ── Imports ──────────────────────────────────────────────────────────────
 from pathlib import Path
 import os, re, html, typing
 import torch, gradio as gr
@@ -14,158 +14,115 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import hf_hub_download
 import spaces
-# ───────────────── torch.compile hot-patch ───────────────────────────────
 if hasattr(torch, "compile"):
     def _no_compile(model: typing.Any = None, *args, **kwargs):
-        """
-        • torch.compile(model, …)           → return model unchanged
-        • @torch.compile(**kw) decorator    → return identity decorator
-        """
-        if callable(model):                # direct call pattern
             return model
-        def decorator(fn):                 # decorator pattern
-            return fn
-        return decorator
     torch.compile = _no_compile
-    os.environ["TORCHINDUCTOR_DISABLED"] = "1"  # extra safety
-# ── Config / constants ───────────────────────────────────────────────────
-DEVICE        = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-WEIGHT_REPO   = "Sleepyriizi/Orify-Text-Detection-Weights"
-FILE_MAP      = {
-    "ensamble_1"     : "ensamble_1",
-    "ensamble_2.bin" : "ensamble_2.bin",
-    "ensamble_3"     : "ensamble_3",
-}
-BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
-NUM_LABELS       = 41
-LABELS = {  # id → readable label
-    0:"13B",1:"30B",2:"65B",3:"7B",4:"GLM130B",5:"bloom_7b",6:"bloomz",7:"cohere",
-    8:"davinci",9:"dolly",10:"dolly-v2-12b",11:"flan_t5_base",12:"flan_t5_large",
-    13:"flan_t5_small",14:"flan_t5_xl",15:"flan_t5_xxl",16:"gemma-7b-it",
-    17:"gemma2-9b-it",18:"gpt-3.5-turbo",19:"gpt-35",20:"gpt-4",21:"gpt-4o",
-    22:"gpt-j",23:"gpt-neox",24:"human",25:"llama3-70b",26:"llama3-8b",
-    27:"mixtral-8x7b",28:"opt-1.3b",29:"opt-125m",30:"opt-13b",31:"opt-2.7b",
-    32:"opt-30b",33:"opt-350m",34:"opt-6.7b",35:"opt-iml-30b",
-    36:"opt-iml-max-1.3b",37:"t0-11b",38:"t0-3b",39:"text-davinci-002",
-    40:"text-davinci-003"
-}
-# ── CSS (inline fallback) ────────────────────────────────────────────────
-CSS = (
-    Path(__file__).with_name("style.css").read_text()
-    if Path(__file__).with_name("style.css").exists()
-    else """
-:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
-body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
-textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
-.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
 .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
 .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
 """
-)
-# ── Weight download & model init ─────────────────────────────────────────
 print("🔄 Downloading weights …")
-local_paths = {
-    alias: hf_hub_download(WEIGHT_REPO, remote, resume_download=True)
-    for alias, remote in FILE_MAP.items()
-}
-print("🧩 Loading tokenizer & ensemble …")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
-models = []
-for path in local_paths.values():
-    net = AutoModelForSequenceClassification.from_pretrained(
-        BASE_MODEL_NAME, num_labels=NUM_LABELS
-    )
-    net.load_state_dict(torch.load(path, map_location=DEVICE))
-    net.to(DEVICE).eval()
-    models.append(net)
-# ── Helper functions ─────────────────────────────────────────────────────
-def tidy(text: str) -> str:
-    text = text.replace("\r\n", "\n").replace("\r", "\n")
-    text = re.sub(r"\n\s*\n+", "\n\n", text)
-    text = re.sub(r"[ \t]+", " ", text)
-    text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
-    text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
-    return text.strip()
-def infer(segment: str):
-    """Return (human%, ai%, list[top-3 AI names])."""
-    inputs = tokenizer(segment, return_tensors="pt", truncation=True,
-                       padding=True).to(DEVICE)
     with torch.no_grad():
-        probs = torch.stack([
-            torch.softmax(m(**inputs).logits, dim=1) for m in models
-        ]).mean(0)[0]
-    ai_probs = probs.clone(); ai_probs[24] = 0               # remove 'human'
-    ai_score   = ai_probs.sum().item() * 100
-    human_score = 100 - ai_score
-    top3 = torch.topk(ai_probs, 3).indices.tolist()
-    return human_score, ai_score, [LABELS[i] for i in top3]
-# ── Inference with explanations ─────────────────────────────────────────
 @spaces.GPU
-def analyse(text: str):
-    if not text.strip():
         return "✏️ Please paste or type some text to analyse…"
-    lines = tidy(text).split("\n")
-    highlighted, h_sum, ai_sum, n = [], 0.0, 0.0, 0
     for ln in lines:
-        if not ln.strip():
-            highlighted.append("<br>")
-            continue
-        n += 1
-        human_p, ai_p, top3 = infer(ln)
-        h_sum += human_p
-        ai_sum += ai_p
-        tooltip = (
-            f"AI {ai_p:.2f}% • Top-3: {', '.join(top3)}"
-            if ai_p > human_p else f"Human {human_p:.2f}%"
-        )
-        cls = "ai-line" if ai_p > human_p else "human-line"
-        span = (
-            f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
-            f"{html.escape(ln)}</span>"
-        )
-        highlighted.append(span)
-    human_avg, ai_avg = h_sum / n, ai_sum / n
-    verdict = (
-        f"<p><strong>Overall verdict:</strong> "
-        f"<span class='human-line' style='padding:4px 8px;'>"
-        f"Human-written {human_avg:.2f}%</span>"
-        if human_avg >= ai_avg else
-        f"<p><strong>Overall verdict:</strong> "
-        f"<span class='ai-line' style='padding:4px 8px;'>"
-        f"AI-generated {ai_avg:.2f}%</span>"
-    )
-    return verdict + "<hr>" + "<br>".join(highlighted)
 # ── Gradio UI ───────────────────────────────────────────────────────────
-with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
-    gr.Markdown("""
-    ### Orify Text Detector
-    Paste any English text and press **Analyse**.<br>
-    <span class='human-line'>Green</span> = human |
-    <span class='ai-line'>Red</span> = AI.<br>
-    Hover a line to see confidence & the top-3 AI models it matches.
-    """)
-    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
-    out = gr.HTML(elem_classes=["output-box"])
-    gr.Button("Analyse").click(analyse, inp, out)
     gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
-if __name__ == "__main__":
     demo.launch()

 """
+  Orify Text Detector  – full-width UI + explicit verdict (Zero-GPU ready)
+  • Three ModernBERT-base checkpoints (soft-vote)
+  • Per-line highlights, hover tool-tips, and a big verdict banner
+  • Weights auto-downloaded & cached
 """
+# ── Imports ─────────────────────────────────────────────────────────────
 from pathlib import Path
 import os, re, html, typing
 import torch, gradio as gr
 from huggingface_hub import hf_hub_download
 import spaces
+# ── Robust torch.compile shim (same as before) ──────────────────────────
 if hasattr(torch, "compile"):
     def _no_compile(model: typing.Any = None, *args, **kwargs):
+        if callable(model):
             return model
+        return lambda fn: fn
     torch.compile = _no_compile
+    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
+# ── Config ──────────────────────────────────────────────────────────────
+DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
+FILE_MAP    = {"ensamble_1":"ensamble_1",
+               "ensamble_2.bin":"ensamble_2.bin",
+               "ensamble_3":"ensamble_3"}
+BASE_MODEL  = "answerdotai/ModernBERT-base"
+NUM_LABELS  = 41
+LABELS = {i:name for i, name in enumerate([
+    "13B","30B","65B","7B","GLM130B","bloom_7b","bloomz","cohere","davinci",
+    "dolly","dolly-v2-12b","flan_t5_base","flan_t5_large","flan_t5_small",
+    "flan_t5_xl","flan_t5_xxl","gemma-7b-it","gemma2-9b-it","gpt-3.5-turbo",
+    "gpt-35","gpt-4","gpt-4o","gpt-j","gpt-neox","human","llama3-70b",
+    "llama3-8b","mixtral-8x7b","opt-1.3b","opt-125m","opt-13b","opt-2.7b",
+    "opt-30b","opt-350m","opt-6.7b","opt-iml-30b","opt-iml-max-1.3b",
+    "t0-11b","t0-3b","text-davinci-002","text-davinci-003"
+])}
+# ── CSS (full-width layout) ─────────────────────────────────────────────
+CSS = """
+:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
+body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
+input,textarea,.output-box{width:100%;box-sizing:border-box}
+textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
+.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
+.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
 .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
 .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
 """
+# ── Load weights & models - one time ─────────────────────────────────────
 print("🔄 Downloading weights …")
+local_paths = {a:hf_hub_download(WEIGHT_REPO,f,resume_download=True)
+               for a,f in FILE_MAP.items()}
+print("🧩 Initialising models …")
+tok = AutoTokenizer.from_pretrained(BASE_MODEL)
+models=[]
+for p in local_paths.values():
+    m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL,
+                                                           num_labels=NUM_LABELS)
+    m.load_state_dict(torch.load(p,map_location=DEVICE))
+    m.to(DEVICE).eval()
+    models.append(m)
+# ── Helpers ─────────────────────────────────────────────────────────────
+def tidy(txt:str)->str:
+    txt=txt.replace("\r\n","\n").replace("\r","\n")
+    txt=re.sub(r"\n\s*\n+","\n\n",txt)
+    txt=re.sub(r"[ \t]+"," ",txt)
+    txt=re.sub(r"(\w+)-\n(\w+)",r"\1\2",txt)
+    txt=re.sub(r"(?<!\n)\n(?!\n)"," ",txt)
+    return txt.strip()
+def infer(seg:str):
+    inp=tok(seg,return_tensors="pt",truncation=True,padding=True).to(DEVICE)
     with torch.no_grad():
+        probs=torch.stack([torch.softmax(m(**inp).logits,dim=1) for m in models]).mean(0)[0]
+    ai_probs=probs.clone(); ai_probs[24]=0
+    ai=ai_probs.sum().item()*100; human=100-ai
+    top3=[LABELS[i] for i in torch.topk(ai_probs,3).indices.tolist()]
+    return human,ai,top3
+# ── Main analyse fn ─────────────────────────────────────────────────────
 @spaces.GPU
+def analyse(txt:str):
+    if not txt.strip():
         return "✏️ Please paste or type some text to analyse…"
+    lines=tidy(txt).split("\n")
+    out, h_sum, ai_sum, n=[],0.0,0.0,0
     for ln in lines:
+        if not ln.strip(): out.append("<br>"); continue
+        n+=1
+        h,ai,top3=infer(ln); h_sum+=h; ai_sum+=ai
+        cls="ai-line" if ai>h else "human-line"
+        tip=f"AI {ai:.2f}% • Top-3: {', '.join(top3)}" if ai>h else f"Human {h:.2f}%"
+        out.append(f"<span class='{cls} prob-tooltip' title='{tip}'>{html.escape(ln)}</span>")
+    human_avg,ai_avg=h_sum/n,ai_sum/n
+    verdict=(f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
+             f"Human-written {human_avg:.2f}%</span>"
+             if human_avg>=ai_avg else
+             f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
+             f"AI-generated {ai_avg:.2f}%</span>")
+    return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)
 # ── Gradio UI ───────────────────────────────────────────────────────────
+with gr.Blocks(css=CSS,title="Orify Text Detector") as demo:
+    gr.Markdown("## Orify Text Detector")
+    gr.Markdown(
+        "Paste text, click **Analyse**.<br>"
+        "<span class='human-line'>Green</span>=human &nbsp;|&nbsp; "
+        "<span class='ai-line'>Red</span>=AI.<br>"
+        "Hover a line to see confidence & top-3 AI models."
+    )
+    inp=gr.Textbox(lines=8,placeholder="Paste text here …")
+    btn=gr.Button("Analyse")
+    out=gr.HTML(elem_classes=["output-box"])
+    btn.click(analyse,inp,out)
     gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
+if __name__=="__main__":
     demo.launch()