Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
"""
|
2 |
-
Orify Text Detector β
|
3 |
|
4 |
-
β’ Three ModernBERT-base checkpoints (soft-vote
|
5 |
-
β’ Per-line
|
6 |
-
β’ Weights auto-downloaded
|
7 |
"""
|
8 |
|
9 |
-
# ββ Imports
|
10 |
from pathlib import Path
|
11 |
import os, re, html, typing
|
12 |
import torch, gradio as gr
|
@@ -14,158 +14,115 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
14 |
from huggingface_hub import hf_hub_download
|
15 |
import spaces
|
16 |
|
17 |
-
#
|
18 |
if hasattr(torch, "compile"):
|
19 |
def _no_compile(model: typing.Any = None, *args, **kwargs):
|
20 |
-
|
21 |
-
β’ torch.compile(model, β¦) β return model unchanged
|
22 |
-
β’ @torch.compile(**kw) decorator β return identity decorator
|
23 |
-
"""
|
24 |
-
if callable(model): # direct call pattern
|
25 |
return model
|
26 |
-
|
27 |
-
def decorator(fn): # decorator pattern
|
28 |
-
return fn
|
29 |
-
return decorator
|
30 |
-
|
31 |
torch.compile = _no_compile
|
32 |
-
os.environ["TORCHINDUCTOR_DISABLED"] = "1"
|
33 |
-
|
34 |
-
# ββ Config
|
35 |
-
DEVICE
|
36 |
-
WEIGHT_REPO
|
37 |
-
FILE_MAP
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
}
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
if Path(__file__).with_name("style.css").exists()
|
62 |
-
else """
|
63 |
-
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
|
64 |
-
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
|
65 |
-
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
|
66 |
-
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
|
67 |
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
|
68 |
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
|
69 |
"""
|
70 |
-
)
|
71 |
|
72 |
-
# ββ
|
73 |
print("π Downloading weights β¦")
|
74 |
-
local_paths = {
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
)
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
def infer(segment: str):
|
100 |
-
"""Return (human%, ai%, list[top-3 AI names])."""
|
101 |
-
inputs = tokenizer(segment, return_tensors="pt", truncation=True,
|
102 |
-
padding=True).to(DEVICE)
|
103 |
with torch.no_grad():
|
104 |
-
probs
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
ai_score = ai_probs.sum().item() * 100
|
110 |
-
human_score = 100 - ai_score
|
111 |
-
top3 = torch.topk(ai_probs, 3).indices.tolist()
|
112 |
-
return human_score, ai_score, [LABELS[i] for i in top3]
|
113 |
|
114 |
-
# ββ
|
115 |
@spaces.GPU
|
116 |
-
def analyse(
|
117 |
-
if not
|
118 |
return "βοΈ Please paste or type some text to analyseβ¦"
|
119 |
-
|
120 |
-
|
121 |
-
highlighted, h_sum, ai_sum, n = [], 0.0, 0.0, 0
|
122 |
-
|
123 |
for ln in lines:
|
124 |
-
if not ln.strip():
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
cls = "ai-line" if ai_p > human_p else "human-line"
|
138 |
-
span = (
|
139 |
-
f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
|
140 |
-
f"{html.escape(ln)}</span>"
|
141 |
-
)
|
142 |
-
highlighted.append(span)
|
143 |
-
|
144 |
-
human_avg, ai_avg = h_sum / n, ai_sum / n
|
145 |
-
verdict = (
|
146 |
-
f"<p><strong>Overall verdict:</strong> "
|
147 |
-
f"<span class='human-line' style='padding:4px 8px;'>"
|
148 |
-
f"Human-written {human_avg:.2f}%</span>"
|
149 |
-
if human_avg >= ai_avg else
|
150 |
-
f"<p><strong>Overall verdict:</strong> "
|
151 |
-
f"<span class='ai-line' style='padding:4px 8px;'>"
|
152 |
-
f"AI-generated {ai_avg:.2f}%</span>"
|
153 |
-
)
|
154 |
-
return verdict + "<hr>" + "<br>".join(highlighted)
|
155 |
|
156 |
# ββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
157 |
-
with gr.Blocks(css=CSS,
|
158 |
-
gr.Markdown(""
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
inp
|
166 |
-
|
167 |
-
gr.
|
|
|
168 |
gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble Β© 2025</sub>")
|
169 |
|
170 |
-
if __name__
|
171 |
demo.launch()
|
|
|
1 |
"""
|
2 |
+
Orify Text Detector — full-width UI + explicit verdict (Zero-GPU ready)
|
3 |
|
4 |
+
• Three ModernBERT-base checkpoints (soft-vote)
|
5 |
+
• Per-line highlights, hover tool-tips, and a big verdict banner
|
6 |
+
• Weights auto-downloaded & cached
|
7 |
"""
|
8 |
|
9 |
+
# ββ Imports βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
10 |
from pathlib import Path
|
11 |
import os, re, html, typing
|
12 |
import torch, gradio as gr
|
|
|
14 |
from huggingface_hub import hf_hub_download
|
15 |
import spaces
|
16 |
|
17 |
+
# ββ Robust torch.compile shim (same as before) ββββββββββββββββββββββββββ
|
18 |
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that performs no compilation.

        Called directly with a callable (``torch.compile(model, ...)``) it
        hands that callable back untouched.  Called without one (the
        ``@torch.compile(**kw)`` decorator form) it returns an identity
        decorator.  Extra positional and keyword arguments are ignored.
        """
        def _identity(fn):
            return fn

        return model if callable(model) else _identity

    # Replace the real entry point so every later torch.compile call is a no-op.
    torch.compile = _no_compile
    # NOTE(review): indentation is lost in this view; the flag is assumed to
    # sit inside the ``hasattr`` guard next to the shim — confirm.
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
|
25 |
+
|
26 |
+
# ββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
27 |
+
# Run on CUDA when torch reports it as available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hub repository holding the fine-tuned ensemble checkpoints.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# alias -> filename inside WEIGHT_REPO (each alias equals its filename).
FILE_MAP = {
    name: name
    for name in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

BASE_MODEL = "answerdotai/ModernBERT-base"
NUM_LABELS = 41

# Class index -> label name; one entry per classifier output (41 in total).
LABELS = dict(enumerate((
    "13B", "30B", "65B", "7B", "GLM130B", "bloom_7b", "bloomz", "cohere",
    "davinci", "dolly", "dolly-v2-12b", "flan_t5_base", "flan_t5_large",
    "flan_t5_small", "flan_t5_xl", "flan_t5_xxl", "gemma-7b-it",
    "gemma2-9b-it", "gpt-3.5-turbo", "gpt-35", "gpt-4", "gpt-4o", "gpt-j",
    "gpt-neox", "human", "llama3-70b", "llama3-8b", "mixtral-8x7b",
    "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b", "opt-30b", "opt-350m",
    "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b", "t0-11b", "t0-3b",
    "text-davinci-002", "text-davinci-003",
)))

# ── CSS (full-width layout) ─────────────────────────────────────────────
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
|
|
|
56 |
|
57 |
+
# ββ Load weights & models - one time βββββββββββββββββββββββββββββββββββββ
|
58 |
print("π Downloading weights β¦")
# ``resume_download`` is deprecated in huggingface_hub (interrupted downloads
# resume on their own), so it is no longer passed.
local_paths = {
    alias: hf_hub_download(WEIGHT_REPO, filename)
    for alias, filename in FILE_MAP.items()
}

print("π§© Initialising models β¦")
tok = AutoTokenizer.from_pretrained(BASE_MODEL)
models = []
for p in local_paths.values():
    # Build the base architecture with the ensemble's classification head
    # size, then overwrite its parameters with the fine-tuned checkpoint.
    m = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=NUM_LABELS
    )
    # weights_only=True stops the unpickler from executing arbitrary code in
    # a downloaded file.  The result goes straight into load_state_dict, so
    # the checkpoints are presumably plain tensor state dicts — confirm.
    m.load_state_dict(torch.load(p, map_location=DEVICE, weights_only=True))
    m.to(DEVICE).eval()
    models.append(m)
|
71 |
+
|
72 |
+
# ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
73 |
+
def tidy(txt: str) -> str:
    """Normalise pasted text into clean paragraphs.

    Steps, in order:
      1. convert CRLF / CR line endings to LF;
      2. collapse runs of spaces and tabs to a single space;
      3. drop the space left directly before or after a line break, so that
         trailing or leading blanks cannot turn into double spaces or block
         the de-hyphenation step;
      4. squeeze any run of blank lines to exactly one blank line;
      5. re-join words hyphenated across a line break ("exam-\\nple");
      6. replace the remaining single line breaks with a space, so each
         paragraph ends up on one line;
      7. strip leading and trailing whitespace.

    Args:
        txt: Raw text.

    Returns:
        The cleaned text, with paragraphs separated by one blank line.
    """
    txt = txt.replace("\r\n", "\n").replace("\r", "\n")
    txt = re.sub(r"[ \t]+", " ", txt)
    txt = re.sub(r" ?\n ?", "\n", txt)
    txt = re.sub(r"\n\s*\n+", "\n\n", txt)
    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)
    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)
    return txt.strip()
|
80 |
+
|
81 |
+
def infer(seg: str):
    """Score one text segment with the model ensemble.

    Each model's logits are turned into probabilities with softmax and the
    probabilities are averaged across models (soft vote).  The probability
    mass on every class other than "human" is reported as the AI score.

    Args:
        seg: Text to classify.

    Returns:
        A tuple ``(human, ai, top3)``: ``ai`` is the summed non-human
        probability as a percentage, ``human`` is ``100 - ai``, and ``top3``
        holds the label names of the three most probable non-human classes.
    """
    # Look the human class up by name rather than hard-coding its index, so
    # reordering LABELS cannot silently zero out the wrong class.
    human_idx = next(i for i, name in LABELS.items() if name == "human")

    inp = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        per_model = [torch.softmax(m(**inp).logits, dim=1) for m in models]
        # Average over the models, then take the first row of the batch.
        probs = torch.stack(per_model).mean(0)[0]

    # Zero the human entry on a copy so the sum and top-k cover AI classes only.
    ai_probs = probs.clone()
    ai_probs[human_idx] = 0

    ai = ai_probs.sum().item() * 100
    human = 100 - ai
    top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
    return human, ai, top3
|
|
|
|
|
|
|
|
|
89 |
|
90 |
+
# ββ Main analyse fn βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
91 |
@spaces.GPU
def analyse(txt: str):
    """Classify *txt* line by line and render the result as HTML.

    The text is cleaned with ``tidy`` and split on line breaks.  Every
    non-blank line is scored with ``infer`` and wrapped in a coloured span
    whose tooltip shows its score; blank lines become ``<br>``.  The
    per-line scores are averaged into a verdict banner placed above the
    highlighted lines.

    Args:
        txt: Text entered by the user.

    Returns:
        An HTML string, or a short prompt when *txt* is empty or whitespace.
    """
    if not txt.strip():
        return "βοΈ Please paste or type some text to analyseβ¦"

    rendered = []
    human_total = 0.0
    ai_total = 0.0
    scored = 0

    for line in tidy(txt).split("\n"):
        if not line.strip():
            rendered.append("<br>")
            continue

        scored += 1
        human_pct, ai_pct, leaders = infer(line)
        human_total += human_pct
        ai_total += ai_pct

        if ai_pct > human_pct:
            css_class = "ai-line"
            hint = f"AI {ai_pct:.2f}% β’ Top-3: {', '.join(leaders)}"
        else:
            css_class = "human-line"
            hint = f"Human {human_pct:.2f}%"

        rendered.append(
            f"<span class='{css_class} prob-tooltip' title='{hint}'>"
            f"{html.escape(line)}</span>"
        )

    human_avg = human_total / scored
    ai_avg = ai_total / scored

    # A tie goes to the human verdict.
    if human_avg >= ai_avg:
        verdict = (
            "<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
            f"Human-written {human_avg:.2f}%</span>"
        )
    else:
        verdict = (
            "<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
            f"AI-generated {ai_avg:.2f}%</span>"
        )

    return f"<h3>{verdict}</h3><hr>" + "<br>".join(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
# ββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
113 |
+
# Page layout: heading, usage hint, input box, trigger button, result pane.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("## Orify Text Detector")
    gr.Markdown(
        "Paste text, click **Analyse**.<br>"
        "<span class='human-line'>Green</span>=human | "
        "<span class='ai-line'>Red</span>=AI.<br>"
        "Hover a line to see confidence & top-3 AI models."
    )
    inp = gr.Textbox(lines=8, placeholder="Paste text here β¦")
    btn = gr.Button("Analyse")
    out = gr.HTML(elem_classes=["output-box"])
    # Clicking the button sends the textbox content through analyse() and
    # shows the returned HTML in the output pane.
    btn.click(fn=analyse, inputs=inp, outputs=out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble Β© 2025</sub>")

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|