Sleepyriizi committed on
Commit
f1ccd02
·
verified ·
1 Parent(s): 919951a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -141
app.py CHANGED
@@ -1,12 +1,12 @@
1
  """
2
- Orify Text Detector – Hugging Face Space (Zero-GPU ready)
3
 
4
- • Three ModernBERT-base checkpoints (soft-vote ensemble)
5
- • Per-line colour coding, probability tool-tips, top-3 AI model hints
6
- • Weights auto-downloaded once and cached on the Zero-GPU T4
7
  """
8
 
9
- # ── Imports ──────────────────────────────────────────────────────────────
10
  from pathlib import Path
11
  import os, re, html, typing
12
  import torch, gradio as gr
@@ -14,158 +14,115 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
14
  from huggingface_hub import hf_hub_download
15
  import spaces
16
 
17
- # ───────────────── torch.compile hot-patch ───────────────────────────────
18
  if hasattr(torch, "compile"):
19
  def _no_compile(model: typing.Any = None, *args, **kwargs):
20
- """
21
- • torch.compile(model, …) → return model unchanged
22
- • @torch.compile(**kw) decorator → return identity decorator
23
- """
24
- if callable(model): # direct call pattern
25
  return model
26
-
27
- def decorator(fn): # decorator pattern
28
- return fn
29
- return decorator
30
-
31
  torch.compile = _no_compile
32
- os.environ["TORCHINDUCTOR_DISABLED"] = "1" # extra safety
33
-
34
- # ── Config / constants ───────────────────────────────────────────────────
35
- DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
- WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
37
- FILE_MAP = {
38
- "ensamble_1" : "ensamble_1",
39
- "ensamble_2.bin" : "ensamble_2.bin",
40
- "ensamble_3" : "ensamble_3",
41
- }
42
-
43
- BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
44
- NUM_LABELS = 41
45
-
46
- LABELS = { # id → readable label
47
- 0:"13B",1:"30B",2:"65B",3:"7B",4:"GLM130B",5:"bloom_7b",6:"bloomz",7:"cohere",
48
- 8:"davinci",9:"dolly",10:"dolly-v2-12b",11:"flan_t5_base",12:"flan_t5_large",
49
- 13:"flan_t5_small",14:"flan_t5_xl",15:"flan_t5_xxl",16:"gemma-7b-it",
50
- 17:"gemma2-9b-it",18:"gpt-3.5-turbo",19:"gpt-35",20:"gpt-4",21:"gpt-4o",
51
- 22:"gpt-j",23:"gpt-neox",24:"human",25:"llama3-70b",26:"llama3-8b",
52
- 27:"mixtral-8x7b",28:"opt-1.3b",29:"opt-125m",30:"opt-13b",31:"opt-2.7b",
53
- 32:"opt-30b",33:"opt-350m",34:"opt-6.7b",35:"opt-iml-30b",
54
- 36:"opt-iml-max-1.3b",37:"t0-11b",38:"t0-3b",39:"text-davinci-002",
55
- 40:"text-davinci-003"
56
- }
57
-
58
- # ── CSS (inline fallback) ────────────────────────────────────────────────
59
- CSS = (
60
- Path(__file__).with_name("style.css").read_text()
61
- if Path(__file__).with_name("style.css").exists()
62
- else """
63
- :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
64
- body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
65
- textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
66
- .output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
67
  .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
68
  .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
69
  """
70
- )
71
 
72
- # ── Weight download & model init ─────────────────────────────────────────
73
  print("🔄 Downloading weights …")
74
- local_paths = {
75
- alias: hf_hub_download(WEIGHT_REPO, remote, resume_download=True)
76
- for alias, remote in FILE_MAP.items()
77
- }
78
-
79
- print("🧩 Loading tokenizer & ensemble …")
80
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
81
- models = []
82
- for path in local_paths.values():
83
- net = AutoModelForSequenceClassification.from_pretrained(
84
- BASE_MODEL_NAME, num_labels=NUM_LABELS
85
- )
86
- net.load_state_dict(torch.load(path, map_location=DEVICE))
87
- net.to(DEVICE).eval()
88
- models.append(net)
89
-
90
- # ── Helper functions ─────────────────────────────────────────────────────
91
- def tidy(text: str) -> str:
92
- text = text.replace("\r\n", "\n").replace("\r", "\n")
93
- text = re.sub(r"\n\s*\n+", "\n\n", text)
94
- text = re.sub(r"[ \t]+", " ", text)
95
- text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
96
- text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
97
- return text.strip()
98
-
99
- def infer(segment: str):
100
- """Return (human%, ai%, list[top-3 AI names])."""
101
- inputs = tokenizer(segment, return_tensors="pt", truncation=True,
102
- padding=True).to(DEVICE)
103
  with torch.no_grad():
104
- probs = torch.stack([
105
- torch.softmax(m(**inputs).logits, dim=1) for m in models
106
- ]).mean(0)[0]
107
-
108
- ai_probs = probs.clone(); ai_probs[24] = 0 # remove 'human'
109
- ai_score = ai_probs.sum().item() * 100
110
- human_score = 100 - ai_score
111
- top3 = torch.topk(ai_probs, 3).indices.tolist()
112
- return human_score, ai_score, [LABELS[i] for i in top3]
113
 
114
- # ── Inference with explanations ─────────────────────────────────────────
115
  @spaces.GPU
116
- def analyse(text: str):
117
- if not text.strip():
118
  return "✏️ Please paste or type some text to analyse…"
119
-
120
- lines = tidy(text).split("\n")
121
- highlighted, h_sum, ai_sum, n = [], 0.0, 0.0, 0
122
-
123
  for ln in lines:
124
- if not ln.strip():
125
- highlighted.append("<br>")
126
- continue
127
-
128
- n += 1
129
- human_p, ai_p, top3 = infer(ln)
130
- h_sum += human_p
131
- ai_sum += ai_p
132
-
133
- tooltip = (
134
- f"AI {ai_p:.2f}% • Top-3: {', '.join(top3)}"
135
- if ai_p > human_p else f"Human {human_p:.2f}%"
136
- )
137
- cls = "ai-line" if ai_p > human_p else "human-line"
138
- span = (
139
- f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
140
- f"{html.escape(ln)}</span>"
141
- )
142
- highlighted.append(span)
143
-
144
- human_avg, ai_avg = h_sum / n, ai_sum / n
145
- verdict = (
146
- f"<p><strong>Overall verdict:</strong> "
147
- f"<span class='human-line' style='padding:4px 8px;'>"
148
- f"Human-written {human_avg:.2f}%</span>"
149
- if human_avg >= ai_avg else
150
- f"<p><strong>Overall verdict:</strong> "
151
- f"<span class='ai-line' style='padding:4px 8px;'>"
152
- f"AI-generated {ai_avg:.2f}%</span>"
153
- )
154
- return verdict + "<hr>" + "<br>".join(highlighted)
155
 
156
  # ── Gradio UI ───────────────────────────────────────────────────────────
157
- with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
158
- gr.Markdown("""
159
- ### Orify Text Detector
160
- Paste any English text and press **Analyse**.<br>
161
- <span class='human-line'>Green</span> = human | 
162
- <span class='ai-line'>Red</span> = AI.<br>
163
- Hover a line to see confidence & the top-3 AI models it matches.
164
- """)
165
- inp = gr.Textbox(lines=8, placeholder="Paste text here …")
166
- out = gr.HTML(elem_classes=["output-box"])
167
- gr.Button("Analyse").click(analyse, inp, out)
 
168
  gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
169
 
170
- if __name__ == "__main__":
171
  demo.launch()
 
1
  """
2
+ Orify Text Detector – full-width UI + explicit verdict (Zero-GPU ready)
3
 
4
+ • Three ModernBERT-base checkpoints (soft-vote)
5
+ • Per-line highlights, hover tool-tips, and a big verdict banner
6
+ • Weights auto-downloaded & cached
7
  """
8
 
9
+ # ── Imports ─────────────────────────────────────────────────────────────
10
  from pathlib import Path
11
  import os, re, html, typing
12
  import torch, gradio as gr
 
14
  from huggingface_hub import hf_hub_download
15
  import spaces
16
 
17
+ # ── Robust torch.compile shim (same as before) ──────────────────────────
18
  if hasattr(torch, "compile"):
19
  def _no_compile(model: typing.Any = None, *args, **kwargs):
20
+ if callable(model):
 
 
 
 
21
  return model
22
+ return lambda fn: fn
 
 
 
 
23
  torch.compile = _no_compile
24
+ os.environ["TORCHINDUCTOR_DISABLED"] = "1"
25
+
26
+ # ── Config ──────────────────────────────────────────────────────────────
27
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
+ WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
29
+ FILE_MAP = {"ensamble_1":"ensamble_1",
30
+ "ensamble_2.bin":"ensamble_2.bin",
31
+ "ensamble_3":"ensamble_3"}
32
+ BASE_MODEL = "answerdotai/ModernBERT-base"
33
+ NUM_LABELS = 41
34
+
35
+ LABELS = {i:name for i, name in enumerate([
36
+ "13B","30B","65B","7B","GLM130B","bloom_7b","bloomz","cohere","davinci",
37
+ "dolly","dolly-v2-12b","flan_t5_base","flan_t5_large","flan_t5_small",
38
+ "flan_t5_xl","flan_t5_xxl","gemma-7b-it","gemma2-9b-it","gpt-3.5-turbo",
39
+ "gpt-35","gpt-4","gpt-4o","gpt-j","gpt-neox","human","llama3-70b",
40
+ "llama3-8b","mixtral-8x7b","opt-1.3b","opt-125m","opt-13b","opt-2.7b",
41
+ "opt-30b","opt-350m","opt-6.7b","opt-iml-30b","opt-iml-max-1.3b",
42
+ "t0-11b","t0-3b","text-davinci-002","text-davinci-003"
43
+ ])}
44
+
45
+ # ── CSS (full-width layout) ─────────────────────────────────────────────
46
+ CSS = """
47
+ :root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
48
+ body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
49
+ input,textarea,.output-box{width:100%;box-sizing:border-box}
50
+ textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
51
+ .output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
52
+ .ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
 
 
 
 
 
 
53
  .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
54
  .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
55
  """
 
56
 
57
+ # ── Load weights & models - one time ─────────────────────────────────────
58
  print("🔄 Downloading weights …")
59
+ local_paths = {a:hf_hub_download(WEIGHT_REPO,f,resume_download=True)
60
+ for a,f in FILE_MAP.items()}
61
+
62
+ print("🧩 Initialising models …")
63
+ tok = AutoTokenizer.from_pretrained(BASE_MODEL)
64
+ models=[]
65
+ for p in local_paths.values():
66
+ m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL,
67
+ num_labels=NUM_LABELS)
68
+ m.load_state_dict(torch.load(p,map_location=DEVICE))
69
+ m.to(DEVICE).eval()
70
+ models.append(m)
71
+
72
+ # ── Helpers ─────────────────────────────────────────────────────────────
73
+ def tidy(txt:str)->str:
74
+ txt=txt.replace("\r\n","\n").replace("\r","\n")
75
+ txt=re.sub(r"\n\s*\n+","\n\n",txt)
76
+ txt=re.sub(r"[ \t]+"," ",txt)
77
+ txt=re.sub(r"(\w+)-\n(\w+)",r"\1\2",txt)
78
+ txt=re.sub(r"(?<!\n)\n(?!\n)"," ",txt)
79
+ return txt.strip()
80
+
81
+ def infer(seg:str):
82
+ inp=tok(seg,return_tensors="pt",truncation=True,padding=True).to(DEVICE)
 
 
 
 
 
83
  with torch.no_grad():
84
+ probs=torch.stack([torch.softmax(m(**inp).logits,dim=1) for m in models]).mean(0)[0]
85
+ ai_probs=probs.clone(); ai_probs[24]=0
86
+ ai=ai_probs.sum().item()*100; human=100-ai
87
+ top3=[LABELS[i] for i in torch.topk(ai_probs,3).indices.tolist()]
88
+ return human,ai,top3
 
 
 
 
89
 
90
+ # ── Main analyse fn ─────────────────────────────────────────────────────
91
  @spaces.GPU
92
+ def analyse(txt:str):
93
+ if not txt.strip():
94
  return "✏️ Please paste or type some text to analyse…"
95
+ lines=tidy(txt).split("\n")
96
+ out, h_sum, ai_sum, n=[],0.0,0.0,0
 
 
97
  for ln in lines:
98
+ if not ln.strip(): out.append("<br>"); continue
99
+ n+=1
100
+ h,ai,top3=infer(ln); h_sum+=h; ai_sum+=ai
101
+ cls="ai-line" if ai>h else "human-line"
102
+ tip=f"AI {ai:.2f}% • Top-3: {', '.join(top3)}" if ai>h else f"Human {h:.2f}%"
103
+ out.append(f"<span class='{cls} prob-tooltip' title='{tip}'>{html.escape(ln)}</span>")
104
+ human_avg,ai_avg=h_sum/n,ai_sum/n
105
+ verdict=(f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
106
+ f"Human-written {human_avg:.2f}%</span>"
107
+ if human_avg>=ai_avg else
108
+ f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
109
+ f"AI-generated {ai_avg:.2f}%</span>")
110
+ return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  # ── Gradio UI ───────────────────────────────────────────────────────────
113
+ with gr.Blocks(css=CSS,title="Orify Text Detector") as demo:
114
+ gr.Markdown("## Orify Text Detector")
115
+ gr.Markdown(
116
+ "Paste text, click **Analyse**.<br>"
117
+ "<span class='human-line'>Green</span>=human &nbsp;|&nbsp; "
118
+ "<span class='ai-line'>Red</span>=AI.<br>"
119
+ "Hover a line to see confidence & top-3 AI models."
120
+ )
121
+ inp=gr.Textbox(lines=8,placeholder="Paste text here …")
122
+ btn=gr.Button("Analyse")
123
+ out=gr.HTML(elem_classes=["output-box"])
124
+ btn.click(analyse,inp,out)
125
  gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
126
 
127
+ if __name__=="__main__":
128
  demo.launch()