Spaces:
Running
Running
File size: 1,927 Bytes
57bafce 582b2f2 cf62d27 57bafce 582b2f2 57bafce 582b2f2 57bafce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
import torch
import torch.nn as nn
# Device that the models and input tensors in this file are moved to (CPU).
device = torch.device("cpu")
class MLP(nn.Module):
    """Two-layer feed-forward classification head with a GELU activation.

    Args:
        input_dim: Size of the last dimension of the input features.
        hidden_dim: Width of the hidden layer. Defaults to 256.
        num_classes: Number of output logits. Defaults to 2.
    """

    def __init__(self, input_dim, hidden_dim=256, num_classes=2):
        super().__init__()
        # The attribute names double as state-dict keys ("fc1.weight", ...),
        # so they must stay as they are for saved weights to keep loading.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the raw (un-normalised) logits for ``x``.

        Args:
            x: Float tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes``.
        """
        return self.fc2(self.gelu(self.fc1(x)))
# Tokenizer/encoder pair, filled in on first use so the pretrained weights are
# loaded once per process instead of once per call.
_ROBERTA_CACHE = {}


def _load_roberta():
    """Return the cached ``(tokenizer, model)`` pair for ``roberta-base``."""
    if not _ROBERTA_CACHE:
        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
        model = RobertaModel.from_pretrained("roberta-base").to(device)
        model.eval()  # inference only: make sure dropout is disabled
        _ROBERTA_CACHE["tokenizer"] = tokenizer
        _ROBERTA_CACHE["model"] = model
    return _ROBERTA_CACHE["tokenizer"], _ROBERTA_CACHE["model"]


def extract_features(text):
    """Encode ``text`` with ``roberta-base`` and return its first-token embedding.

    Args:
        text: Input string. It is tokenised with truncation to at most
            512 tokens.

    Returns:
        A NumPy array holding the last-layer hidden state at sequence
        position 0, with all size-1 dimensions squeezed out.
    """
    tokenizer, model = _load_roberta()
    token_ids = tokenizer.encode(
        text, truncation=True, max_length=512, return_tensors="pt"
    )
    # Gradients are never used here; skipping autograd saves memory and time.
    with torch.no_grad():
        outputs = model(token_ids.to(device))
    first_token_state = outputs.last_hidden_state[:, 0, :]
    # .cpu() keeps the NumPy conversion valid if `device` is ever not the CPU.
    return first_token_state.squeeze().detach().cpu().numpy()
def _predict_with_mlp_head(input_text, weights_path):
    """Classify ``input_text`` with the MLP head stored at ``weights_path``.

    Args:
        input_text: Text to classify; passed to ``extract_features``.
        weights_path: Path to a state dict saved from ``MLP(768)``.

    Returns:
        The index (Python int) of the largest output logit.
    """
    features = extract_features(input_text)
    head = MLP(768).to(device)
    head.load_state_dict(torch.load(weights_path, map_location=device))
    head.eval()
    with torch.no_grad():
        # extract_features squeezes the batch axis away, which made the
        # logits 1-D and caused `torch.max(outputs, 1)` to raise IndexError.
        # Restore an explicit batch axis of size 1 before the forward pass.
        inputs = torch.tensor(features, dtype=torch.float32, device=device)
        logits = head(inputs.reshape(1, -1))
    # Arg-max over the class axis; `.item()` is valid because batch size is 1.
    return logits.argmax(dim=-1).item()


def RobertaSentinelOpenGPTInference(input_text):
    """Return the predicted class index for ``input_text`` (OpenGPT head).

    Uses the weights in ``MLPDictStates/RobertaSentinelOpenGPT.pth``.
    """
    return _predict_with_mlp_head(
        input_text, "MLPDictStates/RobertaSentinelOpenGPT.pth"
    )


def RobertaSentinelCSAbstractInference(input_text):
    """Return the predicted class index for ``input_text`` (CSAbstract head).

    Uses the weights in ``MLPDictStates/RobertaSentinelCSAbstract.pth``.
    """
    return _predict_with_mlp_head(
        input_text, "MLPDictStates/RobertaSentinelCSAbstract.pth"
    )
|