"""Inference helpers: RoBERTa feature extraction followed by an MLP classifier.

Each public ``*Inference`` function embeds a text with ``roberta-base`` and
feeds the embedding to a two-output MLP whose weights are read from
``MLPDictStates/``.
"""

import functools

import torch
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel

# Device shared by the feature extractor and the classifier heads.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class MLP(nn.Module):
    """Two-layer perceptron: ``input_dim -> 256 -> 2`` with a GELU in between."""

    def __init__(self, input_dim):
        """Build the layers.

        Args:
            input_dim: Size of the last dimension of the input features.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 2)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Return the two raw output scores for ``x`` (no softmax applied)."""
        hidden = self.gelu(self.fc1(x))
        return self.fc2(hidden)


@functools.lru_cache(maxsize=1)
def _load_roberta():
    """Load the ``roberta-base`` tokenizer and encoder once and reuse them."""
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaModel.from_pretrained("roberta-base").to(device)
    model.eval()
    return tokenizer, model


def extract_features(text):
    """Embed ``text`` with ``roberta-base``.

    Args:
        text: Input text; it is truncated to at most 512 tokens.

    Returns:
        A NumPy array holding the last-layer hidden state at sequence
        position 0, with singleton dimensions squeezed away.
    """
    tokenizer, model = _load_roberta()
    token_ids = tokenizer.encode(
        text, truncation=True, max_length=512, return_tensors="pt"
    )
    # The encoder lives on `device`, so its input has to be there as well.
    token_ids = token_ids.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(token_ids)

    first_token_state = outputs.last_hidden_state[:, 0, :]
    # `.cpu()` is required before `.numpy()` when running on CUDA.
    return first_token_state.squeeze().detach().cpu().numpy()


def _predict(input_text, weights_path):
    """Classify ``input_text`` with the MLP stored at ``weights_path``.

    Returns the index (0 or 1) of the larger of the two MLP outputs.
    """
    features = extract_features(input_text)

    classifier = MLP(768).to(device)
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    state_dict = torch.load(weights_path, map_location=device)
    classifier.load_state_dict(state_dict)
    classifier.eval()

    with torch.no_grad():
        inputs = torch.tensor(features).to(device)
        logits = classifier(inputs.float())

    # The features are a 1-D vector, so the logits are 1-D too; reducing over
    # the last dimension works for both 1-D and (1, 2)-shaped outputs.
    return logits.argmax(dim=-1).item()


def RobertaSentinelOpenGPTInference(input_text):
    """Classify ``input_text`` with the ``RobertaSentinelOpenGPT`` weights.

    Args:
        input_text: Text to classify.

    Returns:
        int: Predicted class index, 0 or 1.
        NOTE(review): the meaning of each index is not defined in this file;
        confirm against the training code.
    """
    return _predict(input_text, "MLPDictStates/RobertaSentinelOpenGPT.pth")


def RobertaSentinelCSAbstractInference(input_text):
    """Classify ``input_text`` with the ``RobertaSentinelCSAbstract`` weights.

    Args:
        input_text: Text to classify.

    Returns:
        int: Predicted class index, 0 or 1.
        NOTE(review): the meaning of each index is not defined in this file;
        confirm against the training code.
    """
    return _predict(input_text, "MLPDictStates/RobertaSentinelCSAbstract.pth")