|
import time

import spacy
import torch
from fastapi import FastAPI
from langdetect import LangDetectException, detect
from pydantic import BaseModel
from transformers import BertModel, BertTokenizer
|
|
|
app = FastAPI(title="Text Processing API") |
|
|
|
|
|
nlp = spacy.load("en_core_web_sm") |
|
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased') |
|
model = BertModel.from_pretrained('bert-base-multilingual-uncased') |
|
|
|
def process_text(text: str): |
|
|
|
lang = detect(text) |
|
|
|
|
|
start_time = time.time() |
|
|
|
|
|
doc = nlp(text) |
|
tokens = [token.text for token in doc] |
|
entities = [(ent.text, ent.label_) for ent in doc.ents] |
|
|
|
|
|
encoded_input = tokenizer(text, return_tensors='pt') |
|
output = model(**encoded_input) |
|
|
|
|
|
end_time = time.time() |
|
time_taken = end_time - start_time |
|
|
|
return { |
|
"language": lang, |
|
"tokens": tokens, |
|
"named_entities": entities, |
|
"query_length": len(text), |
|
"time_taken": time_taken |
|
} |
|
|
|
|
|
class Query(BaseModel): |
|
text: str |
|
|
|
|
|
@app.post("/process/") |
|
async def process_query(query: Query): |
|
results = process_text(query.text) |
|
return results |
|
|