ocr-quality-assessor-unigram-light / ocr_qa_assessment.py
emanuelaboros's picture
modified readme
5e2a201
from transformers import Pipeline
class QAAssessmentPipeline(Pipeline):
    """Pipeline that passes text straight to the model and reports a score.

    The input is not transformed before it reaches ``self.model``; the first
    element of the model's output is rounded and returned under the
    ``"ocr_quality_score"`` key.
    """

    def _sanitize_parameters(self, **kwargs):
        """Sort call-time keyword arguments into three per-stage dicts.

        Only a ``text`` keyword is kept, and it goes into the first
        (preprocess) dict. The second and third dicts are always empty.

        Returns:
            A 3-tuple ``(preprocess_kwargs, {}, {})``.
        """
        # NOTE(review): `preprocess` already takes `text` as its positional
        # input; forwarding a `text` keyword as well looks like it could
        # collide -- confirm against how the base Pipeline calls preprocess.
        forwarded = {"text": kwargs["text"]} if "text" in kwargs else {}
        return forwarded, {}, {}

    def preprocess(self, text, **kwargs):
        """Return ``text`` unchanged; extra keyword arguments are ignored."""
        return text

    def _forward(self, text, **kwargs):
        """Call ``self.model`` on ``text`` and return whatever it produces."""
        return self.model(text)

    def postprocess(self, outputs, **kwargs):
        """Wrap the first model output in a one-key dictionary.

        Args:
            outputs: Indexable model output; only ``outputs[0]`` is read.

        Returns:
            ``{"ocr_quality_score": <outputs[0] rounded to 4 decimals>}``.
        """
        first_score = outputs[0]
        return {"ocr_quality_score": round(first_score, 4)}