|
import io |
|
import torch |
|
import cv2 |
|
import base64 |
|
import numpy as np |
|
from PIL import Image |
|
from ultralytics import YOLO |
|
from transformers import AutoImageProcessor |
|
from datasets import Image as HFImage |
|
|
|
def overlay_boxes(image, boxes, scores, labels, class_names, conf_threshold=0.5): |
|
"""Draw bounding boxes on the image with labels and scores""" |
|
img = np.array(image.copy()) |
|
colors = {0: (0, 255, 0)} |
|
|
|
for box, score, label in zip(boxes, scores, labels): |
|
if score >= conf_threshold: |
|
x1, y1, x2, y2 = map(int, box) |
|
label_text = f"{class_names[int(label)]}: {score:.2f}" |
|
color = colors.get(int(label), (255, 0, 0)) |
|
cv2_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) |
|
cv2.rectangle(cv2_image, (x1, y1), (x2, y2), color, 2) |
|
cv2.putText(cv2_image, label_text, (x1, y1 - 10), |
|
cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2) |
|
img = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB) |
|
|
|
return Image.fromarray(img) |
|
|
|
class YOLOv9CardDetector: |
|
def __init__(self): |
|
self.model = YOLO("best.pt") |
|
self.config = { |
|
"class_names": ["card"], |
|
"conf_threshold": 0.5 |
|
} |
|
self.image_processor = AutoImageProcessor.from_pretrained(".") |
|
|
|
def __call__(self, inputs): |
|
"""Process input for the Hugging Face inference API""" |
|
if isinstance(inputs, HFImage): |
|
image = inputs.convert("RGB") |
|
else: |
|
if isinstance(inputs, dict) and "image" in inputs: |
|
|
|
image = inputs["image"] |
|
if isinstance(image, str): |
|
image = Image.open(io.BytesIO(base64.b64decode(image))) |
|
else: |
|
image = inputs |
|
|
|
|
|
with torch.no_grad(): |
|
results = self.model(image) |
|
|
|
|
|
result = results[0] |
|
boxes = result.boxes.xyxy.cpu().numpy() |
|
scores = result.boxes.conf.cpu().numpy() |
|
labels = result.boxes.cls.cpu().numpy() |
|
|
|
|
|
output = { |
|
"boxes": boxes.tolist(), |
|
"scores": scores.tolist(), |
|
"labels": labels.tolist(), |
|
"class_names": self.config["class_names"] |
|
} |
|
|
|
|
|
try: |
|
import cv2 |
|
annotated_image = overlay_boxes( |
|
image, |
|
boxes, |
|
scores, |
|
labels, |
|
self.config["class_names"], |
|
self.config["conf_threshold"] |
|
) |
|
buffered = io.BytesIO() |
|
annotated_image.save(buffered, format="JPEG") |
|
output["annotated_image"] = buffered.getvalue() |
|
except ImportError: |
|
|
|
pass |
|
|
|
return output |
|
|
|
|
|
detector = YOLOv9CardDetector() |
|
|
|
def run_inference(inputs): |
|
"""Entry point for the model""" |
|
return detector(inputs) |
|
|