Spaces:

Samarth991
/

CV-Agent

Sleeping

App Files Files Community

Samarth991 committed on Feb 23

Commit

d766b17

1 Parent(s): b60841d

added image detection code to display predicted bboxes

Browse files

Files changed (2) hide show

tool_utils/yolo_world.py +53 -0
utils.py +28 -1

tool_utils/yolo_world.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import os
 import logging
 import numpy as np
 from typing import List
 from ultralytics import YOLOWorld
 class YoloWorld:
@@ -27,4 +30,54 @@ class YoloWorld:
                     }
                 object_details.append(object_data)
         return object_details

 import os
 import logging
+import cv2
 import numpy as np
 from typing import List
+import torch
+import random
 from ultralytics import YOLOWorld
 class YoloWorld:
                     }
                 object_details.append(object_data)
         return object_details
+    @staticmethod
+    def draw_bboxes(rgb_frame,boxes,labels,color=None,line_thickness=3):
+        rgb_frame = cv2.imread(rgb_frame)
+        rgb_frame = cv2.cvtColor(rgb_frame,cv2.COLOR_BGR2RGB)
+        tl = line_thickness or round(0.002 * (rgb_frame.shape[0] + rgb_frame.shape[1]) / 2) + 1  # line/font thickness
+        rgb_frame_copy = rgb_frame.copy()
+        if color is None :
+            color = color or [random.randint(0, 255) for _ in range(3)]
+        for box,label in zip(boxes,labels):
+            if box.type() == 'torch.IntTensor':
+                box = box.numpy()
+            # extract coordinates
+            x1,y1,x2,y2 = box
+            c1,c2  = (x1,y1),(x2,y2)
+            # Draw rectangle
+            cv2.rectangle(rgb_frame_copy, c1,c2, color, thickness=tl, lineType=cv2.LINE_AA)
+            tf = max(tl - 1, 1)  # font thickness
+            # label = label2id[int(label.numpy())]
+            t_size = cv2.getTextSize(str(label), 0, fontScale=tl / 3, thickness=tf)[0]
+            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+            cv2.putText(rgb_frame_copy, str(label), (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+        return rgb_frame_copy
+    def run_yolo_infer(self,image_path:str,object_prompts:List):
+        self.model.set_classes(object_prompts)
+        results = self.model.predict(image_path)
+        processed_predictions = []
+        bounding_boxes = []
+        labels = []
+        scores = []
+        for result in results:
+            for i,box in enumerate(result.boxes):
+                x1, y1, x2, y2 = np.array(box.xyxy.cpu(), dtype=np.int32).squeeze()
+                bounding_boxes.append([x1,y1,x2,y2])
+                labels.append(int(box.cls.cpu()))
+                scores.append(round(float(box.conf.cpu()),2))
+        processed_predictions.append(dict(boxes= torch.tensor(bounding_boxes),
+                                        labels= torch.IntTensor(labels),
+                                        scores=torch.tensor(scores))
+                                    )
+        detected_image = self.draw_bboxes(rgb_frame=image_path,
+                         boxes=processed_predictions[0]['boxes'],
+                         labels=processed_predictions[0]['labels']
+                         )
+        cv2.imwrite('final_mask.jpg',detected_image)
+        return "Predicted image : final_mask.jpg . Details :{}".format(processed_predictions[0])

utils.py CHANGED Viewed

@@ -3,6 +3,8 @@ import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 from matplotlib import cm
 import torch
 def draw_panoptic_segmentation(model,segmentation, segments_info):
     # get the used color map
@@ -23,4 +25,29 @@ def draw_panoptic_segmentation(model,segmentation, segments_info):
     # ax.legend(handles=handles)
     fig.savefig('final_mask.png')
-    return 'final_mask.png'

 import matplotlib.patches as mpatches
 from matplotlib import cm
 import torch
+import cv2
+import random
 def draw_panoptic_segmentation(model,segmentation, segments_info):
     # get the used color map
     # ax.legend(handles=handles)
     fig.savefig('final_mask.png')
+    return 'final_mask.png'
+def draw_bboxes(rgb_frame,boxes,labels,color=None,line_thickness=3):
+    rgb_frame = cv2.imread(rgb_frame)
+    rgb_frame = cv2.cvtColor(rgb_frame,cv2.COLOR_BGR2RGB)
+    tl = line_thickness or round(0.002 * (rgb_frame.shape[0] + rgb_frame.shape[1]) / 2) + 1  # line/font thickness
+    rgb_frame_copy = rgb_frame.copy()
+    if color is None :
+        color = color or [random.randint(0, 255) for _ in range(3)]
+    for box,label in zip(boxes,labels):
+        if box.type() == 'torch.IntTensor':
+            box = box.numpy()
+        # extract coordinates
+        x1,y1,x2,y2 = box
+        c1,c2  = (x1,y1),(x2,y2)
+        # Draw rectangle
+        cv2.rectangle(rgb_frame_copy, c1,c2, color, thickness=tl, lineType=cv2.LINE_AA)
+        tf = max(tl - 1, 1)  # font thickness
+        # label = label2id[int(label.numpy())]
+        t_size = cv2.getTextSize(str(label), 0, fontScale=tl / 3, thickness=tf)[0]
+        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+        cv2.putText(rgb_frame_copy, str(label), (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+    return rgb_frame_copy