Below is an example on how to run mask generation given an image and a 2D point: thon import torch from PIL import Image import requests from transformers import SamModel, SamProcessor device = "cuda" if torch.cuda.is_available() else "cpu" model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device) processor = SamProcessor.from_pretrained("facebook/sam-vit-huge") img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png" raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB") input_points = [[[450, 600]]] # 2D location of a window in the image inputs = processor(raw_image, input_points=input_points, return_tensors="pt").to(device) with torch.no_grad(): outputs = model(**inputs) masks = processor.image_processor.post_process_masks( outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu() ) scores = outputs.iou_scores You can also process your own masks alongside the input images in the processor to be passed to the model.