Hence, inputs can be prepared for the model as follows:
# Usage example: build a FuyuProcessor and prepare (text, image) inputs for the model.
import io

import requests
from PIL import Image
from transformers import AutoTokenizer
from transformers.models.fuyu.image_processing_fuyu import FuyuImageProcessor
from transformers.models.fuyu.processing_fuyu import FuyuProcessor

# The processor pairs the Fuyu image processor with the model's tokenizer.
tokenizer = AutoTokenizer.from_pretrained("adept-hf-collab/fuyu-8b")
image_processor = FuyuImageProcessor()
processor = FuyuProcessor(image_processor=image_processor, tokenizer=tokenizer)

text_prompt = "Generate a coco-style caption.\n"
bus_image_url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/bus.png"
# Download the raw bytes and decode them into a PIL image.
bus_image_pil = Image.open(io.BytesIO(requests.get(bus_image_url).content))

# NOTE: the original snippet referenced an undefined name `image_pil`;
# the image loaded above is `bus_image_pil`.
inputs_to_model = processor(text=text_prompt, images=bus_image_pil)
This model was contributed by Molbap.