thon from transformers import pipeline from transformers.pipelines.pt_utils import KeyDataset import datasets dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised") pipe = pipeline("text-classification", device=0) for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"): print(out) # [{'label': 'POSITIVE', 'score': 0.9998743534088135}] # Exactly the same output as before, but the content are passed # as batches to the model However, this is not automatically a win for performance.