```python
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
import datasets

dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
pipe = pipeline("text-classification", device=0)
for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"):
    print(out)
    # [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
    # Exactly the same output as before, but the contents are passed
    # as batches to the model
```
However, this is not automatically a win for performance.
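Whether batching pays off depends on the hardware, the model, and how uniform the sequence lengths are, so it is worth timing it on your own setup. Here is a minimal sketch of such a measurement; the 1,000-example subset and the batch sizes tried are arbitrary choices for illustration, not recommendations:

```python
import time

from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
import datasets

# Small subset (hypothetical size) to keep the comparison quick
dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised[:1000]")
pipe = pipeline("text-classification", device=0)

# Run the same workload with and without batching and compare wall-clock time
for batch_size in [1, 8, 64]:
    start = time.perf_counter()
    for _ in pipe(KeyDataset(dataset, "text"), batch_size=batch_size, truncation="only_first"):
        pass
    elapsed = time.perf_counter() - start
    print(f"batch_size={batch_size}: {elapsed:.1f}s")
```

Depending on your GPU and the spread of text lengths in the data, any of these batch sizes can come out ahead.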