prefix = "summarize: " def preprocess_function(examples): inputs = [prefix + doc for doc in examples["text"]] model_inputs = tokenizer(inputs, max_length=1024, truncation=True) labels = tokenizer(text_target=examples["summary"], max_length=128, truncation=True) model_inputs["labels"] = labels["input_ids"] return model_inputs To apply the preprocessing function over the entire dataset, use 🤗 Datasets [~datasets.Dataset.map] method. |