# Hyperparameters and run options for fine-tuning; checkpoints are written
# under the "my_awesome_model" output directory.
training_args = TrainingArguments(
    output_dir="my_awesome_model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    # Evaluation and checkpoint saving both run on the same "epoch" schedule.
    # NOTE(review): load_best_model_at_end presumably needs these two
    # strategies to match -- confirm in the TrainingArguments documentation.
    # NOTE(review): newer transformers releases appear to rename
    # `evaluation_strategy` to `eval_strategy` -- confirm against the
    # installed version before changing the keyword.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # NOTE(review): pushing to the Hub presumably requires being logged in
    # to a Hugging Face account -- verify before running.
    push_to_hub=True,
)
# Bundle the model, the training configuration, the "train"/"test" splits of
# `tokenized_imdb`, the tokenizer, the data collator, and the metric function
# into a single Trainer. The "test" split is used as the evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_imdb["train"],
    eval_dataset=tokenized_imdb["test"],
    # NOTE(review): newer transformers releases may expect this argument
    # under a different name (`processing_class`) -- confirm against the
    # installed version before changing the keyword.
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Launch the fine-tuning run configured above.
trainer.train()
`Trainer` applies dynamic padding by default when you pass `tokenizer` to it, so specifying a `data_collator` explicitly is optional in that case.