Ubuntu
committed on
Commit
·
ace32a8
1
Parent(s):
68a2fd4
103
Browse files
GRPO.py
CHANGED
@@ -95,20 +95,22 @@ tokenizer.padding_side = "left"
|
|
95 |
training_args = GRPOConfig(
|
96 |
output_dir="phi2-grpo-openassistant",
|
97 |
num_train_epochs=3,
|
98 |
-
per_device_train_batch_size=
|
99 |
-
gradient_accumulation_steps=
|
100 |
gradient_checkpointing=True,
|
101 |
learning_rate=5e-6,
|
102 |
logging_steps=10,
|
103 |
save_steps=100,
|
104 |
fp16=True,
|
105 |
remove_unused_columns=False,
|
106 |
-
report_to="
|
107 |
optim="adamw_torch",
|
108 |
lr_scheduler_type="cosine",
|
109 |
warmup_ratio=0.1,
|
|
|
110 |
)
|
111 |
|
|
|
112 |
# Initialize the GRPO trainer with preference dataset
|
113 |
trainer = GRPOTrainer(
|
114 |
model=model,
|
|
|
95 |
training_args = GRPOConfig(
|
96 |
output_dir="phi2-grpo-openassistant",
|
97 |
num_train_epochs=3,
|
98 |
+
per_device_train_batch_size=2,
|
99 |
+
gradient_accumulation_steps=16,
|
100 |
gradient_checkpointing=True,
|
101 |
learning_rate=5e-6,
|
102 |
logging_steps=10,
|
103 |
save_steps=100,
|
104 |
fp16=True,
|
105 |
remove_unused_columns=False,
|
106 |
+
report_to="none",
|
107 |
optim="adamw_torch",
|
108 |
lr_scheduler_type="cosine",
|
109 |
warmup_ratio=0.1,
|
110 |
+
num_generations=2, # Set the desired number of generations per prompt
|
111 |
)
|
112 |
|
113 |
+
|
114 |
# Initialize the GRPO trainer with preference dataset
|
115 |
trainer = GRPOTrainer(
|
116 |
model=model,
|