Ubuntu committed on
Commit
ace32a8
·
1 Parent(s): 68a2fd4
Files changed (1) hide show
  1. GRPO.py +5 -3
GRPO.py CHANGED
@@ -95,20 +95,22 @@ tokenizer.padding_side = "left"
95
  training_args = GRPOConfig(
96
  output_dir="phi2-grpo-openassistant",
97
  num_train_epochs=3,
98
- per_device_train_batch_size=8,
99
- gradient_accumulation_steps=4,
100
  gradient_checkpointing=True,
101
  learning_rate=5e-6,
102
  logging_steps=10,
103
  save_steps=100,
104
  fp16=True,
105
  remove_unused_columns=False,
106
- report_to="wandb",
107
  optim="adamw_torch",
108
  lr_scheduler_type="cosine",
109
  warmup_ratio=0.1,
 
110
  )
111
 
 
112
  # Initialize the GRPO trainer with preference dataset
113
  trainer = GRPOTrainer(
114
  model=model,
 
95
  training_args = GRPOConfig(
96
  output_dir="phi2-grpo-openassistant",
97
  num_train_epochs=3,
98
+ per_device_train_batch_size=2,
99
+ gradient_accumulation_steps=16,
100
  gradient_checkpointing=True,
101
  learning_rate=5e-6,
102
  logging_steps=10,
103
  save_steps=100,
104
  fp16=True,
105
  remove_unused_columns=False,
106
+ report_to="none",
107
  optim="adamw_torch",
108
  lr_scheduler_type="cosine",
109
  warmup_ratio=0.1,
110
+ num_generations=2, # Set the desired number of generations per prompt
111
  )
112
 
113
+
114
  # Initialize the GRPO trainer with preference dataset
115
  trainer = GRPOTrainer(
116
  model=model,