silviasapora committed on
Commit
ed492e5
·
verified ·
1 Parent(s): 8cd16be

Model save

Browse files
Files changed (4) hide show
  1. README.md +67 -0
  2. all_results.json +9 -0
  3. train_results.json +9 -0
  4. trainer_state.json +651 -0
README.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-7b
3
+ library_name: transformers
4
+ model_name: gemma-7b-silvia_cpo-basic_capibara-5e-5-025-v151
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - orpo
9
+ license: gemma
10
+ ---
11
+
12
+ # Model Card for gemma-7b-silvia_cpo-basic_capibara-5e-5-025-v151
13
+
14
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="silviasapora/gemma-7b-silvia_cpo-basic_capibara-5e-5-025-v151", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/silvias/huggingface/runs/mw5gj0gs)
31
+
32
+
33
+ This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.16.0
38
+ - Transformers: 4.50.3
39
+ - PyTorch: 2.5.1
40
+ - Datasets: 3.3.2
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+ Cite ORPO as:
46
+
47
+ ```bibtex
48
+ @article{hong2024orpo,
49
+ title = {{ORPO: Monolithic Preference Optimization without Reference Model}},
50
+ author = {Jiwoo Hong and Noah Lee and James Thorne},
51
+ year = 2024,
52
+ eprint = {arXiv:2403.07691}
53
+ }
54
+ ```
55
+
56
+ Cite TRL as:
57
+
58
+ ```bibtex
59
+ @misc{vonwerra2022trl,
60
+ title = {{TRL: Transformer Reinforcement Learning}},
61
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
62
+ year = 2020,
63
+ journal = {GitHub repository},
64
+ publisher = {GitHub},
65
+ howpublished = {\url{https://github.com/huggingface/trl}}
66
+ }
67
+ ```
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.9814696485623005,
3
+ "total_flos": 0.0,
4
+ "train_loss": 62.26987236738205,
5
+ "train_runtime": 5592.1515,
6
+ "train_samples": 3130,
7
+ "train_samples_per_second": 2.239,
8
+ "train_steps_per_second": 0.034
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.9814696485623005,
3
+ "total_flos": 0.0,
4
+ "train_loss": 62.26987236738205,
5
+ "train_runtime": 5592.1515,
6
+ "train_samples": 3130,
7
+ "train_samples_per_second": 2.239,
8
+ "train_steps_per_second": 0.034
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,651 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.9814696485623005,
6
+ "eval_steps": 500,
7
+ "global_step": 192,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.10223642172523961,
14
+ "grad_norm": 827.7908935546875,
15
+ "learning_rate": 1.25e-05,
16
+ "log_odds_chosen": -4.91914176940918,
17
+ "log_odds_ratio": -7.912337303161621,
18
+ "logps/chosen": -17.909399032592773,
19
+ "logps/rejected": -12.991429328918457,
20
+ "loss": 404.4927,
21
+ "nll_loss": 10.663026809692383,
22
+ "rewards/accuracies": 0.40312498807907104,
23
+ "rewards/chosen": -4.477349758148193,
24
+ "rewards/margins": -1.2294929027557373,
25
+ "rewards/rejected": -3.2478573322296143,
26
+ "step": 5
27
+ },
28
+ {
29
+ "epoch": 0.20447284345047922,
30
+ "grad_norm": 538.9002685546875,
31
+ "learning_rate": 2.5e-05,
32
+ "log_odds_chosen": -1.99321711063385,
33
+ "log_odds_ratio": -5.780060291290283,
34
+ "logps/chosen": -15.17625617980957,
35
+ "logps/rejected": -13.184392929077148,
36
+ "loss": 342.9983,
37
+ "nll_loss": 9.274579048156738,
38
+ "rewards/accuracies": 0.4156250059604645,
39
+ "rewards/chosen": -3.7940640449523926,
40
+ "rewards/margins": -0.4979652762413025,
41
+ "rewards/rejected": -3.296098232269287,
42
+ "step": 10
43
+ },
44
+ {
45
+ "epoch": 0.30670926517571884,
46
+ "grad_norm": 1268.311279296875,
47
+ "learning_rate": 3.7500000000000003e-05,
48
+ "log_odds_chosen": -1.9389375448226929,
49
+ "log_odds_ratio": -5.051381587982178,
50
+ "logps/chosen": -12.37875747680664,
51
+ "logps/rejected": -10.439730644226074,
52
+ "loss": 271.8747,
53
+ "nll_loss": 7.234231472015381,
54
+ "rewards/accuracies": 0.484375,
55
+ "rewards/chosen": -3.09468936920166,
56
+ "rewards/margins": -0.4847566485404968,
57
+ "rewards/rejected": -2.6099326610565186,
58
+ "step": 15
59
+ },
60
+ {
61
+ "epoch": 0.40894568690095845,
62
+ "grad_norm": 500.36431884765625,
63
+ "learning_rate": 5e-05,
64
+ "log_odds_chosen": -0.3404741883277893,
65
+ "log_odds_ratio": -1.416075348854065,
66
+ "logps/chosen": -3.7283692359924316,
67
+ "logps/rejected": -3.374943494796753,
68
+ "loss": 108.1039,
69
+ "nll_loss": 3.0280256271362305,
70
+ "rewards/accuracies": 0.528124988079071,
71
+ "rewards/chosen": -0.9320923089981079,
72
+ "rewards/margins": -0.08835643529891968,
73
+ "rewards/rejected": -0.8437358736991882,
74
+ "step": 20
75
+ },
76
+ {
77
+ "epoch": 0.5111821086261981,
78
+ "grad_norm": 416.5755615234375,
79
+ "learning_rate": 4.989581840408562e-05,
80
+ "log_odds_chosen": 0.25642240047454834,
81
+ "log_odds_ratio": -0.655372142791748,
82
+ "logps/chosen": -1.5129865407943726,
83
+ "logps/rejected": -1.724565863609314,
84
+ "loss": 61.4784,
85
+ "nll_loss": 1.7581512928009033,
86
+ "rewards/accuracies": 0.643750011920929,
87
+ "rewards/chosen": -0.37824663519859314,
88
+ "rewards/margins": 0.052894867956638336,
89
+ "rewards/rejected": -0.4311414659023285,
90
+ "step": 25
91
+ },
92
+ {
93
+ "epoch": 0.6134185303514377,
94
+ "grad_norm": 166.52938842773438,
95
+ "learning_rate": 4.9584141920736656e-05,
96
+ "log_odds_chosen": 0.1395658254623413,
97
+ "log_odds_ratio": -0.7378019094467163,
98
+ "logps/chosen": -1.480513334274292,
99
+ "logps/rejected": -1.599832534790039,
100
+ "loss": 59.0905,
101
+ "nll_loss": 1.6694732904434204,
102
+ "rewards/accuracies": 0.612500011920929,
103
+ "rewards/chosen": -0.370128333568573,
104
+ "rewards/margins": 0.02982982061803341,
105
+ "rewards/rejected": -0.39995813369750977,
106
+ "step": 30
107
+ },
108
+ {
109
+ "epoch": 0.7156549520766773,
110
+ "grad_norm": 81.22210693359375,
111
+ "learning_rate": 4.9067568226228646e-05,
112
+ "log_odds_chosen": 0.23929011821746826,
113
+ "log_odds_ratio": -0.6636127233505249,
114
+ "logps/chosen": -1.3320153951644897,
115
+ "logps/rejected": -1.5101702213287354,
116
+ "loss": 52.0935,
117
+ "nll_loss": 1.4632833003997803,
118
+ "rewards/accuracies": 0.6343749761581421,
119
+ "rewards/chosen": -0.33300384879112244,
120
+ "rewards/margins": 0.04453865438699722,
121
+ "rewards/rejected": -0.37754255533218384,
122
+ "step": 35
123
+ },
124
+ {
125
+ "epoch": 0.8178913738019169,
126
+ "grad_norm": 102.73954010009766,
127
+ "learning_rate": 4.83504027183137e-05,
128
+ "log_odds_chosen": 0.3248418867588043,
129
+ "log_odds_ratio": -0.6250384449958801,
130
+ "logps/chosen": -1.249929666519165,
131
+ "logps/rejected": -1.4835537672042847,
132
+ "loss": 50.6161,
133
+ "nll_loss": 1.4245882034301758,
134
+ "rewards/accuracies": 0.684374988079071,
135
+ "rewards/chosen": -0.31248241662979126,
136
+ "rewards/margins": 0.05840602517127991,
137
+ "rewards/rejected": -0.37088844180107117,
138
+ "step": 40
139
+ },
140
+ {
141
+ "epoch": 0.9201277955271565,
142
+ "grad_norm": 103.2927474975586,
143
+ "learning_rate": 4.743862263276376e-05,
144
+ "log_odds_chosen": 0.30287498235702515,
145
+ "log_odds_ratio": -0.6419548988342285,
146
+ "logps/chosen": -1.2506297826766968,
147
+ "logps/rejected": -1.4725720882415771,
148
+ "loss": 49.8597,
149
+ "nll_loss": 1.3988924026489258,
150
+ "rewards/accuracies": 0.6781250238418579,
151
+ "rewards/chosen": -0.3126574456691742,
152
+ "rewards/margins": 0.0554855652153492,
153
+ "rewards/rejected": -0.3681430220603943,
154
+ "step": 45
155
+ },
156
+ {
157
+ "epoch": 1.040894568690096,
158
+ "grad_norm": 152.4938201904297,
159
+ "learning_rate": 4.6339827225933665e-05,
160
+ "log_odds_chosen": 0.39640626311302185,
161
+ "log_odds_ratio": -0.6115574836730957,
162
+ "logps/chosen": -1.1695352792739868,
163
+ "logps/rejected": -1.471874713897705,
164
+ "loss": 54.7336,
165
+ "nll_loss": 1.3199635744094849,
166
+ "rewards/accuracies": 0.6935483813285828,
167
+ "rewards/chosen": -0.2923838198184967,
168
+ "rewards/margins": 0.07558488100767136,
169
+ "rewards/rejected": -0.36796867847442627,
170
+ "step": 50
171
+ },
172
+ {
173
+ "epoch": 1.1431309904153355,
174
+ "grad_norm": 89.43785858154297,
175
+ "learning_rate": 4.506317443854877e-05,
176
+ "log_odds_chosen": 0.5089691877365112,
177
+ "log_odds_ratio": -0.5568146705627441,
178
+ "logps/chosen": -1.0600959062576294,
179
+ "logps/rejected": -1.4196648597717285,
180
+ "loss": 43.6744,
181
+ "nll_loss": 1.2228777408599854,
182
+ "rewards/accuracies": 0.75,
183
+ "rewards/chosen": -0.26502397656440735,
184
+ "rewards/margins": 0.08989225327968597,
185
+ "rewards/rejected": -0.35491621494293213,
186
+ "step": 55
187
+ },
188
+ {
189
+ "epoch": 1.2453674121405751,
190
+ "grad_norm": 87.75115203857422,
191
+ "learning_rate": 4.361930456859455e-05,
192
+ "log_odds_chosen": 0.7825835943222046,
193
+ "log_odds_ratio": -0.47958317399024963,
194
+ "logps/chosen": -0.9152556657791138,
195
+ "logps/rejected": -1.431404948234558,
196
+ "loss": 38.5085,
197
+ "nll_loss": 1.072222352027893,
198
+ "rewards/accuracies": 0.8031250238418579,
199
+ "rewards/chosen": -0.22881391644477844,
200
+ "rewards/margins": 0.12903733551502228,
201
+ "rewards/rejected": -0.3578512370586395,
202
+ "step": 60
203
+ },
204
+ {
205
+ "epoch": 1.3476038338658147,
206
+ "grad_norm": 184.32460021972656,
207
+ "learning_rate": 4.202025158945856e-05,
208
+ "log_odds_chosen": 0.6628133654594421,
209
+ "log_odds_ratio": -0.5079701542854309,
210
+ "logps/chosen": -0.9785585403442383,
211
+ "logps/rejected": -1.419032335281372,
212
+ "loss": 40.1897,
213
+ "nll_loss": 1.1202460527420044,
214
+ "rewards/accuracies": 0.7875000238418579,
215
+ "rewards/chosen": -0.24463963508605957,
216
+ "rewards/margins": 0.11011841148138046,
217
+ "rewards/rejected": -0.354758083820343,
218
+ "step": 65
219
+ },
220
+ {
221
+ "epoch": 1.4498402555910543,
222
+ "grad_norm": 270.92669677734375,
223
+ "learning_rate": 4.0279342852446234e-05,
224
+ "log_odds_chosen": 0.6627331972122192,
225
+ "log_odds_ratio": -0.529289722442627,
226
+ "logps/chosen": -0.9792484045028687,
227
+ "logps/rejected": -1.4207617044448853,
228
+ "loss": 39.7387,
229
+ "nll_loss": 1.1052920818328857,
230
+ "rewards/accuracies": 0.768750011920929,
231
+ "rewards/chosen": -0.24481210112571716,
232
+ "rewards/margins": 0.11037830263376236,
233
+ "rewards/rejected": -0.3551904261112213,
234
+ "step": 70
235
+ },
236
+ {
237
+ "epoch": 1.552076677316294,
238
+ "grad_norm": 75.13140869140625,
239
+ "learning_rate": 3.8411088009602646e-05,
240
+ "log_odds_chosen": 0.7180580496788025,
241
+ "log_odds_ratio": -0.49540311098098755,
242
+ "logps/chosen": -0.9218428730964661,
243
+ "logps/rejected": -1.3873802423477173,
244
+ "loss": 38.359,
245
+ "nll_loss": 1.0657460689544678,
246
+ "rewards/accuracies": 0.793749988079071,
247
+ "rewards/chosen": -0.23046071827411652,
248
+ "rewards/margins": 0.11638432741165161,
249
+ "rewards/rejected": -0.3468450605869293,
250
+ "step": 75
251
+ },
252
+ {
253
+ "epoch": 1.6543130990415336,
254
+ "grad_norm": 76.11426544189453,
255
+ "learning_rate": 3.6431058082615964e-05,
256
+ "log_odds_chosen": 0.6318256258964539,
257
+ "log_odds_ratio": -0.5325360298156738,
258
+ "logps/chosen": -0.922771155834198,
259
+ "logps/rejected": -1.320297360420227,
260
+ "loss": 38.8799,
261
+ "nll_loss": 1.0614503622055054,
262
+ "rewards/accuracies": 0.7437499761581421,
263
+ "rewards/chosen": -0.2306927889585495,
264
+ "rewards/margins": 0.09938153624534607,
265
+ "rewards/rejected": -0.33007434010505676,
266
+ "step": 80
267
+ },
268
+ {
269
+ "epoch": 1.7565495207667732,
270
+ "grad_norm": 87.10155487060547,
271
+ "learning_rate": 3.435575568570633e-05,
272
+ "log_odds_chosen": 0.7393198013305664,
273
+ "log_odds_ratio": -0.5058027505874634,
274
+ "logps/chosen": -0.8932069540023804,
275
+ "logps/rejected": -1.3613816499710083,
276
+ "loss": 38.9466,
277
+ "nll_loss": 1.0541436672210693,
278
+ "rewards/accuracies": 0.765625,
279
+ "rewards/chosen": -0.2233017385005951,
280
+ "rewards/margins": 0.117043636739254,
281
+ "rewards/rejected": -0.3403454124927521,
282
+ "step": 85
283
+ },
284
+ {
285
+ "epoch": 1.8587859424920128,
286
+ "grad_norm": 116.91316223144531,
287
+ "learning_rate": 3.220247748413094e-05,
288
+ "log_odds_chosen": 0.7619136571884155,
289
+ "log_odds_ratio": -0.5002596974372864,
290
+ "logps/chosen": -0.9479422569274902,
291
+ "logps/rejected": -1.4273831844329834,
292
+ "loss": 38.4031,
293
+ "nll_loss": 1.0556285381317139,
294
+ "rewards/accuracies": 0.778124988079071,
295
+ "rewards/chosen": -0.23698556423187256,
296
+ "rewards/margins": 0.1198602169752121,
297
+ "rewards/rejected": -0.35684579610824585,
298
+ "step": 90
299
+ },
300
+ {
301
+ "epoch": 1.9610223642172524,
302
+ "grad_norm": 103.88111877441406,
303
+ "learning_rate": 2.9989170034648823e-05,
304
+ "log_odds_chosen": 0.6734960079193115,
305
+ "log_odds_ratio": -0.5330161452293396,
306
+ "logps/chosen": -0.8996769189834595,
307
+ "logps/rejected": -1.3381158113479614,
308
+ "loss": 38.0074,
309
+ "nll_loss": 1.03584623336792,
310
+ "rewards/accuracies": 0.7718750238418579,
311
+ "rewards/chosen": -0.22491922974586487,
312
+ "rewards/margins": 0.1096097081899643,
313
+ "rewards/rejected": -0.33452895283699036,
314
+ "step": 95
315
+ },
316
+ {
317
+ "epoch": 2.081789137380192,
318
+ "grad_norm": 93.64875030517578,
319
+ "learning_rate": 2.7734280209446865e-05,
320
+ "log_odds_chosen": 1.0058292150497437,
321
+ "log_odds_ratio": -0.42482826113700867,
322
+ "logps/chosen": -0.7912481427192688,
323
+ "logps/rejected": -1.4134339094161987,
324
+ "loss": 39.7125,
325
+ "nll_loss": 0.9234015345573425,
326
+ "rewards/accuracies": 0.8602150678634644,
327
+ "rewards/chosen": -0.1978120356798172,
328
+ "rewards/margins": 0.15554644167423248,
329
+ "rewards/rejected": -0.3533584773540497,
330
+ "step": 100
331
+ },
332
+ {
333
+ "epoch": 2.1840255591054314,
334
+ "grad_norm": 54.99567794799805,
335
+ "learning_rate": 2.5456601450173125e-05,
336
+ "log_odds_chosen": 1.354565978050232,
337
+ "log_odds_ratio": -0.32823148369789124,
338
+ "logps/chosen": -0.7432836890220642,
339
+ "logps/rejected": -1.5525643825531006,
340
+ "loss": 31.9032,
341
+ "nll_loss": 0.8803617358207703,
342
+ "rewards/accuracies": 0.903124988079071,
343
+ "rewards/chosen": -0.18582092225551605,
344
+ "rewards/margins": 0.2023201882839203,
345
+ "rewards/rejected": -0.38814109563827515,
346
+ "step": 105
347
+ },
348
+ {
349
+ "epoch": 2.286261980830671,
350
+ "grad_norm": 79.57364654541016,
351
+ "learning_rate": 2.3175117133477313e-05,
352
+ "log_odds_chosen": 1.357545018196106,
353
+ "log_odds_ratio": -0.339216947555542,
354
+ "logps/chosen": -0.7088838815689087,
355
+ "logps/rejected": -1.5366592407226562,
356
+ "loss": 31.0042,
357
+ "nll_loss": 0.8524471521377563,
358
+ "rewards/accuracies": 0.909375011920929,
359
+ "rewards/chosen": -0.17722097039222717,
360
+ "rewards/margins": 0.2069438397884369,
361
+ "rewards/rejected": -0.38416481018066406,
362
+ "step": 110
363
+ },
364
+ {
365
+ "epoch": 2.3884984025559106,
366
+ "grad_norm": 69.78687286376953,
367
+ "learning_rate": 2.0908842353532803e-05,
368
+ "log_odds_chosen": 1.4747217893600464,
369
+ "log_odds_ratio": -0.3158169388771057,
370
+ "logps/chosen": -0.6909688711166382,
371
+ "logps/rejected": -1.5954649448394775,
372
+ "loss": 30.9753,
373
+ "nll_loss": 0.8266602754592896,
374
+ "rewards/accuracies": 0.9125000238418579,
375
+ "rewards/chosen": -0.17274221777915955,
376
+ "rewards/margins": 0.22612404823303223,
377
+ "rewards/rejected": -0.3988662362098694,
378
+ "step": 115
379
+ },
380
+ {
381
+ "epoch": 2.4907348242811502,
382
+ "grad_norm": 42.07189178466797,
383
+ "learning_rate": 1.867666544020798e-05,
384
+ "log_odds_chosen": 1.5207545757293701,
385
+ "log_odds_ratio": -0.30957141518592834,
386
+ "logps/chosen": -0.7246582508087158,
387
+ "logps/rejected": -1.694667100906372,
388
+ "loss": 31.3816,
389
+ "nll_loss": 0.8578312993049622,
390
+ "rewards/accuracies": 0.90625,
391
+ "rewards/chosen": -0.18116456270217896,
392
+ "rewards/margins": 0.24250221252441406,
393
+ "rewards/rejected": -0.423666775226593,
394
+ "step": 120
395
+ },
396
+ {
397
+ "epoch": 2.59297124600639,
398
+ "grad_norm": 79.26730346679688,
399
+ "learning_rate": 1.6497190533758348e-05,
400
+ "log_odds_chosen": 1.571445107460022,
401
+ "log_odds_ratio": -0.2881600260734558,
402
+ "logps/chosen": -0.7087723016738892,
403
+ "logps/rejected": -1.6907808780670166,
404
+ "loss": 32.1674,
405
+ "nll_loss": 0.8415018320083618,
406
+ "rewards/accuracies": 0.9281250238418579,
407
+ "rewards/chosen": -0.1771930754184723,
408
+ "rewards/margins": 0.24550211429595947,
409
+ "rewards/rejected": -0.42269521951675415,
410
+ "step": 125
411
+ },
412
+ {
413
+ "epoch": 2.6952076677316295,
414
+ "grad_norm": 48.99087142944336,
415
+ "learning_rate": 1.4388582528104628e-05,
416
+ "log_odds_chosen": 1.4256895780563354,
417
+ "log_odds_ratio": -0.3456164300441742,
418
+ "logps/chosen": -0.6957945823669434,
419
+ "logps/rejected": -1.5854990482330322,
420
+ "loss": 30.7822,
421
+ "nll_loss": 0.8388240933418274,
422
+ "rewards/accuracies": 0.878125011920929,
423
+ "rewards/chosen": -0.17394864559173584,
424
+ "rewards/margins": 0.22242608666419983,
425
+ "rewards/rejected": -0.39637476205825806,
426
+ "step": 130
427
+ },
428
+ {
429
+ "epoch": 2.797444089456869,
430
+ "grad_norm": 45.743141174316406,
431
+ "learning_rate": 1.2368415675021768e-05,
432
+ "log_odds_chosen": 1.6803510189056396,
433
+ "log_odds_ratio": -0.29377657175064087,
434
+ "logps/chosen": -0.6751910448074341,
435
+ "logps/rejected": -1.7030166387557983,
436
+ "loss": 29.4657,
437
+ "nll_loss": 0.8158040046691895,
438
+ "rewards/accuracies": 0.9125000238418579,
439
+ "rewards/chosen": -0.16879776120185852,
440
+ "rewards/margins": 0.2569563686847687,
441
+ "rewards/rejected": -0.4257541596889496,
442
+ "step": 135
443
+ },
444
+ {
445
+ "epoch": 2.8996805111821087,
446
+ "grad_norm": 45.214969635009766,
447
+ "learning_rate": 1.0453527111051184e-05,
448
+ "log_odds_chosen": 1.5122963190078735,
449
+ "log_odds_ratio": -0.2940905690193176,
450
+ "logps/chosen": -0.6969622373580933,
451
+ "logps/rejected": -1.6159965991973877,
452
+ "loss": 31.0349,
453
+ "nll_loss": 0.8314476013183594,
454
+ "rewards/accuracies": 0.934374988079071,
455
+ "rewards/chosen": -0.17424055933952332,
456
+ "rewards/margins": 0.22975853085517883,
457
+ "rewards/rejected": -0.4039991497993469,
458
+ "step": 140
459
+ },
460
+ {
461
+ "epoch": 3.020447284345048,
462
+ "grad_norm": 74.37804412841797,
463
+ "learning_rate": 8.659876527920277e-06,
464
+ "log_odds_chosen": 1.7624249458312988,
465
+ "log_odds_ratio": -0.2721032202243805,
466
+ "logps/chosen": -0.6561669707298279,
467
+ "logps/rejected": -1.740464687347412,
468
+ "loss": 34.4684,
469
+ "nll_loss": 0.7908505797386169,
470
+ "rewards/accuracies": 0.9408602118492126,
471
+ "rewards/chosen": -0.16404174268245697,
472
+ "rewards/margins": 0.27107441425323486,
473
+ "rewards/rejected": -0.435116171836853,
474
+ "step": 145
475
+ },
476
+ {
477
+ "epoch": 3.1226837060702874,
478
+ "grad_norm": 45.229400634765625,
479
+ "learning_rate": 7.002413156050108e-06,
480
+ "log_odds_chosen": 2.416283130645752,
481
+ "log_odds_ratio": -0.1788024604320526,
482
+ "logps/chosen": -0.5219112634658813,
483
+ "logps/rejected": -1.9957574605941772,
484
+ "loss": 27.9227,
485
+ "nll_loss": 0.6778407692909241,
486
+ "rewards/accuracies": 0.9781249761581421,
487
+ "rewards/chosen": -0.13047781586647034,
488
+ "rewards/margins": 0.3684615194797516,
489
+ "rewards/rejected": -0.4989393651485443,
490
+ "step": 150
491
+ },
492
+ {
493
+ "epoch": 3.224920127795527,
494
+ "grad_norm": 57.19989776611328,
495
+ "learning_rate": 5.4949511697807765e-06,
496
+ "log_odds_chosen": 2.6506989002227783,
497
+ "log_odds_ratio": -0.15020069479942322,
498
+ "logps/chosen": -0.5697158575057983,
499
+ "logps/rejected": -2.3274993896484375,
500
+ "loss": 27.0184,
501
+ "nll_loss": 0.6888783574104309,
502
+ "rewards/accuracies": 0.96875,
503
+ "rewards/chosen": -0.14242896437644958,
504
+ "rewards/margins": 0.439445823431015,
505
+ "rewards/rejected": -0.5818748474121094,
506
+ "step": 155
507
+ },
508
+ {
509
+ "epoch": 3.3271565495207667,
510
+ "grad_norm": 59.800697326660156,
511
+ "learning_rate": 4.150054552753055e-06,
512
+ "log_odds_chosen": 2.5286622047424316,
513
+ "log_odds_ratio": -0.19968460500240326,
514
+ "logps/chosen": -0.5568191409111023,
515
+ "logps/rejected": -2.1876730918884277,
516
+ "loss": 27.8054,
517
+ "nll_loss": 0.6883557438850403,
518
+ "rewards/accuracies": 0.9468749761581421,
519
+ "rewards/chosen": -0.13920478522777557,
520
+ "rewards/margins": 0.40771350264549255,
521
+ "rewards/rejected": -0.5469182729721069,
522
+ "step": 160
523
+ },
524
+ {
525
+ "epoch": 3.4293929712460063,
526
+ "grad_norm": 54.28583908081055,
527
+ "learning_rate": 2.978932383039093e-06,
528
+ "log_odds_chosen": 2.5673203468322754,
529
+ "log_odds_ratio": -0.18180711567401886,
530
+ "logps/chosen": -0.5963090062141418,
531
+ "logps/rejected": -2.295097589492798,
532
+ "loss": 26.8584,
533
+ "nll_loss": 0.737380862236023,
534
+ "rewards/accuracies": 0.9624999761581421,
535
+ "rewards/chosen": -0.14907725155353546,
536
+ "rewards/margins": 0.4246971607208252,
537
+ "rewards/rejected": -0.5737743973731995,
538
+ "step": 165
539
+ },
540
+ {
541
+ "epoch": 3.531629392971246,
542
+ "grad_norm": 55.28144836425781,
543
+ "learning_rate": 1.9913454107710173e-06,
544
+ "log_odds_chosen": 2.435741901397705,
545
+ "log_odds_ratio": -0.20390479266643524,
546
+ "logps/chosen": -0.5668953061103821,
547
+ "logps/rejected": -2.1278016567230225,
548
+ "loss": 27.9555,
549
+ "nll_loss": 0.6951500177383423,
550
+ "rewards/accuracies": 0.9375,
551
+ "rewards/chosen": -0.14172382652759552,
552
+ "rewards/margins": 0.3902266025543213,
553
+ "rewards/rejected": -0.5319504141807556,
554
+ "step": 170
555
+ },
556
+ {
557
+ "epoch": 3.6338658146964855,
558
+ "grad_norm": 46.94998550415039,
559
+ "learning_rate": 1.1955247068988261e-06,
560
+ "log_odds_chosen": 2.725480556488037,
561
+ "log_odds_ratio": -0.15135148167610168,
562
+ "logps/chosen": -0.5273882150650024,
563
+ "logps/rejected": -2.2832539081573486,
564
+ "loss": 28.734,
565
+ "nll_loss": 0.6737874746322632,
566
+ "rewards/accuracies": 0.965624988079071,
567
+ "rewards/chosen": -0.1318470537662506,
568
+ "rewards/margins": 0.43896645307540894,
569
+ "rewards/rejected": -0.5708134770393372,
570
+ "step": 175
571
+ },
572
+ {
573
+ "epoch": 3.736102236421725,
574
+ "grad_norm": 66.62850189208984,
575
+ "learning_rate": 5.981030611018234e-07,
576
+ "log_odds_chosen": 2.5903310775756836,
577
+ "log_odds_ratio": -0.15854457020759583,
578
+ "logps/chosen": -0.5564672946929932,
579
+ "logps/rejected": -2.2155492305755615,
580
+ "loss": 25.0194,
581
+ "nll_loss": 0.684872031211853,
582
+ "rewards/accuracies": 0.9781249761581421,
583
+ "rewards/chosen": -0.1391168236732483,
584
+ "rewards/margins": 0.4147705137729645,
585
+ "rewards/rejected": -0.5538873076438904,
586
+ "step": 180
587
+ },
588
+ {
589
+ "epoch": 3.8383386581469647,
590
+ "grad_norm": 50.985652923583984,
591
+ "learning_rate": 2.0405970061943002e-07,
592
+ "log_odds_chosen": 2.7051525115966797,
593
+ "log_odds_ratio": -0.16894596815109253,
594
+ "logps/chosen": -0.543402910232544,
595
+ "logps/rejected": -2.2726523876190186,
596
+ "loss": 30.0762,
597
+ "nll_loss": 0.6836854219436646,
598
+ "rewards/accuracies": 0.9624999761581421,
599
+ "rewards/chosen": -0.135850727558136,
600
+ "rewards/margins": 0.43231239914894104,
601
+ "rewards/rejected": -0.5681630969047546,
602
+ "step": 185
603
+ },
604
+ {
605
+ "epoch": 3.9405750798722043,
606
+ "grad_norm": 50.59651565551758,
607
+ "learning_rate": 1.6678790744015238e-08,
608
+ "log_odds_chosen": 2.563751697540283,
609
+ "log_odds_ratio": -0.16612394154071808,
610
+ "logps/chosen": -0.5317026972770691,
611
+ "logps/rejected": -2.1680614948272705,
612
+ "loss": 25.6433,
613
+ "nll_loss": 0.6562982797622681,
614
+ "rewards/accuracies": 0.965624988079071,
615
+ "rewards/chosen": -0.13292567431926727,
616
+ "rewards/margins": 0.4090896546840668,
617
+ "rewards/rejected": -0.5420153737068176,
618
+ "step": 190
619
+ },
620
+ {
621
+ "epoch": 3.9814696485623005,
622
+ "step": 192,
623
+ "total_flos": 0.0,
624
+ "train_loss": 62.26987236738205,
625
+ "train_runtime": 5592.1515,
626
+ "train_samples_per_second": 2.239,
627
+ "train_steps_per_second": 0.034
628
+ }
629
+ ],
630
+ "logging_steps": 5,
631
+ "max_steps": 192,
632
+ "num_input_tokens_seen": 0,
633
+ "num_train_epochs": 4,
634
+ "save_steps": 100,
635
+ "stateful_callbacks": {
636
+ "TrainerControl": {
637
+ "args": {
638
+ "should_epoch_stop": false,
639
+ "should_evaluate": false,
640
+ "should_log": false,
641
+ "should_save": true,
642
+ "should_training_stop": true
643
+ },
644
+ "attributes": {}
645
+ }
646
+ },
647
+ "total_flos": 0.0,
648
+ "train_batch_size": 1,
649
+ "trial_name": null,
650
+ "trial_params": null
651
+ }