li-muyang committed on
Commit
b7fa9c0
·
verified ·
1 Parent(s): f8f1126

Model save

Browse files
README.md CHANGED
@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.5754
20
- - Rewards/chosen: -0.2750
21
- - Rewards/rejected: -0.7430
22
  - Rewards/accuracies: 0.75
23
- - Rewards/margins: 0.4681
24
- - Logps/rejected: -342.4480
25
- - Logps/chosen: -296.1005
26
- - Logits/rejected: -2.7716
27
- - Logits/chosen: -2.7940
28
 
29
  ## Model description
30
 
@@ -61,7 +61,7 @@ The following hyperparameters were used during training:
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
- | 0.5794 | 0.4982 | 238 | 0.5754 | -0.2750 | -0.7430 | 0.75 | 0.4681 | -342.4480 | -296.1005 | -2.7716 | -2.7940 |
65
 
66
 
67
  ### Framework versions
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.5765
20
+ - Rewards/chosen: -0.2836
21
+ - Rewards/rejected: -0.7661
22
  - Rewards/accuracies: 0.75
23
+ - Rewards/margins: 0.4825
24
+ - Logps/rejected: -342.4585
25
+ - Logps/chosen: -295.9262
26
+ - Logits/rejected: -2.6851
27
+ - Logits/chosen: -2.7117
28
 
29
  ## Model description
30
 
 
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.5798 | 0.4982 | 238 | 0.5765 | -0.2836 | -0.7661 | 0.75 | 0.4825 | -342.4585 | -295.9262 | -2.6851 | -2.7117 |
65
 
66
 
67
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.4981684981684982,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6078168283991453,
5
- "train_runtime": 7171.6132,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 4.248,
8
  "train_steps_per_second": 0.033
9
  }
 
1
  {
2
  "epoch": 0.4981684981684982,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6047679316095945,
5
+ "train_runtime": 7201.8834,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 4.23,
8
  "train_steps_per_second": 0.033
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "data/sft/zephyr-7b-sft-1e-every25/checkpoint-800",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "data/sft/zephyr-7b-sft-1e-every25/checkpoint-900",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ab4c3dd2c0d3fd4f7fcfff31c89a77d2f74b0fc576d9fe7599f27bd8b2a4b4b
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d93eb6300b877466ea2f4e521cf819b0ef638a9ea09ad57832ad62ea8917b3f
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:222400a563ac3e0a92272027354d4791e76e7c0d11fa4bb7048704eab69b9332
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09174fa607bcca81805e06045dda478fdf2a5dc93bfb0a52c322bf4fa96de23a
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6966cc8a2dce33eff6646f5951b4ceaa5645114baa7ef5127ff52edcfc21f1c0
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81fa5d1748aec6d9e1442511af340a774590249d11e7d8f0b2e7d2f38c73ea44
3
  size 4540516344
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.4981684981684982,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6078168283991453,
5
- "train_runtime": 7171.6132,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 4.248,
8
  "train_steps_per_second": 0.033
9
  }
 
1
  {
2
  "epoch": 0.4981684981684982,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6047679316095945,
5
+ "train_runtime": 7201.8834,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 4.23,
8
  "train_steps_per_second": 0.033
9
  }
trainer_state.json CHANGED
@@ -10,12 +10,12 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0020931449502878076,
13
- "grad_norm": 12.806140486914385,
14
  "learning_rate": 2.083333333333333e-08,
15
- "logits/chosen": -2.975465774536133,
16
- "logits/rejected": -2.9397153854370117,
17
- "logps/chosen": -317.1812744140625,
18
- "logps/rejected": -362.2382507324219,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -25,372 +25,372 @@
25
  },
26
  {
27
  "epoch": 0.020931449502878074,
28
- "grad_norm": 10.350232636069011,
29
  "learning_rate": 2.0833333333333333e-07,
30
- "logits/chosen": -2.766873836517334,
31
- "logits/rejected": -2.7434821128845215,
32
- "logps/chosen": -315.6714782714844,
33
- "logps/rejected": -285.2418518066406,
34
  "loss": 0.693,
35
- "rewards/accuracies": 0.4652777910232544,
36
- "rewards/chosen": 0.0004917462356388569,
37
- "rewards/margins": 0.00031292904168367386,
38
- "rewards/rejected": 0.00017881726671475917,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.04186289900575615,
43
- "grad_norm": 12.821109233939524,
44
  "learning_rate": 4.1666666666666667e-07,
45
- "logits/chosen": -2.844773530960083,
46
- "logits/rejected": -2.794069766998291,
47
- "logps/chosen": -312.2287902832031,
48
- "logps/rejected": -285.95184326171875,
49
- "loss": 0.6913,
50
- "rewards/accuracies": 0.5687500238418579,
51
- "rewards/chosen": 0.012318294495344162,
52
- "rewards/margins": 0.005521962884813547,
53
- "rewards/rejected": 0.0067963311448693275,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.06279434850863422,
58
- "grad_norm": 7.795531413975491,
59
  "learning_rate": 4.99030821197584e-07,
60
- "logits/chosen": -2.828461170196533,
61
- "logits/rejected": -2.771340847015381,
62
- "logps/chosen": -291.7936706542969,
63
- "logps/rejected": -249.0801544189453,
64
- "loss": 0.6845,
65
- "rewards/accuracies": 0.6312500238418579,
66
- "rewards/chosen": 0.05275644734501839,
67
- "rewards/margins": 0.02750842645764351,
68
- "rewards/rejected": 0.02524801716208458,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.0837257980115123,
73
- "grad_norm": 7.729964276089227,
74
  "learning_rate": 4.931352528237397e-07,
75
- "logits/chosen": -2.7441623210906982,
76
- "logits/rejected": -2.7178843021392822,
77
- "logps/chosen": -270.9248962402344,
78
- "logps/rejected": -264.02130126953125,
79
- "loss": 0.6709,
80
- "rewards/accuracies": 0.668749988079071,
81
- "rewards/chosen": 0.047667648643255234,
82
- "rewards/margins": 0.04670105502009392,
83
- "rewards/rejected": 0.0009665900724940002,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.10465724751439037,
88
- "grad_norm": 11.62937518271555,
89
  "learning_rate": 4.820092227512735e-07,
90
- "logits/chosen": -2.778378963470459,
91
- "logits/rejected": -2.7200958728790283,
92
- "logps/chosen": -285.19012451171875,
93
- "logps/rejected": -288.2344665527344,
94
- "loss": 0.6545,
95
  "rewards/accuracies": 0.71875,
96
- "rewards/chosen": -0.017544183880090714,
97
- "rewards/margins": 0.08298598229885101,
98
- "rewards/rejected": -0.10053016990423203,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.12558869701726844,
103
- "grad_norm": 13.56070671513526,
104
  "learning_rate": 4.658920803689553e-07,
105
- "logits/chosen": -2.8251986503601074,
106
- "logits/rejected": -2.7807350158691406,
107
- "logps/chosen": -258.30609130859375,
108
- "logps/rejected": -263.3247985839844,
109
- "loss": 0.639,
110
- "rewards/accuracies": 0.71875,
111
- "rewards/chosen": -0.0033275161404162645,
112
- "rewards/margins": 0.15021316707134247,
113
- "rewards/rejected": -0.15354067087173462,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.14652014652014653,
118
- "grad_norm": 10.218106790158542,
119
  "learning_rate": 4.4513054666826144e-07,
120
- "logits/chosen": -2.790792942047119,
121
- "logits/rejected": -2.7651076316833496,
122
- "logps/chosen": -312.69342041015625,
123
- "logps/rejected": -314.4419860839844,
124
- "loss": 0.6185,
125
- "rewards/accuracies": 0.6937500238418579,
126
- "rewards/chosen": -0.11530323326587677,
127
- "rewards/margins": 0.186301589012146,
128
- "rewards/rejected": -0.3016048073768616,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.1674515960230246,
133
- "grad_norm": 17.77112056066791,
134
  "learning_rate": 4.201712553872657e-07,
135
- "logits/chosen": -2.862198829650879,
136
- "logits/rejected": -2.790428400039673,
137
- "logps/chosen": -347.1138610839844,
138
- "logps/rejected": -294.9046325683594,
139
- "loss": 0.6071,
140
  "rewards/accuracies": 0.7562500238418579,
141
- "rewards/chosen": -0.060042936354875565,
142
- "rewards/margins": 0.2697007358074188,
143
- "rewards/rejected": -0.3297436833381653,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 0.18838304552590268,
148
- "grad_norm": 13.034563565082575,
149
  "learning_rate": 3.9155114477557926e-07,
150
- "logits/chosen": -2.8054041862487793,
151
- "logits/rejected": -2.764477252960205,
152
- "logps/chosen": -276.40655517578125,
153
- "logps/rejected": -307.20379638671875,
154
- "loss": 0.5917,
155
- "rewards/accuracies": 0.6812499761581421,
156
- "rewards/chosen": -0.19227294623851776,
157
- "rewards/margins": 0.30155879259109497,
158
- "rewards/rejected": -0.49383172392845154,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 0.20931449502878074,
163
- "grad_norm": 16.1083959917105,
164
  "learning_rate": 3.598859066780754e-07,
165
- "logits/chosen": -2.7885613441467285,
166
- "logits/rejected": -2.766613483428955,
167
- "logps/chosen": -324.69219970703125,
168
- "logps/rejected": -338.00518798828125,
169
- "loss": 0.5974,
170
- "rewards/accuracies": 0.762499988079071,
171
- "rewards/chosen": -0.1883513629436493,
172
- "rewards/margins": 0.3904525637626648,
173
- "rewards/rejected": -0.5788038969039917,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 0.2302459445316588,
178
- "grad_norm": 22.332005589786586,
179
  "learning_rate": 3.2585674142717477e-07,
180
- "logits/chosen": -2.8293983936309814,
181
- "logits/rejected": -2.7860958576202393,
182
- "logps/chosen": -325.58624267578125,
183
- "logps/rejected": -338.5185546875,
184
- "loss": 0.6035,
185
- "rewards/accuracies": 0.6312500238418579,
186
- "rewards/chosen": -0.29208147525787354,
187
- "rewards/margins": 0.30902299284935,
188
- "rewards/rejected": -0.6011044383049011,
189
  "step": 110
190
  },
191
  {
192
  "epoch": 0.25117739403453687,
193
- "grad_norm": 13.280894933092178,
194
  "learning_rate": 2.9019570347986706e-07,
195
- "logits/chosen": -2.745093822479248,
196
- "logits/rejected": -2.7524163722991943,
197
- "logps/chosen": -314.64752197265625,
198
- "logps/rejected": -340.332275390625,
199
- "loss": 0.5889,
200
- "rewards/accuracies": 0.731249988079071,
201
- "rewards/chosen": -0.3470572233200073,
202
- "rewards/margins": 0.3631521761417389,
203
- "rewards/rejected": -0.7102094292640686,
204
  "step": 120
205
  },
206
  {
207
  "epoch": 0.272108843537415,
208
- "grad_norm": 13.497407162201402,
209
  "learning_rate": 2.536699530523291e-07,
210
- "logits/chosen": -2.8007476329803467,
211
- "logits/rejected": -2.786205768585205,
212
- "logps/chosen": -319.64202880859375,
213
- "logps/rejected": -330.56878662109375,
214
- "loss": 0.5715,
215
- "rewards/accuracies": 0.6937500238418579,
216
- "rewards/chosen": -0.23361213505268097,
217
- "rewards/margins": 0.30114299058914185,
218
- "rewards/rejected": -0.5347551107406616,
219
  "step": 130
220
  },
221
  {
222
  "epoch": 0.29304029304029305,
223
- "grad_norm": 20.933507562985636,
224
  "learning_rate": 2.1706525253979534e-07,
225
- "logits/chosen": -2.835508108139038,
226
- "logits/rejected": -2.8120312690734863,
227
- "logps/chosen": -345.1457214355469,
228
- "logps/rejected": -339.02618408203125,
229
- "loss": 0.5833,
230
  "rewards/accuracies": 0.7562500238418579,
231
- "rewards/chosen": -0.22297212481498718,
232
- "rewards/margins": 0.44189220666885376,
233
- "rewards/rejected": -0.6648643612861633,
234
  "step": 140
235
  },
236
  {
237
  "epoch": 0.3139717425431711,
238
- "grad_norm": 19.228587426808158,
239
  "learning_rate": 1.8116906275593507e-07,
240
- "logits/chosen": -2.821563720703125,
241
- "logits/rejected": -2.793996572494507,
242
- "logps/chosen": -326.88836669921875,
243
- "logps/rejected": -324.4208984375,
244
- "loss": 0.5803,
245
- "rewards/accuracies": 0.706250011920929,
246
- "rewards/chosen": -0.378085196018219,
247
- "rewards/margins": 0.3385787606239319,
248
- "rewards/rejected": -0.7166639566421509,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 0.3349031920460492,
253
- "grad_norm": 19.174465914045467,
254
  "learning_rate": 1.4675360263490295e-07,
255
- "logits/chosen": -2.8593170642852783,
256
- "logits/rejected": -2.820272922515869,
257
- "logps/chosen": -310.5607604980469,
258
- "logps/rejected": -326.0252990722656,
259
- "loss": 0.582,
260
  "rewards/accuracies": 0.7250000238418579,
261
- "rewards/chosen": -0.3000241219997406,
262
- "rewards/margins": 0.4080016016960144,
263
- "rewards/rejected": -0.7080257534980774,
264
  "step": 160
265
  },
266
  {
267
  "epoch": 0.35583464154892724,
268
- "grad_norm": 18.167727970452024,
269
  "learning_rate": 1.1455923682523475e-07,
270
- "logits/chosen": -2.747973918914795,
271
- "logits/rejected": -2.7372567653656006,
272
- "logps/chosen": -295.9173278808594,
273
- "logps/rejected": -303.07861328125,
274
- "loss": 0.587,
275
- "rewards/accuracies": 0.643750011920929,
276
- "rewards/chosen": -0.3046445846557617,
277
- "rewards/margins": 0.3219013214111328,
278
- "rewards/rejected": -0.6265459060668945,
279
  "step": 170
280
  },
281
  {
282
  "epoch": 0.37676609105180536,
283
- "grad_norm": 14.998685600639057,
284
  "learning_rate": 8.527854855097224e-08,
285
- "logits/chosen": -2.834468364715576,
286
- "logits/rejected": -2.8099303245544434,
287
- "logps/chosen": -307.76336669921875,
288
- "logps/rejected": -314.2436828613281,
289
- "loss": 0.5608,
290
  "rewards/accuracies": 0.706250011920929,
291
- "rewards/chosen": -0.32118621468544006,
292
- "rewards/margins": 0.3334035873413086,
293
- "rewards/rejected": -0.6545897722244263,
294
  "step": 180
295
  },
296
  {
297
  "epoch": 0.3976975405546834,
298
- "grad_norm": 15.220786257352342,
299
  "learning_rate": 5.9541440373546445e-08,
300
- "logits/chosen": -2.776503801345825,
301
- "logits/rejected": -2.7547481060028076,
302
- "logps/chosen": -294.9460754394531,
303
- "logps/rejected": -318.6103820800781,
304
- "loss": 0.5767,
305
- "rewards/accuracies": 0.6499999761581421,
306
- "rewards/chosen": -0.32699090242385864,
307
- "rewards/margins": 0.2969752252101898,
308
- "rewards/rejected": -0.6239660978317261,
309
  "step": 190
310
  },
311
  {
312
  "epoch": 0.4186289900575615,
313
- "grad_norm": 19.32668573129983,
314
  "learning_rate": 3.790158337517127e-08,
315
- "logits/chosen": -2.746075391769409,
316
- "logits/rejected": -2.7452704906463623,
317
- "logps/chosen": -289.711669921875,
318
- "logps/rejected": -340.4427795410156,
319
- "loss": 0.5829,
320
- "rewards/accuracies": 0.7124999761581421,
321
- "rewards/chosen": -0.31456780433654785,
322
- "rewards/margins": 0.3399312496185303,
323
- "rewards/rejected": -0.6544990539550781,
324
  "step": 200
325
  },
326
  {
327
  "epoch": 0.43956043956043955,
328
- "grad_norm": 15.573661203693316,
329
  "learning_rate": 2.0824506276503894e-08,
330
- "logits/chosen": -2.794292688369751,
331
- "logits/rejected": -2.7379298210144043,
332
- "logps/chosen": -354.16455078125,
333
- "logps/rejected": -326.73974609375,
334
- "loss": 0.5882,
335
- "rewards/accuracies": 0.699999988079071,
336
- "rewards/chosen": -0.3771507441997528,
337
- "rewards/margins": 0.36129769682884216,
338
- "rewards/rejected": -0.7384485006332397,
339
  "step": 210
340
  },
341
  {
342
  "epoch": 0.4604918890633176,
343
- "grad_norm": 15.206763088206086,
344
  "learning_rate": 8.677580722139671e-09,
345
- "logits/chosen": -2.721893310546875,
346
- "logits/rejected": -2.735137939453125,
347
- "logps/chosen": -293.5481872558594,
348
- "logps/rejected": -323.8240661621094,
349
- "loss": 0.5737,
350
- "rewards/accuracies": 0.675000011920929,
351
- "rewards/chosen": -0.3610805869102478,
352
- "rewards/margins": 0.3861430883407593,
353
- "rewards/rejected": -0.7472237348556519,
354
  "step": 220
355
  },
356
  {
357
  "epoch": 0.48142333856619574,
358
- "grad_norm": 16.74159983073626,
359
  "learning_rate": 1.722118176089915e-09,
360
- "logits/chosen": -2.821955919265747,
361
- "logits/rejected": -2.7882137298583984,
362
- "logps/chosen": -294.47637939453125,
363
- "logps/rejected": -329.96240234375,
364
- "loss": 0.5794,
365
- "rewards/accuracies": 0.762499988079071,
366
- "rewards/chosen": -0.27040696144104004,
367
- "rewards/margins": 0.5722111463546753,
368
- "rewards/rejected": -0.8426181077957153,
369
  "step": 230
370
  },
371
  {
372
  "epoch": 0.4981684981684982,
373
- "eval_logits/chosen": -2.794038772583008,
374
- "eval_logits/rejected": -2.771556854248047,
375
- "eval_logps/chosen": -296.10052490234375,
376
- "eval_logps/rejected": -342.4480285644531,
377
- "eval_loss": 0.5754034519195557,
378
  "eval_rewards/accuracies": 0.75,
379
- "eval_rewards/chosen": -0.27497145533561707,
380
- "eval_rewards/margins": 0.4680546224117279,
381
- "eval_rewards/rejected": -0.743026077747345,
382
- "eval_runtime": 169.1974,
383
- "eval_samples_per_second": 11.821,
384
- "eval_steps_per_second": 0.189,
385
  "step": 238
386
  },
387
  {
388
  "epoch": 0.4981684981684982,
389
  "step": 238,
390
  "total_flos": 0.0,
391
- "train_loss": 0.6078168283991453,
392
- "train_runtime": 7171.6132,
393
- "train_samples_per_second": 4.248,
394
  "train_steps_per_second": 0.033
395
  }
396
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0020931449502878076,
13
+ "grad_norm": 12.610560260880211,
14
  "learning_rate": 2.083333333333333e-08,
15
+ "logits/chosen": -2.99812388420105,
16
+ "logits/rejected": -2.9638350009918213,
17
+ "logps/chosen": -315.4977722167969,
18
+ "logps/rejected": -361.9093322753906,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
25
  },
26
  {
27
  "epoch": 0.020931449502878074,
28
+ "grad_norm": 9.936649993419687,
29
  "learning_rate": 2.0833333333333333e-07,
30
+ "logits/chosen": -2.788989543914795,
31
+ "logits/rejected": -2.7662315368652344,
32
+ "logps/chosen": -314.0841064453125,
33
+ "logps/rejected": -282.2810974121094,
34
  "loss": 0.693,
35
+ "rewards/accuracies": 0.4722222089767456,
36
+ "rewards/chosen": 0.0008651986136101186,
37
+ "rewards/margins": 0.0003082120092585683,
38
+ "rewards/rejected": 0.0005569865461438894,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.04186289900575615,
43
+ "grad_norm": 8.667356504170359,
44
  "learning_rate": 4.1666666666666667e-07,
45
+ "logits/chosen": -2.8659820556640625,
46
+ "logits/rejected": -2.8171162605285645,
47
+ "logps/chosen": -311.6136779785156,
48
+ "logps/rejected": -284.09893798828125,
49
+ "loss": 0.6909,
50
+ "rewards/accuracies": 0.6000000238418579,
51
+ "rewards/chosen": 0.013467146083712578,
52
+ "rewards/margins": 0.004726298153400421,
53
+ "rewards/rejected": 0.008740848861634731,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.06279434850863422,
58
+ "grad_norm": 8.034528876736365,
59
  "learning_rate": 4.99030821197584e-07,
60
+ "logits/chosen": -2.8421683311462402,
61
+ "logits/rejected": -2.7854294776916504,
62
+ "logps/chosen": -289.38385009765625,
63
+ "logps/rejected": -247.28732299804688,
64
+ "loss": 0.6833,
65
+ "rewards/accuracies": 0.65625,
66
+ "rewards/chosen": 0.05833645910024643,
67
+ "rewards/margins": 0.024186396971344948,
68
+ "rewards/rejected": 0.03415006399154663,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.0837257980115123,
73
+ "grad_norm": 7.833193286645188,
74
  "learning_rate": 4.931352528237397e-07,
75
+ "logits/chosen": -2.7391459941864014,
76
+ "logits/rejected": -2.716399669647217,
77
+ "logps/chosen": -269.075927734375,
78
+ "logps/rejected": -262.9975891113281,
79
+ "loss": 0.6682,
80
+ "rewards/accuracies": 0.6625000238418579,
81
+ "rewards/chosen": 0.052591562271118164,
82
+ "rewards/margins": 0.0480208583176136,
83
+ "rewards/rejected": 0.0045707011595368385,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.10465724751439037,
88
+ "grad_norm": 8.72813708365478,
89
  "learning_rate": 4.820092227512735e-07,
90
+ "logits/chosen": -2.7570505142211914,
91
+ "logits/rejected": -2.696572780609131,
92
+ "logps/chosen": -282.76092529296875,
93
+ "logps/rejected": -286.132568359375,
94
+ "loss": 0.6548,
95
  "rewards/accuracies": 0.71875,
96
+ "rewards/chosen": -0.010979737155139446,
97
+ "rewards/margins": 0.09300607442855835,
98
+ "rewards/rejected": -0.10398580878973007,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.12558869701726844,
103
+ "grad_norm": 13.480613892862241,
104
  "learning_rate": 4.658920803689553e-07,
105
+ "logits/chosen": -2.789520502090454,
106
+ "logits/rejected": -2.7424087524414062,
107
+ "logps/chosen": -258.74908447265625,
108
+ "logps/rejected": -264.25225830078125,
109
+ "loss": 0.6357,
110
+ "rewards/accuracies": 0.737500011920929,
111
+ "rewards/chosen": -0.012783573940396309,
112
+ "rewards/margins": 0.15775156021118164,
113
+ "rewards/rejected": -0.1705351322889328,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 0.14652014652014653,
118
+ "grad_norm": 10.234102018070638,
119
  "learning_rate": 4.4513054666826144e-07,
120
+ "logits/chosen": -2.7376105785369873,
121
+ "logits/rejected": -2.709526300430298,
122
+ "logps/chosen": -311.8622131347656,
123
+ "logps/rejected": -316.22576904296875,
124
+ "loss": 0.6163,
125
+ "rewards/accuracies": 0.699999988079071,
126
+ "rewards/chosen": -0.11481525003910065,
127
+ "rewards/margins": 0.20760241150856018,
128
+ "rewards/rejected": -0.32241764664649963,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.1674515960230246,
133
+ "grad_norm": 20.552548250652773,
134
  "learning_rate": 4.201712553872657e-07,
135
+ "logits/chosen": -2.816467761993408,
136
+ "logits/rejected": -2.7381579875946045,
137
+ "logps/chosen": -341.7183837890625,
138
+ "logps/rejected": -293.2793884277344,
139
+ "loss": 0.6055,
140
  "rewards/accuracies": 0.7562500238418579,
141
+ "rewards/chosen": -0.03545045107603073,
142
+ "rewards/margins": 0.28459519147872925,
143
+ "rewards/rejected": -0.3200456500053406,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 0.18838304552590268,
148
+ "grad_norm": 13.720188030623374,
149
  "learning_rate": 3.9155114477557926e-07,
150
+ "logits/chosen": -2.7560811042785645,
151
+ "logits/rejected": -2.7106573581695557,
152
+ "logps/chosen": -275.1942443847656,
153
+ "logps/rejected": -306.68206787109375,
154
+ "loss": 0.5887,
155
+ "rewards/accuracies": 0.6875,
156
+ "rewards/chosen": -0.1888897567987442,
157
+ "rewards/margins": 0.30051952600479126,
158
+ "rewards/rejected": -0.48940929770469666,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 0.20931449502878074,
163
+ "grad_norm": 16.303248781308948,
164
  "learning_rate": 3.598859066780754e-07,
165
+ "logits/chosen": -2.7324023246765137,
166
+ "logits/rejected": -2.7062618732452393,
167
+ "logps/chosen": -322.99603271484375,
168
+ "logps/rejected": -339.0654602050781,
169
+ "loss": 0.5902,
170
+ "rewards/accuracies": 0.768750011920929,
171
+ "rewards/chosen": -0.19065451622009277,
172
+ "rewards/margins": 0.39800626039505005,
173
+ "rewards/rejected": -0.588660717010498,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 0.2302459445316588,
178
+ "grad_norm": 20.16983103502839,
179
  "learning_rate": 3.2585674142717477e-07,
180
+ "logits/chosen": -2.7619636058807373,
181
+ "logits/rejected": -2.713339328765869,
182
+ "logps/chosen": -338.515869140625,
183
+ "logps/rejected": -351.4220886230469,
184
+ "loss": 0.5967,
185
+ "rewards/accuracies": 0.6499999761581421,
186
+ "rewards/chosen": -0.4326956868171692,
187
+ "rewards/margins": 0.302705854177475,
188
+ "rewards/rejected": -0.7354015111923218,
189
  "step": 110
190
  },
191
  {
192
  "epoch": 0.25117739403453687,
193
+ "grad_norm": 18.288819495100896,
194
  "learning_rate": 2.9019570347986706e-07,
195
+ "logits/chosen": -2.670757293701172,
196
+ "logits/rejected": -2.6768569946289062,
197
+ "logps/chosen": -309.2828674316406,
198
+ "logps/rejected": -339.7432861328125,
199
+ "loss": 0.579,
200
+ "rewards/accuracies": 0.71875,
201
+ "rewards/chosen": -0.33222970366477966,
202
+ "rewards/margins": 0.40109533071517944,
203
+ "rewards/rejected": -0.7333250045776367,
204
  "step": 120
205
  },
206
  {
207
  "epoch": 0.272108843537415,
208
+ "grad_norm": 15.354064982647847,
209
  "learning_rate": 2.536699530523291e-07,
210
+ "logits/chosen": -2.722832679748535,
211
+ "logits/rejected": -2.7066872119903564,
212
+ "logps/chosen": -318.85833740234375,
213
+ "logps/rejected": -335.77911376953125,
214
+ "loss": 0.5666,
215
+ "rewards/accuracies": 0.6812499761581421,
216
+ "rewards/chosen": -0.2824149429798126,
217
+ "rewards/margins": 0.33272355794906616,
218
+ "rewards/rejected": -0.6151384711265564,
219
  "step": 130
220
  },
221
  {
222
  "epoch": 0.29304029304029305,
223
+ "grad_norm": 17.28336393009731,
224
  "learning_rate": 2.1706525253979534e-07,
225
+ "logits/chosen": -2.7583320140838623,
226
+ "logits/rejected": -2.729793071746826,
227
+ "logps/chosen": -346.2691955566406,
228
+ "logps/rejected": -344.5732116699219,
229
+ "loss": 0.5763,
230
  "rewards/accuracies": 0.7562500238418579,
231
+ "rewards/chosen": -0.24767926335334778,
232
+ "rewards/margins": 0.4790892004966736,
233
+ "rewards/rejected": -0.7267683744430542,
234
  "step": 140
235
  },
236
  {
237
  "epoch": 0.3139717425431711,
238
+ "grad_norm": 19.361596281568826,
239
  "learning_rate": 1.8116906275593507e-07,
240
+ "logits/chosen": -2.7468533515930176,
241
+ "logits/rejected": -2.7160446643829346,
242
+ "logps/chosen": -323.2176208496094,
243
+ "logps/rejected": -324.5989990234375,
244
+ "loss": 0.5796,
245
+ "rewards/accuracies": 0.6875,
246
+ "rewards/chosen": -0.3653566539287567,
247
+ "rewards/margins": 0.3610517382621765,
248
+ "rewards/rejected": -0.7264083623886108,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 0.3349031920460492,
253
+ "grad_norm": 16.313995999292402,
254
  "learning_rate": 1.4675360263490295e-07,
255
+ "logits/chosen": -2.7838118076324463,
256
+ "logits/rejected": -2.7365283966064453,
257
+ "logps/chosen": -319.6917724609375,
258
+ "logps/rejected": -340.581787109375,
259
+ "loss": 0.5759,
260
  "rewards/accuracies": 0.7250000238418579,
261
+ "rewards/chosen": -0.40762004256248474,
262
+ "rewards/margins": 0.4462064802646637,
263
+ "rewards/rejected": -0.8538265228271484,
264
  "step": 160
265
  },
266
  {
267
  "epoch": 0.35583464154892724,
268
+ "grad_norm": 22.925451744364146,
269
  "learning_rate": 1.1455923682523475e-07,
270
+ "logits/chosen": -2.659465789794922,
271
+ "logits/rejected": -2.644275188446045,
272
+ "logps/chosen": -311.614990234375,
273
+ "logps/rejected": -323.3099670410156,
274
+ "loss": 0.585,
275
+ "rewards/accuracies": 0.668749988079071,
276
+ "rewards/chosen": -0.4685409665107727,
277
+ "rewards/margins": 0.36185160279273987,
278
+ "rewards/rejected": -0.8303925395011902,
279
  "step": 170
280
  },
281
  {
282
  "epoch": 0.37676609105180536,
283
+ "grad_norm": 13.975706307085083,
284
  "learning_rate": 8.527854855097224e-08,
285
+ "logits/chosen": -2.7547802925109863,
286
+ "logits/rejected": -2.7258901596069336,
287
+ "logps/chosen": -313.0289001464844,
288
+ "logps/rejected": -321.9787902832031,
289
+ "loss": 0.5538,
290
  "rewards/accuracies": 0.706250011920929,
291
+ "rewards/chosen": -0.37977224588394165,
292
+ "rewards/margins": 0.3600301146507263,
293
+ "rewards/rejected": -0.739802360534668,
294
  "step": 180
295
  },
296
  {
297
  "epoch": 0.3976975405546834,
298
+ "grad_norm": 16.933160186335314,
299
  "learning_rate": 5.9541440373546445e-08,
300
+ "logits/chosen": -2.689134120941162,
301
+ "logits/rejected": -2.664074420928955,
302
+ "logps/chosen": -296.32672119140625,
303
+ "logps/rejected": -319.95001220703125,
304
+ "loss": 0.5755,
305
+ "rewards/accuracies": 0.668749988079071,
306
+ "rewards/chosen": -0.34621429443359375,
307
+ "rewards/margins": 0.2947639226913452,
308
+ "rewards/rejected": -0.6409782767295837,
309
  "step": 190
310
  },
311
  {
312
  "epoch": 0.4186289900575615,
313
+ "grad_norm": 21.908273233954745,
314
  "learning_rate": 3.790158337517127e-08,
315
+ "logits/chosen": -2.653926372528076,
316
+ "logits/rejected": -2.6524300575256348,
317
+ "logps/chosen": -289.92926025390625,
318
+ "logps/rejected": -341.435546875,
319
+ "loss": 0.5823,
320
+ "rewards/accuracies": 0.6937500238418579,
321
+ "rewards/chosen": -0.3269258737564087,
322
+ "rewards/margins": 0.3465423882007599,
323
+ "rewards/rejected": -0.6734683513641357,
324
  "step": 200
325
  },
326
  {
327
  "epoch": 0.43956043956043955,
328
+ "grad_norm": 18.994509414379117,
329
  "learning_rate": 2.0824506276503894e-08,
330
+ "logits/chosen": -2.7132248878479004,
331
+ "logits/rejected": -2.648906946182251,
332
+ "logps/chosen": -352.23211669921875,
333
+ "logps/rejected": -327.8628234863281,
334
+ "loss": 0.5851,
335
+ "rewards/accuracies": 0.6937500238418579,
336
+ "rewards/chosen": -0.38004761934280396,
337
+ "rewards/margins": 0.3670490086078644,
338
+ "rewards/rejected": -0.7470966577529907,
339
  "step": 210
340
  },
341
  {
342
  "epoch": 0.4604918890633176,
343
+ "grad_norm": 16.241884556359377,
344
  "learning_rate": 8.677580722139671e-09,
345
+ "logits/chosen": -2.635542154312134,
346
+ "logits/rejected": -2.6469016075134277,
347
+ "logps/chosen": -288.9513244628906,
348
+ "logps/rejected": -323.00494384765625,
349
+ "loss": 0.573,
350
+ "rewards/accuracies": 0.6937500238418579,
351
+ "rewards/chosen": -0.3638390898704529,
352
+ "rewards/margins": 0.3917424976825714,
353
+ "rewards/rejected": -0.7555815577507019,
354
  "step": 220
355
  },
356
  {
357
  "epoch": 0.48142333856619574,
358
+ "grad_norm": 22.899672406649472,
359
  "learning_rate": 1.722118176089915e-09,
360
+ "logits/chosen": -2.742950439453125,
361
+ "logits/rejected": -2.702911376953125,
362
+ "logps/chosen": -292.0196533203125,
363
+ "logps/rejected": -330.7127380371094,
364
+ "loss": 0.5798,
365
+ "rewards/accuracies": 0.800000011920929,
366
+ "rewards/chosen": -0.25979113578796387,
367
+ "rewards/margins": 0.5986371040344238,
368
+ "rewards/rejected": -0.8584282994270325,
369
  "step": 230
370
  },
371
  {
372
  "epoch": 0.4981684981684982,
373
+ "eval_logits/chosen": -2.711683750152588,
374
+ "eval_logits/rejected": -2.68514084815979,
375
+ "eval_logps/chosen": -295.9261779785156,
376
+ "eval_logps/rejected": -342.4585266113281,
377
+ "eval_loss": 0.576471209526062,
378
  "eval_rewards/accuracies": 0.75,
379
+ "eval_rewards/chosen": -0.283584326505661,
380
+ "eval_rewards/margins": 0.48247623443603516,
381
+ "eval_rewards/rejected": -0.7660605311393738,
382
+ "eval_runtime": 168.4238,
383
+ "eval_samples_per_second": 11.875,
384
+ "eval_steps_per_second": 0.19,
385
  "step": 238
386
  },
387
  {
388
  "epoch": 0.4981684981684982,
389
  "step": 238,
390
  "total_flos": 0.0,
391
+ "train_loss": 0.6047679316095945,
392
+ "train_runtime": 7201.8834,
393
+ "train_samples_per_second": 4.23,
394
  "train_steps_per_second": 0.033
395
  }
396
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99dcad4a7ce8ebd62f827204bb8667baa90a4ae760788ca43c162ee9caab450d
3
  size 7672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13885dc2cfddf0eb7080d5bad9f264e574a65e3f8468c00bc6a65382edb01768
3
  size 7672