Incomple committed on
Commit
cb043d9
·
verified ·
1 Parent(s): f7514aa

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: peft
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
 
 
6
  - trl
7
  - dpo
8
- - llama-factory
9
  - generated_from_trainer
10
  model-index:
11
  - name: Llama-3.1-8B-Instruct_holistic_40
@@ -17,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # Llama-3.1-8B-Instruct_holistic_40
19
 
20
- This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on an unknown dataset.
21
 
22
  ## Model description
23
 
 
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
6
+ - llama-factory
7
+ - lora
8
  - trl
9
  - dpo
 
10
  - generated_from_trainer
11
  model-index:
12
  - name: Llama-3.1-8B-Instruct_holistic_40
 
18
 
19
  # Llama-3.1-8B-Instruct_holistic_40
20
 
21
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the holistic_40 dataset.
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9999015651146766,
3
+ "total_flos": 3.074560994106409e+18,
4
+ "train_loss": 0.08538520530960554,
5
+ "train_runtime": 75713.4377,
6
+ "train_samples_per_second": 0.537,
7
+ "train_steps_per_second": 0.067
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9999015651146766,
3
+ "total_flos": 3.074560994106409e+18,
4
+ "train_loss": 0.08538520530960554,
5
+ "train_runtime": 75713.4377,
6
+ "train_samples_per_second": 0.537,
7
+ "train_steps_per_second": 0.067
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9999015651146766,
5
+ "eval_steps": 500,
6
+ "global_step": 5079,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05000492174426617,
13
+ "grad_norm": 5.337889194488525,
14
+ "learning_rate": 5e-07,
15
+ "logits/chosen": -0.5339647531509399,
16
+ "logits/rejected": -0.47227513790130615,
17
+ "logps/chosen": -71.58138275146484,
18
+ "logps/rejected": -13.533102989196777,
19
+ "loss": 0.6865,
20
+ "rewards/accuracies": 0.5871062874794006,
21
+ "rewards/chosen": 0.012079809792339802,
22
+ "rewards/margins": 0.013962473720312119,
23
+ "rewards/rejected": -0.001882663695141673,
24
+ "step": 254
25
+ },
26
+ {
27
+ "epoch": 0.10000984348853234,
28
+ "grad_norm": 5.7681660652160645,
29
+ "learning_rate": 1e-06,
30
+ "logits/chosen": -0.524512529373169,
31
+ "logits/rejected": -0.470420241355896,
32
+ "logps/chosen": -68.49125671386719,
33
+ "logps/rejected": -14.996256828308105,
34
+ "loss": 0.5103,
35
+ "rewards/accuracies": 0.8449802994728088,
36
+ "rewards/chosen": 0.4529332220554352,
37
+ "rewards/margins": 0.5830409526824951,
38
+ "rewards/rejected": -0.13010773062705994,
39
+ "step": 508
40
+ },
41
+ {
42
+ "epoch": 0.1500147652327985,
43
+ "grad_norm": 1.7311837673187256,
44
+ "learning_rate": 9.44432290527237e-07,
45
+ "logits/chosen": -0.5406456589698792,
46
+ "logits/rejected": -0.49012085795402527,
47
+ "logps/chosen": -61.494964599609375,
48
+ "logps/rejected": -28.769521713256836,
49
+ "loss": 0.1423,
50
+ "rewards/accuracies": 0.9788385629653931,
51
+ "rewards/chosen": 1.3764965534210205,
52
+ "rewards/margins": 2.9073357582092285,
53
+ "rewards/rejected": -1.5308390855789185,
54
+ "step": 762
55
+ },
56
+ {
57
+ "epoch": 0.20001968697706468,
58
+ "grad_norm": 4.213954925537109,
59
+ "learning_rate": 8.888645810544738e-07,
60
+ "logits/chosen": -0.5104743838310242,
61
+ "logits/rejected": -0.43841081857681274,
62
+ "logps/chosen": -56.98832702636719,
63
+ "logps/rejected": -44.329437255859375,
64
+ "loss": 0.0364,
65
+ "rewards/accuracies": 0.9872047305107117,
66
+ "rewards/chosen": 1.6219738721847534,
67
+ "rewards/margins": 4.794076919555664,
68
+ "rewards/rejected": -3.1721031665802,
69
+ "step": 1016
70
+ },
71
+ {
72
+ "epoch": 0.25002460872133087,
73
+ "grad_norm": 0.14951969683170319,
74
+ "learning_rate": 8.332968715817108e-07,
75
+ "logits/chosen": -0.4608861804008484,
76
+ "logits/rejected": -0.3742350935935974,
77
+ "logps/chosen": -55.1967887878418,
78
+ "logps/rejected": -51.54916000366211,
79
+ "loss": 0.0258,
80
+ "rewards/accuracies": 0.9886810779571533,
81
+ "rewards/chosen": 1.6978679895401,
82
+ "rewards/margins": 5.571296691894531,
83
+ "rewards/rejected": -3.8734288215637207,
84
+ "step": 1270
85
+ },
86
+ {
87
+ "epoch": 0.300029530465597,
88
+ "grad_norm": 0.11377181112766266,
89
+ "learning_rate": 7.777291621089477e-07,
90
+ "logits/chosen": -0.4032284915447235,
91
+ "logits/rejected": -0.2789752185344696,
92
+ "logps/chosen": -57.66849899291992,
93
+ "logps/rejected": -57.404354095458984,
94
+ "loss": 0.0181,
95
+ "rewards/accuracies": 0.9936023354530334,
96
+ "rewards/chosen": 1.815442681312561,
97
+ "rewards/margins": 6.287877559661865,
98
+ "rewards/rejected": -4.472434043884277,
99
+ "step": 1524
100
+ },
101
+ {
102
+ "epoch": 0.3500344522098632,
103
+ "grad_norm": 61.85912322998047,
104
+ "learning_rate": 7.221614526361847e-07,
105
+ "logits/chosen": -0.39197683334350586,
106
+ "logits/rejected": -0.2711484432220459,
107
+ "logps/chosen": -56.51285934448242,
108
+ "logps/rejected": -66.6546630859375,
109
+ "loss": 0.0243,
110
+ "rewards/accuracies": 0.9906495809555054,
111
+ "rewards/chosen": 1.6953144073486328,
112
+ "rewards/margins": 6.998918533325195,
113
+ "rewards/rejected": -5.3036041259765625,
114
+ "step": 1778
115
+ },
116
+ {
117
+ "epoch": 0.40003937395412936,
118
+ "grad_norm": 0.09933885931968689,
119
+ "learning_rate": 6.665937431634215e-07,
120
+ "logits/chosen": -0.35676872730255127,
121
+ "logits/rejected": -0.21091562509536743,
122
+ "logps/chosen": -58.933048248291016,
123
+ "logps/rejected": -72.55093383789062,
124
+ "loss": 0.022,
125
+ "rewards/accuracies": 0.9906495809555054,
126
+ "rewards/chosen": 1.709058165550232,
127
+ "rewards/margins": 7.6529622077941895,
128
+ "rewards/rejected": -5.943903923034668,
129
+ "step": 2032
130
+ },
131
+ {
132
+ "epoch": 0.4500442956983955,
133
+ "grad_norm": 0.3078814446926117,
134
+ "learning_rate": 6.110260336906585e-07,
135
+ "logits/chosen": -0.3692930340766907,
136
+ "logits/rejected": -0.21181651949882507,
137
+ "logps/chosen": -56.33852767944336,
138
+ "logps/rejected": -78.77378845214844,
139
+ "loss": 0.0193,
140
+ "rewards/accuracies": 0.9916338324546814,
141
+ "rewards/chosen": 1.583296775817871,
142
+ "rewards/margins": 8.18080997467041,
143
+ "rewards/rejected": -6.597513675689697,
144
+ "step": 2286
145
+ },
146
+ {
147
+ "epoch": 0.5000492174426617,
148
+ "grad_norm": 0.026140812784433365,
149
+ "learning_rate": 5.554583242178954e-07,
150
+ "logits/chosen": -0.36246979236602783,
151
+ "logits/rejected": -0.21610520780086517,
152
+ "logps/chosen": -56.37531280517578,
153
+ "logps/rejected": -81.92805480957031,
154
+ "loss": 0.0298,
155
+ "rewards/accuracies": 0.9886810779571533,
156
+ "rewards/chosen": 1.608428955078125,
157
+ "rewards/margins": 8.545919418334961,
158
+ "rewards/rejected": -6.9374895095825195,
159
+ "step": 2540
160
+ },
161
+ {
162
+ "epoch": 0.5500541391869278,
163
+ "grad_norm": 0.0485980287194252,
164
+ "learning_rate": 4.998906147451324e-07,
165
+ "logits/chosen": -0.35004737973213196,
166
+ "logits/rejected": -0.1877668797969818,
167
+ "logps/chosen": -56.29869079589844,
168
+ "logps/rejected": -84.21609497070312,
169
+ "loss": 0.0229,
170
+ "rewards/accuracies": 0.9901574850082397,
171
+ "rewards/chosen": 1.6171692609786987,
172
+ "rewards/margins": 8.883115768432617,
173
+ "rewards/rejected": -7.265947341918945,
174
+ "step": 2794
175
+ },
176
+ {
177
+ "epoch": 0.600059060931194,
178
+ "grad_norm": 0.13149231672286987,
179
+ "learning_rate": 4.4432290527236927e-07,
180
+ "logits/chosen": -0.3316061198711395,
181
+ "logits/rejected": -0.17913725972175598,
182
+ "logps/chosen": -57.6456413269043,
183
+ "logps/rejected": -89.23247528076172,
184
+ "loss": 0.0144,
185
+ "rewards/accuracies": 0.9960629940032959,
186
+ "rewards/chosen": 1.622791051864624,
187
+ "rewards/margins": 9.201993942260742,
188
+ "rewards/rejected": -7.579202651977539,
189
+ "step": 3048
190
+ },
191
+ {
192
+ "epoch": 0.6500639826754602,
193
+ "grad_norm": 0.03225807845592499,
194
+ "learning_rate": 3.887551957996062e-07,
195
+ "logits/chosen": -0.3303147554397583,
196
+ "logits/rejected": -0.16745421290397644,
197
+ "logps/chosen": -57.618045806884766,
198
+ "logps/rejected": -90.39539337158203,
199
+ "loss": 0.0165,
200
+ "rewards/accuracies": 0.9921259880065918,
201
+ "rewards/chosen": 1.5876142978668213,
202
+ "rewards/margins": 9.376455307006836,
203
+ "rewards/rejected": -7.788840293884277,
204
+ "step": 3302
205
+ },
206
+ {
207
+ "epoch": 0.7000689044197264,
208
+ "grad_norm": 0.024763241410255432,
209
+ "learning_rate": 3.3318748632684314e-07,
210
+ "logits/chosen": -0.3256986141204834,
211
+ "logits/rejected": -0.1574079841375351,
212
+ "logps/chosen": -59.10237121582031,
213
+ "logps/rejected": -92.18179321289062,
214
+ "loss": 0.0203,
215
+ "rewards/accuracies": 0.9901574850082397,
216
+ "rewards/chosen": 1.5978204011917114,
217
+ "rewards/margins": 9.474949836730957,
218
+ "rewards/rejected": -7.877129554748535,
219
+ "step": 3556
220
+ },
221
+ {
222
+ "epoch": 0.7500738261639925,
223
+ "grad_norm": 0.18685077130794525,
224
+ "learning_rate": 2.7761977685408005e-07,
225
+ "logits/chosen": -0.31320706009864807,
226
+ "logits/rejected": -0.14784303307533264,
227
+ "logps/chosen": -58.15943908691406,
228
+ "logps/rejected": -92.56378936767578,
229
+ "loss": 0.0265,
230
+ "rewards/accuracies": 0.9906495809555054,
231
+ "rewards/chosen": 1.5292613506317139,
232
+ "rewards/margins": 9.509092330932617,
233
+ "rewards/rejected": -7.979831218719482,
234
+ "step": 3810
235
+ },
236
+ {
237
+ "epoch": 0.8000787479082587,
238
+ "grad_norm": 0.01488853245973587,
239
+ "learning_rate": 2.22052067381317e-07,
240
+ "logits/chosen": -0.3247720003128052,
241
+ "logits/rejected": -0.15560078620910645,
242
+ "logps/chosen": -56.86127471923828,
243
+ "logps/rejected": -93.76302337646484,
244
+ "loss": 0.0189,
245
+ "rewards/accuracies": 0.9931102395057678,
246
+ "rewards/chosen": 1.586428165435791,
247
+ "rewards/margins": 9.719764709472656,
248
+ "rewards/rejected": -8.133337020874023,
249
+ "step": 4064
250
+ },
251
+ {
252
+ "epoch": 0.8500836696525249,
253
+ "grad_norm": 4.113521575927734,
254
+ "learning_rate": 1.6648435790855392e-07,
255
+ "logits/chosen": -0.31977561116218567,
256
+ "logits/rejected": -0.164890855550766,
257
+ "logps/chosen": -56.98260498046875,
258
+ "logps/rejected": -95.06165313720703,
259
+ "loss": 0.0233,
260
+ "rewards/accuracies": 0.9921259880065918,
261
+ "rewards/chosen": 1.4957386255264282,
262
+ "rewards/margins": 9.688507080078125,
263
+ "rewards/rejected": -8.192767143249512,
264
+ "step": 4318
265
+ },
266
+ {
267
+ "epoch": 0.900088591396791,
268
+ "grad_norm": 0.0642678439617157,
269
+ "learning_rate": 1.1091664843579085e-07,
270
+ "logits/chosen": -0.3185438811779022,
271
+ "logits/rejected": -0.1604050248861313,
272
+ "logps/chosen": -57.956459045410156,
273
+ "logps/rejected": -96.31430053710938,
274
+ "loss": 0.0202,
275
+ "rewards/accuracies": 0.9916338324546814,
276
+ "rewards/chosen": 1.4874851703643799,
277
+ "rewards/margins": 9.749979019165039,
278
+ "rewards/rejected": -8.262493133544922,
279
+ "step": 4572
280
+ },
281
+ {
282
+ "epoch": 0.9500935131410572,
283
+ "grad_norm": 0.0038960117381066084,
284
+ "learning_rate": 5.534893896302778e-08,
285
+ "logits/chosen": -0.3140643537044525,
286
+ "logits/rejected": -0.1565851867198944,
287
+ "logps/chosen": -59.727909088134766,
288
+ "logps/rejected": -95.10466003417969,
289
+ "loss": 0.0173,
290
+ "rewards/accuracies": 0.9936023354530334,
291
+ "rewards/chosen": 1.604878544807434,
292
+ "rewards/margins": 9.842850685119629,
293
+ "rewards/rejected": -8.237971305847168,
294
+ "step": 4826
295
+ },
296
+ {
297
+ "epoch": 0.9999015651146766,
298
+ "step": 5079,
299
+ "total_flos": 3.074560994106409e+18,
300
+ "train_loss": 0.08538520530960554,
301
+ "train_runtime": 75713.4377,
302
+ "train_samples_per_second": 0.537,
303
+ "train_steps_per_second": 0.067
304
+ }
305
+ ],
306
+ "logging_steps": 254,
307
+ "max_steps": 5079,
308
+ "num_input_tokens_seen": 0,
309
+ "num_train_epochs": 1,
310
+ "save_steps": 500,
311
+ "stateful_callbacks": {
312
+ "TrainerControl": {
313
+ "args": {
314
+ "should_epoch_stop": false,
315
+ "should_evaluate": false,
316
+ "should_log": false,
317
+ "should_save": true,
318
+ "should_training_stop": true
319
+ },
320
+ "attributes": {}
321
+ }
322
+ },
323
+ "total_flos": 3.074560994106409e+18,
324
+ "train_batch_size": 2,
325
+ "trial_name": null,
326
+ "trial_params": null
327
+ }
training_loss.png ADDED
training_rewards_accuracies.png ADDED