chloeli committed on
Commit
3efc39f
·
verified ·
1 Parent(s): b44b7e6

Model save

Browse files
Files changed (4) hide show
  1. README.md +58 -0
  2. all_results.json +8 -0
  3. train_results.json +8 -0
  4. trainer_state.json +475 -0
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-7B-Instruct
3
+ library_name: transformers
4
+ model_name: qwen-2.5-7b-instruct-sft-qlora-countdown-search-1k
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ license: license
10
+ ---
11
+
12
+ # Model Card for qwen-2.5-7b-instruct-sft-qlora-countdown-search-1k
13
+
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="chloeli/qwen-2.5-7b-instruct-sft-qlora-countdown-search-1k", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chloeli/huggingface/runs/bvttf6gb)
31
+
32
+
33
+ This model was trained with SFT.
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.15.2
38
+ - Transformers: 4.49.0
39
+ - Pytorch: 2.6.0
40
+ - Datasets: 3.3.2
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 8.539757562822656e+16,
3
+ "train_loss": 0.07947162318229675,
4
+ "train_runtime": 16205.2206,
5
+ "train_samples": 1000,
6
+ "train_samples_per_second": 0.062,
7
+ "train_steps_per_second": 0.008
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 8.539757562822656e+16,
3
+ "train_loss": 0.07947162318229675,
4
+ "train_runtime": 16205.2206,
5
+ "train_samples": 1000,
6
+ "train_samples_per_second": 0.062,
7
+ "train_steps_per_second": 0.008
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 5,
6
+ "global_step": 125,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.008,
13
+ "grad_norm": 0.2523050010204315,
14
+ "learning_rate": 1.5384615384615387e-05,
15
+ "loss": 0.4523,
16
+ "mean_token_accuracy": 0.9231184720993042,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.04,
21
+ "grad_norm": 0.24141842126846313,
22
+ "learning_rate": 7.692307692307693e-05,
23
+ "loss": 0.4396,
24
+ "mean_token_accuracy": 0.9300202205777168,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.04,
29
+ "eval_loss": 0.38802939653396606,
30
+ "eval_mean_token_accuracy": 0.931274386882782,
31
+ "eval_runtime": 572.2225,
32
+ "eval_samples_per_second": 1.748,
33
+ "eval_steps_per_second": 0.218,
34
+ "step": 5
35
+ },
36
+ {
37
+ "epoch": 0.08,
38
+ "grad_norm": 0.16969740390777588,
39
+ "learning_rate": 0.00015384615384615385,
40
+ "loss": 0.3426,
41
+ "mean_token_accuracy": 0.93111452460289,
42
+ "step": 10
43
+ },
44
+ {
45
+ "epoch": 0.08,
46
+ "eval_loss": 0.24519848823547363,
47
+ "eval_mean_token_accuracy": 0.943490716934204,
48
+ "eval_runtime": 572.2716,
49
+ "eval_samples_per_second": 1.747,
50
+ "eval_steps_per_second": 0.218,
51
+ "step": 10
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "grad_norm": 0.15904344618320465,
56
+ "learning_rate": 0.00019984268150178167,
57
+ "loss": 0.1922,
58
+ "mean_token_accuracy": 0.9507227122783661,
59
+ "step": 15
60
+ },
61
+ {
62
+ "epoch": 0.12,
63
+ "eval_loss": 0.12690386176109314,
64
+ "eval_mean_token_accuracy": 0.962551230430603,
65
+ "eval_runtime": 572.2269,
66
+ "eval_samples_per_second": 1.748,
67
+ "eval_steps_per_second": 0.218,
68
+ "step": 15
69
+ },
70
+ {
71
+ "epoch": 0.16,
72
+ "grad_norm": 0.12561501562595367,
73
+ "learning_rate": 0.00019807852804032305,
74
+ "loss": 0.094,
75
+ "mean_token_accuracy": 0.9698278903961182,
76
+ "step": 20
77
+ },
78
+ {
79
+ "epoch": 0.16,
80
+ "eval_loss": 0.07218066602945328,
81
+ "eval_mean_token_accuracy": 0.9754553689956665,
82
+ "eval_runtime": 572.3734,
83
+ "eval_samples_per_second": 1.747,
84
+ "eval_steps_per_second": 0.218,
85
+ "step": 20
86
+ },
87
+ {
88
+ "epoch": 0.2,
89
+ "grad_norm": 0.09696544706821442,
90
+ "learning_rate": 0.00019438833303083678,
91
+ "loss": 0.0578,
92
+ "mean_token_accuracy": 0.9804269909858704,
93
+ "step": 25
94
+ },
95
+ {
96
+ "epoch": 0.2,
97
+ "eval_loss": 0.06172483041882515,
98
+ "eval_mean_token_accuracy": 0.9775349216461182,
99
+ "eval_runtime": 571.3615,
100
+ "eval_samples_per_second": 1.75,
101
+ "eval_steps_per_second": 0.219,
102
+ "step": 25
103
+ },
104
+ {
105
+ "epoch": 0.24,
106
+ "grad_norm": 0.07436518371105194,
107
+ "learning_rate": 0.00018884456359788724,
108
+ "loss": 0.0474,
109
+ "mean_token_accuracy": 0.9817552506923676,
110
+ "step": 30
111
+ },
112
+ {
113
+ "epoch": 0.24,
114
+ "eval_loss": 0.051653869450092316,
115
+ "eval_mean_token_accuracy": 0.97948202419281,
116
+ "eval_runtime": 572.6759,
117
+ "eval_samples_per_second": 1.746,
118
+ "eval_steps_per_second": 0.218,
119
+ "step": 30
120
+ },
121
+ {
122
+ "epoch": 0.28,
123
+ "grad_norm": 0.058379314839839935,
124
+ "learning_rate": 0.00018155608689592604,
125
+ "loss": 0.0473,
126
+ "mean_token_accuracy": 0.9801956593990326,
127
+ "step": 35
128
+ },
129
+ {
130
+ "epoch": 0.28,
131
+ "eval_loss": 0.04774034023284912,
132
+ "eval_mean_token_accuracy": 0.9803278684616089,
133
+ "eval_runtime": 572.9678,
134
+ "eval_samples_per_second": 1.745,
135
+ "eval_steps_per_second": 0.218,
136
+ "step": 35
137
+ },
138
+ {
139
+ "epoch": 0.32,
140
+ "grad_norm": 0.05066407099366188,
141
+ "learning_rate": 0.0001726660322034027,
142
+ "loss": 0.0471,
143
+ "mean_token_accuracy": 0.9803119540214539,
144
+ "step": 40
145
+ },
146
+ {
147
+ "epoch": 0.32,
148
+ "eval_loss": 0.04579387605190277,
149
+ "eval_mean_token_accuracy": 0.980647020816803,
150
+ "eval_runtime": 572.7685,
151
+ "eval_samples_per_second": 1.746,
152
+ "eval_steps_per_second": 0.218,
153
+ "step": 40
154
+ },
155
+ {
156
+ "epoch": 0.36,
157
+ "grad_norm": 0.05025569722056389,
158
+ "learning_rate": 0.00016234898018587337,
159
+ "loss": 0.0423,
160
+ "mean_token_accuracy": 0.9822601974010468,
161
+ "step": 45
162
+ },
163
+ {
164
+ "epoch": 0.36,
165
+ "eval_loss": 0.04449079558253288,
166
+ "eval_mean_token_accuracy": 0.981070707321167,
167
+ "eval_runtime": 572.4781,
168
+ "eval_samples_per_second": 1.747,
169
+ "eval_steps_per_second": 0.218,
170
+ "step": 45
171
+ },
172
+ {
173
+ "epoch": 0.4,
174
+ "grad_norm": 0.03607247769832611,
175
+ "learning_rate": 0.00015080753452465296,
176
+ "loss": 0.0465,
177
+ "mean_token_accuracy": 0.9798099577426911,
178
+ "step": 50
179
+ },
180
+ {
181
+ "epoch": 0.4,
182
+ "eval_loss": 0.04439844563603401,
183
+ "eval_mean_token_accuracy": 0.9811892700195313,
184
+ "eval_runtime": 572.6901,
185
+ "eval_samples_per_second": 1.746,
186
+ "eval_steps_per_second": 0.218,
187
+ "step": 50
188
+ },
189
+ {
190
+ "epoch": 0.44,
191
+ "grad_norm": 0.037045057862997055,
192
+ "learning_rate": 0.000138268343236509,
193
+ "loss": 0.0431,
194
+ "mean_token_accuracy": 0.9816874027252197,
195
+ "step": 55
196
+ },
197
+ {
198
+ "epoch": 0.44,
199
+ "eval_loss": 0.04384024068713188,
200
+ "eval_mean_token_accuracy": 0.9812552394866944,
201
+ "eval_runtime": 572.4389,
202
+ "eval_samples_per_second": 1.747,
203
+ "eval_steps_per_second": 0.218,
204
+ "step": 55
205
+ },
206
+ {
207
+ "epoch": 0.48,
208
+ "grad_norm": 0.03762717917561531,
209
+ "learning_rate": 0.0001249776478167227,
210
+ "loss": 0.0437,
211
+ "mean_token_accuracy": 0.9811290085315705,
212
+ "step": 60
213
+ },
214
+ {
215
+ "epoch": 0.48,
216
+ "eval_loss": 0.04316685348749161,
217
+ "eval_mean_token_accuracy": 0.9816457834243775,
218
+ "eval_runtime": 572.5081,
219
+ "eval_samples_per_second": 1.747,
220
+ "eval_steps_per_second": 0.218,
221
+ "step": 60
222
+ },
223
+ {
224
+ "epoch": 0.52,
225
+ "grad_norm": 0.02974274381995201,
226
+ "learning_rate": 0.00011119644761033078,
227
+ "loss": 0.0412,
228
+ "mean_token_accuracy": 0.9832498252391815,
229
+ "step": 65
230
+ },
231
+ {
232
+ "epoch": 0.52,
233
+ "eval_loss": 0.04281109571456909,
234
+ "eval_mean_token_accuracy": 0.9816416425704956,
235
+ "eval_runtime": 572.3877,
236
+ "eval_samples_per_second": 1.747,
237
+ "eval_steps_per_second": 0.218,
238
+ "step": 65
239
+ },
240
+ {
241
+ "epoch": 0.56,
242
+ "grad_norm": 0.03527842089533806,
243
+ "learning_rate": 9.719537437241312e-05,
244
+ "loss": 0.0422,
245
+ "mean_token_accuracy": 0.9818739414215087,
246
+ "step": 70
247
+ },
248
+ {
249
+ "epoch": 0.56,
250
+ "eval_loss": 0.042156100273132324,
251
+ "eval_mean_token_accuracy": 0.9818385190963745,
252
+ "eval_runtime": 572.8793,
253
+ "eval_samples_per_second": 1.746,
254
+ "eval_steps_per_second": 0.218,
255
+ "step": 70
256
+ },
257
+ {
258
+ "epoch": 0.6,
259
+ "grad_norm": 0.03159947320818901,
260
+ "learning_rate": 8.324937766952638e-05,
261
+ "loss": 0.0415,
262
+ "mean_token_accuracy": 0.9812052190303803,
263
+ "step": 75
264
+ },
265
+ {
266
+ "epoch": 0.6,
267
+ "eval_loss": 0.04201458767056465,
268
+ "eval_mean_token_accuracy": 0.9818556265830993,
269
+ "eval_runtime": 572.8981,
270
+ "eval_samples_per_second": 1.746,
271
+ "eval_steps_per_second": 0.218,
272
+ "step": 75
273
+ },
274
+ {
275
+ "epoch": 0.64,
276
+ "grad_norm": 0.05335124209523201,
277
+ "learning_rate": 6.963232548903853e-05,
278
+ "loss": 0.0434,
279
+ "mean_token_accuracy": 0.981024295091629,
280
+ "step": 80
281
+ },
282
+ {
283
+ "epoch": 0.64,
284
+ "eval_loss": 0.0419701524078846,
285
+ "eval_mean_token_accuracy": 0.9820275835990906,
286
+ "eval_runtime": 572.5542,
287
+ "eval_samples_per_second": 1.747,
288
+ "eval_steps_per_second": 0.218,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 0.68,
293
+ "grad_norm": 0.03178861737251282,
294
+ "learning_rate": 5.6611626088244194e-05,
295
+ "loss": 0.0446,
296
+ "mean_token_accuracy": 0.9806674957275391,
297
+ "step": 85
298
+ },
299
+ {
300
+ "epoch": 0.68,
301
+ "eval_loss": 0.04141935706138611,
302
+ "eval_mean_token_accuracy": 0.9820602812767029,
303
+ "eval_runtime": 571.1387,
304
+ "eval_samples_per_second": 1.751,
305
+ "eval_steps_per_second": 0.219,
306
+ "step": 85
307
+ },
308
+ {
309
+ "epoch": 0.72,
310
+ "grad_norm": 0.040076956152915955,
311
+ "learning_rate": 4.444297669803981e-05,
312
+ "loss": 0.04,
313
+ "mean_token_accuracy": 0.9825624287128448,
314
+ "step": 90
315
+ },
316
+ {
317
+ "epoch": 0.72,
318
+ "eval_loss": 0.04136450216174126,
319
+ "eval_mean_token_accuracy": 0.9821350569725037,
320
+ "eval_runtime": 571.1079,
321
+ "eval_samples_per_second": 1.751,
322
+ "eval_steps_per_second": 0.219,
323
+ "step": 90
324
+ },
325
+ {
326
+ "epoch": 0.76,
327
+ "grad_norm": 0.037734489887952805,
328
+ "learning_rate": 3.336534220479961e-05,
329
+ "loss": 0.0443,
330
+ "mean_token_accuracy": 0.9814210355281829,
331
+ "step": 95
332
+ },
333
+ {
334
+ "epoch": 0.76,
335
+ "eval_loss": 0.041187919676303864,
336
+ "eval_mean_token_accuracy": 0.9821804766654968,
337
+ "eval_runtime": 572.5641,
338
+ "eval_samples_per_second": 1.747,
339
+ "eval_steps_per_second": 0.218,
340
+ "step": 95
341
+ },
342
+ {
343
+ "epoch": 0.8,
344
+ "grad_norm": 0.04483392462134361,
345
+ "learning_rate": 2.3596262417839255e-05,
346
+ "loss": 0.0394,
347
+ "mean_token_accuracy": 0.9827538132667542,
348
+ "step": 100
349
+ },
350
+ {
351
+ "epoch": 0.8,
352
+ "eval_loss": 0.041005875915288925,
353
+ "eval_mean_token_accuracy": 0.9823568396568298,
354
+ "eval_runtime": 572.6853,
355
+ "eval_samples_per_second": 1.746,
356
+ "eval_steps_per_second": 0.218,
357
+ "step": 100
358
+ },
359
+ {
360
+ "epoch": 0.84,
361
+ "grad_norm": 0.035200804471969604,
362
+ "learning_rate": 1.5327580077171587e-05,
363
+ "loss": 0.0406,
364
+ "mean_token_accuracy": 0.9826211035251617,
365
+ "step": 105
366
+ },
367
+ {
368
+ "epoch": 0.84,
369
+ "eval_loss": 0.040836114436388016,
370
+ "eval_mean_token_accuracy": 0.9824077115058899,
371
+ "eval_runtime": 572.7135,
372
+ "eval_samples_per_second": 1.746,
373
+ "eval_steps_per_second": 0.218,
374
+ "step": 105
375
+ },
376
+ {
377
+ "epoch": 0.88,
378
+ "grad_norm": 0.033276792615652084,
379
+ "learning_rate": 8.72167349386811e-06,
380
+ "loss": 0.0418,
381
+ "mean_token_accuracy": 0.9821678340435028,
382
+ "step": 110
383
+ },
384
+ {
385
+ "epoch": 0.88,
386
+ "eval_loss": 0.040718287229537964,
387
+ "eval_mean_token_accuracy": 0.9824112377166748,
388
+ "eval_runtime": 571.9537,
389
+ "eval_samples_per_second": 1.748,
390
+ "eval_steps_per_second": 0.219,
391
+ "step": 110
392
+ },
393
+ {
394
+ "epoch": 0.92,
395
+ "grad_norm": 0.03913981840014458,
396
+ "learning_rate": 3.908267805490051e-06,
397
+ "loss": 0.0413,
398
+ "mean_token_accuracy": 0.9817866742610931,
399
+ "step": 115
400
+ },
401
+ {
402
+ "epoch": 0.92,
403
+ "eval_loss": 0.040665969252586365,
404
+ "eval_mean_token_accuracy": 0.9824027805328369,
405
+ "eval_runtime": 572.0692,
406
+ "eval_samples_per_second": 1.748,
407
+ "eval_steps_per_second": 0.219,
408
+ "step": 115
409
+ },
410
+ {
411
+ "epoch": 0.96,
412
+ "grad_norm": 0.053303398191928864,
413
+ "learning_rate": 9.818874663554357e-07,
414
+ "loss": 0.0391,
415
+ "mean_token_accuracy": 0.9823383331298828,
416
+ "step": 120
417
+ },
418
+ {
419
+ "epoch": 0.96,
420
+ "eval_loss": 0.04064611718058586,
421
+ "eval_mean_token_accuracy": 0.9824066462516785,
422
+ "eval_runtime": 571.9701,
423
+ "eval_samples_per_second": 1.748,
424
+ "eval_steps_per_second": 0.219,
425
+ "step": 120
426
+ },
427
+ {
428
+ "epoch": 1.0,
429
+ "grad_norm": 0.031917620450258255,
430
+ "learning_rate": 0.0,
431
+ "loss": 0.0414,
432
+ "mean_token_accuracy": 0.9825141310691834,
433
+ "step": 125
434
+ },
435
+ {
436
+ "epoch": 1.0,
437
+ "eval_loss": 0.04065406322479248,
438
+ "eval_mean_token_accuracy": 0.9823896341323852,
439
+ "eval_runtime": 571.6781,
440
+ "eval_samples_per_second": 1.749,
441
+ "eval_steps_per_second": 0.219,
442
+ "step": 125
443
+ },
444
+ {
445
+ "epoch": 1.0,
446
+ "step": 125,
447
+ "total_flos": 8.539757562822656e+16,
448
+ "train_loss": 0.07947162318229675,
449
+ "train_runtime": 16205.2206,
450
+ "train_samples_per_second": 0.062,
451
+ "train_steps_per_second": 0.008
452
+ }
453
+ ],
454
+ "logging_steps": 5,
455
+ "max_steps": 125,
456
+ "num_input_tokens_seen": 0,
457
+ "num_train_epochs": 1,
458
+ "save_steps": 100,
459
+ "stateful_callbacks": {
460
+ "TrainerControl": {
461
+ "args": {
462
+ "should_epoch_stop": false,
463
+ "should_evaluate": false,
464
+ "should_log": false,
465
+ "should_save": true,
466
+ "should_training_stop": true
467
+ },
468
+ "attributes": {}
469
+ }
470
+ },
471
+ "total_flos": 8.539757562822656e+16,
472
+ "train_batch_size": 4,
473
+ "trial_name": null,
474
+ "trial_params": null
475
+ }