datasaur-dev committed on
Commit
e0aa77e
·
verified ·
1 Parent(s): 4a46c66

End of training

Browse files
Files changed (5) hide show
  1. README.md +13 -13
  2. all_results.json +18 -0
  3. eval_results.json +12 -0
  4. train_results.json +9 -0
  5. trainer_state.json +496 -0
README.md CHANGED
@@ -5,7 +5,7 @@ base_model: bert-base-uncased
5
  tags:
6
  - generated_from_trainer
7
  datasets:
8
- - datasaur-mt_fi_zj_uw_m2_q-zw_ji_zd_rm_ngi
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +18,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: datasaur-mt_fi_zj_uw_m2_q-zw_ji_zd_rm_ngi
22
- type: datasaur-mt_fi_zj_uw_m2_q-zw_ji_zd_rm_ngi
23
  config: default
24
  split: validation
25
  args: default
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.7681159420289855
30
  - name: Recall
31
  type: recall
32
- value: 0.828125
33
  - name: F1
34
  type: f1
35
- value: 0.7969924812030075
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.9696686887540831
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # autotrain-radesky-lab-span-v1
45
 
46
- This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the datasaur-mt_fi_zj_uw_m2_q-zw_ji_zd_rm_ngi dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.2650
49
- - Precision: 0.7681
50
- - Recall: 0.8281
51
- - F1: 0.7970
52
- - Accuracy: 0.9697
53
 
54
  ## Model description
55
 
 
5
  tags:
6
  - generated_from_trainer
7
  datasets:
8
+ - datasaur-dev/datasaur-MTFiZjUwM2Q-ZWJiZDRmNGI
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: datasaur-dev/datasaur-MTFiZjUwM2Q-ZWJiZDRmNGI
22
+ type: datasaur-dev/datasaur-MTFiZjUwM2Q-ZWJiZDRmNGI
23
  config: default
24
  split: validation
25
  args: default
26
  metrics:
27
  - name: Precision
28
  type: precision
29
+ value: 0.7853658536585366
30
  - name: Recall
31
  type: recall
32
+ value: 0.8385416666666666
33
  - name: F1
34
  type: f1
35
+ value: 0.8110831234256927
36
  - name: Accuracy
37
  type: accuracy
38
+ value: 0.9709519365375642
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
43
 
44
  # autotrain-radesky-lab-span-v1
45
 
46
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the datasaur-dev/datasaur-MTFiZjUwM2Q-ZWJiZDRmNGI dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.2518
49
+ - Precision: 0.7854
50
+ - Recall: 0.8385
51
+ - F1: 0.8111
52
+ - Accuracy: 0.9710
53
 
54
  ## Model description
55
 
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.9709519365375642,
4
+ "eval_f1": 0.8110831234256927,
5
+ "eval_loss": 0.2518477439880371,
6
+ "eval_precision": 0.7853658536585366,
7
+ "eval_recall": 0.8385416666666666,
8
+ "eval_runtime": 3.2853,
9
+ "eval_samples": 909,
10
+ "eval_samples_per_second": 276.687,
11
+ "eval_steps_per_second": 34.7,
12
+ "total_flos": 2472027466589526.0,
13
+ "train_loss": 0.03293397539264553,
14
+ "train_runtime": 1492.2693,
15
+ "train_samples": 3635,
16
+ "train_samples_per_second": 60.897,
17
+ "train_steps_per_second": 7.623
18
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.9709519365375642,
4
+ "eval_f1": 0.8110831234256927,
5
+ "eval_loss": 0.2518477439880371,
6
+ "eval_precision": 0.7853658536585366,
7
+ "eval_recall": 0.8385416666666666,
8
+ "eval_runtime": 3.2853,
9
+ "eval_samples": 909,
10
+ "eval_samples_per_second": 276.687,
11
+ "eval_steps_per_second": 34.7
12
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "total_flos": 2472027466589526.0,
4
+ "train_loss": 0.03293397539264553,
5
+ "train_runtime": 1492.2693,
6
+ "train_samples": 3635,
7
+ "train_samples_per_second": 60.897,
8
+ "train_steps_per_second": 7.623
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8110831234256927,
3
+ "best_model_checkpoint": "checkpoint-v1/checkpoint-10465",
4
+ "epoch": 25.0,
5
+ "eval_steps": 500,
6
+ "global_step": 11375,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.9440037330844611,
14
+ "eval_f1": 0.48913043478260865,
15
+ "eval_loss": 0.25645163655281067,
16
+ "eval_precision": 0.5113636363636364,
17
+ "eval_recall": 0.46875,
18
+ "eval_runtime": 3.4617,
19
+ "eval_samples_per_second": 262.585,
20
+ "eval_steps_per_second": 32.931,
21
+ "step": 455
22
+ },
23
+ {
24
+ "epoch": 1.098901098901099,
25
+ "grad_norm": 3.66561222076416,
26
+ "learning_rate": 9.560439560439561e-05,
27
+ "loss": 0.2875,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.9238217452169856,
33
+ "eval_f1": 0.29100529100529104,
34
+ "eval_loss": 0.3291769027709961,
35
+ "eval_precision": 0.2956989247311828,
36
+ "eval_recall": 0.2864583333333333,
37
+ "eval_runtime": 3.3496,
38
+ "eval_samples_per_second": 271.38,
39
+ "eval_steps_per_second": 34.034,
40
+ "step": 910
41
+ },
42
+ {
43
+ "epoch": 2.197802197802198,
44
+ "grad_norm": 4.647622585296631,
45
+ "learning_rate": 9.12087912087912e-05,
46
+ "loss": 0.1173,
47
+ "step": 1000
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "eval_accuracy": 0.9531031264582361,
52
+ "eval_f1": 0.5489443378119002,
53
+ "eval_loss": 0.19305621087551117,
54
+ "eval_precision": 0.43465045592705165,
55
+ "eval_recall": 0.7447916666666666,
56
+ "eval_runtime": 3.3449,
57
+ "eval_samples_per_second": 271.756,
58
+ "eval_steps_per_second": 34.082,
59
+ "step": 1365
60
+ },
61
+ {
62
+ "epoch": 3.2967032967032965,
63
+ "grad_norm": 0.18239367008209229,
64
+ "learning_rate": 8.681318681318682e-05,
65
+ "loss": 0.0945,
66
+ "step": 1500
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.957769482034531,
71
+ "eval_f1": 0.603448275862069,
72
+ "eval_loss": 0.1780281662940979,
73
+ "eval_precision": 0.5147058823529411,
74
+ "eval_recall": 0.7291666666666666,
75
+ "eval_runtime": 3.3629,
76
+ "eval_samples_per_second": 270.306,
77
+ "eval_steps_per_second": 33.9,
78
+ "step": 1820
79
+ },
80
+ {
81
+ "epoch": 4.395604395604396,
82
+ "grad_norm": 0.10920047760009766,
83
+ "learning_rate": 8.241758241758242e-05,
84
+ "loss": 0.0559,
85
+ "step": 2000
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.9591693887074195,
90
+ "eval_f1": 0.6343612334801763,
91
+ "eval_loss": 0.19242332875728607,
92
+ "eval_precision": 0.549618320610687,
93
+ "eval_recall": 0.75,
94
+ "eval_runtime": 3.3708,
95
+ "eval_samples_per_second": 269.67,
96
+ "eval_steps_per_second": 33.82,
97
+ "step": 2275
98
+ },
99
+ {
100
+ "epoch": 5.4945054945054945,
101
+ "grad_norm": 0.08735374361276627,
102
+ "learning_rate": 7.802197802197802e-05,
103
+ "loss": 0.0412,
104
+ "step": 2500
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.9653523098460103,
109
+ "eval_f1": 0.7132530120481928,
110
+ "eval_loss": 0.16734345257282257,
111
+ "eval_precision": 0.6636771300448431,
112
+ "eval_recall": 0.7708333333333334,
113
+ "eval_runtime": 3.3682,
114
+ "eval_samples_per_second": 269.875,
115
+ "eval_steps_per_second": 33.846,
116
+ "step": 2730
117
+ },
118
+ {
119
+ "epoch": 6.593406593406593,
120
+ "grad_norm": 15.276603698730469,
121
+ "learning_rate": 7.362637362637363e-05,
122
+ "loss": 0.0309,
123
+ "step": 3000
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.9634857676154923,
128
+ "eval_f1": 0.6906474820143884,
129
+ "eval_loss": 0.1927553117275238,
130
+ "eval_precision": 0.64,
131
+ "eval_recall": 0.75,
132
+ "eval_runtime": 3.3642,
133
+ "eval_samples_per_second": 270.199,
134
+ "eval_steps_per_second": 33.886,
135
+ "step": 3185
136
+ },
137
+ {
138
+ "epoch": 7.6923076923076925,
139
+ "grad_norm": 0.018443187698721886,
140
+ "learning_rate": 6.923076923076924e-05,
141
+ "loss": 0.0231,
142
+ "step": 3500
143
+ },
144
+ {
145
+ "epoch": 8.0,
146
+ "eval_accuracy": 0.9643023798413439,
147
+ "eval_f1": 0.6888361045130641,
148
+ "eval_loss": 0.1938297301530838,
149
+ "eval_precision": 0.6331877729257642,
150
+ "eval_recall": 0.7552083333333334,
151
+ "eval_runtime": 3.3614,
152
+ "eval_samples_per_second": 270.421,
153
+ "eval_steps_per_second": 33.914,
154
+ "step": 3640
155
+ },
156
+ {
157
+ "epoch": 8.791208791208792,
158
+ "grad_norm": 0.002568595577031374,
159
+ "learning_rate": 6.483516483516484e-05,
160
+ "loss": 0.0191,
161
+ "step": 4000
162
+ },
163
+ {
164
+ "epoch": 9.0,
165
+ "eval_accuracy": 0.9669855342977135,
166
+ "eval_f1": 0.7194244604316546,
167
+ "eval_loss": 0.18560947477817535,
168
+ "eval_precision": 0.6666666666666666,
169
+ "eval_recall": 0.78125,
170
+ "eval_runtime": 3.3745,
171
+ "eval_samples_per_second": 269.37,
172
+ "eval_steps_per_second": 33.782,
173
+ "step": 4095
174
+ },
175
+ {
176
+ "epoch": 9.89010989010989,
177
+ "grad_norm": 0.022610006853938103,
178
+ "learning_rate": 6.043956043956044e-05,
179
+ "loss": 0.018,
180
+ "step": 4500
181
+ },
182
+ {
183
+ "epoch": 10.0,
184
+ "eval_accuracy": 0.9659356042930471,
185
+ "eval_f1": 0.7289719626168225,
186
+ "eval_loss": 0.20415335893630981,
187
+ "eval_precision": 0.6610169491525424,
188
+ "eval_recall": 0.8125,
189
+ "eval_runtime": 3.3613,
190
+ "eval_samples_per_second": 270.431,
191
+ "eval_steps_per_second": 33.915,
192
+ "step": 4550
193
+ },
194
+ {
195
+ "epoch": 10.989010989010989,
196
+ "grad_norm": 1.0466951131820679,
197
+ "learning_rate": 5.604395604395605e-05,
198
+ "loss": 0.0138,
199
+ "step": 5000
200
+ },
201
+ {
202
+ "epoch": 11.0,
203
+ "eval_accuracy": 0.9648856742883808,
204
+ "eval_f1": 0.7002288329519452,
205
+ "eval_loss": 0.22543059289455414,
206
+ "eval_precision": 0.6244897959183674,
207
+ "eval_recall": 0.796875,
208
+ "eval_runtime": 3.3542,
209
+ "eval_samples_per_second": 271.007,
210
+ "eval_steps_per_second": 33.988,
211
+ "step": 5005
212
+ },
213
+ {
214
+ "epoch": 12.0,
215
+ "eval_accuracy": 0.969318712085861,
216
+ "eval_f1": 0.7815533980582524,
217
+ "eval_loss": 0.2193489372730255,
218
+ "eval_precision": 0.7318181818181818,
219
+ "eval_recall": 0.8385416666666666,
220
+ "eval_runtime": 3.3347,
221
+ "eval_samples_per_second": 272.592,
222
+ "eval_steps_per_second": 34.186,
223
+ "step": 5460
224
+ },
225
+ {
226
+ "epoch": 12.087912087912088,
227
+ "grad_norm": 0.005279447417706251,
228
+ "learning_rate": 5.164835164835166e-05,
229
+ "loss": 0.0104,
230
+ "step": 5500
231
+ },
232
+ {
233
+ "epoch": 13.0,
234
+ "eval_accuracy": 0.9643023798413439,
235
+ "eval_f1": 0.7242990654205607,
236
+ "eval_loss": 0.2286960929632187,
237
+ "eval_precision": 0.6567796610169492,
238
+ "eval_recall": 0.8072916666666666,
239
+ "eval_runtime": 3.3478,
240
+ "eval_samples_per_second": 271.521,
241
+ "eval_steps_per_second": 34.052,
242
+ "step": 5915
243
+ },
244
+ {
245
+ "epoch": 13.186813186813186,
246
+ "grad_norm": 0.02039416879415512,
247
+ "learning_rate": 4.7252747252747257e-05,
248
+ "loss": 0.0088,
249
+ "step": 6000
250
+ },
251
+ {
252
+ "epoch": 14.0,
253
+ "eval_accuracy": 0.9682687820811946,
254
+ "eval_f1": 0.7553444180522564,
255
+ "eval_loss": 0.2258196324110031,
256
+ "eval_precision": 0.6943231441048034,
257
+ "eval_recall": 0.828125,
258
+ "eval_runtime": 3.3427,
259
+ "eval_samples_per_second": 271.935,
260
+ "eval_steps_per_second": 34.104,
261
+ "step": 6370
262
+ },
263
+ {
264
+ "epoch": 14.285714285714286,
265
+ "grad_norm": 0.00889052078127861,
266
+ "learning_rate": 4.2857142857142856e-05,
267
+ "loss": 0.0052,
268
+ "step": 6500
269
+ },
270
+ {
271
+ "epoch": 15.0,
272
+ "eval_accuracy": 0.9676854876341577,
273
+ "eval_f1": 0.7746835443037974,
274
+ "eval_loss": 0.23228472471237183,
275
+ "eval_precision": 0.7536945812807881,
276
+ "eval_recall": 0.796875,
277
+ "eval_runtime": 3.348,
278
+ "eval_samples_per_second": 271.503,
279
+ "eval_steps_per_second": 34.05,
280
+ "step": 6825
281
+ },
282
+ {
283
+ "epoch": 15.384615384615385,
284
+ "grad_norm": 0.0009669111459515989,
285
+ "learning_rate": 3.846153846153846e-05,
286
+ "loss": 0.0091,
287
+ "step": 7000
288
+ },
289
+ {
290
+ "epoch": 16.0,
291
+ "eval_accuracy": 0.9678021465235651,
292
+ "eval_f1": 0.762589928057554,
293
+ "eval_loss": 0.22262629866600037,
294
+ "eval_precision": 0.7066666666666667,
295
+ "eval_recall": 0.828125,
296
+ "eval_runtime": 3.3638,
297
+ "eval_samples_per_second": 270.226,
298
+ "eval_steps_per_second": 33.89,
299
+ "step": 7280
300
+ },
301
+ {
302
+ "epoch": 16.483516483516482,
303
+ "grad_norm": 0.0009670483996160328,
304
+ "learning_rate": 3.406593406593407e-05,
305
+ "loss": 0.0039,
306
+ "step": 7500
307
+ },
308
+ {
309
+ "epoch": 17.0,
310
+ "eval_accuracy": 0.9695520298646757,
311
+ "eval_f1": 0.7741935483870966,
312
+ "eval_loss": 0.2151697427034378,
313
+ "eval_precision": 0.7393364928909952,
314
+ "eval_recall": 0.8125,
315
+ "eval_runtime": 3.3394,
316
+ "eval_samples_per_second": 272.206,
317
+ "eval_steps_per_second": 34.138,
318
+ "step": 7735
319
+ },
320
+ {
321
+ "epoch": 17.582417582417584,
322
+ "grad_norm": 0.0011728066019713879,
323
+ "learning_rate": 2.9670329670329673e-05,
324
+ "loss": 0.006,
325
+ "step": 8000
326
+ },
327
+ {
328
+ "epoch": 18.0,
329
+ "eval_accuracy": 0.9672188520765282,
330
+ "eval_f1": 0.7544303797468355,
331
+ "eval_loss": 0.26869261264801025,
332
+ "eval_precision": 0.7339901477832512,
333
+ "eval_recall": 0.7760416666666666,
334
+ "eval_runtime": 3.3437,
335
+ "eval_samples_per_second": 271.857,
336
+ "eval_steps_per_second": 34.094,
337
+ "step": 8190
338
+ },
339
+ {
340
+ "epoch": 18.681318681318682,
341
+ "grad_norm": 0.0027556538116186857,
342
+ "learning_rate": 2.5274725274725276e-05,
343
+ "loss": 0.0024,
344
+ "step": 8500
345
+ },
346
+ {
347
+ "epoch": 19.0,
348
+ "eval_accuracy": 0.9689687354176388,
349
+ "eval_f1": 0.7722772277227722,
350
+ "eval_loss": 0.2464381903409958,
351
+ "eval_precision": 0.7358490566037735,
352
+ "eval_recall": 0.8125,
353
+ "eval_runtime": 3.3427,
354
+ "eval_samples_per_second": 271.937,
355
+ "eval_steps_per_second": 34.104,
356
+ "step": 8645
357
+ },
358
+ {
359
+ "epoch": 19.78021978021978,
360
+ "grad_norm": 0.000459605420473963,
361
+ "learning_rate": 2.0879120879120882e-05,
362
+ "loss": 0.0004,
363
+ "step": 9000
364
+ },
365
+ {
366
+ "epoch": 20.0,
367
+ "eval_accuracy": 0.9694353709752683,
368
+ "eval_f1": 0.794044665012407,
369
+ "eval_loss": 0.2462543547153473,
370
+ "eval_precision": 0.7582938388625592,
371
+ "eval_recall": 0.8333333333333334,
372
+ "eval_runtime": 3.3329,
373
+ "eval_samples_per_second": 272.736,
374
+ "eval_steps_per_second": 34.205,
375
+ "step": 9100
376
+ },
377
+ {
378
+ "epoch": 20.87912087912088,
379
+ "grad_norm": 0.0002796004991978407,
380
+ "learning_rate": 1.6483516483516486e-05,
381
+ "loss": 0.0003,
382
+ "step": 9500
383
+ },
384
+ {
385
+ "epoch": 21.0,
386
+ "eval_accuracy": 0.9700186654223052,
387
+ "eval_f1": 0.806045340050378,
388
+ "eval_loss": 0.24657504260540009,
389
+ "eval_precision": 0.7804878048780488,
390
+ "eval_recall": 0.8333333333333334,
391
+ "eval_runtime": 3.3393,
392
+ "eval_samples_per_second": 272.21,
393
+ "eval_steps_per_second": 34.139,
394
+ "step": 9555
395
+ },
396
+ {
397
+ "epoch": 21.978021978021978,
398
+ "grad_norm": 0.0019895117729902267,
399
+ "learning_rate": 1.2087912087912089e-05,
400
+ "loss": 0.001,
401
+ "step": 10000
402
+ },
403
+ {
404
+ "epoch": 22.0,
405
+ "eval_accuracy": 0.9706019598693421,
406
+ "eval_f1": 0.8020304568527918,
407
+ "eval_loss": 0.2513893246650696,
408
+ "eval_precision": 0.7821782178217822,
409
+ "eval_recall": 0.8229166666666666,
410
+ "eval_runtime": 3.3391,
411
+ "eval_samples_per_second": 272.232,
412
+ "eval_steps_per_second": 34.141,
413
+ "step": 10010
414
+ },
415
+ {
416
+ "epoch": 23.0,
417
+ "eval_accuracy": 0.9709519365375642,
418
+ "eval_f1": 0.8110831234256927,
419
+ "eval_loss": 0.2518477439880371,
420
+ "eval_precision": 0.7853658536585366,
421
+ "eval_recall": 0.8385416666666666,
422
+ "eval_runtime": 3.3453,
423
+ "eval_samples_per_second": 271.724,
424
+ "eval_steps_per_second": 34.078,
425
+ "step": 10465
426
+ },
427
+ {
428
+ "epoch": 23.076923076923077,
429
+ "grad_norm": 0.00038343301275745034,
430
+ "learning_rate": 7.692307692307694e-06,
431
+ "loss": 0.0002,
432
+ "step": 10500
433
+ },
434
+ {
435
+ "epoch": 24.0,
436
+ "eval_accuracy": 0.9704853009799347,
437
+ "eval_f1": 0.8050632911392406,
438
+ "eval_loss": 0.25856369733810425,
439
+ "eval_precision": 0.7832512315270936,
440
+ "eval_recall": 0.828125,
441
+ "eval_runtime": 3.3535,
442
+ "eval_samples_per_second": 271.062,
443
+ "eval_steps_per_second": 33.995,
444
+ "step": 10920
445
+ },
446
+ {
447
+ "epoch": 24.175824175824175,
448
+ "grad_norm": 0.0008269179961644113,
449
+ "learning_rate": 3.2967032967032968e-06,
450
+ "loss": 0.0002,
451
+ "step": 11000
452
+ },
453
+ {
454
+ "epoch": 25.0,
455
+ "eval_accuracy": 0.9696686887540831,
456
+ "eval_f1": 0.7969924812030075,
457
+ "eval_loss": 0.2650408446788788,
458
+ "eval_precision": 0.7681159420289855,
459
+ "eval_recall": 0.828125,
460
+ "eval_runtime": 3.3206,
461
+ "eval_samples_per_second": 273.743,
462
+ "eval_steps_per_second": 34.331,
463
+ "step": 11375
464
+ },
465
+ {
466
+ "epoch": 25.0,
467
+ "step": 11375,
468
+ "total_flos": 2472027466589526.0,
469
+ "train_loss": 0.03293397539264553,
470
+ "train_runtime": 1492.2693,
471
+ "train_samples_per_second": 60.897,
472
+ "train_steps_per_second": 7.623
473
+ }
474
+ ],
475
+ "logging_steps": 500,
476
+ "max_steps": 11375,
477
+ "num_input_tokens_seen": 0,
478
+ "num_train_epochs": 25,
479
+ "save_steps": 500,
480
+ "stateful_callbacks": {
481
+ "TrainerControl": {
482
+ "args": {
483
+ "should_epoch_stop": false,
484
+ "should_evaluate": false,
485
+ "should_log": false,
486
+ "should_save": true,
487
+ "should_training_stop": true
488
+ },
489
+ "attributes": {}
490
+ }
491
+ },
492
+ "total_flos": 2472027466589526.0,
493
+ "train_batch_size": 8,
494
+ "trial_name": null,
495
+ "trial_params": null
496
+ }