update model with newer version

#1
Files changed (7) hide show
  1. config.json +1 -2
  2. model.safetensors +1 -1
  3. optimizer.pt +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +120 -102
  7. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "SAVSNET/PetBERT",
3
  "architectures": [
4
  "BertForTokenClassification"
5
  ],
@@ -45,7 +44,7 @@
45
  "pad_token_id": 0,
46
  "position_embedding_type": "absolute",
47
  "torch_dtype": "float32",
48
- "transformers_version": "4.49.0",
49
  "type_vocab_size": 2,
50
  "use_cache": true,
51
  "vocab_size": 28996
 
1
  {
 
2
  "architectures": [
3
  "BertForTokenClassification"
4
  ],
 
44
  "pad_token_id": 0,
45
  "position_embedding_type": "absolute",
46
  "torch_dtype": "float32",
47
+ "transformers_version": "4.52.0.dev0",
48
  "type_vocab_size": 2,
49
  "use_cache": true,
50
  "vocab_size": 28996
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d787fa373e2a20405729f5f8fd20634ed7d89854401f2e9f76598c6d8a2838fa
3
  size 430935892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3442c1081ff78c2774dadc957664f460ad8e7aff09f69795426e44bc110617fb
3
  size 430935892
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34d1a9d1c56d780ee32cc5ea9d148c155eab84dcb4f470f18cded1843f567057
3
  size 861991482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78548e49b6e07a5dd70401777e183a938018221d95bacec37deb5cfea7c3cea4
3
  size 861991482
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3febb3b3f425e0c608f3cbfd4dada46dff84a0ab4e6f0d5af3509635b3625b1d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9389e3c975d9d9e32d2f321f5b6b0fb6c890b1d505211a96a7d310402634a1
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f92cf41b26baf29a9e9859022afbcc34c3cf250a34ff0d0481210b689244adb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b945fec086e115cf1a1d6e87c7a969f5cbcc0d3d107c7173d88cc87d5ba391d6
3
  size 1064
trainer_state.json CHANGED
@@ -1,150 +1,168 @@
1
  {
2
- "best_metric": 0.00631814356893301,
3
- "best_model_checkpoint": "projects/PetEVAL/new_splits/1.annonymisation/models/PetBERT/checkpoint-1720",
4
- "epoch": 8.0,
 
5
  "eval_steps": 500,
6
- "global_step": 2752,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
- "eval_f1": 0.026656853558445003,
14
- "eval_loss": 2.3131918907165527,
15
- "eval_precision": 0.0902669662721398,
16
- "eval_recall": 0.08842821455048296,
17
- "eval_runtime": 12.0984,
18
- "eval_samples_per_second": 136.96,
19
- "eval_steps_per_second": 4.298,
20
  "step": 0
21
  },
 
 
 
 
 
 
 
22
  {
23
  "epoch": 1.0,
24
- "eval_f1": 0.8744901409239865,
25
- "eval_loss": 0.00956493616104126,
26
- "eval_precision": 0.924093580277631,
27
- "eval_recall": 0.8507210610221329,
28
- "eval_runtime": 13.1724,
29
- "eval_samples_per_second": 125.793,
30
- "eval_steps_per_second": 3.948,
31
- "step": 344
32
  },
33
  {
34
  "epoch": 1.4534883720930232,
35
- "grad_norm": 0.13440628349781036,
36
- "learning_rate": 4.992732558139535e-05,
37
- "loss": 0.0512,
38
- "step": 500
39
  },
40
  {
41
  "epoch": 2.0,
42
- "eval_f1": 0.9214380148383657,
43
- "eval_loss": 0.007954263128340244,
44
- "eval_precision": 0.9385945722545549,
45
- "eval_recall": 0.906823887476179,
46
- "eval_runtime": 13.6821,
47
- "eval_samples_per_second": 121.107,
48
- "eval_steps_per_second": 3.801,
49
- "step": 688
 
 
 
 
 
 
 
50
  },
51
  {
52
  "epoch": 2.9069767441860463,
53
- "grad_norm": 0.5610215067863464,
54
- "learning_rate": 4.98546511627907e-05,
55
- "loss": 0.0031,
56
- "step": 1000
57
  },
58
  {
59
  "epoch": 3.0,
60
- "eval_f1": 0.9389481710629304,
61
- "eval_loss": 0.010447051376104355,
62
- "eval_precision": 0.9747087173167618,
63
- "eval_recall": 0.9103769387756578,
64
- "eval_runtime": 13.7684,
65
- "eval_samples_per_second": 120.348,
66
- "eval_steps_per_second": 3.777,
67
- "step": 1032
 
 
 
 
 
 
 
68
  },
69
  {
70
  "epoch": 4.0,
71
- "eval_f1": 0.9195094134396675,
72
- "eval_loss": 0.008201838470995426,
73
- "eval_precision": 0.9279236120830319,
74
- "eval_recall": 0.9142686302660742,
75
- "eval_runtime": 13.9313,
76
- "eval_samples_per_second": 118.941,
77
- "eval_steps_per_second": 3.733,
78
- "step": 1376
79
  },
80
  {
81
  "epoch": 4.3604651162790695,
82
- "grad_norm": 0.013739518821239471,
83
- "learning_rate": 4.978197674418605e-05,
84
- "loss": 0.0019,
85
- "step": 1500
86
  },
87
  {
88
  "epoch": 5.0,
89
- "eval_f1": 0.94402458580998,
90
- "eval_loss": 0.00631814356893301,
91
- "eval_precision": 0.9511782836087214,
92
- "eval_recall": 0.9389296462653962,
93
- "eval_runtime": 15.0907,
94
- "eval_samples_per_second": 109.803,
95
- "eval_steps_per_second": 3.446,
96
- "step": 1720
 
 
 
 
 
 
 
97
  },
98
  {
99
  "epoch": 5.813953488372093,
100
- "grad_norm": 0.0020324508659541607,
101
- "learning_rate": 4.97093023255814e-05,
102
- "loss": 0.001,
103
- "step": 2000
104
  },
105
  {
106
  "epoch": 6.0,
107
- "eval_f1": 0.9457105386380245,
108
- "eval_loss": 0.007111889310181141,
109
- "eval_precision": 0.9460455625427084,
110
- "eval_recall": 0.9459687123326699,
111
- "eval_runtime": 14.2199,
112
- "eval_samples_per_second": 116.527,
113
- "eval_steps_per_second": 3.657,
114
- "step": 2064
115
  },
116
  {
117
- "epoch": 7.0,
118
- "eval_f1": 0.9223422806557113,
119
- "eval_loss": 0.011202913708984852,
120
- "eval_precision": 0.9596132263722048,
121
- "eval_recall": 0.893850072449603,
122
- "eval_runtime": 14.2319,
123
- "eval_samples_per_second": 116.429,
124
- "eval_steps_per_second": 3.654,
125
- "step": 2408
126
- },
127
- {
128
- "epoch": 7.267441860465116,
129
- "grad_norm": 0.013395079411566257,
130
- "learning_rate": 4.963662790697674e-05,
131
- "loss": 0.0011,
132
- "step": 2500
133
  },
134
  {
135
- "epoch": 8.0,
136
- "eval_f1": 0.9369894102286561,
137
- "eval_loss": 0.007823295891284943,
138
- "eval_precision": 0.93213302687968,
139
- "eval_recall": 0.9433066007345208,
140
- "eval_runtime": 15.2166,
141
- "eval_samples_per_second": 108.894,
142
- "eval_steps_per_second": 3.417,
143
- "step": 2752
144
  }
145
  ],
146
  "logging_steps": 500,
147
- "max_steps": 344000,
148
  "num_input_tokens_seen": 0,
149
  "num_train_epochs": 1000,
150
  "save_steps": 500,
@@ -169,7 +187,7 @@
169
  "attributes": {}
170
  }
171
  },
172
- "total_flos": 2.2993895044079616e+16,
173
  "train_batch_size": 32,
174
  "trial_name": null,
175
  "trial_params": null
 
1
  {
2
+ "best_global_step": 2752,
3
+ "best_metric": 0.0057959225960075855,
4
+ "best_model_checkpoint": "projects/PetBERT_annonymisation/data/case_sensitive/model/checkpoint-2752",
5
+ "epoch": 7.0,
6
  "eval_steps": 500,
7
+ "global_step": 4816,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
+ "eval_f1": 0.011880094095301812,
15
+ "eval_loss": 2.4431910514831543,
16
+ "eval_precision": 0.08875808758622007,
17
+ "eval_recall": 0.05952071418656867,
18
+ "eval_runtime": 23.7929,
19
+ "eval_samples_per_second": 139.285,
20
+ "eval_steps_per_second": 4.371,
21
  "step": 0
22
  },
23
+ {
24
+ "epoch": 0.7267441860465116,
25
+ "grad_norm": 0.14246754348278046,
26
+ "learning_rate": 4.996373546511628e-05,
27
+ "loss": 0.0494,
28
+ "step": 500
29
+ },
30
  {
31
  "epoch": 1.0,
32
+ "eval_f1": 0.8935989317265615,
33
+ "eval_loss": 0.006424472201615572,
34
+ "eval_precision": 0.8856397165335348,
35
+ "eval_recall": 0.9052291580842897,
36
+ "eval_runtime": 24.8439,
37
+ "eval_samples_per_second": 133.393,
38
+ "eval_steps_per_second": 4.186,
39
+ "step": 688
40
  },
41
  {
42
  "epoch": 1.4534883720930232,
43
+ "grad_norm": 0.013319989666342735,
44
+ "learning_rate": 4.992739825581396e-05,
45
+ "loss": 0.0044,
46
+ "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_f1": 0.921251638253682,
51
+ "eval_loss": 0.006238908972591162,
52
+ "eval_precision": 0.935008628082375,
53
+ "eval_recall": 0.9144731612976222,
54
+ "eval_runtime": 24.802,
55
+ "eval_samples_per_second": 133.618,
56
+ "eval_steps_per_second": 4.193,
57
+ "step": 1376
58
+ },
59
+ {
60
+ "epoch": 2.1802325581395348,
61
+ "grad_norm": 0.04109887406229973,
62
+ "learning_rate": 4.989106104651163e-05,
63
+ "loss": 0.0032,
64
+ "step": 1500
65
  },
66
  {
67
  "epoch": 2.9069767441860463,
68
+ "grad_norm": 0.3754558861255646,
69
+ "learning_rate": 4.985472383720931e-05,
70
+ "loss": 0.0023,
71
+ "step": 2000
72
  },
73
  {
74
  "epoch": 3.0,
75
+ "eval_f1": 0.931311286064836,
76
+ "eval_loss": 0.006664152257144451,
77
+ "eval_precision": 0.9347111088313973,
78
+ "eval_recall": 0.9310447645995317,
79
+ "eval_runtime": 24.7968,
80
+ "eval_samples_per_second": 133.646,
81
+ "eval_steps_per_second": 4.194,
82
+ "step": 2064
83
+ },
84
+ {
85
+ "epoch": 3.633720930232558,
86
+ "grad_norm": 0.002286644419655204,
87
+ "learning_rate": 4.981838662790698e-05,
88
+ "loss": 0.002,
89
+ "step": 2500
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_f1": 0.9188838249757393,
94
+ "eval_loss": 0.0057959225960075855,
95
+ "eval_precision": 0.9199713934746218,
96
+ "eval_recall": 0.9194240810494807,
97
+ "eval_runtime": 24.791,
98
+ "eval_samples_per_second": 133.677,
99
+ "eval_steps_per_second": 4.195,
100
+ "step": 2752
101
  },
102
  {
103
  "epoch": 4.3604651162790695,
104
+ "grad_norm": 0.0007801814354024827,
105
+ "learning_rate": 4.978204941860465e-05,
106
+ "loss": 0.0016,
107
+ "step": 3000
108
  },
109
  {
110
  "epoch": 5.0,
111
+ "eval_f1": 0.8780986671708252,
112
+ "eval_loss": 0.008505718782544136,
113
+ "eval_precision": 0.868721341594642,
114
+ "eval_recall": 0.9097564277642886,
115
+ "eval_runtime": 24.8121,
116
+ "eval_samples_per_second": 133.564,
117
+ "eval_steps_per_second": 4.192,
118
+ "step": 3440
119
+ },
120
+ {
121
+ "epoch": 5.087209302325581,
122
+ "grad_norm": 0.09036080539226532,
123
+ "learning_rate": 4.974571220930232e-05,
124
+ "loss": 0.0011,
125
+ "step": 3500
126
  },
127
  {
128
  "epoch": 5.813953488372093,
129
+ "grad_norm": 0.08693202584981918,
130
+ "learning_rate": 4.9709375e-05,
131
+ "loss": 0.0012,
132
+ "step": 4000
133
  },
134
  {
135
  "epoch": 6.0,
136
+ "eval_f1": 0.9238025924793921,
137
+ "eval_loss": 0.008721762336790562,
138
+ "eval_precision": 0.9375153926584012,
139
+ "eval_recall": 0.917450432310493,
140
+ "eval_runtime": 24.7949,
141
+ "eval_samples_per_second": 133.656,
142
+ "eval_steps_per_second": 4.194,
143
+ "step": 4128
144
  },
145
  {
146
+ "epoch": 6.540697674418604,
147
+ "grad_norm": 0.03196759149432182,
148
+ "learning_rate": 4.967303779069767e-05,
149
+ "loss": 0.0013,
150
+ "step": 4500
 
 
 
 
 
 
 
 
 
 
 
151
  },
152
  {
153
+ "epoch": 7.0,
154
+ "eval_f1": 0.9309400750123076,
155
+ "eval_loss": 0.009504728950560093,
156
+ "eval_precision": 0.9228721238354496,
157
+ "eval_recall": 0.9401722615087472,
158
+ "eval_runtime": 24.8542,
159
+ "eval_samples_per_second": 133.338,
160
+ "eval_steps_per_second": 4.184,
161
+ "step": 4816
162
  }
163
  ],
164
  "logging_steps": 500,
165
+ "max_steps": 688000,
166
  "num_input_tokens_seen": 0,
167
  "num_train_epochs": 1000,
168
  "save_steps": 500,
 
187
  "attributes": {}
188
  }
189
  },
190
+ "total_flos": 4.023931632713933e+16,
191
  "train_batch_size": 32,
192
  "trial_name": null,
193
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5d7a6d63c5d8abe86bc27987fca8ab5373350831d6ac76610b2f6f020217a56
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7f8df01d5a1682106eccdf01898b00019477ad3b989dbc3741b897bbcf18bbf
3
  size 5304