update model with newer version

#2
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +60 -103
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3442c1081ff78c2774dadc957664f460ad8e7aff09f69795426e44bc110617fb
3
  size 430935892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8a1fc823d3ba0d45afc06b1105997300ad38030351b439d85610fa9fa38a75
3
  size 430935892
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78548e49b6e07a5dd70401777e183a938018221d95bacec37deb5cfea7c3cea4
3
  size 861991482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed95c6f0459164abd29b411040228bdd2d3ab9ef8949ba8423fab4ddfecad90
3
  size 861991482
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9389e3c975d9d9e32d2f321f5b6b0fb6c890b1d505211a96a7d310402634a1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4679e8a1fa9d31242916d045260107779d3bf18ad04f68edc0e2a86739d10fbd
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b945fec086e115cf1a1d6e87c7a969f5cbcc0d3d107c7173d88cc87d5ba391d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ec52a655b60bd9b36d5e598a010f1db1893ca2657401a77e3d62edf772ae6f
3
  size 1064
trainer_state.json CHANGED
@@ -1,164 +1,121 @@
1
  {
2
- "best_global_step": 2752,
3
- "best_metric": 0.0057959225960075855,
4
- "best_model_checkpoint": "projects/PetBERT_annonymisation/data/case_sensitive/model/checkpoint-2752",
5
- "epoch": 7.0,
6
  "eval_steps": 500,
7
- "global_step": 4816,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
- "eval_f1": 0.011880094095301812,
15
- "eval_loss": 2.4431910514831543,
16
- "eval_precision": 0.08875808758622007,
17
- "eval_recall": 0.05952071418656867,
18
- "eval_runtime": 23.7929,
19
- "eval_samples_per_second": 139.285,
20
- "eval_steps_per_second": 4.371,
21
  "step": 0
22
  },
23
  {
24
  "epoch": 0.7267441860465116,
25
- "grad_norm": 0.14246754348278046,
26
  "learning_rate": 4.996373546511628e-05,
27
- "loss": 0.0494,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
- "eval_f1": 0.8935989317265615,
33
- "eval_loss": 0.006424472201615572,
34
- "eval_precision": 0.8856397165335348,
35
- "eval_recall": 0.9052291580842897,
36
- "eval_runtime": 24.8439,
37
- "eval_samples_per_second": 133.393,
38
- "eval_steps_per_second": 4.186,
39
  "step": 688
40
  },
41
  {
42
  "epoch": 1.4534883720930232,
43
- "grad_norm": 0.013319989666342735,
44
  "learning_rate": 4.992739825581396e-05,
45
- "loss": 0.0044,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
- "eval_f1": 0.921251638253682,
51
- "eval_loss": 0.006238908972591162,
52
- "eval_precision": 0.935008628082375,
53
- "eval_recall": 0.9144731612976222,
54
- "eval_runtime": 24.802,
55
- "eval_samples_per_second": 133.618,
56
- "eval_steps_per_second": 4.193,
57
  "step": 1376
58
  },
59
  {
60
  "epoch": 2.1802325581395348,
61
- "grad_norm": 0.04109887406229973,
62
  "learning_rate": 4.989106104651163e-05,
63
- "loss": 0.0032,
64
  "step": 1500
65
  },
66
  {
67
  "epoch": 2.9069767441860463,
68
- "grad_norm": 0.3754558861255646,
69
  "learning_rate": 4.985472383720931e-05,
70
- "loss": 0.0023,
71
  "step": 2000
72
  },
73
  {
74
  "epoch": 3.0,
75
- "eval_f1": 0.931311286064836,
76
- "eval_loss": 0.006664152257144451,
77
- "eval_precision": 0.9347111088313973,
78
- "eval_recall": 0.9310447645995317,
79
- "eval_runtime": 24.7968,
80
- "eval_samples_per_second": 133.646,
81
- "eval_steps_per_second": 4.194,
82
  "step": 2064
83
  },
84
  {
85
  "epoch": 3.633720930232558,
86
- "grad_norm": 0.002286644419655204,
87
  "learning_rate": 4.981838662790698e-05,
88
- "loss": 0.002,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 4.0,
93
- "eval_f1": 0.9188838249757393,
94
- "eval_loss": 0.0057959225960075855,
95
- "eval_precision": 0.9199713934746218,
96
- "eval_recall": 0.9194240810494807,
97
- "eval_runtime": 24.791,
98
- "eval_samples_per_second": 133.677,
99
- "eval_steps_per_second": 4.195,
100
  "step": 2752
101
  },
102
  {
103
  "epoch": 4.3604651162790695,
104
- "grad_norm": 0.0007801814354024827,
105
  "learning_rate": 4.978204941860465e-05,
106
- "loss": 0.0016,
107
  "step": 3000
108
  },
109
  {
110
  "epoch": 5.0,
111
- "eval_f1": 0.8780986671708252,
112
- "eval_loss": 0.008505718782544136,
113
- "eval_precision": 0.868721341594642,
114
- "eval_recall": 0.9097564277642886,
115
- "eval_runtime": 24.8121,
116
- "eval_samples_per_second": 133.564,
117
- "eval_steps_per_second": 4.192,
118
  "step": 3440
119
- },
120
- {
121
- "epoch": 5.087209302325581,
122
- "grad_norm": 0.09036080539226532,
123
- "learning_rate": 4.974571220930232e-05,
124
- "loss": 0.0011,
125
- "step": 3500
126
- },
127
- {
128
- "epoch": 5.813953488372093,
129
- "grad_norm": 0.08693202584981918,
130
- "learning_rate": 4.9709375e-05,
131
- "loss": 0.0012,
132
- "step": 4000
133
- },
134
- {
135
- "epoch": 6.0,
136
- "eval_f1": 0.9238025924793921,
137
- "eval_loss": 0.008721762336790562,
138
- "eval_precision": 0.9375153926584012,
139
- "eval_recall": 0.917450432310493,
140
- "eval_runtime": 24.7949,
141
- "eval_samples_per_second": 133.656,
142
- "eval_steps_per_second": 4.194,
143
- "step": 4128
144
- },
145
- {
146
- "epoch": 6.540697674418604,
147
- "grad_norm": 0.03196759149432182,
148
- "learning_rate": 4.967303779069767e-05,
149
- "loss": 0.0013,
150
- "step": 4500
151
- },
152
- {
153
- "epoch": 7.0,
154
- "eval_f1": 0.9309400750123076,
155
- "eval_loss": 0.009504728950560093,
156
- "eval_precision": 0.9228721238354496,
157
- "eval_recall": 0.9401722615087472,
158
- "eval_runtime": 24.8542,
159
- "eval_samples_per_second": 133.338,
160
- "eval_steps_per_second": 4.184,
161
- "step": 4816
162
  }
163
  ],
164
  "logging_steps": 500,
@@ -187,7 +144,7 @@
187
  "attributes": {}
188
  }
189
  },
190
- "total_flos": 4.023931632713933e+16,
191
  "train_batch_size": 32,
192
  "trial_name": null,
193
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1376,
3
+ "best_metric": 0.008029412478208542,
4
+ "best_model_checkpoint": "projects/PetBERT_annonymisation/data/case_sensitive/model/checkpoint-1376",
5
+ "epoch": 5.0,
6
  "eval_steps": 500,
7
+ "global_step": 3440,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
+ "eval_f1": 0.010973861448813302,
15
+ "eval_loss": 2.4063162803649902,
16
+ "eval_precision": 0.09108980806958722,
17
+ "eval_recall": 0.10473207318294019,
18
+ "eval_runtime": 23.7722,
19
+ "eval_samples_per_second": 139.407,
20
+ "eval_steps_per_second": 4.375,
21
  "step": 0
22
  },
23
  {
24
  "epoch": 0.7267441860465116,
25
+ "grad_norm": 0.2666139304637909,
26
  "learning_rate": 4.996373546511628e-05,
27
+ "loss": 0.0518,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
+ "eval_f1": 0.9198586422350306,
33
+ "eval_loss": 0.010010140016674995,
34
+ "eval_precision": 0.9136306910527949,
35
+ "eval_recall": 0.9281047546302535,
36
+ "eval_runtime": 24.8489,
37
+ "eval_samples_per_second": 133.366,
38
+ "eval_steps_per_second": 4.185,
39
  "step": 688
40
  },
41
  {
42
  "epoch": 1.4534883720930232,
43
+ "grad_norm": 0.11741874366998672,
44
  "learning_rate": 4.992739825581396e-05,
45
+ "loss": 0.0036,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_f1": 0.9300254346144702,
51
+ "eval_loss": 0.008029412478208542,
52
+ "eval_precision": 0.9130114164496201,
53
+ "eval_recall": 0.9489397415434179,
54
+ "eval_runtime": 25.1412,
55
+ "eval_samples_per_second": 131.815,
56
+ "eval_steps_per_second": 4.137,
57
  "step": 1376
58
  },
59
  {
60
  "epoch": 2.1802325581395348,
61
+ "grad_norm": 0.3377048373222351,
62
  "learning_rate": 4.989106104651163e-05,
63
+ "loss": 0.0026,
64
  "step": 1500
65
  },
66
  {
67
  "epoch": 2.9069767441860463,
68
+ "grad_norm": 0.2929118573665619,
69
  "learning_rate": 4.985472383720931e-05,
70
+ "loss": 0.0011,
71
  "step": 2000
72
  },
73
  {
74
  "epoch": 3.0,
75
+ "eval_f1": 0.9402795283929368,
76
+ "eval_loss": 0.01124291867017746,
77
+ "eval_precision": 0.9422754585534239,
78
+ "eval_recall": 0.9386206401984958,
79
+ "eval_runtime": 24.8815,
80
+ "eval_samples_per_second": 133.191,
81
+ "eval_steps_per_second": 4.18,
82
  "step": 2064
83
  },
84
  {
85
  "epoch": 3.633720930232558,
86
+ "grad_norm": 0.002858501160517335,
87
  "learning_rate": 4.981838662790698e-05,
88
+ "loss": 0.0012,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_f1": 0.9321076846329125,
94
+ "eval_loss": 0.011350538581609726,
95
+ "eval_precision": 0.9585579423470411,
96
+ "eval_recall": 0.9092514508781232,
97
+ "eval_runtime": 24.8844,
98
+ "eval_samples_per_second": 133.176,
99
+ "eval_steps_per_second": 4.179,
100
  "step": 2752
101
  },
102
  {
103
  "epoch": 4.3604651162790695,
104
+ "grad_norm": 0.3254820704460144,
105
  "learning_rate": 4.978204941860465e-05,
106
+ "loss": 0.001,
107
  "step": 3000
108
  },
109
  {
110
  "epoch": 5.0,
111
+ "eval_f1": 0.9375959937426187,
112
+ "eval_loss": 0.012251886539161205,
113
+ "eval_precision": 0.9347887957568951,
114
+ "eval_recall": 0.9407576824824448,
115
+ "eval_runtime": 24.848,
116
+ "eval_samples_per_second": 133.371,
117
+ "eval_steps_per_second": 4.185,
118
  "step": 3440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  }
120
  ],
121
  "logging_steps": 500,
 
144
  "attributes": {}
145
  }
146
  },
147
+ "total_flos": 2.874236880509952e+16,
148
  "train_batch_size": 32,
149
  "trial_name": null,
150
  "trial_params": null