Jinawei committed on
Commit cc491f8 · 1 Parent(s): c49cfe9

Upload 8 files

config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "embedding_size": 160,
+   "finetuning_task": "cola",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 160,
+   "initializer_range": 0.02,
+   "intermediate_size": 560,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 10,
+   "num_hidden_layers": 7,
+   "output_intermediate": true,
+   "output_past": true,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.17.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
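
For context, a minimal usage sketch (not part of the upload) of how a checkpoint carrying this config.json could be loaded with Hugging Face Transformers (the config was written by version 4.17.0). The local directory name below is a placeholder, not the actual repo id, and custom keys such as "output_intermediate" and "embedding_size" are simply stored as config attributes; only the original SparseOptimizer code is expected to read them, not the stock BERT classes.

# Hypothetical sketch: load the uploaded checkpoint from a local directory.
# "./cola-checkpoint" is a placeholder path, not the actual repo id.
from transformers import AutoConfig, AutoTokenizer, BertForSequenceClassification

model_dir = "./cola-checkpoint"

config = AutoConfig.from_pretrained(model_dir)   # parses the config.json above
assert config.num_hidden_layers == 7 and config.hidden_size == 160
# Non-standard keys ("output_intermediate", "embedding_size") become plain
# attributes on the config object and are otherwise ignored by vanilla BERT.

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir, config=config)
model.eval()
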
log_bs32_lr3e-05_20221124_034416_123214.txt ADDED
@@ -0,0 +1,1784 @@
1
+ ------------> log file ==runs2/cola/OUTPUT_ID/log_bs32_lr3e-05_20221124_034416_123214.txt
2
+ Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/CoLA', do_eval=False, early_stop=False, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/cola/OUTPUT_ID', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='cola', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
3
+ Distributed environment: NO
4
+ Num processes: 1
5
+ Process index: 0
6
+ Local process index: 0
7
+ Device: cuda
8
+ Mixed precision type: fp16
9
+
10
+ Sample 3305 of the training set: (tensor([ 101, 2058, 1996, 3481, 2045, 18360, 1037, 2312, 5210, 1012,
11
+ 102, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16
+ 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
21
+ Sample 4580 of the training set: (tensor([ 101, 7525, 2097, 3191, 2115, 4311, 1010, 2021, 7157, 2097, 2025, 1012,
22
+ 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
+ 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
31
+ Sample 1031 of the training set: (tensor([ 101, 2040, 7164, 2505, 2008, 2040, 2758, 1029, 102, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
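
(Editorial aside, not part of the original log: the three printed samples appear to be (input_ids, attention_mask, token_type_ids, label) tuples, with id 101 = [CLS], 102 = [SEP], and 0 = [PAD] under the standard bert-base-uncased vocabulary. A minimal sketch of how such a tuple is produced; the sentence and the padded length of 64 are illustrative assumptions, since the actual padding is decided by the data collator.)

# Hypothetical illustration of one tokenized CoLA example.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed vocab
enc = tokenizer(
    "Over the fence there appeared a large monster.",  # illustrative sentence
    padding="max_length",
    max_length=64,        # the printed samples happen to be padded to length 64
    truncation=True,
)
# enc["input_ids"]      -> 101 ([CLS]), word-piece ids, 102 ([SEP]), then 0s ([PAD])
# enc["attention_mask"] -> 1 for real tokens, 0 for padding
# enc["token_type_ids"] -> all 0 for a single-sentence task such as CoLA
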
41
+ ***** Running training *****
42
+ Num examples = 8551
43
+ Num Epochs = 30
44
+ Instantaneous batch size per device = 32
45
+ Total train batch size (w. parallel, distributed & accumulation) = 32
46
+ Gradient Accumulation steps = 1
47
+ Total optimization steps = 8040
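
(Editorial aside, not part of the original log: the 8040 total optimization steps follow directly from the settings above, as the quick check below shows, and they match the per-epoch evaluation points at steps 268, 536, 804, ... later in the log.)

import math
steps_per_epoch = math.ceil(8551 / 32)   # 268 (8551 examples, batch size 32, grad. accumulation 1)
print(steps_per_epoch * 30)              # 8040 optimization steps over 30 epochs
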
48
+ 000005/008040, loss: 0.652222, avg_loss: 0.651953
49
+ 000010/008040, loss: 0.661713, avg_loss: 0.653055
50
+ 000015/008040, loss: 0.677567, avg_loss: 0.654401
51
+ 000020/008040, loss: 0.650467, avg_loss: 0.655538
52
+ 000025/008040, loss: 0.677872, avg_loss: 0.658810
53
+ 000030/008040, loss: 0.661591, avg_loss: 0.658843
54
+ 000035/008040, loss: 0.645645, avg_loss: 0.659464
55
+ 000040/008040, loss: 0.652420, avg_loss: 0.659061
56
+ 000045/008040, loss: 0.687469, avg_loss: 0.657230
57
+ 000050/008040, loss: 0.626709, avg_loss: 0.656326
58
+ 000055/008040, loss: 0.644302, avg_loss: 0.655348
59
+ 000060/008040, loss: 0.648483, avg_loss: 0.655236
60
+ 000065/008040, loss: 0.688477, avg_loss: 0.655078
61
+ 000070/008040, loss: 0.695236, avg_loss: 0.656294
62
+ 000075/008040, loss: 0.659332, avg_loss: 0.655568
63
+ 000080/008040, loss: 0.658981, avg_loss: 0.654595
64
+ 000085/008040, loss: 0.667786, avg_loss: 0.654187
65
+ 000090/008040, loss: 0.658127, avg_loss: 0.653667
66
+ 000095/008040, loss: 0.671814, avg_loss: 0.652597
67
+ 000100/008040, loss: 0.633545, avg_loss: 0.652570
68
+ 000105/008040, loss: 0.641556, avg_loss: 0.651439
69
+ 000110/008040, loss: 0.605942, avg_loss: 0.650307
70
+ 000115/008040, loss: 0.668716, avg_loss: 0.649737
71
+ 000120/008040, loss: 0.565681, avg_loss: 0.648114
72
+ 000125/008040, loss: 0.645332, avg_loss: 0.647180
73
+ 000130/008040, loss: 0.654228, avg_loss: 0.645764
74
+ 000135/008040, loss: 0.647469, avg_loss: 0.644160
75
+ 000140/008040, loss: 0.589897, avg_loss: 0.644381
76
+ 000145/008040, loss: 0.622665, avg_loss: 0.643773
77
+ 000150/008040, loss: 0.582314, avg_loss: 0.642514
78
+ 000155/008040, loss: 0.706223, avg_loss: 0.642192
79
+ 000160/008040, loss: 0.573463, avg_loss: 0.640978
80
+ 000165/008040, loss: 0.596039, avg_loss: 0.641289
81
+ 000170/008040, loss: 0.629677, avg_loss: 0.641707
82
+ 000175/008040, loss: 0.581818, avg_loss: 0.640926
83
+ 000180/008040, loss: 0.573257, avg_loss: 0.640330
84
+ 000185/008040, loss: 0.536911, avg_loss: 0.639325
85
+ 000190/008040, loss: 0.607376, avg_loss: 0.638997
86
+ 000195/008040, loss: 0.565323, avg_loss: 0.638167
87
+ 000200/008040, loss: 0.561142, avg_loss: 0.637244
88
+ 000205/008040, loss: 0.709908, avg_loss: 0.636707
89
+ 000210/008040, loss: 0.655846, avg_loss: 0.635711
90
+ 000215/008040, loss: 0.562042, avg_loss: 0.635813
91
+ 000220/008040, loss: 0.623016, avg_loss: 0.635337
92
+ 000225/008040, loss: 0.665192, avg_loss: 0.636041
93
+ 000230/008040, loss: 0.602562, avg_loss: 0.636022
94
+ 000235/008040, loss: 0.579742, avg_loss: 0.636067
95
+ 000240/008040, loss: 0.643562, avg_loss: 0.635816
96
+ 000245/008040, loss: 0.572937, avg_loss: 0.635337
97
+ 000250/008040, loss: 0.609734, avg_loss: 0.635397
98
+ 000255/008040, loss: 0.540230, avg_loss: 0.634626
99
+ 000260/008040, loss: 0.688667, avg_loss: 0.634769
100
+ 000265/008040, loss: 0.600624, avg_loss: 0.633670
101
+ ***** Running dev evaluation *****
102
+ Num examples = 1042
103
+ Instantaneous batch size per device = 32
104
+ epoch 0, step 268/8040: {'matthews_correlation': 0.0}
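
(Editorial aside, not part of the original log: the dev metric for CoLA is the Matthews correlation coefficient. A minimal sketch of computing it, using scikit-learn here as an assumption since the training script itself may rely on the GLUE metric from the datasets library; a constant predictor yields 0.0, which is consistent with the early-epoch scores above.)

# Hypothetical example: Matthews correlation on toy dev predictions.
from sklearn.metrics import matthews_corrcoef

y_true = [1, 0, 1, 1, 0, 1]   # toy labels
y_pred = [1, 1, 1, 1, 1, 1]   # a constant predictor (always the majority class)
print(matthews_corrcoef(y_true, y_pred))  # 0.0
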
105
+ 000270/008040, loss: 0.532043, avg_loss: 0.633611
106
+ 000275/008040, loss: 0.593201, avg_loss: 0.633069
107
+ 000280/008040, loss: 0.706932, avg_loss: 0.633117
108
+ 000285/008040, loss: 0.684525, avg_loss: 0.632790
109
+ 000290/008040, loss: 0.642426, avg_loss: 0.632020
110
+ 000295/008040, loss: 0.617699, avg_loss: 0.632309
111
+ 000300/008040, loss: 0.693405, avg_loss: 0.632864
112
+ 000305/008040, loss: 0.721481, avg_loss: 0.632628
113
+ 000310/008040, loss: 0.551292, avg_loss: 0.631563
114
+ 000315/008040, loss: 0.619102, avg_loss: 0.631161
115
+ 000320/008040, loss: 0.573761, avg_loss: 0.631119
116
+ 000325/008040, loss: 0.621773, avg_loss: 0.631340
117
+ 000330/008040, loss: 0.504166, avg_loss: 0.630779
118
+ 000335/008040, loss: 0.627258, avg_loss: 0.630988
119
+ 000340/008040, loss: 0.620651, avg_loss: 0.630650
120
+ 000345/008040, loss: 0.579193, avg_loss: 0.630047
121
+ 000350/008040, loss: 0.621361, avg_loss: 0.629630
122
+ 000355/008040, loss: 0.638191, avg_loss: 0.629452
123
+ 000360/008040, loss: 0.589127, avg_loss: 0.629559
124
+ 000365/008040, loss: 0.725616, avg_loss: 0.629725
125
+ 000370/008040, loss: 0.677101, avg_loss: 0.629617
126
+ 000375/008040, loss: 0.575119, avg_loss: 0.628965
127
+ 000380/008040, loss: 0.493011, avg_loss: 0.628739
128
+ 000385/008040, loss: 0.653969, avg_loss: 0.629037
129
+ 000390/008040, loss: 0.672195, avg_loss: 0.628962
130
+ 000395/008040, loss: 0.665962, avg_loss: 0.628864
131
+ 000400/008040, loss: 0.515625, avg_loss: 0.627842
132
+ 000405/008040, loss: 0.596878, avg_loss: 0.627075
133
+ 000410/008040, loss: 0.624397, avg_loss: 0.626773
134
+ 000415/008040, loss: 0.568840, avg_loss: 0.626794
135
+ 000420/008040, loss: 0.678520, avg_loss: 0.626516
136
+ 000425/008040, loss: 0.691116, avg_loss: 0.626167
137
+ 000430/008040, loss: 0.629356, avg_loss: 0.625845
138
+ 000435/008040, loss: 0.592529, avg_loss: 0.624969
139
+ 000440/008040, loss: 0.621323, avg_loss: 0.624545
140
+ 000445/008040, loss: 0.713966, avg_loss: 0.624795
141
+ 000450/008040, loss: 0.510971, avg_loss: 0.624669
142
+ 000455/008040, loss: 0.672722, avg_loss: 0.625094
143
+ 000460/008040, loss: 0.569397, avg_loss: 0.624232
144
+ 000465/008040, loss: 0.543617, avg_loss: 0.624290
145
+ 000470/008040, loss: 0.701065, avg_loss: 0.623677
146
+ 000475/008040, loss: 0.697563, avg_loss: 0.623886
147
+ 000480/008040, loss: 0.516510, avg_loss: 0.623712
148
+ 000485/008040, loss: 0.623947, avg_loss: 0.624315
149
+ 000490/008040, loss: 0.571724, avg_loss: 0.623997
150
+ 000495/008040, loss: 0.652824, avg_loss: 0.624104
151
+ 000500/008040, loss: 0.591492, avg_loss: 0.623351
152
+ 000505/008040, loss: 0.562141, avg_loss: 0.623008
153
+ 000510/008040, loss: 0.571037, avg_loss: 0.622772
154
+ 000515/008040, loss: 0.594940, avg_loss: 0.622726
155
+ 000520/008040, loss: 0.595879, avg_loss: 0.622236
156
+ 000525/008040, loss: 0.420456, avg_loss: 0.621296
157
+ 000530/008040, loss: 0.679199, avg_loss: 0.621323
158
+ 000535/008040, loss: 0.630341, avg_loss: 0.621167
159
+ ***** Running dev evaluation *****
160
+ Num examples = 1042
161
+ Instantaneous batch size per device = 32
162
+ epoch 1, step 536/8040: {'matthews_correlation': 0.0}
163
+ 000540/008040, loss: 0.539192, avg_loss: 0.620370
164
+ 000545/008040, loss: 0.624496, avg_loss: 0.620560
165
+ 000550/008040, loss: 0.478798, avg_loss: 0.620446
166
+ 000555/008040, loss: 0.623642, avg_loss: 0.619969
167
+ 000560/008040, loss: 0.588112, avg_loss: 0.620092
168
+ 000565/008040, loss: 0.537552, avg_loss: 0.619577
169
+ 000570/008040, loss: 0.650581, avg_loss: 0.619168
170
+ 000575/008040, loss: 0.587677, avg_loss: 0.619309
171
+ 000580/008040, loss: 0.557770, avg_loss: 0.619064
172
+ 000585/008040, loss: 0.741516, avg_loss: 0.619524
173
+ 000590/008040, loss: 0.600594, avg_loss: 0.619864
174
+ 000595/008040, loss: 0.669243, avg_loss: 0.619429
175
+ 000600/008040, loss: 0.649200, avg_loss: 0.619334
176
+ 000605/008040, loss: 0.707039, avg_loss: 0.619096
177
+ 000610/008040, loss: 0.570152, avg_loss: 0.618888
178
+ 000615/008040, loss: 0.678078, avg_loss: 0.618893
179
+ 000620/008040, loss: 0.590279, avg_loss: 0.618822
180
+ 000625/008040, loss: 0.648178, avg_loss: 0.618851
181
+ 000630/008040, loss: 0.707413, avg_loss: 0.619119
182
+ 000635/008040, loss: 0.613609, avg_loss: 0.619032
183
+ 000640/008040, loss: 0.637474, avg_loss: 0.619302
184
+ 000645/008040, loss: 0.620033, avg_loss: 0.619061
185
+ 000650/008040, loss: 0.674812, avg_loss: 0.619388
186
+ 000655/008040, loss: 0.603241, avg_loss: 0.619122
187
+ 000660/008040, loss: 0.596001, avg_loss: 0.619185
188
+ 000665/008040, loss: 0.546028, avg_loss: 0.618988
189
+ 000670/008040, loss: 0.467194, avg_loss: 0.618476
190
+ 000675/008040, loss: 0.539108, avg_loss: 0.617887
191
+ 000680/008040, loss: 0.630531, avg_loss: 0.617587
192
+ 000685/008040, loss: 0.628906, avg_loss: 0.617714
193
+ 000690/008040, loss: 0.662460, avg_loss: 0.617408
194
+ 000695/008040, loss: 0.556480, avg_loss: 0.617490
195
+ 000700/008040, loss: 0.562729, avg_loss: 0.617541
196
+ 000705/008040, loss: 0.534187, avg_loss: 0.617240
197
+ 000710/008040, loss: 0.705315, avg_loss: 0.617217
198
+ 000715/008040, loss: 0.628326, avg_loss: 0.617076
199
+ 000720/008040, loss: 0.451012, avg_loss: 0.616425
200
+ 000725/008040, loss: 0.679764, avg_loss: 0.616494
201
+ 000730/008040, loss: 0.648232, avg_loss: 0.616603
202
+ 000735/008040, loss: 0.775688, avg_loss: 0.616731
203
+ 000740/008040, loss: 0.483841, avg_loss: 0.616351
204
+ 000745/008040, loss: 0.480392, avg_loss: 0.616453
205
+ 000750/008040, loss: 0.641754, avg_loss: 0.616477
206
+ 000755/008040, loss: 0.655449, avg_loss: 0.616491
207
+ 000760/008040, loss: 0.620735, avg_loss: 0.616589
208
+ 000765/008040, loss: 0.698372, avg_loss: 0.616510
209
+ 000770/008040, loss: 0.541710, avg_loss: 0.616465
210
+ 000775/008040, loss: 0.616829, avg_loss: 0.616582
211
+ 000780/008040, loss: 0.615852, avg_loss: 0.616320
212
+ 000785/008040, loss: 0.645004, avg_loss: 0.616204
213
+ 000790/008040, loss: 0.620392, avg_loss: 0.616373
214
+ 000795/008040, loss: 0.621986, avg_loss: 0.616532
215
+ 000800/008040, loss: 0.698029, avg_loss: 0.616433
216
+ ***** Running dev evaluation *****
217
+ Num examples = 1042
218
+ Instantaneous batch size per device = 32
219
+ epoch 2, step 804/8040: {'matthews_correlation': 0.0}
220
+ 000805/008040, loss: 0.666588, avg_loss: 0.616280
221
+ 000810/008040, loss: 0.513000, avg_loss: 0.616144
222
+ 000815/008040, loss: 0.678246, avg_loss: 0.616261
223
+ 000820/008040, loss: 0.508728, avg_loss: 0.616257
224
+ 000825/008040, loss: 0.561485, avg_loss: 0.615815
225
+ 000830/008040, loss: 0.629829, avg_loss: 0.615827
226
+ 000835/008040, loss: 0.614128, avg_loss: 0.615832
227
+ 000840/008040, loss: 0.594833, avg_loss: 0.615989
228
+ 000845/008040, loss: 0.536339, avg_loss: 0.616183
229
+ 000850/008040, loss: 0.599487, avg_loss: 0.616021
230
+ 000855/008040, loss: 0.673401, avg_loss: 0.616031
231
+ 000860/008040, loss: 0.539940, avg_loss: 0.616041
232
+ 000865/008040, loss: 0.572212, avg_loss: 0.616227
233
+ 000870/008040, loss: 0.609131, avg_loss: 0.615873
234
+ 000875/008040, loss: 0.614037, avg_loss: 0.615787
235
+ 000880/008040, loss: 0.571541, avg_loss: 0.615553
236
+ 000885/008040, loss: 0.544708, avg_loss: 0.615402
237
+ 000890/008040, loss: 0.536331, avg_loss: 0.615272
238
+ 000895/008040, loss: 0.675529, avg_loss: 0.615234
239
+ 000900/008040, loss: 0.627602, avg_loss: 0.615170
240
+ 000905/008040, loss: 0.660126, avg_loss: 0.615088
241
+ 000910/008040, loss: 0.558937, avg_loss: 0.614803
242
+ 000915/008040, loss: 0.549660, avg_loss: 0.614641
243
+ 000920/008040, loss: 0.643845, avg_loss: 0.614743
244
+ 000925/008040, loss: 0.572258, avg_loss: 0.614782
245
+ 000930/008040, loss: 0.634689, avg_loss: 0.614739
246
+ 000935/008040, loss: 0.543579, avg_loss: 0.614672
247
+ 000940/008040, loss: 0.714607, avg_loss: 0.614516
248
+ 000945/008040, loss: 0.596260, avg_loss: 0.614343
249
+ 000950/008040, loss: 0.564911, avg_loss: 0.614114
250
+ 000955/008040, loss: 0.735931, avg_loss: 0.614013
251
+ 000960/008040, loss: 0.742020, avg_loss: 0.614185
252
+ 000965/008040, loss: 0.632996, avg_loss: 0.613984
253
+ 000970/008040, loss: 0.505898, avg_loss: 0.614050
254
+ 000975/008040, loss: 0.632904, avg_loss: 0.614360
255
+ 000980/008040, loss: 0.637787, avg_loss: 0.614284
256
+ 000985/008040, loss: 0.544106, avg_loss: 0.614300
257
+ 000990/008040, loss: 0.627823, avg_loss: 0.614128
258
+ 000995/008040, loss: 0.518829, avg_loss: 0.613865
259
+ 001000/008040, loss: 0.682663, avg_loss: 0.613936
260
+ 001005/008040, loss: 0.702003, avg_loss: 0.614055
261
+ 001010/008040, loss: 0.638351, avg_loss: 0.613856
262
+ 001015/008040, loss: 0.643425, avg_loss: 0.613794
263
+ 001020/008040, loss: 0.511559, avg_loss: 0.613882
264
+ 001025/008040, loss: 0.570694, avg_loss: 0.613526
265
+ 001030/008040, loss: 0.742371, avg_loss: 0.613564
266
+ 001035/008040, loss: 0.525169, avg_loss: 0.613386
267
+ 001040/008040, loss: 0.631721, avg_loss: 0.613472
268
+ 001045/008040, loss: 0.678436, avg_loss: 0.613299
269
+ 001050/008040, loss: 0.496040, avg_loss: 0.613325
270
+ 001055/008040, loss: 0.656860, avg_loss: 0.613305
271
+ 001060/008040, loss: 0.574501, avg_loss: 0.613204
272
+ 001065/008040, loss: 0.547646, avg_loss: 0.613304
273
+ 001070/008040, loss: 0.567757, avg_loss: 0.613248
274
+ ***** Running dev evaluation *****
275
+ Num examples = 1042
276
+ Instantaneous batch size per device = 32
277
+ epoch 3, step 1072/8040: {'matthews_correlation': 0.0}
278
+ 001075/008040, loss: 0.516525, avg_loss: 0.613198
279
+ 001080/008040, loss: 0.646126, avg_loss: 0.613297
280
+ 001085/008040, loss: 0.559669, avg_loss: 0.613150
281
+ 001090/008040, loss: 0.466164, avg_loss: 0.612698
282
+ 001095/008040, loss: 0.651367, avg_loss: 0.612560
283
+ 001100/008040, loss: 0.654549, avg_loss: 0.612559
284
+ 001105/008040, loss: 0.541718, avg_loss: 0.612571
285
+ 001110/008040, loss: 0.508430, avg_loss: 0.612473
286
+ 001115/008040, loss: 0.573200, avg_loss: 0.612111
287
+ 001120/008040, loss: 0.557476, avg_loss: 0.611973
288
+ 001125/008040, loss: 0.652611, avg_loss: 0.611617
289
+ 001130/008040, loss: 0.526085, avg_loss: 0.611471
290
+ 001135/008040, loss: 0.444359, avg_loss: 0.610979
291
+ 001140/008040, loss: 0.628677, avg_loss: 0.610810
292
+ 001145/008040, loss: 0.503414, avg_loss: 0.610575
293
+ 001150/008040, loss: 0.473530, avg_loss: 0.610274
294
+ 001155/008040, loss: 0.545444, avg_loss: 0.609830
295
+ 001160/008040, loss: 0.505241, avg_loss: 0.609483
296
+ 001165/008040, loss: 0.495068, avg_loss: 0.609279
297
+ 001170/008040, loss: 0.588757, avg_loss: 0.609380
298
+ 001175/008040, loss: 0.567360, avg_loss: 0.609198
299
+ 001180/008040, loss: 0.648659, avg_loss: 0.609238
300
+ 001185/008040, loss: 0.637810, avg_loss: 0.609066
301
+ 001190/008040, loss: 0.824970, avg_loss: 0.608990
302
+ 001195/008040, loss: 0.519943, avg_loss: 0.608662
303
+ 001200/008040, loss: 0.601860, avg_loss: 0.608558
304
+ 001205/008040, loss: 0.583908, avg_loss: 0.608157
305
+ 001210/008040, loss: 0.725353, avg_loss: 0.608082
306
+ 001215/008040, loss: 0.490597, avg_loss: 0.607950
307
+ 001220/008040, loss: 0.425632, avg_loss: 0.607686
308
+ 001225/008040, loss: 0.596016, avg_loss: 0.607460
309
+ 001230/008040, loss: 0.577240, avg_loss: 0.607340
310
+ 001235/008040, loss: 0.586720, avg_loss: 0.607166
311
+ 001240/008040, loss: 0.395218, avg_loss: 0.606811
312
+ 001245/008040, loss: 0.633438, avg_loss: 0.606556
313
+ 001250/008040, loss: 0.680958, avg_loss: 0.606540
314
+ 001255/008040, loss: 0.628593, avg_loss: 0.606489
315
+ 001260/008040, loss: 0.440163, avg_loss: 0.606267
316
+ 001265/008040, loss: 0.556847, avg_loss: 0.606262
317
+ 001270/008040, loss: 0.504066, avg_loss: 0.605940
318
+ 001275/008040, loss: 0.572201, avg_loss: 0.605561
319
+ 001280/008040, loss: 0.718948, avg_loss: 0.605577
320
+ 001285/008040, loss: 0.485722, avg_loss: 0.605334
321
+ 001290/008040, loss: 0.612705, avg_loss: 0.605227
322
+ 001295/008040, loss: 0.666271, avg_loss: 0.604907
323
+ 001300/008040, loss: 0.501457, avg_loss: 0.604697
324
+ 001305/008040, loss: 0.642426, avg_loss: 0.604318
325
+ 001310/008040, loss: 0.627075, avg_loss: 0.604377
326
+ 001315/008040, loss: 0.406536, avg_loss: 0.603942
327
+ 001320/008040, loss: 0.498077, avg_loss: 0.603909
328
+ 001325/008040, loss: 0.622322, avg_loss: 0.604236
329
+ 001330/008040, loss: 0.563160, avg_loss: 0.604285
330
+ 001335/008040, loss: 0.545391, avg_loss: 0.604257
331
+ 001340/008040, loss: 0.478202, avg_loss: 0.604044
332
+ ***** Running dev evaluation *****
333
+ Num examples = 1042
334
+ Instantaneous batch size per device = 32
335
+ epoch 4, step 1340/8040: {'matthews_correlation': 0.15816318746785782}
336
+ 001345/008040, loss: 0.497211, avg_loss: 0.603771
337
+ 001350/008040, loss: 0.504078, avg_loss: 0.603257
338
+ 001355/008040, loss: 0.563469, avg_loss: 0.603027
339
+ 001360/008040, loss: 0.436638, avg_loss: 0.602608
340
+ 001365/008040, loss: 0.503475, avg_loss: 0.602021
341
+ 001370/008040, loss: 0.512344, avg_loss: 0.601490
342
+ 001375/008040, loss: 0.479725, avg_loss: 0.601133
343
+ 001380/008040, loss: 0.547909, avg_loss: 0.600951
344
+ 001385/008040, loss: 0.400013, avg_loss: 0.600657
345
+ 001390/008040, loss: 0.542755, avg_loss: 0.600452
346
+ 001395/008040, loss: 0.376102, avg_loss: 0.600132
347
+ 001400/008040, loss: 0.548512, avg_loss: 0.599679
348
+ 001405/008040, loss: 0.676727, avg_loss: 0.599476
349
+ 001410/008040, loss: 0.657574, avg_loss: 0.599159
350
+ 001415/008040, loss: 0.496979, avg_loss: 0.598694
351
+ 001420/008040, loss: 0.572510, avg_loss: 0.598379
352
+ 001425/008040, loss: 0.535450, avg_loss: 0.598210
353
+ 001430/008040, loss: 0.394039, avg_loss: 0.597776
354
+ 001435/008040, loss: 0.513657, avg_loss: 0.597460
355
+ 001440/008040, loss: 0.511215, avg_loss: 0.597127
356
+ 001445/008040, loss: 0.339886, avg_loss: 0.596767
357
+ 001450/008040, loss: 0.510815, avg_loss: 0.596433
358
+ 001455/008040, loss: 0.654274, avg_loss: 0.596212
359
+ 001460/008040, loss: 0.349232, avg_loss: 0.595762
360
+ 001465/008040, loss: 0.493137, avg_loss: 0.595544
361
+ 001470/008040, loss: 0.592804, avg_loss: 0.595768
362
+ 001475/008040, loss: 0.455566, avg_loss: 0.595485
363
+ 001480/008040, loss: 0.570206, avg_loss: 0.595109
364
+ 001485/008040, loss: 0.385925, avg_loss: 0.594787
365
+ 001490/008040, loss: 0.431305, avg_loss: 0.594303
366
+ 001495/008040, loss: 0.571796, avg_loss: 0.594218
367
+ 001500/008040, loss: 0.523552, avg_loss: 0.594122
368
+ 001505/008040, loss: 0.777546, avg_loss: 0.594023
369
+ 001510/008040, loss: 0.629959, avg_loss: 0.593755
370
+ 001515/008040, loss: 0.650162, avg_loss: 0.593670
371
+ 001520/008040, loss: 0.509098, avg_loss: 0.593517
372
+ 001525/008040, loss: 0.462357, avg_loss: 0.593270
373
+ 001530/008040, loss: 0.643883, avg_loss: 0.593174
374
+ 001535/008040, loss: 0.417114, avg_loss: 0.592827
375
+ 001540/008040, loss: 0.470078, avg_loss: 0.592541
376
+ 001545/008040, loss: 0.504375, avg_loss: 0.592232
377
+ 001550/008040, loss: 0.536144, avg_loss: 0.592052
378
+ 001555/008040, loss: 0.408104, avg_loss: 0.591786
379
+ 001560/008040, loss: 0.476906, avg_loss: 0.591672
380
+ 001565/008040, loss: 0.560863, avg_loss: 0.591418
381
+ 001570/008040, loss: 0.519066, avg_loss: 0.591105
382
+ 001575/008040, loss: 0.594341, avg_loss: 0.590856
383
+ 001580/008040, loss: 0.466764, avg_loss: 0.590662
384
+ 001585/008040, loss: 0.450207, avg_loss: 0.590457
385
+ 001590/008040, loss: 0.435432, avg_loss: 0.590293
386
+ 001595/008040, loss: 0.556160, avg_loss: 0.589989
387
+ 001600/008040, loss: 0.611170, avg_loss: 0.589724
388
+ 001605/008040, loss: 0.402641, avg_loss: 0.589453
389
+ ***** Running dev evaluation *****
390
+ Num examples = 1042
391
+ Instantaneous batch size per device = 32
392
+ epoch 5, step 1608/8040: {'matthews_correlation': 0.1895854925674006}
393
+ 001610/008040, loss: 0.432098, avg_loss: 0.588991
394
+ 001615/008040, loss: 0.498215, avg_loss: 0.588663
395
+ 001620/008040, loss: 0.663460, avg_loss: 0.588371
396
+ 001625/008040, loss: 0.360754, avg_loss: 0.587877
397
+ 001630/008040, loss: 0.387835, avg_loss: 0.587359
398
+ 001635/008040, loss: 0.324583, avg_loss: 0.586922
399
+ 001640/008040, loss: 0.535975, avg_loss: 0.586623
400
+ 001645/008040, loss: 0.592346, avg_loss: 0.586544
401
+ 001650/008040, loss: 0.525536, avg_loss: 0.586103
402
+ 001655/008040, loss: 0.575745, avg_loss: 0.585766
403
+ 001660/008040, loss: 0.449825, avg_loss: 0.585359
404
+ 001665/008040, loss: 0.420126, avg_loss: 0.585023
405
+ 001670/008040, loss: 0.639540, avg_loss: 0.584721
406
+ 001675/008040, loss: 0.344273, avg_loss: 0.584341
407
+ 001680/008040, loss: 0.540806, avg_loss: 0.583815
408
+ 001685/008040, loss: 0.384409, avg_loss: 0.583182
409
+ 001690/008040, loss: 0.440865, avg_loss: 0.583039
410
+ 001695/008040, loss: 0.555412, avg_loss: 0.582610
411
+ 001700/008040, loss: 0.407997, avg_loss: 0.582275
412
+ 001705/008040, loss: 0.383024, avg_loss: 0.581847
413
+ 001710/008040, loss: 0.537008, avg_loss: 0.581442
414
+ 001715/008040, loss: 0.355068, avg_loss: 0.581088
415
+ 001720/008040, loss: 0.338402, avg_loss: 0.580900
416
+ 001725/008040, loss: 0.566572, avg_loss: 0.580647
417
+ 001730/008040, loss: 0.548752, avg_loss: 0.580402
418
+ 001735/008040, loss: 0.535681, avg_loss: 0.580117
419
+ 001740/008040, loss: 0.614624, avg_loss: 0.579823
420
+ 001745/008040, loss: 0.530151, avg_loss: 0.579474
421
+ 001750/008040, loss: 0.489605, avg_loss: 0.579157
422
+ 001755/008040, loss: 0.584639, avg_loss: 0.578986
423
+ 001760/008040, loss: 0.501501, avg_loss: 0.578577
424
+ 001765/008040, loss: 0.511179, avg_loss: 0.578278
425
+ 001770/008040, loss: 0.400345, avg_loss: 0.577970
426
+ 001775/008040, loss: 0.562296, avg_loss: 0.577756
427
+ 001780/008040, loss: 0.460430, avg_loss: 0.577570
428
+ 001785/008040, loss: 0.420773, avg_loss: 0.577251
429
+ 001790/008040, loss: 0.476810, avg_loss: 0.576814
430
+ 001795/008040, loss: 0.473215, avg_loss: 0.576538
431
+ 001800/008040, loss: 0.324244, avg_loss: 0.576321
432
+ 001805/008040, loss: 0.466421, avg_loss: 0.575848
433
+ 001810/008040, loss: 0.515072, avg_loss: 0.575525
434
+ 001815/008040, loss: 0.348133, avg_loss: 0.575163
435
+ 001820/008040, loss: 0.405993, avg_loss: 0.574717
436
+ 001825/008040, loss: 0.724440, avg_loss: 0.574358
437
+ 001830/008040, loss: 0.404591, avg_loss: 0.573989
438
+ 001835/008040, loss: 0.386953, avg_loss: 0.573703
439
+ 001840/008040, loss: 0.297220, avg_loss: 0.573313
440
+ 001845/008040, loss: 0.549763, avg_loss: 0.573012
441
+ 001850/008040, loss: 0.439955, avg_loss: 0.572694
442
+ 001855/008040, loss: 0.379715, avg_loss: 0.572410
443
+ 001860/008040, loss: 0.301922, avg_loss: 0.571970
444
+ 001865/008040, loss: 0.442181, avg_loss: 0.571632
445
+ 001870/008040, loss: 0.403500, avg_loss: 0.571553
446
+ 001875/008040, loss: 0.479061, avg_loss: 0.571291
447
+ ***** Running dev evaluation *****
448
+ Num examples = 1042
449
+ Instantaneous batch size per device = 32
450
+ epoch 6, step 1876/8040: {'matthews_correlation': 0.21307686539085852}
451
+ 001880/008040, loss: 0.669218, avg_loss: 0.571140
452
+ 001885/008040, loss: 0.542337, avg_loss: 0.570861
453
+ 001890/008040, loss: 0.589821, avg_loss: 0.570818
454
+ 001895/008040, loss: 0.476381, avg_loss: 0.570683
455
+ 001900/008040, loss: 0.404007, avg_loss: 0.570266
456
+ 001905/008040, loss: 0.356413, avg_loss: 0.569667
457
+ 001910/008040, loss: 0.477184, avg_loss: 0.569371
458
+ 001915/008040, loss: 0.490374, avg_loss: 0.568927
459
+ 001920/008040, loss: 0.415562, avg_loss: 0.568538
460
+ 001925/008040, loss: 0.388258, avg_loss: 0.568085
461
+ 001930/008040, loss: 0.456861, avg_loss: 0.567801
462
+ 001935/008040, loss: 0.384424, avg_loss: 0.567589
463
+ 001940/008040, loss: 0.353615, avg_loss: 0.567248
464
+ 001945/008040, loss: 0.391302, avg_loss: 0.566774
465
+ 001950/008040, loss: 0.465639, avg_loss: 0.566302
466
+ 001955/008040, loss: 0.230816, avg_loss: 0.565751
467
+ 001960/008040, loss: 0.518036, avg_loss: 0.565276
468
+ 001965/008040, loss: 0.410194, avg_loss: 0.564808
469
+ 001970/008040, loss: 0.349751, avg_loss: 0.564646
470
+ 001975/008040, loss: 0.334585, avg_loss: 0.564052
471
+ 001980/008040, loss: 0.534948, avg_loss: 0.563787
472
+ 001985/008040, loss: 0.362720, avg_loss: 0.563456
473
+ 001990/008040, loss: 0.448977, avg_loss: 0.563030
474
+ 001995/008040, loss: 0.355686, avg_loss: 0.562736
475
+ 002000/008040, loss: 0.439312, avg_loss: 0.562282
476
+ 002005/008040, loss: 0.560658, avg_loss: 0.561938
477
+ 002010/008040, loss: 0.257093, avg_loss: 0.561694
478
+ 002015/008040, loss: 0.427464, avg_loss: 0.561446
479
+ 002020/008040, loss: 0.501780, avg_loss: 0.561143
480
+ 002025/008040, loss: 0.395622, avg_loss: 0.560958
481
+ 002030/008040, loss: 0.593700, avg_loss: 0.560682
482
+ 002035/008040, loss: 0.260624, avg_loss: 0.560042
483
+ 002040/008040, loss: 0.331761, avg_loss: 0.559656
484
+ 002045/008040, loss: 0.454527, avg_loss: 0.559403
485
+ 002050/008040, loss: 0.379538, avg_loss: 0.559265
486
+ 002055/008040, loss: 0.376312, avg_loss: 0.558873
487
+ 002060/008040, loss: 0.522795, avg_loss: 0.558628
488
+ 002065/008040, loss: 0.457785, avg_loss: 0.558271
489
+ 002070/008040, loss: 0.561003, avg_loss: 0.557992
490
+ 002075/008040, loss: 0.269575, avg_loss: 0.557529
491
+ 002080/008040, loss: 0.476014, avg_loss: 0.557187
492
+ 002085/008040, loss: 0.425539, avg_loss: 0.556978
493
+ 002090/008040, loss: 0.350451, avg_loss: 0.556575
494
+ 002095/008040, loss: 0.275333, avg_loss: 0.556114
495
+ 002100/008040, loss: 0.463511, avg_loss: 0.556032
496
+ 002105/008040, loss: 0.313173, avg_loss: 0.555600
497
+ 002110/008040, loss: 0.555047, avg_loss: 0.555265
498
+ 002115/008040, loss: 0.554151, avg_loss: 0.555004
499
+ 002120/008040, loss: 0.383408, avg_loss: 0.554883
500
+ 002125/008040, loss: 0.488256, avg_loss: 0.554548
501
+ 002130/008040, loss: 0.357615, avg_loss: 0.554264
502
+ 002135/008040, loss: 0.157333, avg_loss: 0.553637
503
+ 002140/008040, loss: 0.288863, avg_loss: 0.553479
504
+ ***** Running dev evaluation *****
505
+ Num examples = 1042
506
+ Instantaneous batch size per device = 32
507
+ epoch 7, step 2144/8040: {'matthews_correlation': 0.22254373946847703}
508
+ 002145/008040, loss: 0.303406, avg_loss: 0.553004
509
+ 002150/008040, loss: 0.306277, avg_loss: 0.552618
510
+ 002155/008040, loss: 0.338732, avg_loss: 0.552232
511
+ 002160/008040, loss: 0.427835, avg_loss: 0.551830
512
+ 002165/008040, loss: 0.414758, avg_loss: 0.551564
513
+ 002170/008040, loss: 0.576152, avg_loss: 0.551253
514
+ 002175/008040, loss: 0.407147, avg_loss: 0.550871
515
+ 002180/008040, loss: 0.399303, avg_loss: 0.550418
516
+ 002185/008040, loss: 0.417184, avg_loss: 0.550105
517
+ 002190/008040, loss: 0.373152, avg_loss: 0.549766
518
+ 002195/008040, loss: 0.357236, avg_loss: 0.549473
519
+ 002200/008040, loss: 0.488667, avg_loss: 0.549189
520
+ 002205/008040, loss: 0.430184, avg_loss: 0.548822
521
+ 002210/008040, loss: 0.509947, avg_loss: 0.548544
522
+ 002215/008040, loss: 0.248991, avg_loss: 0.548000
523
+ 002220/008040, loss: 0.524855, avg_loss: 0.547764
524
+ 002225/008040, loss: 0.307470, avg_loss: 0.547397
525
+ 002230/008040, loss: 0.317670, avg_loss: 0.546941
526
+ 002235/008040, loss: 0.257109, avg_loss: 0.546452
527
+ 002240/008040, loss: 0.437452, avg_loss: 0.546180
528
+ 002245/008040, loss: 0.416744, avg_loss: 0.545923
529
+ 002250/008040, loss: 0.301953, avg_loss: 0.545449
530
+ 002255/008040, loss: 0.505546, avg_loss: 0.545019
531
+ 002260/008040, loss: 0.438693, avg_loss: 0.544505
532
+ 002265/008040, loss: 0.269212, avg_loss: 0.544109
533
+ 002270/008040, loss: 0.436381, avg_loss: 0.543815
534
+ 002275/008040, loss: 0.455789, avg_loss: 0.543614
535
+ 002280/008040, loss: 0.498013, avg_loss: 0.543247
536
+ 002285/008040, loss: 0.329160, avg_loss: 0.542899
537
+ 002290/008040, loss: 0.281924, avg_loss: 0.542624
538
+ 002295/008040, loss: 0.366100, avg_loss: 0.542508
539
+ 002300/008040, loss: 0.371017, avg_loss: 0.542094
540
+ 002305/008040, loss: 0.395582, avg_loss: 0.541701
541
+ 002310/008040, loss: 0.396773, avg_loss: 0.541435
542
+ 002315/008040, loss: 0.382794, avg_loss: 0.541095
543
+ 002320/008040, loss: 0.306365, avg_loss: 0.540708
544
+ 002325/008040, loss: 0.434783, avg_loss: 0.540375
545
+ 002330/008040, loss: 0.357806, avg_loss: 0.539949
546
+ 002335/008040, loss: 0.380549, avg_loss: 0.539729
547
+ 002340/008040, loss: 0.216732, avg_loss: 0.539146
548
+ 002345/008040, loss: 0.385883, avg_loss: 0.538827
549
+ 002350/008040, loss: 0.594073, avg_loss: 0.538528
550
+ 002355/008040, loss: 0.413212, avg_loss: 0.538440
551
+ 002360/008040, loss: 0.210932, avg_loss: 0.538051
552
+ 002365/008040, loss: 0.336315, avg_loss: 0.537636
553
+ 002370/008040, loss: 0.447609, avg_loss: 0.537165
554
+ 002375/008040, loss: 0.580924, avg_loss: 0.537083
555
+ 002380/008040, loss: 0.482136, avg_loss: 0.536826
556
+ 002385/008040, loss: 0.290261, avg_loss: 0.536410
557
+ 002390/008040, loss: 0.567177, avg_loss: 0.536199
558
+ 002395/008040, loss: 0.543372, avg_loss: 0.535950
559
+ 002400/008040, loss: 0.344522, avg_loss: 0.535539
560
+ 002405/008040, loss: 0.219617, avg_loss: 0.535235
561
+ 002410/008040, loss: 0.449083, avg_loss: 0.534978
562
+ ***** Running dev evaluation *****
563
+ Num examples = 1042
564
+ Instantaneous batch size per device = 32
565
+ epoch 8, step 2412/8040: {'matthews_correlation': 0.22951168079779777}
566
+ 002415/008040, loss: 0.347907, avg_loss: 0.534702
567
+ 002420/008040, loss: 0.380767, avg_loss: 0.534286
568
+ 002425/008040, loss: 0.322007, avg_loss: 0.533838
569
+ 002430/008040, loss: 0.468786, avg_loss: 0.533497
570
+ 002435/008040, loss: 0.309734, avg_loss: 0.533102
571
+ 002440/008040, loss: 0.197897, avg_loss: 0.532586
572
+ 002445/008040, loss: 0.345694, avg_loss: 0.532278
573
+ 002450/008040, loss: 0.559374, avg_loss: 0.531879
574
+ 002455/008040, loss: 0.311327, avg_loss: 0.531552
575
+ 002460/008040, loss: 0.300930, avg_loss: 0.531075
576
+ 002465/008040, loss: 0.339101, avg_loss: 0.530702
577
+ 002470/008040, loss: 0.473831, avg_loss: 0.530428
578
+ 002475/008040, loss: 0.467193, avg_loss: 0.530181
579
+ 002480/008040, loss: 0.500907, avg_loss: 0.529953
580
+ 002485/008040, loss: 0.457974, avg_loss: 0.529605
581
+ 002490/008040, loss: 0.343074, avg_loss: 0.529418
582
+ 002495/008040, loss: 0.444531, avg_loss: 0.529064
583
+ 002500/008040, loss: 0.330952, avg_loss: 0.528731
584
+ 002505/008040, loss: 0.258092, avg_loss: 0.528329
585
+ 002510/008040, loss: 0.373878, avg_loss: 0.527944
586
+ 002515/008040, loss: 0.298052, avg_loss: 0.527590
587
+ 002520/008040, loss: 0.290030, avg_loss: 0.527212
588
+ 002525/008040, loss: 0.235393, avg_loss: 0.526718
589
+ 002530/008040, loss: 0.293452, avg_loss: 0.526214
590
+ 002535/008040, loss: 0.193506, avg_loss: 0.525848
591
+ 002540/008040, loss: 0.344110, avg_loss: 0.525426
592
+ 002545/008040, loss: 0.457061, avg_loss: 0.525270
593
+ 002550/008040, loss: 0.443786, avg_loss: 0.524819
594
+ 002555/008040, loss: 0.245760, avg_loss: 0.524364
595
+ 002560/008040, loss: 0.319549, avg_loss: 0.524011
596
+ 002565/008040, loss: 0.259768, avg_loss: 0.523602
597
+ 002570/008040, loss: 0.575876, avg_loss: 0.523300
598
+ 002575/008040, loss: 0.411968, avg_loss: 0.522964
599
+ 002580/008040, loss: 0.563390, avg_loss: 0.522671
600
+ 002585/008040, loss: 0.227027, avg_loss: 0.522272
601
+ 002590/008040, loss: 0.354241, avg_loss: 0.521908
602
+ 002595/008040, loss: 0.265463, avg_loss: 0.521373
603
+ 002600/008040, loss: 0.427238, avg_loss: 0.521074
604
+ 002605/008040, loss: 0.287523, avg_loss: 0.520749
605
+ 002610/008040, loss: 0.325408, avg_loss: 0.520449
606
+ 002615/008040, loss: 0.423108, avg_loss: 0.520330
607
+ 002620/008040, loss: 0.443160, avg_loss: 0.520100
608
+ 002625/008040, loss: 0.299728, avg_loss: 0.519799
609
+ 002630/008040, loss: 0.535900, avg_loss: 0.519659
610
+ 002635/008040, loss: 0.479263, avg_loss: 0.519475
611
+ 002640/008040, loss: 0.471956, avg_loss: 0.519183
612
+ 002645/008040, loss: 0.267595, avg_loss: 0.518814
613
+ 002650/008040, loss: 0.322007, avg_loss: 0.518672
614
+ 002655/008040, loss: 0.332003, avg_loss: 0.518388
615
+ 002660/008040, loss: 0.439392, avg_loss: 0.518129
616
+ 002665/008040, loss: 0.322509, avg_loss: 0.517892
617
+ 002670/008040, loss: 0.328728, avg_loss: 0.517541
618
+ 002675/008040, loss: 0.224217, avg_loss: 0.517160
619
+ 002680/008040, loss: 0.213649, avg_loss: 0.516849
620
+ ***** Running dev evaluation *****
621
+ Num examples = 1042
622
+ Instantaneous batch size per device = 32
623
+ epoch 9, step 2680/8040: {'matthews_correlation': 0.23063296136375847}
624
+ 002685/008040, loss: 0.357351, avg_loss: 0.516590
625
+ 002690/008040, loss: 0.191270, avg_loss: 0.516160
626
+ 002695/008040, loss: 0.370189, avg_loss: 0.515724
627
+ 002700/008040, loss: 0.336470, avg_loss: 0.515376
628
+ 002705/008040, loss: 0.268067, avg_loss: 0.515164
629
+ 002710/008040, loss: 0.179003, avg_loss: 0.514576
630
+ 002715/008040, loss: 0.427791, avg_loss: 0.514281
631
+ 002720/008040, loss: 0.361250, avg_loss: 0.513829
632
+ 002725/008040, loss: 0.189704, avg_loss: 0.513455
633
+ 002730/008040, loss: 0.273372, avg_loss: 0.513143
634
+ 002735/008040, loss: 0.239124, avg_loss: 0.512584
635
+ 002740/008040, loss: 0.378276, avg_loss: 0.512291
636
+ 002745/008040, loss: 0.646600, avg_loss: 0.512093
637
+ 002750/008040, loss: 0.267004, avg_loss: 0.511666
638
+ 002755/008040, loss: 0.422471, avg_loss: 0.511320
639
+ 002760/008040, loss: 0.209467, avg_loss: 0.510807
640
+ 002765/008040, loss: 0.346109, avg_loss: 0.510532
641
+ 002770/008040, loss: 0.233545, avg_loss: 0.510248
642
+ 002775/008040, loss: 0.466287, avg_loss: 0.509798
643
+ 002780/008040, loss: 0.403711, avg_loss: 0.509469
644
+ 002785/008040, loss: 0.327383, avg_loss: 0.509096
645
+ 002790/008040, loss: 0.404777, avg_loss: 0.508683
646
+ 002795/008040, loss: 0.542493, avg_loss: 0.508458
647
+ 002800/008040, loss: 0.367202, avg_loss: 0.508142
648
+ 002805/008040, loss: 0.368768, avg_loss: 0.507783
649
+ 002810/008040, loss: 0.312175, avg_loss: 0.507447
650
+ 002815/008040, loss: 0.302803, avg_loss: 0.507226
651
+ 002820/008040, loss: 0.401391, avg_loss: 0.507006
652
+ 002825/008040, loss: 0.244652, avg_loss: 0.506694
653
+ 002830/008040, loss: 0.370769, avg_loss: 0.506369
654
+ 002835/008040, loss: 0.182358, avg_loss: 0.505992
655
+ 002840/008040, loss: 0.266695, avg_loss: 0.505660
656
+ 002845/008040, loss: 0.334674, avg_loss: 0.505358
657
+ 002850/008040, loss: 0.348623, avg_loss: 0.505099
658
+ 002855/008040, loss: 0.187014, avg_loss: 0.504682
659
+ 002860/008040, loss: 0.259943, avg_loss: 0.504276
660
+ 002865/008040, loss: 0.382271, avg_loss: 0.503964
661
+ 002870/008040, loss: 0.570188, avg_loss: 0.503794
662
+ 002875/008040, loss: 0.218833, avg_loss: 0.503395
663
+ 002880/008040, loss: 0.356927, avg_loss: 0.502927
664
+ 002885/008040, loss: 0.173357, avg_loss: 0.502596
665
+ 002890/008040, loss: 0.360153, avg_loss: 0.502279
666
+ 002895/008040, loss: 0.295114, avg_loss: 0.501944
667
+ 002900/008040, loss: 0.210005, avg_loss: 0.501620
668
+ 002905/008040, loss: 0.300519, avg_loss: 0.501388
669
+ 002910/008040, loss: 0.207313, avg_loss: 0.501142
670
+ 002915/008040, loss: 0.333078, avg_loss: 0.500983
671
+ 002920/008040, loss: 0.285614, avg_loss: 0.500578
672
+ 002925/008040, loss: 0.373211, avg_loss: 0.500342
673
+ 002930/008040, loss: 0.308893, avg_loss: 0.500114
674
+ 002935/008040, loss: 0.350144, avg_loss: 0.499820
675
+ 002940/008040, loss: 0.439544, avg_loss: 0.499650
676
+ 002945/008040, loss: 0.320980, avg_loss: 0.499399
677
+ ***** Running dev evaluation *****
678
+ Num examples = 1042
679
+ Instantaneous batch size per device = 32
680
+ epoch 10, step 2948/8040: {'matthews_correlation': 0.18813850606847293}
681
+ 002950/008040, loss: 0.588822, avg_loss: 0.499118
682
+ 002955/008040, loss: 0.375319, avg_loss: 0.498777
683
+ 002960/008040, loss: 0.218809, avg_loss: 0.498292
684
+ 002965/008040, loss: 0.346797, avg_loss: 0.497883
685
+ 002970/008040, loss: 0.256976, avg_loss: 0.497420
686
+ 002975/008040, loss: 0.267139, avg_loss: 0.496947
687
+ 002980/008040, loss: 0.096225, avg_loss: 0.496468
688
+ 002985/008040, loss: 0.540555, avg_loss: 0.496152
689
+ 002990/008040, loss: 0.360075, avg_loss: 0.495919
690
+ 002995/008040, loss: 0.439329, avg_loss: 0.495439
691
+ 003000/008040, loss: 0.407537, avg_loss: 0.495246
692
+ 003005/008040, loss: 0.148738, avg_loss: 0.494893
693
+ 003010/008040, loss: 0.212634, avg_loss: 0.494479
694
+ 003015/008040, loss: 0.413274, avg_loss: 0.494294
695
+ 003020/008040, loss: 0.505027, avg_loss: 0.493973
696
+ 003025/008040, loss: 0.353874, avg_loss: 0.493663
697
+ 003030/008040, loss: 0.191752, avg_loss: 0.493211
698
+ 003035/008040, loss: 0.262918, avg_loss: 0.492834
699
+ 003040/008040, loss: 0.251038, avg_loss: 0.492580
700
+ 003045/008040, loss: 0.291340, avg_loss: 0.492184
701
+ 003050/008040, loss: 0.387451, avg_loss: 0.491783
702
+ 003055/008040, loss: 0.277742, avg_loss: 0.491582
703
+ 003060/008040, loss: 0.147416, avg_loss: 0.491181
704
+ 003065/008040, loss: 0.383072, avg_loss: 0.490947
705
+ 003070/008040, loss: 0.332592, avg_loss: 0.490599
706
+ 003075/008040, loss: 0.376683, avg_loss: 0.490302
707
+ 003080/008040, loss: 0.316549, avg_loss: 0.490015
708
+ 003085/008040, loss: 0.248254, avg_loss: 0.489692
709
+ 003090/008040, loss: 0.446958, avg_loss: 0.489378
710
+ 003095/008040, loss: 0.534116, avg_loss: 0.489182
711
+ 003100/008040, loss: 0.186525, avg_loss: 0.488832
712
+ 003105/008040, loss: 0.367679, avg_loss: 0.488583
713
+ 003110/008040, loss: 0.263721, avg_loss: 0.488235
714
+ 003115/008040, loss: 0.160604, avg_loss: 0.487953
715
+ 003120/008040, loss: 0.351810, avg_loss: 0.487793
716
+ 003125/008040, loss: 0.282861, avg_loss: 0.487567
717
+ 003130/008040, loss: 0.291616, avg_loss: 0.487259
718
+ 003135/008040, loss: 0.215605, avg_loss: 0.486928
719
+ 003140/008040, loss: 0.497842, avg_loss: 0.486740
720
+ 003145/008040, loss: 0.141915, avg_loss: 0.486475
721
+ 003150/008040, loss: 0.493731, avg_loss: 0.486197
722
+ 003155/008040, loss: 0.337679, avg_loss: 0.486007
723
+ 003160/008040, loss: 0.423936, avg_loss: 0.485696
724
+ 003165/008040, loss: 0.325907, avg_loss: 0.485502
725
+ 003170/008040, loss: 0.373995, avg_loss: 0.485242
726
+ 003175/008040, loss: 0.221332, avg_loss: 0.484958
727
+ 003180/008040, loss: 0.245173, avg_loss: 0.484625
728
+ 003185/008040, loss: 0.243983, avg_loss: 0.484395
729
+ 003190/008040, loss: 0.279999, avg_loss: 0.484095
730
+ 003195/008040, loss: 0.289218, avg_loss: 0.483891
731
+ 003200/008040, loss: 0.619335, avg_loss: 0.483651
732
+ 003205/008040, loss: 0.231071, avg_loss: 0.483458
733
+ 003210/008040, loss: 0.285882, avg_loss: 0.483162
734
+ 003215/008040, loss: 0.306520, avg_loss: 0.482869
735
+ ***** Running dev evaluation *****
736
+ Num examples = 1042
737
+ Instantaneous batch size per device = 32
738
+ epoch 11, step 3216/8040: {'matthews_correlation': 0.20603205189543294}
739
+ 003220/008040, loss: 0.297244, avg_loss: 0.482532
740
+ 003225/008040, loss: 0.182570, avg_loss: 0.482196
741
+ 003230/008040, loss: 0.242626, avg_loss: 0.481782
742
+ 003235/008040, loss: 0.463844, avg_loss: 0.481595
743
+ 003240/008040, loss: 0.132137, avg_loss: 0.481325
744
+ 003245/008040, loss: 0.392527, avg_loss: 0.481101
745
+ 003250/008040, loss: 0.270020, avg_loss: 0.480822
746
+ 003255/008040, loss: 0.137306, avg_loss: 0.480509
747
+ 003260/008040, loss: 0.391590, avg_loss: 0.480182
748
+ 003265/008040, loss: 0.262886, avg_loss: 0.479862
749
+ 003270/008040, loss: 0.058543, avg_loss: 0.479458
750
+ 003275/008040, loss: 0.404657, avg_loss: 0.479210
751
+ 003280/008040, loss: 0.276438, avg_loss: 0.478893
752
+ 003285/008040, loss: 0.190946, avg_loss: 0.478536
753
+ 003290/008040, loss: 0.490697, avg_loss: 0.478209
754
+ 003295/008040, loss: 0.276675, avg_loss: 0.478014
755
+ 003300/008040, loss: 0.402889, avg_loss: 0.477756
756
+ 003305/008040, loss: 0.233364, avg_loss: 0.477388
757
+ 003310/008040, loss: 0.231294, avg_loss: 0.477059
758
+ 003315/008040, loss: 0.109852, avg_loss: 0.476722
759
+ 003320/008040, loss: 0.320070, avg_loss: 0.476551
760
+ 003325/008040, loss: 0.244717, avg_loss: 0.476204
761
+ 003330/008040, loss: 0.181756, avg_loss: 0.475863
762
+ 003335/008040, loss: 0.222641, avg_loss: 0.475576
763
+ 003340/008040, loss: 0.121102, avg_loss: 0.475165
764
+ 003345/008040, loss: 0.265407, avg_loss: 0.474816
765
+ 003350/008040, loss: 0.322964, avg_loss: 0.474535
766
+ 003355/008040, loss: 0.237767, avg_loss: 0.474252
767
+ 003360/008040, loss: 0.343557, avg_loss: 0.473983
768
+ 003365/008040, loss: 0.257172, avg_loss: 0.473661
769
+ 003370/008040, loss: 0.260944, avg_loss: 0.473336
770
+ 003375/008040, loss: 0.292535, avg_loss: 0.473136
771
+ 003380/008040, loss: 0.228900, avg_loss: 0.472876
772
+ 003385/008040, loss: 0.133238, avg_loss: 0.472483
773
+ 003390/008040, loss: 0.090823, avg_loss: 0.472257
774
+ 003395/008040, loss: 0.353693, avg_loss: 0.471962
775
+ 003400/008040, loss: 0.349422, avg_loss: 0.471682
776
+ 003405/008040, loss: 0.257864, avg_loss: 0.471517
777
+ 003410/008040, loss: 0.252595, avg_loss: 0.471166
778
+ 003415/008040, loss: 0.247344, avg_loss: 0.470882
779
+ 003420/008040, loss: 0.263462, avg_loss: 0.470580
780
+ 003425/008040, loss: 0.343597, avg_loss: 0.470315
781
+ 003430/008040, loss: 0.544077, avg_loss: 0.470094
782
+ 003435/008040, loss: 0.303536, avg_loss: 0.469789
783
+ 003440/008040, loss: 0.327594, avg_loss: 0.469431
784
+ 003445/008040, loss: 0.325582, avg_loss: 0.469118
785
+ 003450/008040, loss: 0.270005, avg_loss: 0.468807
786
+ 003455/008040, loss: 0.373651, avg_loss: 0.468467
787
+ 003460/008040, loss: 0.296829, avg_loss: 0.468144
788
+ 003465/008040, loss: 0.382215, avg_loss: 0.467829
789
+ 003470/008040, loss: 0.412389, avg_loss: 0.467540
790
+ 003475/008040, loss: 0.434352, avg_loss: 0.467421
791
+ 003480/008040, loss: 0.624913, avg_loss: 0.467201
792
+ ***** Running dev evaluation *****
793
+ Num examples = 1042
794
+ Instantaneous batch size per device = 32
795
+ epoch 12, step 3484/8040: {'matthews_correlation': 0.2118432448298745}
796
+ 003485/008040, loss: 0.174334, avg_loss: 0.467056
797
+ 003490/008040, loss: 0.165397, avg_loss: 0.466660
798
+ 003495/008040, loss: 0.265744, avg_loss: 0.466368
799
+ 003500/008040, loss: 0.194233, avg_loss: 0.466006
800
+ 003505/008040, loss: 0.293150, avg_loss: 0.465843
801
+ 003510/008040, loss: 0.190316, avg_loss: 0.465447
802
+ 003515/008040, loss: 0.186641, avg_loss: 0.465075
803
+ 003520/008040, loss: 0.197128, avg_loss: 0.464644
804
+ 003525/008040, loss: 0.258738, avg_loss: 0.464389
805
+ 003530/008040, loss: 0.500658, avg_loss: 0.464118
806
+ 003535/008040, loss: 0.181811, avg_loss: 0.463812
807
+ 003540/008040, loss: 0.255612, avg_loss: 0.463451
808
+ 003545/008040, loss: 0.301980, avg_loss: 0.463198
809
+ 003550/008040, loss: 0.262177, avg_loss: 0.462909
810
+ 003555/008040, loss: 0.150805, avg_loss: 0.462686
811
+ 003560/008040, loss: 0.106480, avg_loss: 0.462353
812
+ 003565/008040, loss: 0.353977, avg_loss: 0.462018
813
+ 003570/008040, loss: 0.095433, avg_loss: 0.461672
814
+ 003575/008040, loss: 0.218111, avg_loss: 0.461353
815
+ 003580/008040, loss: 0.149131, avg_loss: 0.461044
816
+ 003585/008040, loss: 0.267326, avg_loss: 0.460682
817
+ 003590/008040, loss: 0.132778, avg_loss: 0.460332
818
+ 003595/008040, loss: 0.084128, avg_loss: 0.460019
819
+ 003600/008040, loss: 0.240429, avg_loss: 0.459736
820
+ 003605/008040, loss: 0.087250, avg_loss: 0.459388
821
+ 003610/008040, loss: 0.350120, avg_loss: 0.459246
822
+ 003615/008040, loss: 0.268835, avg_loss: 0.458947
823
+ 003620/008040, loss: 0.269842, avg_loss: 0.458634
824
+ 003625/008040, loss: 0.156989, avg_loss: 0.458435
825
+ 003630/008040, loss: 0.230001, avg_loss: 0.458124
826
+ 003635/008040, loss: 0.326509, avg_loss: 0.457910
827
+ 003640/008040, loss: 0.336903, avg_loss: 0.457636
828
+ 003645/008040, loss: 0.484366, avg_loss: 0.457448
829
+ 003650/008040, loss: 0.292469, avg_loss: 0.457143
830
+ 003655/008040, loss: 0.196029, avg_loss: 0.456941
831
+ 003660/008040, loss: 0.388540, avg_loss: 0.456735
832
+ 003665/008040, loss: 0.392700, avg_loss: 0.456447
833
+ 003670/008040, loss: 0.214234, avg_loss: 0.456100
834
+ 003675/008040, loss: 0.166740, avg_loss: 0.455776
835
+ 003680/008040, loss: 0.476048, avg_loss: 0.455626
836
+ 003685/008040, loss: 0.133647, avg_loss: 0.455296
837
+ 003690/008040, loss: 0.138388, avg_loss: 0.454962
838
+ 003695/008040, loss: 0.419241, avg_loss: 0.454699
839
+ 003700/008040, loss: 0.273247, avg_loss: 0.454571
840
+ 003705/008040, loss: 0.335091, avg_loss: 0.454264
841
+ 003710/008040, loss: 0.352429, avg_loss: 0.454025
842
+ 003715/008040, loss: 0.424709, avg_loss: 0.453788
843
+ 003720/008040, loss: 0.169942, avg_loss: 0.453501
844
+ 003725/008040, loss: 0.356818, avg_loss: 0.453249
845
+ 003730/008040, loss: 0.165223, avg_loss: 0.452948
846
+ 003735/008040, loss: 0.186675, avg_loss: 0.452718
847
+ 003740/008040, loss: 0.381955, avg_loss: 0.452507
848
+ 003745/008040, loss: 0.304955, avg_loss: 0.452266
849
+ 003750/008040, loss: 0.405848, avg_loss: 0.452040
850
+ ***** Running dev evaluation *****
851
+ Num examples = 1042
852
+ Instantaneous batch size per device = 32
853
+ epoch 13, step 3752/8040: {'matthews_correlation': 0.20261239362380884}
854
+ 003755/008040, loss: 0.216770, avg_loss: 0.451750
855
+ 003760/008040, loss: 0.223490, avg_loss: 0.451394
856
+ 003765/008040, loss: 0.375553, avg_loss: 0.451131
857
+ 003770/008040, loss: 0.196744, avg_loss: 0.450761
858
+ 003775/008040, loss: 0.363349, avg_loss: 0.450549
859
+ 003780/008040, loss: 0.293711, avg_loss: 0.450257
860
+ 003785/008040, loss: 0.413372, avg_loss: 0.450013
861
+ 003790/008040, loss: 0.122226, avg_loss: 0.449670
862
+ 003795/008040, loss: 0.356951, avg_loss: 0.449397
863
+ 003800/008040, loss: 0.118402, avg_loss: 0.449095
864
+ 003805/008040, loss: 0.146611, avg_loss: 0.448795
865
+ 003810/008040, loss: 0.157524, avg_loss: 0.448482
866
+ 003815/008040, loss: 0.343435, avg_loss: 0.448149
867
+ 003820/008040, loss: 0.368691, avg_loss: 0.447827
868
+ 003825/008040, loss: 0.201137, avg_loss: 0.447525
869
+ 003830/008040, loss: 0.132014, avg_loss: 0.447167
870
+ 003835/008040, loss: 0.090910, avg_loss: 0.446837
871
+ 003840/008040, loss: 0.232675, avg_loss: 0.446527
872
+ 003845/008040, loss: 0.275011, avg_loss: 0.446207
873
+ 003850/008040, loss: 0.152451, avg_loss: 0.446016
874
+ 003855/008040, loss: 0.314412, avg_loss: 0.445785
875
+ 003860/008040, loss: 0.148032, avg_loss: 0.445503
876
+ 003865/008040, loss: 0.165825, avg_loss: 0.445180
877
+ 003870/008040, loss: 0.094333, avg_loss: 0.444829
878
+ 003875/008040, loss: 0.375745, avg_loss: 0.444747
879
+ 003880/008040, loss: 0.403074, avg_loss: 0.444519
880
+ 003885/008040, loss: 0.165065, avg_loss: 0.444287
881
+ 003890/008040, loss: 0.234658, avg_loss: 0.444039
882
+ 003895/008040, loss: 0.319576, avg_loss: 0.443776
883
+ 003900/008040, loss: 0.217123, avg_loss: 0.443509
884
+ 003905/008040, loss: 0.378043, avg_loss: 0.443243
885
+ 003910/008040, loss: 0.122735, avg_loss: 0.443042
886
+ 003915/008040, loss: 0.264233, avg_loss: 0.442776
887
+ 003920/008040, loss: 0.076704, avg_loss: 0.442449
888
+ 003925/008040, loss: 0.125913, avg_loss: 0.442138
889
+ 003930/008040, loss: 0.272993, avg_loss: 0.441843
890
+ 003935/008040, loss: 0.068447, avg_loss: 0.441514
891
+ 003940/008040, loss: 0.244199, avg_loss: 0.441180
892
+ 003945/008040, loss: 0.228176, avg_loss: 0.440980
893
+ 003950/008040, loss: 0.148637, avg_loss: 0.440699
894
+ 003955/008040, loss: 0.299796, avg_loss: 0.440381
895
+ 003960/008040, loss: 0.276640, avg_loss: 0.440127
896
+ 003965/008040, loss: 0.344766, avg_loss: 0.439945
897
+ 003970/008040, loss: 0.085695, avg_loss: 0.439692
898
+ 003975/008040, loss: 0.467870, avg_loss: 0.439398
899
+ 003980/008040, loss: 0.275529, avg_loss: 0.439101
900
+ 003985/008040, loss: 0.342526, avg_loss: 0.438883
901
+ 003990/008040, loss: 0.117043, avg_loss: 0.438652
902
+ 003995/008040, loss: 0.216054, avg_loss: 0.438416
903
+ 004000/008040, loss: 0.194812, avg_loss: 0.438143
904
+ 004005/008040, loss: 0.173094, avg_loss: 0.437904
905
+ 004010/008040, loss: 0.072183, avg_loss: 0.437564
906
+ 004015/008040, loss: 0.403915, avg_loss: 0.437357
907
+ 004020/008040, loss: 0.087834, avg_loss: 0.436997
908
+ ***** Running dev evaluation *****
909
+ Num examples = 1042
910
+ Instantaneous batch size per device = 32
911
+ epoch 14, step 4020/8040: {'matthews_correlation': 0.22518881045488998}
912
+ 004025/008040, loss: 0.165382, avg_loss: 0.436778
913
+ 004030/008040, loss: 0.179677, avg_loss: 0.436415
914
+ 004035/008040, loss: 0.185260, avg_loss: 0.436160
915
+ 004040/008040, loss: 0.095814, avg_loss: 0.435886
916
+ 004045/008040, loss: 0.345136, avg_loss: 0.435673
917
+ 004050/008040, loss: 0.150750, avg_loss: 0.435363
918
+ 004055/008040, loss: 0.185758, avg_loss: 0.435133
919
+ 004060/008040, loss: 0.212922, avg_loss: 0.434953
920
+ 004065/008040, loss: 0.149902, avg_loss: 0.434669
921
+ 004070/008040, loss: 0.089165, avg_loss: 0.434428
922
+ 004075/008040, loss: 0.168942, avg_loss: 0.434088
923
+ 004080/008040, loss: 0.170014, avg_loss: 0.433740
924
+ 004085/008040, loss: 0.148718, avg_loss: 0.433445
925
+ 004090/008040, loss: 0.307841, avg_loss: 0.433193
926
+ 004095/008040, loss: 0.170424, avg_loss: 0.432888
927
+ 004100/008040, loss: 0.253233, avg_loss: 0.432721
928
+ 004105/008040, loss: 0.243379, avg_loss: 0.432492
929
+ 004110/008040, loss: 0.197091, avg_loss: 0.432229
930
+ 004115/008040, loss: 0.149977, avg_loss: 0.431969
931
+ 004120/008040, loss: 0.255616, avg_loss: 0.431698
932
+ 004125/008040, loss: 0.144500, avg_loss: 0.431442
933
+ 004130/008040, loss: 0.214776, avg_loss: 0.431226
934
+ 004135/008040, loss: 0.155176, avg_loss: 0.430925
935
+ 004140/008040, loss: 0.195912, avg_loss: 0.430689
936
+ 004145/008040, loss: 0.148231, avg_loss: 0.430308
937
+ 004150/008040, loss: 0.309211, avg_loss: 0.430062
938
+ 004155/008040, loss: 0.400494, avg_loss: 0.429949
939
+ 004160/008040, loss: 0.190466, avg_loss: 0.429596
940
+ 004165/008040, loss: 0.115031, avg_loss: 0.429430
941
+ 004170/008040, loss: 0.154674, avg_loss: 0.429143
942
+ 004175/008040, loss: 0.087612, avg_loss: 0.428819
943
+ 004180/008040, loss: 0.391154, avg_loss: 0.428581
944
+ 004185/008040, loss: 0.233200, avg_loss: 0.428438
945
+ 004190/008040, loss: 0.113590, avg_loss: 0.428204
946
+ 004195/008040, loss: 0.139859, avg_loss: 0.427997
947
+ 004200/008040, loss: 0.365552, avg_loss: 0.427729
948
+ 004205/008040, loss: 0.285945, avg_loss: 0.427567
949
+ 004210/008040, loss: 0.298795, avg_loss: 0.427337
950
+ 004215/008040, loss: 0.184676, avg_loss: 0.427015
951
+ 004220/008040, loss: 0.347303, avg_loss: 0.426763
952
+ 004225/008040, loss: 0.249475, avg_loss: 0.426473
953
+ 004230/008040, loss: 0.345056, avg_loss: 0.426234
954
+ 004235/008040, loss: 0.132455, avg_loss: 0.425935
955
+ 004240/008040, loss: 0.083139, avg_loss: 0.425697
956
+ 004245/008040, loss: 0.186649, avg_loss: 0.425451
957
+ 004250/008040, loss: 0.159150, avg_loss: 0.425129
958
+ 004255/008040, loss: 0.119297, avg_loss: 0.424885
959
+ 004260/008040, loss: 0.233108, avg_loss: 0.424649
960
+ 004265/008040, loss: 0.144114, avg_loss: 0.424408
961
+ 004270/008040, loss: 0.210518, avg_loss: 0.424164
962
+ 004275/008040, loss: 0.242731, avg_loss: 0.423926
963
+ 004280/008040, loss: 0.157653, avg_loss: 0.423696
964
+ 004285/008040, loss: 0.191035, avg_loss: 0.423368
965
+ ***** Running dev evaluation *****
966
+ Num examples = 1042
967
+ Instantaneous batch size per device = 32
968
+ epoch 15, step 4288/8040: {'matthews_correlation': 0.24863648291608131}
969
+ 004290/008040, loss: 0.266971, avg_loss: 0.423141
970
+ 004295/008040, loss: 0.322248, avg_loss: 0.422900
971
+ 004300/008040, loss: 0.231828, avg_loss: 0.422700
972
+ 004305/008040, loss: 0.072297, avg_loss: 0.422383
973
+ 004310/008040, loss: 0.193845, avg_loss: 0.422057
974
+ 004315/008040, loss: 0.217596, avg_loss: 0.421784
975
+ 004320/008040, loss: 0.393519, avg_loss: 0.421559
976
+ 004325/008040, loss: 0.400877, avg_loss: 0.421312
977
+ 004330/008040, loss: 0.087280, avg_loss: 0.420988
978
+ 004335/008040, loss: 0.124804, avg_loss: 0.420766
979
+ 004340/008040, loss: 0.130229, avg_loss: 0.420461
980
+ 004345/008040, loss: 0.304618, avg_loss: 0.420192
981
+ 004350/008040, loss: 0.189475, avg_loss: 0.419871
982
+ 004355/008040, loss: 0.203492, avg_loss: 0.419608
983
+ 004360/008040, loss: 0.144623, avg_loss: 0.419307
984
+ 004365/008040, loss: 0.127642, avg_loss: 0.418997
985
+ 004370/008040, loss: 0.218811, avg_loss: 0.418663
986
+ 004375/008040, loss: 0.059012, avg_loss: 0.418390
987
+ 004380/008040, loss: 0.237954, avg_loss: 0.418244
988
+ 004385/008040, loss: 0.248724, avg_loss: 0.418026
989
+ 004390/008040, loss: 0.160703, avg_loss: 0.417783
990
+ 004395/008040, loss: 0.276666, avg_loss: 0.417556
991
+ 004400/008040, loss: 0.101335, avg_loss: 0.417345
992
+ 004405/008040, loss: 0.468661, avg_loss: 0.417191
993
+ 004410/008040, loss: 0.154267, avg_loss: 0.416879
994
+ 004415/008040, loss: 0.196224, avg_loss: 0.416633
995
+ 004420/008040, loss: 0.092791, avg_loss: 0.416405
996
+ 004425/008040, loss: 0.447025, avg_loss: 0.416244
997
+ 004430/008040, loss: 0.225542, avg_loss: 0.415959
998
+ 004435/008040, loss: 0.091548, avg_loss: 0.415671
999
+ 004440/008040, loss: 0.116130, avg_loss: 0.415333
1000
+ 004445/008040, loss: 0.225495, avg_loss: 0.415069
1001
+ 004450/008040, loss: 0.213666, avg_loss: 0.414828
1002
+ 004455/008040, loss: 0.130709, avg_loss: 0.414580
1003
+ 004460/008040, loss: 0.225696, avg_loss: 0.414366
1004
+ 004465/008040, loss: 0.322512, avg_loss: 0.414150
1005
+ 004470/008040, loss: 0.409171, avg_loss: 0.413917
1006
+ 004475/008040, loss: 0.459070, avg_loss: 0.413800
1007
+ 004480/008040, loss: 0.205403, avg_loss: 0.413537
1008
+ 004485/008040, loss: 0.097172, avg_loss: 0.413289
1009
+ 004490/008040, loss: 0.104971, avg_loss: 0.413039
1010
+ 004495/008040, loss: 0.269551, avg_loss: 0.412801
1011
+ 004500/008040, loss: 0.151229, avg_loss: 0.412506
1012
+ 004505/008040, loss: 0.137360, avg_loss: 0.412240
1013
+ 004510/008040, loss: 0.349339, avg_loss: 0.412084
1014
+ 004515/008040, loss: 0.074355, avg_loss: 0.411830
1015
+ 004520/008040, loss: 0.165137, avg_loss: 0.411674
1016
+ 004525/008040, loss: 0.120821, avg_loss: 0.411390
1017
+ 004530/008040, loss: 0.156756, avg_loss: 0.411117
1018
+ 004535/008040, loss: 0.131685, avg_loss: 0.410892
1019
+ 004540/008040, loss: 0.215486, avg_loss: 0.410740
1020
+ 004545/008040, loss: 0.276792, avg_loss: 0.410564
1021
+ 004550/008040, loss: 0.163451, avg_loss: 0.410302
1022
+ 004555/008040, loss: 0.153240, avg_loss: 0.410035
1023
+ ***** Running dev evaluation *****
1024
+ Num examples = 1042
1025
+ Instantaneous batch size per device = 32
1026
+ epoch 16, step 4556/8040: {'matthews_correlation': 0.19984853723708582}
1027
+ 004560/008040, loss: 0.091924, avg_loss: 0.409807
1028
+ 004565/008040, loss: 0.285278, avg_loss: 0.409558
1029
+ 004570/008040, loss: 0.153244, avg_loss: 0.409386
1030
+ 004575/008040, loss: 0.117640, avg_loss: 0.409192
1031
+ 004580/008040, loss: 0.196797, avg_loss: 0.408980
1032
+ 004585/008040, loss: 0.170434, avg_loss: 0.408724
1033
+ 004590/008040, loss: 0.291520, avg_loss: 0.408442
1034
+ 004595/008040, loss: 0.095928, avg_loss: 0.408124
1035
+ 004600/008040, loss: 0.133423, avg_loss: 0.407880
1036
+ 004605/008040, loss: 0.224401, avg_loss: 0.407606
1037
+ 004610/008040, loss: 0.244196, avg_loss: 0.407368
1038
+ 004615/008040, loss: 0.086107, avg_loss: 0.407023
1039
+ 004620/008040, loss: 0.088616, avg_loss: 0.406692
1040
+ 004625/008040, loss: 0.182435, avg_loss: 0.406431
1041
+ 004630/008040, loss: 0.108714, avg_loss: 0.406193
1042
+ 004635/008040, loss: 0.052255, avg_loss: 0.405910
1043
+ 004640/008040, loss: 0.201341, avg_loss: 0.405591
1044
+ 004645/008040, loss: 0.125202, avg_loss: 0.405348
1045
+ 004650/008040, loss: 0.262958, avg_loss: 0.405130
1046
+ 004655/008040, loss: 0.136696, avg_loss: 0.404868
1047
+ 004660/008040, loss: 0.232297, avg_loss: 0.404751
1048
+ 004665/008040, loss: 0.169257, avg_loss: 0.404498
1049
+ 004670/008040, loss: 0.086830, avg_loss: 0.404246
1050
+ 004675/008040, loss: 0.199137, avg_loss: 0.404007
1051
+ 004680/008040, loss: 0.169171, avg_loss: 0.403782
1052
+ 004685/008040, loss: 0.112735, avg_loss: 0.403494
1053
+ 004690/008040, loss: 0.240913, avg_loss: 0.403283
1054
+ 004695/008040, loss: 0.228971, avg_loss: 0.403096
1055
+ 004700/008040, loss: 0.203035, avg_loss: 0.402813
1056
+ 004705/008040, loss: 0.252691, avg_loss: 0.402542
1057
+ 004710/008040, loss: 0.183577, avg_loss: 0.402340
1058
+ 004715/008040, loss: 0.152381, avg_loss: 0.402066
1059
+ 004720/008040, loss: 0.081548, avg_loss: 0.401885
1060
+ 004725/008040, loss: 0.243844, avg_loss: 0.401712
1061
+ 004730/008040, loss: 0.191078, avg_loss: 0.401477
1062
+ 004735/008040, loss: 0.309967, avg_loss: 0.401239
1063
+ 004740/008040, loss: 0.177741, avg_loss: 0.400996
1064
+ 004745/008040, loss: 0.443657, avg_loss: 0.400854
1065
+ 004750/008040, loss: 0.174745, avg_loss: 0.400599
1066
+ 004755/008040, loss: 0.117440, avg_loss: 0.400411
1067
+ 004760/008040, loss: 0.216662, avg_loss: 0.400179
1068
+ 004765/008040, loss: 0.331156, avg_loss: 0.399990
1069
+ 004770/008040, loss: 0.239916, avg_loss: 0.399799
1070
+ 004775/008040, loss: 0.392543, avg_loss: 0.399598
1071
+ 004780/008040, loss: 0.310010, avg_loss: 0.399412
1072
+ 004785/008040, loss: 0.176596, avg_loss: 0.399203
1073
+ 004790/008040, loss: 0.193463, avg_loss: 0.399010
1074
+ 004795/008040, loss: 0.065539, avg_loss: 0.398731
1075
+ 004800/008040, loss: 0.078370, avg_loss: 0.398525
1076
+ 004805/008040, loss: 0.268404, avg_loss: 0.398395
1077
+ 004810/008040, loss: 0.130425, avg_loss: 0.398184
1078
+ 004815/008040, loss: 0.124595, avg_loss: 0.397953
1079
+ 004820/008040, loss: 0.351294, avg_loss: 0.397781
1080
+ ***** Running dev evaluation *****
1081
+ Num examples = 1042
1082
+ Instantaneous batch size per device = 32
1083
+ epoch 17, step 4824/8040: {'matthews_correlation': 0.23319244596326755}
1084
+ 004825/008040, loss: 0.117143, avg_loss: 0.397605
1085
+ 004830/008040, loss: 0.085320, avg_loss: 0.397402
1086
+ 004835/008040, loss: 0.329389, avg_loss: 0.397171
1087
+ 004840/008040, loss: 0.167244, avg_loss: 0.396923
1088
+ 004845/008040, loss: 0.084977, avg_loss: 0.396725
1089
+ 004850/008040, loss: 0.170633, avg_loss: 0.396479
1090
+ 004855/008040, loss: 0.162252, avg_loss: 0.396249
1091
+ 004860/008040, loss: 0.242330, avg_loss: 0.396023
1092
+ 004865/008040, loss: 0.158724, avg_loss: 0.395797
1093
+ 004870/008040, loss: 0.145546, avg_loss: 0.395552
1094
+ 004875/008040, loss: 0.154330, avg_loss: 0.395311
1095
+ 004880/008040, loss: 0.156234, avg_loss: 0.395089
1096
+ 004885/008040, loss: 0.104371, avg_loss: 0.394829
1097
+ 004890/008040, loss: 0.116719, avg_loss: 0.394561
1098
+ 004895/008040, loss: 0.167743, avg_loss: 0.394364
1099
+ 004900/008040, loss: 0.064857, avg_loss: 0.394083
1100
+ 004905/008040, loss: 0.067141, avg_loss: 0.393839
1101
+ 004910/008040, loss: 0.099388, avg_loss: 0.393620
1102
+ 004915/008040, loss: 0.121026, avg_loss: 0.393411
1103
+ 004920/008040, loss: 0.237852, avg_loss: 0.393219
1104
+ 004925/008040, loss: 0.116583, avg_loss: 0.392966
1105
+ 004930/008040, loss: 0.066189, avg_loss: 0.392755
1106
+ 004935/008040, loss: 0.100841, avg_loss: 0.392546
1107
+ 004940/008040, loss: 0.184811, avg_loss: 0.392326
1108
+ 004945/008040, loss: 0.261129, avg_loss: 0.392141
1109
+ 004950/008040, loss: 0.227229, avg_loss: 0.391920
1110
+ 004955/008040, loss: 0.073722, avg_loss: 0.391622
1111
+ 004960/008040, loss: 0.217448, avg_loss: 0.391421
1112
+ 004965/008040, loss: 0.166534, avg_loss: 0.391247
1113
+ 004970/008040, loss: 0.169978, avg_loss: 0.391054
1114
+ 004975/008040, loss: 0.162336, avg_loss: 0.390803
1115
+ 004980/008040, loss: 0.100707, avg_loss: 0.390613
1116
+ 004985/008040, loss: 0.066454, avg_loss: 0.390380
1117
+ 004990/008040, loss: 0.268293, avg_loss: 0.390155
1118
+ 004995/008040, loss: 0.299800, avg_loss: 0.389940
1119
+ 005000/008040, loss: 0.159697, avg_loss: 0.389720
1120
+ 005005/008040, loss: 0.089164, avg_loss: 0.389519
1121
+ 005010/008040, loss: 0.083933, avg_loss: 0.389239
1122
+ 005015/008040, loss: 0.109845, avg_loss: 0.389072
1123
+ 005020/008040, loss: 0.323453, avg_loss: 0.388975
1124
+ 005025/008040, loss: 0.218569, avg_loss: 0.388836
1125
+ 005030/008040, loss: 0.306596, avg_loss: 0.388684
1126
+ 005035/008040, loss: 0.214361, avg_loss: 0.388518
1127
+ 005040/008040, loss: 0.203883, avg_loss: 0.388300
1128
+ 005045/008040, loss: 0.119648, avg_loss: 0.388050
1129
+ 005050/008040, loss: 0.241945, avg_loss: 0.387813
1130
+ 005055/008040, loss: 0.295856, avg_loss: 0.387694
1131
+ 005060/008040, loss: 0.299737, avg_loss: 0.387502
1132
+ 005065/008040, loss: 0.173353, avg_loss: 0.387261
1133
+ 005070/008040, loss: 0.148706, avg_loss: 0.387023
1134
+ 005075/008040, loss: 0.235021, avg_loss: 0.386797
1135
+ 005080/008040, loss: 0.309368, avg_loss: 0.386680
1136
+ 005085/008040, loss: 0.187352, avg_loss: 0.386492
1137
+ 005090/008040, loss: 0.124517, avg_loss: 0.386330
1138
+ ***** Running dev evaluation *****
1139
+ Num examples = 1042
1140
+ Instantaneous batch size per device = 32
1141
+ epoch 18, step 5092/8040: {'matthews_correlation': 0.24348660475263997}
1142
+ 005095/008040, loss: 0.244682, avg_loss: 0.386098
1143
+ 005100/008040, loss: 0.038414, avg_loss: 0.385838
1144
+ 005105/008040, loss: 0.095592, avg_loss: 0.385585
1145
+ 005110/008040, loss: 0.112026, avg_loss: 0.385404
1146
+ 005115/008040, loss: 0.193563, avg_loss: 0.385160
1147
+ 005120/008040, loss: 0.169361, avg_loss: 0.384950
1148
+ 005125/008040, loss: 0.115310, avg_loss: 0.384794
1149
+ 005130/008040, loss: 0.111171, avg_loss: 0.384595
1150
+ 005135/008040, loss: 0.347275, avg_loss: 0.384407
1151
+ 005140/008040, loss: 0.066092, avg_loss: 0.384158
1152
+ 005145/008040, loss: 0.038691, avg_loss: 0.383875
1153
+ 005150/008040, loss: 0.187798, avg_loss: 0.383690
1154
+ 005155/008040, loss: 0.080341, avg_loss: 0.383389
1155
+ 005160/008040, loss: 0.250113, avg_loss: 0.383158
1156
+ 005165/008040, loss: 0.230404, avg_loss: 0.382940
1157
+ 005170/008040, loss: 0.199019, avg_loss: 0.382771
1158
+ 005175/008040, loss: 0.042526, avg_loss: 0.382549
1159
+ 005180/008040, loss: 0.107391, avg_loss: 0.382405
1160
+ 005185/008040, loss: 0.123089, avg_loss: 0.382158
1161
+ 005190/008040, loss: 0.211129, avg_loss: 0.381957
1162
+ 005195/008040, loss: 0.191329, avg_loss: 0.381727
1163
+ 005200/008040, loss: 0.247005, avg_loss: 0.381528
1164
+ 005205/008040, loss: 0.151045, avg_loss: 0.381342
1165
+ 005210/008040, loss: 0.221767, avg_loss: 0.381150
1166
+ 005215/008040, loss: 0.098915, avg_loss: 0.380916
1167
+ 005220/008040, loss: 0.120604, avg_loss: 0.380805
1168
+ 005225/008040, loss: 0.198758, avg_loss: 0.380580
1169
+ 005230/008040, loss: 0.170238, avg_loss: 0.380375
1170
+ 005235/008040, loss: 0.287471, avg_loss: 0.380185
1171
+ 005240/008040, loss: 0.099829, avg_loss: 0.379951
1172
+ 005245/008040, loss: 0.192130, avg_loss: 0.379746
1173
+ 005250/008040, loss: 0.174886, avg_loss: 0.379625
1174
+ 005255/008040, loss: 0.154950, avg_loss: 0.379463
1175
+ 005260/008040, loss: 0.315916, avg_loss: 0.379261
1176
+ 005265/008040, loss: 0.142192, avg_loss: 0.379091
1177
+ 005270/008040, loss: 0.157938, avg_loss: 0.378953
1178
+ 005275/008040, loss: 0.112631, avg_loss: 0.378706
1179
+ 005280/008040, loss: 0.112058, avg_loss: 0.378528
1180
+ 005285/008040, loss: 0.109890, avg_loss: 0.378278
1181
+ 005290/008040, loss: 0.096205, avg_loss: 0.378069
1182
+ 005295/008040, loss: 0.101920, avg_loss: 0.377793
1183
+ 005300/008040, loss: 0.223294, avg_loss: 0.377597
1184
+ 005305/008040, loss: 0.334869, avg_loss: 0.377448
1185
+ 005310/008040, loss: 0.133110, avg_loss: 0.377219
1186
+ 005315/008040, loss: 0.285188, avg_loss: 0.377080
1187
+ 005320/008040, loss: 0.093014, avg_loss: 0.376865
1188
+ 005325/008040, loss: 0.064642, avg_loss: 0.376663
1189
+ 005330/008040, loss: 0.399625, avg_loss: 0.376612
1190
+ 005335/008040, loss: 0.099368, avg_loss: 0.376419
1191
+ 005340/008040, loss: 0.127971, avg_loss: 0.376192
1192
+ 005345/008040, loss: 0.276726, avg_loss: 0.376028
1193
+ 005350/008040, loss: 0.203088, avg_loss: 0.375818
1194
+ 005355/008040, loss: 0.162861, avg_loss: 0.375639
1195
+ 005360/008040, loss: 0.111333, avg_loss: 0.375443
1196
+ ***** Running dev evaluation *****
1197
+ Num examples = 1042
1198
+ Instantaneous batch size per device = 32
1199
+ epoch 19, step 5360/8040: {'matthews_correlation': 0.2545245288314363}
1200
+ 005365/008040, loss: 0.182490, avg_loss: 0.375250
1201
+ 005370/008040, loss: 0.306611, avg_loss: 0.375098
1202
+ 005375/008040, loss: 0.086688, avg_loss: 0.374876
1203
+ 005380/008040, loss: 0.073351, avg_loss: 0.374639
1204
+ 005385/008040, loss: 0.076141, avg_loss: 0.374468
1205
+ 005390/008040, loss: 0.263192, avg_loss: 0.374279
1206
+ 005395/008040, loss: 0.057974, avg_loss: 0.374064
1207
+ 005400/008040, loss: 0.168771, avg_loss: 0.373868
1208
+ 005405/008040, loss: 0.101774, avg_loss: 0.373638
1209
+ 005410/008040, loss: 0.233914, avg_loss: 0.373431
1210
+ 005415/008040, loss: 0.199603, avg_loss: 0.373236
1211
+ 005420/008040, loss: 0.069979, avg_loss: 0.373044
1212
+ 005425/008040, loss: 0.157929, avg_loss: 0.372784
1213
+ 005430/008040, loss: 0.102463, avg_loss: 0.372559
1214
+ 005435/008040, loss: 0.311656, avg_loss: 0.372387
1215
+ 005440/008040, loss: 0.025280, avg_loss: 0.372177
1216
+ 005445/008040, loss: 0.199722, avg_loss: 0.372008
1217
+ 005450/008040, loss: 0.037342, avg_loss: 0.371794
1218
+ 005455/008040, loss: 0.464360, avg_loss: 0.371634
1219
+ 005460/008040, loss: 0.151161, avg_loss: 0.371387
1220
+ 005465/008040, loss: 0.131248, avg_loss: 0.371174
1221
+ 005470/008040, loss: 0.091763, avg_loss: 0.370973
1222
+ 005475/008040, loss: 0.124437, avg_loss: 0.370771
1223
+ 005480/008040, loss: 0.056806, avg_loss: 0.370576
1224
+ 005485/008040, loss: 0.053934, avg_loss: 0.370370
1225
+ 005490/008040, loss: 0.134340, avg_loss: 0.370134
1226
+ 005495/008040, loss: 0.403093, avg_loss: 0.369976
1227
+ 005500/008040, loss: 0.295253, avg_loss: 0.369797
1228
+ 005505/008040, loss: 0.123554, avg_loss: 0.369599
1229
+ 005510/008040, loss: 0.146412, avg_loss: 0.369405
1230
+ 005515/008040, loss: 0.086848, avg_loss: 0.369164
1231
+ 005520/008040, loss: 0.190395, avg_loss: 0.368969
1232
+ 005525/008040, loss: 0.214298, avg_loss: 0.368869
1233
+ 005530/008040, loss: 0.157094, avg_loss: 0.368707
1234
+ 005535/008040, loss: 0.236498, avg_loss: 0.368588
1235
+ 005540/008040, loss: 0.150522, avg_loss: 0.368397
1236
+ 005545/008040, loss: 0.056312, avg_loss: 0.368192
1237
+ 005550/008040, loss: 0.132276, avg_loss: 0.367994
1238
+ 005555/008040, loss: 0.152209, avg_loss: 0.367803
1239
+ 005560/008040, loss: 0.123136, avg_loss: 0.367632
1240
+ 005565/008040, loss: 0.295406, avg_loss: 0.367456
1241
+ 005570/008040, loss: 0.163695, avg_loss: 0.367255
1242
+ 005575/008040, loss: 0.032764, avg_loss: 0.367021
1243
+ 005580/008040, loss: 0.077804, avg_loss: 0.366803
1244
+ 005585/008040, loss: 0.426609, avg_loss: 0.366718
1245
+ 005590/008040, loss: 0.170544, avg_loss: 0.366554
1246
+ 005595/008040, loss: 0.121247, avg_loss: 0.366328
1247
+ 005600/008040, loss: 0.118504, avg_loss: 0.366139
1248
+ 005605/008040, loss: 0.127036, avg_loss: 0.365945
1249
+ 005610/008040, loss: 0.253191, avg_loss: 0.365772
1250
+ 005615/008040, loss: 0.132579, avg_loss: 0.365584
1251
+ 005620/008040, loss: 0.206162, avg_loss: 0.365378
1252
+ 005625/008040, loss: 0.138357, avg_loss: 0.365198
1253
+ ***** Running dev evaluation *****
1254
+ Num examples = 1042
1255
+ Instantaneous batch size per device = 32
1256
+ epoch 20, step 5628/8040: {'matthews_correlation': 0.20994533418798944}
1257
+ 005630/008040, loss: 0.220501, avg_loss: 0.364957
1258
+ 005635/008040, loss: 0.176737, avg_loss: 0.364715
1259
+ 005640/008040, loss: 0.150695, avg_loss: 0.364518
1260
+ 005645/008040, loss: 0.186189, avg_loss: 0.364357
1261
+ 005650/008040, loss: 0.238791, avg_loss: 0.364176
1262
+ 005655/008040, loss: 0.128307, avg_loss: 0.363933
1263
+ 005660/008040, loss: 0.098545, avg_loss: 0.363732
1264
+ 005665/008040, loss: 0.059385, avg_loss: 0.363478
1265
+ 005670/008040, loss: 0.106437, avg_loss: 0.363278
1266
+ 005675/008040, loss: 0.051390, avg_loss: 0.363102
1267
+ 005680/008040, loss: 0.264690, avg_loss: 0.362942
1268
+ 005685/008040, loss: 0.051885, avg_loss: 0.362837
1269
+ 005690/008040, loss: 0.186132, avg_loss: 0.362659
1270
+ 005695/008040, loss: 0.110282, avg_loss: 0.362424
1271
+ 005700/008040, loss: 0.035696, avg_loss: 0.362194
1272
+ 005705/008040, loss: 0.210790, avg_loss: 0.362002
1273
+ 005710/008040, loss: 0.176916, avg_loss: 0.361813
1274
+ 005715/008040, loss: 0.068533, avg_loss: 0.361627
1275
+ 005720/008040, loss: 0.059564, avg_loss: 0.361417
1276
+ 005725/008040, loss: 0.087551, avg_loss: 0.361274
1277
+ 005730/008040, loss: 0.166153, avg_loss: 0.361102
1278
+ 005735/008040, loss: 0.123037, avg_loss: 0.360934
1279
+ 005740/008040, loss: 0.175274, avg_loss: 0.360734
1280
+ 005745/008040, loss: 0.053053, avg_loss: 0.360546
1281
+ 005750/008040, loss: 0.193917, avg_loss: 0.360384
1282
+ 005755/008040, loss: 0.192751, avg_loss: 0.360214
1283
+ 005760/008040, loss: 0.101878, avg_loss: 0.360026
1284
+ 005765/008040, loss: 0.085134, avg_loss: 0.359832
1285
+ 005770/008040, loss: 0.293009, avg_loss: 0.359642
1286
+ 005775/008040, loss: 0.104310, avg_loss: 0.359413
1287
+ 005780/008040, loss: 0.011819, avg_loss: 0.359168
1288
+ 005785/008040, loss: 0.210015, avg_loss: 0.359009
1289
+ 005790/008040, loss: 0.200174, avg_loss: 0.358879
1290
+ 005795/008040, loss: 0.141055, avg_loss: 0.358671
1291
+ 005800/008040, loss: 0.078129, avg_loss: 0.358469
1292
+ 005805/008040, loss: 0.083557, avg_loss: 0.358299
1293
+ 005810/008040, loss: 0.075039, avg_loss: 0.358079
1294
+ 005815/008040, loss: 0.080684, avg_loss: 0.357903
1295
+ 005820/008040, loss: 0.345792, avg_loss: 0.357738
1296
+ 005825/008040, loss: 0.078370, avg_loss: 0.357590
1297
+ 005830/008040, loss: 0.199539, avg_loss: 0.357436
1298
+ 005835/008040, loss: 0.196801, avg_loss: 0.357251
1299
+ 005840/008040, loss: 0.173617, avg_loss: 0.357071
1300
+ 005845/008040, loss: 0.056907, avg_loss: 0.356874
1301
+ 005850/008040, loss: 0.165107, avg_loss: 0.356683
1302
+ 005855/008040, loss: 0.100072, avg_loss: 0.356521
1303
+ 005860/008040, loss: 0.178491, avg_loss: 0.356324
1304
+ 005865/008040, loss: 0.212101, avg_loss: 0.356138
1305
+ 005870/008040, loss: 0.215021, avg_loss: 0.355963
1306
+ 005875/008040, loss: 0.273816, avg_loss: 0.355788
1307
+ 005880/008040, loss: 0.364194, avg_loss: 0.355641
1308
+ 005885/008040, loss: 0.270123, avg_loss: 0.355498
1309
+ 005890/008040, loss: 0.047443, avg_loss: 0.355291
1310
+ 005895/008040, loss: 0.142198, avg_loss: 0.355145
1311
+ ***** Running dev evaluation *****
1312
+ Num examples = 1042
1313
+ Instantaneous batch size per device = 32
1314
+ epoch 21, step 5896/8040: {'matthews_correlation': 0.21551745055261307}
1315
+ 005900/008040, loss: 0.191457, avg_loss: 0.354973
1316
+ 005905/008040, loss: 0.295734, avg_loss: 0.354803
1317
+ 005910/008040, loss: 0.075735, avg_loss: 0.354682
1318
+ 005915/008040, loss: 0.142483, avg_loss: 0.354498
1319
+ 005920/008040, loss: 0.117506, avg_loss: 0.354280
1320
+ 005925/008040, loss: 0.108497, avg_loss: 0.354039
1321
+ 005930/008040, loss: 0.023560, avg_loss: 0.353889
1322
+ 005935/008040, loss: 0.051859, avg_loss: 0.353655
1323
+ 005940/008040, loss: 0.096430, avg_loss: 0.353502
1324
+ 005945/008040, loss: 0.168284, avg_loss: 0.353288
1325
+ 005950/008040, loss: 0.137047, avg_loss: 0.353141
1326
+ 005955/008040, loss: 0.182130, avg_loss: 0.353003
1327
+ 005960/008040, loss: 0.052544, avg_loss: 0.352779
1328
+ 005965/008040, loss: 0.148201, avg_loss: 0.352614
1329
+ 005970/008040, loss: 0.203060, avg_loss: 0.352449
1330
+ 005975/008040, loss: 0.152961, avg_loss: 0.352309
1331
+ 005980/008040, loss: 0.149886, avg_loss: 0.352082
1332
+ 005985/008040, loss: 0.108204, avg_loss: 0.351957
1333
+ 005990/008040, loss: 0.102725, avg_loss: 0.351766
1334
+ 005995/008040, loss: 0.023260, avg_loss: 0.351590
1335
+ 006000/008040, loss: 0.115315, avg_loss: 0.351441
1336
+ 006005/008040, loss: 0.074605, avg_loss: 0.351242
1337
+ 006010/008040, loss: 0.142932, avg_loss: 0.351052
1338
+ 006015/008040, loss: 0.083695, avg_loss: 0.350857
1339
+ 006020/008040, loss: 0.043695, avg_loss: 0.350694
1340
+ 006025/008040, loss: 0.099229, avg_loss: 0.350506
1341
+ 006030/008040, loss: 0.024634, avg_loss: 0.350314
1342
+ 006035/008040, loss: 0.213198, avg_loss: 0.350172
1343
+ 006040/008040, loss: 0.090062, avg_loss: 0.349989
1344
+ 006045/008040, loss: 0.165650, avg_loss: 0.349814
1345
+ 006050/008040, loss: 0.375118, avg_loss: 0.349689
1346
+ 006055/008040, loss: 0.092574, avg_loss: 0.349502
1347
+ 006060/008040, loss: 0.076881, avg_loss: 0.349333
1348
+ 006065/008040, loss: 0.215473, avg_loss: 0.349175
1349
+ 006070/008040, loss: 0.047726, avg_loss: 0.349023
1350
+ 006075/008040, loss: 0.275449, avg_loss: 0.348853
1351
+ 006080/008040, loss: 0.091764, avg_loss: 0.348649
1352
+ 006085/008040, loss: 0.158617, avg_loss: 0.348518
1353
+ 006090/008040, loss: 0.398433, avg_loss: 0.348350
1354
+ 006095/008040, loss: 0.249465, avg_loss: 0.348170
1355
+ 006100/008040, loss: 0.230916, avg_loss: 0.348021
1356
+ 006105/008040, loss: 0.138895, avg_loss: 0.347855
1357
+ 006110/008040, loss: 0.023905, avg_loss: 0.347659
1358
+ 006115/008040, loss: 0.183222, avg_loss: 0.347486
1359
+ 006120/008040, loss: 0.149845, avg_loss: 0.347367
1360
+ 006125/008040, loss: 0.120646, avg_loss: 0.347237
1361
+ 006130/008040, loss: 0.232747, avg_loss: 0.347078
1362
+ 006135/008040, loss: 0.086326, avg_loss: 0.346889
1363
+ 006140/008040, loss: 0.044021, avg_loss: 0.346692
1364
+ 006145/008040, loss: 0.173458, avg_loss: 0.346579
1365
+ 006150/008040, loss: 0.110168, avg_loss: 0.346419
1366
+ 006155/008040, loss: 0.041496, avg_loss: 0.346239
1367
+ 006160/008040, loss: 0.048964, avg_loss: 0.346022
1368
+ ***** Running dev evaluation *****
1369
+ Num examples = 1042
1370
+ Instantaneous batch size per device = 32
1371
+ epoch 22, step 6164/8040: {'matthews_correlation': 0.20483291444361929}
1372
+ 006165/008040, loss: 0.256147, avg_loss: 0.345885
1373
+ 006170/008040, loss: 0.100646, avg_loss: 0.345729
1374
+ 006175/008040, loss: 0.135744, avg_loss: 0.345560
1375
+ 006180/008040, loss: 0.070830, avg_loss: 0.345336
1376
+ 006185/008040, loss: 0.183400, avg_loss: 0.345210
1377
+ 006190/008040, loss: 0.171377, avg_loss: 0.345125
1378
+ 006195/008040, loss: 0.104681, avg_loss: 0.344985
1379
+ 006200/008040, loss: 0.047664, avg_loss: 0.344778
1380
+ 006205/008040, loss: 0.132229, avg_loss: 0.344638
1381
+ 006210/008040, loss: 0.212232, avg_loss: 0.344449
1382
+ 006215/008040, loss: 0.037690, avg_loss: 0.344257
1383
+ 006220/008040, loss: 0.265332, avg_loss: 0.344097
1384
+ 006225/008040, loss: 0.114738, avg_loss: 0.343896
1385
+ 006230/008040, loss: 0.075357, avg_loss: 0.343712
1386
+ 006235/008040, loss: 0.082196, avg_loss: 0.343519
1387
+ 006240/008040, loss: 0.141044, avg_loss: 0.343342
1388
+ 006245/008040, loss: 0.061539, avg_loss: 0.343173
1389
+ 006250/008040, loss: 0.156940, avg_loss: 0.342981
1390
+ 006255/008040, loss: 0.074917, avg_loss: 0.342844
1391
+ 006260/008040, loss: 0.182297, avg_loss: 0.342644
1392
+ 006265/008040, loss: 0.188166, avg_loss: 0.342492
1393
+ 006270/008040, loss: 0.098817, avg_loss: 0.342310
1394
+ 006275/008040, loss: 0.026582, avg_loss: 0.342126
1395
+ 006280/008040, loss: 0.093155, avg_loss: 0.341935
1396
+ 006285/008040, loss: 0.121849, avg_loss: 0.341775
1397
+ 006290/008040, loss: 0.117257, avg_loss: 0.341654
1398
+ 006295/008040, loss: 0.470718, avg_loss: 0.341518
1399
+ 006300/008040, loss: 0.071532, avg_loss: 0.341330
1400
+ 006305/008040, loss: 0.077978, avg_loss: 0.341138
1401
+ 006310/008040, loss: 0.280971, avg_loss: 0.341040
1402
+ 006315/008040, loss: 0.083832, avg_loss: 0.340839
1403
+ 006320/008040, loss: 0.123453, avg_loss: 0.340651
1404
+ 006325/008040, loss: 0.293229, avg_loss: 0.340510
1405
+ 006330/008040, loss: 0.010351, avg_loss: 0.340313
1406
+ 006335/008040, loss: 0.206183, avg_loss: 0.340134
1407
+ 006340/008040, loss: 0.157802, avg_loss: 0.340008
1408
+ 006345/008040, loss: 0.045268, avg_loss: 0.339836
1409
+ 006350/008040, loss: 0.096029, avg_loss: 0.339639
1410
+ 006355/008040, loss: 0.094634, avg_loss: 0.339505
1411
+ 006360/008040, loss: 0.020495, avg_loss: 0.339314
1412
+ 006365/008040, loss: 0.099276, avg_loss: 0.339191
1413
+ 006370/008040, loss: 0.040780, avg_loss: 0.339016
1414
+ 006375/008040, loss: 0.107320, avg_loss: 0.338862
1415
+ 006380/008040, loss: 0.212123, avg_loss: 0.338757
1416
+ 006385/008040, loss: 0.228286, avg_loss: 0.338587
1417
+ 006390/008040, loss: 0.094378, avg_loss: 0.338381
1418
+ 006395/008040, loss: 0.094123, avg_loss: 0.338215
1419
+ 006400/008040, loss: 0.153880, avg_loss: 0.338045
1420
+ 006405/008040, loss: 0.025904, avg_loss: 0.337846
1421
+ 006410/008040, loss: 0.082967, avg_loss: 0.337646
1422
+ 006415/008040, loss: 0.040689, avg_loss: 0.337446
1423
+ 006420/008040, loss: 0.075779, avg_loss: 0.337272
1424
+ 006425/008040, loss: 0.150025, avg_loss: 0.337107
1425
+ 006430/008040, loss: 0.141630, avg_loss: 0.336983
1426
+ ***** Running dev evaluation *****
1427
+ Num examples = 1042
1428
+ Instantaneous batch size per device = 32
1429
+ epoch 23, step 6432/8040: {'matthews_correlation': 0.24931944187781385}
1430
+ 006435/008040, loss: 0.153386, avg_loss: 0.336812
1431
+ 006440/008040, loss: 0.222147, avg_loss: 0.336705
1432
+ 006445/008040, loss: 0.156677, avg_loss: 0.336554
1433
+ 006450/008040, loss: 0.010944, avg_loss: 0.336408
1434
+ 006455/008040, loss: 0.038571, avg_loss: 0.336201
1435
+ 006460/008040, loss: 0.114449, avg_loss: 0.336032
1436
+ 006465/008040, loss: 0.089689, avg_loss: 0.335848
1437
+ 006470/008040, loss: 0.329702, avg_loss: 0.335703
1438
+ 006475/008040, loss: 0.234976, avg_loss: 0.335533
1439
+ 006480/008040, loss: 0.090094, avg_loss: 0.335370
1440
+ 006485/008040, loss: 0.090959, avg_loss: 0.335211
1441
+ 006490/008040, loss: 0.184350, avg_loss: 0.335043
1442
+ 006495/008040, loss: 0.013678, avg_loss: 0.334875
1443
+ 006500/008040, loss: 0.066592, avg_loss: 0.334706
1444
+ 006505/008040, loss: 0.044437, avg_loss: 0.334594
1445
+ 006510/008040, loss: 0.044384, avg_loss: 0.334395
1446
+ 006515/008040, loss: 0.141515, avg_loss: 0.334245
1447
+ 006520/008040, loss: 0.147063, avg_loss: 0.334082
1448
+ 006525/008040, loss: 0.132490, avg_loss: 0.333948
1449
+ 006530/008040, loss: 0.115439, avg_loss: 0.333813
1450
+ 006535/008040, loss: 0.072417, avg_loss: 0.333611
1451
+ 006540/008040, loss: 0.122740, avg_loss: 0.333438
1452
+ 006545/008040, loss: 0.153896, avg_loss: 0.333265
1453
+ 006550/008040, loss: 0.216064, avg_loss: 0.333112
1454
+ 006555/008040, loss: 0.074902, avg_loss: 0.332915
1455
+ 006560/008040, loss: 0.229158, avg_loss: 0.332779
1456
+ 006565/008040, loss: 0.142630, avg_loss: 0.332649
1457
+ 006570/008040, loss: 0.106839, avg_loss: 0.332540
1458
+ 006575/008040, loss: 0.016569, avg_loss: 0.332368
1459
+ 006580/008040, loss: 0.110829, avg_loss: 0.332200
1460
+ 006585/008040, loss: 0.072897, avg_loss: 0.332014
1461
+ 006590/008040, loss: 0.148401, avg_loss: 0.331831
1462
+ 006595/008040, loss: 0.472177, avg_loss: 0.331741
1463
+ 006600/008040, loss: 0.026618, avg_loss: 0.331582
1464
+ 006605/008040, loss: 0.192052, avg_loss: 0.331442
1465
+ 006610/008040, loss: 0.176737, avg_loss: 0.331313
1466
+ 006615/008040, loss: 0.256435, avg_loss: 0.331163
1467
+ 006620/008040, loss: 0.112773, avg_loss: 0.330998
1468
+ 006625/008040, loss: 0.168097, avg_loss: 0.330900
1469
+ 006630/008040, loss: 0.149819, avg_loss: 0.330756
1470
+ 006635/008040, loss: 0.035909, avg_loss: 0.330597
1471
+ 006640/008040, loss: 0.116389, avg_loss: 0.330473
1472
+ 006645/008040, loss: 0.067924, avg_loss: 0.330317
1473
+ 006650/008040, loss: 0.029742, avg_loss: 0.330124
1474
+ 006655/008040, loss: 0.111944, avg_loss: 0.329962
1475
+ 006660/008040, loss: 0.138587, avg_loss: 0.329805
1476
+ 006665/008040, loss: 0.214782, avg_loss: 0.329670
1477
+ 006670/008040, loss: 0.134468, avg_loss: 0.329520
1478
+ 006675/008040, loss: 0.131746, avg_loss: 0.329344
1479
+ 006680/008040, loss: 0.085801, avg_loss: 0.329181
1480
+ 006685/008040, loss: 0.123189, avg_loss: 0.329020
1481
+ 006690/008040, loss: 0.059361, avg_loss: 0.328905
1482
+ 006695/008040, loss: 0.131232, avg_loss: 0.328752
1483
+ 006700/008040, loss: 0.036765, avg_loss: 0.328543
1484
+ ***** Running dev evaluation *****
1485
+ Num examples = 1042
1486
+ Instantaneous batch size per device = 32
1487
+ epoch 24, step 6700/8040: {'matthews_correlation': 0.23227684406858393}
1488
+ 006705/008040, loss: 0.030755, avg_loss: 0.328399
1489
+ 006710/008040, loss: 0.099883, avg_loss: 0.328219
1490
+ 006715/008040, loss: 0.177528, avg_loss: 0.328066
1491
+ 006720/008040, loss: 0.272356, avg_loss: 0.327913
1492
+ 006725/008040, loss: 0.024312, avg_loss: 0.327721
1493
+ 006730/008040, loss: 0.085378, avg_loss: 0.327553
1494
+ 006735/008040, loss: 0.042832, avg_loss: 0.327387
1495
+ 006740/008040, loss: 0.151696, avg_loss: 0.327222
1496
+ 006745/008040, loss: 0.125671, avg_loss: 0.327072
1497
+ 006750/008040, loss: 0.065977, avg_loss: 0.326951
1498
+ 006755/008040, loss: 0.120872, avg_loss: 0.326772
1499
+ 006760/008040, loss: 0.024726, avg_loss: 0.326601
1500
+ 006765/008040, loss: 0.058831, avg_loss: 0.326394
1501
+ 006770/008040, loss: 0.360663, avg_loss: 0.326308
1502
+ 006775/008040, loss: 0.055613, avg_loss: 0.326138
1503
+ 006780/008040, loss: 0.068713, avg_loss: 0.325965
1504
+ 006785/008040, loss: 0.048198, avg_loss: 0.325800
1505
+ 006790/008040, loss: 0.176144, avg_loss: 0.325608
1506
+ 006795/008040, loss: 0.084840, avg_loss: 0.325489
1507
+ 006800/008040, loss: 0.252102, avg_loss: 0.325354
1508
+ 006805/008040, loss: 0.021211, avg_loss: 0.325169
1509
+ 006810/008040, loss: 0.206506, avg_loss: 0.325002
1510
+ 006815/008040, loss: 0.049258, avg_loss: 0.324803
1511
+ 006820/008040, loss: 0.062864, avg_loss: 0.324719
1512
+ 006825/008040, loss: 0.211644, avg_loss: 0.324574
1513
+ 006830/008040, loss: 0.244467, avg_loss: 0.324422
1514
+ 006835/008040, loss: 0.125059, avg_loss: 0.324291
1515
+ 006840/008040, loss: 0.126497, avg_loss: 0.324127
1516
+ 006845/008040, loss: 0.195687, avg_loss: 0.323964
1517
+ 006850/008040, loss: 0.020408, avg_loss: 0.323804
1518
+ 006855/008040, loss: 0.039552, avg_loss: 0.323659
1519
+ 006860/008040, loss: 0.053010, avg_loss: 0.323502
1520
+ 006865/008040, loss: 0.240591, avg_loss: 0.323372
1521
+ 006870/008040, loss: 0.067442, avg_loss: 0.323200
1522
+ 006875/008040, loss: 0.070645, avg_loss: 0.323066
1523
+ 006880/008040, loss: 0.043463, avg_loss: 0.322892
1524
+ 006885/008040, loss: 0.065176, avg_loss: 0.322750
1525
+ 006890/008040, loss: 0.022665, avg_loss: 0.322594
1526
+ 006895/008040, loss: 0.024392, avg_loss: 0.322466
1527
+ 006900/008040, loss: 0.065461, avg_loss: 0.322293
1528
+ 006905/008040, loss: 0.137459, avg_loss: 0.322161
1529
+ 006910/008040, loss: 0.183977, avg_loss: 0.322088
1530
+ 006915/008040, loss: 0.082571, avg_loss: 0.321932
1531
+ 006920/008040, loss: 0.047769, avg_loss: 0.321773
1532
+ 006925/008040, loss: 0.185715, avg_loss: 0.321622
1533
+ 006930/008040, loss: 0.120999, avg_loss: 0.321527
1534
+ 006935/008040, loss: 0.011521, avg_loss: 0.321368
1535
+ 006940/008040, loss: 0.056693, avg_loss: 0.321238
1536
+ 006945/008040, loss: 0.312799, avg_loss: 0.321111
1537
+ 006950/008040, loss: 0.093848, avg_loss: 0.320974
1538
+ 006955/008040, loss: 0.147207, avg_loss: 0.320829
1539
+ 006960/008040, loss: 0.244541, avg_loss: 0.320667
1540
+ 006965/008040, loss: 0.171516, avg_loss: 0.320572
1541
+ ***** Running dev evaluation *****
1542
+ Num examples = 1042
1543
+ Instantaneous batch size per device = 32
1544
+ epoch 25, step 6968/8040: {'matthews_correlation': 0.2203939727085643}
1545
+ 006970/008040, loss: 0.090653, avg_loss: 0.320431
1546
+ 006975/008040, loss: 0.113621, avg_loss: 0.320278
1547
+ 006980/008040, loss: 0.050388, avg_loss: 0.320129
1548
+ 006985/008040, loss: 0.195083, avg_loss: 0.319979
1549
+ 006990/008040, loss: 0.071205, avg_loss: 0.319799
1550
+ 006995/008040, loss: 0.020149, avg_loss: 0.319621
1551
+ 007000/008040, loss: 0.059265, avg_loss: 0.319458
1552
+ 007005/008040, loss: 0.179539, avg_loss: 0.319284
1553
+ 007010/008040, loss: 0.012788, avg_loss: 0.319089
1554
+ 007015/008040, loss: 0.034613, avg_loss: 0.318934
1555
+ 007020/008040, loss: 0.168260, avg_loss: 0.318806
1556
+ 007025/008040, loss: 0.188078, avg_loss: 0.318649
1557
+ 007030/008040, loss: 0.011455, avg_loss: 0.318545
1558
+ 007035/008040, loss: 0.040954, avg_loss: 0.318369
1559
+ 007040/008040, loss: 0.096969, avg_loss: 0.318195
1560
+ 007045/008040, loss: 0.071101, avg_loss: 0.318037
1561
+ 007050/008040, loss: 0.081423, avg_loss: 0.317851
1562
+ 007055/008040, loss: 0.195587, avg_loss: 0.317688
1563
+ 007060/008040, loss: 0.032233, avg_loss: 0.317557
1564
+ 007065/008040, loss: 0.271995, avg_loss: 0.317422
1565
+ 007070/008040, loss: 0.037814, avg_loss: 0.317280
1566
+ 007075/008040, loss: 0.068158, avg_loss: 0.317114
1567
+ 007080/008040, loss: 0.254120, avg_loss: 0.317008
1568
+ 007085/008040, loss: 0.078963, avg_loss: 0.316860
1569
+ 007090/008040, loss: 0.024606, avg_loss: 0.316704
1570
+ 007095/008040, loss: 0.112365, avg_loss: 0.316543
1571
+ 007100/008040, loss: 0.068048, avg_loss: 0.316381
1572
+ 007105/008040, loss: 0.029112, avg_loss: 0.316234
1573
+ 007110/008040, loss: 0.055819, avg_loss: 0.316104
1574
+ 007115/008040, loss: 0.025860, avg_loss: 0.315943
1575
+ 007120/008040, loss: 0.094708, avg_loss: 0.315802
1576
+ 007125/008040, loss: 0.087746, avg_loss: 0.315670
1577
+ 007130/008040, loss: 0.134385, avg_loss: 0.315513
1578
+ 007135/008040, loss: 0.135339, avg_loss: 0.315398
1579
+ 007140/008040, loss: 0.315180, avg_loss: 0.315259
1580
+ 007145/008040, loss: 0.054737, avg_loss: 0.315112
1581
+ 007150/008040, loss: 0.405788, avg_loss: 0.315005
1582
+ 007155/008040, loss: 0.188528, avg_loss: 0.314918
1583
+ 007160/008040, loss: 0.061403, avg_loss: 0.314754
1584
+ 007165/008040, loss: 0.077819, avg_loss: 0.314588
1585
+ 007170/008040, loss: 0.136640, avg_loss: 0.314467
1586
+ 007175/008040, loss: 0.055474, avg_loss: 0.314311
1587
+ 007180/008040, loss: 0.023272, avg_loss: 0.314152
1588
+ 007185/008040, loss: 0.098981, avg_loss: 0.314002
1589
+ 007190/008040, loss: 0.019560, avg_loss: 0.313822
1590
+ 007195/008040, loss: 0.348302, avg_loss: 0.313728
1591
+ 007200/008040, loss: 0.105960, avg_loss: 0.313588
1592
+ 007205/008040, loss: 0.246406, avg_loss: 0.313467
1593
+ 007210/008040, loss: 0.074683, avg_loss: 0.313337
1594
+ 007215/008040, loss: 0.291595, avg_loss: 0.313251
1595
+ 007220/008040, loss: 0.034121, avg_loss: 0.313123
1596
+ 007225/008040, loss: 0.074492, avg_loss: 0.313020
1597
+ 007230/008040, loss: 0.108867, avg_loss: 0.312875
1598
+ 007235/008040, loss: 0.158608, avg_loss: 0.312725
1599
+ ***** Running dev evaluation *****
1600
+ Num examples = 1042
1601
+ Instantaneous batch size per device = 32
1602
+ epoch 26, step 7236/8040: {'matthews_correlation': 0.23696373689939254}
1603
+ 007240/008040, loss: 0.150083, avg_loss: 0.312579
1604
+ 007245/008040, loss: 0.122167, avg_loss: 0.312434
1605
+ 007250/008040, loss: 0.083941, avg_loss: 0.312296
1606
+ 007255/008040, loss: 0.191218, avg_loss: 0.312159
1607
+ 007260/008040, loss: 0.092216, avg_loss: 0.312034
1608
+ 007265/008040, loss: 0.081775, avg_loss: 0.311902
1609
+ 007270/008040, loss: 0.059207, avg_loss: 0.311740
1610
+ 007275/008040, loss: 0.273339, avg_loss: 0.311638
1611
+ 007280/008040, loss: 0.271834, avg_loss: 0.311530
1612
+ 007285/008040, loss: 0.179015, avg_loss: 0.311387
1613
+ 007290/008040, loss: 0.046599, avg_loss: 0.311231
1614
+ 007295/008040, loss: 0.051559, avg_loss: 0.311052
1615
+ 007300/008040, loss: 0.105356, avg_loss: 0.310882
1616
+ 007305/008040, loss: 0.043740, avg_loss: 0.310739
1617
+ 007310/008040, loss: 0.080959, avg_loss: 0.310613
1618
+ 007315/008040, loss: 0.058263, avg_loss: 0.310452
1619
+ 007320/008040, loss: 0.167761, avg_loss: 0.310307
1620
+ 007325/008040, loss: 0.128561, avg_loss: 0.310206
1621
+ 007330/008040, loss: 0.103146, avg_loss: 0.310052
1622
+ 007335/008040, loss: 0.219730, avg_loss: 0.309914
1623
+ 007340/008040, loss: 0.055324, avg_loss: 0.309757
1624
+ 007345/008040, loss: 0.057465, avg_loss: 0.309610
1625
+ 007350/008040, loss: 0.402242, avg_loss: 0.309490
1626
+ 007355/008040, loss: 0.352928, avg_loss: 0.309368
1627
+ 007360/008040, loss: 0.270440, avg_loss: 0.309237
1628
+ 007365/008040, loss: 0.094341, avg_loss: 0.309134
1629
+ 007370/008040, loss: 0.325051, avg_loss: 0.309029
1630
+ 007375/008040, loss: 0.040529, avg_loss: 0.308867
1631
+ 007380/008040, loss: 0.090125, avg_loss: 0.308736
1632
+ 007385/008040, loss: 0.053935, avg_loss: 0.308590
1633
+ 007390/008040, loss: 0.430134, avg_loss: 0.308482
1634
+ 007395/008040, loss: 0.147528, avg_loss: 0.308348
1635
+ 007400/008040, loss: 0.121706, avg_loss: 0.308195
1636
+ 007405/008040, loss: 0.178868, avg_loss: 0.308087
1637
+ 007410/008040, loss: 0.178170, avg_loss: 0.307974
1638
+ 007415/008040, loss: 0.023204, avg_loss: 0.307802
1639
+ 007420/008040, loss: 0.182678, avg_loss: 0.307687
1640
+ 007425/008040, loss: 0.090694, avg_loss: 0.307537
1641
+ 007430/008040, loss: 0.028491, avg_loss: 0.307386
1642
+ 007435/008040, loss: 0.027389, avg_loss: 0.307237
1643
+ 007440/008040, loss: 0.283375, avg_loss: 0.307118
1644
+ 007445/008040, loss: 0.036991, avg_loss: 0.306987
1645
+ 007450/008040, loss: 0.103909, avg_loss: 0.306909
1646
+ 007455/008040, loss: 0.036829, avg_loss: 0.306770
1647
+ 007460/008040, loss: 0.052082, avg_loss: 0.306626
1648
+ 007465/008040, loss: 0.338257, avg_loss: 0.306543
1649
+ 007470/008040, loss: 0.037553, avg_loss: 0.306415
1650
+ 007475/008040, loss: 0.031671, avg_loss: 0.306279
1651
+ 007480/008040, loss: 0.039051, avg_loss: 0.306135
1652
+ 007485/008040, loss: 0.096327, avg_loss: 0.305981
1653
+ 007490/008040, loss: 0.198999, avg_loss: 0.305869
1654
+ 007495/008040, loss: 0.182575, avg_loss: 0.305730
1655
+ 007500/008040, loss: 0.257397, avg_loss: 0.305617
1656
+ ***** Running dev evaluation *****
1657
+ Num examples = 1042
1658
+ Instantaneous batch size per device = 32
1659
+ epoch 27, step 7504/8040: {'matthews_correlation': 0.22894062387495076}
1660
+ 007505/008040, loss: 0.123742, avg_loss: 0.305505
1661
+ 007510/008040, loss: 0.061330, avg_loss: 0.305378
1662
+ 007515/008040, loss: 0.016107, avg_loss: 0.305229
1663
+ 007520/008040, loss: 0.035412, avg_loss: 0.305081
1664
+ 007525/008040, loss: 0.043585, avg_loss: 0.304929
1665
+ 007530/008040, loss: 0.019222, avg_loss: 0.304760
1666
+ 007535/008040, loss: 0.071748, avg_loss: 0.304596
1667
+ 007540/008040, loss: 0.045426, avg_loss: 0.304455
1668
+ 007545/008040, loss: 0.020044, avg_loss: 0.304301
1669
+ 007550/008040, loss: 0.062295, avg_loss: 0.304174
1670
+ 007555/008040, loss: 0.017569, avg_loss: 0.304044
1671
+ 007560/008040, loss: 0.180191, avg_loss: 0.303921
1672
+ 007565/008040, loss: 0.049493, avg_loss: 0.303774
1673
+ 007570/008040, loss: 0.173383, avg_loss: 0.303647
1674
+ 007575/008040, loss: 0.193030, avg_loss: 0.303516
1675
+ 007580/008040, loss: 0.131787, avg_loss: 0.303401
1676
+ 007585/008040, loss: 0.007346, avg_loss: 0.303243
1677
+ 007590/008040, loss: 0.155826, avg_loss: 0.303116
1678
+ 007595/008040, loss: 0.163601, avg_loss: 0.302978
1679
+ 007600/008040, loss: 0.035214, avg_loss: 0.302842
1680
+ 007605/008040, loss: 0.029514, avg_loss: 0.302685
1681
+ 007610/008040, loss: 0.060444, avg_loss: 0.302529
1682
+ 007615/008040, loss: 0.318817, avg_loss: 0.302458
1683
+ 007620/008040, loss: 0.080297, avg_loss: 0.302338
1684
+ 007625/008040, loss: 0.186482, avg_loss: 0.302204
1685
+ 007630/008040, loss: 0.279500, avg_loss: 0.302077
1686
+ 007635/008040, loss: 0.200537, avg_loss: 0.301970
1687
+ 007640/008040, loss: 0.194061, avg_loss: 0.301894
1688
+ 007645/008040, loss: 0.024023, avg_loss: 0.301730
1689
+ 007650/008040, loss: 0.033270, avg_loss: 0.301606
1690
+ 007655/008040, loss: 0.031241, avg_loss: 0.301439
1691
+ 007660/008040, loss: 0.113729, avg_loss: 0.301307
1692
+ 007665/008040, loss: 0.041298, avg_loss: 0.301153
1693
+ 007670/008040, loss: 0.044293, avg_loss: 0.301029
1694
+ 007675/008040, loss: 0.075025, avg_loss: 0.300890
1695
+ 007680/008040, loss: 0.125628, avg_loss: 0.300743
1696
+ 007685/008040, loss: 0.041533, avg_loss: 0.300654
1697
+ 007690/008040, loss: 0.026194, avg_loss: 0.300504
1698
+ 007695/008040, loss: 0.045489, avg_loss: 0.300366
1699
+ 007700/008040, loss: 0.075324, avg_loss: 0.300245
1700
+ 007705/008040, loss: 0.141094, avg_loss: 0.300132
1701
+ 007710/008040, loss: 0.036195, avg_loss: 0.299978
1702
+ 007715/008040, loss: 0.017604, avg_loss: 0.299834
1703
+ 007720/008040, loss: 0.081078, avg_loss: 0.299699
1704
+ 007725/008040, loss: 0.021621, avg_loss: 0.299564
1705
+ 007730/008040, loss: 0.144577, avg_loss: 0.299449
1706
+ 007735/008040, loss: 0.079561, avg_loss: 0.299298
1707
+ 007740/008040, loss: 0.040003, avg_loss: 0.299200
1708
+ 007745/008040, loss: 0.211169, avg_loss: 0.299085
1709
+ 007750/008040, loss: 0.029886, avg_loss: 0.298986
1710
+ 007755/008040, loss: 0.138877, avg_loss: 0.298871
1711
+ 007760/008040, loss: 0.043360, avg_loss: 0.298753
1712
+ 007765/008040, loss: 0.152495, avg_loss: 0.298619
1713
+ 007770/008040, loss: 0.060497, avg_loss: 0.298466
1714
+ ***** Running dev evaluation *****
1715
+ Num examples = 1042
1716
+ Instantaneous batch size per device = 32
1717
+ epoch 28, step 7772/8040: {'matthews_correlation': 0.23262243281540648}
1718
+ 007775/008040, loss: 0.118019, avg_loss: 0.298329
1719
+ 007780/008040, loss: 0.028191, avg_loss: 0.298189
1720
+ 007785/008040, loss: 0.135941, avg_loss: 0.298065
1721
+ 007790/008040, loss: 0.356724, avg_loss: 0.297975
1722
+ 007795/008040, loss: 0.026043, avg_loss: 0.297834
1723
+ 007800/008040, loss: 0.034852, avg_loss: 0.297691
1724
+ 007805/008040, loss: 0.154475, avg_loss: 0.297570
1725
+ 007810/008040, loss: 0.037241, avg_loss: 0.297409
1726
+ 007815/008040, loss: 0.073721, avg_loss: 0.297300
1727
+ 007820/008040, loss: 0.050705, avg_loss: 0.297156
1728
+ 007825/008040, loss: 0.105259, avg_loss: 0.297006
1729
+ 007830/008040, loss: 0.088514, avg_loss: 0.296893
1730
+ 007835/008040, loss: 0.095720, avg_loss: 0.296728
1731
+ 007840/008040, loss: 0.055406, avg_loss: 0.296592
1732
+ 007845/008040, loss: 0.045261, avg_loss: 0.296458
1733
+ 007850/008040, loss: 0.020414, avg_loss: 0.296311
1734
+ 007855/008040, loss: 0.060944, avg_loss: 0.296162
1735
+ 007860/008040, loss: 0.270967, avg_loss: 0.296048
1736
+ 007865/008040, loss: 0.287739, avg_loss: 0.295965
1737
+ 007870/008040, loss: 0.151463, avg_loss: 0.295820
1738
+ 007875/008040, loss: 0.029142, avg_loss: 0.295686
1739
+ 007880/008040, loss: 0.064737, avg_loss: 0.295532
1740
+ 007885/008040, loss: 0.077103, avg_loss: 0.295377
1741
+ 007890/008040, loss: 0.095900, avg_loss: 0.295271
1742
+ 007895/008040, loss: 0.030873, avg_loss: 0.295135
1743
+ 007900/008040, loss: 0.041561, avg_loss: 0.295050
1744
+ 007905/008040, loss: 0.052920, avg_loss: 0.294904
1745
+ 007910/008040, loss: 0.090048, avg_loss: 0.294785
1746
+ 007915/008040, loss: 0.392368, avg_loss: 0.294698
1747
+ 007920/008040, loss: 0.053813, avg_loss: 0.294579
1748
+ 007925/008040, loss: 0.122067, avg_loss: 0.294466
1749
+ 007930/008040, loss: 0.108241, avg_loss: 0.294332
1750
+ 007935/008040, loss: 0.047713, avg_loss: 0.294191
1751
+ 007940/008040, loss: 0.146655, avg_loss: 0.294085
1752
+ 007945/008040, loss: 0.041561, avg_loss: 0.293967
1753
+ 007950/008040, loss: 0.104168, avg_loss: 0.293888
1754
+ 007955/008040, loss: 0.036348, avg_loss: 0.293742
1755
+ 007960/008040, loss: 0.185095, avg_loss: 0.293636
1756
+ 007965/008040, loss: 0.250438, avg_loss: 0.293519
1757
+ 007970/008040, loss: 0.060160, avg_loss: 0.293385
1758
+ 007975/008040, loss: 0.030413, avg_loss: 0.293244
1759
+ 007980/008040, loss: 0.119690, avg_loss: 0.293127
1760
+ 007985/008040, loss: 0.157349, avg_loss: 0.293020
1761
+ 007990/008040, loss: 0.076855, avg_loss: 0.292911
1762
+ 007995/008040, loss: 0.060965, avg_loss: 0.292781
1763
+ 008000/008040, loss: 0.152858, avg_loss: 0.292687
1764
+ 008005/008040, loss: 0.257621, avg_loss: 0.292574
1765
+ 008010/008040, loss: 0.050748, avg_loss: 0.292464
1766
+ 008015/008040, loss: 0.078382, avg_loss: 0.292323
1767
+ 008020/008040, loss: 0.218891, avg_loss: 0.292214
1768
+ 008025/008040, loss: 0.052570, avg_loss: 0.292084
1769
+ 008030/008040, loss: 0.058064, avg_loss: 0.291971
1770
+ 008035/008040, loss: 0.017564, avg_loss: 0.291854
1771
+ 008040/008040, loss: 0.016145, avg_loss: 0.291706
1772
+ ***** Running dev evaluation *****
1773
+ Num examples = 1042
1774
+ Instantaneous batch size per device = 32
1775
+ epoch 29, step 8040/8040: {'matthews_correlation': 0.20156166898476155}
1776
+ ***** Running train evaluation *****
1777
+ Num examples = 8551
1778
+ Instantaneous batch size per device = 32
1779
+ Train Dataset Result: {'matthews_correlation': 0.9609897432355321}
1780
+ ***** Running dev evaluation *****
1781
+ Num examples = 1042
1782
+ Instantaneous batch size per device = 32
1783
+ Dev Dataset Result: {'matthews_correlation': 0.20156166898476155}
1784
+ Training time 0:05:46
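Note on the metric: the dev evaluations above report Matthews correlation (MCC). A minimal sketch of how such a score is computed, assuming scikit-learn is installed; the label and prediction lists below are made up purely for illustration (1 = acceptable, 0 = unacceptable):

from sklearn.metrics import matthews_corrcoef

# Hypothetical gold labels and model predictions for a handful of sentences.
labels      = [1, 0, 1, 1, 0, 1]
predictions = [1, 0, 0, 1, 0, 1]

# MCC ranges from -1 to 1; 0 corresponds to chance-level predictions.
print({"matthews_correlation": matthews_corrcoef(labels, predictions)})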
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28a7d74920b730a1cff525d934075a8a8be71b7ae5780f8108d8f4091784d7c
3
+ size 34299149
result.txt ADDED
@@ -0,0 +1,30 @@
1
+ {'matthews_correlation': 0.0}
2
+ {'matthews_correlation': 0.0}
3
+ {'matthews_correlation': 0.0}
4
+ {'matthews_correlation': 0.0}
5
+ {'matthews_correlation': 0.15816318746785782}
6
+ {'matthews_correlation': 0.1895854925674006}
7
+ {'matthews_correlation': 0.21307686539085852}
8
+ {'matthews_correlation': 0.22254373946847703}
9
+ {'matthews_correlation': 0.22951168079779777}
10
+ {'matthews_correlation': 0.23063296136375847}
11
+ {'matthews_correlation': 0.18813850606847293}
12
+ {'matthews_correlation': 0.20603205189543294}
13
+ {'matthews_correlation': 0.2118432448298745}
14
+ {'matthews_correlation': 0.20261239362380884}
15
+ {'matthews_correlation': 0.22518881045488998}
16
+ {'matthews_correlation': 0.24863648291608131}
17
+ {'matthews_correlation': 0.19984853723708582}
18
+ {'matthews_correlation': 0.23319244596326755}
19
+ {'matthews_correlation': 0.24348660475263997}
20
+ {'matthews_correlation': 0.2545245288314363}
21
+ {'matthews_correlation': 0.20994533418798944}
22
+ {'matthews_correlation': 0.21551745055261307}
23
+ {'matthews_correlation': 0.20483291444361929}
24
+ {'matthews_correlation': 0.24931944187781385}
25
+ {'matthews_correlation': 0.23227684406858393}
26
+ {'matthews_correlation': 0.2203939727085643}
27
+ {'matthews_correlation': 0.23696373689939254}
28
+ {'matthews_correlation': 0.22894062387495076}
29
+ {'matthews_correlation': 0.23262243281540648}
30
+ {'matthews_correlation': 0.20156166898476155}
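result.txt stores one dev MCC per epoch, one Python dict literal per line. A small sketch, assuming a local copy of result.txt in the working directory, for picking out the best-scoring entry:

import ast

# Each line looks like {'matthews_correlation': 0.2545...}; skip any blank lines.
with open("result.txt") as f:
    scores = [ast.literal_eval(line)["matthews_correlation"] for line in f if line.strip()]

best = max(range(len(scores)), key=lambda i: scores[i])
print(f"best entry: line {best + 1}, dev MCC = {scores[best]:.4f}")

On the values above, the maximum is 0.2545245288314363, the same score reported at step 5360/8040 in the training log.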
special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "model_max_length": 512, "name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5", "never_split": null, "special_tokens_map_file": "/home.local/jianwei/.cache/huggingface/transformers/b680d52711d2451bbd6c6b1700365d6d731977c1357ae86bd7227f61145d3be2.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_class": "BertTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
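Taken together, the uploaded config, weights, tokenizer files and vocab form a standard transformers checkpoint directory. A hedged sketch of loading it for inference, assuming the files from this commit sit under a local path such as ./checkpoint and that the stock transformers BERT sequence-classification classes accept the weights; the path and example sentence are placeholders:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

path = "./checkpoint"  # assumed local copy of the files in this commit
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(path)
model.eval()

# Illustrative sentence; the classifier outputs a 0/1 label index.
inputs = tokenizer("The book was read by the student.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())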