Upload 8 files
Browse files- config.json +31 -0
- log_bs32_lr3e-05_20221124_034416_123214.txt +1784 -0
- pytorch_model.bin +3 -0
- result.txt +30 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"embedding_size": 160,
|
9 |
+
"finetuning_task": "cola",
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 160,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 560,
|
16 |
+
"layer_norm_eps": 1e-12,
|
17 |
+
"max_position_embeddings": 512,
|
18 |
+
"model_type": "bert",
|
19 |
+
"num_attention_heads": 10,
|
20 |
+
"num_hidden_layers": 7,
|
21 |
+
"output_intermediate": true,
|
22 |
+
"output_past": true,
|
23 |
+
"pad_token_id": 0,
|
24 |
+
"position_embedding_type": "absolute",
|
25 |
+
"problem_type": "single_label_classification",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.17.0",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
log_bs32_lr3e-05_20221124_034416_123214.txt
ADDED
@@ -0,0 +1,1784 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
------------> log file ==runs2/cola/OUTPUT_ID/log_bs32_lr3e-05_20221124_034416_123214.txt
|
2 |
+
Namespace(aug_train=False, data_dir='/home.local/jianwei/datasets/nlp/glue_data/CoLA', do_eval=False, early_stop=False, early_stop_metric='accuracy', eval_step=120, gradient_accumulation_steps=1, learning_rate=3e-05, local_rank=0, lr_scheduler_type=<SchedulerType.CONSTANT_WITH_WARMUP: 'constant_with_warmup'>, max_length=128, max_train_steps=None, model_name_or_path='/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5', num_train_epochs=30, num_warmup_steps=0, output_dir='runs2/cola/OUTPUT_ID', pad_to_max_length=False, per_device_eval_batch_size=32, per_device_train_batch_size=32, print_step=5, save_last=False, seed=None, task_name='cola', train_file=None, use_slow_tokenizer=False, validation_file=None, weight_decay=0.0)
|
3 |
+
Distributed environment: NO
|
4 |
+
Num processes: 1
|
5 |
+
Process index: 0
|
6 |
+
Local process index: 0
|
7 |
+
Device: cuda
|
8 |
+
Mixed precision type: fp16
|
9 |
+
|
10 |
+
Sample 3305 of the training set: (tensor([ 101, 2058, 1996, 3481, 2045, 18360, 1037, 2312, 5210, 1012,
|
11 |
+
102, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
12 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
13 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
14 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
15 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
16 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
17 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
18 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
19 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
20 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
21 |
+
Sample 4580 of the training set: (tensor([ 101, 7525, 2097, 3191, 2115, 4311, 1010, 2021, 7157, 2097, 2025, 1012,
|
22 |
+
102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
24 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
25 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
26 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
31 |
+
Sample 1031 of the training set: (tensor([ 101, 2040, 7164, 2505, 2008, 2040, 2758, 1029, 102, 0, 0, 0,
|
32 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36 |
+
0, 0, 0, 0]), tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
38 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
39 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
40 |
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), tensor(1)).
|
41 |
+
***** Running training *****
|
42 |
+
Num examples = 8551
|
43 |
+
Num Epochs = 30
|
44 |
+
Instantaneous batch size per device = 32
|
45 |
+
Total train batch size (w. parallel, distributed & accumulation) = 32
|
46 |
+
Gradient Accumulation steps = 1
|
47 |
+
Total optimization steps = 8040
|
48 |
+
000005/008040, loss: 0.652222, avg_loss: 0.651953
|
49 |
+
000010/008040, loss: 0.661713, avg_loss: 0.653055
|
50 |
+
000015/008040, loss: 0.677567, avg_loss: 0.654401
|
51 |
+
000020/008040, loss: 0.650467, avg_loss: 0.655538
|
52 |
+
000025/008040, loss: 0.677872, avg_loss: 0.658810
|
53 |
+
000030/008040, loss: 0.661591, avg_loss: 0.658843
|
54 |
+
000035/008040, loss: 0.645645, avg_loss: 0.659464
|
55 |
+
000040/008040, loss: 0.652420, avg_loss: 0.659061
|
56 |
+
000045/008040, loss: 0.687469, avg_loss: 0.657230
|
57 |
+
000050/008040, loss: 0.626709, avg_loss: 0.656326
|
58 |
+
000055/008040, loss: 0.644302, avg_loss: 0.655348
|
59 |
+
000060/008040, loss: 0.648483, avg_loss: 0.655236
|
60 |
+
000065/008040, loss: 0.688477, avg_loss: 0.655078
|
61 |
+
000070/008040, loss: 0.695236, avg_loss: 0.656294
|
62 |
+
000075/008040, loss: 0.659332, avg_loss: 0.655568
|
63 |
+
000080/008040, loss: 0.658981, avg_loss: 0.654595
|
64 |
+
000085/008040, loss: 0.667786, avg_loss: 0.654187
|
65 |
+
000090/008040, loss: 0.658127, avg_loss: 0.653667
|
66 |
+
000095/008040, loss: 0.671814, avg_loss: 0.652597
|
67 |
+
000100/008040, loss: 0.633545, avg_loss: 0.652570
|
68 |
+
000105/008040, loss: 0.641556, avg_loss: 0.651439
|
69 |
+
000110/008040, loss: 0.605942, avg_loss: 0.650307
|
70 |
+
000115/008040, loss: 0.668716, avg_loss: 0.649737
|
71 |
+
000120/008040, loss: 0.565681, avg_loss: 0.648114
|
72 |
+
000125/008040, loss: 0.645332, avg_loss: 0.647180
|
73 |
+
000130/008040, loss: 0.654228, avg_loss: 0.645764
|
74 |
+
000135/008040, loss: 0.647469, avg_loss: 0.644160
|
75 |
+
000140/008040, loss: 0.589897, avg_loss: 0.644381
|
76 |
+
000145/008040, loss: 0.622665, avg_loss: 0.643773
|
77 |
+
000150/008040, loss: 0.582314, avg_loss: 0.642514
|
78 |
+
000155/008040, loss: 0.706223, avg_loss: 0.642192
|
79 |
+
000160/008040, loss: 0.573463, avg_loss: 0.640978
|
80 |
+
000165/008040, loss: 0.596039, avg_loss: 0.641289
|
81 |
+
000170/008040, loss: 0.629677, avg_loss: 0.641707
|
82 |
+
000175/008040, loss: 0.581818, avg_loss: 0.640926
|
83 |
+
000180/008040, loss: 0.573257, avg_loss: 0.640330
|
84 |
+
000185/008040, loss: 0.536911, avg_loss: 0.639325
|
85 |
+
000190/008040, loss: 0.607376, avg_loss: 0.638997
|
86 |
+
000195/008040, loss: 0.565323, avg_loss: 0.638167
|
87 |
+
000200/008040, loss: 0.561142, avg_loss: 0.637244
|
88 |
+
000205/008040, loss: 0.709908, avg_loss: 0.636707
|
89 |
+
000210/008040, loss: 0.655846, avg_loss: 0.635711
|
90 |
+
000215/008040, loss: 0.562042, avg_loss: 0.635813
|
91 |
+
000220/008040, loss: 0.623016, avg_loss: 0.635337
|
92 |
+
000225/008040, loss: 0.665192, avg_loss: 0.636041
|
93 |
+
000230/008040, loss: 0.602562, avg_loss: 0.636022
|
94 |
+
000235/008040, loss: 0.579742, avg_loss: 0.636067
|
95 |
+
000240/008040, loss: 0.643562, avg_loss: 0.635816
|
96 |
+
000245/008040, loss: 0.572937, avg_loss: 0.635337
|
97 |
+
000250/008040, loss: 0.609734, avg_loss: 0.635397
|
98 |
+
000255/008040, loss: 0.540230, avg_loss: 0.634626
|
99 |
+
000260/008040, loss: 0.688667, avg_loss: 0.634769
|
100 |
+
000265/008040, loss: 0.600624, avg_loss: 0.633670
|
101 |
+
***** Running dev evaluation *****
|
102 |
+
Num examples = 1042
|
103 |
+
Instantaneous batch size per device = 32
|
104 |
+
epoch 0, step 268/8040: {'matthews_correlation': 0.0}
|
105 |
+
000270/008040, loss: 0.532043, avg_loss: 0.633611
|
106 |
+
000275/008040, loss: 0.593201, avg_loss: 0.633069
|
107 |
+
000280/008040, loss: 0.706932, avg_loss: 0.633117
|
108 |
+
000285/008040, loss: 0.684525, avg_loss: 0.632790
|
109 |
+
000290/008040, loss: 0.642426, avg_loss: 0.632020
|
110 |
+
000295/008040, loss: 0.617699, avg_loss: 0.632309
|
111 |
+
000300/008040, loss: 0.693405, avg_loss: 0.632864
|
112 |
+
000305/008040, loss: 0.721481, avg_loss: 0.632628
|
113 |
+
000310/008040, loss: 0.551292, avg_loss: 0.631563
|
114 |
+
000315/008040, loss: 0.619102, avg_loss: 0.631161
|
115 |
+
000320/008040, loss: 0.573761, avg_loss: 0.631119
|
116 |
+
000325/008040, loss: 0.621773, avg_loss: 0.631340
|
117 |
+
000330/008040, loss: 0.504166, avg_loss: 0.630779
|
118 |
+
000335/008040, loss: 0.627258, avg_loss: 0.630988
|
119 |
+
000340/008040, loss: 0.620651, avg_loss: 0.630650
|
120 |
+
000345/008040, loss: 0.579193, avg_loss: 0.630047
|
121 |
+
000350/008040, loss: 0.621361, avg_loss: 0.629630
|
122 |
+
000355/008040, loss: 0.638191, avg_loss: 0.629452
|
123 |
+
000360/008040, loss: 0.589127, avg_loss: 0.629559
|
124 |
+
000365/008040, loss: 0.725616, avg_loss: 0.629725
|
125 |
+
000370/008040, loss: 0.677101, avg_loss: 0.629617
|
126 |
+
000375/008040, loss: 0.575119, avg_loss: 0.628965
|
127 |
+
000380/008040, loss: 0.493011, avg_loss: 0.628739
|
128 |
+
000385/008040, loss: 0.653969, avg_loss: 0.629037
|
129 |
+
000390/008040, loss: 0.672195, avg_loss: 0.628962
|
130 |
+
000395/008040, loss: 0.665962, avg_loss: 0.628864
|
131 |
+
000400/008040, loss: 0.515625, avg_loss: 0.627842
|
132 |
+
000405/008040, loss: 0.596878, avg_loss: 0.627075
|
133 |
+
000410/008040, loss: 0.624397, avg_loss: 0.626773
|
134 |
+
000415/008040, loss: 0.568840, avg_loss: 0.626794
|
135 |
+
000420/008040, loss: 0.678520, avg_loss: 0.626516
|
136 |
+
000425/008040, loss: 0.691116, avg_loss: 0.626167
|
137 |
+
000430/008040, loss: 0.629356, avg_loss: 0.625845
|
138 |
+
000435/008040, loss: 0.592529, avg_loss: 0.624969
|
139 |
+
000440/008040, loss: 0.621323, avg_loss: 0.624545
|
140 |
+
000445/008040, loss: 0.713966, avg_loss: 0.624795
|
141 |
+
000450/008040, loss: 0.510971, avg_loss: 0.624669
|
142 |
+
000455/008040, loss: 0.672722, avg_loss: 0.625094
|
143 |
+
000460/008040, loss: 0.569397, avg_loss: 0.624232
|
144 |
+
000465/008040, loss: 0.543617, avg_loss: 0.624290
|
145 |
+
000470/008040, loss: 0.701065, avg_loss: 0.623677
|
146 |
+
000475/008040, loss: 0.697563, avg_loss: 0.623886
|
147 |
+
000480/008040, loss: 0.516510, avg_loss: 0.623712
|
148 |
+
000485/008040, loss: 0.623947, avg_loss: 0.624315
|
149 |
+
000490/008040, loss: 0.571724, avg_loss: 0.623997
|
150 |
+
000495/008040, loss: 0.652824, avg_loss: 0.624104
|
151 |
+
000500/008040, loss: 0.591492, avg_loss: 0.623351
|
152 |
+
000505/008040, loss: 0.562141, avg_loss: 0.623008
|
153 |
+
000510/008040, loss: 0.571037, avg_loss: 0.622772
|
154 |
+
000515/008040, loss: 0.594940, avg_loss: 0.622726
|
155 |
+
000520/008040, loss: 0.595879, avg_loss: 0.622236
|
156 |
+
000525/008040, loss: 0.420456, avg_loss: 0.621296
|
157 |
+
000530/008040, loss: 0.679199, avg_loss: 0.621323
|
158 |
+
000535/008040, loss: 0.630341, avg_loss: 0.621167
|
159 |
+
***** Running dev evaluation *****
|
160 |
+
Num examples = 1042
|
161 |
+
Instantaneous batch size per device = 32
|
162 |
+
epoch 1, step 536/8040: {'matthews_correlation': 0.0}
|
163 |
+
000540/008040, loss: 0.539192, avg_loss: 0.620370
|
164 |
+
000545/008040, loss: 0.624496, avg_loss: 0.620560
|
165 |
+
000550/008040, loss: 0.478798, avg_loss: 0.620446
|
166 |
+
000555/008040, loss: 0.623642, avg_loss: 0.619969
|
167 |
+
000560/008040, loss: 0.588112, avg_loss: 0.620092
|
168 |
+
000565/008040, loss: 0.537552, avg_loss: 0.619577
|
169 |
+
000570/008040, loss: 0.650581, avg_loss: 0.619168
|
170 |
+
000575/008040, loss: 0.587677, avg_loss: 0.619309
|
171 |
+
000580/008040, loss: 0.557770, avg_loss: 0.619064
|
172 |
+
000585/008040, loss: 0.741516, avg_loss: 0.619524
|
173 |
+
000590/008040, loss: 0.600594, avg_loss: 0.619864
|
174 |
+
000595/008040, loss: 0.669243, avg_loss: 0.619429
|
175 |
+
000600/008040, loss: 0.649200, avg_loss: 0.619334
|
176 |
+
000605/008040, loss: 0.707039, avg_loss: 0.619096
|
177 |
+
000610/008040, loss: 0.570152, avg_loss: 0.618888
|
178 |
+
000615/008040, loss: 0.678078, avg_loss: 0.618893
|
179 |
+
000620/008040, loss: 0.590279, avg_loss: 0.618822
|
180 |
+
000625/008040, loss: 0.648178, avg_loss: 0.618851
|
181 |
+
000630/008040, loss: 0.707413, avg_loss: 0.619119
|
182 |
+
000635/008040, loss: 0.613609, avg_loss: 0.619032
|
183 |
+
000640/008040, loss: 0.637474, avg_loss: 0.619302
|
184 |
+
000645/008040, loss: 0.620033, avg_loss: 0.619061
|
185 |
+
000650/008040, loss: 0.674812, avg_loss: 0.619388
|
186 |
+
000655/008040, loss: 0.603241, avg_loss: 0.619122
|
187 |
+
000660/008040, loss: 0.596001, avg_loss: 0.619185
|
188 |
+
000665/008040, loss: 0.546028, avg_loss: 0.618988
|
189 |
+
000670/008040, loss: 0.467194, avg_loss: 0.618476
|
190 |
+
000675/008040, loss: 0.539108, avg_loss: 0.617887
|
191 |
+
000680/008040, loss: 0.630531, avg_loss: 0.617587
|
192 |
+
000685/008040, loss: 0.628906, avg_loss: 0.617714
|
193 |
+
000690/008040, loss: 0.662460, avg_loss: 0.617408
|
194 |
+
000695/008040, loss: 0.556480, avg_loss: 0.617490
|
195 |
+
000700/008040, loss: 0.562729, avg_loss: 0.617541
|
196 |
+
000705/008040, loss: 0.534187, avg_loss: 0.617240
|
197 |
+
000710/008040, loss: 0.705315, avg_loss: 0.617217
|
198 |
+
000715/008040, loss: 0.628326, avg_loss: 0.617076
|
199 |
+
000720/008040, loss: 0.451012, avg_loss: 0.616425
|
200 |
+
000725/008040, loss: 0.679764, avg_loss: 0.616494
|
201 |
+
000730/008040, loss: 0.648232, avg_loss: 0.616603
|
202 |
+
000735/008040, loss: 0.775688, avg_loss: 0.616731
|
203 |
+
000740/008040, loss: 0.483841, avg_loss: 0.616351
|
204 |
+
000745/008040, loss: 0.480392, avg_loss: 0.616453
|
205 |
+
000750/008040, loss: 0.641754, avg_loss: 0.616477
|
206 |
+
000755/008040, loss: 0.655449, avg_loss: 0.616491
|
207 |
+
000760/008040, loss: 0.620735, avg_loss: 0.616589
|
208 |
+
000765/008040, loss: 0.698372, avg_loss: 0.616510
|
209 |
+
000770/008040, loss: 0.541710, avg_loss: 0.616465
|
210 |
+
000775/008040, loss: 0.616829, avg_loss: 0.616582
|
211 |
+
000780/008040, loss: 0.615852, avg_loss: 0.616320
|
212 |
+
000785/008040, loss: 0.645004, avg_loss: 0.616204
|
213 |
+
000790/008040, loss: 0.620392, avg_loss: 0.616373
|
214 |
+
000795/008040, loss: 0.621986, avg_loss: 0.616532
|
215 |
+
000800/008040, loss: 0.698029, avg_loss: 0.616433
|
216 |
+
***** Running dev evaluation *****
|
217 |
+
Num examples = 1042
|
218 |
+
Instantaneous batch size per device = 32
|
219 |
+
epoch 2, step 804/8040: {'matthews_correlation': 0.0}
|
220 |
+
000805/008040, loss: 0.666588, avg_loss: 0.616280
|
221 |
+
000810/008040, loss: 0.513000, avg_loss: 0.616144
|
222 |
+
000815/008040, loss: 0.678246, avg_loss: 0.616261
|
223 |
+
000820/008040, loss: 0.508728, avg_loss: 0.616257
|
224 |
+
000825/008040, loss: 0.561485, avg_loss: 0.615815
|
225 |
+
000830/008040, loss: 0.629829, avg_loss: 0.615827
|
226 |
+
000835/008040, loss: 0.614128, avg_loss: 0.615832
|
227 |
+
000840/008040, loss: 0.594833, avg_loss: 0.615989
|
228 |
+
000845/008040, loss: 0.536339, avg_loss: 0.616183
|
229 |
+
000850/008040, loss: 0.599487, avg_loss: 0.616021
|
230 |
+
000855/008040, loss: 0.673401, avg_loss: 0.616031
|
231 |
+
000860/008040, loss: 0.539940, avg_loss: 0.616041
|
232 |
+
000865/008040, loss: 0.572212, avg_loss: 0.616227
|
233 |
+
000870/008040, loss: 0.609131, avg_loss: 0.615873
|
234 |
+
000875/008040, loss: 0.614037, avg_loss: 0.615787
|
235 |
+
000880/008040, loss: 0.571541, avg_loss: 0.615553
|
236 |
+
000885/008040, loss: 0.544708, avg_loss: 0.615402
|
237 |
+
000890/008040, loss: 0.536331, avg_loss: 0.615272
|
238 |
+
000895/008040, loss: 0.675529, avg_loss: 0.615234
|
239 |
+
000900/008040, loss: 0.627602, avg_loss: 0.615170
|
240 |
+
000905/008040, loss: 0.660126, avg_loss: 0.615088
|
241 |
+
000910/008040, loss: 0.558937, avg_loss: 0.614803
|
242 |
+
000915/008040, loss: 0.549660, avg_loss: 0.614641
|
243 |
+
000920/008040, loss: 0.643845, avg_loss: 0.614743
|
244 |
+
000925/008040, loss: 0.572258, avg_loss: 0.614782
|
245 |
+
000930/008040, loss: 0.634689, avg_loss: 0.614739
|
246 |
+
000935/008040, loss: 0.543579, avg_loss: 0.614672
|
247 |
+
000940/008040, loss: 0.714607, avg_loss: 0.614516
|
248 |
+
000945/008040, loss: 0.596260, avg_loss: 0.614343
|
249 |
+
000950/008040, loss: 0.564911, avg_loss: 0.614114
|
250 |
+
000955/008040, loss: 0.735931, avg_loss: 0.614013
|
251 |
+
000960/008040, loss: 0.742020, avg_loss: 0.614185
|
252 |
+
000965/008040, loss: 0.632996, avg_loss: 0.613984
|
253 |
+
000970/008040, loss: 0.505898, avg_loss: 0.614050
|
254 |
+
000975/008040, loss: 0.632904, avg_loss: 0.614360
|
255 |
+
000980/008040, loss: 0.637787, avg_loss: 0.614284
|
256 |
+
000985/008040, loss: 0.544106, avg_loss: 0.614300
|
257 |
+
000990/008040, loss: 0.627823, avg_loss: 0.614128
|
258 |
+
000995/008040, loss: 0.518829, avg_loss: 0.613865
|
259 |
+
001000/008040, loss: 0.682663, avg_loss: 0.613936
|
260 |
+
001005/008040, loss: 0.702003, avg_loss: 0.614055
|
261 |
+
001010/008040, loss: 0.638351, avg_loss: 0.613856
|
262 |
+
001015/008040, loss: 0.643425, avg_loss: 0.613794
|
263 |
+
001020/008040, loss: 0.511559, avg_loss: 0.613882
|
264 |
+
001025/008040, loss: 0.570694, avg_loss: 0.613526
|
265 |
+
001030/008040, loss: 0.742371, avg_loss: 0.613564
|
266 |
+
001035/008040, loss: 0.525169, avg_loss: 0.613386
|
267 |
+
001040/008040, loss: 0.631721, avg_loss: 0.613472
|
268 |
+
001045/008040, loss: 0.678436, avg_loss: 0.613299
|
269 |
+
001050/008040, loss: 0.496040, avg_loss: 0.613325
|
270 |
+
001055/008040, loss: 0.656860, avg_loss: 0.613305
|
271 |
+
001060/008040, loss: 0.574501, avg_loss: 0.613204
|
272 |
+
001065/008040, loss: 0.547646, avg_loss: 0.613304
|
273 |
+
001070/008040, loss: 0.567757, avg_loss: 0.613248
|
274 |
+
***** Running dev evaluation *****
|
275 |
+
Num examples = 1042
|
276 |
+
Instantaneous batch size per device = 32
|
277 |
+
epoch 3, step 1072/8040: {'matthews_correlation': 0.0}
|
278 |
+
001075/008040, loss: 0.516525, avg_loss: 0.613198
|
279 |
+
001080/008040, loss: 0.646126, avg_loss: 0.613297
|
280 |
+
001085/008040, loss: 0.559669, avg_loss: 0.613150
|
281 |
+
001090/008040, loss: 0.466164, avg_loss: 0.612698
|
282 |
+
001095/008040, loss: 0.651367, avg_loss: 0.612560
|
283 |
+
001100/008040, loss: 0.654549, avg_loss: 0.612559
|
284 |
+
001105/008040, loss: 0.541718, avg_loss: 0.612571
|
285 |
+
001110/008040, loss: 0.508430, avg_loss: 0.612473
|
286 |
+
001115/008040, loss: 0.573200, avg_loss: 0.612111
|
287 |
+
001120/008040, loss: 0.557476, avg_loss: 0.611973
|
288 |
+
001125/008040, loss: 0.652611, avg_loss: 0.611617
|
289 |
+
001130/008040, loss: 0.526085, avg_loss: 0.611471
|
290 |
+
001135/008040, loss: 0.444359, avg_loss: 0.610979
|
291 |
+
001140/008040, loss: 0.628677, avg_loss: 0.610810
|
292 |
+
001145/008040, loss: 0.503414, avg_loss: 0.610575
|
293 |
+
001150/008040, loss: 0.473530, avg_loss: 0.610274
|
294 |
+
001155/008040, loss: 0.545444, avg_loss: 0.609830
|
295 |
+
001160/008040, loss: 0.505241, avg_loss: 0.609483
|
296 |
+
001165/008040, loss: 0.495068, avg_loss: 0.609279
|
297 |
+
001170/008040, loss: 0.588757, avg_loss: 0.609380
|
298 |
+
001175/008040, loss: 0.567360, avg_loss: 0.609198
|
299 |
+
001180/008040, loss: 0.648659, avg_loss: 0.609238
|
300 |
+
001185/008040, loss: 0.637810, avg_loss: 0.609066
|
301 |
+
001190/008040, loss: 0.824970, avg_loss: 0.608990
|
302 |
+
001195/008040, loss: 0.519943, avg_loss: 0.608662
|
303 |
+
001200/008040, loss: 0.601860, avg_loss: 0.608558
|
304 |
+
001205/008040, loss: 0.583908, avg_loss: 0.608157
|
305 |
+
001210/008040, loss: 0.725353, avg_loss: 0.608082
|
306 |
+
001215/008040, loss: 0.490597, avg_loss: 0.607950
|
307 |
+
001220/008040, loss: 0.425632, avg_loss: 0.607686
|
308 |
+
001225/008040, loss: 0.596016, avg_loss: 0.607460
|
309 |
+
001230/008040, loss: 0.577240, avg_loss: 0.607340
|
310 |
+
001235/008040, loss: 0.586720, avg_loss: 0.607166
|
311 |
+
001240/008040, loss: 0.395218, avg_loss: 0.606811
|
312 |
+
001245/008040, loss: 0.633438, avg_loss: 0.606556
|
313 |
+
001250/008040, loss: 0.680958, avg_loss: 0.606540
|
314 |
+
001255/008040, loss: 0.628593, avg_loss: 0.606489
|
315 |
+
001260/008040, loss: 0.440163, avg_loss: 0.606267
|
316 |
+
001265/008040, loss: 0.556847, avg_loss: 0.606262
|
317 |
+
001270/008040, loss: 0.504066, avg_loss: 0.605940
|
318 |
+
001275/008040, loss: 0.572201, avg_loss: 0.605561
|
319 |
+
001280/008040, loss: 0.718948, avg_loss: 0.605577
|
320 |
+
001285/008040, loss: 0.485722, avg_loss: 0.605334
|
321 |
+
001290/008040, loss: 0.612705, avg_loss: 0.605227
|
322 |
+
001295/008040, loss: 0.666271, avg_loss: 0.604907
|
323 |
+
001300/008040, loss: 0.501457, avg_loss: 0.604697
|
324 |
+
001305/008040, loss: 0.642426, avg_loss: 0.604318
|
325 |
+
001310/008040, loss: 0.627075, avg_loss: 0.604377
|
326 |
+
001315/008040, loss: 0.406536, avg_loss: 0.603942
|
327 |
+
001320/008040, loss: 0.498077, avg_loss: 0.603909
|
328 |
+
001325/008040, loss: 0.622322, avg_loss: 0.604236
|
329 |
+
001330/008040, loss: 0.563160, avg_loss: 0.604285
|
330 |
+
001335/008040, loss: 0.545391, avg_loss: 0.604257
|
331 |
+
001340/008040, loss: 0.478202, avg_loss: 0.604044
|
332 |
+
***** Running dev evaluation *****
|
333 |
+
Num examples = 1042
|
334 |
+
Instantaneous batch size per device = 32
|
335 |
+
epoch 4, step 1340/8040: {'matthews_correlation': 0.15816318746785782}
|
336 |
+
001345/008040, loss: 0.497211, avg_loss: 0.603771
|
337 |
+
001350/008040, loss: 0.504078, avg_loss: 0.603257
|
338 |
+
001355/008040, loss: 0.563469, avg_loss: 0.603027
|
339 |
+
001360/008040, loss: 0.436638, avg_loss: 0.602608
|
340 |
+
001365/008040, loss: 0.503475, avg_loss: 0.602021
|
341 |
+
001370/008040, loss: 0.512344, avg_loss: 0.601490
|
342 |
+
001375/008040, loss: 0.479725, avg_loss: 0.601133
|
343 |
+
001380/008040, loss: 0.547909, avg_loss: 0.600951
|
344 |
+
001385/008040, loss: 0.400013, avg_loss: 0.600657
|
345 |
+
001390/008040, loss: 0.542755, avg_loss: 0.600452
|
346 |
+
001395/008040, loss: 0.376102, avg_loss: 0.600132
|
347 |
+
001400/008040, loss: 0.548512, avg_loss: 0.599679
|
348 |
+
001405/008040, loss: 0.676727, avg_loss: 0.599476
|
349 |
+
001410/008040, loss: 0.657574, avg_loss: 0.599159
|
350 |
+
001415/008040, loss: 0.496979, avg_loss: 0.598694
|
351 |
+
001420/008040, loss: 0.572510, avg_loss: 0.598379
|
352 |
+
001425/008040, loss: 0.535450, avg_loss: 0.598210
|
353 |
+
001430/008040, loss: 0.394039, avg_loss: 0.597776
|
354 |
+
001435/008040, loss: 0.513657, avg_loss: 0.597460
|
355 |
+
001440/008040, loss: 0.511215, avg_loss: 0.597127
|
356 |
+
001445/008040, loss: 0.339886, avg_loss: 0.596767
|
357 |
+
001450/008040, loss: 0.510815, avg_loss: 0.596433
|
358 |
+
001455/008040, loss: 0.654274, avg_loss: 0.596212
|
359 |
+
001460/008040, loss: 0.349232, avg_loss: 0.595762
|
360 |
+
001465/008040, loss: 0.493137, avg_loss: 0.595544
|
361 |
+
001470/008040, loss: 0.592804, avg_loss: 0.595768
|
362 |
+
001475/008040, loss: 0.455566, avg_loss: 0.595485
|
363 |
+
001480/008040, loss: 0.570206, avg_loss: 0.595109
|
364 |
+
001485/008040, loss: 0.385925, avg_loss: 0.594787
|
365 |
+
001490/008040, loss: 0.431305, avg_loss: 0.594303
|
366 |
+
001495/008040, loss: 0.571796, avg_loss: 0.594218
|
367 |
+
001500/008040, loss: 0.523552, avg_loss: 0.594122
|
368 |
+
001505/008040, loss: 0.777546, avg_loss: 0.594023
|
369 |
+
001510/008040, loss: 0.629959, avg_loss: 0.593755
|
370 |
+
001515/008040, loss: 0.650162, avg_loss: 0.593670
|
371 |
+
001520/008040, loss: 0.509098, avg_loss: 0.593517
|
372 |
+
001525/008040, loss: 0.462357, avg_loss: 0.593270
|
373 |
+
001530/008040, loss: 0.643883, avg_loss: 0.593174
|
374 |
+
001535/008040, loss: 0.417114, avg_loss: 0.592827
|
375 |
+
001540/008040, loss: 0.470078, avg_loss: 0.592541
|
376 |
+
001545/008040, loss: 0.504375, avg_loss: 0.592232
|
377 |
+
001550/008040, loss: 0.536144, avg_loss: 0.592052
|
378 |
+
001555/008040, loss: 0.408104, avg_loss: 0.591786
|
379 |
+
001560/008040, loss: 0.476906, avg_loss: 0.591672
|
380 |
+
001565/008040, loss: 0.560863, avg_loss: 0.591418
|
381 |
+
001570/008040, loss: 0.519066, avg_loss: 0.591105
|
382 |
+
001575/008040, loss: 0.594341, avg_loss: 0.590856
|
383 |
+
001580/008040, loss: 0.466764, avg_loss: 0.590662
|
384 |
+
001585/008040, loss: 0.450207, avg_loss: 0.590457
|
385 |
+
001590/008040, loss: 0.435432, avg_loss: 0.590293
|
386 |
+
001595/008040, loss: 0.556160, avg_loss: 0.589989
|
387 |
+
001600/008040, loss: 0.611170, avg_loss: 0.589724
|
388 |
+
001605/008040, loss: 0.402641, avg_loss: 0.589453
|
389 |
+
***** Running dev evaluation *****
|
390 |
+
Num examples = 1042
|
391 |
+
Instantaneous batch size per device = 32
|
392 |
+
epoch 5, step 1608/8040: {'matthews_correlation': 0.1895854925674006}
|
393 |
+
001610/008040, loss: 0.432098, avg_loss: 0.588991
|
394 |
+
001615/008040, loss: 0.498215, avg_loss: 0.588663
|
395 |
+
001620/008040, loss: 0.663460, avg_loss: 0.588371
|
396 |
+
001625/008040, loss: 0.360754, avg_loss: 0.587877
|
397 |
+
001630/008040, loss: 0.387835, avg_loss: 0.587359
|
398 |
+
001635/008040, loss: 0.324583, avg_loss: 0.586922
|
399 |
+
001640/008040, loss: 0.535975, avg_loss: 0.586623
|
400 |
+
001645/008040, loss: 0.592346, avg_loss: 0.586544
|
401 |
+
001650/008040, loss: 0.525536, avg_loss: 0.586103
|
402 |
+
001655/008040, loss: 0.575745, avg_loss: 0.585766
|
403 |
+
001660/008040, loss: 0.449825, avg_loss: 0.585359
|
404 |
+
001665/008040, loss: 0.420126, avg_loss: 0.585023
|
405 |
+
001670/008040, loss: 0.639540, avg_loss: 0.584721
|
406 |
+
001675/008040, loss: 0.344273, avg_loss: 0.584341
|
407 |
+
001680/008040, loss: 0.540806, avg_loss: 0.583815
|
408 |
+
001685/008040, loss: 0.384409, avg_loss: 0.583182
|
409 |
+
001690/008040, loss: 0.440865, avg_loss: 0.583039
|
410 |
+
001695/008040, loss: 0.555412, avg_loss: 0.582610
|
411 |
+
001700/008040, loss: 0.407997, avg_loss: 0.582275
|
412 |
+
001705/008040, loss: 0.383024, avg_loss: 0.581847
|
413 |
+
001710/008040, loss: 0.537008, avg_loss: 0.581442
|
414 |
+
001715/008040, loss: 0.355068, avg_loss: 0.581088
|
415 |
+
001720/008040, loss: 0.338402, avg_loss: 0.580900
|
416 |
+
001725/008040, loss: 0.566572, avg_loss: 0.580647
|
417 |
+
001730/008040, loss: 0.548752, avg_loss: 0.580402
|
418 |
+
001735/008040, loss: 0.535681, avg_loss: 0.580117
|
419 |
+
001740/008040, loss: 0.614624, avg_loss: 0.579823
|
420 |
+
001745/008040, loss: 0.530151, avg_loss: 0.579474
|
421 |
+
001750/008040, loss: 0.489605, avg_loss: 0.579157
|
422 |
+
001755/008040, loss: 0.584639, avg_loss: 0.578986
|
423 |
+
001760/008040, loss: 0.501501, avg_loss: 0.578577
|
424 |
+
001765/008040, loss: 0.511179, avg_loss: 0.578278
|
425 |
+
001770/008040, loss: 0.400345, avg_loss: 0.577970
|
426 |
+
001775/008040, loss: 0.562296, avg_loss: 0.577756
|
427 |
+
001780/008040, loss: 0.460430, avg_loss: 0.577570
|
428 |
+
001785/008040, loss: 0.420773, avg_loss: 0.577251
|
429 |
+
001790/008040, loss: 0.476810, avg_loss: 0.576814
|
430 |
+
001795/008040, loss: 0.473215, avg_loss: 0.576538
|
431 |
+
001800/008040, loss: 0.324244, avg_loss: 0.576321
|
432 |
+
001805/008040, loss: 0.466421, avg_loss: 0.575848
|
433 |
+
001810/008040, loss: 0.515072, avg_loss: 0.575525
|
434 |
+
001815/008040, loss: 0.348133, avg_loss: 0.575163
|
435 |
+
001820/008040, loss: 0.405993, avg_loss: 0.574717
|
436 |
+
001825/008040, loss: 0.724440, avg_loss: 0.574358
|
437 |
+
001830/008040, loss: 0.404591, avg_loss: 0.573989
|
438 |
+
001835/008040, loss: 0.386953, avg_loss: 0.573703
|
439 |
+
001840/008040, loss: 0.297220, avg_loss: 0.573313
|
440 |
+
001845/008040, loss: 0.549763, avg_loss: 0.573012
|
441 |
+
001850/008040, loss: 0.439955, avg_loss: 0.572694
|
442 |
+
001855/008040, loss: 0.379715, avg_loss: 0.572410
|
443 |
+
001860/008040, loss: 0.301922, avg_loss: 0.571970
|
444 |
+
001865/008040, loss: 0.442181, avg_loss: 0.571632
|
445 |
+
001870/008040, loss: 0.403500, avg_loss: 0.571553
|
446 |
+
001875/008040, loss: 0.479061, avg_loss: 0.571291
|
447 |
+
***** Running dev evaluation *****
|
448 |
+
Num examples = 1042
|
449 |
+
Instantaneous batch size per device = 32
|
450 |
+
epoch 6, step 1876/8040: {'matthews_correlation': 0.21307686539085852}
|
451 |
+
001880/008040, loss: 0.669218, avg_loss: 0.571140
|
452 |
+
001885/008040, loss: 0.542337, avg_loss: 0.570861
|
453 |
+
001890/008040, loss: 0.589821, avg_loss: 0.570818
|
454 |
+
001895/008040, loss: 0.476381, avg_loss: 0.570683
|
455 |
+
001900/008040, loss: 0.404007, avg_loss: 0.570266
|
456 |
+
001905/008040, loss: 0.356413, avg_loss: 0.569667
|
457 |
+
001910/008040, loss: 0.477184, avg_loss: 0.569371
|
458 |
+
001915/008040, loss: 0.490374, avg_loss: 0.568927
|
459 |
+
001920/008040, loss: 0.415562, avg_loss: 0.568538
|
460 |
+
001925/008040, loss: 0.388258, avg_loss: 0.568085
|
461 |
+
001930/008040, loss: 0.456861, avg_loss: 0.567801
|
462 |
+
001935/008040, loss: 0.384424, avg_loss: 0.567589
|
463 |
+
001940/008040, loss: 0.353615, avg_loss: 0.567248
|
464 |
+
001945/008040, loss: 0.391302, avg_loss: 0.566774
|
465 |
+
001950/008040, loss: 0.465639, avg_loss: 0.566302
|
466 |
+
001955/008040, loss: 0.230816, avg_loss: 0.565751
|
467 |
+
001960/008040, loss: 0.518036, avg_loss: 0.565276
|
468 |
+
001965/008040, loss: 0.410194, avg_loss: 0.564808
|
469 |
+
001970/008040, loss: 0.349751, avg_loss: 0.564646
|
470 |
+
001975/008040, loss: 0.334585, avg_loss: 0.564052
|
471 |
+
001980/008040, loss: 0.534948, avg_loss: 0.563787
|
472 |
+
001985/008040, loss: 0.362720, avg_loss: 0.563456
|
473 |
+
001990/008040, loss: 0.448977, avg_loss: 0.563030
|
474 |
+
001995/008040, loss: 0.355686, avg_loss: 0.562736
|
475 |
+
002000/008040, loss: 0.439312, avg_loss: 0.562282
|
476 |
+
002005/008040, loss: 0.560658, avg_loss: 0.561938
|
477 |
+
002010/008040, loss: 0.257093, avg_loss: 0.561694
|
478 |
+
002015/008040, loss: 0.427464, avg_loss: 0.561446
|
479 |
+
002020/008040, loss: 0.501780, avg_loss: 0.561143
|
480 |
+
002025/008040, loss: 0.395622, avg_loss: 0.560958
|
481 |
+
002030/008040, loss: 0.593700, avg_loss: 0.560682
|
482 |
+
002035/008040, loss: 0.260624, avg_loss: 0.560042
|
483 |
+
002040/008040, loss: 0.331761, avg_loss: 0.559656
|
484 |
+
002045/008040, loss: 0.454527, avg_loss: 0.559403
|
485 |
+
002050/008040, loss: 0.379538, avg_loss: 0.559265
|
486 |
+
002055/008040, loss: 0.376312, avg_loss: 0.558873
|
487 |
+
002060/008040, loss: 0.522795, avg_loss: 0.558628
|
488 |
+
002065/008040, loss: 0.457785, avg_loss: 0.558271
|
489 |
+
002070/008040, loss: 0.561003, avg_loss: 0.557992
|
490 |
+
002075/008040, loss: 0.269575, avg_loss: 0.557529
|
491 |
+
002080/008040, loss: 0.476014, avg_loss: 0.557187
|
492 |
+
002085/008040, loss: 0.425539, avg_loss: 0.556978
|
493 |
+
002090/008040, loss: 0.350451, avg_loss: 0.556575
|
494 |
+
002095/008040, loss: 0.275333, avg_loss: 0.556114
|
495 |
+
002100/008040, loss: 0.463511, avg_loss: 0.556032
|
496 |
+
002105/008040, loss: 0.313173, avg_loss: 0.555600
|
497 |
+
002110/008040, loss: 0.555047, avg_loss: 0.555265
|
498 |
+
002115/008040, loss: 0.554151, avg_loss: 0.555004
|
499 |
+
002120/008040, loss: 0.383408, avg_loss: 0.554883
|
500 |
+
002125/008040, loss: 0.488256, avg_loss: 0.554548
|
501 |
+
002130/008040, loss: 0.357615, avg_loss: 0.554264
|
502 |
+
002135/008040, loss: 0.157333, avg_loss: 0.553637
|
503 |
+
002140/008040, loss: 0.288863, avg_loss: 0.553479
|
504 |
+
***** Running dev evaluation *****
|
505 |
+
Num examples = 1042
|
506 |
+
Instantaneous batch size per device = 32
|
507 |
+
epoch 7, step 2144/8040: {'matthews_correlation': 0.22254373946847703}
|
508 |
+
002145/008040, loss: 0.303406, avg_loss: 0.553004
|
509 |
+
002150/008040, loss: 0.306277, avg_loss: 0.552618
|
510 |
+
002155/008040, loss: 0.338732, avg_loss: 0.552232
|
511 |
+
002160/008040, loss: 0.427835, avg_loss: 0.551830
|
512 |
+
002165/008040, loss: 0.414758, avg_loss: 0.551564
|
513 |
+
002170/008040, loss: 0.576152, avg_loss: 0.551253
|
514 |
+
002175/008040, loss: 0.407147, avg_loss: 0.550871
|
515 |
+
002180/008040, loss: 0.399303, avg_loss: 0.550418
|
516 |
+
002185/008040, loss: 0.417184, avg_loss: 0.550105
|
517 |
+
002190/008040, loss: 0.373152, avg_loss: 0.549766
|
518 |
+
002195/008040, loss: 0.357236, avg_loss: 0.549473
|
519 |
+
002200/008040, loss: 0.488667, avg_loss: 0.549189
|
520 |
+
002205/008040, loss: 0.430184, avg_loss: 0.548822
|
521 |
+
002210/008040, loss: 0.509947, avg_loss: 0.548544
|
522 |
+
002215/008040, loss: 0.248991, avg_loss: 0.548000
|
523 |
+
002220/008040, loss: 0.524855, avg_loss: 0.547764
|
524 |
+
002225/008040, loss: 0.307470, avg_loss: 0.547397
|
525 |
+
002230/008040, loss: 0.317670, avg_loss: 0.546941
|
526 |
+
002235/008040, loss: 0.257109, avg_loss: 0.546452
|
527 |
+
002240/008040, loss: 0.437452, avg_loss: 0.546180
|
528 |
+
002245/008040, loss: 0.416744, avg_loss: 0.545923
|
529 |
+
002250/008040, loss: 0.301953, avg_loss: 0.545449
|
530 |
+
002255/008040, loss: 0.505546, avg_loss: 0.545019
|
531 |
+
002260/008040, loss: 0.438693, avg_loss: 0.544505
|
532 |
+
002265/008040, loss: 0.269212, avg_loss: 0.544109
|
533 |
+
002270/008040, loss: 0.436381, avg_loss: 0.543815
|
534 |
+
002275/008040, loss: 0.455789, avg_loss: 0.543614
|
535 |
+
002280/008040, loss: 0.498013, avg_loss: 0.543247
|
536 |
+
002285/008040, loss: 0.329160, avg_loss: 0.542899
|
537 |
+
002290/008040, loss: 0.281924, avg_loss: 0.542624
|
538 |
+
002295/008040, loss: 0.366100, avg_loss: 0.542508
|
539 |
+
002300/008040, loss: 0.371017, avg_loss: 0.542094
|
540 |
+
002305/008040, loss: 0.395582, avg_loss: 0.541701
|
541 |
+
002310/008040, loss: 0.396773, avg_loss: 0.541435
|
542 |
+
002315/008040, loss: 0.382794, avg_loss: 0.541095
|
543 |
+
002320/008040, loss: 0.306365, avg_loss: 0.540708
|
544 |
+
002325/008040, loss: 0.434783, avg_loss: 0.540375
|
545 |
+
002330/008040, loss: 0.357806, avg_loss: 0.539949
|
546 |
+
002335/008040, loss: 0.380549, avg_loss: 0.539729
|
547 |
+
002340/008040, loss: 0.216732, avg_loss: 0.539146
|
548 |
+
002345/008040, loss: 0.385883, avg_loss: 0.538827
|
549 |
+
002350/008040, loss: 0.594073, avg_loss: 0.538528
|
550 |
+
002355/008040, loss: 0.413212, avg_loss: 0.538440
|
551 |
+
002360/008040, loss: 0.210932, avg_loss: 0.538051
|
552 |
+
002365/008040, loss: 0.336315, avg_loss: 0.537636
|
553 |
+
002370/008040, loss: 0.447609, avg_loss: 0.537165
|
554 |
+
002375/008040, loss: 0.580924, avg_loss: 0.537083
|
555 |
+
002380/008040, loss: 0.482136, avg_loss: 0.536826
|
556 |
+
002385/008040, loss: 0.290261, avg_loss: 0.536410
|
557 |
+
002390/008040, loss: 0.567177, avg_loss: 0.536199
|
558 |
+
002395/008040, loss: 0.543372, avg_loss: 0.535950
|
559 |
+
002400/008040, loss: 0.344522, avg_loss: 0.535539
|
560 |
+
002405/008040, loss: 0.219617, avg_loss: 0.535235
|
561 |
+
002410/008040, loss: 0.449083, avg_loss: 0.534978
|
562 |
+
***** Running dev evaluation *****
|
563 |
+
Num examples = 1042
|
564 |
+
Instantaneous batch size per device = 32
|
565 |
+
epoch 8, step 2412/8040: {'matthews_correlation': 0.22951168079779777}
|
566 |
+
002415/008040, loss: 0.347907, avg_loss: 0.534702
|
567 |
+
002420/008040, loss: 0.380767, avg_loss: 0.534286
|
568 |
+
002425/008040, loss: 0.322007, avg_loss: 0.533838
|
569 |
+
002430/008040, loss: 0.468786, avg_loss: 0.533497
|
570 |
+
002435/008040, loss: 0.309734, avg_loss: 0.533102
|
571 |
+
002440/008040, loss: 0.197897, avg_loss: 0.532586
|
572 |
+
002445/008040, loss: 0.345694, avg_loss: 0.532278
|
573 |
+
002450/008040, loss: 0.559374, avg_loss: 0.531879
|
574 |
+
002455/008040, loss: 0.311327, avg_loss: 0.531552
|
575 |
+
002460/008040, loss: 0.300930, avg_loss: 0.531075
|
576 |
+
002465/008040, loss: 0.339101, avg_loss: 0.530702
|
577 |
+
002470/008040, loss: 0.473831, avg_loss: 0.530428
|
578 |
+
002475/008040, loss: 0.467193, avg_loss: 0.530181
|
579 |
+
002480/008040, loss: 0.500907, avg_loss: 0.529953
|
580 |
+
002485/008040, loss: 0.457974, avg_loss: 0.529605
|
581 |
+
002490/008040, loss: 0.343074, avg_loss: 0.529418
|
582 |
+
002495/008040, loss: 0.444531, avg_loss: 0.529064
|
583 |
+
002500/008040, loss: 0.330952, avg_loss: 0.528731
|
584 |
+
002505/008040, loss: 0.258092, avg_loss: 0.528329
|
585 |
+
002510/008040, loss: 0.373878, avg_loss: 0.527944
|
586 |
+
002515/008040, loss: 0.298052, avg_loss: 0.527590
|
587 |
+
002520/008040, loss: 0.290030, avg_loss: 0.527212
|
588 |
+
002525/008040, loss: 0.235393, avg_loss: 0.526718
|
589 |
+
002530/008040, loss: 0.293452, avg_loss: 0.526214
|
590 |
+
002535/008040, loss: 0.193506, avg_loss: 0.525848
|
591 |
+
002540/008040, loss: 0.344110, avg_loss: 0.525426
|
592 |
+
002545/008040, loss: 0.457061, avg_loss: 0.525270
|
593 |
+
002550/008040, loss: 0.443786, avg_loss: 0.524819
|
594 |
+
002555/008040, loss: 0.245760, avg_loss: 0.524364
|
595 |
+
002560/008040, loss: 0.319549, avg_loss: 0.524011
|
596 |
+
002565/008040, loss: 0.259768, avg_loss: 0.523602
|
597 |
+
002570/008040, loss: 0.575876, avg_loss: 0.523300
|
598 |
+
002575/008040, loss: 0.411968, avg_loss: 0.522964
|
599 |
+
002580/008040, loss: 0.563390, avg_loss: 0.522671
|
600 |
+
002585/008040, loss: 0.227027, avg_loss: 0.522272
|
601 |
+
002590/008040, loss: 0.354241, avg_loss: 0.521908
|
602 |
+
002595/008040, loss: 0.265463, avg_loss: 0.521373
|
603 |
+
002600/008040, loss: 0.427238, avg_loss: 0.521074
|
604 |
+
002605/008040, loss: 0.287523, avg_loss: 0.520749
|
605 |
+
002610/008040, loss: 0.325408, avg_loss: 0.520449
|
606 |
+
002615/008040, loss: 0.423108, avg_loss: 0.520330
|
607 |
+
002620/008040, loss: 0.443160, avg_loss: 0.520100
|
608 |
+
002625/008040, loss: 0.299728, avg_loss: 0.519799
|
609 |
+
002630/008040, loss: 0.535900, avg_loss: 0.519659
|
610 |
+
002635/008040, loss: 0.479263, avg_loss: 0.519475
|
611 |
+
002640/008040, loss: 0.471956, avg_loss: 0.519183
|
612 |
+
002645/008040, loss: 0.267595, avg_loss: 0.518814
|
613 |
+
002650/008040, loss: 0.322007, avg_loss: 0.518672
|
614 |
+
002655/008040, loss: 0.332003, avg_loss: 0.518388
|
615 |
+
002660/008040, loss: 0.439392, avg_loss: 0.518129
|
616 |
+
002665/008040, loss: 0.322509, avg_loss: 0.517892
|
617 |
+
002670/008040, loss: 0.328728, avg_loss: 0.517541
|
618 |
+
002675/008040, loss: 0.224217, avg_loss: 0.517160
|
619 |
+
002680/008040, loss: 0.213649, avg_loss: 0.516849
|
620 |
+
***** Running dev evaluation *****
|
621 |
+
Num examples = 1042
|
622 |
+
Instantaneous batch size per device = 32
|
623 |
+
epoch 9, step 2680/8040: {'matthews_correlation': 0.23063296136375847}
|
624 |
+
002685/008040, loss: 0.357351, avg_loss: 0.516590
|
625 |
+
002690/008040, loss: 0.191270, avg_loss: 0.516160
|
626 |
+
002695/008040, loss: 0.370189, avg_loss: 0.515724
|
627 |
+
002700/008040, loss: 0.336470, avg_loss: 0.515376
|
628 |
+
002705/008040, loss: 0.268067, avg_loss: 0.515164
|
629 |
+
002710/008040, loss: 0.179003, avg_loss: 0.514576
|
630 |
+
002715/008040, loss: 0.427791, avg_loss: 0.514281
|
631 |
+
002720/008040, loss: 0.361250, avg_loss: 0.513829
|
632 |
+
002725/008040, loss: 0.189704, avg_loss: 0.513455
|
633 |
+
002730/008040, loss: 0.273372, avg_loss: 0.513143
|
634 |
+
002735/008040, loss: 0.239124, avg_loss: 0.512584
|
635 |
+
002740/008040, loss: 0.378276, avg_loss: 0.512291
|
636 |
+
002745/008040, loss: 0.646600, avg_loss: 0.512093
|
637 |
+
002750/008040, loss: 0.267004, avg_loss: 0.511666
|
638 |
+
002755/008040, loss: 0.422471, avg_loss: 0.511320
|
639 |
+
002760/008040, loss: 0.209467, avg_loss: 0.510807
|
640 |
+
002765/008040, loss: 0.346109, avg_loss: 0.510532
|
641 |
+
002770/008040, loss: 0.233545, avg_loss: 0.510248
|
642 |
+
002775/008040, loss: 0.466287, avg_loss: 0.509798
|
643 |
+
002780/008040, loss: 0.403711, avg_loss: 0.509469
|
644 |
+
002785/008040, loss: 0.327383, avg_loss: 0.509096
|
645 |
+
002790/008040, loss: 0.404777, avg_loss: 0.508683
|
646 |
+
002795/008040, loss: 0.542493, avg_loss: 0.508458
|
647 |
+
002800/008040, loss: 0.367202, avg_loss: 0.508142
|
648 |
+
002805/008040, loss: 0.368768, avg_loss: 0.507783
|
649 |
+
002810/008040, loss: 0.312175, avg_loss: 0.507447
|
650 |
+
002815/008040, loss: 0.302803, avg_loss: 0.507226
|
651 |
+
002820/008040, loss: 0.401391, avg_loss: 0.507006
|
652 |
+
002825/008040, loss: 0.244652, avg_loss: 0.506694
|
653 |
+
002830/008040, loss: 0.370769, avg_loss: 0.506369
|
654 |
+
002835/008040, loss: 0.182358, avg_loss: 0.505992
|
655 |
+
002840/008040, loss: 0.266695, avg_loss: 0.505660
|
656 |
+
002845/008040, loss: 0.334674, avg_loss: 0.505358
|
657 |
+
002850/008040, loss: 0.348623, avg_loss: 0.505099
|
658 |
+
002855/008040, loss: 0.187014, avg_loss: 0.504682
|
659 |
+
002860/008040, loss: 0.259943, avg_loss: 0.504276
|
660 |
+
002865/008040, loss: 0.382271, avg_loss: 0.503964
|
661 |
+
002870/008040, loss: 0.570188, avg_loss: 0.503794
|
662 |
+
002875/008040, loss: 0.218833, avg_loss: 0.503395
|
663 |
+
002880/008040, loss: 0.356927, avg_loss: 0.502927
|
664 |
+
002885/008040, loss: 0.173357, avg_loss: 0.502596
|
665 |
+
002890/008040, loss: 0.360153, avg_loss: 0.502279
|
666 |
+
002895/008040, loss: 0.295114, avg_loss: 0.501944
|
667 |
+
002900/008040, loss: 0.210005, avg_loss: 0.501620
|
668 |
+
002905/008040, loss: 0.300519, avg_loss: 0.501388
|
669 |
+
002910/008040, loss: 0.207313, avg_loss: 0.501142
|
670 |
+
002915/008040, loss: 0.333078, avg_loss: 0.500983
|
671 |
+
002920/008040, loss: 0.285614, avg_loss: 0.500578
|
672 |
+
002925/008040, loss: 0.373211, avg_loss: 0.500342
|
673 |
+
002930/008040, loss: 0.308893, avg_loss: 0.500114
|
674 |
+
002935/008040, loss: 0.350144, avg_loss: 0.499820
|
675 |
+
002940/008040, loss: 0.439544, avg_loss: 0.499650
|
676 |
+
002945/008040, loss: 0.320980, avg_loss: 0.499399
|
677 |
+
***** Running dev evaluation *****
|
678 |
+
Num examples = 1042
|
679 |
+
Instantaneous batch size per device = 32
|
680 |
+
epoch 10, step 2948/8040: {'matthews_correlation': 0.18813850606847293}
|
681 |
+
002950/008040, loss: 0.588822, avg_loss: 0.499118
|
682 |
+
002955/008040, loss: 0.375319, avg_loss: 0.498777
|
683 |
+
002960/008040, loss: 0.218809, avg_loss: 0.498292
|
684 |
+
002965/008040, loss: 0.346797, avg_loss: 0.497883
|
685 |
+
002970/008040, loss: 0.256976, avg_loss: 0.497420
|
686 |
+
002975/008040, loss: 0.267139, avg_loss: 0.496947
|
687 |
+
002980/008040, loss: 0.096225, avg_loss: 0.496468
|
688 |
+
002985/008040, loss: 0.540555, avg_loss: 0.496152
|
689 |
+
002990/008040, loss: 0.360075, avg_loss: 0.495919
|
690 |
+
002995/008040, loss: 0.439329, avg_loss: 0.495439
|
691 |
+
003000/008040, loss: 0.407537, avg_loss: 0.495246
|
692 |
+
003005/008040, loss: 0.148738, avg_loss: 0.494893
|
693 |
+
003010/008040, loss: 0.212634, avg_loss: 0.494479
|
694 |
+
003015/008040, loss: 0.413274, avg_loss: 0.494294
|
695 |
+
003020/008040, loss: 0.505027, avg_loss: 0.493973
|
696 |
+
003025/008040, loss: 0.353874, avg_loss: 0.493663
|
697 |
+
003030/008040, loss: 0.191752, avg_loss: 0.493211
|
698 |
+
003035/008040, loss: 0.262918, avg_loss: 0.492834
|
699 |
+
003040/008040, loss: 0.251038, avg_loss: 0.492580
|
700 |
+
003045/008040, loss: 0.291340, avg_loss: 0.492184
|
701 |
+
003050/008040, loss: 0.387451, avg_loss: 0.491783
|
702 |
+
003055/008040, loss: 0.277742, avg_loss: 0.491582
|
703 |
+
003060/008040, loss: 0.147416, avg_loss: 0.491181
|
704 |
+
003065/008040, loss: 0.383072, avg_loss: 0.490947
|
705 |
+
003070/008040, loss: 0.332592, avg_loss: 0.490599
|
706 |
+
003075/008040, loss: 0.376683, avg_loss: 0.490302
|
707 |
+
003080/008040, loss: 0.316549, avg_loss: 0.490015
|
708 |
+
003085/008040, loss: 0.248254, avg_loss: 0.489692
|
709 |
+
003090/008040, loss: 0.446958, avg_loss: 0.489378
|
710 |
+
003095/008040, loss: 0.534116, avg_loss: 0.489182
|
711 |
+
003100/008040, loss: 0.186525, avg_loss: 0.488832
|
712 |
+
003105/008040, loss: 0.367679, avg_loss: 0.488583
|
713 |
+
003110/008040, loss: 0.263721, avg_loss: 0.488235
|
714 |
+
003115/008040, loss: 0.160604, avg_loss: 0.487953
|
715 |
+
003120/008040, loss: 0.351810, avg_loss: 0.487793
|
716 |
+
003125/008040, loss: 0.282861, avg_loss: 0.487567
|
717 |
+
003130/008040, loss: 0.291616, avg_loss: 0.487259
|
718 |
+
003135/008040, loss: 0.215605, avg_loss: 0.486928
|
719 |
+
003140/008040, loss: 0.497842, avg_loss: 0.486740
|
720 |
+
003145/008040, loss: 0.141915, avg_loss: 0.486475
|
721 |
+
003150/008040, loss: 0.493731, avg_loss: 0.486197
|
722 |
+
003155/008040, loss: 0.337679, avg_loss: 0.486007
|
723 |
+
003160/008040, loss: 0.423936, avg_loss: 0.485696
|
724 |
+
003165/008040, loss: 0.325907, avg_loss: 0.485502
|
725 |
+
003170/008040, loss: 0.373995, avg_loss: 0.485242
|
726 |
+
003175/008040, loss: 0.221332, avg_loss: 0.484958
|
727 |
+
003180/008040, loss: 0.245173, avg_loss: 0.484625
|
728 |
+
003185/008040, loss: 0.243983, avg_loss: 0.484395
|
729 |
+
003190/008040, loss: 0.279999, avg_loss: 0.484095
|
730 |
+
003195/008040, loss: 0.289218, avg_loss: 0.483891
|
731 |
+
003200/008040, loss: 0.619335, avg_loss: 0.483651
|
732 |
+
003205/008040, loss: 0.231071, avg_loss: 0.483458
|
733 |
+
003210/008040, loss: 0.285882, avg_loss: 0.483162
|
734 |
+
003215/008040, loss: 0.306520, avg_loss: 0.482869
|
735 |
+
***** Running dev evaluation *****
|
736 |
+
Num examples = 1042
|
737 |
+
Instantaneous batch size per device = 32
|
738 |
+
epoch 11, step 3216/8040: {'matthews_correlation': 0.20603205189543294}
|
739 |
+
003220/008040, loss: 0.297244, avg_loss: 0.482532
|
740 |
+
003225/008040, loss: 0.182570, avg_loss: 0.482196
|
741 |
+
003230/008040, loss: 0.242626, avg_loss: 0.481782
|
742 |
+
003235/008040, loss: 0.463844, avg_loss: 0.481595
|
743 |
+
003240/008040, loss: 0.132137, avg_loss: 0.481325
|
744 |
+
003245/008040, loss: 0.392527, avg_loss: 0.481101
|
745 |
+
003250/008040, loss: 0.270020, avg_loss: 0.480822
|
746 |
+
003255/008040, loss: 0.137306, avg_loss: 0.480509
|
747 |
+
003260/008040, loss: 0.391590, avg_loss: 0.480182
|
748 |
+
003265/008040, loss: 0.262886, avg_loss: 0.479862
|
749 |
+
003270/008040, loss: 0.058543, avg_loss: 0.479458
|
750 |
+
003275/008040, loss: 0.404657, avg_loss: 0.479210
|
751 |
+
003280/008040, loss: 0.276438, avg_loss: 0.478893
|
752 |
+
003285/008040, loss: 0.190946, avg_loss: 0.478536
|
753 |
+
003290/008040, loss: 0.490697, avg_loss: 0.478209
|
754 |
+
003295/008040, loss: 0.276675, avg_loss: 0.478014
|
755 |
+
003300/008040, loss: 0.402889, avg_loss: 0.477756
|
756 |
+
003305/008040, loss: 0.233364, avg_loss: 0.477388
|
757 |
+
003310/008040, loss: 0.231294, avg_loss: 0.477059
|
758 |
+
003315/008040, loss: 0.109852, avg_loss: 0.476722
|
759 |
+
003320/008040, loss: 0.320070, avg_loss: 0.476551
|
760 |
+
003325/008040, loss: 0.244717, avg_loss: 0.476204
|
761 |
+
003330/008040, loss: 0.181756, avg_loss: 0.475863
|
762 |
+
003335/008040, loss: 0.222641, avg_loss: 0.475576
|
763 |
+
003340/008040, loss: 0.121102, avg_loss: 0.475165
|
764 |
+
003345/008040, loss: 0.265407, avg_loss: 0.474816
|
765 |
+
003350/008040, loss: 0.322964, avg_loss: 0.474535
|
766 |
+
003355/008040, loss: 0.237767, avg_loss: 0.474252
|
767 |
+
003360/008040, loss: 0.343557, avg_loss: 0.473983
|
768 |
+
003365/008040, loss: 0.257172, avg_loss: 0.473661
|
769 |
+
003370/008040, loss: 0.260944, avg_loss: 0.473336
|
770 |
+
003375/008040, loss: 0.292535, avg_loss: 0.473136
|
771 |
+
003380/008040, loss: 0.228900, avg_loss: 0.472876
|
772 |
+
003385/008040, loss: 0.133238, avg_loss: 0.472483
|
773 |
+
003390/008040, loss: 0.090823, avg_loss: 0.472257
|
774 |
+
003395/008040, loss: 0.353693, avg_loss: 0.471962
|
775 |
+
003400/008040, loss: 0.349422, avg_loss: 0.471682
|
776 |
+
003405/008040, loss: 0.257864, avg_loss: 0.471517
|
777 |
+
003410/008040, loss: 0.252595, avg_loss: 0.471166
|
778 |
+
003415/008040, loss: 0.247344, avg_loss: 0.470882
|
779 |
+
003420/008040, loss: 0.263462, avg_loss: 0.470580
|
780 |
+
003425/008040, loss: 0.343597, avg_loss: 0.470315
|
781 |
+
003430/008040, loss: 0.544077, avg_loss: 0.470094
|
782 |
+
003435/008040, loss: 0.303536, avg_loss: 0.469789
|
783 |
+
003440/008040, loss: 0.327594, avg_loss: 0.469431
|
784 |
+
003445/008040, loss: 0.325582, avg_loss: 0.469118
|
785 |
+
003450/008040, loss: 0.270005, avg_loss: 0.468807
|
786 |
+
003455/008040, loss: 0.373651, avg_loss: 0.468467
|
787 |
+
003460/008040, loss: 0.296829, avg_loss: 0.468144
|
788 |
+
003465/008040, loss: 0.382215, avg_loss: 0.467829
|
789 |
+
003470/008040, loss: 0.412389, avg_loss: 0.467540
|
790 |
+
003475/008040, loss: 0.434352, avg_loss: 0.467421
|
791 |
+
003480/008040, loss: 0.624913, avg_loss: 0.467201
|
792 |
+
***** Running dev evaluation *****
|
793 |
+
Num examples = 1042
|
794 |
+
Instantaneous batch size per device = 32
|
795 |
+
epoch 12, step 3484/8040: {'matthews_correlation': 0.2118432448298745}
|
796 |
+
003485/008040, loss: 0.174334, avg_loss: 0.467056
|
797 |
+
003490/008040, loss: 0.165397, avg_loss: 0.466660
|
798 |
+
003495/008040, loss: 0.265744, avg_loss: 0.466368
|
799 |
+
003500/008040, loss: 0.194233, avg_loss: 0.466006
|
800 |
+
003505/008040, loss: 0.293150, avg_loss: 0.465843
|
801 |
+
003510/008040, loss: 0.190316, avg_loss: 0.465447
|
802 |
+
003515/008040, loss: 0.186641, avg_loss: 0.465075
|
803 |
+
003520/008040, loss: 0.197128, avg_loss: 0.464644
|
804 |
+
003525/008040, loss: 0.258738, avg_loss: 0.464389
|
805 |
+
003530/008040, loss: 0.500658, avg_loss: 0.464118
|
806 |
+
003535/008040, loss: 0.181811, avg_loss: 0.463812
|
807 |
+
003540/008040, loss: 0.255612, avg_loss: 0.463451
|
808 |
+
003545/008040, loss: 0.301980, avg_loss: 0.463198
|
809 |
+
003550/008040, loss: 0.262177, avg_loss: 0.462909
|
810 |
+
003555/008040, loss: 0.150805, avg_loss: 0.462686
|
811 |
+
003560/008040, loss: 0.106480, avg_loss: 0.462353
|
812 |
+
003565/008040, loss: 0.353977, avg_loss: 0.462018
|
813 |
+
003570/008040, loss: 0.095433, avg_loss: 0.461672
|
814 |
+
003575/008040, loss: 0.218111, avg_loss: 0.461353
|
815 |
+
003580/008040, loss: 0.149131, avg_loss: 0.461044
|
816 |
+
003585/008040, loss: 0.267326, avg_loss: 0.460682
|
817 |
+
003590/008040, loss: 0.132778, avg_loss: 0.460332
|
818 |
+
003595/008040, loss: 0.084128, avg_loss: 0.460019
|
819 |
+
003600/008040, loss: 0.240429, avg_loss: 0.459736
|
820 |
+
003605/008040, loss: 0.087250, avg_loss: 0.459388
|
821 |
+
003610/008040, loss: 0.350120, avg_loss: 0.459246
|
822 |
+
003615/008040, loss: 0.268835, avg_loss: 0.458947
|
823 |
+
003620/008040, loss: 0.269842, avg_loss: 0.458634
|
824 |
+
003625/008040, loss: 0.156989, avg_loss: 0.458435
|
825 |
+
003630/008040, loss: 0.230001, avg_loss: 0.458124
|
826 |
+
003635/008040, loss: 0.326509, avg_loss: 0.457910
|
827 |
+
003640/008040, loss: 0.336903, avg_loss: 0.457636
|
828 |
+
003645/008040, loss: 0.484366, avg_loss: 0.457448
|
829 |
+
003650/008040, loss: 0.292469, avg_loss: 0.457143
|
830 |
+
003655/008040, loss: 0.196029, avg_loss: 0.456941
|
831 |
+
003660/008040, loss: 0.388540, avg_loss: 0.456735
|
832 |
+
003665/008040, loss: 0.392700, avg_loss: 0.456447
|
833 |
+
003670/008040, loss: 0.214234, avg_loss: 0.456100
|
834 |
+
003675/008040, loss: 0.166740, avg_loss: 0.455776
|
835 |
+
003680/008040, loss: 0.476048, avg_loss: 0.455626
|
836 |
+
003685/008040, loss: 0.133647, avg_loss: 0.455296
|
837 |
+
003690/008040, loss: 0.138388, avg_loss: 0.454962
|
838 |
+
003695/008040, loss: 0.419241, avg_loss: 0.454699
|
839 |
+
003700/008040, loss: 0.273247, avg_loss: 0.454571
|
840 |
+
003705/008040, loss: 0.335091, avg_loss: 0.454264
|
841 |
+
003710/008040, loss: 0.352429, avg_loss: 0.454025
|
842 |
+
003715/008040, loss: 0.424709, avg_loss: 0.453788
|
843 |
+
003720/008040, loss: 0.169942, avg_loss: 0.453501
|
844 |
+
003725/008040, loss: 0.356818, avg_loss: 0.453249
|
845 |
+
003730/008040, loss: 0.165223, avg_loss: 0.452948
|
846 |
+
003735/008040, loss: 0.186675, avg_loss: 0.452718
|
847 |
+
003740/008040, loss: 0.381955, avg_loss: 0.452507
|
848 |
+
003745/008040, loss: 0.304955, avg_loss: 0.452266
|
849 |
+
003750/008040, loss: 0.405848, avg_loss: 0.452040
|
850 |
+
***** Running dev evaluation *****
|
851 |
+
Num examples = 1042
|
852 |
+
Instantaneous batch size per device = 32
|
853 |
+
epoch 13, step 3752/8040: {'matthews_correlation': 0.20261239362380884}
|
854 |
+
003755/008040, loss: 0.216770, avg_loss: 0.451750
|
855 |
+
003760/008040, loss: 0.223490, avg_loss: 0.451394
|
856 |
+
003765/008040, loss: 0.375553, avg_loss: 0.451131
|
857 |
+
003770/008040, loss: 0.196744, avg_loss: 0.450761
|
858 |
+
003775/008040, loss: 0.363349, avg_loss: 0.450549
|
859 |
+
003780/008040, loss: 0.293711, avg_loss: 0.450257
|
860 |
+
003785/008040, loss: 0.413372, avg_loss: 0.450013
|
861 |
+
003790/008040, loss: 0.122226, avg_loss: 0.449670
|
862 |
+
003795/008040, loss: 0.356951, avg_loss: 0.449397
|
863 |
+
003800/008040, loss: 0.118402, avg_loss: 0.449095
|
864 |
+
003805/008040, loss: 0.146611, avg_loss: 0.448795
|
865 |
+
003810/008040, loss: 0.157524, avg_loss: 0.448482
|
866 |
+
003815/008040, loss: 0.343435, avg_loss: 0.448149
|
867 |
+
003820/008040, loss: 0.368691, avg_loss: 0.447827
|
868 |
+
003825/008040, loss: 0.201137, avg_loss: 0.447525
|
869 |
+
003830/008040, loss: 0.132014, avg_loss: 0.447167
|
870 |
+
003835/008040, loss: 0.090910, avg_loss: 0.446837
|
871 |
+
003840/008040, loss: 0.232675, avg_loss: 0.446527
|
872 |
+
003845/008040, loss: 0.275011, avg_loss: 0.446207
|
873 |
+
003850/008040, loss: 0.152451, avg_loss: 0.446016
|
874 |
+
003855/008040, loss: 0.314412, avg_loss: 0.445785
|
875 |
+
003860/008040, loss: 0.148032, avg_loss: 0.445503
|
876 |
+
003865/008040, loss: 0.165825, avg_loss: 0.445180
|
877 |
+
003870/008040, loss: 0.094333, avg_loss: 0.444829
|
878 |
+
003875/008040, loss: 0.375745, avg_loss: 0.444747
|
879 |
+
003880/008040, loss: 0.403074, avg_loss: 0.444519
|
880 |
+
003885/008040, loss: 0.165065, avg_loss: 0.444287
|
881 |
+
003890/008040, loss: 0.234658, avg_loss: 0.444039
|
882 |
+
003895/008040, loss: 0.319576, avg_loss: 0.443776
|
883 |
+
003900/008040, loss: 0.217123, avg_loss: 0.443509
|
884 |
+
003905/008040, loss: 0.378043, avg_loss: 0.443243
|
885 |
+
003910/008040, loss: 0.122735, avg_loss: 0.443042
|
886 |
+
003915/008040, loss: 0.264233, avg_loss: 0.442776
|
887 |
+
003920/008040, loss: 0.076704, avg_loss: 0.442449
|
888 |
+
003925/008040, loss: 0.125913, avg_loss: 0.442138
|
889 |
+
003930/008040, loss: 0.272993, avg_loss: 0.441843
|
890 |
+
003935/008040, loss: 0.068447, avg_loss: 0.441514
|
891 |
+
003940/008040, loss: 0.244199, avg_loss: 0.441180
|
892 |
+
003945/008040, loss: 0.228176, avg_loss: 0.440980
|
893 |
+
003950/008040, loss: 0.148637, avg_loss: 0.440699
|
894 |
+
003955/008040, loss: 0.299796, avg_loss: 0.440381
|
895 |
+
003960/008040, loss: 0.276640, avg_loss: 0.440127
|
896 |
+
003965/008040, loss: 0.344766, avg_loss: 0.439945
|
897 |
+
003970/008040, loss: 0.085695, avg_loss: 0.439692
|
898 |
+
003975/008040, loss: 0.467870, avg_loss: 0.439398
|
899 |
+
003980/008040, loss: 0.275529, avg_loss: 0.439101
|
900 |
+
003985/008040, loss: 0.342526, avg_loss: 0.438883
|
901 |
+
003990/008040, loss: 0.117043, avg_loss: 0.438652
|
902 |
+
003995/008040, loss: 0.216054, avg_loss: 0.438416
|
903 |
+
004000/008040, loss: 0.194812, avg_loss: 0.438143
|
904 |
+
004005/008040, loss: 0.173094, avg_loss: 0.437904
|
905 |
+
004010/008040, loss: 0.072183, avg_loss: 0.437564
|
906 |
+
004015/008040, loss: 0.403915, avg_loss: 0.437357
|
907 |
+
004020/008040, loss: 0.087834, avg_loss: 0.436997
|
908 |
+
***** Running dev evaluation *****
|
909 |
+
Num examples = 1042
|
910 |
+
Instantaneous batch size per device = 32
|
911 |
+
epoch 14, step 4020/8040: {'matthews_correlation': 0.22518881045488998}
|
912 |
+
004025/008040, loss: 0.165382, avg_loss: 0.436778
|
913 |
+
004030/008040, loss: 0.179677, avg_loss: 0.436415
|
914 |
+
004035/008040, loss: 0.185260, avg_loss: 0.436160
|
915 |
+
004040/008040, loss: 0.095814, avg_loss: 0.435886
|
916 |
+
004045/008040, loss: 0.345136, avg_loss: 0.435673
|
917 |
+
004050/008040, loss: 0.150750, avg_loss: 0.435363
|
918 |
+
004055/008040, loss: 0.185758, avg_loss: 0.435133
|
919 |
+
004060/008040, loss: 0.212922, avg_loss: 0.434953
|
920 |
+
004065/008040, loss: 0.149902, avg_loss: 0.434669
|
921 |
+
004070/008040, loss: 0.089165, avg_loss: 0.434428
|
922 |
+
004075/008040, loss: 0.168942, avg_loss: 0.434088
|
923 |
+
004080/008040, loss: 0.170014, avg_loss: 0.433740
|
924 |
+
004085/008040, loss: 0.148718, avg_loss: 0.433445
|
925 |
+
004090/008040, loss: 0.307841, avg_loss: 0.433193
|
926 |
+
004095/008040, loss: 0.170424, avg_loss: 0.432888
|
927 |
+
004100/008040, loss: 0.253233, avg_loss: 0.432721
|
928 |
+
004105/008040, loss: 0.243379, avg_loss: 0.432492
|
929 |
+
004110/008040, loss: 0.197091, avg_loss: 0.432229
|
930 |
+
004115/008040, loss: 0.149977, avg_loss: 0.431969
|
931 |
+
004120/008040, loss: 0.255616, avg_loss: 0.431698
|
932 |
+
004125/008040, loss: 0.144500, avg_loss: 0.431442
|
933 |
+
004130/008040, loss: 0.214776, avg_loss: 0.431226
|
934 |
+
004135/008040, loss: 0.155176, avg_loss: 0.430925
|
935 |
+
004140/008040, loss: 0.195912, avg_loss: 0.430689
|
936 |
+
004145/008040, loss: 0.148231, avg_loss: 0.430308
|
937 |
+
004150/008040, loss: 0.309211, avg_loss: 0.430062
|
938 |
+
004155/008040, loss: 0.400494, avg_loss: 0.429949
|
939 |
+
004160/008040, loss: 0.190466, avg_loss: 0.429596
|
940 |
+
004165/008040, loss: 0.115031, avg_loss: 0.429430
|
941 |
+
004170/008040, loss: 0.154674, avg_loss: 0.429143
|
942 |
+
004175/008040, loss: 0.087612, avg_loss: 0.428819
|
943 |
+
004180/008040, loss: 0.391154, avg_loss: 0.428581
|
944 |
+
004185/008040, loss: 0.233200, avg_loss: 0.428438
|
945 |
+
004190/008040, loss: 0.113590, avg_loss: 0.428204
|
946 |
+
004195/008040, loss: 0.139859, avg_loss: 0.427997
|
947 |
+
004200/008040, loss: 0.365552, avg_loss: 0.427729
|
948 |
+
004205/008040, loss: 0.285945, avg_loss: 0.427567
|
949 |
+
004210/008040, loss: 0.298795, avg_loss: 0.427337
|
950 |
+
004215/008040, loss: 0.184676, avg_loss: 0.427015
|
951 |
+
004220/008040, loss: 0.347303, avg_loss: 0.426763
|
952 |
+
004225/008040, loss: 0.249475, avg_loss: 0.426473
|
953 |
+
004230/008040, loss: 0.345056, avg_loss: 0.426234
|
954 |
+
004235/008040, loss: 0.132455, avg_loss: 0.425935
|
955 |
+
004240/008040, loss: 0.083139, avg_loss: 0.425697
|
956 |
+
004245/008040, loss: 0.186649, avg_loss: 0.425451
|
957 |
+
004250/008040, loss: 0.159150, avg_loss: 0.425129
|
958 |
+
004255/008040, loss: 0.119297, avg_loss: 0.424885
|
959 |
+
004260/008040, loss: 0.233108, avg_loss: 0.424649
|
960 |
+
004265/008040, loss: 0.144114, avg_loss: 0.424408
|
961 |
+
004270/008040, loss: 0.210518, avg_loss: 0.424164
|
962 |
+
004275/008040, loss: 0.242731, avg_loss: 0.423926
|
963 |
+
004280/008040, loss: 0.157653, avg_loss: 0.423696
|
964 |
+
004285/008040, loss: 0.191035, avg_loss: 0.423368
|
965 |
+
***** Running dev evaluation *****
|
966 |
+
Num examples = 1042
|
967 |
+
Instantaneous batch size per device = 32
|
968 |
+
epoch 15, step 4288/8040: {'matthews_correlation': 0.24863648291608131}
|
969 |
+
004290/008040, loss: 0.266971, avg_loss: 0.423141
|
970 |
+
004295/008040, loss: 0.322248, avg_loss: 0.422900
|
971 |
+
004300/008040, loss: 0.231828, avg_loss: 0.422700
|
972 |
+
004305/008040, loss: 0.072297, avg_loss: 0.422383
|
973 |
+
004310/008040, loss: 0.193845, avg_loss: 0.422057
|
974 |
+
004315/008040, loss: 0.217596, avg_loss: 0.421784
|
975 |
+
004320/008040, loss: 0.393519, avg_loss: 0.421559
|
976 |
+
004325/008040, loss: 0.400877, avg_loss: 0.421312
|
977 |
+
004330/008040, loss: 0.087280, avg_loss: 0.420988
|
978 |
+
004335/008040, loss: 0.124804, avg_loss: 0.420766
|
979 |
+
004340/008040, loss: 0.130229, avg_loss: 0.420461
|
980 |
+
004345/008040, loss: 0.304618, avg_loss: 0.420192
|
981 |
+
004350/008040, loss: 0.189475, avg_loss: 0.419871
|
982 |
+
004355/008040, loss: 0.203492, avg_loss: 0.419608
|
983 |
+
004360/008040, loss: 0.144623, avg_loss: 0.419307
|
984 |
+
004365/008040, loss: 0.127642, avg_loss: 0.418997
|
985 |
+
004370/008040, loss: 0.218811, avg_loss: 0.418663
|
986 |
+
004375/008040, loss: 0.059012, avg_loss: 0.418390
|
987 |
+
004380/008040, loss: 0.237954, avg_loss: 0.418244
|
988 |
+
004385/008040, loss: 0.248724, avg_loss: 0.418026
|
989 |
+
004390/008040, loss: 0.160703, avg_loss: 0.417783
|
990 |
+
004395/008040, loss: 0.276666, avg_loss: 0.417556
|
991 |
+
004400/008040, loss: 0.101335, avg_loss: 0.417345
|
992 |
+
004405/008040, loss: 0.468661, avg_loss: 0.417191
|
993 |
+
004410/008040, loss: 0.154267, avg_loss: 0.416879
|
994 |
+
004415/008040, loss: 0.196224, avg_loss: 0.416633
|
995 |
+
004420/008040, loss: 0.092791, avg_loss: 0.416405
|
996 |
+
004425/008040, loss: 0.447025, avg_loss: 0.416244
|
997 |
+
004430/008040, loss: 0.225542, avg_loss: 0.415959
|
998 |
+
004435/008040, loss: 0.091548, avg_loss: 0.415671
|
999 |
+
004440/008040, loss: 0.116130, avg_loss: 0.415333
|
1000 |
+
004445/008040, loss: 0.225495, avg_loss: 0.415069
|
1001 |
+
004450/008040, loss: 0.213666, avg_loss: 0.414828
|
1002 |
+
004455/008040, loss: 0.130709, avg_loss: 0.414580
|
1003 |
+
004460/008040, loss: 0.225696, avg_loss: 0.414366
|
1004 |
+
004465/008040, loss: 0.322512, avg_loss: 0.414150
|
1005 |
+
004470/008040, loss: 0.409171, avg_loss: 0.413917
|
1006 |
+
004475/008040, loss: 0.459070, avg_loss: 0.413800
|
1007 |
+
004480/008040, loss: 0.205403, avg_loss: 0.413537
|
1008 |
+
004485/008040, loss: 0.097172, avg_loss: 0.413289
|
1009 |
+
004490/008040, loss: 0.104971, avg_loss: 0.413039
|
1010 |
+
004495/008040, loss: 0.269551, avg_loss: 0.412801
|
1011 |
+
004500/008040, loss: 0.151229, avg_loss: 0.412506
|
1012 |
+
004505/008040, loss: 0.137360, avg_loss: 0.412240
|
1013 |
+
004510/008040, loss: 0.349339, avg_loss: 0.412084
|
1014 |
+
004515/008040, loss: 0.074355, avg_loss: 0.411830
|
1015 |
+
004520/008040, loss: 0.165137, avg_loss: 0.411674
|
1016 |
+
004525/008040, loss: 0.120821, avg_loss: 0.411390
|
1017 |
+
004530/008040, loss: 0.156756, avg_loss: 0.411117
|
1018 |
+
004535/008040, loss: 0.131685, avg_loss: 0.410892
|
1019 |
+
004540/008040, loss: 0.215486, avg_loss: 0.410740
|
1020 |
+
004545/008040, loss: 0.276792, avg_loss: 0.410564
|
1021 |
+
004550/008040, loss: 0.163451, avg_loss: 0.410302
|
1022 |
+
004555/008040, loss: 0.153240, avg_loss: 0.410035
|
1023 |
+
***** Running dev evaluation *****
|
1024 |
+
Num examples = 1042
|
1025 |
+
Instantaneous batch size per device = 32
|
1026 |
+
epoch 16, step 4556/8040: {'matthews_correlation': 0.19984853723708582}
|
1027 |
+
004560/008040, loss: 0.091924, avg_loss: 0.409807
|
1028 |
+
004565/008040, loss: 0.285278, avg_loss: 0.409558
|
1029 |
+
004570/008040, loss: 0.153244, avg_loss: 0.409386
|
1030 |
+
004575/008040, loss: 0.117640, avg_loss: 0.409192
|
1031 |
+
004580/008040, loss: 0.196797, avg_loss: 0.408980
|
1032 |
+
004585/008040, loss: 0.170434, avg_loss: 0.408724
|
1033 |
+
004590/008040, loss: 0.291520, avg_loss: 0.408442
|
1034 |
+
004595/008040, loss: 0.095928, avg_loss: 0.408124
|
1035 |
+
004600/008040, loss: 0.133423, avg_loss: 0.407880
|
1036 |
+
004605/008040, loss: 0.224401, avg_loss: 0.407606
|
1037 |
+
004610/008040, loss: 0.244196, avg_loss: 0.407368
|
1038 |
+
004615/008040, loss: 0.086107, avg_loss: 0.407023
|
1039 |
+
004620/008040, loss: 0.088616, avg_loss: 0.406692
|
1040 |
+
004625/008040, loss: 0.182435, avg_loss: 0.406431
|
1041 |
+
004630/008040, loss: 0.108714, avg_loss: 0.406193
|
1042 |
+
004635/008040, loss: 0.052255, avg_loss: 0.405910
|
1043 |
+
004640/008040, loss: 0.201341, avg_loss: 0.405591
|
1044 |
+
004645/008040, loss: 0.125202, avg_loss: 0.405348
|
1045 |
+
004650/008040, loss: 0.262958, avg_loss: 0.405130
|
1046 |
+
004655/008040, loss: 0.136696, avg_loss: 0.404868
|
1047 |
+
004660/008040, loss: 0.232297, avg_loss: 0.404751
|
1048 |
+
004665/008040, loss: 0.169257, avg_loss: 0.404498
|
1049 |
+
004670/008040, loss: 0.086830, avg_loss: 0.404246
|
1050 |
+
004675/008040, loss: 0.199137, avg_loss: 0.404007
|
1051 |
+
004680/008040, loss: 0.169171, avg_loss: 0.403782
|
1052 |
+
004685/008040, loss: 0.112735, avg_loss: 0.403494
|
1053 |
+
004690/008040, loss: 0.240913, avg_loss: 0.403283
|
1054 |
+
004695/008040, loss: 0.228971, avg_loss: 0.403096
|
1055 |
+
004700/008040, loss: 0.203035, avg_loss: 0.402813
|
1056 |
+
004705/008040, loss: 0.252691, avg_loss: 0.402542
|
1057 |
+
004710/008040, loss: 0.183577, avg_loss: 0.402340
|
1058 |
+
004715/008040, loss: 0.152381, avg_loss: 0.402066
|
1059 |
+
004720/008040, loss: 0.081548, avg_loss: 0.401885
|
1060 |
+
004725/008040, loss: 0.243844, avg_loss: 0.401712
|
1061 |
+
004730/008040, loss: 0.191078, avg_loss: 0.401477
|
1062 |
+
004735/008040, loss: 0.309967, avg_loss: 0.401239
|
1063 |
+
004740/008040, loss: 0.177741, avg_loss: 0.400996
|
1064 |
+
004745/008040, loss: 0.443657, avg_loss: 0.400854
|
1065 |
+
004750/008040, loss: 0.174745, avg_loss: 0.400599
|
1066 |
+
004755/008040, loss: 0.117440, avg_loss: 0.400411
|
1067 |
+
004760/008040, loss: 0.216662, avg_loss: 0.400179
|
1068 |
+
004765/008040, loss: 0.331156, avg_loss: 0.399990
|
1069 |
+
004770/008040, loss: 0.239916, avg_loss: 0.399799
|
1070 |
+
004775/008040, loss: 0.392543, avg_loss: 0.399598
|
1071 |
+
004780/008040, loss: 0.310010, avg_loss: 0.399412
|
1072 |
+
004785/008040, loss: 0.176596, avg_loss: 0.399203
|
1073 |
+
004790/008040, loss: 0.193463, avg_loss: 0.399010
|
1074 |
+
004795/008040, loss: 0.065539, avg_loss: 0.398731
|
1075 |
+
004800/008040, loss: 0.078370, avg_loss: 0.398525
|
1076 |
+
004805/008040, loss: 0.268404, avg_loss: 0.398395
|
1077 |
+
004810/008040, loss: 0.130425, avg_loss: 0.398184
|
1078 |
+
004815/008040, loss: 0.124595, avg_loss: 0.397953
|
1079 |
+
004820/008040, loss: 0.351294, avg_loss: 0.397781
|
1080 |
+
***** Running dev evaluation *****
|
1081 |
+
Num examples = 1042
|
1082 |
+
Instantaneous batch size per device = 32
|
1083 |
+
epoch 17, step 4824/8040: {'matthews_correlation': 0.23319244596326755}
|
1084 |
+
004825/008040, loss: 0.117143, avg_loss: 0.397605
|
1085 |
+
004830/008040, loss: 0.085320, avg_loss: 0.397402
|
1086 |
+
004835/008040, loss: 0.329389, avg_loss: 0.397171
|
1087 |
+
004840/008040, loss: 0.167244, avg_loss: 0.396923
|
1088 |
+
004845/008040, loss: 0.084977, avg_loss: 0.396725
|
1089 |
+
004850/008040, loss: 0.170633, avg_loss: 0.396479
|
1090 |
+
004855/008040, loss: 0.162252, avg_loss: 0.396249
|
1091 |
+
004860/008040, loss: 0.242330, avg_loss: 0.396023
|
1092 |
+
004865/008040, loss: 0.158724, avg_loss: 0.395797
|
1093 |
+
004870/008040, loss: 0.145546, avg_loss: 0.395552
|
1094 |
+
004875/008040, loss: 0.154330, avg_loss: 0.395311
|
1095 |
+
004880/008040, loss: 0.156234, avg_loss: 0.395089
|
1096 |
+
004885/008040, loss: 0.104371, avg_loss: 0.394829
|
1097 |
+
004890/008040, loss: 0.116719, avg_loss: 0.394561
|
1098 |
+
004895/008040, loss: 0.167743, avg_loss: 0.394364
|
1099 |
+
004900/008040, loss: 0.064857, avg_loss: 0.394083
|
1100 |
+
004905/008040, loss: 0.067141, avg_loss: 0.393839
|
1101 |
+
004910/008040, loss: 0.099388, avg_loss: 0.393620
|
1102 |
+
004915/008040, loss: 0.121026, avg_loss: 0.393411
|
1103 |
+
004920/008040, loss: 0.237852, avg_loss: 0.393219
|
1104 |
+
004925/008040, loss: 0.116583, avg_loss: 0.392966
|
1105 |
+
004930/008040, loss: 0.066189, avg_loss: 0.392755
|
1106 |
+
004935/008040, loss: 0.100841, avg_loss: 0.392546
|
1107 |
+
004940/008040, loss: 0.184811, avg_loss: 0.392326
|
1108 |
+
004945/008040, loss: 0.261129, avg_loss: 0.392141
|
1109 |
+
004950/008040, loss: 0.227229, avg_loss: 0.391920
|
1110 |
+
004955/008040, loss: 0.073722, avg_loss: 0.391622
|
1111 |
+
004960/008040, loss: 0.217448, avg_loss: 0.391421
|
1112 |
+
004965/008040, loss: 0.166534, avg_loss: 0.391247
|
1113 |
+
004970/008040, loss: 0.169978, avg_loss: 0.391054
|
1114 |
+
004975/008040, loss: 0.162336, avg_loss: 0.390803
|
1115 |
+
004980/008040, loss: 0.100707, avg_loss: 0.390613
|
1116 |
+
004985/008040, loss: 0.066454, avg_loss: 0.390380
|
1117 |
+
004990/008040, loss: 0.268293, avg_loss: 0.390155
|
1118 |
+
004995/008040, loss: 0.299800, avg_loss: 0.389940
|
1119 |
+
005000/008040, loss: 0.159697, avg_loss: 0.389720
|
1120 |
+
005005/008040, loss: 0.089164, avg_loss: 0.389519
|
1121 |
+
005010/008040, loss: 0.083933, avg_loss: 0.389239
|
1122 |
+
005015/008040, loss: 0.109845, avg_loss: 0.389072
|
1123 |
+
005020/008040, loss: 0.323453, avg_loss: 0.388975
|
1124 |
+
005025/008040, loss: 0.218569, avg_loss: 0.388836
|
1125 |
+
005030/008040, loss: 0.306596, avg_loss: 0.388684
|
1126 |
+
005035/008040, loss: 0.214361, avg_loss: 0.388518
|
1127 |
+
005040/008040, loss: 0.203883, avg_loss: 0.388300
|
1128 |
+
005045/008040, loss: 0.119648, avg_loss: 0.388050
|
1129 |
+
005050/008040, loss: 0.241945, avg_loss: 0.387813
|
1130 |
+
005055/008040, loss: 0.295856, avg_loss: 0.387694
|
1131 |
+
005060/008040, loss: 0.299737, avg_loss: 0.387502
|
1132 |
+
005065/008040, loss: 0.173353, avg_loss: 0.387261
|
1133 |
+
005070/008040, loss: 0.148706, avg_loss: 0.387023
|
1134 |
+
005075/008040, loss: 0.235021, avg_loss: 0.386797
|
1135 |
+
005080/008040, loss: 0.309368, avg_loss: 0.386680
|
1136 |
+
005085/008040, loss: 0.187352, avg_loss: 0.386492
|
1137 |
+
005090/008040, loss: 0.124517, avg_loss: 0.386330
|
1138 |
+
***** Running dev evaluation *****
|
1139 |
+
Num examples = 1042
|
1140 |
+
Instantaneous batch size per device = 32
|
1141 |
+
epoch 18, step 5092/8040: {'matthews_correlation': 0.24348660475263997}
|
1142 |
+
005095/008040, loss: 0.244682, avg_loss: 0.386098
|
1143 |
+
005100/008040, loss: 0.038414, avg_loss: 0.385838
|
1144 |
+
005105/008040, loss: 0.095592, avg_loss: 0.385585
|
1145 |
+
005110/008040, loss: 0.112026, avg_loss: 0.385404
|
1146 |
+
005115/008040, loss: 0.193563, avg_loss: 0.385160
|
1147 |
+
005120/008040, loss: 0.169361, avg_loss: 0.384950
|
1148 |
+
005125/008040, loss: 0.115310, avg_loss: 0.384794
|
1149 |
+
005130/008040, loss: 0.111171, avg_loss: 0.384595
|
1150 |
+
005135/008040, loss: 0.347275, avg_loss: 0.384407
|
1151 |
+
005140/008040, loss: 0.066092, avg_loss: 0.384158
|
1152 |
+
005145/008040, loss: 0.038691, avg_loss: 0.383875
|
1153 |
+
005150/008040, loss: 0.187798, avg_loss: 0.383690
|
1154 |
+
005155/008040, loss: 0.080341, avg_loss: 0.383389
|
1155 |
+
005160/008040, loss: 0.250113, avg_loss: 0.383158
|
1156 |
+
005165/008040, loss: 0.230404, avg_loss: 0.382940
|
1157 |
+
005170/008040, loss: 0.199019, avg_loss: 0.382771
|
1158 |
+
005175/008040, loss: 0.042526, avg_loss: 0.382549
|
1159 |
+
005180/008040, loss: 0.107391, avg_loss: 0.382405
|
1160 |
+
005185/008040, loss: 0.123089, avg_loss: 0.382158
|
1161 |
+
005190/008040, loss: 0.211129, avg_loss: 0.381957
|
1162 |
+
005195/008040, loss: 0.191329, avg_loss: 0.381727
|
1163 |
+
005200/008040, loss: 0.247005, avg_loss: 0.381528
|
1164 |
+
005205/008040, loss: 0.151045, avg_loss: 0.381342
|
1165 |
+
005210/008040, loss: 0.221767, avg_loss: 0.381150
|
1166 |
+
005215/008040, loss: 0.098915, avg_loss: 0.380916
|
1167 |
+
005220/008040, loss: 0.120604, avg_loss: 0.380805
|
1168 |
+
005225/008040, loss: 0.198758, avg_loss: 0.380580
|
1169 |
+
005230/008040, loss: 0.170238, avg_loss: 0.380375
|
1170 |
+
005235/008040, loss: 0.287471, avg_loss: 0.380185
|
1171 |
+
005240/008040, loss: 0.099829, avg_loss: 0.379951
|
1172 |
+
005245/008040, loss: 0.192130, avg_loss: 0.379746
|
1173 |
+
005250/008040, loss: 0.174886, avg_loss: 0.379625
|
1174 |
+
005255/008040, loss: 0.154950, avg_loss: 0.379463
|
1175 |
+
005260/008040, loss: 0.315916, avg_loss: 0.379261
|
1176 |
+
005265/008040, loss: 0.142192, avg_loss: 0.379091
|
1177 |
+
005270/008040, loss: 0.157938, avg_loss: 0.378953
|
1178 |
+
005275/008040, loss: 0.112631, avg_loss: 0.378706
|
1179 |
+
005280/008040, loss: 0.112058, avg_loss: 0.378528
|
1180 |
+
005285/008040, loss: 0.109890, avg_loss: 0.378278
|
1181 |
+
005290/008040, loss: 0.096205, avg_loss: 0.378069
|
1182 |
+
005295/008040, loss: 0.101920, avg_loss: 0.377793
|
1183 |
+
005300/008040, loss: 0.223294, avg_loss: 0.377597
|
1184 |
+
005305/008040, loss: 0.334869, avg_loss: 0.377448
|
1185 |
+
005310/008040, loss: 0.133110, avg_loss: 0.377219
|
1186 |
+
005315/008040, loss: 0.285188, avg_loss: 0.377080
|
1187 |
+
005320/008040, loss: 0.093014, avg_loss: 0.376865
|
1188 |
+
005325/008040, loss: 0.064642, avg_loss: 0.376663
|
1189 |
+
005330/008040, loss: 0.399625, avg_loss: 0.376612
|
1190 |
+
005335/008040, loss: 0.099368, avg_loss: 0.376419
|
1191 |
+
005340/008040, loss: 0.127971, avg_loss: 0.376192
|
1192 |
+
005345/008040, loss: 0.276726, avg_loss: 0.376028
|
1193 |
+
005350/008040, loss: 0.203088, avg_loss: 0.375818
|
1194 |
+
005355/008040, loss: 0.162861, avg_loss: 0.375639
|
1195 |
+
005360/008040, loss: 0.111333, avg_loss: 0.375443
|
1196 |
+
***** Running dev evaluation *****
|
1197 |
+
Num examples = 1042
|
1198 |
+
Instantaneous batch size per device = 32
|
1199 |
+
epoch 19, step 5360/8040: {'matthews_correlation': 0.2545245288314363}
|
1200 |
+
005365/008040, loss: 0.182490, avg_loss: 0.375250
|
1201 |
+
005370/008040, loss: 0.306611, avg_loss: 0.375098
|
1202 |
+
005375/008040, loss: 0.086688, avg_loss: 0.374876
|
1203 |
+
005380/008040, loss: 0.073351, avg_loss: 0.374639
|
1204 |
+
005385/008040, loss: 0.076141, avg_loss: 0.374468
|
1205 |
+
005390/008040, loss: 0.263192, avg_loss: 0.374279
|
1206 |
+
005395/008040, loss: 0.057974, avg_loss: 0.374064
|
1207 |
+
005400/008040, loss: 0.168771, avg_loss: 0.373868
|
1208 |
+
005405/008040, loss: 0.101774, avg_loss: 0.373638
|
1209 |
+
005410/008040, loss: 0.233914, avg_loss: 0.373431
|
1210 |
+
005415/008040, loss: 0.199603, avg_loss: 0.373236
|
1211 |
+
005420/008040, loss: 0.069979, avg_loss: 0.373044
|
1212 |
+
005425/008040, loss: 0.157929, avg_loss: 0.372784
|
1213 |
+
005430/008040, loss: 0.102463, avg_loss: 0.372559
|
1214 |
+
005435/008040, loss: 0.311656, avg_loss: 0.372387
|
1215 |
+
005440/008040, loss: 0.025280, avg_loss: 0.372177
|
1216 |
+
005445/008040, loss: 0.199722, avg_loss: 0.372008
|
1217 |
+
005450/008040, loss: 0.037342, avg_loss: 0.371794
|
1218 |
+
005455/008040, loss: 0.464360, avg_loss: 0.371634
|
1219 |
+
005460/008040, loss: 0.151161, avg_loss: 0.371387
|
1220 |
+
005465/008040, loss: 0.131248, avg_loss: 0.371174
|
1221 |
+
005470/008040, loss: 0.091763, avg_loss: 0.370973
|
1222 |
+
005475/008040, loss: 0.124437, avg_loss: 0.370771
|
1223 |
+
005480/008040, loss: 0.056806, avg_loss: 0.370576
|
1224 |
+
005485/008040, loss: 0.053934, avg_loss: 0.370370
|
1225 |
+
005490/008040, loss: 0.134340, avg_loss: 0.370134
|
1226 |
+
005495/008040, loss: 0.403093, avg_loss: 0.369976
|
1227 |
+
005500/008040, loss: 0.295253, avg_loss: 0.369797
|
1228 |
+
005505/008040, loss: 0.123554, avg_loss: 0.369599
|
1229 |
+
005510/008040, loss: 0.146412, avg_loss: 0.369405
|
1230 |
+
005515/008040, loss: 0.086848, avg_loss: 0.369164
|
1231 |
+
005520/008040, loss: 0.190395, avg_loss: 0.368969
|
1232 |
+
005525/008040, loss: 0.214298, avg_loss: 0.368869
|
1233 |
+
005530/008040, loss: 0.157094, avg_loss: 0.368707
|
1234 |
+
005535/008040, loss: 0.236498, avg_loss: 0.368588
|
1235 |
+
005540/008040, loss: 0.150522, avg_loss: 0.368397
|
1236 |
+
005545/008040, loss: 0.056312, avg_loss: 0.368192
|
1237 |
+
005550/008040, loss: 0.132276, avg_loss: 0.367994
|
1238 |
+
005555/008040, loss: 0.152209, avg_loss: 0.367803
|
1239 |
+
005560/008040, loss: 0.123136, avg_loss: 0.367632
|
1240 |
+
005565/008040, loss: 0.295406, avg_loss: 0.367456
|
1241 |
+
005570/008040, loss: 0.163695, avg_loss: 0.367255
|
1242 |
+
005575/008040, loss: 0.032764, avg_loss: 0.367021
|
1243 |
+
005580/008040, loss: 0.077804, avg_loss: 0.366803
|
1244 |
+
005585/008040, loss: 0.426609, avg_loss: 0.366718
|
1245 |
+
005590/008040, loss: 0.170544, avg_loss: 0.366554
|
1246 |
+
005595/008040, loss: 0.121247, avg_loss: 0.366328
|
1247 |
+
005600/008040, loss: 0.118504, avg_loss: 0.366139
|
1248 |
+
005605/008040, loss: 0.127036, avg_loss: 0.365945
|
1249 |
+
005610/008040, loss: 0.253191, avg_loss: 0.365772
|
1250 |
+
005615/008040, loss: 0.132579, avg_loss: 0.365584
|
1251 |
+
005620/008040, loss: 0.206162, avg_loss: 0.365378
|
1252 |
+
005625/008040, loss: 0.138357, avg_loss: 0.365198
|
1253 |
+
***** Running dev evaluation *****
|
1254 |
+
Num examples = 1042
|
1255 |
+
Instantaneous batch size per device = 32
|
1256 |
+
epoch 20, step 5628/8040: {'matthews_correlation': 0.20994533418798944}
|
1257 |
+
005630/008040, loss: 0.220501, avg_loss: 0.364957
|
1258 |
+
005635/008040, loss: 0.176737, avg_loss: 0.364715
|
1259 |
+
005640/008040, loss: 0.150695, avg_loss: 0.364518
|
1260 |
+
005645/008040, loss: 0.186189, avg_loss: 0.364357
|
1261 |
+
005650/008040, loss: 0.238791, avg_loss: 0.364176
|
1262 |
+
005655/008040, loss: 0.128307, avg_loss: 0.363933
|
1263 |
+
005660/008040, loss: 0.098545, avg_loss: 0.363732
|
1264 |
+
005665/008040, loss: 0.059385, avg_loss: 0.363478
|
1265 |
+
005670/008040, loss: 0.106437, avg_loss: 0.363278
|
1266 |
+
005675/008040, loss: 0.051390, avg_loss: 0.363102
|
1267 |
+
005680/008040, loss: 0.264690, avg_loss: 0.362942
|
1268 |
+
005685/008040, loss: 0.051885, avg_loss: 0.362837
|
1269 |
+
005690/008040, loss: 0.186132, avg_loss: 0.362659
|
1270 |
+
005695/008040, loss: 0.110282, avg_loss: 0.362424
|
1271 |
+
005700/008040, loss: 0.035696, avg_loss: 0.362194
|
1272 |
+
005705/008040, loss: 0.210790, avg_loss: 0.362002
|
1273 |
+
005710/008040, loss: 0.176916, avg_loss: 0.361813
|
1274 |
+
005715/008040, loss: 0.068533, avg_loss: 0.361627
|
1275 |
+
005720/008040, loss: 0.059564, avg_loss: 0.361417
|
1276 |
+
005725/008040, loss: 0.087551, avg_loss: 0.361274
|
1277 |
+
005730/008040, loss: 0.166153, avg_loss: 0.361102
|
1278 |
+
005735/008040, loss: 0.123037, avg_loss: 0.360934
|
1279 |
+
005740/008040, loss: 0.175274, avg_loss: 0.360734
|
1280 |
+
005745/008040, loss: 0.053053, avg_loss: 0.360546
|
1281 |
+
005750/008040, loss: 0.193917, avg_loss: 0.360384
|
1282 |
+
005755/008040, loss: 0.192751, avg_loss: 0.360214
|
1283 |
+
005760/008040, loss: 0.101878, avg_loss: 0.360026
|
1284 |
+
005765/008040, loss: 0.085134, avg_loss: 0.359832
|
1285 |
+
005770/008040, loss: 0.293009, avg_loss: 0.359642
|
1286 |
+
005775/008040, loss: 0.104310, avg_loss: 0.359413
|
1287 |
+
005780/008040, loss: 0.011819, avg_loss: 0.359168
|
1288 |
+
005785/008040, loss: 0.210015, avg_loss: 0.359009
|
1289 |
+
005790/008040, loss: 0.200174, avg_loss: 0.358879
|
1290 |
+
005795/008040, loss: 0.141055, avg_loss: 0.358671
|
1291 |
+
005800/008040, loss: 0.078129, avg_loss: 0.358469
|
1292 |
+
005805/008040, loss: 0.083557, avg_loss: 0.358299
|
1293 |
+
005810/008040, loss: 0.075039, avg_loss: 0.358079
|
1294 |
+
005815/008040, loss: 0.080684, avg_loss: 0.357903
|
1295 |
+
005820/008040, loss: 0.345792, avg_loss: 0.357738
|
1296 |
+
005825/008040, loss: 0.078370, avg_loss: 0.357590
|
1297 |
+
005830/008040, loss: 0.199539, avg_loss: 0.357436
|
1298 |
+
005835/008040, loss: 0.196801, avg_loss: 0.357251
|
1299 |
+
005840/008040, loss: 0.173617, avg_loss: 0.357071
|
1300 |
+
005845/008040, loss: 0.056907, avg_loss: 0.356874
|
1301 |
+
005850/008040, loss: 0.165107, avg_loss: 0.356683
|
1302 |
+
005855/008040, loss: 0.100072, avg_loss: 0.356521
|
1303 |
+
005860/008040, loss: 0.178491, avg_loss: 0.356324
|
1304 |
+
005865/008040, loss: 0.212101, avg_loss: 0.356138
|
1305 |
+
005870/008040, loss: 0.215021, avg_loss: 0.355963
|
1306 |
+
005875/008040, loss: 0.273816, avg_loss: 0.355788
|
1307 |
+
005880/008040, loss: 0.364194, avg_loss: 0.355641
|
1308 |
+
005885/008040, loss: 0.270123, avg_loss: 0.355498
|
1309 |
+
005890/008040, loss: 0.047443, avg_loss: 0.355291
|
1310 |
+
005895/008040, loss: 0.142198, avg_loss: 0.355145
|
1311 |
+
***** Running dev evaluation *****
|
1312 |
+
Num examples = 1042
|
1313 |
+
Instantaneous batch size per device = 32
|
1314 |
+
epoch 21, step 5896/8040: {'matthews_correlation': 0.21551745055261307}
|
1315 |
+
005900/008040, loss: 0.191457, avg_loss: 0.354973
|
1316 |
+
005905/008040, loss: 0.295734, avg_loss: 0.354803
|
1317 |
+
005910/008040, loss: 0.075735, avg_loss: 0.354682
|
1318 |
+
005915/008040, loss: 0.142483, avg_loss: 0.354498
|
1319 |
+
005920/008040, loss: 0.117506, avg_loss: 0.354280
|
1320 |
+
005925/008040, loss: 0.108497, avg_loss: 0.354039
|
1321 |
+
005930/008040, loss: 0.023560, avg_loss: 0.353889
|
1322 |
+
005935/008040, loss: 0.051859, avg_loss: 0.353655
|
1323 |
+
005940/008040, loss: 0.096430, avg_loss: 0.353502
|
1324 |
+
005945/008040, loss: 0.168284, avg_loss: 0.353288
|
1325 |
+
005950/008040, loss: 0.137047, avg_loss: 0.353141
|
1326 |
+
005955/008040, loss: 0.182130, avg_loss: 0.353003
|
1327 |
+
005960/008040, loss: 0.052544, avg_loss: 0.352779
|
1328 |
+
005965/008040, loss: 0.148201, avg_loss: 0.352614
|
1329 |
+
005970/008040, loss: 0.203060, avg_loss: 0.352449
|
1330 |
+
005975/008040, loss: 0.152961, avg_loss: 0.352309
|
1331 |
+
005980/008040, loss: 0.149886, avg_loss: 0.352082
|
1332 |
+
005985/008040, loss: 0.108204, avg_loss: 0.351957
|
1333 |
+
005990/008040, loss: 0.102725, avg_loss: 0.351766
|
1334 |
+
005995/008040, loss: 0.023260, avg_loss: 0.351590
|
1335 |
+
006000/008040, loss: 0.115315, avg_loss: 0.351441
|
1336 |
+
006005/008040, loss: 0.074605, avg_loss: 0.351242
|
1337 |
+
006010/008040, loss: 0.142932, avg_loss: 0.351052
|
1338 |
+
006015/008040, loss: 0.083695, avg_loss: 0.350857
|
1339 |
+
006020/008040, loss: 0.043695, avg_loss: 0.350694
|
1340 |
+
006025/008040, loss: 0.099229, avg_loss: 0.350506
|
1341 |
+
006030/008040, loss: 0.024634, avg_loss: 0.350314
|
1342 |
+
006035/008040, loss: 0.213198, avg_loss: 0.350172
|
1343 |
+
006040/008040, loss: 0.090062, avg_loss: 0.349989
|
1344 |
+
006045/008040, loss: 0.165650, avg_loss: 0.349814
|
1345 |
+
006050/008040, loss: 0.375118, avg_loss: 0.349689
|
1346 |
+
006055/008040, loss: 0.092574, avg_loss: 0.349502
|
1347 |
+
006060/008040, loss: 0.076881, avg_loss: 0.349333
|
1348 |
+
006065/008040, loss: 0.215473, avg_loss: 0.349175
|
1349 |
+
006070/008040, loss: 0.047726, avg_loss: 0.349023
|
1350 |
+
006075/008040, loss: 0.275449, avg_loss: 0.348853
|
1351 |
+
006080/008040, loss: 0.091764, avg_loss: 0.348649
|
1352 |
+
006085/008040, loss: 0.158617, avg_loss: 0.348518
|
1353 |
+
006090/008040, loss: 0.398433, avg_loss: 0.348350
|
1354 |
+
006095/008040, loss: 0.249465, avg_loss: 0.348170
|
1355 |
+
006100/008040, loss: 0.230916, avg_loss: 0.348021
|
1356 |
+
006105/008040, loss: 0.138895, avg_loss: 0.347855
|
1357 |
+
006110/008040, loss: 0.023905, avg_loss: 0.347659
|
1358 |
+
006115/008040, loss: 0.183222, avg_loss: 0.347486
|
1359 |
+
006120/008040, loss: 0.149845, avg_loss: 0.347367
|
1360 |
+
006125/008040, loss: 0.120646, avg_loss: 0.347237
|
1361 |
+
006130/008040, loss: 0.232747, avg_loss: 0.347078
|
1362 |
+
006135/008040, loss: 0.086326, avg_loss: 0.346889
|
1363 |
+
006140/008040, loss: 0.044021, avg_loss: 0.346692
|
1364 |
+
006145/008040, loss: 0.173458, avg_loss: 0.346579
|
1365 |
+
006150/008040, loss: 0.110168, avg_loss: 0.346419
|
1366 |
+
006155/008040, loss: 0.041496, avg_loss: 0.346239
|
1367 |
+
006160/008040, loss: 0.048964, avg_loss: 0.346022
|
1368 |
+
***** Running dev evaluation *****
|
1369 |
+
Num examples = 1042
|
1370 |
+
Instantaneous batch size per device = 32
|
1371 |
+
epoch 22, step 6164/8040: {'matthews_correlation': 0.20483291444361929}
|
1372 |
+
006165/008040, loss: 0.256147, avg_loss: 0.345885
|
1373 |
+
006170/008040, loss: 0.100646, avg_loss: 0.345729
|
1374 |
+
006175/008040, loss: 0.135744, avg_loss: 0.345560
|
1375 |
+
006180/008040, loss: 0.070830, avg_loss: 0.345336
|
1376 |
+
006185/008040, loss: 0.183400, avg_loss: 0.345210
|
1377 |
+
006190/008040, loss: 0.171377, avg_loss: 0.345125
|
1378 |
+
006195/008040, loss: 0.104681, avg_loss: 0.344985
|
1379 |
+
006200/008040, loss: 0.047664, avg_loss: 0.344778
|
1380 |
+
006205/008040, loss: 0.132229, avg_loss: 0.344638
|
1381 |
+
006210/008040, loss: 0.212232, avg_loss: 0.344449
|
1382 |
+
006215/008040, loss: 0.037690, avg_loss: 0.344257
|
1383 |
+
006220/008040, loss: 0.265332, avg_loss: 0.344097
|
1384 |
+
006225/008040, loss: 0.114738, avg_loss: 0.343896
|
1385 |
+
006230/008040, loss: 0.075357, avg_loss: 0.343712
|
1386 |
+
006235/008040, loss: 0.082196, avg_loss: 0.343519
|
1387 |
+
006240/008040, loss: 0.141044, avg_loss: 0.343342
|
1388 |
+
006245/008040, loss: 0.061539, avg_loss: 0.343173
|
1389 |
+
006250/008040, loss: 0.156940, avg_loss: 0.342981
|
1390 |
+
006255/008040, loss: 0.074917, avg_loss: 0.342844
|
1391 |
+
006260/008040, loss: 0.182297, avg_loss: 0.342644
|
1392 |
+
006265/008040, loss: 0.188166, avg_loss: 0.342492
|
1393 |
+
006270/008040, loss: 0.098817, avg_loss: 0.342310
|
1394 |
+
006275/008040, loss: 0.026582, avg_loss: 0.342126
|
1395 |
+
006280/008040, loss: 0.093155, avg_loss: 0.341935
|
1396 |
+
006285/008040, loss: 0.121849, avg_loss: 0.341775
|
1397 |
+
006290/008040, loss: 0.117257, avg_loss: 0.341654
|
1398 |
+
006295/008040, loss: 0.470718, avg_loss: 0.341518
|
1399 |
+
006300/008040, loss: 0.071532, avg_loss: 0.341330
|
1400 |
+
006305/008040, loss: 0.077978, avg_loss: 0.341138
|
1401 |
+
006310/008040, loss: 0.280971, avg_loss: 0.341040
|
1402 |
+
006315/008040, loss: 0.083832, avg_loss: 0.340839
|
1403 |
+
006320/008040, loss: 0.123453, avg_loss: 0.340651
|
1404 |
+
006325/008040, loss: 0.293229, avg_loss: 0.340510
|
1405 |
+
006330/008040, loss: 0.010351, avg_loss: 0.340313
|
1406 |
+
006335/008040, loss: 0.206183, avg_loss: 0.340134
|
1407 |
+
006340/008040, loss: 0.157802, avg_loss: 0.340008
|
1408 |
+
006345/008040, loss: 0.045268, avg_loss: 0.339836
|
1409 |
+
006350/008040, loss: 0.096029, avg_loss: 0.339639
|
1410 |
+
006355/008040, loss: 0.094634, avg_loss: 0.339505
|
1411 |
+
006360/008040, loss: 0.020495, avg_loss: 0.339314
|
1412 |
+
006365/008040, loss: 0.099276, avg_loss: 0.339191
|
1413 |
+
006370/008040, loss: 0.040780, avg_loss: 0.339016
|
1414 |
+
006375/008040, loss: 0.107320, avg_loss: 0.338862
|
1415 |
+
006380/008040, loss: 0.212123, avg_loss: 0.338757
|
1416 |
+
006385/008040, loss: 0.228286, avg_loss: 0.338587
|
1417 |
+
006390/008040, loss: 0.094378, avg_loss: 0.338381
|
1418 |
+
006395/008040, loss: 0.094123, avg_loss: 0.338215
|
1419 |
+
006400/008040, loss: 0.153880, avg_loss: 0.338045
|
1420 |
+
006405/008040, loss: 0.025904, avg_loss: 0.337846
|
1421 |
+
006410/008040, loss: 0.082967, avg_loss: 0.337646
|
1422 |
+
006415/008040, loss: 0.040689, avg_loss: 0.337446
|
1423 |
+
006420/008040, loss: 0.075779, avg_loss: 0.337272
|
1424 |
+
006425/008040, loss: 0.150025, avg_loss: 0.337107
|
1425 |
+
006430/008040, loss: 0.141630, avg_loss: 0.336983
|
1426 |
+
***** Running dev evaluation *****
|
1427 |
+
Num examples = 1042
|
1428 |
+
Instantaneous batch size per device = 32
|
1429 |
+
epoch 23, step 6432/8040: {'matthews_correlation': 0.24931944187781385}
|
1430 |
+
006435/008040, loss: 0.153386, avg_loss: 0.336812
|
1431 |
+
006440/008040, loss: 0.222147, avg_loss: 0.336705
|
1432 |
+
006445/008040, loss: 0.156677, avg_loss: 0.336554
|
1433 |
+
006450/008040, loss: 0.010944, avg_loss: 0.336408
|
1434 |
+
006455/008040, loss: 0.038571, avg_loss: 0.336201
|
1435 |
+
006460/008040, loss: 0.114449, avg_loss: 0.336032
|
1436 |
+
006465/008040, loss: 0.089689, avg_loss: 0.335848
|
1437 |
+
006470/008040, loss: 0.329702, avg_loss: 0.335703
|
1438 |
+
006475/008040, loss: 0.234976, avg_loss: 0.335533
|
1439 |
+
006480/008040, loss: 0.090094, avg_loss: 0.335370
|
1440 |
+
006485/008040, loss: 0.090959, avg_loss: 0.335211
|
1441 |
+
006490/008040, loss: 0.184350, avg_loss: 0.335043
|
1442 |
+
006495/008040, loss: 0.013678, avg_loss: 0.334875
|
1443 |
+
006500/008040, loss: 0.066592, avg_loss: 0.334706
|
1444 |
+
006505/008040, loss: 0.044437, avg_loss: 0.334594
|
1445 |
+
006510/008040, loss: 0.044384, avg_loss: 0.334395
|
1446 |
+
006515/008040, loss: 0.141515, avg_loss: 0.334245
|
1447 |
+
006520/008040, loss: 0.147063, avg_loss: 0.334082
|
1448 |
+
006525/008040, loss: 0.132490, avg_loss: 0.333948
|
1449 |
+
006530/008040, loss: 0.115439, avg_loss: 0.333813
|
1450 |
+
006535/008040, loss: 0.072417, avg_loss: 0.333611
|
1451 |
+
006540/008040, loss: 0.122740, avg_loss: 0.333438
|
1452 |
+
006545/008040, loss: 0.153896, avg_loss: 0.333265
|
1453 |
+
006550/008040, loss: 0.216064, avg_loss: 0.333112
|
1454 |
+
006555/008040, loss: 0.074902, avg_loss: 0.332915
|
1455 |
+
006560/008040, loss: 0.229158, avg_loss: 0.332779
|
1456 |
+
006565/008040, loss: 0.142630, avg_loss: 0.332649
|
1457 |
+
006570/008040, loss: 0.106839, avg_loss: 0.332540
|
1458 |
+
006575/008040, loss: 0.016569, avg_loss: 0.332368
|
1459 |
+
006580/008040, loss: 0.110829, avg_loss: 0.332200
|
1460 |
+
006585/008040, loss: 0.072897, avg_loss: 0.332014
|
1461 |
+
006590/008040, loss: 0.148401, avg_loss: 0.331831
|
1462 |
+
006595/008040, loss: 0.472177, avg_loss: 0.331741
|
1463 |
+
006600/008040, loss: 0.026618, avg_loss: 0.331582
|
1464 |
+
006605/008040, loss: 0.192052, avg_loss: 0.331442
|
1465 |
+
006610/008040, loss: 0.176737, avg_loss: 0.331313
|
1466 |
+
006615/008040, loss: 0.256435, avg_loss: 0.331163
|
1467 |
+
006620/008040, loss: 0.112773, avg_loss: 0.330998
|
1468 |
+
006625/008040, loss: 0.168097, avg_loss: 0.330900
|
1469 |
+
006630/008040, loss: 0.149819, avg_loss: 0.330756
|
1470 |
+
006635/008040, loss: 0.035909, avg_loss: 0.330597
|
1471 |
+
006640/008040, loss: 0.116389, avg_loss: 0.330473
|
1472 |
+
006645/008040, loss: 0.067924, avg_loss: 0.330317
|
1473 |
+
006650/008040, loss: 0.029742, avg_loss: 0.330124
|
1474 |
+
006655/008040, loss: 0.111944, avg_loss: 0.329962
|
1475 |
+
006660/008040, loss: 0.138587, avg_loss: 0.329805
|
1476 |
+
006665/008040, loss: 0.214782, avg_loss: 0.329670
|
1477 |
+
006670/008040, loss: 0.134468, avg_loss: 0.329520
|
1478 |
+
006675/008040, loss: 0.131746, avg_loss: 0.329344
|
1479 |
+
006680/008040, loss: 0.085801, avg_loss: 0.329181
|
1480 |
+
006685/008040, loss: 0.123189, avg_loss: 0.329020
|
1481 |
+
006690/008040, loss: 0.059361, avg_loss: 0.328905
|
1482 |
+
006695/008040, loss: 0.131232, avg_loss: 0.328752
|
1483 |
+
006700/008040, loss: 0.036765, avg_loss: 0.328543
|
1484 |
+
***** Running dev evaluation *****
|
1485 |
+
Num examples = 1042
|
1486 |
+
Instantaneous batch size per device = 32
|
1487 |
+
epoch 24, step 6700/8040: {'matthews_correlation': 0.23227684406858393}
|
1488 |
+
006705/008040, loss: 0.030755, avg_loss: 0.328399
|
1489 |
+
006710/008040, loss: 0.099883, avg_loss: 0.328219
|
1490 |
+
006715/008040, loss: 0.177528, avg_loss: 0.328066
|
1491 |
+
006720/008040, loss: 0.272356, avg_loss: 0.327913
|
1492 |
+
006725/008040, loss: 0.024312, avg_loss: 0.327721
|
1493 |
+
006730/008040, loss: 0.085378, avg_loss: 0.327553
|
1494 |
+
006735/008040, loss: 0.042832, avg_loss: 0.327387
|
1495 |
+
006740/008040, loss: 0.151696, avg_loss: 0.327222
|
1496 |
+
006745/008040, loss: 0.125671, avg_loss: 0.327072
|
1497 |
+
006750/008040, loss: 0.065977, avg_loss: 0.326951
|
1498 |
+
006755/008040, loss: 0.120872, avg_loss: 0.326772
|
1499 |
+
006760/008040, loss: 0.024726, avg_loss: 0.326601
|
1500 |
+
006765/008040, loss: 0.058831, avg_loss: 0.326394
|
1501 |
+
006770/008040, loss: 0.360663, avg_loss: 0.326308
|
1502 |
+
006775/008040, loss: 0.055613, avg_loss: 0.326138
|
1503 |
+
006780/008040, loss: 0.068713, avg_loss: 0.325965
|
1504 |
+
006785/008040, loss: 0.048198, avg_loss: 0.325800
|
1505 |
+
006790/008040, loss: 0.176144, avg_loss: 0.325608
|
1506 |
+
006795/008040, loss: 0.084840, avg_loss: 0.325489
|
1507 |
+
006800/008040, loss: 0.252102, avg_loss: 0.325354
|
1508 |
+
006805/008040, loss: 0.021211, avg_loss: 0.325169
|
1509 |
+
006810/008040, loss: 0.206506, avg_loss: 0.325002
|
1510 |
+
006815/008040, loss: 0.049258, avg_loss: 0.324803
|
1511 |
+
006820/008040, loss: 0.062864, avg_loss: 0.324719
|
1512 |
+
006825/008040, loss: 0.211644, avg_loss: 0.324574
|
1513 |
+
006830/008040, loss: 0.244467, avg_loss: 0.324422
|
1514 |
+
006835/008040, loss: 0.125059, avg_loss: 0.324291
|
1515 |
+
006840/008040, loss: 0.126497, avg_loss: 0.324127
|
1516 |
+
006845/008040, loss: 0.195687, avg_loss: 0.323964
|
1517 |
+
006850/008040, loss: 0.020408, avg_loss: 0.323804
|
1518 |
+
006855/008040, loss: 0.039552, avg_loss: 0.323659
|
1519 |
+
006860/008040, loss: 0.053010, avg_loss: 0.323502
|
1520 |
+
006865/008040, loss: 0.240591, avg_loss: 0.323372
|
1521 |
+
006870/008040, loss: 0.067442, avg_loss: 0.323200
|
1522 |
+
006875/008040, loss: 0.070645, avg_loss: 0.323066
|
1523 |
+
006880/008040, loss: 0.043463, avg_loss: 0.322892
|
1524 |
+
006885/008040, loss: 0.065176, avg_loss: 0.322750
|
1525 |
+
006890/008040, loss: 0.022665, avg_loss: 0.322594
|
1526 |
+
006895/008040, loss: 0.024392, avg_loss: 0.322466
|
1527 |
+
006900/008040, loss: 0.065461, avg_loss: 0.322293
|
1528 |
+
006905/008040, loss: 0.137459, avg_loss: 0.322161
|
1529 |
+
006910/008040, loss: 0.183977, avg_loss: 0.322088
|
1530 |
+
006915/008040, loss: 0.082571, avg_loss: 0.321932
|
1531 |
+
006920/008040, loss: 0.047769, avg_loss: 0.321773
|
1532 |
+
006925/008040, loss: 0.185715, avg_loss: 0.321622
|
1533 |
+
006930/008040, loss: 0.120999, avg_loss: 0.321527
|
1534 |
+
006935/008040, loss: 0.011521, avg_loss: 0.321368
|
1535 |
+
006940/008040, loss: 0.056693, avg_loss: 0.321238
|
1536 |
+
006945/008040, loss: 0.312799, avg_loss: 0.321111
|
1537 |
+
006950/008040, loss: 0.093848, avg_loss: 0.320974
|
1538 |
+
006955/008040, loss: 0.147207, avg_loss: 0.320829
|
1539 |
+
006960/008040, loss: 0.244541, avg_loss: 0.320667
|
1540 |
+
006965/008040, loss: 0.171516, avg_loss: 0.320572
|
1541 |
+
***** Running dev evaluation *****
|
1542 |
+
Num examples = 1042
|
1543 |
+
Instantaneous batch size per device = 32
|
1544 |
+
epoch 25, step 6968/8040: {'matthews_correlation': 0.2203939727085643}
|
1545 |
+
006970/008040, loss: 0.090653, avg_loss: 0.320431
|
1546 |
+
006975/008040, loss: 0.113621, avg_loss: 0.320278
|
1547 |
+
006980/008040, loss: 0.050388, avg_loss: 0.320129
|
1548 |
+
006985/008040, loss: 0.195083, avg_loss: 0.319979
|
1549 |
+
006990/008040, loss: 0.071205, avg_loss: 0.319799
|
1550 |
+
006995/008040, loss: 0.020149, avg_loss: 0.319621
|
1551 |
+
007000/008040, loss: 0.059265, avg_loss: 0.319458
|
1552 |
+
007005/008040, loss: 0.179539, avg_loss: 0.319284
|
1553 |
+
007010/008040, loss: 0.012788, avg_loss: 0.319089
|
1554 |
+
007015/008040, loss: 0.034613, avg_loss: 0.318934
|
1555 |
+
007020/008040, loss: 0.168260, avg_loss: 0.318806
|
1556 |
+
007025/008040, loss: 0.188078, avg_loss: 0.318649
|
1557 |
+
007030/008040, loss: 0.011455, avg_loss: 0.318545
|
1558 |
+
007035/008040, loss: 0.040954, avg_loss: 0.318369
|
1559 |
+
007040/008040, loss: 0.096969, avg_loss: 0.318195
|
1560 |
+
007045/008040, loss: 0.071101, avg_loss: 0.318037
|
1561 |
+
007050/008040, loss: 0.081423, avg_loss: 0.317851
|
1562 |
+
007055/008040, loss: 0.195587, avg_loss: 0.317688
|
1563 |
+
007060/008040, loss: 0.032233, avg_loss: 0.317557
|
1564 |
+
007065/008040, loss: 0.271995, avg_loss: 0.317422
|
1565 |
+
007070/008040, loss: 0.037814, avg_loss: 0.317280
|
1566 |
+
007075/008040, loss: 0.068158, avg_loss: 0.317114
|
1567 |
+
007080/008040, loss: 0.254120, avg_loss: 0.317008
|
1568 |
+
007085/008040, loss: 0.078963, avg_loss: 0.316860
|
1569 |
+
007090/008040, loss: 0.024606, avg_loss: 0.316704
|
1570 |
+
007095/008040, loss: 0.112365, avg_loss: 0.316543
|
1571 |
+
007100/008040, loss: 0.068048, avg_loss: 0.316381
|
1572 |
+
007105/008040, loss: 0.029112, avg_loss: 0.316234
|
1573 |
+
007110/008040, loss: 0.055819, avg_loss: 0.316104
|
1574 |
+
007115/008040, loss: 0.025860, avg_loss: 0.315943
|
1575 |
+
007120/008040, loss: 0.094708, avg_loss: 0.315802
|
1576 |
+
007125/008040, loss: 0.087746, avg_loss: 0.315670
|
1577 |
+
007130/008040, loss: 0.134385, avg_loss: 0.315513
|
1578 |
+
007135/008040, loss: 0.135339, avg_loss: 0.315398
|
1579 |
+
007140/008040, loss: 0.315180, avg_loss: 0.315259
|
1580 |
+
007145/008040, loss: 0.054737, avg_loss: 0.315112
|
1581 |
+
007150/008040, loss: 0.405788, avg_loss: 0.315005
|
1582 |
+
007155/008040, loss: 0.188528, avg_loss: 0.314918
|
1583 |
+
007160/008040, loss: 0.061403, avg_loss: 0.314754
|
1584 |
+
007165/008040, loss: 0.077819, avg_loss: 0.314588
|
1585 |
+
007170/008040, loss: 0.136640, avg_loss: 0.314467
|
1586 |
+
007175/008040, loss: 0.055474, avg_loss: 0.314311
|
1587 |
+
007180/008040, loss: 0.023272, avg_loss: 0.314152
|
1588 |
+
007185/008040, loss: 0.098981, avg_loss: 0.314002
|
1589 |
+
007190/008040, loss: 0.019560, avg_loss: 0.313822
|
1590 |
+
007195/008040, loss: 0.348302, avg_loss: 0.313728
|
1591 |
+
007200/008040, loss: 0.105960, avg_loss: 0.313588
|
1592 |
+
007205/008040, loss: 0.246406, avg_loss: 0.313467
|
1593 |
+
007210/008040, loss: 0.074683, avg_loss: 0.313337
|
1594 |
+
007215/008040, loss: 0.291595, avg_loss: 0.313251
|
1595 |
+
007220/008040, loss: 0.034121, avg_loss: 0.313123
|
1596 |
+
007225/008040, loss: 0.074492, avg_loss: 0.313020
|
1597 |
+
007230/008040, loss: 0.108867, avg_loss: 0.312875
|
1598 |
+
007235/008040, loss: 0.158608, avg_loss: 0.312725
|
1599 |
+
***** Running dev evaluation *****
|
1600 |
+
Num examples = 1042
|
1601 |
+
Instantaneous batch size per device = 32
|
1602 |
+
epoch 26, step 7236/8040: {'matthews_correlation': 0.23696373689939254}
|
1603 |
+
007240/008040, loss: 0.150083, avg_loss: 0.312579
|
1604 |
+
007245/008040, loss: 0.122167, avg_loss: 0.312434
|
1605 |
+
007250/008040, loss: 0.083941, avg_loss: 0.312296
|
1606 |
+
007255/008040, loss: 0.191218, avg_loss: 0.312159
|
1607 |
+
007260/008040, loss: 0.092216, avg_loss: 0.312034
|
1608 |
+
007265/008040, loss: 0.081775, avg_loss: 0.311902
|
1609 |
+
007270/008040, loss: 0.059207, avg_loss: 0.311740
|
1610 |
+
007275/008040, loss: 0.273339, avg_loss: 0.311638
|
1611 |
+
007280/008040, loss: 0.271834, avg_loss: 0.311530
|
1612 |
+
007285/008040, loss: 0.179015, avg_loss: 0.311387
|
1613 |
+
007290/008040, loss: 0.046599, avg_loss: 0.311231
|
1614 |
+
007295/008040, loss: 0.051559, avg_loss: 0.311052
|
1615 |
+
007300/008040, loss: 0.105356, avg_loss: 0.310882
|
1616 |
+
007305/008040, loss: 0.043740, avg_loss: 0.310739
|
1617 |
+
007310/008040, loss: 0.080959, avg_loss: 0.310613
|
1618 |
+
007315/008040, loss: 0.058263, avg_loss: 0.310452
|
1619 |
+
007320/008040, loss: 0.167761, avg_loss: 0.310307
|
1620 |
+
007325/008040, loss: 0.128561, avg_loss: 0.310206
|
1621 |
+
007330/008040, loss: 0.103146, avg_loss: 0.310052
|
1622 |
+
007335/008040, loss: 0.219730, avg_loss: 0.309914
|
1623 |
+
007340/008040, loss: 0.055324, avg_loss: 0.309757
|
1624 |
+
007345/008040, loss: 0.057465, avg_loss: 0.309610
|
1625 |
+
007350/008040, loss: 0.402242, avg_loss: 0.309490
|
1626 |
+
007355/008040, loss: 0.352928, avg_loss: 0.309368
|
1627 |
+
007360/008040, loss: 0.270440, avg_loss: 0.309237
|
1628 |
+
007365/008040, loss: 0.094341, avg_loss: 0.309134
|
1629 |
+
007370/008040, loss: 0.325051, avg_loss: 0.309029
|
1630 |
+
007375/008040, loss: 0.040529, avg_loss: 0.308867
|
1631 |
+
007380/008040, loss: 0.090125, avg_loss: 0.308736
|
1632 |
+
007385/008040, loss: 0.053935, avg_loss: 0.308590
|
1633 |
+
007390/008040, loss: 0.430134, avg_loss: 0.308482
|
1634 |
+
007395/008040, loss: 0.147528, avg_loss: 0.308348
|
1635 |
+
007400/008040, loss: 0.121706, avg_loss: 0.308195
|
1636 |
+
007405/008040, loss: 0.178868, avg_loss: 0.308087
|
1637 |
+
007410/008040, loss: 0.178170, avg_loss: 0.307974
|
1638 |
+
007415/008040, loss: 0.023204, avg_loss: 0.307802
|
1639 |
+
007420/008040, loss: 0.182678, avg_loss: 0.307687
|
1640 |
+
007425/008040, loss: 0.090694, avg_loss: 0.307537
|
1641 |
+
007430/008040, loss: 0.028491, avg_loss: 0.307386
|
1642 |
+
007435/008040, loss: 0.027389, avg_loss: 0.307237
|
1643 |
+
007440/008040, loss: 0.283375, avg_loss: 0.307118
|
1644 |
+
007445/008040, loss: 0.036991, avg_loss: 0.306987
|
1645 |
+
007450/008040, loss: 0.103909, avg_loss: 0.306909
|
1646 |
+
007455/008040, loss: 0.036829, avg_loss: 0.306770
|
1647 |
+
007460/008040, loss: 0.052082, avg_loss: 0.306626
|
1648 |
+
007465/008040, loss: 0.338257, avg_loss: 0.306543
|
1649 |
+
007470/008040, loss: 0.037553, avg_loss: 0.306415
|
1650 |
+
007475/008040, loss: 0.031671, avg_loss: 0.306279
|
1651 |
+
007480/008040, loss: 0.039051, avg_loss: 0.306135
|
1652 |
+
007485/008040, loss: 0.096327, avg_loss: 0.305981
|
1653 |
+
007490/008040, loss: 0.198999, avg_loss: 0.305869
|
1654 |
+
007495/008040, loss: 0.182575, avg_loss: 0.305730
|
1655 |
+
007500/008040, loss: 0.257397, avg_loss: 0.305617
|
1656 |
+
***** Running dev evaluation *****
|
1657 |
+
Num examples = 1042
|
1658 |
+
Instantaneous batch size per device = 32
|
1659 |
+
epoch 27, step 7504/8040: {'matthews_correlation': 0.22894062387495076}
|
1660 |
+
007505/008040, loss: 0.123742, avg_loss: 0.305505
|
1661 |
+
007510/008040, loss: 0.061330, avg_loss: 0.305378
|
1662 |
+
007515/008040, loss: 0.016107, avg_loss: 0.305229
|
1663 |
+
007520/008040, loss: 0.035412, avg_loss: 0.305081
|
1664 |
+
007525/008040, loss: 0.043585, avg_loss: 0.304929
|
1665 |
+
007530/008040, loss: 0.019222, avg_loss: 0.304760
|
1666 |
+
007535/008040, loss: 0.071748, avg_loss: 0.304596
|
1667 |
+
007540/008040, loss: 0.045426, avg_loss: 0.304455
|
1668 |
+
007545/008040, loss: 0.020044, avg_loss: 0.304301
|
1669 |
+
007550/008040, loss: 0.062295, avg_loss: 0.304174
|
1670 |
+
007555/008040, loss: 0.017569, avg_loss: 0.304044
|
1671 |
+
007560/008040, loss: 0.180191, avg_loss: 0.303921
|
1672 |
+
007565/008040, loss: 0.049493, avg_loss: 0.303774
|
1673 |
+
007570/008040, loss: 0.173383, avg_loss: 0.303647
|
1674 |
+
007575/008040, loss: 0.193030, avg_loss: 0.303516
|
1675 |
+
007580/008040, loss: 0.131787, avg_loss: 0.303401
|
1676 |
+
007585/008040, loss: 0.007346, avg_loss: 0.303243
|
1677 |
+
007590/008040, loss: 0.155826, avg_loss: 0.303116
|
1678 |
+
007595/008040, loss: 0.163601, avg_loss: 0.302978
|
1679 |
+
007600/008040, loss: 0.035214, avg_loss: 0.302842
|
1680 |
+
007605/008040, loss: 0.029514, avg_loss: 0.302685
|
1681 |
+
007610/008040, loss: 0.060444, avg_loss: 0.302529
|
1682 |
+
007615/008040, loss: 0.318817, avg_loss: 0.302458
|
1683 |
+
007620/008040, loss: 0.080297, avg_loss: 0.302338
|
1684 |
+
007625/008040, loss: 0.186482, avg_loss: 0.302204
|
1685 |
+
007630/008040, loss: 0.279500, avg_loss: 0.302077
|
1686 |
+
007635/008040, loss: 0.200537, avg_loss: 0.301970
|
1687 |
+
007640/008040, loss: 0.194061, avg_loss: 0.301894
|
1688 |
+
007645/008040, loss: 0.024023, avg_loss: 0.301730
|
1689 |
+
007650/008040, loss: 0.033270, avg_loss: 0.301606
|
1690 |
+
007655/008040, loss: 0.031241, avg_loss: 0.301439
|
1691 |
+
007660/008040, loss: 0.113729, avg_loss: 0.301307
|
1692 |
+
007665/008040, loss: 0.041298, avg_loss: 0.301153
|
1693 |
+
007670/008040, loss: 0.044293, avg_loss: 0.301029
|
1694 |
+
007675/008040, loss: 0.075025, avg_loss: 0.300890
|
1695 |
+
007680/008040, loss: 0.125628, avg_loss: 0.300743
|
1696 |
+
007685/008040, loss: 0.041533, avg_loss: 0.300654
|
1697 |
+
007690/008040, loss: 0.026194, avg_loss: 0.300504
|
1698 |
+
007695/008040, loss: 0.045489, avg_loss: 0.300366
|
1699 |
+
007700/008040, loss: 0.075324, avg_loss: 0.300245
|
1700 |
+
007705/008040, loss: 0.141094, avg_loss: 0.300132
|
1701 |
+
007710/008040, loss: 0.036195, avg_loss: 0.299978
|
1702 |
+
007715/008040, loss: 0.017604, avg_loss: 0.299834
|
1703 |
+
007720/008040, loss: 0.081078, avg_loss: 0.299699
|
1704 |
+
007725/008040, loss: 0.021621, avg_loss: 0.299564
|
1705 |
+
007730/008040, loss: 0.144577, avg_loss: 0.299449
|
1706 |
+
007735/008040, loss: 0.079561, avg_loss: 0.299298
|
1707 |
+
007740/008040, loss: 0.040003, avg_loss: 0.299200
|
1708 |
+
007745/008040, loss: 0.211169, avg_loss: 0.299085
|
1709 |
+
007750/008040, loss: 0.029886, avg_loss: 0.298986
|
1710 |
+
007755/008040, loss: 0.138877, avg_loss: 0.298871
|
1711 |
+
007760/008040, loss: 0.043360, avg_loss: 0.298753
|
1712 |
+
007765/008040, loss: 0.152495, avg_loss: 0.298619
|
1713 |
+
007770/008040, loss: 0.060497, avg_loss: 0.298466
|
1714 |
+
***** Running dev evaluation *****
|
1715 |
+
Num examples = 1042
|
1716 |
+
Instantaneous batch size per device = 32
|
1717 |
+
epoch 28, step 7772/8040: {'matthews_correlation': 0.23262243281540648}
|
1718 |
+
007775/008040, loss: 0.118019, avg_loss: 0.298329
|
1719 |
+
007780/008040, loss: 0.028191, avg_loss: 0.298189
|
1720 |
+
007785/008040, loss: 0.135941, avg_loss: 0.298065
|
1721 |
+
007790/008040, loss: 0.356724, avg_loss: 0.297975
|
1722 |
+
007795/008040, loss: 0.026043, avg_loss: 0.297834
|
1723 |
+
007800/008040, loss: 0.034852, avg_loss: 0.297691
|
1724 |
+
007805/008040, loss: 0.154475, avg_loss: 0.297570
|
1725 |
+
007810/008040, loss: 0.037241, avg_loss: 0.297409
|
1726 |
+
007815/008040, loss: 0.073721, avg_loss: 0.297300
|
1727 |
+
007820/008040, loss: 0.050705, avg_loss: 0.297156
|
1728 |
+
007825/008040, loss: 0.105259, avg_loss: 0.297006
|
1729 |
+
007830/008040, loss: 0.088514, avg_loss: 0.296893
|
1730 |
+
007835/008040, loss: 0.095720, avg_loss: 0.296728
|
1731 |
+
007840/008040, loss: 0.055406, avg_loss: 0.296592
|
1732 |
+
007845/008040, loss: 0.045261, avg_loss: 0.296458
|
1733 |
+
007850/008040, loss: 0.020414, avg_loss: 0.296311
|
1734 |
+
007855/008040, loss: 0.060944, avg_loss: 0.296162
|
1735 |
+
007860/008040, loss: 0.270967, avg_loss: 0.296048
|
1736 |
+
007865/008040, loss: 0.287739, avg_loss: 0.295965
|
1737 |
+
007870/008040, loss: 0.151463, avg_loss: 0.295820
|
1738 |
+
007875/008040, loss: 0.029142, avg_loss: 0.295686
|
1739 |
+
007880/008040, loss: 0.064737, avg_loss: 0.295532
|
1740 |
+
007885/008040, loss: 0.077103, avg_loss: 0.295377
|
1741 |
+
007890/008040, loss: 0.095900, avg_loss: 0.295271
|
1742 |
+
007895/008040, loss: 0.030873, avg_loss: 0.295135
|
1743 |
+
007900/008040, loss: 0.041561, avg_loss: 0.295050
|
1744 |
+
007905/008040, loss: 0.052920, avg_loss: 0.294904
|
1745 |
+
007910/008040, loss: 0.090048, avg_loss: 0.294785
|
1746 |
+
007915/008040, loss: 0.392368, avg_loss: 0.294698
|
1747 |
+
007920/008040, loss: 0.053813, avg_loss: 0.294579
|
1748 |
+
007925/008040, loss: 0.122067, avg_loss: 0.294466
|
1749 |
+
007930/008040, loss: 0.108241, avg_loss: 0.294332
|
1750 |
+
007935/008040, loss: 0.047713, avg_loss: 0.294191
|
1751 |
+
007940/008040, loss: 0.146655, avg_loss: 0.294085
|
1752 |
+
007945/008040, loss: 0.041561, avg_loss: 0.293967
|
1753 |
+
007950/008040, loss: 0.104168, avg_loss: 0.293888
|
1754 |
+
007955/008040, loss: 0.036348, avg_loss: 0.293742
|
1755 |
+
007960/008040, loss: 0.185095, avg_loss: 0.293636
|
1756 |
+
007965/008040, loss: 0.250438, avg_loss: 0.293519
|
1757 |
+
007970/008040, loss: 0.060160, avg_loss: 0.293385
|
1758 |
+
007975/008040, loss: 0.030413, avg_loss: 0.293244
|
1759 |
+
007980/008040, loss: 0.119690, avg_loss: 0.293127
|
1760 |
+
007985/008040, loss: 0.157349, avg_loss: 0.293020
|
1761 |
+
007990/008040, loss: 0.076855, avg_loss: 0.292911
|
1762 |
+
007995/008040, loss: 0.060965, avg_loss: 0.292781
|
1763 |
+
008000/008040, loss: 0.152858, avg_loss: 0.292687
|
1764 |
+
008005/008040, loss: 0.257621, avg_loss: 0.292574
|
1765 |
+
008010/008040, loss: 0.050748, avg_loss: 0.292464
|
1766 |
+
008015/008040, loss: 0.078382, avg_loss: 0.292323
|
1767 |
+
008020/008040, loss: 0.218891, avg_loss: 0.292214
|
1768 |
+
008025/008040, loss: 0.052570, avg_loss: 0.292084
|
1769 |
+
008030/008040, loss: 0.058064, avg_loss: 0.291971
|
1770 |
+
008035/008040, loss: 0.017564, avg_loss: 0.291854
|
1771 |
+
008040/008040, loss: 0.016145, avg_loss: 0.291706
|
1772 |
+
***** Running dev evaluation *****
|
1773 |
+
Num examples = 1042
|
1774 |
+
Instantaneous batch size per device = 32
|
1775 |
+
epoch 29, step 8040/8040: {'matthews_correlation': 0.20156166898476155}
|
1776 |
+
***** Running train evaluation *****
|
1777 |
+
Num examples = 8551
|
1778 |
+
Instantaneous batch size per device = 32
|
1779 |
+
Train Dataset Result: {'matthews_correlation': 0.9609897432355321}
|
1780 |
+
***** Running dev evaluation *****
|
1781 |
+
Num examples = 1042
|
1782 |
+
Instantaneous batch size per device = 32
|
1783 |
+
Dev Dataset Result: {'matthews_correlation': 0.20156166898476155}
|
1784 |
+
Training time 0:05:46
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f28a7d74920b730a1cff525d934075a8a8be71b7ae5780f8108d8f4091784d7c
|
3 |
+
size 34299149
|
result.txt
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{'matthews_correlation': 0.0}
|
2 |
+
{'matthews_correlation': 0.0}
|
3 |
+
{'matthews_correlation': 0.0}
|
4 |
+
{'matthews_correlation': 0.0}
|
5 |
+
{'matthews_correlation': 0.15816318746785782}
|
6 |
+
{'matthews_correlation': 0.1895854925674006}
|
7 |
+
{'matthews_correlation': 0.21307686539085852}
|
8 |
+
{'matthews_correlation': 0.22254373946847703}
|
9 |
+
{'matthews_correlation': 0.22951168079779777}
|
10 |
+
{'matthews_correlation': 0.23063296136375847}
|
11 |
+
{'matthews_correlation': 0.18813850606847293}
|
12 |
+
{'matthews_correlation': 0.20603205189543294}
|
13 |
+
{'matthews_correlation': 0.2118432448298745}
|
14 |
+
{'matthews_correlation': 0.20261239362380884}
|
15 |
+
{'matthews_correlation': 0.22518881045488998}
|
16 |
+
{'matthews_correlation': 0.24863648291608131}
|
17 |
+
{'matthews_correlation': 0.19984853723708582}
|
18 |
+
{'matthews_correlation': 0.23319244596326755}
|
19 |
+
{'matthews_correlation': 0.24348660475263997}
|
20 |
+
{'matthews_correlation': 0.2545245288314363}
|
21 |
+
{'matthews_correlation': 0.20994533418798944}
|
22 |
+
{'matthews_correlation': 0.21551745055261307}
|
23 |
+
{'matthews_correlation': 0.20483291444361929}
|
24 |
+
{'matthews_correlation': 0.24931944187781385}
|
25 |
+
{'matthews_correlation': 0.23227684406858393}
|
26 |
+
{'matthews_correlation': 0.2203939727085643}
|
27 |
+
{'matthews_correlation': 0.23696373689939254}
|
28 |
+
{'matthews_correlation': 0.22894062387495076}
|
29 |
+
{'matthews_correlation': 0.23262243281540648}
|
30 |
+
{'matthews_correlation': 0.20156166898476155}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_basic_tokenize": true, "model_max_length": 512, "name_or_path": "/home.local/jianwei/workspace/archive/SparseOptimizer/output/Layer_7_12_Hid_160_768_Head_10_12_IMRatio_3.5", "never_split": null, "special_tokens_map_file": "/home.local/jianwei/.cache/huggingface/transformers/b680d52711d2451bbd6c6b1700365d6d731977c1357ae86bd7227f61145d3be2.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "tokenizer_class": "BertTokenizer"}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|