hlillemark commited on
Commit
57c941c
·
verified ·
1 Parent(s): 804f338

Training in progress, step 1614

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f14be9add4fb96e7325ea0f040e22d4f32e28647872b7fa08ccd80eae22809df
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9191bc8bd1489a0fd25041a154d25a8bd44146fa1f3ac7cf25a1939fc8c7099
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56128a68abac6983c855be7755a96191b14259d181a8d63d6301327ccb468b7
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17371b6d5a3465bc8ede4ccf126c0f6f71ff7fc66f6516f049238ae83ac09854
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c52047d6cda2c75a1b619d2af100a1e0cac4aaf3a744901ddba78b607358bf
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2752c196f0fc24d9109e9a15f2c8ca55681748e22afa72aa08266074dd60afd
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd3285af1d82189a1c7d1cfea40a5c1639f9d15d2850ca77640d53b8b55ddadb
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03ca3a0b92586d72d948078bd44ba0ca510d7336edf9af211505a7cc62d21ae
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,81 +1,194 @@
1
- {"current_steps": 10, "total_steps": 670, "loss": 1.5237, "lr": 1.4925373134328358e-06, "epoch": 0.14925373134328357, "percentage": 1.49, "elapsed_time": "0:00:35", "remaining_time": "0:39:08"}
2
- {"current_steps": 20, "total_steps": 670, "loss": 0.8967, "lr": 2.9850746268656716e-06, "epoch": 0.29850746268656714, "percentage": 2.99, "elapsed_time": "0:01:07", "remaining_time": "0:36:22"}
3
- {"current_steps": 30, "total_steps": 670, "loss": 0.7064, "lr": 4.477611940298508e-06, "epoch": 0.44776119402985076, "percentage": 4.48, "elapsed_time": "0:01:39", "remaining_time": "0:35:29"}
4
- {"current_steps": 40, "total_steps": 670, "loss": 0.7563, "lr": 5.970149253731343e-06, "epoch": 0.5970149253731343, "percentage": 5.97, "elapsed_time": "0:02:11", "remaining_time": "0:34:36"}
5
- {"current_steps": 50, "total_steps": 670, "loss": 0.7504, "lr": 7.46268656716418e-06, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:44", "remaining_time": "0:33:58"}
6
- {"current_steps": 50, "total_steps": 670, "eval_loss": 0.7051475048065186, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:45", "remaining_time": "0:34:12"}
7
- {"current_steps": 60, "total_steps": 670, "loss": 0.6893, "lr": 8.955223880597016e-06, "epoch": 0.8955223880597015, "percentage": 8.96, "elapsed_time": "0:03:17", "remaining_time": "0:33:27"}
8
- {"current_steps": 70, "total_steps": 670, "loss": 0.7673, "lr": 9.999389284703265e-06, "epoch": 1.044776119402985, "percentage": 10.45, "elapsed_time": "0:03:51", "remaining_time": "0:33:03"}
9
- {"current_steps": 80, "total_steps": 670, "loss": 0.4853, "lr": 9.988536273658876e-06, "epoch": 1.1940298507462686, "percentage": 11.94, "elapsed_time": "0:04:23", "remaining_time": "0:32:22"}
10
- {"current_steps": 90, "total_steps": 670, "loss": 0.4875, "lr": 9.964145714351633e-06, "epoch": 1.3432835820895521, "percentage": 13.43, "elapsed_time": "0:04:54", "remaining_time": "0:31:39"}
11
- {"current_steps": 100, "total_steps": 670, "loss": 0.6328, "lr": 9.926283796211796e-06, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:05:26", "remaining_time": "0:31:00"}
12
- {"current_steps": 100, "total_steps": 670, "eval_loss": 0.6948466897010803, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:05:27", "remaining_time": "0:31:06"}
13
- {"current_steps": 110, "total_steps": 670, "loss": 0.5766, "lr": 9.87505326632108e-06, "epoch": 1.6417910447761193, "percentage": 16.42, "elapsed_time": "0:06:00", "remaining_time": "0:30:37"}
14
- {"current_steps": 120, "total_steps": 670, "loss": 0.5581, "lr": 9.810593150584658e-06, "epoch": 1.7910447761194028, "percentage": 17.91, "elapsed_time": "0:06:34", "remaining_time": "0:30:09"}
15
- {"current_steps": 130, "total_steps": 670, "loss": 0.5858, "lr": 9.733078376452172e-06, "epoch": 1.9402985074626866, "percentage": 19.4, "elapsed_time": "0:07:07", "remaining_time": "0:29:37"}
16
- {"current_steps": 140, "total_steps": 670, "loss": 0.3937, "lr": 9.642719298211602e-06, "epoch": 2.08955223880597, "percentage": 20.9, "elapsed_time": "0:07:41", "remaining_time": "0:29:08"}
17
- {"current_steps": 150, "total_steps": 670, "loss": 0.2823, "lr": 9.539761126144193e-06, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:08:14", "remaining_time": "0:28:35"}
18
- {"current_steps": 150, "total_steps": 670, "eval_loss": 0.8329303860664368, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:08:15", "remaining_time": "0:28:39"}
19
- {"current_steps": 160, "total_steps": 670, "loss": 0.3042, "lr": 9.424483261089584e-06, "epoch": 2.388059701492537, "percentage": 23.88, "elapsed_time": "0:08:47", "remaining_time": "0:28:01"}
20
- {"current_steps": 170, "total_steps": 670, "loss": 0.3432, "lr": 9.297198536226927e-06, "epoch": 2.5373134328358207, "percentage": 25.37, "elapsed_time": "0:09:19", "remaining_time": "0:27:24"}
21
- {"current_steps": 180, "total_steps": 670, "loss": 0.3385, "lr": 9.158252368129628e-06, "epoch": 2.6865671641791042, "percentage": 26.87, "elapsed_time": "0:09:50", "remaining_time": "0:26:47"}
22
- {"current_steps": 190, "total_steps": 670, "loss": 0.3322, "lr": 9.008021819397488e-06, "epoch": 2.835820895522388, "percentage": 28.36, "elapsed_time": "0:10:21", "remaining_time": "0:26:09"}
23
- {"current_steps": 200, "total_steps": 670, "loss": 0.3349, "lr": 8.846914575410035e-06, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:10:54", "remaining_time": "0:25:39"}
24
- {"current_steps": 200, "total_steps": 670, "eval_loss": 0.8281504511833191, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:10:56", "remaining_time": "0:25:41"}
25
- {"current_steps": 210, "total_steps": 670, "loss": 0.2317, "lr": 8.675367837977848e-06, "epoch": 3.1343283582089554, "percentage": 31.34, "elapsed_time": "0:11:28", "remaining_time": "0:25:07"}
26
- {"current_steps": 220, "total_steps": 670, "loss": 0.219, "lr": 8.49384713889421e-06, "epoch": 3.283582089552239, "percentage": 32.84, "elapsed_time": "0:12:01", "remaining_time": "0:24:36"}
27
- {"current_steps": 230, "total_steps": 670, "loss": 0.2004, "lr": 8.302845076606786e-06, "epoch": 3.4328358208955225, "percentage": 34.33, "elapsed_time": "0:12:35", "remaining_time": "0:24:04"}
28
- {"current_steps": 240, "total_steps": 670, "loss": 0.2106, "lr": 8.10287997943769e-06, "epoch": 3.582089552238806, "percentage": 35.82, "elapsed_time": "0:13:06", "remaining_time": "0:23:28"}
29
- {"current_steps": 250, "total_steps": 670, "loss": 0.2069, "lr": 7.894494498979558e-06, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:13:38", "remaining_time": "0:22:55"}
30
- {"current_steps": 250, "total_steps": 670, "eval_loss": 0.7403361797332764, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:13:40", "remaining_time": "0:22:57"}
31
- {"current_steps": 260, "total_steps": 670, "loss": 0.2099, "lr": 7.678254137484797e-06, "epoch": 3.8805970149253732, "percentage": 38.81, "elapsed_time": "0:14:11", "remaining_time": "0:22:22"}
32
- {"current_steps": 270, "total_steps": 670, "loss": 0.1866, "lr": 7.4547457132442895e-06, "epoch": 4.029850746268656, "percentage": 40.3, "elapsed_time": "0:14:42", "remaining_time": "0:21:47"}
33
- {"current_steps": 280, "total_steps": 670, "loss": 0.0992, "lr": 7.2245757681200835e-06, "epoch": 4.17910447761194, "percentage": 41.79, "elapsed_time": "0:15:15", "remaining_time": "0:21:15"}
34
- {"current_steps": 290, "total_steps": 670, "loss": 0.1249, "lr": 6.988368921553601e-06, "epoch": 4.3283582089552235, "percentage": 43.28, "elapsed_time": "0:15:47", "remaining_time": "0:20:42"}
35
- {"current_steps": 300, "total_steps": 670, "loss": 0.0942, "lr": 6.746766175516159e-06, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:16:18", "remaining_time": "0:20:07"}
36
- {"current_steps": 300, "total_steps": 670, "eval_loss": 0.8359870910644531, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:16:19", "remaining_time": "0:20:08"}
37
- {"current_steps": 310, "total_steps": 670, "loss": 0.1187, "lr": 6.500423175001705e-06, "epoch": 4.6268656716417915, "percentage": 46.27, "elapsed_time": "0:16:53", "remaining_time": "0:19:37"}
38
- {"current_steps": 320, "total_steps": 670, "loss": 0.131, "lr": 6.2500084287822925e-06, "epoch": 4.776119402985074, "percentage": 47.76, "elapsed_time": "0:17:27", "remaining_time": "0:19:05"}
39
- {"current_steps": 330, "total_steps": 670, "loss": 0.1343, "lr": 5.996201495254757e-06, "epoch": 4.925373134328359, "percentage": 49.25, "elapsed_time": "0:17:59", "remaining_time": "0:18:32"}
40
- {"current_steps": 340, "total_steps": 670, "loss": 0.1011, "lr": 5.73969113830165e-06, "epoch": 5.074626865671641, "percentage": 50.75, "elapsed_time": "0:18:30", "remaining_time": "0:17:57"}
41
- {"current_steps": 350, "total_steps": 670, "loss": 0.0553, "lr": 5.481173458170952e-06, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:19:03", "remaining_time": "0:17:25"}
42
- {"current_steps": 350, "total_steps": 670, "eval_loss": 0.8742682933807373, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:19:04", "remaining_time": "0:17:26"}
43
- {"current_steps": 360, "total_steps": 670, "loss": 0.061, "lr": 5.221350002446882e-06, "epoch": 5.373134328358209, "percentage": 53.73, "elapsed_time": "0:19:35", "remaining_time": "0:16:52"}
44
- {"current_steps": 370, "total_steps": 670, "loss": 0.0554, "lr": 4.96092586223808e-06, "epoch": 5.522388059701493, "percentage": 55.22, "elapsed_time": "0:20:08", "remaining_time": "0:16:19"}
45
- {"current_steps": 380, "total_steps": 670, "loss": 0.0627, "lr": 4.700607758749626e-06, "epoch": 5.6716417910447765, "percentage": 56.72, "elapsed_time": "0:20:41", "remaining_time": "0:15:47"}
46
- {"current_steps": 390, "total_steps": 670, "loss": 0.0642, "lr": 4.441102125431398e-06, "epoch": 5.82089552238806, "percentage": 58.21, "elapsed_time": "0:21:13", "remaining_time": "0:15:14"}
47
- {"current_steps": 400, "total_steps": 670, "loss": 0.0774, "lr": 4.183113190907349e-06, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:21:46", "remaining_time": "0:14:41"}
48
- {"current_steps": 400, "total_steps": 670, "eval_loss": 0.834354817867279, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:21:47", "remaining_time": "0:14:42"}
49
- {"current_steps": 410, "total_steps": 670, "loss": 0.0478, "lr": 3.927341067888065e-06, "epoch": 6.119402985074627, "percentage": 61.19, "elapsed_time": "0:22:20", "remaining_time": "0:14:10"}
50
- {"current_steps": 420, "total_steps": 670, "loss": 0.0356, "lr": 3.6744798532528137e-06, "epoch": 6.268656716417911, "percentage": 62.69, "elapsed_time": "0:22:53", "remaining_time": "0:13:37"}
51
- {"current_steps": 430, "total_steps": 670, "loss": 0.0264, "lr": 3.4252157444569478e-06, "epoch": 6.417910447761194, "percentage": 64.18, "elapsed_time": "0:23:24", "remaining_time": "0:13:04"}
52
- {"current_steps": 440, "total_steps": 670, "loss": 0.039, "lr": 3.1802251773762294e-06, "epoch": 6.567164179104478, "percentage": 65.67, "elapsed_time": "0:23:57", "remaining_time": "0:12:31"}
53
- {"current_steps": 450, "total_steps": 670, "loss": 0.0352, "lr": 2.9401729906414385e-06, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:24:30", "remaining_time": "0:11:58"}
54
- {"current_steps": 450, "total_steps": 670, "eval_loss": 0.9773427844047546, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:24:31", "remaining_time": "0:11:59"}
55
- {"current_steps": 460, "total_steps": 670, "loss": 0.043, "lr": 2.7057106214448216e-06, "epoch": 6.865671641791045, "percentage": 68.66, "elapsed_time": "0:25:04", "remaining_time": "0:11:26"}
56
- {"current_steps": 470, "total_steps": 670, "loss": 0.0236, "lr": 2.4774743377144265e-06, "epoch": 7.014925373134329, "percentage": 70.15, "elapsed_time": "0:25:36", "remaining_time": "0:10:54"}
57
- {"current_steps": 480, "total_steps": 670, "loss": 0.0173, "lr": 2.256083511453747e-06, "epoch": 7.164179104477612, "percentage": 71.64, "elapsed_time": "0:26:10", "remaining_time": "0:10:21"}
58
- {"current_steps": 490, "total_steps": 670, "loss": 0.0167, "lr": 2.042138937932388e-06, "epoch": 7.313432835820896, "percentage": 73.13, "elapsed_time": "0:26:42", "remaining_time": "0:09:48"}
59
- {"current_steps": 500, "total_steps": 670, "loss": 0.0176, "lr": 1.8362212052889827e-06, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:27:15", "remaining_time": "0:09:16"}
60
- {"current_steps": 500, "total_steps": 670, "eval_loss": 0.9960648417472839, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:27:16", "remaining_time": "0:09:16"}
61
- {"current_steps": 510, "total_steps": 670, "loss": 0.0175, "lr": 1.63888911897084e-06, "epoch": 7.611940298507463, "percentage": 76.12, "elapsed_time": "0:27:49", "remaining_time": "0:08:43"}
62
- {"current_steps": 520, "total_steps": 670, "loss": 0.0171, "lr": 1.4506781852859836e-06, "epoch": 7.7611940298507465, "percentage": 77.61, "elapsed_time": "0:28:21", "remaining_time": "0:08:10"}
63
- {"current_steps": 530, "total_steps": 670, "loss": 0.0123, "lr": 1.2720991581827852e-06, "epoch": 7.91044776119403, "percentage": 79.1, "elapsed_time": "0:28:54", "remaining_time": "0:07:38"}
64
- {"current_steps": 540, "total_steps": 670, "loss": 0.0061, "lr": 1.1036366532008552e-06, "epoch": 8.059701492537313, "percentage": 80.6, "elapsed_time": "0:29:27", "remaining_time": "0:07:05"}
65
- {"current_steps": 550, "total_steps": 670, "loss": 0.0083, "lr": 9.457478323545749e-07, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:30:00", "remaining_time": "0:06:32"}
66
- {"current_steps": 550, "total_steps": 670, "eval_loss": 1.016642689704895, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:30:01", "remaining_time": "0:06:33"}
67
- {"current_steps": 560, "total_steps": 670, "loss": 0.007, "lr": 7.988611635181099e-07, "epoch": 8.35820895522388, "percentage": 83.58, "elapsed_time": "0:30:35", "remaining_time": "0:06:00"}
68
- {"current_steps": 570, "total_steps": 670, "loss": 0.0119, "lr": 6.633752576786251e-07, "epoch": 8.507462686567164, "percentage": 85.07, "elapsed_time": "0:31:06", "remaining_time": "0:05:27"}
69
- {"current_steps": 580, "total_steps": 670, "loss": 0.0107, "lr": 5.396577872130676e-07, "epoch": 8.656716417910447, "percentage": 86.57, "elapsed_time": "0:31:38", "remaining_time": "0:04:54"}
70
- {"current_steps": 590, "total_steps": 670, "loss": 0.0041, "lr": 4.2804448812404754e-07, "epoch": 8.805970149253731, "percentage": 88.06, "elapsed_time": "0:32:11", "remaining_time": "0:04:21"}
71
- {"current_steps": 600, "total_steps": 670, "loss": 0.0075, "lr": 3.288382489424502e-07, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:32:44", "remaining_time": "0:03:49"}
72
- {"current_steps": 600, "total_steps": 670, "eval_loss": 1.043831467628479, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:32:45", "remaining_time": "0:03:49"}
73
- {"current_steps": 610, "total_steps": 670, "loss": 0.0047, "lr": 2.4230828876927293e-07, "epoch": 9.104477611940299, "percentage": 91.04, "elapsed_time": "0:33:17", "remaining_time": "0:03:16"}
74
- {"current_steps": 620, "total_steps": 670, "loss": 0.004, "lr": 1.6868942668726408e-07, "epoch": 9.253731343283581, "percentage": 92.54, "elapsed_time": "0:33:48", "remaining_time": "0:02:43"}
75
- {"current_steps": 630, "total_steps": 670, "loss": 0.005, "lr": 1.0818144452496293e-07, "epoch": 9.402985074626866, "percentage": 94.03, "elapsed_time": "0:34:20", "remaining_time": "0:02:10"}
76
- {"current_steps": 640, "total_steps": 670, "loss": 0.003, "lr": 6.094854470245326e-08, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:34:52", "remaining_time": "0:01:38"}
77
- {"current_steps": 650, "total_steps": 670, "loss": 0.0028, "lr": 2.711890463007405e-08, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:35:24", "remaining_time": "0:01:05"}
78
- {"current_steps": 650, "total_steps": 670, "eval_loss": 1.0667099952697754, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:35:25", "remaining_time": "0:01:05"}
79
- {"current_steps": 660, "total_steps": 670, "loss": 0.0046, "lr": 6.784328869339218e-09, "epoch": 9.850746268656717, "percentage": 98.51, "elapsed_time": "0:35:58", "remaining_time": "0:00:32"}
80
- {"current_steps": 670, "total_steps": 670, "loss": 0.0054, "lr": 0.0, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:36:30", "remaining_time": "0:00:00"}
81
- {"current_steps": 670, "total_steps": 670, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:38:11", "remaining_time": "0:00:00"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 1614, "loss": 0.8947, "lr": 6.17283950617284e-07, "epoch": 0.018570102135561744, "percentage": 0.62, "elapsed_time": "0:01:14", "remaining_time": "3:17:55"}
2
+ {"current_steps": 20, "total_steps": 1614, "loss": 0.6994, "lr": 1.234567901234568e-06, "epoch": 0.03714020427112349, "percentage": 1.24, "elapsed_time": "0:02:25", "remaining_time": "3:13:06"}
3
+ {"current_steps": 30, "total_steps": 1614, "loss": 0.624, "lr": 1.8518518518518519e-06, "epoch": 0.055710306406685235, "percentage": 1.86, "elapsed_time": "0:03:36", "remaining_time": "3:10:13"}
4
+ {"current_steps": 40, "total_steps": 1614, "loss": 0.4977, "lr": 2.469135802469136e-06, "epoch": 0.07428040854224698, "percentage": 2.48, "elapsed_time": "0:04:48", "remaining_time": "3:09:00"}
5
+ {"current_steps": 50, "total_steps": 1614, "loss": 0.4639, "lr": 3.08641975308642e-06, "epoch": 0.09285051067780872, "percentage": 3.1, "elapsed_time": "0:05:59", "remaining_time": "3:07:28"}
6
+ {"current_steps": 50, "total_steps": 1614, "eval_loss": 0.5398163199424744, "epoch": 0.09285051067780872, "percentage": 3.1, "elapsed_time": "0:06:19", "remaining_time": "3:17:36"}
7
+ {"current_steps": 60, "total_steps": 1614, "loss": 0.4468, "lr": 3.7037037037037037e-06, "epoch": 0.11142061281337047, "percentage": 3.72, "elapsed_time": "0:07:30", "remaining_time": "3:14:23"}
8
+ {"current_steps": 70, "total_steps": 1614, "loss": 0.3832, "lr": 4.3209876543209875e-06, "epoch": 0.12999071494893222, "percentage": 4.34, "elapsed_time": "0:08:41", "remaining_time": "3:11:48"}
9
+ {"current_steps": 80, "total_steps": 1614, "loss": 0.4276, "lr": 4.938271604938272e-06, "epoch": 0.14856081708449395, "percentage": 4.96, "elapsed_time": "0:09:53", "remaining_time": "3:09:49"}
10
+ {"current_steps": 90, "total_steps": 1614, "loss": 0.5276, "lr": 5.555555555555557e-06, "epoch": 0.1671309192200557, "percentage": 5.58, "elapsed_time": "0:11:06", "remaining_time": "3:07:58"}
11
+ {"current_steps": 100, "total_steps": 1614, "loss": 0.4939, "lr": 6.17283950617284e-06, "epoch": 0.18570102135561745, "percentage": 6.2, "elapsed_time": "0:12:16", "remaining_time": "3:05:54"}
12
+ {"current_steps": 100, "total_steps": 1614, "eval_loss": 0.5121593475341797, "epoch": 0.18570102135561745, "percentage": 6.2, "elapsed_time": "0:12:36", "remaining_time": "3:10:48"}
13
+ {"current_steps": 110, "total_steps": 1614, "loss": 0.4866, "lr": 6.790123456790124e-06, "epoch": 0.2042711234911792, "percentage": 6.82, "elapsed_time": "0:13:48", "remaining_time": "3:08:42"}
14
+ {"current_steps": 120, "total_steps": 1614, "loss": 0.4282, "lr": 7.4074074074074075e-06, "epoch": 0.22284122562674094, "percentage": 7.43, "elapsed_time": "0:14:59", "remaining_time": "3:06:36"}
15
+ {"current_steps": 130, "total_steps": 1614, "loss": 0.4918, "lr": 8.024691358024692e-06, "epoch": 0.2414113277623027, "percentage": 8.05, "elapsed_time": "0:16:11", "remaining_time": "3:04:55"}
16
+ {"current_steps": 140, "total_steps": 1614, "loss": 0.4841, "lr": 8.641975308641975e-06, "epoch": 0.25998142989786444, "percentage": 8.67, "elapsed_time": "0:17:22", "remaining_time": "3:03:01"}
17
+ {"current_steps": 150, "total_steps": 1614, "loss": 0.4822, "lr": 9.25925925925926e-06, "epoch": 0.2785515320334262, "percentage": 9.29, "elapsed_time": "0:18:33", "remaining_time": "3:01:11"}
18
+ {"current_steps": 150, "total_steps": 1614, "eval_loss": 0.5242471694946289, "epoch": 0.2785515320334262, "percentage": 9.29, "elapsed_time": "0:18:53", "remaining_time": "3:04:21"}
19
+ {"current_steps": 160, "total_steps": 1614, "loss": 0.4886, "lr": 9.876543209876543e-06, "epoch": 0.2971216341689879, "percentage": 9.91, "elapsed_time": "0:20:04", "remaining_time": "3:02:25"}
20
+ {"current_steps": 170, "total_steps": 1614, "loss": 0.4529, "lr": 9.99925101063302e-06, "epoch": 0.31569173630454966, "percentage": 10.53, "elapsed_time": "0:21:16", "remaining_time": "3:00:40"}
21
+ {"current_steps": 180, "total_steps": 1614, "loss": 0.4463, "lr": 9.99620862590714e-06, "epoch": 0.3342618384401114, "percentage": 11.15, "elapsed_time": "0:22:28", "remaining_time": "2:59:02"}
22
+ {"current_steps": 190, "total_steps": 1614, "loss": 0.4885, "lr": 9.990827457067342e-06, "epoch": 0.3528319405756732, "percentage": 11.77, "elapsed_time": "0:23:39", "remaining_time": "2:57:20"}
23
+ {"current_steps": 200, "total_steps": 1614, "loss": 0.4701, "lr": 9.983110023102148e-06, "epoch": 0.3714020427112349, "percentage": 12.39, "elapsed_time": "0:24:49", "remaining_time": "2:55:33"}
24
+ {"current_steps": 200, "total_steps": 1614, "eval_loss": 0.5521253943443298, "epoch": 0.3714020427112349, "percentage": 12.39, "elapsed_time": "0:25:09", "remaining_time": "2:57:50"}
25
+ {"current_steps": 210, "total_steps": 1614, "loss": 0.4983, "lr": 9.973059936633308e-06, "epoch": 0.38997214484679665, "percentage": 13.01, "elapsed_time": "0:26:20", "remaining_time": "2:56:07"}
26
+ {"current_steps": 220, "total_steps": 1614, "loss": 0.4983, "lr": 9.960681902224692e-06, "epoch": 0.4085422469823584, "percentage": 13.63, "elapsed_time": "0:27:30", "remaining_time": "2:54:17"}
27
+ {"current_steps": 230, "total_steps": 1614, "loss": 0.493, "lr": 9.945981714180021e-06, "epoch": 0.4271123491179202, "percentage": 14.25, "elapsed_time": "0:28:42", "remaining_time": "2:52:45"}
28
+ {"current_steps": 240, "total_steps": 1614, "loss": 0.4753, "lr": 9.928966253830492e-06, "epoch": 0.4456824512534819, "percentage": 14.87, "elapsed_time": "0:29:53", "remaining_time": "2:51:10"}
29
+ {"current_steps": 250, "total_steps": 1614, "loss": 0.4216, "lr": 9.909643486313533e-06, "epoch": 0.46425255338904364, "percentage": 15.49, "elapsed_time": "0:31:05", "remaining_time": "2:49:40"}
30
+ {"current_steps": 250, "total_steps": 1614, "eval_loss": 0.5373674631118774, "epoch": 0.46425255338904364, "percentage": 15.49, "elapsed_time": "0:31:25", "remaining_time": "2:51:26"}
31
+ {"current_steps": 260, "total_steps": 1614, "loss": 0.4874, "lr": 9.888022456844251e-06, "epoch": 0.4828226555246054, "percentage": 16.11, "elapsed_time": "0:32:36", "remaining_time": "2:49:48"}
32
+ {"current_steps": 270, "total_steps": 1614, "loss": 0.4499, "lr": 9.864113286481237e-06, "epoch": 0.5013927576601671, "percentage": 16.73, "elapsed_time": "0:33:46", "remaining_time": "2:48:08"}
33
+ {"current_steps": 280, "total_steps": 1614, "loss": 0.4604, "lr": 9.837927167388793e-06, "epoch": 0.5199628597957289, "percentage": 17.35, "elapsed_time": "0:34:58", "remaining_time": "2:46:38"}
34
+ {"current_steps": 290, "total_steps": 1614, "loss": 0.4697, "lr": 9.809476357597738e-06, "epoch": 0.5385329619312906, "percentage": 17.97, "elapsed_time": "0:36:10", "remaining_time": "2:45:09"}
35
+ {"current_steps": 300, "total_steps": 1614, "loss": 0.4159, "lr": 9.778774175267294e-06, "epoch": 0.5571030640668524, "percentage": 18.59, "elapsed_time": "0:37:21", "remaining_time": "2:43:38"}
36
+ {"current_steps": 300, "total_steps": 1614, "eval_loss": 0.5146207213401794, "epoch": 0.5571030640668524, "percentage": 18.59, "elapsed_time": "0:37:41", "remaining_time": "2:45:03"}
37
+ {"current_steps": 310, "total_steps": 1614, "loss": 0.487, "lr": 9.745834992450688e-06, "epoch": 0.5756731662024142, "percentage": 19.21, "elapsed_time": "0:38:52", "remaining_time": "2:43:31"}
38
+ {"current_steps": 320, "total_steps": 1614, "loss": 0.4751, "lr": 9.710674228367422e-06, "epoch": 0.5942432683379758, "percentage": 19.83, "elapsed_time": "0:40:03", "remaining_time": "2:42:00"}
39
+ {"current_steps": 330, "total_steps": 1614, "loss": 0.4289, "lr": 9.673308342185366e-06, "epoch": 0.6128133704735376, "percentage": 20.45, "elapsed_time": "0:41:15", "remaining_time": "2:40:31"}
40
+ {"current_steps": 340, "total_steps": 1614, "loss": 0.4783, "lr": 9.633754825316015e-06, "epoch": 0.6313834726090993, "percentage": 21.07, "elapsed_time": "0:42:27", "remaining_time": "2:39:06"}
41
+ {"current_steps": 350, "total_steps": 1614, "loss": 0.4502, "lr": 9.592032193226564e-06, "epoch": 0.6499535747446611, "percentage": 21.69, "elapsed_time": "0:43:39", "remaining_time": "2:37:41"}
42
+ {"current_steps": 350, "total_steps": 1614, "eval_loss": 0.5022189617156982, "epoch": 0.6499535747446611, "percentage": 21.69, "elapsed_time": "0:43:59", "remaining_time": "2:38:51"}
43
+ {"current_steps": 360, "total_steps": 1614, "loss": 0.4341, "lr": 9.548159976772593e-06, "epoch": 0.6685236768802229, "percentage": 22.3, "elapsed_time": "0:45:11", "remaining_time": "2:37:23"}
44
+ {"current_steps": 370, "total_steps": 1614, "loss": 0.4612, "lr": 9.502158713055444e-06, "epoch": 0.6870937790157846, "percentage": 22.92, "elapsed_time": "0:46:22", "remaining_time": "2:35:55"}
45
+ {"current_steps": 380, "total_steps": 1614, "loss": 0.4352, "lr": 9.454049935808568e-06, "epoch": 0.7056638811513464, "percentage": 23.54, "elapsed_time": "0:47:34", "remaining_time": "2:34:29"}
46
+ {"current_steps": 390, "total_steps": 1614, "loss": 0.3996, "lr": 9.403856165317322e-06, "epoch": 0.724233983286908, "percentage": 24.16, "elapsed_time": "0:48:45", "remaining_time": "2:33:00"}
47
+ {"current_steps": 400, "total_steps": 1614, "loss": 0.4625, "lr": 9.351600897876964e-06, "epoch": 0.7428040854224698, "percentage": 24.78, "elapsed_time": "0:49:56", "remaining_time": "2:31:35"}
48
+ {"current_steps": 400, "total_steps": 1614, "eval_loss": 0.49847906827926636, "epoch": 0.7428040854224698, "percentage": 24.78, "elapsed_time": "0:50:16", "remaining_time": "2:32:34"}
49
+ {"current_steps": 410, "total_steps": 1614, "loss": 0.3962, "lr": 9.297308594793757e-06, "epoch": 0.7613741875580315, "percentage": 25.4, "elapsed_time": "0:51:27", "remaining_time": "2:31:06"}
50
+ {"current_steps": 420, "total_steps": 1614, "loss": 0.458, "lr": 9.241004670934348e-06, "epoch": 0.7799442896935933, "percentage": 26.02, "elapsed_time": "0:52:40", "remaining_time": "2:29:43"}
51
+ {"current_steps": 430, "total_steps": 1614, "loss": 0.3995, "lr": 9.182715482828764e-06, "epoch": 0.7985143918291551, "percentage": 26.64, "elapsed_time": "0:53:50", "remaining_time": "2:28:15"}
52
+ {"current_steps": 440, "total_steps": 1614, "loss": 0.5064, "lr": 9.122468316332611e-06, "epoch": 0.8170844939647168, "percentage": 27.26, "elapsed_time": "0:55:01", "remaining_time": "2:26:48"}
53
+ {"current_steps": 450, "total_steps": 1614, "loss": 0.4313, "lr": 9.060291373854252e-06, "epoch": 0.8356545961002786, "percentage": 27.88, "elapsed_time": "0:56:13", "remaining_time": "2:25:26"}
54
+ {"current_steps": 450, "total_steps": 1614, "eval_loss": 0.47159674763679504, "epoch": 0.8356545961002786, "percentage": 27.88, "elapsed_time": "0:56:32", "remaining_time": "2:26:16"}
55
+ {"current_steps": 460, "total_steps": 1614, "loss": 0.3925, "lr": 8.99621376115291e-06, "epoch": 0.8542246982358404, "percentage": 28.5, "elapsed_time": "0:57:45", "remaining_time": "2:24:53"}
56
+ {"current_steps": 470, "total_steps": 1614, "loss": 0.4499, "lr": 8.930265473713939e-06, "epoch": 0.872794800371402, "percentage": 29.12, "elapsed_time": "0:58:56", "remaining_time": "2:23:28"}
57
+ {"current_steps": 480, "total_steps": 1614, "loss": 0.3699, "lr": 8.862477382707569e-06, "epoch": 0.8913649025069638, "percentage": 29.74, "elapsed_time": "1:00:07", "remaining_time": "2:22:03"}
58
+ {"current_steps": 490, "total_steps": 1614, "loss": 0.4436, "lr": 8.792881220537752e-06, "epoch": 0.9099350046425255, "percentage": 30.36, "elapsed_time": "1:01:19", "remaining_time": "2:20:40"}
59
+ {"current_steps": 500, "total_steps": 1614, "loss": 0.4472, "lr": 8.721509565987858e-06, "epoch": 0.9285051067780873, "percentage": 30.98, "elapsed_time": "1:02:30", "remaining_time": "2:19:16"}
60
+ {"current_steps": 500, "total_steps": 1614, "eval_loss": 0.47707709670066833, "epoch": 0.9285051067780873, "percentage": 30.98, "elapsed_time": "1:02:49", "remaining_time": "2:19:59"}
61
+ {"current_steps": 510, "total_steps": 1614, "loss": 0.4165, "lr": 8.64839582897015e-06, "epoch": 0.947075208913649, "percentage": 31.6, "elapsed_time": "1:04:00", "remaining_time": "2:18:34"}
62
+ {"current_steps": 520, "total_steps": 1614, "loss": 0.4319, "lr": 8.573574234886217e-06, "epoch": 0.9656453110492108, "percentage": 32.22, "elapsed_time": "1:05:10", "remaining_time": "2:17:08"}
63
+ {"current_steps": 530, "total_steps": 1614, "loss": 0.4254, "lr": 8.497079808605659e-06, "epoch": 0.9842154131847726, "percentage": 32.84, "elapsed_time": "1:06:21", "remaining_time": "2:15:44"}
64
+ {"current_steps": 540, "total_steps": 1614, "loss": 0.4035, "lr": 8.418948358070535e-06, "epoch": 1.0018570102135562, "percentage": 33.46, "elapsed_time": "1:07:29", "remaining_time": "2:14:13"}
65
+ {"current_steps": 550, "total_steps": 1614, "loss": 0.2753, "lr": 8.339216457533244e-06, "epoch": 1.020427112349118, "percentage": 34.08, "elapsed_time": "1:08:40", "remaining_time": "2:12:50"}
66
+ {"current_steps": 550, "total_steps": 1614, "eval_loss": 0.5026086568832397, "epoch": 1.020427112349118, "percentage": 34.08, "elapsed_time": "1:08:59", "remaining_time": "2:13:28"}
67
+ {"current_steps": 560, "total_steps": 1614, "loss": 0.2879, "lr": 8.257921430435678e-06, "epoch": 1.0389972144846797, "percentage": 34.7, "elapsed_time": "1:10:11", "remaining_time": "2:12:06"}
68
+ {"current_steps": 570, "total_steps": 1614, "loss": 0.2567, "lr": 8.175101331937692e-06, "epoch": 1.0575673166202415, "percentage": 35.32, "elapsed_time": "1:11:23", "remaining_time": "2:10:46"}
69
+ {"current_steps": 580, "total_steps": 1614, "loss": 0.2597, "lr": 8.090794931103026e-06, "epoch": 1.076137418755803, "percentage": 35.94, "elapsed_time": "1:12:35", "remaining_time": "2:09:25"}
70
+ {"current_steps": 590, "total_steps": 1614, "loss": 0.2831, "lr": 8.005041692751055e-06, "epoch": 1.0947075208913648, "percentage": 36.56, "elapsed_time": "1:13:48", "remaining_time": "2:08:05"}
71
+ {"current_steps": 600, "total_steps": 1614, "loss": 0.2877, "lr": 7.917881758982838e-06, "epoch": 1.1132776230269266, "percentage": 37.17, "elapsed_time": "1:14:59", "remaining_time": "2:06:43"}
72
+ {"current_steps": 600, "total_steps": 1614, "eval_loss": 0.4783521890640259, "epoch": 1.1132776230269266, "percentage": 37.17, "elapsed_time": "1:15:18", "remaining_time": "2:07:16"}
73
+ {"current_steps": 610, "total_steps": 1614, "loss": 0.2965, "lr": 7.829355930390126e-06, "epoch": 1.1318477251624883, "percentage": 37.79, "elapsed_time": "1:16:30", "remaining_time": "2:05:56"}
74
+ {"current_steps": 620, "total_steps": 1614, "loss": 0.2889, "lr": 7.739505646956136e-06, "epoch": 1.15041782729805, "percentage": 38.41, "elapsed_time": "1:17:43", "remaining_time": "2:04:36"}
75
+ {"current_steps": 630, "total_steps": 1614, "loss": 0.2891, "lr": 7.648372968656995e-06, "epoch": 1.1689879294336118, "percentage": 39.03, "elapsed_time": "1:18:54", "remaining_time": "2:03:14"}
76
+ {"current_steps": 640, "total_steps": 1614, "loss": 0.2774, "lr": 7.5560005557729664e-06, "epoch": 1.1875580315691736, "percentage": 39.65, "elapsed_time": "1:20:06", "remaining_time": "2:01:54"}
77
+ {"current_steps": 650, "total_steps": 1614, "loss": 0.3038, "lr": 7.462431648918689e-06, "epoch": 1.2061281337047354, "percentage": 40.27, "elapsed_time": "1:21:17", "remaining_time": "2:00:33"}
78
+ {"current_steps": 650, "total_steps": 1614, "eval_loss": 0.4794943630695343, "epoch": 1.2061281337047354, "percentage": 40.27, "elapsed_time": "1:21:36", "remaining_time": "2:01:02"}
79
+ {"current_steps": 660, "total_steps": 1614, "loss": 0.292, "lr": 7.367710048801715e-06, "epoch": 1.2246982358402971, "percentage": 40.89, "elapsed_time": "1:22:48", "remaining_time": "1:59:41"}
80
+ {"current_steps": 670, "total_steps": 1614, "loss": 0.2753, "lr": 7.271880095718895e-06, "epoch": 1.243268337975859, "percentage": 41.51, "elapsed_time": "1:24:00", "remaining_time": "1:58:21"}
81
+ {"current_steps": 680, "total_steps": 1614, "loss": 0.2802, "lr": 7.1749866488001604e-06, "epoch": 1.2618384401114207, "percentage": 42.13, "elapsed_time": "1:25:11", "remaining_time": "1:57:00"}
82
+ {"current_steps": 690, "total_steps": 1614, "loss": 0.284, "lr": 7.0770750650094335e-06, "epoch": 1.2804085422469824, "percentage": 42.75, "elapsed_time": "1:26:22", "remaining_time": "1:55:40"}
83
+ {"current_steps": 700, "total_steps": 1614, "loss": 0.2944, "lr": 6.978191177912499e-06, "epoch": 1.2989786443825442, "percentage": 43.37, "elapsed_time": "1:27:34", "remaining_time": "1:54:20"}
84
+ {"current_steps": 700, "total_steps": 1614, "eval_loss": 0.4681582748889923, "epoch": 1.2989786443825442, "percentage": 43.37, "elapsed_time": "1:27:53", "remaining_time": "1:54:46"}
85
+ {"current_steps": 710, "total_steps": 1614, "loss": 0.2786, "lr": 6.878381276221777e-06, "epoch": 1.317548746518106, "percentage": 43.99, "elapsed_time": "1:29:05", "remaining_time": "1:53:26"}
86
+ {"current_steps": 720, "total_steps": 1614, "loss": 0.2748, "lr": 6.777692082128024e-06, "epoch": 1.3361188486536677, "percentage": 44.61, "elapsed_time": "1:30:17", "remaining_time": "1:52:06"}
87
+ {"current_steps": 730, "total_steps": 1614, "loss": 0.2635, "lr": 6.676170729429132e-06, "epoch": 1.3546889507892295, "percentage": 45.23, "elapsed_time": "1:31:27", "remaining_time": "1:50:44"}
88
+ {"current_steps": 740, "total_steps": 1614, "loss": 0.2978, "lr": 6.573864741466236e-06, "epoch": 1.3732590529247912, "percentage": 45.85, "elapsed_time": "1:32:38", "remaining_time": "1:49:24"}
89
+ {"current_steps": 750, "total_steps": 1614, "loss": 0.2722, "lr": 6.470822008877482e-06, "epoch": 1.3918291550603528, "percentage": 46.47, "elapsed_time": "1:33:50", "remaining_time": "1:48:06"}
90
+ {"current_steps": 750, "total_steps": 1614, "eval_loss": 0.4681358337402344, "epoch": 1.3918291550603528, "percentage": 46.47, "elapsed_time": "1:34:09", "remaining_time": "1:48:28"}
91
+ {"current_steps": 760, "total_steps": 1614, "loss": 0.2691, "lr": 6.367090767179855e-06, "epoch": 1.4103992571959145, "percentage": 47.09, "elapsed_time": "1:35:20", "remaining_time": "1:47:07"}
92
+ {"current_steps": 770, "total_steps": 1614, "loss": 0.2974, "lr": 6.262719574189564e-06, "epoch": 1.4289693593314763, "percentage": 47.71, "elapsed_time": "1:36:31", "remaining_time": "1:45:48"}
93
+ {"current_steps": 780, "total_steps": 1614, "loss": 0.272, "lr": 6.157757287291557e-06, "epoch": 1.447539461467038, "percentage": 48.33, "elapsed_time": "1:37:43", "remaining_time": "1:44:29"}
94
+ {"current_steps": 790, "total_steps": 1614, "loss": 0.2924, "lr": 6.052253040568804e-06, "epoch": 1.4661095636025998, "percentage": 48.95, "elapsed_time": "1:38:55", "remaining_time": "1:43:11"}
95
+ {"current_steps": 800, "total_steps": 1614, "loss": 0.2734, "lr": 5.946256221802052e-06, "epoch": 1.4846796657381616, "percentage": 49.57, "elapsed_time": "1:40:07", "remaining_time": "1:41:52"}
96
+ {"current_steps": 800, "total_steps": 1614, "eval_loss": 0.4480016827583313, "epoch": 1.4846796657381616, "percentage": 49.57, "elapsed_time": "1:40:26", "remaining_time": "1:42:12"}
97
+ {"current_steps": 810, "total_steps": 1614, "loss": 0.2277, "lr": 5.839816449350824e-06, "epoch": 1.5032497678737233, "percentage": 50.19, "elapsed_time": "1:41:38", "remaining_time": "1:40:53"}
98
+ {"current_steps": 820, "total_steps": 1614, "loss": 0.296, "lr": 5.7329835489264855e-06, "epoch": 1.521819870009285, "percentage": 50.81, "elapsed_time": "1:42:51", "remaining_time": "1:39:35"}
99
+ {"current_steps": 830, "total_steps": 1614, "loss": 0.2521, "lr": 5.62580753026823e-06, "epoch": 1.5403899721448466, "percentage": 51.43, "elapsed_time": "1:44:02", "remaining_time": "1:38:16"}
100
+ {"current_steps": 840, "total_steps": 1614, "loss": 0.2523, "lr": 5.518338563732945e-06, "epoch": 1.5589600742804084, "percentage": 52.04, "elapsed_time": "1:45:14", "remaining_time": "1:36:58"}
101
+ {"current_steps": 850, "total_steps": 1614, "loss": 0.2826, "lr": 5.410626956809864e-06, "epoch": 1.5775301764159702, "percentage": 52.66, "elapsed_time": "1:46:26", "remaining_time": "1:35:40"}
102
+ {"current_steps": 850, "total_steps": 1614, "eval_loss": 0.44835272431373596, "epoch": 1.5775301764159702, "percentage": 52.66, "elapsed_time": "1:46:45", "remaining_time": "1:35:57"}
103
+ {"current_steps": 860, "total_steps": 1614, "loss": 0.2436, "lr": 5.30272313057105e-06, "epoch": 1.596100278551532, "percentage": 53.28, "elapsed_time": "1:47:56", "remaining_time": "1:34:38"}
104
+ {"current_steps": 870, "total_steps": 1614, "loss": 0.2633, "lr": 5.194677596068689e-06, "epoch": 1.6146703806870937, "percentage": 53.9, "elapsed_time": "1:49:07", "remaining_time": "1:33:19"}
105
+ {"current_steps": 880, "total_steps": 1614, "loss": 0.2525, "lr": 5.0865409306902755e-06, "epoch": 1.6332404828226554, "percentage": 54.52, "elapsed_time": "1:50:20", "remaining_time": "1:32:01"}
106
+ {"current_steps": 890, "total_steps": 1614, "loss": 0.2776, "lr": 4.978363754482741e-06, "epoch": 1.6518105849582172, "percentage": 55.14, "elapsed_time": "1:51:31", "remaining_time": "1:30:43"}
107
+ {"current_steps": 900, "total_steps": 1614, "loss": 0.2344, "lr": 4.870196706456609e-06, "epoch": 1.670380687093779, "percentage": 55.76, "elapsed_time": "1:52:42", "remaining_time": "1:29:24"}
108
+ {"current_steps": 900, "total_steps": 1614, "eval_loss": 0.43884018063545227, "epoch": 1.670380687093779, "percentage": 55.76, "elapsed_time": "1:53:01", "remaining_time": "1:29:40"}
109
+ {"current_steps": 910, "total_steps": 1614, "loss": 0.247, "lr": 4.762090420881289e-06, "epoch": 1.6889507892293407, "percentage": 56.38, "elapsed_time": "1:54:12", "remaining_time": "1:28:21"}
110
+ {"current_steps": 920, "total_steps": 1614, "loss": 0.2864, "lr": 4.654095503582568e-06, "epoch": 1.7075208913649025, "percentage": 57.0, "elapsed_time": "1:55:23", "remaining_time": "1:27:02"}
111
+ {"current_steps": 930, "total_steps": 1614, "loss": 0.248, "lr": 4.546262508253429e-06, "epoch": 1.7260909935004642, "percentage": 57.62, "elapsed_time": "1:56:35", "remaining_time": "1:25:44"}
112
+ {"current_steps": 940, "total_steps": 1614, "loss": 0.265, "lr": 4.438641912789277e-06, "epoch": 1.744661095636026, "percentage": 58.24, "elapsed_time": "1:57:46", "remaining_time": "1:24:26"}
113
+ {"current_steps": 950, "total_steps": 1614, "loss": 0.2437, "lr": 4.331284095658637e-06, "epoch": 1.7632311977715878, "percentage": 58.86, "elapsed_time": "1:58:58", "remaining_time": "1:23:09"}
114
+ {"current_steps": 950, "total_steps": 1614, "eval_loss": 0.42715513706207275, "epoch": 1.7632311977715878, "percentage": 58.86, "elapsed_time": "1:59:17", "remaining_time": "1:23:22"}
115
+ {"current_steps": 960, "total_steps": 1614, "loss": 0.2493, "lr": 4.224239312320399e-06, "epoch": 1.7818012999071495, "percentage": 59.48, "elapsed_time": "2:00:29", "remaining_time": "1:22:04"}
116
+ {"current_steps": 970, "total_steps": 1614, "loss": 0.2371, "lr": 4.117557671698648e-06, "epoch": 1.8003714020427113, "percentage": 60.1, "elapsed_time": "2:01:40", "remaining_time": "1:20:46"}
117
+ {"current_steps": 980, "total_steps": 1614, "loss": 0.2605, "lr": 4.011289112726085e-06, "epoch": 1.818941504178273, "percentage": 60.72, "elapsed_time": "2:02:51", "remaining_time": "1:19:29"}
118
+ {"current_steps": 990, "total_steps": 1614, "loss": 0.2457, "lr": 3.905483380967027e-06, "epoch": 1.8375116063138348, "percentage": 61.34, "elapsed_time": "2:04:02", "remaining_time": "1:18:10"}
119
+ {"current_steps": 1000, "total_steps": 1614, "loss": 0.2113, "lr": 3.800190005330918e-06, "epoch": 1.8560817084493966, "percentage": 61.96, "elapsed_time": "2:05:13", "remaining_time": "1:16:53"}
120
+ {"current_steps": 1000, "total_steps": 1614, "eval_loss": 0.4232879877090454, "epoch": 1.8560817084493966, "percentage": 61.96, "elapsed_time": "2:05:33", "remaining_time": "1:17:05"}
121
+ {"current_steps": 1010, "total_steps": 1614, "loss": 0.2709, "lr": 3.695458274887268e-06, "epoch": 1.8746518105849583, "percentage": 62.58, "elapsed_time": "2:06:45", "remaining_time": "1:15:48"}
122
+ {"current_steps": 1020, "total_steps": 1614, "loss": 0.2678, "lr": 3.5913372157928515e-06, "epoch": 1.89322191272052, "percentage": 63.2, "elapsed_time": "2:07:56", "remaining_time": "1:14:30"}
123
+ {"current_steps": 1030, "total_steps": 1614, "loss": 0.2439, "lr": 3.487875568341995e-06, "epoch": 1.9117920148560819, "percentage": 63.82, "elapsed_time": "2:09:06", "remaining_time": "1:13:12"}
124
+ {"current_steps": 1040, "total_steps": 1614, "loss": 0.2691, "lr": 3.3851217641506657e-06, "epoch": 1.9303621169916436, "percentage": 64.44, "elapsed_time": "2:10:18", "remaining_time": "1:11:55"}
125
+ {"current_steps": 1050, "total_steps": 1614, "loss": 0.2548, "lr": 3.2831239034850593e-06, "epoch": 1.9489322191272052, "percentage": 65.06, "elapsed_time": "2:11:29", "remaining_time": "1:10:38"}
126
+ {"current_steps": 1050, "total_steps": 1614, "eval_loss": 0.4117203652858734, "epoch": 1.9489322191272052, "percentage": 65.06, "elapsed_time": "2:11:49", "remaining_time": "1:10:48"}
127
+ {"current_steps": 1060, "total_steps": 1614, "loss": 0.2381, "lr": 3.1819297327453045e-06, "epoch": 1.967502321262767, "percentage": 65.68, "elapsed_time": "2:13:02", "remaining_time": "1:09:31"}
128
+ {"current_steps": 1070, "total_steps": 1614, "loss": 0.219, "lr": 3.081586622114809e-06, "epoch": 1.9860724233983287, "percentage": 66.29, "elapsed_time": "2:14:13", "remaining_time": "1:08:14"}
129
+ {"current_steps": 1080, "total_steps": 1614, "loss": 0.245, "lr": 2.9821415433857174e-06, "epoch": 2.0037140204271124, "percentage": 66.91, "elapsed_time": "2:15:21", "remaining_time": "1:06:55"}
130
+ {"current_steps": 1090, "total_steps": 1614, "loss": 0.1082, "lr": 2.8836410479708625e-06, "epoch": 2.022284122562674, "percentage": 67.53, "elapsed_time": "2:16:33", "remaining_time": "1:05:38"}
131
+ {"current_steps": 1100, "total_steps": 1614, "loss": 0.1126, "lr": 2.786131245112495e-06, "epoch": 2.040854224698236, "percentage": 68.15, "elapsed_time": "2:17:45", "remaining_time": "1:04:22"}
132
+ {"current_steps": 1100, "total_steps": 1614, "eval_loss": 0.5031464695930481, "epoch": 2.040854224698236, "percentage": 68.15, "elapsed_time": "2:18:04", "remaining_time": "1:04:31"}
133
+ {"current_steps": 1110, "total_steps": 1614, "loss": 0.1195, "lr": 2.689657780298019e-06, "epoch": 2.0594243268337977, "percentage": 68.77, "elapsed_time": "2:19:17", "remaining_time": "1:03:14"}
134
+ {"current_steps": 1120, "total_steps": 1614, "loss": 0.1006, "lr": 2.5942658138927866e-06, "epoch": 2.0779944289693595, "percentage": 69.39, "elapsed_time": "2:20:28", "remaining_time": "1:01:57"}
135
+ {"current_steps": 1130, "total_steps": 1614, "loss": 0.1069, "lr": 2.5000000000000015e-06, "epoch": 2.0965645311049212, "percentage": 70.01, "elapsed_time": "2:21:39", "remaining_time": "1:00:40"}
136
+ {"current_steps": 1140, "total_steps": 1614, "loss": 0.0936, "lr": 2.406904465557614e-06, "epoch": 2.115134633240483, "percentage": 70.63, "elapsed_time": "2:22:51", "remaining_time": "0:59:24"}
137
+ {"current_steps": 1150, "total_steps": 1614, "loss": 0.1128, "lr": 2.3150227896819782e-06, "epoch": 2.1337047353760448, "percentage": 71.25, "elapsed_time": "2:24:02", "remaining_time": "0:58:06"}
138
+ {"current_steps": 1150, "total_steps": 1614, "eval_loss": 0.48206087946891785, "epoch": 2.1337047353760448, "percentage": 71.25, "elapsed_time": "2:24:21", "remaining_time": "0:58:14"}
139
+ {"current_steps": 1160, "total_steps": 1614, "loss": 0.1172, "lr": 2.2243979832679515e-06, "epoch": 2.152274837511606, "percentage": 71.87, "elapsed_time": "2:25:34", "remaining_time": "0:56:58"}
140
+ {"current_steps": 1170, "total_steps": 1614, "loss": 0.1098, "lr": 2.1350724688549906e-06, "epoch": 2.170844939647168, "percentage": 72.49, "elapsed_time": "2:26:45", "remaining_time": "0:55:41"}
141
+ {"current_steps": 1180, "total_steps": 1614, "loss": 0.0953, "lr": 2.0470880607686605e-06, "epoch": 2.1894150417827296, "percentage": 73.11, "elapsed_time": "2:27:57", "remaining_time": "0:54:25"}
142
+ {"current_steps": 1190, "total_steps": 1614, "loss": 0.1081, "lr": 1.9604859455468587e-06, "epoch": 2.2079851439182914, "percentage": 73.73, "elapsed_time": "2:29:10", "remaining_time": "0:53:08"}
143
+ {"current_steps": 1200, "total_steps": 1614, "loss": 0.0993, "lr": 1.8753066626599086e-06, "epoch": 2.226555246053853, "percentage": 74.35, "elapsed_time": "2:30:21", "remaining_time": "0:51:52"}
144
+ {"current_steps": 1200, "total_steps": 1614, "eval_loss": 0.49974343180656433, "epoch": 2.226555246053853, "percentage": 74.35, "elapsed_time": "2:30:40", "remaining_time": "0:51:59"}
145
+ {"current_steps": 1210, "total_steps": 1614, "loss": 0.1187, "lr": 1.7915900855335506e-06, "epoch": 2.245125348189415, "percentage": 74.97, "elapsed_time": "2:31:51", "remaining_time": "0:50:42"}
146
+ {"current_steps": 1220, "total_steps": 1614, "loss": 0.1, "lr": 1.7093754028837345e-06, "epoch": 2.2636954503249767, "percentage": 75.59, "elapsed_time": "2:33:03", "remaining_time": "0:49:25"}
147
+ {"current_steps": 1230, "total_steps": 1614, "loss": 0.1044, "lr": 1.6287011003719105e-06, "epoch": 2.2822655524605384, "percentage": 76.21, "elapsed_time": "2:34:15", "remaining_time": "0:48:09"}
148
+ {"current_steps": 1240, "total_steps": 1614, "loss": 0.1012, "lr": 1.549604942589441e-06, "epoch": 2.3008356545961, "percentage": 76.83, "elapsed_time": "2:35:26", "remaining_time": "0:46:53"}
149
+ {"current_steps": 1250, "total_steps": 1614, "loss": 0.0978, "lr": 1.4721239553795485e-06, "epoch": 2.319405756731662, "percentage": 77.45, "elapsed_time": "2:36:36", "remaining_time": "0:45:36"}
150
+ {"current_steps": 1250, "total_steps": 1614, "eval_loss": 0.4895870089530945, "epoch": 2.319405756731662, "percentage": 77.45, "elapsed_time": "2:36:56", "remaining_time": "0:45:41"}
151
+ {"current_steps": 1260, "total_steps": 1614, "loss": 0.1094, "lr": 1.3962944085050833e-06, "epoch": 2.3379758588672237, "percentage": 78.07, "elapsed_time": "2:38:08", "remaining_time": "0:44:25"}
152
+ {"current_steps": 1270, "total_steps": 1614, "loss": 0.1023, "lr": 1.3221517986702249e-06, "epoch": 2.3565459610027855, "percentage": 78.69, "elapsed_time": "2:39:19", "remaining_time": "0:43:09"}
153
+ {"current_steps": 1280, "total_steps": 1614, "loss": 0.0953, "lr": 1.2497308329040475e-06, "epoch": 2.3751160631383472, "percentage": 79.31, "elapsed_time": "2:40:30", "remaining_time": "0:41:52"}
154
+ {"current_steps": 1290, "total_steps": 1614, "loss": 0.1014, "lr": 1.1790654123137552e-06, "epoch": 2.393686165273909, "percentage": 79.93, "elapsed_time": "2:41:42", "remaining_time": "0:40:36"}
155
+ {"current_steps": 1300, "total_steps": 1614, "loss": 0.1056, "lr": 1.1101886162151764e-06, "epoch": 2.4122562674094707, "percentage": 80.55, "elapsed_time": "2:42:54", "remaining_time": "0:39:21"}
156
+ {"current_steps": 1300, "total_steps": 1614, "eval_loss": 0.4979850947856903, "epoch": 2.4122562674094707, "percentage": 80.55, "elapsed_time": "2:43:14", "remaining_time": "0:39:25"}
157
+ {"current_steps": 1310, "total_steps": 1614, "loss": 0.0855, "lr": 1.0431326866479457e-06, "epoch": 2.4308263695450325, "percentage": 81.16, "elapsed_time": "2:44:26", "remaining_time": "0:38:09"}
158
+ {"current_steps": 1320, "total_steps": 1614, "loss": 0.1018, "lr": 9.779290132826224e-07, "epoch": 2.4493964716805943, "percentage": 81.78, "elapsed_time": "2:45:39", "remaining_time": "0:36:53"}
159
+ {"current_steps": 1330, "total_steps": 1614, "loss": 0.1057, "lr": 9.146081187268185e-07, "epoch": 2.467966573816156, "percentage": 82.4, "elapsed_time": "2:46:49", "remaining_time": "0:35:37"}
160
+ {"current_steps": 1340, "total_steps": 1614, "loss": 0.0969, "lr": 8.531996442372048e-07, "epoch": 2.486536675951718, "percentage": 83.02, "elapsed_time": "2:48:01", "remaining_time": "0:34:21"}
161
+ {"current_steps": 1350, "total_steps": 1614, "loss": 0.0897, "lr": 7.937323358440935e-07, "epoch": 2.5051067780872796, "percentage": 83.64, "elapsed_time": "2:49:13", "remaining_time": "0:33:05"}
162
+ {"current_steps": 1350, "total_steps": 1614, "eval_loss": 0.4882669448852539, "epoch": 2.5051067780872796, "percentage": 83.64, "elapsed_time": "2:49:33", "remaining_time": "0:33:09"}
163
+ {"current_steps": 1360, "total_steps": 1614, "loss": 0.0976, "lr": 7.362340308950783e-07, "epoch": 2.5236768802228413, "percentage": 84.26, "elapsed_time": "2:50:45", "remaining_time": "0:31:53"}
164
+ {"current_steps": 1370, "total_steps": 1614, "loss": 0.0957, "lr": 6.807316450240425e-07, "epoch": 2.542246982358403, "percentage": 84.88, "elapsed_time": "2:51:58", "remaining_time": "0:30:37"}
165
+ {"current_steps": 1380, "total_steps": 1614, "loss": 0.1087, "lr": 6.2725115955164e-07, "epoch": 2.560817084493965, "percentage": 85.5, "elapsed_time": "2:53:11", "remaining_time": "0:29:21"}
166
+ {"current_steps": 1390, "total_steps": 1614, "loss": 0.0952, "lr": 5.758176093231294e-07, "epoch": 2.5793871866295266, "percentage": 86.12, "elapsed_time": "2:54:23", "remaining_time": "0:28:06"}
167
+ {"current_steps": 1400, "total_steps": 1614, "loss": 0.0872, "lr": 5.264550709892685e-07, "epoch": 2.5979572887650884, "percentage": 86.74, "elapsed_time": "2:55:34", "remaining_time": "0:26:50"}
168
+ {"current_steps": 1400, "total_steps": 1614, "eval_loss": 0.49406710267066956, "epoch": 2.5979572887650884, "percentage": 86.74, "elapsed_time": "2:55:53", "remaining_time": "0:26:53"}
169
+ {"current_steps": 1410, "total_steps": 1614, "loss": 0.1028, "lr": 4.791866517357491e-07, "epoch": 2.61652739090065, "percentage": 87.36, "elapsed_time": "2:57:05", "remaining_time": "0:25:37"}
170
+ {"current_steps": 1420, "total_steps": 1614, "loss": 0.0929, "lr": 4.3403447846645355e-07, "epoch": 2.635097493036212, "percentage": 87.98, "elapsed_time": "2:58:16", "remaining_time": "0:24:21"}
171
+ {"current_steps": 1430, "total_steps": 1614, "loss": 0.0956, "lr": 3.910196874455896e-07, "epoch": 2.6536675951717736, "percentage": 88.6, "elapsed_time": "2:59:26", "remaining_time": "0:23:05"}
172
+ {"current_steps": 1440, "total_steps": 1614, "loss": 0.1015, "lr": 3.501624144035559e-07, "epoch": 2.6722376973073354, "percentage": 89.22, "elapsed_time": "3:00:38", "remaining_time": "0:21:49"}
173
+ {"current_steps": 1450, "total_steps": 1614, "loss": 0.0916, "lr": 3.1148178511116624e-07, "epoch": 2.690807799442897, "percentage": 89.84, "elapsed_time": "3:01:49", "remaining_time": "0:20:33"}
174
+ {"current_steps": 1450, "total_steps": 1614, "eval_loss": 0.49390342831611633, "epoch": 2.690807799442897, "percentage": 89.84, "elapsed_time": "3:02:09", "remaining_time": "0:20:36"}
175
+ {"current_steps": 1460, "total_steps": 1614, "loss": 0.101, "lr": 2.7499590642665773e-07, "epoch": 2.709377901578459, "percentage": 90.46, "elapsed_time": "3:03:21", "remaining_time": "0:19:20"}
176
+ {"current_steps": 1470, "total_steps": 1614, "loss": 0.0899, "lr": 2.407218578196524e-07, "epoch": 2.7279480037140207, "percentage": 91.08, "elapsed_time": "3:04:33", "remaining_time": "0:18:04"}
177
+ {"current_steps": 1480, "total_steps": 1614, "loss": 0.1063, "lr": 2.0867568337605616e-07, "epoch": 2.7465181058495824, "percentage": 91.7, "elapsed_time": "3:05:44", "remaining_time": "0:16:49"}
178
+ {"current_steps": 1490, "total_steps": 1614, "loss": 0.086, "lr": 1.7887238428763553e-07, "epoch": 2.7650882079851438, "percentage": 92.32, "elapsed_time": "3:06:57", "remaining_time": "0:15:33"}
179
+ {"current_steps": 1500, "total_steps": 1614, "loss": 0.0844, "lr": 1.5132591182978107e-07, "epoch": 2.7836583101207055, "percentage": 92.94, "elapsed_time": "3:08:08", "remaining_time": "0:14:17"}
180
+ {"current_steps": 1500, "total_steps": 1614, "eval_loss": 0.4944659173488617, "epoch": 2.7836583101207055, "percentage": 92.94, "elapsed_time": "3:08:27", "remaining_time": "0:14:19"}
181
+ {"current_steps": 1510, "total_steps": 1614, "loss": 0.0936, "lr": 1.2604916083075236e-07, "epoch": 2.8022284122562673, "percentage": 93.56, "elapsed_time": "3:09:39", "remaining_time": "0:13:03"}
182
+ {"current_steps": 1520, "total_steps": 1614, "loss": 0.1114, "lr": 1.0305396363545717e-07, "epoch": 2.820798514391829, "percentage": 94.18, "elapsed_time": "3:10:51", "remaining_time": "0:11:48"}
183
+ {"current_steps": 1530, "total_steps": 1614, "loss": 0.0933, "lr": 8.235108456658814e-08, "epoch": 2.839368616527391, "percentage": 94.8, "elapsed_time": "3:12:02", "remaining_time": "0:10:32"}
184
+ {"current_steps": 1540, "total_steps": 1614, "loss": 0.1059, "lr": 6.395021488572128e-08, "epoch": 2.8579387186629526, "percentage": 95.42, "elapsed_time": "3:13:15", "remaining_time": "0:09:17"}
185
+ {"current_steps": 1550, "total_steps": 1614, "loss": 0.0959, "lr": 4.7859968256719344e-08, "epoch": 2.8765088207985143, "percentage": 96.03, "elapsed_time": "3:14:27", "remaining_time": "0:08:01"}
186
+ {"current_steps": 1550, "total_steps": 1614, "eval_loss": 0.4942573308944702, "epoch": 2.8765088207985143, "percentage": 96.03, "elapsed_time": "3:14:46", "remaining_time": "0:08:02"}
187
+ {"current_steps": 1560, "total_steps": 1614, "loss": 0.0945, "lr": 3.408787671357494e-08, "epoch": 2.895078922934076, "percentage": 96.65, "elapsed_time": "3:15:57", "remaining_time": "0:06:46"}
188
+ {"current_steps": 1570, "total_steps": 1614, "loss": 0.088, "lr": 2.264038713457706e-08, "epoch": 2.913649025069638, "percentage": 97.27, "elapsed_time": "3:17:09", "remaining_time": "0:05:31"}
189
+ {"current_steps": 1580, "total_steps": 1614, "loss": 0.0992, "lr": 1.3522858224450652e-08, "epoch": 2.9322191272051996, "percentage": 97.89, "elapsed_time": "3:18:19", "remaining_time": "0:04:16"}
190
+ {"current_steps": 1590, "total_steps": 1614, "loss": 0.0805, "lr": 6.739558005884883e-09, "epoch": 2.9507892293407614, "percentage": 98.51, "elapsed_time": "3:19:31", "remaining_time": "0:03:00"}
191
+ {"current_steps": 1600, "total_steps": 1614, "loss": 0.094, "lr": 2.2936618216201635e-09, "epoch": 2.969359331476323, "percentage": 99.13, "elapsed_time": "3:20:43", "remaining_time": "0:01:45"}
192
+ {"current_steps": 1600, "total_steps": 1614, "eval_loss": 0.4940944015979767, "epoch": 2.969359331476323, "percentage": 99.13, "elapsed_time": "3:21:03", "remaining_time": "0:01:45"}
193
+ {"current_steps": 1610, "total_steps": 1614, "loss": 0.0803, "lr": 1.872508480332824e-10, "epoch": 2.987929433611885, "percentage": 99.75, "elapsed_time": "3:22:15", "remaining_time": "0:00:30"}
194
+ {"current_steps": 1614, "total_steps": 1614, "epoch": 2.9953574744661093, "percentage": 100.0, "elapsed_time": "3:24:16", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1046e22a46885e96c74cb94c6d3a9903bf2b0b666fa116308ef3b76af86fe5e
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81c0adf8cc1d6ebe93de13a29c5d68ad8332043bbb31dc4087c041aa55b2fdb
3
  size 7480