cervisiarius
Upload model: nll-bg-en-trainAB_NO-MASKING
73ee3ff
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 96,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010416666666666666,
"grad_norm": 0.46898284554481506,
"learning_rate": 0.0001,
"loss": 0.4984,
"objective/entropy": 1536.0,
"step": 1,
"train/nll_loss_a": 0.46235302090644836,
"train/nll_loss_b": 0.5343712766965231,
"val/completion_length": 141.19872029622397,
"val/contain_eos_token": 0.9294871687889099,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.35256410638491315,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.628205140431722,
"val/fraction_correct": 0.20512820780277252,
"val/fraction_cyrillic_a": 0.09817602237065633,
"val/fraction_cyrillic_b": 0.07149943461020787,
"val/fraction_latin_a": 0.48449812332789105,
"val/fraction_latin_b": 0.5023942093054453,
"val/fraction_number_a": 0.18907449146111807,
"val/fraction_number_b": 0.19445918997128805,
"val/fraction_other_a": 0.22825137277444205,
"val/fraction_other_b": 0.2316471884648005,
"val/fraction_ties": 0.6666666666666666,
"val/lang_prob_bg": 0.0268978967020909,
"val/lang_prob_en": 0.6749410231908163,
"val/latin_first_token": 0.6474358836809794,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.020833333333333332,
"grad_norm": 0.49648183584213257,
"learning_rate": 0.0001,
"loss": 0.4431,
"objective/entropy": 1424.0,
"step": 2,
"train/nll_loss_a": 0.4012756248315175,
"train/nll_loss_b": 0.4849816660086314,
"val/completion_length": 138.66666666666666,
"val/contain_eos_token": 0.9102564056714376,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.32692308227221173,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.1730769251783689,
"val/fraction_cyrillic_a": 0.0820654605825742,
"val/fraction_cyrillic_b": 0.06807015091180801,
"val/fraction_latin_a": 0.46912774443626404,
"val/fraction_latin_b": 0.4876726269721985,
"val/fraction_number_a": 0.21081160008907318,
"val/fraction_number_b": 0.2032010406255722,
"val/fraction_other_a": 0.2379952073097229,
"val/fraction_other_b": 0.24105618397394815,
"val/fraction_ties": 0.7820512851079305,
"val/lang_prob_bg": 0.03282865695655346,
"val/lang_prob_en": 0.6723186572392782,
"val/latin_first_token": 0.6666666666666666,
"val/number_first_token": 0.0,
"val/other_first_token": 0.006410256649057071
},
{
"epoch": 0.03125,
"grad_norm": 0.6362881660461426,
"learning_rate": 0.0001,
"loss": 0.505,
"objective/entropy": 1469.3333333333333,
"step": 3,
"train/nll_loss_a": 0.41949082414309186,
"train/nll_loss_b": 0.5905094941457113,
"val/completion_length": 153.07691955566406,
"val/contain_eos_token": 0.878205140431722,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.3076923092206319,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.7820512851079305,
"val/fraction_correct": 0.12820513298114142,
"val/fraction_cyrillic_a": 0.09064680337905884,
"val/fraction_cyrillic_b": 0.07422023018201192,
"val/fraction_latin_a": 0.4563806454340617,
"val/fraction_latin_b": 0.4618365466594696,
"val/fraction_number_a": 0.21234740813573202,
"val/fraction_number_b": 0.21217785278956094,
"val/fraction_other_a": 0.24062515298525491,
"val/fraction_other_b": 0.2517653902371724,
"val/fraction_ties": 0.8205128312110901,
"val/lang_prob_bg": 0.03160186484456062,
"val/lang_prob_en": 0.6696631709734598,
"val/latin_first_token": 0.6923076709111532,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.041666666666666664,
"grad_norm": 0.5365626215934753,
"learning_rate": 0.0001,
"loss": 0.3568,
"objective/entropy": 1538.6666666666667,
"step": 4,
"train/nll_loss_a": 0.3584041992823283,
"train/nll_loss_b": 0.3552741805712382,
"val/completion_length": 139.39102172851562,
"val/contain_eos_token": 0.9230769077936808,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.21794872482617697,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.1602564131220182,
"val/fraction_cyrillic_a": 0.06686830148100853,
"val/fraction_cyrillic_b": 0.040630811204512916,
"val/fraction_latin_a": 0.48262248436609906,
"val/fraction_latin_b": 0.4933239420255025,
"val/fraction_number_a": 0.20719597240289053,
"val/fraction_number_b": 0.22095757722854614,
"val/fraction_other_a": 0.2433132529258728,
"val/fraction_other_b": 0.24508768320083618,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.02131816806892554,
"val/lang_prob_en": 0.6912566820780436,
"val/latin_first_token": 0.7820512851079305,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.052083333333333336,
"grad_norm": 0.6317083835601807,
"learning_rate": 0.0001,
"loss": 0.3542,
"objective/entropy": 1616.0,
"step": 5,
"train/nll_loss_a": 0.3730636735757192,
"train/nll_loss_b": 0.33537689844767254,
"val/completion_length": 141.45512898763022,
"val/contain_eos_token": 0.9230769276618958,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.03846153927346071,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7692307829856873,
"val/fraction_correct": 0.16666666915019354,
"val/fraction_cyrillic_a": 0.018022120309372742,
"val/fraction_cyrillic_b": 0.008442190863812963,
"val/fraction_latin_a": 0.530072808265686,
"val/fraction_latin_b": 0.5371540983517965,
"val/fraction_number_a": 0.20419377585252127,
"val/fraction_number_b": 0.19861711064974466,
"val/fraction_other_a": 0.2477113058169683,
"val/fraction_other_b": 0.25578661759694415,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.005829914240166545,
"val/lang_prob_en": 0.6994746724764506,
"val/latin_first_token": 0.9615384538968405,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.0625,
"grad_norm": 0.6112334728240967,
"learning_rate": 0.0001,
"loss": 0.3526,
"objective/entropy": 1450.6666666666667,
"step": 6,
"train/nll_loss_a": 0.3616310755411784,
"train/nll_loss_b": 0.3435203830401103,
"val/completion_length": 139.3397420247396,
"val/contain_eos_token": 0.942307690779368,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.012820513298114141,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307978868484,
"val/fraction_both_incorrect": 0.7564102411270142,
"val/fraction_correct": 0.1602564131220182,
"val/fraction_cyrillic_a": 0.005665303532926676,
"val/fraction_cyrillic_b": 0.0012210012258340914,
"val/fraction_latin_a": 0.5337207714716593,
"val/fraction_latin_b": 0.5389339327812195,
"val/fraction_number_a": 0.20246068636576334,
"val/fraction_number_b": 0.20605232814947763,
"val/fraction_other_a": 0.2581532299518585,
"val/fraction_other_b": 0.25379273295402527,
"val/fraction_ties": 0.8333333134651184,
"val/lang_prob_bg": 0.0024220591488604746,
"val/lang_prob_en": 0.716150164604187,
"val/latin_first_token": 0.9871794780095419,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.07291666666666667,
"grad_norm": 0.5834570527076721,
"learning_rate": 0.0001,
"loss": 0.3122,
"objective/entropy": 1482.6666666666667,
"step": 7,
"train/nll_loss_a": 0.31628555059432983,
"train/nll_loss_b": 0.3082062304019928,
"val/completion_length": 140.28205362955728,
"val/contain_eos_token": 0.935897429784139,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.012820513298114141,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.12820513298114142,
"val/fraction_cyrillic_a": 0.004876403972351302,
"val/fraction_cyrillic_b": 0.0055291604561110335,
"val/fraction_latin_a": 0.51544189453125,
"val/fraction_latin_b": 0.5175811052322388,
"val/fraction_number_a": 0.21839049458503723,
"val/fraction_number_b": 0.21114349365234375,
"val/fraction_other_a": 0.2612912356853485,
"val/fraction_other_b": 0.26574622591336566,
"val/fraction_ties": 0.8461538553237915,
"val/lang_prob_bg": 0.0024805181116486588,
"val/lang_prob_en": 0.7189218997955322,
"val/latin_first_token": 0.9871794780095419,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.08333333333333333,
"grad_norm": 0.5234330296516418,
"learning_rate": 0.0001,
"loss": 0.3689,
"objective/entropy": 1560.0,
"step": 8,
"train/nll_loss_a": 0.38403966029485065,
"train/nll_loss_b": 0.353829691807429,
"val/completion_length": 147.18589782714844,
"val/contain_eos_token": 0.8910256226857504,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.032051283245285354,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.12820513173937798,
"val/fraction_both_incorrect": 0.6794871886571249,
"val/fraction_correct": 0.22435897588729858,
"val/fraction_cyrillic_a": 0.005463951422522466,
"val/fraction_cyrillic_b": 0.00364190728093187,
"val/fraction_latin_a": 0.519624650478363,
"val/fraction_latin_b": 0.5310182571411133,
"val/fraction_number_a": 0.22126641869544983,
"val/fraction_number_b": 0.20790701607863107,
"val/fraction_other_a": 0.2536449631055196,
"val/fraction_other_b": 0.25743279854456586,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.0022993393164748945,
"val/lang_prob_en": 0.7228630383809408,
"val/latin_first_token": 0.9679486950238546,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.09375,
"grad_norm": 0.46253740787506104,
"learning_rate": 0.0001,
"loss": 0.3548,
"objective/entropy": 1626.6666666666667,
"step": 9,
"train/nll_loss_a": 0.353506733973821,
"train/nll_loss_b": 0.3560173710187276,
"val/completion_length": 141.10897318522134,
"val/contain_eos_token": 0.8974359035491943,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.03846153927346071,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.17948718120654425,
"val/fraction_both_incorrect": 0.6666666865348816,
"val/fraction_correct": 0.25641026099522907,
"val/fraction_cyrillic_a": 0.009551782781879107,
"val/fraction_cyrillic_b": 0.007778597995638847,
"val/fraction_latin_a": 0.5005057454109192,
"val/fraction_latin_b": 0.5112853447596232,
"val/fraction_number_a": 0.2224580099185308,
"val/fraction_number_b": 0.2299031764268875,
"val/fraction_other_a": 0.26748446623484295,
"val/fraction_other_b": 0.25103287398815155,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.003838104816774527,
"val/lang_prob_en": 0.7203066547711691,
"val/latin_first_token": 0.9615384538968405,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.10416666666666667,
"grad_norm": 0.6793639063835144,
"learning_rate": 0.0001,
"loss": 0.4072,
"objective/entropy": 1544.0,
"step": 10,
"train/nll_loss_a": 0.40951302647590637,
"train/nll_loss_b": 0.4048899710178375,
"val/completion_length": 139.59615580240884,
"val/contain_eos_token": 0.9230769276618958,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.044871795922517776,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7564102609952291,
"val/fraction_correct": 0.1730769251783689,
"val/fraction_cyrillic_a": 0.01814807563399275,
"val/fraction_cyrillic_b": 0.01091132735988746,
"val/fraction_latin_a": 0.479096124569575,
"val/fraction_latin_b": 0.48450469970703125,
"val/fraction_number_a": 0.24191749095916748,
"val/fraction_number_b": 0.23329021533330283,
"val/fraction_other_a": 0.2608383099238078,
"val/fraction_other_b": 0.2712937593460083,
"val/fraction_ties": 0.8589743375778198,
"val/lang_prob_bg": 0.005877171643078327,
"val/lang_prob_en": 0.6905626058578491,
"val/latin_first_token": 0.9551281929016113,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.11458333333333333,
"grad_norm": 0.6676069498062134,
"learning_rate": 0.0001,
"loss": 0.3261,
"objective/entropy": 1296.0,
"step": 11,
"train/nll_loss_a": 0.3414422770341237,
"train/nll_loss_b": 0.31081566711266834,
"val/completion_length": 133.35897318522134,
"val/contain_eos_token": 0.9615384538968405,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.012820513298114141,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.15384616081913313,
"val/fraction_cyrillic_a": 0.009972451565166315,
"val/fraction_cyrillic_b": 0.008257918680707613,
"val/fraction_latin_a": 0.4708147446314494,
"val/fraction_latin_b": 0.4841614067554474,
"val/fraction_number_a": 0.2511301040649414,
"val/fraction_number_b": 0.23997685313224792,
"val/fraction_other_a": 0.26808270812034607,
"val/fraction_other_b": 0.2676038245360057,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.004965859581716359,
"val/lang_prob_en": 0.7087472081184387,
"val/latin_first_token": 0.9871794780095419,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.125,
"grad_norm": 0.48878854513168335,
"learning_rate": 0.0001,
"loss": 0.3202,
"objective/entropy": 1520.0,
"step": 12,
"train/nll_loss_a": 0.3013697862625122,
"train/nll_loss_b": 0.33911073207855225,
"val/completion_length": 125.4551289876302,
"val/contain_eos_token": 0.9551282127698263,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.01923076994717121,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.6410256425539652,
"val/fraction_correct": 0.21794872482617697,
"val/fraction_cyrillic_a": 0.0016106078983284533,
"val/fraction_cyrillic_b": 0.00164324635018905,
"val/fraction_latin_a": 0.4850431780020396,
"val/fraction_latin_b": 0.49556367595990497,
"val/fraction_number_a": 0.24694832662741342,
"val/fraction_number_b": 0.24452554682890573,
"val/fraction_other_a": 0.26639790336290997,
"val/fraction_other_b": 0.25826754172643024,
"val/fraction_ties": 0.7179487347602844,
"val/lang_prob_bg": 0.0020299581810832024,
"val/lang_prob_en": 0.7036298712094625,
"val/latin_first_token": 0.9807692170143127,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.13541666666666666,
"grad_norm": 0.47080346941947937,
"learning_rate": 0.0001,
"loss": 0.3551,
"objective/entropy": 1610.6666666666667,
"step": 13,
"train/nll_loss_a": 0.3537709911664327,
"train/nll_loss_b": 0.35645443201065063,
"val/completion_length": 132.8397471110026,
"val/contain_eos_token": 0.942307690779368,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.01923076994717121,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.11538461844126384,
"val/fraction_both_incorrect": 0.6410256425539652,
"val/fraction_correct": 0.23717949291070303,
"val/fraction_cyrillic_a": 0.0035886759869754314,
"val/fraction_cyrillic_b": 0.002304682352890571,
"val/fraction_latin_a": 0.5025050441424052,
"val/fraction_latin_b": 0.49992923935254413,
"val/fraction_number_a": 0.22844381630420685,
"val/fraction_number_b": 0.23804503679275513,
"val/fraction_other_a": 0.2654624879360199,
"val/fraction_other_b": 0.2597210705280304,
"val/fraction_ties": 0.7564102609952291,
"val/lang_prob_bg": 0.0021097887850676975,
"val/lang_prob_en": 0.7135748863220215,
"val/latin_first_token": 0.9807692170143127,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.14583333333333334,
"grad_norm": 0.5261266827583313,
"learning_rate": 0.0001,
"loss": 0.3553,
"objective/entropy": 1768.0,
"step": 14,
"train/nll_loss_a": 0.36536062757174176,
"train/nll_loss_b": 0.34532251954078674,
"val/completion_length": 130.3205134073893,
"val/contain_eos_token": 0.942307690779368,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.006410256649057071,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.14102564255396524,
"val/fraction_both_incorrect": 0.7435897588729858,
"val/fraction_correct": 0.19871795177459717,
"val/fraction_cyrillic_a": 0.0008547008813669285,
"val/fraction_cyrillic_b": 0.0002670940205765267,
"val/fraction_latin_a": 0.48295870423316956,
"val/fraction_latin_b": 0.4754104216893514,
"val/fraction_number_a": 0.24797451992829642,
"val/fraction_number_b": 0.25482123096783954,
"val/fraction_other_a": 0.26821208000183105,
"val/fraction_other_b": 0.269501268863678,
"val/fraction_ties": 0.8846153616905212,
"val/lang_prob_bg": 0.0016354583591843646,
"val/lang_prob_en": 0.7125194072723389,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.15625,
"grad_norm": 0.5608013868331909,
"learning_rate": 0.0001,
"loss": 0.375,
"objective/entropy": 1813.3333333333333,
"step": 15,
"train/nll_loss_a": 0.3743097384770711,
"train/nll_loss_b": 0.3756645123163859,
"val/completion_length": 113.73076883951823,
"val/contain_eos_token": 0.942307690779368,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.6666666666666666,
"val/fraction_correct": 0.21794872482617697,
"val/fraction_cyrillic_a": 0.0005128205132981142,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.4844670593738556,
"val/fraction_latin_b": 0.48018120725949603,
"val/fraction_number_a": 0.24094298481941223,
"val/fraction_number_b": 0.2496140201886495,
"val/fraction_other_a": 0.2740771571795146,
"val/fraction_other_b": 0.2702048122882843,
"val/fraction_ties": 0.7692307829856873,
"val/lang_prob_bg": 0.0012819842668250203,
"val/lang_prob_en": 0.712844451268514,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.5459577441215515,
"learning_rate": 0.0001,
"loss": 0.3467,
"objective/entropy": 1781.3333333333333,
"step": 16,
"train/nll_loss_a": 0.35024779041608173,
"train/nll_loss_b": 0.3431568145751953,
"val/completion_length": 125.86538696289062,
"val/contain_eos_token": 0.9551281929016113,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.1666666716337204,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.4834948579470317,
"val/fraction_latin_b": 0.48427216211954754,
"val/fraction_number_a": 0.24997142453988394,
"val/fraction_number_b": 0.24836017191410065,
"val/fraction_other_a": 0.2665337175130844,
"val/fraction_other_b": 0.26736770073572796,
"val/fraction_ties": 0.7692307631174723,
"val/lang_prob_bg": 0.0015082452834273379,
"val/lang_prob_en": 0.6896043419837952,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.0,
"val/other_first_token": 0.006410256649057071
},
{
"epoch": 0.17708333333333334,
"grad_norm": 0.6729714870452881,
"learning_rate": 0.0001,
"loss": 0.3411,
"objective/entropy": 1653.3333333333333,
"step": 17,
"train/nll_loss_a": 0.3364799916744232,
"train/nll_loss_b": 0.34577877322832745,
"val/completion_length": 112.4551264444987,
"val/contain_eos_token": 0.9807692368825277,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.012820513298114141,
"val/fraction_both_incorrect": 0.7564102411270142,
"val/fraction_correct": 0.12820513049761453,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.45367395877838135,
"val/fraction_latin_b": 0.46362873911857605,
"val/fraction_number_a": 0.2715826133886973,
"val/fraction_number_b": 0.2677338620026906,
"val/fraction_other_a": 0.2747434576352437,
"val/fraction_other_b": 0.2686373790105184,
"val/fraction_ties": 0.7692307631174723,
"val/lang_prob_bg": 0.0013079955242574215,
"val/lang_prob_en": 0.7055089473724365,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.1875,
"grad_norm": 0.5278697609901428,
"learning_rate": 0.0001,
"loss": 0.3637,
"objective/entropy": 1666.6666666666667,
"step": 18,
"train/nll_loss_a": 0.3630356788635254,
"train/nll_loss_b": 0.36438990632692975,
"val/completion_length": 106.38461303710938,
"val/contain_eos_token": 0.9743589758872986,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.006410256649057071,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.18589743971824646,
"val/fraction_cyrillic_a": 0.00018853696140771112,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.46353839834531146,
"val/fraction_latin_b": 0.4717874725659688,
"val/fraction_number_a": 0.26522815227508545,
"val/fraction_number_b": 0.27208030720551807,
"val/fraction_other_a": 0.27104492982228595,
"val/fraction_other_b": 0.25613221526145935,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.0014787697000429034,
"val/lang_prob_en": 0.7207486033439636,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.19791666666666666,
"grad_norm": 0.623100996017456,
"learning_rate": 0.0001,
"loss": 0.339,
"objective/entropy": 1757.3333333333333,
"step": 19,
"train/nll_loss_a": 0.3500674267609914,
"train/nll_loss_b": 0.32788631319999695,
"val/completion_length": 115.87820434570312,
"val/contain_eos_token": 0.9615384538968405,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.14102564503749213,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.47628764311472577,
"val/fraction_latin_b": 0.47780614097913104,
"val/fraction_number_a": 0.25701290369033813,
"val/fraction_number_b": 0.2525850087404251,
"val/fraction_other_a": 0.2666994432608287,
"val/fraction_other_b": 0.2696088453133901,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.0013311682268977165,
"val/lang_prob_en": 0.7212471763292948,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.20833333333333334,
"grad_norm": 0.7427172064781189,
"learning_rate": 0.0001,
"loss": 0.3136,
"objective/entropy": 1717.3333333333333,
"step": 20,
"train/nll_loss_a": 0.3320723871390025,
"train/nll_loss_b": 0.2952205240726471,
"val/completion_length": 103.90384674072266,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256649057071,
"val/fraction_both_incorrect": 0.7820512851079305,
"val/fraction_correct": 0.14102564255396524,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.47085316975911456,
"val/fraction_latin_b": 0.46781421701113385,
"val/fraction_number_a": 0.2578504929939906,
"val/fraction_number_b": 0.2744967540105184,
"val/fraction_other_a": 0.27129634221394855,
"val/fraction_other_b": 0.2576890190442403,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0014502551639452577,
"val/lang_prob_en": 0.7153881192207336,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.21875,
"grad_norm": 0.4812980592250824,
"learning_rate": 0.0001,
"loss": 0.2812,
"objective/entropy": 1712.0,
"step": 21,
"train/nll_loss_a": 0.2877577245235443,
"train/nll_loss_b": 0.27465402086575824,
"val/completion_length": 100.94871775309245,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359308679898,
"val/fraction_both_incorrect": 0.6794871886571249,
"val/fraction_correct": 0.20512820780277252,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.449023316303889,
"val/fraction_latin_b": 0.43931347131729126,
"val/fraction_number_a": 0.2805411020914714,
"val/fraction_number_b": 0.2886248826980591,
"val/fraction_other_a": 0.2704355716705322,
"val/fraction_other_b": 0.27206166585286456,
"val/fraction_ties": 0.7692307631174723,
"val/lang_prob_bg": 0.0012512764272590478,
"val/lang_prob_en": 0.7036919593811035,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.22916666666666666,
"grad_norm": 0.5802024006843567,
"learning_rate": 0.0001,
"loss": 0.2079,
"objective/entropy": 1754.6666666666667,
"step": 22,
"train/nll_loss_a": 0.2198613981405894,
"train/nll_loss_b": 0.19591793914635977,
"val/completion_length": 105.2051289876302,
"val/contain_eos_token": 0.9615384538968405,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.8589743375778198,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.4607094426949819,
"val/fraction_latin_b": 0.4435524543126424,
"val/fraction_number_a": 0.2806512117385864,
"val/fraction_number_b": 0.2736863394578298,
"val/fraction_other_a": 0.2586393306652705,
"val/fraction_other_b": 0.28276123603185016,
"val/fraction_ties": 0.9487179517745972,
"val/lang_prob_bg": 0.0013595524554451306,
"val/lang_prob_en": 0.6991243163744608,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.23958333333333334,
"grad_norm": 0.9169826507568359,
"learning_rate": 0.0001,
"loss": 0.3761,
"objective/entropy": 1493.3333333333333,
"step": 23,
"train/nll_loss_a": 0.36857877175013226,
"train/nll_loss_b": 0.38360129793485004,
"val/completion_length": 92.24359130859375,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.025641026596228283,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.4382043182849884,
"val/fraction_latin_b": 0.42687369386355084,
"val/fraction_number_a": 0.2829488515853882,
"val/fraction_number_b": 0.296395738919576,
"val/fraction_other_a": 0.2788468599319458,
"val/fraction_other_b": 0.2767305870850881,
"val/fraction_ties": 0.8205128312110901,
"val/lang_prob_bg": 0.0014082260507469375,
"val/lang_prob_en": 0.7083008488019308,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.25,
"grad_norm": 0.6027104258537292,
"learning_rate": 0.0001,
"loss": 0.2801,
"objective/entropy": 1490.6666666666667,
"step": 24,
"train/nll_loss_a": 0.278068482875824,
"train/nll_loss_b": 0.28207358221213025,
"val/completion_length": 86.58333333333333,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.6794871886571249,
"val/fraction_correct": 0.20512820780277252,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.44356054067611694,
"val/fraction_latin_b": 0.44240028659502667,
"val/fraction_number_a": 0.2843793531258901,
"val/fraction_number_b": 0.2815621296564738,
"val/fraction_other_a": 0.272060106197993,
"val/fraction_other_b": 0.2760376036167145,
"val/fraction_ties": 0.7692307631174723,
"val/lang_prob_bg": 0.0013788756914436817,
"val/lang_prob_en": 0.707394023736318,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.2604166666666667,
"grad_norm": 0.6588619351387024,
"learning_rate": 0.0001,
"loss": 0.2373,
"objective/entropy": 1805.3333333333333,
"step": 25,
"train/nll_loss_a": 0.2221569369236628,
"train/nll_loss_b": 0.25250792503356934,
"val/completion_length": 97.98077138264973,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.8461538354555765,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.4407140811284383,
"val/fraction_latin_b": 0.449174165725708,
"val/fraction_number_a": 0.2688818077246348,
"val/fraction_number_b": 0.264347364505132,
"val/fraction_other_a": 0.2904041012128194,
"val/fraction_other_b": 0.2864784598350525,
"val/fraction_ties": 0.9230769077936808,
"val/lang_prob_bg": 0.0015077214144791167,
"val/lang_prob_en": 0.7170586188634237,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.2708333333333333,
"grad_norm": 0.7737333178520203,
"learning_rate": 0.0001,
"loss": 0.2641,
"objective/entropy": 1989.3333333333333,
"step": 26,
"train/nll_loss_a": 0.26104875405629474,
"train/nll_loss_b": 0.26720015704631805,
"val/completion_length": 93.08974202473958,
"val/contain_eos_token": 0.9743589758872986,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.11538461844126384,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.19871795177459717,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.43882275621096295,
"val/fraction_latin_b": 0.4281532069047292,
"val/fraction_number_a": 0.29091211160024005,
"val/fraction_number_b": 0.3004717230796814,
"val/fraction_other_a": 0.2702651371558507,
"val/fraction_other_b": 0.2713750700155894,
"val/fraction_ties": 0.8333333134651184,
"val/lang_prob_bg": 0.0013183245512967308,
"val/lang_prob_en": 0.6850736141204834,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.28125,
"grad_norm": 0.7201813459396362,
"learning_rate": 0.0001,
"loss": 0.2468,
"objective/entropy": 1309.3333333333333,
"step": 27,
"train/nll_loss_a": 0.2481851428747177,
"train/nll_loss_b": 0.24535122017065683,
"val/completion_length": 81.52563985188802,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307730515797,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.15384615709384283,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.42121972640355426,
"val/fraction_latin_b": 0.3958790997664134,
"val/fraction_number_a": 0.30145979921023053,
"val/fraction_number_b": 0.3247312208016713,
"val/fraction_other_a": 0.2773204942544301,
"val/fraction_other_b": 0.2793896694978078,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0012046323778728645,
"val/lang_prob_en": 0.690701444943746,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.2916666666666667,
"grad_norm": 0.5405112504959106,
"learning_rate": 0.0001,
"loss": 0.2421,
"objective/entropy": 1466.6666666666667,
"step": 28,
"train/nll_loss_a": 0.258198360602061,
"train/nll_loss_b": 0.225906973083814,
"val/completion_length": 91.30128224690755,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.6025640964508057,
"val/fraction_correct": 0.2435897489388784,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.41246400276819867,
"val/fraction_latin_b": 0.4263813893000285,
"val/fraction_number_a": 0.30804495016733807,
"val/fraction_number_b": 0.299861341714859,
"val/fraction_other_a": 0.27949108680089313,
"val/fraction_other_b": 0.2737572491168976,
"val/fraction_ties": 0.692307710647583,
"val/lang_prob_bg": 0.0014168053554991882,
"val/lang_prob_en": 0.7019821604092916,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3020833333333333,
"grad_norm": 0.6843443512916565,
"learning_rate": 0.0001,
"loss": 0.2173,
"objective/entropy": 1717.3333333333333,
"step": 29,
"train/nll_loss_a": 0.2260708212852478,
"train/nll_loss_b": 0.208594411611557,
"val/completion_length": 86.48076883951823,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359184503555,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.18589743971824646,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.41310587525367737,
"val/fraction_latin_b": 0.4215882420539856,
"val/fraction_number_a": 0.3036368489265442,
"val/fraction_number_b": 0.3048081199328105,
"val/fraction_other_a": 0.2832573155562083,
"val/fraction_other_b": 0.2736036380132039,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.0014089663745835423,
"val/lang_prob_en": 0.6810818711916605,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3125,
"grad_norm": 0.6732069849967957,
"learning_rate": 0.0001,
"loss": 0.1954,
"objective/entropy": 1698.6666666666667,
"step": 30,
"train/nll_loss_a": 0.190487802028656,
"train/nll_loss_b": 0.20023786028226218,
"val/completion_length": 83.47436014811198,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.692307690779368,
"val/fraction_correct": 0.1730769251783689,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.37804505228996277,
"val/fraction_latin_b": 0.3864828248818715,
"val/fraction_number_a": 0.32556501030921936,
"val/fraction_number_b": 0.3207090497016907,
"val/fraction_other_a": 0.2963899274667104,
"val/fraction_other_b": 0.29280807574590045,
"val/fraction_ties": 0.7307692368825277,
"val/lang_prob_bg": 0.0012525273875022929,
"val/lang_prob_en": 0.6773750185966492,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3229166666666667,
"grad_norm": 1.181767225265503,
"learning_rate": 0.0001,
"loss": 0.2398,
"objective/entropy": 1254.6666666666667,
"step": 31,
"train/nll_loss_a": 0.2306948055823644,
"train/nll_loss_b": 0.2489608426888784,
"val/completion_length": 80.55127970377605,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.8461538354555765,
"val/fraction_correct": 0.09615384911497434,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.3839823206265767,
"val/fraction_latin_b": 0.40403392910957336,
"val/fraction_number_a": 0.3259160916010539,
"val/fraction_number_b": 0.3016844590504964,
"val/fraction_other_a": 0.2901015877723694,
"val/fraction_other_b": 0.29428161183993023,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.001477631429831187,
"val/lang_prob_en": 0.693560004234314,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.006410256649057071,
"val/other_first_token": 0.0
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.7345649600028992,
"learning_rate": 0.0001,
"loss": 0.2023,
"objective/entropy": 1706.6666666666667,
"step": 32,
"train/nll_loss_a": 0.18820939461390176,
"train/nll_loss_b": 0.2164141039053599,
"val/completion_length": 86.39102681477864,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.025641026596228283,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.3734320302804311,
"val/fraction_latin_b": 0.39075586199760437,
"val/fraction_number_a": 0.3208834727605184,
"val/fraction_number_b": 0.3199572165807088,
"val/fraction_other_a": 0.30568451682726544,
"val/fraction_other_b": 0.28928691148757935,
"val/fraction_ties": 0.8205128113428751,
"val/lang_prob_bg": 0.001555828144773841,
"val/lang_prob_en": 0.6997750600179037,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.34375,
"grad_norm": 0.6794171929359436,
"learning_rate": 0.0001,
"loss": 0.208,
"objective/entropy": 1765.3333333333333,
"step": 33,
"train/nll_loss_a": 0.197875847419103,
"train/nll_loss_b": 0.2181676377852758,
"val/completion_length": 81.42307790120442,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.12820513049761453,
"val/fraction_both_incorrect": 0.692307710647583,
"val/fraction_correct": 0.21794872482617697,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.36189671357472736,
"val/fraction_latin_b": 0.3368810514609019,
"val/fraction_number_a": 0.34198596080144245,
"val/fraction_number_b": 0.3494710822900136,
"val/fraction_other_a": 0.2961173454920451,
"val/fraction_other_b": 0.3136478662490845,
"val/fraction_ties": 0.8205128113428751,
"val/lang_prob_bg": 0.0014510581968352199,
"val/lang_prob_en": 0.6856165130933126,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3541666666666667,
"grad_norm": 1.2053074836730957,
"learning_rate": 0.0001,
"loss": 0.1771,
"objective/entropy": 1565.3333333333333,
"step": 34,
"train/nll_loss_a": 0.17536027232805887,
"train/nll_loss_b": 0.1789391835530599,
"val/completion_length": 75.41666666666667,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.12820513298114142,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.34743695457776386,
"val/fraction_latin_b": 0.3424769341945648,
"val/fraction_number_a": 0.3374132215976715,
"val/fraction_number_b": 0.3465127448240916,
"val/fraction_other_a": 0.31514982382456463,
"val/fraction_other_b": 0.31101036071777344,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0016215697008495529,
"val/lang_prob_en": 0.6558386087417603,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3645833333333333,
"grad_norm": 0.8247714042663574,
"learning_rate": 0.0001,
"loss": 0.1831,
"objective/entropy": 2050.6666666666665,
"step": 35,
"train/nll_loss_a": 0.1831777443488439,
"train/nll_loss_b": 0.18294000625610352,
"val/completion_length": 79.56410217285156,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307978868484,
"val/fraction_both_incorrect": 0.7179487347602844,
"val/fraction_correct": 0.17948718617359796,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.3364667197068532,
"val/fraction_latin_b": 0.3431033293406169,
"val/fraction_number_a": 0.35041239857673645,
"val/fraction_number_b": 0.3312891523043315,
"val/fraction_other_a": 0.31312089165051776,
"val/fraction_other_b": 0.3256075282891591,
"val/fraction_ties": 0.7948718070983887,
"val/lang_prob_bg": 0.0014793235653390486,
"val/lang_prob_en": 0.6674719850222269,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.375,
"grad_norm": 0.7892264127731323,
"learning_rate": 0.0001,
"loss": 0.2262,
"objective/entropy": 2029.3333333333333,
"step": 36,
"train/nll_loss_a": 0.21969079971313477,
"train/nll_loss_b": 0.23275785644849142,
"val/completion_length": 81.88461558024089,
"val/contain_eos_token": 0.9807692170143127,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359184503555,
"val/fraction_both_incorrect": 0.7564102609952291,
"val/fraction_correct": 0.16666666915019354,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.33385714888572693,
"val/fraction_latin_b": 0.31389813621838886,
"val/fraction_number_a": 0.36293908953666687,
"val/fraction_number_b": 0.368167241414388,
"val/fraction_other_a": 0.3032037814458211,
"val/fraction_other_b": 0.3179346521695455,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0014597336606432993,
"val/lang_prob_en": 0.6864216725031534,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.3854166666666667,
"grad_norm": 0.9461960196495056,
"learning_rate": 0.0001,
"loss": 0.1815,
"objective/entropy": 2218.6666666666665,
"step": 37,
"train/nll_loss_a": 0.1762543668349584,
"train/nll_loss_b": 0.18676617741584778,
"val/completion_length": 78.16666666666667,
"val/contain_eos_token": 0.9743589560190836,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307978868484,
"val/fraction_both_incorrect": 0.7307692368825277,
"val/fraction_correct": 0.1730769251783689,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.31174399455388385,
"val/fraction_latin_b": 0.3052766025066376,
"val/fraction_number_a": 0.3656782905260722,
"val/fraction_number_b": 0.37882108489672345,
"val/fraction_other_a": 0.32257768511772156,
"val/fraction_other_b": 0.3159022927284241,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.0016823052040611703,
"val/lang_prob_en": 0.682081917921702,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.006410256649057071,
"val/other_first_token": 0.0
},
{
"epoch": 0.3958333333333333,
"grad_norm": 0.7971638441085815,
"learning_rate": 0.0001,
"loss": 0.1558,
"objective/entropy": 1834.6666666666667,
"step": 38,
"train/nll_loss_a": 0.1687836398681005,
"train/nll_loss_b": 0.142802856862545,
"val/completion_length": 75.09615325927734,
"val/contain_eos_token": 0.9807692170143127,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.11538461844126384,
"val/fraction_both_incorrect": 0.7692307829856873,
"val/fraction_correct": 0.1730769251783689,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.3084398905436198,
"val/fraction_latin_b": 0.3190213441848755,
"val/fraction_number_a": 0.38394031922022503,
"val/fraction_number_b": 0.3600513239701589,
"val/fraction_other_a": 0.3076198200384776,
"val/fraction_other_b": 0.32092733184496564,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.0013936974961931508,
"val/lang_prob_en": 0.6955586870511373,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.40625,
"grad_norm": 0.5998182892799377,
"learning_rate": 0.0001,
"loss": 0.1452,
"objective/entropy": 928.0,
"step": 39,
"train/nll_loss_a": 0.14895252386728922,
"train/nll_loss_b": 0.14153108249107996,
"val/completion_length": 71.1602554321289,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359308679898,
"val/fraction_both_incorrect": 0.7051282127698263,
"val/fraction_correct": 0.19230769326289496,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.28946439425150555,
"val/fraction_latin_b": 0.287621130545934,
"val/fraction_number_a": 0.3734682301680247,
"val/fraction_number_b": 0.38121453921000165,
"val/fraction_other_a": 0.3370673954486847,
"val/fraction_other_b": 0.33116433024406433,
"val/fraction_ties": 0.7948718070983887,
"val/lang_prob_bg": 0.0014785424573346972,
"val/lang_prob_en": 0.6788028081258138,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.006410256649057071,
"val/other_first_token": 0.0
},
{
"epoch": 0.4166666666666667,
"grad_norm": 1.0486812591552734,
"learning_rate": 0.0001,
"loss": 0.1397,
"objective/entropy": 1610.6666666666667,
"step": 40,
"train/nll_loss_a": 0.124597763021787,
"train/nll_loss_b": 0.15475992610057196,
"val/completion_length": 66.44871775309245,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.8846153815587362,
"val/fraction_correct": 0.07692307730515797,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.26715315381685895,
"val/fraction_latin_b": 0.267175629734993,
"val/fraction_number_a": 0.40364818771680194,
"val/fraction_number_b": 0.40295613805452984,
"val/fraction_other_a": 0.3291986882686615,
"val/fraction_other_b": 0.32986828684806824,
"val/fraction_ties": 0.9230769077936808,
"val/lang_prob_bg": 0.0016380803814778726,
"val/lang_prob_en": 0.6558632055918375,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4270833333333333,
"grad_norm": 1.1442676782608032,
"learning_rate": 0.0001,
"loss": 0.1368,
"objective/entropy": 1390.6666666666667,
"step": 41,
"train/nll_loss_a": 0.13521244128545126,
"train/nll_loss_b": 0.13834577798843384,
"val/completion_length": 66.53845977783203,
"val/contain_eos_token": 0.9871794780095419,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.012820513298114141,
"val/fraction_both_incorrect": 0.8589743375778198,
"val/fraction_correct": 0.07692307916780312,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2713295817375183,
"val/fraction_latin_b": 0.2757815072933833,
"val/fraction_number_a": 0.39802590012550354,
"val/fraction_number_b": 0.3848887085914612,
"val/fraction_other_a": 0.33064452807108563,
"val/fraction_other_b": 0.3393297791481018,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.0014013544811556737,
"val/lang_prob_en": 0.6843119462331136,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4375,
"grad_norm": 1.032313585281372,
"learning_rate": 0.0001,
"loss": 0.1505,
"objective/entropy": 1338.6666666666667,
"step": 42,
"train/nll_loss_a": 0.1546641836563746,
"train/nll_loss_b": 0.146413487692674,
"val/completion_length": 57.61538314819336,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256649057071,
"val/fraction_both_incorrect": 0.8333333333333334,
"val/fraction_correct": 0.11538461595773697,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2686810294787089,
"val/fraction_latin_b": 0.2704887390136719,
"val/fraction_number_a": 0.39721407492955524,
"val/fraction_number_b": 0.38931016127268475,
"val/fraction_other_a": 0.33410489559173584,
"val/fraction_other_b": 0.3402010997136434,
"val/fraction_ties": 0.8974359035491943,
"val/lang_prob_bg": 0.0015042958548292518,
"val/lang_prob_en": 0.6720715363820394,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4479166666666667,
"grad_norm": 1.00838303565979,
"learning_rate": 0.0001,
"loss": 0.1867,
"objective/entropy": 1114.6666666666667,
"step": 43,
"train/nll_loss_a": 0.16402535637219748,
"train/nll_loss_b": 0.20946120719114938,
"val/completion_length": 57.775641123453774,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.12820513173937798,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2534715036551158,
"val/fraction_latin_b": 0.26074858009815216,
"val/fraction_number_a": 0.3934909800688426,
"val/fraction_number_b": 0.40743691722551983,
"val/fraction_other_a": 0.3530375460783641,
"val/fraction_other_b": 0.3318144778410594,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0016378250826771061,
"val/lang_prob_en": 0.6859935522079468,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4583333333333333,
"grad_norm": 0.746932327747345,
"learning_rate": 0.0001,
"loss": 0.1126,
"objective/entropy": 926.0,
"step": 44,
"train/nll_loss_a": 0.12073729187250137,
"train/nll_loss_b": 0.10447523991266887,
"val/completion_length": 57.910256703694664,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410390138626,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.16666666915019354,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.24723881979783377,
"val/fraction_latin_b": 0.2573054035504659,
"val/fraction_number_a": 0.4117300808429718,
"val/fraction_number_b": 0.4151102900505066,
"val/fraction_other_a": 0.3410310943921407,
"val/fraction_other_b": 0.32758431633313495,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.0016183808135489623,
"val/lang_prob_en": 0.6595947543780009,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.46875,
"grad_norm": 0.751488208770752,
"learning_rate": 0.0001,
"loss": 0.1458,
"objective/entropy": 1064.0,
"step": 45,
"train/nll_loss_a": 0.1560243566830953,
"train/nll_loss_b": 0.13560334593057632,
"val/completion_length": 55.36538569132487,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7307692368825277,
"val/fraction_correct": 0.1858974372347196,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2510768473148346,
"val/fraction_latin_b": 0.23666884501775107,
"val/fraction_number_a": 0.39663148919741315,
"val/fraction_number_b": 0.4140782058238983,
"val/fraction_other_a": 0.35229164361953735,
"val/fraction_other_b": 0.34925296902656555,
"val/fraction_ties": 0.8333333333333334,
"val/lang_prob_bg": 0.0012542977929115295,
"val/lang_prob_en": 0.6892314950625101,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4791666666666667,
"grad_norm": 0.7010864019393921,
"learning_rate": 0.0001,
"loss": 0.1338,
"objective/entropy": 997.3333333333334,
"step": 46,
"train/nll_loss_a": 0.14200725158055624,
"train/nll_loss_b": 0.1256332869331042,
"val/completion_length": 51.71153767903646,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.1666666679084301,
"val/fraction_both_incorrect": 0.7435897390047709,
"val/fraction_correct": 0.21153846631447473,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.24885622163613638,
"val/fraction_latin_b": 0.2301209419965744,
"val/fraction_number_a": 0.396872212489446,
"val/fraction_number_b": 0.4051181972026825,
"val/fraction_other_a": 0.3542715708414714,
"val/fraction_other_b": 0.3647608856360118,
"val/fraction_ties": 0.9102564056714376,
"val/lang_prob_bg": 0.0016821225096161168,
"val/lang_prob_en": 0.6777702768643697,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.4895833333333333,
"grad_norm": 1.0925281047821045,
"learning_rate": 0.0001,
"loss": 0.1205,
"objective/entropy": 881.3333333333334,
"step": 47,
"train/nll_loss_a": 0.12804403652747473,
"train/nll_loss_b": 0.11300961673259735,
"val/completion_length": 52.36538569132487,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307978868484,
"val/fraction_both_incorrect": 0.8333333333333334,
"val/fraction_correct": 0.12179487322767575,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.23128040631612143,
"val/fraction_latin_b": 0.2396069417397181,
"val/fraction_number_a": 0.4000318944454193,
"val/fraction_number_b": 0.40589800477027893,
"val/fraction_other_a": 0.3686876992384593,
"val/fraction_other_b": 0.3544950584570567,
"val/fraction_ties": 0.9102564056714376,
"val/lang_prob_bg": 0.0015037450551365812,
"val/lang_prob_en": 0.6730888287226359,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5,
"grad_norm": 1.2894670963287354,
"learning_rate": 0.0001,
"loss": 0.1487,
"objective/entropy": 714.6666666666666,
"step": 48,
"train/nll_loss_a": 0.139557013909022,
"train/nll_loss_b": 0.15790955225626627,
"val/completion_length": 51.801282246907554,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359184503555,
"val/fraction_both_incorrect": 0.8333333134651184,
"val/fraction_correct": 0.12820513049761453,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20845835904280344,
"val/fraction_latin_b": 0.2055131047964096,
"val/fraction_number_a": 0.428351491689682,
"val/fraction_number_b": 0.4272334774335225,
"val/fraction_other_a": 0.3631901641686757,
"val/fraction_other_b": 0.3672534426053365,
"val/fraction_ties": 0.9230769276618958,
"val/lang_prob_bg": 0.0014591465005651116,
"val/lang_prob_en": 0.6774142583211263,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5104166666666666,
"grad_norm": 1.2826536893844604,
"learning_rate": 0.0001,
"loss": 0.11,
"objective/entropy": 1096.6666666666667,
"step": 49,
"train/nll_loss_a": 0.09283561259508133,
"train/nll_loss_b": 0.1271024172504743,
"val/completion_length": 53.8012809753418,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.8461538354555765,
"val/fraction_correct": 0.10897436365485191,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2129476418097814,
"val/fraction_latin_b": 0.20714954535166422,
"val/fraction_number_a": 0.4297573169072469,
"val/fraction_number_b": 0.4259600241978963,
"val/fraction_other_a": 0.35729504625002545,
"val/fraction_other_b": 0.36689044038454693,
"val/fraction_ties": 0.9102564056714376,
"val/lang_prob_bg": 0.0016641345185538132,
"val/lang_prob_en": 0.6516953508059183,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5208333333333334,
"grad_norm": 0.9105807542800903,
"learning_rate": 0.0001,
"loss": 0.112,
"objective/entropy": 668.6666666666666,
"step": 50,
"train/nll_loss_a": 0.1011932243903478,
"train/nll_loss_b": 0.12279495596885681,
"val/completion_length": 52.903846740722656,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.012820513298114141,
"val/fraction_both_incorrect": 0.7179487347602844,
"val/fraction_correct": 0.14743590354919434,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2198729912439982,
"val/fraction_latin_b": 0.21549966434637705,
"val/fraction_number_a": 0.4184926648934682,
"val/fraction_number_b": 0.4257381657759349,
"val/fraction_other_a": 0.36163437366485596,
"val/fraction_other_b": 0.3587621847788493,
"val/fraction_ties": 0.7307692368825277,
"val/lang_prob_bg": 0.0013077266824742158,
"val/lang_prob_en": 0.6708633701006571,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.53125,
"grad_norm": 0.6333914399147034,
"learning_rate": 0.0001,
"loss": 0.0843,
"objective/entropy": 578.6666666666666,
"step": 51,
"train/nll_loss_a": 0.08048844834168752,
"train/nll_loss_b": 0.08813040951887767,
"val/completion_length": 50.442307790120445,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.12820513173937798,
"val/fraction_both_incorrect": 0.7179487148920695,
"val/fraction_correct": 0.20512820780277252,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1867583046356837,
"val/fraction_latin_b": 0.1920985778172811,
"val/fraction_number_a": 0.46239819129308063,
"val/fraction_number_b": 0.45383066932360333,
"val/fraction_other_a": 0.35084352890650433,
"val/fraction_other_b": 0.3540707727273305,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0013779281095291178,
"val/lang_prob_en": 0.6875834663709005,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5416666666666666,
"grad_norm": 0.597698450088501,
"learning_rate": 0.0001,
"loss": 0.0711,
"objective/entropy": 494.0,
"step": 52,
"train/nll_loss_a": 0.0706160341699918,
"train/nll_loss_b": 0.07159051423271497,
"val/completion_length": 52.0961545308431,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.1346153865257899,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19636401534080505,
"val/fraction_latin_b": 0.19669000804424286,
"val/fraction_number_a": 0.4432801107565562,
"val/fraction_number_b": 0.4444128175576528,
"val/fraction_other_a": 0.3603558838367462,
"val/fraction_other_b": 0.3588971694310506,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.0014325374116500218,
"val/lang_prob_en": 0.6964165170987447,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5520833333333334,
"grad_norm": 0.9163941144943237,
"learning_rate": 0.0001,
"loss": 0.0759,
"objective/entropy": 582.6666666666666,
"step": 53,
"train/nll_loss_a": 0.061987257252136864,
"train/nll_loss_b": 0.0898251583178838,
"val/completion_length": 51.95512771606445,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.871794859568278,
"val/fraction_correct": 0.10256410390138626,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19930331905682883,
"val/fraction_latin_b": 0.20022966961065927,
"val/fraction_number_a": 0.4515662391980489,
"val/fraction_number_b": 0.4426102936267853,
"val/fraction_other_a": 0.34913045167922974,
"val/fraction_other_b": 0.35716002186139423,
"val/fraction_ties": 0.9487179319063822,
"val/lang_prob_bg": 0.0013443352266525228,
"val/lang_prob_en": 0.6959804097811381,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5625,
"grad_norm": 0.7437398433685303,
"learning_rate": 0.0001,
"loss": 0.0617,
"objective/entropy": 917.3333333333334,
"step": 54,
"train/nll_loss_a": 0.06697492549816768,
"train/nll_loss_b": 0.05643160889546076,
"val/completion_length": 54.410255432128906,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.807692309220632,
"val/fraction_correct": 0.12820513298114142,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1963134010632833,
"val/fraction_latin_b": 0.19755952060222626,
"val/fraction_number_a": 0.4544989267985026,
"val/fraction_number_b": 0.4627720316251119,
"val/fraction_other_a": 0.3491876721382141,
"val/fraction_other_b": 0.3396684726079305,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.001135329968140771,
"val/lang_prob_en": 0.6944870551427206,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.5729166666666666,
"grad_norm": 0.8625170588493347,
"learning_rate": 0.0001,
"loss": 0.0791,
"objective/entropy": 1185.3333333333333,
"step": 55,
"train/nll_loss_a": 0.0869336798787117,
"train/nll_loss_b": 0.0713660145799319,
"val/completion_length": 54.77564239501953,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.8333333333333334,
"val/fraction_correct": 0.1153846209247907,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19297984739144644,
"val/fraction_latin_b": 0.2026552508274714,
"val/fraction_number_a": 0.4540139436721802,
"val/fraction_number_b": 0.45077388485272724,
"val/fraction_other_a": 0.3530062139034271,
"val/fraction_other_b": 0.34657086928685504,
"val/fraction_ties": 0.8974358836809794,
"val/lang_prob_bg": 0.00151369022205472,
"val/lang_prob_en": 0.6880850593249003,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.006410256649057071,
"val/other_first_token": 0.0
},
{
"epoch": 0.5833333333333334,
"grad_norm": 1.0587146282196045,
"learning_rate": 0.0001,
"loss": 0.0684,
"objective/entropy": 1028.6666666666667,
"step": 56,
"train/nll_loss_a": 0.06075024977326393,
"train/nll_loss_b": 0.07601286098361015,
"val/completion_length": 53.96153895060221,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7820512851079305,
"val/fraction_correct": 0.1602564106384913,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18348072469234467,
"val/fraction_latin_b": 0.18514560659726462,
"val/fraction_number_a": 0.45656461517016095,
"val/fraction_number_b": 0.4575365384419759,
"val/fraction_other_a": 0.35995468497276306,
"val/fraction_other_b": 0.35731785496075946,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.00106729429292803,
"val/lang_prob_en": 0.6962061325709025,
"val/latin_first_token": 0.9935897390047709,
"val/number_first_token": 0.006410256649057071,
"val/other_first_token": 0.0
},
{
"epoch": 0.59375,
"grad_norm": 0.8600048422813416,
"learning_rate": 0.0001,
"loss": 0.1015,
"objective/entropy": 1142.0,
"step": 57,
"train/nll_loss_a": 0.10414389024178188,
"train/nll_loss_b": 0.09876606116692226,
"val/completion_length": 54.6025644938151,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307730515797,
"val/fraction_both_incorrect": 0.807692309220632,
"val/fraction_correct": 0.13461538776755333,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19213247299194336,
"val/fraction_latin_b": 0.18131321668624878,
"val/fraction_number_a": 0.44502533475557965,
"val/fraction_number_b": 0.45092181364695233,
"val/fraction_other_a": 0.36284218231836957,
"val/fraction_other_b": 0.36776500940322876,
"val/fraction_ties": 0.8846153616905212,
"val/lang_prob_bg": 0.001513439230620861,
"val/lang_prob_en": 0.702368974685669,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6041666666666666,
"grad_norm": 0.8079760074615479,
"learning_rate": 0.0001,
"loss": 0.0778,
"objective/entropy": 505.3333333333333,
"step": 58,
"train/nll_loss_a": 0.09139975905418396,
"train/nll_loss_b": 0.06421066199739774,
"val/completion_length": 46.98076883951823,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.14102564752101898,
"val/fraction_both_incorrect": 0.7051282127698263,
"val/fraction_correct": 0.2179487223426501,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20126528044541678,
"val/fraction_latin_b": 0.19763841231664023,
"val/fraction_number_a": 0.4304538468519847,
"val/fraction_number_b": 0.43739889065424603,
"val/fraction_other_a": 0.3682809074719747,
"val/fraction_other_b": 0.3649626870950063,
"val/fraction_ties": 0.8461538553237915,
"val/lang_prob_bg": 0.0011517573924114306,
"val/lang_prob_en": 0.709509551525116,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6145833333333334,
"grad_norm": 1.1331056356430054,
"learning_rate": 0.0001,
"loss": 0.128,
"objective/entropy": 633.3333333333334,
"step": 59,
"train/nll_loss_a": 0.1187543123960495,
"train/nll_loss_b": 0.1372635985414187,
"val/completion_length": 46.737178802490234,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359184503555,
"val/fraction_both_incorrect": 0.7307692170143127,
"val/fraction_correct": 0.1794871836900711,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1910944233338038,
"val/fraction_latin_b": 0.18430300056934357,
"val/fraction_number_a": 0.4224574863910675,
"val/fraction_number_b": 0.439374307791392,
"val/fraction_other_a": 0.38644809524218243,
"val/fraction_other_b": 0.3763226866722107,
"val/fraction_ties": 0.8205128113428751,
"val/lang_prob_bg": 0.0015292856842279434,
"val/lang_prob_en": 0.6743942896525065,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.625,
"grad_norm": 0.8633061647415161,
"learning_rate": 0.0001,
"loss": 0.0539,
"objective/entropy": 554.0,
"step": 60,
"train/nll_loss_a": 0.05499819417794546,
"train/nll_loss_b": 0.05279202883442243,
"val/completion_length": 44.160256703694664,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.8461538354555765,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19793511927127838,
"val/fraction_latin_b": 0.19460705916086832,
"val/fraction_number_a": 0.4268949230511983,
"val/fraction_number_b": 0.4235563079516093,
"val/fraction_other_a": 0.375169962644577,
"val/fraction_other_b": 0.3818366428216298,
"val/fraction_ties": 0.9230769077936808,
"val/lang_prob_bg": 0.0016635601253559191,
"val/lang_prob_en": 0.6812194387118021,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6354166666666666,
"grad_norm": 0.8153233528137207,
"learning_rate": 0.0001,
"loss": 0.0809,
"objective/entropy": 630.6666666666666,
"step": 61,
"train/nll_loss_a": 0.09423964222272237,
"train/nll_loss_b": 0.06749718139568965,
"val/completion_length": 48.903846740722656,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.15384615709384283,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.19230769574642181,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18816453218460083,
"val/fraction_latin_b": 0.18853654464085898,
"val/fraction_number_a": 0.45722946524620056,
"val/fraction_number_b": 0.43430561820665997,
"val/fraction_other_a": 0.3546060423056285,
"val/fraction_other_b": 0.37715784708658856,
"val/fraction_ties": 0.9230769276618958,
"val/lang_prob_bg": 0.0015237585175782442,
"val/lang_prob_en": 0.6806376179059347,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6458333333333334,
"grad_norm": 1.104749321937561,
"learning_rate": 0.0001,
"loss": 0.0912,
"objective/entropy": 1110.0,
"step": 62,
"train/nll_loss_a": 0.09771117568016052,
"train/nll_loss_b": 0.08477205038070679,
"val/completion_length": 50.39743677775065,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.05128205195069313,
"val/fraction_both_incorrect": 0.807692289352417,
"val/fraction_correct": 0.1217948744694392,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19005567828814188,
"val/fraction_latin_b": 0.18684939543406168,
"val/fraction_number_a": 0.44661805033683777,
"val/fraction_number_b": 0.45144979159037274,
"val/fraction_other_a": 0.3633263309796651,
"val/fraction_other_b": 0.3617008129755656,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.0012832956854254007,
"val/lang_prob_en": 0.690887967745463,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.65625,
"grad_norm": 1.0379236936569214,
"learning_rate": 0.0001,
"loss": 0.0724,
"objective/entropy": 648.0,
"step": 63,
"train/nll_loss_a": 0.06959323212504387,
"train/nll_loss_b": 0.07520159830649693,
"val/completion_length": 46.92948786417643,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.8846153815587362,
"val/fraction_correct": 0.08974359060327212,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19109234710534415,
"val/fraction_latin_b": 0.2014519323905309,
"val/fraction_number_a": 0.4389280279477437,
"val/fraction_number_b": 0.4367695450782776,
"val/fraction_other_a": 0.36997965971628827,
"val/fraction_other_b": 0.36177852749824524,
"val/fraction_ties": 0.9487179517745972,
"val/lang_prob_bg": 0.0012157799016373854,
"val/lang_prob_en": 0.6699715455373129,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.8731870651245117,
"learning_rate": 0.0001,
"loss": 0.0729,
"objective/entropy": 430.6666666666667,
"step": 64,
"train/nll_loss_a": 0.08839354167381923,
"train/nll_loss_b": 0.05736600855986277,
"val/completion_length": 46.35897445678711,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.05128205195069313,
"val/fraction_both_incorrect": 0.7435897390047709,
"val/fraction_correct": 0.1538461558520794,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18153652052084604,
"val/fraction_latin_b": 0.1694901337226232,
"val/fraction_number_a": 0.45514161388079327,
"val/fraction_number_b": 0.47041670481363934,
"val/fraction_other_a": 0.36332186063130695,
"val/fraction_other_b": 0.3600931664307912,
"val/fraction_ties": 0.7948717872301737,
"val/lang_prob_bg": 0.0013296857941895723,
"val/lang_prob_en": 0.6832193930943807,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6770833333333334,
"grad_norm": 0.8836628198623657,
"learning_rate": 0.0001,
"loss": 0.0866,
"objective/entropy": 508.0,
"step": 65,
"train/nll_loss_a": 0.0954609215259552,
"train/nll_loss_b": 0.07772823919852574,
"val/completion_length": 46.75640996297201,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256649057071,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.14743590106566748,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18177295724550882,
"val/fraction_latin_b": 0.18375508983929953,
"val/fraction_number_a": 0.44039592146873474,
"val/fraction_number_b": 0.45297469695409137,
"val/fraction_other_a": 0.377831111351649,
"val/fraction_other_b": 0.36327023307482403,
"val/fraction_ties": 0.8333333333333334,
"val/lang_prob_bg": 0.0013483318810661633,
"val/lang_prob_en": 0.6739258567492167,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6875,
"grad_norm": 0.6342687606811523,
"learning_rate": 0.0001,
"loss": 0.0428,
"objective/entropy": 676.0,
"step": 66,
"train/nll_loss_a": 0.03163577119509379,
"train/nll_loss_b": 0.053979563216368355,
"val/completion_length": 46.833333333333336,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410638491313,
"val/fraction_both_incorrect": 0.871794859568278,
"val/fraction_correct": 0.11538461844126384,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19603643318017325,
"val/fraction_latin_b": 0.18948069214820862,
"val/fraction_number_a": 0.4410245418548584,
"val/fraction_number_b": 0.4377235968907674,
"val/fraction_other_a": 0.3629390597343445,
"val/fraction_other_b": 0.3727957208951314,
"val/fraction_ties": 0.9743589560190836,
"val/lang_prob_bg": 0.0014910439883048336,
"val/lang_prob_en": 0.6833223501841227,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.6979166666666666,
"grad_norm": 1.594736099243164,
"learning_rate": 0.0001,
"loss": 0.106,
"objective/entropy": 673.3333333333334,
"step": 67,
"train/nll_loss_a": 0.12174060692389806,
"train/nll_loss_b": 0.0902355636159579,
"val/completion_length": 45.9038454691569,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.8333333333333334,
"val/fraction_correct": 0.10897436241308849,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18011977771917978,
"val/fraction_latin_b": 0.18526378770669302,
"val/fraction_number_a": 0.468562384446462,
"val/fraction_number_b": 0.4472152789433797,
"val/fraction_other_a": 0.3513178726037343,
"val/fraction_other_b": 0.367520938316981,
"val/fraction_ties": 0.8846153616905212,
"val/lang_prob_bg": 0.0013117061074202259,
"val/lang_prob_en": 0.6828027566274008,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7083333333333334,
"grad_norm": 1.8821147680282593,
"learning_rate": 0.0001,
"loss": 0.0571,
"objective/entropy": 564.0,
"step": 68,
"train/nll_loss_a": 0.06897679592172305,
"train/nll_loss_b": 0.045296087861061096,
"val/completion_length": 46.057692209879555,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.8974358836809794,
"val/fraction_correct": 0.08333333643774192,
"val/fraction_cyrillic_a": 0.0002913753075214724,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18623560667037964,
"val/fraction_latin_b": 0.18657231827576956,
"val/fraction_number_a": 0.45339445273081463,
"val/fraction_number_b": 0.43990714351336163,
"val/fraction_other_a": 0.36007853349049884,
"val/fraction_other_b": 0.3735205630461375,
"val/fraction_ties": 0.9615384340286255,
"val/lang_prob_bg": 0.001663261791691184,
"val/lang_prob_en": 0.6788983543713888,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.71875,
"grad_norm": 1.5763386487960815,
"learning_rate": 0.0001,
"loss": 0.1605,
"objective/entropy": 600.6666666666666,
"step": 69,
"train/nll_loss_a": 0.13918370008468628,
"train/nll_loss_b": 0.18188542127609253,
"val/completion_length": 47.73076883951823,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.8846153815587362,
"val/fraction_correct": 0.08333333457509677,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18599570790926614,
"val/fraction_latin_b": 0.18386761844158173,
"val/fraction_number_a": 0.45702726642290753,
"val/fraction_number_b": 0.4447290301322937,
"val/fraction_other_a": 0.3569770057996114,
"val/fraction_other_b": 0.37140337626139325,
"val/fraction_ties": 0.935897429784139,
"val/lang_prob_bg": 0.0014010549833377202,
"val/lang_prob_en": 0.6976925929387411,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7291666666666666,
"grad_norm": 0.9730682373046875,
"learning_rate": 0.0001,
"loss": 0.1056,
"objective/entropy": 539.3333333333334,
"step": 70,
"train/nll_loss_a": 0.10723391423622768,
"train/nll_loss_b": 0.10387907922267914,
"val/completion_length": 46.28846104939779,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.11538461844126384,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.17307692766189575,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18785375853379568,
"val/fraction_latin_b": 0.183917502562205,
"val/fraction_number_a": 0.44820621609687805,
"val/fraction_number_b": 0.4407848119735718,
"val/fraction_other_a": 0.3639400204022725,
"val/fraction_other_b": 0.3752976953983307,
"val/fraction_ties": 0.8846153616905212,
"val/lang_prob_bg": 0.001435416905830304,
"val/lang_prob_en": 0.6813340584437052,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7395833333333334,
"grad_norm": 0.8169851899147034,
"learning_rate": 0.0001,
"loss": 0.0777,
"objective/entropy": 989.3333333333334,
"step": 71,
"train/nll_loss_a": 0.08104708914955457,
"train/nll_loss_b": 0.07427798646191756,
"val/completion_length": 50.17307662963867,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.807692289352417,
"val/fraction_correct": 0.14743590106566748,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18745056788126627,
"val/fraction_latin_b": 0.18821453551451364,
"val/fraction_number_a": 0.44776029388109845,
"val/fraction_number_b": 0.4434706171353658,
"val/fraction_other_a": 0.36478914817174274,
"val/fraction_other_b": 0.3683148721853892,
"val/fraction_ties": 0.9102564056714376,
"val/lang_prob_bg": 0.0018095905349279444,
"val/lang_prob_en": 0.6661532719930013,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.75,
"grad_norm": 0.9820713400840759,
"learning_rate": 0.0001,
"loss": 0.0655,
"objective/entropy": 783.3333333333334,
"step": 72,
"train/nll_loss_a": 0.05890080084403356,
"train/nll_loss_b": 0.07206882474323113,
"val/completion_length": 48.096153259277344,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359184503555,
"val/fraction_both_incorrect": 0.7051282127698263,
"val/fraction_correct": 0.19230769574642181,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18735684951146445,
"val/fraction_latin_b": 0.18214772641658783,
"val/fraction_number_a": 0.44764822721481323,
"val/fraction_number_b": 0.45329829057057697,
"val/fraction_other_a": 0.3649949332078298,
"val/fraction_other_b": 0.3645539879798889,
"val/fraction_ties": 0.7948717872301737,
"val/lang_prob_bg": 0.0014935457923760016,
"val/lang_prob_en": 0.677817722161611,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7604166666666666,
"grad_norm": 1.0932166576385498,
"learning_rate": 0.0001,
"loss": 0.0759,
"objective/entropy": 480.6666666666667,
"step": 73,
"train/nll_loss_a": 0.0820641169945399,
"train/nll_loss_b": 0.06981213887532552,
"val/completion_length": 47.480770111083984,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.03846153989434242,
"val/fraction_both_incorrect": 0.8461538354555765,
"val/fraction_correct": 0.09615384787321091,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18352153400580087,
"val/fraction_latin_b": 0.18374391893545786,
"val/fraction_number_a": 0.46121812860171,
"val/fraction_number_b": 0.45421716570854187,
"val/fraction_other_a": 0.3552603522936503,
"val/fraction_other_b": 0.36203893025716144,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.0015151738189160824,
"val/lang_prob_en": 0.6664467255274454,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7708333333333334,
"grad_norm": 0.7673856616020203,
"learning_rate": 0.0001,
"loss": 0.1001,
"objective/entropy": 689.3333333333334,
"step": 74,
"train/nll_loss_a": 0.09625528007745743,
"train/nll_loss_b": 0.10403718302647273,
"val/completion_length": 47.63461685180664,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.12820513049761453,
"val/fraction_both_incorrect": 0.6794871687889099,
"val/fraction_correct": 0.22435897588729858,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19491079449653625,
"val/fraction_latin_b": 0.2002050280570984,
"val/fraction_number_a": 0.4457240104675293,
"val/fraction_number_b": 0.4501633048057556,
"val/fraction_other_a": 0.3593652347723643,
"val/fraction_other_b": 0.3496316770712535,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.0012248660592983167,
"val/lang_prob_en": 0.6904502312342325,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.78125,
"grad_norm": 0.6796796917915344,
"learning_rate": 0.0001,
"loss": 0.0624,
"objective/entropy": 419.3333333333333,
"step": 75,
"train/nll_loss_a": 0.06967851271231969,
"train/nll_loss_b": 0.055133428424596786,
"val/completion_length": 56.21794764200846,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.025641026596228283,
"val/fraction_both_incorrect": 0.807692309220632,
"val/fraction_correct": 0.10897436241308849,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1962293783823649,
"val/fraction_latin_b": 0.1906676342089971,
"val/fraction_number_a": 0.45563430587450665,
"val/fraction_number_b": 0.45803216099739075,
"val/fraction_other_a": 0.3481363157431285,
"val/fraction_other_b": 0.3513002196947734,
"val/fraction_ties": 0.8333333333333334,
"val/lang_prob_bg": 0.0013489985916142662,
"val/lang_prob_en": 0.6775683760643005,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.7916666666666666,
"grad_norm": 0.6524468064308167,
"learning_rate": 0.0001,
"loss": 0.0818,
"objective/entropy": 445.3333333333333,
"step": 76,
"train/nll_loss_a": 0.08122942348321278,
"train/nll_loss_b": 0.08239901314179103,
"val/completion_length": 51.17948786417643,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.6794871886571249,
"val/fraction_correct": 0.1987179567416509,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18705879151821136,
"val/fraction_latin_b": 0.18814867238203684,
"val/fraction_number_a": 0.4660409986972809,
"val/fraction_number_b": 0.44541672865549725,
"val/fraction_other_a": 0.34690022468566895,
"val/fraction_other_b": 0.36643461386362713,
"val/fraction_ties": 0.7564102609952291,
"val/lang_prob_bg": 0.0012404399070267875,
"val/lang_prob_en": 0.6947490374247233,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8020833333333334,
"grad_norm": 1.0618677139282227,
"learning_rate": 0.0001,
"loss": 0.0775,
"objective/entropy": 617.3333333333334,
"step": 77,
"train/nll_loss_a": 0.065880270053943,
"train/nll_loss_b": 0.08919013664126396,
"val/completion_length": 53.089744567871094,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.051282053192456566,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.12820513298114142,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20119517048199972,
"val/fraction_latin_b": 0.21087687214215597,
"val/fraction_number_a": 0.4479774336020152,
"val/fraction_number_b": 0.43641577164332074,
"val/fraction_other_a": 0.3508274257183075,
"val/fraction_other_b": 0.3527073661486308,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.001091090574239691,
"val/lang_prob_en": 0.6832720836003622,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8125,
"grad_norm": 0.8138633966445923,
"learning_rate": 0.0001,
"loss": 0.0646,
"objective/entropy": 952.0,
"step": 78,
"train/nll_loss_a": 0.07945199559132259,
"train/nll_loss_b": 0.04983629286289215,
"val/completion_length": 59.54487228393555,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256649057071,
"val/fraction_both_incorrect": 0.7948718070983887,
"val/fraction_correct": 0.13461538900931677,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19928557177384695,
"val/fraction_latin_b": 0.19274388253688812,
"val/fraction_number_a": 0.4533061484495799,
"val/fraction_number_b": 0.44338251153628033,
"val/fraction_other_a": 0.347408264875412,
"val/fraction_other_b": 0.3638736108938853,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.0009999903462206323,
"val/lang_prob_en": 0.7025496959686279,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8229166666666666,
"grad_norm": 1.2982169389724731,
"learning_rate": 0.0001,
"loss": 0.1331,
"objective/entropy": 958.6666666666666,
"step": 79,
"train/nll_loss_a": 0.08611624377469222,
"train/nll_loss_b": 0.18007302532593408,
"val/completion_length": 59.903846740722656,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.807692289352417,
"val/fraction_correct": 0.12820513173937798,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19513183335463205,
"val/fraction_latin_b": 0.19802862405776978,
"val/fraction_number_a": 0.4579313596089681,
"val/fraction_number_b": 0.4683246115843455,
"val/fraction_other_a": 0.34693684180577594,
"val/fraction_other_b": 0.3336467544237773,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.0009022102652428051,
"val/lang_prob_en": 0.6803127328554789,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.6850357055664062,
"learning_rate": 0.0001,
"loss": 0.0623,
"objective/entropy": 588.6666666666666,
"step": 80,
"train/nll_loss_a": 0.065490427116553,
"train/nll_loss_b": 0.05901301031311353,
"val/completion_length": 59.04487228393555,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.1153846171995004,
"val/fraction_both_incorrect": 0.7307692368825277,
"val/fraction_correct": 0.19230769326289496,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1877680371205012,
"val/fraction_latin_b": 0.2046613891919454,
"val/fraction_number_a": 0.4560255507628123,
"val/fraction_number_b": 0.44008644421895343,
"val/fraction_other_a": 0.35620641708374023,
"val/fraction_other_b": 0.3552521864573161,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0010920044733211398,
"val/lang_prob_en": 0.700651208559672,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.84375,
"grad_norm": 0.6132178902626038,
"learning_rate": 0.0001,
"loss": 0.0744,
"objective/entropy": 588.0,
"step": 81,
"train/nll_loss_a": 0.06263614570101102,
"train/nll_loss_b": 0.08618492384751637,
"val/completion_length": 59.333334604899086,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.807692289352417,
"val/fraction_correct": 0.13461538900931677,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20071592926979065,
"val/fraction_latin_b": 0.20454128086566925,
"val/fraction_number_a": 0.47261403997739154,
"val/fraction_number_b": 0.44823821385701496,
"val/fraction_other_a": 0.3266700307528178,
"val/fraction_other_b": 0.3472205400466919,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.0008189170233284434,
"val/lang_prob_en": 0.694216271241506,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8541666666666666,
"grad_norm": 0.8821066617965698,
"learning_rate": 0.0001,
"loss": 0.1014,
"objective/entropy": 677.3333333333334,
"step": 82,
"train/nll_loss_a": 0.10356273377935092,
"train/nll_loss_b": 0.0993096400052309,
"val/completion_length": 59.61538441975912,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.7692307631174723,
"val/fraction_correct": 0.1602564131220182,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20315592487653097,
"val/fraction_latin_b": 0.19276542464892069,
"val/fraction_number_a": 0.43547679980595905,
"val/fraction_number_b": 0.44143152236938477,
"val/fraction_other_a": 0.36136728525161743,
"val/fraction_other_b": 0.3658030529816945,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.0010875378696558375,
"val/lang_prob_en": 0.6829221844673157,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8645833333333334,
"grad_norm": 0.6248305439949036,
"learning_rate": 0.0001,
"loss": 0.0693,
"objective/entropy": 1244.0,
"step": 83,
"train/nll_loss_a": 0.0709990132600069,
"train/nll_loss_b": 0.06762294905881087,
"val/completion_length": 62.92307790120443,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.1538461595773697,
"val/fraction_both_incorrect": 0.7820512851079305,
"val/fraction_correct": 0.18589743971824646,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1916692852973938,
"val/fraction_latin_b": 0.2029605656862259,
"val/fraction_number_a": 0.4679671327273051,
"val/fraction_number_b": 0.4397442440191905,
"val/fraction_other_a": 0.3403635819753011,
"val/fraction_other_b": 0.3572951853275299,
"val/fraction_ties": 0.935897429784139,
"val/lang_prob_bg": 0.0010764972733644147,
"val/lang_prob_en": 0.7133564352989197,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.875,
"grad_norm": 0.7387636303901672,
"learning_rate": 0.0001,
"loss": 0.0642,
"objective/entropy": 744.0,
"step": 84,
"train/nll_loss_a": 0.06808524702986081,
"train/nll_loss_b": 0.06040983274579048,
"val/completion_length": 62.487178802490234,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.05128205195069313,
"val/fraction_both_incorrect": 0.8205128312110901,
"val/fraction_correct": 0.1153846209247907,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19735526541868845,
"val/fraction_latin_b": 0.19533702731132507,
"val/fraction_number_a": 0.4576469858487447,
"val/fraction_number_b": 0.4456251660982768,
"val/fraction_other_a": 0.3449977735678355,
"val/fraction_other_b": 0.3590378165245056,
"val/fraction_ties": 0.871794859568278,
"val/lang_prob_bg": 0.0012146817559065919,
"val/lang_prob_en": 0.6715325911839803,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8854166666666666,
"grad_norm": 0.6266259551048279,
"learning_rate": 0.0001,
"loss": 0.0674,
"objective/entropy": 1516.0,
"step": 85,
"train/nll_loss_a": 0.06911032895247142,
"train/nll_loss_b": 0.06564381966988246,
"val/completion_length": 62.846153259277344,
"val/contain_eos_token": 0.9935897390047709,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08974359060327212,
"val/fraction_both_incorrect": 0.7564102609952291,
"val/fraction_correct": 0.1666666716337204,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19015646974245706,
"val/fraction_latin_b": 0.18910319606463113,
"val/fraction_number_a": 0.4569472173849742,
"val/fraction_number_b": 0.4444814920425415,
"val/fraction_other_a": 0.3528963228066762,
"val/fraction_other_b": 0.3664153416951497,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0013917646914099653,
"val/lang_prob_en": 0.6793488065401713,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.8958333333333334,
"grad_norm": 0.5713610649108887,
"learning_rate": 0.0001,
"loss": 0.0663,
"objective/entropy": 509.3333333333333,
"step": 86,
"train/nll_loss_a": 0.06172050287326177,
"train/nll_loss_b": 0.0708620510995388,
"val/completion_length": 57.46794891357422,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7820512652397156,
"val/fraction_correct": 0.1602564180890719,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20560947060585022,
"val/fraction_latin_b": 0.2044855753580729,
"val/fraction_number_a": 0.45070673028628033,
"val/fraction_number_b": 0.43896862864494324,
"val/fraction_other_a": 0.3436838189760844,
"val/fraction_other_b": 0.3565457959969838,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.0011454424432789285,
"val/lang_prob_en": 0.6845836440722147,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.90625,
"grad_norm": 0.6155418753623962,
"learning_rate": 0.0001,
"loss": 0.0632,
"objective/entropy": 746.6666666666666,
"step": 87,
"train/nll_loss_a": 0.05684895565112432,
"train/nll_loss_b": 0.06955469151337941,
"val/completion_length": 61.166666666666664,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.1346153865257899,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1965359846750895,
"val/fraction_latin_b": 0.19205531477928162,
"val/fraction_number_a": 0.4567938546339671,
"val/fraction_number_b": 0.45538153251012164,
"val/fraction_other_a": 0.3466701805591583,
"val/fraction_other_b": 0.35256315271059674,
"val/fraction_ties": 0.8589743574460348,
"val/lang_prob_bg": 0.0013497412437573075,
"val/lang_prob_en": 0.6935842831929525,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9166666666666666,
"grad_norm": 0.704394519329071,
"learning_rate": 0.0001,
"loss": 0.0757,
"objective/entropy": 753.3333333333334,
"step": 88,
"train/nll_loss_a": 0.07899581392606099,
"train/nll_loss_b": 0.07242523382107417,
"val/completion_length": 60.48718007405599,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.06410256524880727,
"val/fraction_both_incorrect": 0.8205128113428751,
"val/fraction_correct": 0.1217948744694392,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.2038972576459249,
"val/fraction_latin_b": 0.19931572178999582,
"val/fraction_number_a": 0.4489727218945821,
"val/fraction_number_b": 0.4421346386273702,
"val/fraction_other_a": 0.347130020459493,
"val/fraction_other_b": 0.3585496445496877,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.001280973704221348,
"val/lang_prob_en": 0.6853939096132914,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9270833333333334,
"grad_norm": 0.6524101495742798,
"learning_rate": 0.0001,
"loss": 0.0574,
"objective/entropy": 667.3333333333334,
"step": 89,
"train/nll_loss_a": 0.051913000643253326,
"train/nll_loss_b": 0.06291309744119644,
"val/completion_length": 57.307692209879555,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307978868484,
"val/fraction_both_incorrect": 0.7307692368825277,
"val/fraction_correct": 0.17307692766189575,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19886599977811178,
"val/fraction_latin_b": 0.20171213150024414,
"val/fraction_number_a": 0.4476595123608907,
"val/fraction_number_b": 0.45179522037506104,
"val/fraction_other_a": 0.3534744878609975,
"val/fraction_other_b": 0.3464926779270172,
"val/fraction_ties": 0.807692309220632,
"val/lang_prob_bg": 0.001182676952642699,
"val/lang_prob_en": 0.6873807509740194,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9375,
"grad_norm": 0.5485818386077881,
"learning_rate": 0.0001,
"loss": 0.0613,
"objective/entropy": 547.3333333333334,
"step": 90,
"train/nll_loss_a": 0.06135800232489904,
"train/nll_loss_b": 0.06123900165160497,
"val/completion_length": 54.20512898763021,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.14102564379572868,
"val/fraction_both_incorrect": 0.6794871687889099,
"val/fraction_correct": 0.23076922943194708,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1838169644276301,
"val/fraction_latin_b": 0.18703489502271017,
"val/fraction_number_a": 0.46563466389973956,
"val/fraction_number_b": 0.446638544400533,
"val/fraction_other_a": 0.3505483865737915,
"val/fraction_other_b": 0.3663265605767568,
"val/fraction_ties": 0.8205128113428751,
"val/lang_prob_bg": 0.0013577812739337485,
"val/lang_prob_en": 0.6915837923685709,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9479166666666666,
"grad_norm": 0.5063994526863098,
"learning_rate": 0.0001,
"loss": 0.0639,
"objective/entropy": 719.3333333333334,
"step": 91,
"train/nll_loss_a": 0.07966503500938416,
"train/nll_loss_b": 0.04821213148534298,
"val/completion_length": 55.025641123453774,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.11538461968302727,
"val/fraction_both_incorrect": 0.7307692368825277,
"val/fraction_correct": 0.19230770071347555,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1899570773045222,
"val/fraction_latin_b": 0.1830365608135859,
"val/fraction_number_a": 0.45852789282798767,
"val/fraction_number_b": 0.46170011162757874,
"val/fraction_other_a": 0.3515150249004364,
"val/fraction_other_b": 0.3552633424599965,
"val/fraction_ties": 0.8461538354555765,
"val/lang_prob_bg": 0.0010812885011546314,
"val/lang_prob_en": 0.7102627555529276,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9583333333333334,
"grad_norm": 0.6831931471824646,
"learning_rate": 0.0001,
"loss": 0.0584,
"objective/entropy": 514.0,
"step": 92,
"train/nll_loss_a": 0.06738898778955142,
"train/nll_loss_b": 0.04947723634541035,
"val/completion_length": 50.307692209879555,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.07692307854692142,
"val/fraction_both_incorrect": 0.807692309220632,
"val/fraction_correct": 0.13461538900931677,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.19622700413068137,
"val/fraction_latin_b": 0.18972766399383545,
"val/fraction_number_a": 0.434969961643219,
"val/fraction_number_b": 0.4513050417105357,
"val/fraction_other_a": 0.36880303422609967,
"val/fraction_other_b": 0.35896732409795123,
"val/fraction_ties": 0.8846153616905212,
"val/lang_prob_bg": 0.0014005635942642887,
"val/lang_prob_en": 0.6858188907305399,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.96875,
"grad_norm": 0.5057593584060669,
"learning_rate": 0.0001,
"loss": 0.0754,
"objective/entropy": 482.6666666666667,
"step": 93,
"train/nll_loss_a": 0.08236912513772647,
"train/nll_loss_b": 0.06851410741607349,
"val/completion_length": 47.5,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.15384615709384283,
"val/fraction_both_incorrect": 0.7564102609952291,
"val/fraction_correct": 0.19871795177459717,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18862396478652954,
"val/fraction_latin_b": 0.19087292750676474,
"val/fraction_number_a": 0.4471252163251241,
"val/fraction_number_b": 0.4374854067961375,
"val/fraction_other_a": 0.3642508288224538,
"val/fraction_other_b": 0.37164167563120526,
"val/fraction_ties": 0.9102563858032227,
"val/lang_prob_bg": 0.0012994079540173213,
"val/lang_prob_en": 0.6864928205808004,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9791666666666666,
"grad_norm": 0.6987188458442688,
"learning_rate": 0.0001,
"loss": 0.0556,
"objective/entropy": 797.3333333333334,
"step": 94,
"train/nll_loss_a": 0.05544371157884598,
"train/nll_loss_b": 0.055833750714858375,
"val/completion_length": 53.6025644938151,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7948717872301737,
"val/fraction_correct": 0.1538461595773697,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.1824193944533666,
"val/fraction_latin_b": 0.18101008733113608,
"val/fraction_number_a": 0.4529682397842407,
"val/fraction_number_b": 0.4553934931755066,
"val/fraction_other_a": 0.36461236079533893,
"val/fraction_other_b": 0.3635964592297872,
"val/fraction_ties": 0.8974358836809794,
"val/lang_prob_bg": 0.001927042962051928,
"val/lang_prob_en": 0.6548982262611389,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 0.9895833333333334,
"grad_norm": 0.5544689893722534,
"learning_rate": 0.0001,
"loss": 0.0758,
"objective/entropy": 487.3333333333333,
"step": 95,
"train/nll_loss_a": 0.06908356895049413,
"train/nll_loss_b": 0.08260532716910045,
"val/completion_length": 46.96794764200846,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.10256410514314969,
"val/fraction_both_incorrect": 0.7820512851079305,
"val/fraction_correct": 0.1602564131220182,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.18986154099305472,
"val/fraction_latin_b": 0.18999535342057547,
"val/fraction_number_a": 0.4548261861006419,
"val/fraction_number_b": 0.4395192861557007,
"val/fraction_other_a": 0.3553122878074646,
"val/fraction_other_b": 0.3704853653907776,
"val/fraction_ties": 0.8846153815587362,
"val/lang_prob_bg": 0.0012377959792502224,
"val/lang_prob_en": 0.6733607252438863,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
},
{
"epoch": 1.0,
"grad_norm": 0.783270537853241,
"learning_rate": 0.0001,
"loss": 0.0509,
"objective/entropy": 242.66666666666666,
"step": 96,
"train/nll_loss_a": 0.04177509993314743,
"train/nll_loss_b": 0.06005720297495524,
"val/completion_length": 43.97843805948893,
"val/contain_eos_token": 1.0,
"val/contains_guillemets": 0.0,
"val/cyrillic_first_token": 0.0,
"val/empty_batch": 0.0,
"val/fraction_both_correct": 0.08158508439858754,
"val/fraction_both_incorrect": 0.7424242496490479,
"val/fraction_correct": 0.16958042482535043,
"val/fraction_cyrillic_a": 0.0,
"val/fraction_cyrillic_b": 0.0,
"val/fraction_latin_a": 0.20345724125703177,
"val/fraction_latin_b": 0.212227334578832,
"val/fraction_number_a": 0.4268727699915568,
"val/fraction_number_b": 0.40884650746981305,
"val/fraction_other_a": 0.36967000365257263,
"val/fraction_other_b": 0.3789261778195699,
"val/fraction_ties": 0.8240093191464742,
"val/lang_prob_bg": 0.001514516188763082,
"val/lang_prob_en": 0.6803627212842306,
"val/latin_first_token": 1.0,
"val/number_first_token": 0.0,
"val/other_first_token": 0.0
}
],
"logging_steps": 1,
"max_steps": 96,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 26,
"trial_name": null,
"trial_params": null
}