|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 0.46898284554481506, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4984, |
|
"objective/entropy": 1536.0, |
|
"step": 1, |
|
"train/nll_loss_a": 0.46235302090644836, |
|
"train/nll_loss_b": 0.5343712766965231, |
|
"val/completion_length": 141.19872029622397, |
|
"val/contain_eos_token": 0.9294871687889099, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.35256410638491315, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.628205140431722, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.09817602237065633, |
|
"val/fraction_cyrillic_b": 0.07149943461020787, |
|
"val/fraction_latin_a": 0.48449812332789105, |
|
"val/fraction_latin_b": 0.5023942093054453, |
|
"val/fraction_number_a": 0.18907449146111807, |
|
"val/fraction_number_b": 0.19445918997128805, |
|
"val/fraction_other_a": 0.22825137277444205, |
|
"val/fraction_other_b": 0.2316471884648005, |
|
"val/fraction_ties": 0.6666666666666666, |
|
"val/lang_prob_bg": 0.0268978967020909, |
|
"val/lang_prob_en": 0.6749410231908163, |
|
"val/latin_first_token": 0.6474358836809794, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 0.49648183584213257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4431, |
|
"objective/entropy": 1424.0, |
|
"step": 2, |
|
"train/nll_loss_a": 0.4012756248315175, |
|
"train/nll_loss_b": 0.4849816660086314, |
|
"val/completion_length": 138.66666666666666, |
|
"val/contain_eos_token": 0.9102564056714376, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.32692308227221173, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0820654605825742, |
|
"val/fraction_cyrillic_b": 0.06807015091180801, |
|
"val/fraction_latin_a": 0.46912774443626404, |
|
"val/fraction_latin_b": 0.4876726269721985, |
|
"val/fraction_number_a": 0.21081160008907318, |
|
"val/fraction_number_b": 0.2032010406255722, |
|
"val/fraction_other_a": 0.2379952073097229, |
|
"val/fraction_other_b": 0.24105618397394815, |
|
"val/fraction_ties": 0.7820512851079305, |
|
"val/lang_prob_bg": 0.03282865695655346, |
|
"val/lang_prob_en": 0.6723186572392782, |
|
"val/latin_first_token": 0.6666666666666666, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.006410256649057071 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 0.6362881660461426, |
|
"learning_rate": 0.0001, |
|
"loss": 0.505, |
|
"objective/entropy": 1469.3333333333333, |
|
"step": 3, |
|
"train/nll_loss_a": 0.41949082414309186, |
|
"train/nll_loss_b": 0.5905094941457113, |
|
"val/completion_length": 153.07691955566406, |
|
"val/contain_eos_token": 0.878205140431722, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.3076923092206319, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.09064680337905884, |
|
"val/fraction_cyrillic_b": 0.07422023018201192, |
|
"val/fraction_latin_a": 0.4563806454340617, |
|
"val/fraction_latin_b": 0.4618365466594696, |
|
"val/fraction_number_a": 0.21234740813573202, |
|
"val/fraction_number_b": 0.21217785278956094, |
|
"val/fraction_other_a": 0.24062515298525491, |
|
"val/fraction_other_b": 0.2517653902371724, |
|
"val/fraction_ties": 0.8205128312110901, |
|
"val/lang_prob_bg": 0.03160186484456062, |
|
"val/lang_prob_en": 0.6696631709734598, |
|
"val/latin_first_token": 0.6923076709111532, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 0.5365626215934753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3568, |
|
"objective/entropy": 1538.6666666666667, |
|
"step": 4, |
|
"train/nll_loss_a": 0.3584041992823283, |
|
"train/nll_loss_b": 0.3552741805712382, |
|
"val/completion_length": 139.39102172851562, |
|
"val/contain_eos_token": 0.9230769077936808, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.21794872482617697, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.06686830148100853, |
|
"val/fraction_cyrillic_b": 0.040630811204512916, |
|
"val/fraction_latin_a": 0.48262248436609906, |
|
"val/fraction_latin_b": 0.4933239420255025, |
|
"val/fraction_number_a": 0.20719597240289053, |
|
"val/fraction_number_b": 0.22095757722854614, |
|
"val/fraction_other_a": 0.2433132529258728, |
|
"val/fraction_other_b": 0.24508768320083618, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.02131816806892554, |
|
"val/lang_prob_en": 0.6912566820780436, |
|
"val/latin_first_token": 0.7820512851079305, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.052083333333333336, |
|
"grad_norm": 0.6317083835601807, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3542, |
|
"objective/entropy": 1616.0, |
|
"step": 5, |
|
"train/nll_loss_a": 0.3730636735757192, |
|
"train/nll_loss_b": 0.33537689844767254, |
|
"val/completion_length": 141.45512898763022, |
|
"val/contain_eos_token": 0.9230769276618958, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.03846153927346071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7692307829856873, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.018022120309372742, |
|
"val/fraction_cyrillic_b": 0.008442190863812963, |
|
"val/fraction_latin_a": 0.530072808265686, |
|
"val/fraction_latin_b": 0.5371540983517965, |
|
"val/fraction_number_a": 0.20419377585252127, |
|
"val/fraction_number_b": 0.19861711064974466, |
|
"val/fraction_other_a": 0.2477113058169683, |
|
"val/fraction_other_b": 0.25578661759694415, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.005829914240166545, |
|
"val/lang_prob_en": 0.6994746724764506, |
|
"val/latin_first_token": 0.9615384538968405, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 0.6112334728240967, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3526, |
|
"objective/entropy": 1450.6666666666667, |
|
"step": 6, |
|
"train/nll_loss_a": 0.3616310755411784, |
|
"train/nll_loss_b": 0.3435203830401103, |
|
"val/completion_length": 139.3397420247396, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7564102411270142, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.005665303532926676, |
|
"val/fraction_cyrillic_b": 0.0012210012258340914, |
|
"val/fraction_latin_a": 0.5337207714716593, |
|
"val/fraction_latin_b": 0.5389339327812195, |
|
"val/fraction_number_a": 0.20246068636576334, |
|
"val/fraction_number_b": 0.20605232814947763, |
|
"val/fraction_other_a": 0.2581532299518585, |
|
"val/fraction_other_b": 0.25379273295402527, |
|
"val/fraction_ties": 0.8333333134651184, |
|
"val/lang_prob_bg": 0.0024220591488604746, |
|
"val/lang_prob_en": 0.716150164604187, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.07291666666666667, |
|
"grad_norm": 0.5834570527076721, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3122, |
|
"objective/entropy": 1482.6666666666667, |
|
"step": 7, |
|
"train/nll_loss_a": 0.31628555059432983, |
|
"train/nll_loss_b": 0.3082062304019928, |
|
"val/completion_length": 140.28205362955728, |
|
"val/contain_eos_token": 0.935897429784139, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.004876403972351302, |
|
"val/fraction_cyrillic_b": 0.0055291604561110335, |
|
"val/fraction_latin_a": 0.51544189453125, |
|
"val/fraction_latin_b": 0.5175811052322388, |
|
"val/fraction_number_a": 0.21839049458503723, |
|
"val/fraction_number_b": 0.21114349365234375, |
|
"val/fraction_other_a": 0.2612912356853485, |
|
"val/fraction_other_b": 0.26574622591336566, |
|
"val/fraction_ties": 0.8461538553237915, |
|
"val/lang_prob_bg": 0.0024805181116486588, |
|
"val/lang_prob_en": 0.7189218997955322, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.5234330296516418, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3689, |
|
"objective/entropy": 1560.0, |
|
"step": 8, |
|
"train/nll_loss_a": 0.38403966029485065, |
|
"train/nll_loss_b": 0.353829691807429, |
|
"val/completion_length": 147.18589782714844, |
|
"val/contain_eos_token": 0.8910256226857504, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.032051283245285354, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513173937798, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.22435897588729858, |
|
"val/fraction_cyrillic_a": 0.005463951422522466, |
|
"val/fraction_cyrillic_b": 0.00364190728093187, |
|
"val/fraction_latin_a": 0.519624650478363, |
|
"val/fraction_latin_b": 0.5310182571411133, |
|
"val/fraction_number_a": 0.22126641869544983, |
|
"val/fraction_number_b": 0.20790701607863107, |
|
"val/fraction_other_a": 0.2536449631055196, |
|
"val/fraction_other_b": 0.25743279854456586, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0022993393164748945, |
|
"val/lang_prob_en": 0.7228630383809408, |
|
"val/latin_first_token": 0.9679486950238546, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 0.46253740787506104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3548, |
|
"objective/entropy": 1626.6666666666667, |
|
"step": 9, |
|
"train/nll_loss_a": 0.353506733973821, |
|
"train/nll_loss_b": 0.3560173710187276, |
|
"val/completion_length": 141.10897318522134, |
|
"val/contain_eos_token": 0.8974359035491943, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.03846153927346071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.17948718120654425, |
|
"val/fraction_both_incorrect": 0.6666666865348816, |
|
"val/fraction_correct": 0.25641026099522907, |
|
"val/fraction_cyrillic_a": 0.009551782781879107, |
|
"val/fraction_cyrillic_b": 0.007778597995638847, |
|
"val/fraction_latin_a": 0.5005057454109192, |
|
"val/fraction_latin_b": 0.5112853447596232, |
|
"val/fraction_number_a": 0.2224580099185308, |
|
"val/fraction_number_b": 0.2299031764268875, |
|
"val/fraction_other_a": 0.26748446623484295, |
|
"val/fraction_other_b": 0.25103287398815155, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.003838104816774527, |
|
"val/lang_prob_en": 0.7203066547711691, |
|
"val/latin_first_token": 0.9615384538968405, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 0.6793639063835144, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4072, |
|
"objective/entropy": 1544.0, |
|
"step": 10, |
|
"train/nll_loss_a": 0.40951302647590637, |
|
"train/nll_loss_b": 0.4048899710178375, |
|
"val/completion_length": 139.59615580240884, |
|
"val/contain_eos_token": 0.9230769276618958, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.044871795922517776, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.01814807563399275, |
|
"val/fraction_cyrillic_b": 0.01091132735988746, |
|
"val/fraction_latin_a": 0.479096124569575, |
|
"val/fraction_latin_b": 0.48450469970703125, |
|
"val/fraction_number_a": 0.24191749095916748, |
|
"val/fraction_number_b": 0.23329021533330283, |
|
"val/fraction_other_a": 0.2608383099238078, |
|
"val/fraction_other_b": 0.2712937593460083, |
|
"val/fraction_ties": 0.8589743375778198, |
|
"val/lang_prob_bg": 0.005877171643078327, |
|
"val/lang_prob_en": 0.6905626058578491, |
|
"val/latin_first_token": 0.9551281929016113, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.11458333333333333, |
|
"grad_norm": 0.6676069498062134, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3261, |
|
"objective/entropy": 1296.0, |
|
"step": 11, |
|
"train/nll_loss_a": 0.3414422770341237, |
|
"train/nll_loss_b": 0.31081566711266834, |
|
"val/completion_length": 133.35897318522134, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.15384616081913313, |
|
"val/fraction_cyrillic_a": 0.009972451565166315, |
|
"val/fraction_cyrillic_b": 0.008257918680707613, |
|
"val/fraction_latin_a": 0.4708147446314494, |
|
"val/fraction_latin_b": 0.4841614067554474, |
|
"val/fraction_number_a": 0.2511301040649414, |
|
"val/fraction_number_b": 0.23997685313224792, |
|
"val/fraction_other_a": 0.26808270812034607, |
|
"val/fraction_other_b": 0.2676038245360057, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.004965859581716359, |
|
"val/lang_prob_en": 0.7087472081184387, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.48878854513168335, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3202, |
|
"objective/entropy": 1520.0, |
|
"step": 12, |
|
"train/nll_loss_a": 0.3013697862625122, |
|
"train/nll_loss_b": 0.33911073207855225, |
|
"val/completion_length": 125.4551289876302, |
|
"val/contain_eos_token": 0.9551282127698263, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.01923076994717121, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.6410256425539652, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0016106078983284533, |
|
"val/fraction_cyrillic_b": 0.00164324635018905, |
|
"val/fraction_latin_a": 0.4850431780020396, |
|
"val/fraction_latin_b": 0.49556367595990497, |
|
"val/fraction_number_a": 0.24694832662741342, |
|
"val/fraction_number_b": 0.24452554682890573, |
|
"val/fraction_other_a": 0.26639790336290997, |
|
"val/fraction_other_b": 0.25826754172643024, |
|
"val/fraction_ties": 0.7179487347602844, |
|
"val/lang_prob_bg": 0.0020299581810832024, |
|
"val/lang_prob_en": 0.7036298712094625, |
|
"val/latin_first_token": 0.9807692170143127, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.13541666666666666, |
|
"grad_norm": 0.47080346941947937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3551, |
|
"objective/entropy": 1610.6666666666667, |
|
"step": 13, |
|
"train/nll_loss_a": 0.3537709911664327, |
|
"train/nll_loss_b": 0.35645443201065063, |
|
"val/completion_length": 132.8397471110026, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.01923076994717121, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.6410256425539652, |
|
"val/fraction_correct": 0.23717949291070303, |
|
"val/fraction_cyrillic_a": 0.0035886759869754314, |
|
"val/fraction_cyrillic_b": 0.002304682352890571, |
|
"val/fraction_latin_a": 0.5025050441424052, |
|
"val/fraction_latin_b": 0.49992923935254413, |
|
"val/fraction_number_a": 0.22844381630420685, |
|
"val/fraction_number_b": 0.23804503679275513, |
|
"val/fraction_other_a": 0.2654624879360199, |
|
"val/fraction_other_b": 0.2597210705280304, |
|
"val/fraction_ties": 0.7564102609952291, |
|
"val/lang_prob_bg": 0.0021097887850676975, |
|
"val/lang_prob_en": 0.7135748863220215, |
|
"val/latin_first_token": 0.9807692170143127, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 0.5261266827583313, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3553, |
|
"objective/entropy": 1768.0, |
|
"step": 14, |
|
"train/nll_loss_a": 0.36536062757174176, |
|
"train/nll_loss_b": 0.34532251954078674, |
|
"val/completion_length": 130.3205134073893, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.006410256649057071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.14102564255396524, |
|
"val/fraction_both_incorrect": 0.7435897588729858, |
|
"val/fraction_correct": 0.19871795177459717, |
|
"val/fraction_cyrillic_a": 0.0008547008813669285, |
|
"val/fraction_cyrillic_b": 0.0002670940205765267, |
|
"val/fraction_latin_a": 0.48295870423316956, |
|
"val/fraction_latin_b": 0.4754104216893514, |
|
"val/fraction_number_a": 0.24797451992829642, |
|
"val/fraction_number_b": 0.25482123096783954, |
|
"val/fraction_other_a": 0.26821208000183105, |
|
"val/fraction_other_b": 0.269501268863678, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.0016354583591843646, |
|
"val/lang_prob_en": 0.7125194072723389, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.5608013868331909, |
|
"learning_rate": 0.0001, |
|
"loss": 0.375, |
|
"objective/entropy": 1813.3333333333333, |
|
"step": 15, |
|
"train/nll_loss_a": 0.3743097384770711, |
|
"train/nll_loss_b": 0.3756645123163859, |
|
"val/completion_length": 113.73076883951823, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.6666666666666666, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0005128205132981142, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4844670593738556, |
|
"val/fraction_latin_b": 0.48018120725949603, |
|
"val/fraction_number_a": 0.24094298481941223, |
|
"val/fraction_number_b": 0.2496140201886495, |
|
"val/fraction_other_a": 0.2740771571795146, |
|
"val/fraction_other_b": 0.2702048122882843, |
|
"val/fraction_ties": 0.7692307829856873, |
|
"val/lang_prob_bg": 0.0012819842668250203, |
|
"val/lang_prob_en": 0.712844451268514, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.5459577441215515, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3467, |
|
"objective/entropy": 1781.3333333333333, |
|
"step": 16, |
|
"train/nll_loss_a": 0.35024779041608173, |
|
"train/nll_loss_b": 0.3431568145751953, |
|
"val/completion_length": 125.86538696289062, |
|
"val/contain_eos_token": 0.9551281929016113, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.1666666716337204, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4834948579470317, |
|
"val/fraction_latin_b": 0.48427216211954754, |
|
"val/fraction_number_a": 0.24997142453988394, |
|
"val/fraction_number_b": 0.24836017191410065, |
|
"val/fraction_other_a": 0.2665337175130844, |
|
"val/fraction_other_b": 0.26736770073572796, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0015082452834273379, |
|
"val/lang_prob_en": 0.6896043419837952, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.006410256649057071 |
|
}, |
|
{ |
|
"epoch": 0.17708333333333334, |
|
"grad_norm": 0.6729714870452881, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3411, |
|
"objective/entropy": 1653.3333333333333, |
|
"step": 17, |
|
"train/nll_loss_a": 0.3364799916744232, |
|
"train/nll_loss_b": 0.34577877322832745, |
|
"val/completion_length": 112.4551264444987, |
|
"val/contain_eos_token": 0.9807692368825277, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.7564102411270142, |
|
"val/fraction_correct": 0.12820513049761453, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.45367395877838135, |
|
"val/fraction_latin_b": 0.46362873911857605, |
|
"val/fraction_number_a": 0.2715826133886973, |
|
"val/fraction_number_b": 0.2677338620026906, |
|
"val/fraction_other_a": 0.2747434576352437, |
|
"val/fraction_other_b": 0.2686373790105184, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0013079955242574215, |
|
"val/lang_prob_en": 0.7055089473724365, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 0.5278697609901428, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3637, |
|
"objective/entropy": 1666.6666666666667, |
|
"step": 18, |
|
"train/nll_loss_a": 0.3630356788635254, |
|
"train/nll_loss_b": 0.36438990632692975, |
|
"val/completion_length": 106.38461303710938, |
|
"val/contain_eos_token": 0.9743589758872986, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.006410256649057071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.18589743971824646, |
|
"val/fraction_cyrillic_a": 0.00018853696140771112, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.46353839834531146, |
|
"val/fraction_latin_b": 0.4717874725659688, |
|
"val/fraction_number_a": 0.26522815227508545, |
|
"val/fraction_number_b": 0.27208030720551807, |
|
"val/fraction_other_a": 0.27104492982228595, |
|
"val/fraction_other_b": 0.25613221526145935, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0014787697000429034, |
|
"val/lang_prob_en": 0.7207486033439636, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.19791666666666666, |
|
"grad_norm": 0.623100996017456, |
|
"learning_rate": 0.0001, |
|
"loss": 0.339, |
|
"objective/entropy": 1757.3333333333333, |
|
"step": 19, |
|
"train/nll_loss_a": 0.3500674267609914, |
|
"train/nll_loss_b": 0.32788631319999695, |
|
"val/completion_length": 115.87820434570312, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.14102564503749213, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.47628764311472577, |
|
"val/fraction_latin_b": 0.47780614097913104, |
|
"val/fraction_number_a": 0.25701290369033813, |
|
"val/fraction_number_b": 0.2525850087404251, |
|
"val/fraction_other_a": 0.2666994432608287, |
|
"val/fraction_other_b": 0.2696088453133901, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0013311682268977165, |
|
"val/lang_prob_en": 0.7212471763292948, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 0.7427172064781189, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3136, |
|
"objective/entropy": 1717.3333333333333, |
|
"step": 20, |
|
"train/nll_loss_a": 0.3320723871390025, |
|
"train/nll_loss_b": 0.2952205240726471, |
|
"val/completion_length": 103.90384674072266, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.14102564255396524, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.47085316975911456, |
|
"val/fraction_latin_b": 0.46781421701113385, |
|
"val/fraction_number_a": 0.2578504929939906, |
|
"val/fraction_number_b": 0.2744967540105184, |
|
"val/fraction_other_a": 0.27129634221394855, |
|
"val/fraction_other_b": 0.2576890190442403, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0014502551639452577, |
|
"val/lang_prob_en": 0.7153881192207336, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 0.4812980592250824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2812, |
|
"objective/entropy": 1712.0, |
|
"step": 21, |
|
"train/nll_loss_a": 0.2877577245235443, |
|
"train/nll_loss_b": 0.27465402086575824, |
|
"val/completion_length": 100.94871775309245, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359308679898, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.449023316303889, |
|
"val/fraction_latin_b": 0.43931347131729126, |
|
"val/fraction_number_a": 0.2805411020914714, |
|
"val/fraction_number_b": 0.2886248826980591, |
|
"val/fraction_other_a": 0.2704355716705322, |
|
"val/fraction_other_b": 0.27206166585286456, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0012512764272590478, |
|
"val/lang_prob_en": 0.7036919593811035, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 0.5802024006843567, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2079, |
|
"objective/entropy": 1754.6666666666667, |
|
"step": 22, |
|
"train/nll_loss_a": 0.2198613981405894, |
|
"train/nll_loss_b": 0.19591793914635977, |
|
"val/completion_length": 105.2051289876302, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.8589743375778198, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4607094426949819, |
|
"val/fraction_latin_b": 0.4435524543126424, |
|
"val/fraction_number_a": 0.2806512117385864, |
|
"val/fraction_number_b": 0.2736863394578298, |
|
"val/fraction_other_a": 0.2586393306652705, |
|
"val/fraction_other_b": 0.28276123603185016, |
|
"val/fraction_ties": 0.9487179517745972, |
|
"val/lang_prob_bg": 0.0013595524554451306, |
|
"val/lang_prob_en": 0.6991243163744608, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.23958333333333334, |
|
"grad_norm": 0.9169826507568359, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3761, |
|
"objective/entropy": 1493.3333333333333, |
|
"step": 23, |
|
"train/nll_loss_a": 0.36857877175013226, |
|
"train/nll_loss_b": 0.38360129793485004, |
|
"val/completion_length": 92.24359130859375, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.025641026596228283, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4382043182849884, |
|
"val/fraction_latin_b": 0.42687369386355084, |
|
"val/fraction_number_a": 0.2829488515853882, |
|
"val/fraction_number_b": 0.296395738919576, |
|
"val/fraction_other_a": 0.2788468599319458, |
|
"val/fraction_other_b": 0.2767305870850881, |
|
"val/fraction_ties": 0.8205128312110901, |
|
"val/lang_prob_bg": 0.0014082260507469375, |
|
"val/lang_prob_en": 0.7083008488019308, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6027104258537292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2801, |
|
"objective/entropy": 1490.6666666666667, |
|
"step": 24, |
|
"train/nll_loss_a": 0.278068482875824, |
|
"train/nll_loss_b": 0.28207358221213025, |
|
"val/completion_length": 86.58333333333333, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.44356054067611694, |
|
"val/fraction_latin_b": 0.44240028659502667, |
|
"val/fraction_number_a": 0.2843793531258901, |
|
"val/fraction_number_b": 0.2815621296564738, |
|
"val/fraction_other_a": 0.272060106197993, |
|
"val/fraction_other_b": 0.2760376036167145, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0013788756914436817, |
|
"val/lang_prob_en": 0.707394023736318, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2604166666666667, |
|
"grad_norm": 0.6588619351387024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2373, |
|
"objective/entropy": 1805.3333333333333, |
|
"step": 25, |
|
"train/nll_loss_a": 0.2221569369236628, |
|
"train/nll_loss_b": 0.25250792503356934, |
|
"val/completion_length": 97.98077138264973, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4407140811284383, |
|
"val/fraction_latin_b": 0.449174165725708, |
|
"val/fraction_number_a": 0.2688818077246348, |
|
"val/fraction_number_b": 0.264347364505132, |
|
"val/fraction_other_a": 0.2904041012128194, |
|
"val/fraction_other_b": 0.2864784598350525, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0015077214144791167, |
|
"val/lang_prob_en": 0.7170586188634237, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 0.7737333178520203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2641, |
|
"objective/entropy": 1989.3333333333333, |
|
"step": 26, |
|
"train/nll_loss_a": 0.26104875405629474, |
|
"train/nll_loss_b": 0.26720015704631805, |
|
"val/completion_length": 93.08974202473958, |
|
"val/contain_eos_token": 0.9743589758872986, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.19871795177459717, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.43882275621096295, |
|
"val/fraction_latin_b": 0.4281532069047292, |
|
"val/fraction_number_a": 0.29091211160024005, |
|
"val/fraction_number_b": 0.3004717230796814, |
|
"val/fraction_other_a": 0.2702651371558507, |
|
"val/fraction_other_b": 0.2713750700155894, |
|
"val/fraction_ties": 0.8333333134651184, |
|
"val/lang_prob_bg": 0.0013183245512967308, |
|
"val/lang_prob_en": 0.6850736141204834, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 0.7201813459396362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2468, |
|
"objective/entropy": 1309.3333333333333, |
|
"step": 27, |
|
"train/nll_loss_a": 0.2481851428747177, |
|
"train/nll_loss_b": 0.24535122017065683, |
|
"val/completion_length": 81.52563985188802, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307730515797, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.15384615709384283, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.42121972640355426, |
|
"val/fraction_latin_b": 0.3958790997664134, |
|
"val/fraction_number_a": 0.30145979921023053, |
|
"val/fraction_number_b": 0.3247312208016713, |
|
"val/fraction_other_a": 0.2773204942544301, |
|
"val/fraction_other_b": 0.2793896694978078, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0012046323778728645, |
|
"val/lang_prob_en": 0.690701444943746, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 0.5405112504959106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2421, |
|
"objective/entropy": 1466.6666666666667, |
|
"step": 28, |
|
"train/nll_loss_a": 0.258198360602061, |
|
"train/nll_loss_b": 0.225906973083814, |
|
"val/completion_length": 91.30128224690755, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.6025640964508057, |
|
"val/fraction_correct": 0.2435897489388784, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.41246400276819867, |
|
"val/fraction_latin_b": 0.4263813893000285, |
|
"val/fraction_number_a": 0.30804495016733807, |
|
"val/fraction_number_b": 0.299861341714859, |
|
"val/fraction_other_a": 0.27949108680089313, |
|
"val/fraction_other_b": 0.2737572491168976, |
|
"val/fraction_ties": 0.692307710647583, |
|
"val/lang_prob_bg": 0.0014168053554991882, |
|
"val/lang_prob_en": 0.7019821604092916, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3020833333333333, |
|
"grad_norm": 0.6843443512916565, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2173, |
|
"objective/entropy": 1717.3333333333333, |
|
"step": 29, |
|
"train/nll_loss_a": 0.2260708212852478, |
|
"train/nll_loss_b": 0.208594411611557, |
|
"val/completion_length": 86.48076883951823, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.18589743971824646, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.41310587525367737, |
|
"val/fraction_latin_b": 0.4215882420539856, |
|
"val/fraction_number_a": 0.3036368489265442, |
|
"val/fraction_number_b": 0.3048081199328105, |
|
"val/fraction_other_a": 0.2832573155562083, |
|
"val/fraction_other_b": 0.2736036380132039, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0014089663745835423, |
|
"val/lang_prob_en": 0.6810818711916605, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.6732069849967957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1954, |
|
"objective/entropy": 1698.6666666666667, |
|
"step": 30, |
|
"train/nll_loss_a": 0.190487802028656, |
|
"train/nll_loss_b": 0.20023786028226218, |
|
"val/completion_length": 83.47436014811198, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.692307690779368, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.37804505228996277, |
|
"val/fraction_latin_b": 0.3864828248818715, |
|
"val/fraction_number_a": 0.32556501030921936, |
|
"val/fraction_number_b": 0.3207090497016907, |
|
"val/fraction_other_a": 0.2963899274667104, |
|
"val/fraction_other_b": 0.29280807574590045, |
|
"val/fraction_ties": 0.7307692368825277, |
|
"val/lang_prob_bg": 0.0012525273875022929, |
|
"val/lang_prob_en": 0.6773750185966492, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3229166666666667, |
|
"grad_norm": 1.181767225265503, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2398, |
|
"objective/entropy": 1254.6666666666667, |
|
"step": 31, |
|
"train/nll_loss_a": 0.2306948055823644, |
|
"train/nll_loss_b": 0.2489608426888784, |
|
"val/completion_length": 80.55127970377605, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.09615384911497434, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3839823206265767, |
|
"val/fraction_latin_b": 0.40403392910957336, |
|
"val/fraction_number_a": 0.3259160916010539, |
|
"val/fraction_number_b": 0.3016844590504964, |
|
"val/fraction_other_a": 0.2901015877723694, |
|
"val/fraction_other_b": 0.29428161183993023, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.001477631429831187, |
|
"val/lang_prob_en": 0.693560004234314, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.7345649600028992, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2023, |
|
"objective/entropy": 1706.6666666666667, |
|
"step": 32, |
|
"train/nll_loss_a": 0.18820939461390176, |
|
"train/nll_loss_b": 0.2164141039053599, |
|
"val/completion_length": 86.39102681477864, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.025641026596228283, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3734320302804311, |
|
"val/fraction_latin_b": 0.39075586199760437, |
|
"val/fraction_number_a": 0.3208834727605184, |
|
"val/fraction_number_b": 0.3199572165807088, |
|
"val/fraction_other_a": 0.30568451682726544, |
|
"val/fraction_other_b": 0.28928691148757935, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.001555828144773841, |
|
"val/lang_prob_en": 0.6997750600179037, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 0.6794171929359436, |
|
"learning_rate": 0.0001, |
|
"loss": 0.208, |
|
"objective/entropy": 1765.3333333333333, |
|
"step": 33, |
|
"train/nll_loss_a": 0.197875847419103, |
|
"train/nll_loss_b": 0.2181676377852758, |
|
"val/completion_length": 81.42307790120442, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513049761453, |
|
"val/fraction_both_incorrect": 0.692307710647583, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.36189671357472736, |
|
"val/fraction_latin_b": 0.3368810514609019, |
|
"val/fraction_number_a": 0.34198596080144245, |
|
"val/fraction_number_b": 0.3494710822900136, |
|
"val/fraction_other_a": 0.2961173454920451, |
|
"val/fraction_other_b": 0.3136478662490845, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.0014510581968352199, |
|
"val/lang_prob_en": 0.6856165130933126, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3541666666666667, |
|
"grad_norm": 1.2053074836730957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1771, |
|
"objective/entropy": 1565.3333333333333, |
|
"step": 34, |
|
"train/nll_loss_a": 0.17536027232805887, |
|
"train/nll_loss_b": 0.1789391835530599, |
|
"val/completion_length": 75.41666666666667, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.34743695457776386, |
|
"val/fraction_latin_b": 0.3424769341945648, |
|
"val/fraction_number_a": 0.3374132215976715, |
|
"val/fraction_number_b": 0.3465127448240916, |
|
"val/fraction_other_a": 0.31514982382456463, |
|
"val/fraction_other_b": 0.31101036071777344, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0016215697008495529, |
|
"val/lang_prob_en": 0.6558386087417603, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3645833333333333, |
|
"grad_norm": 0.8247714042663574, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1831, |
|
"objective/entropy": 2050.6666666666665, |
|
"step": 35, |
|
"train/nll_loss_a": 0.1831777443488439, |
|
"train/nll_loss_b": 0.18294000625610352, |
|
"val/completion_length": 79.56410217285156, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7179487347602844, |
|
"val/fraction_correct": 0.17948718617359796, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3364667197068532, |
|
"val/fraction_latin_b": 0.3431033293406169, |
|
"val/fraction_number_a": 0.35041239857673645, |
|
"val/fraction_number_b": 0.3312891523043315, |
|
"val/fraction_other_a": 0.31312089165051776, |
|
"val/fraction_other_b": 0.3256075282891591, |
|
"val/fraction_ties": 0.7948718070983887, |
|
"val/lang_prob_bg": 0.0014793235653390486, |
|
"val/lang_prob_en": 0.6674719850222269, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.7892264127731323, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2262, |
|
"objective/entropy": 2029.3333333333333, |
|
"step": 36, |
|
"train/nll_loss_a": 0.21969079971313477, |
|
"train/nll_loss_b": 0.23275785644849142, |
|
"val/completion_length": 81.88461558024089, |
|
"val/contain_eos_token": 0.9807692170143127, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.33385714888572693, |
|
"val/fraction_latin_b": 0.31389813621838886, |
|
"val/fraction_number_a": 0.36293908953666687, |
|
"val/fraction_number_b": 0.368167241414388, |
|
"val/fraction_other_a": 0.3032037814458211, |
|
"val/fraction_other_b": 0.3179346521695455, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0014597336606432993, |
|
"val/lang_prob_en": 0.6864216725031534, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3854166666666667, |
|
"grad_norm": 0.9461960196495056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1815, |
|
"objective/entropy": 2218.6666666666665, |
|
"step": 37, |
|
"train/nll_loss_a": 0.1762543668349584, |
|
"train/nll_loss_b": 0.18676617741584778, |
|
"val/completion_length": 78.16666666666667, |
|
"val/contain_eos_token": 0.9743589560190836, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.31174399455388385, |
|
"val/fraction_latin_b": 0.3052766025066376, |
|
"val/fraction_number_a": 0.3656782905260722, |
|
"val/fraction_number_b": 0.37882108489672345, |
|
"val/fraction_other_a": 0.32257768511772156, |
|
"val/fraction_other_b": 0.3159022927284241, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0016823052040611703, |
|
"val/lang_prob_en": 0.682081917921702, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 0.7971638441085815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1558, |
|
"objective/entropy": 1834.6666666666667, |
|
"step": 38, |
|
"train/nll_loss_a": 0.1687836398681005, |
|
"train/nll_loss_b": 0.142802856862545, |
|
"val/completion_length": 75.09615325927734, |
|
"val/contain_eos_token": 0.9807692170143127, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.7692307829856873, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3084398905436198, |
|
"val/fraction_latin_b": 0.3190213441848755, |
|
"val/fraction_number_a": 0.38394031922022503, |
|
"val/fraction_number_b": 0.3600513239701589, |
|
"val/fraction_other_a": 0.3076198200384776, |
|
"val/fraction_other_b": 0.32092733184496564, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0013936974961931508, |
|
"val/lang_prob_en": 0.6955586870511373, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 0.5998182892799377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1452, |
|
"objective/entropy": 928.0, |
|
"step": 39, |
|
"train/nll_loss_a": 0.14895252386728922, |
|
"train/nll_loss_b": 0.14153108249107996, |
|
"val/completion_length": 71.1602554321289, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359308679898, |
|
"val/fraction_both_incorrect": 0.7051282127698263, |
|
"val/fraction_correct": 0.19230769326289496, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.28946439425150555, |
|
"val/fraction_latin_b": 0.287621130545934, |
|
"val/fraction_number_a": 0.3734682301680247, |
|
"val/fraction_number_b": 0.38121453921000165, |
|
"val/fraction_other_a": 0.3370673954486847, |
|
"val/fraction_other_b": 0.33116433024406433, |
|
"val/fraction_ties": 0.7948718070983887, |
|
"val/lang_prob_bg": 0.0014785424573346972, |
|
"val/lang_prob_en": 0.6788028081258138, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 1.0486812591552734, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1397, |
|
"objective/entropy": 1610.6666666666667, |
|
"step": 40, |
|
"train/nll_loss_a": 0.124597763021787, |
|
"train/nll_loss_b": 0.15475992610057196, |
|
"val/completion_length": 66.44871775309245, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.8846153815587362, |
|
"val/fraction_correct": 0.07692307730515797, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.26715315381685895, |
|
"val/fraction_latin_b": 0.267175629734993, |
|
"val/fraction_number_a": 0.40364818771680194, |
|
"val/fraction_number_b": 0.40295613805452984, |
|
"val/fraction_other_a": 0.3291986882686615, |
|
"val/fraction_other_b": 0.32986828684806824, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0016380803814778726, |
|
"val/lang_prob_en": 0.6558632055918375, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4270833333333333, |
|
"grad_norm": 1.1442676782608032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1368, |
|
"objective/entropy": 1390.6666666666667, |
|
"step": 41, |
|
"train/nll_loss_a": 0.13521244128545126, |
|
"train/nll_loss_b": 0.13834577798843384, |
|
"val/completion_length": 66.53845977783203, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.8589743375778198, |
|
"val/fraction_correct": 0.07692307916780312, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2713295817375183, |
|
"val/fraction_latin_b": 0.2757815072933833, |
|
"val/fraction_number_a": 0.39802590012550354, |
|
"val/fraction_number_b": 0.3848887085914612, |
|
"val/fraction_other_a": 0.33064452807108563, |
|
"val/fraction_other_b": 0.3393297791481018, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0014013544811556737, |
|
"val/lang_prob_en": 0.6843119462331136, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 1.032313585281372, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1505, |
|
"objective/entropy": 1338.6666666666667, |
|
"step": 42, |
|
"train/nll_loss_a": 0.1546641836563746, |
|
"train/nll_loss_b": 0.146413487692674, |
|
"val/completion_length": 57.61538314819336, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.11538461595773697, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2686810294787089, |
|
"val/fraction_latin_b": 0.2704887390136719, |
|
"val/fraction_number_a": 0.39721407492955524, |
|
"val/fraction_number_b": 0.38931016127268475, |
|
"val/fraction_other_a": 0.33410489559173584, |
|
"val/fraction_other_b": 0.3402010997136434, |
|
"val/fraction_ties": 0.8974359035491943, |
|
"val/lang_prob_bg": 0.0015042958548292518, |
|
"val/lang_prob_en": 0.6720715363820394, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4479166666666667, |
|
"grad_norm": 1.00838303565979, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1867, |
|
"objective/entropy": 1114.6666666666667, |
|
"step": 43, |
|
"train/nll_loss_a": 0.16402535637219748, |
|
"train/nll_loss_b": 0.20946120719114938, |
|
"val/completion_length": 57.775641123453774, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513173937798, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2534715036551158, |
|
"val/fraction_latin_b": 0.26074858009815216, |
|
"val/fraction_number_a": 0.3934909800688426, |
|
"val/fraction_number_b": 0.40743691722551983, |
|
"val/fraction_other_a": 0.3530375460783641, |
|
"val/fraction_other_b": 0.3318144778410594, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0016378250826771061, |
|
"val/lang_prob_en": 0.6859935522079468, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 0.746932327747345, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1126, |
|
"objective/entropy": 926.0, |
|
"step": 44, |
|
"train/nll_loss_a": 0.12073729187250137, |
|
"train/nll_loss_b": 0.10447523991266887, |
|
"val/completion_length": 57.910256703694664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410390138626, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.24723881979783377, |
|
"val/fraction_latin_b": 0.2573054035504659, |
|
"val/fraction_number_a": 0.4117300808429718, |
|
"val/fraction_number_b": 0.4151102900505066, |
|
"val/fraction_other_a": 0.3410310943921407, |
|
"val/fraction_other_b": 0.32758431633313495, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0016183808135489623, |
|
"val/lang_prob_en": 0.6595947543780009, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 0.751488208770752, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1458, |
|
"objective/entropy": 1064.0, |
|
"step": 45, |
|
"train/nll_loss_a": 0.1560243566830953, |
|
"train/nll_loss_b": 0.13560334593057632, |
|
"val/completion_length": 55.36538569132487, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.1858974372347196, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2510768473148346, |
|
"val/fraction_latin_b": 0.23666884501775107, |
|
"val/fraction_number_a": 0.39663148919741315, |
|
"val/fraction_number_b": 0.4140782058238983, |
|
"val/fraction_other_a": 0.35229164361953735, |
|
"val/fraction_other_b": 0.34925296902656555, |
|
"val/fraction_ties": 0.8333333333333334, |
|
"val/lang_prob_bg": 0.0012542977929115295, |
|
"val/lang_prob_en": 0.6892314950625101, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 0.7010864019393921, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1338, |
|
"objective/entropy": 997.3333333333334, |
|
"step": 46, |
|
"train/nll_loss_a": 0.14200725158055624, |
|
"train/nll_loss_b": 0.1256332869331042, |
|
"val/completion_length": 51.71153767903646, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.1666666679084301, |
|
"val/fraction_both_incorrect": 0.7435897390047709, |
|
"val/fraction_correct": 0.21153846631447473, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.24885622163613638, |
|
"val/fraction_latin_b": 0.2301209419965744, |
|
"val/fraction_number_a": 0.396872212489446, |
|
"val/fraction_number_b": 0.4051181972026825, |
|
"val/fraction_other_a": 0.3542715708414714, |
|
"val/fraction_other_b": 0.3647608856360118, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0016821225096161168, |
|
"val/lang_prob_en": 0.6777702768643697, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4895833333333333, |
|
"grad_norm": 1.0925281047821045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1205, |
|
"objective/entropy": 881.3333333333334, |
|
"step": 47, |
|
"train/nll_loss_a": 0.12804403652747473, |
|
"train/nll_loss_b": 0.11300961673259735, |
|
"val/completion_length": 52.36538569132487, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.12179487322767575, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.23128040631612143, |
|
"val/fraction_latin_b": 0.2396069417397181, |
|
"val/fraction_number_a": 0.4000318944454193, |
|
"val/fraction_number_b": 0.40589800477027893, |
|
"val/fraction_other_a": 0.3686876992384593, |
|
"val/fraction_other_b": 0.3544950584570567, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0015037450551365812, |
|
"val/lang_prob_en": 0.6730888287226359, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2894670963287354, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1487, |
|
"objective/entropy": 714.6666666666666, |
|
"step": 48, |
|
"train/nll_loss_a": 0.139557013909022, |
|
"train/nll_loss_b": 0.15790955225626627, |
|
"val/completion_length": 51.801282246907554, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.8333333134651184, |
|
"val/fraction_correct": 0.12820513049761453, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20845835904280344, |
|
"val/fraction_latin_b": 0.2055131047964096, |
|
"val/fraction_number_a": 0.428351491689682, |
|
"val/fraction_number_b": 0.4272334774335225, |
|
"val/fraction_other_a": 0.3631901641686757, |
|
"val/fraction_other_b": 0.3672534426053365, |
|
"val/fraction_ties": 0.9230769276618958, |
|
"val/lang_prob_bg": 0.0014591465005651116, |
|
"val/lang_prob_en": 0.6774142583211263, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5104166666666666, |
|
"grad_norm": 1.2826536893844604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.11, |
|
"objective/entropy": 1096.6666666666667, |
|
"step": 49, |
|
"train/nll_loss_a": 0.09283561259508133, |
|
"train/nll_loss_b": 0.1271024172504743, |
|
"val/completion_length": 53.8012809753418, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.10897436365485191, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2129476418097814, |
|
"val/fraction_latin_b": 0.20714954535166422, |
|
"val/fraction_number_a": 0.4297573169072469, |
|
"val/fraction_number_b": 0.4259600241978963, |
|
"val/fraction_other_a": 0.35729504625002545, |
|
"val/fraction_other_b": 0.36689044038454693, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0016641345185538132, |
|
"val/lang_prob_en": 0.6516953508059183, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 0.9105807542800903, |
|
"learning_rate": 0.0001, |
|
"loss": 0.112, |
|
"objective/entropy": 668.6666666666666, |
|
"step": 50, |
|
"train/nll_loss_a": 0.1011932243903478, |
|
"train/nll_loss_b": 0.12279495596885681, |
|
"val/completion_length": 52.903846740722656, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.7179487347602844, |
|
"val/fraction_correct": 0.14743590354919434, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2198729912439982, |
|
"val/fraction_latin_b": 0.21549966434637705, |
|
"val/fraction_number_a": 0.4184926648934682, |
|
"val/fraction_number_b": 0.4257381657759349, |
|
"val/fraction_other_a": 0.36163437366485596, |
|
"val/fraction_other_b": 0.3587621847788493, |
|
"val/fraction_ties": 0.7307692368825277, |
|
"val/lang_prob_bg": 0.0013077266824742158, |
|
"val/lang_prob_en": 0.6708633701006571, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 0.6333914399147034, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0843, |
|
"objective/entropy": 578.6666666666666, |
|
"step": 51, |
|
"train/nll_loss_a": 0.08048844834168752, |
|
"train/nll_loss_b": 0.08813040951887767, |
|
"val/completion_length": 50.442307790120445, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513173937798, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1867583046356837, |
|
"val/fraction_latin_b": 0.1920985778172811, |
|
"val/fraction_number_a": 0.46239819129308063, |
|
"val/fraction_number_b": 0.45383066932360333, |
|
"val/fraction_other_a": 0.35084352890650433, |
|
"val/fraction_other_b": 0.3540707727273305, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0013779281095291178, |
|
"val/lang_prob_en": 0.6875834663709005, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 0.597698450088501, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0711, |
|
"objective/entropy": 494.0, |
|
"step": 52, |
|
"train/nll_loss_a": 0.0706160341699918, |
|
"train/nll_loss_b": 0.07159051423271497, |
|
"val/completion_length": 52.0961545308431, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.1346153865257899, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19636401534080505, |
|
"val/fraction_latin_b": 0.19669000804424286, |
|
"val/fraction_number_a": 0.4432801107565562, |
|
"val/fraction_number_b": 0.4444128175576528, |
|
"val/fraction_other_a": 0.3603558838367462, |
|
"val/fraction_other_b": 0.3588971694310506, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0014325374116500218, |
|
"val/lang_prob_en": 0.6964165170987447, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5520833333333334, |
|
"grad_norm": 0.9163941144943237, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0759, |
|
"objective/entropy": 582.6666666666666, |
|
"step": 53, |
|
"train/nll_loss_a": 0.061987257252136864, |
|
"train/nll_loss_b": 0.0898251583178838, |
|
"val/completion_length": 51.95512771606445, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.871794859568278, |
|
"val/fraction_correct": 0.10256410390138626, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19930331905682883, |
|
"val/fraction_latin_b": 0.20022966961065927, |
|
"val/fraction_number_a": 0.4515662391980489, |
|
"val/fraction_number_b": 0.4426102936267853, |
|
"val/fraction_other_a": 0.34913045167922974, |
|
"val/fraction_other_b": 0.35716002186139423, |
|
"val/fraction_ties": 0.9487179319063822, |
|
"val/lang_prob_bg": 0.0013443352266525228, |
|
"val/lang_prob_en": 0.6959804097811381, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 0.7437398433685303, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0617, |
|
"objective/entropy": 917.3333333333334, |
|
"step": 54, |
|
"train/nll_loss_a": 0.06697492549816768, |
|
"train/nll_loss_b": 0.05643160889546076, |
|
"val/completion_length": 54.410255432128906, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1963134010632833, |
|
"val/fraction_latin_b": 0.19755952060222626, |
|
"val/fraction_number_a": 0.4544989267985026, |
|
"val/fraction_number_b": 0.4627720316251119, |
|
"val/fraction_other_a": 0.3491876721382141, |
|
"val/fraction_other_b": 0.3396684726079305, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.001135329968140771, |
|
"val/lang_prob_en": 0.6944870551427206, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5729166666666666, |
|
"grad_norm": 0.8625170588493347, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0791, |
|
"objective/entropy": 1185.3333333333333, |
|
"step": 55, |
|
"train/nll_loss_a": 0.0869336798787117, |
|
"train/nll_loss_b": 0.0713660145799319, |
|
"val/completion_length": 54.77564239501953, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.1153846209247907, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19297984739144644, |
|
"val/fraction_latin_b": 0.2026552508274714, |
|
"val/fraction_number_a": 0.4540139436721802, |
|
"val/fraction_number_b": 0.45077388485272724, |
|
"val/fraction_other_a": 0.3530062139034271, |
|
"val/fraction_other_b": 0.34657086928685504, |
|
"val/fraction_ties": 0.8974358836809794, |
|
"val/lang_prob_bg": 0.00151369022205472, |
|
"val/lang_prob_en": 0.6880850593249003, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 1.0587146282196045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0684, |
|
"objective/entropy": 1028.6666666666667, |
|
"step": 56, |
|
"train/nll_loss_a": 0.06075024977326393, |
|
"train/nll_loss_b": 0.07601286098361015, |
|
"val/completion_length": 53.96153895060221, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.1602564106384913, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18348072469234467, |
|
"val/fraction_latin_b": 0.18514560659726462, |
|
"val/fraction_number_a": 0.45656461517016095, |
|
"val/fraction_number_b": 0.4575365384419759, |
|
"val/fraction_other_a": 0.35995468497276306, |
|
"val/fraction_other_b": 0.35731785496075946, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.00106729429292803, |
|
"val/lang_prob_en": 0.6962061325709025, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 0.8600048422813416, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1015, |
|
"objective/entropy": 1142.0, |
|
"step": 57, |
|
"train/nll_loss_a": 0.10414389024178188, |
|
"train/nll_loss_b": 0.09876606116692226, |
|
"val/completion_length": 54.6025644938151, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307730515797, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.13461538776755333, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19213247299194336, |
|
"val/fraction_latin_b": 0.18131321668624878, |
|
"val/fraction_number_a": 0.44502533475557965, |
|
"val/fraction_number_b": 0.45092181364695233, |
|
"val/fraction_other_a": 0.36284218231836957, |
|
"val/fraction_other_b": 0.36776500940322876, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.001513439230620861, |
|
"val/lang_prob_en": 0.702368974685669, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 0.8079760074615479, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0778, |
|
"objective/entropy": 505.3333333333333, |
|
"step": 58, |
|
"train/nll_loss_a": 0.09139975905418396, |
|
"train/nll_loss_b": 0.06421066199739774, |
|
"val/completion_length": 46.98076883951823, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.14102564752101898, |
|
"val/fraction_both_incorrect": 0.7051282127698263, |
|
"val/fraction_correct": 0.2179487223426501, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20126528044541678, |
|
"val/fraction_latin_b": 0.19763841231664023, |
|
"val/fraction_number_a": 0.4304538468519847, |
|
"val/fraction_number_b": 0.43739889065424603, |
|
"val/fraction_other_a": 0.3682809074719747, |
|
"val/fraction_other_b": 0.3649626870950063, |
|
"val/fraction_ties": 0.8461538553237915, |
|
"val/lang_prob_bg": 0.0011517573924114306, |
|
"val/lang_prob_en": 0.709509551525116, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6145833333333334, |
|
"grad_norm": 1.1331056356430054, |
|
"learning_rate": 0.0001, |
|
"loss": 0.128, |
|
"objective/entropy": 633.3333333333334, |
|
"step": 59, |
|
"train/nll_loss_a": 0.1187543123960495, |
|
"train/nll_loss_b": 0.1372635985414187, |
|
"val/completion_length": 46.737178802490234, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7307692170143127, |
|
"val/fraction_correct": 0.1794871836900711, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1910944233338038, |
|
"val/fraction_latin_b": 0.18430300056934357, |
|
"val/fraction_number_a": 0.4224574863910675, |
|
"val/fraction_number_b": 0.439374307791392, |
|
"val/fraction_other_a": 0.38644809524218243, |
|
"val/fraction_other_b": 0.3763226866722107, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.0015292856842279434, |
|
"val/lang_prob_en": 0.6743942896525065, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.8633061647415161, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0539, |
|
"objective/entropy": 554.0, |
|
"step": 60, |
|
"train/nll_loss_a": 0.05499819417794546, |
|
"train/nll_loss_b": 0.05279202883442243, |
|
"val/completion_length": 44.160256703694664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19793511927127838, |
|
"val/fraction_latin_b": 0.19460705916086832, |
|
"val/fraction_number_a": 0.4268949230511983, |
|
"val/fraction_number_b": 0.4235563079516093, |
|
"val/fraction_other_a": 0.375169962644577, |
|
"val/fraction_other_b": 0.3818366428216298, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0016635601253559191, |
|
"val/lang_prob_en": 0.6812194387118021, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6354166666666666, |
|
"grad_norm": 0.8153233528137207, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0809, |
|
"objective/entropy": 630.6666666666666, |
|
"step": 61, |
|
"train/nll_loss_a": 0.09423964222272237, |
|
"train/nll_loss_b": 0.06749718139568965, |
|
"val/completion_length": 48.903846740722656, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.15384615709384283, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.19230769574642181, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18816453218460083, |
|
"val/fraction_latin_b": 0.18853654464085898, |
|
"val/fraction_number_a": 0.45722946524620056, |
|
"val/fraction_number_b": 0.43430561820665997, |
|
"val/fraction_other_a": 0.3546060423056285, |
|
"val/fraction_other_b": 0.37715784708658856, |
|
"val/fraction_ties": 0.9230769276618958, |
|
"val/lang_prob_bg": 0.0015237585175782442, |
|
"val/lang_prob_en": 0.6806376179059347, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 1.104749321937561, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0912, |
|
"objective/entropy": 1110.0, |
|
"step": 62, |
|
"train/nll_loss_a": 0.09771117568016052, |
|
"train/nll_loss_b": 0.08477205038070679, |
|
"val/completion_length": 50.39743677775065, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.05128205195069313, |
|
"val/fraction_both_incorrect": 0.807692289352417, |
|
"val/fraction_correct": 0.1217948744694392, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19005567828814188, |
|
"val/fraction_latin_b": 0.18684939543406168, |
|
"val/fraction_number_a": 0.44661805033683777, |
|
"val/fraction_number_b": 0.45144979159037274, |
|
"val/fraction_other_a": 0.3633263309796651, |
|
"val/fraction_other_b": 0.3617008129755656, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0012832956854254007, |
|
"val/lang_prob_en": 0.690887967745463, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 1.0379236936569214, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0724, |
|
"objective/entropy": 648.0, |
|
"step": 63, |
|
"train/nll_loss_a": 0.06959323212504387, |
|
"train/nll_loss_b": 0.07520159830649693, |
|
"val/completion_length": 46.92948786417643, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8846153815587362, |
|
"val/fraction_correct": 0.08974359060327212, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19109234710534415, |
|
"val/fraction_latin_b": 0.2014519323905309, |
|
"val/fraction_number_a": 0.4389280279477437, |
|
"val/fraction_number_b": 0.4367695450782776, |
|
"val/fraction_other_a": 0.36997965971628827, |
|
"val/fraction_other_b": 0.36177852749824524, |
|
"val/fraction_ties": 0.9487179517745972, |
|
"val/lang_prob_bg": 0.0012157799016373854, |
|
"val/lang_prob_en": 0.6699715455373129, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.8731870651245117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0729, |
|
"objective/entropy": 430.6666666666667, |
|
"step": 64, |
|
"train/nll_loss_a": 0.08839354167381923, |
|
"train/nll_loss_b": 0.05736600855986277, |
|
"val/completion_length": 46.35897445678711, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.05128205195069313, |
|
"val/fraction_both_incorrect": 0.7435897390047709, |
|
"val/fraction_correct": 0.1538461558520794, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18153652052084604, |
|
"val/fraction_latin_b": 0.1694901337226232, |
|
"val/fraction_number_a": 0.45514161388079327, |
|
"val/fraction_number_b": 0.47041670481363934, |
|
"val/fraction_other_a": 0.36332186063130695, |
|
"val/fraction_other_b": 0.3600931664307912, |
|
"val/fraction_ties": 0.7948717872301737, |
|
"val/lang_prob_bg": 0.0013296857941895723, |
|
"val/lang_prob_en": 0.6832193930943807, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6770833333333334, |
|
"grad_norm": 0.8836628198623657, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0866, |
|
"objective/entropy": 508.0, |
|
"step": 65, |
|
"train/nll_loss_a": 0.0954609215259552, |
|
"train/nll_loss_b": 0.07772823919852574, |
|
"val/completion_length": 46.75640996297201, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.14743590106566748, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18177295724550882, |
|
"val/fraction_latin_b": 0.18375508983929953, |
|
"val/fraction_number_a": 0.44039592146873474, |
|
"val/fraction_number_b": 0.45297469695409137, |
|
"val/fraction_other_a": 0.377831111351649, |
|
"val/fraction_other_b": 0.36327023307482403, |
|
"val/fraction_ties": 0.8333333333333334, |
|
"val/lang_prob_bg": 0.0013483318810661633, |
|
"val/lang_prob_en": 0.6739258567492167, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 0.6342687606811523, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0428, |
|
"objective/entropy": 676.0, |
|
"step": 66, |
|
"train/nll_loss_a": 0.03163577119509379, |
|
"train/nll_loss_b": 0.053979563216368355, |
|
"val/completion_length": 46.833333333333336, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410638491313, |
|
"val/fraction_both_incorrect": 0.871794859568278, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19603643318017325, |
|
"val/fraction_latin_b": 0.18948069214820862, |
|
"val/fraction_number_a": 0.4410245418548584, |
|
"val/fraction_number_b": 0.4377235968907674, |
|
"val/fraction_other_a": 0.3629390597343445, |
|
"val/fraction_other_b": 0.3727957208951314, |
|
"val/fraction_ties": 0.9743589560190836, |
|
"val/lang_prob_bg": 0.0014910439883048336, |
|
"val/lang_prob_en": 0.6833223501841227, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6979166666666666, |
|
"grad_norm": 1.594736099243164, |
|
"learning_rate": 0.0001, |
|
"loss": 0.106, |
|
"objective/entropy": 673.3333333333334, |
|
"step": 67, |
|
"train/nll_loss_a": 0.12174060692389806, |
|
"train/nll_loss_b": 0.0902355636159579, |
|
"val/completion_length": 45.9038454691569, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.10897436241308849, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18011977771917978, |
|
"val/fraction_latin_b": 0.18526378770669302, |
|
"val/fraction_number_a": 0.468562384446462, |
|
"val/fraction_number_b": 0.4472152789433797, |
|
"val/fraction_other_a": 0.3513178726037343, |
|
"val/fraction_other_b": 0.367520938316981, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.0013117061074202259, |
|
"val/lang_prob_en": 0.6828027566274008, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7083333333333334, |
|
"grad_norm": 1.8821147680282593, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0571, |
|
"objective/entropy": 564.0, |
|
"step": 68, |
|
"train/nll_loss_a": 0.06897679592172305, |
|
"train/nll_loss_b": 0.045296087861061096, |
|
"val/completion_length": 46.057692209879555, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8974358836809794, |
|
"val/fraction_correct": 0.08333333643774192, |
|
"val/fraction_cyrillic_a": 0.0002913753075214724, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18623560667037964, |
|
"val/fraction_latin_b": 0.18657231827576956, |
|
"val/fraction_number_a": 0.45339445273081463, |
|
"val/fraction_number_b": 0.43990714351336163, |
|
"val/fraction_other_a": 0.36007853349049884, |
|
"val/fraction_other_b": 0.3735205630461375, |
|
"val/fraction_ties": 0.9615384340286255, |
|
"val/lang_prob_bg": 0.001663261791691184, |
|
"val/lang_prob_en": 0.6788983543713888, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 1.5763386487960815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1605, |
|
"objective/entropy": 600.6666666666666, |
|
"step": 69, |
|
"train/nll_loss_a": 0.13918370008468628, |
|
"train/nll_loss_b": 0.18188542127609253, |
|
"val/completion_length": 47.73076883951823, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.8846153815587362, |
|
"val/fraction_correct": 0.08333333457509677, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18599570790926614, |
|
"val/fraction_latin_b": 0.18386761844158173, |
|
"val/fraction_number_a": 0.45702726642290753, |
|
"val/fraction_number_b": 0.4447290301322937, |
|
"val/fraction_other_a": 0.3569770057996114, |
|
"val/fraction_other_b": 0.37140337626139325, |
|
"val/fraction_ties": 0.935897429784139, |
|
"val/lang_prob_bg": 0.0014010549833377202, |
|
"val/lang_prob_en": 0.6976925929387411, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 0.9730682373046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1056, |
|
"objective/entropy": 539.3333333333334, |
|
"step": 70, |
|
"train/nll_loss_a": 0.10723391423622768, |
|
"train/nll_loss_b": 0.10387907922267914, |
|
"val/completion_length": 46.28846104939779, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.17307692766189575, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18785375853379568, |
|
"val/fraction_latin_b": 0.183917502562205, |
|
"val/fraction_number_a": 0.44820621609687805, |
|
"val/fraction_number_b": 0.4407848119735718, |
|
"val/fraction_other_a": 0.3639400204022725, |
|
"val/fraction_other_b": 0.3752976953983307, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.001435416905830304, |
|
"val/lang_prob_en": 0.6813340584437052, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7395833333333334, |
|
"grad_norm": 0.8169851899147034, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0777, |
|
"objective/entropy": 989.3333333333334, |
|
"step": 71, |
|
"train/nll_loss_a": 0.08104708914955457, |
|
"train/nll_loss_b": 0.07427798646191756, |
|
"val/completion_length": 50.17307662963867, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.807692289352417, |
|
"val/fraction_correct": 0.14743590106566748, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18745056788126627, |
|
"val/fraction_latin_b": 0.18821453551451364, |
|
"val/fraction_number_a": 0.44776029388109845, |
|
"val/fraction_number_b": 0.4434706171353658, |
|
"val/fraction_other_a": 0.36478914817174274, |
|
"val/fraction_other_b": 0.3683148721853892, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0018095905349279444, |
|
"val/lang_prob_en": 0.6661532719930013, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.9820713400840759, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0655, |
|
"objective/entropy": 783.3333333333334, |
|
"step": 72, |
|
"train/nll_loss_a": 0.05890080084403356, |
|
"train/nll_loss_b": 0.07206882474323113, |
|
"val/completion_length": 48.096153259277344, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7051282127698263, |
|
"val/fraction_correct": 0.19230769574642181, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18735684951146445, |
|
"val/fraction_latin_b": 0.18214772641658783, |
|
"val/fraction_number_a": 0.44764822721481323, |
|
"val/fraction_number_b": 0.45329829057057697, |
|
"val/fraction_other_a": 0.3649949332078298, |
|
"val/fraction_other_b": 0.3645539879798889, |
|
"val/fraction_ties": 0.7948717872301737, |
|
"val/lang_prob_bg": 0.0014935457923760016, |
|
"val/lang_prob_en": 0.677817722161611, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7604166666666666, |
|
"grad_norm": 1.0932166576385498, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0759, |
|
"objective/entropy": 480.6666666666667, |
|
"step": 73, |
|
"train/nll_loss_a": 0.0820641169945399, |
|
"train/nll_loss_b": 0.06981213887532552, |
|
"val/completion_length": 47.480770111083984, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.09615384787321091, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18352153400580087, |
|
"val/fraction_latin_b": 0.18374391893545786, |
|
"val/fraction_number_a": 0.46121812860171, |
|
"val/fraction_number_b": 0.45421716570854187, |
|
"val/fraction_other_a": 0.3552603522936503, |
|
"val/fraction_other_b": 0.36203893025716144, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0015151738189160824, |
|
"val/lang_prob_en": 0.6664467255274454, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7708333333333334, |
|
"grad_norm": 0.7673856616020203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1001, |
|
"objective/entropy": 689.3333333333334, |
|
"step": 74, |
|
"train/nll_loss_a": 0.09625528007745743, |
|
"train/nll_loss_b": 0.10403718302647273, |
|
"val/completion_length": 47.63461685180664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513049761453, |
|
"val/fraction_both_incorrect": 0.6794871687889099, |
|
"val/fraction_correct": 0.22435897588729858, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19491079449653625, |
|
"val/fraction_latin_b": 0.2002050280570984, |
|
"val/fraction_number_a": 0.4457240104675293, |
|
"val/fraction_number_b": 0.4501633048057556, |
|
"val/fraction_other_a": 0.3593652347723643, |
|
"val/fraction_other_b": 0.3496316770712535, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0012248660592983167, |
|
"val/lang_prob_en": 0.6904502312342325, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.6796796917915344, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0624, |
|
"objective/entropy": 419.3333333333333, |
|
"step": 75, |
|
"train/nll_loss_a": 0.06967851271231969, |
|
"train/nll_loss_b": 0.055133428424596786, |
|
"val/completion_length": 56.21794764200846, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.025641026596228283, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.10897436241308849, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1962293783823649, |
|
"val/fraction_latin_b": 0.1906676342089971, |
|
"val/fraction_number_a": 0.45563430587450665, |
|
"val/fraction_number_b": 0.45803216099739075, |
|
"val/fraction_other_a": 0.3481363157431285, |
|
"val/fraction_other_b": 0.3513002196947734, |
|
"val/fraction_ties": 0.8333333333333334, |
|
"val/lang_prob_bg": 0.0013489985916142662, |
|
"val/lang_prob_en": 0.6775683760643005, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.7916666666666666, |
|
"grad_norm": 0.6524468064308167, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0818, |
|
"objective/entropy": 445.3333333333333, |
|
"step": 76, |
|
"train/nll_loss_a": 0.08122942348321278, |
|
"train/nll_loss_b": 0.08239901314179103, |
|
"val/completion_length": 51.17948786417643, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.1987179567416509, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18705879151821136, |
|
"val/fraction_latin_b": 0.18814867238203684, |
|
"val/fraction_number_a": 0.4660409986972809, |
|
"val/fraction_number_b": 0.44541672865549725, |
|
"val/fraction_other_a": 0.34690022468566895, |
|
"val/fraction_other_b": 0.36643461386362713, |
|
"val/fraction_ties": 0.7564102609952291, |
|
"val/lang_prob_bg": 0.0012404399070267875, |
|
"val/lang_prob_en": 0.6947490374247233, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8020833333333334, |
|
"grad_norm": 1.0618677139282227, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0775, |
|
"objective/entropy": 617.3333333333334, |
|
"step": 77, |
|
"train/nll_loss_a": 0.065880270053943, |
|
"train/nll_loss_b": 0.08919013664126396, |
|
"val/completion_length": 53.089744567871094, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20119517048199972, |
|
"val/fraction_latin_b": 0.21087687214215597, |
|
"val/fraction_number_a": 0.4479774336020152, |
|
"val/fraction_number_b": 0.43641577164332074, |
|
"val/fraction_other_a": 0.3508274257183075, |
|
"val/fraction_other_b": 0.3527073661486308, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.001091090574239691, |
|
"val/lang_prob_en": 0.6832720836003622, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 0.8138633966445923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0646, |
|
"objective/entropy": 952.0, |
|
"step": 78, |
|
"train/nll_loss_a": 0.07945199559132259, |
|
"train/nll_loss_b": 0.04983629286289215, |
|
"val/completion_length": 59.54487228393555, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.7948718070983887, |
|
"val/fraction_correct": 0.13461538900931677, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19928557177384695, |
|
"val/fraction_latin_b": 0.19274388253688812, |
|
"val/fraction_number_a": 0.4533061484495799, |
|
"val/fraction_number_b": 0.44338251153628033, |
|
"val/fraction_other_a": 0.347408264875412, |
|
"val/fraction_other_b": 0.3638736108938853, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0009999903462206323, |
|
"val/lang_prob_en": 0.7025496959686279, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8229166666666666, |
|
"grad_norm": 1.2982169389724731, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1331, |
|
"objective/entropy": 958.6666666666666, |
|
"step": 79, |
|
"train/nll_loss_a": 0.08611624377469222, |
|
"train/nll_loss_b": 0.18007302532593408, |
|
"val/completion_length": 59.903846740722656, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.807692289352417, |
|
"val/fraction_correct": 0.12820513173937798, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19513183335463205, |
|
"val/fraction_latin_b": 0.19802862405776978, |
|
"val/fraction_number_a": 0.4579313596089681, |
|
"val/fraction_number_b": 0.4683246115843455, |
|
"val/fraction_other_a": 0.34693684180577594, |
|
"val/fraction_other_b": 0.3336467544237773, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0009022102652428051, |
|
"val/lang_prob_en": 0.6803127328554789, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.6850357055664062, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0623, |
|
"objective/entropy": 588.6666666666666, |
|
"step": 80, |
|
"train/nll_loss_a": 0.065490427116553, |
|
"train/nll_loss_b": 0.05901301031311353, |
|
"val/completion_length": 59.04487228393555, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.1153846171995004, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.19230769326289496, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1877680371205012, |
|
"val/fraction_latin_b": 0.2046613891919454, |
|
"val/fraction_number_a": 0.4560255507628123, |
|
"val/fraction_number_b": 0.44008644421895343, |
|
"val/fraction_other_a": 0.35620641708374023, |
|
"val/fraction_other_b": 0.3552521864573161, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0010920044733211398, |
|
"val/lang_prob_en": 0.700651208559672, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 0.6132178902626038, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0744, |
|
"objective/entropy": 588.0, |
|
"step": 81, |
|
"train/nll_loss_a": 0.06263614570101102, |
|
"train/nll_loss_b": 0.08618492384751637, |
|
"val/completion_length": 59.333334604899086, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.807692289352417, |
|
"val/fraction_correct": 0.13461538900931677, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20071592926979065, |
|
"val/fraction_latin_b": 0.20454128086566925, |
|
"val/fraction_number_a": 0.47261403997739154, |
|
"val/fraction_number_b": 0.44823821385701496, |
|
"val/fraction_other_a": 0.3266700307528178, |
|
"val/fraction_other_b": 0.3472205400466919, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0008189170233284434, |
|
"val/lang_prob_en": 0.694216271241506, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8541666666666666, |
|
"grad_norm": 0.8821066617965698, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1014, |
|
"objective/entropy": 677.3333333333334, |
|
"step": 82, |
|
"train/nll_loss_a": 0.10356273377935092, |
|
"train/nll_loss_b": 0.0993096400052309, |
|
"val/completion_length": 59.61538441975912, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20315592487653097, |
|
"val/fraction_latin_b": 0.19276542464892069, |
|
"val/fraction_number_a": 0.43547679980595905, |
|
"val/fraction_number_b": 0.44143152236938477, |
|
"val/fraction_other_a": 0.36136728525161743, |
|
"val/fraction_other_b": 0.3658030529816945, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0010875378696558375, |
|
"val/lang_prob_en": 0.6829221844673157, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8645833333333334, |
|
"grad_norm": 0.6248305439949036, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0693, |
|
"objective/entropy": 1244.0, |
|
"step": 83, |
|
"train/nll_loss_a": 0.0709990132600069, |
|
"train/nll_loss_b": 0.06762294905881087, |
|
"val/completion_length": 62.92307790120443, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.1538461595773697, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.18589743971824646, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1916692852973938, |
|
"val/fraction_latin_b": 0.2029605656862259, |
|
"val/fraction_number_a": 0.4679671327273051, |
|
"val/fraction_number_b": 0.4397442440191905, |
|
"val/fraction_other_a": 0.3403635819753011, |
|
"val/fraction_other_b": 0.3572951853275299, |
|
"val/fraction_ties": 0.935897429784139, |
|
"val/lang_prob_bg": 0.0010764972733644147, |
|
"val/lang_prob_en": 0.7133564352989197, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 0.7387636303901672, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0642, |
|
"objective/entropy": 744.0, |
|
"step": 84, |
|
"train/nll_loss_a": 0.06808524702986081, |
|
"train/nll_loss_b": 0.06040983274579048, |
|
"val/completion_length": 62.487178802490234, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.05128205195069313, |
|
"val/fraction_both_incorrect": 0.8205128312110901, |
|
"val/fraction_correct": 0.1153846209247907, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19735526541868845, |
|
"val/fraction_latin_b": 0.19533702731132507, |
|
"val/fraction_number_a": 0.4576469858487447, |
|
"val/fraction_number_b": 0.4456251660982768, |
|
"val/fraction_other_a": 0.3449977735678355, |
|
"val/fraction_other_b": 0.3590378165245056, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0012146817559065919, |
|
"val/lang_prob_en": 0.6715325911839803, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8854166666666666, |
|
"grad_norm": 0.6266259551048279, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0674, |
|
"objective/entropy": 1516.0, |
|
"step": 85, |
|
"train/nll_loss_a": 0.06911032895247142, |
|
"train/nll_loss_b": 0.06564381966988246, |
|
"val/completion_length": 62.846153259277344, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.1666666716337204, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19015646974245706, |
|
"val/fraction_latin_b": 0.18910319606463113, |
|
"val/fraction_number_a": 0.4569472173849742, |
|
"val/fraction_number_b": 0.4444814920425415, |
|
"val/fraction_other_a": 0.3528963228066762, |
|
"val/fraction_other_b": 0.3664153416951497, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0013917646914099653, |
|
"val/lang_prob_en": 0.6793488065401713, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.8958333333333334, |
|
"grad_norm": 0.5713610649108887, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0663, |
|
"objective/entropy": 509.3333333333333, |
|
"step": 86, |
|
"train/nll_loss_a": 0.06172050287326177, |
|
"train/nll_loss_b": 0.0708620510995388, |
|
"val/completion_length": 57.46794891357422, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7820512652397156, |
|
"val/fraction_correct": 0.1602564180890719, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20560947060585022, |
|
"val/fraction_latin_b": 0.2044855753580729, |
|
"val/fraction_number_a": 0.45070673028628033, |
|
"val/fraction_number_b": 0.43896862864494324, |
|
"val/fraction_other_a": 0.3436838189760844, |
|
"val/fraction_other_b": 0.3565457959969838, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0011454424432789285, |
|
"val/lang_prob_en": 0.6845836440722147, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 0.6155418753623962, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0632, |
|
"objective/entropy": 746.6666666666666, |
|
"step": 87, |
|
"train/nll_loss_a": 0.05684895565112432, |
|
"train/nll_loss_b": 0.06955469151337941, |
|
"val/completion_length": 61.166666666666664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.1346153865257899, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1965359846750895, |
|
"val/fraction_latin_b": 0.19205531477928162, |
|
"val/fraction_number_a": 0.4567938546339671, |
|
"val/fraction_number_b": 0.45538153251012164, |
|
"val/fraction_other_a": 0.3466701805591583, |
|
"val/fraction_other_b": 0.35256315271059674, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0013497412437573075, |
|
"val/lang_prob_en": 0.6935842831929525, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 0.704394519329071, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0757, |
|
"objective/entropy": 753.3333333333334, |
|
"step": 88, |
|
"train/nll_loss_a": 0.07899581392606099, |
|
"train/nll_loss_b": 0.07242523382107417, |
|
"val/completion_length": 60.48718007405599, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8205128113428751, |
|
"val/fraction_correct": 0.1217948744694392, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2038972576459249, |
|
"val/fraction_latin_b": 0.19931572178999582, |
|
"val/fraction_number_a": 0.4489727218945821, |
|
"val/fraction_number_b": 0.4421346386273702, |
|
"val/fraction_other_a": 0.347130020459493, |
|
"val/fraction_other_b": 0.3585496445496877, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.001280973704221348, |
|
"val/lang_prob_en": 0.6853939096132914, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9270833333333334, |
|
"grad_norm": 0.6524101495742798, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0574, |
|
"objective/entropy": 667.3333333333334, |
|
"step": 89, |
|
"train/nll_loss_a": 0.051913000643253326, |
|
"train/nll_loss_b": 0.06291309744119644, |
|
"val/completion_length": 57.307692209879555, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.17307692766189575, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19886599977811178, |
|
"val/fraction_latin_b": 0.20171213150024414, |
|
"val/fraction_number_a": 0.4476595123608907, |
|
"val/fraction_number_b": 0.45179522037506104, |
|
"val/fraction_other_a": 0.3534744878609975, |
|
"val/fraction_other_b": 0.3464926779270172, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.001182676952642699, |
|
"val/lang_prob_en": 0.6873807509740194, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.5485818386077881, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0613, |
|
"objective/entropy": 547.3333333333334, |
|
"step": 90, |
|
"train/nll_loss_a": 0.06135800232489904, |
|
"train/nll_loss_b": 0.06123900165160497, |
|
"val/completion_length": 54.20512898763021, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.14102564379572868, |
|
"val/fraction_both_incorrect": 0.6794871687889099, |
|
"val/fraction_correct": 0.23076922943194708, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1838169644276301, |
|
"val/fraction_latin_b": 0.18703489502271017, |
|
"val/fraction_number_a": 0.46563466389973956, |
|
"val/fraction_number_b": 0.446638544400533, |
|
"val/fraction_other_a": 0.3505483865737915, |
|
"val/fraction_other_b": 0.3663265605767568, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.0013577812739337485, |
|
"val/lang_prob_en": 0.6915837923685709, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9479166666666666, |
|
"grad_norm": 0.5063994526863098, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0639, |
|
"objective/entropy": 719.3333333333334, |
|
"step": 91, |
|
"train/nll_loss_a": 0.07966503500938416, |
|
"train/nll_loss_b": 0.04821213148534298, |
|
"val/completion_length": 55.025641123453774, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461968302727, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.19230770071347555, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1899570773045222, |
|
"val/fraction_latin_b": 0.1830365608135859, |
|
"val/fraction_number_a": 0.45852789282798767, |
|
"val/fraction_number_b": 0.46170011162757874, |
|
"val/fraction_other_a": 0.3515150249004364, |
|
"val/fraction_other_b": 0.3552633424599965, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0010812885011546314, |
|
"val/lang_prob_en": 0.7102627555529276, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9583333333333334, |
|
"grad_norm": 0.6831931471824646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0584, |
|
"objective/entropy": 514.0, |
|
"step": 92, |
|
"train/nll_loss_a": 0.06738898778955142, |
|
"train/nll_loss_b": 0.04947723634541035, |
|
"val/completion_length": 50.307692209879555, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.13461538900931677, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19622700413068137, |
|
"val/fraction_latin_b": 0.18972766399383545, |
|
"val/fraction_number_a": 0.434969961643219, |
|
"val/fraction_number_b": 0.4513050417105357, |
|
"val/fraction_other_a": 0.36880303422609967, |
|
"val/fraction_other_b": 0.35896732409795123, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.0014005635942642887, |
|
"val/lang_prob_en": 0.6858188907305399, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 0.5057593584060669, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0754, |
|
"objective/entropy": 482.6666666666667, |
|
"step": 93, |
|
"train/nll_loss_a": 0.08236912513772647, |
|
"train/nll_loss_b": 0.06851410741607349, |
|
"val/completion_length": 47.5, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.15384615709384283, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.19871795177459717, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18862396478652954, |
|
"val/fraction_latin_b": 0.19087292750676474, |
|
"val/fraction_number_a": 0.4471252163251241, |
|
"val/fraction_number_b": 0.4374854067961375, |
|
"val/fraction_other_a": 0.3642508288224538, |
|
"val/fraction_other_b": 0.37164167563120526, |
|
"val/fraction_ties": 0.9102563858032227, |
|
"val/lang_prob_bg": 0.0012994079540173213, |
|
"val/lang_prob_en": 0.6864928205808004, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9791666666666666, |
|
"grad_norm": 0.6987188458442688, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0556, |
|
"objective/entropy": 797.3333333333334, |
|
"step": 94, |
|
"train/nll_loss_a": 0.05544371157884598, |
|
"train/nll_loss_b": 0.055833750714858375, |
|
"val/completion_length": 53.6025644938151, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.1538461595773697, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1824193944533666, |
|
"val/fraction_latin_b": 0.18101008733113608, |
|
"val/fraction_number_a": 0.4529682397842407, |
|
"val/fraction_number_b": 0.4553934931755066, |
|
"val/fraction_other_a": 0.36461236079533893, |
|
"val/fraction_other_b": 0.3635964592297872, |
|
"val/fraction_ties": 0.8974358836809794, |
|
"val/lang_prob_bg": 0.001927042962051928, |
|
"val/lang_prob_en": 0.6548982262611389, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.9895833333333334, |
|
"grad_norm": 0.5544689893722534, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0758, |
|
"objective/entropy": 487.3333333333333, |
|
"step": 95, |
|
"train/nll_loss_a": 0.06908356895049413, |
|
"train/nll_loss_b": 0.08260532716910045, |
|
"val/completion_length": 46.96794764200846, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18986154099305472, |
|
"val/fraction_latin_b": 0.18999535342057547, |
|
"val/fraction_number_a": 0.4548261861006419, |
|
"val/fraction_number_b": 0.4395192861557007, |
|
"val/fraction_other_a": 0.3553122878074646, |
|
"val/fraction_other_b": 0.3704853653907776, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0012377959792502224, |
|
"val/lang_prob_en": 0.6733607252438863, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.783270537853241, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0509, |
|
"objective/entropy": 242.66666666666666, |
|
"step": 96, |
|
"train/nll_loss_a": 0.04177509993314743, |
|
"train/nll_loss_b": 0.06005720297495524, |
|
"val/completion_length": 43.97843805948893, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08158508439858754, |
|
"val/fraction_both_incorrect": 0.7424242496490479, |
|
"val/fraction_correct": 0.16958042482535043, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20345724125703177, |
|
"val/fraction_latin_b": 0.212227334578832, |
|
"val/fraction_number_a": 0.4268727699915568, |
|
"val/fraction_number_b": 0.40884650746981305, |
|
"val/fraction_other_a": 0.36967000365257263, |
|
"val/fraction_other_b": 0.3789261778195699, |
|
"val/fraction_ties": 0.8240093191464742, |
|
"val/lang_prob_bg": 0.001514516188763082, |
|
"val/lang_prob_en": 0.6803627212842306, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 26, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|