|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6770833333333334, |
|
"eval_steps": 500, |
|
"global_step": 65, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 0.46898284554481506, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4984, |
|
"objective/entropy": 1536.0, |
|
"step": 1, |
|
"train/nll_loss_a": 0.46235302090644836, |
|
"train/nll_loss_b": 0.5343712766965231, |
|
"val/completion_length": 141.19872029622397, |
|
"val/contain_eos_token": 0.9294871687889099, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.35256410638491315, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.628205140431722, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.09817602237065633, |
|
"val/fraction_cyrillic_b": 0.07149943461020787, |
|
"val/fraction_latin_a": 0.48449812332789105, |
|
"val/fraction_latin_b": 0.5023942093054453, |
|
"val/fraction_number_a": 0.18907449146111807, |
|
"val/fraction_number_b": 0.19445918997128805, |
|
"val/fraction_other_a": 0.22825137277444205, |
|
"val/fraction_other_b": 0.2316471884648005, |
|
"val/fraction_ties": 0.6666666666666666, |
|
"val/lang_prob_bg": 0.0268978967020909, |
|
"val/lang_prob_en": 0.6749410231908163, |
|
"val/latin_first_token": 0.6474358836809794, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 0.49648183584213257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4431, |
|
"objective/entropy": 1424.0, |
|
"step": 2, |
|
"train/nll_loss_a": 0.4012756248315175, |
|
"train/nll_loss_b": 0.4849816660086314, |
|
"val/completion_length": 138.66666666666666, |
|
"val/contain_eos_token": 0.9102564056714376, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.32692308227221173, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0820654605825742, |
|
"val/fraction_cyrillic_b": 0.06807015091180801, |
|
"val/fraction_latin_a": 0.46912774443626404, |
|
"val/fraction_latin_b": 0.4876726269721985, |
|
"val/fraction_number_a": 0.21081160008907318, |
|
"val/fraction_number_b": 0.2032010406255722, |
|
"val/fraction_other_a": 0.2379952073097229, |
|
"val/fraction_other_b": 0.24105618397394815, |
|
"val/fraction_ties": 0.7820512851079305, |
|
"val/lang_prob_bg": 0.03282865695655346, |
|
"val/lang_prob_en": 0.6723186572392782, |
|
"val/latin_first_token": 0.6666666666666666, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.006410256649057071 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 0.6362881660461426, |
|
"learning_rate": 0.0001, |
|
"loss": 0.505, |
|
"objective/entropy": 1469.3333333333333, |
|
"step": 3, |
|
"train/nll_loss_a": 0.41949082414309186, |
|
"train/nll_loss_b": 0.5905094941457113, |
|
"val/completion_length": 153.07691955566406, |
|
"val/contain_eos_token": 0.878205140431722, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.3076923092206319, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.09064680337905884, |
|
"val/fraction_cyrillic_b": 0.07422023018201192, |
|
"val/fraction_latin_a": 0.4563806454340617, |
|
"val/fraction_latin_b": 0.4618365466594696, |
|
"val/fraction_number_a": 0.21234740813573202, |
|
"val/fraction_number_b": 0.21217785278956094, |
|
"val/fraction_other_a": 0.24062515298525491, |
|
"val/fraction_other_b": 0.2517653902371724, |
|
"val/fraction_ties": 0.8205128312110901, |
|
"val/lang_prob_bg": 0.03160186484456062, |
|
"val/lang_prob_en": 0.6696631709734598, |
|
"val/latin_first_token": 0.6923076709111532, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 0.5365626215934753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3568, |
|
"objective/entropy": 1538.6666666666667, |
|
"step": 4, |
|
"train/nll_loss_a": 0.3584041992823283, |
|
"train/nll_loss_b": 0.3552741805712382, |
|
"val/completion_length": 139.39102172851562, |
|
"val/contain_eos_token": 0.9230769077936808, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.21794872482617697, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.06686830148100853, |
|
"val/fraction_cyrillic_b": 0.040630811204512916, |
|
"val/fraction_latin_a": 0.48262248436609906, |
|
"val/fraction_latin_b": 0.4933239420255025, |
|
"val/fraction_number_a": 0.20719597240289053, |
|
"val/fraction_number_b": 0.22095757722854614, |
|
"val/fraction_other_a": 0.2433132529258728, |
|
"val/fraction_other_b": 0.24508768320083618, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.02131816806892554, |
|
"val/lang_prob_en": 0.6912566820780436, |
|
"val/latin_first_token": 0.7820512851079305, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.052083333333333336, |
|
"grad_norm": 0.6317083835601807, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3542, |
|
"objective/entropy": 1616.0, |
|
"step": 5, |
|
"train/nll_loss_a": 0.3730636735757192, |
|
"train/nll_loss_b": 0.33537689844767254, |
|
"val/completion_length": 141.45512898763022, |
|
"val/contain_eos_token": 0.9230769276618958, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.03846153927346071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7692307829856873, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.018022120309372742, |
|
"val/fraction_cyrillic_b": 0.008442190863812963, |
|
"val/fraction_latin_a": 0.530072808265686, |
|
"val/fraction_latin_b": 0.5371540983517965, |
|
"val/fraction_number_a": 0.20419377585252127, |
|
"val/fraction_number_b": 0.19861711064974466, |
|
"val/fraction_other_a": 0.2477113058169683, |
|
"val/fraction_other_b": 0.25578661759694415, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.005829914240166545, |
|
"val/lang_prob_en": 0.6994746724764506, |
|
"val/latin_first_token": 0.9615384538968405, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 0.6112334728240967, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3526, |
|
"objective/entropy": 1450.6666666666667, |
|
"step": 6, |
|
"train/nll_loss_a": 0.3616310755411784, |
|
"train/nll_loss_b": 0.3435203830401103, |
|
"val/completion_length": 139.3397420247396, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7564102411270142, |
|
"val/fraction_correct": 0.1602564131220182, |
|
"val/fraction_cyrillic_a": 0.005665303532926676, |
|
"val/fraction_cyrillic_b": 0.0012210012258340914, |
|
"val/fraction_latin_a": 0.5337207714716593, |
|
"val/fraction_latin_b": 0.5389339327812195, |
|
"val/fraction_number_a": 0.20246068636576334, |
|
"val/fraction_number_b": 0.20605232814947763, |
|
"val/fraction_other_a": 0.2581532299518585, |
|
"val/fraction_other_b": 0.25379273295402527, |
|
"val/fraction_ties": 0.8333333134651184, |
|
"val/lang_prob_bg": 0.0024220591488604746, |
|
"val/lang_prob_en": 0.716150164604187, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.07291666666666667, |
|
"grad_norm": 0.5834570527076721, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3122, |
|
"objective/entropy": 1482.6666666666667, |
|
"step": 7, |
|
"train/nll_loss_a": 0.31628555059432983, |
|
"train/nll_loss_b": 0.3082062304019928, |
|
"val/completion_length": 140.28205362955728, |
|
"val/contain_eos_token": 0.935897429784139, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.004876403972351302, |
|
"val/fraction_cyrillic_b": 0.0055291604561110335, |
|
"val/fraction_latin_a": 0.51544189453125, |
|
"val/fraction_latin_b": 0.5175811052322388, |
|
"val/fraction_number_a": 0.21839049458503723, |
|
"val/fraction_number_b": 0.21114349365234375, |
|
"val/fraction_other_a": 0.2612912356853485, |
|
"val/fraction_other_b": 0.26574622591336566, |
|
"val/fraction_ties": 0.8461538553237915, |
|
"val/lang_prob_bg": 0.0024805181116486588, |
|
"val/lang_prob_en": 0.7189218997955322, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.5234330296516418, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3689, |
|
"objective/entropy": 1560.0, |
|
"step": 8, |
|
"train/nll_loss_a": 0.38403966029485065, |
|
"train/nll_loss_b": 0.353829691807429, |
|
"val/completion_length": 147.18589782714844, |
|
"val/contain_eos_token": 0.8910256226857504, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.032051283245285354, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513173937798, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.22435897588729858, |
|
"val/fraction_cyrillic_a": 0.005463951422522466, |
|
"val/fraction_cyrillic_b": 0.00364190728093187, |
|
"val/fraction_latin_a": 0.519624650478363, |
|
"val/fraction_latin_b": 0.5310182571411133, |
|
"val/fraction_number_a": 0.22126641869544983, |
|
"val/fraction_number_b": 0.20790701607863107, |
|
"val/fraction_other_a": 0.2536449631055196, |
|
"val/fraction_other_b": 0.25743279854456586, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0022993393164748945, |
|
"val/lang_prob_en": 0.7228630383809408, |
|
"val/latin_first_token": 0.9679486950238546, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 0.46253740787506104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3548, |
|
"objective/entropy": 1626.6666666666667, |
|
"step": 9, |
|
"train/nll_loss_a": 0.353506733973821, |
|
"train/nll_loss_b": 0.3560173710187276, |
|
"val/completion_length": 141.10897318522134, |
|
"val/contain_eos_token": 0.8974359035491943, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.03846153927346071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.17948718120654425, |
|
"val/fraction_both_incorrect": 0.6666666865348816, |
|
"val/fraction_correct": 0.25641026099522907, |
|
"val/fraction_cyrillic_a": 0.009551782781879107, |
|
"val/fraction_cyrillic_b": 0.007778597995638847, |
|
"val/fraction_latin_a": 0.5005057454109192, |
|
"val/fraction_latin_b": 0.5112853447596232, |
|
"val/fraction_number_a": 0.2224580099185308, |
|
"val/fraction_number_b": 0.2299031764268875, |
|
"val/fraction_other_a": 0.26748446623484295, |
|
"val/fraction_other_b": 0.25103287398815155, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.003838104816774527, |
|
"val/lang_prob_en": 0.7203066547711691, |
|
"val/latin_first_token": 0.9615384538968405, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 0.6793639063835144, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4072, |
|
"objective/entropy": 1544.0, |
|
"step": 10, |
|
"train/nll_loss_a": 0.40951302647590637, |
|
"train/nll_loss_b": 0.4048899710178375, |
|
"val/completion_length": 139.59615580240884, |
|
"val/contain_eos_token": 0.9230769276618958, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.044871795922517776, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.01814807563399275, |
|
"val/fraction_cyrillic_b": 0.01091132735988746, |
|
"val/fraction_latin_a": 0.479096124569575, |
|
"val/fraction_latin_b": 0.48450469970703125, |
|
"val/fraction_number_a": 0.24191749095916748, |
|
"val/fraction_number_b": 0.23329021533330283, |
|
"val/fraction_other_a": 0.2608383099238078, |
|
"val/fraction_other_b": 0.2712937593460083, |
|
"val/fraction_ties": 0.8589743375778198, |
|
"val/lang_prob_bg": 0.005877171643078327, |
|
"val/lang_prob_en": 0.6905626058578491, |
|
"val/latin_first_token": 0.9551281929016113, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.11458333333333333, |
|
"grad_norm": 0.6676069498062134, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3261, |
|
"objective/entropy": 1296.0, |
|
"step": 11, |
|
"train/nll_loss_a": 0.3414422770341237, |
|
"train/nll_loss_b": 0.31081566711266834, |
|
"val/completion_length": 133.35897318522134, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.012820513298114141, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.15384616081913313, |
|
"val/fraction_cyrillic_a": 0.009972451565166315, |
|
"val/fraction_cyrillic_b": 0.008257918680707613, |
|
"val/fraction_latin_a": 0.4708147446314494, |
|
"val/fraction_latin_b": 0.4841614067554474, |
|
"val/fraction_number_a": 0.2511301040649414, |
|
"val/fraction_number_b": 0.23997685313224792, |
|
"val/fraction_other_a": 0.26808270812034607, |
|
"val/fraction_other_b": 0.2676038245360057, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.004965859581716359, |
|
"val/lang_prob_en": 0.7087472081184387, |
|
"val/latin_first_token": 0.9871794780095419, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.48878854513168335, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3202, |
|
"objective/entropy": 1520.0, |
|
"step": 12, |
|
"train/nll_loss_a": 0.3013697862625122, |
|
"train/nll_loss_b": 0.33911073207855225, |
|
"val/completion_length": 125.4551289876302, |
|
"val/contain_eos_token": 0.9551282127698263, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.01923076994717121, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.6410256425539652, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0016106078983284533, |
|
"val/fraction_cyrillic_b": 0.00164324635018905, |
|
"val/fraction_latin_a": 0.4850431780020396, |
|
"val/fraction_latin_b": 0.49556367595990497, |
|
"val/fraction_number_a": 0.24694832662741342, |
|
"val/fraction_number_b": 0.24452554682890573, |
|
"val/fraction_other_a": 0.26639790336290997, |
|
"val/fraction_other_b": 0.25826754172643024, |
|
"val/fraction_ties": 0.7179487347602844, |
|
"val/lang_prob_bg": 0.0020299581810832024, |
|
"val/lang_prob_en": 0.7036298712094625, |
|
"val/latin_first_token": 0.9807692170143127, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.13541666666666666, |
|
"grad_norm": 0.47080346941947937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3551, |
|
"objective/entropy": 1610.6666666666667, |
|
"step": 13, |
|
"train/nll_loss_a": 0.3537709911664327, |
|
"train/nll_loss_b": 0.35645443201065063, |
|
"val/completion_length": 132.8397471110026, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.01923076994717121, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.6410256425539652, |
|
"val/fraction_correct": 0.23717949291070303, |
|
"val/fraction_cyrillic_a": 0.0035886759869754314, |
|
"val/fraction_cyrillic_b": 0.002304682352890571, |
|
"val/fraction_latin_a": 0.5025050441424052, |
|
"val/fraction_latin_b": 0.49992923935254413, |
|
"val/fraction_number_a": 0.22844381630420685, |
|
"val/fraction_number_b": 0.23804503679275513, |
|
"val/fraction_other_a": 0.2654624879360199, |
|
"val/fraction_other_b": 0.2597210705280304, |
|
"val/fraction_ties": 0.7564102609952291, |
|
"val/lang_prob_bg": 0.0021097887850676975, |
|
"val/lang_prob_en": 0.7135748863220215, |
|
"val/latin_first_token": 0.9807692170143127, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 0.5261266827583313, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3553, |
|
"objective/entropy": 1768.0, |
|
"step": 14, |
|
"train/nll_loss_a": 0.36536062757174176, |
|
"train/nll_loss_b": 0.34532251954078674, |
|
"val/completion_length": 130.3205134073893, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.006410256649057071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.14102564255396524, |
|
"val/fraction_both_incorrect": 0.7435897588729858, |
|
"val/fraction_correct": 0.19871795177459717, |
|
"val/fraction_cyrillic_a": 0.0008547008813669285, |
|
"val/fraction_cyrillic_b": 0.0002670940205765267, |
|
"val/fraction_latin_a": 0.48295870423316956, |
|
"val/fraction_latin_b": 0.4754104216893514, |
|
"val/fraction_number_a": 0.24797451992829642, |
|
"val/fraction_number_b": 0.25482123096783954, |
|
"val/fraction_other_a": 0.26821208000183105, |
|
"val/fraction_other_b": 0.269501268863678, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.0016354583591843646, |
|
"val/lang_prob_en": 0.7125194072723389, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.5608013868331909, |
|
"learning_rate": 0.0001, |
|
"loss": 0.375, |
|
"objective/entropy": 1813.3333333333333, |
|
"step": 15, |
|
"train/nll_loss_a": 0.3743097384770711, |
|
"train/nll_loss_b": 0.3756645123163859, |
|
"val/completion_length": 113.73076883951823, |
|
"val/contain_eos_token": 0.942307690779368, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.6666666666666666, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0005128205132981142, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4844670593738556, |
|
"val/fraction_latin_b": 0.48018120725949603, |
|
"val/fraction_number_a": 0.24094298481941223, |
|
"val/fraction_number_b": 0.2496140201886495, |
|
"val/fraction_other_a": 0.2740771571795146, |
|
"val/fraction_other_b": 0.2702048122882843, |
|
"val/fraction_ties": 0.7692307829856873, |
|
"val/lang_prob_bg": 0.0012819842668250203, |
|
"val/lang_prob_en": 0.712844451268514, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.5459577441215515, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3467, |
|
"objective/entropy": 1781.3333333333333, |
|
"step": 16, |
|
"train/nll_loss_a": 0.35024779041608173, |
|
"train/nll_loss_b": 0.3431568145751953, |
|
"val/completion_length": 125.86538696289062, |
|
"val/contain_eos_token": 0.9551281929016113, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.1666666716337204, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4834948579470317, |
|
"val/fraction_latin_b": 0.48427216211954754, |
|
"val/fraction_number_a": 0.24997142453988394, |
|
"val/fraction_number_b": 0.24836017191410065, |
|
"val/fraction_other_a": 0.2665337175130844, |
|
"val/fraction_other_b": 0.26736770073572796, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0015082452834273379, |
|
"val/lang_prob_en": 0.6896043419837952, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.006410256649057071 |
|
}, |
|
{ |
|
"epoch": 0.17708333333333334, |
|
"grad_norm": 0.6729714870452881, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3411, |
|
"objective/entropy": 1653.3333333333333, |
|
"step": 17, |
|
"train/nll_loss_a": 0.3364799916744232, |
|
"train/nll_loss_b": 0.34577877322832745, |
|
"val/completion_length": 112.4551264444987, |
|
"val/contain_eos_token": 0.9807692368825277, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.7564102411270142, |
|
"val/fraction_correct": 0.12820513049761453, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.45367395877838135, |
|
"val/fraction_latin_b": 0.46362873911857605, |
|
"val/fraction_number_a": 0.2715826133886973, |
|
"val/fraction_number_b": 0.2677338620026906, |
|
"val/fraction_other_a": 0.2747434576352437, |
|
"val/fraction_other_b": 0.2686373790105184, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0013079955242574215, |
|
"val/lang_prob_en": 0.7055089473724365, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 0.5278697609901428, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3637, |
|
"objective/entropy": 1666.6666666666667, |
|
"step": 18, |
|
"train/nll_loss_a": 0.3630356788635254, |
|
"train/nll_loss_b": 0.36438990632692975, |
|
"val/completion_length": 106.38461303710938, |
|
"val/contain_eos_token": 0.9743589758872986, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.006410256649057071, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.18589743971824646, |
|
"val/fraction_cyrillic_a": 0.00018853696140771112, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.46353839834531146, |
|
"val/fraction_latin_b": 0.4717874725659688, |
|
"val/fraction_number_a": 0.26522815227508545, |
|
"val/fraction_number_b": 0.27208030720551807, |
|
"val/fraction_other_a": 0.27104492982228595, |
|
"val/fraction_other_b": 0.25613221526145935, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0014787697000429034, |
|
"val/lang_prob_en": 0.7207486033439636, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.19791666666666666, |
|
"grad_norm": 0.623100996017456, |
|
"learning_rate": 0.0001, |
|
"loss": 0.339, |
|
"objective/entropy": 1757.3333333333333, |
|
"step": 19, |
|
"train/nll_loss_a": 0.3500674267609914, |
|
"train/nll_loss_b": 0.32788631319999695, |
|
"val/completion_length": 115.87820434570312, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.14102564503749213, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.47628764311472577, |
|
"val/fraction_latin_b": 0.47780614097913104, |
|
"val/fraction_number_a": 0.25701290369033813, |
|
"val/fraction_number_b": 0.2525850087404251, |
|
"val/fraction_other_a": 0.2666994432608287, |
|
"val/fraction_other_b": 0.2696088453133901, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0013311682268977165, |
|
"val/lang_prob_en": 0.7212471763292948, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 0.7427172064781189, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3136, |
|
"objective/entropy": 1717.3333333333333, |
|
"step": 20, |
|
"train/nll_loss_a": 0.3320723871390025, |
|
"train/nll_loss_b": 0.2952205240726471, |
|
"val/completion_length": 103.90384674072266, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.14102564255396524, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.47085316975911456, |
|
"val/fraction_latin_b": 0.46781421701113385, |
|
"val/fraction_number_a": 0.2578504929939906, |
|
"val/fraction_number_b": 0.2744967540105184, |
|
"val/fraction_other_a": 0.27129634221394855, |
|
"val/fraction_other_b": 0.2576890190442403, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0014502551639452577, |
|
"val/lang_prob_en": 0.7153881192207336, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 0.4812980592250824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2812, |
|
"objective/entropy": 1712.0, |
|
"step": 21, |
|
"train/nll_loss_a": 0.2877577245235443, |
|
"train/nll_loss_b": 0.27465402086575824, |
|
"val/completion_length": 100.94871775309245, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359308679898, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.449023316303889, |
|
"val/fraction_latin_b": 0.43931347131729126, |
|
"val/fraction_number_a": 0.2805411020914714, |
|
"val/fraction_number_b": 0.2886248826980591, |
|
"val/fraction_other_a": 0.2704355716705322, |
|
"val/fraction_other_b": 0.27206166585286456, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0012512764272590478, |
|
"val/lang_prob_en": 0.7036919593811035, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 0.5802024006843567, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2079, |
|
"objective/entropy": 1754.6666666666667, |
|
"step": 22, |
|
"train/nll_loss_a": 0.2198613981405894, |
|
"train/nll_loss_b": 0.19591793914635977, |
|
"val/completion_length": 105.2051289876302, |
|
"val/contain_eos_token": 0.9615384538968405, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.8589743375778198, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4607094426949819, |
|
"val/fraction_latin_b": 0.4435524543126424, |
|
"val/fraction_number_a": 0.2806512117385864, |
|
"val/fraction_number_b": 0.2736863394578298, |
|
"val/fraction_other_a": 0.2586393306652705, |
|
"val/fraction_other_b": 0.28276123603185016, |
|
"val/fraction_ties": 0.9487179517745972, |
|
"val/lang_prob_bg": 0.0013595524554451306, |
|
"val/lang_prob_en": 0.6991243163744608, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.23958333333333334, |
|
"grad_norm": 0.9169826507568359, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3761, |
|
"objective/entropy": 1493.3333333333333, |
|
"step": 23, |
|
"train/nll_loss_a": 0.36857877175013226, |
|
"train/nll_loss_b": 0.38360129793485004, |
|
"val/completion_length": 92.24359130859375, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.025641026596228283, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4382043182849884, |
|
"val/fraction_latin_b": 0.42687369386355084, |
|
"val/fraction_number_a": 0.2829488515853882, |
|
"val/fraction_number_b": 0.296395738919576, |
|
"val/fraction_other_a": 0.2788468599319458, |
|
"val/fraction_other_b": 0.2767305870850881, |
|
"val/fraction_ties": 0.8205128312110901, |
|
"val/lang_prob_bg": 0.0014082260507469375, |
|
"val/lang_prob_en": 0.7083008488019308, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.6027104258537292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2801, |
|
"objective/entropy": 1490.6666666666667, |
|
"step": 24, |
|
"train/nll_loss_a": 0.278068482875824, |
|
"train/nll_loss_b": 0.28207358221213025, |
|
"val/completion_length": 86.58333333333333, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.6794871886571249, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.44356054067611694, |
|
"val/fraction_latin_b": 0.44240028659502667, |
|
"val/fraction_number_a": 0.2843793531258901, |
|
"val/fraction_number_b": 0.2815621296564738, |
|
"val/fraction_other_a": 0.272060106197993, |
|
"val/fraction_other_b": 0.2760376036167145, |
|
"val/fraction_ties": 0.7692307631174723, |
|
"val/lang_prob_bg": 0.0013788756914436817, |
|
"val/lang_prob_en": 0.707394023736318, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2604166666666667, |
|
"grad_norm": 0.6588619351387024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2373, |
|
"objective/entropy": 1805.3333333333333, |
|
"step": 25, |
|
"train/nll_loss_a": 0.2221569369236628, |
|
"train/nll_loss_b": 0.25250792503356934, |
|
"val/completion_length": 97.98077138264973, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.4407140811284383, |
|
"val/fraction_latin_b": 0.449174165725708, |
|
"val/fraction_number_a": 0.2688818077246348, |
|
"val/fraction_number_b": 0.264347364505132, |
|
"val/fraction_other_a": 0.2904041012128194, |
|
"val/fraction_other_b": 0.2864784598350525, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0015077214144791167, |
|
"val/lang_prob_en": 0.7170586188634237, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 0.7737333178520203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2641, |
|
"objective/entropy": 1989.3333333333333, |
|
"step": 26, |
|
"train/nll_loss_a": 0.26104875405629474, |
|
"train/nll_loss_b": 0.26720015704631805, |
|
"val/completion_length": 93.08974202473958, |
|
"val/contain_eos_token": 0.9743589758872986, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.19871795177459717, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.43882275621096295, |
|
"val/fraction_latin_b": 0.4281532069047292, |
|
"val/fraction_number_a": 0.29091211160024005, |
|
"val/fraction_number_b": 0.3004717230796814, |
|
"val/fraction_other_a": 0.2702651371558507, |
|
"val/fraction_other_b": 0.2713750700155894, |
|
"val/fraction_ties": 0.8333333134651184, |
|
"val/lang_prob_bg": 0.0013183245512967308, |
|
"val/lang_prob_en": 0.6850736141204834, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 0.7201813459396362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2468, |
|
"objective/entropy": 1309.3333333333333, |
|
"step": 27, |
|
"train/nll_loss_a": 0.2481851428747177, |
|
"train/nll_loss_b": 0.24535122017065683, |
|
"val/completion_length": 81.52563985188802, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307730515797, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.15384615709384283, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.42121972640355426, |
|
"val/fraction_latin_b": 0.3958790997664134, |
|
"val/fraction_number_a": 0.30145979921023053, |
|
"val/fraction_number_b": 0.3247312208016713, |
|
"val/fraction_other_a": 0.2773204942544301, |
|
"val/fraction_other_b": 0.2793896694978078, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0012046323778728645, |
|
"val/lang_prob_en": 0.690701444943746, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 0.5405112504959106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2421, |
|
"objective/entropy": 1466.6666666666667, |
|
"step": 28, |
|
"train/nll_loss_a": 0.258198360602061, |
|
"train/nll_loss_b": 0.225906973083814, |
|
"val/completion_length": 91.30128224690755, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359060327212, |
|
"val/fraction_both_incorrect": 0.6025640964508057, |
|
"val/fraction_correct": 0.2435897489388784, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.41246400276819867, |
|
"val/fraction_latin_b": 0.4263813893000285, |
|
"val/fraction_number_a": 0.30804495016733807, |
|
"val/fraction_number_b": 0.299861341714859, |
|
"val/fraction_other_a": 0.27949108680089313, |
|
"val/fraction_other_b": 0.2737572491168976, |
|
"val/fraction_ties": 0.692307710647583, |
|
"val/lang_prob_bg": 0.0014168053554991882, |
|
"val/lang_prob_en": 0.7019821604092916, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3020833333333333, |
|
"grad_norm": 0.6843443512916565, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2173, |
|
"objective/entropy": 1717.3333333333333, |
|
"step": 29, |
|
"train/nll_loss_a": 0.2260708212852478, |
|
"train/nll_loss_b": 0.208594411611557, |
|
"val/completion_length": 86.48076883951823, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.18589743971824646, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.41310587525367737, |
|
"val/fraction_latin_b": 0.4215882420539856, |
|
"val/fraction_number_a": 0.3036368489265442, |
|
"val/fraction_number_b": 0.3048081199328105, |
|
"val/fraction_other_a": 0.2832573155562083, |
|
"val/fraction_other_b": 0.2736036380132039, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0014089663745835423, |
|
"val/lang_prob_en": 0.6810818711916605, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.6732069849967957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1954, |
|
"objective/entropy": 1698.6666666666667, |
|
"step": 30, |
|
"train/nll_loss_a": 0.190487802028656, |
|
"train/nll_loss_b": 0.20023786028226218, |
|
"val/completion_length": 83.47436014811198, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.692307690779368, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.37804505228996277, |
|
"val/fraction_latin_b": 0.3864828248818715, |
|
"val/fraction_number_a": 0.32556501030921936, |
|
"val/fraction_number_b": 0.3207090497016907, |
|
"val/fraction_other_a": 0.2963899274667104, |
|
"val/fraction_other_b": 0.29280807574590045, |
|
"val/fraction_ties": 0.7307692368825277, |
|
"val/lang_prob_bg": 0.0012525273875022929, |
|
"val/lang_prob_en": 0.6773750185966492, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3229166666666667, |
|
"grad_norm": 1.181767225265503, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2398, |
|
"objective/entropy": 1254.6666666666667, |
|
"step": 31, |
|
"train/nll_loss_a": 0.2306948055823644, |
|
"train/nll_loss_b": 0.2489608426888784, |
|
"val/completion_length": 80.55127970377605, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.09615384911497434, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3839823206265767, |
|
"val/fraction_latin_b": 0.40403392910957336, |
|
"val/fraction_number_a": 0.3259160916010539, |
|
"val/fraction_number_b": 0.3016844590504964, |
|
"val/fraction_other_a": 0.2901015877723694, |
|
"val/fraction_other_b": 0.29428161183993023, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.001477631429831187, |
|
"val/lang_prob_en": 0.693560004234314, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.7345649600028992, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2023, |
|
"objective/entropy": 1706.6666666666667, |
|
"step": 32, |
|
"train/nll_loss_a": 0.18820939461390176, |
|
"train/nll_loss_b": 0.2164141039053599, |
|
"val/completion_length": 86.39102681477864, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.025641026596228283, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3734320302804311, |
|
"val/fraction_latin_b": 0.39075586199760437, |
|
"val/fraction_number_a": 0.3208834727605184, |
|
"val/fraction_number_b": 0.3199572165807088, |
|
"val/fraction_other_a": 0.30568451682726544, |
|
"val/fraction_other_b": 0.28928691148757935, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.001555828144773841, |
|
"val/lang_prob_en": 0.6997750600179037, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 0.6794171929359436, |
|
"learning_rate": 0.0001, |
|
"loss": 0.208, |
|
"objective/entropy": 1765.3333333333333, |
|
"step": 33, |
|
"train/nll_loss_a": 0.197875847419103, |
|
"train/nll_loss_b": 0.2181676377852758, |
|
"val/completion_length": 81.42307790120442, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513049761453, |
|
"val/fraction_both_incorrect": 0.692307710647583, |
|
"val/fraction_correct": 0.21794872482617697, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.36189671357472736, |
|
"val/fraction_latin_b": 0.3368810514609019, |
|
"val/fraction_number_a": 0.34198596080144245, |
|
"val/fraction_number_b": 0.3494710822900136, |
|
"val/fraction_other_a": 0.2961173454920451, |
|
"val/fraction_other_b": 0.3136478662490845, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.0014510581968352199, |
|
"val/lang_prob_en": 0.6856165130933126, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3541666666666667, |
|
"grad_norm": 1.2053074836730957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1771, |
|
"objective/entropy": 1565.3333333333333, |
|
"step": 34, |
|
"train/nll_loss_a": 0.17536027232805887, |
|
"train/nll_loss_b": 0.1789391835530599, |
|
"val/completion_length": 75.41666666666667, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.34743695457776386, |
|
"val/fraction_latin_b": 0.3424769341945648, |
|
"val/fraction_number_a": 0.3374132215976715, |
|
"val/fraction_number_b": 0.3465127448240916, |
|
"val/fraction_other_a": 0.31514982382456463, |
|
"val/fraction_other_b": 0.31101036071777344, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0016215697008495529, |
|
"val/lang_prob_en": 0.6558386087417603, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3645833333333333, |
|
"grad_norm": 0.8247714042663574, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1831, |
|
"objective/entropy": 2050.6666666666665, |
|
"step": 35, |
|
"train/nll_loss_a": 0.1831777443488439, |
|
"train/nll_loss_b": 0.18294000625610352, |
|
"val/completion_length": 79.56410217285156, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7179487347602844, |
|
"val/fraction_correct": 0.17948718617359796, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3364667197068532, |
|
"val/fraction_latin_b": 0.3431033293406169, |
|
"val/fraction_number_a": 0.35041239857673645, |
|
"val/fraction_number_b": 0.3312891523043315, |
|
"val/fraction_other_a": 0.31312089165051776, |
|
"val/fraction_other_b": 0.3256075282891591, |
|
"val/fraction_ties": 0.7948718070983887, |
|
"val/lang_prob_bg": 0.0014793235653390486, |
|
"val/lang_prob_en": 0.6674719850222269, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.7892264127731323, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2262, |
|
"objective/entropy": 2029.3333333333333, |
|
"step": 36, |
|
"train/nll_loss_a": 0.21969079971313477, |
|
"train/nll_loss_b": 0.23275785644849142, |
|
"val/completion_length": 81.88461558024089, |
|
"val/contain_eos_token": 0.9807692170143127, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7564102609952291, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.33385714888572693, |
|
"val/fraction_latin_b": 0.31389813621838886, |
|
"val/fraction_number_a": 0.36293908953666687, |
|
"val/fraction_number_b": 0.368167241414388, |
|
"val/fraction_other_a": 0.3032037814458211, |
|
"val/fraction_other_b": 0.3179346521695455, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0014597336606432993, |
|
"val/lang_prob_en": 0.6864216725031534, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3854166666666667, |
|
"grad_norm": 0.9461960196495056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1815, |
|
"objective/entropy": 2218.6666666666665, |
|
"step": 37, |
|
"train/nll_loss_a": 0.1762543668349584, |
|
"train/nll_loss_b": 0.18676617741584778, |
|
"val/completion_length": 78.16666666666667, |
|
"val/contain_eos_token": 0.9743589560190836, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.31174399455388385, |
|
"val/fraction_latin_b": 0.3052766025066376, |
|
"val/fraction_number_a": 0.3656782905260722, |
|
"val/fraction_number_b": 0.37882108489672345, |
|
"val/fraction_other_a": 0.32257768511772156, |
|
"val/fraction_other_b": 0.3159022927284241, |
|
"val/fraction_ties": 0.807692309220632, |
|
"val/lang_prob_bg": 0.0016823052040611703, |
|
"val/lang_prob_en": 0.682081917921702, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 0.7971638441085815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1558, |
|
"objective/entropy": 1834.6666666666667, |
|
"step": 38, |
|
"train/nll_loss_a": 0.1687836398681005, |
|
"train/nll_loss_b": 0.142802856862545, |
|
"val/completion_length": 75.09615325927734, |
|
"val/contain_eos_token": 0.9807692170143127, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.11538461844126384, |
|
"val/fraction_both_incorrect": 0.7692307829856873, |
|
"val/fraction_correct": 0.1730769251783689, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.3084398905436198, |
|
"val/fraction_latin_b": 0.3190213441848755, |
|
"val/fraction_number_a": 0.38394031922022503, |
|
"val/fraction_number_b": 0.3600513239701589, |
|
"val/fraction_other_a": 0.3076198200384776, |
|
"val/fraction_other_b": 0.32092733184496564, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.0013936974961931508, |
|
"val/lang_prob_en": 0.6955586870511373, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 0.5998182892799377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1452, |
|
"objective/entropy": 928.0, |
|
"step": 39, |
|
"train/nll_loss_a": 0.14895252386728922, |
|
"train/nll_loss_b": 0.14153108249107996, |
|
"val/completion_length": 71.1602554321289, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359308679898, |
|
"val/fraction_both_incorrect": 0.7051282127698263, |
|
"val/fraction_correct": 0.19230769326289496, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.28946439425150555, |
|
"val/fraction_latin_b": 0.287621130545934, |
|
"val/fraction_number_a": 0.3734682301680247, |
|
"val/fraction_number_b": 0.38121453921000165, |
|
"val/fraction_other_a": 0.3370673954486847, |
|
"val/fraction_other_b": 0.33116433024406433, |
|
"val/fraction_ties": 0.7948718070983887, |
|
"val/lang_prob_bg": 0.0014785424573346972, |
|
"val/lang_prob_en": 0.6788028081258138, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 1.0486812591552734, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1397, |
|
"objective/entropy": 1610.6666666666667, |
|
"step": 40, |
|
"train/nll_loss_a": 0.124597763021787, |
|
"train/nll_loss_b": 0.15475992610057196, |
|
"val/completion_length": 66.44871775309245, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.03846153989434242, |
|
"val/fraction_both_incorrect": 0.8846153815587362, |
|
"val/fraction_correct": 0.07692307730515797, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.26715315381685895, |
|
"val/fraction_latin_b": 0.267175629734993, |
|
"val/fraction_number_a": 0.40364818771680194, |
|
"val/fraction_number_b": 0.40295613805452984, |
|
"val/fraction_other_a": 0.3291986882686615, |
|
"val/fraction_other_b": 0.32986828684806824, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0016380803814778726, |
|
"val/lang_prob_en": 0.6558632055918375, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4270833333333333, |
|
"grad_norm": 1.1442676782608032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1368, |
|
"objective/entropy": 1390.6666666666667, |
|
"step": 41, |
|
"train/nll_loss_a": 0.13521244128545126, |
|
"train/nll_loss_b": 0.13834577798843384, |
|
"val/completion_length": 66.53845977783203, |
|
"val/contain_eos_token": 0.9871794780095419, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.8589743375778198, |
|
"val/fraction_correct": 0.07692307916780312, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2713295817375183, |
|
"val/fraction_latin_b": 0.2757815072933833, |
|
"val/fraction_number_a": 0.39802590012550354, |
|
"val/fraction_number_b": 0.3848887085914612, |
|
"val/fraction_other_a": 0.33064452807108563, |
|
"val/fraction_other_b": 0.3393297791481018, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0014013544811556737, |
|
"val/lang_prob_en": 0.6843119462331136, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 1.032313585281372, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1505, |
|
"objective/entropy": 1338.6666666666667, |
|
"step": 42, |
|
"train/nll_loss_a": 0.1546641836563746, |
|
"train/nll_loss_b": 0.146413487692674, |
|
"val/completion_length": 57.61538314819336, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.11538461595773697, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2686810294787089, |
|
"val/fraction_latin_b": 0.2704887390136719, |
|
"val/fraction_number_a": 0.39721407492955524, |
|
"val/fraction_number_b": 0.38931016127268475, |
|
"val/fraction_other_a": 0.33410489559173584, |
|
"val/fraction_other_b": 0.3402010997136434, |
|
"val/fraction_ties": 0.8974359035491943, |
|
"val/lang_prob_bg": 0.0015042958548292518, |
|
"val/lang_prob_en": 0.6720715363820394, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4479166666666667, |
|
"grad_norm": 1.00838303565979, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1867, |
|
"objective/entropy": 1114.6666666666667, |
|
"step": 43, |
|
"train/nll_loss_a": 0.16402535637219748, |
|
"train/nll_loss_b": 0.20946120719114938, |
|
"val/completion_length": 57.775641123453774, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.051282053192456566, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.12820513173937798, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2534715036551158, |
|
"val/fraction_latin_b": 0.26074858009815216, |
|
"val/fraction_number_a": 0.3934909800688426, |
|
"val/fraction_number_b": 0.40743691722551983, |
|
"val/fraction_other_a": 0.3530375460783641, |
|
"val/fraction_other_b": 0.3318144778410594, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0016378250826771061, |
|
"val/lang_prob_en": 0.6859935522079468, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 0.746932327747345, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1126, |
|
"objective/entropy": 926.0, |
|
"step": 44, |
|
"train/nll_loss_a": 0.12073729187250137, |
|
"train/nll_loss_b": 0.10447523991266887, |
|
"val/completion_length": 57.910256703694664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410390138626, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.16666666915019354, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.24723881979783377, |
|
"val/fraction_latin_b": 0.2573054035504659, |
|
"val/fraction_number_a": 0.4117300808429718, |
|
"val/fraction_number_b": 0.4151102900505066, |
|
"val/fraction_other_a": 0.3410310943921407, |
|
"val/fraction_other_b": 0.32758431633313495, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.0016183808135489623, |
|
"val/lang_prob_en": 0.6595947543780009, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 0.751488208770752, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1458, |
|
"objective/entropy": 1064.0, |
|
"step": 45, |
|
"train/nll_loss_a": 0.1560243566830953, |
|
"train/nll_loss_b": 0.13560334593057632, |
|
"val/completion_length": 55.36538569132487, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7307692368825277, |
|
"val/fraction_correct": 0.1858974372347196, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2510768473148346, |
|
"val/fraction_latin_b": 0.23666884501775107, |
|
"val/fraction_number_a": 0.39663148919741315, |
|
"val/fraction_number_b": 0.4140782058238983, |
|
"val/fraction_other_a": 0.35229164361953735, |
|
"val/fraction_other_b": 0.34925296902656555, |
|
"val/fraction_ties": 0.8333333333333334, |
|
"val/lang_prob_bg": 0.0012542977929115295, |
|
"val/lang_prob_en": 0.6892314950625101, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 0.7010864019393921, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1338, |
|
"objective/entropy": 997.3333333333334, |
|
"step": 46, |
|
"train/nll_loss_a": 0.14200725158055624, |
|
"train/nll_loss_b": 0.1256332869331042, |
|
"val/completion_length": 51.71153767903646, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.1666666679084301, |
|
"val/fraction_both_incorrect": 0.7435897390047709, |
|
"val/fraction_correct": 0.21153846631447473, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.24885622163613638, |
|
"val/fraction_latin_b": 0.2301209419965744, |
|
"val/fraction_number_a": 0.396872212489446, |
|
"val/fraction_number_b": 0.4051181972026825, |
|
"val/fraction_other_a": 0.3542715708414714, |
|
"val/fraction_other_b": 0.3647608856360118, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0016821225096161168, |
|
"val/lang_prob_en": 0.6777702768643697, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.4895833333333333, |
|
"grad_norm": 1.0925281047821045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1205, |
|
"objective/entropy": 881.3333333333334, |
|
"step": 47, |
|
"train/nll_loss_a": 0.12804403652747473, |
|
"train/nll_loss_b": 0.11300961673259735, |
|
"val/completion_length": 52.36538569132487, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307978868484, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.12179487322767575, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.23128040631612143, |
|
"val/fraction_latin_b": 0.2396069417397181, |
|
"val/fraction_number_a": 0.4000318944454193, |
|
"val/fraction_number_b": 0.40589800477027893, |
|
"val/fraction_other_a": 0.3686876992384593, |
|
"val/fraction_other_b": 0.3544950584570567, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0015037450551365812, |
|
"val/lang_prob_en": 0.6730888287226359, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2894670963287354, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1487, |
|
"objective/entropy": 714.6666666666666, |
|
"step": 48, |
|
"train/nll_loss_a": 0.139557013909022, |
|
"train/nll_loss_b": 0.15790955225626627, |
|
"val/completion_length": 51.801282246907554, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.8333333134651184, |
|
"val/fraction_correct": 0.12820513049761453, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20845835904280344, |
|
"val/fraction_latin_b": 0.2055131047964096, |
|
"val/fraction_number_a": 0.428351491689682, |
|
"val/fraction_number_b": 0.4272334774335225, |
|
"val/fraction_other_a": 0.3631901641686757, |
|
"val/fraction_other_b": 0.3672534426053365, |
|
"val/fraction_ties": 0.9230769276618958, |
|
"val/lang_prob_bg": 0.0014591465005651116, |
|
"val/lang_prob_en": 0.6774142583211263, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5104166666666666, |
|
"grad_norm": 1.2826536893844604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.11, |
|
"objective/entropy": 1096.6666666666667, |
|
"step": 49, |
|
"train/nll_loss_a": 0.09283561259508133, |
|
"train/nll_loss_b": 0.1271024172504743, |
|
"val/completion_length": 53.8012809753418, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.10897436365485191, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2129476418097814, |
|
"val/fraction_latin_b": 0.20714954535166422, |
|
"val/fraction_number_a": 0.4297573169072469, |
|
"val/fraction_number_b": 0.4259600241978963, |
|
"val/fraction_other_a": 0.35729504625002545, |
|
"val/fraction_other_b": 0.36689044038454693, |
|
"val/fraction_ties": 0.9102564056714376, |
|
"val/lang_prob_bg": 0.0016641345185538132, |
|
"val/lang_prob_en": 0.6516953508059183, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 0.9105807542800903, |
|
"learning_rate": 0.0001, |
|
"loss": 0.112, |
|
"objective/entropy": 668.6666666666666, |
|
"step": 50, |
|
"train/nll_loss_a": 0.1011932243903478, |
|
"train/nll_loss_b": 0.12279495596885681, |
|
"val/completion_length": 52.903846740722656, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.012820513298114141, |
|
"val/fraction_both_incorrect": 0.7179487347602844, |
|
"val/fraction_correct": 0.14743590354919434, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.2198729912439982, |
|
"val/fraction_latin_b": 0.21549966434637705, |
|
"val/fraction_number_a": 0.4184926648934682, |
|
"val/fraction_number_b": 0.4257381657759349, |
|
"val/fraction_other_a": 0.36163437366485596, |
|
"val/fraction_other_b": 0.3587621847788493, |
|
"val/fraction_ties": 0.7307692368825277, |
|
"val/lang_prob_bg": 0.0013077266824742158, |
|
"val/lang_prob_en": 0.6708633701006571, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 0.6333914399147034, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0843, |
|
"objective/entropy": 578.6666666666666, |
|
"step": 51, |
|
"train/nll_loss_a": 0.08048844834168752, |
|
"train/nll_loss_b": 0.08813040951887767, |
|
"val/completion_length": 50.442307790120445, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.12820513173937798, |
|
"val/fraction_both_incorrect": 0.7179487148920695, |
|
"val/fraction_correct": 0.20512820780277252, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1867583046356837, |
|
"val/fraction_latin_b": 0.1920985778172811, |
|
"val/fraction_number_a": 0.46239819129308063, |
|
"val/fraction_number_b": 0.45383066932360333, |
|
"val/fraction_other_a": 0.35084352890650433, |
|
"val/fraction_other_b": 0.3540707727273305, |
|
"val/fraction_ties": 0.8461538354555765, |
|
"val/lang_prob_bg": 0.0013779281095291178, |
|
"val/lang_prob_en": 0.6875834663709005, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 0.597698450088501, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0711, |
|
"objective/entropy": 494.0, |
|
"step": 52, |
|
"train/nll_loss_a": 0.0706160341699918, |
|
"train/nll_loss_b": 0.07159051423271497, |
|
"val/completion_length": 52.0961545308431, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.7948717872301737, |
|
"val/fraction_correct": 0.1346153865257899, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19636401534080505, |
|
"val/fraction_latin_b": 0.19669000804424286, |
|
"val/fraction_number_a": 0.4432801107565562, |
|
"val/fraction_number_b": 0.4444128175576528, |
|
"val/fraction_other_a": 0.3603558838367462, |
|
"val/fraction_other_b": 0.3588971694310506, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0014325374116500218, |
|
"val/lang_prob_en": 0.6964165170987447, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5520833333333334, |
|
"grad_norm": 0.9163941144943237, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0759, |
|
"objective/entropy": 582.6666666666666, |
|
"step": 53, |
|
"train/nll_loss_a": 0.061987257252136864, |
|
"train/nll_loss_b": 0.0898251583178838, |
|
"val/completion_length": 51.95512771606445, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.871794859568278, |
|
"val/fraction_correct": 0.10256410390138626, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19930331905682883, |
|
"val/fraction_latin_b": 0.20022966961065927, |
|
"val/fraction_number_a": 0.4515662391980489, |
|
"val/fraction_number_b": 0.4426102936267853, |
|
"val/fraction_other_a": 0.34913045167922974, |
|
"val/fraction_other_b": 0.35716002186139423, |
|
"val/fraction_ties": 0.9487179319063822, |
|
"val/lang_prob_bg": 0.0013443352266525228, |
|
"val/lang_prob_en": 0.6959804097811381, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 0.7437398433685303, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0617, |
|
"objective/entropy": 917.3333333333334, |
|
"step": 54, |
|
"train/nll_loss_a": 0.06697492549816768, |
|
"train/nll_loss_b": 0.05643160889546076, |
|
"val/completion_length": 54.410255432128906, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.12820513298114142, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1963134010632833, |
|
"val/fraction_latin_b": 0.19755952060222626, |
|
"val/fraction_number_a": 0.4544989267985026, |
|
"val/fraction_number_b": 0.4627720316251119, |
|
"val/fraction_other_a": 0.3491876721382141, |
|
"val/fraction_other_b": 0.3396684726079305, |
|
"val/fraction_ties": 0.871794859568278, |
|
"val/lang_prob_bg": 0.001135329968140771, |
|
"val/lang_prob_en": 0.6944870551427206, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5729166666666666, |
|
"grad_norm": 0.8625170588493347, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0791, |
|
"objective/entropy": 1185.3333333333333, |
|
"step": 55, |
|
"train/nll_loss_a": 0.0869336798787117, |
|
"train/nll_loss_b": 0.0713660145799319, |
|
"val/completion_length": 54.77564239501953, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8333333333333334, |
|
"val/fraction_correct": 0.1153846209247907, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19297984739144644, |
|
"val/fraction_latin_b": 0.2026552508274714, |
|
"val/fraction_number_a": 0.4540139436721802, |
|
"val/fraction_number_b": 0.45077388485272724, |
|
"val/fraction_other_a": 0.3530062139034271, |
|
"val/fraction_other_b": 0.34657086928685504, |
|
"val/fraction_ties": 0.8974358836809794, |
|
"val/lang_prob_bg": 0.00151369022205472, |
|
"val/lang_prob_en": 0.6880850593249003, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 1.0587146282196045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0684, |
|
"objective/entropy": 1028.6666666666667, |
|
"step": 56, |
|
"train/nll_loss_a": 0.06075024977326393, |
|
"train/nll_loss_b": 0.07601286098361015, |
|
"val/completion_length": 53.96153895060221, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.10256410514314969, |
|
"val/fraction_both_incorrect": 0.7820512851079305, |
|
"val/fraction_correct": 0.1602564106384913, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18348072469234467, |
|
"val/fraction_latin_b": 0.18514560659726462, |
|
"val/fraction_number_a": 0.45656461517016095, |
|
"val/fraction_number_b": 0.4575365384419759, |
|
"val/fraction_other_a": 0.35995468497276306, |
|
"val/fraction_other_b": 0.35731785496075946, |
|
"val/fraction_ties": 0.8846153815587362, |
|
"val/lang_prob_bg": 0.00106729429292803, |
|
"val/lang_prob_en": 0.6962061325709025, |
|
"val/latin_first_token": 0.9935897390047709, |
|
"val/number_first_token": 0.006410256649057071, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 0.8600048422813416, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1015, |
|
"objective/entropy": 1142.0, |
|
"step": 57, |
|
"train/nll_loss_a": 0.10414389024178188, |
|
"train/nll_loss_b": 0.09876606116692226, |
|
"val/completion_length": 54.6025644938151, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307730515797, |
|
"val/fraction_both_incorrect": 0.807692309220632, |
|
"val/fraction_correct": 0.13461538776755333, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19213247299194336, |
|
"val/fraction_latin_b": 0.18131321668624878, |
|
"val/fraction_number_a": 0.44502533475557965, |
|
"val/fraction_number_b": 0.45092181364695233, |
|
"val/fraction_other_a": 0.36284218231836957, |
|
"val/fraction_other_b": 0.36776500940322876, |
|
"val/fraction_ties": 0.8846153616905212, |
|
"val/lang_prob_bg": 0.001513439230620861, |
|
"val/lang_prob_en": 0.702368974685669, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 0.8079760074615479, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0778, |
|
"objective/entropy": 505.3333333333333, |
|
"step": 58, |
|
"train/nll_loss_a": 0.09139975905418396, |
|
"train/nll_loss_b": 0.06421066199739774, |
|
"val/completion_length": 46.98076883951823, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.14102564752101898, |
|
"val/fraction_both_incorrect": 0.7051282127698263, |
|
"val/fraction_correct": 0.2179487223426501, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.20126528044541678, |
|
"val/fraction_latin_b": 0.19763841231664023, |
|
"val/fraction_number_a": 0.4304538468519847, |
|
"val/fraction_number_b": 0.43739889065424603, |
|
"val/fraction_other_a": 0.3682809074719747, |
|
"val/fraction_other_b": 0.3649626870950063, |
|
"val/fraction_ties": 0.8461538553237915, |
|
"val/lang_prob_bg": 0.0011517573924114306, |
|
"val/lang_prob_en": 0.709509551525116, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6145833333333334, |
|
"grad_norm": 1.1331056356430054, |
|
"learning_rate": 0.0001, |
|
"loss": 0.128, |
|
"objective/entropy": 633.3333333333334, |
|
"step": 59, |
|
"train/nll_loss_a": 0.1187543123960495, |
|
"train/nll_loss_b": 0.1372635985414187, |
|
"val/completion_length": 46.737178802490234, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.08974359184503555, |
|
"val/fraction_both_incorrect": 0.7307692170143127, |
|
"val/fraction_correct": 0.1794871836900711, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.1910944233338038, |
|
"val/fraction_latin_b": 0.18430300056934357, |
|
"val/fraction_number_a": 0.4224574863910675, |
|
"val/fraction_number_b": 0.439374307791392, |
|
"val/fraction_other_a": 0.38644809524218243, |
|
"val/fraction_other_b": 0.3763226866722107, |
|
"val/fraction_ties": 0.8205128113428751, |
|
"val/lang_prob_bg": 0.0015292856842279434, |
|
"val/lang_prob_en": 0.6743942896525065, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.8633061647415161, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0539, |
|
"objective/entropy": 554.0, |
|
"step": 60, |
|
"train/nll_loss_a": 0.05499819417794546, |
|
"train/nll_loss_b": 0.05279202883442243, |
|
"val/completion_length": 44.160256703694664, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.07692307854692142, |
|
"val/fraction_both_incorrect": 0.8461538354555765, |
|
"val/fraction_correct": 0.11538461844126384, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19793511927127838, |
|
"val/fraction_latin_b": 0.19460705916086832, |
|
"val/fraction_number_a": 0.4268949230511983, |
|
"val/fraction_number_b": 0.4235563079516093, |
|
"val/fraction_other_a": 0.375169962644577, |
|
"val/fraction_other_b": 0.3818366428216298, |
|
"val/fraction_ties": 0.9230769077936808, |
|
"val/lang_prob_bg": 0.0016635601253559191, |
|
"val/lang_prob_en": 0.6812194387118021, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6354166666666666, |
|
"grad_norm": 0.8153233528137207, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0809, |
|
"objective/entropy": 630.6666666666666, |
|
"step": 61, |
|
"train/nll_loss_a": 0.09423964222272237, |
|
"train/nll_loss_b": 0.06749718139568965, |
|
"val/completion_length": 48.903846740722656, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.15384615709384283, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.19230769574642181, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18816453218460083, |
|
"val/fraction_latin_b": 0.18853654464085898, |
|
"val/fraction_number_a": 0.45722946524620056, |
|
"val/fraction_number_b": 0.43430561820665997, |
|
"val/fraction_other_a": 0.3546060423056285, |
|
"val/fraction_other_b": 0.37715784708658856, |
|
"val/fraction_ties": 0.9230769276618958, |
|
"val/lang_prob_bg": 0.0015237585175782442, |
|
"val/lang_prob_en": 0.6806376179059347, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 1.104749321937561, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0912, |
|
"objective/entropy": 1110.0, |
|
"step": 62, |
|
"train/nll_loss_a": 0.09771117568016052, |
|
"train/nll_loss_b": 0.08477205038070679, |
|
"val/completion_length": 50.39743677775065, |
|
"val/contain_eos_token": 0.9935897390047709, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.05128205195069313, |
|
"val/fraction_both_incorrect": 0.807692289352417, |
|
"val/fraction_correct": 0.1217948744694392, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19005567828814188, |
|
"val/fraction_latin_b": 0.18684939543406168, |
|
"val/fraction_number_a": 0.44661805033683777, |
|
"val/fraction_number_b": 0.45144979159037274, |
|
"val/fraction_other_a": 0.3633263309796651, |
|
"val/fraction_other_b": 0.3617008129755656, |
|
"val/fraction_ties": 0.8589743574460348, |
|
"val/lang_prob_bg": 0.0012832956854254007, |
|
"val/lang_prob_en": 0.690887967745463, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 1.0379236936569214, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0724, |
|
"objective/entropy": 648.0, |
|
"step": 63, |
|
"train/nll_loss_a": 0.06959323212504387, |
|
"train/nll_loss_b": 0.07520159830649693, |
|
"val/completion_length": 46.92948786417643, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256524880727, |
|
"val/fraction_both_incorrect": 0.8846153815587362, |
|
"val/fraction_correct": 0.08974359060327212, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.19109234710534415, |
|
"val/fraction_latin_b": 0.2014519323905309, |
|
"val/fraction_number_a": 0.4389280279477437, |
|
"val/fraction_number_b": 0.4367695450782776, |
|
"val/fraction_other_a": 0.36997965971628827, |
|
"val/fraction_other_b": 0.36177852749824524, |
|
"val/fraction_ties": 0.9487179517745972, |
|
"val/lang_prob_bg": 0.0012157799016373854, |
|
"val/lang_prob_en": 0.6699715455373129, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.8731870651245117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0729, |
|
"objective/entropy": 430.6666666666667, |
|
"step": 64, |
|
"train/nll_loss_a": 0.08839354167381923, |
|
"train/nll_loss_b": 0.05736600855986277, |
|
"val/completion_length": 46.35897445678711, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.05128205195069313, |
|
"val/fraction_both_incorrect": 0.7435897390047709, |
|
"val/fraction_correct": 0.1538461558520794, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18153652052084604, |
|
"val/fraction_latin_b": 0.1694901337226232, |
|
"val/fraction_number_a": 0.45514161388079327, |
|
"val/fraction_number_b": 0.47041670481363934, |
|
"val/fraction_other_a": 0.36332186063130695, |
|
"val/fraction_other_b": 0.3600931664307912, |
|
"val/fraction_ties": 0.7948717872301737, |
|
"val/lang_prob_bg": 0.0013296857941895723, |
|
"val/lang_prob_en": 0.6832193930943807, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
}, |
|
{ |
|
"epoch": 0.6770833333333334, |
|
"grad_norm": 0.8836628198623657, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0866, |
|
"objective/entropy": 508.0, |
|
"step": 65, |
|
"train/nll_loss_a": 0.0954609215259552, |
|
"train/nll_loss_b": 0.07772823919852574, |
|
"val/completion_length": 46.75640996297201, |
|
"val/contain_eos_token": 1.0, |
|
"val/contains_guillemets": 0.0, |
|
"val/cyrillic_first_token": 0.0, |
|
"val/empty_batch": 0.0, |
|
"val/fraction_both_correct": 0.06410256649057071, |
|
"val/fraction_both_incorrect": 0.7692307631174723, |
|
"val/fraction_correct": 0.14743590106566748, |
|
"val/fraction_cyrillic_a": 0.0, |
|
"val/fraction_cyrillic_b": 0.0, |
|
"val/fraction_latin_a": 0.18177295724550882, |
|
"val/fraction_latin_b": 0.18375508983929953, |
|
"val/fraction_number_a": 0.44039592146873474, |
|
"val/fraction_number_b": 0.45297469695409137, |
|
"val/fraction_other_a": 0.377831111351649, |
|
"val/fraction_other_b": 0.36327023307482403, |
|
"val/fraction_ties": 0.8333333333333334, |
|
"val/lang_prob_bg": 0.0013483318810661633, |
|
"val/lang_prob_en": 0.6739258567492167, |
|
"val/latin_first_token": 1.0, |
|
"val/number_first_token": 0.0, |
|
"val/other_first_token": 0.0 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 26, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|