pj-mathematician's picture
Add files using upload-large-folder tool
1155584 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 200,
"global_step": 4480,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011160714285714285,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 9.119,
"step": 1
},
{
"epoch": 0.11160714285714286,
"grad_norm": 17.632238388061523,
"learning_rate": 2.1428571428571428e-05,
"loss": 4.1469,
"step": 100
},
{
"epoch": 0.22321428571428573,
"grad_norm": 14.409270286560059,
"learning_rate": 4.375e-05,
"loss": 2.5294,
"step": 200
},
{
"epoch": 0.22321428571428573,
"eval_full_en_cosine_accuracy@1": 0.7467105263157895,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7467105263157895,
"eval_full_en_cosine_map@100": 0.2121058701298033,
"eval_full_en_cosine_map@150": 0.2294109301872967,
"eval_full_en_cosine_map@20": 0.34167650006204187,
"eval_full_en_cosine_map@200": 0.2492171685943861,
"eval_full_en_cosine_map@50": 0.237336657426832,
"eval_full_en_cosine_map@500": 0.3000288940307502,
"eval_full_en_cosine_mrr@1": 0.7467105263157895,
"eval_full_en_cosine_mrr@100": 0.8460592769803298,
"eval_full_en_cosine_mrr@150": 0.8460592769803298,
"eval_full_en_cosine_mrr@20": 0.8458948032961192,
"eval_full_en_cosine_mrr@200": 0.8460592769803298,
"eval_full_en_cosine_mrr@50": 0.8460122844991269,
"eval_full_en_cosine_ndcg@1": 0.7467105263157895,
"eval_full_en_cosine_ndcg@100": 0.4430509248084704,
"eval_full_en_cosine_ndcg@150": 0.4894828917681416,
"eval_full_en_cosine_ndcg@20": 0.5367541274871807,
"eval_full_en_cosine_ndcg@200": 0.5361903606133726,
"eval_full_en_cosine_ndcg@50": 0.448683811733402,
"eval_full_en_cosine_precision@1": 0.7467105263157895,
"eval_full_en_cosine_precision@100": 0.31240131578947367,
"eval_full_en_cosine_precision@150": 0.26592105263157895,
"eval_full_en_cosine_precision@20": 0.4965460526315789,
"eval_full_en_cosine_precision@200": 0.23370065789473685,
"eval_full_en_cosine_precision@50": 0.3904605263157895,
"eval_full_en_cosine_recall@1": 0.010753343030902496,
"eval_full_en_cosine_recall@100": 0.39446255566624855,
"eval_full_en_cosine_recall@150": 0.49544823712709557,
"eval_full_en_cosine_recall@20": 0.13279013317825217,
"eval_full_en_cosine_recall@200": 0.5739614992682516,
"eval_full_en_cosine_recall@50": 0.25254843470147753,
"eval_runtime": 1.5828,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5361903606133726,
"eval_steps_per_second": 0.0,
"step": 200
},
{
"epoch": 0.33482142857142855,
"grad_norm": 16.260934829711914,
"learning_rate": 4.915413533834587e-05,
"loss": 2.3611,
"step": 300
},
{
"epoch": 0.44642857142857145,
"grad_norm": 13.242988586425781,
"learning_rate": 4.797932330827068e-05,
"loss": 2.192,
"step": 400
},
{
"epoch": 0.44642857142857145,
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7368421052631579,
"eval_full_en_cosine_map@100": 0.2088144416212806,
"eval_full_en_cosine_map@150": 0.22677217670719133,
"eval_full_en_cosine_map@20": 0.3349832137166454,
"eval_full_en_cosine_map@200": 0.245946497368659,
"eval_full_en_cosine_map@50": 0.23473921202287384,
"eval_full_en_cosine_map@500": 0.2973985707303743,
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
"eval_full_en_cosine_mrr@100": 0.8394156306336016,
"eval_full_en_cosine_mrr@150": 0.8394156306336016,
"eval_full_en_cosine_mrr@20": 0.8392713554720135,
"eval_full_en_cosine_mrr@200": 0.8394156306336016,
"eval_full_en_cosine_mrr@50": 0.8393810045948205,
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
"eval_full_en_cosine_ndcg@100": 0.43855475512592684,
"eval_full_en_cosine_ndcg@150": 0.48609390907359196,
"eval_full_en_cosine_ndcg@20": 0.5288083416910968,
"eval_full_en_cosine_ndcg@200": 0.5318117937684201,
"eval_full_en_cosine_ndcg@50": 0.4453338982563473,
"eval_full_en_cosine_precision@1": 0.7368421052631579,
"eval_full_en_cosine_precision@100": 0.3088157894736842,
"eval_full_en_cosine_precision@150": 0.2644517543859649,
"eval_full_en_cosine_precision@20": 0.4875,
"eval_full_en_cosine_precision@200": 0.23172697368421055,
"eval_full_en_cosine_precision@50": 0.38782894736842105,
"eval_full_en_cosine_recall@1": 0.010619007443519193,
"eval_full_en_cosine_recall@100": 0.3902042311088277,
"eval_full_en_cosine_recall@150": 0.4925745165667779,
"eval_full_en_cosine_recall@20": 0.1301764615450556,
"eval_full_en_cosine_recall@200": 0.5696006364444781,
"eval_full_en_cosine_recall@50": 0.2518199886564403,
"eval_runtime": 1.5596,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5318117937684201,
"eval_steps_per_second": 0.0,
"step": 400
},
{
"epoch": 0.5580357142857143,
"grad_norm": 13.307888984680176,
"learning_rate": 4.680451127819549e-05,
"loss": 2.0338,
"step": 500
},
{
"epoch": 0.6696428571428571,
"grad_norm": 12.763930320739746,
"learning_rate": 4.56296992481203e-05,
"loss": 1.9009,
"step": 600
},
{
"epoch": 0.6696428571428571,
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7302631578947368,
"eval_full_en_cosine_map@100": 0.2146410944227793,
"eval_full_en_cosine_map@150": 0.23271596511985665,
"eval_full_en_cosine_map@20": 0.3429678297332613,
"eval_full_en_cosine_map@200": 0.2520997707361607,
"eval_full_en_cosine_map@50": 0.2404899713826549,
"eval_full_en_cosine_map@500": 0.302904619520322,
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
"eval_full_en_cosine_mrr@100": 0.8306572094298247,
"eval_full_en_cosine_mrr@150": 0.8306572094298247,
"eval_full_en_cosine_mrr@20": 0.8304491697994989,
"eval_full_en_cosine_mrr@200": 0.8306572094298247,
"eval_full_en_cosine_mrr@50": 0.8306058114035089,
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
"eval_full_en_cosine_ndcg@100": 0.4445617284976941,
"eval_full_en_cosine_ndcg@150": 0.4922393935902775,
"eval_full_en_cosine_ndcg@20": 0.5357880041966661,
"eval_full_en_cosine_ndcg@200": 0.5383209000398446,
"eval_full_en_cosine_ndcg@50": 0.4504820590447715,
"eval_full_en_cosine_precision@1": 0.7302631578947368,
"eval_full_en_cosine_precision@100": 0.31358552631578945,
"eval_full_en_cosine_precision@150": 0.2677412280701754,
"eval_full_en_cosine_precision@20": 0.49720394736842105,
"eval_full_en_cosine_precision@200": 0.23452302631578953,
"eval_full_en_cosine_precision@50": 0.3932894736842105,
"eval_full_en_cosine_recall@1": 0.010303516134180577,
"eval_full_en_cosine_recall@100": 0.3970033142271577,
"eval_full_en_cosine_recall@150": 0.5001101850184368,
"eval_full_en_cosine_recall@20": 0.13302896177814508,
"eval_full_en_cosine_recall@200": 0.5777429812058247,
"eval_full_en_cosine_recall@50": 0.254528957048419,
"eval_runtime": 1.5616,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5383209000398446,
"eval_steps_per_second": 0.0,
"step": 600
},
{
"epoch": 0.78125,
"grad_norm": 13.439990997314453,
"learning_rate": 4.4454887218045117e-05,
"loss": 1.8404,
"step": 700
},
{
"epoch": 0.8928571428571429,
"grad_norm": 12.594465255737305,
"learning_rate": 4.3280075187969924e-05,
"loss": 1.7692,
"step": 800
},
{
"epoch": 0.8928571428571429,
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7368421052631579,
"eval_full_en_cosine_map@100": 0.21030614519224017,
"eval_full_en_cosine_map@150": 0.22737063252522982,
"eval_full_en_cosine_map@20": 0.3442880676713117,
"eval_full_en_cosine_map@200": 0.24764067563282596,
"eval_full_en_cosine_map@50": 0.23827484272575025,
"eval_full_en_cosine_map@500": 0.2987091429260604,
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
"eval_full_en_cosine_mrr@100": 0.8404268619187053,
"eval_full_en_cosine_mrr@150": 0.8404268619187053,
"eval_full_en_cosine_mrr@20": 0.8402307852965749,
"eval_full_en_cosine_mrr@200": 0.8404268619187053,
"eval_full_en_cosine_mrr@50": 0.8403738058915406,
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
"eval_full_en_cosine_ndcg@100": 0.440670430732987,
"eval_full_en_cosine_ndcg@150": 0.486778222456143,
"eval_full_en_cosine_ndcg@20": 0.5383903905850532,
"eval_full_en_cosine_ndcg@200": 0.5352292016764449,
"eval_full_en_cosine_ndcg@50": 0.45046850998342597,
"eval_full_en_cosine_precision@1": 0.7368421052631579,
"eval_full_en_cosine_precision@100": 0.3099342105263158,
"eval_full_en_cosine_precision@150": 0.26390350877192986,
"eval_full_en_cosine_precision@20": 0.5,
"eval_full_en_cosine_precision@200": 0.23320723684210526,
"eval_full_en_cosine_precision@50": 0.39335526315789476,
"eval_full_en_cosine_recall@1": 0.01051277780149725,
"eval_full_en_cosine_recall@100": 0.39158535797000443,
"eval_full_en_cosine_recall@150": 0.4917399858788313,
"eval_full_en_cosine_recall@20": 0.13328036442285973,
"eval_full_en_cosine_recall@200": 0.5734492892933252,
"eval_full_en_cosine_recall@50": 0.254129727850083,
"eval_runtime": 1.5752,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5352292016764449,
"eval_steps_per_second": 0.0,
"step": 800
},
{
"epoch": 1.0044642857142858,
"grad_norm": 13.140974998474121,
"learning_rate": 4.212875939849624e-05,
"loss": 1.6921,
"step": 900
},
{
"epoch": 1.1160714285714286,
"grad_norm": 12.160736083984375,
"learning_rate": 4.096569548872181e-05,
"loss": 1.3861,
"step": 1000
},
{
"epoch": 1.1160714285714286,
"eval_full_en_cosine_accuracy@1": 0.7401315789473685,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7401315789473685,
"eval_full_en_cosine_map@100": 0.21155466872463927,
"eval_full_en_cosine_map@150": 0.2291636549745022,
"eval_full_en_cosine_map@20": 0.3373673798048492,
"eval_full_en_cosine_map@200": 0.24905074192004603,
"eval_full_en_cosine_map@50": 0.2376950112180141,
"eval_full_en_cosine_map@500": 0.3006802538137734,
"eval_full_en_cosine_mrr@1": 0.7401315789473685,
"eval_full_en_cosine_mrr@100": 0.8405236576289212,
"eval_full_en_cosine_mrr@150": 0.8405236576289212,
"eval_full_en_cosine_mrr@20": 0.8403143274853806,
"eval_full_en_cosine_mrr@200": 0.8405236576289212,
"eval_full_en_cosine_mrr@50": 0.840463849016481,
"eval_full_en_cosine_ndcg@1": 0.7401315789473685,
"eval_full_en_cosine_ndcg@100": 0.44212858816477746,
"eval_full_en_cosine_ndcg@150": 0.48946706445562127,
"eval_full_en_cosine_ndcg@20": 0.5332180756481385,
"eval_full_en_cosine_ndcg@200": 0.5367929588661781,
"eval_full_en_cosine_ndcg@50": 0.44979391873656477,
"eval_full_en_cosine_precision@1": 0.7401315789473685,
"eval_full_en_cosine_precision@100": 0.3114473684210526,
"eval_full_en_cosine_precision@150": 0.266469298245614,
"eval_full_en_cosine_precision@20": 0.49243421052631575,
"eval_full_en_cosine_precision@200": 0.2345888157894737,
"eval_full_en_cosine_precision@50": 0.3921052631578947,
"eval_full_en_cosine_recall@1": 0.010392607884295562,
"eval_full_en_cosine_recall@100": 0.3933254279416559,
"eval_full_en_cosine_recall@150": 0.4957503189606009,
"eval_full_en_cosine_recall@20": 0.13107623492706288,
"eval_full_en_cosine_recall@200": 0.5753954619760326,
"eval_full_en_cosine_recall@50": 0.2539746341397596,
"eval_runtime": 1.6397,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5367929588661781,
"eval_steps_per_second": 0.0,
"step": 1000
},
{
"epoch": 1.2276785714285714,
"grad_norm": 13.078369140625,
"learning_rate": 3.9790883458646615e-05,
"loss": 1.3863,
"step": 1100
},
{
"epoch": 1.3392857142857144,
"grad_norm": 11.990692138671875,
"learning_rate": 3.861607142857143e-05,
"loss": 1.3546,
"step": 1200
},
{
"epoch": 1.3392857142857144,
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7203947368421053,
"eval_full_en_cosine_map@100": 0.20552277525856266,
"eval_full_en_cosine_map@150": 0.22274311961933413,
"eval_full_en_cosine_map@20": 0.3363904557549852,
"eval_full_en_cosine_map@200": 0.24106738760441354,
"eval_full_en_cosine_map@50": 0.23370113464760453,
"eval_full_en_cosine_map@500": 0.28981293048421486,
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
"eval_full_en_cosine_mrr@100": 0.8322617799738206,
"eval_full_en_cosine_mrr@150": 0.8322617799738206,
"eval_full_en_cosine_mrr@20": 0.8320620443153339,
"eval_full_en_cosine_mrr@200": 0.8322617799738206,
"eval_full_en_cosine_mrr@50": 0.8322050649102997,
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
"eval_full_en_cosine_ndcg@100": 0.43445871937106545,
"eval_full_en_cosine_ndcg@150": 0.48130417146010107,
"eval_full_en_cosine_ndcg@20": 0.531477407982968,
"eval_full_en_cosine_ndcg@200": 0.5259375639543232,
"eval_full_en_cosine_ndcg@50": 0.4444057356887903,
"eval_full_en_cosine_precision@1": 0.7203947368421053,
"eval_full_en_cosine_precision@100": 0.3039802631578947,
"eval_full_en_cosine_precision@150": 0.25999999999999995,
"eval_full_en_cosine_precision@20": 0.4925986842105263,
"eval_full_en_cosine_precision@200": 0.22763157894736838,
"eval_full_en_cosine_precision@50": 0.3867105263157895,
"eval_full_en_cosine_recall@1": 0.010318104890368607,
"eval_full_en_cosine_recall@100": 0.385615965839615,
"eval_full_en_cosine_recall@150": 0.48656381032984825,
"eval_full_en_cosine_recall@20": 0.13139326985918445,
"eval_full_en_cosine_recall@200": 0.5617757383007209,
"eval_full_en_cosine_recall@50": 0.2506285703289517,
"eval_runtime": 1.5585,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5259375639543232,
"eval_steps_per_second": 0.0,
"step": 1200
},
{
"epoch": 1.4508928571428572,
"grad_norm": 15.019533157348633,
"learning_rate": 3.744125939849624e-05,
"loss": 1.373,
"step": 1300
},
{
"epoch": 1.5625,
"grad_norm": 10.545878410339355,
"learning_rate": 3.626644736842105e-05,
"loss": 1.3364,
"step": 1400
},
{
"epoch": 1.5625,
"eval_full_en_cosine_accuracy@1": 0.7171052631578947,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7171052631578947,
"eval_full_en_cosine_map@100": 0.20833018055660496,
"eval_full_en_cosine_map@150": 0.22583322401021033,
"eval_full_en_cosine_map@20": 0.34006318172507877,
"eval_full_en_cosine_map@200": 0.24462161151730188,
"eval_full_en_cosine_map@50": 0.23483789231739935,
"eval_full_en_cosine_map@500": 0.2946124561805931,
"eval_full_en_cosine_mrr@1": 0.7171052631578947,
"eval_full_en_cosine_mrr@100": 0.8267713172687238,
"eval_full_en_cosine_mrr@150": 0.8267713172687238,
"eval_full_en_cosine_mrr@20": 0.8265913362952838,
"eval_full_en_cosine_mrr@200": 0.8267713172687238,
"eval_full_en_cosine_mrr@50": 0.8267343568902494,
"eval_full_en_cosine_ndcg@1": 0.7171052631578947,
"eval_full_en_cosine_ndcg@100": 0.4377486787968229,
"eval_full_en_cosine_ndcg@150": 0.4850669425848544,
"eval_full_en_cosine_ndcg@20": 0.5331724259953773,
"eval_full_en_cosine_ndcg@200": 0.5302927064126869,
"eval_full_en_cosine_ndcg@50": 0.4451308688476405,
"eval_full_en_cosine_precision@1": 0.7171052631578947,
"eval_full_en_cosine_precision@100": 0.3074671052631579,
"eval_full_en_cosine_precision@150": 0.2625657894736842,
"eval_full_en_cosine_precision@20": 0.4947368421052632,
"eval_full_en_cosine_precision@200": 0.23016447368421053,
"eval_full_en_cosine_precision@50": 0.38769736842105257,
"eval_full_en_cosine_recall@1": 0.010208074045806198,
"eval_full_en_cosine_recall@100": 0.3902466549235702,
"eval_full_en_cosine_recall@150": 0.49226776551348056,
"eval_full_en_cosine_recall@20": 0.13255572846134298,
"eval_full_en_cosine_recall@200": 0.5680994353864672,
"eval_full_en_cosine_recall@50": 0.25126941591084845,
"eval_runtime": 1.5595,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5302927064126869,
"eval_steps_per_second": 0.0,
"step": 1400
},
{
"epoch": 1.6741071428571428,
"grad_norm": 18.495975494384766,
"learning_rate": 3.509163533834587e-05,
"loss": 1.2876,
"step": 1500
},
{
"epoch": 1.7857142857142856,
"grad_norm": 12.646751403808594,
"learning_rate": 3.391682330827068e-05,
"loss": 1.3094,
"step": 1600
},
{
"epoch": 1.7857142857142856,
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7072368421052632,
"eval_full_en_cosine_map@100": 0.20923239071614674,
"eval_full_en_cosine_map@150": 0.225604138471006,
"eval_full_en_cosine_map@20": 0.34034356587585846,
"eval_full_en_cosine_map@200": 0.24539737099429304,
"eval_full_en_cosine_map@50": 0.23464702413938254,
"eval_full_en_cosine_map@500": 0.29597166286299953,
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
"eval_full_en_cosine_mrr@100": 0.8214137967940215,
"eval_full_en_cosine_mrr@150": 0.8214137967940215,
"eval_full_en_cosine_mrr@20": 0.8213699371448987,
"eval_full_en_cosine_mrr@200": 0.8214137967940215,
"eval_full_en_cosine_mrr@50": 0.8213699371448987,
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
"eval_full_en_cosine_ndcg@100": 0.4396726832556684,
"eval_full_en_cosine_ndcg@150": 0.4847816359827512,
"eval_full_en_cosine_ndcg@20": 0.532792025753163,
"eval_full_en_cosine_ndcg@200": 0.5323403273572274,
"eval_full_en_cosine_ndcg@50": 0.4452189433184465,
"eval_full_en_cosine_precision@1": 0.7072368421052632,
"eval_full_en_cosine_precision@100": 0.3098026315789474,
"eval_full_en_cosine_precision@150": 0.26274122807017547,
"eval_full_en_cosine_precision@20": 0.4935855263157895,
"eval_full_en_cosine_precision@200": 0.23192434210526314,
"eval_full_en_cosine_precision@50": 0.38763157894736844,
"eval_full_en_cosine_recall@1": 0.010122149362902188,
"eval_full_en_cosine_recall@100": 0.39236988612007834,
"eval_full_en_cosine_recall@150": 0.4910778378543689,
"eval_full_en_cosine_recall@20": 0.13108496301513997,
"eval_full_en_cosine_recall@200": 0.5709689534914331,
"eval_full_en_cosine_recall@50": 0.25093448303772187,
"eval_runtime": 1.5873,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5323403273572274,
"eval_steps_per_second": 0.0,
"step": 1600
},
{
"epoch": 1.8973214285714286,
"grad_norm": 11.858412742614746,
"learning_rate": 3.274201127819549e-05,
"loss": 1.2784,
"step": 1700
},
{
"epoch": 2.0089285714285716,
"grad_norm": 11.152688026428223,
"learning_rate": 3.1567199248120306e-05,
"loss": 1.2204,
"step": 1800
},
{
"epoch": 2.0089285714285716,
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7368421052631579,
"eval_full_en_cosine_map@100": 0.2098412194483687,
"eval_full_en_cosine_map@150": 0.22663911455304064,
"eval_full_en_cosine_map@20": 0.3433147887298301,
"eval_full_en_cosine_map@200": 0.24620266722190678,
"eval_full_en_cosine_map@50": 0.23714915519951082,
"eval_full_en_cosine_map@500": 0.29690932859887553,
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
"eval_full_en_cosine_mrr@100": 0.8394024772357531,
"eval_full_en_cosine_mrr@150": 0.8394024772357531,
"eval_full_en_cosine_mrr@20": 0.8393426686233129,
"eval_full_en_cosine_mrr@200": 0.8394024772357531,
"eval_full_en_cosine_mrr@50": 0.8393426686233129,
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
"eval_full_en_cosine_ndcg@100": 0.4396519841053572,
"eval_full_en_cosine_ndcg@150": 0.4856325134708184,
"eval_full_en_cosine_ndcg@20": 0.5375317893335387,
"eval_full_en_cosine_ndcg@200": 0.533015167774829,
"eval_full_en_cosine_ndcg@50": 0.44810398395306655,
"eval_full_en_cosine_precision@1": 0.7368421052631579,
"eval_full_en_cosine_precision@100": 0.3084539473684211,
"eval_full_en_cosine_precision@150": 0.2627631578947368,
"eval_full_en_cosine_precision@20": 0.49769736842105267,
"eval_full_en_cosine_precision@200": 0.2314309210526316,
"eval_full_en_cosine_precision@50": 0.3891447368421053,
"eval_full_en_cosine_recall@1": 0.010440810366523372,
"eval_full_en_cosine_recall@100": 0.39036009395952986,
"eval_full_en_cosine_recall@150": 0.49041982254882954,
"eval_full_en_cosine_recall@20": 0.13228070304056636,
"eval_full_en_cosine_recall@200": 0.5704962189819233,
"eval_full_en_cosine_recall@50": 0.25248213212752935,
"eval_runtime": 1.6049,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.533015167774829,
"eval_steps_per_second": 0.0,
"step": 1800
},
{
"epoch": 2.1205357142857144,
"grad_norm": 12.91015625,
"learning_rate": 3.0392387218045114e-05,
"loss": 0.9617,
"step": 1900
},
{
"epoch": 2.232142857142857,
"grad_norm": 11.646313667297363,
"learning_rate": 2.9217575187969924e-05,
"loss": 1.0004,
"step": 2000
},
{
"epoch": 2.232142857142857,
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 1.0,
"eval_full_en_cosine_map@1": 0.7236842105263158,
"eval_full_en_cosine_map@100": 0.20775225168018954,
"eval_full_en_cosine_map@150": 0.22393096419950168,
"eval_full_en_cosine_map@20": 0.3380596885262807,
"eval_full_en_cosine_map@200": 0.24259765295506924,
"eval_full_en_cosine_map@50": 0.23452814948810471,
"eval_full_en_cosine_map@500": 0.2920026964508484,
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
"eval_full_en_cosine_mrr@100": 0.8325452625382137,
"eval_full_en_cosine_mrr@150": 0.8325452625382137,
"eval_full_en_cosine_mrr@20": 0.8324781304222094,
"eval_full_en_cosine_mrr@200": 0.8325452625382137,
"eval_full_en_cosine_mrr@50": 0.8325452625382137,
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
"eval_full_en_cosine_ndcg@100": 0.4376001104057169,
"eval_full_en_cosine_ndcg@150": 0.48181431955382,
"eval_full_en_cosine_ndcg@20": 0.5323035546433559,
"eval_full_en_cosine_ndcg@200": 0.5276663014224582,
"eval_full_en_cosine_ndcg@50": 0.44660441452063837,
"eval_full_en_cosine_precision@1": 0.7236842105263158,
"eval_full_en_cosine_precision@100": 0.30644736842105263,
"eval_full_en_cosine_precision@150": 0.259890350877193,
"eval_full_en_cosine_precision@20": 0.4916118421052632,
"eval_full_en_cosine_precision@200": 0.2280921052631579,
"eval_full_en_cosine_precision@50": 0.3886842105263158,
"eval_full_en_cosine_recall@1": 0.010329446437905086,
"eval_full_en_cosine_recall@100": 0.38885062846601265,
"eval_full_en_cosine_recall@150": 0.4854595951837256,
"eval_full_en_cosine_recall@20": 0.131078016933875,
"eval_full_en_cosine_recall@200": 0.5630724982932908,
"eval_full_en_cosine_recall@50": 0.252357645205228,
"eval_runtime": 1.5613,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5276663014224582,
"eval_steps_per_second": 0.0,
"step": 2000
},
{
"epoch": 2.34375,
"grad_norm": 12.087961196899414,
"learning_rate": 2.8042763157894735e-05,
"loss": 0.9694,
"step": 2100
},
{
"epoch": 2.455357142857143,
"grad_norm": 8.181659698486328,
"learning_rate": 2.6867951127819552e-05,
"loss": 0.9843,
"step": 2200
},
{
"epoch": 2.455357142857143,
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 1.0,
"eval_full_en_cosine_map@1": 0.7236842105263158,
"eval_full_en_cosine_map@100": 0.209953160245849,
"eval_full_en_cosine_map@150": 0.22760030144833215,
"eval_full_en_cosine_map@20": 0.34078157961918865,
"eval_full_en_cosine_map@200": 0.24749824184265867,
"eval_full_en_cosine_map@50": 0.2365248444512811,
"eval_full_en_cosine_map@500": 0.29789431690676116,
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
"eval_full_en_cosine_mrr@100": 0.8318935359231412,
"eval_full_en_cosine_mrr@150": 0.8318935359231412,
"eval_full_en_cosine_mrr@20": 0.8316833751044278,
"eval_full_en_cosine_mrr@200": 0.8318935359231412,
"eval_full_en_cosine_mrr@50": 0.8318935359231412,
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
"eval_full_en_cosine_ndcg@100": 0.44076958126493176,
"eval_full_en_cosine_ndcg@150": 0.48838061313116793,
"eval_full_en_cosine_ndcg@20": 0.5350320556020238,
"eval_full_en_cosine_ndcg@200": 0.5355574509263721,
"eval_full_en_cosine_ndcg@50": 0.44803994906340594,
"eval_full_en_cosine_precision@1": 0.7236842105263158,
"eval_full_en_cosine_precision@100": 0.3099671052631579,
"eval_full_en_cosine_precision@150": 0.2648464912280702,
"eval_full_en_cosine_precision@20": 0.49588815789473684,
"eval_full_en_cosine_precision@200": 0.23342105263157892,
"eval_full_en_cosine_precision@50": 0.39052631578947367,
"eval_full_en_cosine_recall@1": 0.010284539147879572,
"eval_full_en_cosine_recall@100": 0.39296182819932773,
"eval_full_en_cosine_recall@150": 0.4959148528891931,
"eval_full_en_cosine_recall@20": 0.13200577828629578,
"eval_full_en_cosine_recall@200": 0.5749370249014907,
"eval_full_en_cosine_recall@50": 0.25310992970173135,
"eval_runtime": 1.8632,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5355574509263721,
"eval_steps_per_second": 0.0,
"step": 2200
},
{
"epoch": 2.5669642857142856,
"grad_norm": 13.6882905960083,
"learning_rate": 2.5693139097744363e-05,
"loss": 0.9743,
"step": 2300
},
{
"epoch": 2.678571428571429,
"grad_norm": 11.966975212097168,
"learning_rate": 2.4518327067669177e-05,
"loss": 0.9252,
"step": 2400
},
{
"epoch": 2.678571428571429,
"eval_full_en_cosine_accuracy@1": 0.7335526315789473,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7335526315789473,
"eval_full_en_cosine_map@100": 0.20983286336268822,
"eval_full_en_cosine_map@150": 0.22675852672419078,
"eval_full_en_cosine_map@20": 0.34004090105732804,
"eval_full_en_cosine_map@200": 0.24584993568226646,
"eval_full_en_cosine_map@50": 0.23672594782424658,
"eval_full_en_cosine_map@500": 0.29632183596698103,
"eval_full_en_cosine_mrr@1": 0.7335526315789473,
"eval_full_en_cosine_mrr@100": 0.83135268727374,
"eval_full_en_cosine_mrr@150": 0.83135268727374,
"eval_full_en_cosine_mrr@20": 0.8311351294903929,
"eval_full_en_cosine_mrr@200": 0.83135268727374,
"eval_full_en_cosine_mrr@50": 0.8312917710944029,
"eval_full_en_cosine_ndcg@1": 0.7335526315789473,
"eval_full_en_cosine_ndcg@100": 0.4400577813719261,
"eval_full_en_cosine_ndcg@150": 0.4859220111165228,
"eval_full_en_cosine_ndcg@20": 0.5344170691501652,
"eval_full_en_cosine_ndcg@200": 0.5320416498978522,
"eval_full_en_cosine_ndcg@50": 0.4485020943766835,
"eval_full_en_cosine_precision@1": 0.7335526315789473,
"eval_full_en_cosine_precision@100": 0.30907894736842106,
"eval_full_en_cosine_precision@150": 0.26278508771929826,
"eval_full_en_cosine_precision@20": 0.4960526315789474,
"eval_full_en_cosine_precision@200": 0.23090460526315787,
"eval_full_en_cosine_precision@50": 0.39151315789473684,
"eval_full_en_cosine_recall@1": 0.010402156873475942,
"eval_full_en_cosine_recall@100": 0.39206565501916524,
"eval_full_en_cosine_recall@150": 0.49176955829136443,
"eval_full_en_cosine_recall@20": 0.1321996647113643,
"eval_full_en_cosine_recall@200": 0.569344104113959,
"eval_full_en_cosine_recall@50": 0.2535254041631645,
"eval_runtime": 1.5826,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5320416498978522,
"eval_steps_per_second": 0.0,
"step": 2400
},
{
"epoch": 2.790178571428571,
"grad_norm": 11.857823371887207,
"learning_rate": 2.3343515037593984e-05,
"loss": 0.9272,
"step": 2500
},
{
"epoch": 2.9017857142857144,
"grad_norm": 12.297764778137207,
"learning_rate": 2.2168703007518798e-05,
"loss": 0.9279,
"step": 2600
},
{
"epoch": 2.9017857142857144,
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7368421052631579,
"eval_full_en_cosine_map@100": 0.20939105710550232,
"eval_full_en_cosine_map@150": 0.22725165687553775,
"eval_full_en_cosine_map@20": 0.3403680329074837,
"eval_full_en_cosine_map@200": 0.24658865195474836,
"eval_full_en_cosine_map@50": 0.23612691752121232,
"eval_full_en_cosine_map@500": 0.29718900909315255,
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
"eval_full_en_cosine_mrr@100": 0.8391709003546018,
"eval_full_en_cosine_mrr@150": 0.8391709003546018,
"eval_full_en_cosine_mrr@20": 0.8391064008705977,
"eval_full_en_cosine_mrr@200": 0.8391709003546018,
"eval_full_en_cosine_mrr@50": 0.8391064008705977,
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
"eval_full_en_cosine_ndcg@100": 0.4389185422351881,
"eval_full_en_cosine_ndcg@150": 0.4868646893605612,
"eval_full_en_cosine_ndcg@20": 0.5359014833764041,
"eval_full_en_cosine_ndcg@200": 0.5332804255738979,
"eval_full_en_cosine_ndcg@50": 0.44749591453362436,
"eval_full_en_cosine_precision@1": 0.7368421052631579,
"eval_full_en_cosine_precision@100": 0.30779605263157894,
"eval_full_en_cosine_precision@150": 0.26355263157894737,
"eval_full_en_cosine_precision@20": 0.49588815789473684,
"eval_full_en_cosine_precision@200": 0.2316282894736842,
"eval_full_en_cosine_precision@50": 0.38901315789473684,
"eval_full_en_cosine_recall@1": 0.010425572953236805,
"eval_full_en_cosine_recall@100": 0.3892001066901767,
"eval_full_en_cosine_recall@150": 0.492569756570653,
"eval_full_en_cosine_recall@20": 0.13284603422933672,
"eval_full_en_cosine_recall@200": 0.5706210722984945,
"eval_full_en_cosine_recall@50": 0.2518705529759721,
"eval_runtime": 1.577,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5332804255738979,
"eval_steps_per_second": 0.0,
"step": 2600
},
{
"epoch": 3.013392857142857,
"grad_norm": 12.120986938476562,
"learning_rate": 2.099389097744361e-05,
"loss": 0.857,
"step": 2700
},
{
"epoch": 3.125,
"grad_norm": 14.276410102844238,
"learning_rate": 1.9819078947368423e-05,
"loss": 0.7313,
"step": 2800
},
{
"epoch": 3.125,
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 1.0,
"eval_full_en_cosine_map@1": 0.7269736842105263,
"eval_full_en_cosine_map@100": 0.20830025965749158,
"eval_full_en_cosine_map@150": 0.22525408557521698,
"eval_full_en_cosine_map@20": 0.34094306993307805,
"eval_full_en_cosine_map@200": 0.24400549054611867,
"eval_full_en_cosine_map@50": 0.23400685602624646,
"eval_full_en_cosine_map@500": 0.29401532392219154,
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
"eval_full_en_cosine_mrr@100": 0.8315051952798665,
"eval_full_en_cosine_mrr@150": 0.8315051952798665,
"eval_full_en_cosine_mrr@20": 0.8314268744778616,
"eval_full_en_cosine_mrr@200": 0.8315051952798665,
"eval_full_en_cosine_mrr@50": 0.8315051952798665,
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
"eval_full_en_cosine_ndcg@100": 0.43885977048304636,
"eval_full_en_cosine_ndcg@150": 0.48486671483618976,
"eval_full_en_cosine_ndcg@20": 0.5365677326031855,
"eval_full_en_cosine_ndcg@200": 0.5299990147795507,
"eval_full_en_cosine_ndcg@50": 0.44591298214905706,
"eval_full_en_cosine_precision@1": 0.7269736842105263,
"eval_full_en_cosine_precision@100": 0.308125,
"eval_full_en_cosine_precision@150": 0.2621052631578948,
"eval_full_en_cosine_precision@20": 0.49786184210526313,
"eval_full_en_cosine_precision@200": 0.22980263157894737,
"eval_full_en_cosine_precision@50": 0.3870394736842105,
"eval_full_en_cosine_recall@1": 0.010317820884117123,
"eval_full_en_cosine_recall@100": 0.38998825691236244,
"eval_full_en_cosine_recall@150": 0.4900687458798103,
"eval_full_en_cosine_recall@20": 0.13271573138828288,
"eval_full_en_cosine_recall@200": 0.5659226272090475,
"eval_full_en_cosine_recall@50": 0.25218483369820577,
"eval_runtime": 1.607,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5299990147795507,
"eval_steps_per_second": 0.0,
"step": 2800
},
{
"epoch": 3.236607142857143,
"grad_norm": 8.85190486907959,
"learning_rate": 1.8644266917293237e-05,
"loss": 0.7103,
"step": 2900
},
{
"epoch": 3.3482142857142856,
"grad_norm": 8.932626724243164,
"learning_rate": 1.7469454887218044e-05,
"loss": 0.7187,
"step": 3000
},
{
"epoch": 3.3482142857142856,
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7269736842105263,
"eval_full_en_cosine_map@100": 0.20842370079433947,
"eval_full_en_cosine_map@150": 0.22608431932756923,
"eval_full_en_cosine_map@20": 0.34026464907579207,
"eval_full_en_cosine_map@200": 0.2451065024940476,
"eval_full_en_cosine_map@50": 0.23418777403622906,
"eval_full_en_cosine_map@500": 0.2945476002258968,
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
"eval_full_en_cosine_mrr@100": 0.8303256958684593,
"eval_full_en_cosine_mrr@150": 0.8303256958684593,
"eval_full_en_cosine_mrr@20": 0.830265887256019,
"eval_full_en_cosine_mrr@200": 0.8303256958684593,
"eval_full_en_cosine_mrr@50": 0.830265887256019,
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
"eval_full_en_cosine_ndcg@100": 0.4379203478644915,
"eval_full_en_cosine_ndcg@150": 0.4860723616469748,
"eval_full_en_cosine_ndcg@20": 0.534483012777908,
"eval_full_en_cosine_ndcg@200": 0.5318565059446251,
"eval_full_en_cosine_ndcg@50": 0.4443024102705765,
"eval_full_en_cosine_precision@1": 0.7269736842105263,
"eval_full_en_cosine_precision@100": 0.30750000000000005,
"eval_full_en_cosine_precision@150": 0.26370614035087714,
"eval_full_en_cosine_precision@20": 0.49588815789473684,
"eval_full_en_cosine_precision@200": 0.23116776315789475,
"eval_full_en_cosine_precision@50": 0.38539473684210523,
"eval_full_en_cosine_recall@1": 0.010298189290703101,
"eval_full_en_cosine_recall@100": 0.38891472258186655,
"eval_full_en_cosine_recall@150": 0.4925623824521817,
"eval_full_en_cosine_recall@20": 0.13215936080151625,
"eval_full_en_cosine_recall@200": 0.5698259119139981,
"eval_full_en_cosine_recall@50": 0.2502092759755724,
"eval_runtime": 1.6179,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5318565059446251,
"eval_steps_per_second": 0.0,
"step": 3000
},
{
"epoch": 3.4598214285714284,
"grad_norm": 12.761665344238281,
"learning_rate": 1.6294642857142858e-05,
"loss": 0.7067,
"step": 3100
},
{
"epoch": 3.571428571428571,
"grad_norm": 12.318887710571289,
"learning_rate": 1.5119830827067668e-05,
"loss": 0.7157,
"step": 3200
},
{
"epoch": 3.571428571428571,
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 1.0,
"eval_full_en_cosine_map@1": 0.7072368421052632,
"eval_full_en_cosine_map@100": 0.21126096647489126,
"eval_full_en_cosine_map@150": 0.22897332387217115,
"eval_full_en_cosine_map@20": 0.34020926250086975,
"eval_full_en_cosine_map@200": 0.24883265008518762,
"eval_full_en_cosine_map@50": 0.2366562995235259,
"eval_full_en_cosine_map@500": 0.30009134506130936,
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
"eval_full_en_cosine_mrr@100": 0.8208446325794724,
"eval_full_en_cosine_mrr@150": 0.8208446325794724,
"eval_full_en_cosine_mrr@20": 0.8206285125693021,
"eval_full_en_cosine_mrr@200": 0.8208446325794724,
"eval_full_en_cosine_mrr@50": 0.8208446325794724,
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
"eval_full_en_cosine_ndcg@100": 0.4420871692985379,
"eval_full_en_cosine_ndcg@150": 0.48983718804719595,
"eval_full_en_cosine_ndcg@20": 0.5349182539944062,
"eval_full_en_cosine_ndcg@200": 0.5368995914478877,
"eval_full_en_cosine_ndcg@50": 0.4481578438397021,
"eval_full_en_cosine_precision@1": 0.7072368421052632,
"eval_full_en_cosine_precision@100": 0.3118421052631579,
"eval_full_en_cosine_precision@150": 0.26625,
"eval_full_en_cosine_precision@20": 0.49786184210526313,
"eval_full_en_cosine_precision@200": 0.2341282894736842,
"eval_full_en_cosine_precision@50": 0.39125,
"eval_full_en_cosine_recall@1": 0.010071368365416018,
"eval_full_en_cosine_recall@100": 0.39435465355460575,
"eval_full_en_cosine_recall@150": 0.49776297598034985,
"eval_full_en_cosine_recall@20": 0.1332224887798492,
"eval_full_en_cosine_recall@200": 0.5769437157052201,
"eval_full_en_cosine_recall@50": 0.25406609475829245,
"eval_runtime": 1.5833,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5368995914478877,
"eval_steps_per_second": 0.0,
"step": 3200
},
{
"epoch": 3.6830357142857144,
"grad_norm": 10.974320411682129,
"learning_rate": 1.3945018796992482e-05,
"loss": 0.7113,
"step": 3300
},
{
"epoch": 3.794642857142857,
"grad_norm": 11.004631042480469,
"learning_rate": 1.2770206766917295e-05,
"loss": 0.7013,
"step": 3400
},
{
"epoch": 3.794642857142857,
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 1.0,
"eval_full_en_cosine_map@1": 0.7269736842105263,
"eval_full_en_cosine_map@100": 0.20998333195374114,
"eval_full_en_cosine_map@150": 0.22683318021248486,
"eval_full_en_cosine_map@20": 0.34034679376659244,
"eval_full_en_cosine_map@200": 0.24654495691213385,
"eval_full_en_cosine_map@50": 0.23617479010012724,
"eval_full_en_cosine_map@500": 0.29617185416029185,
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
"eval_full_en_cosine_mrr@100": 0.8291805255603549,
"eval_full_en_cosine_mrr@150": 0.8291805255603549,
"eval_full_en_cosine_mrr@20": 0.8291105367585632,
"eval_full_en_cosine_mrr@200": 0.8291805255603549,
"eval_full_en_cosine_mrr@50": 0.8291805255603549,
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
"eval_full_en_cosine_ndcg@100": 0.4407299508694298,
"eval_full_en_cosine_ndcg@150": 0.48655314671133576,
"eval_full_en_cosine_ndcg@20": 0.5349966588302529,
"eval_full_en_cosine_ndcg@200": 0.5341334488223752,
"eval_full_en_cosine_ndcg@50": 0.448065635044085,
"eval_full_en_cosine_precision@1": 0.7269736842105263,
"eval_full_en_cosine_precision@100": 0.30973684210526314,
"eval_full_en_cosine_precision@150": 0.26320175438596494,
"eval_full_en_cosine_precision@20": 0.4965460526315789,
"eval_full_en_cosine_precision@200": 0.23210526315789473,
"eval_full_en_cosine_precision@50": 0.3907894736842106,
"eval_full_en_cosine_recall@1": 0.010311461817674684,
"eval_full_en_cosine_recall@100": 0.3931693265429022,
"eval_full_en_cosine_recall@150": 0.49300140763214356,
"eval_full_en_cosine_recall@20": 0.1329270784727238,
"eval_full_en_cosine_recall@200": 0.573228327517634,
"eval_full_en_cosine_recall@50": 0.25330386821616296,
"eval_runtime": 1.577,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5341334488223752,
"eval_steps_per_second": 0.0,
"step": 3400
},
{
"epoch": 3.90625,
"grad_norm": 12.102640151977539,
"learning_rate": 1.1595394736842107e-05,
"loss": 0.6903,
"step": 3500
},
{
"epoch": 4.017857142857143,
"grad_norm": 7.348757743835449,
"learning_rate": 1.0420582706766918e-05,
"loss": 0.6462,
"step": 3600
},
{
"epoch": 4.017857142857143,
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7203947368421053,
"eval_full_en_cosine_map@100": 0.2102732775077637,
"eval_full_en_cosine_map@150": 0.22767943965852241,
"eval_full_en_cosine_map@20": 0.338502447126724,
"eval_full_en_cosine_map@200": 0.24667619158922902,
"eval_full_en_cosine_map@50": 0.23576300870587916,
"eval_full_en_cosine_map@500": 0.2971463650911015,
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
"eval_full_en_cosine_mrr@100": 0.8263833835420962,
"eval_full_en_cosine_mrr@150": 0.8263833835420962,
"eval_full_en_cosine_mrr@20": 0.8263213180008847,
"eval_full_en_cosine_mrr@200": 0.8263833835420962,
"eval_full_en_cosine_mrr@50": 0.8263213180008847,
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
"eval_full_en_cosine_ndcg@100": 0.44114478517461736,
"eval_full_en_cosine_ndcg@150": 0.4883455168714466,
"eval_full_en_cosine_ndcg@20": 0.53288860900767,
"eval_full_en_cosine_ndcg@200": 0.5334866046140189,
"eval_full_en_cosine_ndcg@50": 0.4473951526251337,
"eval_full_en_cosine_precision@1": 0.7203947368421053,
"eval_full_en_cosine_precision@100": 0.31078947368421056,
"eval_full_en_cosine_precision@150": 0.265219298245614,
"eval_full_en_cosine_precision@20": 0.4934210526315789,
"eval_full_en_cosine_precision@200": 0.23212171052631578,
"eval_full_en_cosine_precision@50": 0.3899342105263158,
"eval_full_en_cosine_recall@1": 0.01018155854728512,
"eval_full_en_cosine_recall@100": 0.3935816727444405,
"eval_full_en_cosine_recall@150": 0.4958028561341766,
"eval_full_en_cosine_recall@20": 0.13181077303144853,
"eval_full_en_cosine_recall@200": 0.5716317929962068,
"eval_full_en_cosine_recall@50": 0.25274553753777246,
"eval_runtime": 1.6024,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5334866046140189,
"eval_steps_per_second": 0.0,
"step": 3600
},
{
"epoch": 4.129464285714286,
"grad_norm": 8.786450386047363,
"learning_rate": 9.24577067669173e-06,
"loss": 0.5162,
"step": 3700
},
{
"epoch": 4.241071428571429,
"grad_norm": 10.602435111999512,
"learning_rate": 8.070958646616542e-06,
"loss": 0.524,
"step": 3800
},
{
"epoch": 4.241071428571429,
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7302631578947368,
"eval_full_en_cosine_map@100": 0.21150798737582682,
"eval_full_en_cosine_map@150": 0.22868847990327232,
"eval_full_en_cosine_map@20": 0.3411525812655742,
"eval_full_en_cosine_map@200": 0.2480155691306444,
"eval_full_en_cosine_map@50": 0.23814436251631807,
"eval_full_en_cosine_map@500": 0.29792672341621373,
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
"eval_full_en_cosine_mrr@100": 0.8323485085820613,
"eval_full_en_cosine_mrr@150": 0.8323485085820613,
"eval_full_en_cosine_mrr@20": 0.8321467731829576,
"eval_full_en_cosine_mrr@200": 0.8323485085820613,
"eval_full_en_cosine_mrr@50": 0.832296294714058,
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
"eval_full_en_cosine_ndcg@100": 0.44247378999755477,
"eval_full_en_cosine_ndcg@150": 0.48886293038433404,
"eval_full_en_cosine_ndcg@20": 0.5351701323930714,
"eval_full_en_cosine_ndcg@200": 0.5352268343210608,
"eval_full_en_cosine_ndcg@50": 0.4502625298651447,
"eval_full_en_cosine_precision@1": 0.7302631578947368,
"eval_full_en_cosine_precision@100": 0.311546052631579,
"eval_full_en_cosine_precision@150": 0.265219298245614,
"eval_full_en_cosine_precision@20": 0.49588815789473684,
"eval_full_en_cosine_precision@200": 0.23268092105263163,
"eval_full_en_cosine_precision@50": 0.3930921052631579,
"eval_full_en_cosine_recall@1": 0.010244630514181254,
"eval_full_en_cosine_recall@100": 0.39498767852245736,
"eval_full_en_cosine_recall@150": 0.49574169519464223,
"eval_full_en_cosine_recall@20": 0.1324589336710221,
"eval_full_en_cosine_recall@200": 0.574019804020236,
"eval_full_en_cosine_recall@50": 0.2548099607629461,
"eval_runtime": 1.5919,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5352268343210608,
"eval_steps_per_second": 0.0,
"step": 3800
},
{
"epoch": 4.352678571428571,
"grad_norm": 11.65066909790039,
"learning_rate": 6.896146616541354e-06,
"loss": 0.5303,
"step": 3900
},
{
"epoch": 4.464285714285714,
"grad_norm": 10.764215469360352,
"learning_rate": 5.721334586466166e-06,
"loss": 0.5269,
"step": 4000
},
{
"epoch": 4.464285714285714,
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7368421052631579,
"eval_full_en_cosine_map@100": 0.2101198919267321,
"eval_full_en_cosine_map@150": 0.2276536266469315,
"eval_full_en_cosine_map@20": 0.34076177455520346,
"eval_full_en_cosine_map@200": 0.24678319516569472,
"eval_full_en_cosine_map@50": 0.23677969810249233,
"eval_full_en_cosine_map@500": 0.297249372287514,
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
"eval_full_en_cosine_mrr@100": 0.8373899157616261,
"eval_full_en_cosine_mrr@150": 0.8373899157616261,
"eval_full_en_cosine_mrr@20": 0.837172357978279,
"eval_full_en_cosine_mrr@200": 0.8373899157616261,
"eval_full_en_cosine_mrr@50": 0.837328999582289,
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
"eval_full_en_cosine_ndcg@100": 0.4408521323246635,
"eval_full_en_cosine_ndcg@150": 0.48834055710549873,
"eval_full_en_cosine_ndcg@20": 0.5353264293739176,
"eval_full_en_cosine_ndcg@200": 0.5341206282180626,
"eval_full_en_cosine_ndcg@50": 0.44939083758113085,
"eval_full_en_cosine_precision@1": 0.7368421052631579,
"eval_full_en_cosine_precision@100": 0.30953947368421053,
"eval_full_en_cosine_precision@150": 0.26460526315789473,
"eval_full_en_cosine_precision@20": 0.4947368421052632,
"eval_full_en_cosine_precision@200": 0.23187500000000003,
"eval_full_en_cosine_precision@50": 0.3913815789473684,
"eval_full_en_cosine_recall@1": 0.010305566449078924,
"eval_full_en_cosine_recall@100": 0.3922740640225546,
"eval_full_en_cosine_recall@150": 0.4949163913773604,
"eval_full_en_cosine_recall@20": 0.13233275450376297,
"eval_full_en_cosine_recall@200": 0.572041877895568,
"eval_full_en_cosine_recall@50": 0.2535655251683108,
"eval_runtime": 1.5798,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5341206282180626,
"eval_steps_per_second": 0.0,
"step": 4000
},
{
"epoch": 4.575892857142857,
"grad_norm": 10.328286170959473,
"learning_rate": 4.546522556390977e-06,
"loss": 0.4824,
"step": 4100
},
{
"epoch": 4.6875,
"grad_norm": 10.712604522705078,
"learning_rate": 3.3717105263157897e-06,
"loss": 0.5222,
"step": 4200
},
{
"epoch": 4.6875,
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7302631578947368,
"eval_full_en_cosine_map@100": 0.21090472549603356,
"eval_full_en_cosine_map@150": 0.22845220292726734,
"eval_full_en_cosine_map@20": 0.342326318294358,
"eval_full_en_cosine_map@200": 0.24733168088568283,
"eval_full_en_cosine_map@50": 0.23774626029530496,
"eval_full_en_cosine_map@500": 0.2977609786459198,
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
"eval_full_en_cosine_mrr@100": 0.8356281328320803,
"eval_full_en_cosine_mrr@150": 0.8356281328320803,
"eval_full_en_cosine_mrr@20": 0.8354127506265665,
"eval_full_en_cosine_mrr@200": 0.8356281328320803,
"eval_full_en_cosine_mrr@50": 0.8355693922305765,
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
"eval_full_en_cosine_ndcg@100": 0.44188546614809043,
"eval_full_en_cosine_ndcg@150": 0.48899866366733713,
"eval_full_en_cosine_ndcg@20": 0.5372965681233445,
"eval_full_en_cosine_ndcg@200": 0.5342393130950145,
"eval_full_en_cosine_ndcg@50": 0.4501724823363586,
"eval_full_en_cosine_precision@1": 0.7302631578947368,
"eval_full_en_cosine_precision@100": 0.3101973684210526,
"eval_full_en_cosine_precision@150": 0.2649780701754386,
"eval_full_en_cosine_precision@20": 0.49786184210526313,
"eval_full_en_cosine_precision@200": 0.23199013157894738,
"eval_full_en_cosine_precision@50": 0.3921710526315789,
"eval_full_en_cosine_recall@1": 0.01024195976751409,
"eval_full_en_cosine_recall@100": 0.3937143686320033,
"eval_full_en_cosine_recall@150": 0.4954717634968576,
"eval_full_en_cosine_recall@20": 0.13293505289394864,
"eval_full_en_cosine_recall@200": 0.5715937768635994,
"eval_full_en_cosine_recall@50": 0.2542542782427721,
"eval_runtime": 1.579,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5342393130950145,
"eval_steps_per_second": 0.0,
"step": 4200
},
{
"epoch": 4.799107142857143,
"grad_norm": 12.404372215270996,
"learning_rate": 2.1968984962406015e-06,
"loss": 0.5104,
"step": 4300
},
{
"epoch": 4.910714285714286,
"grad_norm": 12.05720329284668,
"learning_rate": 1.0220864661654136e-06,
"loss": 0.5002,
"step": 4400
},
{
"epoch": 4.910714285714286,
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
"eval_full_en_cosine_accuracy@100": 1.0,
"eval_full_en_cosine_accuracy@150": 1.0,
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
"eval_full_en_cosine_accuracy@200": 1.0,
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
"eval_full_en_cosine_map@1": 0.7302631578947368,
"eval_full_en_cosine_map@100": 0.21161540263537876,
"eval_full_en_cosine_map@150": 0.22899252179487295,
"eval_full_en_cosine_map@20": 0.3434603918412553,
"eval_full_en_cosine_map@200": 0.24784282323083537,
"eval_full_en_cosine_map@50": 0.23779270403918282,
"eval_full_en_cosine_map@500": 0.298154972004029,
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
"eval_full_en_cosine_mrr@100": 0.8343905966424682,
"eval_full_en_cosine_mrr@150": 0.8343905966424682,
"eval_full_en_cosine_mrr@20": 0.8341772399749373,
"eval_full_en_cosine_mrr@200": 0.8343905966424682,
"eval_full_en_cosine_mrr@50": 0.8343338815789473,
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
"eval_full_en_cosine_ndcg@100": 0.44277699637488865,
"eval_full_en_cosine_ndcg@150": 0.4895063673734854,
"eval_full_en_cosine_ndcg@20": 0.5384654647855256,
"eval_full_en_cosine_ndcg@200": 0.5346148440105628,
"eval_full_en_cosine_ndcg@50": 0.44986527953229877,
"eval_full_en_cosine_precision@1": 0.7302631578947368,
"eval_full_en_cosine_precision@100": 0.3111842105263158,
"eval_full_en_cosine_precision@150": 0.2652412280701754,
"eval_full_en_cosine_precision@20": 0.4998355263157894,
"eval_full_en_cosine_precision@200": 0.232171052631579,
"eval_full_en_cosine_precision@50": 0.39184210526315794,
"eval_full_en_cosine_recall@1": 0.010227350724729817,
"eval_full_en_cosine_recall@100": 0.3948435268881245,
"eval_full_en_cosine_recall@150": 0.49626849018850344,
"eval_full_en_cosine_recall@20": 0.13368254620254577,
"eval_full_en_cosine_recall@200": 0.5720837677245543,
"eval_full_en_cosine_recall@50": 0.2541249933594102,
"eval_runtime": 1.5839,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5346148440105628,
"eval_steps_per_second": 0.0,
"step": 4400
}
],
"logging_steps": 100,
"max_steps": 4480,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}