Vinitha2004's picture
Upload distilled Qwen2.5-Coder-3B model with knowledge distillation
6eaddc2 verified
{
"epoch": 3,
"checkpoint_type": "epoch",
"is_best": false,
"validation_metrics": {
"total": 1.9706446496658447,
"distill": 0.8303216299544629,
"super": 23.636781707911577
},
"training_metrics": {
"total": 1.9690602091820009,
"distill": 0.8288625208889284,
"super": 23.63281596153013
},
"hyperparameters": {
"temperature": 2.0,
"alpha": 0.95,
"learning_rate": 0.001,
"batch_size": 1,
"gradient_accumulation_steps": 16
},
"best_val_loss_so_far": 1.9706641339331337,
"best_epoch_so_far": 1,
"patience_counter": 1,
"save_time": "2025-06-13 12:39:51"
}