Vinitha2004's picture
Upload distilled Qwen2.5-Coder-3B model with knowledge distillation
eee74b7 verified
{
"epoch": 3,
"checkpoint_type": "epoch",
"is_best": false,
"validation_metrics": {
"total": 1.6648176369258776,
"distill": 0.4663255028366465,
"super": 24.436167814042147
},
"training_metrics": {
"total": 1.6754711744398976,
"distill": 0.48240784243990176,
"super": 24.343674139487042
},
"hyperparameters": {
"temperature": 2.0,
"alpha": 0.95,
"learning_rate": 0.001,
"batch_size": 1,
"gradient_accumulation_steps": 16
},
"best_val_loss_so_far": 1.6648216817606247,
"best_epoch_so_far": 1,
"patience_counter": 1,
"save_time": "2025-06-13 19:03:51"
}