|
2025-04-11 02:47:00,195 - Hyperparameters: {'output_dir': './lora_finetuned', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': <IntervalStrategy.STEPS: 'steps'>, 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 1, 'max_steps': 15000, 'lr_scheduler_type': <SchedulerType.LINEAR: 'linear'>, 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 4000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './logs', 'logging_strategy': <IntervalStrategy.STEPS: 'steps'>, 'logging_first_step': False, 'logging_steps': 100, 'logging_nan_inf_filter': True, 'save_strategy': <SaveStrategy.STEPS: 'steps'>, 'save_steps': 2500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': True, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './lora_finetuned', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': <OptimizerNames.PAGED_ADAMW_8BIT: 'paged_adamw_8bit'>, 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'decryptellix/Llama-3.1-8B-LoRA-only', 'hub_strategy': <HubStrategy.EVERY_SAVE: 'every_save'>, 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'distributed_state': Distributed environment: NO |
|
2025-04-11 02:47:00,195 - Training metrics: {'epochs': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800, 8900, 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, 10000, 10100, 10200, 10300, 10400, 10500, 10600, 10700, 10800, 10900, 11000, 11100, 11200, 11300, 11400, 11500, 11600, 11700, 11800, 11900, 12000, 12100, 12200, 12300, 12400, 12500, 12600, 12700, 12800, 12900, 13000, 13100, 13200, 13300, 13400, 13500, 13600, 13700, 13800, 13900, 14000, 14100, 14200, 14300, 14400, 14500, 14600, 14700, 14800, 14900, 15000], 'loss': [0.826, 0.6726, 0.5578, 0.5117, 0.4815, 0.4659, 0.4564, 0.4512, 0.4388, 0.4344, 0.4291, 0.4244, 0.4158, 0.4175, 0.4167, 0.4102, 0.4027, 0.4041, 0.4055, 0.4, 0.4009, 0.3995, 0.3943, 0.3925, 0.3945, 0.3903, 0.3858, 0.3857, 0.3827, 0.3851, 0.3803, 0.3785, 0.3826, 0.3761, 0.3763, 0.3735, 0.373, 0.3738, 0.369, 0.371, 0.3669, 0.3654, 0.3727, 0.3608, 0.3648, 0.3582, 0.3628, 0.358, 0.3614, 0.3604, 0.3552, 0.3582, 0.3616, 0.3509, 0.3516, 0.3566, 0.3498, 0.3498, 0.3494, 0.3498, 0.3507, 0.3528, 0.3504, 0.3489, 0.3473, 0.3441, 0.3454, 0.3444, 0.346, 0.3462, 0.3443, 0.3405, 0.3422, 0.3426, 0.3418, 0.3411, 0.3428, 0.3396, 0.3443, 0.3409, 0.339, 0.3406, 0.3351, 0.3393, 0.3394, 0.3358, 0.337, 0.3348, 0.3359, 0.3355, 0.3353, 0.3386, 0.3329, 0.3359, 0.3321, 0.3304, 0.3315, 0.333, 0.33, 0.3341, 0.3321, 0.333, 0.3302, 0.3288, 0.3356, 0.3307, 0.3306, 0.328, 0.3302, 0.326, 0.33, 0.3294, 0.3292, 0.3251, 0.328, 0.3256, 0.3267, 0.3225, 0.327, 0.3283, 0.3243, 0.3269, 0.327, 0.3266, 0.3255, 0.3229, 0.3235, 0.3256, 0.328, 0.3233, 0.3227, 0.3259, 0.3245, 0.3249, 0.3214, 0.3229, 0.3262, 0.3234, 0.3195, 0.3231, 0.3282, 0.3231, 0.3254, 0.3233, 0.3211, 0.3194, 0.323, 0.3235, 0.3208, 0.3222], 'learning_rate': [2.5e-06, 5e-06, 7.5e-06, 1e-05, 1.25e-05, 1.5e-05, 1.75e-05, 2e-05, 2.25e-05, 2.5e-05, 2.7500000000000004e-05, 3e-05, 3.2500000000000004e-05, 3.5e-05, 3.7500000000000003e-05, 4e-05, 4.25e-05, 4.5e-05, 4.75e-05, 5e-05, 5.25e-05, 5.500000000000001e-05, 5.7499999999999995e-05, 6e-05, 6.25e-05, 6.500000000000001e-05, 6.750000000000001e-05, 7e-05, 7.25e-05, 7.500000000000001e-05, 7.75e-05, 8e-05, 8.25e-05, 8.5e-05, 8.75e-05, 9e-05, 9.250000000000001e-05, 9.5e-05, 9.75e-05, 0.0001, 9.909090909090911e-05, 9.818181818181818e-05, 9.727272727272728e-05, 9.636363636363637e-05, 9.545454545454546e-05, 9.454545454545455e-05, 9.363636363636364e-05, 9.272727272727273e-05, 9.181818181818183e-05, 9.090909090909092e-05, 9e-05, 8.90909090909091e-05, 8.818181818181818e-05, 8.727272727272727e-05, 8.636363636363637e-05, 8.545454545454545e-05, 8.454545454545455e-05, 8.363636363636364e-05, 8.272727272727273e-05, 8.181818181818183e-05, 8.090909090909092e-05, 8e-05, 7.90909090909091e-05, 7.818181818181818e-05, 7.727272727272727e-05, 7.636363636363637e-05, 7.545454545454545e-05, 7.454545454545455e-05, 7.363636363636364e-05, 7.272727272727273e-05, 7.181818181818182e-05, 7.090909090909092e-05, 7e-05, 6.90909090909091e-05, 6.818181818181818e-05, 6.727272727272727e-05, 6.636363636363638e-05, 6.545454545454546e-05, 6.454545454545455e-05, 6.363636363636364e-05, 6.272727272727273e-05, 6.181818181818182e-05, 6.090909090909091e-05, 6e-05, 5.90909090909091e-05, 5.818181818181818e-05, 5.727272727272728e-05, 5.636363636363636e-05, 5.545454545454546e-05, 5.4545454545454546e-05, 5.363636363636364e-05, 5.272727272727272e-05, 5.181818181818182e-05, 5.090909090909091e-05, 5e-05, 4.909090909090909e-05, 4.8181818181818186e-05, 4.7272727272727275e-05, 4.636363636363636e-05, 4.545454545454546e-05, 4.454545454545455e-05, 4.3636363636363636e-05, 4.2727272727272724e-05, 4.181818181818182e-05, 4.0909090909090915e-05, 4e-05, 3.909090909090909e-05, 3.818181818181819e-05, 3.7272727272727276e-05, 3.6363636363636364e-05, 3.545454545454546e-05, 3.454545454545455e-05, 3.3636363636363636e-05, 3.272727272727273e-05, 3.181818181818182e-05, 3.090909090909091e-05, 3e-05, 2.909090909090909e-05, 2.818181818181818e-05, 2.7272727272727273e-05, 2.636363636363636e-05, 2.5454545454545454e-05, 2.4545454545454545e-05, 2.3636363636363637e-05, 2.272727272727273e-05, 2.1818181818181818e-05, 2.090909090909091e-05, 2e-05, 1.9090909090909094e-05, 1.8181818181818182e-05, 1.7272727272727274e-05, 1.6363636363636366e-05, 1.5454545454545454e-05, 1.4545454545454545e-05, 1.3636363636363637e-05, 1.2727272727272727e-05, 1.1818181818181819e-05, 1.0909090909090909e-05, 1e-05, 9.090909090909091e-06, 8.181818181818183e-06, 7.272727272727272e-06, 6.363636363636363e-06, 5.4545454545454545e-06, 4.5454545454545455e-06, 3.636363636363636e-06, 2.7272727272727272e-06, 1.818181818181818e-06, 9.09090909090909e-07, 0.0], 'training_time': 497716.1720509529} |