| { | |
| "best_global_step": 180, | |
| "best_metric": 0.9744756817817688, | |
| "best_model_checkpoint": "j05hr3d/SFT-Qwen2.5-Coder-3B_v7/checkpoint-180", | |
| "epoch": 3.0, | |
| "eval_steps": 20, | |
| "global_step": 204, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.29739776951672864, | |
| "grad_norm": 0.379160612821579, | |
| "learning_rate": 9.390862944162437e-05, | |
| "loss": 1.0502, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.29739776951672864, | |
| "eval_loss": 1.1186413764953613, | |
| "eval_runtime": 11.634, | |
| "eval_samples_per_second": 4.899, | |
| "eval_steps_per_second": 0.688, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.5947955390334573, | |
| "grad_norm": 0.45619407296180725, | |
| "learning_rate": 8.375634517766498e-05, | |
| "loss": 0.9305, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5947955390334573, | |
| "eval_loss": 1.0608510971069336, | |
| "eval_runtime": 9.5232, | |
| "eval_samples_per_second": 5.985, | |
| "eval_steps_per_second": 0.84, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.8921933085501859, | |
| "grad_norm": 0.8720942139625549, | |
| "learning_rate": 7.360406091370558e-05, | |
| "loss": 0.8083, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8921933085501859, | |
| "eval_loss": 1.0287775993347168, | |
| "eval_runtime": 9.518, | |
| "eval_samples_per_second": 5.989, | |
| "eval_steps_per_second": 0.841, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.178438661710037, | |
| "grad_norm": 0.4681706726551056, | |
| "learning_rate": 6.34517766497462e-05, | |
| "loss": 0.8055, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.178438661710037, | |
| "eval_loss": 1.0086703300476074, | |
| "eval_runtime": 9.5494, | |
| "eval_samples_per_second": 5.969, | |
| "eval_steps_per_second": 0.838, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.4758364312267658, | |
| "grad_norm": 0.3220897912979126, | |
| "learning_rate": 5.329949238578681e-05, | |
| "loss": 0.732, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.4758364312267658, | |
| "eval_loss": 0.9919774532318115, | |
| "eval_runtime": 9.5217, | |
| "eval_samples_per_second": 5.986, | |
| "eval_steps_per_second": 0.84, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.7732342007434945, | |
| "grad_norm": 0.5077484846115112, | |
| "learning_rate": 4.3147208121827415e-05, | |
| "loss": 0.7372, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.7732342007434945, | |
| "eval_loss": 0.9832707047462463, | |
| "eval_runtime": 9.5285, | |
| "eval_samples_per_second": 5.982, | |
| "eval_steps_per_second": 0.84, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.059479553903346, | |
| "grad_norm": 0.3499664068222046, | |
| "learning_rate": 3.299492385786802e-05, | |
| "loss": 0.7176, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.059479553903346, | |
| "eval_loss": 0.9775140881538391, | |
| "eval_runtime": 9.5173, | |
| "eval_samples_per_second": 5.989, | |
| "eval_steps_per_second": 0.841, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.356877323420074, | |
| "grad_norm": 0.5079831480979919, | |
| "learning_rate": 2.284263959390863e-05, | |
| "loss": 0.6343, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.356877323420074, | |
| "eval_loss": 0.9806727170944214, | |
| "eval_runtime": 9.5235, | |
| "eval_samples_per_second": 5.985, | |
| "eval_steps_per_second": 0.84, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.654275092936803, | |
| "grad_norm": 0.26470282673835754, | |
| "learning_rate": 1.2690355329949238e-05, | |
| "loss": 0.7514, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.654275092936803, | |
| "eval_loss": 0.9744756817817688, | |
| "eval_runtime": 9.5274, | |
| "eval_samples_per_second": 5.983, | |
| "eval_steps_per_second": 0.84, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.9516728624535316, | |
| "grad_norm": 0.485846608877182, | |
| "learning_rate": 2.5380710659898476e-06, | |
| "loss": 0.6888, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.9516728624535316, | |
| "eval_loss": 0.9752256870269775, | |
| "eval_runtime": 9.5253, | |
| "eval_samples_per_second": 5.984, | |
| "eval_steps_per_second": 0.84, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 204, | |
| "total_flos": 3.179854802878464e+16, | |
| "train_loss": 0.7846405132144105, | |
| "train_runtime": 873.1746, | |
| "train_samples_per_second": 1.848, | |
| "train_steps_per_second": 0.234 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.9744756817817688, | |
| "eval_runtime": 9.5474, | |
| "eval_samples_per_second": 5.97, | |
| "eval_steps_per_second": 0.838, | |
| "step": 204 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 204, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.179854802878464e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |