{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 2072, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": { "accuracy": 0.7460424385314921 }, "eval_f1": { "f1": 0.6468745856581467 }, "eval_loss": 0.7669360637664795, "eval_precision": { "precision": 0.7498298692486852 }, "eval_recall": { "recall": 0.626548030580631 }, "eval_runtime": 25.654, "eval_samples_per_second": 347.197, "eval_steps_per_second": 21.712, "step": 259 }, { "epoch": 1.9305019305019306, "grad_norm": 1.1135650873184204, "learning_rate": 2.2760617760617762e-05, "loss": 0.5355, "step": 500 }, { "epoch": 2.0, "eval_accuracy": { "accuracy": 0.7547995958235096 }, "eval_f1": { "f1": 0.7693345628019765 }, "eval_loss": 1.091949224472046, "eval_precision": { "precision": 0.7745827666215183 }, "eval_recall": { "recall": 0.775266047858661 }, "eval_runtime": 25.7836, "eval_samples_per_second": 345.452, "eval_steps_per_second": 21.603, "step": 518 }, { "epoch": 3.0, "eval_accuracy": { "accuracy": 0.7595149882115191 }, "eval_f1": { "f1": 0.7726494361094581 }, "eval_loss": 1.3202872276306152, "eval_precision": { "precision": 0.7741992798858378 }, "eval_recall": { "recall": 0.7862141909666348 }, "eval_runtime": 25.6795, "eval_samples_per_second": 346.852, "eval_steps_per_second": 21.69, "step": 777 }, { "epoch": 3.861003861003861, "grad_norm": 0.5814288854598999, "learning_rate": 1.552123552123552e-05, "loss": 0.1121, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": { "accuracy": 0.7726507241495453 }, "eval_f1": { "f1": 0.7837280343276152 }, "eval_loss": 1.3736612796783447, "eval_precision": { "precision": 0.7837340802618142 }, "eval_recall": { "recall": 0.7843424989171847 }, "eval_runtime": 25.6794, "eval_samples_per_second": 346.854, "eval_steps_per_second": 21.691, "step": 1036 }, { "epoch": 5.0, "eval_accuracy": { "accuracy": 0.7656899068148647 }, "eval_f1": { "f1": 0.792071365170026 }, "eval_loss": 1.5462839603424072, "eval_precision": { "precision": 0.818913170460716 }, "eval_recall": { "recall": 0.7749402535371823 }, "eval_runtime": 25.7253, "eval_samples_per_second": 346.235, "eval_steps_per_second": 21.652, "step": 1295 }, { "epoch": 5.7915057915057915, "grad_norm": 0.7229886651039124, "learning_rate": 8.281853281853282e-06, "loss": 0.0489, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": { "accuracy": 0.7669248905355338 }, "eval_f1": { "f1": 0.7860035799974232 }, "eval_loss": 1.5993564128875732, "eval_precision": { "precision": 0.7762488236121527 }, "eval_recall": { "recall": 0.8041145184160382 }, "eval_runtime": 25.6577, "eval_samples_per_second": 347.148, "eval_steps_per_second": 21.709, "step": 1554 }, { "epoch": 7.0, "eval_accuracy": { "accuracy": 0.7629953968788593 }, "eval_f1": { "f1": 0.7931621649235138 }, "eval_loss": 1.670336365699768, "eval_precision": { "precision": 0.7944488176619807 }, "eval_recall": { "recall": 0.7984954459796072 }, "eval_runtime": 25.6877, "eval_samples_per_second": 346.742, "eval_steps_per_second": 21.684, "step": 1813 }, { "epoch": 7.722007722007722, "grad_norm": 0.011735806241631508, "learning_rate": 1.0424710424710424e-06, "loss": 0.0123, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": { "accuracy": 0.7683844167508701 }, "eval_f1": { "f1": 0.7959143631565831 }, "eval_loss": 1.6564217805862427, "eval_precision": { "precision": 0.7925798438872362 }, "eval_recall": { "recall": 0.8046718233013863 }, "eval_runtime": 25.8167, "eval_samples_per_second": 345.009, "eval_steps_per_second": 21.575, "step": 2072 } ], "logging_steps": 500, "max_steps": 2072, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4773365380707600.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }