{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9777777777777777, "eval_steps": 500, "global_step": 22, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.044444444444444446, "grad_norm": 0.08367291093673593, "learning_rate": 0.0, "loss": 0.5482, "num_tokens": 6288444.0, "step": 1 }, { "epoch": 0.08888888888888889, "grad_norm": 0.07875395721115776, "learning_rate": 4e-05, "loss": 0.5655, "num_tokens": 12577300.0, "step": 2 }, { "epoch": 0.13333333333333333, "grad_norm": 0.07913921006018489, "learning_rate": 2.2000000000000003e-05, "loss": 0.5542, "num_tokens": 18860690.0, "step": 3 }, { "epoch": 0.17777777777777778, "grad_norm": 0.08049450634750233, "learning_rate": 4.000000000000001e-06, "loss": 0.559, "num_tokens": 6288873.0, "step": 4 }, { "epoch": 0.2222222222222222, "grad_norm": 0.07849054673300468, "learning_rate": 4e-05, "loss": 0.5752, "num_tokens": 12566970.0, "step": 5 }, { "epoch": 0.26666666666666666, "grad_norm": 0.08551302230509794, "learning_rate": 3.9992791128129796e-05, "loss": 0.5581, "num_tokens": 18850402.0, "step": 6 }, { "epoch": 0.3111111111111111, "grad_norm": 0.08689858561178568, "learning_rate": 3.99711702867229e-05, "loss": 0.557, "num_tokens": 25107301.0, "step": 7 }, { "epoch": 0.35555555555555557, "grad_norm": 0.08889582972451578, "learning_rate": 3.993515479376548e-05, "loss": 0.5469, "num_tokens": 31386429.0, "step": 8 }, { "epoch": 0.4, "grad_norm": 0.08695144533471684, "learning_rate": 3.988477349715466e-05, "loss": 0.5543, "num_tokens": 37667761.0, "step": 9 }, { "epoch": 0.4444444444444444, "grad_norm": 0.08862002835267728, "learning_rate": 3.982006675159175e-05, "loss": 0.5712, "num_tokens": 43955337.0, "step": 10 }, { "epoch": 0.4888888888888889, "grad_norm": 0.0858590512367662, "learning_rate": 3.9741086386258753e-05, "loss": 0.5349, "num_tokens": 50223401.0, "step": 11 }, { "epoch": 0.5333333333333333, "grad_norm": 0.08169881690710488, "learning_rate": 3.964789566330388e-05, "loss": 0.5367, "num_tokens": 56512071.0, "step": 12 }, { "epoch": 0.5777777777777777, "grad_norm": 0.07849820932974692, "learning_rate": 3.95405692271695e-05, "loss": 0.5307, "num_tokens": 62766485.0, "step": 13 }, { "epoch": 0.6222222222222222, "grad_norm": 0.07377884515816503, "learning_rate": 3.9419193044802986e-05, "loss": 0.5196, "num_tokens": 69055179.0, "step": 14 }, { "epoch": 0.6666666666666666, "grad_norm": 0.06953990227189195, "learning_rate": 3.928386433679841e-05, "loss": 0.5595, "num_tokens": 75311700.0, "step": 15 }, { "epoch": 0.7111111111111111, "grad_norm": 0.06533799864475098, "learning_rate": 3.9134691499524276e-05, "loss": 0.4955, "num_tokens": 81587667.0, "step": 16 }, { "epoch": 0.7555555555555555, "grad_norm": 0.061111534293718496, "learning_rate": 3.897179401829952e-05, "loss": 0.5072, "num_tokens": 87869515.0, "step": 17 }, { "epoch": 0.8, "grad_norm": 0.05929839410326827, "learning_rate": 3.879530237168747e-05, "loss": 0.5303, "num_tokens": 94116999.0, "step": 18 }, { "epoch": 0.8444444444444444, "grad_norm": 0.05478787891939966, "learning_rate": 3.860535792698444e-05, "loss": 0.5053, "num_tokens": 100391937.0, "step": 19 }, { "epoch": 0.8888888888888888, "grad_norm": 0.04990273451939609, "learning_rate": 3.840211282698645e-05, "loss": 0.4987, "num_tokens": 106680748.0, "step": 20 }, { "epoch": 0.9333333333333333, "grad_norm": 0.04677345013018686, "learning_rate": 3.8185729868125066e-05, "loss": 0.5096, "num_tokens": 112932721.0, "step": 21 }, { "epoch": 0.9777777777777777, "grad_norm": 0.04489959314534258, "learning_rate": 3.795638237006978e-05, "loss": 0.5066, "num_tokens": 119215789.0, "step": 22 } ], "logging_steps": 1, "max_steps": 115, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.260618810962739e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }