{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 50.0, "global_step": 86, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0237741456166419, "grad_norm": 3.34375, "learning_rate": 2e-05, "loss": 1.1976238489151, "step": 1, "token_acc": 0.7184428746928747 }, { "epoch": 0.1188707280832095, "grad_norm": 3.765625, "learning_rate": 0.0001, "loss": 1.2833234071731567, "step": 5, "token_acc": 0.6957437157113635 }, { "epoch": 0.237741456166419, "grad_norm": 300.0, "learning_rate": 9.906276553136923e-05, "loss": 2.3091596603393554, "step": 10, "token_acc": 0.6008980821950969 }, { "epoch": 0.35661218424962854, "grad_norm": 3.28125, "learning_rate": 9.628619846344454e-05, "loss": 1.3249249458312988, "step": 15, "token_acc": 0.7083573751573646 }, { "epoch": 0.475482912332838, "grad_norm": 2.203125, "learning_rate": 9.177439057064683e-05, "loss": 0.9449030876159668, "step": 20, "token_acc": 0.7733868243825437 }, { "epoch": 0.5943536404160475, "grad_norm": 1.4375, "learning_rate": 8.569648672789497e-05, "loss": 0.804572582244873, "step": 25, "token_acc": 0.8030009224918576 }, { "epoch": 0.7132243684992571, "grad_norm": 1.1953125, "learning_rate": 7.828034377432693e-05, "loss": 0.6343977928161622, "step": 30, "token_acc": 0.8435477802859292 }, { "epoch": 0.8320950965824666, "grad_norm": 1.0546875, "learning_rate": 6.980398830195785e-05, "loss": 0.3958749771118164, "step": 35, "token_acc": 0.8998997800202961 }, { "epoch": 0.950965824665676, "grad_norm": 0.74609375, "learning_rate": 6.058519361147055e-05, "loss": 0.38391425609588625, "step": 40, "token_acc": 0.9056173526140155 }, { "epoch": 1.0475482912332839, "grad_norm": 0.82421875, "learning_rate": 5.096956658859122e-05, "loss": 0.29045734405517576, "step": 45, "token_acc": 0.9264699310283186 }, { "epoch": 1.1664190193164934, "grad_norm": 0.87109375, "learning_rate": 4.131759111665349e-05, "loss": 0.13265597820281982, "step": 50, "token_acc": 0.9676710929519918 }, { "epoch": 1.2852897473997027, "grad_norm": 0.48046875, "learning_rate": 3.199111375976449e-05, "loss": 0.11603926420211792, "step": 55, "token_acc": 0.9712261616975557 }, { "epoch": 1.4041604754829122, "grad_norm": 0.5703125, "learning_rate": 2.333977835991545e-05, "loss": 0.09179887771606446, "step": 60, "token_acc": 0.9757825780724563 }, { "epoch": 1.5230312035661218, "grad_norm": 0.59765625, "learning_rate": 1.5687918106563326e-05, "loss": 0.11050317287445069, "step": 65, "token_acc": 0.9738332162768583 }, { "epoch": 1.6419019316493313, "grad_norm": 0.69140625, "learning_rate": 9.322396486851626e-06, "loss": 0.0894925355911255, "step": 70, "token_acc": 0.9766682632919051 }, { "epoch": 1.7607726597325408, "grad_norm": 0.58984375, "learning_rate": 4.4818529516926726e-06, "loss": 0.0985899806022644, "step": 75, "token_acc": 0.9736953912894402 }, { "epoch": 1.8796433878157504, "grad_norm": 0.6640625, "learning_rate": 1.3477564710088098e-06, "loss": 0.11772974729537963, "step": 80, "token_acc": 0.970642493007756 }, { "epoch": 1.9985141158989599, "grad_norm": 0.72265625, "learning_rate": 3.760237478849793e-08, "loss": 0.10874439477920532, "step": 85, "token_acc": 0.9721623048495361 } ], "logging_steps": 5, "max_steps": 86, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3391554666248192e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }