| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9990049751243781, | |
| "eval_steps": 100, | |
| "global_step": 753, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0013266998341625207, | |
| "grad_norm": 40.25, | |
| "learning_rate": 6.578947368421052e-09, | |
| "logits/chosen": -1.2802138328552246, | |
| "logits/rejected": -1.3739961385726929, | |
| "logps/chosen": -584.777587890625, | |
| "logps/rejected": -533.882080078125, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.013266998341625208, | |
| "grad_norm": 54.5, | |
| "learning_rate": 6.578947368421052e-08, | |
| "logits/chosen": -1.1525533199310303, | |
| "logits/rejected": -1.1556764841079712, | |
| "logps/chosen": -577.8804321289062, | |
| "logps/rejected": -498.16986083984375, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.4930555522441864, | |
| "rewards/chosen": 0.00347831379622221, | |
| "rewards/margins": 0.00828113965690136, | |
| "rewards/rejected": -0.004802825395017862, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026533996683250415, | |
| "grad_norm": 44.75, | |
| "learning_rate": 1.3157894736842104e-07, | |
| "logits/chosen": -1.1788235902786255, | |
| "logits/rejected": -1.2242963314056396, | |
| "logps/chosen": -611.204833984375, | |
| "logps/rejected": -543.866455078125, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.0023336124140769243, | |
| "rewards/margins": 0.003125081304460764, | |
| "rewards/rejected": -0.0007914667949080467, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03980099502487562, | |
| "grad_norm": 36.25, | |
| "learning_rate": 1.9736842105263157e-07, | |
| "logits/chosen": -1.158151388168335, | |
| "logits/rejected": -1.1601974964141846, | |
| "logps/chosen": -633.5345458984375, | |
| "logps/rejected": -536.1189575195312, | |
| "loss": 0.6905, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.013027493841946125, | |
| "rewards/margins": 0.015905674546957016, | |
| "rewards/rejected": -0.0028781811706721783, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05306799336650083, | |
| "grad_norm": 42.5, | |
| "learning_rate": 2.631578947368421e-07, | |
| "logits/chosen": -1.19637930393219, | |
| "logits/rejected": -1.22651207447052, | |
| "logps/chosen": -540.5403442382812, | |
| "logps/rejected": -552.4425048828125, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.0009165151277557015, | |
| "rewards/margins": 0.011455372907221317, | |
| "rewards/rejected": -0.01053885743021965, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06633499170812604, | |
| "grad_norm": 48.25, | |
| "learning_rate": 3.2894736842105264e-07, | |
| "logits/chosen": -1.198271632194519, | |
| "logits/rejected": -1.2518165111541748, | |
| "logps/chosen": -579.4686279296875, | |
| "logps/rejected": -571.4375, | |
| "loss": 0.6818, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.005964647978544235, | |
| "rewards/margins": 0.019765758886933327, | |
| "rewards/rejected": -0.013801109977066517, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07960199004975124, | |
| "grad_norm": 39.25, | |
| "learning_rate": 3.9473684210526315e-07, | |
| "logits/chosen": -1.1785616874694824, | |
| "logits/rejected": -1.226552963256836, | |
| "logps/chosen": -595.6961059570312, | |
| "logps/rejected": -572.9510498046875, | |
| "loss": 0.6691, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": 0.022173848003149033, | |
| "rewards/margins": 0.06000928208231926, | |
| "rewards/rejected": -0.03783543407917023, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09286898839137644, | |
| "grad_norm": 44.5, | |
| "learning_rate": 4.6052631578947365e-07, | |
| "logits/chosen": -1.1982749700546265, | |
| "logits/rejected": -1.236537218093872, | |
| "logps/chosen": -634.6461181640625, | |
| "logps/rejected": -673.8761596679688, | |
| "loss": 0.6543, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": 0.02825814113020897, | |
| "rewards/margins": 0.08534505218267441, | |
| "rewards/rejected": -0.05708691477775574, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10613598673300166, | |
| "grad_norm": 36.75, | |
| "learning_rate": 4.999569334646955e-07, | |
| "logits/chosen": -1.0726072788238525, | |
| "logits/rejected": -1.1171576976776123, | |
| "logps/chosen": -614.9038696289062, | |
| "logps/rejected": -572.2459716796875, | |
| "loss": 0.638, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.04197516664862633, | |
| "rewards/margins": 0.1271333396434784, | |
| "rewards/rejected": -0.08515818417072296, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11940298507462686, | |
| "grad_norm": 38.25, | |
| "learning_rate": 4.994726053293702e-07, | |
| "logits/chosen": -1.1955012083053589, | |
| "logits/rejected": -1.2350232601165771, | |
| "logps/chosen": -590.61376953125, | |
| "logps/rejected": -564.5113525390625, | |
| "loss": 0.6237, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": 0.031416941434144974, | |
| "rewards/margins": 0.15016348659992218, | |
| "rewards/rejected": -0.1187465563416481, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13266998341625208, | |
| "grad_norm": 39.0, | |
| "learning_rate": 4.984511621268102e-07, | |
| "logits/chosen": -1.162690281867981, | |
| "logits/rejected": -1.1824209690093994, | |
| "logps/chosen": -573.8049926757812, | |
| "logps/rejected": -528.6422119140625, | |
| "loss": 0.5984, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.038635846227407455, | |
| "rewards/margins": 0.2009139508008957, | |
| "rewards/rejected": -0.16227811574935913, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13266998341625208, | |
| "eval_logits/chosen": -1.139477252960205, | |
| "eval_logits/rejected": -1.160577416419983, | |
| "eval_logps/chosen": -601.4292602539062, | |
| "eval_logps/rejected": -539.8974609375, | |
| "eval_loss": 0.5903807878494263, | |
| "eval_rewards/accuracies": 0.9029850959777832, | |
| "eval_rewards/chosen": 0.05485348403453827, | |
| "eval_rewards/margins": 0.22831708192825317, | |
| "eval_rewards/rejected": -0.1734635829925537, | |
| "eval_runtime": 685.2004, | |
| "eval_samples_per_second": 7.821, | |
| "eval_steps_per_second": 0.489, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14593698175787728, | |
| "grad_norm": 33.5, | |
| "learning_rate": 4.968948030264742e-07, | |
| "logits/chosen": -1.1363273859024048, | |
| "logits/rejected": -1.1610157489776611, | |
| "logps/chosen": -592.44482421875, | |
| "logps/rejected": -573.2866821289062, | |
| "loss": 0.582, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 0.07824783772230148, | |
| "rewards/margins": 0.2655600905418396, | |
| "rewards/rejected": -0.1873122602701187, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15920398009950248, | |
| "grad_norm": 33.25, | |
| "learning_rate": 4.948068788729238e-07, | |
| "logits/chosen": -1.1630527973175049, | |
| "logits/rejected": -1.202096939086914, | |
| "logps/chosen": -579.3135375976562, | |
| "logps/rejected": -578.4791259765625, | |
| "loss": 0.5691, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.07454140484333038, | |
| "rewards/margins": 0.28431838750839233, | |
| "rewards/rejected": -0.20977696776390076, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1724709784411277, | |
| "grad_norm": 35.25, | |
| "learning_rate": 4.921918849714475e-07, | |
| "logits/chosen": -1.185011625289917, | |
| "logits/rejected": -1.1927728652954102, | |
| "logps/chosen": -621.1232299804688, | |
| "logps/rejected": -598.525634765625, | |
| "loss": 0.5436, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 0.07992725074291229, | |
| "rewards/margins": 0.3480888307094574, | |
| "rewards/rejected": -0.2681615948677063, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1857379767827529, | |
| "grad_norm": 31.375, | |
| "learning_rate": 4.890554514096591e-07, | |
| "logits/chosen": -1.1601734161376953, | |
| "logits/rejected": -1.1828594207763672, | |
| "logps/chosen": -589.795654296875, | |
| "logps/rejected": -544.245849609375, | |
| "loss": 0.5303, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.06376471370458603, | |
| "rewards/margins": 0.3611634075641632, | |
| "rewards/rejected": -0.29739871621131897, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19900497512437812, | |
| "grad_norm": 28.25, | |
| "learning_rate": 4.854043309359063e-07, | |
| "logits/chosen": -1.2025436162948608, | |
| "logits/rejected": -1.2480312585830688, | |
| "logps/chosen": -550.0576782226562, | |
| "logps/rejected": -483.212646484375, | |
| "loss": 0.5173, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.08968071639537811, | |
| "rewards/margins": 0.39301761984825134, | |
| "rewards/rejected": -0.30333688855171204, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.21227197346600332, | |
| "grad_norm": 28.125, | |
| "learning_rate": 4.812463844205884e-07, | |
| "logits/chosen": -1.1907384395599365, | |
| "logits/rejected": -1.218056321144104, | |
| "logps/chosen": -612.6536865234375, | |
| "logps/rejected": -552.7808837890625, | |
| "loss": 0.5032, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.12153647094964981, | |
| "rewards/margins": 0.4728039801120758, | |
| "rewards/rejected": -0.3512675166130066, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.22553897180762852, | |
| "grad_norm": 27.125, | |
| "learning_rate": 4.7659056393168604e-07, | |
| "logits/chosen": -1.2418904304504395, | |
| "logits/rejected": -1.3286497592926025, | |
| "logps/chosen": -532.3972778320312, | |
| "logps/rejected": -481.52459716796875, | |
| "loss": 0.5031, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": 0.10282758623361588, | |
| "rewards/margins": 0.44313424825668335, | |
| "rewards/rejected": -0.3403066396713257, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.23880597014925373, | |
| "grad_norm": 27.0, | |
| "learning_rate": 4.714468934609381e-07, | |
| "logits/chosen": -1.2157742977142334, | |
| "logits/rejected": -1.2327635288238525, | |
| "logps/chosen": -541.45361328125, | |
| "logps/rejected": -482.06103515625, | |
| "loss": 0.4824, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.1126769408583641, | |
| "rewards/margins": 0.5232059359550476, | |
| "rewards/rejected": -0.4105289876461029, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.25207296849087896, | |
| "grad_norm": 23.875, | |
| "learning_rate": 4.658264473421659e-07, | |
| "logits/chosen": -1.2209118604660034, | |
| "logits/rejected": -1.2742892503738403, | |
| "logps/chosen": -600.0318603515625, | |
| "logps/rejected": -533.5223388671875, | |
| "loss": 0.4874, | |
| "rewards/accuracies": 0.893750011920929, | |
| "rewards/chosen": 0.09032230079174042, | |
| "rewards/margins": 0.5253477096557617, | |
| "rewards/rejected": -0.4350253939628601, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.26533996683250416, | |
| "grad_norm": 30.125, | |
| "learning_rate": 4.597413264082086e-07, | |
| "logits/chosen": -1.2209361791610718, | |
| "logits/rejected": -1.295668125152588, | |
| "logps/chosen": -573.05859375, | |
| "logps/rejected": -572.3590087890625, | |
| "loss": 0.4622, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.08147934824228287, | |
| "rewards/margins": 0.5580233931541443, | |
| "rewards/rejected": -0.4765440821647644, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.26533996683250416, | |
| "eval_logits/chosen": -1.2179902791976929, | |
| "eval_logits/rejected": -1.271428108215332, | |
| "eval_logps/chosen": -600.8441162109375, | |
| "eval_logps/rejected": -543.1426391601562, | |
| "eval_loss": 0.45805710554122925, | |
| "eval_rewards/accuracies": 0.9350746273994446, | |
| "eval_rewards/chosen": 0.11337064951658249, | |
| "eval_rewards/margins": 0.6113449931144714, | |
| "eval_rewards/rejected": -0.49797430634498596, | |
| "eval_runtime": 676.1331, | |
| "eval_samples_per_second": 7.926, | |
| "eval_steps_per_second": 0.495, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.27860696517412936, | |
| "grad_norm": 25.25, | |
| "learning_rate": 4.5320463193780256e-07, | |
| "logits/chosen": -1.1879713535308838, | |
| "logits/rejected": -1.234440565109253, | |
| "logps/chosen": -570.4061279296875, | |
| "logps/rejected": -540.0577392578125, | |
| "loss": 0.4485, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 0.1254458725452423, | |
| "rewards/margins": 0.5999458432197571, | |
| "rewards/rejected": -0.4744999408721924, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.29187396351575456, | |
| "grad_norm": 22.625, | |
| "learning_rate": 4.4623043744850044e-07, | |
| "logits/chosen": -1.1740987300872803, | |
| "logits/rejected": -1.2075783014297485, | |
| "logps/chosen": -588.6426391601562, | |
| "logps/rejected": -534.1394653320312, | |
| "loss": 0.4551, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.11971279233694077, | |
| "rewards/margins": 0.642697811126709, | |
| "rewards/rejected": -0.5229849815368652, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.30514096185737977, | |
| "grad_norm": 24.875, | |
| "learning_rate": 4.388337583963563e-07, | |
| "logits/chosen": -1.1734439134597778, | |
| "logits/rejected": -1.1642463207244873, | |
| "logps/chosen": -630.2869873046875, | |
| "logps/rejected": -650.8348388671875, | |
| "loss": 0.4326, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.11086304485797882, | |
| "rewards/margins": 0.7283642888069153, | |
| "rewards/rejected": -0.6175012588500977, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.31840796019900497, | |
| "grad_norm": 28.375, | |
| "learning_rate": 4.31030519847616e-07, | |
| "logits/chosen": -1.170459508895874, | |
| "logits/rejected": -1.1728956699371338, | |
| "logps/chosen": -568.0635986328125, | |
| "logps/rejected": -495.91094970703125, | |
| "loss": 0.4293, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.09963791817426682, | |
| "rewards/margins": 0.654728889465332, | |
| "rewards/rejected": -0.5550910234451294, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.33167495854063017, | |
| "grad_norm": 22.875, | |
| "learning_rate": 4.2283752219201464e-07, | |
| "logits/chosen": -1.0885179042816162, | |
| "logits/rejected": -1.133748173713684, | |
| "logps/chosen": -556.9310913085938, | |
| "logps/rejected": -509.1935119628906, | |
| "loss": 0.4244, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.10411699116230011, | |
| "rewards/margins": 0.6922882795333862, | |
| "rewards/rejected": -0.5881712436676025, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3449419568822554, | |
| "grad_norm": 24.75, | |
| "learning_rate": 4.1427240497150047e-07, | |
| "logits/chosen": -1.132869839668274, | |
| "logits/rejected": -1.1097866296768188, | |
| "logps/chosen": -604.4608154296875, | |
| "logps/rejected": -561.1189575195312, | |
| "loss": 0.4112, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.10400505363941193, | |
| "rewards/margins": 0.708010196685791, | |
| "rewards/rejected": -0.6040050983428955, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3582089552238806, | |
| "grad_norm": 27.125, | |
| "learning_rate": 4.053536089022623e-07, | |
| "logits/chosen": -1.1613821983337402, | |
| "logits/rejected": -1.195441722869873, | |
| "logps/chosen": -502.41607666015625, | |
| "logps/rejected": -479.5301208496094, | |
| "loss": 0.4202, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.11941847950220108, | |
| "rewards/margins": 0.7337124347686768, | |
| "rewards/rejected": -0.6142939329147339, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3714759535655058, | |
| "grad_norm": 27.625, | |
| "learning_rate": 3.9610033617182715e-07, | |
| "logits/chosen": -1.1407119035720825, | |
| "logits/rejected": -1.1306835412979126, | |
| "logps/chosen": -632.7389526367188, | |
| "logps/rejected": -621.2080078125, | |
| "loss": 0.4085, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.15029093623161316, | |
| "rewards/margins": 0.8855623006820679, | |
| "rewards/rejected": -0.7352713346481323, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.38474295190713104, | |
| "grad_norm": 20.75, | |
| "learning_rate": 3.865325090967081e-07, | |
| "logits/chosen": -1.1717865467071533, | |
| "logits/rejected": -1.1872893571853638, | |
| "logps/chosen": -561.0062255859375, | |
| "logps/rejected": -548.43603515625, | |
| "loss": 0.4078, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.11881165206432343, | |
| "rewards/margins": 0.7872866988182068, | |
| "rewards/rejected": -0.6684750318527222, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.39800995024875624, | |
| "grad_norm": 23.25, | |
| "learning_rate": 3.7667072722961357e-07, | |
| "logits/chosen": -1.1743600368499756, | |
| "logits/rejected": -1.1946338415145874, | |
| "logps/chosen": -590.4188232421875, | |
| "logps/rejected": -515.8514404296875, | |
| "loss": 0.3934, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.14561176300048828, | |
| "rewards/margins": 0.7691020369529724, | |
| "rewards/rejected": -0.6234902739524841, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.39800995024875624, | |
| "eval_logits/chosen": -1.0754693746566772, | |
| "eval_logits/rejected": -1.0528287887573242, | |
| "eval_logps/chosen": -600.71435546875, | |
| "eval_logps/rejected": -545.3746948242188, | |
| "eval_loss": 0.39592820405960083, | |
| "eval_rewards/accuracies": 0.9365671873092651, | |
| "eval_rewards/chosen": 0.12633956968784332, | |
| "eval_rewards/margins": 0.8475195169448853, | |
| "eval_rewards/rejected": -0.7211799621582031, | |
| "eval_runtime": 694.2423, | |
| "eval_samples_per_second": 7.719, | |
| "eval_steps_per_second": 0.483, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.41127694859038144, | |
| "grad_norm": 22.375, | |
| "learning_rate": 3.6653622300856457e-07, | |
| "logits/chosen": -1.154953956604004, | |
| "logits/rejected": -1.1766210794448853, | |
| "logps/chosen": -573.0366821289062, | |
| "logps/rejected": -538.422119140625, | |
| "loss": 0.3901, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.10485813766717911, | |
| "rewards/margins": 0.8158906698226929, | |
| "rewards/rejected": -0.711032509803772, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.42454394693200664, | |
| "grad_norm": 22.0, | |
| "learning_rate": 3.5615081604340903e-07, | |
| "logits/chosen": -1.196800708770752, | |
| "logits/rejected": -1.248241662979126, | |
| "logps/chosen": -629.1947631835938, | |
| "logps/rejected": -599.6900634765625, | |
| "loss": 0.3898, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.13255654275417328, | |
| "rewards/margins": 0.8788650631904602, | |
| "rewards/rejected": -0.7463085055351257, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.43781094527363185, | |
| "grad_norm": 22.125, | |
| "learning_rate": 3.455368661381543e-07, | |
| "logits/chosen": -1.1678781509399414, | |
| "logits/rejected": -1.178554892539978, | |
| "logps/chosen": -493.3885803222656, | |
| "logps/rejected": -474.68402099609375, | |
| "loss": 0.3899, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.07870842516422272, | |
| "rewards/margins": 0.7927115559577942, | |
| "rewards/rejected": -0.7140030860900879, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.45107794361525705, | |
| "grad_norm": 22.625, | |
| "learning_rate": 3.347172251502598e-07, | |
| "logits/chosen": -1.1612517833709717, | |
| "logits/rejected": -1.1822433471679688, | |
| "logps/chosen": -608.53271484375, | |
| "logps/rejected": -532.9712524414062, | |
| "loss": 0.3763, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.12583430111408234, | |
| "rewards/margins": 0.9212196469306946, | |
| "rewards/rejected": -0.7953853607177734, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.46434494195688225, | |
| "grad_norm": 21.25, | |
| "learning_rate": 3.2371518779053744e-07, | |
| "logits/chosen": -1.101665735244751, | |
| "logits/rejected": -1.0791598558425903, | |
| "logps/chosen": -651.3994750976562, | |
| "logps/rejected": -633.3742065429688, | |
| "loss": 0.3678, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.12759700417518616, | |
| "rewards/margins": 0.9944127798080444, | |
| "rewards/rejected": -0.8668158650398254, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.47761194029850745, | |
| "grad_norm": 24.75, | |
| "learning_rate": 3.1255444146958844e-07, | |
| "logits/chosen": -1.1323697566986084, | |
| "logits/rejected": -1.118276596069336, | |
| "logps/chosen": -565.9261474609375, | |
| "logps/rejected": -552.2655639648438, | |
| "loss": 0.3684, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.06775705516338348, | |
| "rewards/margins": 0.8904596567153931, | |
| "rewards/rejected": -0.8227025270462036, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.49087893864013266, | |
| "grad_norm": 20.875, | |
| "learning_rate": 3.012590152987561e-07, | |
| "logits/chosen": -1.1285905838012695, | |
| "logits/rejected": -1.1064956188201904, | |
| "logps/chosen": -601.677734375, | |
| "logps/rejected": -547.2380981445312, | |
| "loss": 0.3663, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.12407927215099335, | |
| "rewards/margins": 0.9496960639953613, | |
| "rewards/rejected": -0.825616717338562, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5041459369817579, | |
| "grad_norm": 21.125, | |
| "learning_rate": 2.8985322835539626e-07, | |
| "logits/chosen": -1.0900777578353882, | |
| "logits/rejected": -1.0679134130477905, | |
| "logps/chosen": -622.616455078125, | |
| "logps/rejected": -570.1490478515625, | |
| "loss": 0.3705, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.15115661919116974, | |
| "rewards/margins": 0.9384227991104126, | |
| "rewards/rejected": -0.7872661352157593, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5174129353233831, | |
| "grad_norm": 21.375, | |
| "learning_rate": 2.7836163732385063e-07, | |
| "logits/chosen": -1.146226167678833, | |
| "logits/rejected": -1.131203293800354, | |
| "logps/chosen": -637.7056274414062, | |
| "logps/rejected": -580.0550537109375, | |
| "loss": 0.3568, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 0.09901280701160431, | |
| "rewards/margins": 0.9571696519851685, | |
| "rewards/rejected": -0.8581568598747253, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5306799336650083, | |
| "grad_norm": 21.125, | |
| "learning_rate": 2.6680898362485124e-07, | |
| "logits/chosen": -1.0712168216705322, | |
| "logits/rejected": -1.0648881196975708, | |
| "logps/chosen": -538.2913208007812, | |
| "logps/rejected": -526.31689453125, | |
| "loss": 0.3629, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.10081170499324799, | |
| "rewards/margins": 0.889560341835022, | |
| "rewards/rejected": -0.788748562335968, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5306799336650083, | |
| "eval_logits/chosen": -1.1154277324676514, | |
| "eval_logits/rejected": -1.110862135887146, | |
| "eval_logps/chosen": -600.8080444335938, | |
| "eval_logps/rejected": -546.7705078125, | |
| "eval_loss": 0.3673515021800995, | |
| "eval_rewards/accuracies": 0.9380596876144409, | |
| "eval_rewards/chosen": 0.11697468906641006, | |
| "eval_rewards/margins": 0.9777337312698364, | |
| "eval_rewards/rejected": -0.8607590198516846, | |
| "eval_runtime": 712.8834, | |
| "eval_samples_per_second": 7.517, | |
| "eval_steps_per_second": 0.47, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5439469320066335, | |
| "grad_norm": 19.625, | |
| "learning_rate": 2.5522014014718697e-07, | |
| "logits/chosen": -1.0688056945800781, | |
| "logits/rejected": -1.0452687740325928, | |
| "logps/chosen": -548.5653076171875, | |
| "logps/rejected": -500.2899475097656, | |
| "loss": 0.3686, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.09097462147474289, | |
| "rewards/margins": 0.8869258761405945, | |
| "rewards/rejected": -0.7959513664245605, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5572139303482587, | |
| "grad_norm": 20.75, | |
| "learning_rate": 2.436200576963198e-07, | |
| "logits/chosen": -1.1284773349761963, | |
| "logits/rejected": -1.0750479698181152, | |
| "logps/chosen": -570.462890625, | |
| "logps/rejected": -484.501708984375, | |
| "loss": 0.3638, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.1417674571275711, | |
| "rewards/margins": 0.9661204218864441, | |
| "rewards/rejected": -0.8243529200553894, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5704809286898839, | |
| "grad_norm": 26.625, | |
| "learning_rate": 2.3203371127524588e-07, | |
| "logits/chosen": -1.142064094543457, | |
| "logits/rejected": -1.0941470861434937, | |
| "logps/chosen": -540.7872314453125, | |
| "logps/rejected": -460.7454528808594, | |
| "loss": 0.3751, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.09556931257247925, | |
| "rewards/margins": 0.9223111271858215, | |
| "rewards/rejected": -0.8267418742179871, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5837479270315091, | |
| "grad_norm": 21.75, | |
| "learning_rate": 2.2048604631325892e-07, | |
| "logits/chosen": -1.0380522012710571, | |
| "logits/rejected": -1.036592721939087, | |
| "logps/chosen": -568.5135498046875, | |
| "logps/rejected": -558.4591064453125, | |
| "loss": 0.3722, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.05709138512611389, | |
| "rewards/margins": 0.89503014087677, | |
| "rewards/rejected": -0.837938666343689, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 20.5, | |
| "learning_rate": 2.0900192495838615e-07, | |
| "logits/chosen": -1.0975573062896729, | |
| "logits/rejected": -1.0408273935317993, | |
| "logps/chosen": -531.0595703125, | |
| "logps/rejected": -478.84222412109375, | |
| "loss": 0.3591, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.07921108603477478, | |
| "rewards/margins": 0.9134254455566406, | |
| "rewards/rejected": -0.8342143893241882, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6102819237147595, | |
| "grad_norm": 18.5, | |
| "learning_rate": 1.9760607254912926e-07, | |
| "logits/chosen": -1.0876163244247437, | |
| "logits/rejected": -1.0495961904525757, | |
| "logps/chosen": -616.9581298828125, | |
| "logps/rejected": -538.6895751953125, | |
| "loss": 0.3562, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.09119832515716553, | |
| "rewards/margins": 0.9315482974052429, | |
| "rewards/rejected": -0.8403499722480774, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6235489220563848, | |
| "grad_norm": 20.75, | |
| "learning_rate": 1.8632302438075613e-07, | |
| "logits/chosen": -1.1088566780090332, | |
| "logits/rejected": -1.1191766262054443, | |
| "logps/chosen": -589.3919067382812, | |
| "logps/rejected": -567.9808349609375, | |
| "loss": 0.3629, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.1344141662120819, | |
| "rewards/margins": 1.0434377193450928, | |
| "rewards/rejected": -0.9090234637260437, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6368159203980099, | |
| "grad_norm": 18.625, | |
| "learning_rate": 1.7517707288075614e-07, | |
| "logits/chosen": -1.106209635734558, | |
| "logits/rejected": -1.1109434366226196, | |
| "logps/chosen": -565.6036987304688, | |
| "logps/rejected": -519.3636474609375, | |
| "loss": 0.3464, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.09805373102426529, | |
| "rewards/margins": 0.9947841763496399, | |
| "rewards/rejected": -0.8967304229736328, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6500829187396352, | |
| "grad_norm": 20.5, | |
| "learning_rate": 1.641922153071906e-07, | |
| "logits/chosen": -1.0548484325408936, | |
| "logits/rejected": -1.0250844955444336, | |
| "logps/chosen": -573.2247924804688, | |
| "logps/rejected": -566.44970703125, | |
| "loss": 0.365, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.09521574527025223, | |
| "rewards/margins": 1.005110740661621, | |
| "rewards/rejected": -0.9098949432373047, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6633499170812603, | |
| "grad_norm": 19.125, | |
| "learning_rate": 1.5339210208254344e-07, | |
| "logits/chosen": -1.060248613357544, | |
| "logits/rejected": -1.0458314418792725, | |
| "logps/chosen": -541.8770751953125, | |
| "logps/rejected": -513.5958251953125, | |
| "loss": 0.3556, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.11755643784999847, | |
| "rewards/margins": 0.9871570467948914, | |
| "rewards/rejected": -0.8696004748344421, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6633499170812603, | |
| "eval_logits/chosen": -0.9289145469665527, | |
| "eval_logits/rejected": -0.8265557885169983, | |
| "eval_logps/chosen": -600.8418579101562, | |
| "eval_logps/rejected": -547.3089599609375, | |
| "eval_loss": 0.3561394512653351, | |
| "eval_rewards/accuracies": 0.9388059973716736, | |
| "eval_rewards/chosen": 0.11359576135873795, | |
| "eval_rewards/margins": 1.0282028913497925, | |
| "eval_rewards/rejected": -0.9146071672439575, | |
| "eval_runtime": 731.2224, | |
| "eval_samples_per_second": 7.329, | |
| "eval_steps_per_second": 0.458, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6766169154228856, | |
| "grad_norm": 19.75, | |
| "learning_rate": 1.4279998587430943e-07, | |
| "logits/chosen": -1.0720138549804688, | |
| "logits/rejected": -1.0440585613250732, | |
| "logps/chosen": -576.8155517578125, | |
| "logps/rejected": -474.7138671875, | |
| "loss": 0.3397, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.12137231975793839, | |
| "rewards/margins": 1.0447041988372803, | |
| "rewards/rejected": -0.9233318567276001, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6898839137645107, | |
| "grad_norm": 19.375, | |
| "learning_rate": 1.324386715319503e-07, | |
| "logits/chosen": -1.0745595693588257, | |
| "logits/rejected": -1.0517549514770508, | |
| "logps/chosen": -547.5264282226562, | |
| "logps/rejected": -513.98974609375, | |
| "loss": 0.3567, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.12890011072158813, | |
| "rewards/margins": 0.9725991487503052, | |
| "rewards/rejected": -0.8436989784240723, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.703150912106136, | |
| "grad_norm": 20.125, | |
| "learning_rate": 1.2233046698800343e-07, | |
| "logits/chosen": -1.0820659399032593, | |
| "logits/rejected": -1.0528825521469116, | |
| "logps/chosen": -623.875, | |
| "logps/rejected": -623.9129638671875, | |
| "loss": 0.3541, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.141106516122818, | |
| "rewards/margins": 1.074299931526184, | |
| "rewards/rejected": -0.9331933856010437, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7164179104477612, | |
| "grad_norm": 20.625, | |
| "learning_rate": 1.124971352290545e-07, | |
| "logits/chosen": -1.108722448348999, | |
| "logits/rejected": -1.0873199701309204, | |
| "logps/chosen": -593.7824096679688, | |
| "logps/rejected": -559.232421875, | |
| "loss": 0.337, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.1328240931034088, | |
| "rewards/margins": 1.019162654876709, | |
| "rewards/rejected": -0.8863385915756226, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7296849087893864, | |
| "grad_norm": 21.75, | |
| "learning_rate": 1.0295984743997909e-07, | |
| "logits/chosen": -1.085311770439148, | |
| "logits/rejected": -1.0750799179077148, | |
| "logps/chosen": -602.3040161132812, | |
| "logps/rejected": -558.0685424804688, | |
| "loss": 0.3544, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.08248591423034668, | |
| "rewards/margins": 1.015809178352356, | |
| "rewards/rejected": -0.9333232641220093, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7429519071310116, | |
| "grad_norm": 20.375, | |
| "learning_rate": 9.37391374223355e-08, | |
| "logits/chosen": -1.1596343517303467, | |
| "logits/rejected": -1.1693814992904663, | |
| "logps/chosen": -583.8175659179688, | |
| "logps/rejected": -574.1356811523438, | |
| "loss": 0.3416, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.1034403070807457, | |
| "rewards/margins": 1.0356991291046143, | |
| "rewards/rejected": -0.9322587251663208, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7562189054726368, | |
| "grad_norm": 17.875, | |
| "learning_rate": 8.485485738504488e-08, | |
| "logits/chosen": -1.1387842893600464, | |
| "logits/rejected": -1.108246922492981, | |
| "logps/chosen": -627.6776123046875, | |
| "logps/rejected": -469.2587890625, | |
| "loss": 0.3468, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.1544940024614334, | |
| "rewards/margins": 1.0551023483276367, | |
| "rewards/rejected": -0.900608241558075, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7694859038142621, | |
| "grad_norm": 19.25, | |
| "learning_rate": 7.632613520254158e-08, | |
| "logits/chosen": -1.0649652481079102, | |
| "logits/rejected": -1.064888834953308, | |
| "logps/chosen": -601.84033203125, | |
| "logps/rejected": -529.4444580078125, | |
| "loss": 0.359, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.10679004341363907, | |
| "rewards/margins": 1.0746941566467285, | |
| "rewards/rejected": -0.9679039716720581, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7827529021558872, | |
| "grad_norm": 20.75, | |
| "learning_rate": 6.817133323241755e-08, | |
| "logits/chosen": -1.1302725076675415, | |
| "logits/rejected": -1.1106232404708862, | |
| "logps/chosen": -675.71533203125, | |
| "logps/rejected": -525.1083984375, | |
| "loss": 0.3478, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 0.09434429556131363, | |
| "rewards/margins": 1.0678186416625977, | |
| "rewards/rejected": -0.9734743237495422, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7960199004975125, | |
| "grad_norm": 22.25, | |
| "learning_rate": 6.040800878122654e-08, | |
| "logits/chosen": -1.1192970275878906, | |
| "logits/rejected": -1.1343142986297607, | |
| "logps/chosen": -567.078125, | |
| "logps/rejected": -525.439697265625, | |
| "loss": 0.3488, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.09638460725545883, | |
| "rewards/margins": 0.9892334938049316, | |
| "rewards/rejected": -0.8928488492965698, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7960199004975125, | |
| "eval_logits/chosen": -1.0876879692077637, | |
| "eval_logits/rejected": -1.067589521408081, | |
| "eval_logps/chosen": -600.8737182617188, | |
| "eval_logps/rejected": -547.4734497070312, | |
| "eval_loss": 0.35399559140205383, | |
| "eval_rewards/accuracies": 0.9410447478294373, | |
| "eval_rewards/chosen": 0.11041063815355301, | |
| "eval_rewards/margins": 1.0414601564407349, | |
| "eval_rewards/rejected": -0.9310495853424072, | |
| "eval_runtime": 680.93, | |
| "eval_samples_per_second": 7.87, | |
| "eval_steps_per_second": 0.492, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8092868988391376, | |
| "grad_norm": 20.625, | |
| "learning_rate": 5.305287630356362e-08, | |
| "logits/chosen": -1.1514161825180054, | |
| "logits/rejected": -1.1489306688308716, | |
| "logps/chosen": -603.4745483398438, | |
| "logps/rejected": -562.2152709960938, | |
| "loss": 0.3556, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.08816297352313995, | |
| "rewards/margins": 1.0151373147964478, | |
| "rewards/rejected": -0.9269744157791138, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8225538971807629, | |
| "grad_norm": 23.375, | |
| "learning_rate": 4.612177141580875e-08, | |
| "logits/chosen": -1.0614503622055054, | |
| "logits/rejected": -1.0462639331817627, | |
| "logps/chosen": -600.1735229492188, | |
| "logps/rejected": -563.9249877929688, | |
| "loss": 0.3555, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.1291816681623459, | |
| "rewards/margins": 1.0133174657821655, | |
| "rewards/rejected": -0.8841358423233032, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.835820895522388, | |
| "grad_norm": 19.25, | |
| "learning_rate": 3.962961680200927e-08, | |
| "logits/chosen": -1.154007911682129, | |
| "logits/rejected": -1.1668691635131836, | |
| "logps/chosen": -587.5554809570312, | |
| "logps/rejected": -579.425537109375, | |
| "loss": 0.3591, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": 0.1071285754442215, | |
| "rewards/margins": 1.01383376121521, | |
| "rewards/rejected": -0.9067050814628601, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8490878938640133, | |
| "grad_norm": 19.625, | |
| "learning_rate": 3.359039008530845e-08, | |
| "logits/chosen": -1.1280542612075806, | |
| "logits/rejected": -1.1074917316436768, | |
| "logps/chosen": -638.248779296875, | |
| "logps/rejected": -570.1997680664062, | |
| "loss": 0.3504, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.10942580550909042, | |
| "rewards/margins": 1.0872418880462646, | |
| "rewards/rejected": -0.9778162240982056, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8623548922056384, | |
| "grad_norm": 21.125, | |
| "learning_rate": 2.8017093734092474e-08, | |
| "logits/chosen": -1.0559157133102417, | |
| "logits/rejected": -0.9880287051200867, | |
| "logps/chosen": -617.8060302734375, | |
| "logps/rejected": -542.3763427734375, | |
| "loss": 0.3563, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.06925593316555023, | |
| "rewards/margins": 1.0108643770217896, | |
| "rewards/rejected": -0.9416083097457886, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8756218905472637, | |
| "grad_norm": 20.25, | |
| "learning_rate": 2.292172706764703e-08, | |
| "logits/chosen": -1.0475237369537354, | |
| "logits/rejected": -1.0124943256378174, | |
| "logps/chosen": -625.7453002929688, | |
| "logps/rejected": -627.2447509765625, | |
| "loss": 0.3567, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.11004464328289032, | |
| "rewards/margins": 1.0592918395996094, | |
| "rewards/rejected": -0.9492471814155579, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 19.875, | |
| "learning_rate": 1.8315260421596924e-08, | |
| "logits/chosen": -1.16936457157135, | |
| "logits/rejected": -1.1426036357879639, | |
| "logps/chosen": -555.6038818359375, | |
| "logps/rejected": -494.2486267089844, | |
| "loss": 0.3509, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.12268821895122528, | |
| "rewards/margins": 0.9934048652648926, | |
| "rewards/rejected": -0.8707167506217957, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9021558872305141, | |
| "grad_norm": 20.125, | |
| "learning_rate": 1.4207611528748997e-08, | |
| "logits/chosen": -1.122236967086792, | |
| "logits/rejected": -1.0928575992584229, | |
| "logps/chosen": -568.34765625, | |
| "logps/rejected": -548.4368896484375, | |
| "loss": 0.3548, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.10156118869781494, | |
| "rewards/margins": 1.01285719871521, | |
| "rewards/rejected": -0.9112960696220398, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9154228855721394, | |
| "grad_norm": 20.875, | |
| "learning_rate": 1.0607624166191958e-08, | |
| "logits/chosen": -1.102480411529541, | |
| "logits/rejected": -1.097570776939392, | |
| "logps/chosen": -670.6092529296875, | |
| "logps/rejected": -724.0338134765625, | |
| "loss": 0.3508, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": 0.122245192527771, | |
| "rewards/margins": 1.0727375745773315, | |
| "rewards/rejected": -0.9504923820495605, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9286898839137645, | |
| "grad_norm": 20.75, | |
| "learning_rate": 7.523049114624647e-09, | |
| "logits/chosen": -1.067058801651001, | |
| "logits/rejected": -1.0042006969451904, | |
| "logps/chosen": -610.7342529296875, | |
| "logps/rejected": -569.3170776367188, | |
| "loss": 0.3563, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 0.13328425586223602, | |
| "rewards/margins": 1.0865224599838257, | |
| "rewards/rejected": -0.9532381296157837, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9286898839137645, | |
| "eval_logits/chosen": -0.9600119590759277, | |
| "eval_logits/rejected": -0.8735809922218323, | |
| "eval_logps/chosen": -600.8121337890625, | |
| "eval_logps/rejected": -547.42236328125, | |
| "eval_loss": 0.3540438711643219, | |
| "eval_rewards/accuracies": 0.9395522475242615, | |
| "eval_rewards/chosen": 0.11656844615936279, | |
| "eval_rewards/margins": 1.0425076484680176, | |
| "eval_rewards/rejected": -0.92593914270401, | |
| "eval_runtime": 733.9957, | |
| "eval_samples_per_second": 7.301, | |
| "eval_steps_per_second": 0.456, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9419568822553898, | |
| "grad_norm": 20.25, | |
| "learning_rate": 4.960527470908277e-09, | |
| "logits/chosen": -0.9644180536270142, | |
| "logits/rejected": -0.860200047492981, | |
| "logps/chosen": -622.1219482421875, | |
| "logps/rejected": -567.1380615234375, | |
| "loss": 0.3555, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.11110372841358185, | |
| "rewards/margins": 1.0618221759796143, | |
| "rewards/rejected": -0.9507185220718384, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.9552238805970149, | |
| "grad_norm": 19.75, | |
| "learning_rate": 2.925576349770337e-09, | |
| "logits/chosen": -0.9986553192138672, | |
| "logits/rejected": -0.8984715342521667, | |
| "logps/chosen": -605.7318725585938, | |
| "logps/rejected": -542.7632446289062, | |
| "loss": 0.359, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": 0.10133838653564453, | |
| "rewards/margins": 0.9883183240890503, | |
| "rewards/rejected": -0.886979877948761, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9684908789386402, | |
| "grad_norm": 21.25, | |
| "learning_rate": 1.4225770054443197e-09, | |
| "logits/chosen": -0.9282068014144897, | |
| "logits/rejected": -0.8550642132759094, | |
| "logps/chosen": -571.7738037109375, | |
| "logps/rejected": -500.0634765625, | |
| "loss": 0.3571, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 0.10189428180456161, | |
| "rewards/margins": 1.0126664638519287, | |
| "rewards/rejected": -0.9107722043991089, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9817578772802653, | |
| "grad_norm": 19.625, | |
| "learning_rate": 4.547653988198619e-10, | |
| "logits/chosen": -0.9236332774162292, | |
| "logits/rejected": -0.8542205095291138, | |
| "logps/chosen": -632.85546875, | |
| "logps/rejected": -597.6421508789062, | |
| "loss": 0.3493, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": 0.147782564163208, | |
| "rewards/margins": 1.067484736442566, | |
| "rewards/rejected": -0.9197022318840027, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9950248756218906, | |
| "grad_norm": 20.5, | |
| "learning_rate": 2.4225230411789588e-11, | |
| "logits/chosen": -0.9963258504867554, | |
| "logits/rejected": -0.8823927044868469, | |
| "logps/chosen": -617.5396728515625, | |
| "logps/rejected": -596.9856567382812, | |
| "loss": 0.36, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.14549708366394043, | |
| "rewards/margins": 1.0541255474090576, | |
| "rewards/rejected": -0.9086285829544067, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9990049751243781, | |
| "step": 753, | |
| "total_flos": 0.0, | |
| "train_loss": 0.4291752041731856, | |
| "train_runtime": 22849.6211, | |
| "train_samples_per_second": 2.111, | |
| "train_steps_per_second": 0.033 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 753, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |