{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 51093, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005871645822323997, "grad_norm": 11.852810859680176, "learning_rate": 1.9569471624266147e-08, "loss": 1.3568, "step": 10 }, { "epoch": 0.0011743291644647994, "grad_norm": 9.351451873779297, "learning_rate": 3.9138943248532294e-08, "loss": 1.1198, "step": 20 }, { "epoch": 0.0017614937466971991, "grad_norm": 11.812681198120117, "learning_rate": 5.870841487279844e-08, "loss": 1.3078, "step": 30 }, { "epoch": 0.002348658328929599, "grad_norm": 21.447154998779297, "learning_rate": 7.827788649706459e-08, "loss": 1.3053, "step": 40 }, { "epoch": 0.0029358229111619985, "grad_norm": 9.820186614990234, "learning_rate": 9.784735812133072e-08, "loss": 1.1172, "step": 50 }, { "epoch": 0.0035229874933943982, "grad_norm": 5.767258167266846, "learning_rate": 1.1741682974559687e-07, "loss": 1.296, "step": 60 }, { "epoch": 0.004110152075626798, "grad_norm": 17.715818405151367, "learning_rate": 1.36986301369863e-07, "loss": 0.996, "step": 70 }, { "epoch": 0.004697316657859198, "grad_norm": 7.453824520111084, "learning_rate": 1.5655577299412917e-07, "loss": 1.2415, "step": 80 }, { "epoch": 0.005284481240091597, "grad_norm": 10.603113174438477, "learning_rate": 1.761252446183953e-07, "loss": 1.0423, "step": 90 }, { "epoch": 0.005871645822323997, "grad_norm": 6.129235744476318, "learning_rate": 1.9569471624266145e-07, "loss": 1.2711, "step": 100 }, { "epoch": 0.006458810404556397, "grad_norm": 5.113358020782471, "learning_rate": 2.152641878669276e-07, "loss": 1.289, "step": 110 }, { "epoch": 0.0070459749867887965, "grad_norm": 25.557531356811523, "learning_rate": 2.3483365949119375e-07, "loss": 1.3975, "step": 120 }, { "epoch": 0.007633139569021197, "grad_norm": 8.283552169799805, "learning_rate": 2.544031311154599e-07, "loss": 1.058, "step": 130 }, { "epoch": 0.008220304151253596, "grad_norm": 6.104159832000732, "learning_rate": 2.73972602739726e-07, "loss": 1.2815, "step": 140 }, { "epoch": 0.008807468733485996, "grad_norm": 8.834199905395508, "learning_rate": 2.9354207436399216e-07, "loss": 1.3457, "step": 150 }, { "epoch": 0.009394633315718395, "grad_norm": 13.238929748535156, "learning_rate": 3.1311154598825835e-07, "loss": 1.2357, "step": 160 }, { "epoch": 0.009981797897950796, "grad_norm": 5.2566447257995605, "learning_rate": 3.326810176125245e-07, "loss": 1.2521, "step": 170 }, { "epoch": 0.010568962480183195, "grad_norm": 9.053670883178711, "learning_rate": 3.522504892367906e-07, "loss": 1.0687, "step": 180 }, { "epoch": 0.011156127062415595, "grad_norm": 4.995806694030762, "learning_rate": 3.7181996086105676e-07, "loss": 1.2706, "step": 190 }, { "epoch": 0.011743291644647994, "grad_norm": 6.305687427520752, "learning_rate": 3.913894324853229e-07, "loss": 0.9625, "step": 200 }, { "epoch": 0.012330456226880395, "grad_norm": 4.994568824768066, "learning_rate": 4.1095890410958903e-07, "loss": 0.9998, "step": 210 }, { "epoch": 0.012917620809112794, "grad_norm": 7.02569055557251, "learning_rate": 4.305283757338552e-07, "loss": 1.4161, "step": 220 }, { "epoch": 0.013504785391345194, "grad_norm": 6.936280727386475, "learning_rate": 4.5009784735812136e-07, "loss": 0.9161, "step": 230 }, { "epoch": 0.014091949973577593, "grad_norm": 12.678434371948242, "learning_rate": 4.696673189823875e-07, "loss": 1.1194, "step": 240 }, { "epoch": 0.014679114555809994, "grad_norm": 8.697948455810547, "learning_rate": 4.892367906066536e-07, "loss": 1.1259, "step": 250 }, { "epoch": 0.015266279138042394, "grad_norm": 17.974035263061523, "learning_rate": 5.088062622309198e-07, "loss": 0.958, "step": 260 }, { "epoch": 0.015853443720274793, "grad_norm": 4.115818023681641, "learning_rate": 5.283757338551859e-07, "loss": 0.9879, "step": 270 }, { "epoch": 0.016440608302507192, "grad_norm": 10.122729301452637, "learning_rate": 5.47945205479452e-07, "loss": 1.0335, "step": 280 }, { "epoch": 0.017027772884739594, "grad_norm": 5.268524169921875, "learning_rate": 5.675146771037182e-07, "loss": 1.0711, "step": 290 }, { "epoch": 0.017614937466971993, "grad_norm": 8.405926704406738, "learning_rate": 5.870841487279843e-07, "loss": 1.1831, "step": 300 }, { "epoch": 0.018202102049204392, "grad_norm": 2.7645773887634277, "learning_rate": 6.066536203522505e-07, "loss": 1.0622, "step": 310 }, { "epoch": 0.01878926663143679, "grad_norm": 2.216038703918457, "learning_rate": 6.262230919765167e-07, "loss": 1.0094, "step": 320 }, { "epoch": 0.019376431213669193, "grad_norm": 2.966169595718384, "learning_rate": 6.457925636007828e-07, "loss": 1.0211, "step": 330 }, { "epoch": 0.019963595795901592, "grad_norm": 6.286742687225342, "learning_rate": 6.65362035225049e-07, "loss": 1.047, "step": 340 }, { "epoch": 0.02055076037813399, "grad_norm": 3.261387348175049, "learning_rate": 6.849315068493151e-07, "loss": 0.993, "step": 350 }, { "epoch": 0.02113792496036639, "grad_norm": 11.515077590942383, "learning_rate": 7.045009784735812e-07, "loss": 1.1738, "step": 360 }, { "epoch": 0.021725089542598792, "grad_norm": 7.518805980682373, "learning_rate": 7.240704500978474e-07, "loss": 1.1581, "step": 370 }, { "epoch": 0.02231225412483119, "grad_norm": 7.710671901702881, "learning_rate": 7.436399217221135e-07, "loss": 1.1568, "step": 380 }, { "epoch": 0.02289941870706359, "grad_norm": 6.162614822387695, "learning_rate": 7.632093933463797e-07, "loss": 1.0455, "step": 390 }, { "epoch": 0.02348658328929599, "grad_norm": 6.587527751922607, "learning_rate": 7.827788649706458e-07, "loss": 0.7843, "step": 400 }, { "epoch": 0.02407374787152839, "grad_norm": 4.370530605316162, "learning_rate": 8.023483365949119e-07, "loss": 0.9784, "step": 410 }, { "epoch": 0.02466091245376079, "grad_norm": 9.330163955688477, "learning_rate": 8.219178082191781e-07, "loss": 0.9635, "step": 420 }, { "epoch": 0.02524807703599319, "grad_norm": 7.629013538360596, "learning_rate": 8.414872798434442e-07, "loss": 0.9926, "step": 430 }, { "epoch": 0.025835241618225587, "grad_norm": 6.095184803009033, "learning_rate": 8.610567514677104e-07, "loss": 0.9074, "step": 440 }, { "epoch": 0.02642240620045799, "grad_norm": 4.830789089202881, "learning_rate": 8.806262230919766e-07, "loss": 1.0836, "step": 450 }, { "epoch": 0.02700957078269039, "grad_norm": 11.666213989257812, "learning_rate": 9.001956947162427e-07, "loss": 0.969, "step": 460 }, { "epoch": 0.027596735364922787, "grad_norm": 2.2569453716278076, "learning_rate": 9.197651663405089e-07, "loss": 0.8344, "step": 470 }, { "epoch": 0.028183899947155186, "grad_norm": 4.119716644287109, "learning_rate": 9.39334637964775e-07, "loss": 0.852, "step": 480 }, { "epoch": 0.02877106452938759, "grad_norm": 13.943022727966309, "learning_rate": 9.589041095890411e-07, "loss": 1.1291, "step": 490 }, { "epoch": 0.029358229111619987, "grad_norm": 5.95318603515625, "learning_rate": 9.784735812133073e-07, "loss": 1.1357, "step": 500 }, { "epoch": 0.029945393693852386, "grad_norm": 5.031583309173584, "learning_rate": 9.980430528375734e-07, "loss": 0.9007, "step": 510 }, { "epoch": 0.03053255827608479, "grad_norm": 3.915508985519409, "learning_rate": 1.0176125244618395e-06, "loss": 0.9926, "step": 520 }, { "epoch": 0.031119722858317187, "grad_norm": 5.9026384353637695, "learning_rate": 1.0371819960861057e-06, "loss": 1.079, "step": 530 }, { "epoch": 0.031706887440549586, "grad_norm": 4.613630771636963, "learning_rate": 1.0567514677103718e-06, "loss": 0.9292, "step": 540 }, { "epoch": 0.03229405202278199, "grad_norm": 9.442523002624512, "learning_rate": 1.076320939334638e-06, "loss": 1.0382, "step": 550 }, { "epoch": 0.032881216605014384, "grad_norm": 2.592625617980957, "learning_rate": 1.095890410958904e-06, "loss": 1.0158, "step": 560 }, { "epoch": 0.033468381187246786, "grad_norm": 9.926965713500977, "learning_rate": 1.1154598825831702e-06, "loss": 0.7136, "step": 570 }, { "epoch": 0.03405554576947919, "grad_norm": 6.529898166656494, "learning_rate": 1.1350293542074364e-06, "loss": 1.1512, "step": 580 }, { "epoch": 0.034642710351711584, "grad_norm": 5.547245502471924, "learning_rate": 1.1545988258317025e-06, "loss": 1.0364, "step": 590 }, { "epoch": 0.035229874933943986, "grad_norm": 3.5290074348449707, "learning_rate": 1.1741682974559686e-06, "loss": 1.0897, "step": 600 }, { "epoch": 0.03581703951617638, "grad_norm": 8.627989768981934, "learning_rate": 1.1937377690802348e-06, "loss": 1.1008, "step": 610 }, { "epoch": 0.036404204098408784, "grad_norm": 6.897231578826904, "learning_rate": 1.213307240704501e-06, "loss": 0.8648, "step": 620 }, { "epoch": 0.036991368680641186, "grad_norm": 11.020597457885742, "learning_rate": 1.2328767123287673e-06, "loss": 0.7562, "step": 630 }, { "epoch": 0.03757853326287358, "grad_norm": 7.8218278884887695, "learning_rate": 1.2524461839530334e-06, "loss": 0.9004, "step": 640 }, { "epoch": 0.038165697845105984, "grad_norm": 3.0688769817352295, "learning_rate": 1.2720156555772995e-06, "loss": 0.9283, "step": 650 }, { "epoch": 0.038752862427338386, "grad_norm": 14.62563419342041, "learning_rate": 1.2915851272015657e-06, "loss": 1.0343, "step": 660 }, { "epoch": 0.03934002700957078, "grad_norm": 5.281921863555908, "learning_rate": 1.3111545988258318e-06, "loss": 0.8706, "step": 670 }, { "epoch": 0.039927191591803184, "grad_norm": 6.722355365753174, "learning_rate": 1.330724070450098e-06, "loss": 0.7521, "step": 680 }, { "epoch": 0.04051435617403558, "grad_norm": 4.367257118225098, "learning_rate": 1.350293542074364e-06, "loss": 1.0983, "step": 690 }, { "epoch": 0.04110152075626798, "grad_norm": 3.475581169128418, "learning_rate": 1.3698630136986302e-06, "loss": 0.8049, "step": 700 }, { "epoch": 0.041688685338500384, "grad_norm": 9.295185089111328, "learning_rate": 1.3894324853228964e-06, "loss": 0.7591, "step": 710 }, { "epoch": 0.04227584992073278, "grad_norm": 4.647137641906738, "learning_rate": 1.4090019569471625e-06, "loss": 1.0243, "step": 720 }, { "epoch": 0.04286301450296518, "grad_norm": 7.035990238189697, "learning_rate": 1.4285714285714286e-06, "loss": 0.9846, "step": 730 }, { "epoch": 0.043450179085197584, "grad_norm": 8.140578269958496, "learning_rate": 1.4481409001956948e-06, "loss": 1.0179, "step": 740 }, { "epoch": 0.04403734366742998, "grad_norm": 4.505372524261475, "learning_rate": 1.467710371819961e-06, "loss": 0.9665, "step": 750 }, { "epoch": 0.04462450824966238, "grad_norm": 3.992464542388916, "learning_rate": 1.487279843444227e-06, "loss": 0.7707, "step": 760 }, { "epoch": 0.04521167283189478, "grad_norm": 3.1136116981506348, "learning_rate": 1.5068493150684932e-06, "loss": 0.899, "step": 770 }, { "epoch": 0.04579883741412718, "grad_norm": 8.264520645141602, "learning_rate": 1.5264187866927593e-06, "loss": 0.9117, "step": 780 }, { "epoch": 0.04638600199635958, "grad_norm": 3.6961781978607178, "learning_rate": 1.5459882583170254e-06, "loss": 0.8919, "step": 790 }, { "epoch": 0.04697316657859198, "grad_norm": 4.549679756164551, "learning_rate": 1.5655577299412916e-06, "loss": 1.0371, "step": 800 }, { "epoch": 0.04756033116082438, "grad_norm": 3.8782589435577393, "learning_rate": 1.5851272015655577e-06, "loss": 0.8914, "step": 810 }, { "epoch": 0.04814749574305678, "grad_norm": 4.2711005210876465, "learning_rate": 1.6046966731898239e-06, "loss": 0.6564, "step": 820 }, { "epoch": 0.04873466032528918, "grad_norm": 11.336820602416992, "learning_rate": 1.62426614481409e-06, "loss": 0.9005, "step": 830 }, { "epoch": 0.04932182490752158, "grad_norm": 1.3392462730407715, "learning_rate": 1.6438356164383561e-06, "loss": 0.8459, "step": 840 }, { "epoch": 0.04990898948975398, "grad_norm": 8.910131454467773, "learning_rate": 1.6634050880626223e-06, "loss": 0.9985, "step": 850 }, { "epoch": 0.05049615407198638, "grad_norm": 7.427406311035156, "learning_rate": 1.6829745596868884e-06, "loss": 0.8043, "step": 860 }, { "epoch": 0.05108331865421878, "grad_norm": 3.0567097663879395, "learning_rate": 1.7025440313111545e-06, "loss": 0.8552, "step": 870 }, { "epoch": 0.051670483236451174, "grad_norm": 3.9745965003967285, "learning_rate": 1.7221135029354209e-06, "loss": 0.9288, "step": 880 }, { "epoch": 0.05225764781868358, "grad_norm": 8.409229278564453, "learning_rate": 1.741682974559687e-06, "loss": 0.9565, "step": 890 }, { "epoch": 0.05284481240091598, "grad_norm": 4.802201271057129, "learning_rate": 1.7612524461839532e-06, "loss": 0.8669, "step": 900 }, { "epoch": 0.053431976983148374, "grad_norm": 8.344857215881348, "learning_rate": 1.7808219178082193e-06, "loss": 0.8937, "step": 910 }, { "epoch": 0.05401914156538078, "grad_norm": 5.126839637756348, "learning_rate": 1.8003913894324854e-06, "loss": 0.6875, "step": 920 }, { "epoch": 0.05460630614761318, "grad_norm": 3.737562894821167, "learning_rate": 1.8199608610567516e-06, "loss": 0.8277, "step": 930 }, { "epoch": 0.055193470729845574, "grad_norm": 10.49797534942627, "learning_rate": 1.8395303326810177e-06, "loss": 1.0563, "step": 940 }, { "epoch": 0.05578063531207798, "grad_norm": 3.2062673568725586, "learning_rate": 1.8590998043052839e-06, "loss": 0.9682, "step": 950 }, { "epoch": 0.05636779989431037, "grad_norm": 10.416680335998535, "learning_rate": 1.87866927592955e-06, "loss": 0.847, "step": 960 }, { "epoch": 0.056954964476542774, "grad_norm": 5.925994873046875, "learning_rate": 1.8982387475538161e-06, "loss": 0.8658, "step": 970 }, { "epoch": 0.05754212905877518, "grad_norm": 3.351245403289795, "learning_rate": 1.9178082191780823e-06, "loss": 0.8997, "step": 980 }, { "epoch": 0.05812929364100757, "grad_norm": 7.502943515777588, "learning_rate": 1.937377690802348e-06, "loss": 1.0307, "step": 990 }, { "epoch": 0.058716458223239974, "grad_norm": 8.538854598999023, "learning_rate": 1.9569471624266145e-06, "loss": 0.901, "step": 1000 }, { "epoch": 0.05930362280547238, "grad_norm": 7.37427282333374, "learning_rate": 1.976516634050881e-06, "loss": 0.949, "step": 1010 }, { "epoch": 0.05989078738770477, "grad_norm": 7.054262161254883, "learning_rate": 1.996086105675147e-06, "loss": 0.8819, "step": 1020 }, { "epoch": 0.060477951969937174, "grad_norm": 9.373048782348633, "learning_rate": 2.015655577299413e-06, "loss": 0.844, "step": 1030 }, { "epoch": 0.06106511655216958, "grad_norm": 9.244135856628418, "learning_rate": 2.035225048923679e-06, "loss": 1.0324, "step": 1040 }, { "epoch": 0.06165228113440197, "grad_norm": 4.539050102233887, "learning_rate": 2.0547945205479454e-06, "loss": 0.7655, "step": 1050 }, { "epoch": 0.062239445716634374, "grad_norm": 4.737427711486816, "learning_rate": 2.0743639921722114e-06, "loss": 0.7227, "step": 1060 }, { "epoch": 0.06282661029886677, "grad_norm": 2.004404067993164, "learning_rate": 2.0939334637964777e-06, "loss": 0.7824, "step": 1070 }, { "epoch": 0.06341377488109917, "grad_norm": 7.367832660675049, "learning_rate": 2.1135029354207436e-06, "loss": 0.8483, "step": 1080 }, { "epoch": 0.06400093946333157, "grad_norm": 10.737615585327148, "learning_rate": 2.13307240704501e-06, "loss": 0.77, "step": 1090 }, { "epoch": 0.06458810404556398, "grad_norm": 9.504667282104492, "learning_rate": 2.152641878669276e-06, "loss": 0.8978, "step": 1100 }, { "epoch": 0.06517526862779636, "grad_norm": 5.111658573150635, "learning_rate": 2.1722113502935423e-06, "loss": 0.6916, "step": 1110 }, { "epoch": 0.06576243321002877, "grad_norm": 6.176421165466309, "learning_rate": 2.191780821917808e-06, "loss": 0.6713, "step": 1120 }, { "epoch": 0.06634959779226117, "grad_norm": 15.053933143615723, "learning_rate": 2.2113502935420745e-06, "loss": 0.8604, "step": 1130 }, { "epoch": 0.06693676237449357, "grad_norm": 8.712594985961914, "learning_rate": 2.2309197651663405e-06, "loss": 0.7797, "step": 1140 }, { "epoch": 0.06752392695672597, "grad_norm": 8.10671329498291, "learning_rate": 2.250489236790607e-06, "loss": 0.657, "step": 1150 }, { "epoch": 0.06811109153895838, "grad_norm": 8.042224884033203, "learning_rate": 2.2700587084148727e-06, "loss": 0.8692, "step": 1160 }, { "epoch": 0.06869825612119076, "grad_norm": 6.120015621185303, "learning_rate": 2.289628180039139e-06, "loss": 0.8444, "step": 1170 }, { "epoch": 0.06928542070342317, "grad_norm": 5.816368103027344, "learning_rate": 2.309197651663405e-06, "loss": 0.6468, "step": 1180 }, { "epoch": 0.06987258528565557, "grad_norm": 5.625744342803955, "learning_rate": 2.3287671232876713e-06, "loss": 0.7808, "step": 1190 }, { "epoch": 0.07045974986788797, "grad_norm": 9.16716194152832, "learning_rate": 2.3483365949119373e-06, "loss": 0.8807, "step": 1200 }, { "epoch": 0.07104691445012037, "grad_norm": 1.5296014547348022, "learning_rate": 2.3679060665362036e-06, "loss": 0.9152, "step": 1210 }, { "epoch": 0.07163407903235276, "grad_norm": 1.6544889211654663, "learning_rate": 2.3874755381604695e-06, "loss": 0.7759, "step": 1220 }, { "epoch": 0.07222124361458516, "grad_norm": 7.549999237060547, "learning_rate": 2.407045009784736e-06, "loss": 0.8334, "step": 1230 }, { "epoch": 0.07280840819681757, "grad_norm": 5.4043402671813965, "learning_rate": 2.426614481409002e-06, "loss": 0.9303, "step": 1240 }, { "epoch": 0.07339557277904997, "grad_norm": 2.945380210876465, "learning_rate": 2.446183953033268e-06, "loss": 0.6834, "step": 1250 }, { "epoch": 0.07398273736128237, "grad_norm": 4.103153705596924, "learning_rate": 2.4657534246575345e-06, "loss": 0.733, "step": 1260 }, { "epoch": 0.07456990194351476, "grad_norm": 4.970322132110596, "learning_rate": 2.4853228962818004e-06, "loss": 0.7588, "step": 1270 }, { "epoch": 0.07515706652574716, "grad_norm": 5.448657512664795, "learning_rate": 2.504892367906067e-06, "loss": 0.8092, "step": 1280 }, { "epoch": 0.07574423110797956, "grad_norm": 6.001285552978516, "learning_rate": 2.5244618395303327e-06, "loss": 0.757, "step": 1290 }, { "epoch": 0.07633139569021197, "grad_norm": 2.550957202911377, "learning_rate": 2.544031311154599e-06, "loss": 0.8246, "step": 1300 }, { "epoch": 0.07691856027244437, "grad_norm": 9.331028938293457, "learning_rate": 2.563600782778865e-06, "loss": 0.7909, "step": 1310 }, { "epoch": 0.07750572485467677, "grad_norm": 4.507303237915039, "learning_rate": 2.5831702544031313e-06, "loss": 0.8495, "step": 1320 }, { "epoch": 0.07809288943690916, "grad_norm": 6.720885276794434, "learning_rate": 2.6027397260273973e-06, "loss": 0.9092, "step": 1330 }, { "epoch": 0.07868005401914156, "grad_norm": 12.301284790039062, "learning_rate": 2.6223091976516636e-06, "loss": 1.0056, "step": 1340 }, { "epoch": 0.07926721860137396, "grad_norm": 8.403606414794922, "learning_rate": 2.6418786692759295e-06, "loss": 0.8461, "step": 1350 }, { "epoch": 0.07985438318360637, "grad_norm": 3.6369428634643555, "learning_rate": 2.661448140900196e-06, "loss": 0.7988, "step": 1360 }, { "epoch": 0.08044154776583877, "grad_norm": 3.9106009006500244, "learning_rate": 2.681017612524462e-06, "loss": 0.7316, "step": 1370 }, { "epoch": 0.08102871234807116, "grad_norm": 8.390782356262207, "learning_rate": 2.700587084148728e-06, "loss": 0.7574, "step": 1380 }, { "epoch": 0.08161587693030356, "grad_norm": 5.562564849853516, "learning_rate": 2.720156555772994e-06, "loss": 0.7828, "step": 1390 }, { "epoch": 0.08220304151253596, "grad_norm": 3.0843100547790527, "learning_rate": 2.7397260273972604e-06, "loss": 0.6958, "step": 1400 }, { "epoch": 0.08279020609476836, "grad_norm": 5.951654434204102, "learning_rate": 2.7592954990215264e-06, "loss": 0.7406, "step": 1410 }, { "epoch": 0.08337737067700077, "grad_norm": 7.4815239906311035, "learning_rate": 2.7788649706457927e-06, "loss": 0.7699, "step": 1420 }, { "epoch": 0.08396453525923317, "grad_norm": 10.967490196228027, "learning_rate": 2.7984344422700586e-06, "loss": 0.7579, "step": 1430 }, { "epoch": 0.08455169984146556, "grad_norm": 14.499906539916992, "learning_rate": 2.818003913894325e-06, "loss": 1.0014, "step": 1440 }, { "epoch": 0.08513886442369796, "grad_norm": 4.706295490264893, "learning_rate": 2.837573385518591e-06, "loss": 0.6693, "step": 1450 }, { "epoch": 0.08572602900593036, "grad_norm": 2.0982460975646973, "learning_rate": 2.8571428571428573e-06, "loss": 0.9706, "step": 1460 }, { "epoch": 0.08631319358816276, "grad_norm": 4.807570457458496, "learning_rate": 2.876712328767123e-06, "loss": 0.7012, "step": 1470 }, { "epoch": 0.08690035817039517, "grad_norm": 7.2082600593566895, "learning_rate": 2.8962818003913895e-06, "loss": 0.7609, "step": 1480 }, { "epoch": 0.08748752275262756, "grad_norm": 2.857929229736328, "learning_rate": 2.9158512720156555e-06, "loss": 0.8136, "step": 1490 }, { "epoch": 0.08807468733485996, "grad_norm": 6.691984176635742, "learning_rate": 2.935420743639922e-06, "loss": 0.7188, "step": 1500 }, { "epoch": 0.08866185191709236, "grad_norm": 8.256745338439941, "learning_rate": 2.954990215264188e-06, "loss": 0.9056, "step": 1510 }, { "epoch": 0.08924901649932476, "grad_norm": 9.165206909179688, "learning_rate": 2.974559686888454e-06, "loss": 0.7071, "step": 1520 }, { "epoch": 0.08983618108155716, "grad_norm": 4.714917182922363, "learning_rate": 2.9941291585127204e-06, "loss": 0.8538, "step": 1530 }, { "epoch": 0.09042334566378955, "grad_norm": 6.193662643432617, "learning_rate": 3.0136986301369864e-06, "loss": 0.8392, "step": 1540 }, { "epoch": 0.09101051024602196, "grad_norm": 13.171388626098633, "learning_rate": 3.0332681017612527e-06, "loss": 0.77, "step": 1550 }, { "epoch": 0.09159767482825436, "grad_norm": 4.543354034423828, "learning_rate": 3.0528375733855186e-06, "loss": 0.8416, "step": 1560 }, { "epoch": 0.09218483941048676, "grad_norm": 2.489943027496338, "learning_rate": 3.072407045009785e-06, "loss": 0.8305, "step": 1570 }, { "epoch": 0.09277200399271916, "grad_norm": 2.410578727722168, "learning_rate": 3.091976516634051e-06, "loss": 0.852, "step": 1580 }, { "epoch": 0.09335916857495156, "grad_norm": 8.25346565246582, "learning_rate": 3.1115459882583172e-06, "loss": 0.8109, "step": 1590 }, { "epoch": 0.09394633315718395, "grad_norm": 4.14901876449585, "learning_rate": 3.131115459882583e-06, "loss": 0.704, "step": 1600 }, { "epoch": 0.09453349773941636, "grad_norm": 2.387298345565796, "learning_rate": 3.1506849315068495e-06, "loss": 0.7899, "step": 1610 }, { "epoch": 0.09512066232164876, "grad_norm": 4.66149377822876, "learning_rate": 3.1702544031311154e-06, "loss": 0.8063, "step": 1620 }, { "epoch": 0.09570782690388116, "grad_norm": 7.686683177947998, "learning_rate": 3.189823874755382e-06, "loss": 0.7566, "step": 1630 }, { "epoch": 0.09629499148611356, "grad_norm": 8.59265422821045, "learning_rate": 3.2093933463796477e-06, "loss": 0.7867, "step": 1640 }, { "epoch": 0.09688215606834595, "grad_norm": 5.300751209259033, "learning_rate": 3.228962818003914e-06, "loss": 0.7097, "step": 1650 }, { "epoch": 0.09746932065057835, "grad_norm": 6.386059284210205, "learning_rate": 3.24853228962818e-06, "loss": 0.6891, "step": 1660 }, { "epoch": 0.09805648523281076, "grad_norm": 9.446337699890137, "learning_rate": 3.2681017612524463e-06, "loss": 0.7909, "step": 1670 }, { "epoch": 0.09864364981504316, "grad_norm": 4.47544002532959, "learning_rate": 3.2876712328767123e-06, "loss": 0.9308, "step": 1680 }, { "epoch": 0.09923081439727556, "grad_norm": 7.066776752471924, "learning_rate": 3.3072407045009786e-06, "loss": 0.838, "step": 1690 }, { "epoch": 0.09981797897950796, "grad_norm": 8.731528282165527, "learning_rate": 3.3268101761252445e-06, "loss": 0.6929, "step": 1700 }, { "epoch": 0.10040514356174035, "grad_norm": 6.757815837860107, "learning_rate": 3.346379647749511e-06, "loss": 0.7381, "step": 1710 }, { "epoch": 0.10099230814397275, "grad_norm": 7.490019798278809, "learning_rate": 3.365949119373777e-06, "loss": 0.7808, "step": 1720 }, { "epoch": 0.10157947272620516, "grad_norm": 5.837859153747559, "learning_rate": 3.385518590998043e-06, "loss": 0.741, "step": 1730 }, { "epoch": 0.10216663730843756, "grad_norm": 6.385608673095703, "learning_rate": 3.405088062622309e-06, "loss": 0.745, "step": 1740 }, { "epoch": 0.10275380189066996, "grad_norm": 5.929920196533203, "learning_rate": 3.4246575342465754e-06, "loss": 0.6475, "step": 1750 }, { "epoch": 0.10334096647290235, "grad_norm": 13.11124324798584, "learning_rate": 3.4442270058708418e-06, "loss": 0.9064, "step": 1760 }, { "epoch": 0.10392813105513475, "grad_norm": 6.085298538208008, "learning_rate": 3.4637964774951077e-06, "loss": 0.7499, "step": 1770 }, { "epoch": 0.10451529563736715, "grad_norm": 6.37535285949707, "learning_rate": 3.483365949119374e-06, "loss": 0.6996, "step": 1780 }, { "epoch": 0.10510246021959956, "grad_norm": 8.960735321044922, "learning_rate": 3.50293542074364e-06, "loss": 0.8877, "step": 1790 }, { "epoch": 0.10568962480183196, "grad_norm": 4.762284755706787, "learning_rate": 3.5225048923679063e-06, "loss": 0.7733, "step": 1800 }, { "epoch": 0.10627678938406436, "grad_norm": 4.133889675140381, "learning_rate": 3.5420743639921723e-06, "loss": 0.7129, "step": 1810 }, { "epoch": 0.10686395396629675, "grad_norm": 4.371268272399902, "learning_rate": 3.5616438356164386e-06, "loss": 0.6444, "step": 1820 }, { "epoch": 0.10745111854852915, "grad_norm": 3.267202138900757, "learning_rate": 3.5812133072407045e-06, "loss": 0.7897, "step": 1830 }, { "epoch": 0.10803828313076155, "grad_norm": 4.603424549102783, "learning_rate": 3.600782778864971e-06, "loss": 0.7484, "step": 1840 }, { "epoch": 0.10862544771299396, "grad_norm": 7.30171012878418, "learning_rate": 3.620352250489237e-06, "loss": 0.8621, "step": 1850 }, { "epoch": 0.10921261229522636, "grad_norm": 7.769885540008545, "learning_rate": 3.639921722113503e-06, "loss": 0.7409, "step": 1860 }, { "epoch": 0.10979977687745875, "grad_norm": 4.90231466293335, "learning_rate": 3.659491193737769e-06, "loss": 0.7286, "step": 1870 }, { "epoch": 0.11038694145969115, "grad_norm": 8.224363327026367, "learning_rate": 3.6790606653620354e-06, "loss": 0.8175, "step": 1880 }, { "epoch": 0.11097410604192355, "grad_norm": 17.55459213256836, "learning_rate": 3.6986301369863014e-06, "loss": 0.6696, "step": 1890 }, { "epoch": 0.11156127062415595, "grad_norm": 3.317012071609497, "learning_rate": 3.7181996086105677e-06, "loss": 0.8506, "step": 1900 }, { "epoch": 0.11214843520638836, "grad_norm": 11.448431968688965, "learning_rate": 3.7377690802348336e-06, "loss": 0.6977, "step": 1910 }, { "epoch": 0.11273559978862074, "grad_norm": 2.5139875411987305, "learning_rate": 3.7573385518591e-06, "loss": 0.8233, "step": 1920 }, { "epoch": 0.11332276437085315, "grad_norm": 4.39483118057251, "learning_rate": 3.776908023483366e-06, "loss": 0.6619, "step": 1930 }, { "epoch": 0.11390992895308555, "grad_norm": 2.800823926925659, "learning_rate": 3.7964774951076322e-06, "loss": 0.6941, "step": 1940 }, { "epoch": 0.11449709353531795, "grad_norm": 12.821027755737305, "learning_rate": 3.816046966731898e-06, "loss": 0.8054, "step": 1950 }, { "epoch": 0.11508425811755035, "grad_norm": 2.121894121170044, "learning_rate": 3.8356164383561645e-06, "loss": 0.8059, "step": 1960 }, { "epoch": 0.11567142269978276, "grad_norm": 10.366315841674805, "learning_rate": 3.855185909980431e-06, "loss": 0.9704, "step": 1970 }, { "epoch": 0.11625858728201514, "grad_norm": 3.851391077041626, "learning_rate": 3.874755381604696e-06, "loss": 0.7881, "step": 1980 }, { "epoch": 0.11684575186424755, "grad_norm": 1.9610929489135742, "learning_rate": 3.894324853228963e-06, "loss": 0.7766, "step": 1990 }, { "epoch": 0.11743291644647995, "grad_norm": 6.045734405517578, "learning_rate": 3.913894324853229e-06, "loss": 0.6918, "step": 2000 }, { "epoch": 0.11802008102871235, "grad_norm": 6.6448235511779785, "learning_rate": 3.933463796477495e-06, "loss": 0.7948, "step": 2010 }, { "epoch": 0.11860724561094475, "grad_norm": 3.849478244781494, "learning_rate": 3.953033268101762e-06, "loss": 0.8174, "step": 2020 }, { "epoch": 0.11919441019317714, "grad_norm": 1.9723014831542969, "learning_rate": 3.972602739726027e-06, "loss": 0.7378, "step": 2030 }, { "epoch": 0.11978157477540954, "grad_norm": 4.545279026031494, "learning_rate": 3.992172211350294e-06, "loss": 0.8621, "step": 2040 }, { "epoch": 0.12036873935764195, "grad_norm": 4.2657928466796875, "learning_rate": 4.01174168297456e-06, "loss": 0.6898, "step": 2050 }, { "epoch": 0.12095590393987435, "grad_norm": 4.181396007537842, "learning_rate": 4.031311154598826e-06, "loss": 0.7308, "step": 2060 }, { "epoch": 0.12154306852210675, "grad_norm": 7.177546977996826, "learning_rate": 4.050880626223092e-06, "loss": 0.6575, "step": 2070 }, { "epoch": 0.12213023310433915, "grad_norm": 2.638078451156616, "learning_rate": 4.070450097847358e-06, "loss": 0.6675, "step": 2080 }, { "epoch": 0.12271739768657154, "grad_norm": 5.988828659057617, "learning_rate": 4.0900195694716245e-06, "loss": 0.7436, "step": 2090 }, { "epoch": 0.12330456226880394, "grad_norm": 2.3120574951171875, "learning_rate": 4.109589041095891e-06, "loss": 0.6795, "step": 2100 }, { "epoch": 0.12389172685103635, "grad_norm": 4.592883586883545, "learning_rate": 4.129158512720156e-06, "loss": 0.6557, "step": 2110 }, { "epoch": 0.12447889143326875, "grad_norm": 6.267236232757568, "learning_rate": 4.148727984344423e-06, "loss": 0.71, "step": 2120 }, { "epoch": 0.12506605601550114, "grad_norm": 5.621852397918701, "learning_rate": 4.168297455968689e-06, "loss": 0.7354, "step": 2130 }, { "epoch": 0.12565322059773354, "grad_norm": 9.699333190917969, "learning_rate": 4.187866927592955e-06, "loss": 0.9046, "step": 2140 }, { "epoch": 0.12624038517996594, "grad_norm": 3.1035711765289307, "learning_rate": 4.207436399217221e-06, "loss": 0.7152, "step": 2150 }, { "epoch": 0.12682754976219834, "grad_norm": 3.3194291591644287, "learning_rate": 4.227005870841487e-06, "loss": 0.6435, "step": 2160 }, { "epoch": 0.12741471434443075, "grad_norm": 20.146209716796875, "learning_rate": 4.246575342465754e-06, "loss": 0.8355, "step": 2170 }, { "epoch": 0.12800187892666315, "grad_norm": 13.883506774902344, "learning_rate": 4.26614481409002e-06, "loss": 0.6917, "step": 2180 }, { "epoch": 0.12858904350889555, "grad_norm": 4.671999454498291, "learning_rate": 4.2857142857142855e-06, "loss": 0.804, "step": 2190 }, { "epoch": 0.12917620809112795, "grad_norm": 6.344011306762695, "learning_rate": 4.305283757338552e-06, "loss": 0.7877, "step": 2200 }, { "epoch": 0.12976337267336036, "grad_norm": 2.58798885345459, "learning_rate": 4.324853228962818e-06, "loss": 0.6364, "step": 2210 }, { "epoch": 0.13035053725559273, "grad_norm": 3.647090196609497, "learning_rate": 4.3444227005870845e-06, "loss": 0.7262, "step": 2220 }, { "epoch": 0.13093770183782513, "grad_norm": 6.377803802490234, "learning_rate": 4.36399217221135e-06, "loss": 0.6511, "step": 2230 }, { "epoch": 0.13152486642005753, "grad_norm": 1.822828769683838, "learning_rate": 4.383561643835616e-06, "loss": 0.7173, "step": 2240 }, { "epoch": 0.13211203100228994, "grad_norm": 1.7653292417526245, "learning_rate": 4.403131115459883e-06, "loss": 0.8337, "step": 2250 }, { "epoch": 0.13269919558452234, "grad_norm": 4.188485145568848, "learning_rate": 4.422700587084149e-06, "loss": 0.8206, "step": 2260 }, { "epoch": 0.13328636016675474, "grad_norm": 1.7894469499588013, "learning_rate": 4.442270058708415e-06, "loss": 0.7165, "step": 2270 }, { "epoch": 0.13387352474898714, "grad_norm": 3.5130021572113037, "learning_rate": 4.461839530332681e-06, "loss": 0.6766, "step": 2280 }, { "epoch": 0.13446068933121955, "grad_norm": 9.267215728759766, "learning_rate": 4.481409001956947e-06, "loss": 0.7137, "step": 2290 }, { "epoch": 0.13504785391345195, "grad_norm": 9.45638370513916, "learning_rate": 4.500978473581214e-06, "loss": 0.8302, "step": 2300 }, { "epoch": 0.13563501849568435, "grad_norm": 5.808602809906006, "learning_rate": 4.52054794520548e-06, "loss": 0.9748, "step": 2310 }, { "epoch": 0.13622218307791675, "grad_norm": 7.003822326660156, "learning_rate": 4.5401174168297455e-06, "loss": 0.6583, "step": 2320 }, { "epoch": 0.13680934766014913, "grad_norm": 6.219771385192871, "learning_rate": 4.559686888454012e-06, "loss": 0.6736, "step": 2330 }, { "epoch": 0.13739651224238153, "grad_norm": 6.362701416015625, "learning_rate": 4.579256360078278e-06, "loss": 0.7583, "step": 2340 }, { "epoch": 0.13798367682461393, "grad_norm": 3.0174388885498047, "learning_rate": 4.5988258317025445e-06, "loss": 0.7662, "step": 2350 }, { "epoch": 0.13857084140684633, "grad_norm": 3.479679584503174, "learning_rate": 4.61839530332681e-06, "loss": 0.7119, "step": 2360 }, { "epoch": 0.13915800598907874, "grad_norm": 8.28512954711914, "learning_rate": 4.637964774951076e-06, "loss": 0.7096, "step": 2370 }, { "epoch": 0.13974517057131114, "grad_norm": 12.326287269592285, "learning_rate": 4.657534246575343e-06, "loss": 0.5577, "step": 2380 }, { "epoch": 0.14033233515354354, "grad_norm": 6.707863807678223, "learning_rate": 4.677103718199609e-06, "loss": 0.7222, "step": 2390 }, { "epoch": 0.14091949973577594, "grad_norm": 5.311136722564697, "learning_rate": 4.6966731898238745e-06, "loss": 0.6892, "step": 2400 }, { "epoch": 0.14150666431800835, "grad_norm": 16.324462890625, "learning_rate": 4.716242661448141e-06, "loss": 0.7024, "step": 2410 }, { "epoch": 0.14209382890024075, "grad_norm": 6.7972259521484375, "learning_rate": 4.735812133072407e-06, "loss": 0.5988, "step": 2420 }, { "epoch": 0.14268099348247315, "grad_norm": 2.7404658794403076, "learning_rate": 4.755381604696674e-06, "loss": 0.7048, "step": 2430 }, { "epoch": 0.14326815806470553, "grad_norm": 6.040501117706299, "learning_rate": 4.774951076320939e-06, "loss": 0.6071, "step": 2440 }, { "epoch": 0.14385532264693793, "grad_norm": 11.101715087890625, "learning_rate": 4.7945205479452054e-06, "loss": 0.7219, "step": 2450 }, { "epoch": 0.14444248722917033, "grad_norm": 7.276677131652832, "learning_rate": 4.814090019569472e-06, "loss": 0.6249, "step": 2460 }, { "epoch": 0.14502965181140273, "grad_norm": 2.3674707412719727, "learning_rate": 4.833659491193738e-06, "loss": 0.8037, "step": 2470 }, { "epoch": 0.14561681639363513, "grad_norm": 6.0126190185546875, "learning_rate": 4.853228962818004e-06, "loss": 0.6921, "step": 2480 }, { "epoch": 0.14620398097586754, "grad_norm": 8.469931602478027, "learning_rate": 4.87279843444227e-06, "loss": 0.5527, "step": 2490 }, { "epoch": 0.14679114555809994, "grad_norm": 7.0929388999938965, "learning_rate": 4.892367906066536e-06, "loss": 0.7958, "step": 2500 }, { "epoch": 0.14737831014033234, "grad_norm": 1.8125132322311401, "learning_rate": 4.911937377690803e-06, "loss": 0.7543, "step": 2510 }, { "epoch": 0.14796547472256474, "grad_norm": 7.899388313293457, "learning_rate": 4.931506849315069e-06, "loss": 0.7517, "step": 2520 }, { "epoch": 0.14855263930479715, "grad_norm": 7.623599529266357, "learning_rate": 4.9510763209393345e-06, "loss": 0.6724, "step": 2530 }, { "epoch": 0.14913980388702952, "grad_norm": 6.308513641357422, "learning_rate": 4.970645792563601e-06, "loss": 0.7732, "step": 2540 }, { "epoch": 0.14972696846926192, "grad_norm": 7.435992240905762, "learning_rate": 4.990215264187867e-06, "loss": 0.7383, "step": 2550 }, { "epoch": 0.15031413305149433, "grad_norm": 13.817732810974121, "learning_rate": 5.009784735812134e-06, "loss": 0.5774, "step": 2560 }, { "epoch": 0.15090129763372673, "grad_norm": 3.3867268562316895, "learning_rate": 5.0293542074364e-06, "loss": 0.7487, "step": 2570 }, { "epoch": 0.15148846221595913, "grad_norm": 5.583853244781494, "learning_rate": 5.0489236790606654e-06, "loss": 0.6768, "step": 2580 }, { "epoch": 0.15207562679819153, "grad_norm": 4.28045654296875, "learning_rate": 5.068493150684932e-06, "loss": 0.7144, "step": 2590 }, { "epoch": 0.15266279138042393, "grad_norm": 7.279781341552734, "learning_rate": 5.088062622309198e-06, "loss": 0.6891, "step": 2600 }, { "epoch": 0.15324995596265634, "grad_norm": 7.158484935760498, "learning_rate": 5.1076320939334645e-06, "loss": 0.6077, "step": 2610 }, { "epoch": 0.15383712054488874, "grad_norm": 1.4810936450958252, "learning_rate": 5.12720156555773e-06, "loss": 0.6019, "step": 2620 }, { "epoch": 0.15442428512712114, "grad_norm": 2.0737361907958984, "learning_rate": 5.146771037181997e-06, "loss": 0.6842, "step": 2630 }, { "epoch": 0.15501144970935354, "grad_norm": 9.282384872436523, "learning_rate": 5.166340508806263e-06, "loss": 0.6621, "step": 2640 }, { "epoch": 0.15559861429158592, "grad_norm": 8.695781707763672, "learning_rate": 5.185909980430529e-06, "loss": 0.6138, "step": 2650 }, { "epoch": 0.15618577887381832, "grad_norm": 6.928513526916504, "learning_rate": 5.2054794520547945e-06, "loss": 0.6502, "step": 2660 }, { "epoch": 0.15677294345605072, "grad_norm": 4.860149383544922, "learning_rate": 5.225048923679062e-06, "loss": 0.5592, "step": 2670 }, { "epoch": 0.15736010803828313, "grad_norm": 1.5829118490219116, "learning_rate": 5.244618395303327e-06, "loss": 0.7289, "step": 2680 }, { "epoch": 0.15794727262051553, "grad_norm": 1.541205644607544, "learning_rate": 5.2641878669275936e-06, "loss": 0.684, "step": 2690 }, { "epoch": 0.15853443720274793, "grad_norm": 19.76561737060547, "learning_rate": 5.283757338551859e-06, "loss": 0.6927, "step": 2700 }, { "epoch": 0.15912160178498033, "grad_norm": 1.7562305927276611, "learning_rate": 5.303326810176126e-06, "loss": 0.5778, "step": 2710 }, { "epoch": 0.15970876636721273, "grad_norm": 5.901500701904297, "learning_rate": 5.322896281800392e-06, "loss": 0.6466, "step": 2720 }, { "epoch": 0.16029593094944514, "grad_norm": 5.041698455810547, "learning_rate": 5.342465753424658e-06, "loss": 0.7039, "step": 2730 }, { "epoch": 0.16088309553167754, "grad_norm": 6.070769309997559, "learning_rate": 5.362035225048924e-06, "loss": 0.743, "step": 2740 }, { "epoch": 0.16147026011390994, "grad_norm": 3.4495351314544678, "learning_rate": 5.381604696673191e-06, "loss": 0.7595, "step": 2750 }, { "epoch": 0.16205742469614232, "grad_norm": 2.3201093673706055, "learning_rate": 5.401174168297456e-06, "loss": 0.7812, "step": 2760 }, { "epoch": 0.16264458927837472, "grad_norm": 2.728562116622925, "learning_rate": 5.420743639921723e-06, "loss": 0.6582, "step": 2770 }, { "epoch": 0.16323175386060712, "grad_norm": 7.617251396179199, "learning_rate": 5.440313111545988e-06, "loss": 0.5549, "step": 2780 }, { "epoch": 0.16381891844283952, "grad_norm": 4.796322822570801, "learning_rate": 5.459882583170255e-06, "loss": 0.668, "step": 2790 }, { "epoch": 0.16440608302507193, "grad_norm": 6.790625095367432, "learning_rate": 5.479452054794521e-06, "loss": 0.7131, "step": 2800 }, { "epoch": 0.16499324760730433, "grad_norm": 4.138622760772705, "learning_rate": 5.499021526418787e-06, "loss": 0.7452, "step": 2810 }, { "epoch": 0.16558041218953673, "grad_norm": 2.5190672874450684, "learning_rate": 5.518590998043053e-06, "loss": 0.58, "step": 2820 }, { "epoch": 0.16616757677176913, "grad_norm": 3.5544793605804443, "learning_rate": 5.53816046966732e-06, "loss": 0.5788, "step": 2830 }, { "epoch": 0.16675474135400153, "grad_norm": 8.13762378692627, "learning_rate": 5.557729941291585e-06, "loss": 0.7364, "step": 2840 }, { "epoch": 0.16734190593623394, "grad_norm": 7.75924825668335, "learning_rate": 5.577299412915852e-06, "loss": 0.5772, "step": 2850 }, { "epoch": 0.16792907051846634, "grad_norm": 3.732496738433838, "learning_rate": 5.596868884540117e-06, "loss": 0.6204, "step": 2860 }, { "epoch": 0.1685162351006987, "grad_norm": 4.41631555557251, "learning_rate": 5.6164383561643845e-06, "loss": 0.7071, "step": 2870 }, { "epoch": 0.16910339968293112, "grad_norm": 2.3435213565826416, "learning_rate": 5.63600782778865e-06, "loss": 0.6875, "step": 2880 }, { "epoch": 0.16969056426516352, "grad_norm": 8.808669090270996, "learning_rate": 5.655577299412916e-06, "loss": 0.6824, "step": 2890 }, { "epoch": 0.17027772884739592, "grad_norm": 9.395929336547852, "learning_rate": 5.675146771037182e-06, "loss": 0.5737, "step": 2900 }, { "epoch": 0.17086489342962832, "grad_norm": 8.890952110290527, "learning_rate": 5.694716242661449e-06, "loss": 0.6285, "step": 2910 }, { "epoch": 0.17145205801186073, "grad_norm": 1.988358736038208, "learning_rate": 5.7142857142857145e-06, "loss": 0.7047, "step": 2920 }, { "epoch": 0.17203922259409313, "grad_norm": 6.641796588897705, "learning_rate": 5.733855185909981e-06, "loss": 0.7605, "step": 2930 }, { "epoch": 0.17262638717632553, "grad_norm": 3.772524356842041, "learning_rate": 5.753424657534246e-06, "loss": 0.6689, "step": 2940 }, { "epoch": 0.17321355175855793, "grad_norm": 4.537923336029053, "learning_rate": 5.7729941291585136e-06, "loss": 0.5789, "step": 2950 }, { "epoch": 0.17380071634079033, "grad_norm": 5.282272815704346, "learning_rate": 5.792563600782779e-06, "loss": 0.6213, "step": 2960 }, { "epoch": 0.17438788092302274, "grad_norm": 6.201147079467773, "learning_rate": 5.812133072407045e-06, "loss": 0.7386, "step": 2970 }, { "epoch": 0.1749750455052551, "grad_norm": 3.9213414192199707, "learning_rate": 5.831702544031311e-06, "loss": 0.6784, "step": 2980 }, { "epoch": 0.1755622100874875, "grad_norm": 0.9509634971618652, "learning_rate": 5.851272015655578e-06, "loss": 0.5253, "step": 2990 }, { "epoch": 0.17614937466971992, "grad_norm": 3.6924402713775635, "learning_rate": 5.870841487279844e-06, "loss": 0.5311, "step": 3000 }, { "epoch": 0.17673653925195232, "grad_norm": 4.796311855316162, "learning_rate": 5.89041095890411e-06, "loss": 0.6152, "step": 3010 }, { "epoch": 0.17732370383418472, "grad_norm": 4.609403133392334, "learning_rate": 5.909980430528376e-06, "loss": 0.7566, "step": 3020 }, { "epoch": 0.17791086841641712, "grad_norm": 4.651664733886719, "learning_rate": 5.929549902152643e-06, "loss": 0.5747, "step": 3030 }, { "epoch": 0.17849803299864953, "grad_norm": 4.41327428817749, "learning_rate": 5.949119373776908e-06, "loss": 0.871, "step": 3040 }, { "epoch": 0.17908519758088193, "grad_norm": 6.656513690948486, "learning_rate": 5.9686888454011745e-06, "loss": 0.6973, "step": 3050 }, { "epoch": 0.17967236216311433, "grad_norm": 3.931112051010132, "learning_rate": 5.988258317025441e-06, "loss": 0.5744, "step": 3060 }, { "epoch": 0.18025952674534673, "grad_norm": 6.865795135498047, "learning_rate": 6.007827788649707e-06, "loss": 0.687, "step": 3070 }, { "epoch": 0.1808466913275791, "grad_norm": 4.796347618103027, "learning_rate": 6.027397260273973e-06, "loss": 0.8506, "step": 3080 }, { "epoch": 0.1814338559098115, "grad_norm": 8.24303913116455, "learning_rate": 6.046966731898239e-06, "loss": 0.6058, "step": 3090 }, { "epoch": 0.1820210204920439, "grad_norm": 4.750746250152588, "learning_rate": 6.066536203522505e-06, "loss": 0.4742, "step": 3100 }, { "epoch": 0.1826081850742763, "grad_norm": 6.392967700958252, "learning_rate": 6.086105675146772e-06, "loss": 0.7176, "step": 3110 }, { "epoch": 0.18319534965650872, "grad_norm": 8.094481468200684, "learning_rate": 6.105675146771037e-06, "loss": 0.6262, "step": 3120 }, { "epoch": 0.18378251423874112, "grad_norm": 4.551448822021484, "learning_rate": 6.1252446183953044e-06, "loss": 0.5338, "step": 3130 }, { "epoch": 0.18436967882097352, "grad_norm": 5.079488754272461, "learning_rate": 6.14481409001957e-06, "loss": 0.757, "step": 3140 }, { "epoch": 0.18495684340320592, "grad_norm": 7.189990997314453, "learning_rate": 6.164383561643836e-06, "loss": 0.8152, "step": 3150 }, { "epoch": 0.18554400798543833, "grad_norm": 8.272801399230957, "learning_rate": 6.183953033268102e-06, "loss": 0.6792, "step": 3160 }, { "epoch": 0.18613117256767073, "grad_norm": 4.497807502746582, "learning_rate": 6.203522504892369e-06, "loss": 0.5842, "step": 3170 }, { "epoch": 0.18671833714990313, "grad_norm": 5.395936489105225, "learning_rate": 6.2230919765166345e-06, "loss": 0.6977, "step": 3180 }, { "epoch": 0.1873055017321355, "grad_norm": 4.3146562576293945, "learning_rate": 6.242661448140901e-06, "loss": 0.5357, "step": 3190 }, { "epoch": 0.1878926663143679, "grad_norm": 3.7717928886413574, "learning_rate": 6.262230919765166e-06, "loss": 0.7344, "step": 3200 }, { "epoch": 0.1884798308966003, "grad_norm": 4.36600923538208, "learning_rate": 6.2818003913894335e-06, "loss": 0.7291, "step": 3210 }, { "epoch": 0.1890669954788327, "grad_norm": 6.956234455108643, "learning_rate": 6.301369863013699e-06, "loss": 0.7161, "step": 3220 }, { "epoch": 0.1896541600610651, "grad_norm": 2.6327009201049805, "learning_rate": 6.320939334637965e-06, "loss": 0.6195, "step": 3230 }, { "epoch": 0.19024132464329752, "grad_norm": 2.979241132736206, "learning_rate": 6.340508806262231e-06, "loss": 0.5827, "step": 3240 }, { "epoch": 0.19082848922552992, "grad_norm": 15.189460754394531, "learning_rate": 6.360078277886498e-06, "loss": 0.4825, "step": 3250 }, { "epoch": 0.19141565380776232, "grad_norm": 7.241776943206787, "learning_rate": 6.379647749510764e-06, "loss": 0.56, "step": 3260 }, { "epoch": 0.19200281838999472, "grad_norm": 6.561065673828125, "learning_rate": 6.39921722113503e-06, "loss": 0.6278, "step": 3270 }, { "epoch": 0.19258998297222713, "grad_norm": 6.936814308166504, "learning_rate": 6.4187866927592954e-06, "loss": 0.6751, "step": 3280 }, { "epoch": 0.19317714755445953, "grad_norm": 7.4649176597595215, "learning_rate": 6.438356164383563e-06, "loss": 0.6461, "step": 3290 }, { "epoch": 0.1937643121366919, "grad_norm": 1.9496948719024658, "learning_rate": 6.457925636007828e-06, "loss": 0.5472, "step": 3300 }, { "epoch": 0.1943514767189243, "grad_norm": 6.600861072540283, "learning_rate": 6.4774951076320945e-06, "loss": 0.6276, "step": 3310 }, { "epoch": 0.1949386413011567, "grad_norm": 2.6394083499908447, "learning_rate": 6.49706457925636e-06, "loss": 0.6291, "step": 3320 }, { "epoch": 0.1955258058833891, "grad_norm": 9.67360782623291, "learning_rate": 6.516634050880627e-06, "loss": 0.539, "step": 3330 }, { "epoch": 0.1961129704656215, "grad_norm": 3.0532026290893555, "learning_rate": 6.536203522504893e-06, "loss": 0.7145, "step": 3340 }, { "epoch": 0.1967001350478539, "grad_norm": 5.364090442657471, "learning_rate": 6.555772994129159e-06, "loss": 0.5669, "step": 3350 }, { "epoch": 0.19728729963008632, "grad_norm": 11.27733039855957, "learning_rate": 6.5753424657534245e-06, "loss": 0.6432, "step": 3360 }, { "epoch": 0.19787446421231872, "grad_norm": 2.3305087089538574, "learning_rate": 6.594911937377692e-06, "loss": 0.8098, "step": 3370 }, { "epoch": 0.19846162879455112, "grad_norm": 6.545224666595459, "learning_rate": 6.614481409001957e-06, "loss": 0.4745, "step": 3380 }, { "epoch": 0.19904879337678352, "grad_norm": 2.076004981994629, "learning_rate": 6.634050880626224e-06, "loss": 0.498, "step": 3390 }, { "epoch": 0.19963595795901593, "grad_norm": 10.02435302734375, "learning_rate": 6.653620352250489e-06, "loss": 0.75, "step": 3400 }, { "epoch": 0.2002231225412483, "grad_norm": 10.873096466064453, "learning_rate": 6.673189823874756e-06, "loss": 0.7112, "step": 3410 }, { "epoch": 0.2008102871234807, "grad_norm": 10.263143539428711, "learning_rate": 6.692759295499022e-06, "loss": 0.7612, "step": 3420 }, { "epoch": 0.2013974517057131, "grad_norm": 17.05776596069336, "learning_rate": 6.712328767123288e-06, "loss": 0.5565, "step": 3430 }, { "epoch": 0.2019846162879455, "grad_norm": 5.272540092468262, "learning_rate": 6.731898238747554e-06, "loss": 0.6052, "step": 3440 }, { "epoch": 0.2025717808701779, "grad_norm": 8.571463584899902, "learning_rate": 6.751467710371821e-06, "loss": 0.6028, "step": 3450 }, { "epoch": 0.2031589454524103, "grad_norm": 2.989260196685791, "learning_rate": 6.771037181996086e-06, "loss": 0.5534, "step": 3460 }, { "epoch": 0.2037461100346427, "grad_norm": 2.8673324584960938, "learning_rate": 6.790606653620353e-06, "loss": 0.6334, "step": 3470 }, { "epoch": 0.20433327461687512, "grad_norm": 6.386069297790527, "learning_rate": 6.810176125244618e-06, "loss": 0.5836, "step": 3480 }, { "epoch": 0.20492043919910752, "grad_norm": 6.877231121063232, "learning_rate": 6.829745596868885e-06, "loss": 0.7327, "step": 3490 }, { "epoch": 0.20550760378133992, "grad_norm": 7.804016590118408, "learning_rate": 6.849315068493151e-06, "loss": 0.5454, "step": 3500 }, { "epoch": 0.20609476836357232, "grad_norm": 8.476634979248047, "learning_rate": 6.868884540117417e-06, "loss": 0.7423, "step": 3510 }, { "epoch": 0.2066819329458047, "grad_norm": 22.013744354248047, "learning_rate": 6.8884540117416836e-06, "loss": 0.83, "step": 3520 }, { "epoch": 0.2072690975280371, "grad_norm": 10.448344230651855, "learning_rate": 6.90802348336595e-06, "loss": 0.6944, "step": 3530 }, { "epoch": 0.2078562621102695, "grad_norm": 3.374620199203491, "learning_rate": 6.927592954990215e-06, "loss": 0.6473, "step": 3540 }, { "epoch": 0.2084434266925019, "grad_norm": 4.928771495819092, "learning_rate": 6.947162426614482e-06, "loss": 0.678, "step": 3550 }, { "epoch": 0.2090305912747343, "grad_norm": 4.385007381439209, "learning_rate": 6.966731898238748e-06, "loss": 0.7962, "step": 3560 }, { "epoch": 0.2096177558569667, "grad_norm": 4.050276279449463, "learning_rate": 6.9863013698630145e-06, "loss": 0.5745, "step": 3570 }, { "epoch": 0.2102049204391991, "grad_norm": 9.422019004821777, "learning_rate": 7.00587084148728e-06, "loss": 0.5742, "step": 3580 }, { "epoch": 0.2107920850214315, "grad_norm": 4.105819225311279, "learning_rate": 7.025440313111546e-06, "loss": 0.6406, "step": 3590 }, { "epoch": 0.21137924960366392, "grad_norm": 4.728799343109131, "learning_rate": 7.045009784735813e-06, "loss": 0.6663, "step": 3600 }, { "epoch": 0.21196641418589632, "grad_norm": 6.085674285888672, "learning_rate": 7.064579256360079e-06, "loss": 0.5671, "step": 3610 }, { "epoch": 0.21255357876812872, "grad_norm": 6.090618133544922, "learning_rate": 7.0841487279843445e-06, "loss": 0.5465, "step": 3620 }, { "epoch": 0.2131407433503611, "grad_norm": 10.811095237731934, "learning_rate": 7.103718199608612e-06, "loss": 0.5448, "step": 3630 }, { "epoch": 0.2137279079325935, "grad_norm": 6.497663974761963, "learning_rate": 7.123287671232877e-06, "loss": 0.6058, "step": 3640 }, { "epoch": 0.2143150725148259, "grad_norm": 8.652677536010742, "learning_rate": 7.1428571428571436e-06, "loss": 0.5748, "step": 3650 }, { "epoch": 0.2149022370970583, "grad_norm": 3.5520501136779785, "learning_rate": 7.162426614481409e-06, "loss": 0.6177, "step": 3660 }, { "epoch": 0.2154894016792907, "grad_norm": 2.7634449005126953, "learning_rate": 7.181996086105676e-06, "loss": 0.5685, "step": 3670 }, { "epoch": 0.2160765662615231, "grad_norm": 7.339604377746582, "learning_rate": 7.201565557729942e-06, "loss": 0.5992, "step": 3680 }, { "epoch": 0.2166637308437555, "grad_norm": 6.2451982498168945, "learning_rate": 7.221135029354208e-06, "loss": 0.6317, "step": 3690 }, { "epoch": 0.2172508954259879, "grad_norm": 5.857006072998047, "learning_rate": 7.240704500978474e-06, "loss": 0.7477, "step": 3700 }, { "epoch": 0.2178380600082203, "grad_norm": 8.751367568969727, "learning_rate": 7.260273972602741e-06, "loss": 0.5294, "step": 3710 }, { "epoch": 0.21842522459045272, "grad_norm": 7.702101230621338, "learning_rate": 7.279843444227006e-06, "loss": 0.788, "step": 3720 }, { "epoch": 0.2190123891726851, "grad_norm": 8.055541038513184, "learning_rate": 7.299412915851273e-06, "loss": 0.5164, "step": 3730 }, { "epoch": 0.2195995537549175, "grad_norm": 9.189759254455566, "learning_rate": 7.318982387475538e-06, "loss": 0.5966, "step": 3740 }, { "epoch": 0.2201867183371499, "grad_norm": 15.524504661560059, "learning_rate": 7.338551859099805e-06, "loss": 0.6516, "step": 3750 }, { "epoch": 0.2207738829193823, "grad_norm": 4.407480716705322, "learning_rate": 7.358121330724071e-06, "loss": 0.7827, "step": 3760 }, { "epoch": 0.2213610475016147, "grad_norm": 5.1341962814331055, "learning_rate": 7.377690802348337e-06, "loss": 0.5329, "step": 3770 }, { "epoch": 0.2219482120838471, "grad_norm": 5.3569254875183105, "learning_rate": 7.397260273972603e-06, "loss": 0.6256, "step": 3780 }, { "epoch": 0.2225353766660795, "grad_norm": 6.933311939239502, "learning_rate": 7.41682974559687e-06, "loss": 0.7034, "step": 3790 }, { "epoch": 0.2231225412483119, "grad_norm": 3.438169479370117, "learning_rate": 7.436399217221135e-06, "loss": 0.5898, "step": 3800 }, { "epoch": 0.2237097058305443, "grad_norm": 3.6164391040802, "learning_rate": 7.455968688845402e-06, "loss": 0.6267, "step": 3810 }, { "epoch": 0.2242968704127767, "grad_norm": 5.769728183746338, "learning_rate": 7.475538160469667e-06, "loss": 0.5733, "step": 3820 }, { "epoch": 0.2248840349950091, "grad_norm": 3.778078079223633, "learning_rate": 7.4951076320939344e-06, "loss": 0.6681, "step": 3830 }, { "epoch": 0.2254711995772415, "grad_norm": 2.89884614944458, "learning_rate": 7.5146771037182e-06, "loss": 0.5696, "step": 3840 }, { "epoch": 0.2260583641594739, "grad_norm": 8.595840454101562, "learning_rate": 7.534246575342466e-06, "loss": 0.6242, "step": 3850 }, { "epoch": 0.2266455287417063, "grad_norm": 4.439752578735352, "learning_rate": 7.553816046966732e-06, "loss": 0.6843, "step": 3860 }, { "epoch": 0.2272326933239387, "grad_norm": 5.47536563873291, "learning_rate": 7.573385518590999e-06, "loss": 0.6804, "step": 3870 }, { "epoch": 0.2278198579061711, "grad_norm": 5.761258125305176, "learning_rate": 7.5929549902152645e-06, "loss": 0.6933, "step": 3880 }, { "epoch": 0.2284070224884035, "grad_norm": 1.9384691715240479, "learning_rate": 7.612524461839531e-06, "loss": 0.5809, "step": 3890 }, { "epoch": 0.2289941870706359, "grad_norm": 5.109358310699463, "learning_rate": 7.632093933463796e-06, "loss": 0.5596, "step": 3900 }, { "epoch": 0.2295813516528683, "grad_norm": 6.541728973388672, "learning_rate": 7.651663405088063e-06, "loss": 0.6196, "step": 3910 }, { "epoch": 0.2301685162351007, "grad_norm": 2.248987913131714, "learning_rate": 7.671232876712329e-06, "loss": 0.5927, "step": 3920 }, { "epoch": 0.2307556808173331, "grad_norm": 4.76672887802124, "learning_rate": 7.690802348336595e-06, "loss": 0.6872, "step": 3930 }, { "epoch": 0.2313428453995655, "grad_norm": 2.9508025646209717, "learning_rate": 7.710371819960862e-06, "loss": 0.6155, "step": 3940 }, { "epoch": 0.23193000998179789, "grad_norm": 5.360123157501221, "learning_rate": 7.729941291585128e-06, "loss": 0.6527, "step": 3950 }, { "epoch": 0.2325171745640303, "grad_norm": 3.492568016052246, "learning_rate": 7.749510763209393e-06, "loss": 0.5888, "step": 3960 }, { "epoch": 0.2331043391462627, "grad_norm": 3.673431634902954, "learning_rate": 7.76908023483366e-06, "loss": 0.7505, "step": 3970 }, { "epoch": 0.2336915037284951, "grad_norm": 5.0812859535217285, "learning_rate": 7.788649706457925e-06, "loss": 0.7614, "step": 3980 }, { "epoch": 0.2342786683107275, "grad_norm": 3.786325216293335, "learning_rate": 7.808219178082192e-06, "loss": 0.6142, "step": 3990 }, { "epoch": 0.2348658328929599, "grad_norm": 5.799644470214844, "learning_rate": 7.827788649706458e-06, "loss": 0.7176, "step": 4000 }, { "epoch": 0.2354529974751923, "grad_norm": 3.411635398864746, "learning_rate": 7.847358121330724e-06, "loss": 0.6786, "step": 4010 }, { "epoch": 0.2360401620574247, "grad_norm": 8.857686996459961, "learning_rate": 7.86692759295499e-06, "loss": 0.4854, "step": 4020 }, { "epoch": 0.2366273266396571, "grad_norm": 10.310972213745117, "learning_rate": 7.886497064579257e-06, "loss": 0.535, "step": 4030 }, { "epoch": 0.2372144912218895, "grad_norm": 4.258606433868408, "learning_rate": 7.906066536203524e-06, "loss": 0.5035, "step": 4040 }, { "epoch": 0.2378016558041219, "grad_norm": 2.2179956436157227, "learning_rate": 7.92563600782779e-06, "loss": 0.7558, "step": 4050 }, { "epoch": 0.23838882038635428, "grad_norm": 2.8786802291870117, "learning_rate": 7.945205479452055e-06, "loss": 0.6616, "step": 4060 }, { "epoch": 0.23897598496858669, "grad_norm": 9.098756790161133, "learning_rate": 7.964774951076321e-06, "loss": 0.6075, "step": 4070 }, { "epoch": 0.2395631495508191, "grad_norm": 5.289139747619629, "learning_rate": 7.984344422700587e-06, "loss": 0.5941, "step": 4080 }, { "epoch": 0.2401503141330515, "grad_norm": 3.5185530185699463, "learning_rate": 8.003913894324854e-06, "loss": 0.5078, "step": 4090 }, { "epoch": 0.2407374787152839, "grad_norm": 8.202613830566406, "learning_rate": 8.02348336594912e-06, "loss": 0.6605, "step": 4100 }, { "epoch": 0.2413246432975163, "grad_norm": 9.914385795593262, "learning_rate": 8.043052837573386e-06, "loss": 0.5497, "step": 4110 }, { "epoch": 0.2419118078797487, "grad_norm": 3.929380178451538, "learning_rate": 8.062622309197653e-06, "loss": 0.6872, "step": 4120 }, { "epoch": 0.2424989724619811, "grad_norm": 2.6679744720458984, "learning_rate": 8.082191780821919e-06, "loss": 0.5438, "step": 4130 }, { "epoch": 0.2430861370442135, "grad_norm": 2.607328176498413, "learning_rate": 8.101761252446184e-06, "loss": 0.7427, "step": 4140 }, { "epoch": 0.2436733016264459, "grad_norm": 11.047883033752441, "learning_rate": 8.121330724070452e-06, "loss": 0.6852, "step": 4150 }, { "epoch": 0.2442604662086783, "grad_norm": 3.781940460205078, "learning_rate": 8.140900195694716e-06, "loss": 0.4755, "step": 4160 }, { "epoch": 0.24484763079091068, "grad_norm": 4.323094844818115, "learning_rate": 8.160469667318983e-06, "loss": 0.5363, "step": 4170 }, { "epoch": 0.24543479537314308, "grad_norm": 5.937984466552734, "learning_rate": 8.180039138943249e-06, "loss": 0.5279, "step": 4180 }, { "epoch": 0.24602195995537549, "grad_norm": 3.1866044998168945, "learning_rate": 8.199608610567515e-06, "loss": 0.6, "step": 4190 }, { "epoch": 0.2466091245376079, "grad_norm": 7.171806812286377, "learning_rate": 8.219178082191782e-06, "loss": 0.7267, "step": 4200 }, { "epoch": 0.2471962891198403, "grad_norm": 2.8264198303222656, "learning_rate": 8.238747553816048e-06, "loss": 0.5695, "step": 4210 }, { "epoch": 0.2477834537020727, "grad_norm": 6.773881912231445, "learning_rate": 8.258317025440313e-06, "loss": 0.4918, "step": 4220 }, { "epoch": 0.2483706182843051, "grad_norm": 4.005953311920166, "learning_rate": 8.27788649706458e-06, "loss": 0.827, "step": 4230 }, { "epoch": 0.2489577828665375, "grad_norm": 9.306106567382812, "learning_rate": 8.297455968688845e-06, "loss": 0.624, "step": 4240 }, { "epoch": 0.2495449474487699, "grad_norm": 2.261610507965088, "learning_rate": 8.317025440313112e-06, "loss": 0.4982, "step": 4250 }, { "epoch": 0.2501321120310023, "grad_norm": 10.220343589782715, "learning_rate": 8.336594911937378e-06, "loss": 0.5955, "step": 4260 }, { "epoch": 0.2507192766132347, "grad_norm": 3.321784019470215, "learning_rate": 8.356164383561644e-06, "loss": 0.4499, "step": 4270 }, { "epoch": 0.2513064411954671, "grad_norm": 1.9773684740066528, "learning_rate": 8.37573385518591e-06, "loss": 0.6396, "step": 4280 }, { "epoch": 0.2518936057776995, "grad_norm": 3.483315944671631, "learning_rate": 8.395303326810177e-06, "loss": 0.5079, "step": 4290 }, { "epoch": 0.2524807703599319, "grad_norm": 3.6201164722442627, "learning_rate": 8.414872798434442e-06, "loss": 0.6604, "step": 4300 }, { "epoch": 0.2530679349421643, "grad_norm": 6.333024978637695, "learning_rate": 8.43444227005871e-06, "loss": 0.7742, "step": 4310 }, { "epoch": 0.2536550995243967, "grad_norm": 3.683459997177124, "learning_rate": 8.454011741682975e-06, "loss": 0.5996, "step": 4320 }, { "epoch": 0.2542422641066291, "grad_norm": 6.38966178894043, "learning_rate": 8.473581213307241e-06, "loss": 0.7672, "step": 4330 }, { "epoch": 0.2548294286888615, "grad_norm": 9.817891120910645, "learning_rate": 8.493150684931507e-06, "loss": 0.6762, "step": 4340 }, { "epoch": 0.2554165932710939, "grad_norm": 7.825474739074707, "learning_rate": 8.512720156555774e-06, "loss": 0.6992, "step": 4350 }, { "epoch": 0.2560037578533263, "grad_norm": 3.5906262397766113, "learning_rate": 8.53228962818004e-06, "loss": 0.5113, "step": 4360 }, { "epoch": 0.2565909224355587, "grad_norm": 4.373824119567871, "learning_rate": 8.551859099804306e-06, "loss": 0.5391, "step": 4370 }, { "epoch": 0.2571780870177911, "grad_norm": 3.9752235412597656, "learning_rate": 8.571428571428571e-06, "loss": 0.5696, "step": 4380 }, { "epoch": 0.2577652516000235, "grad_norm": 3.404933214187622, "learning_rate": 8.590998043052839e-06, "loss": 0.477, "step": 4390 }, { "epoch": 0.2583524161822559, "grad_norm": 2.0752346515655518, "learning_rate": 8.610567514677104e-06, "loss": 0.6047, "step": 4400 }, { "epoch": 0.2589395807644883, "grad_norm": 2.2224361896514893, "learning_rate": 8.63013698630137e-06, "loss": 0.6288, "step": 4410 }, { "epoch": 0.2595267453467207, "grad_norm": 5.2124528884887695, "learning_rate": 8.649706457925636e-06, "loss": 0.6445, "step": 4420 }, { "epoch": 0.2601139099289531, "grad_norm": 6.626575946807861, "learning_rate": 8.669275929549903e-06, "loss": 0.5822, "step": 4430 }, { "epoch": 0.26070107451118546, "grad_norm": 4.58890438079834, "learning_rate": 8.688845401174169e-06, "loss": 0.5732, "step": 4440 }, { "epoch": 0.26128823909341786, "grad_norm": 3.2342369556427, "learning_rate": 8.708414872798435e-06, "loss": 0.4876, "step": 4450 }, { "epoch": 0.26187540367565026, "grad_norm": 7.817227363586426, "learning_rate": 8.7279843444227e-06, "loss": 0.7613, "step": 4460 }, { "epoch": 0.26246256825788267, "grad_norm": 7.8602166175842285, "learning_rate": 8.747553816046968e-06, "loss": 0.6199, "step": 4470 }, { "epoch": 0.26304973284011507, "grad_norm": 5.392611026763916, "learning_rate": 8.767123287671233e-06, "loss": 0.8085, "step": 4480 }, { "epoch": 0.26363689742234747, "grad_norm": 4.866722106933594, "learning_rate": 8.786692759295499e-06, "loss": 0.6355, "step": 4490 }, { "epoch": 0.2642240620045799, "grad_norm": 3.823310136795044, "learning_rate": 8.806262230919765e-06, "loss": 0.7046, "step": 4500 }, { "epoch": 0.2648112265868123, "grad_norm": 3.458864450454712, "learning_rate": 8.825831702544032e-06, "loss": 0.5737, "step": 4510 }, { "epoch": 0.2653983911690447, "grad_norm": 11.440035820007324, "learning_rate": 8.845401174168298e-06, "loss": 0.7588, "step": 4520 }, { "epoch": 0.2659855557512771, "grad_norm": 3.240044593811035, "learning_rate": 8.864970645792564e-06, "loss": 0.5954, "step": 4530 }, { "epoch": 0.2665727203335095, "grad_norm": 6.180783748626709, "learning_rate": 8.88454011741683e-06, "loss": 0.7185, "step": 4540 }, { "epoch": 0.2671598849157419, "grad_norm": 3.281062602996826, "learning_rate": 8.904109589041097e-06, "loss": 0.5032, "step": 4550 }, { "epoch": 0.2677470494979743, "grad_norm": 2.261284351348877, "learning_rate": 8.923679060665362e-06, "loss": 0.6768, "step": 4560 }, { "epoch": 0.2683342140802067, "grad_norm": 3.0362026691436768, "learning_rate": 8.943248532289628e-06, "loss": 0.6294, "step": 4570 }, { "epoch": 0.2689213786624391, "grad_norm": 5.488641262054443, "learning_rate": 8.962818003913895e-06, "loss": 0.5564, "step": 4580 }, { "epoch": 0.2695085432446715, "grad_norm": 7.4079813957214355, "learning_rate": 8.982387475538161e-06, "loss": 0.6177, "step": 4590 }, { "epoch": 0.2700957078269039, "grad_norm": 6.294201374053955, "learning_rate": 9.001956947162427e-06, "loss": 0.6017, "step": 4600 }, { "epoch": 0.2706828724091363, "grad_norm": 7.717345714569092, "learning_rate": 9.021526418786694e-06, "loss": 0.6863, "step": 4610 }, { "epoch": 0.2712700369913687, "grad_norm": 7.530275344848633, "learning_rate": 9.04109589041096e-06, "loss": 0.5527, "step": 4620 }, { "epoch": 0.2718572015736011, "grad_norm": 6.99866247177124, "learning_rate": 9.060665362035226e-06, "loss": 0.6839, "step": 4630 }, { "epoch": 0.2724443661558335, "grad_norm": 4.096059799194336, "learning_rate": 9.080234833659491e-06, "loss": 0.5276, "step": 4640 }, { "epoch": 0.27303153073806585, "grad_norm": 4.540721416473389, "learning_rate": 9.099804305283759e-06, "loss": 0.6968, "step": 4650 }, { "epoch": 0.27361869532029826, "grad_norm": 3.65529203414917, "learning_rate": 9.119373776908024e-06, "loss": 0.722, "step": 4660 }, { "epoch": 0.27420585990253066, "grad_norm": 5.810719966888428, "learning_rate": 9.13894324853229e-06, "loss": 0.5133, "step": 4670 }, { "epoch": 0.27479302448476306, "grad_norm": 6.909417629241943, "learning_rate": 9.158512720156556e-06, "loss": 0.6124, "step": 4680 }, { "epoch": 0.27538018906699546, "grad_norm": 3.062727212905884, "learning_rate": 9.178082191780823e-06, "loss": 0.4726, "step": 4690 }, { "epoch": 0.27596735364922786, "grad_norm": 2.067209482192993, "learning_rate": 9.197651663405089e-06, "loss": 0.712, "step": 4700 }, { "epoch": 0.27655451823146027, "grad_norm": 4.781550884246826, "learning_rate": 9.217221135029355e-06, "loss": 0.716, "step": 4710 }, { "epoch": 0.27714168281369267, "grad_norm": 10.713242530822754, "learning_rate": 9.23679060665362e-06, "loss": 0.6335, "step": 4720 }, { "epoch": 0.27772884739592507, "grad_norm": 3.2099177837371826, "learning_rate": 9.256360078277888e-06, "loss": 0.7545, "step": 4730 }, { "epoch": 0.2783160119781575, "grad_norm": 4.745733737945557, "learning_rate": 9.275929549902153e-06, "loss": 0.5426, "step": 4740 }, { "epoch": 0.2789031765603899, "grad_norm": 7.140280723571777, "learning_rate": 9.295499021526419e-06, "loss": 0.562, "step": 4750 }, { "epoch": 0.2794903411426223, "grad_norm": 6.0002593994140625, "learning_rate": 9.315068493150685e-06, "loss": 0.6453, "step": 4760 }, { "epoch": 0.2800775057248547, "grad_norm": 9.164667129516602, "learning_rate": 9.334637964774952e-06, "loss": 0.6446, "step": 4770 }, { "epoch": 0.2806646703070871, "grad_norm": 2.9685006141662598, "learning_rate": 9.354207436399218e-06, "loss": 0.5556, "step": 4780 }, { "epoch": 0.2812518348893195, "grad_norm": 5.483701705932617, "learning_rate": 9.373776908023484e-06, "loss": 0.6482, "step": 4790 }, { "epoch": 0.2818389994715519, "grad_norm": 3.977638006210327, "learning_rate": 9.393346379647749e-06, "loss": 0.6073, "step": 4800 }, { "epoch": 0.2824261640537843, "grad_norm": 5.797086715698242, "learning_rate": 9.412915851272017e-06, "loss": 0.6284, "step": 4810 }, { "epoch": 0.2830133286360167, "grad_norm": 6.94316291809082, "learning_rate": 9.432485322896282e-06, "loss": 0.495, "step": 4820 }, { "epoch": 0.2836004932182491, "grad_norm": 10.304445266723633, "learning_rate": 9.452054794520548e-06, "loss": 0.6804, "step": 4830 }, { "epoch": 0.2841876578004815, "grad_norm": 7.155735015869141, "learning_rate": 9.471624266144814e-06, "loss": 0.5639, "step": 4840 }, { "epoch": 0.2847748223827139, "grad_norm": 3.819913864135742, "learning_rate": 9.49119373776908e-06, "loss": 0.5725, "step": 4850 }, { "epoch": 0.2853619869649463, "grad_norm": 7.471410751342773, "learning_rate": 9.510763209393347e-06, "loss": 0.7276, "step": 4860 }, { "epoch": 0.28594915154717865, "grad_norm": 4.625710487365723, "learning_rate": 9.530332681017614e-06, "loss": 0.5935, "step": 4870 }, { "epoch": 0.28653631612941105, "grad_norm": 4.315067768096924, "learning_rate": 9.549902152641878e-06, "loss": 0.6949, "step": 4880 }, { "epoch": 0.28712348071164345, "grad_norm": 4.235372543334961, "learning_rate": 9.569471624266146e-06, "loss": 0.5853, "step": 4890 }, { "epoch": 0.28771064529387586, "grad_norm": 1.4787311553955078, "learning_rate": 9.589041095890411e-06, "loss": 0.6153, "step": 4900 }, { "epoch": 0.28829780987610826, "grad_norm": 3.2906205654144287, "learning_rate": 9.608610567514677e-06, "loss": 0.5445, "step": 4910 }, { "epoch": 0.28888497445834066, "grad_norm": 6.81498908996582, "learning_rate": 9.628180039138944e-06, "loss": 0.6209, "step": 4920 }, { "epoch": 0.28947213904057306, "grad_norm": 4.71338415145874, "learning_rate": 9.64774951076321e-06, "loss": 0.5911, "step": 4930 }, { "epoch": 0.29005930362280546, "grad_norm": 11.264694213867188, "learning_rate": 9.667318982387476e-06, "loss": 0.7129, "step": 4940 }, { "epoch": 0.29064646820503787, "grad_norm": 7.38629150390625, "learning_rate": 9.686888454011743e-06, "loss": 0.7781, "step": 4950 }, { "epoch": 0.29123363278727027, "grad_norm": 4.040987968444824, "learning_rate": 9.706457925636007e-06, "loss": 0.6209, "step": 4960 }, { "epoch": 0.29182079736950267, "grad_norm": 2.265995502471924, "learning_rate": 9.726027397260275e-06, "loss": 0.618, "step": 4970 }, { "epoch": 0.2924079619517351, "grad_norm": 6.1773905754089355, "learning_rate": 9.74559686888454e-06, "loss": 0.5967, "step": 4980 }, { "epoch": 0.2929951265339675, "grad_norm": 8.991494178771973, "learning_rate": 9.765166340508806e-06, "loss": 0.5711, "step": 4990 }, { "epoch": 0.2935822911161999, "grad_norm": 11.869132041931152, "learning_rate": 9.784735812133073e-06, "loss": 0.6316, "step": 5000 }, { "epoch": 0.2941694556984323, "grad_norm": 6.029277324676514, "learning_rate": 9.804305283757339e-06, "loss": 0.4883, "step": 5010 }, { "epoch": 0.2947566202806647, "grad_norm": 5.543087959289551, "learning_rate": 9.823874755381605e-06, "loss": 0.4928, "step": 5020 }, { "epoch": 0.2953437848628971, "grad_norm": 8.76091194152832, "learning_rate": 9.843444227005872e-06, "loss": 0.5986, "step": 5030 }, { "epoch": 0.2959309494451295, "grad_norm": 2.6387522220611572, "learning_rate": 9.863013698630138e-06, "loss": 0.6395, "step": 5040 }, { "epoch": 0.2965181140273619, "grad_norm": 3.0531487464904785, "learning_rate": 9.882583170254404e-06, "loss": 0.6075, "step": 5050 }, { "epoch": 0.2971052786095943, "grad_norm": 2.991830587387085, "learning_rate": 9.902152641878669e-06, "loss": 0.6181, "step": 5060 }, { "epoch": 0.2976924431918267, "grad_norm": 9.913690567016602, "learning_rate": 9.921722113502935e-06, "loss": 0.7006, "step": 5070 }, { "epoch": 0.29827960777405904, "grad_norm": 2.7302327156066895, "learning_rate": 9.941291585127202e-06, "loss": 0.6767, "step": 5080 }, { "epoch": 0.29886677235629144, "grad_norm": 7.019122123718262, "learning_rate": 9.960861056751468e-06, "loss": 0.5103, "step": 5090 }, { "epoch": 0.29945393693852385, "grad_norm": 4.218756675720215, "learning_rate": 9.980430528375734e-06, "loss": 0.5078, "step": 5100 }, { "epoch": 0.30004110152075625, "grad_norm": 3.235452651977539, "learning_rate": 1e-05, "loss": 0.644, "step": 5110 }, { "epoch": 0.30062826610298865, "grad_norm": 3.9394636154174805, "learning_rate": 9.999998833069118e-06, "loss": 0.5725, "step": 5120 }, { "epoch": 0.30121543068522105, "grad_norm": 6.7559990882873535, "learning_rate": 9.999995332277016e-06, "loss": 0.602, "step": 5130 }, { "epoch": 0.30180259526745346, "grad_norm": 1.6295207738876343, "learning_rate": 9.999989497625328e-06, "loss": 0.5887, "step": 5140 }, { "epoch": 0.30238975984968586, "grad_norm": 5.160134792327881, "learning_rate": 9.999981329116778e-06, "loss": 0.5531, "step": 5150 }, { "epoch": 0.30297692443191826, "grad_norm": 10.499273300170898, "learning_rate": 9.999970826755177e-06, "loss": 0.7256, "step": 5160 }, { "epoch": 0.30356408901415066, "grad_norm": 2.995018482208252, "learning_rate": 9.999957990545431e-06, "loss": 0.6297, "step": 5170 }, { "epoch": 0.30415125359638306, "grad_norm": 3.9389097690582275, "learning_rate": 9.999942820493527e-06, "loss": 0.5551, "step": 5180 }, { "epoch": 0.30473841817861547, "grad_norm": 6.116221904754639, "learning_rate": 9.999925316606549e-06, "loss": 0.8174, "step": 5190 }, { "epoch": 0.30532558276084787, "grad_norm": 7.173950672149658, "learning_rate": 9.999905478892666e-06, "loss": 0.6486, "step": 5200 }, { "epoch": 0.30591274734308027, "grad_norm": 7.122118949890137, "learning_rate": 9.99988330736114e-06, "loss": 0.5873, "step": 5210 }, { "epoch": 0.3064999119253127, "grad_norm": 5.60111665725708, "learning_rate": 9.999858802022317e-06, "loss": 0.5434, "step": 5220 }, { "epoch": 0.3070870765075451, "grad_norm": 15.755813598632812, "learning_rate": 9.999831962887637e-06, "loss": 0.725, "step": 5230 }, { "epoch": 0.3076742410897775, "grad_norm": 5.500053882598877, "learning_rate": 9.999802789969628e-06, "loss": 0.6784, "step": 5240 }, { "epoch": 0.3082614056720099, "grad_norm": 5.215601921081543, "learning_rate": 9.999771283281905e-06, "loss": 0.5685, "step": 5250 }, { "epoch": 0.3088485702542423, "grad_norm": 7.9521307945251465, "learning_rate": 9.999737442839177e-06, "loss": 0.6497, "step": 5260 }, { "epoch": 0.3094357348364747, "grad_norm": 7.308295249938965, "learning_rate": 9.99970126865724e-06, "loss": 0.5844, "step": 5270 }, { "epoch": 0.3100228994187071, "grad_norm": 6.186158180236816, "learning_rate": 9.999662760752975e-06, "loss": 0.5825, "step": 5280 }, { "epoch": 0.3106100640009395, "grad_norm": 15.919605255126953, "learning_rate": 9.999621919144363e-06, "loss": 0.6067, "step": 5290 }, { "epoch": 0.31119722858317184, "grad_norm": 3.07765531539917, "learning_rate": 9.999578743850461e-06, "loss": 0.6158, "step": 5300 }, { "epoch": 0.31178439316540424, "grad_norm": 4.1896586418151855, "learning_rate": 9.999533234891427e-06, "loss": 0.4831, "step": 5310 }, { "epoch": 0.31237155774763664, "grad_norm": 3.7150893211364746, "learning_rate": 9.999485392288499e-06, "loss": 0.5772, "step": 5320 }, { "epoch": 0.31295872232986904, "grad_norm": 3.0818932056427, "learning_rate": 9.999435216064014e-06, "loss": 0.5485, "step": 5330 }, { "epoch": 0.31354588691210145, "grad_norm": 8.142844200134277, "learning_rate": 9.99938270624139e-06, "loss": 0.5805, "step": 5340 }, { "epoch": 0.31413305149433385, "grad_norm": 3.7028982639312744, "learning_rate": 9.999327862845135e-06, "loss": 0.7195, "step": 5350 }, { "epoch": 0.31472021607656625, "grad_norm": 2.4604992866516113, "learning_rate": 9.99927068590085e-06, "loss": 0.7769, "step": 5360 }, { "epoch": 0.31530738065879865, "grad_norm": 3.5085506439208984, "learning_rate": 9.999211175435225e-06, "loss": 0.5893, "step": 5370 }, { "epoch": 0.31589454524103106, "grad_norm": 5.2295002937316895, "learning_rate": 9.999149331476037e-06, "loss": 0.7521, "step": 5380 }, { "epoch": 0.31648170982326346, "grad_norm": 2.2353434562683105, "learning_rate": 9.999085154052153e-06, "loss": 0.6748, "step": 5390 }, { "epoch": 0.31706887440549586, "grad_norm": 12.728256225585938, "learning_rate": 9.99901864319353e-06, "loss": 0.7465, "step": 5400 }, { "epoch": 0.31765603898772826, "grad_norm": 7.154354095458984, "learning_rate": 9.998949798931212e-06, "loss": 0.5486, "step": 5410 }, { "epoch": 0.31824320356996066, "grad_norm": 8.987802505493164, "learning_rate": 9.998878621297333e-06, "loss": 0.5242, "step": 5420 }, { "epoch": 0.31883036815219307, "grad_norm": 9.962322235107422, "learning_rate": 9.99880511032512e-06, "loss": 0.5995, "step": 5430 }, { "epoch": 0.31941753273442547, "grad_norm": 11.044449806213379, "learning_rate": 9.998729266048884e-06, "loss": 0.6344, "step": 5440 }, { "epoch": 0.32000469731665787, "grad_norm": 3.798555850982666, "learning_rate": 9.998651088504024e-06, "loss": 0.6202, "step": 5450 }, { "epoch": 0.3205918618988903, "grad_norm": 9.127548217773438, "learning_rate": 9.998570577727035e-06, "loss": 0.6546, "step": 5460 }, { "epoch": 0.3211790264811227, "grad_norm": 4.475358486175537, "learning_rate": 9.998487733755498e-06, "loss": 0.5211, "step": 5470 }, { "epoch": 0.3217661910633551, "grad_norm": 2.422621250152588, "learning_rate": 9.99840255662808e-06, "loss": 0.5683, "step": 5480 }, { "epoch": 0.3223533556455875, "grad_norm": 3.907390594482422, "learning_rate": 9.998315046384539e-06, "loss": 0.4688, "step": 5490 }, { "epoch": 0.3229405202278199, "grad_norm": 3.874915361404419, "learning_rate": 9.998225203065724e-06, "loss": 0.5964, "step": 5500 }, { "epoch": 0.3235276848100523, "grad_norm": 8.570391654968262, "learning_rate": 9.99813302671357e-06, "loss": 0.6633, "step": 5510 }, { "epoch": 0.32411484939228463, "grad_norm": 4.9544878005981445, "learning_rate": 9.998038517371103e-06, "loss": 0.5933, "step": 5520 }, { "epoch": 0.32470201397451703, "grad_norm": 4.367298603057861, "learning_rate": 9.997941675082437e-06, "loss": 0.5374, "step": 5530 }, { "epoch": 0.32528917855674944, "grad_norm": 8.735247611999512, "learning_rate": 9.997842499892775e-06, "loss": 0.5334, "step": 5540 }, { "epoch": 0.32587634313898184, "grad_norm": 7.210317134857178, "learning_rate": 9.99774099184841e-06, "loss": 0.5148, "step": 5550 }, { "epoch": 0.32646350772121424, "grad_norm": 5.529973983764648, "learning_rate": 9.997637150996723e-06, "loss": 0.6212, "step": 5560 }, { "epoch": 0.32705067230344664, "grad_norm": 3.8487045764923096, "learning_rate": 9.997530977386186e-06, "loss": 0.5584, "step": 5570 }, { "epoch": 0.32763783688567905, "grad_norm": 2.74155855178833, "learning_rate": 9.997422471066354e-06, "loss": 0.5775, "step": 5580 }, { "epoch": 0.32822500146791145, "grad_norm": 3.39546537399292, "learning_rate": 9.997311632087876e-06, "loss": 0.5991, "step": 5590 }, { "epoch": 0.32881216605014385, "grad_norm": 3.2050046920776367, "learning_rate": 9.99719846050249e-06, "loss": 0.4692, "step": 5600 }, { "epoch": 0.32939933063237625, "grad_norm": 3.6047050952911377, "learning_rate": 9.99708295636302e-06, "loss": 0.5659, "step": 5610 }, { "epoch": 0.32998649521460865, "grad_norm": 10.6340913772583, "learning_rate": 9.996965119723383e-06, "loss": 0.5699, "step": 5620 }, { "epoch": 0.33057365979684106, "grad_norm": 4.015137672424316, "learning_rate": 9.996844950638578e-06, "loss": 0.503, "step": 5630 }, { "epoch": 0.33116082437907346, "grad_norm": 4.619016170501709, "learning_rate": 9.996722449164698e-06, "loss": 0.5423, "step": 5640 }, { "epoch": 0.33174798896130586, "grad_norm": 5.152411460876465, "learning_rate": 9.996597615358923e-06, "loss": 0.4946, "step": 5650 }, { "epoch": 0.33233515354353826, "grad_norm": 8.746570587158203, "learning_rate": 9.996470449279522e-06, "loss": 0.5981, "step": 5660 }, { "epoch": 0.33292231812577067, "grad_norm": 11.537870407104492, "learning_rate": 9.996340950985856e-06, "loss": 0.6062, "step": 5670 }, { "epoch": 0.33350948270800307, "grad_norm": 6.788983345031738, "learning_rate": 9.996209120538368e-06, "loss": 0.5152, "step": 5680 }, { "epoch": 0.33409664729023547, "grad_norm": 3.3549017906188965, "learning_rate": 9.99607495799859e-06, "loss": 0.6901, "step": 5690 }, { "epoch": 0.3346838118724679, "grad_norm": 5.380126476287842, "learning_rate": 9.99593846342915e-06, "loss": 0.5397, "step": 5700 }, { "epoch": 0.3352709764547003, "grad_norm": 4.096959590911865, "learning_rate": 9.995799636893759e-06, "loss": 0.615, "step": 5710 }, { "epoch": 0.3358581410369327, "grad_norm": 3.5941827297210693, "learning_rate": 9.995658478457216e-06, "loss": 0.5207, "step": 5720 }, { "epoch": 0.336445305619165, "grad_norm": 1.6300021409988403, "learning_rate": 9.99551498818541e-06, "loss": 0.5537, "step": 5730 }, { "epoch": 0.3370324702013974, "grad_norm": 3.3654353618621826, "learning_rate": 9.99536916614532e-06, "loss": 0.5534, "step": 5740 }, { "epoch": 0.33761963478362983, "grad_norm": 8.123507499694824, "learning_rate": 9.995221012405012e-06, "loss": 0.7485, "step": 5750 }, { "epoch": 0.33820679936586223, "grad_norm": 3.114607095718384, "learning_rate": 9.995070527033637e-06, "loss": 0.6128, "step": 5760 }, { "epoch": 0.33879396394809463, "grad_norm": 10.342657089233398, "learning_rate": 9.99491771010144e-06, "loss": 0.5821, "step": 5770 }, { "epoch": 0.33938112853032704, "grad_norm": 7.309835433959961, "learning_rate": 9.994762561679749e-06, "loss": 0.6707, "step": 5780 }, { "epoch": 0.33996829311255944, "grad_norm": 5.747932434082031, "learning_rate": 9.994605081840985e-06, "loss": 0.5107, "step": 5790 }, { "epoch": 0.34055545769479184, "grad_norm": 2.5122182369232178, "learning_rate": 9.994445270658657e-06, "loss": 0.7826, "step": 5800 }, { "epoch": 0.34114262227702424, "grad_norm": 1.2095619440078735, "learning_rate": 9.994283128207358e-06, "loss": 0.6615, "step": 5810 }, { "epoch": 0.34172978685925665, "grad_norm": 4.407735824584961, "learning_rate": 9.99411865456277e-06, "loss": 0.6701, "step": 5820 }, { "epoch": 0.34231695144148905, "grad_norm": 1.7185497283935547, "learning_rate": 9.993951849801668e-06, "loss": 0.6776, "step": 5830 }, { "epoch": 0.34290411602372145, "grad_norm": 1.793148398399353, "learning_rate": 9.993782714001911e-06, "loss": 0.5237, "step": 5840 }, { "epoch": 0.34349128060595385, "grad_norm": 6.519779682159424, "learning_rate": 9.993611247242448e-06, "loss": 0.6474, "step": 5850 }, { "epoch": 0.34407844518818625, "grad_norm": 6.271698951721191, "learning_rate": 9.99343744960331e-06, "loss": 0.562, "step": 5860 }, { "epoch": 0.34466560977041866, "grad_norm": 2.9669554233551025, "learning_rate": 9.993261321165628e-06, "loss": 0.6193, "step": 5870 }, { "epoch": 0.34525277435265106, "grad_norm": 2.9163928031921387, "learning_rate": 9.993082862011609e-06, "loss": 0.5238, "step": 5880 }, { "epoch": 0.34583993893488346, "grad_norm": 4.638806343078613, "learning_rate": 9.992902072224556e-06, "loss": 0.6525, "step": 5890 }, { "epoch": 0.34642710351711586, "grad_norm": 2.345399856567383, "learning_rate": 9.99271895188885e-06, "loss": 0.7448, "step": 5900 }, { "epoch": 0.34701426809934827, "grad_norm": 6.022037029266357, "learning_rate": 9.992533501089977e-06, "loss": 0.5487, "step": 5910 }, { "epoch": 0.34760143268158067, "grad_norm": 2.764012336730957, "learning_rate": 9.992345719914492e-06, "loss": 0.6242, "step": 5920 }, { "epoch": 0.34818859726381307, "grad_norm": 3.020282030105591, "learning_rate": 9.992155608450047e-06, "loss": 0.5179, "step": 5930 }, { "epoch": 0.3487757618460455, "grad_norm": 2.4977781772613525, "learning_rate": 9.991963166785386e-06, "loss": 0.5908, "step": 5940 }, { "epoch": 0.3493629264282778, "grad_norm": 21.130285263061523, "learning_rate": 9.99176839501033e-06, "loss": 0.6316, "step": 5950 }, { "epoch": 0.3499500910105102, "grad_norm": 3.112691879272461, "learning_rate": 9.991571293215794e-06, "loss": 0.4599, "step": 5960 }, { "epoch": 0.3505372555927426, "grad_norm": 7.881024360656738, "learning_rate": 9.991371861493781e-06, "loss": 0.5163, "step": 5970 }, { "epoch": 0.351124420174975, "grad_norm": 7.45168924331665, "learning_rate": 9.991170099937382e-06, "loss": 0.6894, "step": 5980 }, { "epoch": 0.35171158475720743, "grad_norm": 10.640283584594727, "learning_rate": 9.99096600864077e-06, "loss": 0.6773, "step": 5990 }, { "epoch": 0.35229874933943983, "grad_norm": 2.5065724849700928, "learning_rate": 9.990759587699211e-06, "loss": 0.5653, "step": 6000 }, { "epoch": 0.35288591392167223, "grad_norm": 6.301635265350342, "learning_rate": 9.990550837209056e-06, "loss": 0.5072, "step": 6010 }, { "epoch": 0.35347307850390464, "grad_norm": 10.104084014892578, "learning_rate": 9.990339757267746e-06, "loss": 0.5607, "step": 6020 }, { "epoch": 0.35406024308613704, "grad_norm": 8.08241081237793, "learning_rate": 9.990126347973805e-06, "loss": 0.6223, "step": 6030 }, { "epoch": 0.35464740766836944, "grad_norm": 7.131372451782227, "learning_rate": 9.989910609426848e-06, "loss": 0.5672, "step": 6040 }, { "epoch": 0.35523457225060184, "grad_norm": 4.604576110839844, "learning_rate": 9.989692541727572e-06, "loss": 0.497, "step": 6050 }, { "epoch": 0.35582173683283425, "grad_norm": 4.210675239562988, "learning_rate": 9.98947214497777e-06, "loss": 0.6769, "step": 6060 }, { "epoch": 0.35640890141506665, "grad_norm": 7.392538547515869, "learning_rate": 9.989249419280314e-06, "loss": 0.5855, "step": 6070 }, { "epoch": 0.35699606599729905, "grad_norm": 4.50574254989624, "learning_rate": 9.989024364739168e-06, "loss": 0.6466, "step": 6080 }, { "epoch": 0.35758323057953145, "grad_norm": 7.6474080085754395, "learning_rate": 9.98879698145938e-06, "loss": 0.588, "step": 6090 }, { "epoch": 0.35817039516176385, "grad_norm": 3.4185569286346436, "learning_rate": 9.988567269547088e-06, "loss": 0.6722, "step": 6100 }, { "epoch": 0.35875755974399626, "grad_norm": 9.799389839172363, "learning_rate": 9.988335229109513e-06, "loss": 0.7102, "step": 6110 }, { "epoch": 0.35934472432622866, "grad_norm": 3.126814603805542, "learning_rate": 9.988100860254966e-06, "loss": 0.7009, "step": 6120 }, { "epoch": 0.35993188890846106, "grad_norm": 6.588407516479492, "learning_rate": 9.987864163092844e-06, "loss": 0.7347, "step": 6130 }, { "epoch": 0.36051905349069346, "grad_norm": 1.7972177267074585, "learning_rate": 9.987625137733631e-06, "loss": 0.5933, "step": 6140 }, { "epoch": 0.36110621807292587, "grad_norm": 4.455419540405273, "learning_rate": 9.987383784288896e-06, "loss": 0.5457, "step": 6150 }, { "epoch": 0.3616933826551582, "grad_norm": 5.6661272048950195, "learning_rate": 9.987140102871298e-06, "loss": 0.5926, "step": 6160 }, { "epoch": 0.3622805472373906, "grad_norm": 6.470241546630859, "learning_rate": 9.986894093594579e-06, "loss": 0.6646, "step": 6170 }, { "epoch": 0.362867711819623, "grad_norm": 4.451828956604004, "learning_rate": 9.986645756573569e-06, "loss": 0.609, "step": 6180 }, { "epoch": 0.3634548764018554, "grad_norm": 5.165185451507568, "learning_rate": 9.986395091924187e-06, "loss": 0.6924, "step": 6190 }, { "epoch": 0.3640420409840878, "grad_norm": 4.151994705200195, "learning_rate": 9.986142099763438e-06, "loss": 0.616, "step": 6200 }, { "epoch": 0.3646292055663202, "grad_norm": 2.930227756500244, "learning_rate": 9.985886780209405e-06, "loss": 0.5972, "step": 6210 }, { "epoch": 0.3652163701485526, "grad_norm": 2.7684619426727295, "learning_rate": 9.985629133381271e-06, "loss": 0.5505, "step": 6220 }, { "epoch": 0.36580353473078503, "grad_norm": 2.651028633117676, "learning_rate": 9.985369159399296e-06, "loss": 0.6381, "step": 6230 }, { "epoch": 0.36639069931301743, "grad_norm": 3.5409915447235107, "learning_rate": 9.985106858384827e-06, "loss": 0.6178, "step": 6240 }, { "epoch": 0.36697786389524983, "grad_norm": 8.514284133911133, "learning_rate": 9.9848422304603e-06, "loss": 0.5587, "step": 6250 }, { "epoch": 0.36756502847748224, "grad_norm": 2.6811904907226562, "learning_rate": 9.984575275749236e-06, "loss": 0.5352, "step": 6260 }, { "epoch": 0.36815219305971464, "grad_norm": 5.069333553314209, "learning_rate": 9.984305994376242e-06, "loss": 0.4225, "step": 6270 }, { "epoch": 0.36873935764194704, "grad_norm": 1.8971021175384521, "learning_rate": 9.984034386467011e-06, "loss": 0.7069, "step": 6280 }, { "epoch": 0.36932652222417944, "grad_norm": 4.372706890106201, "learning_rate": 9.983760452148325e-06, "loss": 0.5702, "step": 6290 }, { "epoch": 0.36991368680641185, "grad_norm": 12.852090835571289, "learning_rate": 9.983484191548046e-06, "loss": 0.643, "step": 6300 }, { "epoch": 0.37050085138864425, "grad_norm": 3.6704492568969727, "learning_rate": 9.983205604795123e-06, "loss": 0.5986, "step": 6310 }, { "epoch": 0.37108801597087665, "grad_norm": 2.2862977981567383, "learning_rate": 9.982924692019595e-06, "loss": 0.5595, "step": 6320 }, { "epoch": 0.37167518055310905, "grad_norm": 7.502762317657471, "learning_rate": 9.982641453352585e-06, "loss": 0.69, "step": 6330 }, { "epoch": 0.37226234513534145, "grad_norm": 11.606168746948242, "learning_rate": 9.982355888926299e-06, "loss": 0.6615, "step": 6340 }, { "epoch": 0.37284950971757386, "grad_norm": 5.932896614074707, "learning_rate": 9.982067998874032e-06, "loss": 0.6482, "step": 6350 }, { "epoch": 0.37343667429980626, "grad_norm": 7.688926696777344, "learning_rate": 9.981777783330164e-06, "loss": 0.7323, "step": 6360 }, { "epoch": 0.37402383888203866, "grad_norm": 6.55004358291626, "learning_rate": 9.981485242430159e-06, "loss": 0.5091, "step": 6370 }, { "epoch": 0.374611003464271, "grad_norm": 10.067377090454102, "learning_rate": 9.981190376310563e-06, "loss": 0.7209, "step": 6380 }, { "epoch": 0.3751981680465034, "grad_norm": 8.298162460327148, "learning_rate": 9.980893185109015e-06, "loss": 0.6159, "step": 6390 }, { "epoch": 0.3757853326287358, "grad_norm": 4.71786642074585, "learning_rate": 9.980593668964238e-06, "loss": 0.6048, "step": 6400 }, { "epoch": 0.3763724972109682, "grad_norm": 5.288054466247559, "learning_rate": 9.980291828016035e-06, "loss": 0.5979, "step": 6410 }, { "epoch": 0.3769596617932006, "grad_norm": 5.548589706420898, "learning_rate": 9.979987662405295e-06, "loss": 0.4613, "step": 6420 }, { "epoch": 0.377546826375433, "grad_norm": 9.280171394348145, "learning_rate": 9.979681172273998e-06, "loss": 0.7055, "step": 6430 }, { "epoch": 0.3781339909576654, "grad_norm": 6.500439643859863, "learning_rate": 9.979372357765202e-06, "loss": 0.6504, "step": 6440 }, { "epoch": 0.3787211555398978, "grad_norm": 6.261051654815674, "learning_rate": 9.979061219023056e-06, "loss": 0.5981, "step": 6450 }, { "epoch": 0.3793083201221302, "grad_norm": 2.609708309173584, "learning_rate": 9.978747756192789e-06, "loss": 0.5079, "step": 6460 }, { "epoch": 0.37989548470436263, "grad_norm": 10.266899108886719, "learning_rate": 9.978431969420718e-06, "loss": 0.5949, "step": 6470 }, { "epoch": 0.38048264928659503, "grad_norm": 11.339422225952148, "learning_rate": 9.978113858854242e-06, "loss": 0.6629, "step": 6480 }, { "epoch": 0.38106981386882743, "grad_norm": 13.437378883361816, "learning_rate": 9.977793424641845e-06, "loss": 0.5691, "step": 6490 }, { "epoch": 0.38165697845105984, "grad_norm": 14.395613670349121, "learning_rate": 9.977470666933102e-06, "loss": 0.5325, "step": 6500 }, { "epoch": 0.38224414303329224, "grad_norm": 2.7319774627685547, "learning_rate": 9.977145585878662e-06, "loss": 0.3942, "step": 6510 }, { "epoch": 0.38283130761552464, "grad_norm": 2.5312273502349854, "learning_rate": 9.976818181630268e-06, "loss": 0.5594, "step": 6520 }, { "epoch": 0.38341847219775704, "grad_norm": 5.414361953735352, "learning_rate": 9.97648845434074e-06, "loss": 0.4412, "step": 6530 }, { "epoch": 0.38400563677998945, "grad_norm": 3.0509438514709473, "learning_rate": 9.976156404163987e-06, "loss": 0.5522, "step": 6540 }, { "epoch": 0.38459280136222185, "grad_norm": 8.537776947021484, "learning_rate": 9.975822031255001e-06, "loss": 0.4939, "step": 6550 }, { "epoch": 0.38517996594445425, "grad_norm": 15.575627326965332, "learning_rate": 9.975485335769858e-06, "loss": 0.4469, "step": 6560 }, { "epoch": 0.38576713052668665, "grad_norm": 4.040783405303955, "learning_rate": 9.975146317865718e-06, "loss": 0.6414, "step": 6570 }, { "epoch": 0.38635429510891905, "grad_norm": 4.601790904998779, "learning_rate": 9.974804977700824e-06, "loss": 0.7132, "step": 6580 }, { "epoch": 0.38694145969115146, "grad_norm": 6.020332336425781, "learning_rate": 9.974461315434506e-06, "loss": 0.4702, "step": 6590 }, { "epoch": 0.3875286242733838, "grad_norm": 4.314665794372559, "learning_rate": 9.974115331227175e-06, "loss": 0.5392, "step": 6600 }, { "epoch": 0.3881157888556162, "grad_norm": 6.428915977478027, "learning_rate": 9.973767025240327e-06, "loss": 0.5581, "step": 6610 }, { "epoch": 0.3887029534378486, "grad_norm": 3.72163987159729, "learning_rate": 9.973416397636541e-06, "loss": 0.5704, "step": 6620 }, { "epoch": 0.389290118020081, "grad_norm": 3.5579073429107666, "learning_rate": 9.973063448579484e-06, "loss": 0.5173, "step": 6630 }, { "epoch": 0.3898772826023134, "grad_norm": 4.315727710723877, "learning_rate": 9.972708178233898e-06, "loss": 0.547, "step": 6640 }, { "epoch": 0.3904644471845458, "grad_norm": 5.145354747772217, "learning_rate": 9.972350586765614e-06, "loss": 0.5366, "step": 6650 }, { "epoch": 0.3910516117667782, "grad_norm": 2.9436864852905273, "learning_rate": 9.971990674341548e-06, "loss": 0.6038, "step": 6660 }, { "epoch": 0.3916387763490106, "grad_norm": 3.3838043212890625, "learning_rate": 9.971628441129697e-06, "loss": 0.5565, "step": 6670 }, { "epoch": 0.392225940931243, "grad_norm": 1.6684492826461792, "learning_rate": 9.971263887299141e-06, "loss": 0.5594, "step": 6680 }, { "epoch": 0.3928131055134754, "grad_norm": 6.691006660461426, "learning_rate": 9.970897013020041e-06, "loss": 0.5363, "step": 6690 }, { "epoch": 0.3934002700957078, "grad_norm": 4.6078972816467285, "learning_rate": 9.970527818463648e-06, "loss": 0.5895, "step": 6700 }, { "epoch": 0.39398743467794023, "grad_norm": 2.2187185287475586, "learning_rate": 9.97015630380229e-06, "loss": 0.5396, "step": 6710 }, { "epoch": 0.39457459926017263, "grad_norm": 4.78872013092041, "learning_rate": 9.96978246920938e-06, "loss": 0.5539, "step": 6720 }, { "epoch": 0.39516176384240503, "grad_norm": 1.7657095193862915, "learning_rate": 9.969406314859412e-06, "loss": 0.5497, "step": 6730 }, { "epoch": 0.39574892842463744, "grad_norm": 11.31129264831543, "learning_rate": 9.969027840927967e-06, "loss": 0.573, "step": 6740 }, { "epoch": 0.39633609300686984, "grad_norm": 5.778287410736084, "learning_rate": 9.968647047591705e-06, "loss": 0.5386, "step": 6750 }, { "epoch": 0.39692325758910224, "grad_norm": 2.912975549697876, "learning_rate": 9.968263935028367e-06, "loss": 0.5258, "step": 6760 }, { "epoch": 0.39751042217133464, "grad_norm": 2.072283983230591, "learning_rate": 9.967878503416785e-06, "loss": 0.5576, "step": 6770 }, { "epoch": 0.39809758675356705, "grad_norm": 3.12170672416687, "learning_rate": 9.967490752936864e-06, "loss": 0.5584, "step": 6780 }, { "epoch": 0.39868475133579945, "grad_norm": 7.238387584686279, "learning_rate": 9.967100683769596e-06, "loss": 0.5248, "step": 6790 }, { "epoch": 0.39927191591803185, "grad_norm": 5.4608378410339355, "learning_rate": 9.966708296097054e-06, "loss": 0.6259, "step": 6800 }, { "epoch": 0.3998590805002642, "grad_norm": 2.7188620567321777, "learning_rate": 9.966313590102396e-06, "loss": 0.4462, "step": 6810 }, { "epoch": 0.4004462450824966, "grad_norm": 5.420054912567139, "learning_rate": 9.965916565969857e-06, "loss": 0.5456, "step": 6820 }, { "epoch": 0.401033409664729, "grad_norm": 2.474376678466797, "learning_rate": 9.96551722388476e-06, "loss": 0.6195, "step": 6830 }, { "epoch": 0.4016205742469614, "grad_norm": 5.297699928283691, "learning_rate": 9.965115564033503e-06, "loss": 0.4189, "step": 6840 }, { "epoch": 0.4022077388291938, "grad_norm": 4.621553897857666, "learning_rate": 9.964711586603572e-06, "loss": 0.551, "step": 6850 }, { "epoch": 0.4027949034114262, "grad_norm": 3.39806866645813, "learning_rate": 9.964305291783532e-06, "loss": 0.6754, "step": 6860 }, { "epoch": 0.4033820679936586, "grad_norm": 9.939075469970703, "learning_rate": 9.96389667976303e-06, "loss": 0.4921, "step": 6870 }, { "epoch": 0.403969232575891, "grad_norm": 5.692109107971191, "learning_rate": 9.963485750732795e-06, "loss": 0.4593, "step": 6880 }, { "epoch": 0.4045563971581234, "grad_norm": 8.14481258392334, "learning_rate": 9.963072504884638e-06, "loss": 0.5252, "step": 6890 }, { "epoch": 0.4051435617403558, "grad_norm": 7.67168664932251, "learning_rate": 9.962656942411452e-06, "loss": 0.5866, "step": 6900 }, { "epoch": 0.4057307263225882, "grad_norm": 2.774575710296631, "learning_rate": 9.962239063507206e-06, "loss": 0.492, "step": 6910 }, { "epoch": 0.4063178909048206, "grad_norm": 4.643852710723877, "learning_rate": 9.961818868366957e-06, "loss": 0.6999, "step": 6920 }, { "epoch": 0.406905055487053, "grad_norm": 9.771285057067871, "learning_rate": 9.961396357186841e-06, "loss": 0.6475, "step": 6930 }, { "epoch": 0.4074922200692854, "grad_norm": 9.317487716674805, "learning_rate": 9.960971530164072e-06, "loss": 0.5642, "step": 6940 }, { "epoch": 0.40807938465151783, "grad_norm": 5.018975257873535, "learning_rate": 9.96054438749695e-06, "loss": 0.6262, "step": 6950 }, { "epoch": 0.40866654923375023, "grad_norm": 2.7766125202178955, "learning_rate": 9.960114929384853e-06, "loss": 0.4855, "step": 6960 }, { "epoch": 0.40925371381598263, "grad_norm": 4.8215484619140625, "learning_rate": 9.959683156028238e-06, "loss": 0.5432, "step": 6970 }, { "epoch": 0.40984087839821504, "grad_norm": 3.2492291927337646, "learning_rate": 9.959249067628647e-06, "loss": 0.6127, "step": 6980 }, { "epoch": 0.41042804298044744, "grad_norm": 5.823572158813477, "learning_rate": 9.958812664388701e-06, "loss": 0.7643, "step": 6990 }, { "epoch": 0.41101520756267984, "grad_norm": 5.256528854370117, "learning_rate": 9.9583739465121e-06, "loss": 0.5913, "step": 7000 }, { "epoch": 0.41160237214491224, "grad_norm": 7.374690532684326, "learning_rate": 9.957932914203625e-06, "loss": 0.7022, "step": 7010 }, { "epoch": 0.41218953672714465, "grad_norm": 5.901565074920654, "learning_rate": 9.957489567669137e-06, "loss": 0.5459, "step": 7020 }, { "epoch": 0.412776701309377, "grad_norm": 7.237551689147949, "learning_rate": 9.95704390711558e-06, "loss": 0.5053, "step": 7030 }, { "epoch": 0.4133638658916094, "grad_norm": 6.070929527282715, "learning_rate": 9.956595932750974e-06, "loss": 0.5543, "step": 7040 }, { "epoch": 0.4139510304738418, "grad_norm": 5.844623565673828, "learning_rate": 9.956145644784423e-06, "loss": 0.5956, "step": 7050 }, { "epoch": 0.4145381950560742, "grad_norm": 2.891343832015991, "learning_rate": 9.95569304342611e-06, "loss": 0.4874, "step": 7060 }, { "epoch": 0.4151253596383066, "grad_norm": 12.389043807983398, "learning_rate": 9.955238128887291e-06, "loss": 0.6335, "step": 7070 }, { "epoch": 0.415712524220539, "grad_norm": 1.4278638362884521, "learning_rate": 9.954780901380313e-06, "loss": 0.5553, "step": 7080 }, { "epoch": 0.4162996888027714, "grad_norm": 2.1015701293945312, "learning_rate": 9.954321361118595e-06, "loss": 0.6179, "step": 7090 }, { "epoch": 0.4168868533850038, "grad_norm": 4.257664680480957, "learning_rate": 9.953859508316638e-06, "loss": 0.5923, "step": 7100 }, { "epoch": 0.4174740179672362, "grad_norm": 7.729377746582031, "learning_rate": 9.953395343190022e-06, "loss": 0.623, "step": 7110 }, { "epoch": 0.4180611825494686, "grad_norm": 2.80397891998291, "learning_rate": 9.952928865955407e-06, "loss": 0.5216, "step": 7120 }, { "epoch": 0.418648347131701, "grad_norm": 6.517047882080078, "learning_rate": 9.952460076830532e-06, "loss": 0.5891, "step": 7130 }, { "epoch": 0.4192355117139334, "grad_norm": 5.210861682891846, "learning_rate": 9.951988976034213e-06, "loss": 0.446, "step": 7140 }, { "epoch": 0.4198226762961658, "grad_norm": 9.637191772460938, "learning_rate": 9.95151556378635e-06, "loss": 0.588, "step": 7150 }, { "epoch": 0.4204098408783982, "grad_norm": 2.5824360847473145, "learning_rate": 9.951039840307914e-06, "loss": 0.67, "step": 7160 }, { "epoch": 0.4209970054606306, "grad_norm": 7.123181343078613, "learning_rate": 9.950561805820965e-06, "loss": 0.4724, "step": 7170 }, { "epoch": 0.421584170042863, "grad_norm": 9.709158897399902, "learning_rate": 9.950081460548633e-06, "loss": 0.5663, "step": 7180 }, { "epoch": 0.42217133462509543, "grad_norm": 2.390664577484131, "learning_rate": 9.94959880471513e-06, "loss": 0.4687, "step": 7190 }, { "epoch": 0.42275849920732783, "grad_norm": 6.9214606285095215, "learning_rate": 9.949113838545748e-06, "loss": 0.5695, "step": 7200 }, { "epoch": 0.42334566378956023, "grad_norm": 12.18490219116211, "learning_rate": 9.948626562266851e-06, "loss": 0.5921, "step": 7210 }, { "epoch": 0.42393282837179264, "grad_norm": 7.987734317779541, "learning_rate": 9.948136976105893e-06, "loss": 0.5485, "step": 7220 }, { "epoch": 0.42451999295402504, "grad_norm": 4.425911903381348, "learning_rate": 9.947645080291397e-06, "loss": 0.5157, "step": 7230 }, { "epoch": 0.42510715753625744, "grad_norm": 4.304553508758545, "learning_rate": 9.947150875052963e-06, "loss": 0.5902, "step": 7240 }, { "epoch": 0.4256943221184898, "grad_norm": 4.688817977905273, "learning_rate": 9.946654360621275e-06, "loss": 0.5952, "step": 7250 }, { "epoch": 0.4262814867007222, "grad_norm": 4.008875846862793, "learning_rate": 9.946155537228094e-06, "loss": 0.6051, "step": 7260 }, { "epoch": 0.4268686512829546, "grad_norm": 3.874079704284668, "learning_rate": 9.945654405106253e-06, "loss": 0.4358, "step": 7270 }, { "epoch": 0.427455815865187, "grad_norm": 2.673732280731201, "learning_rate": 9.94515096448967e-06, "loss": 0.5936, "step": 7280 }, { "epoch": 0.4280429804474194, "grad_norm": 1.7275580167770386, "learning_rate": 9.944645215613336e-06, "loss": 0.6347, "step": 7290 }, { "epoch": 0.4286301450296518, "grad_norm": 3.0868475437164307, "learning_rate": 9.944137158713318e-06, "loss": 0.5489, "step": 7300 }, { "epoch": 0.4292173096118842, "grad_norm": 3.211582899093628, "learning_rate": 9.943626794026767e-06, "loss": 0.6909, "step": 7310 }, { "epoch": 0.4298044741941166, "grad_norm": 2.8233354091644287, "learning_rate": 9.943114121791906e-06, "loss": 0.511, "step": 7320 }, { "epoch": 0.430391638776349, "grad_norm": 11.487004280090332, "learning_rate": 9.942599142248036e-06, "loss": 0.5991, "step": 7330 }, { "epoch": 0.4309788033585814, "grad_norm": 2.59328293800354, "learning_rate": 9.942081855635533e-06, "loss": 0.6061, "step": 7340 }, { "epoch": 0.4315659679408138, "grad_norm": 9.819384574890137, "learning_rate": 9.941562262195855e-06, "loss": 0.6377, "step": 7350 }, { "epoch": 0.4321531325230462, "grad_norm": 2.9961068630218506, "learning_rate": 9.94104036217153e-06, "loss": 0.5793, "step": 7360 }, { "epoch": 0.4327402971052786, "grad_norm": 12.569365501403809, "learning_rate": 9.940516155806172e-06, "loss": 0.5561, "step": 7370 }, { "epoch": 0.433327461687511, "grad_norm": 3.7771849632263184, "learning_rate": 9.939989643344462e-06, "loss": 0.5914, "step": 7380 }, { "epoch": 0.4339146262697434, "grad_norm": 3.1193301677703857, "learning_rate": 9.939460825032165e-06, "loss": 0.6007, "step": 7390 }, { "epoch": 0.4345017908519758, "grad_norm": 2.467647075653076, "learning_rate": 9.938929701116115e-06, "loss": 0.5414, "step": 7400 }, { "epoch": 0.4350889554342082, "grad_norm": 7.381526470184326, "learning_rate": 9.938396271844227e-06, "loss": 0.5657, "step": 7410 }, { "epoch": 0.4356761200164406, "grad_norm": 4.960415363311768, "learning_rate": 9.937860537465493e-06, "loss": 0.6102, "step": 7420 }, { "epoch": 0.43626328459867303, "grad_norm": 12.260069847106934, "learning_rate": 9.937322498229976e-06, "loss": 0.6383, "step": 7430 }, { "epoch": 0.43685044918090543, "grad_norm": 7.953763484954834, "learning_rate": 9.93678215438882e-06, "loss": 0.6032, "step": 7440 }, { "epoch": 0.43743761376313783, "grad_norm": 2.2018167972564697, "learning_rate": 9.936239506194243e-06, "loss": 0.5402, "step": 7450 }, { "epoch": 0.4380247783453702, "grad_norm": 4.327651500701904, "learning_rate": 9.935694553899536e-06, "loss": 0.5331, "step": 7460 }, { "epoch": 0.4386119429276026, "grad_norm": 9.05737018585205, "learning_rate": 9.935147297759068e-06, "loss": 0.5387, "step": 7470 }, { "epoch": 0.439199107509835, "grad_norm": 2.670710325241089, "learning_rate": 9.934597738028284e-06, "loss": 0.5605, "step": 7480 }, { "epoch": 0.4397862720920674, "grad_norm": 5.4620280265808105, "learning_rate": 9.934045874963704e-06, "loss": 0.4715, "step": 7490 }, { "epoch": 0.4403734366742998, "grad_norm": 3.619861602783203, "learning_rate": 9.933491708822923e-06, "loss": 0.4535, "step": 7500 }, { "epoch": 0.4409606012565322, "grad_norm": 8.843908309936523, "learning_rate": 9.932935239864608e-06, "loss": 0.7443, "step": 7510 }, { "epoch": 0.4415477658387646, "grad_norm": 3.064089059829712, "learning_rate": 9.932376468348502e-06, "loss": 0.4437, "step": 7520 }, { "epoch": 0.442134930420997, "grad_norm": 3.1806600093841553, "learning_rate": 9.931815394535427e-06, "loss": 0.5512, "step": 7530 }, { "epoch": 0.4427220950032294, "grad_norm": 4.813238143920898, "learning_rate": 9.931252018687278e-06, "loss": 0.5876, "step": 7540 }, { "epoch": 0.4433092595854618, "grad_norm": 11.458456993103027, "learning_rate": 9.930686341067021e-06, "loss": 0.5712, "step": 7550 }, { "epoch": 0.4438964241676942, "grad_norm": 14.742348670959473, "learning_rate": 9.930118361938699e-06, "loss": 0.6612, "step": 7560 }, { "epoch": 0.4444835887499266, "grad_norm": 4.146764278411865, "learning_rate": 9.929548081567428e-06, "loss": 0.5938, "step": 7570 }, { "epoch": 0.445070753332159, "grad_norm": 4.599197864532471, "learning_rate": 9.928975500219399e-06, "loss": 0.5081, "step": 7580 }, { "epoch": 0.4456579179143914, "grad_norm": 3.877979278564453, "learning_rate": 9.928400618161882e-06, "loss": 0.4632, "step": 7590 }, { "epoch": 0.4462450824966238, "grad_norm": 2.3591575622558594, "learning_rate": 9.927823435663209e-06, "loss": 0.573, "step": 7600 }, { "epoch": 0.4468322470788562, "grad_norm": 6.506236553192139, "learning_rate": 9.927243952992798e-06, "loss": 0.4746, "step": 7610 }, { "epoch": 0.4474194116610886, "grad_norm": 4.935079574584961, "learning_rate": 9.926662170421134e-06, "loss": 0.462, "step": 7620 }, { "epoch": 0.448006576243321, "grad_norm": 7.424894332885742, "learning_rate": 9.926078088219775e-06, "loss": 0.6089, "step": 7630 }, { "epoch": 0.4485937408255534, "grad_norm": 7.302830696105957, "learning_rate": 9.925491706661357e-06, "loss": 0.5697, "step": 7640 }, { "epoch": 0.4491809054077858, "grad_norm": 3.4584414958953857, "learning_rate": 9.924903026019585e-06, "loss": 0.4276, "step": 7650 }, { "epoch": 0.4497680699900182, "grad_norm": 7.322295665740967, "learning_rate": 9.92431204656924e-06, "loss": 0.5931, "step": 7660 }, { "epoch": 0.45035523457225063, "grad_norm": 3.010613441467285, "learning_rate": 9.923718768586175e-06, "loss": 0.4647, "step": 7670 }, { "epoch": 0.450942399154483, "grad_norm": 4.247982501983643, "learning_rate": 9.923123192347314e-06, "loss": 0.6241, "step": 7680 }, { "epoch": 0.4515295637367154, "grad_norm": 4.939568042755127, "learning_rate": 9.922525318130657e-06, "loss": 0.586, "step": 7690 }, { "epoch": 0.4521167283189478, "grad_norm": 6.390995502471924, "learning_rate": 9.921925146215277e-06, "loss": 0.553, "step": 7700 }, { "epoch": 0.4527038929011802, "grad_norm": 3.749035596847534, "learning_rate": 9.921322676881316e-06, "loss": 0.5582, "step": 7710 }, { "epoch": 0.4532910574834126, "grad_norm": 2.1431407928466797, "learning_rate": 9.920717910409987e-06, "loss": 0.5577, "step": 7720 }, { "epoch": 0.453878222065645, "grad_norm": 6.4456048011779785, "learning_rate": 9.920110847083582e-06, "loss": 0.691, "step": 7730 }, { "epoch": 0.4544653866478774, "grad_norm": 3.577317237854004, "learning_rate": 9.91950148718546e-06, "loss": 0.4718, "step": 7740 }, { "epoch": 0.4550525512301098, "grad_norm": 14.45536994934082, "learning_rate": 9.918889831000054e-06, "loss": 0.5281, "step": 7750 }, { "epoch": 0.4556397158123422, "grad_norm": 6.386412143707275, "learning_rate": 9.918275878812866e-06, "loss": 0.7641, "step": 7760 }, { "epoch": 0.4562268803945746, "grad_norm": 3.356975555419922, "learning_rate": 9.917659630910476e-06, "loss": 0.5679, "step": 7770 }, { "epoch": 0.456814044976807, "grad_norm": 2.4522898197174072, "learning_rate": 9.917041087580529e-06, "loss": 0.5473, "step": 7780 }, { "epoch": 0.4574012095590394, "grad_norm": 3.8245625495910645, "learning_rate": 9.916420249111743e-06, "loss": 0.5297, "step": 7790 }, { "epoch": 0.4579883741412718, "grad_norm": 6.100375652313232, "learning_rate": 9.915797115793911e-06, "loss": 0.4813, "step": 7800 }, { "epoch": 0.4585755387235042, "grad_norm": 4.276587963104248, "learning_rate": 9.915171687917891e-06, "loss": 0.3913, "step": 7810 }, { "epoch": 0.4591627033057366, "grad_norm": 5.730615139007568, "learning_rate": 9.914543965775619e-06, "loss": 0.4676, "step": 7820 }, { "epoch": 0.459749867887969, "grad_norm": 6.1255011558532715, "learning_rate": 9.913913949660095e-06, "loss": 0.5904, "step": 7830 }, { "epoch": 0.4603370324702014, "grad_norm": 6.3633341789245605, "learning_rate": 9.913281639865395e-06, "loss": 0.7586, "step": 7840 }, { "epoch": 0.4609241970524338, "grad_norm": 2.1615896224975586, "learning_rate": 9.912647036686665e-06, "loss": 0.5735, "step": 7850 }, { "epoch": 0.4615113616346662, "grad_norm": 4.993041515350342, "learning_rate": 9.912010140420116e-06, "loss": 0.5847, "step": 7860 }, { "epoch": 0.4620985262168986, "grad_norm": 3.553004026412964, "learning_rate": 9.911370951363038e-06, "loss": 0.7059, "step": 7870 }, { "epoch": 0.462685690799131, "grad_norm": 5.9054274559021, "learning_rate": 9.910729469813784e-06, "loss": 0.5596, "step": 7880 }, { "epoch": 0.46327285538136337, "grad_norm": 6.726451396942139, "learning_rate": 9.91008569607178e-06, "loss": 0.5857, "step": 7890 }, { "epoch": 0.46386001996359577, "grad_norm": 2.6422789096832275, "learning_rate": 9.909439630437526e-06, "loss": 0.4518, "step": 7900 }, { "epoch": 0.4644471845458282, "grad_norm": 4.651497840881348, "learning_rate": 9.90879127321258e-06, "loss": 0.5417, "step": 7910 }, { "epoch": 0.4650343491280606, "grad_norm": 4.023793697357178, "learning_rate": 9.908140624699584e-06, "loss": 0.5388, "step": 7920 }, { "epoch": 0.465621513710293, "grad_norm": 7.675285816192627, "learning_rate": 9.90748768520224e-06, "loss": 0.4222, "step": 7930 }, { "epoch": 0.4662086782925254, "grad_norm": 4.330601215362549, "learning_rate": 9.90683245502532e-06, "loss": 0.5986, "step": 7940 }, { "epoch": 0.4667958428747578, "grad_norm": 4.338681697845459, "learning_rate": 9.906174934474673e-06, "loss": 0.537, "step": 7950 }, { "epoch": 0.4673830074569902, "grad_norm": 5.779381275177002, "learning_rate": 9.905515123857206e-06, "loss": 0.5101, "step": 7960 }, { "epoch": 0.4679701720392226, "grad_norm": 16.805360794067383, "learning_rate": 9.904853023480904e-06, "loss": 0.6772, "step": 7970 }, { "epoch": 0.468557336621455, "grad_norm": 2.7065060138702393, "learning_rate": 9.904188633654814e-06, "loss": 0.5714, "step": 7980 }, { "epoch": 0.4691445012036874, "grad_norm": 4.756760120391846, "learning_rate": 9.903521954689056e-06, "loss": 0.5557, "step": 7990 }, { "epoch": 0.4697316657859198, "grad_norm": 4.046006202697754, "learning_rate": 9.902852986894817e-06, "loss": 0.5976, "step": 8000 }, { "epoch": 0.4703188303681522, "grad_norm": 1.5275689363479614, "learning_rate": 9.902181730584356e-06, "loss": 0.5532, "step": 8010 }, { "epoch": 0.4709059949503846, "grad_norm": 4.075411319732666, "learning_rate": 9.901508186070992e-06, "loss": 0.4601, "step": 8020 }, { "epoch": 0.471493159532617, "grad_norm": 1.615488052368164, "learning_rate": 9.90083235366912e-06, "loss": 0.598, "step": 8030 }, { "epoch": 0.4720803241148494, "grad_norm": 2.083188533782959, "learning_rate": 9.9001542336942e-06, "loss": 0.6024, "step": 8040 }, { "epoch": 0.4726674886970818, "grad_norm": 6.01171350479126, "learning_rate": 9.899473826462756e-06, "loss": 0.4871, "step": 8050 }, { "epoch": 0.4732546532793142, "grad_norm": 5.505216598510742, "learning_rate": 9.898791132292387e-06, "loss": 0.6142, "step": 8060 }, { "epoch": 0.4738418178615466, "grad_norm": 3.0354533195495605, "learning_rate": 9.898106151501757e-06, "loss": 0.5362, "step": 8070 }, { "epoch": 0.474428982443779, "grad_norm": 5.378390789031982, "learning_rate": 9.89741888441059e-06, "loss": 0.5399, "step": 8080 }, { "epoch": 0.4750161470260114, "grad_norm": 6.095806121826172, "learning_rate": 9.89672933133969e-06, "loss": 0.5823, "step": 8090 }, { "epoch": 0.4756033116082438, "grad_norm": 12.62540340423584, "learning_rate": 9.896037492610917e-06, "loss": 0.5859, "step": 8100 }, { "epoch": 0.47619047619047616, "grad_norm": 1.5929666757583618, "learning_rate": 9.895343368547204e-06, "loss": 0.5455, "step": 8110 }, { "epoch": 0.47677764077270857, "grad_norm": 6.622389316558838, "learning_rate": 9.894646959472547e-06, "loss": 0.579, "step": 8120 }, { "epoch": 0.47736480535494097, "grad_norm": 9.806506156921387, "learning_rate": 9.893948265712015e-06, "loss": 0.536, "step": 8130 }, { "epoch": 0.47795196993717337, "grad_norm": 5.914865493774414, "learning_rate": 9.893247287591734e-06, "loss": 0.477, "step": 8140 }, { "epoch": 0.4785391345194058, "grad_norm": 6.712255954742432, "learning_rate": 9.892544025438904e-06, "loss": 0.5508, "step": 8150 }, { "epoch": 0.4791262991016382, "grad_norm": 7.56572961807251, "learning_rate": 9.891838479581786e-06, "loss": 0.5563, "step": 8160 }, { "epoch": 0.4797134636838706, "grad_norm": 5.3524885177612305, "learning_rate": 9.891130650349711e-06, "loss": 0.4055, "step": 8170 }, { "epoch": 0.480300628266103, "grad_norm": 3.4288389682769775, "learning_rate": 9.890420538073076e-06, "loss": 0.5559, "step": 8180 }, { "epoch": 0.4808877928483354, "grad_norm": 2.883146047592163, "learning_rate": 9.889708143083338e-06, "loss": 0.4582, "step": 8190 }, { "epoch": 0.4814749574305678, "grad_norm": 4.780776023864746, "learning_rate": 9.888993465713025e-06, "loss": 0.5322, "step": 8200 }, { "epoch": 0.4820621220128002, "grad_norm": 5.783346652984619, "learning_rate": 9.888276506295729e-06, "loss": 0.6041, "step": 8210 }, { "epoch": 0.4826492865950326, "grad_norm": 6.087161064147949, "learning_rate": 9.887557265166106e-06, "loss": 0.6178, "step": 8220 }, { "epoch": 0.483236451177265, "grad_norm": 2.8382227420806885, "learning_rate": 9.88683574265988e-06, "loss": 0.5848, "step": 8230 }, { "epoch": 0.4838236157594974, "grad_norm": 6.323184967041016, "learning_rate": 9.886111939113833e-06, "loss": 0.5027, "step": 8240 }, { "epoch": 0.4844107803417298, "grad_norm": 6.780508995056152, "learning_rate": 9.885385854865821e-06, "loss": 0.5977, "step": 8250 }, { "epoch": 0.4849979449239622, "grad_norm": 1.8141248226165771, "learning_rate": 9.884657490254758e-06, "loss": 0.6079, "step": 8260 }, { "epoch": 0.4855851095061946, "grad_norm": 8.224820137023926, "learning_rate": 9.883926845620628e-06, "loss": 0.4218, "step": 8270 }, { "epoch": 0.486172274088427, "grad_norm": 4.856410503387451, "learning_rate": 9.88319392130447e-06, "loss": 0.4927, "step": 8280 }, { "epoch": 0.4867594386706594, "grad_norm": 2.6439359188079834, "learning_rate": 9.882458717648396e-06, "loss": 0.4984, "step": 8290 }, { "epoch": 0.4873466032528918, "grad_norm": 2.086629629135132, "learning_rate": 9.881721234995577e-06, "loss": 0.5774, "step": 8300 }, { "epoch": 0.4879337678351242, "grad_norm": 4.155926704406738, "learning_rate": 9.880981473690254e-06, "loss": 0.4604, "step": 8310 }, { "epoch": 0.4885209324173566, "grad_norm": 1.829495906829834, "learning_rate": 9.880239434077722e-06, "loss": 0.4973, "step": 8320 }, { "epoch": 0.48910809699958896, "grad_norm": 5.218919277191162, "learning_rate": 9.879495116504348e-06, "loss": 0.5934, "step": 8330 }, { "epoch": 0.48969526158182136, "grad_norm": 13.232114791870117, "learning_rate": 9.878748521317554e-06, "loss": 0.51, "step": 8340 }, { "epoch": 0.49028242616405376, "grad_norm": 7.676084041595459, "learning_rate": 9.877999648865837e-06, "loss": 0.5274, "step": 8350 }, { "epoch": 0.49086959074628617, "grad_norm": 3.0456278324127197, "learning_rate": 9.877248499498745e-06, "loss": 0.5017, "step": 8360 }, { "epoch": 0.49145675532851857, "grad_norm": 6.4649786949157715, "learning_rate": 9.876495073566894e-06, "loss": 0.481, "step": 8370 }, { "epoch": 0.49204391991075097, "grad_norm": 4.235057830810547, "learning_rate": 9.875739371421966e-06, "loss": 0.5366, "step": 8380 }, { "epoch": 0.4926310844929834, "grad_norm": 5.046140193939209, "learning_rate": 9.874981393416698e-06, "loss": 0.5756, "step": 8390 }, { "epoch": 0.4932182490752158, "grad_norm": 6.20513391494751, "learning_rate": 9.874221139904893e-06, "loss": 0.5957, "step": 8400 }, { "epoch": 0.4938054136574482, "grad_norm": 6.648784160614014, "learning_rate": 9.873458611241419e-06, "loss": 0.592, "step": 8410 }, { "epoch": 0.4943925782396806, "grad_norm": 6.046889305114746, "learning_rate": 9.872693807782203e-06, "loss": 0.5261, "step": 8420 }, { "epoch": 0.494979742821913, "grad_norm": 4.449030876159668, "learning_rate": 9.871926729884234e-06, "loss": 0.4932, "step": 8430 }, { "epoch": 0.4955669074041454, "grad_norm": 5.747600078582764, "learning_rate": 9.87115737790556e-06, "loss": 0.6846, "step": 8440 }, { "epoch": 0.4961540719863778, "grad_norm": 2.7110917568206787, "learning_rate": 9.870385752205297e-06, "loss": 0.6533, "step": 8450 }, { "epoch": 0.4967412365686102, "grad_norm": 5.274699687957764, "learning_rate": 9.869611853143615e-06, "loss": 0.7095, "step": 8460 }, { "epoch": 0.4973284011508426, "grad_norm": 13.433452606201172, "learning_rate": 9.868835681081753e-06, "loss": 0.6807, "step": 8470 }, { "epoch": 0.497915565733075, "grad_norm": 15.356071472167969, "learning_rate": 9.868057236382002e-06, "loss": 0.6488, "step": 8480 }, { "epoch": 0.4985027303153074, "grad_norm": 3.186157464981079, "learning_rate": 9.86727651940772e-06, "loss": 0.5425, "step": 8490 }, { "epoch": 0.4990898948975398, "grad_norm": 2.1503119468688965, "learning_rate": 9.866493530523327e-06, "loss": 0.4235, "step": 8500 }, { "epoch": 0.4996770594797722, "grad_norm": 4.083325386047363, "learning_rate": 9.865708270094297e-06, "loss": 0.5315, "step": 8510 }, { "epoch": 0.5002642240620045, "grad_norm": 2.9447922706604004, "learning_rate": 9.86492073848717e-06, "loss": 0.5859, "step": 8520 }, { "epoch": 0.500851388644237, "grad_norm": 8.925594329833984, "learning_rate": 9.864130936069542e-06, "loss": 0.5735, "step": 8530 }, { "epoch": 0.5014385532264694, "grad_norm": 5.623430252075195, "learning_rate": 9.863338863210072e-06, "loss": 0.6393, "step": 8540 }, { "epoch": 0.5020257178087018, "grad_norm": 3.627338171005249, "learning_rate": 9.862544520278478e-06, "loss": 0.4386, "step": 8550 }, { "epoch": 0.5026128823909342, "grad_norm": 3.49172306060791, "learning_rate": 9.861747907645538e-06, "loss": 0.4369, "step": 8560 }, { "epoch": 0.5032000469731666, "grad_norm": 3.7341291904449463, "learning_rate": 9.860949025683087e-06, "loss": 0.6004, "step": 8570 }, { "epoch": 0.503787211555399, "grad_norm": 3.4065864086151123, "learning_rate": 9.860147874764023e-06, "loss": 0.5755, "step": 8580 }, { "epoch": 0.5043743761376314, "grad_norm": 8.283018112182617, "learning_rate": 9.859344455262299e-06, "loss": 0.4203, "step": 8590 }, { "epoch": 0.5049615407198638, "grad_norm": 8.70332145690918, "learning_rate": 9.85853876755293e-06, "loss": 0.6765, "step": 8600 }, { "epoch": 0.5055487053020962, "grad_norm": 8.308868408203125, "learning_rate": 9.857730812011988e-06, "loss": 0.4744, "step": 8610 }, { "epoch": 0.5061358698843286, "grad_norm": 3.6225781440734863, "learning_rate": 9.856920589016605e-06, "loss": 0.4573, "step": 8620 }, { "epoch": 0.506723034466561, "grad_norm": 3.0734188556671143, "learning_rate": 9.856108098944972e-06, "loss": 0.5299, "step": 8630 }, { "epoch": 0.5073101990487934, "grad_norm": 3.8895318508148193, "learning_rate": 9.855293342176335e-06, "loss": 0.6713, "step": 8640 }, { "epoch": 0.5078973636310258, "grad_norm": 4.129243850708008, "learning_rate": 9.854476319091e-06, "loss": 0.5919, "step": 8650 }, { "epoch": 0.5084845282132582, "grad_norm": 2.987363815307617, "learning_rate": 9.853657030070333e-06, "loss": 0.6869, "step": 8660 }, { "epoch": 0.5090716927954906, "grad_norm": 14.317805290222168, "learning_rate": 9.852835475496752e-06, "loss": 0.4785, "step": 8670 }, { "epoch": 0.509658857377723, "grad_norm": 4.473576545715332, "learning_rate": 9.85201165575374e-06, "loss": 0.5186, "step": 8680 }, { "epoch": 0.5102460219599554, "grad_norm": 2.324526071548462, "learning_rate": 9.851185571225828e-06, "loss": 0.5149, "step": 8690 }, { "epoch": 0.5108331865421878, "grad_norm": 8.622400283813477, "learning_rate": 9.850357222298614e-06, "loss": 0.4627, "step": 8700 }, { "epoch": 0.5114203511244202, "grad_norm": 4.005298614501953, "learning_rate": 9.849526609358747e-06, "loss": 0.5845, "step": 8710 }, { "epoch": 0.5120075157066526, "grad_norm": 4.676525592803955, "learning_rate": 9.848693732793934e-06, "loss": 0.6658, "step": 8720 }, { "epoch": 0.512594680288885, "grad_norm": 3.5338289737701416, "learning_rate": 9.847858592992938e-06, "loss": 0.6454, "step": 8730 }, { "epoch": 0.5131818448711174, "grad_norm": 3.5419199466705322, "learning_rate": 9.847021190345581e-06, "loss": 0.6215, "step": 8740 }, { "epoch": 0.5137690094533498, "grad_norm": 2.3952956199645996, "learning_rate": 9.846181525242739e-06, "loss": 0.5902, "step": 8750 }, { "epoch": 0.5143561740355822, "grad_norm": 5.03947639465332, "learning_rate": 9.845339598076343e-06, "loss": 0.6523, "step": 8760 }, { "epoch": 0.5149433386178146, "grad_norm": 2.954102039337158, "learning_rate": 9.844495409239381e-06, "loss": 0.5068, "step": 8770 }, { "epoch": 0.515530503200047, "grad_norm": 7.447690486907959, "learning_rate": 9.843648959125897e-06, "loss": 0.5282, "step": 8780 }, { "epoch": 0.5161176677822794, "grad_norm": 10.040531158447266, "learning_rate": 9.842800248130995e-06, "loss": 0.5375, "step": 8790 }, { "epoch": 0.5167048323645118, "grad_norm": 4.830393314361572, "learning_rate": 9.841949276650824e-06, "loss": 0.5358, "step": 8800 }, { "epoch": 0.5172919969467442, "grad_norm": 4.166080951690674, "learning_rate": 9.841096045082599e-06, "loss": 0.5032, "step": 8810 }, { "epoch": 0.5178791615289766, "grad_norm": 2.315826177597046, "learning_rate": 9.84024055382458e-06, "loss": 0.6316, "step": 8820 }, { "epoch": 0.518466326111209, "grad_norm": 3.061453104019165, "learning_rate": 9.83938280327609e-06, "loss": 0.393, "step": 8830 }, { "epoch": 0.5190534906934414, "grad_norm": 3.0611350536346436, "learning_rate": 9.838522793837502e-06, "loss": 0.5202, "step": 8840 }, { "epoch": 0.5196406552756738, "grad_norm": 5.502413749694824, "learning_rate": 9.837660525910244e-06, "loss": 0.4178, "step": 8850 }, { "epoch": 0.5202278198579062, "grad_norm": 7.628272533416748, "learning_rate": 9.8367959998968e-06, "loss": 0.5788, "step": 8860 }, { "epoch": 0.5208149844401385, "grad_norm": 2.784864902496338, "learning_rate": 9.835929216200707e-06, "loss": 0.4652, "step": 8870 }, { "epoch": 0.5214021490223709, "grad_norm": 5.279654026031494, "learning_rate": 9.835060175226555e-06, "loss": 0.6857, "step": 8880 }, { "epoch": 0.5219893136046033, "grad_norm": 4.850887775421143, "learning_rate": 9.834188877379988e-06, "loss": 0.6145, "step": 8890 }, { "epoch": 0.5225764781868357, "grad_norm": 2.331947088241577, "learning_rate": 9.833315323067706e-06, "loss": 0.5842, "step": 8900 }, { "epoch": 0.5231636427690681, "grad_norm": 7.419745445251465, "learning_rate": 9.832439512697457e-06, "loss": 0.5465, "step": 8910 }, { "epoch": 0.5237508073513005, "grad_norm": 2.3740463256835938, "learning_rate": 9.831561446678047e-06, "loss": 0.5202, "step": 8920 }, { "epoch": 0.5243379719335329, "grad_norm": 7.376945972442627, "learning_rate": 9.83068112541933e-06, "loss": 0.4248, "step": 8930 }, { "epoch": 0.5249251365157653, "grad_norm": 6.004100799560547, "learning_rate": 9.829798549332222e-06, "loss": 0.5425, "step": 8940 }, { "epoch": 0.5255123010979977, "grad_norm": 7.796032428741455, "learning_rate": 9.828913718828677e-06, "loss": 0.6205, "step": 8950 }, { "epoch": 0.5260994656802301, "grad_norm": 2.7344343662261963, "learning_rate": 9.828026634321715e-06, "loss": 0.4846, "step": 8960 }, { "epoch": 0.5266866302624625, "grad_norm": 3.191195487976074, "learning_rate": 9.827137296225401e-06, "loss": 0.53, "step": 8970 }, { "epoch": 0.5272737948446949, "grad_norm": 10.117526054382324, "learning_rate": 9.826245704954853e-06, "loss": 0.4776, "step": 8980 }, { "epoch": 0.5278609594269273, "grad_norm": 1.7814913988113403, "learning_rate": 9.825351860926243e-06, "loss": 0.6729, "step": 8990 }, { "epoch": 0.5284481240091597, "grad_norm": 6.431950092315674, "learning_rate": 9.82445576455679e-06, "loss": 0.5725, "step": 9000 }, { "epoch": 0.5290352885913921, "grad_norm": 12.738188743591309, "learning_rate": 9.823557416264766e-06, "loss": 0.593, "step": 9010 }, { "epoch": 0.5296224531736246, "grad_norm": 4.901657581329346, "learning_rate": 9.8226568164695e-06, "loss": 0.5653, "step": 9020 }, { "epoch": 0.530209617755857, "grad_norm": 8.338888168334961, "learning_rate": 9.821753965591366e-06, "loss": 0.5062, "step": 9030 }, { "epoch": 0.5307967823380894, "grad_norm": 4.973560810089111, "learning_rate": 9.820848864051784e-06, "loss": 0.543, "step": 9040 }, { "epoch": 0.5313839469203218, "grad_norm": 15.24169635772705, "learning_rate": 9.819941512273237e-06, "loss": 0.6172, "step": 9050 }, { "epoch": 0.5319711115025542, "grad_norm": 3.172322988510132, "learning_rate": 9.81903191067925e-06, "loss": 0.4877, "step": 9060 }, { "epoch": 0.5325582760847866, "grad_norm": 4.549586296081543, "learning_rate": 9.818120059694398e-06, "loss": 0.575, "step": 9070 }, { "epoch": 0.533145440667019, "grad_norm": 3.590604066848755, "learning_rate": 9.81720595974431e-06, "loss": 0.5846, "step": 9080 }, { "epoch": 0.5337326052492514, "grad_norm": 12.625965118408203, "learning_rate": 9.816289611255661e-06, "loss": 0.5552, "step": 9090 }, { "epoch": 0.5343197698314838, "grad_norm": 5.2620134353637695, "learning_rate": 9.815371014656179e-06, "loss": 0.5711, "step": 9100 }, { "epoch": 0.5349069344137162, "grad_norm": 3.5177018642425537, "learning_rate": 9.814450170374637e-06, "loss": 0.6581, "step": 9110 }, { "epoch": 0.5354940989959486, "grad_norm": 1.7808891534805298, "learning_rate": 9.813527078840862e-06, "loss": 0.5167, "step": 9120 }, { "epoch": 0.536081263578181, "grad_norm": 4.121121883392334, "learning_rate": 9.812601740485726e-06, "loss": 0.376, "step": 9130 }, { "epoch": 0.5366684281604134, "grad_norm": 4.227759838104248, "learning_rate": 9.811674155741151e-06, "loss": 0.584, "step": 9140 }, { "epoch": 0.5372555927426458, "grad_norm": 2.0682992935180664, "learning_rate": 9.81074432504011e-06, "loss": 0.44, "step": 9150 }, { "epoch": 0.5378427573248782, "grad_norm": 7.977725028991699, "learning_rate": 9.80981224881662e-06, "loss": 0.6706, "step": 9160 }, { "epoch": 0.5384299219071106, "grad_norm": 5.091983795166016, "learning_rate": 9.808877927505752e-06, "loss": 0.4773, "step": 9170 }, { "epoch": 0.539017086489343, "grad_norm": 4.021512508392334, "learning_rate": 9.807941361543617e-06, "loss": 0.5591, "step": 9180 }, { "epoch": 0.5396042510715754, "grad_norm": 3.013948440551758, "learning_rate": 9.807002551367378e-06, "loss": 0.4717, "step": 9190 }, { "epoch": 0.5401914156538078, "grad_norm": 2.8431334495544434, "learning_rate": 9.80606149741525e-06, "loss": 0.61, "step": 9200 }, { "epoch": 0.5407785802360402, "grad_norm": 2.8164560794830322, "learning_rate": 9.805118200126489e-06, "loss": 0.5275, "step": 9210 }, { "epoch": 0.5413657448182726, "grad_norm": 2.190776824951172, "learning_rate": 9.804172659941398e-06, "loss": 0.4284, "step": 9220 }, { "epoch": 0.541952909400505, "grad_norm": 3.250474691390991, "learning_rate": 9.803224877301331e-06, "loss": 0.4986, "step": 9230 }, { "epoch": 0.5425400739827374, "grad_norm": 4.6248369216918945, "learning_rate": 9.802274852648687e-06, "loss": 0.5418, "step": 9240 }, { "epoch": 0.5431272385649698, "grad_norm": 2.1930127143859863, "learning_rate": 9.80132258642691e-06, "loss": 0.52, "step": 9250 }, { "epoch": 0.5437144031472022, "grad_norm": 2.7822859287261963, "learning_rate": 9.800368079080495e-06, "loss": 0.5353, "step": 9260 }, { "epoch": 0.5443015677294346, "grad_norm": 4.1376471519470215, "learning_rate": 9.799411331054974e-06, "loss": 0.5814, "step": 9270 }, { "epoch": 0.544888732311667, "grad_norm": 4.687663555145264, "learning_rate": 9.798452342796934e-06, "loss": 0.5369, "step": 9280 }, { "epoch": 0.5454758968938994, "grad_norm": 3.1034629344940186, "learning_rate": 9.797491114754004e-06, "loss": 0.4599, "step": 9290 }, { "epoch": 0.5460630614761317, "grad_norm": 3.410820484161377, "learning_rate": 9.796527647374858e-06, "loss": 0.5736, "step": 9300 }, { "epoch": 0.5466502260583641, "grad_norm": 2.172558546066284, "learning_rate": 9.795561941109215e-06, "loss": 0.6566, "step": 9310 }, { "epoch": 0.5472373906405965, "grad_norm": 2.2672550678253174, "learning_rate": 9.794593996407843e-06, "loss": 0.454, "step": 9320 }, { "epoch": 0.5478245552228289, "grad_norm": 4.7471232414245605, "learning_rate": 9.79362381372255e-06, "loss": 0.5369, "step": 9330 }, { "epoch": 0.5484117198050613, "grad_norm": 5.355922222137451, "learning_rate": 9.792651393506189e-06, "loss": 0.6273, "step": 9340 }, { "epoch": 0.5489988843872937, "grad_norm": 1.4450231790542603, "learning_rate": 9.791676736212661e-06, "loss": 0.4823, "step": 9350 }, { "epoch": 0.5495860489695261, "grad_norm": 3.968808889389038, "learning_rate": 9.79069984229691e-06, "loss": 0.4694, "step": 9360 }, { "epoch": 0.5501732135517585, "grad_norm": 12.158526420593262, "learning_rate": 9.789720712214918e-06, "loss": 0.5358, "step": 9370 }, { "epoch": 0.5507603781339909, "grad_norm": 2.7470638751983643, "learning_rate": 9.78873934642372e-06, "loss": 0.4246, "step": 9380 }, { "epoch": 0.5513475427162233, "grad_norm": 3.286651611328125, "learning_rate": 9.787755745381392e-06, "loss": 0.5232, "step": 9390 }, { "epoch": 0.5519347072984557, "grad_norm": 2.5296542644500732, "learning_rate": 9.78676990954705e-06, "loss": 0.521, "step": 9400 }, { "epoch": 0.5525218718806881, "grad_norm": 2.544373035430908, "learning_rate": 9.785781839380852e-06, "loss": 0.6039, "step": 9410 }, { "epoch": 0.5531090364629205, "grad_norm": 3.167029619216919, "learning_rate": 9.784791535344005e-06, "loss": 0.5455, "step": 9420 }, { "epoch": 0.5536962010451529, "grad_norm": 2.3600895404815674, "learning_rate": 9.783798997898755e-06, "loss": 0.4851, "step": 9430 }, { "epoch": 0.5542833656273853, "grad_norm": 2.4944114685058594, "learning_rate": 9.78280422750839e-06, "loss": 0.4923, "step": 9440 }, { "epoch": 0.5548705302096177, "grad_norm": 4.81469202041626, "learning_rate": 9.781807224637243e-06, "loss": 0.3834, "step": 9450 }, { "epoch": 0.5554576947918501, "grad_norm": 8.330398559570312, "learning_rate": 9.780807989750688e-06, "loss": 0.472, "step": 9460 }, { "epoch": 0.5560448593740825, "grad_norm": 3.869076728820801, "learning_rate": 9.779806523315136e-06, "loss": 0.5843, "step": 9470 }, { "epoch": 0.556632023956315, "grad_norm": 11.851517677307129, "learning_rate": 9.778802825798049e-06, "loss": 0.5612, "step": 9480 }, { "epoch": 0.5572191885385473, "grad_norm": 5.368571758270264, "learning_rate": 9.777796897667922e-06, "loss": 0.4785, "step": 9490 }, { "epoch": 0.5578063531207798, "grad_norm": 6.781565189361572, "learning_rate": 9.776788739394294e-06, "loss": 0.6055, "step": 9500 }, { "epoch": 0.5583935177030122, "grad_norm": 3.303493022918701, "learning_rate": 9.775778351447748e-06, "loss": 0.4833, "step": 9510 }, { "epoch": 0.5589806822852446, "grad_norm": 2.280620574951172, "learning_rate": 9.774765734299903e-06, "loss": 0.4869, "step": 9520 }, { "epoch": 0.559567846867477, "grad_norm": 4.161054611206055, "learning_rate": 9.773750888423422e-06, "loss": 0.6361, "step": 9530 }, { "epoch": 0.5601550114497094, "grad_norm": 4.448292255401611, "learning_rate": 9.772733814292008e-06, "loss": 0.5631, "step": 9540 }, { "epoch": 0.5607421760319418, "grad_norm": 2.149733304977417, "learning_rate": 9.7717145123804e-06, "loss": 0.4371, "step": 9550 }, { "epoch": 0.5613293406141742, "grad_norm": 3.5532288551330566, "learning_rate": 9.770692983164382e-06, "loss": 0.4801, "step": 9560 }, { "epoch": 0.5619165051964066, "grad_norm": 3.157175302505493, "learning_rate": 9.769669227120774e-06, "loss": 0.5801, "step": 9570 }, { "epoch": 0.562503669778639, "grad_norm": 6.337553024291992, "learning_rate": 9.76864324472744e-06, "loss": 0.7482, "step": 9580 }, { "epoch": 0.5630908343608714, "grad_norm": 8.534930229187012, "learning_rate": 9.767615036463278e-06, "loss": 0.7272, "step": 9590 }, { "epoch": 0.5636779989431038, "grad_norm": 3.1977944374084473, "learning_rate": 9.766584602808227e-06, "loss": 0.5363, "step": 9600 }, { "epoch": 0.5642651635253362, "grad_norm": 2.983137369155884, "learning_rate": 9.765551944243266e-06, "loss": 0.5019, "step": 9610 }, { "epoch": 0.5648523281075686, "grad_norm": 5.125413417816162, "learning_rate": 9.764517061250412e-06, "loss": 0.6032, "step": 9620 }, { "epoch": 0.565439492689801, "grad_norm": 2.2186028957366943, "learning_rate": 9.763479954312717e-06, "loss": 0.4935, "step": 9630 }, { "epoch": 0.5660266572720334, "grad_norm": 3.8898799419403076, "learning_rate": 9.762440623914275e-06, "loss": 0.4796, "step": 9640 }, { "epoch": 0.5666138218542658, "grad_norm": 5.140921592712402, "learning_rate": 9.76139907054022e-06, "loss": 0.4906, "step": 9650 }, { "epoch": 0.5672009864364982, "grad_norm": 1.930474042892456, "learning_rate": 9.760355294676715e-06, "loss": 0.5758, "step": 9660 }, { "epoch": 0.5677881510187306, "grad_norm": 4.347428321838379, "learning_rate": 9.759309296810971e-06, "loss": 0.5119, "step": 9670 }, { "epoch": 0.568375315600963, "grad_norm": 4.005129814147949, "learning_rate": 9.758261077431227e-06, "loss": 0.4726, "step": 9680 }, { "epoch": 0.5689624801831954, "grad_norm": 2.0881404876708984, "learning_rate": 9.757210637026764e-06, "loss": 0.5385, "step": 9690 }, { "epoch": 0.5695496447654278, "grad_norm": 4.273829460144043, "learning_rate": 9.756157976087897e-06, "loss": 0.5825, "step": 9700 }, { "epoch": 0.5701368093476602, "grad_norm": 2.1896591186523438, "learning_rate": 9.755103095105983e-06, "loss": 0.5241, "step": 9710 }, { "epoch": 0.5707239739298926, "grad_norm": 3.966327428817749, "learning_rate": 9.754045994573408e-06, "loss": 0.4774, "step": 9720 }, { "epoch": 0.5713111385121249, "grad_norm": 2.2214088439941406, "learning_rate": 9.7529866749836e-06, "loss": 0.4222, "step": 9730 }, { "epoch": 0.5718983030943573, "grad_norm": 3.2833657264709473, "learning_rate": 9.751925136831016e-06, "loss": 0.3906, "step": 9740 }, { "epoch": 0.5724854676765897, "grad_norm": 2.9204273223876953, "learning_rate": 9.750861380611155e-06, "loss": 0.6198, "step": 9750 }, { "epoch": 0.5730726322588221, "grad_norm": 4.507615089416504, "learning_rate": 9.74979540682055e-06, "loss": 0.5162, "step": 9760 }, { "epoch": 0.5736597968410545, "grad_norm": 7.704033374786377, "learning_rate": 9.748727215956767e-06, "loss": 0.5664, "step": 9770 }, { "epoch": 0.5742469614232869, "grad_norm": 3.4356279373168945, "learning_rate": 9.747656808518406e-06, "loss": 0.5915, "step": 9780 }, { "epoch": 0.5748341260055193, "grad_norm": 2.975405693054199, "learning_rate": 9.746584185005108e-06, "loss": 0.5977, "step": 9790 }, { "epoch": 0.5754212905877517, "grad_norm": 3.0221548080444336, "learning_rate": 9.74550934591754e-06, "loss": 0.573, "step": 9800 }, { "epoch": 0.5760084551699841, "grad_norm": 2.695840835571289, "learning_rate": 9.74443229175741e-06, "loss": 0.6044, "step": 9810 }, { "epoch": 0.5765956197522165, "grad_norm": 3.9136481285095215, "learning_rate": 9.743353023027458e-06, "loss": 0.5265, "step": 9820 }, { "epoch": 0.5771827843344489, "grad_norm": 8.72352123260498, "learning_rate": 9.742271540231451e-06, "loss": 0.4348, "step": 9830 }, { "epoch": 0.5777699489166813, "grad_norm": 5.810318946838379, "learning_rate": 9.7411878438742e-06, "loss": 0.5148, "step": 9840 }, { "epoch": 0.5783571134989137, "grad_norm": 3.389808416366577, "learning_rate": 9.740101934461545e-06, "loss": 0.4972, "step": 9850 }, { "epoch": 0.5789442780811461, "grad_norm": 4.125001430511475, "learning_rate": 9.739013812500356e-06, "loss": 0.4649, "step": 9860 }, { "epoch": 0.5795314426633785, "grad_norm": 4.312631130218506, "learning_rate": 9.73792347849854e-06, "loss": 0.4443, "step": 9870 }, { "epoch": 0.5801186072456109, "grad_norm": 5.538539886474609, "learning_rate": 9.736830932965031e-06, "loss": 0.5388, "step": 9880 }, { "epoch": 0.5807057718278433, "grad_norm": 4.834696292877197, "learning_rate": 9.735736176409804e-06, "loss": 0.5174, "step": 9890 }, { "epoch": 0.5812929364100757, "grad_norm": 1.7389239072799683, "learning_rate": 9.734639209343859e-06, "loss": 0.5017, "step": 9900 }, { "epoch": 0.5818801009923081, "grad_norm": 16.31885528564453, "learning_rate": 9.73354003227923e-06, "loss": 0.4999, "step": 9910 }, { "epoch": 0.5824672655745405, "grad_norm": 5.033740997314453, "learning_rate": 9.732438645728983e-06, "loss": 0.5127, "step": 9920 }, { "epoch": 0.5830544301567729, "grad_norm": 8.981035232543945, "learning_rate": 9.731335050207214e-06, "loss": 0.5348, "step": 9930 }, { "epoch": 0.5836415947390053, "grad_norm": 2.8682048320770264, "learning_rate": 9.730229246229052e-06, "loss": 0.4061, "step": 9940 }, { "epoch": 0.5842287593212377, "grad_norm": 2.603468179702759, "learning_rate": 9.729121234310654e-06, "loss": 0.5219, "step": 9950 }, { "epoch": 0.5848159239034701, "grad_norm": 3.114611864089966, "learning_rate": 9.72801101496921e-06, "loss": 0.5382, "step": 9960 }, { "epoch": 0.5854030884857025, "grad_norm": 6.725522994995117, "learning_rate": 9.72689858872294e-06, "loss": 0.4527, "step": 9970 }, { "epoch": 0.585990253067935, "grad_norm": 1.5923625230789185, "learning_rate": 9.725783956091093e-06, "loss": 0.4698, "step": 9980 }, { "epoch": 0.5865774176501674, "grad_norm": 6.024008750915527, "learning_rate": 9.724667117593952e-06, "loss": 0.4152, "step": 9990 }, { "epoch": 0.5871645822323998, "grad_norm": 12.30221176147461, "learning_rate": 9.72354807375282e-06, "loss": 0.6716, "step": 10000 }, { "epoch": 0.5877517468146322, "grad_norm": 3.4702558517456055, "learning_rate": 9.722426825090042e-06, "loss": 0.5228, "step": 10010 }, { "epoch": 0.5883389113968646, "grad_norm": 8.096381187438965, "learning_rate": 9.721303372128983e-06, "loss": 0.5989, "step": 10020 }, { "epoch": 0.588926075979097, "grad_norm": 5.039669036865234, "learning_rate": 9.720177715394038e-06, "loss": 0.4916, "step": 10030 }, { "epoch": 0.5895132405613294, "grad_norm": 11.225176811218262, "learning_rate": 9.719049855410639e-06, "loss": 0.5188, "step": 10040 }, { "epoch": 0.5901004051435618, "grad_norm": 2.3249123096466064, "learning_rate": 9.717919792705232e-06, "loss": 0.4969, "step": 10050 }, { "epoch": 0.5906875697257942, "grad_norm": 2.1836400032043457, "learning_rate": 9.716787527805304e-06, "loss": 0.4537, "step": 10060 }, { "epoch": 0.5912747343080266, "grad_norm": 7.70976448059082, "learning_rate": 9.715653061239363e-06, "loss": 0.5188, "step": 10070 }, { "epoch": 0.591861898890259, "grad_norm": 2.5030319690704346, "learning_rate": 9.714516393536946e-06, "loss": 0.56, "step": 10080 }, { "epoch": 0.5924490634724914, "grad_norm": 3.784214496612549, "learning_rate": 9.71337752522862e-06, "loss": 0.4887, "step": 10090 }, { "epoch": 0.5930362280547238, "grad_norm": 7.837482929229736, "learning_rate": 9.712236456845977e-06, "loss": 0.4186, "step": 10100 }, { "epoch": 0.5936233926369562, "grad_norm": 6.767928600311279, "learning_rate": 9.711093188921634e-06, "loss": 0.4156, "step": 10110 }, { "epoch": 0.5942105572191886, "grad_norm": 1.4659948348999023, "learning_rate": 9.70994772198924e-06, "loss": 0.4394, "step": 10120 }, { "epoch": 0.594797721801421, "grad_norm": 5.347362995147705, "learning_rate": 9.708800056583465e-06, "loss": 0.4476, "step": 10130 }, { "epoch": 0.5953848863836534, "grad_norm": 7.593697547912598, "learning_rate": 9.707650193240008e-06, "loss": 0.5441, "step": 10140 }, { "epoch": 0.5959720509658858, "grad_norm": 3.6417336463928223, "learning_rate": 9.706498132495593e-06, "loss": 0.4628, "step": 10150 }, { "epoch": 0.5965592155481181, "grad_norm": 8.113247871398926, "learning_rate": 9.70534387488797e-06, "loss": 0.4871, "step": 10160 }, { "epoch": 0.5971463801303505, "grad_norm": 7.664656162261963, "learning_rate": 9.704187420955919e-06, "loss": 0.6964, "step": 10170 }, { "epoch": 0.5977335447125829, "grad_norm": 0.9775065779685974, "learning_rate": 9.703028771239233e-06, "loss": 0.4908, "step": 10180 }, { "epoch": 0.5983207092948153, "grad_norm": 3.2524943351745605, "learning_rate": 9.701867926278743e-06, "loss": 0.4656, "step": 10190 }, { "epoch": 0.5989078738770477, "grad_norm": 2.1076743602752686, "learning_rate": 9.700704886616299e-06, "loss": 0.6297, "step": 10200 }, { "epoch": 0.5994950384592801, "grad_norm": 2.377211332321167, "learning_rate": 9.699539652794773e-06, "loss": 0.5167, "step": 10210 }, { "epoch": 0.6000822030415125, "grad_norm": 3.261680841445923, "learning_rate": 9.698372225358066e-06, "loss": 0.39, "step": 10220 }, { "epoch": 0.6006693676237449, "grad_norm": 3.0648159980773926, "learning_rate": 9.697202604851101e-06, "loss": 0.4792, "step": 10230 }, { "epoch": 0.6012565322059773, "grad_norm": 3.0306966304779053, "learning_rate": 9.696030791819823e-06, "loss": 0.5065, "step": 10240 }, { "epoch": 0.6018436967882097, "grad_norm": 5.73069953918457, "learning_rate": 9.694856786811204e-06, "loss": 0.4868, "step": 10250 }, { "epoch": 0.6024308613704421, "grad_norm": 4.260950565338135, "learning_rate": 9.693680590373235e-06, "loss": 0.593, "step": 10260 }, { "epoch": 0.6030180259526745, "grad_norm": 4.212103366851807, "learning_rate": 9.692502203054934e-06, "loss": 0.5066, "step": 10270 }, { "epoch": 0.6036051905349069, "grad_norm": 3.2057840824127197, "learning_rate": 9.691321625406338e-06, "loss": 0.4381, "step": 10280 }, { "epoch": 0.6041923551171393, "grad_norm": 3.0760338306427, "learning_rate": 9.69013885797851e-06, "loss": 0.724, "step": 10290 }, { "epoch": 0.6047795196993717, "grad_norm": 3.158015251159668, "learning_rate": 9.68895390132353e-06, "loss": 0.4482, "step": 10300 }, { "epoch": 0.6053666842816041, "grad_norm": 3.2509148120880127, "learning_rate": 9.687766755994506e-06, "loss": 0.4917, "step": 10310 }, { "epoch": 0.6059538488638365, "grad_norm": 3.4924211502075195, "learning_rate": 9.686577422545562e-06, "loss": 0.696, "step": 10320 }, { "epoch": 0.6065410134460689, "grad_norm": 6.205427169799805, "learning_rate": 9.685385901531848e-06, "loss": 0.5244, "step": 10330 }, { "epoch": 0.6071281780283013, "grad_norm": 2.5815622806549072, "learning_rate": 9.684192193509533e-06, "loss": 0.4252, "step": 10340 }, { "epoch": 0.6077153426105337, "grad_norm": 2.506070852279663, "learning_rate": 9.682996299035805e-06, "loss": 0.5913, "step": 10350 }, { "epoch": 0.6083025071927661, "grad_norm": 1.836763858795166, "learning_rate": 9.681798218668876e-06, "loss": 0.5267, "step": 10360 }, { "epoch": 0.6088896717749985, "grad_norm": 1.883789300918579, "learning_rate": 9.680597952967979e-06, "loss": 0.5773, "step": 10370 }, { "epoch": 0.6094768363572309, "grad_norm": 3.1815855503082275, "learning_rate": 9.679395502493359e-06, "loss": 0.4984, "step": 10380 }, { "epoch": 0.6100640009394633, "grad_norm": 3.9183850288391113, "learning_rate": 9.678190867806292e-06, "loss": 0.5253, "step": 10390 }, { "epoch": 0.6106511655216957, "grad_norm": 3.027381181716919, "learning_rate": 9.676984049469066e-06, "loss": 0.5762, "step": 10400 }, { "epoch": 0.6112383301039281, "grad_norm": 3.251772403717041, "learning_rate": 9.67577504804499e-06, "loss": 0.5483, "step": 10410 }, { "epoch": 0.6118254946861605, "grad_norm": 8.065406799316406, "learning_rate": 9.674563864098394e-06, "loss": 0.569, "step": 10420 }, { "epoch": 0.612412659268393, "grad_norm": 5.880677700042725, "learning_rate": 9.673350498194623e-06, "loss": 0.4141, "step": 10430 }, { "epoch": 0.6129998238506253, "grad_norm": 5.681645393371582, "learning_rate": 9.672134950900045e-06, "loss": 0.5029, "step": 10440 }, { "epoch": 0.6135869884328577, "grad_norm": 3.926807403564453, "learning_rate": 9.670917222782041e-06, "loss": 0.5497, "step": 10450 }, { "epoch": 0.6141741530150902, "grad_norm": 4.989094257354736, "learning_rate": 9.669697314409016e-06, "loss": 0.3902, "step": 10460 }, { "epoch": 0.6147613175973226, "grad_norm": 2.8932766914367676, "learning_rate": 9.668475226350389e-06, "loss": 0.5603, "step": 10470 }, { "epoch": 0.615348482179555, "grad_norm": 4.599134922027588, "learning_rate": 9.667250959176596e-06, "loss": 0.4854, "step": 10480 }, { "epoch": 0.6159356467617874, "grad_norm": 4.551840305328369, "learning_rate": 9.66602451345909e-06, "loss": 0.5391, "step": 10490 }, { "epoch": 0.6165228113440198, "grad_norm": 3.129096746444702, "learning_rate": 9.664795889770343e-06, "loss": 0.5353, "step": 10500 }, { "epoch": 0.6171099759262522, "grad_norm": 7.089654445648193, "learning_rate": 9.663565088683844e-06, "loss": 0.5626, "step": 10510 }, { "epoch": 0.6176971405084846, "grad_norm": 4.378582000732422, "learning_rate": 9.662332110774095e-06, "loss": 0.4507, "step": 10520 }, { "epoch": 0.618284305090717, "grad_norm": 4.9818501472473145, "learning_rate": 9.661096956616616e-06, "loss": 0.5452, "step": 10530 }, { "epoch": 0.6188714696729494, "grad_norm": 3.757270336151123, "learning_rate": 9.659859626787942e-06, "loss": 0.4633, "step": 10540 }, { "epoch": 0.6194586342551818, "grad_norm": 1.949371337890625, "learning_rate": 9.658620121865628e-06, "loss": 0.4733, "step": 10550 }, { "epoch": 0.6200457988374142, "grad_norm": 2.2593138217926025, "learning_rate": 9.657378442428237e-06, "loss": 0.4149, "step": 10560 }, { "epoch": 0.6206329634196466, "grad_norm": 6.550942420959473, "learning_rate": 9.656134589055353e-06, "loss": 0.4869, "step": 10570 }, { "epoch": 0.621220128001879, "grad_norm": 4.243771076202393, "learning_rate": 9.65488856232757e-06, "loss": 0.4528, "step": 10580 }, { "epoch": 0.6218072925841114, "grad_norm": 11.658787727355957, "learning_rate": 9.653640362826502e-06, "loss": 0.5705, "step": 10590 }, { "epoch": 0.6223944571663437, "grad_norm": 12.609440803527832, "learning_rate": 9.652389991134772e-06, "loss": 0.386, "step": 10600 }, { "epoch": 0.6229816217485761, "grad_norm": 3.534109592437744, "learning_rate": 9.651137447836017e-06, "loss": 0.5121, "step": 10610 }, { "epoch": 0.6235687863308085, "grad_norm": 4.101050853729248, "learning_rate": 9.649882733514894e-06, "loss": 0.5679, "step": 10620 }, { "epoch": 0.6241559509130409, "grad_norm": 4.003134250640869, "learning_rate": 9.648625848757065e-06, "loss": 0.4588, "step": 10630 }, { "epoch": 0.6247431154952733, "grad_norm": 3.4556543827056885, "learning_rate": 9.647366794149211e-06, "loss": 0.5611, "step": 10640 }, { "epoch": 0.6253302800775057, "grad_norm": 6.0027570724487305, "learning_rate": 9.646105570279026e-06, "loss": 0.4348, "step": 10650 }, { "epoch": 0.6259174446597381, "grad_norm": 4.297792434692383, "learning_rate": 9.64484217773521e-06, "loss": 0.4431, "step": 10660 }, { "epoch": 0.6265046092419705, "grad_norm": 3.7780098915100098, "learning_rate": 9.643576617107481e-06, "loss": 0.524, "step": 10670 }, { "epoch": 0.6270917738242029, "grad_norm": 2.1031761169433594, "learning_rate": 9.64230888898657e-06, "loss": 0.5302, "step": 10680 }, { "epoch": 0.6276789384064353, "grad_norm": 5.821341514587402, "learning_rate": 9.641038993964216e-06, "loss": 0.525, "step": 10690 }, { "epoch": 0.6282661029886677, "grad_norm": 2.7963924407958984, "learning_rate": 9.63976693263317e-06, "loss": 0.4386, "step": 10700 }, { "epoch": 0.6288532675709001, "grad_norm": 7.609772682189941, "learning_rate": 9.638492705587194e-06, "loss": 0.4952, "step": 10710 }, { "epoch": 0.6294404321531325, "grad_norm": 6.370381832122803, "learning_rate": 9.637216313421068e-06, "loss": 0.4565, "step": 10720 }, { "epoch": 0.6300275967353649, "grad_norm": 1.3890832662582397, "learning_rate": 9.635937756730568e-06, "loss": 0.5156, "step": 10730 }, { "epoch": 0.6306147613175973, "grad_norm": 4.581550121307373, "learning_rate": 9.634657036112498e-06, "loss": 0.4929, "step": 10740 }, { "epoch": 0.6312019258998297, "grad_norm": 5.563602924346924, "learning_rate": 9.633374152164653e-06, "loss": 0.5358, "step": 10750 }, { "epoch": 0.6317890904820621, "grad_norm": 4.026636600494385, "learning_rate": 9.632089105485858e-06, "loss": 0.5845, "step": 10760 }, { "epoch": 0.6323762550642945, "grad_norm": 3.6751646995544434, "learning_rate": 9.63080189667593e-06, "loss": 0.4222, "step": 10770 }, { "epoch": 0.6329634196465269, "grad_norm": 6.7767157554626465, "learning_rate": 9.629512526335705e-06, "loss": 0.5667, "step": 10780 }, { "epoch": 0.6335505842287593, "grad_norm": 3.199643611907959, "learning_rate": 9.628220995067024e-06, "loss": 0.4952, "step": 10790 }, { "epoch": 0.6341377488109917, "grad_norm": 2.0006520748138428, "learning_rate": 9.62692730347274e-06, "loss": 0.5341, "step": 10800 }, { "epoch": 0.6347249133932241, "grad_norm": 14.325155258178711, "learning_rate": 9.625631452156712e-06, "loss": 0.6685, "step": 10810 }, { "epoch": 0.6353120779754565, "grad_norm": 2.2132887840270996, "learning_rate": 9.624333441723808e-06, "loss": 0.4677, "step": 10820 }, { "epoch": 0.6358992425576889, "grad_norm": 3.018826484680176, "learning_rate": 9.623033272779903e-06, "loss": 0.4546, "step": 10830 }, { "epoch": 0.6364864071399213, "grad_norm": 2.8392045497894287, "learning_rate": 9.621730945931877e-06, "loss": 0.4225, "step": 10840 }, { "epoch": 0.6370735717221537, "grad_norm": 4.185359954833984, "learning_rate": 9.620426461787626e-06, "loss": 0.3768, "step": 10850 }, { "epoch": 0.6376607363043861, "grad_norm": 1.9284067153930664, "learning_rate": 9.619119820956043e-06, "loss": 0.4839, "step": 10860 }, { "epoch": 0.6382479008866185, "grad_norm": 4.606189250946045, "learning_rate": 9.61781102404703e-06, "loss": 0.5387, "step": 10870 }, { "epoch": 0.6388350654688509, "grad_norm": 10.19775390625, "learning_rate": 9.616500071671503e-06, "loss": 0.5722, "step": 10880 }, { "epoch": 0.6394222300510833, "grad_norm": 5.112451076507568, "learning_rate": 9.615186964441374e-06, "loss": 0.4437, "step": 10890 }, { "epoch": 0.6400093946333157, "grad_norm": 4.951391220092773, "learning_rate": 9.613871702969567e-06, "loss": 0.4791, "step": 10900 }, { "epoch": 0.6405965592155481, "grad_norm": 11.070537567138672, "learning_rate": 9.612554287870008e-06, "loss": 0.5125, "step": 10910 }, { "epoch": 0.6411837237977805, "grad_norm": 8.646181106567383, "learning_rate": 9.611234719757634e-06, "loss": 0.4933, "step": 10920 }, { "epoch": 0.641770888380013, "grad_norm": 5.982475757598877, "learning_rate": 9.609912999248377e-06, "loss": 0.4739, "step": 10930 }, { "epoch": 0.6423580529622454, "grad_norm": 2.5046613216400146, "learning_rate": 9.608589126959185e-06, "loss": 0.5092, "step": 10940 }, { "epoch": 0.6429452175444778, "grad_norm": 5.450733184814453, "learning_rate": 9.607263103508e-06, "loss": 0.437, "step": 10950 }, { "epoch": 0.6435323821267102, "grad_norm": 4.062183856964111, "learning_rate": 9.605934929513777e-06, "loss": 0.5275, "step": 10960 }, { "epoch": 0.6441195467089426, "grad_norm": 10.367395401000977, "learning_rate": 9.60460460559647e-06, "loss": 0.6499, "step": 10970 }, { "epoch": 0.644706711291175, "grad_norm": 6.371763706207275, "learning_rate": 9.603272132377037e-06, "loss": 0.4102, "step": 10980 }, { "epoch": 0.6452938758734074, "grad_norm": 4.630781173706055, "learning_rate": 9.601937510477438e-06, "loss": 0.5966, "step": 10990 }, { "epoch": 0.6458810404556398, "grad_norm": 5.9592671394348145, "learning_rate": 9.60060074052064e-06, "loss": 0.5134, "step": 11000 }, { "epoch": 0.6464682050378722, "grad_norm": 1.3828318119049072, "learning_rate": 9.59926182313061e-06, "loss": 0.448, "step": 11010 }, { "epoch": 0.6470553696201046, "grad_norm": 3.4049389362335205, "learning_rate": 9.597920758932318e-06, "loss": 0.5306, "step": 11020 }, { "epoch": 0.6476425342023369, "grad_norm": 5.503774642944336, "learning_rate": 9.596577548551733e-06, "loss": 0.5374, "step": 11030 }, { "epoch": 0.6482296987845693, "grad_norm": 5.836912155151367, "learning_rate": 9.595232192615831e-06, "loss": 0.5509, "step": 11040 }, { "epoch": 0.6488168633668017, "grad_norm": 5.266071796417236, "learning_rate": 9.593884691752586e-06, "loss": 0.6528, "step": 11050 }, { "epoch": 0.6494040279490341, "grad_norm": 10.135128021240234, "learning_rate": 9.592535046590974e-06, "loss": 0.508, "step": 11060 }, { "epoch": 0.6499911925312665, "grad_norm": 2.5424530506134033, "learning_rate": 9.591183257760973e-06, "loss": 0.4037, "step": 11070 }, { "epoch": 0.6505783571134989, "grad_norm": 2.5368099212646484, "learning_rate": 9.58982932589356e-06, "loss": 0.4718, "step": 11080 }, { "epoch": 0.6511655216957313, "grad_norm": 4.094138145446777, "learning_rate": 9.588473251620713e-06, "loss": 0.6653, "step": 11090 }, { "epoch": 0.6517526862779637, "grad_norm": 1.642304539680481, "learning_rate": 9.587115035575412e-06, "loss": 0.372, "step": 11100 }, { "epoch": 0.6523398508601961, "grad_norm": 2.8892621994018555, "learning_rate": 9.58575467839163e-06, "loss": 0.4859, "step": 11110 }, { "epoch": 0.6529270154424285, "grad_norm": 4.220464706420898, "learning_rate": 9.584392180704348e-06, "loss": 0.4614, "step": 11120 }, { "epoch": 0.6535141800246609, "grad_norm": 2.9108777046203613, "learning_rate": 9.58302754314954e-06, "loss": 0.5087, "step": 11130 }, { "epoch": 0.6541013446068933, "grad_norm": 5.535151481628418, "learning_rate": 9.581660766364183e-06, "loss": 0.519, "step": 11140 }, { "epoch": 0.6546885091891257, "grad_norm": 13.084985733032227, "learning_rate": 9.580291850986249e-06, "loss": 0.5506, "step": 11150 }, { "epoch": 0.6552756737713581, "grad_norm": 4.7208027839660645, "learning_rate": 9.578920797654713e-06, "loss": 0.6266, "step": 11160 }, { "epoch": 0.6558628383535905, "grad_norm": 2.2896389961242676, "learning_rate": 9.57754760700954e-06, "loss": 0.44, "step": 11170 }, { "epoch": 0.6564500029358229, "grad_norm": 4.22946310043335, "learning_rate": 9.5761722796917e-06, "loss": 0.4956, "step": 11180 }, { "epoch": 0.6570371675180553, "grad_norm": 2.615999460220337, "learning_rate": 9.57479481634316e-06, "loss": 0.6197, "step": 11190 }, { "epoch": 0.6576243321002877, "grad_norm": 2.6413938999176025, "learning_rate": 9.573415217606877e-06, "loss": 0.4297, "step": 11200 }, { "epoch": 0.6582114966825201, "grad_norm": 2.2095680236816406, "learning_rate": 9.572033484126812e-06, "loss": 0.4859, "step": 11210 }, { "epoch": 0.6587986612647525, "grad_norm": 3.2768585681915283, "learning_rate": 9.570649616547922e-06, "loss": 0.6157, "step": 11220 }, { "epoch": 0.6593858258469849, "grad_norm": 4.465895175933838, "learning_rate": 9.569263615516156e-06, "loss": 0.5003, "step": 11230 }, { "epoch": 0.6599729904292173, "grad_norm": 3.2681660652160645, "learning_rate": 9.567875481678459e-06, "loss": 0.4403, "step": 11240 }, { "epoch": 0.6605601550114497, "grad_norm": 2.974916934967041, "learning_rate": 9.566485215682777e-06, "loss": 0.5396, "step": 11250 }, { "epoch": 0.6611473195936821, "grad_norm": 5.268638610839844, "learning_rate": 9.565092818178046e-06, "loss": 0.6449, "step": 11260 }, { "epoch": 0.6617344841759145, "grad_norm": 4.610267162322998, "learning_rate": 9.563698289814199e-06, "loss": 0.417, "step": 11270 }, { "epoch": 0.6623216487581469, "grad_norm": 3.25374436378479, "learning_rate": 9.562301631242162e-06, "loss": 0.4796, "step": 11280 }, { "epoch": 0.6629088133403793, "grad_norm": 2.0999839305877686, "learning_rate": 9.56090284311386e-06, "loss": 0.5542, "step": 11290 }, { "epoch": 0.6634959779226117, "grad_norm": 2.793524980545044, "learning_rate": 9.559501926082204e-06, "loss": 0.4947, "step": 11300 }, { "epoch": 0.6640831425048441, "grad_norm": 4.093522071838379, "learning_rate": 9.558098880801107e-06, "loss": 0.4349, "step": 11310 }, { "epoch": 0.6646703070870765, "grad_norm": 3.435558319091797, "learning_rate": 9.556693707925472e-06, "loss": 0.4922, "step": 11320 }, { "epoch": 0.6652574716693089, "grad_norm": 4.745330810546875, "learning_rate": 9.555286408111192e-06, "loss": 0.4298, "step": 11330 }, { "epoch": 0.6658446362515413, "grad_norm": 3.951096534729004, "learning_rate": 9.553876982015158e-06, "loss": 0.4065, "step": 11340 }, { "epoch": 0.6664318008337737, "grad_norm": 3.1986076831817627, "learning_rate": 9.552465430295248e-06, "loss": 0.4083, "step": 11350 }, { "epoch": 0.6670189654160061, "grad_norm": 8.300954818725586, "learning_rate": 9.55105175361034e-06, "loss": 0.4637, "step": 11360 }, { "epoch": 0.6676061299982385, "grad_norm": 3.537564992904663, "learning_rate": 9.549635952620295e-06, "loss": 0.5194, "step": 11370 }, { "epoch": 0.6681932945804709, "grad_norm": 5.3202691078186035, "learning_rate": 9.548218027985972e-06, "loss": 0.4871, "step": 11380 }, { "epoch": 0.6687804591627033, "grad_norm": 2.29596209526062, "learning_rate": 9.546797980369218e-06, "loss": 0.4938, "step": 11390 }, { "epoch": 0.6693676237449357, "grad_norm": 5.364410400390625, "learning_rate": 9.545375810432874e-06, "loss": 0.5397, "step": 11400 }, { "epoch": 0.6699547883271681, "grad_norm": 4.308865070343018, "learning_rate": 9.543951518840767e-06, "loss": 0.5535, "step": 11410 }, { "epoch": 0.6705419529094006, "grad_norm": 7.278843402862549, "learning_rate": 9.542525106257719e-06, "loss": 0.3908, "step": 11420 }, { "epoch": 0.671129117491633, "grad_norm": 4.518975257873535, "learning_rate": 9.541096573349536e-06, "loss": 0.5707, "step": 11430 }, { "epoch": 0.6717162820738654, "grad_norm": 3.2998180389404297, "learning_rate": 9.539665920783024e-06, "loss": 0.4793, "step": 11440 }, { "epoch": 0.6723034466560978, "grad_norm": 2.1724441051483154, "learning_rate": 9.538233149225967e-06, "loss": 0.4213, "step": 11450 }, { "epoch": 0.67289061123833, "grad_norm": 6.8949127197265625, "learning_rate": 9.536798259347144e-06, "loss": 0.4998, "step": 11460 }, { "epoch": 0.6734777758205625, "grad_norm": 2.302924871444702, "learning_rate": 9.535361251816324e-06, "loss": 0.4593, "step": 11470 }, { "epoch": 0.6740649404027949, "grad_norm": 1.4480018615722656, "learning_rate": 9.533922127304263e-06, "loss": 0.5707, "step": 11480 }, { "epoch": 0.6746521049850273, "grad_norm": 3.828117609024048, "learning_rate": 9.5324808864827e-06, "loss": 0.5738, "step": 11490 }, { "epoch": 0.6752392695672597, "grad_norm": 8.564737319946289, "learning_rate": 9.53103753002437e-06, "loss": 0.6618, "step": 11500 }, { "epoch": 0.6758264341494921, "grad_norm": 0.9662351608276367, "learning_rate": 9.52959205860299e-06, "loss": 0.5325, "step": 11510 }, { "epoch": 0.6764135987317245, "grad_norm": 4.855303764343262, "learning_rate": 9.528144472893268e-06, "loss": 0.3752, "step": 11520 }, { "epoch": 0.6770007633139569, "grad_norm": 5.9341535568237305, "learning_rate": 9.526694773570894e-06, "loss": 0.4537, "step": 11530 }, { "epoch": 0.6775879278961893, "grad_norm": 5.298686504364014, "learning_rate": 9.525242961312554e-06, "loss": 0.3564, "step": 11540 }, { "epoch": 0.6781750924784217, "grad_norm": 4.9797749519348145, "learning_rate": 9.523789036795906e-06, "loss": 0.4991, "step": 11550 }, { "epoch": 0.6787622570606541, "grad_norm": 4.509823799133301, "learning_rate": 9.522333000699606e-06, "loss": 0.5328, "step": 11560 }, { "epoch": 0.6793494216428865, "grad_norm": 8.232169151306152, "learning_rate": 9.52087485370329e-06, "loss": 0.4687, "step": 11570 }, { "epoch": 0.6799365862251189, "grad_norm": 14.340109825134277, "learning_rate": 9.519414596487582e-06, "loss": 0.6213, "step": 11580 }, { "epoch": 0.6805237508073513, "grad_norm": 7.979020118713379, "learning_rate": 9.517952229734088e-06, "loss": 0.5711, "step": 11590 }, { "epoch": 0.6811109153895837, "grad_norm": 1.9562602043151855, "learning_rate": 9.516487754125404e-06, "loss": 0.6241, "step": 11600 }, { "epoch": 0.6816980799718161, "grad_norm": 12.92496395111084, "learning_rate": 9.5150211703451e-06, "loss": 0.5468, "step": 11610 }, { "epoch": 0.6822852445540485, "grad_norm": 2.7964179515838623, "learning_rate": 9.513552479077744e-06, "loss": 0.5078, "step": 11620 }, { "epoch": 0.6828724091362809, "grad_norm": 4.858505725860596, "learning_rate": 9.512081681008875e-06, "loss": 0.6153, "step": 11630 }, { "epoch": 0.6834595737185133, "grad_norm": 1.9143192768096924, "learning_rate": 9.510608776825026e-06, "loss": 0.4237, "step": 11640 }, { "epoch": 0.6840467383007457, "grad_norm": 4.510788917541504, "learning_rate": 9.509133767213703e-06, "loss": 0.4853, "step": 11650 }, { "epoch": 0.6846339028829781, "grad_norm": 4.4877610206604, "learning_rate": 9.507656652863402e-06, "loss": 0.5264, "step": 11660 }, { "epoch": 0.6852210674652105, "grad_norm": 2.071007490158081, "learning_rate": 9.5061774344636e-06, "loss": 0.4874, "step": 11670 }, { "epoch": 0.6858082320474429, "grad_norm": 3.845916986465454, "learning_rate": 9.504696112704754e-06, "loss": 0.6688, "step": 11680 }, { "epoch": 0.6863953966296753, "grad_norm": 4.832038879394531, "learning_rate": 9.503212688278302e-06, "loss": 0.4757, "step": 11690 }, { "epoch": 0.6869825612119077, "grad_norm": 3.5705208778381348, "learning_rate": 9.501727161876671e-06, "loss": 0.3651, "step": 11700 }, { "epoch": 0.6875697257941401, "grad_norm": 4.047025203704834, "learning_rate": 9.500239534193259e-06, "loss": 0.5539, "step": 11710 }, { "epoch": 0.6881568903763725, "grad_norm": 9.047235488891602, "learning_rate": 9.498749805922452e-06, "loss": 0.4617, "step": 11720 }, { "epoch": 0.6887440549586049, "grad_norm": 6.992180824279785, "learning_rate": 9.497257977759612e-06, "loss": 0.579, "step": 11730 }, { "epoch": 0.6893312195408373, "grad_norm": 4.619629859924316, "learning_rate": 9.495764050401082e-06, "loss": 0.462, "step": 11740 }, { "epoch": 0.6899183841230697, "grad_norm": 2.922220230102539, "learning_rate": 9.494268024544191e-06, "loss": 0.4814, "step": 11750 }, { "epoch": 0.6905055487053021, "grad_norm": 4.991499900817871, "learning_rate": 9.49276990088724e-06, "loss": 0.5317, "step": 11760 }, { "epoch": 0.6910927132875345, "grad_norm": 6.0991010665893555, "learning_rate": 9.491269680129508e-06, "loss": 0.559, "step": 11770 }, { "epoch": 0.6916798778697669, "grad_norm": 5.811995983123779, "learning_rate": 9.489767362971261e-06, "loss": 0.4847, "step": 11780 }, { "epoch": 0.6922670424519993, "grad_norm": 3.4570698738098145, "learning_rate": 9.48826295011374e-06, "loss": 0.4511, "step": 11790 }, { "epoch": 0.6928542070342317, "grad_norm": 2.0449328422546387, "learning_rate": 9.48675644225916e-06, "loss": 0.5313, "step": 11800 }, { "epoch": 0.6934413716164641, "grad_norm": 2.765113115310669, "learning_rate": 9.485247840110717e-06, "loss": 0.5062, "step": 11810 }, { "epoch": 0.6940285361986965, "grad_norm": 3.080808401107788, "learning_rate": 9.483737144372588e-06, "loss": 0.496, "step": 11820 }, { "epoch": 0.6946157007809289, "grad_norm": 5.219304084777832, "learning_rate": 9.48222435574992e-06, "loss": 0.6016, "step": 11830 }, { "epoch": 0.6952028653631613, "grad_norm": 6.57525110244751, "learning_rate": 9.480709474948846e-06, "loss": 0.4494, "step": 11840 }, { "epoch": 0.6957900299453937, "grad_norm": 4.855754852294922, "learning_rate": 9.479192502676467e-06, "loss": 0.5565, "step": 11850 }, { "epoch": 0.6963771945276261, "grad_norm": 2.7686362266540527, "learning_rate": 9.477673439640862e-06, "loss": 0.604, "step": 11860 }, { "epoch": 0.6969643591098585, "grad_norm": 3.7473714351654053, "learning_rate": 9.476152286551093e-06, "loss": 0.6171, "step": 11870 }, { "epoch": 0.697551523692091, "grad_norm": 5.527706146240234, "learning_rate": 9.474629044117188e-06, "loss": 0.4258, "step": 11880 }, { "epoch": 0.6981386882743232, "grad_norm": 10.869209289550781, "learning_rate": 9.473103713050156e-06, "loss": 0.5312, "step": 11890 }, { "epoch": 0.6987258528565556, "grad_norm": 7.6562819480896, "learning_rate": 9.471576294061979e-06, "loss": 0.4896, "step": 11900 }, { "epoch": 0.699313017438788, "grad_norm": 10.398927688598633, "learning_rate": 9.470046787865612e-06, "loss": 0.462, "step": 11910 }, { "epoch": 0.6999001820210204, "grad_norm": 5.152966499328613, "learning_rate": 9.46851519517499e-06, "loss": 0.6501, "step": 11920 }, { "epoch": 0.7004873466032528, "grad_norm": 2.6859076023101807, "learning_rate": 9.466981516705016e-06, "loss": 0.4774, "step": 11930 }, { "epoch": 0.7010745111854852, "grad_norm": 5.381035327911377, "learning_rate": 9.465445753171569e-06, "loss": 0.4947, "step": 11940 }, { "epoch": 0.7016616757677177, "grad_norm": 3.1460258960723877, "learning_rate": 9.4639079052915e-06, "loss": 0.5722, "step": 11950 }, { "epoch": 0.70224884034995, "grad_norm": 2.820056676864624, "learning_rate": 9.462367973782636e-06, "loss": 0.6611, "step": 11960 }, { "epoch": 0.7028360049321825, "grad_norm": 21.585111618041992, "learning_rate": 9.460825959363773e-06, "loss": 0.6059, "step": 11970 }, { "epoch": 0.7034231695144149, "grad_norm": 8.49029541015625, "learning_rate": 9.45928186275468e-06, "loss": 0.462, "step": 11980 }, { "epoch": 0.7040103340966473, "grad_norm": 2.1695921421051025, "learning_rate": 9.457735684676101e-06, "loss": 0.6209, "step": 11990 }, { "epoch": 0.7045974986788797, "grad_norm": 3.8958046436309814, "learning_rate": 9.456187425849748e-06, "loss": 0.4292, "step": 12000 }, { "epoch": 0.7051846632611121, "grad_norm": 5.832396984100342, "learning_rate": 9.454637086998304e-06, "loss": 0.6203, "step": 12010 }, { "epoch": 0.7057718278433445, "grad_norm": 4.312843322753906, "learning_rate": 9.453084668845427e-06, "loss": 0.605, "step": 12020 }, { "epoch": 0.7063589924255769, "grad_norm": 2.674307346343994, "learning_rate": 9.45153017211574e-06, "loss": 0.4979, "step": 12030 }, { "epoch": 0.7069461570078093, "grad_norm": 3.803579330444336, "learning_rate": 9.449973597534841e-06, "loss": 0.5132, "step": 12040 }, { "epoch": 0.7075333215900417, "grad_norm": 5.483590602874756, "learning_rate": 9.448414945829296e-06, "loss": 0.576, "step": 12050 }, { "epoch": 0.7081204861722741, "grad_norm": 15.065774917602539, "learning_rate": 9.446854217726638e-06, "loss": 0.4493, "step": 12060 }, { "epoch": 0.7087076507545065, "grad_norm": 2.281729221343994, "learning_rate": 9.445291413955375e-06, "loss": 0.4623, "step": 12070 }, { "epoch": 0.7092948153367389, "grad_norm": 5.313885688781738, "learning_rate": 9.44372653524498e-06, "loss": 0.5464, "step": 12080 }, { "epoch": 0.7098819799189713, "grad_norm": 2.667206048965454, "learning_rate": 9.442159582325894e-06, "loss": 0.7053, "step": 12090 }, { "epoch": 0.7104691445012037, "grad_norm": 5.723644733428955, "learning_rate": 9.440590555929526e-06, "loss": 0.4289, "step": 12100 }, { "epoch": 0.7110563090834361, "grad_norm": 3.6078360080718994, "learning_rate": 9.439019456788256e-06, "loss": 0.5554, "step": 12110 }, { "epoch": 0.7116434736656685, "grad_norm": 6.799030303955078, "learning_rate": 9.437446285635432e-06, "loss": 0.4717, "step": 12120 }, { "epoch": 0.7122306382479009, "grad_norm": 3.963414430618286, "learning_rate": 9.435871043205363e-06, "loss": 0.5759, "step": 12130 }, { "epoch": 0.7128178028301333, "grad_norm": 1.4423569440841675, "learning_rate": 9.43429373023333e-06, "loss": 0.4702, "step": 12140 }, { "epoch": 0.7134049674123657, "grad_norm": 6.943474292755127, "learning_rate": 9.432714347455578e-06, "loss": 0.4417, "step": 12150 }, { "epoch": 0.7139921319945981, "grad_norm": 3.238577365875244, "learning_rate": 9.431132895609322e-06, "loss": 0.5342, "step": 12160 }, { "epoch": 0.7145792965768305, "grad_norm": 7.155550003051758, "learning_rate": 9.429549375432737e-06, "loss": 0.5701, "step": 12170 }, { "epoch": 0.7151664611590629, "grad_norm": 8.156291007995605, "learning_rate": 9.427963787664968e-06, "loss": 0.5506, "step": 12180 }, { "epoch": 0.7157536257412953, "grad_norm": 3.9009525775909424, "learning_rate": 9.426376133046122e-06, "loss": 0.4928, "step": 12190 }, { "epoch": 0.7163407903235277, "grad_norm": 2.9365625381469727, "learning_rate": 9.424786412317275e-06, "loss": 0.5076, "step": 12200 }, { "epoch": 0.7169279549057601, "grad_norm": 3.121856212615967, "learning_rate": 9.423194626220464e-06, "loss": 0.4981, "step": 12210 }, { "epoch": 0.7175151194879925, "grad_norm": 3.9682998657226562, "learning_rate": 9.421600775498687e-06, "loss": 0.4394, "step": 12220 }, { "epoch": 0.7181022840702249, "grad_norm": 4.035419940948486, "learning_rate": 9.420004860895914e-06, "loss": 0.5619, "step": 12230 }, { "epoch": 0.7186894486524573, "grad_norm": 4.424248695373535, "learning_rate": 9.418406883157074e-06, "loss": 0.5001, "step": 12240 }, { "epoch": 0.7192766132346897, "grad_norm": 9.205337524414062, "learning_rate": 9.416806843028054e-06, "loss": 0.5277, "step": 12250 }, { "epoch": 0.7198637778169221, "grad_norm": 2.9825074672698975, "learning_rate": 9.415204741255712e-06, "loss": 0.506, "step": 12260 }, { "epoch": 0.7204509423991545, "grad_norm": 11.706381797790527, "learning_rate": 9.413600578587865e-06, "loss": 0.5061, "step": 12270 }, { "epoch": 0.7210381069813869, "grad_norm": 2.972285270690918, "learning_rate": 9.41199435577329e-06, "loss": 0.4841, "step": 12280 }, { "epoch": 0.7216252715636193, "grad_norm": 7.214209079742432, "learning_rate": 9.41038607356173e-06, "loss": 0.5129, "step": 12290 }, { "epoch": 0.7222124361458517, "grad_norm": 2.0453412532806396, "learning_rate": 9.408775732703886e-06, "loss": 0.45, "step": 12300 }, { "epoch": 0.7227996007280841, "grad_norm": 5.270894527435303, "learning_rate": 9.407163333951417e-06, "loss": 0.4768, "step": 12310 }, { "epoch": 0.7233867653103164, "grad_norm": 2.2342255115509033, "learning_rate": 9.40554887805695e-06, "loss": 0.5306, "step": 12320 }, { "epoch": 0.7239739298925488, "grad_norm": 3.491196870803833, "learning_rate": 9.403932365774067e-06, "loss": 0.4229, "step": 12330 }, { "epoch": 0.7245610944747812, "grad_norm": 1.6893774271011353, "learning_rate": 9.402313797857313e-06, "loss": 0.5782, "step": 12340 }, { "epoch": 0.7251482590570136, "grad_norm": 3.3257956504821777, "learning_rate": 9.400693175062188e-06, "loss": 0.3774, "step": 12350 }, { "epoch": 0.725735423639246, "grad_norm": 4.857753753662109, "learning_rate": 9.399070498145156e-06, "loss": 0.4713, "step": 12360 }, { "epoch": 0.7263225882214784, "grad_norm": 2.476069450378418, "learning_rate": 9.397445767863636e-06, "loss": 0.5647, "step": 12370 }, { "epoch": 0.7269097528037108, "grad_norm": 4.2216925621032715, "learning_rate": 9.395818984976008e-06, "loss": 0.519, "step": 12380 }, { "epoch": 0.7274969173859432, "grad_norm": 2.1620240211486816, "learning_rate": 9.394190150241612e-06, "loss": 0.4087, "step": 12390 }, { "epoch": 0.7280840819681756, "grad_norm": 2.722578763961792, "learning_rate": 9.392559264420738e-06, "loss": 0.5674, "step": 12400 }, { "epoch": 0.728671246550408, "grad_norm": 1.3620331287384033, "learning_rate": 9.39092632827464e-06, "loss": 0.4093, "step": 12410 }, { "epoch": 0.7292584111326404, "grad_norm": 1.8262203931808472, "learning_rate": 9.38929134256553e-06, "loss": 0.44, "step": 12420 }, { "epoch": 0.7298455757148729, "grad_norm": 3.9083163738250732, "learning_rate": 9.387654308056571e-06, "loss": 0.4653, "step": 12430 }, { "epoch": 0.7304327402971053, "grad_norm": 1.8192081451416016, "learning_rate": 9.386015225511886e-06, "loss": 0.6268, "step": 12440 }, { "epoch": 0.7310199048793377, "grad_norm": 2.1725921630859375, "learning_rate": 9.384374095696558e-06, "loss": 0.431, "step": 12450 }, { "epoch": 0.7316070694615701, "grad_norm": 5.116398811340332, "learning_rate": 9.382730919376615e-06, "loss": 0.5985, "step": 12460 }, { "epoch": 0.7321942340438025, "grad_norm": 7.489431858062744, "learning_rate": 9.381085697319048e-06, "loss": 0.6589, "step": 12470 }, { "epoch": 0.7327813986260349, "grad_norm": 6.718683242797852, "learning_rate": 9.379438430291801e-06, "loss": 0.675, "step": 12480 }, { "epoch": 0.7333685632082673, "grad_norm": 3.469285488128662, "learning_rate": 9.377789119063776e-06, "loss": 0.503, "step": 12490 }, { "epoch": 0.7339557277904997, "grad_norm": 8.05574893951416, "learning_rate": 9.376137764404822e-06, "loss": 0.6173, "step": 12500 }, { "epoch": 0.7345428923727321, "grad_norm": 3.302668809890747, "learning_rate": 9.374484367085747e-06, "loss": 0.5682, "step": 12510 }, { "epoch": 0.7351300569549645, "grad_norm": 5.713916301727295, "learning_rate": 9.37282892787831e-06, "loss": 0.3899, "step": 12520 }, { "epoch": 0.7357172215371969, "grad_norm": 5.163667678833008, "learning_rate": 9.371171447555228e-06, "loss": 0.5381, "step": 12530 }, { "epoch": 0.7363043861194293, "grad_norm": 5.014130115509033, "learning_rate": 9.369511926890162e-06, "loss": 0.5051, "step": 12540 }, { "epoch": 0.7368915507016617, "grad_norm": 5.640323638916016, "learning_rate": 9.367850366657734e-06, "loss": 0.47, "step": 12550 }, { "epoch": 0.7374787152838941, "grad_norm": 4.227486610412598, "learning_rate": 9.366186767633512e-06, "loss": 0.5474, "step": 12560 }, { "epoch": 0.7380658798661265, "grad_norm": 2.465151786804199, "learning_rate": 9.364521130594021e-06, "loss": 0.5866, "step": 12570 }, { "epoch": 0.7386530444483589, "grad_norm": 6.621946811676025, "learning_rate": 9.362853456316731e-06, "loss": 0.3956, "step": 12580 }, { "epoch": 0.7392402090305913, "grad_norm": 4.326510906219482, "learning_rate": 9.361183745580069e-06, "loss": 0.4656, "step": 12590 }, { "epoch": 0.7398273736128237, "grad_norm": 4.282926559448242, "learning_rate": 9.359511999163406e-06, "loss": 0.4587, "step": 12600 }, { "epoch": 0.7404145381950561, "grad_norm": 3.621347188949585, "learning_rate": 9.357838217847073e-06, "loss": 0.4367, "step": 12610 }, { "epoch": 0.7410017027772885, "grad_norm": 3.13458514213562, "learning_rate": 9.356162402412339e-06, "loss": 0.4667, "step": 12620 }, { "epoch": 0.7415888673595209, "grad_norm": 5.076228618621826, "learning_rate": 9.354484553641432e-06, "loss": 0.4825, "step": 12630 }, { "epoch": 0.7421760319417533, "grad_norm": 4.993038177490234, "learning_rate": 9.352804672317523e-06, "loss": 0.4418, "step": 12640 }, { "epoch": 0.7427631965239857, "grad_norm": 8.821436882019043, "learning_rate": 9.351122759224736e-06, "loss": 0.5283, "step": 12650 }, { "epoch": 0.7433503611062181, "grad_norm": 9.859395027160645, "learning_rate": 9.349438815148141e-06, "loss": 0.5338, "step": 12660 }, { "epoch": 0.7439375256884505, "grad_norm": 2.299626588821411, "learning_rate": 9.347752840873757e-06, "loss": 0.4177, "step": 12670 }, { "epoch": 0.7445246902706829, "grad_norm": 2.8289005756378174, "learning_rate": 9.346064837188549e-06, "loss": 0.4886, "step": 12680 }, { "epoch": 0.7451118548529153, "grad_norm": 1.7385523319244385, "learning_rate": 9.34437480488043e-06, "loss": 0.5648, "step": 12690 }, { "epoch": 0.7456990194351477, "grad_norm": 6.668693542480469, "learning_rate": 9.342682744738262e-06, "loss": 0.4849, "step": 12700 }, { "epoch": 0.7462861840173801, "grad_norm": 2.0946850776672363, "learning_rate": 9.340988657551851e-06, "loss": 0.3909, "step": 12710 }, { "epoch": 0.7468733485996125, "grad_norm": 3.3706393241882324, "learning_rate": 9.339292544111951e-06, "loss": 0.6365, "step": 12720 }, { "epoch": 0.7474605131818449, "grad_norm": 1.6573847532272339, "learning_rate": 9.337594405210259e-06, "loss": 0.7906, "step": 12730 }, { "epoch": 0.7480476777640773, "grad_norm": 2.9548099040985107, "learning_rate": 9.335894241639421e-06, "loss": 0.4461, "step": 12740 }, { "epoch": 0.7486348423463097, "grad_norm": 5.425742149353027, "learning_rate": 9.334192054193028e-06, "loss": 0.4508, "step": 12750 }, { "epoch": 0.749222006928542, "grad_norm": 2.687133550643921, "learning_rate": 9.332487843665609e-06, "loss": 0.4048, "step": 12760 }, { "epoch": 0.7498091715107744, "grad_norm": 4.585319995880127, "learning_rate": 9.330781610852647e-06, "loss": 0.5187, "step": 12770 }, { "epoch": 0.7503963360930068, "grad_norm": 0.9761111736297607, "learning_rate": 9.32907335655056e-06, "loss": 0.4466, "step": 12780 }, { "epoch": 0.7509835006752392, "grad_norm": 9.283482551574707, "learning_rate": 9.32736308155672e-06, "loss": 0.4642, "step": 12790 }, { "epoch": 0.7515706652574716, "grad_norm": 5.153415203094482, "learning_rate": 9.325650786669428e-06, "loss": 0.5357, "step": 12800 }, { "epoch": 0.752157829839704, "grad_norm": 1.210571050643921, "learning_rate": 9.323936472687945e-06, "loss": 0.4201, "step": 12810 }, { "epoch": 0.7527449944219364, "grad_norm": 2.2346434593200684, "learning_rate": 9.322220140412457e-06, "loss": 0.607, "step": 12820 }, { "epoch": 0.7533321590041688, "grad_norm": 4.9901628494262695, "learning_rate": 9.320501790644107e-06, "loss": 0.6195, "step": 12830 }, { "epoch": 0.7539193235864012, "grad_norm": 10.27911376953125, "learning_rate": 9.318781424184968e-06, "loss": 0.499, "step": 12840 }, { "epoch": 0.7545064881686336, "grad_norm": 2.0663657188415527, "learning_rate": 9.317059041838063e-06, "loss": 0.5326, "step": 12850 }, { "epoch": 0.755093652750866, "grad_norm": 11.69629192352295, "learning_rate": 9.315334644407352e-06, "loss": 0.4819, "step": 12860 }, { "epoch": 0.7556808173330984, "grad_norm": 2.7034223079681396, "learning_rate": 9.313608232697735e-06, "loss": 0.3739, "step": 12870 }, { "epoch": 0.7562679819153308, "grad_norm": 4.398718357086182, "learning_rate": 9.311879807515054e-06, "loss": 0.4968, "step": 12880 }, { "epoch": 0.7568551464975632, "grad_norm": 11.145733833312988, "learning_rate": 9.310149369666088e-06, "loss": 0.3914, "step": 12890 }, { "epoch": 0.7574423110797956, "grad_norm": 5.18894624710083, "learning_rate": 9.30841691995856e-06, "loss": 0.5022, "step": 12900 }, { "epoch": 0.758029475662028, "grad_norm": 1.6739927530288696, "learning_rate": 9.30668245920113e-06, "loss": 0.4484, "step": 12910 }, { "epoch": 0.7586166402442605, "grad_norm": 4.632188320159912, "learning_rate": 9.304945988203395e-06, "loss": 0.5119, "step": 12920 }, { "epoch": 0.7592038048264929, "grad_norm": 3.176454544067383, "learning_rate": 9.303207507775893e-06, "loss": 0.5408, "step": 12930 }, { "epoch": 0.7597909694087253, "grad_norm": 7.2637128829956055, "learning_rate": 9.301467018730094e-06, "loss": 0.4741, "step": 12940 }, { "epoch": 0.7603781339909577, "grad_norm": 1.8084604740142822, "learning_rate": 9.299724521878418e-06, "loss": 0.4273, "step": 12950 }, { "epoch": 0.7609652985731901, "grad_norm": 4.928582668304443, "learning_rate": 9.297980018034207e-06, "loss": 0.5561, "step": 12960 }, { "epoch": 0.7615524631554225, "grad_norm": 2.9882986545562744, "learning_rate": 9.29623350801175e-06, "loss": 0.6726, "step": 12970 }, { "epoch": 0.7621396277376549, "grad_norm": 2.9178829193115234, "learning_rate": 9.294484992626271e-06, "loss": 0.5674, "step": 12980 }, { "epoch": 0.7627267923198873, "grad_norm": 2.782017230987549, "learning_rate": 9.292734472693927e-06, "loss": 0.5263, "step": 12990 }, { "epoch": 0.7633139569021197, "grad_norm": 4.931901454925537, "learning_rate": 9.290981949031812e-06, "loss": 0.4924, "step": 13000 }, { "epoch": 0.7639011214843521, "grad_norm": 4.739995956420898, "learning_rate": 9.289227422457958e-06, "loss": 0.458, "step": 13010 }, { "epoch": 0.7644882860665845, "grad_norm": 3.7107300758361816, "learning_rate": 9.287470893791326e-06, "loss": 0.4845, "step": 13020 }, { "epoch": 0.7650754506488169, "grad_norm": 3.4341912269592285, "learning_rate": 9.285712363851818e-06, "loss": 0.5569, "step": 13030 }, { "epoch": 0.7656626152310493, "grad_norm": 4.285268783569336, "learning_rate": 9.283951833460265e-06, "loss": 0.6311, "step": 13040 }, { "epoch": 0.7662497798132817, "grad_norm": 3.23203706741333, "learning_rate": 9.282189303438437e-06, "loss": 0.3588, "step": 13050 }, { "epoch": 0.7668369443955141, "grad_norm": 7.963992595672607, "learning_rate": 9.28042477460903e-06, "loss": 0.5202, "step": 13060 }, { "epoch": 0.7674241089777465, "grad_norm": 5.199499607086182, "learning_rate": 9.27865824779568e-06, "loss": 0.5919, "step": 13070 }, { "epoch": 0.7680112735599789, "grad_norm": 6.685162544250488, "learning_rate": 9.276889723822953e-06, "loss": 0.4496, "step": 13080 }, { "epoch": 0.7685984381422113, "grad_norm": 9.186629295349121, "learning_rate": 9.275119203516346e-06, "loss": 0.5775, "step": 13090 }, { "epoch": 0.7691856027244437, "grad_norm": 8.929206848144531, "learning_rate": 9.27334668770229e-06, "loss": 0.4929, "step": 13100 }, { "epoch": 0.7697727673066761, "grad_norm": 4.454958438873291, "learning_rate": 9.271572177208145e-06, "loss": 0.5991, "step": 13110 }, { "epoch": 0.7703599318889085, "grad_norm": 5.008171081542969, "learning_rate": 9.269795672862203e-06, "loss": 0.6503, "step": 13120 }, { "epoch": 0.7709470964711409, "grad_norm": 4.429811000823975, "learning_rate": 9.268017175493692e-06, "loss": 0.5463, "step": 13130 }, { "epoch": 0.7715342610533733, "grad_norm": 4.660113334655762, "learning_rate": 9.266236685932757e-06, "loss": 0.5434, "step": 13140 }, { "epoch": 0.7721214256356057, "grad_norm": 1.8652875423431396, "learning_rate": 9.26445420501049e-06, "loss": 0.5623, "step": 13150 }, { "epoch": 0.7727085902178381, "grad_norm": 1.832094669342041, "learning_rate": 9.262669733558897e-06, "loss": 0.5424, "step": 13160 }, { "epoch": 0.7732957548000705, "grad_norm": 7.933019161224365, "learning_rate": 9.260883272410924e-06, "loss": 0.6381, "step": 13170 }, { "epoch": 0.7738829193823029, "grad_norm": 7.692787170410156, "learning_rate": 9.259094822400439e-06, "loss": 0.4089, "step": 13180 }, { "epoch": 0.7744700839645352, "grad_norm": 7.104128837585449, "learning_rate": 9.257304384362245e-06, "loss": 0.7416, "step": 13190 }, { "epoch": 0.7750572485467676, "grad_norm": 10.270430564880371, "learning_rate": 9.255511959132064e-06, "loss": 0.4996, "step": 13200 }, { "epoch": 0.775644413129, "grad_norm": 6.185967922210693, "learning_rate": 9.253717547546555e-06, "loss": 0.5184, "step": 13210 }, { "epoch": 0.7762315777112324, "grad_norm": 3.5591073036193848, "learning_rate": 9.251921150443295e-06, "loss": 0.4248, "step": 13220 }, { "epoch": 0.7768187422934648, "grad_norm": 3.2474470138549805, "learning_rate": 9.250122768660797e-06, "loss": 0.5346, "step": 13230 }, { "epoch": 0.7774059068756972, "grad_norm": 2.71547269821167, "learning_rate": 9.248322403038494e-06, "loss": 0.6447, "step": 13240 }, { "epoch": 0.7779930714579296, "grad_norm": 5.9692158699035645, "learning_rate": 9.246520054416747e-06, "loss": 0.4022, "step": 13250 }, { "epoch": 0.778580236040162, "grad_norm": 2.4101369380950928, "learning_rate": 9.244715723636843e-06, "loss": 0.5043, "step": 13260 }, { "epoch": 0.7791674006223944, "grad_norm": 1.7049369812011719, "learning_rate": 9.242909411540992e-06, "loss": 0.4878, "step": 13270 }, { "epoch": 0.7797545652046268, "grad_norm": 7.5117573738098145, "learning_rate": 9.241101118972332e-06, "loss": 0.4458, "step": 13280 }, { "epoch": 0.7803417297868592, "grad_norm": 6.242073059082031, "learning_rate": 9.239290846774925e-06, "loss": 0.5159, "step": 13290 }, { "epoch": 0.7809288943690916, "grad_norm": 2.506283760070801, "learning_rate": 9.237478595793752e-06, "loss": 0.3492, "step": 13300 }, { "epoch": 0.781516058951324, "grad_norm": 3.053341865539551, "learning_rate": 9.235664366874727e-06, "loss": 0.5103, "step": 13310 }, { "epoch": 0.7821032235335564, "grad_norm": 11.625539779663086, "learning_rate": 9.233848160864678e-06, "loss": 0.523, "step": 13320 }, { "epoch": 0.7826903881157888, "grad_norm": 11.135226249694824, "learning_rate": 9.23202997861136e-06, "loss": 0.5469, "step": 13330 }, { "epoch": 0.7832775526980212, "grad_norm": 5.952450275421143, "learning_rate": 9.230209820963453e-06, "loss": 0.4954, "step": 13340 }, { "epoch": 0.7838647172802536, "grad_norm": 1.8016754388809204, "learning_rate": 9.228387688770554e-06, "loss": 0.4875, "step": 13350 }, { "epoch": 0.784451881862486, "grad_norm": 6.0412702560424805, "learning_rate": 9.226563582883186e-06, "loss": 0.419, "step": 13360 }, { "epoch": 0.7850390464447184, "grad_norm": 3.124335765838623, "learning_rate": 9.224737504152788e-06, "loss": 0.4161, "step": 13370 }, { "epoch": 0.7856262110269508, "grad_norm": 3.0378637313842773, "learning_rate": 9.222909453431725e-06, "loss": 0.5058, "step": 13380 }, { "epoch": 0.7862133756091833, "grad_norm": 1.4636070728302002, "learning_rate": 9.221079431573281e-06, "loss": 0.4914, "step": 13390 }, { "epoch": 0.7868005401914157, "grad_norm": 2.1241462230682373, "learning_rate": 9.219247439431659e-06, "loss": 0.5472, "step": 13400 }, { "epoch": 0.7873877047736481, "grad_norm": 1.345023274421692, "learning_rate": 9.21741347786198e-06, "loss": 0.4155, "step": 13410 }, { "epoch": 0.7879748693558805, "grad_norm": 4.39285945892334, "learning_rate": 9.215577547720288e-06, "loss": 0.4527, "step": 13420 }, { "epoch": 0.7885620339381129, "grad_norm": 1.9977585077285767, "learning_rate": 9.21373964986355e-06, "loss": 0.3274, "step": 13430 }, { "epoch": 0.7891491985203453, "grad_norm": 2.9735970497131348, "learning_rate": 9.211899785149636e-06, "loss": 0.4788, "step": 13440 }, { "epoch": 0.7897363631025777, "grad_norm": 3.1109249591827393, "learning_rate": 9.210057954437353e-06, "loss": 0.5266, "step": 13450 }, { "epoch": 0.7903235276848101, "grad_norm": 1.3855130672454834, "learning_rate": 9.20821415858641e-06, "loss": 0.5182, "step": 13460 }, { "epoch": 0.7909106922670425, "grad_norm": 3.9375498294830322, "learning_rate": 9.206368398457444e-06, "loss": 0.6396, "step": 13470 }, { "epoch": 0.7914978568492749, "grad_norm": 2.828423261642456, "learning_rate": 9.204520674912001e-06, "loss": 0.4428, "step": 13480 }, { "epoch": 0.7920850214315073, "grad_norm": 10.215435981750488, "learning_rate": 9.202670988812551e-06, "loss": 0.5362, "step": 13490 }, { "epoch": 0.7926721860137397, "grad_norm": 4.66542387008667, "learning_rate": 9.200819341022477e-06, "loss": 0.514, "step": 13500 }, { "epoch": 0.7932593505959721, "grad_norm": 2.9538466930389404, "learning_rate": 9.198965732406073e-06, "loss": 0.4781, "step": 13510 }, { "epoch": 0.7938465151782045, "grad_norm": 2.9454002380371094, "learning_rate": 9.197110163828555e-06, "loss": 0.6109, "step": 13520 }, { "epoch": 0.7944336797604369, "grad_norm": 2.3874733448028564, "learning_rate": 9.19525263615605e-06, "loss": 0.6562, "step": 13530 }, { "epoch": 0.7950208443426693, "grad_norm": 1.9403759241104126, "learning_rate": 9.193393150255601e-06, "loss": 0.4555, "step": 13540 }, { "epoch": 0.7956080089249017, "grad_norm": 5.879673957824707, "learning_rate": 9.191531706995165e-06, "loss": 0.5368, "step": 13550 }, { "epoch": 0.7961951735071341, "grad_norm": 1.7516776323318481, "learning_rate": 9.18966830724361e-06, "loss": 0.4985, "step": 13560 }, { "epoch": 0.7967823380893665, "grad_norm": 3.6322779655456543, "learning_rate": 9.187802951870724e-06, "loss": 0.4148, "step": 13570 }, { "epoch": 0.7973695026715989, "grad_norm": 2.079967975616455, "learning_rate": 9.1859356417472e-06, "loss": 0.4665, "step": 13580 }, { "epoch": 0.7979566672538313, "grad_norm": 7.649460792541504, "learning_rate": 9.184066377744645e-06, "loss": 0.5622, "step": 13590 }, { "epoch": 0.7985438318360637, "grad_norm": 6.528945446014404, "learning_rate": 9.182195160735582e-06, "loss": 0.3863, "step": 13600 }, { "epoch": 0.7991309964182961, "grad_norm": 7.139013767242432, "learning_rate": 9.180321991593446e-06, "loss": 0.5609, "step": 13610 }, { "epoch": 0.7997181610005284, "grad_norm": 6.562435626983643, "learning_rate": 9.178446871192578e-06, "loss": 0.4738, "step": 13620 }, { "epoch": 0.8003053255827608, "grad_norm": 1.8597567081451416, "learning_rate": 9.17656980040823e-06, "loss": 0.4182, "step": 13630 }, { "epoch": 0.8008924901649932, "grad_norm": 7.7519917488098145, "learning_rate": 9.17469078011657e-06, "loss": 0.6267, "step": 13640 }, { "epoch": 0.8014796547472256, "grad_norm": 4.898622989654541, "learning_rate": 9.172809811194674e-06, "loss": 0.4289, "step": 13650 }, { "epoch": 0.802066819329458, "grad_norm": 2.4099044799804688, "learning_rate": 9.170926894520521e-06, "loss": 0.6309, "step": 13660 }, { "epoch": 0.8026539839116904, "grad_norm": 1.8548582792282104, "learning_rate": 9.16904203097301e-06, "loss": 0.5419, "step": 13670 }, { "epoch": 0.8032411484939228, "grad_norm": 10.422383308410645, "learning_rate": 9.16715522143194e-06, "loss": 0.4334, "step": 13680 }, { "epoch": 0.8038283130761552, "grad_norm": 5.165528297424316, "learning_rate": 9.165266466778023e-06, "loss": 0.5791, "step": 13690 }, { "epoch": 0.8044154776583876, "grad_norm": 3.021989345550537, "learning_rate": 9.163375767892874e-06, "loss": 0.484, "step": 13700 }, { "epoch": 0.80500264224062, "grad_norm": 4.6208062171936035, "learning_rate": 9.161483125659025e-06, "loss": 0.4938, "step": 13710 }, { "epoch": 0.8055898068228524, "grad_norm": 3.44340443611145, "learning_rate": 9.159588540959904e-06, "loss": 0.3792, "step": 13720 }, { "epoch": 0.8061769714050848, "grad_norm": 9.184721946716309, "learning_rate": 9.157692014679854e-06, "loss": 0.383, "step": 13730 }, { "epoch": 0.8067641359873172, "grad_norm": 1.770555019378662, "learning_rate": 9.155793547704117e-06, "loss": 0.4615, "step": 13740 }, { "epoch": 0.8073513005695496, "grad_norm": 8.457690238952637, "learning_rate": 9.15389314091885e-06, "loss": 0.531, "step": 13750 }, { "epoch": 0.807938465151782, "grad_norm": 5.434418201446533, "learning_rate": 9.151990795211106e-06, "loss": 0.4497, "step": 13760 }, { "epoch": 0.8085256297340144, "grad_norm": 6.116329193115234, "learning_rate": 9.15008651146885e-06, "loss": 0.551, "step": 13770 }, { "epoch": 0.8091127943162468, "grad_norm": 4.550806999206543, "learning_rate": 9.14818029058095e-06, "loss": 0.5394, "step": 13780 }, { "epoch": 0.8096999588984792, "grad_norm": 2.7755024433135986, "learning_rate": 9.146272133437174e-06, "loss": 0.5413, "step": 13790 }, { "epoch": 0.8102871234807116, "grad_norm": 2.608285903930664, "learning_rate": 9.144362040928196e-06, "loss": 0.4755, "step": 13800 }, { "epoch": 0.810874288062944, "grad_norm": 4.487432479858398, "learning_rate": 9.1424500139456e-06, "loss": 0.6438, "step": 13810 }, { "epoch": 0.8114614526451764, "grad_norm": 3.2403042316436768, "learning_rate": 9.140536053381863e-06, "loss": 0.5285, "step": 13820 }, { "epoch": 0.8120486172274088, "grad_norm": 7.108200550079346, "learning_rate": 9.13862016013037e-06, "loss": 0.502, "step": 13830 }, { "epoch": 0.8126357818096412, "grad_norm": 1.9583790302276611, "learning_rate": 9.136702335085408e-06, "loss": 0.5645, "step": 13840 }, { "epoch": 0.8132229463918736, "grad_norm": 10.606881141662598, "learning_rate": 9.134782579142162e-06, "loss": 0.4812, "step": 13850 }, { "epoch": 0.813810110974106, "grad_norm": 3.333839178085327, "learning_rate": 9.132860893196725e-06, "loss": 0.6348, "step": 13860 }, { "epoch": 0.8143972755563385, "grad_norm": 6.241448402404785, "learning_rate": 9.130937278146081e-06, "loss": 0.5472, "step": 13870 }, { "epoch": 0.8149844401385709, "grad_norm": 4.154232501983643, "learning_rate": 9.129011734888127e-06, "loss": 0.6469, "step": 13880 }, { "epoch": 0.8155716047208033, "grad_norm": 4.006692886352539, "learning_rate": 9.127084264321648e-06, "loss": 0.5286, "step": 13890 }, { "epoch": 0.8161587693030357, "grad_norm": 3.0792596340179443, "learning_rate": 9.125154867346338e-06, "loss": 0.4871, "step": 13900 }, { "epoch": 0.8167459338852681, "grad_norm": 5.670593738555908, "learning_rate": 9.123223544862785e-06, "loss": 0.4082, "step": 13910 }, { "epoch": 0.8173330984675005, "grad_norm": 3.2651963233947754, "learning_rate": 9.121290297772475e-06, "loss": 0.5102, "step": 13920 }, { "epoch": 0.8179202630497329, "grad_norm": 4.489037036895752, "learning_rate": 9.119355126977796e-06, "loss": 0.3995, "step": 13930 }, { "epoch": 0.8185074276319653, "grad_norm": 2.0876922607421875, "learning_rate": 9.117418033382032e-06, "loss": 0.4415, "step": 13940 }, { "epoch": 0.8190945922141977, "grad_norm": 4.560018539428711, "learning_rate": 9.115479017889363e-06, "loss": 0.6025, "step": 13950 }, { "epoch": 0.8196817567964301, "grad_norm": 3.172314405441284, "learning_rate": 9.113538081404871e-06, "loss": 0.4501, "step": 13960 }, { "epoch": 0.8202689213786625, "grad_norm": 6.81245756149292, "learning_rate": 9.11159522483453e-06, "loss": 0.5067, "step": 13970 }, { "epoch": 0.8208560859608949, "grad_norm": 5.013938903808594, "learning_rate": 9.109650449085212e-06, "loss": 0.3496, "step": 13980 }, { "epoch": 0.8214432505431273, "grad_norm": 2.54074764251709, "learning_rate": 9.107703755064682e-06, "loss": 0.5397, "step": 13990 }, { "epoch": 0.8220304151253597, "grad_norm": 2.4543344974517822, "learning_rate": 9.105755143681608e-06, "loss": 0.4318, "step": 14000 }, { "epoch": 0.8226175797075921, "grad_norm": 2.2087996006011963, "learning_rate": 9.103804615845543e-06, "loss": 0.4782, "step": 14010 }, { "epoch": 0.8232047442898245, "grad_norm": 7.65953254699707, "learning_rate": 9.101852172466944e-06, "loss": 0.5812, "step": 14020 }, { "epoch": 0.8237919088720569, "grad_norm": 1.1698535680770874, "learning_rate": 9.099897814457152e-06, "loss": 0.4328, "step": 14030 }, { "epoch": 0.8243790734542893, "grad_norm": 2.2093505859375, "learning_rate": 9.097941542728413e-06, "loss": 0.4571, "step": 14040 }, { "epoch": 0.8249662380365216, "grad_norm": 1.6156033277511597, "learning_rate": 9.095983358193856e-06, "loss": 0.4396, "step": 14050 }, { "epoch": 0.825553402618754, "grad_norm": 2.7319955825805664, "learning_rate": 9.09402326176751e-06, "loss": 0.4388, "step": 14060 }, { "epoch": 0.8261405672009864, "grad_norm": 4.7866034507751465, "learning_rate": 9.092061254364294e-06, "loss": 0.4856, "step": 14070 }, { "epoch": 0.8267277317832188, "grad_norm": 15.420751571655273, "learning_rate": 9.090097336900017e-06, "loss": 0.5942, "step": 14080 }, { "epoch": 0.8273148963654512, "grad_norm": 6.3182268142700195, "learning_rate": 9.088131510291382e-06, "loss": 0.7054, "step": 14090 }, { "epoch": 0.8279020609476836, "grad_norm": 2.8412766456604004, "learning_rate": 9.086163775455985e-06, "loss": 0.4105, "step": 14100 }, { "epoch": 0.828489225529916, "grad_norm": 3.4086883068084717, "learning_rate": 9.084194133312307e-06, "loss": 0.5114, "step": 14110 }, { "epoch": 0.8290763901121484, "grad_norm": 5.039294719696045, "learning_rate": 9.082222584779723e-06, "loss": 0.5236, "step": 14120 }, { "epoch": 0.8296635546943808, "grad_norm": 2.5389392375946045, "learning_rate": 9.080249130778498e-06, "loss": 0.5016, "step": 14130 }, { "epoch": 0.8302507192766132, "grad_norm": 2.88401460647583, "learning_rate": 9.078273772229786e-06, "loss": 0.5084, "step": 14140 }, { "epoch": 0.8308378838588456, "grad_norm": 6.753936290740967, "learning_rate": 9.07629651005563e-06, "loss": 0.3758, "step": 14150 }, { "epoch": 0.831425048441078, "grad_norm": 2.264817953109741, "learning_rate": 9.07431734517896e-06, "loss": 0.6103, "step": 14160 }, { "epoch": 0.8320122130233104, "grad_norm": 2.588458299636841, "learning_rate": 9.072336278523598e-06, "loss": 0.5036, "step": 14170 }, { "epoch": 0.8325993776055428, "grad_norm": 3.4629364013671875, "learning_rate": 9.070353311014246e-06, "loss": 0.4307, "step": 14180 }, { "epoch": 0.8331865421877752, "grad_norm": 15.800674438476562, "learning_rate": 9.068368443576504e-06, "loss": 0.4493, "step": 14190 }, { "epoch": 0.8337737067700076, "grad_norm": 3.991905927658081, "learning_rate": 9.06638167713685e-06, "loss": 0.4276, "step": 14200 }, { "epoch": 0.83436087135224, "grad_norm": 1.2546303272247314, "learning_rate": 9.064393012622654e-06, "loss": 0.5904, "step": 14210 }, { "epoch": 0.8349480359344724, "grad_norm": 1.619856595993042, "learning_rate": 9.06240245096217e-06, "loss": 0.5568, "step": 14220 }, { "epoch": 0.8355352005167048, "grad_norm": 4.151194095611572, "learning_rate": 9.060409993084531e-06, "loss": 0.6712, "step": 14230 }, { "epoch": 0.8361223650989372, "grad_norm": 6.665749549865723, "learning_rate": 9.058415639919769e-06, "loss": 0.5325, "step": 14240 }, { "epoch": 0.8367095296811696, "grad_norm": 2.3093600273132324, "learning_rate": 9.056419392398788e-06, "loss": 0.3575, "step": 14250 }, { "epoch": 0.837296694263402, "grad_norm": 5.685398101806641, "learning_rate": 9.054421251453382e-06, "loss": 0.4767, "step": 14260 }, { "epoch": 0.8378838588456344, "grad_norm": 3.199169874191284, "learning_rate": 9.052421218016232e-06, "loss": 0.4515, "step": 14270 }, { "epoch": 0.8384710234278668, "grad_norm": 4.528548717498779, "learning_rate": 9.050419293020893e-06, "loss": 0.4215, "step": 14280 }, { "epoch": 0.8390581880100992, "grad_norm": 3.108905553817749, "learning_rate": 9.04841547740181e-06, "loss": 0.4314, "step": 14290 }, { "epoch": 0.8396453525923316, "grad_norm": 4.291233062744141, "learning_rate": 9.04640977209431e-06, "loss": 0.4871, "step": 14300 }, { "epoch": 0.840232517174564, "grad_norm": 3.8053627014160156, "learning_rate": 9.044402178034597e-06, "loss": 0.4995, "step": 14310 }, { "epoch": 0.8408196817567964, "grad_norm": 3.1637535095214844, "learning_rate": 9.042392696159767e-06, "loss": 0.4899, "step": 14320 }, { "epoch": 0.8414068463390288, "grad_norm": 2.639547348022461, "learning_rate": 9.040381327407784e-06, "loss": 0.5635, "step": 14330 }, { "epoch": 0.8419940109212612, "grad_norm": 4.35801887512207, "learning_rate": 9.038368072717505e-06, "loss": 0.5475, "step": 14340 }, { "epoch": 0.8425811755034937, "grad_norm": 3.2242794036865234, "learning_rate": 9.036352933028656e-06, "loss": 0.5026, "step": 14350 }, { "epoch": 0.843168340085726, "grad_norm": 3.5764482021331787, "learning_rate": 9.034335909281853e-06, "loss": 0.4137, "step": 14360 }, { "epoch": 0.8437555046679585, "grad_norm": 5.9987945556640625, "learning_rate": 9.032317002418584e-06, "loss": 0.4015, "step": 14370 }, { "epoch": 0.8443426692501909, "grad_norm": 3.486645460128784, "learning_rate": 9.03029621338122e-06, "loss": 0.3895, "step": 14380 }, { "epoch": 0.8449298338324233, "grad_norm": 3.6979928016662598, "learning_rate": 9.02827354311301e-06, "loss": 0.4193, "step": 14390 }, { "epoch": 0.8455169984146557, "grad_norm": 5.729255199432373, "learning_rate": 9.02624899255808e-06, "loss": 0.4564, "step": 14400 }, { "epoch": 0.8461041629968881, "grad_norm": 5.375575065612793, "learning_rate": 9.024222562661435e-06, "loss": 0.5214, "step": 14410 }, { "epoch": 0.8466913275791205, "grad_norm": 2.2878682613372803, "learning_rate": 9.022194254368954e-06, "loss": 0.4431, "step": 14420 }, { "epoch": 0.8472784921613529, "grad_norm": 5.829811096191406, "learning_rate": 9.020164068627399e-06, "loss": 0.5727, "step": 14430 }, { "epoch": 0.8478656567435853, "grad_norm": 9.150737762451172, "learning_rate": 9.018132006384401e-06, "loss": 0.5294, "step": 14440 }, { "epoch": 0.8484528213258177, "grad_norm": 4.800015449523926, "learning_rate": 9.016098068588472e-06, "loss": 0.4164, "step": 14450 }, { "epoch": 0.8490399859080501, "grad_norm": 3.0813958644866943, "learning_rate": 9.014062256189e-06, "loss": 0.5719, "step": 14460 }, { "epoch": 0.8496271504902825, "grad_norm": 8.420958518981934, "learning_rate": 9.012024570136242e-06, "loss": 0.4201, "step": 14470 }, { "epoch": 0.8502143150725149, "grad_norm": 2.958738327026367, "learning_rate": 9.009985011381335e-06, "loss": 0.4783, "step": 14480 }, { "epoch": 0.8508014796547472, "grad_norm": 4.732949733734131, "learning_rate": 9.007943580876291e-06, "loss": 0.4417, "step": 14490 }, { "epoch": 0.8513886442369796, "grad_norm": 6.419892311096191, "learning_rate": 9.00590027957399e-06, "loss": 0.4251, "step": 14500 }, { "epoch": 0.851975808819212, "grad_norm": 14.86893367767334, "learning_rate": 9.00385510842819e-06, "loss": 0.4856, "step": 14510 }, { "epoch": 0.8525629734014444, "grad_norm": 10.933667182922363, "learning_rate": 9.00180806839352e-06, "loss": 0.4774, "step": 14520 }, { "epoch": 0.8531501379836768, "grad_norm": 2.863762378692627, "learning_rate": 8.999759160425483e-06, "loss": 0.5729, "step": 14530 }, { "epoch": 0.8537373025659092, "grad_norm": 3.504822015762329, "learning_rate": 8.997708385480451e-06, "loss": 0.5598, "step": 14540 }, { "epoch": 0.8543244671481416, "grad_norm": 13.344596862792969, "learning_rate": 8.99565574451567e-06, "loss": 0.3827, "step": 14550 }, { "epoch": 0.854911631730374, "grad_norm": 2.578202247619629, "learning_rate": 8.993601238489256e-06, "loss": 0.4366, "step": 14560 }, { "epoch": 0.8554987963126064, "grad_norm": 5.3115057945251465, "learning_rate": 8.991544868360194e-06, "loss": 0.4607, "step": 14570 }, { "epoch": 0.8560859608948388, "grad_norm": 2.853926658630371, "learning_rate": 8.989486635088342e-06, "loss": 0.4612, "step": 14580 }, { "epoch": 0.8566731254770712, "grad_norm": 8.099177360534668, "learning_rate": 8.987426539634427e-06, "loss": 0.5552, "step": 14590 }, { "epoch": 0.8572602900593036, "grad_norm": 5.084115505218506, "learning_rate": 8.985364582960045e-06, "loss": 0.5302, "step": 14600 }, { "epoch": 0.857847454641536, "grad_norm": 3.858064889907837, "learning_rate": 8.983300766027659e-06, "loss": 0.4302, "step": 14610 }, { "epoch": 0.8584346192237684, "grad_norm": 6.2574687004089355, "learning_rate": 8.981235089800602e-06, "loss": 0.5579, "step": 14620 }, { "epoch": 0.8590217838060008, "grad_norm": 1.979286789894104, "learning_rate": 8.979167555243074e-06, "loss": 0.4597, "step": 14630 }, { "epoch": 0.8596089483882332, "grad_norm": 4.171705722808838, "learning_rate": 8.977098163320143e-06, "loss": 0.5703, "step": 14640 }, { "epoch": 0.8601961129704656, "grad_norm": 4.234729290008545, "learning_rate": 8.975026914997743e-06, "loss": 0.5198, "step": 14650 }, { "epoch": 0.860783277552698, "grad_norm": 3.3411903381347656, "learning_rate": 8.972953811242681e-06, "loss": 0.4835, "step": 14660 }, { "epoch": 0.8613704421349304, "grad_norm": 10.476396560668945, "learning_rate": 8.97087885302262e-06, "loss": 0.5104, "step": 14670 }, { "epoch": 0.8619576067171628, "grad_norm": 4.17925500869751, "learning_rate": 8.96880204130609e-06, "loss": 0.5353, "step": 14680 }, { "epoch": 0.8625447712993952, "grad_norm": 8.610871315002441, "learning_rate": 8.966723377062496e-06, "loss": 0.5131, "step": 14690 }, { "epoch": 0.8631319358816276, "grad_norm": 4.028407573699951, "learning_rate": 8.964642861262097e-06, "loss": 0.4917, "step": 14700 }, { "epoch": 0.86371910046386, "grad_norm": 5.460457801818848, "learning_rate": 8.962560494876021e-06, "loss": 0.5944, "step": 14710 }, { "epoch": 0.8643062650460924, "grad_norm": 6.10146427154541, "learning_rate": 8.96047627887626e-06, "loss": 0.5405, "step": 14720 }, { "epoch": 0.8648934296283248, "grad_norm": 2.3699281215667725, "learning_rate": 8.958390214235666e-06, "loss": 0.5601, "step": 14730 }, { "epoch": 0.8654805942105572, "grad_norm": 6.0744524002075195, "learning_rate": 8.95630230192796e-06, "loss": 0.5044, "step": 14740 }, { "epoch": 0.8660677587927896, "grad_norm": 1.6796046495437622, "learning_rate": 8.954212542927718e-06, "loss": 0.4514, "step": 14750 }, { "epoch": 0.866654923375022, "grad_norm": 2.722742795944214, "learning_rate": 8.952120938210384e-06, "loss": 0.4373, "step": 14760 }, { "epoch": 0.8672420879572544, "grad_norm": 2.5071489810943604, "learning_rate": 8.950027488752262e-06, "loss": 0.4664, "step": 14770 }, { "epoch": 0.8678292525394868, "grad_norm": 2.7151262760162354, "learning_rate": 8.947932195530514e-06, "loss": 0.401, "step": 14780 }, { "epoch": 0.8684164171217192, "grad_norm": 1.5327708721160889, "learning_rate": 8.945835059523164e-06, "loss": 0.5425, "step": 14790 }, { "epoch": 0.8690035817039516, "grad_norm": 5.94791841506958, "learning_rate": 8.9437360817091e-06, "loss": 0.4551, "step": 14800 }, { "epoch": 0.869590746286184, "grad_norm": 1.8688328266143799, "learning_rate": 8.941635263068067e-06, "loss": 0.4102, "step": 14810 }, { "epoch": 0.8701779108684164, "grad_norm": 7.173106670379639, "learning_rate": 8.939532604580668e-06, "loss": 0.418, "step": 14820 }, { "epoch": 0.8707650754506489, "grad_norm": 2.533494710922241, "learning_rate": 8.937428107228366e-06, "loss": 0.3659, "step": 14830 }, { "epoch": 0.8713522400328813, "grad_norm": 5.477327823638916, "learning_rate": 8.93532177199348e-06, "loss": 0.5316, "step": 14840 }, { "epoch": 0.8719394046151137, "grad_norm": 7.011613368988037, "learning_rate": 8.933213599859192e-06, "loss": 0.5182, "step": 14850 }, { "epoch": 0.8725265691973461, "grad_norm": 3.304349899291992, "learning_rate": 8.931103591809536e-06, "loss": 0.576, "step": 14860 }, { "epoch": 0.8731137337795785, "grad_norm": 2.90871000289917, "learning_rate": 8.92899174882941e-06, "loss": 0.5419, "step": 14870 }, { "epoch": 0.8737008983618109, "grad_norm": 2.414083480834961, "learning_rate": 8.926878071904558e-06, "loss": 0.4663, "step": 14880 }, { "epoch": 0.8742880629440433, "grad_norm": 2.886960983276367, "learning_rate": 8.92476256202159e-06, "loss": 0.4064, "step": 14890 }, { "epoch": 0.8748752275262757, "grad_norm": 2.6968629360198975, "learning_rate": 8.922645220167968e-06, "loss": 0.5363, "step": 14900 }, { "epoch": 0.8754623921085081, "grad_norm": 3.519819498062134, "learning_rate": 8.920526047332003e-06, "loss": 0.3925, "step": 14910 }, { "epoch": 0.8760495566907404, "grad_norm": 18.455629348754883, "learning_rate": 8.918405044502872e-06, "loss": 0.4731, "step": 14920 }, { "epoch": 0.8766367212729728, "grad_norm": 7.198457717895508, "learning_rate": 8.9162822126706e-06, "loss": 0.5365, "step": 14930 }, { "epoch": 0.8772238858552052, "grad_norm": 4.274626731872559, "learning_rate": 8.914157552826064e-06, "loss": 0.4356, "step": 14940 }, { "epoch": 0.8778110504374376, "grad_norm": 6.643884658813477, "learning_rate": 8.912031065960995e-06, "loss": 0.5047, "step": 14950 }, { "epoch": 0.87839821501967, "grad_norm": 1.004359483718872, "learning_rate": 8.909902753067984e-06, "loss": 0.5061, "step": 14960 }, { "epoch": 0.8789853796019024, "grad_norm": 3.55465030670166, "learning_rate": 8.907772615140464e-06, "loss": 0.4649, "step": 14970 }, { "epoch": 0.8795725441841348, "grad_norm": 2.6973726749420166, "learning_rate": 8.905640653172726e-06, "loss": 0.5811, "step": 14980 }, { "epoch": 0.8801597087663672, "grad_norm": 5.635622978210449, "learning_rate": 8.90350686815991e-06, "loss": 0.4511, "step": 14990 }, { "epoch": 0.8807468733485996, "grad_norm": 3.2479476928710938, "learning_rate": 8.90137126109801e-06, "loss": 0.4742, "step": 15000 }, { "epoch": 0.881334037930832, "grad_norm": 2.9895105361938477, "learning_rate": 8.899233832983865e-06, "loss": 0.4942, "step": 15010 }, { "epoch": 0.8819212025130644, "grad_norm": 2.305690288543701, "learning_rate": 8.897094584815169e-06, "loss": 0.3816, "step": 15020 }, { "epoch": 0.8825083670952968, "grad_norm": 8.224274635314941, "learning_rate": 8.894953517590465e-06, "loss": 0.4547, "step": 15030 }, { "epoch": 0.8830955316775292, "grad_norm": 7.526371002197266, "learning_rate": 8.892810632309143e-06, "loss": 0.4526, "step": 15040 }, { "epoch": 0.8836826962597616, "grad_norm": 2.5474586486816406, "learning_rate": 8.890665929971444e-06, "loss": 0.4551, "step": 15050 }, { "epoch": 0.884269860841994, "grad_norm": 6.138125419616699, "learning_rate": 8.888519411578454e-06, "loss": 0.4867, "step": 15060 }, { "epoch": 0.8848570254242264, "grad_norm": 4.170588970184326, "learning_rate": 8.886371078132107e-06, "loss": 0.4543, "step": 15070 }, { "epoch": 0.8854441900064588, "grad_norm": 7.962441921234131, "learning_rate": 8.884220930635187e-06, "loss": 0.4433, "step": 15080 }, { "epoch": 0.8860313545886912, "grad_norm": 9.028368949890137, "learning_rate": 8.882068970091327e-06, "loss": 0.4264, "step": 15090 }, { "epoch": 0.8866185191709236, "grad_norm": 2.2741477489471436, "learning_rate": 8.879915197504999e-06, "loss": 0.501, "step": 15100 }, { "epoch": 0.887205683753156, "grad_norm": 9.986212730407715, "learning_rate": 8.877759613881524e-06, "loss": 0.5491, "step": 15110 }, { "epoch": 0.8877928483353884, "grad_norm": 3.571911573410034, "learning_rate": 8.875602220227071e-06, "loss": 0.4752, "step": 15120 }, { "epoch": 0.8883800129176208, "grad_norm": 4.424628257751465, "learning_rate": 8.873443017548649e-06, "loss": 0.423, "step": 15130 }, { "epoch": 0.8889671774998532, "grad_norm": 6.216794967651367, "learning_rate": 8.871282006854118e-06, "loss": 0.477, "step": 15140 }, { "epoch": 0.8895543420820856, "grad_norm": 3.9049594402313232, "learning_rate": 8.869119189152174e-06, "loss": 0.4768, "step": 15150 }, { "epoch": 0.890141506664318, "grad_norm": 3.2414324283599854, "learning_rate": 8.866954565452362e-06, "loss": 0.5374, "step": 15160 }, { "epoch": 0.8907286712465504, "grad_norm": 4.316891193389893, "learning_rate": 8.86478813676507e-06, "loss": 0.5582, "step": 15170 }, { "epoch": 0.8913158358287828, "grad_norm": 2.2925615310668945, "learning_rate": 8.862619904101525e-06, "loss": 0.2844, "step": 15180 }, { "epoch": 0.8919030004110152, "grad_norm": 1.6846197843551636, "learning_rate": 8.8604498684738e-06, "loss": 0.7309, "step": 15190 }, { "epoch": 0.8924901649932476, "grad_norm": 3.5688581466674805, "learning_rate": 8.858278030894806e-06, "loss": 0.4951, "step": 15200 }, { "epoch": 0.89307732957548, "grad_norm": 4.743126392364502, "learning_rate": 8.856104392378298e-06, "loss": 0.4222, "step": 15210 }, { "epoch": 0.8936644941577124, "grad_norm": 1.9774812459945679, "learning_rate": 8.85392895393887e-06, "loss": 0.3923, "step": 15220 }, { "epoch": 0.8942516587399448, "grad_norm": 2.099579334259033, "learning_rate": 8.851751716591954e-06, "loss": 0.458, "step": 15230 }, { "epoch": 0.8948388233221772, "grad_norm": 10.564099311828613, "learning_rate": 8.849572681353829e-06, "loss": 0.4842, "step": 15240 }, { "epoch": 0.8954259879044096, "grad_norm": 4.45125675201416, "learning_rate": 8.847391849241604e-06, "loss": 0.4755, "step": 15250 }, { "epoch": 0.896013152486642, "grad_norm": 3.9395318031311035, "learning_rate": 8.845209221273233e-06, "loss": 0.6139, "step": 15260 }, { "epoch": 0.8966003170688744, "grad_norm": 8.354371070861816, "learning_rate": 8.843024798467507e-06, "loss": 0.537, "step": 15270 }, { "epoch": 0.8971874816511068, "grad_norm": 1.5103248357772827, "learning_rate": 8.840838581844054e-06, "loss": 0.5241, "step": 15280 }, { "epoch": 0.8977746462333392, "grad_norm": 4.239283561706543, "learning_rate": 8.838650572423337e-06, "loss": 0.5146, "step": 15290 }, { "epoch": 0.8983618108155716, "grad_norm": 4.033895969390869, "learning_rate": 8.836460771226663e-06, "loss": 0.5552, "step": 15300 }, { "epoch": 0.898948975397804, "grad_norm": 1.4307125806808472, "learning_rate": 8.834269179276164e-06, "loss": 0.4619, "step": 15310 }, { "epoch": 0.8995361399800365, "grad_norm": 1.319401741027832, "learning_rate": 8.832075797594822e-06, "loss": 0.5225, "step": 15320 }, { "epoch": 0.9001233045622689, "grad_norm": 6.032358169555664, "learning_rate": 8.829880627206442e-06, "loss": 0.5114, "step": 15330 }, { "epoch": 0.9007104691445013, "grad_norm": 2.7495267391204834, "learning_rate": 8.827683669135671e-06, "loss": 0.5187, "step": 15340 }, { "epoch": 0.9012976337267335, "grad_norm": 4.647736549377441, "learning_rate": 8.825484924407986e-06, "loss": 0.4265, "step": 15350 }, { "epoch": 0.901884798308966, "grad_norm": 4.464934349060059, "learning_rate": 8.823284394049702e-06, "loss": 0.4921, "step": 15360 }, { "epoch": 0.9024719628911984, "grad_norm": 2.6688663959503174, "learning_rate": 8.821082079087968e-06, "loss": 0.5456, "step": 15370 }, { "epoch": 0.9030591274734308, "grad_norm": 2.6054351329803467, "learning_rate": 8.81887798055076e-06, "loss": 0.5835, "step": 15380 }, { "epoch": 0.9036462920556632, "grad_norm": 2.156982183456421, "learning_rate": 8.816672099466889e-06, "loss": 0.4761, "step": 15390 }, { "epoch": 0.9042334566378956, "grad_norm": 8.744925498962402, "learning_rate": 8.814464436866004e-06, "loss": 0.4963, "step": 15400 }, { "epoch": 0.904820621220128, "grad_norm": 6.709301948547363, "learning_rate": 8.812254993778579e-06, "loss": 0.4832, "step": 15410 }, { "epoch": 0.9054077858023604, "grad_norm": 2.7335197925567627, "learning_rate": 8.810043771235919e-06, "loss": 0.4643, "step": 15420 }, { "epoch": 0.9059949503845928, "grad_norm": 10.516481399536133, "learning_rate": 8.807830770270166e-06, "loss": 0.5827, "step": 15430 }, { "epoch": 0.9065821149668252, "grad_norm": 3.40175724029541, "learning_rate": 8.805615991914282e-06, "loss": 0.4824, "step": 15440 }, { "epoch": 0.9071692795490576, "grad_norm": 2.3426883220672607, "learning_rate": 8.803399437202068e-06, "loss": 0.3955, "step": 15450 }, { "epoch": 0.90775644413129, "grad_norm": 15.000493049621582, "learning_rate": 8.80118110716815e-06, "loss": 0.5301, "step": 15460 }, { "epoch": 0.9083436087135224, "grad_norm": 4.418941497802734, "learning_rate": 8.798961002847982e-06, "loss": 0.5506, "step": 15470 }, { "epoch": 0.9089307732957548, "grad_norm": 1.4421942234039307, "learning_rate": 8.796739125277848e-06, "loss": 0.5243, "step": 15480 }, { "epoch": 0.9095179378779872, "grad_norm": 2.251504898071289, "learning_rate": 8.79451547549486e-06, "loss": 0.4005, "step": 15490 }, { "epoch": 0.9101051024602196, "grad_norm": 8.941954612731934, "learning_rate": 8.792290054536956e-06, "loss": 0.5204, "step": 15500 }, { "epoch": 0.910692267042452, "grad_norm": 4.394284725189209, "learning_rate": 8.790062863442898e-06, "loss": 0.659, "step": 15510 }, { "epoch": 0.9112794316246844, "grad_norm": 1.8960909843444824, "learning_rate": 8.78783390325228e-06, "loss": 0.4269, "step": 15520 }, { "epoch": 0.9118665962069168, "grad_norm": 3.417433977127075, "learning_rate": 8.78560317500552e-06, "loss": 0.5354, "step": 15530 }, { "epoch": 0.9124537607891492, "grad_norm": 7.336452484130859, "learning_rate": 8.78337067974386e-06, "loss": 0.429, "step": 15540 }, { "epoch": 0.9130409253713816, "grad_norm": 1.8586562871932983, "learning_rate": 8.781136418509362e-06, "loss": 0.6236, "step": 15550 }, { "epoch": 0.913628089953614, "grad_norm": 4.600498199462891, "learning_rate": 8.778900392344922e-06, "loss": 0.4346, "step": 15560 }, { "epoch": 0.9142152545358464, "grad_norm": 11.613080024719238, "learning_rate": 8.776662602294257e-06, "loss": 0.5593, "step": 15570 }, { "epoch": 0.9148024191180788, "grad_norm": 5.146651744842529, "learning_rate": 8.7744230494019e-06, "loss": 0.449, "step": 15580 }, { "epoch": 0.9153895837003112, "grad_norm": 9.757017135620117, "learning_rate": 8.772181734713217e-06, "loss": 0.4106, "step": 15590 }, { "epoch": 0.9159767482825436, "grad_norm": 4.6979780197143555, "learning_rate": 8.76993865927439e-06, "loss": 0.4835, "step": 15600 }, { "epoch": 0.916563912864776, "grad_norm": 5.117133617401123, "learning_rate": 8.767693824132426e-06, "loss": 0.4685, "step": 15610 }, { "epoch": 0.9171510774470084, "grad_norm": 2.3850326538085938, "learning_rate": 8.765447230335148e-06, "loss": 0.4523, "step": 15620 }, { "epoch": 0.9177382420292408, "grad_norm": 2.4345602989196777, "learning_rate": 8.763198878931207e-06, "loss": 0.4755, "step": 15630 }, { "epoch": 0.9183254066114732, "grad_norm": 11.309408187866211, "learning_rate": 8.760948770970072e-06, "loss": 0.5397, "step": 15640 }, { "epoch": 0.9189125711937056, "grad_norm": 2.1477131843566895, "learning_rate": 8.758696907502028e-06, "loss": 0.4413, "step": 15650 }, { "epoch": 0.919499735775938, "grad_norm": 6.881965637207031, "learning_rate": 8.756443289578187e-06, "loss": 0.4623, "step": 15660 }, { "epoch": 0.9200869003581704, "grad_norm": 2.3362131118774414, "learning_rate": 8.754187918250472e-06, "loss": 0.4441, "step": 15670 }, { "epoch": 0.9206740649404028, "grad_norm": 2.681896924972534, "learning_rate": 8.751930794571628e-06, "loss": 0.5855, "step": 15680 }, { "epoch": 0.9212612295226352, "grad_norm": 2.0129177570343018, "learning_rate": 8.74967191959522e-06, "loss": 0.4582, "step": 15690 }, { "epoch": 0.9218483941048676, "grad_norm": 7.612538814544678, "learning_rate": 8.747411294375627e-06, "loss": 0.5693, "step": 15700 }, { "epoch": 0.9224355586871, "grad_norm": 1.938195824623108, "learning_rate": 8.745148919968047e-06, "loss": 0.3011, "step": 15710 }, { "epoch": 0.9230227232693324, "grad_norm": 3.680964469909668, "learning_rate": 8.742884797428494e-06, "loss": 0.5417, "step": 15720 }, { "epoch": 0.9236098878515648, "grad_norm": 7.126930236816406, "learning_rate": 8.740618927813798e-06, "loss": 0.3865, "step": 15730 }, { "epoch": 0.9241970524337972, "grad_norm": 5.1429243087768555, "learning_rate": 8.738351312181603e-06, "loss": 0.4809, "step": 15740 }, { "epoch": 0.9247842170160296, "grad_norm": 10.068511962890625, "learning_rate": 8.73608195159037e-06, "loss": 0.4821, "step": 15750 }, { "epoch": 0.925371381598262, "grad_norm": 2.748671770095825, "learning_rate": 8.733810847099373e-06, "loss": 0.5422, "step": 15760 }, { "epoch": 0.9259585461804944, "grad_norm": 4.336177825927734, "learning_rate": 8.731537999768702e-06, "loss": 0.4373, "step": 15770 }, { "epoch": 0.9265457107627267, "grad_norm": 4.854515552520752, "learning_rate": 8.729263410659259e-06, "loss": 0.5299, "step": 15780 }, { "epoch": 0.9271328753449591, "grad_norm": 3.1607487201690674, "learning_rate": 8.72698708083276e-06, "loss": 0.4317, "step": 15790 }, { "epoch": 0.9277200399271915, "grad_norm": 2.404839277267456, "learning_rate": 8.724709011351732e-06, "loss": 0.5742, "step": 15800 }, { "epoch": 0.9283072045094239, "grad_norm": 7.473465919494629, "learning_rate": 8.722429203279512e-06, "loss": 0.5752, "step": 15810 }, { "epoch": 0.9288943690916563, "grad_norm": 3.765292167663574, "learning_rate": 8.720147657680257e-06, "loss": 0.4671, "step": 15820 }, { "epoch": 0.9294815336738887, "grad_norm": 3.153773784637451, "learning_rate": 8.717864375618925e-06, "loss": 0.394, "step": 15830 }, { "epoch": 0.9300686982561212, "grad_norm": 4.786364555358887, "learning_rate": 8.715579358161291e-06, "loss": 0.3987, "step": 15840 }, { "epoch": 0.9306558628383536, "grad_norm": 4.819456100463867, "learning_rate": 8.713292606373938e-06, "loss": 0.5271, "step": 15850 }, { "epoch": 0.931243027420586, "grad_norm": 7.070910930633545, "learning_rate": 8.711004121324259e-06, "loss": 0.4402, "step": 15860 }, { "epoch": 0.9318301920028184, "grad_norm": 9.63357925415039, "learning_rate": 8.708713904080453e-06, "loss": 0.575, "step": 15870 }, { "epoch": 0.9324173565850508, "grad_norm": 4.401060581207275, "learning_rate": 8.706421955711531e-06, "loss": 0.4628, "step": 15880 }, { "epoch": 0.9330045211672832, "grad_norm": 3.4512128829956055, "learning_rate": 8.704128277287313e-06, "loss": 0.4347, "step": 15890 }, { "epoch": 0.9335916857495156, "grad_norm": 6.750051498413086, "learning_rate": 8.701832869878423e-06, "loss": 0.6198, "step": 15900 }, { "epoch": 0.934178850331748, "grad_norm": 2.928267002105713, "learning_rate": 8.699535734556295e-06, "loss": 0.465, "step": 15910 }, { "epoch": 0.9347660149139804, "grad_norm": 2.9160454273223877, "learning_rate": 8.697236872393167e-06, "loss": 0.472, "step": 15920 }, { "epoch": 0.9353531794962128, "grad_norm": 3.200559616088867, "learning_rate": 8.694936284462086e-06, "loss": 0.3413, "step": 15930 }, { "epoch": 0.9359403440784452, "grad_norm": 3.026190757751465, "learning_rate": 8.692633971836898e-06, "loss": 0.4342, "step": 15940 }, { "epoch": 0.9365275086606776, "grad_norm": 4.657591342926025, "learning_rate": 8.690329935592266e-06, "loss": 0.4167, "step": 15950 }, { "epoch": 0.93711467324291, "grad_norm": 8.990087509155273, "learning_rate": 8.688024176803646e-06, "loss": 0.5599, "step": 15960 }, { "epoch": 0.9377018378251424, "grad_norm": 2.6539385318756104, "learning_rate": 8.685716696547305e-06, "loss": 0.4135, "step": 15970 }, { "epoch": 0.9382890024073748, "grad_norm": 14.509568214416504, "learning_rate": 8.683407495900307e-06, "loss": 0.6081, "step": 15980 }, { "epoch": 0.9388761669896072, "grad_norm": 2.0837206840515137, "learning_rate": 8.681096575940525e-06, "loss": 0.4553, "step": 15990 }, { "epoch": 0.9394633315718396, "grad_norm": 3.2188546657562256, "learning_rate": 8.678783937746633e-06, "loss": 0.4747, "step": 16000 }, { "epoch": 0.940050496154072, "grad_norm": 4.145969390869141, "learning_rate": 8.676469582398109e-06, "loss": 0.593, "step": 16010 }, { "epoch": 0.9406376607363044, "grad_norm": 8.873862266540527, "learning_rate": 8.674153510975225e-06, "loss": 0.4213, "step": 16020 }, { "epoch": 0.9412248253185368, "grad_norm": 7.807734966278076, "learning_rate": 8.671835724559062e-06, "loss": 0.5948, "step": 16030 }, { "epoch": 0.9418119899007692, "grad_norm": 6.6156392097473145, "learning_rate": 8.6695162242315e-06, "loss": 0.4229, "step": 16040 }, { "epoch": 0.9423991544830016, "grad_norm": 4.286728382110596, "learning_rate": 8.667195011075214e-06, "loss": 0.5855, "step": 16050 }, { "epoch": 0.942986319065234, "grad_norm": 2.534541606903076, "learning_rate": 8.664872086173685e-06, "loss": 0.3656, "step": 16060 }, { "epoch": 0.9435734836474664, "grad_norm": 7.997786045074463, "learning_rate": 8.66254745061119e-06, "loss": 0.4581, "step": 16070 }, { "epoch": 0.9441606482296988, "grad_norm": 1.4893512725830078, "learning_rate": 8.660221105472801e-06, "loss": 0.4357, "step": 16080 }, { "epoch": 0.9447478128119312, "grad_norm": 1.8682063817977905, "learning_rate": 8.657893051844398e-06, "loss": 0.3593, "step": 16090 }, { "epoch": 0.9453349773941636, "grad_norm": 1.9659322500228882, "learning_rate": 8.655563290812644e-06, "loss": 0.5335, "step": 16100 }, { "epoch": 0.945922141976396, "grad_norm": 1.936458945274353, "learning_rate": 8.653231823465016e-06, "loss": 0.6686, "step": 16110 }, { "epoch": 0.9465093065586284, "grad_norm": 6.437273025512695, "learning_rate": 8.650898650889771e-06, "loss": 0.5551, "step": 16120 }, { "epoch": 0.9470964711408608, "grad_norm": 1.663253903388977, "learning_rate": 8.648563774175975e-06, "loss": 0.4711, "step": 16130 }, { "epoch": 0.9476836357230932, "grad_norm": 3.088658571243286, "learning_rate": 8.64622719441348e-06, "loss": 0.4871, "step": 16140 }, { "epoch": 0.9482708003053256, "grad_norm": 2.755248785018921, "learning_rate": 8.643888912692936e-06, "loss": 0.5274, "step": 16150 }, { "epoch": 0.948857964887558, "grad_norm": 1.5255346298217773, "learning_rate": 8.641548930105795e-06, "loss": 0.5576, "step": 16160 }, { "epoch": 0.9494451294697904, "grad_norm": 3.324140787124634, "learning_rate": 8.639207247744289e-06, "loss": 0.4566, "step": 16170 }, { "epoch": 0.9500322940520228, "grad_norm": 5.3170576095581055, "learning_rate": 8.636863866701453e-06, "loss": 0.5817, "step": 16180 }, { "epoch": 0.9506194586342552, "grad_norm": 3.9031622409820557, "learning_rate": 8.634518788071114e-06, "loss": 0.5538, "step": 16190 }, { "epoch": 0.9512066232164876, "grad_norm": 2.810563564300537, "learning_rate": 8.632172012947886e-06, "loss": 0.6318, "step": 16200 }, { "epoch": 0.9517937877987199, "grad_norm": 7.213213920593262, "learning_rate": 8.629823542427185e-06, "loss": 0.4157, "step": 16210 }, { "epoch": 0.9523809523809523, "grad_norm": 4.956326484680176, "learning_rate": 8.627473377605205e-06, "loss": 0.5504, "step": 16220 }, { "epoch": 0.9529681169631847, "grad_norm": 3.737844467163086, "learning_rate": 8.625121519578943e-06, "loss": 0.3665, "step": 16230 }, { "epoch": 0.9535552815454171, "grad_norm": 3.1102774143218994, "learning_rate": 8.622767969446179e-06, "loss": 0.4002, "step": 16240 }, { "epoch": 0.9541424461276495, "grad_norm": 9.87657642364502, "learning_rate": 8.620412728305487e-06, "loss": 0.4682, "step": 16250 }, { "epoch": 0.9547296107098819, "grad_norm": 10.043784141540527, "learning_rate": 8.618055797256226e-06, "loss": 0.4753, "step": 16260 }, { "epoch": 0.9553167752921143, "grad_norm": 2.545201539993286, "learning_rate": 8.615697177398548e-06, "loss": 0.4649, "step": 16270 }, { "epoch": 0.9559039398743467, "grad_norm": 3.936140775680542, "learning_rate": 8.613336869833391e-06, "loss": 0.4701, "step": 16280 }, { "epoch": 0.9564911044565791, "grad_norm": 5.078255653381348, "learning_rate": 8.610974875662481e-06, "loss": 0.4829, "step": 16290 }, { "epoch": 0.9570782690388115, "grad_norm": 15.827037811279297, "learning_rate": 8.608611195988333e-06, "loss": 0.4381, "step": 16300 }, { "epoch": 0.957665433621044, "grad_norm": 2.2145745754241943, "learning_rate": 8.606245831914246e-06, "loss": 0.5671, "step": 16310 }, { "epoch": 0.9582525982032764, "grad_norm": 19.224828720092773, "learning_rate": 8.603878784544307e-06, "loss": 0.532, "step": 16320 }, { "epoch": 0.9588397627855088, "grad_norm": 2.1382951736450195, "learning_rate": 8.601510054983387e-06, "loss": 0.5801, "step": 16330 }, { "epoch": 0.9594269273677412, "grad_norm": 2.97377347946167, "learning_rate": 8.599139644337145e-06, "loss": 0.5046, "step": 16340 }, { "epoch": 0.9600140919499736, "grad_norm": 6.7385125160217285, "learning_rate": 8.596767553712023e-06, "loss": 0.5116, "step": 16350 }, { "epoch": 0.960601256532206, "grad_norm": 3.702303409576416, "learning_rate": 8.594393784215247e-06, "loss": 0.5363, "step": 16360 }, { "epoch": 0.9611884211144384, "grad_norm": 3.817025661468506, "learning_rate": 8.592018336954827e-06, "loss": 0.4295, "step": 16370 }, { "epoch": 0.9617755856966708, "grad_norm": 3.4576210975646973, "learning_rate": 8.589641213039556e-06, "loss": 0.4195, "step": 16380 }, { "epoch": 0.9623627502789032, "grad_norm": 5.232853889465332, "learning_rate": 8.58726241357901e-06, "loss": 0.5354, "step": 16390 }, { "epoch": 0.9629499148611356, "grad_norm": 5.3985466957092285, "learning_rate": 8.584881939683547e-06, "loss": 0.5086, "step": 16400 }, { "epoch": 0.963537079443368, "grad_norm": 2.6577646732330322, "learning_rate": 8.582499792464305e-06, "loss": 0.4573, "step": 16410 }, { "epoch": 0.9641242440256004, "grad_norm": 5.484042644500732, "learning_rate": 8.580115973033207e-06, "loss": 0.3535, "step": 16420 }, { "epoch": 0.9647114086078328, "grad_norm": 3.3357038497924805, "learning_rate": 8.57773048250295e-06, "loss": 0.4778, "step": 16430 }, { "epoch": 0.9652985731900652, "grad_norm": 9.336540222167969, "learning_rate": 8.57534332198702e-06, "loss": 0.4266, "step": 16440 }, { "epoch": 0.9658857377722976, "grad_norm": 4.326075077056885, "learning_rate": 8.572954492599674e-06, "loss": 0.5035, "step": 16450 }, { "epoch": 0.96647290235453, "grad_norm": 2.058699607849121, "learning_rate": 8.570563995455953e-06, "loss": 0.588, "step": 16460 }, { "epoch": 0.9670600669367624, "grad_norm": 3.2867493629455566, "learning_rate": 8.568171831671674e-06, "loss": 0.3522, "step": 16470 }, { "epoch": 0.9676472315189948, "grad_norm": 3.164684295654297, "learning_rate": 8.565778002363435e-06, "loss": 0.4023, "step": 16480 }, { "epoch": 0.9682343961012272, "grad_norm": 9.407050132751465, "learning_rate": 8.563382508648608e-06, "loss": 0.5101, "step": 16490 }, { "epoch": 0.9688215606834596, "grad_norm": 6.051121234893799, "learning_rate": 8.56098535164534e-06, "loss": 0.3743, "step": 16500 }, { "epoch": 0.969408725265692, "grad_norm": 3.769171714782715, "learning_rate": 8.558586532472564e-06, "loss": 0.3626, "step": 16510 }, { "epoch": 0.9699958898479244, "grad_norm": 2.3529045581817627, "learning_rate": 8.556186052249977e-06, "loss": 0.4691, "step": 16520 }, { "epoch": 0.9705830544301568, "grad_norm": 2.012840747833252, "learning_rate": 8.553783912098062e-06, "loss": 0.5318, "step": 16530 }, { "epoch": 0.9711702190123892, "grad_norm": 2.8072891235351562, "learning_rate": 8.551380113138065e-06, "loss": 0.4499, "step": 16540 }, { "epoch": 0.9717573835946216, "grad_norm": 1.5728999376296997, "learning_rate": 8.548974656492018e-06, "loss": 0.4111, "step": 16550 }, { "epoch": 0.972344548176854, "grad_norm": 5.044708251953125, "learning_rate": 8.54656754328272e-06, "loss": 0.4328, "step": 16560 }, { "epoch": 0.9729317127590864, "grad_norm": 1.8796778917312622, "learning_rate": 8.544158774633743e-06, "loss": 0.4998, "step": 16570 }, { "epoch": 0.9735188773413188, "grad_norm": 2.1809887886047363, "learning_rate": 8.54174835166944e-06, "loss": 0.5681, "step": 16580 }, { "epoch": 0.9741060419235512, "grad_norm": 3.615323543548584, "learning_rate": 8.539336275514922e-06, "loss": 0.5309, "step": 16590 }, { "epoch": 0.9746932065057836, "grad_norm": 2.4721028804779053, "learning_rate": 8.536922547296085e-06, "loss": 0.3651, "step": 16600 }, { "epoch": 0.975280371088016, "grad_norm": 8.687054634094238, "learning_rate": 8.534507168139588e-06, "loss": 0.5896, "step": 16610 }, { "epoch": 0.9758675356702484, "grad_norm": 1.6145811080932617, "learning_rate": 8.532090139172862e-06, "loss": 0.4618, "step": 16620 }, { "epoch": 0.9764547002524808, "grad_norm": 3.0128135681152344, "learning_rate": 8.529671461524112e-06, "loss": 0.4069, "step": 16630 }, { "epoch": 0.9770418648347132, "grad_norm": 20.809207916259766, "learning_rate": 8.527251136322309e-06, "loss": 0.6265, "step": 16640 }, { "epoch": 0.9776290294169455, "grad_norm": 2.8309240341186523, "learning_rate": 8.524829164697194e-06, "loss": 0.4647, "step": 16650 }, { "epoch": 0.9782161939991779, "grad_norm": 4.428897380828857, "learning_rate": 8.522405547779275e-06, "loss": 0.5208, "step": 16660 }, { "epoch": 0.9788033585814103, "grad_norm": 4.801155090332031, "learning_rate": 8.519980286699834e-06, "loss": 0.496, "step": 16670 }, { "epoch": 0.9793905231636427, "grad_norm": 3.848679542541504, "learning_rate": 8.517553382590909e-06, "loss": 0.4666, "step": 16680 }, { "epoch": 0.9799776877458751, "grad_norm": 3.1644890308380127, "learning_rate": 8.515124836585316e-06, "loss": 0.5837, "step": 16690 }, { "epoch": 0.9805648523281075, "grad_norm": 1.644539475440979, "learning_rate": 8.51269464981663e-06, "loss": 0.4782, "step": 16700 }, { "epoch": 0.9811520169103399, "grad_norm": 5.261853218078613, "learning_rate": 8.510262823419202e-06, "loss": 0.4758, "step": 16710 }, { "epoch": 0.9817391814925723, "grad_norm": 2.6915481090545654, "learning_rate": 8.507829358528133e-06, "loss": 0.3971, "step": 16720 }, { "epoch": 0.9823263460748047, "grad_norm": 4.44947624206543, "learning_rate": 8.505394256279302e-06, "loss": 0.5168, "step": 16730 }, { "epoch": 0.9829135106570371, "grad_norm": 5.015072345733643, "learning_rate": 8.502957517809346e-06, "loss": 0.4535, "step": 16740 }, { "epoch": 0.9835006752392695, "grad_norm": 4.206794261932373, "learning_rate": 8.500519144255665e-06, "loss": 0.3794, "step": 16750 }, { "epoch": 0.9840878398215019, "grad_norm": 5.4563889503479, "learning_rate": 8.498079136756429e-06, "loss": 0.5851, "step": 16760 }, { "epoch": 0.9846750044037343, "grad_norm": 3.6895816326141357, "learning_rate": 8.495637496450564e-06, "loss": 0.4809, "step": 16770 }, { "epoch": 0.9852621689859667, "grad_norm": 9.5702543258667, "learning_rate": 8.493194224477758e-06, "loss": 0.4086, "step": 16780 }, { "epoch": 0.9858493335681991, "grad_norm": 3.937539577484131, "learning_rate": 8.490749321978466e-06, "loss": 0.5295, "step": 16790 }, { "epoch": 0.9864364981504316, "grad_norm": 3.4173386096954346, "learning_rate": 8.4883027900939e-06, "loss": 0.5369, "step": 16800 }, { "epoch": 0.987023662732664, "grad_norm": 4.214296817779541, "learning_rate": 8.485854629966032e-06, "loss": 0.4931, "step": 16810 }, { "epoch": 0.9876108273148964, "grad_norm": 11.5013427734375, "learning_rate": 8.483404842737596e-06, "loss": 0.4855, "step": 16820 }, { "epoch": 0.9881979918971288, "grad_norm": 1.8500399589538574, "learning_rate": 8.480953429552085e-06, "loss": 0.5537, "step": 16830 }, { "epoch": 0.9887851564793612, "grad_norm": 4.568794250488281, "learning_rate": 8.478500391553752e-06, "loss": 0.4413, "step": 16840 }, { "epoch": 0.9893723210615936, "grad_norm": 6.068347454071045, "learning_rate": 8.476045729887608e-06, "loss": 0.5487, "step": 16850 }, { "epoch": 0.989959485643826, "grad_norm": 4.182713031768799, "learning_rate": 8.473589445699417e-06, "loss": 0.3731, "step": 16860 }, { "epoch": 0.9905466502260584, "grad_norm": 10.86662769317627, "learning_rate": 8.47113154013571e-06, "loss": 0.5295, "step": 16870 }, { "epoch": 0.9911338148082908, "grad_norm": 2.573446750640869, "learning_rate": 8.468672014343767e-06, "loss": 0.3972, "step": 16880 }, { "epoch": 0.9917209793905232, "grad_norm": 2.215233087539673, "learning_rate": 8.466210869471624e-06, "loss": 0.3693, "step": 16890 }, { "epoch": 0.9923081439727556, "grad_norm": 3.0137553215026855, "learning_rate": 8.463748106668078e-06, "loss": 0.4721, "step": 16900 }, { "epoch": 0.992895308554988, "grad_norm": 4.1360039710998535, "learning_rate": 8.461283727082679e-06, "loss": 0.5112, "step": 16910 }, { "epoch": 0.9934824731372204, "grad_norm": 6.178281784057617, "learning_rate": 8.45881773186573e-06, "loss": 0.5443, "step": 16920 }, { "epoch": 0.9940696377194528, "grad_norm": 6.096203327178955, "learning_rate": 8.456350122168291e-06, "loss": 0.4903, "step": 16930 }, { "epoch": 0.9946568023016852, "grad_norm": 2.0820305347442627, "learning_rate": 8.453880899142173e-06, "loss": 0.4842, "step": 16940 }, { "epoch": 0.9952439668839176, "grad_norm": 4.543998718261719, "learning_rate": 8.45141006393994e-06, "loss": 0.47, "step": 16950 }, { "epoch": 0.99583113146615, "grad_norm": 6.808078765869141, "learning_rate": 8.448937617714912e-06, "loss": 0.4247, "step": 16960 }, { "epoch": 0.9964182960483824, "grad_norm": 6.459181785583496, "learning_rate": 8.446463561621157e-06, "loss": 0.4242, "step": 16970 }, { "epoch": 0.9970054606306148, "grad_norm": 3.9254815578460693, "learning_rate": 8.443987896813494e-06, "loss": 0.377, "step": 16980 }, { "epoch": 0.9975926252128472, "grad_norm": 3.5057361125946045, "learning_rate": 8.441510624447498e-06, "loss": 0.4445, "step": 16990 }, { "epoch": 0.9981797897950796, "grad_norm": 1.1957800388336182, "learning_rate": 8.439031745679492e-06, "loss": 0.4896, "step": 17000 }, { "epoch": 0.998766954377312, "grad_norm": 9.600281715393066, "learning_rate": 8.436551261666544e-06, "loss": 0.4506, "step": 17010 }, { "epoch": 0.9993541189595444, "grad_norm": 1.2317569255828857, "learning_rate": 8.434069173566476e-06, "loss": 0.5573, "step": 17020 }, { "epoch": 0.9999412835417768, "grad_norm": 11.873080253601074, "learning_rate": 8.431585482537865e-06, "loss": 0.4051, "step": 17030 }, { "epoch": 1.000528448124009, "grad_norm": 4.115960597991943, "learning_rate": 8.42910018974002e-06, "loss": 0.4512, "step": 17040 }, { "epoch": 1.0011156127062415, "grad_norm": 3.535593032836914, "learning_rate": 8.426613296333013e-06, "loss": 0.5321, "step": 17050 }, { "epoch": 1.001702777288474, "grad_norm": 1.6516624689102173, "learning_rate": 8.424124803477653e-06, "loss": 0.5039, "step": 17060 }, { "epoch": 1.0022899418707063, "grad_norm": 3.7137093544006348, "learning_rate": 8.421634712335504e-06, "loss": 0.4569, "step": 17070 }, { "epoch": 1.0028771064529387, "grad_norm": 3.7385618686676025, "learning_rate": 8.41914302406887e-06, "loss": 0.4236, "step": 17080 }, { "epoch": 1.003464271035171, "grad_norm": 2.7174816131591797, "learning_rate": 8.416649739840798e-06, "loss": 0.4446, "step": 17090 }, { "epoch": 1.0040514356174035, "grad_norm": 2.3829331398010254, "learning_rate": 8.414154860815092e-06, "loss": 0.3274, "step": 17100 }, { "epoch": 1.004638600199636, "grad_norm": 2.227510929107666, "learning_rate": 8.411658388156285e-06, "loss": 0.4849, "step": 17110 }, { "epoch": 1.0052257647818683, "grad_norm": 1.9260997772216797, "learning_rate": 8.409160323029667e-06, "loss": 0.4936, "step": 17120 }, { "epoch": 1.0058129293641007, "grad_norm": 2.05444073677063, "learning_rate": 8.406660666601263e-06, "loss": 0.5201, "step": 17130 }, { "epoch": 1.0064000939463331, "grad_norm": 12.212514877319336, "learning_rate": 8.404159420037843e-06, "loss": 0.5559, "step": 17140 }, { "epoch": 1.0069872585285655, "grad_norm": 8.013599395751953, "learning_rate": 8.401656584506923e-06, "loss": 0.4481, "step": 17150 }, { "epoch": 1.007574423110798, "grad_norm": 7.457577228546143, "learning_rate": 8.399152161176754e-06, "loss": 0.5041, "step": 17160 }, { "epoch": 1.0081615876930303, "grad_norm": 3.4452481269836426, "learning_rate": 8.396646151216333e-06, "loss": 0.4479, "step": 17170 }, { "epoch": 1.0087487522752627, "grad_norm": 2.871997356414795, "learning_rate": 8.394138555795395e-06, "loss": 0.4131, "step": 17180 }, { "epoch": 1.0093359168574951, "grad_norm": 3.549942970275879, "learning_rate": 8.391629376084418e-06, "loss": 0.597, "step": 17190 }, { "epoch": 1.0099230814397275, "grad_norm": 4.6069016456604, "learning_rate": 8.389118613254617e-06, "loss": 0.5042, "step": 17200 }, { "epoch": 1.01051024602196, "grad_norm": 3.957409381866455, "learning_rate": 8.386606268477947e-06, "loss": 0.4662, "step": 17210 }, { "epoch": 1.0110974106041923, "grad_norm": 1.27299964427948, "learning_rate": 8.384092342927099e-06, "loss": 0.5082, "step": 17220 }, { "epoch": 1.0116845751864247, "grad_norm": 3.4135804176330566, "learning_rate": 8.381576837775506e-06, "loss": 0.5577, "step": 17230 }, { "epoch": 1.0122717397686571, "grad_norm": 1.7712161540985107, "learning_rate": 8.379059754197337e-06, "loss": 0.4339, "step": 17240 }, { "epoch": 1.0128589043508895, "grad_norm": 3.851827621459961, "learning_rate": 8.376541093367495e-06, "loss": 0.4521, "step": 17250 }, { "epoch": 1.013446068933122, "grad_norm": 3.8103601932525635, "learning_rate": 8.374020856461623e-06, "loss": 0.3865, "step": 17260 }, { "epoch": 1.0140332335153543, "grad_norm": 12.09384822845459, "learning_rate": 8.371499044656095e-06, "loss": 0.4961, "step": 17270 }, { "epoch": 1.0146203980975868, "grad_norm": 3.487652063369751, "learning_rate": 8.368975659128026e-06, "loss": 0.4415, "step": 17280 }, { "epoch": 1.0152075626798192, "grad_norm": 3.747565269470215, "learning_rate": 8.366450701055264e-06, "loss": 0.4768, "step": 17290 }, { "epoch": 1.0157947272620516, "grad_norm": 4.491301536560059, "learning_rate": 8.363924171616384e-06, "loss": 0.4406, "step": 17300 }, { "epoch": 1.016381891844284, "grad_norm": 2.6845431327819824, "learning_rate": 8.361396071990706e-06, "loss": 0.4604, "step": 17310 }, { "epoch": 1.0169690564265164, "grad_norm": 5.544565200805664, "learning_rate": 8.358866403358273e-06, "loss": 0.4332, "step": 17320 }, { "epoch": 1.0175562210087488, "grad_norm": 5.90191125869751, "learning_rate": 8.356335166899866e-06, "loss": 0.499, "step": 17330 }, { "epoch": 1.0181433855909812, "grad_norm": 2.6851539611816406, "learning_rate": 8.353802363796995e-06, "loss": 0.5067, "step": 17340 }, { "epoch": 1.0187305501732136, "grad_norm": 3.8616528511047363, "learning_rate": 8.351267995231904e-06, "loss": 0.4186, "step": 17350 }, { "epoch": 1.019317714755446, "grad_norm": 2.6306207180023193, "learning_rate": 8.348732062387566e-06, "loss": 0.5419, "step": 17360 }, { "epoch": 1.0199048793376784, "grad_norm": 4.718306541442871, "learning_rate": 8.346194566447685e-06, "loss": 0.4486, "step": 17370 }, { "epoch": 1.0204920439199108, "grad_norm": 2.4547390937805176, "learning_rate": 8.343655508596691e-06, "loss": 0.5142, "step": 17380 }, { "epoch": 1.0210792085021432, "grad_norm": 1.9109259843826294, "learning_rate": 8.341114890019748e-06, "loss": 0.4508, "step": 17390 }, { "epoch": 1.0216663730843756, "grad_norm": 5.437537670135498, "learning_rate": 8.338572711902747e-06, "loss": 0.4901, "step": 17400 }, { "epoch": 1.022253537666608, "grad_norm": 9.687323570251465, "learning_rate": 8.336028975432306e-06, "loss": 0.6346, "step": 17410 }, { "epoch": 1.0228407022488404, "grad_norm": 5.645108699798584, "learning_rate": 8.333483681795772e-06, "loss": 0.4078, "step": 17420 }, { "epoch": 1.0234278668310728, "grad_norm": 8.026515007019043, "learning_rate": 8.330936832181214e-06, "loss": 0.5263, "step": 17430 }, { "epoch": 1.0240150314133052, "grad_norm": 5.5554962158203125, "learning_rate": 8.328388427777434e-06, "loss": 0.4397, "step": 17440 }, { "epoch": 1.0246021959955376, "grad_norm": 4.552297592163086, "learning_rate": 8.325838469773957e-06, "loss": 0.3824, "step": 17450 }, { "epoch": 1.02518936057777, "grad_norm": 5.643883228302002, "learning_rate": 8.323286959361031e-06, "loss": 0.4684, "step": 17460 }, { "epoch": 1.0257765251600024, "grad_norm": 5.232275009155273, "learning_rate": 8.320733897729631e-06, "loss": 0.4414, "step": 17470 }, { "epoch": 1.0263636897422348, "grad_norm": 6.320252895355225, "learning_rate": 8.318179286071458e-06, "loss": 0.468, "step": 17480 }, { "epoch": 1.0269508543244672, "grad_norm": 3.986781120300293, "learning_rate": 8.315623125578931e-06, "loss": 0.5338, "step": 17490 }, { "epoch": 1.0275380189066996, "grad_norm": 2.122734785079956, "learning_rate": 8.313065417445198e-06, "loss": 0.4861, "step": 17500 }, { "epoch": 1.028125183488932, "grad_norm": 6.553761959075928, "learning_rate": 8.310506162864123e-06, "loss": 0.3492, "step": 17510 }, { "epoch": 1.0287123480711644, "grad_norm": 5.349218368530273, "learning_rate": 8.307945363030297e-06, "loss": 0.3904, "step": 17520 }, { "epoch": 1.0292995126533968, "grad_norm": 6.708255290985107, "learning_rate": 8.305383019139032e-06, "loss": 0.4604, "step": 17530 }, { "epoch": 1.0298866772356292, "grad_norm": 5.598525047302246, "learning_rate": 8.302819132386357e-06, "loss": 0.4605, "step": 17540 }, { "epoch": 1.0304738418178616, "grad_norm": 3.051161050796509, "learning_rate": 8.300253703969024e-06, "loss": 0.4339, "step": 17550 }, { "epoch": 1.031061006400094, "grad_norm": 4.946084499359131, "learning_rate": 8.297686735084504e-06, "loss": 0.3726, "step": 17560 }, { "epoch": 1.0316481709823264, "grad_norm": 2.285351514816284, "learning_rate": 8.295118226930988e-06, "loss": 0.6436, "step": 17570 }, { "epoch": 1.0322353355645588, "grad_norm": 2.871744155883789, "learning_rate": 8.292548180707385e-06, "loss": 0.5108, "step": 17580 }, { "epoch": 1.0328225001467912, "grad_norm": 10.584624290466309, "learning_rate": 8.289976597613318e-06, "loss": 0.3819, "step": 17590 }, { "epoch": 1.0334096647290236, "grad_norm": 8.991785049438477, "learning_rate": 8.287403478849136e-06, "loss": 0.4112, "step": 17600 }, { "epoch": 1.033996829311256, "grad_norm": 4.170888423919678, "learning_rate": 8.284828825615896e-06, "loss": 0.4846, "step": 17610 }, { "epoch": 1.0345839938934884, "grad_norm": 3.647148847579956, "learning_rate": 8.282252639115377e-06, "loss": 0.3978, "step": 17620 }, { "epoch": 1.0351711584757208, "grad_norm": 2.6064987182617188, "learning_rate": 8.27967492055007e-06, "loss": 0.5353, "step": 17630 }, { "epoch": 1.0357583230579532, "grad_norm": 7.1178154945373535, "learning_rate": 8.277095671123183e-06, "loss": 0.4784, "step": 17640 }, { "epoch": 1.0363454876401856, "grad_norm": 2.5453736782073975, "learning_rate": 8.27451489203864e-06, "loss": 0.3208, "step": 17650 }, { "epoch": 1.036932652222418, "grad_norm": 2.901657819747925, "learning_rate": 8.271932584501076e-06, "loss": 0.391, "step": 17660 }, { "epoch": 1.0375198168046504, "grad_norm": 2.959848642349243, "learning_rate": 8.26934874971584e-06, "loss": 0.4726, "step": 17670 }, { "epoch": 1.0381069813868828, "grad_norm": 3.884631872177124, "learning_rate": 8.266763388888997e-06, "loss": 0.4418, "step": 17680 }, { "epoch": 1.0386941459691152, "grad_norm": 2.5064518451690674, "learning_rate": 8.26417650322732e-06, "loss": 0.5988, "step": 17690 }, { "epoch": 1.0392813105513476, "grad_norm": 3.576103448867798, "learning_rate": 8.261588093938295e-06, "loss": 0.5043, "step": 17700 }, { "epoch": 1.0398684751335798, "grad_norm": 3.4957339763641357, "learning_rate": 8.258998162230123e-06, "loss": 0.4314, "step": 17710 }, { "epoch": 1.0404556397158125, "grad_norm": 6.416896343231201, "learning_rate": 8.256406709311708e-06, "loss": 0.4219, "step": 17720 }, { "epoch": 1.0410428042980446, "grad_norm": 4.97231912612915, "learning_rate": 8.253813736392675e-06, "loss": 0.4943, "step": 17730 }, { "epoch": 1.041629968880277, "grad_norm": 3.447036027908325, "learning_rate": 8.251219244683346e-06, "loss": 0.4763, "step": 17740 }, { "epoch": 1.0422171334625094, "grad_norm": 2.3696517944335938, "learning_rate": 8.24862323539476e-06, "loss": 0.4218, "step": 17750 }, { "epoch": 1.0428042980447418, "grad_norm": 2.7121572494506836, "learning_rate": 8.246025709738663e-06, "loss": 0.377, "step": 17760 }, { "epoch": 1.0433914626269742, "grad_norm": 3.169848918914795, "learning_rate": 8.243426668927508e-06, "loss": 0.5697, "step": 17770 }, { "epoch": 1.0439786272092066, "grad_norm": 5.211300373077393, "learning_rate": 8.240826114174456e-06, "loss": 0.3855, "step": 17780 }, { "epoch": 1.044565791791439, "grad_norm": 9.270273208618164, "learning_rate": 8.238224046693371e-06, "loss": 0.4798, "step": 17790 }, { "epoch": 1.0451529563736714, "grad_norm": 2.2036795616149902, "learning_rate": 8.235620467698831e-06, "loss": 0.4176, "step": 17800 }, { "epoch": 1.0457401209559039, "grad_norm": 5.448404312133789, "learning_rate": 8.233015378406111e-06, "loss": 0.5243, "step": 17810 }, { "epoch": 1.0463272855381363, "grad_norm": 8.955902099609375, "learning_rate": 8.230408780031196e-06, "loss": 0.4236, "step": 17820 }, { "epoch": 1.0469144501203687, "grad_norm": 21.439897537231445, "learning_rate": 8.227800673790773e-06, "loss": 0.4573, "step": 17830 }, { "epoch": 1.047501614702601, "grad_norm": 5.368468284606934, "learning_rate": 8.225191060902236e-06, "loss": 0.4211, "step": 17840 }, { "epoch": 1.0480887792848335, "grad_norm": 2.686631202697754, "learning_rate": 8.222579942583679e-06, "loss": 0.6178, "step": 17850 }, { "epoch": 1.0486759438670659, "grad_norm": 1.672528624534607, "learning_rate": 8.219967320053899e-06, "loss": 0.4252, "step": 17860 }, { "epoch": 1.0492631084492983, "grad_norm": 3.2502362728118896, "learning_rate": 8.217353194532395e-06, "loss": 0.3756, "step": 17870 }, { "epoch": 1.0498502730315307, "grad_norm": 2.0220985412597656, "learning_rate": 8.214737567239373e-06, "loss": 0.5073, "step": 17880 }, { "epoch": 1.050437437613763, "grad_norm": 5.587464332580566, "learning_rate": 8.212120439395733e-06, "loss": 0.4464, "step": 17890 }, { "epoch": 1.0510246021959955, "grad_norm": 2.259082555770874, "learning_rate": 8.209501812223076e-06, "loss": 0.5545, "step": 17900 }, { "epoch": 1.0516117667782279, "grad_norm": 3.7644734382629395, "learning_rate": 8.206881686943706e-06, "loss": 0.4152, "step": 17910 }, { "epoch": 1.0521989313604603, "grad_norm": 2.237837314605713, "learning_rate": 8.204260064780627e-06, "loss": 0.414, "step": 17920 }, { "epoch": 1.0527860959426927, "grad_norm": 2.0619823932647705, "learning_rate": 8.201636946957537e-06, "loss": 0.51, "step": 17930 }, { "epoch": 1.053373260524925, "grad_norm": 6.4986066818237305, "learning_rate": 8.199012334698837e-06, "loss": 0.4745, "step": 17940 }, { "epoch": 1.0539604251071575, "grad_norm": 4.822766304016113, "learning_rate": 8.196386229229621e-06, "loss": 0.5839, "step": 17950 }, { "epoch": 1.0545475896893899, "grad_norm": 2.135291337966919, "learning_rate": 8.193758631775686e-06, "loss": 0.4357, "step": 17960 }, { "epoch": 1.0551347542716223, "grad_norm": 7.611004829406738, "learning_rate": 8.191129543563519e-06, "loss": 0.4714, "step": 17970 }, { "epoch": 1.0557219188538547, "grad_norm": 4.297474384307861, "learning_rate": 8.188498965820307e-06, "loss": 0.5767, "step": 17980 }, { "epoch": 1.056309083436087, "grad_norm": 9.433745384216309, "learning_rate": 8.185866899773927e-06, "loss": 0.452, "step": 17990 }, { "epoch": 1.0568962480183195, "grad_norm": 3.2064666748046875, "learning_rate": 8.183233346652962e-06, "loss": 0.4472, "step": 18000 }, { "epoch": 1.057483412600552, "grad_norm": 7.015755653381348, "learning_rate": 8.180598307686675e-06, "loss": 0.5118, "step": 18010 }, { "epoch": 1.0580705771827843, "grad_norm": 11.469919204711914, "learning_rate": 8.177961784105035e-06, "loss": 0.4912, "step": 18020 }, { "epoch": 1.0586577417650167, "grad_norm": 1.45244300365448, "learning_rate": 8.175323777138695e-06, "loss": 0.5646, "step": 18030 }, { "epoch": 1.059244906347249, "grad_norm": 7.691417217254639, "learning_rate": 8.172684288019003e-06, "loss": 0.4811, "step": 18040 }, { "epoch": 1.0598320709294815, "grad_norm": 2.770780563354492, "learning_rate": 8.170043317978002e-06, "loss": 0.4737, "step": 18050 }, { "epoch": 1.060419235511714, "grad_norm": 3.1121816635131836, "learning_rate": 8.167400868248421e-06, "loss": 0.5265, "step": 18060 }, { "epoch": 1.0610064000939463, "grad_norm": 2.092012405395508, "learning_rate": 8.164756940063685e-06, "loss": 0.3774, "step": 18070 }, { "epoch": 1.0615935646761787, "grad_norm": 6.522275447845459, "learning_rate": 8.162111534657906e-06, "loss": 0.4263, "step": 18080 }, { "epoch": 1.0621807292584111, "grad_norm": 3.2532505989074707, "learning_rate": 8.159464653265885e-06, "loss": 0.4888, "step": 18090 }, { "epoch": 1.0627678938406435, "grad_norm": 3.4633703231811523, "learning_rate": 8.156816297123115e-06, "loss": 0.4381, "step": 18100 }, { "epoch": 1.063355058422876, "grad_norm": 3.5111594200134277, "learning_rate": 8.154166467465773e-06, "loss": 0.4205, "step": 18110 }, { "epoch": 1.0639422230051083, "grad_norm": 1.6227366924285889, "learning_rate": 8.15151516553073e-06, "loss": 0.5519, "step": 18120 }, { "epoch": 1.0645293875873407, "grad_norm": 4.849449157714844, "learning_rate": 8.148862392555534e-06, "loss": 0.4663, "step": 18130 }, { "epoch": 1.0651165521695731, "grad_norm": 2.4365203380584717, "learning_rate": 8.146208149778434e-06, "loss": 0.3893, "step": 18140 }, { "epoch": 1.0657037167518055, "grad_norm": 2.8878655433654785, "learning_rate": 8.14355243843835e-06, "loss": 0.4501, "step": 18150 }, { "epoch": 1.066290881334038, "grad_norm": 3.0651793479919434, "learning_rate": 8.1408952597749e-06, "loss": 0.4734, "step": 18160 }, { "epoch": 1.0668780459162703, "grad_norm": 2.266685962677002, "learning_rate": 8.138236615028378e-06, "loss": 0.4929, "step": 18170 }, { "epoch": 1.0674652104985027, "grad_norm": 15.0930814743042, "learning_rate": 8.135576505439767e-06, "loss": 0.4817, "step": 18180 }, { "epoch": 1.0680523750807351, "grad_norm": 2.9616708755493164, "learning_rate": 8.132914932250733e-06, "loss": 0.4679, "step": 18190 }, { "epoch": 1.0686395396629675, "grad_norm": 4.168118476867676, "learning_rate": 8.130251896703625e-06, "loss": 0.5337, "step": 18200 }, { "epoch": 1.0692267042452, "grad_norm": 10.045454025268555, "learning_rate": 8.127587400041475e-06, "loss": 0.343, "step": 18210 }, { "epoch": 1.0698138688274323, "grad_norm": 5.1775970458984375, "learning_rate": 8.124921443507994e-06, "loss": 0.4756, "step": 18220 }, { "epoch": 1.0704010334096647, "grad_norm": 4.941429615020752, "learning_rate": 8.122254028347576e-06, "loss": 0.4419, "step": 18230 }, { "epoch": 1.0709881979918972, "grad_norm": 3.3699119091033936, "learning_rate": 8.119585155805302e-06, "loss": 0.4986, "step": 18240 }, { "epoch": 1.0715753625741296, "grad_norm": 7.053046703338623, "learning_rate": 8.116914827126924e-06, "loss": 0.5494, "step": 18250 }, { "epoch": 1.072162527156362, "grad_norm": 8.465611457824707, "learning_rate": 8.114243043558878e-06, "loss": 0.4949, "step": 18260 }, { "epoch": 1.0727496917385944, "grad_norm": 3.517530918121338, "learning_rate": 8.111569806348278e-06, "loss": 0.4172, "step": 18270 }, { "epoch": 1.0733368563208268, "grad_norm": 3.309049129486084, "learning_rate": 8.108895116742918e-06, "loss": 0.5508, "step": 18280 }, { "epoch": 1.0739240209030592, "grad_norm": 2.886317729949951, "learning_rate": 8.106218975991272e-06, "loss": 0.3771, "step": 18290 }, { "epoch": 1.0745111854852916, "grad_norm": 2.3609163761138916, "learning_rate": 8.103541385342484e-06, "loss": 0.444, "step": 18300 }, { "epoch": 1.075098350067524, "grad_norm": 5.106937885284424, "learning_rate": 8.100862346046381e-06, "loss": 0.5042, "step": 18310 }, { "epoch": 1.0756855146497564, "grad_norm": 3.2872891426086426, "learning_rate": 8.098181859353466e-06, "loss": 0.5113, "step": 18320 }, { "epoch": 1.0762726792319888, "grad_norm": 2.7688710689544678, "learning_rate": 8.095499926514914e-06, "loss": 0.4509, "step": 18330 }, { "epoch": 1.0768598438142212, "grad_norm": 19.619298934936523, "learning_rate": 8.092816548782579e-06, "loss": 0.4103, "step": 18340 }, { "epoch": 1.0774470083964536, "grad_norm": 5.857625484466553, "learning_rate": 8.090131727408983e-06, "loss": 0.4233, "step": 18350 }, { "epoch": 1.078034172978686, "grad_norm": 3.368618965148926, "learning_rate": 8.087445463647332e-06, "loss": 0.4131, "step": 18360 }, { "epoch": 1.0786213375609184, "grad_norm": 1.6976536512374878, "learning_rate": 8.084757758751497e-06, "loss": 0.4305, "step": 18370 }, { "epoch": 1.0792085021431508, "grad_norm": 2.4941651821136475, "learning_rate": 8.082068613976026e-06, "loss": 0.4302, "step": 18380 }, { "epoch": 1.0797956667253832, "grad_norm": 4.906524658203125, "learning_rate": 8.079378030576135e-06, "loss": 0.427, "step": 18390 }, { "epoch": 1.0803828313076156, "grad_norm": 16.15652084350586, "learning_rate": 8.076686009807717e-06, "loss": 0.4032, "step": 18400 }, { "epoch": 1.080969995889848, "grad_norm": 2.5479159355163574, "learning_rate": 8.073992552927328e-06, "loss": 0.5555, "step": 18410 }, { "epoch": 1.0815571604720804, "grad_norm": 4.7834649085998535, "learning_rate": 8.071297661192204e-06, "loss": 0.4461, "step": 18420 }, { "epoch": 1.0821443250543128, "grad_norm": 3.045318365097046, "learning_rate": 8.068601335860245e-06, "loss": 0.3972, "step": 18430 }, { "epoch": 1.0827314896365452, "grad_norm": 11.438437461853027, "learning_rate": 8.065903578190018e-06, "loss": 0.4987, "step": 18440 }, { "epoch": 1.0833186542187776, "grad_norm": 10.741507530212402, "learning_rate": 8.063204389440765e-06, "loss": 0.6248, "step": 18450 }, { "epoch": 1.08390581880101, "grad_norm": 10.744539260864258, "learning_rate": 8.060503770872391e-06, "loss": 0.4913, "step": 18460 }, { "epoch": 1.0844929833832424, "grad_norm": 7.758328914642334, "learning_rate": 8.057801723745473e-06, "loss": 0.4636, "step": 18470 }, { "epoch": 1.0850801479654748, "grad_norm": 6.454914093017578, "learning_rate": 8.055098249321247e-06, "loss": 0.4198, "step": 18480 }, { "epoch": 1.0856673125477072, "grad_norm": 2.6406657695770264, "learning_rate": 8.052393348861624e-06, "loss": 0.5106, "step": 18490 }, { "epoch": 1.0862544771299396, "grad_norm": 2.5373196601867676, "learning_rate": 8.049687023629176e-06, "loss": 0.3987, "step": 18500 }, { "epoch": 1.086841641712172, "grad_norm": 7.649650573730469, "learning_rate": 8.046979274887138e-06, "loss": 0.6535, "step": 18510 }, { "epoch": 1.0874288062944044, "grad_norm": 5.038126468658447, "learning_rate": 8.044270103899418e-06, "loss": 0.4804, "step": 18520 }, { "epoch": 1.0880159708766368, "grad_norm": 4.1827216148376465, "learning_rate": 8.041559511930576e-06, "loss": 0.4437, "step": 18530 }, { "epoch": 1.0886031354588692, "grad_norm": 3.382209539413452, "learning_rate": 8.038847500245846e-06, "loss": 0.3817, "step": 18540 }, { "epoch": 1.0891903000411016, "grad_norm": 1.5726369619369507, "learning_rate": 8.036134070111116e-06, "loss": 0.457, "step": 18550 }, { "epoch": 1.089777464623334, "grad_norm": 5.820166110992432, "learning_rate": 8.033419222792944e-06, "loss": 0.4228, "step": 18560 }, { "epoch": 1.0903646292055664, "grad_norm": 2.397747039794922, "learning_rate": 8.030702959558543e-06, "loss": 0.4502, "step": 18570 }, { "epoch": 1.0909517937877986, "grad_norm": 2.408228635787964, "learning_rate": 8.027985281675791e-06, "loss": 0.6229, "step": 18580 }, { "epoch": 1.0915389583700312, "grad_norm": 2.8849854469299316, "learning_rate": 8.025266190413225e-06, "loss": 0.3239, "step": 18590 }, { "epoch": 1.0921261229522634, "grad_norm": 3.359431028366089, "learning_rate": 8.02254568704004e-06, "loss": 0.4573, "step": 18600 }, { "epoch": 1.092713287534496, "grad_norm": 2.729799509048462, "learning_rate": 8.019823772826094e-06, "loss": 0.4463, "step": 18610 }, { "epoch": 1.0933004521167282, "grad_norm": 2.3426926136016846, "learning_rate": 8.0171004490419e-06, "loss": 0.4396, "step": 18620 }, { "epoch": 1.0938876166989606, "grad_norm": 5.349496364593506, "learning_rate": 8.014375716958628e-06, "loss": 0.4365, "step": 18630 }, { "epoch": 1.094474781281193, "grad_norm": 1.3573037385940552, "learning_rate": 8.011649577848113e-06, "loss": 0.3845, "step": 18640 }, { "epoch": 1.0950619458634254, "grad_norm": 4.804746627807617, "learning_rate": 8.008922032982837e-06, "loss": 0.4175, "step": 18650 }, { "epoch": 1.0956491104456578, "grad_norm": 1.5377944707870483, "learning_rate": 8.006193083635944e-06, "loss": 0.4407, "step": 18660 }, { "epoch": 1.0962362750278902, "grad_norm": 2.411046266555786, "learning_rate": 8.003462731081233e-06, "loss": 0.4891, "step": 18670 }, { "epoch": 1.0968234396101226, "grad_norm": 3.153348684310913, "learning_rate": 8.000730976593153e-06, "loss": 0.4678, "step": 18680 }, { "epoch": 1.097410604192355, "grad_norm": 2.087939739227295, "learning_rate": 7.997997821446818e-06, "loss": 0.5417, "step": 18690 }, { "epoch": 1.0979977687745874, "grad_norm": 4.627558708190918, "learning_rate": 7.995263266917985e-06, "loss": 0.4167, "step": 18700 }, { "epoch": 1.0985849333568198, "grad_norm": 7.8232269287109375, "learning_rate": 7.99252731428307e-06, "loss": 0.4976, "step": 18710 }, { "epoch": 1.0991720979390522, "grad_norm": 4.336967468261719, "learning_rate": 7.989789964819137e-06, "loss": 0.4263, "step": 18720 }, { "epoch": 1.0997592625212846, "grad_norm": 5.672743320465088, "learning_rate": 7.987051219803909e-06, "loss": 0.5661, "step": 18730 }, { "epoch": 1.100346427103517, "grad_norm": 3.4237241744995117, "learning_rate": 7.984311080515756e-06, "loss": 0.3849, "step": 18740 }, { "epoch": 1.1009335916857494, "grad_norm": 8.890850067138672, "learning_rate": 7.981569548233695e-06, "loss": 0.4609, "step": 18750 }, { "epoch": 1.1015207562679818, "grad_norm": 3.5541250705718994, "learning_rate": 7.978826624237404e-06, "loss": 0.5729, "step": 18760 }, { "epoch": 1.1021079208502143, "grad_norm": 1.60630464553833, "learning_rate": 7.976082309807199e-06, "loss": 0.5482, "step": 18770 }, { "epoch": 1.1026950854324467, "grad_norm": 9.023519515991211, "learning_rate": 7.97333660622405e-06, "loss": 0.4724, "step": 18780 }, { "epoch": 1.103282250014679, "grad_norm": 4.359330177307129, "learning_rate": 7.97058951476958e-06, "loss": 0.4758, "step": 18790 }, { "epoch": 1.1038694145969115, "grad_norm": 2.7091901302337646, "learning_rate": 7.967841036726052e-06, "loss": 0.4499, "step": 18800 }, { "epoch": 1.1044565791791439, "grad_norm": 4.484881401062012, "learning_rate": 7.965091173376378e-06, "loss": 0.4166, "step": 18810 }, { "epoch": 1.1050437437613763, "grad_norm": 2.5010008811950684, "learning_rate": 7.962339926004123e-06, "loss": 0.5136, "step": 18820 }, { "epoch": 1.1056309083436087, "grad_norm": 5.478389739990234, "learning_rate": 7.95958729589349e-06, "loss": 0.5551, "step": 18830 }, { "epoch": 1.106218072925841, "grad_norm": 2.254476547241211, "learning_rate": 7.956833284329331e-06, "loss": 0.3878, "step": 18840 }, { "epoch": 1.1068052375080735, "grad_norm": 2.8275492191314697, "learning_rate": 7.954077892597143e-06, "loss": 0.4692, "step": 18850 }, { "epoch": 1.1073924020903059, "grad_norm": 1.8832796812057495, "learning_rate": 7.951321121983067e-06, "loss": 0.4947, "step": 18860 }, { "epoch": 1.1079795666725383, "grad_norm": 2.340301990509033, "learning_rate": 7.948562973773884e-06, "loss": 0.4896, "step": 18870 }, { "epoch": 1.1085667312547707, "grad_norm": 13.05655574798584, "learning_rate": 7.945803449257027e-06, "loss": 0.4162, "step": 18880 }, { "epoch": 1.109153895837003, "grad_norm": 10.044194221496582, "learning_rate": 7.94304254972056e-06, "loss": 0.4529, "step": 18890 }, { "epoch": 1.1097410604192355, "grad_norm": 9.189176559448242, "learning_rate": 7.940280276453198e-06, "loss": 0.5701, "step": 18900 }, { "epoch": 1.1103282250014679, "grad_norm": 4.358494758605957, "learning_rate": 7.937516630744294e-06, "loss": 0.4597, "step": 18910 }, { "epoch": 1.1109153895837003, "grad_norm": 6.101172924041748, "learning_rate": 7.93475161388384e-06, "loss": 0.5999, "step": 18920 }, { "epoch": 1.1115025541659327, "grad_norm": 3.770653486251831, "learning_rate": 7.931985227162471e-06, "loss": 0.3609, "step": 18930 }, { "epoch": 1.112089718748165, "grad_norm": 9.012862205505371, "learning_rate": 7.929217471871459e-06, "loss": 0.4056, "step": 18940 }, { "epoch": 1.1126768833303975, "grad_norm": 5.054669380187988, "learning_rate": 7.926448349302713e-06, "loss": 0.427, "step": 18950 }, { "epoch": 1.11326404791263, "grad_norm": 8.46132755279541, "learning_rate": 7.923677860748788e-06, "loss": 0.5754, "step": 18960 }, { "epoch": 1.1138512124948623, "grad_norm": 2.292980432510376, "learning_rate": 7.920906007502866e-06, "loss": 0.4411, "step": 18970 }, { "epoch": 1.1144383770770947, "grad_norm": 5.499203205108643, "learning_rate": 7.918132790858776e-06, "loss": 0.4921, "step": 18980 }, { "epoch": 1.115025541659327, "grad_norm": 2.877424716949463, "learning_rate": 7.915358212110978e-06, "loss": 0.4928, "step": 18990 }, { "epoch": 1.1156127062415595, "grad_norm": 4.082500457763672, "learning_rate": 7.912582272554567e-06, "loss": 0.48, "step": 19000 }, { "epoch": 1.116199870823792, "grad_norm": 2.856598138809204, "learning_rate": 7.909804973485276e-06, "loss": 0.5019, "step": 19010 }, { "epoch": 1.1167870354060243, "grad_norm": 5.69521427154541, "learning_rate": 7.90702631619947e-06, "loss": 0.5332, "step": 19020 }, { "epoch": 1.1173741999882567, "grad_norm": 14.450959205627441, "learning_rate": 7.90424630199415e-06, "loss": 0.3862, "step": 19030 }, { "epoch": 1.1179613645704891, "grad_norm": 3.4814865589141846, "learning_rate": 7.901464932166948e-06, "loss": 0.4859, "step": 19040 }, { "epoch": 1.1185485291527215, "grad_norm": 2.423933744430542, "learning_rate": 7.898682208016137e-06, "loss": 0.3969, "step": 19050 }, { "epoch": 1.119135693734954, "grad_norm": 5.790529251098633, "learning_rate": 7.89589813084061e-06, "loss": 0.5904, "step": 19060 }, { "epoch": 1.1197228583171863, "grad_norm": 2.7803211212158203, "learning_rate": 7.893112701939898e-06, "loss": 0.3821, "step": 19070 }, { "epoch": 1.1203100228994187, "grad_norm": 5.202017784118652, "learning_rate": 7.890325922614164e-06, "loss": 0.5161, "step": 19080 }, { "epoch": 1.1208971874816511, "grad_norm": 6.325150012969971, "learning_rate": 7.887537794164196e-06, "loss": 0.3918, "step": 19090 }, { "epoch": 1.1214843520638835, "grad_norm": 2.330612897872925, "learning_rate": 7.884748317891422e-06, "loss": 0.4895, "step": 19100 }, { "epoch": 1.122071516646116, "grad_norm": 3.2063653469085693, "learning_rate": 7.881957495097884e-06, "loss": 0.4929, "step": 19110 }, { "epoch": 1.1226586812283483, "grad_norm": 3.81659197807312, "learning_rate": 7.879165327086267e-06, "loss": 0.4916, "step": 19120 }, { "epoch": 1.1232458458105807, "grad_norm": 5.808104991912842, "learning_rate": 7.876371815159874e-06, "loss": 0.575, "step": 19130 }, { "epoch": 1.1238330103928131, "grad_norm": 4.694620132446289, "learning_rate": 7.873576960622643e-06, "loss": 0.3944, "step": 19140 }, { "epoch": 1.1244201749750455, "grad_norm": 6.954095840454102, "learning_rate": 7.87078076477913e-06, "loss": 0.529, "step": 19150 }, { "epoch": 1.125007339557278, "grad_norm": 1.6509569883346558, "learning_rate": 7.867983228934529e-06, "loss": 0.4686, "step": 19160 }, { "epoch": 1.1255945041395103, "grad_norm": 2.6924891471862793, "learning_rate": 7.865184354394647e-06, "loss": 0.2923, "step": 19170 }, { "epoch": 1.1261816687217427, "grad_norm": 4.157542705535889, "learning_rate": 7.86238414246592e-06, "loss": 0.4032, "step": 19180 }, { "epoch": 1.1267688333039751, "grad_norm": 2.932427167892456, "learning_rate": 7.859582594455413e-06, "loss": 0.4522, "step": 19190 }, { "epoch": 1.1273559978862076, "grad_norm": 2.968615770339966, "learning_rate": 7.856779711670809e-06, "loss": 0.6243, "step": 19200 }, { "epoch": 1.12794316246844, "grad_norm": 3.027837038040161, "learning_rate": 7.853975495420419e-06, "loss": 0.4401, "step": 19210 }, { "epoch": 1.1285303270506724, "grad_norm": 1.8486171960830688, "learning_rate": 7.851169947013171e-06, "loss": 0.5665, "step": 19220 }, { "epoch": 1.1291174916329048, "grad_norm": 2.8254354000091553, "learning_rate": 7.848363067758617e-06, "loss": 0.301, "step": 19230 }, { "epoch": 1.1297046562151372, "grad_norm": 1.562422513961792, "learning_rate": 7.845554858966934e-06, "loss": 0.3156, "step": 19240 }, { "epoch": 1.1302918207973696, "grad_norm": 6.199041843414307, "learning_rate": 7.842745321948912e-06, "loss": 0.4638, "step": 19250 }, { "epoch": 1.130878985379602, "grad_norm": 2.2752058506011963, "learning_rate": 7.83993445801597e-06, "loss": 0.5198, "step": 19260 }, { "epoch": 1.1314661499618344, "grad_norm": 7.582372665405273, "learning_rate": 7.837122268480137e-06, "loss": 0.3733, "step": 19270 }, { "epoch": 1.1320533145440668, "grad_norm": 2.1130177974700928, "learning_rate": 7.834308754654068e-06, "loss": 0.5485, "step": 19280 }, { "epoch": 1.1326404791262992, "grad_norm": 5.531780242919922, "learning_rate": 7.831493917851031e-06, "loss": 0.6215, "step": 19290 }, { "epoch": 1.1332276437085316, "grad_norm": 3.911618232727051, "learning_rate": 7.828677759384918e-06, "loss": 0.4342, "step": 19300 }, { "epoch": 1.133814808290764, "grad_norm": 3.658888816833496, "learning_rate": 7.82586028057023e-06, "loss": 0.4651, "step": 19310 }, { "epoch": 1.1344019728729964, "grad_norm": 1.4231964349746704, "learning_rate": 7.823041482722091e-06, "loss": 0.3453, "step": 19320 }, { "epoch": 1.1349891374552288, "grad_norm": 1.8848025798797607, "learning_rate": 7.820221367156235e-06, "loss": 0.492, "step": 19330 }, { "epoch": 1.1355763020374612, "grad_norm": 4.474392414093018, "learning_rate": 7.817399935189019e-06, "loss": 0.5052, "step": 19340 }, { "epoch": 1.1361634666196936, "grad_norm": 3.3395748138427734, "learning_rate": 7.814577188137402e-06, "loss": 0.4673, "step": 19350 }, { "epoch": 1.136750631201926, "grad_norm": 4.016549110412598, "learning_rate": 7.81175312731897e-06, "loss": 0.5096, "step": 19360 }, { "epoch": 1.1373377957841584, "grad_norm": 11.108416557312012, "learning_rate": 7.808927754051918e-06, "loss": 0.4149, "step": 19370 }, { "epoch": 1.1379249603663908, "grad_norm": 4.124271392822266, "learning_rate": 7.806101069655045e-06, "loss": 0.3977, "step": 19380 }, { "epoch": 1.1385121249486232, "grad_norm": 2.387871742248535, "learning_rate": 7.803273075447774e-06, "loss": 0.4252, "step": 19390 }, { "epoch": 1.1390992895308556, "grad_norm": 2.1569745540618896, "learning_rate": 7.800443772750135e-06, "loss": 0.3891, "step": 19400 }, { "epoch": 1.1396864541130878, "grad_norm": 5.589890956878662, "learning_rate": 7.797613162882767e-06, "loss": 0.5249, "step": 19410 }, { "epoch": 1.1402736186953204, "grad_norm": 4.419345855712891, "learning_rate": 7.79478124716692e-06, "loss": 0.4174, "step": 19420 }, { "epoch": 1.1408607832775526, "grad_norm": 2.2455029487609863, "learning_rate": 7.791948026924452e-06, "loss": 0.413, "step": 19430 }, { "epoch": 1.1414479478597852, "grad_norm": 2.3991167545318604, "learning_rate": 7.789113503477836e-06, "loss": 0.4296, "step": 19440 }, { "epoch": 1.1420351124420174, "grad_norm": 7.033661365509033, "learning_rate": 7.786277678150148e-06, "loss": 0.3613, "step": 19450 }, { "epoch": 1.14262227702425, "grad_norm": 1.7344022989273071, "learning_rate": 7.783440552265073e-06, "loss": 0.3248, "step": 19460 }, { "epoch": 1.1432094416064822, "grad_norm": 13.650116920471191, "learning_rate": 7.780602127146901e-06, "loss": 0.5303, "step": 19470 }, { "epoch": 1.1437966061887148, "grad_norm": 4.083112716674805, "learning_rate": 7.777762404120532e-06, "loss": 0.3983, "step": 19480 }, { "epoch": 1.144383770770947, "grad_norm": 8.5044527053833, "learning_rate": 7.774921384511469e-06, "loss": 0.3928, "step": 19490 }, { "epoch": 1.1449709353531796, "grad_norm": 3.2973856925964355, "learning_rate": 7.772079069645824e-06, "loss": 0.2639, "step": 19500 }, { "epoch": 1.1455580999354118, "grad_norm": 5.726169109344482, "learning_rate": 7.769235460850305e-06, "loss": 0.452, "step": 19510 }, { "epoch": 1.1461452645176442, "grad_norm": 3.510913133621216, "learning_rate": 7.76639055945224e-06, "loss": 0.3904, "step": 19520 }, { "epoch": 1.1467324290998766, "grad_norm": 1.4113248586654663, "learning_rate": 7.76354436677954e-06, "loss": 0.282, "step": 19530 }, { "epoch": 1.147319593682109, "grad_norm": 2.9317171573638916, "learning_rate": 7.760696884160735e-06, "loss": 0.3446, "step": 19540 }, { "epoch": 1.1479067582643414, "grad_norm": 6.06979513168335, "learning_rate": 7.757848112924949e-06, "loss": 0.3852, "step": 19550 }, { "epoch": 1.1484939228465738, "grad_norm": 5.087854862213135, "learning_rate": 7.754998054401911e-06, "loss": 0.4, "step": 19560 }, { "epoch": 1.1490810874288062, "grad_norm": 3.8843464851379395, "learning_rate": 7.75214670992195e-06, "loss": 0.496, "step": 19570 }, { "epoch": 1.1496682520110386, "grad_norm": 4.904289722442627, "learning_rate": 7.749294080815992e-06, "loss": 0.5888, "step": 19580 }, { "epoch": 1.150255416593271, "grad_norm": 3.572401523590088, "learning_rate": 7.746440168415568e-06, "loss": 0.5113, "step": 19590 }, { "epoch": 1.1508425811755034, "grad_norm": 3.6861648559570312, "learning_rate": 7.743584974052803e-06, "loss": 0.4441, "step": 19600 }, { "epoch": 1.1514297457577358, "grad_norm": 6.854784965515137, "learning_rate": 7.740728499060425e-06, "loss": 0.451, "step": 19610 }, { "epoch": 1.1520169103399682, "grad_norm": 3.8236000537872314, "learning_rate": 7.737870744771758e-06, "loss": 0.3576, "step": 19620 }, { "epoch": 1.1526040749222006, "grad_norm": 5.249654769897461, "learning_rate": 7.73501171252072e-06, "loss": 0.4319, "step": 19630 }, { "epoch": 1.153191239504433, "grad_norm": 7.186614036560059, "learning_rate": 7.73215140364183e-06, "loss": 0.2959, "step": 19640 }, { "epoch": 1.1537784040866654, "grad_norm": 5.175053596496582, "learning_rate": 7.729289819470201e-06, "loss": 0.4773, "step": 19650 }, { "epoch": 1.1543655686688978, "grad_norm": 4.206901550292969, "learning_rate": 7.726426961341542e-06, "loss": 0.5582, "step": 19660 }, { "epoch": 1.1549527332511302, "grad_norm": 1.5021294355392456, "learning_rate": 7.723562830592152e-06, "loss": 0.4147, "step": 19670 }, { "epoch": 1.1555398978333626, "grad_norm": 3.149374485015869, "learning_rate": 7.720697428558934e-06, "loss": 0.4075, "step": 19680 }, { "epoch": 1.156127062415595, "grad_norm": 2.7600271701812744, "learning_rate": 7.717830756579376e-06, "loss": 0.4648, "step": 19690 }, { "epoch": 1.1567142269978274, "grad_norm": 3.1101717948913574, "learning_rate": 7.71496281599156e-06, "loss": 0.4591, "step": 19700 }, { "epoch": 1.1573013915800598, "grad_norm": 1.6709630489349365, "learning_rate": 7.712093608134163e-06, "loss": 0.4576, "step": 19710 }, { "epoch": 1.1578885561622922, "grad_norm": 2.0987296104431152, "learning_rate": 7.70922313434645e-06, "loss": 0.4893, "step": 19720 }, { "epoch": 1.1584757207445247, "grad_norm": 3.381917953491211, "learning_rate": 7.70635139596828e-06, "loss": 0.4033, "step": 19730 }, { "epoch": 1.159062885326757, "grad_norm": 1.360689640045166, "learning_rate": 7.703478394340102e-06, "loss": 0.4718, "step": 19740 }, { "epoch": 1.1596500499089895, "grad_norm": 3.049525260925293, "learning_rate": 7.70060413080295e-06, "loss": 0.5074, "step": 19750 }, { "epoch": 1.1602372144912219, "grad_norm": 3.838028907775879, "learning_rate": 7.697728606698457e-06, "loss": 0.5591, "step": 19760 }, { "epoch": 1.1608243790734543, "grad_norm": 5.939558982849121, "learning_rate": 7.694851823368835e-06, "loss": 0.4444, "step": 19770 }, { "epoch": 1.1614115436556867, "grad_norm": 3.0025410652160645, "learning_rate": 7.691973782156886e-06, "loss": 0.4401, "step": 19780 }, { "epoch": 1.161998708237919, "grad_norm": 9.406353950500488, "learning_rate": 7.689094484405999e-06, "loss": 0.5487, "step": 19790 }, { "epoch": 1.1625858728201515, "grad_norm": 8.785649299621582, "learning_rate": 7.686213931460153e-06, "loss": 0.4004, "step": 19800 }, { "epoch": 1.1631730374023839, "grad_norm": 1.6222039461135864, "learning_rate": 7.68333212466391e-06, "loss": 0.4242, "step": 19810 }, { "epoch": 1.1637602019846163, "grad_norm": 1.6839478015899658, "learning_rate": 7.680449065362416e-06, "loss": 0.5644, "step": 19820 }, { "epoch": 1.1643473665668487, "grad_norm": 28.521886825561523, "learning_rate": 7.677564754901408e-06, "loss": 0.5425, "step": 19830 }, { "epoch": 1.164934531149081, "grad_norm": 3.699608325958252, "learning_rate": 7.674679194627198e-06, "loss": 0.4219, "step": 19840 }, { "epoch": 1.1655216957313135, "grad_norm": 3.4487273693084717, "learning_rate": 7.671792385886686e-06, "loss": 0.4838, "step": 19850 }, { "epoch": 1.1661088603135459, "grad_norm": 2.2711241245269775, "learning_rate": 7.668904330027356e-06, "loss": 0.4608, "step": 19860 }, { "epoch": 1.1666960248957783, "grad_norm": 4.066902160644531, "learning_rate": 7.666015028397273e-06, "loss": 0.5684, "step": 19870 }, { "epoch": 1.1672831894780107, "grad_norm": 1.4403035640716553, "learning_rate": 7.66312448234508e-06, "loss": 0.3384, "step": 19880 }, { "epoch": 1.167870354060243, "grad_norm": 3.9523863792419434, "learning_rate": 7.660232693220008e-06, "loss": 0.5032, "step": 19890 }, { "epoch": 1.1684575186424755, "grad_norm": 3.6431384086608887, "learning_rate": 7.657339662371864e-06, "loss": 0.4069, "step": 19900 }, { "epoch": 1.169044683224708, "grad_norm": 4.01939582824707, "learning_rate": 7.65444539115103e-06, "loss": 0.4198, "step": 19910 }, { "epoch": 1.1696318478069403, "grad_norm": 5.7366108894348145, "learning_rate": 7.651549880908478e-06, "loss": 0.4216, "step": 19920 }, { "epoch": 1.1702190123891727, "grad_norm": 2.9458186626434326, "learning_rate": 7.648653132995746e-06, "loss": 0.4007, "step": 19930 }, { "epoch": 1.170806176971405, "grad_norm": 3.902373790740967, "learning_rate": 7.645755148764963e-06, "loss": 0.5752, "step": 19940 }, { "epoch": 1.1713933415536375, "grad_norm": 11.462562561035156, "learning_rate": 7.642855929568821e-06, "loss": 0.4924, "step": 19950 }, { "epoch": 1.17198050613587, "grad_norm": 4.958068370819092, "learning_rate": 7.6399554767606e-06, "loss": 0.4629, "step": 19960 }, { "epoch": 1.1725676707181023, "grad_norm": 14.780145645141602, "learning_rate": 7.637053791694148e-06, "loss": 0.3515, "step": 19970 }, { "epoch": 1.1731548353003347, "grad_norm": 2.7935218811035156, "learning_rate": 7.634150875723893e-06, "loss": 0.5145, "step": 19980 }, { "epoch": 1.173741999882567, "grad_norm": 5.48145866394043, "learning_rate": 7.631246730204837e-06, "loss": 0.4366, "step": 19990 }, { "epoch": 1.1743291644647995, "grad_norm": 2.2359609603881836, "learning_rate": 7.628341356492552e-06, "loss": 0.4062, "step": 20000 }, { "epoch": 1.174916329047032, "grad_norm": 2.5128567218780518, "learning_rate": 7.6254347559431905e-06, "loss": 0.4535, "step": 20010 }, { "epoch": 1.1755034936292643, "grad_norm": 8.435670852661133, "learning_rate": 7.622526929913468e-06, "loss": 0.3595, "step": 20020 }, { "epoch": 1.1760906582114967, "grad_norm": 4.540210247039795, "learning_rate": 7.619617879760681e-06, "loss": 0.4179, "step": 20030 }, { "epoch": 1.1766778227937291, "grad_norm": 16.15162467956543, "learning_rate": 7.616707606842693e-06, "loss": 0.3632, "step": 20040 }, { "epoch": 1.1772649873759615, "grad_norm": 2.795699119567871, "learning_rate": 7.613796112517937e-06, "loss": 0.4005, "step": 20050 }, { "epoch": 1.177852151958194, "grad_norm": 4.654391288757324, "learning_rate": 7.61088339814542e-06, "loss": 0.3792, "step": 20060 }, { "epoch": 1.1784393165404263, "grad_norm": 2.7118773460388184, "learning_rate": 7.6079694650847155e-06, "loss": 0.5488, "step": 20070 }, { "epoch": 1.1790264811226587, "grad_norm": 4.732720375061035, "learning_rate": 7.6050543146959675e-06, "loss": 0.5427, "step": 20080 }, { "epoch": 1.1796136457048911, "grad_norm": 2.606759786605835, "learning_rate": 7.602137948339889e-06, "loss": 0.5185, "step": 20090 }, { "epoch": 1.1802008102871235, "grad_norm": 8.539316177368164, "learning_rate": 7.599220367377758e-06, "loss": 0.4733, "step": 20100 }, { "epoch": 1.180787974869356, "grad_norm": 2.0223758220672607, "learning_rate": 7.596301573171418e-06, "loss": 0.4022, "step": 20110 }, { "epoch": 1.1813751394515883, "grad_norm": 6.539281845092773, "learning_rate": 7.593381567083284e-06, "loss": 0.4375, "step": 20120 }, { "epoch": 1.1819623040338207, "grad_norm": 5.656796932220459, "learning_rate": 7.5904603504763366e-06, "loss": 0.4814, "step": 20130 }, { "epoch": 1.1825494686160531, "grad_norm": 7.315996170043945, "learning_rate": 7.587537924714114e-06, "loss": 0.5085, "step": 20140 }, { "epoch": 1.1831366331982855, "grad_norm": 7.85996150970459, "learning_rate": 7.584614291160727e-06, "loss": 0.4661, "step": 20150 }, { "epoch": 1.183723797780518, "grad_norm": 3.885576009750366, "learning_rate": 7.581689451180845e-06, "loss": 0.508, "step": 20160 }, { "epoch": 1.1843109623627504, "grad_norm": 5.587615966796875, "learning_rate": 7.578763406139704e-06, "loss": 0.3945, "step": 20170 }, { "epoch": 1.1848981269449828, "grad_norm": 1.949946641921997, "learning_rate": 7.575836157403101e-06, "loss": 0.4561, "step": 20180 }, { "epoch": 1.1854852915272152, "grad_norm": 7.2564311027526855, "learning_rate": 7.572907706337392e-06, "loss": 0.4774, "step": 20190 }, { "epoch": 1.1860724561094476, "grad_norm": 3.0287489891052246, "learning_rate": 7.569978054309499e-06, "loss": 0.4736, "step": 20200 }, { "epoch": 1.18665962069168, "grad_norm": 8.77490234375, "learning_rate": 7.567047202686904e-06, "loss": 0.4212, "step": 20210 }, { "epoch": 1.1872467852739124, "grad_norm": 5.015997409820557, "learning_rate": 7.564115152837648e-06, "loss": 0.3838, "step": 20220 }, { "epoch": 1.1878339498561448, "grad_norm": 1.933013916015625, "learning_rate": 7.561181906130326e-06, "loss": 0.3907, "step": 20230 }, { "epoch": 1.1884211144383772, "grad_norm": 11.588569641113281, "learning_rate": 7.558247463934101e-06, "loss": 0.447, "step": 20240 }, { "epoch": 1.1890082790206096, "grad_norm": 2.7352499961853027, "learning_rate": 7.5553118276186865e-06, "loss": 0.4066, "step": 20250 }, { "epoch": 1.189595443602842, "grad_norm": 1.5748792886734009, "learning_rate": 7.552374998554359e-06, "loss": 0.3598, "step": 20260 }, { "epoch": 1.1901826081850744, "grad_norm": 8.205339431762695, "learning_rate": 7.549436978111948e-06, "loss": 0.5317, "step": 20270 }, { "epoch": 1.1907697727673066, "grad_norm": 6.139186859130859, "learning_rate": 7.546497767662839e-06, "loss": 0.4949, "step": 20280 }, { "epoch": 1.1913569373495392, "grad_norm": 9.259969711303711, "learning_rate": 7.543557368578975e-06, "loss": 0.5289, "step": 20290 }, { "epoch": 1.1919441019317714, "grad_norm": 5.457942008972168, "learning_rate": 7.540615782232854e-06, "loss": 0.5894, "step": 20300 }, { "epoch": 1.192531266514004, "grad_norm": 2.3968796730041504, "learning_rate": 7.537673009997528e-06, "loss": 0.4531, "step": 20310 }, { "epoch": 1.1931184310962362, "grad_norm": 3.611821174621582, "learning_rate": 7.534729053246598e-06, "loss": 0.4, "step": 20320 }, { "epoch": 1.1937055956784688, "grad_norm": 5.632368087768555, "learning_rate": 7.531783913354225e-06, "loss": 0.4942, "step": 20330 }, { "epoch": 1.194292760260701, "grad_norm": 3.7811543941497803, "learning_rate": 7.528837591695117e-06, "loss": 0.4627, "step": 20340 }, { "epoch": 1.1948799248429336, "grad_norm": 2.025299310684204, "learning_rate": 7.525890089644536e-06, "loss": 0.3449, "step": 20350 }, { "epoch": 1.1954670894251658, "grad_norm": 4.617823600769043, "learning_rate": 7.522941408578295e-06, "loss": 0.4663, "step": 20360 }, { "epoch": 1.1960542540073984, "grad_norm": 3.1408767700195312, "learning_rate": 7.519991549872756e-06, "loss": 0.4381, "step": 20370 }, { "epoch": 1.1966414185896306, "grad_norm": 2.9922800064086914, "learning_rate": 7.517040514904834e-06, "loss": 0.4433, "step": 20380 }, { "epoch": 1.197228583171863, "grad_norm": 2.028430461883545, "learning_rate": 7.514088305051987e-06, "loss": 0.4125, "step": 20390 }, { "epoch": 1.1978157477540954, "grad_norm": 9.817422866821289, "learning_rate": 7.511134921692225e-06, "loss": 0.5858, "step": 20400 }, { "epoch": 1.1984029123363278, "grad_norm": 3.3120579719543457, "learning_rate": 7.508180366204108e-06, "loss": 0.5572, "step": 20410 }, { "epoch": 1.1989900769185602, "grad_norm": 2.737548589706421, "learning_rate": 7.5052246399667414e-06, "loss": 0.4637, "step": 20420 }, { "epoch": 1.1995772415007926, "grad_norm": 4.963471412658691, "learning_rate": 7.502267744359775e-06, "loss": 0.4263, "step": 20430 }, { "epoch": 1.200164406083025, "grad_norm": 4.728243827819824, "learning_rate": 7.499309680763404e-06, "loss": 0.4476, "step": 20440 }, { "epoch": 1.2007515706652574, "grad_norm": 7.542288303375244, "learning_rate": 7.496350450558373e-06, "loss": 0.4266, "step": 20450 }, { "epoch": 1.2013387352474898, "grad_norm": 4.628617286682129, "learning_rate": 7.493390055125968e-06, "loss": 0.4311, "step": 20460 }, { "epoch": 1.2019258998297222, "grad_norm": 8.213972091674805, "learning_rate": 7.490428495848022e-06, "loss": 0.4146, "step": 20470 }, { "epoch": 1.2025130644119546, "grad_norm": 1.4059685468673706, "learning_rate": 7.487465774106905e-06, "loss": 0.4769, "step": 20480 }, { "epoch": 1.203100228994187, "grad_norm": 2.6306254863739014, "learning_rate": 7.4845018912855385e-06, "loss": 0.56, "step": 20490 }, { "epoch": 1.2036873935764194, "grad_norm": 3.1189873218536377, "learning_rate": 7.481536848767376e-06, "loss": 0.4042, "step": 20500 }, { "epoch": 1.2042745581586518, "grad_norm": 3.9629297256469727, "learning_rate": 7.478570647936422e-06, "loss": 0.386, "step": 20510 }, { "epoch": 1.2048617227408842, "grad_norm": 11.445968627929688, "learning_rate": 7.475603290177213e-06, "loss": 0.4043, "step": 20520 }, { "epoch": 1.2054488873231166, "grad_norm": 8.197541236877441, "learning_rate": 7.472634776874832e-06, "loss": 0.5266, "step": 20530 }, { "epoch": 1.206036051905349, "grad_norm": 6.970523834228516, "learning_rate": 7.469665109414899e-06, "loss": 0.4096, "step": 20540 }, { "epoch": 1.2066232164875814, "grad_norm": 1.7146824598312378, "learning_rate": 7.466694289183571e-06, "loss": 0.4448, "step": 20550 }, { "epoch": 1.2072103810698138, "grad_norm": 1.842336893081665, "learning_rate": 7.463722317567546e-06, "loss": 0.4198, "step": 20560 }, { "epoch": 1.2077975456520462, "grad_norm": 3.7994630336761475, "learning_rate": 7.460749195954057e-06, "loss": 0.4694, "step": 20570 }, { "epoch": 1.2083847102342786, "grad_norm": 3.6569888591766357, "learning_rate": 7.457774925730876e-06, "loss": 0.4432, "step": 20580 }, { "epoch": 1.208971874816511, "grad_norm": 1.5122698545455933, "learning_rate": 7.454799508286311e-06, "loss": 0.4822, "step": 20590 }, { "epoch": 1.2095590393987434, "grad_norm": 2.012589931488037, "learning_rate": 7.451822945009201e-06, "loss": 0.3641, "step": 20600 }, { "epoch": 1.2101462039809758, "grad_norm": 3.6029229164123535, "learning_rate": 7.448845237288931e-06, "loss": 0.3351, "step": 20610 }, { "epoch": 1.2107333685632082, "grad_norm": 3.254457473754883, "learning_rate": 7.4458663865154034e-06, "loss": 0.3692, "step": 20620 }, { "epoch": 1.2113205331454406, "grad_norm": 2.6332757472991943, "learning_rate": 7.44288639407907e-06, "loss": 0.4644, "step": 20630 }, { "epoch": 1.211907697727673, "grad_norm": 6.831838607788086, "learning_rate": 7.439905261370905e-06, "loss": 0.4772, "step": 20640 }, { "epoch": 1.2124948623099054, "grad_norm": 3.265374183654785, "learning_rate": 7.4369229897824214e-06, "loss": 0.4265, "step": 20650 }, { "epoch": 1.2130820268921378, "grad_norm": 2.0043153762817383, "learning_rate": 7.433939580705659e-06, "loss": 0.3598, "step": 20660 }, { "epoch": 1.2136691914743702, "grad_norm": 3.3408727645874023, "learning_rate": 7.430955035533191e-06, "loss": 0.4383, "step": 20670 }, { "epoch": 1.2142563560566026, "grad_norm": 4.659994602203369, "learning_rate": 7.427969355658122e-06, "loss": 0.4525, "step": 20680 }, { "epoch": 1.214843520638835, "grad_norm": 4.739403247833252, "learning_rate": 7.424982542474084e-06, "loss": 0.5896, "step": 20690 }, { "epoch": 1.2154306852210675, "grad_norm": 5.105829238891602, "learning_rate": 7.421994597375241e-06, "loss": 0.5745, "step": 20700 }, { "epoch": 1.2160178498032999, "grad_norm": 4.223515033721924, "learning_rate": 7.419005521756278e-06, "loss": 0.4223, "step": 20710 }, { "epoch": 1.2166050143855323, "grad_norm": 3.750030755996704, "learning_rate": 7.416015317012417e-06, "loss": 0.4112, "step": 20720 }, { "epoch": 1.2171921789677647, "grad_norm": 4.6278395652771, "learning_rate": 7.413023984539401e-06, "loss": 0.6009, "step": 20730 }, { "epoch": 1.217779343549997, "grad_norm": 2.1175029277801514, "learning_rate": 7.4100315257335035e-06, "loss": 0.4102, "step": 20740 }, { "epoch": 1.2183665081322295, "grad_norm": 9.34257698059082, "learning_rate": 7.4070379419915205e-06, "loss": 0.4601, "step": 20750 }, { "epoch": 1.2189536727144619, "grad_norm": 4.943175315856934, "learning_rate": 7.404043234710771e-06, "loss": 0.4817, "step": 20760 }, { "epoch": 1.2195408372966943, "grad_norm": 4.970344543457031, "learning_rate": 7.401047405289107e-06, "loss": 0.5351, "step": 20770 }, { "epoch": 1.2201280018789267, "grad_norm": 4.4442219734191895, "learning_rate": 7.398050455124896e-06, "loss": 0.5913, "step": 20780 }, { "epoch": 1.220715166461159, "grad_norm": 5.970925331115723, "learning_rate": 7.395052385617031e-06, "loss": 0.5211, "step": 20790 }, { "epoch": 1.2213023310433915, "grad_norm": 7.705620765686035, "learning_rate": 7.392053198164927e-06, "loss": 0.4933, "step": 20800 }, { "epoch": 1.2218894956256239, "grad_norm": 11.228775978088379, "learning_rate": 7.389052894168525e-06, "loss": 0.6178, "step": 20810 }, { "epoch": 1.2224766602078563, "grad_norm": 2.3136754035949707, "learning_rate": 7.386051475028282e-06, "loss": 0.5296, "step": 20820 }, { "epoch": 1.2230638247900887, "grad_norm": 2.0388431549072266, "learning_rate": 7.383048942145179e-06, "loss": 0.5247, "step": 20830 }, { "epoch": 1.223650989372321, "grad_norm": 7.0328474044799805, "learning_rate": 7.380045296920713e-06, "loss": 0.4563, "step": 20840 }, { "epoch": 1.2242381539545535, "grad_norm": 5.2282819747924805, "learning_rate": 7.3770405407569035e-06, "loss": 0.3915, "step": 20850 }, { "epoch": 1.224825318536786, "grad_norm": 8.917223930358887, "learning_rate": 7.3740346750562884e-06, "loss": 0.5034, "step": 20860 }, { "epoch": 1.2254124831190183, "grad_norm": 3.822263717651367, "learning_rate": 7.371027701221921e-06, "loss": 0.4351, "step": 20870 }, { "epoch": 1.2259996477012507, "grad_norm": 4.482334136962891, "learning_rate": 7.368019620657376e-06, "loss": 0.3545, "step": 20880 }, { "epoch": 1.226586812283483, "grad_norm": 4.496711730957031, "learning_rate": 7.3650104347667395e-06, "loss": 0.5292, "step": 20890 }, { "epoch": 1.2271739768657155, "grad_norm": 3.9080774784088135, "learning_rate": 7.3620001449546185e-06, "loss": 0.4227, "step": 20900 }, { "epoch": 1.227761141447948, "grad_norm": 5.899505615234375, "learning_rate": 7.358988752626133e-06, "loss": 0.3822, "step": 20910 }, { "epoch": 1.2283483060301803, "grad_norm": 4.089540958404541, "learning_rate": 7.3559762591869155e-06, "loss": 0.3578, "step": 20920 }, { "epoch": 1.2289354706124127, "grad_norm": 10.150947570800781, "learning_rate": 7.352962666043118e-06, "loss": 0.3523, "step": 20930 }, { "epoch": 1.229522635194645, "grad_norm": 8.390397071838379, "learning_rate": 7.349947974601399e-06, "loss": 0.3663, "step": 20940 }, { "epoch": 1.2301097997768775, "grad_norm": 6.979901313781738, "learning_rate": 7.346932186268935e-06, "loss": 0.5476, "step": 20950 }, { "epoch": 1.23069696435911, "grad_norm": 5.822178840637207, "learning_rate": 7.343915302453412e-06, "loss": 0.3782, "step": 20960 }, { "epoch": 1.2312841289413423, "grad_norm": 15.448721885681152, "learning_rate": 7.340897324563027e-06, "loss": 0.4824, "step": 20970 }, { "epoch": 1.2318712935235747, "grad_norm": 3.4693784713745117, "learning_rate": 7.337878254006493e-06, "loss": 0.487, "step": 20980 }, { "epoch": 1.2324584581058071, "grad_norm": 4.680280685424805, "learning_rate": 7.3348580921930234e-06, "loss": 0.5197, "step": 20990 }, { "epoch": 1.2330456226880395, "grad_norm": 4.490157127380371, "learning_rate": 7.331836840532349e-06, "loss": 0.4711, "step": 21000 }, { "epoch": 1.233632787270272, "grad_norm": 4.983327865600586, "learning_rate": 7.3288145004347055e-06, "loss": 0.4438, "step": 21010 }, { "epoch": 1.2342199518525043, "grad_norm": 4.770772933959961, "learning_rate": 7.325791073310839e-06, "loss": 0.433, "step": 21020 }, { "epoch": 1.2348071164347367, "grad_norm": 3.6512975692749023, "learning_rate": 7.322766560572e-06, "loss": 0.4866, "step": 21030 }, { "epoch": 1.2353942810169691, "grad_norm": 1.524401068687439, "learning_rate": 7.319740963629949e-06, "loss": 0.5049, "step": 21040 }, { "epoch": 1.2359814455992015, "grad_norm": 3.8645524978637695, "learning_rate": 7.316714283896949e-06, "loss": 0.3769, "step": 21050 }, { "epoch": 1.236568610181434, "grad_norm": 6.365732669830322, "learning_rate": 7.313686522785772e-06, "loss": 0.369, "step": 21060 }, { "epoch": 1.2371557747636663, "grad_norm": 3.3889033794403076, "learning_rate": 7.3106576817096945e-06, "loss": 0.456, "step": 21070 }, { "epoch": 1.2377429393458987, "grad_norm": 5.267975330352783, "learning_rate": 7.307627762082491e-06, "loss": 0.4913, "step": 21080 }, { "epoch": 1.2383301039281311, "grad_norm": 6.537972450256348, "learning_rate": 7.30459676531845e-06, "loss": 0.5468, "step": 21090 }, { "epoch": 1.2389172685103635, "grad_norm": 9.53585433959961, "learning_rate": 7.301564692832352e-06, "loss": 0.463, "step": 21100 }, { "epoch": 1.239504433092596, "grad_norm": 2.363009452819824, "learning_rate": 7.298531546039489e-06, "loss": 0.4924, "step": 21110 }, { "epoch": 1.2400915976748283, "grad_norm": 6.391613006591797, "learning_rate": 7.295497326355647e-06, "loss": 0.4254, "step": 21120 }, { "epoch": 1.2406787622570605, "grad_norm": 4.20644474029541, "learning_rate": 7.292462035197118e-06, "loss": 0.521, "step": 21130 }, { "epoch": 1.2412659268392932, "grad_norm": 2.292397975921631, "learning_rate": 7.28942567398069e-06, "loss": 0.2933, "step": 21140 }, { "epoch": 1.2418530914215253, "grad_norm": 8.065433502197266, "learning_rate": 7.286388244123654e-06, "loss": 0.53, "step": 21150 }, { "epoch": 1.242440256003758, "grad_norm": 3.6841070652008057, "learning_rate": 7.283349747043796e-06, "loss": 0.3993, "step": 21160 }, { "epoch": 1.2430274205859901, "grad_norm": 4.399044036865234, "learning_rate": 7.280310184159405e-06, "loss": 0.5856, "step": 21170 }, { "epoch": 1.2436145851682228, "grad_norm": 2.6274852752685547, "learning_rate": 7.277269556889265e-06, "loss": 0.4672, "step": 21180 }, { "epoch": 1.244201749750455, "grad_norm": 2.9401051998138428, "learning_rate": 7.274227866652655e-06, "loss": 0.3867, "step": 21190 }, { "epoch": 1.2447889143326876, "grad_norm": 19.513383865356445, "learning_rate": 7.271185114869351e-06, "loss": 0.5933, "step": 21200 }, { "epoch": 1.2453760789149197, "grad_norm": 2.4839181900024414, "learning_rate": 7.268141302959628e-06, "loss": 0.4644, "step": 21210 }, { "epoch": 1.2459632434971524, "grad_norm": 3.7565054893493652, "learning_rate": 7.265096432344252e-06, "loss": 0.4602, "step": 21220 }, { "epoch": 1.2465504080793846, "grad_norm": 6.173889636993408, "learning_rate": 7.262050504444487e-06, "loss": 0.483, "step": 21230 }, { "epoch": 1.247137572661617, "grad_norm": 2.729943037033081, "learning_rate": 7.259003520682082e-06, "loss": 0.5828, "step": 21240 }, { "epoch": 1.2477247372438494, "grad_norm": 7.581305503845215, "learning_rate": 7.255955482479291e-06, "loss": 0.5036, "step": 21250 }, { "epoch": 1.2483119018260818, "grad_norm": 5.225300312042236, "learning_rate": 7.25290639125885e-06, "loss": 0.3819, "step": 21260 }, { "epoch": 1.2488990664083142, "grad_norm": 4.5110087394714355, "learning_rate": 7.249856248443993e-06, "loss": 0.5546, "step": 21270 }, { "epoch": 1.2494862309905466, "grad_norm": 5.4625420570373535, "learning_rate": 7.2468050554584405e-06, "loss": 0.4328, "step": 21280 }, { "epoch": 1.250073395572779, "grad_norm": 1.6002528667449951, "learning_rate": 7.243752813726404e-06, "loss": 0.5149, "step": 21290 }, { "epoch": 1.2506605601550114, "grad_norm": 3.5929386615753174, "learning_rate": 7.240699524672591e-06, "loss": 0.3767, "step": 21300 }, { "epoch": 1.2512477247372438, "grad_norm": 1.3438873291015625, "learning_rate": 7.2376451897221865e-06, "loss": 0.4996, "step": 21310 }, { "epoch": 1.2518348893194762, "grad_norm": 4.885663032531738, "learning_rate": 7.2345898103008714e-06, "loss": 0.4653, "step": 21320 }, { "epoch": 1.2524220539017086, "grad_norm": 3.838775396347046, "learning_rate": 7.231533387834813e-06, "loss": 0.4296, "step": 21330 }, { "epoch": 1.253009218483941, "grad_norm": 2.0841145515441895, "learning_rate": 7.2284759237506655e-06, "loss": 0.4436, "step": 21340 }, { "epoch": 1.2535963830661734, "grad_norm": 3.6925268173217773, "learning_rate": 7.225417419475567e-06, "loss": 0.4456, "step": 21350 }, { "epoch": 1.2541835476484058, "grad_norm": 6.373058319091797, "learning_rate": 7.2223578764371424e-06, "loss": 0.3956, "step": 21360 }, { "epoch": 1.2547707122306382, "grad_norm": 13.595552444458008, "learning_rate": 7.219297296063505e-06, "loss": 0.4051, "step": 21370 }, { "epoch": 1.2553578768128706, "grad_norm": 2.29191255569458, "learning_rate": 7.216235679783245e-06, "loss": 0.4379, "step": 21380 }, { "epoch": 1.255945041395103, "grad_norm": 3.7062742710113525, "learning_rate": 7.213173029025444e-06, "loss": 0.3242, "step": 21390 }, { "epoch": 1.2565322059773354, "grad_norm": 5.137221813201904, "learning_rate": 7.210109345219658e-06, "loss": 0.5322, "step": 21400 }, { "epoch": 1.2571193705595678, "grad_norm": 1.4710677862167358, "learning_rate": 7.2070446297959355e-06, "loss": 0.4812, "step": 21410 }, { "epoch": 1.2577065351418002, "grad_norm": 3.7641184329986572, "learning_rate": 7.203978884184797e-06, "loss": 0.5232, "step": 21420 }, { "epoch": 1.2582936997240326, "grad_norm": 1.6902546882629395, "learning_rate": 7.2009121098172485e-06, "loss": 0.4198, "step": 21430 }, { "epoch": 1.258880864306265, "grad_norm": 4.143491744995117, "learning_rate": 7.1978443081247766e-06, "loss": 0.6148, "step": 21440 }, { "epoch": 1.2594680288884974, "grad_norm": 3.891427516937256, "learning_rate": 7.194775480539344e-06, "loss": 0.4359, "step": 21450 }, { "epoch": 1.2600551934707298, "grad_norm": 1.8325468301773071, "learning_rate": 7.1917056284934e-06, "loss": 0.3705, "step": 21460 }, { "epoch": 1.2606423580529622, "grad_norm": 12.759049415588379, "learning_rate": 7.188634753419858e-06, "loss": 0.5222, "step": 21470 }, { "epoch": 1.2612295226351946, "grad_norm": 1.989132046699524, "learning_rate": 7.1855628567521265e-06, "loss": 0.4498, "step": 21480 }, { "epoch": 1.261816687217427, "grad_norm": 2.7918331623077393, "learning_rate": 7.182489939924075e-06, "loss": 0.5204, "step": 21490 }, { "epoch": 1.2624038517996594, "grad_norm": 6.007071495056152, "learning_rate": 7.179416004370061e-06, "loss": 0.527, "step": 21500 }, { "epoch": 1.2629910163818918, "grad_norm": 3.990840196609497, "learning_rate": 7.176341051524911e-06, "loss": 0.5298, "step": 21510 }, { "epoch": 1.2635781809641242, "grad_norm": 2.7299389839172363, "learning_rate": 7.173265082823925e-06, "loss": 0.4449, "step": 21520 }, { "epoch": 1.2641653455463566, "grad_norm": 6.109206676483154, "learning_rate": 7.1701880997028846e-06, "loss": 0.5422, "step": 21530 }, { "epoch": 1.264752510128589, "grad_norm": 9.076996803283691, "learning_rate": 7.167110103598039e-06, "loss": 0.4752, "step": 21540 }, { "epoch": 1.2653396747108214, "grad_norm": 3.326815605163574, "learning_rate": 7.164031095946111e-06, "loss": 0.4727, "step": 21550 }, { "epoch": 1.2659268392930538, "grad_norm": 3.802741050720215, "learning_rate": 7.160951078184295e-06, "loss": 0.4488, "step": 21560 }, { "epoch": 1.2665140038752862, "grad_norm": 3.2369887828826904, "learning_rate": 7.1578700517502615e-06, "loss": 0.3675, "step": 21570 }, { "epoch": 1.2671011684575186, "grad_norm": 3.468644142150879, "learning_rate": 7.154788018082146e-06, "loss": 0.468, "step": 21580 }, { "epoch": 1.267688333039751, "grad_norm": 1.9000482559204102, "learning_rate": 7.151704978618557e-06, "loss": 0.4294, "step": 21590 }, { "epoch": 1.2682754976219834, "grad_norm": 1.1503009796142578, "learning_rate": 7.148620934798574e-06, "loss": 0.474, "step": 21600 }, { "epoch": 1.2688626622042158, "grad_norm": 5.4391984939575195, "learning_rate": 7.145535888061739e-06, "loss": 0.4036, "step": 21610 }, { "epoch": 1.2694498267864482, "grad_norm": 15.136218070983887, "learning_rate": 7.142449839848071e-06, "loss": 0.4462, "step": 21620 }, { "epoch": 1.2700369913686806, "grad_norm": 5.77573299407959, "learning_rate": 7.13936279159805e-06, "loss": 0.3847, "step": 21630 }, { "epoch": 1.270624155950913, "grad_norm": 5.810324192047119, "learning_rate": 7.136274744752625e-06, "loss": 0.4689, "step": 21640 }, { "epoch": 1.2712113205331455, "grad_norm": 1.760392665863037, "learning_rate": 7.133185700753211e-06, "loss": 0.4228, "step": 21650 }, { "epoch": 1.2717984851153779, "grad_norm": 2.1347544193267822, "learning_rate": 7.1300956610416875e-06, "loss": 0.443, "step": 21660 }, { "epoch": 1.2723856496976103, "grad_norm": 1.179854154586792, "learning_rate": 7.127004627060402e-06, "loss": 0.3635, "step": 21670 }, { "epoch": 1.2729728142798427, "grad_norm": 9.43796157836914, "learning_rate": 7.12391260025216e-06, "loss": 0.5055, "step": 21680 }, { "epoch": 1.273559978862075, "grad_norm": 2.5712690353393555, "learning_rate": 7.120819582060237e-06, "loss": 0.4603, "step": 21690 }, { "epoch": 1.2741471434443075, "grad_norm": 3.2340927124023438, "learning_rate": 7.1177255739283676e-06, "loss": 0.5016, "step": 21700 }, { "epoch": 1.2747343080265399, "grad_norm": 3.151115655899048, "learning_rate": 7.114630577300751e-06, "loss": 0.485, "step": 21710 }, { "epoch": 1.2753214726087723, "grad_norm": 5.496007442474365, "learning_rate": 7.1115345936220415e-06, "loss": 0.481, "step": 21720 }, { "epoch": 1.2759086371910047, "grad_norm": 3.0337204933166504, "learning_rate": 7.108437624337362e-06, "loss": 0.4838, "step": 21730 }, { "epoch": 1.276495801773237, "grad_norm": 3.3365840911865234, "learning_rate": 7.105339670892293e-06, "loss": 0.3261, "step": 21740 }, { "epoch": 1.2770829663554695, "grad_norm": 4.1701178550720215, "learning_rate": 7.102240734732872e-06, "loss": 0.4518, "step": 21750 }, { "epoch": 1.2776701309377019, "grad_norm": 11.217915534973145, "learning_rate": 7.099140817305596e-06, "loss": 0.4028, "step": 21760 }, { "epoch": 1.2782572955199343, "grad_norm": 12.988170623779297, "learning_rate": 7.096039920057421e-06, "loss": 0.4045, "step": 21770 }, { "epoch": 1.2788444601021667, "grad_norm": 5.425238132476807, "learning_rate": 7.092938044435762e-06, "loss": 0.4059, "step": 21780 }, { "epoch": 1.279431624684399, "grad_norm": 3.748117208480835, "learning_rate": 7.0898351918884865e-06, "loss": 0.3526, "step": 21790 }, { "epoch": 1.2800187892666315, "grad_norm": 5.674234867095947, "learning_rate": 7.086731363863921e-06, "loss": 0.3613, "step": 21800 }, { "epoch": 1.2806059538488639, "grad_norm": 3.109699010848999, "learning_rate": 7.083626561810848e-06, "loss": 0.541, "step": 21810 }, { "epoch": 1.2811931184310963, "grad_norm": 4.747309684753418, "learning_rate": 7.080520787178501e-06, "loss": 0.471, "step": 21820 }, { "epoch": 1.2817802830133287, "grad_norm": 5.320910453796387, "learning_rate": 7.077414041416572e-06, "loss": 0.3599, "step": 21830 }, { "epoch": 1.282367447595561, "grad_norm": 5.468310356140137, "learning_rate": 7.074306325975202e-06, "loss": 0.4539, "step": 21840 }, { "epoch": 1.2829546121777935, "grad_norm": 2.0604143142700195, "learning_rate": 7.0711976423049885e-06, "loss": 0.4294, "step": 21850 }, { "epoch": 1.283541776760026, "grad_norm": 4.53375244140625, "learning_rate": 7.068087991856977e-06, "loss": 0.3633, "step": 21860 }, { "epoch": 1.2841289413422583, "grad_norm": 3.1650729179382324, "learning_rate": 7.064977376082669e-06, "loss": 0.405, "step": 21870 }, { "epoch": 1.2847161059244907, "grad_norm": 3.2903716564178467, "learning_rate": 7.061865796434011e-06, "loss": 0.5433, "step": 21880 }, { "epoch": 1.285303270506723, "grad_norm": 1.4796658754348755, "learning_rate": 7.058753254363404e-06, "loss": 0.361, "step": 21890 }, { "epoch": 1.2858904350889555, "grad_norm": 3.3122060298919678, "learning_rate": 7.055639751323698e-06, "loss": 0.5621, "step": 21900 }, { "epoch": 1.286477599671188, "grad_norm": 2.2787251472473145, "learning_rate": 7.052525288768186e-06, "loss": 0.384, "step": 21910 }, { "epoch": 1.2870647642534203, "grad_norm": 8.101844787597656, "learning_rate": 7.049409868150617e-06, "loss": 0.4362, "step": 21920 }, { "epoch": 1.2876519288356527, "grad_norm": 6.020942211151123, "learning_rate": 7.0462934909251816e-06, "loss": 0.4578, "step": 21930 }, { "epoch": 1.288239093417885, "grad_norm": 2.5020694732666016, "learning_rate": 7.043176158546518e-06, "loss": 0.5412, "step": 21940 }, { "epoch": 1.2888262580001175, "grad_norm": 5.969566822052002, "learning_rate": 7.040057872469713e-06, "loss": 0.4861, "step": 21950 }, { "epoch": 1.2894134225823497, "grad_norm": 4.155814170837402, "learning_rate": 7.036938634150293e-06, "loss": 0.424, "step": 21960 }, { "epoch": 1.2900005871645823, "grad_norm": 3.0457077026367188, "learning_rate": 7.033818445044235e-06, "loss": 0.4087, "step": 21970 }, { "epoch": 1.2905877517468145, "grad_norm": 1.3768473863601685, "learning_rate": 7.030697306607955e-06, "loss": 0.3568, "step": 21980 }, { "epoch": 1.2911749163290471, "grad_norm": 3.748028516769409, "learning_rate": 7.027575220298317e-06, "loss": 0.5053, "step": 21990 }, { "epoch": 1.2917620809112793, "grad_norm": 2.5322554111480713, "learning_rate": 7.024452187572621e-06, "loss": 0.422, "step": 22000 }, { "epoch": 1.292349245493512, "grad_norm": 1.4053869247436523, "learning_rate": 7.021328209888615e-06, "loss": 0.4983, "step": 22010 }, { "epoch": 1.2929364100757441, "grad_norm": 4.774690628051758, "learning_rate": 7.018203288704484e-06, "loss": 0.4309, "step": 22020 }, { "epoch": 1.2935235746579767, "grad_norm": 2.648242712020874, "learning_rate": 7.015077425478855e-06, "loss": 0.4599, "step": 22030 }, { "epoch": 1.294110739240209, "grad_norm": 3.0224192142486572, "learning_rate": 7.011950621670795e-06, "loss": 0.561, "step": 22040 }, { "epoch": 1.2946979038224415, "grad_norm": 3.4973912239074707, "learning_rate": 7.0088228787398095e-06, "loss": 0.4755, "step": 22050 }, { "epoch": 1.2952850684046737, "grad_norm": 2.4723196029663086, "learning_rate": 7.005694198145843e-06, "loss": 0.4419, "step": 22060 }, { "epoch": 1.2958722329869063, "grad_norm": 3.325340509414673, "learning_rate": 7.0025645813492756e-06, "loss": 0.4581, "step": 22070 }, { "epoch": 1.2964593975691385, "grad_norm": 2.944636106491089, "learning_rate": 6.999434029810927e-06, "loss": 0.4034, "step": 22080 }, { "epoch": 1.2970465621513712, "grad_norm": 3.0906054973602295, "learning_rate": 6.996302544992051e-06, "loss": 0.4487, "step": 22090 }, { "epoch": 1.2976337267336033, "grad_norm": 2.345013380050659, "learning_rate": 6.993170128354339e-06, "loss": 0.4743, "step": 22100 }, { "epoch": 1.298220891315836, "grad_norm": 2.9957525730133057, "learning_rate": 6.990036781359918e-06, "loss": 0.5606, "step": 22110 }, { "epoch": 1.2988080558980681, "grad_norm": 8.629894256591797, "learning_rate": 6.986902505471344e-06, "loss": 0.5419, "step": 22120 }, { "epoch": 1.2993952204803008, "grad_norm": 9.381072044372559, "learning_rate": 6.983767302151614e-06, "loss": 0.3058, "step": 22130 }, { "epoch": 1.299982385062533, "grad_norm": 2.588670253753662, "learning_rate": 6.980631172864152e-06, "loss": 0.346, "step": 22140 }, { "epoch": 1.3005695496447656, "grad_norm": 3.2427632808685303, "learning_rate": 6.977494119072819e-06, "loss": 0.4205, "step": 22150 }, { "epoch": 1.3011567142269977, "grad_norm": 5.596742630004883, "learning_rate": 6.974356142241901e-06, "loss": 0.4762, "step": 22160 }, { "epoch": 1.3017438788092301, "grad_norm": 3.1878299713134766, "learning_rate": 6.971217243836122e-06, "loss": 0.4891, "step": 22170 }, { "epoch": 1.3023310433914626, "grad_norm": 4.242136478424072, "learning_rate": 6.96807742532063e-06, "loss": 0.3225, "step": 22180 }, { "epoch": 1.302918207973695, "grad_norm": 9.158321380615234, "learning_rate": 6.964936688161009e-06, "loss": 0.523, "step": 22190 }, { "epoch": 1.3035053725559274, "grad_norm": 8.4746675491333, "learning_rate": 6.961795033823268e-06, "loss": 0.6427, "step": 22200 }, { "epoch": 1.3040925371381598, "grad_norm": 1.3174047470092773, "learning_rate": 6.958652463773841e-06, "loss": 0.4448, "step": 22210 }, { "epoch": 1.3046797017203922, "grad_norm": 2.48032808303833, "learning_rate": 6.955508979479596e-06, "loss": 0.5038, "step": 22220 }, { "epoch": 1.3052668663026246, "grad_norm": 5.629806041717529, "learning_rate": 6.952364582407822e-06, "loss": 0.5622, "step": 22230 }, { "epoch": 1.305854030884857, "grad_norm": 1.9896920919418335, "learning_rate": 6.94921927402624e-06, "loss": 0.3317, "step": 22240 }, { "epoch": 1.3064411954670894, "grad_norm": 19.10242462158203, "learning_rate": 6.946073055802991e-06, "loss": 0.4064, "step": 22250 }, { "epoch": 1.3070283600493218, "grad_norm": 2.5497372150421143, "learning_rate": 6.942925929206641e-06, "loss": 0.4012, "step": 22260 }, { "epoch": 1.3076155246315542, "grad_norm": 2.158818483352661, "learning_rate": 6.939777895706186e-06, "loss": 0.4128, "step": 22270 }, { "epoch": 1.3082026892137866, "grad_norm": 3.4462087154388428, "learning_rate": 6.9366289567710365e-06, "loss": 0.3598, "step": 22280 }, { "epoch": 1.308789853796019, "grad_norm": 11.27564811706543, "learning_rate": 6.933479113871033e-06, "loss": 0.2516, "step": 22290 }, { "epoch": 1.3093770183782514, "grad_norm": 2.7641472816467285, "learning_rate": 6.930328368476433e-06, "loss": 0.3188, "step": 22300 }, { "epoch": 1.3099641829604838, "grad_norm": 2.1628940105438232, "learning_rate": 6.927176722057921e-06, "loss": 0.4375, "step": 22310 }, { "epoch": 1.3105513475427162, "grad_norm": 3.3814444541931152, "learning_rate": 6.9240241760865944e-06, "loss": 0.4141, "step": 22320 }, { "epoch": 1.3111385121249486, "grad_norm": 2.1296396255493164, "learning_rate": 6.920870732033976e-06, "loss": 0.3526, "step": 22330 }, { "epoch": 1.311725676707181, "grad_norm": 7.541334629058838, "learning_rate": 6.917716391372007e-06, "loss": 0.4235, "step": 22340 }, { "epoch": 1.3123128412894134, "grad_norm": 1.679253101348877, "learning_rate": 6.914561155573044e-06, "loss": 0.3971, "step": 22350 }, { "epoch": 1.3129000058716458, "grad_norm": 4.80448579788208, "learning_rate": 6.911405026109868e-06, "loss": 0.456, "step": 22360 }, { "epoch": 1.3134871704538782, "grad_norm": 2.5743587017059326, "learning_rate": 6.908248004455668e-06, "loss": 0.7033, "step": 22370 }, { "epoch": 1.3140743350361106, "grad_norm": 5.420496463775635, "learning_rate": 6.905090092084059e-06, "loss": 0.3852, "step": 22380 }, { "epoch": 1.314661499618343, "grad_norm": 3.436478614807129, "learning_rate": 6.9019312904690645e-06, "loss": 0.4405, "step": 22390 }, { "epoch": 1.3152486642005754, "grad_norm": 7.438251972198486, "learning_rate": 6.8987716010851265e-06, "loss": 0.4763, "step": 22400 }, { "epoch": 1.3158358287828078, "grad_norm": 14.294803619384766, "learning_rate": 6.8956110254071e-06, "loss": 0.5165, "step": 22410 }, { "epoch": 1.3164229933650402, "grad_norm": 4.346827507019043, "learning_rate": 6.892449564910256e-06, "loss": 0.4236, "step": 22420 }, { "epoch": 1.3170101579472726, "grad_norm": 2.404667377471924, "learning_rate": 6.889287221070276e-06, "loss": 0.5179, "step": 22430 }, { "epoch": 1.317597322529505, "grad_norm": 3.630202531814575, "learning_rate": 6.886123995363254e-06, "loss": 0.4401, "step": 22440 }, { "epoch": 1.3181844871117374, "grad_norm": 2.829294443130493, "learning_rate": 6.882959889265697e-06, "loss": 0.4154, "step": 22450 }, { "epoch": 1.3187716516939698, "grad_norm": 3.554858684539795, "learning_rate": 6.879794904254522e-06, "loss": 0.447, "step": 22460 }, { "epoch": 1.3193588162762022, "grad_norm": 3.727357864379883, "learning_rate": 6.8766290418070565e-06, "loss": 0.3749, "step": 22470 }, { "epoch": 1.3199459808584346, "grad_norm": 7.654761791229248, "learning_rate": 6.8734623034010376e-06, "loss": 0.3791, "step": 22480 }, { "epoch": 1.320533145440667, "grad_norm": 4.754906177520752, "learning_rate": 6.87029469051461e-06, "loss": 0.3775, "step": 22490 }, { "epoch": 1.3211203100228994, "grad_norm": 4.250811576843262, "learning_rate": 6.867126204626331e-06, "loss": 0.5211, "step": 22500 }, { "epoch": 1.3217074746051318, "grad_norm": 4.793830394744873, "learning_rate": 6.863956847215159e-06, "loss": 0.3865, "step": 22510 }, { "epoch": 1.3222946391873642, "grad_norm": 9.178940773010254, "learning_rate": 6.860786619760465e-06, "loss": 0.4026, "step": 22520 }, { "epoch": 1.3228818037695966, "grad_norm": 6.2276787757873535, "learning_rate": 6.857615523742019e-06, "loss": 0.6303, "step": 22530 }, { "epoch": 1.323468968351829, "grad_norm": 6.935186862945557, "learning_rate": 6.8544435606400075e-06, "loss": 0.4097, "step": 22540 }, { "epoch": 1.3240561329340614, "grad_norm": 2.0001611709594727, "learning_rate": 6.851270731935011e-06, "loss": 0.4029, "step": 22550 }, { "epoch": 1.3246432975162938, "grad_norm": 3.885746955871582, "learning_rate": 6.848097039108018e-06, "loss": 0.3656, "step": 22560 }, { "epoch": 1.3252304620985262, "grad_norm": 6.206722259521484, "learning_rate": 6.844922483640422e-06, "loss": 0.3744, "step": 22570 }, { "epoch": 1.3258176266807586, "grad_norm": 1.671142339706421, "learning_rate": 6.841747067014017e-06, "loss": 0.3904, "step": 22580 }, { "epoch": 1.326404791262991, "grad_norm": 2.4858272075653076, "learning_rate": 6.838570790711002e-06, "loss": 0.4949, "step": 22590 }, { "epoch": 1.3269919558452234, "grad_norm": 2.7932775020599365, "learning_rate": 6.835393656213972e-06, "loss": 0.4937, "step": 22600 }, { "epoch": 1.3275791204274558, "grad_norm": 5.400173664093018, "learning_rate": 6.832215665005926e-06, "loss": 0.4964, "step": 22610 }, { "epoch": 1.3281662850096883, "grad_norm": 13.703007698059082, "learning_rate": 6.829036818570263e-06, "loss": 0.3982, "step": 22620 }, { "epoch": 1.3287534495919207, "grad_norm": 0.9768615961074829, "learning_rate": 6.825857118390781e-06, "loss": 0.4487, "step": 22630 }, { "epoch": 1.329340614174153, "grad_norm": 6.655693531036377, "learning_rate": 6.822676565951677e-06, "loss": 0.4261, "step": 22640 }, { "epoch": 1.3299277787563855, "grad_norm": 3.1268248558044434, "learning_rate": 6.819495162737541e-06, "loss": 0.3707, "step": 22650 }, { "epoch": 1.3305149433386179, "grad_norm": 1.9814400672912598, "learning_rate": 6.816312910233369e-06, "loss": 0.3877, "step": 22660 }, { "epoch": 1.3311021079208503, "grad_norm": 14.806632995605469, "learning_rate": 6.813129809924545e-06, "loss": 0.4288, "step": 22670 }, { "epoch": 1.3316892725030827, "grad_norm": 5.295779705047607, "learning_rate": 6.809945863296854e-06, "loss": 0.4307, "step": 22680 }, { "epoch": 1.332276437085315, "grad_norm": 1.6687284708023071, "learning_rate": 6.806761071836473e-06, "loss": 0.4013, "step": 22690 }, { "epoch": 1.3328636016675475, "grad_norm": 3.045065402984619, "learning_rate": 6.803575437029977e-06, "loss": 0.6484, "step": 22700 }, { "epoch": 1.3334507662497799, "grad_norm": 5.075021743774414, "learning_rate": 6.800388960364329e-06, "loss": 0.395, "step": 22710 }, { "epoch": 1.3340379308320123, "grad_norm": 2.322606325149536, "learning_rate": 6.79720164332689e-06, "loss": 0.4211, "step": 22720 }, { "epoch": 1.3346250954142447, "grad_norm": 2.8942770957946777, "learning_rate": 6.794013487405412e-06, "loss": 0.4655, "step": 22730 }, { "epoch": 1.335212259996477, "grad_norm": 3.2061381340026855, "learning_rate": 6.790824494088037e-06, "loss": 0.3054, "step": 22740 }, { "epoch": 1.3357994245787095, "grad_norm": 2.8258233070373535, "learning_rate": 6.7876346648633e-06, "loss": 0.4271, "step": 22750 }, { "epoch": 1.3363865891609419, "grad_norm": 3.293452501296997, "learning_rate": 6.784444001220123e-06, "loss": 0.4379, "step": 22760 }, { "epoch": 1.3369737537431743, "grad_norm": 2.0273444652557373, "learning_rate": 6.78125250464782e-06, "loss": 0.3571, "step": 22770 }, { "epoch": 1.3375609183254067, "grad_norm": 6.359280109405518, "learning_rate": 6.778060176636096e-06, "loss": 0.4971, "step": 22780 }, { "epoch": 1.338148082907639, "grad_norm": 3.9647462368011475, "learning_rate": 6.77486701867504e-06, "loss": 0.4365, "step": 22790 }, { "epoch": 1.3387352474898715, "grad_norm": 2.045466661453247, "learning_rate": 6.771673032255129e-06, "loss": 0.4438, "step": 22800 }, { "epoch": 1.3393224120721037, "grad_norm": 3.890037775039673, "learning_rate": 6.768478218867227e-06, "loss": 0.4699, "step": 22810 }, { "epoch": 1.3399095766543363, "grad_norm": 3.221949815750122, "learning_rate": 6.765282580002588e-06, "loss": 0.4234, "step": 22820 }, { "epoch": 1.3404967412365685, "grad_norm": 4.4271345138549805, "learning_rate": 6.762086117152844e-06, "loss": 0.4287, "step": 22830 }, { "epoch": 1.341083905818801, "grad_norm": 3.247077703475952, "learning_rate": 6.758888831810018e-06, "loss": 0.4242, "step": 22840 }, { "epoch": 1.3416710704010333, "grad_norm": 2.8842194080352783, "learning_rate": 6.755690725466511e-06, "loss": 0.5171, "step": 22850 }, { "epoch": 1.342258234983266, "grad_norm": 2.8400888442993164, "learning_rate": 6.752491799615115e-06, "loss": 0.4761, "step": 22860 }, { "epoch": 1.342845399565498, "grad_norm": 1.1522066593170166, "learning_rate": 6.749292055748999e-06, "loss": 0.4181, "step": 22870 }, { "epoch": 1.3434325641477307, "grad_norm": 10.521533966064453, "learning_rate": 6.7460914953617126e-06, "loss": 0.3574, "step": 22880 }, { "epoch": 1.344019728729963, "grad_norm": 2.71880841255188, "learning_rate": 6.742890119947191e-06, "loss": 0.6203, "step": 22890 }, { "epoch": 1.3446068933121955, "grad_norm": 1.3520864248275757, "learning_rate": 6.739687930999748e-06, "loss": 0.4799, "step": 22900 }, { "epoch": 1.3451940578944277, "grad_norm": 4.344876766204834, "learning_rate": 6.736484930014076e-06, "loss": 0.43, "step": 22910 }, { "epoch": 1.3457812224766603, "grad_norm": 3.356558322906494, "learning_rate": 6.733281118485246e-06, "loss": 0.3374, "step": 22920 }, { "epoch": 1.3463683870588925, "grad_norm": 3.952302932739258, "learning_rate": 6.73007649790871e-06, "loss": 0.5117, "step": 22930 }, { "epoch": 1.3469555516411251, "grad_norm": 5.042263031005859, "learning_rate": 6.7268710697802985e-06, "loss": 0.554, "step": 22940 }, { "epoch": 1.3475427162233573, "grad_norm": 12.077207565307617, "learning_rate": 6.723664835596213e-06, "loss": 0.3815, "step": 22950 }, { "epoch": 1.34812988080559, "grad_norm": 6.825807571411133, "learning_rate": 6.720457796853037e-06, "loss": 0.552, "step": 22960 }, { "epoch": 1.3487170453878221, "grad_norm": 8.154046058654785, "learning_rate": 6.717249955047727e-06, "loss": 0.6315, "step": 22970 }, { "epoch": 1.3493042099700547, "grad_norm": 5.845921516418457, "learning_rate": 6.7140413116776145e-06, "loss": 0.4041, "step": 22980 }, { "epoch": 1.349891374552287, "grad_norm": 2.5451581478118896, "learning_rate": 6.710831868240406e-06, "loss": 0.5971, "step": 22990 }, { "epoch": 1.3504785391345195, "grad_norm": 2.9580724239349365, "learning_rate": 6.707621626234182e-06, "loss": 0.3519, "step": 23000 }, { "epoch": 1.3510657037167517, "grad_norm": 2.819089889526367, "learning_rate": 6.7044105871573915e-06, "loss": 0.4412, "step": 23010 }, { "epoch": 1.3516528682989841, "grad_norm": 3.068232536315918, "learning_rate": 6.701198752508863e-06, "loss": 0.3126, "step": 23020 }, { "epoch": 1.3522400328812165, "grad_norm": 2.562300443649292, "learning_rate": 6.69798612378779e-06, "loss": 0.3827, "step": 23030 }, { "epoch": 1.352827197463449, "grad_norm": 4.148472785949707, "learning_rate": 6.694772702493737e-06, "loss": 0.4743, "step": 23040 }, { "epoch": 1.3534143620456813, "grad_norm": 7.558079719543457, "learning_rate": 6.691558490126643e-06, "loss": 0.4492, "step": 23050 }, { "epoch": 1.3540015266279137, "grad_norm": 2.580751419067383, "learning_rate": 6.6883434881868125e-06, "loss": 0.4613, "step": 23060 }, { "epoch": 1.3545886912101461, "grad_norm": 2.2975454330444336, "learning_rate": 6.68512769817492e-06, "loss": 0.394, "step": 23070 }, { "epoch": 1.3551758557923785, "grad_norm": 8.717642784118652, "learning_rate": 6.681911121592006e-06, "loss": 0.4321, "step": 23080 }, { "epoch": 1.355763020374611, "grad_norm": 11.824811935424805, "learning_rate": 6.678693759939481e-06, "loss": 0.3966, "step": 23090 }, { "epoch": 1.3563501849568433, "grad_norm": 1.972705364227295, "learning_rate": 6.675475614719119e-06, "loss": 0.3738, "step": 23100 }, { "epoch": 1.3569373495390757, "grad_norm": 8.456223487854004, "learning_rate": 6.672256687433063e-06, "loss": 0.496, "step": 23110 }, { "epoch": 1.3575245141213081, "grad_norm": 16.53792953491211, "learning_rate": 6.669036979583819e-06, "loss": 0.376, "step": 23120 }, { "epoch": 1.3581116787035405, "grad_norm": 3.923617124557495, "learning_rate": 6.665816492674254e-06, "loss": 0.3679, "step": 23130 }, { "epoch": 1.358698843285773, "grad_norm": 2.542341947555542, "learning_rate": 6.662595228207608e-06, "loss": 0.4077, "step": 23140 }, { "epoch": 1.3592860078680054, "grad_norm": 2.8824241161346436, "learning_rate": 6.6593731876874735e-06, "loss": 0.4482, "step": 23150 }, { "epoch": 1.3598731724502378, "grad_norm": 3.03920316696167, "learning_rate": 6.656150372617812e-06, "loss": 0.3342, "step": 23160 }, { "epoch": 1.3604603370324702, "grad_norm": 5.678842544555664, "learning_rate": 6.652926784502945e-06, "loss": 0.3857, "step": 23170 }, { "epoch": 1.3610475016147026, "grad_norm": 1.754739761352539, "learning_rate": 6.649702424847554e-06, "loss": 0.4207, "step": 23180 }, { "epoch": 1.361634666196935, "grad_norm": 3.6169652938842773, "learning_rate": 6.64647729515668e-06, "loss": 0.4489, "step": 23190 }, { "epoch": 1.3622218307791674, "grad_norm": 4.789350509643555, "learning_rate": 6.643251396935726e-06, "loss": 0.4456, "step": 23200 }, { "epoch": 1.3628089953613998, "grad_norm": 3.971156358718872, "learning_rate": 6.640024731690449e-06, "loss": 0.6316, "step": 23210 }, { "epoch": 1.3633961599436322, "grad_norm": 1.199270486831665, "learning_rate": 6.6367973009269705e-06, "loss": 0.4407, "step": 23220 }, { "epoch": 1.3639833245258646, "grad_norm": 2.2876713275909424, "learning_rate": 6.6335691061517644e-06, "loss": 0.4197, "step": 23230 }, { "epoch": 1.364570489108097, "grad_norm": 2.819443702697754, "learning_rate": 6.630340148871663e-06, "loss": 0.4599, "step": 23240 }, { "epoch": 1.3651576536903294, "grad_norm": 1.9437600374221802, "learning_rate": 6.627110430593853e-06, "loss": 0.3255, "step": 23250 }, { "epoch": 1.3657448182725618, "grad_norm": 6.702314376831055, "learning_rate": 6.62387995282588e-06, "loss": 0.5563, "step": 23260 }, { "epoch": 1.3663319828547942, "grad_norm": 4.311091423034668, "learning_rate": 6.62064871707564e-06, "loss": 0.5268, "step": 23270 }, { "epoch": 1.3669191474370266, "grad_norm": 4.269912242889404, "learning_rate": 6.617416724851386e-06, "loss": 0.4739, "step": 23280 }, { "epoch": 1.367506312019259, "grad_norm": 11.390532493591309, "learning_rate": 6.6141839776617215e-06, "loss": 0.4135, "step": 23290 }, { "epoch": 1.3680934766014914, "grad_norm": 3.0307841300964355, "learning_rate": 6.610950477015603e-06, "loss": 0.5407, "step": 23300 }, { "epoch": 1.3686806411837238, "grad_norm": 3.7255699634552, "learning_rate": 6.607716224422341e-06, "loss": 0.3553, "step": 23310 }, { "epoch": 1.3692678057659562, "grad_norm": 15.24376106262207, "learning_rate": 6.604481221391593e-06, "loss": 0.5708, "step": 23320 }, { "epoch": 1.3698549703481886, "grad_norm": 3.021009922027588, "learning_rate": 6.601245469433371e-06, "loss": 0.4364, "step": 23330 }, { "epoch": 1.370442134930421, "grad_norm": 4.558887004852295, "learning_rate": 6.598008970058032e-06, "loss": 0.4284, "step": 23340 }, { "epoch": 1.3710292995126534, "grad_norm": 3.6423208713531494, "learning_rate": 6.594771724776287e-06, "loss": 0.3458, "step": 23350 }, { "epoch": 1.3716164640948858, "grad_norm": 1.786261796951294, "learning_rate": 6.591533735099192e-06, "loss": 0.4587, "step": 23360 }, { "epoch": 1.3722036286771182, "grad_norm": 3.461054801940918, "learning_rate": 6.588295002538151e-06, "loss": 0.4445, "step": 23370 }, { "epoch": 1.3727907932593506, "grad_norm": 3.3168210983276367, "learning_rate": 6.585055528604913e-06, "loss": 0.5044, "step": 23380 }, { "epoch": 1.373377957841583, "grad_norm": 2.7603516578674316, "learning_rate": 6.581815314811578e-06, "loss": 0.3793, "step": 23390 }, { "epoch": 1.3739651224238154, "grad_norm": 7.541206359863281, "learning_rate": 6.578574362670587e-06, "loss": 0.4627, "step": 23400 }, { "epoch": 1.3745522870060478, "grad_norm": 11.150015830993652, "learning_rate": 6.575332673694724e-06, "loss": 0.4743, "step": 23410 }, { "epoch": 1.3751394515882802, "grad_norm": 3.764577865600586, "learning_rate": 6.572090249397125e-06, "loss": 0.2649, "step": 23420 }, { "epoch": 1.3757266161705126, "grad_norm": 4.354966640472412, "learning_rate": 6.568847091291259e-06, "loss": 0.4608, "step": 23430 }, { "epoch": 1.376313780752745, "grad_norm": 5.032268524169922, "learning_rate": 6.565603200890947e-06, "loss": 0.408, "step": 23440 }, { "epoch": 1.3769009453349774, "grad_norm": 2.0962517261505127, "learning_rate": 6.562358579710344e-06, "loss": 0.4813, "step": 23450 }, { "epoch": 1.3774881099172098, "grad_norm": 1.8237136602401733, "learning_rate": 6.559113229263952e-06, "loss": 0.4157, "step": 23460 }, { "epoch": 1.3780752744994422, "grad_norm": 20.403921127319336, "learning_rate": 6.5558671510666074e-06, "loss": 0.4932, "step": 23470 }, { "epoch": 1.3786624390816746, "grad_norm": 2.791766881942749, "learning_rate": 6.552620346633492e-06, "loss": 0.3162, "step": 23480 }, { "epoch": 1.379249603663907, "grad_norm": 9.096355438232422, "learning_rate": 6.549372817480125e-06, "loss": 0.4579, "step": 23490 }, { "epoch": 1.3798367682461394, "grad_norm": 2.8790969848632812, "learning_rate": 6.546124565122362e-06, "loss": 0.2864, "step": 23500 }, { "epoch": 1.3804239328283718, "grad_norm": 1.873207926750183, "learning_rate": 6.542875591076399e-06, "loss": 0.3816, "step": 23510 }, { "epoch": 1.3810110974106042, "grad_norm": 4.583241939544678, "learning_rate": 6.539625896858766e-06, "loss": 0.4831, "step": 23520 }, { "epoch": 1.3815982619928366, "grad_norm": 5.0296783447265625, "learning_rate": 6.536375483986329e-06, "loss": 0.4665, "step": 23530 }, { "epoch": 1.382185426575069, "grad_norm": 2.5861566066741943, "learning_rate": 6.533124353976293e-06, "loss": 0.4602, "step": 23540 }, { "epoch": 1.3827725911573014, "grad_norm": 4.237636089324951, "learning_rate": 6.529872508346195e-06, "loss": 0.3423, "step": 23550 }, { "epoch": 1.3833597557395338, "grad_norm": 9.908822059631348, "learning_rate": 6.526619948613907e-06, "loss": 0.5369, "step": 23560 }, { "epoch": 1.3839469203217662, "grad_norm": 2.0417723655700684, "learning_rate": 6.523366676297632e-06, "loss": 0.4957, "step": 23570 }, { "epoch": 1.3845340849039987, "grad_norm": 1.4707847833633423, "learning_rate": 6.520112692915912e-06, "loss": 0.5844, "step": 23580 }, { "epoch": 1.385121249486231, "grad_norm": 6.250591278076172, "learning_rate": 6.516857999987612e-06, "loss": 0.2916, "step": 23590 }, { "epoch": 1.3857084140684635, "grad_norm": 2.9402925968170166, "learning_rate": 6.513602599031935e-06, "loss": 0.5577, "step": 23600 }, { "epoch": 1.3862955786506959, "grad_norm": 4.105756759643555, "learning_rate": 6.510346491568409e-06, "loss": 0.4317, "step": 23610 }, { "epoch": 1.3868827432329283, "grad_norm": 3.226363182067871, "learning_rate": 6.5070896791169e-06, "loss": 0.416, "step": 23620 }, { "epoch": 1.3874699078151607, "grad_norm": 2.395486831665039, "learning_rate": 6.503832163197594e-06, "loss": 0.4102, "step": 23630 }, { "epoch": 1.388057072397393, "grad_norm": 2.7570958137512207, "learning_rate": 6.500573945331013e-06, "loss": 0.5209, "step": 23640 }, { "epoch": 1.3886442369796255, "grad_norm": 23.511943817138672, "learning_rate": 6.497315027037998e-06, "loss": 0.507, "step": 23650 }, { "epoch": 1.3892314015618579, "grad_norm": 2.819927215576172, "learning_rate": 6.494055409839725e-06, "loss": 0.4006, "step": 23660 }, { "epoch": 1.3898185661440903, "grad_norm": 3.132539987564087, "learning_rate": 6.490795095257696e-06, "loss": 0.4413, "step": 23670 }, { "epoch": 1.3904057307263225, "grad_norm": 3.8276052474975586, "learning_rate": 6.48753408481373e-06, "loss": 0.3986, "step": 23680 }, { "epoch": 1.390992895308555, "grad_norm": 3.8810908794403076, "learning_rate": 6.48427238002998e-06, "loss": 0.3903, "step": 23690 }, { "epoch": 1.3915800598907873, "grad_norm": 3.2216897010803223, "learning_rate": 6.481009982428918e-06, "loss": 0.4897, "step": 23700 }, { "epoch": 1.3921672244730199, "grad_norm": 4.632569313049316, "learning_rate": 6.477746893533342e-06, "loss": 0.4057, "step": 23710 }, { "epoch": 1.392754389055252, "grad_norm": 2.6212682723999023, "learning_rate": 6.474483114866373e-06, "loss": 0.4991, "step": 23720 }, { "epoch": 1.3933415536374847, "grad_norm": 2.6017215251922607, "learning_rate": 6.47121864795145e-06, "loss": 0.4138, "step": 23730 }, { "epoch": 1.3939287182197169, "grad_norm": 6.3842573165893555, "learning_rate": 6.4679534943123364e-06, "loss": 0.4486, "step": 23740 }, { "epoch": 1.3945158828019495, "grad_norm": 3.3752365112304688, "learning_rate": 6.464687655473118e-06, "loss": 0.4655, "step": 23750 }, { "epoch": 1.3951030473841817, "grad_norm": 5.463301181793213, "learning_rate": 6.461421132958195e-06, "loss": 0.4587, "step": 23760 }, { "epoch": 1.3956902119664143, "grad_norm": 1.3469644784927368, "learning_rate": 6.45815392829229e-06, "loss": 0.5805, "step": 23770 }, { "epoch": 1.3962773765486465, "grad_norm": 5.706088542938232, "learning_rate": 6.4548860430004465e-06, "loss": 0.4504, "step": 23780 }, { "epoch": 1.396864541130879, "grad_norm": 1.301403522491455, "learning_rate": 6.451617478608022e-06, "loss": 0.3969, "step": 23790 }, { "epoch": 1.3974517057131113, "grad_norm": 2.760868549346924, "learning_rate": 6.448348236640691e-06, "loss": 0.3623, "step": 23800 }, { "epoch": 1.398038870295344, "grad_norm": 2.1987035274505615, "learning_rate": 6.445078318624446e-06, "loss": 0.4616, "step": 23810 }, { "epoch": 1.398626034877576, "grad_norm": 3.722188711166382, "learning_rate": 6.441807726085593e-06, "loss": 0.4566, "step": 23820 }, { "epoch": 1.3992131994598087, "grad_norm": 12.540871620178223, "learning_rate": 6.438536460550757e-06, "loss": 0.5001, "step": 23830 }, { "epoch": 1.399800364042041, "grad_norm": 4.049479961395264, "learning_rate": 6.435264523546872e-06, "loss": 0.3725, "step": 23840 }, { "epoch": 1.4003875286242735, "grad_norm": 4.359329700469971, "learning_rate": 6.431991916601187e-06, "loss": 0.3204, "step": 23850 }, { "epoch": 1.4009746932065057, "grad_norm": 2.3956925868988037, "learning_rate": 6.4287186412412665e-06, "loss": 0.4865, "step": 23860 }, { "epoch": 1.4015618577887383, "grad_norm": 5.448425769805908, "learning_rate": 6.425444698994983e-06, "loss": 0.5388, "step": 23870 }, { "epoch": 1.4021490223709705, "grad_norm": 3.3587937355041504, "learning_rate": 6.422170091390525e-06, "loss": 0.516, "step": 23880 }, { "epoch": 1.402736186953203, "grad_norm": 2.876870632171631, "learning_rate": 6.418894819956386e-06, "loss": 0.4453, "step": 23890 }, { "epoch": 1.4033233515354353, "grad_norm": 6.457520484924316, "learning_rate": 6.415618886221376e-06, "loss": 0.4438, "step": 23900 }, { "epoch": 1.4039105161176677, "grad_norm": 6.530428886413574, "learning_rate": 6.412342291714604e-06, "loss": 0.4691, "step": 23910 }, { "epoch": 1.4044976806999, "grad_norm": 5.926314830780029, "learning_rate": 6.4090650379654984e-06, "loss": 0.4618, "step": 23920 }, { "epoch": 1.4050848452821325, "grad_norm": 3.8415064811706543, "learning_rate": 6.4057871265037905e-06, "loss": 0.4373, "step": 23930 }, { "epoch": 1.405672009864365, "grad_norm": 2.2847461700439453, "learning_rate": 6.402508558859517e-06, "loss": 0.4015, "step": 23940 }, { "epoch": 1.4062591744465973, "grad_norm": 2.1351003646850586, "learning_rate": 6.3992293365630255e-06, "loss": 0.3755, "step": 23950 }, { "epoch": 1.4068463390288297, "grad_norm": 2.6379518508911133, "learning_rate": 6.395949461144963e-06, "loss": 0.3767, "step": 23960 }, { "epoch": 1.4074335036110621, "grad_norm": 4.923498630523682, "learning_rate": 6.392668934136286e-06, "loss": 0.4825, "step": 23970 }, { "epoch": 1.4080206681932945, "grad_norm": 7.580397605895996, "learning_rate": 6.389387757068253e-06, "loss": 0.4687, "step": 23980 }, { "epoch": 1.408607832775527, "grad_norm": 2.356821060180664, "learning_rate": 6.3861059314724286e-06, "loss": 0.432, "step": 23990 }, { "epoch": 1.4091949973577593, "grad_norm": 5.1457719802856445, "learning_rate": 6.382823458880677e-06, "loss": 0.3863, "step": 24000 }, { "epoch": 1.4097821619399917, "grad_norm": 2.8524997234344482, "learning_rate": 6.379540340825164e-06, "loss": 0.3816, "step": 24010 }, { "epoch": 1.4103693265222241, "grad_norm": 4.425656795501709, "learning_rate": 6.376256578838362e-06, "loss": 0.3973, "step": 24020 }, { "epoch": 1.4109564911044565, "grad_norm": 3.5542900562286377, "learning_rate": 6.372972174453038e-06, "loss": 0.4355, "step": 24030 }, { "epoch": 1.411543655686689, "grad_norm": 3.5233447551727295, "learning_rate": 6.369687129202263e-06, "loss": 0.3535, "step": 24040 }, { "epoch": 1.4121308202689213, "grad_norm": 3.794671058654785, "learning_rate": 6.366401444619401e-06, "loss": 0.539, "step": 24050 }, { "epoch": 1.4127179848511537, "grad_norm": 1.9130620956420898, "learning_rate": 6.363115122238125e-06, "loss": 0.4317, "step": 24060 }, { "epoch": 1.4133051494333861, "grad_norm": 4.293254852294922, "learning_rate": 6.359828163592394e-06, "loss": 0.3952, "step": 24070 }, { "epoch": 1.4138923140156185, "grad_norm": 8.249225616455078, "learning_rate": 6.356540570216472e-06, "loss": 0.3109, "step": 24080 }, { "epoch": 1.414479478597851, "grad_norm": 1.7184360027313232, "learning_rate": 6.353252343644916e-06, "loss": 0.3706, "step": 24090 }, { "epoch": 1.4150666431800834, "grad_norm": 2.6761386394500732, "learning_rate": 6.349963485412579e-06, "loss": 0.4817, "step": 24100 }, { "epoch": 1.4156538077623158, "grad_norm": 8.244462966918945, "learning_rate": 6.346673997054611e-06, "loss": 0.4715, "step": 24110 }, { "epoch": 1.4162409723445482, "grad_norm": 4.5261454582214355, "learning_rate": 6.343383880106453e-06, "loss": 0.4525, "step": 24120 }, { "epoch": 1.4168281369267806, "grad_norm": 2.3963606357574463, "learning_rate": 6.340093136103839e-06, "loss": 0.2834, "step": 24130 }, { "epoch": 1.417415301509013, "grad_norm": 6.446377277374268, "learning_rate": 6.336801766582798e-06, "loss": 0.4422, "step": 24140 }, { "epoch": 1.4180024660912454, "grad_norm": 3.614774465560913, "learning_rate": 6.333509773079653e-06, "loss": 0.4549, "step": 24150 }, { "epoch": 1.4185896306734778, "grad_norm": 5.646047592163086, "learning_rate": 6.330217157131013e-06, "loss": 0.4345, "step": 24160 }, { "epoch": 1.4191767952557102, "grad_norm": 3.327476978302002, "learning_rate": 6.3269239202737796e-06, "loss": 0.5103, "step": 24170 }, { "epoch": 1.4197639598379426, "grad_norm": 3.3461825847625732, "learning_rate": 6.323630064045147e-06, "loss": 0.437, "step": 24180 }, { "epoch": 1.420351124420175, "grad_norm": 12.656195640563965, "learning_rate": 6.320335589982594e-06, "loss": 0.5177, "step": 24190 }, { "epoch": 1.4209382890024074, "grad_norm": 1.7281317710876465, "learning_rate": 6.3170404996238934e-06, "loss": 0.4113, "step": 24200 }, { "epoch": 1.4215254535846398, "grad_norm": 7.9957661628723145, "learning_rate": 6.3137447945070965e-06, "loss": 0.3983, "step": 24210 }, { "epoch": 1.4221126181668722, "grad_norm": 6.605863571166992, "learning_rate": 6.310448476170553e-06, "loss": 0.5195, "step": 24220 }, { "epoch": 1.4226997827491046, "grad_norm": 4.077634334564209, "learning_rate": 6.30715154615289e-06, "loss": 0.5291, "step": 24230 }, { "epoch": 1.423286947331337, "grad_norm": 5.812063694000244, "learning_rate": 6.303854005993024e-06, "loss": 0.5373, "step": 24240 }, { "epoch": 1.4238741119135694, "grad_norm": 4.3224663734436035, "learning_rate": 6.300555857230156e-06, "loss": 0.3173, "step": 24250 }, { "epoch": 1.4244612764958018, "grad_norm": 2.391465425491333, "learning_rate": 6.297257101403771e-06, "loss": 0.4239, "step": 24260 }, { "epoch": 1.4250484410780342, "grad_norm": 8.910714149475098, "learning_rate": 6.2939577400536365e-06, "loss": 0.3929, "step": 24270 }, { "epoch": 1.4256356056602666, "grad_norm": 2.696084499359131, "learning_rate": 6.290657774719803e-06, "loss": 0.3591, "step": 24280 }, { "epoch": 1.426222770242499, "grad_norm": 5.989938735961914, "learning_rate": 6.287357206942601e-06, "loss": 0.5209, "step": 24290 }, { "epoch": 1.4268099348247314, "grad_norm": 3.644343376159668, "learning_rate": 6.284056038262649e-06, "loss": 0.4695, "step": 24300 }, { "epoch": 1.4273970994069638, "grad_norm": 2.255422353744507, "learning_rate": 6.280754270220838e-06, "loss": 0.4107, "step": 24310 }, { "epoch": 1.4279842639891962, "grad_norm": 4.23624324798584, "learning_rate": 6.277451904358344e-06, "loss": 0.4492, "step": 24320 }, { "epoch": 1.4285714285714286, "grad_norm": 3.2587804794311523, "learning_rate": 6.274148942216616e-06, "loss": 0.5326, "step": 24330 }, { "epoch": 1.429158593153661, "grad_norm": 1.9570493698120117, "learning_rate": 6.270845385337391e-06, "loss": 0.3504, "step": 24340 }, { "epoch": 1.4297457577358934, "grad_norm": 3.6876776218414307, "learning_rate": 6.267541235262673e-06, "loss": 0.346, "step": 24350 }, { "epoch": 1.4303329223181258, "grad_norm": 6.776241779327393, "learning_rate": 6.2642364935347525e-06, "loss": 0.3982, "step": 24360 }, { "epoch": 1.4309200869003582, "grad_norm": 2.448617458343506, "learning_rate": 6.2609311616961886e-06, "loss": 0.4614, "step": 24370 }, { "epoch": 1.4315072514825906, "grad_norm": 5.690181732177734, "learning_rate": 6.257625241289819e-06, "loss": 0.4919, "step": 24380 }, { "epoch": 1.432094416064823, "grad_norm": 4.437642574310303, "learning_rate": 6.2543187338587555e-06, "loss": 0.5122, "step": 24390 }, { "epoch": 1.4326815806470554, "grad_norm": 5.039010047912598, "learning_rate": 6.251011640946386e-06, "loss": 0.4438, "step": 24400 }, { "epoch": 1.4332687452292878, "grad_norm": 7.829653739929199, "learning_rate": 6.247703964096371e-06, "loss": 0.4652, "step": 24410 }, { "epoch": 1.4338559098115202, "grad_norm": 2.576535701751709, "learning_rate": 6.244395704852638e-06, "loss": 0.5012, "step": 24420 }, { "epoch": 1.4344430743937526, "grad_norm": 3.119244337081909, "learning_rate": 6.241086864759396e-06, "loss": 0.3673, "step": 24430 }, { "epoch": 1.435030238975985, "grad_norm": 2.295107364654541, "learning_rate": 6.237777445361117e-06, "loss": 0.5473, "step": 24440 }, { "epoch": 1.4356174035582174, "grad_norm": 4.001691818237305, "learning_rate": 6.234467448202548e-06, "loss": 0.2856, "step": 24450 }, { "epoch": 1.4362045681404498, "grad_norm": 5.537625789642334, "learning_rate": 6.231156874828703e-06, "loss": 0.3462, "step": 24460 }, { "epoch": 1.4367917327226822, "grad_norm": 9.492250442504883, "learning_rate": 6.227845726784867e-06, "loss": 0.4035, "step": 24470 }, { "epoch": 1.4373788973049146, "grad_norm": 4.016399383544922, "learning_rate": 6.224534005616592e-06, "loss": 0.3743, "step": 24480 }, { "epoch": 1.437966061887147, "grad_norm": 9.298778533935547, "learning_rate": 6.2212217128696985e-06, "loss": 0.4097, "step": 24490 }, { "epoch": 1.4385532264693794, "grad_norm": 2.111768960952759, "learning_rate": 6.217908850090273e-06, "loss": 0.3825, "step": 24500 }, { "epoch": 1.4391403910516118, "grad_norm": 4.234512805938721, "learning_rate": 6.214595418824666e-06, "loss": 0.5113, "step": 24510 }, { "epoch": 1.4397275556338442, "grad_norm": 6.159545421600342, "learning_rate": 6.2112814206195e-06, "loss": 0.5097, "step": 24520 }, { "epoch": 1.4403147202160766, "grad_norm": 7.7601189613342285, "learning_rate": 6.207966857021654e-06, "loss": 0.4424, "step": 24530 }, { "epoch": 1.440901884798309, "grad_norm": 2.486480236053467, "learning_rate": 6.204651729578275e-06, "loss": 0.4052, "step": 24540 }, { "epoch": 1.4414890493805412, "grad_norm": 3.067476987838745, "learning_rate": 6.201336039836775e-06, "loss": 0.4269, "step": 24550 }, { "epoch": 1.4420762139627739, "grad_norm": 3.6346890926361084, "learning_rate": 6.198019789344825e-06, "loss": 0.4305, "step": 24560 }, { "epoch": 1.442663378545006, "grad_norm": 2.8512864112854004, "learning_rate": 6.194702979650359e-06, "loss": 0.5527, "step": 24570 }, { "epoch": 1.4432505431272387, "grad_norm": 4.141781330108643, "learning_rate": 6.1913856123015704e-06, "loss": 0.4868, "step": 24580 }, { "epoch": 1.4438377077094708, "grad_norm": 4.105470180511475, "learning_rate": 6.188067688846918e-06, "loss": 0.469, "step": 24590 }, { "epoch": 1.4444248722917035, "grad_norm": 3.0778560638427734, "learning_rate": 6.184749210835115e-06, "loss": 0.469, "step": 24600 }, { "epoch": 1.4450120368739356, "grad_norm": 14.389029502868652, "learning_rate": 6.181430179815135e-06, "loss": 0.4468, "step": 24610 }, { "epoch": 1.4455992014561683, "grad_norm": 2.8908796310424805, "learning_rate": 6.17811059733621e-06, "loss": 0.4417, "step": 24620 }, { "epoch": 1.4461863660384005, "grad_norm": 4.325832366943359, "learning_rate": 6.17479046494783e-06, "loss": 0.3579, "step": 24630 }, { "epoch": 1.446773530620633, "grad_norm": 3.269573926925659, "learning_rate": 6.17146978419974e-06, "loss": 0.3171, "step": 24640 }, { "epoch": 1.4473606952028653, "grad_norm": 3.394853115081787, "learning_rate": 6.168148556641943e-06, "loss": 0.4808, "step": 24650 }, { "epoch": 1.4479478597850979, "grad_norm": 2.374488592147827, "learning_rate": 6.164826783824694e-06, "loss": 0.4998, "step": 24660 }, { "epoch": 1.44853502436733, "grad_norm": 4.610802173614502, "learning_rate": 6.161504467298508e-06, "loss": 0.3438, "step": 24670 }, { "epoch": 1.4491221889495627, "grad_norm": 2.232447624206543, "learning_rate": 6.158181608614149e-06, "loss": 0.4249, "step": 24680 }, { "epoch": 1.4497093535317949, "grad_norm": 2.5312798023223877, "learning_rate": 6.154858209322635e-06, "loss": 0.4329, "step": 24690 }, { "epoch": 1.4502965181140275, "grad_norm": 4.2657151222229, "learning_rate": 6.151534270975237e-06, "loss": 0.4587, "step": 24700 }, { "epoch": 1.4508836826962597, "grad_norm": 3.181809663772583, "learning_rate": 6.148209795123479e-06, "loss": 0.4187, "step": 24710 }, { "epoch": 1.4514708472784923, "grad_norm": 2.7945125102996826, "learning_rate": 6.144884783319131e-06, "loss": 0.486, "step": 24720 }, { "epoch": 1.4520580118607245, "grad_norm": 5.692716598510742, "learning_rate": 6.141559237114222e-06, "loss": 0.5736, "step": 24730 }, { "epoch": 1.452645176442957, "grad_norm": 9.289669036865234, "learning_rate": 6.138233158061019e-06, "loss": 0.438, "step": 24740 }, { "epoch": 1.4532323410251893, "grad_norm": 1.9580464363098145, "learning_rate": 6.1349065477120486e-06, "loss": 0.4065, "step": 24750 }, { "epoch": 1.4538195056074217, "grad_norm": 2.4631593227386475, "learning_rate": 6.1315794076200775e-06, "loss": 0.397, "step": 24760 }, { "epoch": 1.454406670189654, "grad_norm": 3.922022819519043, "learning_rate": 6.1282517393381235e-06, "loss": 0.4447, "step": 24770 }, { "epoch": 1.4549938347718865, "grad_norm": 2.1580750942230225, "learning_rate": 6.124923544419451e-06, "loss": 0.5108, "step": 24780 }, { "epoch": 1.4555809993541189, "grad_norm": 1.369139313697815, "learning_rate": 6.1215948244175684e-06, "loss": 0.3873, "step": 24790 }, { "epoch": 1.4561681639363513, "grad_norm": 8.859969139099121, "learning_rate": 6.118265580886232e-06, "loss": 0.4202, "step": 24800 }, { "epoch": 1.4567553285185837, "grad_norm": 2.280749797821045, "learning_rate": 6.114935815379438e-06, "loss": 0.4446, "step": 24810 }, { "epoch": 1.457342493100816, "grad_norm": 2.793025255203247, "learning_rate": 6.11160552945143e-06, "loss": 0.4399, "step": 24820 }, { "epoch": 1.4579296576830485, "grad_norm": 3.515467882156372, "learning_rate": 6.108274724656694e-06, "loss": 0.3766, "step": 24830 }, { "epoch": 1.458516822265281, "grad_norm": 2.4592702388763428, "learning_rate": 6.104943402549957e-06, "loss": 0.5429, "step": 24840 }, { "epoch": 1.4591039868475133, "grad_norm": 1.4135849475860596, "learning_rate": 6.101611564686188e-06, "loss": 0.3208, "step": 24850 }, { "epoch": 1.4596911514297457, "grad_norm": 5.524046421051025, "learning_rate": 6.098279212620597e-06, "loss": 0.4914, "step": 24860 }, { "epoch": 1.460278316011978, "grad_norm": 4.729963302612305, "learning_rate": 6.094946347908635e-06, "loss": 0.4815, "step": 24870 }, { "epoch": 1.4608654805942105, "grad_norm": 4.821824550628662, "learning_rate": 6.091612972105988e-06, "loss": 0.4283, "step": 24880 }, { "epoch": 1.461452645176443, "grad_norm": 6.946046352386475, "learning_rate": 6.088279086768587e-06, "loss": 0.3965, "step": 24890 }, { "epoch": 1.4620398097586753, "grad_norm": 8.285632133483887, "learning_rate": 6.084944693452594e-06, "loss": 0.5436, "step": 24900 }, { "epoch": 1.4626269743409077, "grad_norm": 5.004469394683838, "learning_rate": 6.081609793714414e-06, "loss": 0.4511, "step": 24910 }, { "epoch": 1.4632141389231401, "grad_norm": 3.1688218116760254, "learning_rate": 6.078274389110688e-06, "loss": 0.4859, "step": 24920 }, { "epoch": 1.4638013035053725, "grad_norm": 13.578142166137695, "learning_rate": 6.0749384811982855e-06, "loss": 0.5642, "step": 24930 }, { "epoch": 1.464388468087605, "grad_norm": 5.162440776824951, "learning_rate": 6.07160207153432e-06, "loss": 0.5076, "step": 24940 }, { "epoch": 1.4649756326698373, "grad_norm": 6.144972801208496, "learning_rate": 6.068265161676135e-06, "loss": 0.455, "step": 24950 }, { "epoch": 1.4655627972520697, "grad_norm": 7.380753517150879, "learning_rate": 6.064927753181307e-06, "loss": 0.5252, "step": 24960 }, { "epoch": 1.4661499618343021, "grad_norm": 3.577712297439575, "learning_rate": 6.061589847607645e-06, "loss": 0.5193, "step": 24970 }, { "epoch": 1.4667371264165345, "grad_norm": 2.497765064239502, "learning_rate": 6.058251446513192e-06, "loss": 0.4465, "step": 24980 }, { "epoch": 1.467324290998767, "grad_norm": 2.507110118865967, "learning_rate": 6.054912551456221e-06, "loss": 0.4332, "step": 24990 }, { "epoch": 1.4679114555809993, "grad_norm": 4.0909600257873535, "learning_rate": 6.0515731639952366e-06, "loss": 0.4368, "step": 25000 }, { "epoch": 1.4684986201632317, "grad_norm": 3.233271360397339, "learning_rate": 6.0482332856889735e-06, "loss": 0.4157, "step": 25010 }, { "epoch": 1.4690857847454641, "grad_norm": 5.342386722564697, "learning_rate": 6.044892918096391e-06, "loss": 0.6073, "step": 25020 }, { "epoch": 1.4696729493276965, "grad_norm": 8.703189849853516, "learning_rate": 6.041552062776684e-06, "loss": 0.3627, "step": 25030 }, { "epoch": 1.470260113909929, "grad_norm": 2.3691930770874023, "learning_rate": 6.0382107212892705e-06, "loss": 0.3997, "step": 25040 }, { "epoch": 1.4708472784921613, "grad_norm": 2.3693299293518066, "learning_rate": 6.034868895193795e-06, "loss": 0.5686, "step": 25050 }, { "epoch": 1.4714344430743937, "grad_norm": 3.1594250202178955, "learning_rate": 6.031526586050129e-06, "loss": 0.6048, "step": 25060 }, { "epoch": 1.4720216076566262, "grad_norm": 3.4655256271362305, "learning_rate": 6.028183795418373e-06, "loss": 0.4312, "step": 25070 }, { "epoch": 1.4726087722388586, "grad_norm": 2.7205088138580322, "learning_rate": 6.0248405248588486e-06, "loss": 0.4249, "step": 25080 }, { "epoch": 1.473195936821091, "grad_norm": 5.001770973205566, "learning_rate": 6.0214967759320985e-06, "loss": 0.4727, "step": 25090 }, { "epoch": 1.4737831014033234, "grad_norm": 13.343342781066895, "learning_rate": 6.0181525501988955e-06, "loss": 0.4366, "step": 25100 }, { "epoch": 1.4743702659855558, "grad_norm": 2.833242654800415, "learning_rate": 6.014807849220232e-06, "loss": 0.4906, "step": 25110 }, { "epoch": 1.4749574305677882, "grad_norm": 3.8710741996765137, "learning_rate": 6.011462674557321e-06, "loss": 0.4185, "step": 25120 }, { "epoch": 1.4755445951500206, "grad_norm": 4.424587726593018, "learning_rate": 6.008117027771597e-06, "loss": 0.4191, "step": 25130 }, { "epoch": 1.476131759732253, "grad_norm": 3.8049542903900146, "learning_rate": 6.004770910424715e-06, "loss": 0.4038, "step": 25140 }, { "epoch": 1.4767189243144854, "grad_norm": 4.246323108673096, "learning_rate": 6.001424324078552e-06, "loss": 0.4919, "step": 25150 }, { "epoch": 1.4773060888967178, "grad_norm": 4.054759502410889, "learning_rate": 5.9980772702952004e-06, "loss": 0.4148, "step": 25160 }, { "epoch": 1.4778932534789502, "grad_norm": 7.432585716247559, "learning_rate": 5.994729750636975e-06, "loss": 0.3326, "step": 25170 }, { "epoch": 1.4784804180611826, "grad_norm": 2.0200366973876953, "learning_rate": 5.9913817666664e-06, "loss": 0.5233, "step": 25180 }, { "epoch": 1.479067582643415, "grad_norm": 2.8984415531158447, "learning_rate": 5.9880333199462285e-06, "loss": 0.4291, "step": 25190 }, { "epoch": 1.4796547472256474, "grad_norm": 3.777966260910034, "learning_rate": 5.984684412039417e-06, "loss": 0.4669, "step": 25200 }, { "epoch": 1.4802419118078798, "grad_norm": 3.2520909309387207, "learning_rate": 5.981335044509147e-06, "loss": 0.3684, "step": 25210 }, { "epoch": 1.4808290763901122, "grad_norm": 2.6219754219055176, "learning_rate": 5.977985218918807e-06, "loss": 0.4211, "step": 25220 }, { "epoch": 1.4814162409723446, "grad_norm": 2.3338968753814697, "learning_rate": 5.9746349368320075e-06, "loss": 0.4468, "step": 25230 }, { "epoch": 1.482003405554577, "grad_norm": 3.0107409954071045, "learning_rate": 5.971284199812566e-06, "loss": 0.4185, "step": 25240 }, { "epoch": 1.4825905701368094, "grad_norm": 3.147998332977295, "learning_rate": 5.967933009424512e-06, "loss": 0.4641, "step": 25250 }, { "epoch": 1.4831777347190418, "grad_norm": 1.6671985387802124, "learning_rate": 5.964581367232088e-06, "loss": 0.4528, "step": 25260 }, { "epoch": 1.4837648993012742, "grad_norm": 10.062752723693848, "learning_rate": 5.9612292747997514e-06, "loss": 0.3903, "step": 25270 }, { "epoch": 1.4843520638835066, "grad_norm": 4.890443801879883, "learning_rate": 5.957876733692165e-06, "loss": 0.3481, "step": 25280 }, { "epoch": 1.484939228465739, "grad_norm": 5.883554935455322, "learning_rate": 5.9545237454742025e-06, "loss": 0.3559, "step": 25290 }, { "epoch": 1.4855263930479714, "grad_norm": 5.042175769805908, "learning_rate": 5.951170311710942e-06, "loss": 0.4795, "step": 25300 }, { "epoch": 1.4861135576302038, "grad_norm": 2.4075653553009033, "learning_rate": 5.94781643396768e-06, "loss": 0.4632, "step": 25310 }, { "epoch": 1.4867007222124362, "grad_norm": 0.8435953855514526, "learning_rate": 5.94446211380991e-06, "loss": 0.2824, "step": 25320 }, { "epoch": 1.4872878867946686, "grad_norm": 6.64626932144165, "learning_rate": 5.941107352803338e-06, "loss": 0.4602, "step": 25330 }, { "epoch": 1.487875051376901, "grad_norm": 2.0594193935394287, "learning_rate": 5.9377521525138705e-06, "loss": 0.4178, "step": 25340 }, { "epoch": 1.4884622159591334, "grad_norm": 5.251984119415283, "learning_rate": 5.934396514507626e-06, "loss": 0.4486, "step": 25350 }, { "epoch": 1.4890493805413658, "grad_norm": 5.493160724639893, "learning_rate": 5.931040440350921e-06, "loss": 0.3534, "step": 25360 }, { "epoch": 1.4896365451235982, "grad_norm": 3.218322992324829, "learning_rate": 5.92768393161028e-06, "loss": 0.5036, "step": 25370 }, { "epoch": 1.4902237097058306, "grad_norm": 14.138084411621094, "learning_rate": 5.924326989852426e-06, "loss": 0.5166, "step": 25380 }, { "epoch": 1.490810874288063, "grad_norm": 3.6633009910583496, "learning_rate": 5.9209696166442885e-06, "loss": 0.4146, "step": 25390 }, { "epoch": 1.4913980388702952, "grad_norm": 1.701246976852417, "learning_rate": 5.9176118135529984e-06, "loss": 0.3859, "step": 25400 }, { "epoch": 1.4919852034525278, "grad_norm": 3.4219391345977783, "learning_rate": 5.91425358214588e-06, "loss": 0.4459, "step": 25410 }, { "epoch": 1.49257236803476, "grad_norm": 2.1504623889923096, "learning_rate": 5.910894923990467e-06, "loss": 0.4054, "step": 25420 }, { "epoch": 1.4931595326169926, "grad_norm": 5.010767459869385, "learning_rate": 5.907535840654487e-06, "loss": 0.4612, "step": 25430 }, { "epoch": 1.4937466971992248, "grad_norm": 5.638700008392334, "learning_rate": 5.904176333705867e-06, "loss": 0.4653, "step": 25440 }, { "epoch": 1.4943338617814574, "grad_norm": 4.237824440002441, "learning_rate": 5.9008164047127334e-06, "loss": 0.3806, "step": 25450 }, { "epoch": 1.4949210263636896, "grad_norm": 7.809217929840088, "learning_rate": 5.8974560552434045e-06, "loss": 0.4218, "step": 25460 }, { "epoch": 1.4955081909459222, "grad_norm": 2.3766732215881348, "learning_rate": 5.894095286866404e-06, "loss": 0.4616, "step": 25470 }, { "epoch": 1.4960953555281544, "grad_norm": 5.38548469543457, "learning_rate": 5.890734101150442e-06, "loss": 0.474, "step": 25480 }, { "epoch": 1.496682520110387, "grad_norm": 2.135474443435669, "learning_rate": 5.887372499664428e-06, "loss": 0.5429, "step": 25490 }, { "epoch": 1.4972696846926192, "grad_norm": 1.9596426486968994, "learning_rate": 5.8840104839774615e-06, "loss": 0.4368, "step": 25500 }, { "epoch": 1.4978568492748519, "grad_norm": 3.244389295578003, "learning_rate": 5.880648055658844e-06, "loss": 0.3939, "step": 25510 }, { "epoch": 1.498444013857084, "grad_norm": 5.394364833831787, "learning_rate": 5.877285216278062e-06, "loss": 0.3975, "step": 25520 }, { "epoch": 1.4990311784393167, "grad_norm": 3.180438995361328, "learning_rate": 5.873921967404793e-06, "loss": 0.3395, "step": 25530 }, { "epoch": 1.4996183430215488, "grad_norm": 6.510666370391846, "learning_rate": 5.8705583106089124e-06, "loss": 0.4098, "step": 25540 }, { "epoch": 1.5002055076037815, "grad_norm": 6.373774528503418, "learning_rate": 5.867194247460481e-06, "loss": 0.4756, "step": 25550 }, { "epoch": 1.5007926721860136, "grad_norm": 3.4164834022521973, "learning_rate": 5.863829779529751e-06, "loss": 0.412, "step": 25560 }, { "epoch": 1.5013798367682463, "grad_norm": 13.251077651977539, "learning_rate": 5.860464908387161e-06, "loss": 0.4571, "step": 25570 }, { "epoch": 1.5019670013504784, "grad_norm": 5.360465049743652, "learning_rate": 5.857099635603341e-06, "loss": 0.3141, "step": 25580 }, { "epoch": 1.502554165932711, "grad_norm": 10.986614227294922, "learning_rate": 5.853733962749109e-06, "loss": 0.5015, "step": 25590 }, { "epoch": 1.5031413305149433, "grad_norm": 3.5753140449523926, "learning_rate": 5.850367891395464e-06, "loss": 0.4235, "step": 25600 }, { "epoch": 1.5037284950971759, "grad_norm": 10.833492279052734, "learning_rate": 5.8470014231136e-06, "loss": 0.4876, "step": 25610 }, { "epoch": 1.504315659679408, "grad_norm": 2.434398651123047, "learning_rate": 5.8436345594748864e-06, "loss": 0.4477, "step": 25620 }, { "epoch": 1.5049028242616407, "grad_norm": 15.744391441345215, "learning_rate": 5.840267302050886e-06, "loss": 0.4969, "step": 25630 }, { "epoch": 1.5054899888438729, "grad_norm": 15.199982643127441, "learning_rate": 5.83689965241334e-06, "loss": 0.4617, "step": 25640 }, { "epoch": 1.5060771534261055, "grad_norm": 2.9171054363250732, "learning_rate": 5.833531612134174e-06, "loss": 0.356, "step": 25650 }, { "epoch": 1.5066643180083377, "grad_norm": 1.819899559020996, "learning_rate": 5.830163182785495e-06, "loss": 0.4042, "step": 25660 }, { "epoch": 1.5072514825905703, "grad_norm": 1.8148213624954224, "learning_rate": 5.826794365939595e-06, "loss": 0.4594, "step": 25670 }, { "epoch": 1.5078386471728025, "grad_norm": 3.3870606422424316, "learning_rate": 5.823425163168944e-06, "loss": 0.4665, "step": 25680 }, { "epoch": 1.508425811755035, "grad_norm": 1.7762240171432495, "learning_rate": 5.820055576046191e-06, "loss": 0.3666, "step": 25690 }, { "epoch": 1.5090129763372673, "grad_norm": 4.9180684089660645, "learning_rate": 5.816685606144167e-06, "loss": 0.4823, "step": 25700 }, { "epoch": 1.5096001409195, "grad_norm": 3.1681253910064697, "learning_rate": 5.813315255035882e-06, "loss": 0.4216, "step": 25710 }, { "epoch": 1.510187305501732, "grad_norm": 1.473673939704895, "learning_rate": 5.809944524294522e-06, "loss": 0.4094, "step": 25720 }, { "epoch": 1.5107744700839645, "grad_norm": 5.189204216003418, "learning_rate": 5.8065734154934495e-06, "loss": 0.3278, "step": 25730 }, { "epoch": 1.5113616346661969, "grad_norm": 20.458084106445312, "learning_rate": 5.803201930206206e-06, "loss": 0.4824, "step": 25740 }, { "epoch": 1.5119487992484293, "grad_norm": 9.91854190826416, "learning_rate": 5.799830070006508e-06, "loss": 0.4024, "step": 25750 }, { "epoch": 1.5125359638306617, "grad_norm": 3.9895176887512207, "learning_rate": 5.796457836468247e-06, "loss": 0.3925, "step": 25760 }, { "epoch": 1.513123128412894, "grad_norm": 9.964841842651367, "learning_rate": 5.793085231165487e-06, "loss": 0.426, "step": 25770 }, { "epoch": 1.5137102929951265, "grad_norm": 2.5346996784210205, "learning_rate": 5.789712255672467e-06, "loss": 0.5236, "step": 25780 }, { "epoch": 1.514297457577359, "grad_norm": 3.474583148956299, "learning_rate": 5.7863389115636015e-06, "loss": 0.3999, "step": 25790 }, { "epoch": 1.5148846221595913, "grad_norm": 2.5178959369659424, "learning_rate": 5.782965200413469e-06, "loss": 0.5281, "step": 25800 }, { "epoch": 1.5154717867418237, "grad_norm": 4.393153667449951, "learning_rate": 5.779591123796831e-06, "loss": 0.3866, "step": 25810 }, { "epoch": 1.516058951324056, "grad_norm": 3.221822738647461, "learning_rate": 5.7762166832886065e-06, "loss": 0.4596, "step": 25820 }, { "epoch": 1.5166461159062885, "grad_norm": 2.890727996826172, "learning_rate": 5.7728418804638966e-06, "loss": 0.3555, "step": 25830 }, { "epoch": 1.517233280488521, "grad_norm": 1.6863470077514648, "learning_rate": 5.769466716897964e-06, "loss": 0.471, "step": 25840 }, { "epoch": 1.5178204450707533, "grad_norm": 1.451902151107788, "learning_rate": 5.766091194166239e-06, "loss": 0.4064, "step": 25850 }, { "epoch": 1.5184076096529857, "grad_norm": 3.442387580871582, "learning_rate": 5.762715313844327e-06, "loss": 0.4018, "step": 25860 }, { "epoch": 1.5189947742352181, "grad_norm": 3.024193286895752, "learning_rate": 5.759339077507991e-06, "loss": 0.4783, "step": 25870 }, { "epoch": 1.5195819388174505, "grad_norm": 7.2007975578308105, "learning_rate": 5.755962486733171e-06, "loss": 0.453, "step": 25880 }, { "epoch": 1.520169103399683, "grad_norm": 2.3379738330841064, "learning_rate": 5.75258554309596e-06, "loss": 0.6197, "step": 25890 }, { "epoch": 1.5207562679819153, "grad_norm": 7.09408712387085, "learning_rate": 5.749208248172624e-06, "loss": 0.4468, "step": 25900 }, { "epoch": 1.5213434325641477, "grad_norm": 9.274141311645508, "learning_rate": 5.745830603539591e-06, "loss": 0.478, "step": 25910 }, { "epoch": 1.5219305971463801, "grad_norm": 4.278811931610107, "learning_rate": 5.742452610773452e-06, "loss": 0.4535, "step": 25920 }, { "epoch": 1.5225177617286125, "grad_norm": 12.622769355773926, "learning_rate": 5.739074271450963e-06, "loss": 0.649, "step": 25930 }, { "epoch": 1.523104926310845, "grad_norm": 13.044401168823242, "learning_rate": 5.735695587149035e-06, "loss": 0.5005, "step": 25940 }, { "epoch": 1.5236920908930773, "grad_norm": 4.397469520568848, "learning_rate": 5.732316559444748e-06, "loss": 0.4148, "step": 25950 }, { "epoch": 1.5242792554753097, "grad_norm": 4.261443138122559, "learning_rate": 5.728937189915338e-06, "loss": 0.3966, "step": 25960 }, { "epoch": 1.5248664200575421, "grad_norm": 2.717486619949341, "learning_rate": 5.7255574801382e-06, "loss": 0.4436, "step": 25970 }, { "epoch": 1.5254535846397745, "grad_norm": 3.1108076572418213, "learning_rate": 5.722177431690889e-06, "loss": 0.441, "step": 25980 }, { "epoch": 1.526040749222007, "grad_norm": 2.8734068870544434, "learning_rate": 5.71879704615112e-06, "loss": 0.4073, "step": 25990 }, { "epoch": 1.5266279138042393, "grad_norm": 6.107803821563721, "learning_rate": 5.7154163250967635e-06, "loss": 0.4561, "step": 26000 }, { "epoch": 1.5272150783864717, "grad_norm": 2.2226221561431885, "learning_rate": 5.7120352701058435e-06, "loss": 0.4132, "step": 26010 }, { "epoch": 1.5278022429687041, "grad_norm": 4.277340412139893, "learning_rate": 5.708653882756546e-06, "loss": 0.4008, "step": 26020 }, { "epoch": 1.5283894075509366, "grad_norm": 4.279139518737793, "learning_rate": 5.705272164627208e-06, "loss": 0.5484, "step": 26030 }, { "epoch": 1.528976572133169, "grad_norm": 2.2796926498413086, "learning_rate": 5.701890117296323e-06, "loss": 0.4701, "step": 26040 }, { "epoch": 1.5295637367154014, "grad_norm": 1.9525223970413208, "learning_rate": 5.698507742342536e-06, "loss": 0.3483, "step": 26050 }, { "epoch": 1.5301509012976338, "grad_norm": 0.9391319751739502, "learning_rate": 5.6951250413446454e-06, "loss": 0.5002, "step": 26060 }, { "epoch": 1.5307380658798662, "grad_norm": 3.345402479171753, "learning_rate": 5.691742015881606e-06, "loss": 0.4293, "step": 26070 }, { "epoch": 1.5313252304620986, "grad_norm": 4.079432010650635, "learning_rate": 5.6883586675325175e-06, "loss": 0.3262, "step": 26080 }, { "epoch": 1.531912395044331, "grad_norm": 3.1804039478302, "learning_rate": 5.684974997876635e-06, "loss": 0.4735, "step": 26090 }, { "epoch": 1.5324995596265634, "grad_norm": 2.777184247970581, "learning_rate": 5.681591008493358e-06, "loss": 0.4231, "step": 26100 }, { "epoch": 1.5330867242087958, "grad_norm": 1.9473143815994263, "learning_rate": 5.678206700962244e-06, "loss": 0.5167, "step": 26110 }, { "epoch": 1.5336738887910282, "grad_norm": 2.4028613567352295, "learning_rate": 5.674822076862992e-06, "loss": 0.4191, "step": 26120 }, { "epoch": 1.5342610533732606, "grad_norm": 1.7810617685317993, "learning_rate": 5.671437137775452e-06, "loss": 0.4098, "step": 26130 }, { "epoch": 1.534848217955493, "grad_norm": 4.648862361907959, "learning_rate": 5.66805188527962e-06, "loss": 0.4519, "step": 26140 }, { "epoch": 1.5354353825377254, "grad_norm": 3.3577914237976074, "learning_rate": 5.664666320955636e-06, "loss": 0.4306, "step": 26150 }, { "epoch": 1.5360225471199578, "grad_norm": 4.1482720375061035, "learning_rate": 5.661280446383791e-06, "loss": 0.4015, "step": 26160 }, { "epoch": 1.53660971170219, "grad_norm": 5.288893222808838, "learning_rate": 5.657894263144515e-06, "loss": 0.4373, "step": 26170 }, { "epoch": 1.5371968762844226, "grad_norm": 3.140216827392578, "learning_rate": 5.654507772818386e-06, "loss": 0.4606, "step": 26180 }, { "epoch": 1.5377840408666548, "grad_norm": 1.9066338539123535, "learning_rate": 5.651120976986124e-06, "loss": 0.492, "step": 26190 }, { "epoch": 1.5383712054488874, "grad_norm": 4.0578718185424805, "learning_rate": 5.6477338772285914e-06, "loss": 0.5109, "step": 26200 }, { "epoch": 1.5389583700311196, "grad_norm": 2.3836400508880615, "learning_rate": 5.644346475126794e-06, "loss": 0.4805, "step": 26210 }, { "epoch": 1.5395455346133522, "grad_norm": 10.921405792236328, "learning_rate": 5.6409587722618744e-06, "loss": 0.4319, "step": 26220 }, { "epoch": 1.5401326991955844, "grad_norm": 4.804671764373779, "learning_rate": 5.6375707702151215e-06, "loss": 0.417, "step": 26230 }, { "epoch": 1.540719863777817, "grad_norm": 5.1806321144104, "learning_rate": 5.6341824705679605e-06, "loss": 0.379, "step": 26240 }, { "epoch": 1.5413070283600492, "grad_norm": 2.4078714847564697, "learning_rate": 5.630793874901955e-06, "loss": 0.4652, "step": 26250 }, { "epoch": 1.5418941929422818, "grad_norm": 2.952723264694214, "learning_rate": 5.627404984798808e-06, "loss": 0.2932, "step": 26260 }, { "epoch": 1.542481357524514, "grad_norm": 2.5378289222717285, "learning_rate": 5.624015801840362e-06, "loss": 0.391, "step": 26270 }, { "epoch": 1.5430685221067466, "grad_norm": 7.516534328460693, "learning_rate": 5.620626327608589e-06, "loss": 0.4776, "step": 26280 }, { "epoch": 1.5436556866889788, "grad_norm": 3.2261364459991455, "learning_rate": 5.617236563685606e-06, "loss": 0.3439, "step": 26290 }, { "epoch": 1.5442428512712114, "grad_norm": 5.749922275543213, "learning_rate": 5.61384651165366e-06, "loss": 0.4104, "step": 26300 }, { "epoch": 1.5448300158534436, "grad_norm": 3.4870948791503906, "learning_rate": 5.610456173095132e-06, "loss": 0.4108, "step": 26310 }, { "epoch": 1.5454171804356762, "grad_norm": 4.601074695587158, "learning_rate": 5.607065549592543e-06, "loss": 0.4197, "step": 26320 }, { "epoch": 1.5460043450179084, "grad_norm": 5.328718185424805, "learning_rate": 5.603674642728536e-06, "loss": 0.3824, "step": 26330 }, { "epoch": 1.546591509600141, "grad_norm": 2.71893310546875, "learning_rate": 5.6002834540858975e-06, "loss": 0.476, "step": 26340 }, { "epoch": 1.5471786741823732, "grad_norm": 3.4402191638946533, "learning_rate": 5.596891985247537e-06, "loss": 0.3923, "step": 26350 }, { "epoch": 1.5477658387646058, "grad_norm": 3.3066065311431885, "learning_rate": 5.5935002377965005e-06, "loss": 0.3267, "step": 26360 }, { "epoch": 1.548353003346838, "grad_norm": 7.54945182800293, "learning_rate": 5.590108213315964e-06, "loss": 0.3674, "step": 26370 }, { "epoch": 1.5489401679290706, "grad_norm": 6.755549430847168, "learning_rate": 5.586715913389224e-06, "loss": 0.4902, "step": 26380 }, { "epoch": 1.5495273325113028, "grad_norm": 5.374086856842041, "learning_rate": 5.583323339599721e-06, "loss": 0.4351, "step": 26390 }, { "epoch": 1.5501144970935354, "grad_norm": 3.1056370735168457, "learning_rate": 5.579930493531008e-06, "loss": 0.508, "step": 26400 }, { "epoch": 1.5507016616757676, "grad_norm": 4.802290439605713, "learning_rate": 5.576537376766775e-06, "loss": 0.3545, "step": 26410 }, { "epoch": 1.5512888262580002, "grad_norm": 2.191568374633789, "learning_rate": 5.573143990890833e-06, "loss": 0.3314, "step": 26420 }, { "epoch": 1.5518759908402324, "grad_norm": 3.6501331329345703, "learning_rate": 5.569750337487121e-06, "loss": 0.4736, "step": 26430 }, { "epoch": 1.552463155422465, "grad_norm": 6.443139553070068, "learning_rate": 5.566356418139705e-06, "loss": 0.467, "step": 26440 }, { "epoch": 1.5530503200046972, "grad_norm": 1.7127400636672974, "learning_rate": 5.56296223443277e-06, "loss": 0.443, "step": 26450 }, { "epoch": 1.5536374845869299, "grad_norm": 2.2313573360443115, "learning_rate": 5.5595677879506296e-06, "loss": 0.3824, "step": 26460 }, { "epoch": 1.554224649169162, "grad_norm": 4.644425392150879, "learning_rate": 5.556173080277715e-06, "loss": 0.4577, "step": 26470 }, { "epoch": 1.5548118137513947, "grad_norm": 9.285862922668457, "learning_rate": 5.552778112998585e-06, "loss": 0.4026, "step": 26480 }, { "epoch": 1.5553989783336268, "grad_norm": 2.2361342906951904, "learning_rate": 5.549382887697913e-06, "loss": 0.4281, "step": 26490 }, { "epoch": 1.5559861429158595, "grad_norm": 3.9729347229003906, "learning_rate": 5.545987405960498e-06, "loss": 0.4633, "step": 26500 }, { "epoch": 1.5565733074980916, "grad_norm": 5.459007263183594, "learning_rate": 5.542591669371256e-06, "loss": 0.3759, "step": 26510 }, { "epoch": 1.5571604720803243, "grad_norm": 6.258236408233643, "learning_rate": 5.539195679515224e-06, "loss": 0.3779, "step": 26520 }, { "epoch": 1.5577476366625564, "grad_norm": 6.90873384475708, "learning_rate": 5.535799437977558e-06, "loss": 0.353, "step": 26530 }, { "epoch": 1.558334801244789, "grad_norm": 3.130094528198242, "learning_rate": 5.5324029463435244e-06, "loss": 0.504, "step": 26540 }, { "epoch": 1.5589219658270212, "grad_norm": 4.275976181030273, "learning_rate": 5.529006206198518e-06, "loss": 0.4107, "step": 26550 }, { "epoch": 1.5595091304092539, "grad_norm": 3.375331163406372, "learning_rate": 5.525609219128039e-06, "loss": 0.4017, "step": 26560 }, { "epoch": 1.560096294991486, "grad_norm": 2.638801336288452, "learning_rate": 5.5222119867177074e-06, "loss": 0.4379, "step": 26570 }, { "epoch": 1.5606834595737187, "grad_norm": 5.8653717041015625, "learning_rate": 5.518814510553259e-06, "loss": 0.6061, "step": 26580 }, { "epoch": 1.5612706241559509, "grad_norm": 1.691998839378357, "learning_rate": 5.515416792220539e-06, "loss": 0.3391, "step": 26590 }, { "epoch": 1.5618577887381833, "grad_norm": 9.516860008239746, "learning_rate": 5.512018833305513e-06, "loss": 0.534, "step": 26600 }, { "epoch": 1.5624449533204157, "grad_norm": 7.522252559661865, "learning_rate": 5.5086206353942494e-06, "loss": 0.4732, "step": 26610 }, { "epoch": 1.563032117902648, "grad_norm": 1.6765828132629395, "learning_rate": 5.505222200072935e-06, "loss": 0.3561, "step": 26620 }, { "epoch": 1.5636192824848805, "grad_norm": 9.05114459991455, "learning_rate": 5.501823528927866e-06, "loss": 0.3668, "step": 26630 }, { "epoch": 1.5642064470671129, "grad_norm": 3.111464023590088, "learning_rate": 5.498424623545448e-06, "loss": 0.5494, "step": 26640 }, { "epoch": 1.5647936116493453, "grad_norm": 2.171844244003296, "learning_rate": 5.495025485512196e-06, "loss": 0.3696, "step": 26650 }, { "epoch": 1.5653807762315777, "grad_norm": 1.410831093788147, "learning_rate": 5.491626116414732e-06, "loss": 0.3945, "step": 26660 }, { "epoch": 1.56596794081381, "grad_norm": 4.727792739868164, "learning_rate": 5.48822651783979e-06, "loss": 0.4443, "step": 26670 }, { "epoch": 1.5665551053960425, "grad_norm": 3.7283811569213867, "learning_rate": 5.484826691374206e-06, "loss": 0.4933, "step": 26680 }, { "epoch": 1.5671422699782749, "grad_norm": 2.5765936374664307, "learning_rate": 5.48142663860493e-06, "loss": 0.5407, "step": 26690 }, { "epoch": 1.5677294345605073, "grad_norm": 3.1903598308563232, "learning_rate": 5.478026361119004e-06, "loss": 0.5521, "step": 26700 }, { "epoch": 1.5683165991427397, "grad_norm": 3.6737220287323, "learning_rate": 5.474625860503593e-06, "loss": 0.4454, "step": 26710 }, { "epoch": 1.568903763724972, "grad_norm": 9.370905876159668, "learning_rate": 5.47122513834595e-06, "loss": 0.5685, "step": 26720 }, { "epoch": 1.5694909283072045, "grad_norm": 10.904895782470703, "learning_rate": 5.467824196233442e-06, "loss": 0.3946, "step": 26730 }, { "epoch": 1.570078092889437, "grad_norm": 4.036834239959717, "learning_rate": 5.464423035753532e-06, "loss": 0.4409, "step": 26740 }, { "epoch": 1.5706652574716693, "grad_norm": 1.269299030303955, "learning_rate": 5.461021658493789e-06, "loss": 0.5248, "step": 26750 }, { "epoch": 1.5712524220539017, "grad_norm": 10.964492797851562, "learning_rate": 5.457620066041883e-06, "loss": 0.436, "step": 26760 }, { "epoch": 1.571839586636134, "grad_norm": 13.444524765014648, "learning_rate": 5.454218259985581e-06, "loss": 0.3641, "step": 26770 }, { "epoch": 1.5724267512183665, "grad_norm": 10.188955307006836, "learning_rate": 5.4508162419127546e-06, "loss": 0.4927, "step": 26780 }, { "epoch": 1.573013915800599, "grad_norm": 5.084463596343994, "learning_rate": 5.447414013411366e-06, "loss": 0.3623, "step": 26790 }, { "epoch": 1.5736010803828313, "grad_norm": 3.641296148300171, "learning_rate": 5.44401157606949e-06, "loss": 0.4137, "step": 26800 }, { "epoch": 1.5741882449650637, "grad_norm": 1.705478310585022, "learning_rate": 5.440608931475284e-06, "loss": 0.3149, "step": 26810 }, { "epoch": 1.5747754095472961, "grad_norm": 5.680304527282715, "learning_rate": 5.43720608121701e-06, "loss": 0.4244, "step": 26820 }, { "epoch": 1.5753625741295285, "grad_norm": 3.2744269371032715, "learning_rate": 5.433803026883025e-06, "loss": 0.4296, "step": 26830 }, { "epoch": 1.575949738711761, "grad_norm": 14.190695762634277, "learning_rate": 5.43039977006178e-06, "loss": 0.4806, "step": 26840 }, { "epoch": 1.5765369032939933, "grad_norm": 2.479954957962036, "learning_rate": 5.426996312341824e-06, "loss": 0.596, "step": 26850 }, { "epoch": 1.5771240678762257, "grad_norm": 3.1666059494018555, "learning_rate": 5.423592655311792e-06, "loss": 0.4129, "step": 26860 }, { "epoch": 1.5777112324584581, "grad_norm": 4.67750358581543, "learning_rate": 5.420188800560421e-06, "loss": 0.4659, "step": 26870 }, { "epoch": 1.5782983970406905, "grad_norm": 3.729187250137329, "learning_rate": 5.4167847496765345e-06, "loss": 0.3359, "step": 26880 }, { "epoch": 1.578885561622923, "grad_norm": 2.4987056255340576, "learning_rate": 5.41338050424905e-06, "loss": 0.4528, "step": 26890 }, { "epoch": 1.5794727262051553, "grad_norm": 3.328301429748535, "learning_rate": 5.409976065866975e-06, "loss": 0.4455, "step": 26900 }, { "epoch": 1.5800598907873877, "grad_norm": 9.617570877075195, "learning_rate": 5.4065714361194056e-06, "loss": 0.5721, "step": 26910 }, { "epoch": 1.5806470553696201, "grad_norm": 2.847957134246826, "learning_rate": 5.403166616595532e-06, "loss": 0.3878, "step": 26920 }, { "epoch": 1.5812342199518525, "grad_norm": 3.1856255531311035, "learning_rate": 5.399761608884627e-06, "loss": 0.3092, "step": 26930 }, { "epoch": 1.581821384534085, "grad_norm": 2.644878387451172, "learning_rate": 5.396356414576056e-06, "loss": 0.4904, "step": 26940 }, { "epoch": 1.5824085491163173, "grad_norm": 4.703897476196289, "learning_rate": 5.3929510352592685e-06, "loss": 0.4343, "step": 26950 }, { "epoch": 1.5829957136985497, "grad_norm": 5.232110023498535, "learning_rate": 5.389545472523801e-06, "loss": 0.5074, "step": 26960 }, { "epoch": 1.5835828782807821, "grad_norm": 10.22461986541748, "learning_rate": 5.386139727959279e-06, "loss": 0.3935, "step": 26970 }, { "epoch": 1.5841700428630145, "grad_norm": 4.385377407073975, "learning_rate": 5.382733803155405e-06, "loss": 0.3671, "step": 26980 }, { "epoch": 1.584757207445247, "grad_norm": 4.541256904602051, "learning_rate": 5.379327699701975e-06, "loss": 0.4021, "step": 26990 }, { "epoch": 1.5853443720274794, "grad_norm": 3.7632527351379395, "learning_rate": 5.375921419188861e-06, "loss": 0.2862, "step": 27000 }, { "epoch": 1.5859315366097118, "grad_norm": 5.853674411773682, "learning_rate": 5.372514963206022e-06, "loss": 0.4666, "step": 27010 }, { "epoch": 1.586518701191944, "grad_norm": 3.949174642562866, "learning_rate": 5.369108333343497e-06, "loss": 0.5329, "step": 27020 }, { "epoch": 1.5871058657741766, "grad_norm": 1.919176697731018, "learning_rate": 5.365701531191407e-06, "loss": 0.5467, "step": 27030 }, { "epoch": 1.5876930303564087, "grad_norm": 1.7766481637954712, "learning_rate": 5.362294558339952e-06, "loss": 0.3847, "step": 27040 }, { "epoch": 1.5882801949386414, "grad_norm": 3.0085501670837402, "learning_rate": 5.358887416379415e-06, "loss": 0.3862, "step": 27050 }, { "epoch": 1.5888673595208735, "grad_norm": 1.7961061000823975, "learning_rate": 5.355480106900154e-06, "loss": 0.6588, "step": 27060 }, { "epoch": 1.5894545241031062, "grad_norm": 4.750105857849121, "learning_rate": 5.352072631492605e-06, "loss": 0.4242, "step": 27070 }, { "epoch": 1.5900416886853384, "grad_norm": 7.431764125823975, "learning_rate": 5.348664991747288e-06, "loss": 0.5149, "step": 27080 }, { "epoch": 1.590628853267571, "grad_norm": 4.638680934906006, "learning_rate": 5.345257189254791e-06, "loss": 0.4853, "step": 27090 }, { "epoch": 1.5912160178498032, "grad_norm": 2.5349676609039307, "learning_rate": 5.341849225605784e-06, "loss": 0.4826, "step": 27100 }, { "epoch": 1.5918031824320358, "grad_norm": 5.1875, "learning_rate": 5.338441102391009e-06, "loss": 0.4812, "step": 27110 }, { "epoch": 1.592390347014268, "grad_norm": 6.594388961791992, "learning_rate": 5.335032821201285e-06, "loss": 0.4754, "step": 27120 }, { "epoch": 1.5929775115965006, "grad_norm": 4.343113422393799, "learning_rate": 5.331624383627503e-06, "loss": 0.4732, "step": 27130 }, { "epoch": 1.5935646761787328, "grad_norm": 2.6239640712738037, "learning_rate": 5.3282157912606256e-06, "loss": 0.4247, "step": 27140 }, { "epoch": 1.5941518407609654, "grad_norm": 3.67411208152771, "learning_rate": 5.3248070456916925e-06, "loss": 0.4827, "step": 27150 }, { "epoch": 1.5947390053431976, "grad_norm": 2.128511667251587, "learning_rate": 5.3213981485118095e-06, "loss": 0.3942, "step": 27160 }, { "epoch": 1.5953261699254302, "grad_norm": 4.056434631347656, "learning_rate": 5.317989101312158e-06, "loss": 0.3806, "step": 27170 }, { "epoch": 1.5959133345076624, "grad_norm": 1.9671591520309448, "learning_rate": 5.314579905683984e-06, "loss": 0.4548, "step": 27180 }, { "epoch": 1.596500499089895, "grad_norm": 1.4786649942398071, "learning_rate": 5.311170563218606e-06, "loss": 0.5126, "step": 27190 }, { "epoch": 1.5970876636721272, "grad_norm": 2.2144622802734375, "learning_rate": 5.307761075507415e-06, "loss": 0.4926, "step": 27200 }, { "epoch": 1.5976748282543598, "grad_norm": 1.925140380859375, "learning_rate": 5.304351444141862e-06, "loss": 0.4863, "step": 27210 }, { "epoch": 1.598261992836592, "grad_norm": 3.700235366821289, "learning_rate": 5.300941670713469e-06, "loss": 0.4082, "step": 27220 }, { "epoch": 1.5988491574188246, "grad_norm": 2.7559783458709717, "learning_rate": 5.297531756813823e-06, "loss": 0.4116, "step": 27230 }, { "epoch": 1.5994363220010568, "grad_norm": 1.3475102186203003, "learning_rate": 5.294121704034581e-06, "loss": 0.4963, "step": 27240 }, { "epoch": 1.6000234865832894, "grad_norm": 7.513086795806885, "learning_rate": 5.290711513967457e-06, "loss": 0.4723, "step": 27250 }, { "epoch": 1.6006106511655216, "grad_norm": 3.181277275085449, "learning_rate": 5.287301188204237e-06, "loss": 0.3373, "step": 27260 }, { "epoch": 1.6011978157477542, "grad_norm": 9.77661418914795, "learning_rate": 5.283890728336765e-06, "loss": 0.3153, "step": 27270 }, { "epoch": 1.6017849803299864, "grad_norm": 3.658015012741089, "learning_rate": 5.280480135956949e-06, "loss": 0.3611, "step": 27280 }, { "epoch": 1.602372144912219, "grad_norm": 2.8790719509124756, "learning_rate": 5.277069412656761e-06, "loss": 0.4678, "step": 27290 }, { "epoch": 1.6029593094944512, "grad_norm": 2.858078718185425, "learning_rate": 5.273658560028231e-06, "loss": 0.4667, "step": 27300 }, { "epoch": 1.6035464740766838, "grad_norm": 5.439065933227539, "learning_rate": 5.270247579663451e-06, "loss": 0.4161, "step": 27310 }, { "epoch": 1.604133638658916, "grad_norm": 13.759428977966309, "learning_rate": 5.266836473154571e-06, "loss": 0.3551, "step": 27320 }, { "epoch": 1.6047208032411486, "grad_norm": 3.4884748458862305, "learning_rate": 5.263425242093805e-06, "loss": 0.3543, "step": 27330 }, { "epoch": 1.6053079678233808, "grad_norm": 4.065945625305176, "learning_rate": 5.260013888073418e-06, "loss": 0.4685, "step": 27340 }, { "epoch": 1.6058951324056134, "grad_norm": 1.1970601081848145, "learning_rate": 5.256602412685736e-06, "loss": 0.4957, "step": 27350 }, { "epoch": 1.6064822969878456, "grad_norm": 2.403738021850586, "learning_rate": 5.253190817523144e-06, "loss": 0.4473, "step": 27360 }, { "epoch": 1.6070694615700782, "grad_norm": 8.751830101013184, "learning_rate": 5.2497791041780765e-06, "loss": 0.4974, "step": 27370 }, { "epoch": 1.6076566261523104, "grad_norm": 2.3541224002838135, "learning_rate": 5.246367274243031e-06, "loss": 0.3617, "step": 27380 }, { "epoch": 1.608243790734543, "grad_norm": 2.695955753326416, "learning_rate": 5.24295532931055e-06, "loss": 0.4274, "step": 27390 }, { "epoch": 1.6088309553167752, "grad_norm": 5.391914367675781, "learning_rate": 5.239543270973241e-06, "loss": 0.4186, "step": 27400 }, { "epoch": 1.6094181198990078, "grad_norm": 1.9763985872268677, "learning_rate": 5.236131100823753e-06, "loss": 0.4114, "step": 27410 }, { "epoch": 1.61000528448124, "grad_norm": 5.43499755859375, "learning_rate": 5.232718820454797e-06, "loss": 0.3824, "step": 27420 }, { "epoch": 1.6105924490634727, "grad_norm": 10.866804122924805, "learning_rate": 5.22930643145913e-06, "loss": 0.4856, "step": 27430 }, { "epoch": 1.6111796136457048, "grad_norm": 4.828431606292725, "learning_rate": 5.22589393542956e-06, "loss": 0.4694, "step": 27440 }, { "epoch": 1.6117667782279372, "grad_norm": 4.755549430847168, "learning_rate": 5.2224813339589475e-06, "loss": 0.4555, "step": 27450 }, { "epoch": 1.6123539428101696, "grad_norm": 2.994847536087036, "learning_rate": 5.219068628640198e-06, "loss": 0.3914, "step": 27460 }, { "epoch": 1.612941107392402, "grad_norm": 5.655765533447266, "learning_rate": 5.215655821066269e-06, "loss": 0.4208, "step": 27470 }, { "epoch": 1.6135282719746344, "grad_norm": 2.357738494873047, "learning_rate": 5.212242912830166e-06, "loss": 0.5728, "step": 27480 }, { "epoch": 1.6141154365568668, "grad_norm": 7.048739433288574, "learning_rate": 5.208829905524938e-06, "loss": 0.3859, "step": 27490 }, { "epoch": 1.6147026011390992, "grad_norm": 2.4101109504699707, "learning_rate": 5.205416800743685e-06, "loss": 0.5316, "step": 27500 }, { "epoch": 1.6152897657213316, "grad_norm": 2.2070229053497314, "learning_rate": 5.202003600079547e-06, "loss": 0.5333, "step": 27510 }, { "epoch": 1.615876930303564, "grad_norm": 1.1670111417770386, "learning_rate": 5.1985903051257145e-06, "loss": 0.45, "step": 27520 }, { "epoch": 1.6164640948857965, "grad_norm": 9.423130989074707, "learning_rate": 5.195176917475417e-06, "loss": 0.5018, "step": 27530 }, { "epoch": 1.6170512594680289, "grad_norm": 5.039424896240234, "learning_rate": 5.191763438721933e-06, "loss": 0.5402, "step": 27540 }, { "epoch": 1.6176384240502613, "grad_norm": 2.1522843837738037, "learning_rate": 5.188349870458574e-06, "loss": 0.3292, "step": 27550 }, { "epoch": 1.6182255886324937, "grad_norm": 2.9946742057800293, "learning_rate": 5.184936214278706e-06, "loss": 0.4779, "step": 27560 }, { "epoch": 1.618812753214726, "grad_norm": 4.4409098625183105, "learning_rate": 5.181522471775724e-06, "loss": 0.3418, "step": 27570 }, { "epoch": 1.6193999177969585, "grad_norm": 2.7566182613372803, "learning_rate": 5.178108644543071e-06, "loss": 0.3534, "step": 27580 }, { "epoch": 1.6199870823791909, "grad_norm": 1.0765349864959717, "learning_rate": 5.174694734174227e-06, "loss": 0.4734, "step": 27590 }, { "epoch": 1.6205742469614233, "grad_norm": 3.1843814849853516, "learning_rate": 5.171280742262711e-06, "loss": 0.5441, "step": 27600 }, { "epoch": 1.6211614115436557, "grad_norm": 2.6772913932800293, "learning_rate": 5.167866670402081e-06, "loss": 0.3325, "step": 27610 }, { "epoch": 1.621748576125888, "grad_norm": 5.931136608123779, "learning_rate": 5.164452520185929e-06, "loss": 0.5011, "step": 27620 }, { "epoch": 1.6223357407081205, "grad_norm": 3.873290777206421, "learning_rate": 5.161038293207887e-06, "loss": 0.5282, "step": 27630 }, { "epoch": 1.6229229052903529, "grad_norm": 4.31177282333374, "learning_rate": 5.157623991061622e-06, "loss": 0.3322, "step": 27640 }, { "epoch": 1.6235100698725853, "grad_norm": 5.456490516662598, "learning_rate": 5.154209615340836e-06, "loss": 0.4724, "step": 27650 }, { "epoch": 1.6240972344548177, "grad_norm": 7.511719703674316, "learning_rate": 5.150795167639267e-06, "loss": 0.5161, "step": 27660 }, { "epoch": 1.62468439903705, "grad_norm": 6.855751991271973, "learning_rate": 5.1473806495506795e-06, "loss": 0.3901, "step": 27670 }, { "epoch": 1.6252715636192825, "grad_norm": 4.235129356384277, "learning_rate": 5.143966062668882e-06, "loss": 0.4799, "step": 27680 }, { "epoch": 1.625858728201515, "grad_norm": 1.9669616222381592, "learning_rate": 5.140551408587705e-06, "loss": 0.47, "step": 27690 }, { "epoch": 1.6264458927837473, "grad_norm": 6.048293113708496, "learning_rate": 5.1371366889010165e-06, "loss": 0.4423, "step": 27700 }, { "epoch": 1.6270330573659797, "grad_norm": 2.79091215133667, "learning_rate": 5.133721905202714e-06, "loss": 0.3845, "step": 27710 }, { "epoch": 1.627620221948212, "grad_norm": 13.86210823059082, "learning_rate": 5.130307059086722e-06, "loss": 0.608, "step": 27720 }, { "epoch": 1.6282073865304445, "grad_norm": 2.4655089378356934, "learning_rate": 5.1268921521469995e-06, "loss": 0.4634, "step": 27730 }, { "epoch": 1.628794551112677, "grad_norm": 1.969228982925415, "learning_rate": 5.123477185977527e-06, "loss": 0.3978, "step": 27740 }, { "epoch": 1.6293817156949093, "grad_norm": 1.892039179801941, "learning_rate": 5.120062162172318e-06, "loss": 0.5123, "step": 27750 }, { "epoch": 1.6299688802771417, "grad_norm": 2.6593103408813477, "learning_rate": 5.116647082325411e-06, "loss": 0.4375, "step": 27760 }, { "epoch": 1.630556044859374, "grad_norm": 6.402005672454834, "learning_rate": 5.1132319480308725e-06, "loss": 0.431, "step": 27770 }, { "epoch": 1.6311432094416065, "grad_norm": 2.9767091274261475, "learning_rate": 5.109816760882788e-06, "loss": 0.6101, "step": 27780 }, { "epoch": 1.631730374023839, "grad_norm": 4.864443778991699, "learning_rate": 5.106401522475278e-06, "loss": 0.4155, "step": 27790 }, { "epoch": 1.6323175386060713, "grad_norm": 6.867979526519775, "learning_rate": 5.102986234402479e-06, "loss": 0.4483, "step": 27800 }, { "epoch": 1.6329047031883037, "grad_norm": 3.4514834880828857, "learning_rate": 5.099570898258551e-06, "loss": 0.337, "step": 27810 }, { "epoch": 1.6334918677705361, "grad_norm": 3.472400665283203, "learning_rate": 5.096155515637684e-06, "loss": 0.3333, "step": 27820 }, { "epoch": 1.6340790323527685, "grad_norm": 2.6269237995147705, "learning_rate": 5.092740088134077e-06, "loss": 0.3601, "step": 27830 }, { "epoch": 1.634666196935001, "grad_norm": 5.711497783660889, "learning_rate": 5.089324617341963e-06, "loss": 0.5088, "step": 27840 }, { "epoch": 1.6352533615172333, "grad_norm": 2.9754960536956787, "learning_rate": 5.085909104855588e-06, "loss": 0.4045, "step": 27850 }, { "epoch": 1.6358405260994657, "grad_norm": 3.0964107513427734, "learning_rate": 5.082493552269215e-06, "loss": 0.4175, "step": 27860 }, { "epoch": 1.6364276906816981, "grad_norm": 11.684020042419434, "learning_rate": 5.079077961177134e-06, "loss": 0.3797, "step": 27870 }, { "epoch": 1.6370148552639305, "grad_norm": 1.9693069458007812, "learning_rate": 5.075662333173648e-06, "loss": 0.5782, "step": 27880 }, { "epoch": 1.6376020198461627, "grad_norm": 8.34860610961914, "learning_rate": 5.072246669853077e-06, "loss": 0.546, "step": 27890 }, { "epoch": 1.6381891844283953, "grad_norm": 9.680225372314453, "learning_rate": 5.0688309728097565e-06, "loss": 0.443, "step": 27900 }, { "epoch": 1.6387763490106275, "grad_norm": 2.1745104789733887, "learning_rate": 5.065415243638041e-06, "loss": 0.4396, "step": 27910 }, { "epoch": 1.6393635135928601, "grad_norm": 7.963735103607178, "learning_rate": 5.061999483932299e-06, "loss": 0.5279, "step": 27920 }, { "epoch": 1.6399506781750923, "grad_norm": 4.108334541320801, "learning_rate": 5.058583695286911e-06, "loss": 0.4148, "step": 27930 }, { "epoch": 1.640537842757325, "grad_norm": 4.758573532104492, "learning_rate": 5.055167879296275e-06, "loss": 0.4281, "step": 27940 }, { "epoch": 1.6411250073395571, "grad_norm": 6.226439476013184, "learning_rate": 5.0517520375547965e-06, "loss": 0.4007, "step": 27950 }, { "epoch": 1.6417121719217898, "grad_norm": 4.26749849319458, "learning_rate": 5.048336171656899e-06, "loss": 0.314, "step": 27960 }, { "epoch": 1.642299336504022, "grad_norm": 7.822189807891846, "learning_rate": 5.044920283197012e-06, "loss": 0.4585, "step": 27970 }, { "epoch": 1.6428865010862546, "grad_norm": 3.809750556945801, "learning_rate": 5.04150437376958e-06, "loss": 0.4975, "step": 27980 }, { "epoch": 1.6434736656684867, "grad_norm": 3.600837469100952, "learning_rate": 5.038088444969052e-06, "loss": 0.3531, "step": 27990 }, { "epoch": 1.6440608302507194, "grad_norm": 3.458528995513916, "learning_rate": 5.034672498389893e-06, "loss": 0.4738, "step": 28000 }, { "epoch": 1.6446479948329515, "grad_norm": 2.4042530059814453, "learning_rate": 5.03125653562657e-06, "loss": 0.4267, "step": 28010 }, { "epoch": 1.6452351594151842, "grad_norm": 2.0565714836120605, "learning_rate": 5.027840558273559e-06, "loss": 0.5036, "step": 28020 }, { "epoch": 1.6458223239974163, "grad_norm": 2.225984811782837, "learning_rate": 5.024424567925346e-06, "loss": 0.4633, "step": 28030 }, { "epoch": 1.646409488579649, "grad_norm": 7.113766193389893, "learning_rate": 5.021008566176418e-06, "loss": 0.3462, "step": 28040 }, { "epoch": 1.6469966531618812, "grad_norm": 10.189746856689453, "learning_rate": 5.017592554621276e-06, "loss": 0.4177, "step": 28050 }, { "epoch": 1.6475838177441138, "grad_norm": 2.196178913116455, "learning_rate": 5.014176534854414e-06, "loss": 0.4463, "step": 28060 }, { "epoch": 1.648170982326346, "grad_norm": 3.4630720615386963, "learning_rate": 5.010760508470336e-06, "loss": 0.3661, "step": 28070 }, { "epoch": 1.6487581469085786, "grad_norm": 1.9093466997146606, "learning_rate": 5.007344477063552e-06, "loss": 0.4626, "step": 28080 }, { "epoch": 1.6493453114908108, "grad_norm": 4.474472522735596, "learning_rate": 5.003928442228567e-06, "loss": 0.3916, "step": 28090 }, { "epoch": 1.6499324760730434, "grad_norm": 2.1376588344573975, "learning_rate": 5.000512405559896e-06, "loss": 0.413, "step": 28100 }, { "epoch": 1.6505196406552756, "grad_norm": 3.528332233428955, "learning_rate": 4.997096368652048e-06, "loss": 0.3932, "step": 28110 }, { "epoch": 1.6511068052375082, "grad_norm": 1.7175514698028564, "learning_rate": 4.9936803330995355e-06, "loss": 0.5282, "step": 28120 }, { "epoch": 1.6516939698197404, "grad_norm": 4.279748916625977, "learning_rate": 4.990264300496865e-06, "loss": 0.5682, "step": 28130 }, { "epoch": 1.652281134401973, "grad_norm": 2.94350004196167, "learning_rate": 4.986848272438554e-06, "loss": 0.3723, "step": 28140 }, { "epoch": 1.6528682989842052, "grad_norm": 7.024534225463867, "learning_rate": 4.9834322505191045e-06, "loss": 0.3621, "step": 28150 }, { "epoch": 1.6534554635664378, "grad_norm": 4.057692050933838, "learning_rate": 4.980016236333023e-06, "loss": 0.4534, "step": 28160 }, { "epoch": 1.65404262814867, "grad_norm": 5.497777938842773, "learning_rate": 4.976600231474808e-06, "loss": 0.4783, "step": 28170 }, { "epoch": 1.6546297927309026, "grad_norm": 2.0395355224609375, "learning_rate": 4.973184237538959e-06, "loss": 0.4329, "step": 28180 }, { "epoch": 1.6552169573131348, "grad_norm": 7.580916881561279, "learning_rate": 4.969768256119968e-06, "loss": 0.4029, "step": 28190 }, { "epoch": 1.6558041218953674, "grad_norm": 3.117142677307129, "learning_rate": 4.9663522888123174e-06, "loss": 0.4602, "step": 28200 }, { "epoch": 1.6563912864775996, "grad_norm": 2.1240530014038086, "learning_rate": 4.962936337210488e-06, "loss": 0.5145, "step": 28210 }, { "epoch": 1.6569784510598322, "grad_norm": 4.523952007293701, "learning_rate": 4.959520402908952e-06, "loss": 0.3882, "step": 28220 }, { "epoch": 1.6575656156420644, "grad_norm": 4.02252721786499, "learning_rate": 4.956104487502173e-06, "loss": 0.3109, "step": 28230 }, { "epoch": 1.658152780224297, "grad_norm": 6.037666320800781, "learning_rate": 4.952688592584606e-06, "loss": 0.5316, "step": 28240 }, { "epoch": 1.6587399448065292, "grad_norm": 2.3364808559417725, "learning_rate": 4.949272719750696e-06, "loss": 0.555, "step": 28250 }, { "epoch": 1.6593271093887618, "grad_norm": 2.3713231086730957, "learning_rate": 4.945856870594878e-06, "loss": 0.5481, "step": 28260 }, { "epoch": 1.659914273970994, "grad_norm": 3.041388988494873, "learning_rate": 4.942441046711576e-06, "loss": 0.5792, "step": 28270 }, { "epoch": 1.6605014385532266, "grad_norm": 3.519874334335327, "learning_rate": 4.939025249695202e-06, "loss": 0.4193, "step": 28280 }, { "epoch": 1.6610886031354588, "grad_norm": 3.2130792140960693, "learning_rate": 4.935609481140153e-06, "loss": 0.3472, "step": 28290 }, { "epoch": 1.6616757677176914, "grad_norm": 4.163264274597168, "learning_rate": 4.9321937426408214e-06, "loss": 0.4845, "step": 28300 }, { "epoch": 1.6622629322999236, "grad_norm": 4.218867778778076, "learning_rate": 4.9287780357915745e-06, "loss": 0.5833, "step": 28310 }, { "epoch": 1.662850096882156, "grad_norm": 3.794590473175049, "learning_rate": 4.925362362186773e-06, "loss": 0.4032, "step": 28320 }, { "epoch": 1.6634372614643884, "grad_norm": 6.208439826965332, "learning_rate": 4.9219467234207564e-06, "loss": 0.4984, "step": 28330 }, { "epoch": 1.6640244260466208, "grad_norm": 3.3073503971099854, "learning_rate": 4.918531121087851e-06, "loss": 0.3941, "step": 28340 }, { "epoch": 1.6646115906288532, "grad_norm": 2.39725661277771, "learning_rate": 4.915115556782367e-06, "loss": 0.4376, "step": 28350 }, { "epoch": 1.6651987552110856, "grad_norm": 10.751689910888672, "learning_rate": 4.911700032098593e-06, "loss": 0.4921, "step": 28360 }, { "epoch": 1.665785919793318, "grad_norm": 3.551356077194214, "learning_rate": 4.908284548630802e-06, "loss": 0.387, "step": 28370 }, { "epoch": 1.6663730843755504, "grad_norm": 3.387972831726074, "learning_rate": 4.9048691079732494e-06, "loss": 0.381, "step": 28380 }, { "epoch": 1.6669602489577828, "grad_norm": 7.1708550453186035, "learning_rate": 4.901453711720167e-06, "loss": 0.5348, "step": 28390 }, { "epoch": 1.6675474135400152, "grad_norm": 4.561079978942871, "learning_rate": 4.898038361465767e-06, "loss": 0.3957, "step": 28400 }, { "epoch": 1.6681345781222476, "grad_norm": 2.110133409500122, "learning_rate": 4.894623058804242e-06, "loss": 0.3329, "step": 28410 }, { "epoch": 1.66872174270448, "grad_norm": 2.8327040672302246, "learning_rate": 4.891207805329759e-06, "loss": 0.4313, "step": 28420 }, { "epoch": 1.6693089072867124, "grad_norm": 5.452343940734863, "learning_rate": 4.887792602636465e-06, "loss": 0.459, "step": 28430 }, { "epoch": 1.6698960718689448, "grad_norm": 2.6644647121429443, "learning_rate": 4.884377452318483e-06, "loss": 0.4282, "step": 28440 }, { "epoch": 1.6704832364511772, "grad_norm": 4.046319961547852, "learning_rate": 4.8809623559699055e-06, "loss": 0.4739, "step": 28450 }, { "epoch": 1.6710704010334096, "grad_norm": 2.6572346687316895, "learning_rate": 4.8775473151848125e-06, "loss": 0.4795, "step": 28460 }, { "epoch": 1.671657565615642, "grad_norm": 3.3787529468536377, "learning_rate": 4.874132331557247e-06, "loss": 0.4351, "step": 28470 }, { "epoch": 1.6722447301978745, "grad_norm": 4.042003154754639, "learning_rate": 4.870717406681228e-06, "loss": 0.5035, "step": 28480 }, { "epoch": 1.6728318947801069, "grad_norm": 2.1263248920440674, "learning_rate": 4.867302542150752e-06, "loss": 0.4903, "step": 28490 }, { "epoch": 1.6734190593623393, "grad_norm": 16.453136444091797, "learning_rate": 4.8638877395597785e-06, "loss": 0.3448, "step": 28500 }, { "epoch": 1.6740062239445717, "grad_norm": 3.5100743770599365, "learning_rate": 4.860473000502246e-06, "loss": 0.347, "step": 28510 }, { "epoch": 1.674593388526804, "grad_norm": 15.467438697814941, "learning_rate": 4.857058326572059e-06, "loss": 0.4633, "step": 28520 }, { "epoch": 1.6751805531090365, "grad_norm": 9.022804260253906, "learning_rate": 4.853643719363092e-06, "loss": 0.4234, "step": 28530 }, { "epoch": 1.6757677176912689, "grad_norm": 2.2666947841644287, "learning_rate": 4.850229180469191e-06, "loss": 0.494, "step": 28540 }, { "epoch": 1.6763548822735013, "grad_norm": 2.2741074562072754, "learning_rate": 4.8468147114841696e-06, "loss": 0.4097, "step": 28550 }, { "epoch": 1.6769420468557337, "grad_norm": 5.691890239715576, "learning_rate": 4.843400314001804e-06, "loss": 0.4678, "step": 28560 }, { "epoch": 1.677529211437966, "grad_norm": 1.8255608081817627, "learning_rate": 4.839985989615843e-06, "loss": 0.3082, "step": 28570 }, { "epoch": 1.6781163760201985, "grad_norm": 3.0283148288726807, "learning_rate": 4.836571739919999e-06, "loss": 0.4372, "step": 28580 }, { "epoch": 1.6787035406024309, "grad_norm": 6.301861763000488, "learning_rate": 4.833157566507947e-06, "loss": 0.4765, "step": 28590 }, { "epoch": 1.6792907051846633, "grad_norm": 3.1649675369262695, "learning_rate": 4.829743470973331e-06, "loss": 0.4464, "step": 28600 }, { "epoch": 1.6798778697668957, "grad_norm": 11.830769538879395, "learning_rate": 4.826329454909754e-06, "loss": 0.4651, "step": 28610 }, { "epoch": 1.680465034349128, "grad_norm": 3.091510057449341, "learning_rate": 4.822915519910788e-06, "loss": 0.4085, "step": 28620 }, { "epoch": 1.6810521989313605, "grad_norm": 7.003025531768799, "learning_rate": 4.81950166756996e-06, "loss": 0.4291, "step": 28630 }, { "epoch": 1.6816393635135929, "grad_norm": 4.229546070098877, "learning_rate": 4.816087899480765e-06, "loss": 0.3676, "step": 28640 }, { "epoch": 1.6822265280958253, "grad_norm": 2.2881667613983154, "learning_rate": 4.812674217236654e-06, "loss": 0.4477, "step": 28650 }, { "epoch": 1.6828136926780577, "grad_norm": 2.0704379081726074, "learning_rate": 4.809260622431038e-06, "loss": 0.4171, "step": 28660 }, { "epoch": 1.68340085726029, "grad_norm": 3.754830837249756, "learning_rate": 4.805847116657291e-06, "loss": 0.2896, "step": 28670 }, { "epoch": 1.6839880218425225, "grad_norm": 2.07936954498291, "learning_rate": 4.802433701508742e-06, "loss": 0.4032, "step": 28680 }, { "epoch": 1.684575186424755, "grad_norm": 9.968195915222168, "learning_rate": 4.799020378578676e-06, "loss": 0.457, "step": 28690 }, { "epoch": 1.6851623510069873, "grad_norm": 5.528543472290039, "learning_rate": 4.795607149460344e-06, "loss": 0.436, "step": 28700 }, { "epoch": 1.6857495155892197, "grad_norm": 2.0613644123077393, "learning_rate": 4.792194015746944e-06, "loss": 0.3418, "step": 28710 }, { "epoch": 1.686336680171452, "grad_norm": 4.360691070556641, "learning_rate": 4.788780979031631e-06, "loss": 0.4674, "step": 28720 }, { "epoch": 1.6869238447536845, "grad_norm": 2.753021478652954, "learning_rate": 4.785368040907518e-06, "loss": 0.3349, "step": 28730 }, { "epoch": 1.687511009335917, "grad_norm": 6.23213529586792, "learning_rate": 4.78195520296767e-06, "loss": 0.5106, "step": 28740 }, { "epoch": 1.6880981739181493, "grad_norm": 4.853461742401123, "learning_rate": 4.778542466805104e-06, "loss": 0.457, "step": 28750 }, { "epoch": 1.6886853385003815, "grad_norm": 8.210811614990234, "learning_rate": 4.7751298340127926e-06, "loss": 0.3134, "step": 28760 }, { "epoch": 1.6892725030826141, "grad_norm": 3.5536646842956543, "learning_rate": 4.771717306183656e-06, "loss": 0.4522, "step": 28770 }, { "epoch": 1.6898596676648463, "grad_norm": 3.066075086593628, "learning_rate": 4.768304884910571e-06, "loss": 0.2973, "step": 28780 }, { "epoch": 1.690446832247079, "grad_norm": 8.193660736083984, "learning_rate": 4.764892571786359e-06, "loss": 0.4095, "step": 28790 }, { "epoch": 1.691033996829311, "grad_norm": 3.4400475025177, "learning_rate": 4.761480368403796e-06, "loss": 0.373, "step": 28800 }, { "epoch": 1.6916211614115437, "grad_norm": 4.475313663482666, "learning_rate": 4.758068276355602e-06, "loss": 0.3278, "step": 28810 }, { "epoch": 1.692208325993776, "grad_norm": 2.5287258625030518, "learning_rate": 4.754656297234448e-06, "loss": 0.462, "step": 28820 }, { "epoch": 1.6927954905760085, "grad_norm": 4.227770805358887, "learning_rate": 4.751244432632952e-06, "loss": 0.3797, "step": 28830 }, { "epoch": 1.6933826551582407, "grad_norm": 3.925023317337036, "learning_rate": 4.747832684143676e-06, "loss": 0.4875, "step": 28840 }, { "epoch": 1.6939698197404733, "grad_norm": 14.44031810760498, "learning_rate": 4.744421053359131e-06, "loss": 0.4505, "step": 28850 }, { "epoch": 1.6945569843227055, "grad_norm": 2.821448802947998, "learning_rate": 4.741009541871774e-06, "loss": 0.5292, "step": 28860 }, { "epoch": 1.6951441489049381, "grad_norm": 6.500932693481445, "learning_rate": 4.737598151274001e-06, "loss": 0.4483, "step": 28870 }, { "epoch": 1.6957313134871703, "grad_norm": 2.857480049133301, "learning_rate": 4.7341868831581574e-06, "loss": 0.3656, "step": 28880 }, { "epoch": 1.696318478069403, "grad_norm": 10.601956367492676, "learning_rate": 4.730775739116528e-06, "loss": 0.4922, "step": 28890 }, { "epoch": 1.6969056426516351, "grad_norm": 5.0107879638671875, "learning_rate": 4.727364720741341e-06, "loss": 0.397, "step": 28900 }, { "epoch": 1.6974928072338678, "grad_norm": 1.8779946565628052, "learning_rate": 4.723953829624763e-06, "loss": 0.3731, "step": 28910 }, { "epoch": 1.6980799718161, "grad_norm": 7.16525936126709, "learning_rate": 4.720543067358906e-06, "loss": 0.4279, "step": 28920 }, { "epoch": 1.6986671363983326, "grad_norm": 8.230134963989258, "learning_rate": 4.717132435535818e-06, "loss": 0.4338, "step": 28930 }, { "epoch": 1.6992543009805647, "grad_norm": 4.434521675109863, "learning_rate": 4.71372193574749e-06, "loss": 0.4188, "step": 28940 }, { "epoch": 1.6998414655627974, "grad_norm": 3.9650657176971436, "learning_rate": 4.710311569585847e-06, "loss": 0.4561, "step": 28950 }, { "epoch": 1.7004286301450295, "grad_norm": 2.3979997634887695, "learning_rate": 4.706901338642755e-06, "loss": 0.3659, "step": 28960 }, { "epoch": 1.7010157947272622, "grad_norm": 16.215383529663086, "learning_rate": 4.7034912445100154e-06, "loss": 0.5101, "step": 28970 }, { "epoch": 1.7016029593094943, "grad_norm": 1.4890162944793701, "learning_rate": 4.700081288779364e-06, "loss": 0.3415, "step": 28980 }, { "epoch": 1.702190123891727, "grad_norm": 6.0299787521362305, "learning_rate": 4.696671473042476e-06, "loss": 0.3751, "step": 28990 }, { "epoch": 1.7027772884739591, "grad_norm": 1.7463719844818115, "learning_rate": 4.693261798890958e-06, "loss": 0.4408, "step": 29000 }, { "epoch": 1.7033644530561918, "grad_norm": 4.462378025054932, "learning_rate": 4.68985226791635e-06, "loss": 0.4235, "step": 29010 }, { "epoch": 1.703951617638424, "grad_norm": 4.727412223815918, "learning_rate": 4.68644288171013e-06, "loss": 0.3971, "step": 29020 }, { "epoch": 1.7045387822206566, "grad_norm": 6.784727573394775, "learning_rate": 4.6830336418637045e-06, "loss": 0.451, "step": 29030 }, { "epoch": 1.7051259468028888, "grad_norm": 3.787842035293579, "learning_rate": 4.679624549968412e-06, "loss": 0.4016, "step": 29040 }, { "epoch": 1.7057131113851214, "grad_norm": 1.9500980377197266, "learning_rate": 4.676215607615522e-06, "loss": 0.4391, "step": 29050 }, { "epoch": 1.7063002759673536, "grad_norm": 2.8674733638763428, "learning_rate": 4.672806816396234e-06, "loss": 0.3585, "step": 29060 }, { "epoch": 1.7068874405495862, "grad_norm": 3.1870474815368652, "learning_rate": 4.669398177901679e-06, "loss": 0.4887, "step": 29070 }, { "epoch": 1.7074746051318184, "grad_norm": 4.739397048950195, "learning_rate": 4.6659896937229135e-06, "loss": 0.474, "step": 29080 }, { "epoch": 1.708061769714051, "grad_norm": 2.651350498199463, "learning_rate": 4.662581365450923e-06, "loss": 0.5728, "step": 29090 }, { "epoch": 1.7086489342962832, "grad_norm": 5.6119208335876465, "learning_rate": 4.659173194676625e-06, "loss": 0.3828, "step": 29100 }, { "epoch": 1.7092360988785158, "grad_norm": 2.8338398933410645, "learning_rate": 4.655765182990856e-06, "loss": 0.3257, "step": 29110 }, { "epoch": 1.709823263460748, "grad_norm": 9.440893173217773, "learning_rate": 4.652357331984382e-06, "loss": 0.3968, "step": 29120 }, { "epoch": 1.7104104280429806, "grad_norm": 5.442182540893555, "learning_rate": 4.648949643247896e-06, "loss": 0.4645, "step": 29130 }, { "epoch": 1.7109975926252128, "grad_norm": 4.585474967956543, "learning_rate": 4.64554211837201e-06, "loss": 0.3965, "step": 29140 }, { "epoch": 1.7115847572074454, "grad_norm": 9.1046781539917, "learning_rate": 4.6421347589472624e-06, "loss": 0.4946, "step": 29150 }, { "epoch": 1.7121719217896776, "grad_norm": 6.457292556762695, "learning_rate": 4.638727566564118e-06, "loss": 0.3936, "step": 29160 }, { "epoch": 1.7127590863719102, "grad_norm": 6.585474491119385, "learning_rate": 4.635320542812953e-06, "loss": 0.4255, "step": 29170 }, { "epoch": 1.7133462509541424, "grad_norm": 4.624105930328369, "learning_rate": 4.631913689284079e-06, "loss": 0.4314, "step": 29180 }, { "epoch": 1.7139334155363748, "grad_norm": 3.0577826499938965, "learning_rate": 4.628507007567718e-06, "loss": 0.3292, "step": 29190 }, { "epoch": 1.7145205801186072, "grad_norm": 2.5361268520355225, "learning_rate": 4.625100499254017e-06, "loss": 0.2796, "step": 29200 }, { "epoch": 1.7151077447008396, "grad_norm": 4.46102237701416, "learning_rate": 4.621694165933036e-06, "loss": 0.3564, "step": 29210 }, { "epoch": 1.715694909283072, "grad_norm": 2.9630675315856934, "learning_rate": 4.6182880091947595e-06, "loss": 0.3842, "step": 29220 }, { "epoch": 1.7162820738653044, "grad_norm": 3.199612617492676, "learning_rate": 4.614882030629088e-06, "loss": 0.4641, "step": 29230 }, { "epoch": 1.7168692384475368, "grad_norm": 5.016296863555908, "learning_rate": 4.611476231825836e-06, "loss": 0.4137, "step": 29240 }, { "epoch": 1.7174564030297692, "grad_norm": 3.0914220809936523, "learning_rate": 4.608070614374739e-06, "loss": 0.5032, "step": 29250 }, { "epoch": 1.7180435676120016, "grad_norm": 5.761440753936768, "learning_rate": 4.60466517986544e-06, "loss": 0.3974, "step": 29260 }, { "epoch": 1.718630732194234, "grad_norm": 2.1898138523101807, "learning_rate": 4.6012599298875085e-06, "loss": 0.3201, "step": 29270 }, { "epoch": 1.7192178967764664, "grad_norm": 2.706757068634033, "learning_rate": 4.597854866030417e-06, "loss": 0.3091, "step": 29280 }, { "epoch": 1.7198050613586988, "grad_norm": 3.125842571258545, "learning_rate": 4.5944499898835566e-06, "loss": 0.4606, "step": 29290 }, { "epoch": 1.7203922259409312, "grad_norm": 3.253680467605591, "learning_rate": 4.591045303036228e-06, "loss": 0.5022, "step": 29300 }, { "epoch": 1.7209793905231636, "grad_norm": 2.6894419193267822, "learning_rate": 4.587640807077644e-06, "loss": 0.4923, "step": 29310 }, { "epoch": 1.721566555105396, "grad_norm": 5.794042110443115, "learning_rate": 4.584236503596934e-06, "loss": 0.4658, "step": 29320 }, { "epoch": 1.7221537196876284, "grad_norm": 4.685024261474609, "learning_rate": 4.5808323941831275e-06, "loss": 0.4648, "step": 29330 }, { "epoch": 1.7227408842698608, "grad_norm": 4.044414520263672, "learning_rate": 4.577428480425169e-06, "loss": 0.456, "step": 29340 }, { "epoch": 1.7233280488520932, "grad_norm": 1.576942801475525, "learning_rate": 4.574024763911915e-06, "loss": 0.4554, "step": 29350 }, { "epoch": 1.7239152134343256, "grad_norm": 10.51234245300293, "learning_rate": 4.570621246232124e-06, "loss": 0.566, "step": 29360 }, { "epoch": 1.724502378016558, "grad_norm": 2.253478765487671, "learning_rate": 4.567217928974465e-06, "loss": 0.4238, "step": 29370 }, { "epoch": 1.7250895425987904, "grad_norm": 1.8634297847747803, "learning_rate": 4.5638148137275095e-06, "loss": 0.4386, "step": 29380 }, { "epoch": 1.7256767071810228, "grad_norm": 1.3794678449630737, "learning_rate": 4.560411902079742e-06, "loss": 0.4285, "step": 29390 }, { "epoch": 1.7262638717632552, "grad_norm": 4.540211200714111, "learning_rate": 4.557009195619544e-06, "loss": 0.3651, "step": 29400 }, { "epoch": 1.7268510363454876, "grad_norm": 2.7970399856567383, "learning_rate": 4.553606695935207e-06, "loss": 0.4426, "step": 29410 }, { "epoch": 1.72743820092772, "grad_norm": 6.309725761413574, "learning_rate": 4.55020440461492e-06, "loss": 0.4403, "step": 29420 }, { "epoch": 1.7280253655099524, "grad_norm": 7.05142879486084, "learning_rate": 4.546802323246784e-06, "loss": 0.3616, "step": 29430 }, { "epoch": 1.7286125300921849, "grad_norm": 7.938793659210205, "learning_rate": 4.543400453418794e-06, "loss": 0.5098, "step": 29440 }, { "epoch": 1.7291996946744173, "grad_norm": 3.732226848602295, "learning_rate": 4.539998796718849e-06, "loss": 0.4033, "step": 29450 }, { "epoch": 1.7297868592566497, "grad_norm": 3.6301896572113037, "learning_rate": 4.536597354734746e-06, "loss": 0.3481, "step": 29460 }, { "epoch": 1.730374023838882, "grad_norm": 2.3909292221069336, "learning_rate": 4.533196129054188e-06, "loss": 0.4163, "step": 29470 }, { "epoch": 1.7309611884211145, "grad_norm": 1.887229084968567, "learning_rate": 4.5297951212647704e-06, "loss": 0.5471, "step": 29480 }, { "epoch": 1.7315483530033469, "grad_norm": 1.5500954389572144, "learning_rate": 4.526394332953991e-06, "loss": 0.3797, "step": 29490 }, { "epoch": 1.7321355175855793, "grad_norm": 4.1936845779418945, "learning_rate": 4.522993765709241e-06, "loss": 0.4914, "step": 29500 }, { "epoch": 1.7327226821678117, "grad_norm": 2.4195995330810547, "learning_rate": 4.519593421117815e-06, "loss": 0.4351, "step": 29510 }, { "epoch": 1.733309846750044, "grad_norm": 7.172719955444336, "learning_rate": 4.516193300766899e-06, "loss": 0.4471, "step": 29520 }, { "epoch": 1.7338970113322765, "grad_norm": 3.291029930114746, "learning_rate": 4.512793406243573e-06, "loss": 0.3634, "step": 29530 }, { "epoch": 1.7344841759145089, "grad_norm": 2.5938611030578613, "learning_rate": 4.509393739134815e-06, "loss": 0.3901, "step": 29540 }, { "epoch": 1.7350713404967413, "grad_norm": 3.674124002456665, "learning_rate": 4.505994301027497e-06, "loss": 0.3121, "step": 29550 }, { "epoch": 1.7356585050789737, "grad_norm": 3.044660806655884, "learning_rate": 4.502595093508381e-06, "loss": 0.5114, "step": 29560 }, { "epoch": 1.736245669661206, "grad_norm": 3.6121671199798584, "learning_rate": 4.499196118164124e-06, "loss": 0.3403, "step": 29570 }, { "epoch": 1.7368328342434385, "grad_norm": 5.488725185394287, "learning_rate": 4.4957973765812705e-06, "loss": 0.4632, "step": 29580 }, { "epoch": 1.7374199988256709, "grad_norm": 3.4589757919311523, "learning_rate": 4.492398870346265e-06, "loss": 0.601, "step": 29590 }, { "epoch": 1.7380071634079033, "grad_norm": 5.317209243774414, "learning_rate": 4.489000601045432e-06, "loss": 0.3612, "step": 29600 }, { "epoch": 1.7385943279901355, "grad_norm": 3.419118642807007, "learning_rate": 4.4856025702649905e-06, "loss": 0.5216, "step": 29610 }, { "epoch": 1.739181492572368, "grad_norm": 10.98812198638916, "learning_rate": 4.482204779591049e-06, "loss": 0.5084, "step": 29620 }, { "epoch": 1.7397686571546003, "grad_norm": 4.388797760009766, "learning_rate": 4.4788072306096e-06, "loss": 0.4671, "step": 29630 }, { "epoch": 1.740355821736833, "grad_norm": 14.608619689941406, "learning_rate": 4.475409924906527e-06, "loss": 0.4032, "step": 29640 }, { "epoch": 1.740942986319065, "grad_norm": 2.0312883853912354, "learning_rate": 4.472012864067597e-06, "loss": 0.3364, "step": 29650 }, { "epoch": 1.7415301509012977, "grad_norm": 3.3819291591644287, "learning_rate": 4.468616049678463e-06, "loss": 0.3651, "step": 29660 }, { "epoch": 1.7421173154835299, "grad_norm": 3.119858980178833, "learning_rate": 4.465219483324667e-06, "loss": 0.407, "step": 29670 }, { "epoch": 1.7427044800657625, "grad_norm": 7.511632919311523, "learning_rate": 4.461823166591633e-06, "loss": 0.4267, "step": 29680 }, { "epoch": 1.7432916446479947, "grad_norm": 2.7822799682617188, "learning_rate": 4.458427101064664e-06, "loss": 0.353, "step": 29690 }, { "epoch": 1.7438788092302273, "grad_norm": 2.3539185523986816, "learning_rate": 4.455031288328952e-06, "loss": 0.3752, "step": 29700 }, { "epoch": 1.7444659738124595, "grad_norm": 4.612985610961914, "learning_rate": 4.4516357299695675e-06, "loss": 0.5507, "step": 29710 }, { "epoch": 1.7450531383946921, "grad_norm": 1.2113133668899536, "learning_rate": 4.448240427571464e-06, "loss": 0.4202, "step": 29720 }, { "epoch": 1.7456403029769243, "grad_norm": 1.7043274641036987, "learning_rate": 4.444845382719473e-06, "loss": 0.3754, "step": 29730 }, { "epoch": 1.746227467559157, "grad_norm": 2.7217307090759277, "learning_rate": 4.4414505969983085e-06, "loss": 0.6145, "step": 29740 }, { "epoch": 1.746814632141389, "grad_norm": 3.6493546962738037, "learning_rate": 4.438056071992565e-06, "loss": 0.4406, "step": 29750 }, { "epoch": 1.7474017967236217, "grad_norm": 2.093169927597046, "learning_rate": 4.43466180928671e-06, "loss": 0.5304, "step": 29760 }, { "epoch": 1.747988961305854, "grad_norm": 6.434410572052002, "learning_rate": 4.431267810465093e-06, "loss": 0.6605, "step": 29770 }, { "epoch": 1.7485761258880865, "grad_norm": 3.2452597618103027, "learning_rate": 4.427874077111939e-06, "loss": 0.312, "step": 29780 }, { "epoch": 1.7491632904703187, "grad_norm": 3.0425922870635986, "learning_rate": 4.424480610811348e-06, "loss": 0.4069, "step": 29790 }, { "epoch": 1.7497504550525513, "grad_norm": 3.8536808490753174, "learning_rate": 4.4210874131472955e-06, "loss": 0.3982, "step": 29800 }, { "epoch": 1.7503376196347835, "grad_norm": 4.338176250457764, "learning_rate": 4.417694485703634e-06, "loss": 0.4267, "step": 29810 }, { "epoch": 1.7509247842170161, "grad_norm": 6.66811466217041, "learning_rate": 4.414301830064086e-06, "loss": 0.5228, "step": 29820 }, { "epoch": 1.7515119487992483, "grad_norm": 3.697617292404175, "learning_rate": 4.410909447812252e-06, "loss": 0.5162, "step": 29830 }, { "epoch": 1.752099113381481, "grad_norm": 6.7773590087890625, "learning_rate": 4.407517340531602e-06, "loss": 0.4475, "step": 29840 }, { "epoch": 1.7526862779637131, "grad_norm": 2.630486488342285, "learning_rate": 4.404125509805476e-06, "loss": 0.3849, "step": 29850 }, { "epoch": 1.7532734425459457, "grad_norm": 2.6718413829803467, "learning_rate": 4.400733957217087e-06, "loss": 0.431, "step": 29860 }, { "epoch": 1.753860607128178, "grad_norm": 5.541154384613037, "learning_rate": 4.397342684349521e-06, "loss": 0.3372, "step": 29870 }, { "epoch": 1.7544477717104106, "grad_norm": 3.4535439014434814, "learning_rate": 4.393951692785727e-06, "loss": 0.5389, "step": 29880 }, { "epoch": 1.7550349362926427, "grad_norm": 1.2250622510910034, "learning_rate": 4.390560984108528e-06, "loss": 0.3637, "step": 29890 }, { "epoch": 1.7556221008748754, "grad_norm": 3.3170135021209717, "learning_rate": 4.38717055990061e-06, "loss": 0.4738, "step": 29900 }, { "epoch": 1.7562092654571075, "grad_norm": 3.1428778171539307, "learning_rate": 4.383780421744534e-06, "loss": 0.4587, "step": 29910 }, { "epoch": 1.7567964300393402, "grad_norm": 4.532366752624512, "learning_rate": 4.38039057122272e-06, "loss": 0.488, "step": 29920 }, { "epoch": 1.7573835946215723, "grad_norm": 1.491245985031128, "learning_rate": 4.3770010099174565e-06, "loss": 0.328, "step": 29930 }, { "epoch": 1.757970759203805, "grad_norm": 3.051605224609375, "learning_rate": 4.3736117394108985e-06, "loss": 0.3226, "step": 29940 }, { "epoch": 1.7585579237860371, "grad_norm": 3.039125919342041, "learning_rate": 4.370222761285062e-06, "loss": 0.3378, "step": 29950 }, { "epoch": 1.7591450883682698, "grad_norm": 1.491212010383606, "learning_rate": 4.36683407712183e-06, "loss": 0.4546, "step": 29960 }, { "epoch": 1.759732252950502, "grad_norm": 5.964461326599121, "learning_rate": 4.363445688502945e-06, "loss": 0.5549, "step": 29970 }, { "epoch": 1.7603194175327346, "grad_norm": 2.5001049041748047, "learning_rate": 4.360057597010012e-06, "loss": 0.5274, "step": 29980 }, { "epoch": 1.7609065821149668, "grad_norm": 11.947254180908203, "learning_rate": 4.356669804224501e-06, "loss": 0.4194, "step": 29990 }, { "epoch": 1.7614937466971994, "grad_norm": 2.1895601749420166, "learning_rate": 4.35328231172774e-06, "loss": 0.4673, "step": 30000 }, { "epoch": 1.7620809112794316, "grad_norm": 5.569532871246338, "learning_rate": 4.349895121100916e-06, "loss": 0.4294, "step": 30010 }, { "epoch": 1.7626680758616642, "grad_norm": 3.575198173522949, "learning_rate": 4.346508233925076e-06, "loss": 0.4534, "step": 30020 }, { "epoch": 1.7632552404438964, "grad_norm": 1.7347885370254517, "learning_rate": 4.343121651781127e-06, "loss": 0.4657, "step": 30030 }, { "epoch": 1.763842405026129, "grad_norm": 4.070258617401123, "learning_rate": 4.339735376249827e-06, "loss": 0.4752, "step": 30040 }, { "epoch": 1.7644295696083612, "grad_norm": 6.366807460784912, "learning_rate": 4.3363494089118004e-06, "loss": 0.3947, "step": 30050 }, { "epoch": 1.7650167341905936, "grad_norm": 4.253348350524902, "learning_rate": 4.33296375134752e-06, "loss": 0.3626, "step": 30060 }, { "epoch": 1.765603898772826, "grad_norm": 5.02949333190918, "learning_rate": 4.329578405137322e-06, "loss": 0.4134, "step": 30070 }, { "epoch": 1.7661910633550584, "grad_norm": 4.572470188140869, "learning_rate": 4.3261933718613876e-06, "loss": 0.3928, "step": 30080 }, { "epoch": 1.7667782279372908, "grad_norm": 5.096623420715332, "learning_rate": 4.322808653099758e-06, "loss": 0.3503, "step": 30090 }, { "epoch": 1.7673653925195232, "grad_norm": 4.592552661895752, "learning_rate": 4.319424250432328e-06, "loss": 0.4137, "step": 30100 }, { "epoch": 1.7679525571017556, "grad_norm": 2.594555616378784, "learning_rate": 4.316040165438842e-06, "loss": 0.461, "step": 30110 }, { "epoch": 1.768539721683988, "grad_norm": 2.8691744804382324, "learning_rate": 4.3126563996988955e-06, "loss": 0.444, "step": 30120 }, { "epoch": 1.7691268862662204, "grad_norm": 3.9816391468048096, "learning_rate": 4.3092729547919406e-06, "loss": 0.4241, "step": 30130 }, { "epoch": 1.7697140508484528, "grad_norm": 6.885705471038818, "learning_rate": 4.305889832297272e-06, "loss": 0.5019, "step": 30140 }, { "epoch": 1.7703012154306852, "grad_norm": 2.249239683151245, "learning_rate": 4.30250703379404e-06, "loss": 0.4943, "step": 30150 }, { "epoch": 1.7708883800129176, "grad_norm": 2.4152636528015137, "learning_rate": 4.299124560861241e-06, "loss": 0.4949, "step": 30160 }, { "epoch": 1.77147554459515, "grad_norm": 6.932055473327637, "learning_rate": 4.295742415077722e-06, "loss": 0.3779, "step": 30170 }, { "epoch": 1.7720627091773824, "grad_norm": 2.708404064178467, "learning_rate": 4.2923605980221704e-06, "loss": 0.4457, "step": 30180 }, { "epoch": 1.7726498737596148, "grad_norm": 3.270735740661621, "learning_rate": 4.288979111273129e-06, "loss": 0.5118, "step": 30190 }, { "epoch": 1.7732370383418472, "grad_norm": 5.718830585479736, "learning_rate": 4.2855979564089804e-06, "loss": 0.3801, "step": 30200 }, { "epoch": 1.7738242029240796, "grad_norm": 2.212660789489746, "learning_rate": 4.282217135007955e-06, "loss": 0.4119, "step": 30210 }, { "epoch": 1.774411367506312, "grad_norm": 2.0404860973358154, "learning_rate": 4.278836648648124e-06, "loss": 0.4975, "step": 30220 }, { "epoch": 1.7749985320885444, "grad_norm": 7.874519348144531, "learning_rate": 4.27545649890741e-06, "loss": 0.408, "step": 30230 }, { "epoch": 1.7755856966707768, "grad_norm": 2.530418872833252, "learning_rate": 4.272076687363569e-06, "loss": 0.4013, "step": 30240 }, { "epoch": 1.7761728612530092, "grad_norm": 2.1382334232330322, "learning_rate": 4.268697215594206e-06, "loss": 0.3622, "step": 30250 }, { "epoch": 1.7767600258352416, "grad_norm": 3.60783052444458, "learning_rate": 4.265318085176766e-06, "loss": 0.4403, "step": 30260 }, { "epoch": 1.777347190417474, "grad_norm": 5.363211631774902, "learning_rate": 4.261939297688531e-06, "loss": 0.4216, "step": 30270 }, { "epoch": 1.7779343549997064, "grad_norm": 1.8156025409698486, "learning_rate": 4.258560854706625e-06, "loss": 0.44, "step": 30280 }, { "epoch": 1.7785215195819388, "grad_norm": 1.5479927062988281, "learning_rate": 4.2551827578080165e-06, "loss": 0.374, "step": 30290 }, { "epoch": 1.7791086841641712, "grad_norm": 1.7697392702102661, "learning_rate": 4.251805008569501e-06, "loss": 0.2967, "step": 30300 }, { "epoch": 1.7796958487464036, "grad_norm": 7.8800225257873535, "learning_rate": 4.248427608567723e-06, "loss": 0.3698, "step": 30310 }, { "epoch": 1.780283013328636, "grad_norm": 3.2779128551483154, "learning_rate": 4.24505055937916e-06, "loss": 0.4436, "step": 30320 }, { "epoch": 1.7808701779108684, "grad_norm": 3.8572371006011963, "learning_rate": 4.241673862580124e-06, "loss": 0.3866, "step": 30330 }, { "epoch": 1.7814573424931008, "grad_norm": 2.4028494358062744, "learning_rate": 4.238297519746763e-06, "loss": 0.2915, "step": 30340 }, { "epoch": 1.7820445070753332, "grad_norm": 3.7680466175079346, "learning_rate": 4.2349215324550605e-06, "loss": 0.3534, "step": 30350 }, { "epoch": 1.7826316716575656, "grad_norm": 5.039106369018555, "learning_rate": 4.2315459022808345e-06, "loss": 0.4972, "step": 30360 }, { "epoch": 1.783218836239798, "grad_norm": 3.3594017028808594, "learning_rate": 4.228170630799736e-06, "loss": 0.4533, "step": 30370 }, { "epoch": 1.7838060008220304, "grad_norm": 4.022374153137207, "learning_rate": 4.224795719587247e-06, "loss": 0.4982, "step": 30380 }, { "epoch": 1.7843931654042628, "grad_norm": 2.046414852142334, "learning_rate": 4.221421170218685e-06, "loss": 0.5079, "step": 30390 }, { "epoch": 1.7849803299864953, "grad_norm": 10.086609840393066, "learning_rate": 4.218046984269196e-06, "loss": 0.3644, "step": 30400 }, { "epoch": 1.7855674945687277, "grad_norm": 2.4034268856048584, "learning_rate": 4.214673163313755e-06, "loss": 0.3947, "step": 30410 }, { "epoch": 1.78615465915096, "grad_norm": 5.42933988571167, "learning_rate": 4.211299708927171e-06, "loss": 0.4469, "step": 30420 }, { "epoch": 1.7867418237331925, "grad_norm": 1.6475788354873657, "learning_rate": 4.207926622684075e-06, "loss": 0.4204, "step": 30430 }, { "epoch": 1.7873289883154249, "grad_norm": 3.919377565383911, "learning_rate": 4.2045539061589345e-06, "loss": 0.5615, "step": 30440 }, { "epoch": 1.7879161528976573, "grad_norm": 3.3358259201049805, "learning_rate": 4.20118156092604e-06, "loss": 0.4177, "step": 30450 }, { "epoch": 1.7885033174798897, "grad_norm": 4.959526062011719, "learning_rate": 4.1978095885595046e-06, "loss": 0.3814, "step": 30460 }, { "epoch": 1.789090482062122, "grad_norm": 5.448666095733643, "learning_rate": 4.194437990633277e-06, "loss": 0.3984, "step": 30470 }, { "epoch": 1.7896776466443542, "grad_norm": 2.9998528957366943, "learning_rate": 4.191066768721123e-06, "loss": 0.4433, "step": 30480 }, { "epoch": 1.7902648112265869, "grad_norm": 4.50815486907959, "learning_rate": 4.18769592439664e-06, "loss": 0.5131, "step": 30490 }, { "epoch": 1.790851975808819, "grad_norm": 4.74385404586792, "learning_rate": 4.184325459233239e-06, "loss": 0.4356, "step": 30500 }, { "epoch": 1.7914391403910517, "grad_norm": 4.2751288414001465, "learning_rate": 4.1809553748041625e-06, "loss": 0.4137, "step": 30510 }, { "epoch": 1.7920263049732839, "grad_norm": 12.43514347076416, "learning_rate": 4.177585672682474e-06, "loss": 0.5545, "step": 30520 }, { "epoch": 1.7926134695555165, "grad_norm": 1.6604920625686646, "learning_rate": 4.174216354441056e-06, "loss": 0.4071, "step": 30530 }, { "epoch": 1.7932006341377487, "grad_norm": 4.191110134124756, "learning_rate": 4.170847421652611e-06, "loss": 0.3041, "step": 30540 }, { "epoch": 1.7937877987199813, "grad_norm": 2.8767855167388916, "learning_rate": 4.1674788758896675e-06, "loss": 0.397, "step": 30550 }, { "epoch": 1.7943749633022135, "grad_norm": 3.393357515335083, "learning_rate": 4.164110718724569e-06, "loss": 0.37, "step": 30560 }, { "epoch": 1.794962127884446, "grad_norm": 2.442314624786377, "learning_rate": 4.160742951729475e-06, "loss": 0.5149, "step": 30570 }, { "epoch": 1.7955492924666783, "grad_norm": 5.905104637145996, "learning_rate": 4.157375576476371e-06, "loss": 0.5213, "step": 30580 }, { "epoch": 1.796136457048911, "grad_norm": 8.823729515075684, "learning_rate": 4.154008594537049e-06, "loss": 0.3716, "step": 30590 }, { "epoch": 1.796723621631143, "grad_norm": 3.3217146396636963, "learning_rate": 4.150642007483127e-06, "loss": 0.4209, "step": 30600 }, { "epoch": 1.7973107862133757, "grad_norm": 4.0031280517578125, "learning_rate": 4.147275816886035e-06, "loss": 0.3986, "step": 30610 }, { "epoch": 1.7978979507956079, "grad_norm": 2.8466522693634033, "learning_rate": 4.143910024317013e-06, "loss": 0.3551, "step": 30620 }, { "epoch": 1.7984851153778405, "grad_norm": 6.9865336418151855, "learning_rate": 4.140544631347125e-06, "loss": 0.3852, "step": 30630 }, { "epoch": 1.7990722799600727, "grad_norm": 4.614768981933594, "learning_rate": 4.137179639547242e-06, "loss": 0.4006, "step": 30640 }, { "epoch": 1.7996594445423053, "grad_norm": 14.90381908416748, "learning_rate": 4.133815050488049e-06, "loss": 0.4321, "step": 30650 }, { "epoch": 1.8002466091245375, "grad_norm": 4.6151275634765625, "learning_rate": 4.130450865740043e-06, "loss": 0.4085, "step": 30660 }, { "epoch": 1.8008337737067701, "grad_norm": 4.505660057067871, "learning_rate": 4.127087086873531e-06, "loss": 0.4796, "step": 30670 }, { "epoch": 1.8014209382890023, "grad_norm": 13.176422119140625, "learning_rate": 4.123723715458636e-06, "loss": 0.4045, "step": 30680 }, { "epoch": 1.802008102871235, "grad_norm": 4.628911018371582, "learning_rate": 4.120360753065282e-06, "loss": 0.4701, "step": 30690 }, { "epoch": 1.802595267453467, "grad_norm": 5.414675235748291, "learning_rate": 4.116998201263208e-06, "loss": 0.4237, "step": 30700 }, { "epoch": 1.8031824320356997, "grad_norm": 2.4007904529571533, "learning_rate": 4.113636061621963e-06, "loss": 0.5502, "step": 30710 }, { "epoch": 1.803769596617932, "grad_norm": 2.829594612121582, "learning_rate": 4.1102743357108995e-06, "loss": 0.4118, "step": 30720 }, { "epoch": 1.8043567612001645, "grad_norm": 3.768345355987549, "learning_rate": 4.106913025099177e-06, "loss": 0.5095, "step": 30730 }, { "epoch": 1.8049439257823967, "grad_norm": 2.207765579223633, "learning_rate": 4.103552131355764e-06, "loss": 0.4444, "step": 30740 }, { "epoch": 1.8055310903646293, "grad_norm": 2.928898811340332, "learning_rate": 4.100191656049431e-06, "loss": 0.3363, "step": 30750 }, { "epoch": 1.8061182549468615, "grad_norm": 8.391777992248535, "learning_rate": 4.096831600748756e-06, "loss": 0.4974, "step": 30760 }, { "epoch": 1.8067054195290941, "grad_norm": 7.122884750366211, "learning_rate": 4.09347196702212e-06, "loss": 0.5325, "step": 30770 }, { "epoch": 1.8072925841113263, "grad_norm": 5.771387100219727, "learning_rate": 4.090112756437704e-06, "loss": 0.487, "step": 30780 }, { "epoch": 1.807879748693559, "grad_norm": 3.4388833045959473, "learning_rate": 4.0867539705635015e-06, "loss": 0.4667, "step": 30790 }, { "epoch": 1.8084669132757911, "grad_norm": 1.964622139930725, "learning_rate": 4.083395610967295e-06, "loss": 0.3127, "step": 30800 }, { "epoch": 1.8090540778580237, "grad_norm": 10.077486038208008, "learning_rate": 4.0800376792166785e-06, "loss": 0.3999, "step": 30810 }, { "epoch": 1.809641242440256, "grad_norm": 7.176054000854492, "learning_rate": 4.076680176879037e-06, "loss": 0.4964, "step": 30820 }, { "epoch": 1.8102284070224886, "grad_norm": 4.645507335662842, "learning_rate": 4.073323105521561e-06, "loss": 0.4377, "step": 30830 }, { "epoch": 1.8108155716047207, "grad_norm": 3.0794332027435303, "learning_rate": 4.069966466711242e-06, "loss": 0.3738, "step": 30840 }, { "epoch": 1.8114027361869534, "grad_norm": 4.175446033477783, "learning_rate": 4.0666102620148615e-06, "loss": 0.4603, "step": 30850 }, { "epoch": 1.8119899007691855, "grad_norm": 1.8879927396774292, "learning_rate": 4.0632544929990044e-06, "loss": 0.483, "step": 30860 }, { "epoch": 1.8125770653514182, "grad_norm": 5.804922103881836, "learning_rate": 4.059899161230054e-06, "loss": 0.4995, "step": 30870 }, { "epoch": 1.8131642299336503, "grad_norm": 5.131938457489014, "learning_rate": 4.056544268274184e-06, "loss": 0.5823, "step": 30880 }, { "epoch": 1.813751394515883, "grad_norm": 7.186208724975586, "learning_rate": 4.053189815697364e-06, "loss": 0.482, "step": 30890 }, { "epoch": 1.8143385590981151, "grad_norm": 2.8029675483703613, "learning_rate": 4.049835805065362e-06, "loss": 0.3002, "step": 30900 }, { "epoch": 1.8149257236803475, "grad_norm": 2.6491806507110596, "learning_rate": 4.046482237943739e-06, "loss": 0.4871, "step": 30910 }, { "epoch": 1.81551288826258, "grad_norm": 13.879087448120117, "learning_rate": 4.043129115897843e-06, "loss": 0.4291, "step": 30920 }, { "epoch": 1.8161000528448124, "grad_norm": 8.887411117553711, "learning_rate": 4.039776440492823e-06, "loss": 0.3801, "step": 30930 }, { "epoch": 1.8166872174270448, "grad_norm": 2.7917773723602295, "learning_rate": 4.036424213293609e-06, "loss": 0.5148, "step": 30940 }, { "epoch": 1.8172743820092772, "grad_norm": 2.571942090988159, "learning_rate": 4.033072435864936e-06, "loss": 0.438, "step": 30950 }, { "epoch": 1.8178615465915096, "grad_norm": 3.604013442993164, "learning_rate": 4.029721109771317e-06, "loss": 0.3973, "step": 30960 }, { "epoch": 1.818448711173742, "grad_norm": 2.8342373371124268, "learning_rate": 4.026370236577058e-06, "loss": 0.4103, "step": 30970 }, { "epoch": 1.8190358757559744, "grad_norm": 14.037745475769043, "learning_rate": 4.023019817846255e-06, "loss": 0.4334, "step": 30980 }, { "epoch": 1.8196230403382068, "grad_norm": 3.7756552696228027, "learning_rate": 4.01966985514279e-06, "loss": 0.4811, "step": 30990 }, { "epoch": 1.8202102049204392, "grad_norm": 4.228011608123779, "learning_rate": 4.016320350030333e-06, "loss": 0.4742, "step": 31000 }, { "epoch": 1.8207973695026716, "grad_norm": 3.4143102169036865, "learning_rate": 4.012971304072341e-06, "loss": 0.3528, "step": 31010 }, { "epoch": 1.821384534084904, "grad_norm": 2.7300496101379395, "learning_rate": 4.009622718832055e-06, "loss": 0.3469, "step": 31020 }, { "epoch": 1.8219716986671364, "grad_norm": 7.908066749572754, "learning_rate": 4.006274595872503e-06, "loss": 0.2959, "step": 31030 }, { "epoch": 1.8225588632493688, "grad_norm": 3.3210012912750244, "learning_rate": 4.0029269367564975e-06, "loss": 0.4507, "step": 31040 }, { "epoch": 1.8231460278316012, "grad_norm": 3.417158365249634, "learning_rate": 3.999579743046631e-06, "loss": 0.5064, "step": 31050 }, { "epoch": 1.8237331924138336, "grad_norm": 3.933278799057007, "learning_rate": 3.996233016305281e-06, "loss": 0.5007, "step": 31060 }, { "epoch": 1.824320356996066, "grad_norm": 4.029742240905762, "learning_rate": 3.992886758094609e-06, "loss": 0.6526, "step": 31070 }, { "epoch": 1.8249075215782984, "grad_norm": 6.1552863121032715, "learning_rate": 3.989540969976554e-06, "loss": 0.449, "step": 31080 }, { "epoch": 1.8254946861605308, "grad_norm": 6.848259449005127, "learning_rate": 3.986195653512839e-06, "loss": 0.3832, "step": 31090 }, { "epoch": 1.8260818507427632, "grad_norm": 1.6578915119171143, "learning_rate": 3.982850810264961e-06, "loss": 0.4426, "step": 31100 }, { "epoch": 1.8266690153249956, "grad_norm": 6.149550914764404, "learning_rate": 3.979506441794207e-06, "loss": 0.4312, "step": 31110 }, { "epoch": 1.827256179907228, "grad_norm": 2.8781309127807617, "learning_rate": 3.97616254966163e-06, "loss": 0.4594, "step": 31120 }, { "epoch": 1.8278433444894604, "grad_norm": 1.8679628372192383, "learning_rate": 3.972819135428071e-06, "loss": 0.4268, "step": 31130 }, { "epoch": 1.8284305090716928, "grad_norm": 2.936486005783081, "learning_rate": 3.969476200654139e-06, "loss": 0.4837, "step": 31140 }, { "epoch": 1.8290176736539252, "grad_norm": 4.719882011413574, "learning_rate": 3.966133746900227e-06, "loss": 0.427, "step": 31150 }, { "epoch": 1.8296048382361576, "grad_norm": 5.3544769287109375, "learning_rate": 3.9627917757264975e-06, "loss": 0.4811, "step": 31160 }, { "epoch": 1.83019200281839, "grad_norm": 8.757286071777344, "learning_rate": 3.9594502886928925e-06, "loss": 0.5904, "step": 31170 }, { "epoch": 1.8307791674006224, "grad_norm": 2.277393102645874, "learning_rate": 3.956109287359122e-06, "loss": 0.568, "step": 31180 }, { "epoch": 1.8313663319828548, "grad_norm": 3.3581104278564453, "learning_rate": 3.952768773284677e-06, "loss": 0.4363, "step": 31190 }, { "epoch": 1.8319534965650872, "grad_norm": 8.017231941223145, "learning_rate": 3.949428748028817e-06, "loss": 0.4201, "step": 31200 }, { "epoch": 1.8325406611473196, "grad_norm": 3.0236451625823975, "learning_rate": 3.946089213150571e-06, "loss": 0.4421, "step": 31210 }, { "epoch": 1.833127825729552, "grad_norm": 2.273719549179077, "learning_rate": 3.942750170208744e-06, "loss": 0.3758, "step": 31220 }, { "epoch": 1.8337149903117844, "grad_norm": 8.288286209106445, "learning_rate": 3.939411620761907e-06, "loss": 0.4259, "step": 31230 }, { "epoch": 1.8343021548940168, "grad_norm": 8.6908540725708, "learning_rate": 3.936073566368402e-06, "loss": 0.5358, "step": 31240 }, { "epoch": 1.8348893194762492, "grad_norm": 1.3368496894836426, "learning_rate": 3.932736008586343e-06, "loss": 0.2828, "step": 31250 }, { "epoch": 1.8354764840584816, "grad_norm": 3.4229910373687744, "learning_rate": 3.929398948973608e-06, "loss": 0.4459, "step": 31260 }, { "epoch": 1.836063648640714, "grad_norm": 2.0354247093200684, "learning_rate": 3.926062389087845e-06, "loss": 0.4692, "step": 31270 }, { "epoch": 1.8366508132229464, "grad_norm": 5.186615943908691, "learning_rate": 3.922726330486467e-06, "loss": 0.4516, "step": 31280 }, { "epoch": 1.8372379778051788, "grad_norm": 5.359299182891846, "learning_rate": 3.919390774726654e-06, "loss": 0.4743, "step": 31290 }, { "epoch": 1.8378251423874112, "grad_norm": 3.185366630554199, "learning_rate": 3.916055723365354e-06, "loss": 0.4347, "step": 31300 }, { "epoch": 1.8384123069696436, "grad_norm": 1.5300606489181519, "learning_rate": 3.912721177959273e-06, "loss": 0.4117, "step": 31310 }, { "epoch": 1.838999471551876, "grad_norm": 4.706286907196045, "learning_rate": 3.909387140064887e-06, "loss": 0.4379, "step": 31320 }, { "epoch": 1.8395866361341084, "grad_norm": 10.331826210021973, "learning_rate": 3.906053611238431e-06, "loss": 0.3359, "step": 31330 }, { "epoch": 1.8401738007163408, "grad_norm": 6.323092937469482, "learning_rate": 3.902720593035903e-06, "loss": 0.4243, "step": 31340 }, { "epoch": 1.840760965298573, "grad_norm": 9.303454399108887, "learning_rate": 3.899388087013067e-06, "loss": 0.4597, "step": 31350 }, { "epoch": 1.8413481298808057, "grad_norm": 5.790228843688965, "learning_rate": 3.896056094725445e-06, "loss": 0.5185, "step": 31360 }, { "epoch": 1.8419352944630378, "grad_norm": 3.7442760467529297, "learning_rate": 3.892724617728314e-06, "loss": 0.4955, "step": 31370 }, { "epoch": 1.8425224590452705, "grad_norm": 2.0277318954467773, "learning_rate": 3.889393657576721e-06, "loss": 0.4188, "step": 31380 }, { "epoch": 1.8431096236275026, "grad_norm": 4.361798286437988, "learning_rate": 3.886063215825463e-06, "loss": 0.3891, "step": 31390 }, { "epoch": 1.8436967882097353, "grad_norm": 2.5135629177093506, "learning_rate": 3.882733294029098e-06, "loss": 0.3444, "step": 31400 }, { "epoch": 1.8442839527919674, "grad_norm": 3.8498899936676025, "learning_rate": 3.879403893741943e-06, "loss": 0.4071, "step": 31410 }, { "epoch": 1.8448711173742, "grad_norm": 6.139440059661865, "learning_rate": 3.876075016518067e-06, "loss": 0.4158, "step": 31420 }, { "epoch": 1.8454582819564322, "grad_norm": 3.2155191898345947, "learning_rate": 3.872746663911304e-06, "loss": 0.3488, "step": 31430 }, { "epoch": 1.8460454465386649, "grad_norm": 13.943241119384766, "learning_rate": 3.869418837475231e-06, "loss": 0.4138, "step": 31440 }, { "epoch": 1.846632611120897, "grad_norm": 1.3231182098388672, "learning_rate": 3.8660915387631866e-06, "loss": 0.3521, "step": 31450 }, { "epoch": 1.8472197757031297, "grad_norm": 7.085463047027588, "learning_rate": 3.862764769328264e-06, "loss": 0.4407, "step": 31460 }, { "epoch": 1.8478069402853619, "grad_norm": 4.820586681365967, "learning_rate": 3.859438530723306e-06, "loss": 0.5371, "step": 31470 }, { "epoch": 1.8483941048675945, "grad_norm": 4.852321147918701, "learning_rate": 3.856112824500908e-06, "loss": 0.3409, "step": 31480 }, { "epoch": 1.8489812694498267, "grad_norm": 5.510758876800537, "learning_rate": 3.852787652213419e-06, "loss": 0.606, "step": 31490 }, { "epoch": 1.8495684340320593, "grad_norm": 6.708723545074463, "learning_rate": 3.849463015412934e-06, "loss": 0.435, "step": 31500 }, { "epoch": 1.8501555986142915, "grad_norm": 1.6126835346221924, "learning_rate": 3.846138915651307e-06, "loss": 0.5278, "step": 31510 }, { "epoch": 1.850742763196524, "grad_norm": 4.392549991607666, "learning_rate": 3.8428153544801325e-06, "loss": 0.396, "step": 31520 }, { "epoch": 1.8513299277787563, "grad_norm": 10.01211166381836, "learning_rate": 3.839492333450759e-06, "loss": 0.3918, "step": 31530 }, { "epoch": 1.851917092360989, "grad_norm": 3.453766107559204, "learning_rate": 3.836169854114279e-06, "loss": 0.4313, "step": 31540 }, { "epoch": 1.852504256943221, "grad_norm": 4.1995110511779785, "learning_rate": 3.832847918021535e-06, "loss": 0.4322, "step": 31550 }, { "epoch": 1.8530914215254537, "grad_norm": 2.2378005981445312, "learning_rate": 3.829526526723114e-06, "loss": 0.377, "step": 31560 }, { "epoch": 1.8536785861076859, "grad_norm": 6.925268173217773, "learning_rate": 3.8262056817693495e-06, "loss": 0.3729, "step": 31570 }, { "epoch": 1.8542657506899185, "grad_norm": 7.165698051452637, "learning_rate": 3.82288538471032e-06, "loss": 0.4225, "step": 31580 }, { "epoch": 1.8548529152721507, "grad_norm": 1.4311619997024536, "learning_rate": 3.8195656370958515e-06, "loss": 0.2816, "step": 31590 }, { "epoch": 1.8554400798543833, "grad_norm": 8.257598876953125, "learning_rate": 3.816246440475506e-06, "loss": 0.5615, "step": 31600 }, { "epoch": 1.8560272444366155, "grad_norm": 5.396920204162598, "learning_rate": 3.8129277963985963e-06, "loss": 0.4309, "step": 31610 }, { "epoch": 1.856614409018848, "grad_norm": 2.6892669200897217, "learning_rate": 3.809609706414172e-06, "loss": 0.3909, "step": 31620 }, { "epoch": 1.8572015736010803, "grad_norm": 10.543177604675293, "learning_rate": 3.8062921720710257e-06, "loss": 0.4899, "step": 31630 }, { "epoch": 1.857788738183313, "grad_norm": 4.09832763671875, "learning_rate": 3.8029751949176906e-06, "loss": 0.3512, "step": 31640 }, { "epoch": 1.858375902765545, "grad_norm": 5.26479959487915, "learning_rate": 3.799658776502442e-06, "loss": 0.5405, "step": 31650 }, { "epoch": 1.8589630673477777, "grad_norm": 6.377203941345215, "learning_rate": 3.796342918373288e-06, "loss": 0.4352, "step": 31660 }, { "epoch": 1.85955023193001, "grad_norm": 7.507822036743164, "learning_rate": 3.793027622077982e-06, "loss": 0.3973, "step": 31670 }, { "epoch": 1.8601373965122425, "grad_norm": 4.248563766479492, "learning_rate": 3.789712889164014e-06, "loss": 0.444, "step": 31680 }, { "epoch": 1.8607245610944747, "grad_norm": 1.750359058380127, "learning_rate": 3.786398721178609e-06, "loss": 0.438, "step": 31690 }, { "epoch": 1.8613117256767073, "grad_norm": 5.846527099609375, "learning_rate": 3.783085119668728e-06, "loss": 0.4427, "step": 31700 }, { "epoch": 1.8618988902589395, "grad_norm": 5.282541751861572, "learning_rate": 3.7797720861810694e-06, "loss": 0.4703, "step": 31710 }, { "epoch": 1.8624860548411721, "grad_norm": 4.050751209259033, "learning_rate": 3.7764596222620644e-06, "loss": 0.4104, "step": 31720 }, { "epoch": 1.8630732194234043, "grad_norm": 1.9745216369628906, "learning_rate": 3.7731477294578803e-06, "loss": 0.4258, "step": 31730 }, { "epoch": 1.863660384005637, "grad_norm": 2.041825294494629, "learning_rate": 3.7698364093144183e-06, "loss": 0.448, "step": 31740 }, { "epoch": 1.8642475485878691, "grad_norm": 4.5444416999816895, "learning_rate": 3.7665256633773086e-06, "loss": 0.3492, "step": 31750 }, { "epoch": 1.8648347131701017, "grad_norm": 4.65644645690918, "learning_rate": 3.7632154931919173e-06, "loss": 0.5394, "step": 31760 }, { "epoch": 1.865421877752334, "grad_norm": 2.1824231147766113, "learning_rate": 3.7599059003033407e-06, "loss": 0.3497, "step": 31770 }, { "epoch": 1.8660090423345663, "grad_norm": 1.380462646484375, "learning_rate": 3.7565968862564063e-06, "loss": 0.4305, "step": 31780 }, { "epoch": 1.8665962069167987, "grad_norm": 2.882465362548828, "learning_rate": 3.753288452595667e-06, "loss": 0.3625, "step": 31790 }, { "epoch": 1.8671833714990311, "grad_norm": 3.289872407913208, "learning_rate": 3.7499806008654106e-06, "loss": 0.4532, "step": 31800 }, { "epoch": 1.8677705360812635, "grad_norm": 3.20068097114563, "learning_rate": 3.7466733326096514e-06, "loss": 0.4049, "step": 31810 }, { "epoch": 1.868357700663496, "grad_norm": 6.089962959289551, "learning_rate": 3.7433666493721287e-06, "loss": 0.3876, "step": 31820 }, { "epoch": 1.8689448652457283, "grad_norm": 6.036837577819824, "learning_rate": 3.7400605526963107e-06, "loss": 0.4898, "step": 31830 }, { "epoch": 1.8695320298279607, "grad_norm": 2.688314199447632, "learning_rate": 3.7367550441253943e-06, "loss": 0.4031, "step": 31840 }, { "epoch": 1.8701191944101931, "grad_norm": 10.431432723999023, "learning_rate": 3.733450125202299e-06, "loss": 0.5325, "step": 31850 }, { "epoch": 1.8707063589924255, "grad_norm": 2.7479074001312256, "learning_rate": 3.7301457974696676e-06, "loss": 0.4343, "step": 31860 }, { "epoch": 1.871293523574658, "grad_norm": 1.4633115530014038, "learning_rate": 3.726842062469872e-06, "loss": 0.4344, "step": 31870 }, { "epoch": 1.8718806881568903, "grad_norm": 4.77953577041626, "learning_rate": 3.723538921745001e-06, "loss": 0.4449, "step": 31880 }, { "epoch": 1.8724678527391228, "grad_norm": 2.2224786281585693, "learning_rate": 3.720236376836871e-06, "loss": 0.3945, "step": 31890 }, { "epoch": 1.8730550173213552, "grad_norm": 2.1563501358032227, "learning_rate": 3.7169344292870194e-06, "loss": 0.4894, "step": 31900 }, { "epoch": 1.8736421819035876, "grad_norm": 1.596325159072876, "learning_rate": 3.7136330806367003e-06, "loss": 0.3692, "step": 31910 }, { "epoch": 1.87422934648582, "grad_norm": 11.910635948181152, "learning_rate": 3.710332332426898e-06, "loss": 0.4307, "step": 31920 }, { "epoch": 1.8748165110680524, "grad_norm": 3.0014944076538086, "learning_rate": 3.707032186198306e-06, "loss": 0.5321, "step": 31930 }, { "epoch": 1.8754036756502848, "grad_norm": 2.4976084232330322, "learning_rate": 3.7037326434913445e-06, "loss": 0.307, "step": 31940 }, { "epoch": 1.8759908402325172, "grad_norm": 3.817124128341675, "learning_rate": 3.700433705846145e-06, "loss": 0.3489, "step": 31950 }, { "epoch": 1.8765780048147496, "grad_norm": 3.2628448009490967, "learning_rate": 3.697135374802563e-06, "loss": 0.4189, "step": 31960 }, { "epoch": 1.877165169396982, "grad_norm": 4.7107648849487305, "learning_rate": 3.693837651900169e-06, "loss": 0.3245, "step": 31970 }, { "epoch": 1.8777523339792144, "grad_norm": 4.244431972503662, "learning_rate": 3.6905405386782455e-06, "loss": 0.2771, "step": 31980 }, { "epoch": 1.8783394985614468, "grad_norm": 3.186835527420044, "learning_rate": 3.6872440366757956e-06, "loss": 0.5303, "step": 31990 }, { "epoch": 1.8789266631436792, "grad_norm": 2.9337074756622314, "learning_rate": 3.6839481474315365e-06, "loss": 0.4318, "step": 32000 }, { "epoch": 1.8795138277259116, "grad_norm": 3.1994106769561768, "learning_rate": 3.680652872483898e-06, "loss": 0.4146, "step": 32010 }, { "epoch": 1.880100992308144, "grad_norm": 3.72444224357605, "learning_rate": 3.6773582133710217e-06, "loss": 0.3869, "step": 32020 }, { "epoch": 1.8806881568903764, "grad_norm": 3.8486077785491943, "learning_rate": 3.6740641716307633e-06, "loss": 0.4504, "step": 32030 }, { "epoch": 1.8812753214726088, "grad_norm": 2.651543378829956, "learning_rate": 3.6707707488006937e-06, "loss": 0.3915, "step": 32040 }, { "epoch": 1.8818624860548412, "grad_norm": 2.409036874771118, "learning_rate": 3.6674779464180877e-06, "loss": 0.4318, "step": 32050 }, { "epoch": 1.8824496506370736, "grad_norm": 7.6992082595825195, "learning_rate": 3.6641857660199364e-06, "loss": 0.4852, "step": 32060 }, { "epoch": 1.883036815219306, "grad_norm": 2.325791835784912, "learning_rate": 3.6608942091429356e-06, "loss": 0.4465, "step": 32070 }, { "epoch": 1.8836239798015384, "grad_norm": 13.579486846923828, "learning_rate": 3.657603277323499e-06, "loss": 0.4582, "step": 32080 }, { "epoch": 1.8842111443837708, "grad_norm": 4.062404632568359, "learning_rate": 3.654312972097738e-06, "loss": 0.3477, "step": 32090 }, { "epoch": 1.8847983089660032, "grad_norm": 10.16796875, "learning_rate": 3.651023295001478e-06, "loss": 0.4211, "step": 32100 }, { "epoch": 1.8853854735482356, "grad_norm": 1.0000386238098145, "learning_rate": 3.647734247570248e-06, "loss": 0.4542, "step": 32110 }, { "epoch": 1.885972638130468, "grad_norm": 7.271573066711426, "learning_rate": 3.644445831339285e-06, "loss": 0.3866, "step": 32120 }, { "epoch": 1.8865598027127004, "grad_norm": 3.488192319869995, "learning_rate": 3.6411580478435317e-06, "loss": 0.4625, "step": 32130 }, { "epoch": 1.8871469672949328, "grad_norm": 4.273473262786865, "learning_rate": 3.6378708986176327e-06, "loss": 0.3736, "step": 32140 }, { "epoch": 1.8877341318771652, "grad_norm": 2.1437456607818604, "learning_rate": 3.6345843851959373e-06, "loss": 0.3914, "step": 32150 }, { "epoch": 1.8883212964593976, "grad_norm": 4.264853000640869, "learning_rate": 3.631298509112503e-06, "loss": 0.4582, "step": 32160 }, { "epoch": 1.88890846104163, "grad_norm": 1.741582989692688, "learning_rate": 3.6280132719010853e-06, "loss": 0.5533, "step": 32170 }, { "epoch": 1.8894956256238624, "grad_norm": 4.96544885635376, "learning_rate": 3.6247286750951397e-06, "loss": 0.3344, "step": 32180 }, { "epoch": 1.8900827902060948, "grad_norm": 1.6570265293121338, "learning_rate": 3.621444720227827e-06, "loss": 0.4784, "step": 32190 }, { "epoch": 1.8906699547883272, "grad_norm": 2.8754184246063232, "learning_rate": 3.618161408832006e-06, "loss": 0.4417, "step": 32200 }, { "epoch": 1.8912571193705596, "grad_norm": 5.684372901916504, "learning_rate": 3.6148787424402344e-06, "loss": 0.4861, "step": 32210 }, { "epoch": 1.8918442839527918, "grad_norm": 4.298274993896484, "learning_rate": 3.6115967225847725e-06, "loss": 0.5247, "step": 32220 }, { "epoch": 1.8924314485350244, "grad_norm": 3.835350751876831, "learning_rate": 3.608315350797572e-06, "loss": 0.3625, "step": 32230 }, { "epoch": 1.8930186131172566, "grad_norm": 7.409722328186035, "learning_rate": 3.605034628610293e-06, "loss": 0.4515, "step": 32240 }, { "epoch": 1.8936057776994892, "grad_norm": 4.493803024291992, "learning_rate": 3.6017545575542825e-06, "loss": 0.4637, "step": 32250 }, { "epoch": 1.8941929422817214, "grad_norm": 3.675229549407959, "learning_rate": 3.598475139160587e-06, "loss": 0.4183, "step": 32260 }, { "epoch": 1.894780106863954, "grad_norm": 3.637338399887085, "learning_rate": 3.5951963749599476e-06, "loss": 0.4145, "step": 32270 }, { "epoch": 1.8953672714461862, "grad_norm": 3.6507506370544434, "learning_rate": 3.5919182664828023e-06, "loss": 0.4316, "step": 32280 }, { "epoch": 1.8959544360284188, "grad_norm": 1.6462267637252808, "learning_rate": 3.588640815259281e-06, "loss": 0.3681, "step": 32290 }, { "epoch": 1.896541600610651, "grad_norm": 4.264452934265137, "learning_rate": 3.585364022819207e-06, "loss": 0.3814, "step": 32300 }, { "epoch": 1.8971287651928836, "grad_norm": 2.7131423950195312, "learning_rate": 3.5820878906920953e-06, "loss": 0.5046, "step": 32310 }, { "epoch": 1.8977159297751158, "grad_norm": 3.4761829376220703, "learning_rate": 3.5788124204071557e-06, "loss": 0.4847, "step": 32320 }, { "epoch": 1.8983030943573485, "grad_norm": 4.446946620941162, "learning_rate": 3.5755376134932877e-06, "loss": 0.3328, "step": 32330 }, { "epoch": 1.8988902589395806, "grad_norm": 1.8850858211517334, "learning_rate": 3.5722634714790787e-06, "loss": 0.4479, "step": 32340 }, { "epoch": 1.8994774235218133, "grad_norm": 4.282196998596191, "learning_rate": 3.5689899958928086e-06, "loss": 0.5517, "step": 32350 }, { "epoch": 1.9000645881040454, "grad_norm": 5.822951793670654, "learning_rate": 3.5657171882624456e-06, "loss": 0.3766, "step": 32360 }, { "epoch": 1.900651752686278, "grad_norm": 4.26113748550415, "learning_rate": 3.5624450501156447e-06, "loss": 0.2983, "step": 32370 }, { "epoch": 1.9012389172685102, "grad_norm": 3.2217044830322266, "learning_rate": 3.5591735829797514e-06, "loss": 0.454, "step": 32380 }, { "epoch": 1.9018260818507429, "grad_norm": 1.9307690858840942, "learning_rate": 3.555902788381792e-06, "loss": 0.4184, "step": 32390 }, { "epoch": 1.902413246432975, "grad_norm": 2.179513692855835, "learning_rate": 3.552632667848489e-06, "loss": 0.5585, "step": 32400 }, { "epoch": 1.9030004110152077, "grad_norm": 4.565446376800537, "learning_rate": 3.5493632229062398e-06, "loss": 0.4238, "step": 32410 }, { "epoch": 1.9035875755974399, "grad_norm": 3.599393129348755, "learning_rate": 3.5460944550811323e-06, "loss": 0.3574, "step": 32420 }, { "epoch": 1.9041747401796725, "grad_norm": 7.533908367156982, "learning_rate": 3.5428263658989383e-06, "loss": 0.366, "step": 32430 }, { "epoch": 1.9047619047619047, "grad_norm": 3.568504810333252, "learning_rate": 3.5395589568851085e-06, "loss": 0.283, "step": 32440 }, { "epoch": 1.9053490693441373, "grad_norm": 2.6744749546051025, "learning_rate": 3.5362922295647817e-06, "loss": 0.4381, "step": 32450 }, { "epoch": 1.9059362339263695, "grad_norm": 1.6316299438476562, "learning_rate": 3.5330261854627747e-06, "loss": 0.4184, "step": 32460 }, { "epoch": 1.906523398508602, "grad_norm": 3.1673495769500732, "learning_rate": 3.529760826103585e-06, "loss": 0.5464, "step": 32470 }, { "epoch": 1.9071105630908343, "grad_norm": 4.268896579742432, "learning_rate": 3.526496153011395e-06, "loss": 0.3875, "step": 32480 }, { "epoch": 1.907697727673067, "grad_norm": 4.055206775665283, "learning_rate": 3.523232167710064e-06, "loss": 0.3138, "step": 32490 }, { "epoch": 1.908284892255299, "grad_norm": 6.838683128356934, "learning_rate": 3.5199688717231273e-06, "loss": 0.4965, "step": 32500 }, { "epoch": 1.9088720568375317, "grad_norm": 3.9589946269989014, "learning_rate": 3.516706266573803e-06, "loss": 0.4852, "step": 32510 }, { "epoch": 1.9094592214197639, "grad_norm": 8.582409858703613, "learning_rate": 3.513444353784986e-06, "loss": 0.3966, "step": 32520 }, { "epoch": 1.9100463860019965, "grad_norm": 14.605650901794434, "learning_rate": 3.5101831348792446e-06, "loss": 0.5866, "step": 32530 }, { "epoch": 1.9106335505842287, "grad_norm": 4.817981719970703, "learning_rate": 3.506922611378827e-06, "loss": 0.5132, "step": 32540 }, { "epoch": 1.9112207151664613, "grad_norm": 3.366882562637329, "learning_rate": 3.503662784805654e-06, "loss": 0.5955, "step": 32550 }, { "epoch": 1.9118078797486935, "grad_norm": 2.7756967544555664, "learning_rate": 3.500403656681325e-06, "loss": 0.4518, "step": 32560 }, { "epoch": 1.912395044330926, "grad_norm": 3.507934808731079, "learning_rate": 3.497145228527109e-06, "loss": 0.5213, "step": 32570 }, { "epoch": 1.9129822089131583, "grad_norm": 2.162142753601074, "learning_rate": 3.493887501863951e-06, "loss": 0.3952, "step": 32580 }, { "epoch": 1.913569373495391, "grad_norm": 4.033923625946045, "learning_rate": 3.490630478212469e-06, "loss": 0.4535, "step": 32590 }, { "epoch": 1.914156538077623, "grad_norm": 4.773042678833008, "learning_rate": 3.487374159092948e-06, "loss": 0.4827, "step": 32600 }, { "epoch": 1.9147437026598557, "grad_norm": 2.886957883834839, "learning_rate": 3.4841185460253512e-06, "loss": 0.3436, "step": 32610 }, { "epoch": 1.915330867242088, "grad_norm": 4.822763442993164, "learning_rate": 3.4808636405293073e-06, "loss": 0.4388, "step": 32620 }, { "epoch": 1.9159180318243205, "grad_norm": 2.3548221588134766, "learning_rate": 3.4776094441241144e-06, "loss": 0.465, "step": 32630 }, { "epoch": 1.9165051964065527, "grad_norm": 4.410897731781006, "learning_rate": 3.4743559583287443e-06, "loss": 0.4879, "step": 32640 }, { "epoch": 1.917092360988785, "grad_norm": 1.64340078830719, "learning_rate": 3.471103184661835e-06, "loss": 0.3489, "step": 32650 }, { "epoch": 1.9176795255710175, "grad_norm": 3.612410306930542, "learning_rate": 3.467851124641688e-06, "loss": 0.4024, "step": 32660 }, { "epoch": 1.91826669015325, "grad_norm": 4.718856334686279, "learning_rate": 3.464599779786276e-06, "loss": 0.3825, "step": 32670 }, { "epoch": 1.9188538547354823, "grad_norm": 3.0058255195617676, "learning_rate": 3.4613491516132403e-06, "loss": 0.4915, "step": 32680 }, { "epoch": 1.9194410193177147, "grad_norm": 2.0981907844543457, "learning_rate": 3.4580992416398784e-06, "loss": 0.4825, "step": 32690 }, { "epoch": 1.9200281838999471, "grad_norm": 4.758894443511963, "learning_rate": 3.454850051383163e-06, "loss": 0.4228, "step": 32700 }, { "epoch": 1.9206153484821795, "grad_norm": 2.7546536922454834, "learning_rate": 3.4516015823597228e-06, "loss": 0.3459, "step": 32710 }, { "epoch": 1.921202513064412, "grad_norm": 0.6942480802536011, "learning_rate": 3.4483538360858575e-06, "loss": 0.4097, "step": 32720 }, { "epoch": 1.9217896776466443, "grad_norm": 3.387871503829956, "learning_rate": 3.4451068140775224e-06, "loss": 0.3515, "step": 32730 }, { "epoch": 1.9223768422288767, "grad_norm": 4.8621931076049805, "learning_rate": 3.441860517850337e-06, "loss": 0.3531, "step": 32740 }, { "epoch": 1.9229640068111091, "grad_norm": 2.7447993755340576, "learning_rate": 3.4386149489195864e-06, "loss": 0.2933, "step": 32750 }, { "epoch": 1.9235511713933415, "grad_norm": 2.019557476043701, "learning_rate": 3.435370108800208e-06, "loss": 0.3863, "step": 32760 }, { "epoch": 1.924138335975574, "grad_norm": 7.100201606750488, "learning_rate": 3.432125999006807e-06, "loss": 0.2917, "step": 32770 }, { "epoch": 1.9247255005578063, "grad_norm": 2.9182114601135254, "learning_rate": 3.4288826210536408e-06, "loss": 0.2764, "step": 32780 }, { "epoch": 1.9253126651400387, "grad_norm": 2.8280346393585205, "learning_rate": 3.42563997645463e-06, "loss": 0.4072, "step": 32790 }, { "epoch": 1.9258998297222711, "grad_norm": 3.8786964416503906, "learning_rate": 3.4223980667233526e-06, "loss": 0.4702, "step": 32800 }, { "epoch": 1.9264869943045035, "grad_norm": 2.1469831466674805, "learning_rate": 3.4191568933730414e-06, "loss": 0.4141, "step": 32810 }, { "epoch": 1.927074158886736, "grad_norm": 5.566099166870117, "learning_rate": 3.415916457916587e-06, "loss": 0.413, "step": 32820 }, { "epoch": 1.9276613234689683, "grad_norm": 13.14962387084961, "learning_rate": 3.4126767618665344e-06, "loss": 0.4695, "step": 32830 }, { "epoch": 1.9282484880512007, "grad_norm": 6.353343963623047, "learning_rate": 3.4094378067350853e-06, "loss": 0.4909, "step": 32840 }, { "epoch": 1.9288356526334332, "grad_norm": 3.938565969467163, "learning_rate": 3.406199594034093e-06, "loss": 0.4851, "step": 32850 }, { "epoch": 1.9294228172156656, "grad_norm": 16.817554473876953, "learning_rate": 3.402962125275066e-06, "loss": 0.4201, "step": 32860 }, { "epoch": 1.930009981797898, "grad_norm": 1.8741662502288818, "learning_rate": 3.399725401969165e-06, "loss": 0.4176, "step": 32870 }, { "epoch": 1.9305971463801304, "grad_norm": 4.609308242797852, "learning_rate": 3.396489425627205e-06, "loss": 0.4764, "step": 32880 }, { "epoch": 1.9311843109623628, "grad_norm": 3.7644665241241455, "learning_rate": 3.393254197759648e-06, "loss": 0.3474, "step": 32890 }, { "epoch": 1.9317714755445952, "grad_norm": 1.156975269317627, "learning_rate": 3.390019719876609e-06, "loss": 0.3823, "step": 32900 }, { "epoch": 1.9323586401268276, "grad_norm": 2.588383913040161, "learning_rate": 3.386785993487855e-06, "loss": 0.4043, "step": 32910 }, { "epoch": 1.93294580470906, "grad_norm": 0.838699460029602, "learning_rate": 3.3835530201027975e-06, "loss": 0.423, "step": 32920 }, { "epoch": 1.9335329692912924, "grad_norm": 2.3602447509765625, "learning_rate": 3.3803208012305e-06, "loss": 0.3871, "step": 32930 }, { "epoch": 1.9341201338735248, "grad_norm": 10.549612998962402, "learning_rate": 3.3770893383796744e-06, "loss": 0.4457, "step": 32940 }, { "epoch": 1.9347072984557572, "grad_norm": 2.7032244205474854, "learning_rate": 3.3738586330586746e-06, "loss": 0.4108, "step": 32950 }, { "epoch": 1.9352944630379896, "grad_norm": 6.402557849884033, "learning_rate": 3.370628686775508e-06, "loss": 0.5585, "step": 32960 }, { "epoch": 1.935881627620222, "grad_norm": 4.430296421051025, "learning_rate": 3.3673995010378233e-06, "loss": 0.397, "step": 32970 }, { "epoch": 1.9364687922024544, "grad_norm": 1.4500025510787964, "learning_rate": 3.3641710773529158e-06, "loss": 0.4378, "step": 32980 }, { "epoch": 1.9370559567846868, "grad_norm": 3.619709014892578, "learning_rate": 3.3609434172277224e-06, "loss": 0.4281, "step": 32990 }, { "epoch": 1.9376431213669192, "grad_norm": 6.399337291717529, "learning_rate": 3.3577165221688273e-06, "loss": 0.4978, "step": 33000 }, { "epoch": 1.9382302859491516, "grad_norm": 2.053422451019287, "learning_rate": 3.3544903936824545e-06, "loss": 0.457, "step": 33010 }, { "epoch": 1.938817450531384, "grad_norm": 2.625300407409668, "learning_rate": 3.3512650332744727e-06, "loss": 0.3312, "step": 33020 }, { "epoch": 1.9394046151136164, "grad_norm": 1.4449588060379028, "learning_rate": 3.3480404424503898e-06, "loss": 0.3372, "step": 33030 }, { "epoch": 1.9399917796958488, "grad_norm": 7.957235813140869, "learning_rate": 3.344816622715358e-06, "loss": 0.4389, "step": 33040 }, { "epoch": 1.9405789442780812, "grad_norm": 2.1956334114074707, "learning_rate": 3.341593575574165e-06, "loss": 0.305, "step": 33050 }, { "epoch": 1.9411661088603136, "grad_norm": 11.954832077026367, "learning_rate": 3.338371302531241e-06, "loss": 0.4501, "step": 33060 }, { "epoch": 1.9417532734425458, "grad_norm": 6.779435634613037, "learning_rate": 3.3351498050906543e-06, "loss": 0.3772, "step": 33070 }, { "epoch": 1.9423404380247784, "grad_norm": 2.427661180496216, "learning_rate": 3.3319290847561097e-06, "loss": 0.4093, "step": 33080 }, { "epoch": 1.9429276026070106, "grad_norm": 4.932921886444092, "learning_rate": 3.3287091430309517e-06, "loss": 0.4385, "step": 33090 }, { "epoch": 1.9435147671892432, "grad_norm": 5.680581569671631, "learning_rate": 3.325489981418159e-06, "loss": 0.5238, "step": 33100 }, { "epoch": 1.9441019317714754, "grad_norm": 4.807107448577881, "learning_rate": 3.3222716014203466e-06, "loss": 0.4442, "step": 33110 }, { "epoch": 1.944689096353708, "grad_norm": 5.825436115264893, "learning_rate": 3.319054004539767e-06, "loss": 0.6663, "step": 33120 }, { "epoch": 1.9452762609359402, "grad_norm": 5.371211051940918, "learning_rate": 3.315837192278305e-06, "loss": 0.4211, "step": 33130 }, { "epoch": 1.9458634255181728, "grad_norm": 11.192180633544922, "learning_rate": 3.312621166137481e-06, "loss": 0.4567, "step": 33140 }, { "epoch": 1.946450590100405, "grad_norm": 5.838712692260742, "learning_rate": 3.3094059276184434e-06, "loss": 0.6198, "step": 33150 }, { "epoch": 1.9470377546826376, "grad_norm": 2.695589065551758, "learning_rate": 3.3061914782219807e-06, "loss": 0.3665, "step": 33160 }, { "epoch": 1.9476249192648698, "grad_norm": 13.521222114562988, "learning_rate": 3.3029778194485053e-06, "loss": 0.4613, "step": 33170 }, { "epoch": 1.9482120838471024, "grad_norm": 1.9363057613372803, "learning_rate": 3.299764952798067e-06, "loss": 0.427, "step": 33180 }, { "epoch": 1.9487992484293346, "grad_norm": 10.962353706359863, "learning_rate": 3.2965528797703407e-06, "loss": 0.5061, "step": 33190 }, { "epoch": 1.9493864130115672, "grad_norm": 4.03026819229126, "learning_rate": 3.293341601864636e-06, "loss": 0.3433, "step": 33200 }, { "epoch": 1.9499735775937994, "grad_norm": 4.435911655426025, "learning_rate": 3.290131120579888e-06, "loss": 0.359, "step": 33210 }, { "epoch": 1.950560742176032, "grad_norm": 1.9050674438476562, "learning_rate": 3.2869214374146594e-06, "loss": 0.3641, "step": 33220 }, { "epoch": 1.9511479067582642, "grad_norm": 4.769528388977051, "learning_rate": 3.2837125538671433e-06, "loss": 0.3745, "step": 33230 }, { "epoch": 1.9517350713404968, "grad_norm": 7.563265323638916, "learning_rate": 3.280504471435155e-06, "loss": 0.5879, "step": 33240 }, { "epoch": 1.952322235922729, "grad_norm": 5.429909706115723, "learning_rate": 3.277297191616141e-06, "loss": 0.554, "step": 33250 }, { "epoch": 1.9529094005049616, "grad_norm": 3.4206573963165283, "learning_rate": 3.274090715907171e-06, "loss": 0.5334, "step": 33260 }, { "epoch": 1.9534965650871938, "grad_norm": 5.349488735198975, "learning_rate": 3.270885045804937e-06, "loss": 0.3521, "step": 33270 }, { "epoch": 1.9540837296694265, "grad_norm": 7.981251239776611, "learning_rate": 3.267680182805759e-06, "loss": 0.4237, "step": 33280 }, { "epoch": 1.9546708942516586, "grad_norm": 5.160166263580322, "learning_rate": 3.264476128405579e-06, "loss": 0.4716, "step": 33290 }, { "epoch": 1.9552580588338913, "grad_norm": 5.697079658508301, "learning_rate": 3.2612728840999604e-06, "loss": 0.3219, "step": 33300 }, { "epoch": 1.9558452234161234, "grad_norm": 3.368560791015625, "learning_rate": 3.2580704513840878e-06, "loss": 0.412, "step": 33310 }, { "epoch": 1.956432387998356, "grad_norm": 1.534320592880249, "learning_rate": 3.254868831752769e-06, "loss": 0.4679, "step": 33320 }, { "epoch": 1.9570195525805882, "grad_norm": 1.549910545349121, "learning_rate": 3.251668026700433e-06, "loss": 0.4331, "step": 33330 }, { "epoch": 1.9576067171628209, "grad_norm": 1.1310662031173706, "learning_rate": 3.248468037721125e-06, "loss": 0.4096, "step": 33340 }, { "epoch": 1.958193881745053, "grad_norm": 3.8788394927978516, "learning_rate": 3.2452688663085114e-06, "loss": 0.4289, "step": 33350 }, { "epoch": 1.9587810463272857, "grad_norm": 6.471278667449951, "learning_rate": 3.2420705139558783e-06, "loss": 0.4344, "step": 33360 }, { "epoch": 1.9593682109095178, "grad_norm": 3.596601963043213, "learning_rate": 3.2388729821561294e-06, "loss": 0.3859, "step": 33370 }, { "epoch": 1.9599553754917505, "grad_norm": 6.003462314605713, "learning_rate": 3.235676272401781e-06, "loss": 0.4269, "step": 33380 }, { "epoch": 1.9605425400739827, "grad_norm": 3.4098310470581055, "learning_rate": 3.2324803861849706e-06, "loss": 0.3414, "step": 33390 }, { "epoch": 1.9611297046562153, "grad_norm": 4.562659740447998, "learning_rate": 3.229285324997449e-06, "loss": 0.4337, "step": 33400 }, { "epoch": 1.9617168692384475, "grad_norm": 14.623678207397461, "learning_rate": 3.2260910903305825e-06, "loss": 0.3644, "step": 33410 }, { "epoch": 1.96230403382068, "grad_norm": 3.8030927181243896, "learning_rate": 3.2228976836753523e-06, "loss": 0.3765, "step": 33420 }, { "epoch": 1.9628911984029123, "grad_norm": 5.73048210144043, "learning_rate": 3.219705106522349e-06, "loss": 0.349, "step": 33430 }, { "epoch": 1.9634783629851449, "grad_norm": 7.34813928604126, "learning_rate": 3.216513360361785e-06, "loss": 0.5116, "step": 33440 }, { "epoch": 1.964065527567377, "grad_norm": 1.9792745113372803, "learning_rate": 3.2133224466834752e-06, "loss": 0.4954, "step": 33450 }, { "epoch": 1.9646526921496097, "grad_norm": 1.9959661960601807, "learning_rate": 3.210132366976851e-06, "loss": 0.2862, "step": 33460 }, { "epoch": 1.9652398567318419, "grad_norm": 3.794114828109741, "learning_rate": 3.206943122730953e-06, "loss": 0.3702, "step": 33470 }, { "epoch": 1.9658270213140745, "grad_norm": 4.580996513366699, "learning_rate": 3.2037547154344333e-06, "loss": 0.3616, "step": 33480 }, { "epoch": 1.9664141858963067, "grad_norm": 3.9465060234069824, "learning_rate": 3.2005671465755516e-06, "loss": 0.5301, "step": 33490 }, { "epoch": 1.9670013504785393, "grad_norm": 5.380383014678955, "learning_rate": 3.1973804176421764e-06, "loss": 0.3614, "step": 33500 }, { "epoch": 1.9675885150607715, "grad_norm": 7.4301605224609375, "learning_rate": 3.1941945301217836e-06, "loss": 0.5287, "step": 33510 }, { "epoch": 1.9681756796430039, "grad_norm": 6.127075672149658, "learning_rate": 3.1910094855014596e-06, "loss": 0.45, "step": 33520 }, { "epoch": 1.9687628442252363, "grad_norm": 3.03947114944458, "learning_rate": 3.187825285267896e-06, "loss": 0.4946, "step": 33530 }, { "epoch": 1.9693500088074687, "grad_norm": 4.188313007354736, "learning_rate": 3.184641930907387e-06, "loss": 0.378, "step": 33540 }, { "epoch": 1.969937173389701, "grad_norm": 2.9416823387145996, "learning_rate": 3.181459423905835e-06, "loss": 0.4861, "step": 33550 }, { "epoch": 1.9705243379719335, "grad_norm": 6.660367965698242, "learning_rate": 3.1782777657487473e-06, "loss": 0.5114, "step": 33560 }, { "epoch": 1.971111502554166, "grad_norm": 2.9965579509735107, "learning_rate": 3.175096957921233e-06, "loss": 0.4594, "step": 33570 }, { "epoch": 1.9716986671363983, "grad_norm": 3.4986860752105713, "learning_rate": 3.171917001908006e-06, "loss": 0.4444, "step": 33580 }, { "epoch": 1.9722858317186307, "grad_norm": 4.917658805847168, "learning_rate": 3.1687378991933782e-06, "loss": 0.5286, "step": 33590 }, { "epoch": 1.972872996300863, "grad_norm": 2.9446959495544434, "learning_rate": 3.165559651261273e-06, "loss": 0.3752, "step": 33600 }, { "epoch": 1.9734601608830955, "grad_norm": 1.8254365921020508, "learning_rate": 3.162382259595205e-06, "loss": 0.3886, "step": 33610 }, { "epoch": 1.974047325465328, "grad_norm": 2.0770809650421143, "learning_rate": 3.1592057256782948e-06, "loss": 0.3437, "step": 33620 }, { "epoch": 1.9746344900475603, "grad_norm": 10.02651596069336, "learning_rate": 3.1560300509932574e-06, "loss": 0.4262, "step": 33630 }, { "epoch": 1.9752216546297927, "grad_norm": 1.6973915100097656, "learning_rate": 3.1528552370224126e-06, "loss": 0.5426, "step": 33640 }, { "epoch": 1.9758088192120251, "grad_norm": 3.629178047180176, "learning_rate": 3.1496812852476753e-06, "loss": 0.5271, "step": 33650 }, { "epoch": 1.9763959837942575, "grad_norm": 11.902081489562988, "learning_rate": 3.146508197150558e-06, "loss": 0.4223, "step": 33660 }, { "epoch": 1.97698314837649, "grad_norm": 2.083247184753418, "learning_rate": 3.143335974212169e-06, "loss": 0.4983, "step": 33670 }, { "epoch": 1.9775703129587223, "grad_norm": 4.5021796226501465, "learning_rate": 3.1401646179132174e-06, "loss": 0.3615, "step": 33680 }, { "epoch": 1.9781574775409547, "grad_norm": 2.7237911224365234, "learning_rate": 3.1369941297340036e-06, "loss": 0.4146, "step": 33690 }, { "epoch": 1.9787446421231871, "grad_norm": 3.6829917430877686, "learning_rate": 3.1338245111544218e-06, "loss": 0.4716, "step": 33700 }, { "epoch": 1.9793318067054195, "grad_norm": 4.161221504211426, "learning_rate": 3.1306557636539646e-06, "loss": 0.5517, "step": 33710 }, { "epoch": 1.979918971287652, "grad_norm": 6.416264057159424, "learning_rate": 3.1274878887117167e-06, "loss": 0.3305, "step": 33720 }, { "epoch": 1.9805061358698843, "grad_norm": 3.093299627304077, "learning_rate": 3.124320887806351e-06, "loss": 0.4796, "step": 33730 }, { "epoch": 1.9810933004521167, "grad_norm": 1.2876977920532227, "learning_rate": 3.121154762416139e-06, "loss": 0.461, "step": 33740 }, { "epoch": 1.9816804650343491, "grad_norm": 2.4107766151428223, "learning_rate": 3.1179895140189366e-06, "loss": 0.3357, "step": 33750 }, { "epoch": 1.9822676296165815, "grad_norm": 3.8340134620666504, "learning_rate": 3.1148251440921995e-06, "loss": 0.4584, "step": 33760 }, { "epoch": 1.982854794198814, "grad_norm": 4.778632640838623, "learning_rate": 3.111661654112965e-06, "loss": 0.3232, "step": 33770 }, { "epoch": 1.9834419587810463, "grad_norm": 4.732583522796631, "learning_rate": 3.108499045557864e-06, "loss": 0.3673, "step": 33780 }, { "epoch": 1.9840291233632787, "grad_norm": 1.2116894721984863, "learning_rate": 3.1053373199031134e-06, "loss": 0.436, "step": 33790 }, { "epoch": 1.9846162879455111, "grad_norm": 3.145334243774414, "learning_rate": 3.1021764786245197e-06, "loss": 0.3322, "step": 33800 }, { "epoch": 1.9852034525277436, "grad_norm": 1.7667466402053833, "learning_rate": 3.099016523197477e-06, "loss": 0.3944, "step": 33810 }, { "epoch": 1.985790617109976, "grad_norm": 2.691445827484131, "learning_rate": 3.0958574550969643e-06, "loss": 0.5031, "step": 33820 }, { "epoch": 1.9863777816922084, "grad_norm": 1.929839849472046, "learning_rate": 3.0926992757975464e-06, "loss": 0.3272, "step": 33830 }, { "epoch": 1.9869649462744408, "grad_norm": 2.2713115215301514, "learning_rate": 3.0895419867733765e-06, "loss": 0.2712, "step": 33840 }, { "epoch": 1.9875521108566732, "grad_norm": 5.426162242889404, "learning_rate": 3.08638558949819e-06, "loss": 0.4845, "step": 33850 }, { "epoch": 1.9881392754389056, "grad_norm": 4.009787559509277, "learning_rate": 3.0832300854453023e-06, "loss": 0.3389, "step": 33860 }, { "epoch": 1.988726440021138, "grad_norm": 1.3439675569534302, "learning_rate": 3.0800754760876183e-06, "loss": 0.3602, "step": 33870 }, { "epoch": 1.9893136046033704, "grad_norm": 3.748802900314331, "learning_rate": 3.076921762897623e-06, "loss": 0.4342, "step": 33880 }, { "epoch": 1.9899007691856028, "grad_norm": 3.0031089782714844, "learning_rate": 3.07376894734738e-06, "loss": 0.3409, "step": 33890 }, { "epoch": 1.9904879337678352, "grad_norm": 3.6614956855773926, "learning_rate": 3.070617030908538e-06, "loss": 0.4156, "step": 33900 }, { "epoch": 1.9910750983500676, "grad_norm": 1.7964529991149902, "learning_rate": 3.0674660150523224e-06, "loss": 0.4121, "step": 33910 }, { "epoch": 1.9916622629323, "grad_norm": 3.643429756164551, "learning_rate": 3.0643159012495444e-06, "loss": 0.3408, "step": 33920 }, { "epoch": 1.9922494275145324, "grad_norm": 4.41633415222168, "learning_rate": 3.0611666909705868e-06, "loss": 0.3938, "step": 33930 }, { "epoch": 1.9928365920967646, "grad_norm": 5.886053085327148, "learning_rate": 3.0580183856854147e-06, "loss": 0.4141, "step": 33940 }, { "epoch": 1.9934237566789972, "grad_norm": 10.669818878173828, "learning_rate": 3.054870986863571e-06, "loss": 0.5501, "step": 33950 }, { "epoch": 1.9940109212612294, "grad_norm": 2.2875165939331055, "learning_rate": 3.0517244959741726e-06, "loss": 0.3839, "step": 33960 }, { "epoch": 1.994598085843462, "grad_norm": 7.851955890655518, "learning_rate": 3.048578914485917e-06, "loss": 0.4657, "step": 33970 }, { "epoch": 1.9951852504256942, "grad_norm": 2.054027795791626, "learning_rate": 3.0454342438670707e-06, "loss": 0.3571, "step": 33980 }, { "epoch": 1.9957724150079268, "grad_norm": 4.08435583114624, "learning_rate": 3.0422904855854812e-06, "loss": 0.382, "step": 33990 }, { "epoch": 1.996359579590159, "grad_norm": 6.452435493469238, "learning_rate": 3.0391476411085686e-06, "loss": 0.3921, "step": 34000 }, { "epoch": 1.9969467441723916, "grad_norm": 3.488433837890625, "learning_rate": 3.036005711903327e-06, "loss": 0.4389, "step": 34010 }, { "epoch": 1.9975339087546238, "grad_norm": 6.686373710632324, "learning_rate": 3.0328646994363187e-06, "loss": 0.3631, "step": 34020 }, { "epoch": 1.9981210733368564, "grad_norm": 5.636704444885254, "learning_rate": 3.0297246051736834e-06, "loss": 0.3784, "step": 34030 }, { "epoch": 1.9987082379190886, "grad_norm": 2.25478458404541, "learning_rate": 3.0265854305811313e-06, "loss": 0.3973, "step": 34040 }, { "epoch": 1.9992954025013212, "grad_norm": 2.9863693714141846, "learning_rate": 3.023447177123939e-06, "loss": 0.4813, "step": 34050 }, { "epoch": 1.9998825670835534, "grad_norm": 1.756784200668335, "learning_rate": 3.02030984626696e-06, "loss": 0.5628, "step": 34060 }, { "epoch": 2.000469731665786, "grad_norm": 7.477410316467285, "learning_rate": 3.017173439474612e-06, "loss": 0.4928, "step": 34070 }, { "epoch": 2.001056896248018, "grad_norm": 6.858396530151367, "learning_rate": 3.014037958210882e-06, "loss": 0.4094, "step": 34080 }, { "epoch": 2.001644060830251, "grad_norm": 3.909811019897461, "learning_rate": 3.0109034039393274e-06, "loss": 0.5114, "step": 34090 }, { "epoch": 2.002231225412483, "grad_norm": 3.8163363933563232, "learning_rate": 3.0077697781230714e-06, "loss": 0.4107, "step": 34100 }, { "epoch": 2.0028183899947156, "grad_norm": 5.479816913604736, "learning_rate": 3.004637082224805e-06, "loss": 0.4907, "step": 34110 }, { "epoch": 2.003405554576948, "grad_norm": 1.4772855043411255, "learning_rate": 3.001505317706781e-06, "loss": 0.3061, "step": 34120 }, { "epoch": 2.0039927191591804, "grad_norm": 3.8559343814849854, "learning_rate": 2.9983744860308226e-06, "loss": 0.424, "step": 34130 }, { "epoch": 2.0045798837414126, "grad_norm": 15.867754936218262, "learning_rate": 2.9952445886583154e-06, "loss": 0.3699, "step": 34140 }, { "epoch": 2.0051670483236452, "grad_norm": 2.239738941192627, "learning_rate": 2.9921156270502085e-06, "loss": 0.3906, "step": 34150 }, { "epoch": 2.0057542129058774, "grad_norm": 2.2815964221954346, "learning_rate": 2.988987602667013e-06, "loss": 0.3984, "step": 34160 }, { "epoch": 2.00634137748811, "grad_norm": 2.0559935569763184, "learning_rate": 2.9858605169688083e-06, "loss": 0.3811, "step": 34170 }, { "epoch": 2.006928542070342, "grad_norm": 1.9674601554870605, "learning_rate": 2.982734371415228e-06, "loss": 0.415, "step": 34180 }, { "epoch": 2.007515706652575, "grad_norm": 6.03007698059082, "learning_rate": 2.9796091674654714e-06, "loss": 0.361, "step": 34190 }, { "epoch": 2.008102871234807, "grad_norm": 7.212956428527832, "learning_rate": 2.9764849065782985e-06, "loss": 0.4709, "step": 34200 }, { "epoch": 2.0086900358170396, "grad_norm": 4.808258056640625, "learning_rate": 2.973361590212026e-06, "loss": 0.4605, "step": 34210 }, { "epoch": 2.009277200399272, "grad_norm": 3.745077610015869, "learning_rate": 2.9702392198245332e-06, "loss": 0.3909, "step": 34220 }, { "epoch": 2.0098643649815044, "grad_norm": 2.5394694805145264, "learning_rate": 2.9671177968732568e-06, "loss": 0.4275, "step": 34230 }, { "epoch": 2.0104515295637366, "grad_norm": 1.4318373203277588, "learning_rate": 2.9639973228151864e-06, "loss": 0.3877, "step": 34240 }, { "epoch": 2.0110386941459693, "grad_norm": 2.43697452545166, "learning_rate": 2.960877799106878e-06, "loss": 0.4127, "step": 34250 }, { "epoch": 2.0116258587282014, "grad_norm": 3.304791212081909, "learning_rate": 2.9577592272044368e-06, "loss": 0.398, "step": 34260 }, { "epoch": 2.012213023310434, "grad_norm": 3.4929451942443848, "learning_rate": 2.954641608563528e-06, "loss": 0.4454, "step": 34270 }, { "epoch": 2.0128001878926662, "grad_norm": 2.758498430252075, "learning_rate": 2.951524944639366e-06, "loss": 0.3059, "step": 34280 }, { "epoch": 2.013387352474899, "grad_norm": 5.039309501647949, "learning_rate": 2.948409236886728e-06, "loss": 0.4903, "step": 34290 }, { "epoch": 2.013974517057131, "grad_norm": 2.1741220951080322, "learning_rate": 2.945294486759935e-06, "loss": 0.4882, "step": 34300 }, { "epoch": 2.0145616816393637, "grad_norm": 2.929568290710449, "learning_rate": 2.9421806957128705e-06, "loss": 0.3849, "step": 34310 }, { "epoch": 2.015148846221596, "grad_norm": 7.463710784912109, "learning_rate": 2.9390678651989623e-06, "loss": 0.3674, "step": 34320 }, { "epoch": 2.0157360108038285, "grad_norm": 2.4747226238250732, "learning_rate": 2.935955996671198e-06, "loss": 0.4027, "step": 34330 }, { "epoch": 2.0163231753860607, "grad_norm": 13.87903118133545, "learning_rate": 2.9328450915821106e-06, "loss": 0.3644, "step": 34340 }, { "epoch": 2.0169103399682933, "grad_norm": 1.512183666229248, "learning_rate": 2.929735151383782e-06, "loss": 0.3499, "step": 34350 }, { "epoch": 2.0174975045505255, "grad_norm": 7.634769439697266, "learning_rate": 2.9266261775278494e-06, "loss": 0.2753, "step": 34360 }, { "epoch": 2.018084669132758, "grad_norm": 4.213283538818359, "learning_rate": 2.9235181714654925e-06, "loss": 0.4697, "step": 34370 }, { "epoch": 2.0186718337149903, "grad_norm": 1.868967890739441, "learning_rate": 2.9204111346474453e-06, "loss": 0.5543, "step": 34380 }, { "epoch": 2.019258998297223, "grad_norm": 2.4100821018218994, "learning_rate": 2.9173050685239858e-06, "loss": 0.4162, "step": 34390 }, { "epoch": 2.019846162879455, "grad_norm": 1.4038974046707153, "learning_rate": 2.91419997454494e-06, "loss": 0.3469, "step": 34400 }, { "epoch": 2.0204333274616877, "grad_norm": 2.2546496391296387, "learning_rate": 2.9110958541596796e-06, "loss": 0.5375, "step": 34410 }, { "epoch": 2.02102049204392, "grad_norm": 2.71091890335083, "learning_rate": 2.9079927088171223e-06, "loss": 0.3591, "step": 34420 }, { "epoch": 2.0216076566261525, "grad_norm": 2.337066411972046, "learning_rate": 2.904890539965731e-06, "loss": 0.444, "step": 34430 }, { "epoch": 2.0221948212083847, "grad_norm": 7.341888427734375, "learning_rate": 2.9017893490535135e-06, "loss": 0.4366, "step": 34440 }, { "epoch": 2.0227819857906173, "grad_norm": 4.697239875793457, "learning_rate": 2.898689137528017e-06, "loss": 0.3566, "step": 34450 }, { "epoch": 2.0233691503728495, "grad_norm": 3.9698381423950195, "learning_rate": 2.8955899068363356e-06, "loss": 0.3783, "step": 34460 }, { "epoch": 2.023956314955082, "grad_norm": 2.8409793376922607, "learning_rate": 2.892491658425105e-06, "loss": 0.4149, "step": 34470 }, { "epoch": 2.0245434795373143, "grad_norm": 5.848104953765869, "learning_rate": 2.889394393740502e-06, "loss": 0.4226, "step": 34480 }, { "epoch": 2.025130644119547, "grad_norm": 3.270761013031006, "learning_rate": 2.8862981142282436e-06, "loss": 0.3586, "step": 34490 }, { "epoch": 2.025717808701779, "grad_norm": 4.8969340324401855, "learning_rate": 2.8832028213335884e-06, "loss": 0.4694, "step": 34500 }, { "epoch": 2.0263049732840117, "grad_norm": 2.7245993614196777, "learning_rate": 2.880108516501332e-06, "loss": 0.4084, "step": 34510 }, { "epoch": 2.026892137866244, "grad_norm": 3.5058577060699463, "learning_rate": 2.877015201175812e-06, "loss": 0.5025, "step": 34520 }, { "epoch": 2.0274793024484765, "grad_norm": 7.367367267608643, "learning_rate": 2.8739228768009015e-06, "loss": 0.3504, "step": 34530 }, { "epoch": 2.0280664670307087, "grad_norm": 2.970810651779175, "learning_rate": 2.870831544820014e-06, "loss": 0.3965, "step": 34540 }, { "epoch": 2.0286536316129413, "grad_norm": 3.036707639694214, "learning_rate": 2.8677412066760948e-06, "loss": 0.485, "step": 34550 }, { "epoch": 2.0292407961951735, "grad_norm": 9.455309867858887, "learning_rate": 2.8646518638116272e-06, "loss": 0.4835, "step": 34560 }, { "epoch": 2.029827960777406, "grad_norm": 2.457967758178711, "learning_rate": 2.861563517668635e-06, "loss": 0.3748, "step": 34570 }, { "epoch": 2.0304151253596383, "grad_norm": 4.757584095001221, "learning_rate": 2.8584761696886743e-06, "loss": 0.4291, "step": 34580 }, { "epoch": 2.0310022899418705, "grad_norm": 2.6397130489349365, "learning_rate": 2.855389821312827e-06, "loss": 0.3997, "step": 34590 }, { "epoch": 2.031589454524103, "grad_norm": 2.8482472896575928, "learning_rate": 2.8523044739817207e-06, "loss": 0.3327, "step": 34600 }, { "epoch": 2.0321766191063353, "grad_norm": 1.3368252515792847, "learning_rate": 2.8492201291355077e-06, "loss": 0.3212, "step": 34610 }, { "epoch": 2.032763783688568, "grad_norm": 4.5571722984313965, "learning_rate": 2.8461367882138756e-06, "loss": 0.5272, "step": 34620 }, { "epoch": 2.0333509482708, "grad_norm": 1.5487617254257202, "learning_rate": 2.8430544526560443e-06, "loss": 0.3647, "step": 34630 }, { "epoch": 2.0339381128530327, "grad_norm": 7.956489562988281, "learning_rate": 2.8399731239007585e-06, "loss": 0.5033, "step": 34640 }, { "epoch": 2.034525277435265, "grad_norm": 4.888302326202393, "learning_rate": 2.836892803386301e-06, "loss": 0.4368, "step": 34650 }, { "epoch": 2.0351124420174975, "grad_norm": 2.838634967803955, "learning_rate": 2.8338134925504796e-06, "loss": 0.4185, "step": 34660 }, { "epoch": 2.0356996065997297, "grad_norm": 5.004483699798584, "learning_rate": 2.830735192830633e-06, "loss": 0.3776, "step": 34670 }, { "epoch": 2.0362867711819623, "grad_norm": 1.428259253501892, "learning_rate": 2.827657905663623e-06, "loss": 0.2944, "step": 34680 }, { "epoch": 2.0368739357641945, "grad_norm": 4.812935829162598, "learning_rate": 2.824581632485843e-06, "loss": 0.3979, "step": 34690 }, { "epoch": 2.037461100346427, "grad_norm": 2.860661745071411, "learning_rate": 2.821506374733214e-06, "loss": 0.4496, "step": 34700 }, { "epoch": 2.0380482649286593, "grad_norm": 3.272108316421509, "learning_rate": 2.818432133841179e-06, "loss": 0.5016, "step": 34710 }, { "epoch": 2.038635429510892, "grad_norm": 2.3445663452148438, "learning_rate": 2.815358911244711e-06, "loss": 0.4736, "step": 34720 }, { "epoch": 2.039222594093124, "grad_norm": 1.6687737703323364, "learning_rate": 2.8122867083783034e-06, "loss": 0.337, "step": 34730 }, { "epoch": 2.0398097586753567, "grad_norm": 3.0617356300354004, "learning_rate": 2.8092155266759775e-06, "loss": 0.4354, "step": 34740 }, { "epoch": 2.040396923257589, "grad_norm": 2.85473895072937, "learning_rate": 2.8061453675712746e-06, "loss": 0.3829, "step": 34750 }, { "epoch": 2.0409840878398215, "grad_norm": 2.1130523681640625, "learning_rate": 2.80307623249726e-06, "loss": 0.3412, "step": 34760 }, { "epoch": 2.0415712524220537, "grad_norm": 4.732879638671875, "learning_rate": 2.800008122886524e-06, "loss": 0.3238, "step": 34770 }, { "epoch": 2.0421584170042864, "grad_norm": 2.2258379459381104, "learning_rate": 2.7969410401711706e-06, "loss": 0.3948, "step": 34780 }, { "epoch": 2.0427455815865185, "grad_norm": 1.9860785007476807, "learning_rate": 2.793874985782832e-06, "loss": 0.3905, "step": 34790 }, { "epoch": 2.043332746168751, "grad_norm": 3.9576053619384766, "learning_rate": 2.790809961152654e-06, "loss": 0.3942, "step": 34800 }, { "epoch": 2.0439199107509833, "grad_norm": 4.674423694610596, "learning_rate": 2.7877459677113133e-06, "loss": 0.4377, "step": 34810 }, { "epoch": 2.044507075333216, "grad_norm": 3.7689156532287598, "learning_rate": 2.784683006888991e-06, "loss": 0.5379, "step": 34820 }, { "epoch": 2.045094239915448, "grad_norm": 2.6938436031341553, "learning_rate": 2.781621080115393e-06, "loss": 0.5465, "step": 34830 }, { "epoch": 2.0456814044976808, "grad_norm": 4.3194169998168945, "learning_rate": 2.778560188819743e-06, "loss": 0.3659, "step": 34840 }, { "epoch": 2.046268569079913, "grad_norm": 1.8462260961532593, "learning_rate": 2.775500334430782e-06, "loss": 0.4515, "step": 34850 }, { "epoch": 2.0468557336621456, "grad_norm": 6.124507427215576, "learning_rate": 2.7724415183767645e-06, "loss": 0.4711, "step": 34860 }, { "epoch": 2.0474428982443778, "grad_norm": 10.941182136535645, "learning_rate": 2.7693837420854595e-06, "loss": 0.4689, "step": 34870 }, { "epoch": 2.0480300628266104, "grad_norm": 3.2227251529693604, "learning_rate": 2.7663270069841515e-06, "loss": 0.4536, "step": 34880 }, { "epoch": 2.0486172274088426, "grad_norm": 4.845832347869873, "learning_rate": 2.7632713144996438e-06, "loss": 0.4253, "step": 34890 }, { "epoch": 2.049204391991075, "grad_norm": 2.0030391216278076, "learning_rate": 2.7602166660582496e-06, "loss": 0.4431, "step": 34900 }, { "epoch": 2.0497915565733074, "grad_norm": 1.5392704010009766, "learning_rate": 2.7571630630857905e-06, "loss": 0.4217, "step": 34910 }, { "epoch": 2.05037872115554, "grad_norm": 2.174341917037964, "learning_rate": 2.7541105070076056e-06, "loss": 0.3657, "step": 34920 }, { "epoch": 2.050965885737772, "grad_norm": 4.240367412567139, "learning_rate": 2.751058999248544e-06, "loss": 0.4665, "step": 34930 }, { "epoch": 2.051553050320005, "grad_norm": 5.951447010040283, "learning_rate": 2.7480085412329643e-06, "loss": 0.464, "step": 34940 }, { "epoch": 2.052140214902237, "grad_norm": 3.2537059783935547, "learning_rate": 2.744959134384739e-06, "loss": 0.3621, "step": 34950 }, { "epoch": 2.0527273794844696, "grad_norm": 5.101819038391113, "learning_rate": 2.7419107801272405e-06, "loss": 0.5617, "step": 34960 }, { "epoch": 2.0533145440667018, "grad_norm": 2.1758289337158203, "learning_rate": 2.7388634798833617e-06, "loss": 0.4046, "step": 34970 }, { "epoch": 2.0539017086489344, "grad_norm": 4.056305885314941, "learning_rate": 2.7358172350754974e-06, "loss": 0.4818, "step": 34980 }, { "epoch": 2.0544888732311666, "grad_norm": 4.259302139282227, "learning_rate": 2.7327720471255513e-06, "loss": 0.3684, "step": 34990 }, { "epoch": 2.055076037813399, "grad_norm": 3.455655574798584, "learning_rate": 2.72972791745493e-06, "loss": 0.3834, "step": 35000 }, { "epoch": 2.0556632023956314, "grad_norm": 4.055662155151367, "learning_rate": 2.7266848474845493e-06, "loss": 0.3801, "step": 35010 }, { "epoch": 2.056250366977864, "grad_norm": 4.813446044921875, "learning_rate": 2.723642838634832e-06, "loss": 0.474, "step": 35020 }, { "epoch": 2.056837531560096, "grad_norm": 5.1751604080200195, "learning_rate": 2.7206018923257027e-06, "loss": 0.3415, "step": 35030 }, { "epoch": 2.057424696142329, "grad_norm": 3.753519296646118, "learning_rate": 2.7175620099765915e-06, "loss": 0.4013, "step": 35040 }, { "epoch": 2.058011860724561, "grad_norm": 5.227480888366699, "learning_rate": 2.7145231930064313e-06, "loss": 0.5083, "step": 35050 }, { "epoch": 2.0585990253067936, "grad_norm": 2.708733081817627, "learning_rate": 2.711485442833658e-06, "loss": 0.4172, "step": 35060 }, { "epoch": 2.059186189889026, "grad_norm": 2.265636682510376, "learning_rate": 2.708448760876209e-06, "loss": 0.4327, "step": 35070 }, { "epoch": 2.0597733544712584, "grad_norm": 2.519832134246826, "learning_rate": 2.705413148551524e-06, "loss": 0.5006, "step": 35080 }, { "epoch": 2.0603605190534906, "grad_norm": 10.087903022766113, "learning_rate": 2.7023786072765446e-06, "loss": 0.5579, "step": 35090 }, { "epoch": 2.0609476836357232, "grad_norm": 10.343388557434082, "learning_rate": 2.699345138467706e-06, "loss": 0.5221, "step": 35100 }, { "epoch": 2.0615348482179554, "grad_norm": 6.3670806884765625, "learning_rate": 2.6963127435409513e-06, "loss": 0.5325, "step": 35110 }, { "epoch": 2.062122012800188, "grad_norm": 4.8578572273254395, "learning_rate": 2.6932814239117155e-06, "loss": 0.4265, "step": 35120 }, { "epoch": 2.06270917738242, "grad_norm": 3.124683380126953, "learning_rate": 2.6902511809949412e-06, "loss": 0.4145, "step": 35130 }, { "epoch": 2.063296341964653, "grad_norm": 5.735479354858398, "learning_rate": 2.6872220162050554e-06, "loss": 0.4761, "step": 35140 }, { "epoch": 2.063883506546885, "grad_norm": 2.2462308406829834, "learning_rate": 2.6841939309559913e-06, "loss": 0.4991, "step": 35150 }, { "epoch": 2.0644706711291176, "grad_norm": 6.084864139556885, "learning_rate": 2.6811669266611744e-06, "loss": 0.364, "step": 35160 }, { "epoch": 2.06505783571135, "grad_norm": 6.785301685333252, "learning_rate": 2.6781410047335275e-06, "loss": 0.3072, "step": 35170 }, { "epoch": 2.0656450002935824, "grad_norm": 1.8641389608383179, "learning_rate": 2.6751161665854688e-06, "loss": 0.379, "step": 35180 }, { "epoch": 2.0662321648758146, "grad_norm": 2.90759539604187, "learning_rate": 2.6720924136289056e-06, "loss": 0.4837, "step": 35190 }, { "epoch": 2.0668193294580472, "grad_norm": 7.084329128265381, "learning_rate": 2.6690697472752413e-06, "loss": 0.401, "step": 35200 }, { "epoch": 2.0674064940402794, "grad_norm": 1.780159831047058, "learning_rate": 2.6660481689353783e-06, "loss": 0.3666, "step": 35210 }, { "epoch": 2.067993658622512, "grad_norm": 2.813100576400757, "learning_rate": 2.6630276800197047e-06, "loss": 0.4176, "step": 35220 }, { "epoch": 2.0685808232047442, "grad_norm": 2.32899808883667, "learning_rate": 2.660008281938098e-06, "loss": 0.5122, "step": 35230 }, { "epoch": 2.069167987786977, "grad_norm": 1.6010276079177856, "learning_rate": 2.6569899760999306e-06, "loss": 0.3716, "step": 35240 }, { "epoch": 2.069755152369209, "grad_norm": 4.4250922203063965, "learning_rate": 2.6539727639140658e-06, "loss": 0.4807, "step": 35250 }, { "epoch": 2.0703423169514417, "grad_norm": 1.9866716861724854, "learning_rate": 2.650956646788854e-06, "loss": 0.3637, "step": 35260 }, { "epoch": 2.070929481533674, "grad_norm": 5.369691848754883, "learning_rate": 2.6479416261321343e-06, "loss": 0.385, "step": 35270 }, { "epoch": 2.0715166461159065, "grad_norm": 3.2831249237060547, "learning_rate": 2.6449277033512367e-06, "loss": 0.3261, "step": 35280 }, { "epoch": 2.0721038106981386, "grad_norm": 8.507840156555176, "learning_rate": 2.641914879852976e-06, "loss": 0.4263, "step": 35290 }, { "epoch": 2.0726909752803713, "grad_norm": 1.4646717309951782, "learning_rate": 2.638903157043655e-06, "loss": 0.3979, "step": 35300 }, { "epoch": 2.0732781398626035, "grad_norm": 3.2013466358184814, "learning_rate": 2.6358925363290635e-06, "loss": 0.4718, "step": 35310 }, { "epoch": 2.073865304444836, "grad_norm": 5.572455406188965, "learning_rate": 2.632883019114477e-06, "loss": 0.4127, "step": 35320 }, { "epoch": 2.0744524690270683, "grad_norm": 2.436938524246216, "learning_rate": 2.6298746068046517e-06, "loss": 0.3174, "step": 35330 }, { "epoch": 2.075039633609301, "grad_norm": 2.50058913230896, "learning_rate": 2.6268673008038327e-06, "loss": 0.4219, "step": 35340 }, { "epoch": 2.075626798191533, "grad_norm": 4.26707124710083, "learning_rate": 2.6238611025157475e-06, "loss": 0.3228, "step": 35350 }, { "epoch": 2.0762139627737657, "grad_norm": 7.097754955291748, "learning_rate": 2.6208560133436063e-06, "loss": 0.433, "step": 35360 }, { "epoch": 2.076801127355998, "grad_norm": 1.883995532989502, "learning_rate": 2.6178520346901017e-06, "loss": 0.3496, "step": 35370 }, { "epoch": 2.0773882919382305, "grad_norm": 2.8160829544067383, "learning_rate": 2.6148491679574074e-06, "loss": 0.3293, "step": 35380 }, { "epoch": 2.0779754565204627, "grad_norm": 6.567079067230225, "learning_rate": 2.6118474145471794e-06, "loss": 0.3299, "step": 35390 }, { "epoch": 2.0785626211026953, "grad_norm": 3.767913579940796, "learning_rate": 2.6088467758605527e-06, "loss": 0.4155, "step": 35400 }, { "epoch": 2.0791497856849275, "grad_norm": 4.400559425354004, "learning_rate": 2.605847253298145e-06, "loss": 0.4167, "step": 35410 }, { "epoch": 2.0797369502671597, "grad_norm": 8.814120292663574, "learning_rate": 2.6028488482600468e-06, "loss": 0.4707, "step": 35420 }, { "epoch": 2.0803241148493923, "grad_norm": 3.7357664108276367, "learning_rate": 2.599851562145831e-06, "loss": 0.4289, "step": 35430 }, { "epoch": 2.080911279431625, "grad_norm": 4.784369945526123, "learning_rate": 2.596855396354547e-06, "loss": 0.3975, "step": 35440 }, { "epoch": 2.081498444013857, "grad_norm": 2.6919071674346924, "learning_rate": 2.5938603522847285e-06, "loss": 0.3177, "step": 35450 }, { "epoch": 2.0820856085960893, "grad_norm": 2.204803466796875, "learning_rate": 2.590866431334372e-06, "loss": 0.3533, "step": 35460 }, { "epoch": 2.082672773178322, "grad_norm": 5.831791877746582, "learning_rate": 2.5878736349009603e-06, "loss": 0.4136, "step": 35470 }, { "epoch": 2.083259937760554, "grad_norm": 7.654670238494873, "learning_rate": 2.584881964381447e-06, "loss": 0.3396, "step": 35480 }, { "epoch": 2.0838471023427867, "grad_norm": 1.4231958389282227, "learning_rate": 2.5818914211722627e-06, "loss": 0.364, "step": 35490 }, { "epoch": 2.084434266925019, "grad_norm": 11.822866439819336, "learning_rate": 2.5789020066693117e-06, "loss": 0.4448, "step": 35500 }, { "epoch": 2.0850214315072515, "grad_norm": 4.852151393890381, "learning_rate": 2.575913722267964e-06, "loss": 0.4754, "step": 35510 }, { "epoch": 2.0856085960894837, "grad_norm": 4.184769630432129, "learning_rate": 2.572926569363071e-06, "loss": 0.364, "step": 35520 }, { "epoch": 2.0861957606717163, "grad_norm": 1.2122914791107178, "learning_rate": 2.569940549348956e-06, "loss": 0.3918, "step": 35530 }, { "epoch": 2.0867829252539485, "grad_norm": 2.6279728412628174, "learning_rate": 2.566955663619408e-06, "loss": 0.3445, "step": 35540 }, { "epoch": 2.087370089836181, "grad_norm": 2.001763343811035, "learning_rate": 2.563971913567692e-06, "loss": 0.4165, "step": 35550 }, { "epoch": 2.0879572544184133, "grad_norm": 3.5879499912261963, "learning_rate": 2.560989300586536e-06, "loss": 0.3983, "step": 35560 }, { "epoch": 2.088544419000646, "grad_norm": 3.478874921798706, "learning_rate": 2.558007826068143e-06, "loss": 0.3614, "step": 35570 }, { "epoch": 2.089131583582878, "grad_norm": 3.9585623741149902, "learning_rate": 2.555027491404182e-06, "loss": 0.3984, "step": 35580 }, { "epoch": 2.0897187481651107, "grad_norm": 1.740714192390442, "learning_rate": 2.552048297985792e-06, "loss": 0.4136, "step": 35590 }, { "epoch": 2.090305912747343, "grad_norm": 3.3250648975372314, "learning_rate": 2.5490702472035785e-06, "loss": 0.5031, "step": 35600 }, { "epoch": 2.0908930773295755, "grad_norm": 7.882654666900635, "learning_rate": 2.5460933404476116e-06, "loss": 0.4521, "step": 35610 }, { "epoch": 2.0914802419118077, "grad_norm": 2.729149580001831, "learning_rate": 2.5431175791074302e-06, "loss": 0.5441, "step": 35620 }, { "epoch": 2.0920674064940403, "grad_norm": 4.055150508880615, "learning_rate": 2.540142964572038e-06, "loss": 0.3547, "step": 35630 }, { "epoch": 2.0926545710762725, "grad_norm": 3.937129259109497, "learning_rate": 2.5371694982299035e-06, "loss": 0.3806, "step": 35640 }, { "epoch": 2.093241735658505, "grad_norm": 2.8641257286071777, "learning_rate": 2.5341971814689548e-06, "loss": 0.3698, "step": 35650 }, { "epoch": 2.0938289002407373, "grad_norm": 9.630319595336914, "learning_rate": 2.53122601567659e-06, "loss": 0.4302, "step": 35660 }, { "epoch": 2.09441606482297, "grad_norm": 4.01943826675415, "learning_rate": 2.528256002239666e-06, "loss": 0.4241, "step": 35670 }, { "epoch": 2.095003229405202, "grad_norm": 12.944561958312988, "learning_rate": 2.525287142544505e-06, "loss": 0.3069, "step": 35680 }, { "epoch": 2.0955903939874347, "grad_norm": 15.574163436889648, "learning_rate": 2.5223194379768866e-06, "loss": 0.4781, "step": 35690 }, { "epoch": 2.096177558569667, "grad_norm": 2.3732643127441406, "learning_rate": 2.519352889922054e-06, "loss": 0.476, "step": 35700 }, { "epoch": 2.0967647231518995, "grad_norm": 2.8976645469665527, "learning_rate": 2.516387499764709e-06, "loss": 0.51, "step": 35710 }, { "epoch": 2.0973518877341317, "grad_norm": 4.1219024658203125, "learning_rate": 2.5134232688890154e-06, "loss": 0.3551, "step": 35720 }, { "epoch": 2.0979390523163644, "grad_norm": 2.0478744506835938, "learning_rate": 2.5104601986785947e-06, "loss": 0.3462, "step": 35730 }, { "epoch": 2.0985262168985965, "grad_norm": 16.939388275146484, "learning_rate": 2.5074982905165233e-06, "loss": 0.4, "step": 35740 }, { "epoch": 2.099113381480829, "grad_norm": 3.10274076461792, "learning_rate": 2.5045375457853393e-06, "loss": 0.4094, "step": 35750 }, { "epoch": 2.0997005460630613, "grad_norm": 6.603026390075684, "learning_rate": 2.5015779658670347e-06, "loss": 0.3853, "step": 35760 }, { "epoch": 2.100287710645294, "grad_norm": 9.65920639038086, "learning_rate": 2.498619552143065e-06, "loss": 0.4086, "step": 35770 }, { "epoch": 2.100874875227526, "grad_norm": 3.0971715450286865, "learning_rate": 2.495662305994331e-06, "loss": 0.3422, "step": 35780 }, { "epoch": 2.1014620398097588, "grad_norm": 3.2310051918029785, "learning_rate": 2.4927062288011953e-06, "loss": 0.3526, "step": 35790 }, { "epoch": 2.102049204391991, "grad_norm": 2.240299701690674, "learning_rate": 2.489751321943472e-06, "loss": 0.4079, "step": 35800 }, { "epoch": 2.1026363689742236, "grad_norm": 6.714266777038574, "learning_rate": 2.4867975868004313e-06, "loss": 0.4934, "step": 35810 }, { "epoch": 2.1032235335564557, "grad_norm": 6.615405559539795, "learning_rate": 2.483845024750794e-06, "loss": 0.3946, "step": 35820 }, { "epoch": 2.1038106981386884, "grad_norm": 5.238297462463379, "learning_rate": 2.4808936371727366e-06, "loss": 0.3379, "step": 35830 }, { "epoch": 2.1043978627209206, "grad_norm": 4.274505615234375, "learning_rate": 2.4779434254438795e-06, "loss": 0.5374, "step": 35840 }, { "epoch": 2.104985027303153, "grad_norm": 11.91087532043457, "learning_rate": 2.474994390941306e-06, "loss": 0.3733, "step": 35850 }, { "epoch": 2.1055721918853854, "grad_norm": 3.006335496902466, "learning_rate": 2.4720465350415416e-06, "loss": 0.4031, "step": 35860 }, { "epoch": 2.106159356467618, "grad_norm": 1.7785584926605225, "learning_rate": 2.469099859120566e-06, "loss": 0.3975, "step": 35870 }, { "epoch": 2.10674652104985, "grad_norm": 2.631075859069824, "learning_rate": 2.4661543645538027e-06, "loss": 0.3196, "step": 35880 }, { "epoch": 2.107333685632083, "grad_norm": 2.335113286972046, "learning_rate": 2.4632100527161283e-06, "loss": 0.4698, "step": 35890 }, { "epoch": 2.107920850214315, "grad_norm": 2.308015823364258, "learning_rate": 2.460266924981867e-06, "loss": 0.4107, "step": 35900 }, { "epoch": 2.1085080147965476, "grad_norm": 2.687558889389038, "learning_rate": 2.457324982724788e-06, "loss": 0.4529, "step": 35910 }, { "epoch": 2.1090951793787798, "grad_norm": 3.902405023574829, "learning_rate": 2.4543842273181094e-06, "loss": 0.5048, "step": 35920 }, { "epoch": 2.1096823439610124, "grad_norm": 2.2130744457244873, "learning_rate": 2.4514446601344954e-06, "loss": 0.367, "step": 35930 }, { "epoch": 2.1102695085432446, "grad_norm": 2.1974105834960938, "learning_rate": 2.448506282546054e-06, "loss": 0.3083, "step": 35940 }, { "epoch": 2.110856673125477, "grad_norm": 5.2279462814331055, "learning_rate": 2.4455690959243384e-06, "loss": 0.2693, "step": 35950 }, { "epoch": 2.1114438377077094, "grad_norm": 2.7190134525299072, "learning_rate": 2.4426331016403474e-06, "loss": 0.405, "step": 35960 }, { "epoch": 2.112031002289942, "grad_norm": 1.6337140798568726, "learning_rate": 2.43969830106452e-06, "loss": 0.3619, "step": 35970 }, { "epoch": 2.112618166872174, "grad_norm": 2.5625061988830566, "learning_rate": 2.4367646955667405e-06, "loss": 0.4378, "step": 35980 }, { "epoch": 2.113205331454407, "grad_norm": 4.226804256439209, "learning_rate": 2.433832286516335e-06, "loss": 0.3972, "step": 35990 }, { "epoch": 2.113792496036639, "grad_norm": 3.2247533798217773, "learning_rate": 2.430901075282071e-06, "loss": 0.4903, "step": 36000 }, { "epoch": 2.1143796606188716, "grad_norm": 10.911209106445312, "learning_rate": 2.4279710632321574e-06, "loss": 0.4495, "step": 36010 }, { "epoch": 2.114966825201104, "grad_norm": 2.528318166732788, "learning_rate": 2.425042251734242e-06, "loss": 0.3213, "step": 36020 }, { "epoch": 2.1155539897833364, "grad_norm": 11.415364265441895, "learning_rate": 2.4221146421554136e-06, "loss": 0.4085, "step": 36030 }, { "epoch": 2.1161411543655686, "grad_norm": 6.721785545349121, "learning_rate": 2.419188235862199e-06, "loss": 0.3731, "step": 36040 }, { "epoch": 2.1167283189478012, "grad_norm": 3.4711737632751465, "learning_rate": 2.416263034220564e-06, "loss": 0.4424, "step": 36050 }, { "epoch": 2.1173154835300334, "grad_norm": 8.931819915771484, "learning_rate": 2.413339038595914e-06, "loss": 0.4053, "step": 36060 }, { "epoch": 2.117902648112266, "grad_norm": 7.809268951416016, "learning_rate": 2.4104162503530854e-06, "loss": 0.3892, "step": 36070 }, { "epoch": 2.118489812694498, "grad_norm": 2.7162861824035645, "learning_rate": 2.4074946708563547e-06, "loss": 0.3285, "step": 36080 }, { "epoch": 2.119076977276731, "grad_norm": 4.09055757522583, "learning_rate": 2.404574301469438e-06, "loss": 0.4029, "step": 36090 }, { "epoch": 2.119664141858963, "grad_norm": 3.224001884460449, "learning_rate": 2.4016551435554836e-06, "loss": 0.3864, "step": 36100 }, { "epoch": 2.1202513064411956, "grad_norm": 4.307884693145752, "learning_rate": 2.3987371984770698e-06, "loss": 0.3439, "step": 36110 }, { "epoch": 2.120838471023428, "grad_norm": 2.483004570007324, "learning_rate": 2.3958204675962144e-06, "loss": 0.4882, "step": 36120 }, { "epoch": 2.1214256356056604, "grad_norm": 2.778757333755493, "learning_rate": 2.3929049522743664e-06, "loss": 0.374, "step": 36130 }, { "epoch": 2.1220128001878926, "grad_norm": 3.7095580101013184, "learning_rate": 2.3899906538724082e-06, "loss": 0.4305, "step": 36140 }, { "epoch": 2.1225999647701252, "grad_norm": 3.9829940795898438, "learning_rate": 2.3870775737506557e-06, "loss": 0.4593, "step": 36150 }, { "epoch": 2.1231871293523574, "grad_norm": 4.0502142906188965, "learning_rate": 2.3841657132688483e-06, "loss": 0.3617, "step": 36160 }, { "epoch": 2.12377429393459, "grad_norm": 5.72535514831543, "learning_rate": 2.3812550737861676e-06, "loss": 0.4943, "step": 36170 }, { "epoch": 2.1243614585168222, "grad_norm": 2.506180763244629, "learning_rate": 2.378345656661218e-06, "loss": 0.3887, "step": 36180 }, { "epoch": 2.124948623099055, "grad_norm": 3.4991531372070312, "learning_rate": 2.3754374632520366e-06, "loss": 0.3441, "step": 36190 }, { "epoch": 2.125535787681287, "grad_norm": 2.859879493713379, "learning_rate": 2.372530494916084e-06, "loss": 0.441, "step": 36200 }, { "epoch": 2.1261229522635197, "grad_norm": 4.839353084564209, "learning_rate": 2.369624753010254e-06, "loss": 0.3489, "step": 36210 }, { "epoch": 2.126710116845752, "grad_norm": 1.8443832397460938, "learning_rate": 2.366720238890868e-06, "loss": 0.4193, "step": 36220 }, { "epoch": 2.1272972814279845, "grad_norm": 17.610795974731445, "learning_rate": 2.3638169539136713e-06, "loss": 0.3384, "step": 36230 }, { "epoch": 2.1278844460102166, "grad_norm": 5.091390609741211, "learning_rate": 2.3609148994338376e-06, "loss": 0.3306, "step": 36240 }, { "epoch": 2.1284716105924493, "grad_norm": 7.08844518661499, "learning_rate": 2.3580140768059666e-06, "loss": 0.4206, "step": 36250 }, { "epoch": 2.1290587751746815, "grad_norm": 2.514860153198242, "learning_rate": 2.355114487384081e-06, "loss": 0.3458, "step": 36260 }, { "epoch": 2.129645939756914, "grad_norm": 5.0122270584106445, "learning_rate": 2.3522161325216293e-06, "loss": 0.4178, "step": 36270 }, { "epoch": 2.1302331043391463, "grad_norm": 2.0859551429748535, "learning_rate": 2.3493190135714854e-06, "loss": 0.4231, "step": 36280 }, { "epoch": 2.1308202689213784, "grad_norm": 3.135540008544922, "learning_rate": 2.3464231318859404e-06, "loss": 0.39, "step": 36290 }, { "epoch": 2.131407433503611, "grad_norm": 3.6186559200286865, "learning_rate": 2.3435284888167135e-06, "loss": 0.4549, "step": 36300 }, { "epoch": 2.1319945980858437, "grad_norm": 3.531802177429199, "learning_rate": 2.340635085714945e-06, "loss": 0.5142, "step": 36310 }, { "epoch": 2.132581762668076, "grad_norm": 4.002334117889404, "learning_rate": 2.337742923931193e-06, "loss": 0.3115, "step": 36320 }, { "epoch": 2.133168927250308, "grad_norm": 3.107370138168335, "learning_rate": 2.334852004815443e-06, "loss": 0.414, "step": 36330 }, { "epoch": 2.1337560918325407, "grad_norm": 1.4222521781921387, "learning_rate": 2.331962329717093e-06, "loss": 0.4172, "step": 36340 }, { "epoch": 2.1343432564147733, "grad_norm": 4.008914470672607, "learning_rate": 2.329073899984964e-06, "loss": 0.4608, "step": 36350 }, { "epoch": 2.1349304209970055, "grad_norm": 2.256120204925537, "learning_rate": 2.326186716967294e-06, "loss": 0.5052, "step": 36360 }, { "epoch": 2.1355175855792377, "grad_norm": 3.7373294830322266, "learning_rate": 2.3233007820117428e-06, "loss": 0.4364, "step": 36370 }, { "epoch": 2.1361047501614703, "grad_norm": 2.5881714820861816, "learning_rate": 2.3204160964653842e-06, "loss": 0.3343, "step": 36380 }, { "epoch": 2.1366919147437025, "grad_norm": 3.759288787841797, "learning_rate": 2.3175326616747084e-06, "loss": 0.3116, "step": 36390 }, { "epoch": 2.137279079325935, "grad_norm": 6.311311721801758, "learning_rate": 2.3146504789856213e-06, "loss": 0.3656, "step": 36400 }, { "epoch": 2.1378662439081673, "grad_norm": 3.5912256240844727, "learning_rate": 2.3117695497434506e-06, "loss": 0.4091, "step": 36410 }, { "epoch": 2.1384534084904, "grad_norm": 2.3368611335754395, "learning_rate": 2.308889875292934e-06, "loss": 0.3834, "step": 36420 }, { "epoch": 2.139040573072632, "grad_norm": 2.5150814056396484, "learning_rate": 2.3060114569782206e-06, "loss": 0.4529, "step": 36430 }, { "epoch": 2.1396277376548647, "grad_norm": 7.696954250335693, "learning_rate": 2.3031342961428783e-06, "loss": 0.4098, "step": 36440 }, { "epoch": 2.140214902237097, "grad_norm": 2.914666175842285, "learning_rate": 2.3002583941298856e-06, "loss": 0.3968, "step": 36450 }, { "epoch": 2.1408020668193295, "grad_norm": 2.1359598636627197, "learning_rate": 2.2973837522816352e-06, "loss": 0.2765, "step": 36460 }, { "epoch": 2.1413892314015617, "grad_norm": 3.037398338317871, "learning_rate": 2.2945103719399307e-06, "loss": 0.3461, "step": 36470 }, { "epoch": 2.1419763959837943, "grad_norm": 4.413668155670166, "learning_rate": 2.2916382544459847e-06, "loss": 0.4259, "step": 36480 }, { "epoch": 2.1425635605660265, "grad_norm": 1.968716025352478, "learning_rate": 2.2887674011404214e-06, "loss": 0.3046, "step": 36490 }, { "epoch": 2.143150725148259, "grad_norm": 2.0955862998962402, "learning_rate": 2.2858978133632793e-06, "loss": 0.4898, "step": 36500 }, { "epoch": 2.1437378897304913, "grad_norm": 4.877186298370361, "learning_rate": 2.2830294924540027e-06, "loss": 0.2977, "step": 36510 }, { "epoch": 2.144325054312724, "grad_norm": 1.8785948753356934, "learning_rate": 2.2801624397514406e-06, "loss": 0.3667, "step": 36520 }, { "epoch": 2.144912218894956, "grad_norm": 4.899197101593018, "learning_rate": 2.2772966565938564e-06, "loss": 0.4376, "step": 36530 }, { "epoch": 2.1454993834771887, "grad_norm": 3.076997995376587, "learning_rate": 2.2744321443189176e-06, "loss": 0.3532, "step": 36540 }, { "epoch": 2.146086548059421, "grad_norm": 3.6339211463928223, "learning_rate": 2.2715689042637007e-06, "loss": 0.4679, "step": 36550 }, { "epoch": 2.1466737126416535, "grad_norm": 4.855822563171387, "learning_rate": 2.2687069377646853e-06, "loss": 0.3855, "step": 36560 }, { "epoch": 2.1472608772238857, "grad_norm": 3.558530569076538, "learning_rate": 2.2658462461577597e-06, "loss": 0.4343, "step": 36570 }, { "epoch": 2.1478480418061183, "grad_norm": 4.781008243560791, "learning_rate": 2.262986830778215e-06, "loss": 0.3227, "step": 36580 }, { "epoch": 2.1484352063883505, "grad_norm": 2.6311919689178467, "learning_rate": 2.2601286929607468e-06, "loss": 0.3969, "step": 36590 }, { "epoch": 2.149022370970583, "grad_norm": 3.124598264694214, "learning_rate": 2.2572718340394557e-06, "loss": 0.5513, "step": 36600 }, { "epoch": 2.1496095355528153, "grad_norm": 2.4137680530548096, "learning_rate": 2.2544162553478457e-06, "loss": 0.3021, "step": 36610 }, { "epoch": 2.150196700135048, "grad_norm": 2.480665922164917, "learning_rate": 2.2515619582188187e-06, "loss": 0.3837, "step": 36620 }, { "epoch": 2.15078386471728, "grad_norm": 3.1790764331817627, "learning_rate": 2.2487089439846827e-06, "loss": 0.3223, "step": 36630 }, { "epoch": 2.1513710292995127, "grad_norm": 2.396097183227539, "learning_rate": 2.2458572139771474e-06, "loss": 0.3495, "step": 36640 }, { "epoch": 2.151958193881745, "grad_norm": 4.814089298248291, "learning_rate": 2.24300676952732e-06, "loss": 0.3653, "step": 36650 }, { "epoch": 2.1525453584639775, "grad_norm": 13.124289512634277, "learning_rate": 2.2401576119657097e-06, "loss": 0.4342, "step": 36660 }, { "epoch": 2.1531325230462097, "grad_norm": 2.2081046104431152, "learning_rate": 2.2373097426222246e-06, "loss": 0.3027, "step": 36670 }, { "epoch": 2.1537196876284423, "grad_norm": 3.4036173820495605, "learning_rate": 2.2344631628261713e-06, "loss": 0.4639, "step": 36680 }, { "epoch": 2.1543068522106745, "grad_norm": 1.8495360612869263, "learning_rate": 2.231617873906255e-06, "loss": 0.407, "step": 36690 }, { "epoch": 2.154894016792907, "grad_norm": 5.726637840270996, "learning_rate": 2.228773877190579e-06, "loss": 0.4718, "step": 36700 }, { "epoch": 2.1554811813751393, "grad_norm": 5.019335746765137, "learning_rate": 2.225931174006639e-06, "loss": 0.421, "step": 36710 }, { "epoch": 2.156068345957372, "grad_norm": 1.714024543762207, "learning_rate": 2.2230897656813325e-06, "loss": 0.4311, "step": 36720 }, { "epoch": 2.156655510539604, "grad_norm": 10.630701065063477, "learning_rate": 2.220249653540948e-06, "loss": 0.5295, "step": 36730 }, { "epoch": 2.1572426751218368, "grad_norm": 1.7880563735961914, "learning_rate": 2.2174108389111765e-06, "loss": 0.5195, "step": 36740 }, { "epoch": 2.157829839704069, "grad_norm": 3.1149470806121826, "learning_rate": 2.214573323117093e-06, "loss": 0.4427, "step": 36750 }, { "epoch": 2.1584170042863016, "grad_norm": 3.6745245456695557, "learning_rate": 2.211737107483174e-06, "loss": 0.3394, "step": 36760 }, { "epoch": 2.1590041688685337, "grad_norm": 6.043123722076416, "learning_rate": 2.2089021933332854e-06, "loss": 0.521, "step": 36770 }, { "epoch": 2.1595913334507664, "grad_norm": 3.4967806339263916, "learning_rate": 2.2060685819906874e-06, "loss": 0.3778, "step": 36780 }, { "epoch": 2.1601784980329986, "grad_norm": 3.7070913314819336, "learning_rate": 2.203236274778031e-06, "loss": 0.3532, "step": 36790 }, { "epoch": 2.160765662615231, "grad_norm": 2.5902185440063477, "learning_rate": 2.2004052730173613e-06, "loss": 0.3759, "step": 36800 }, { "epoch": 2.1613528271974634, "grad_norm": 3.655263900756836, "learning_rate": 2.197575578030106e-06, "loss": 0.2918, "step": 36810 }, { "epoch": 2.161939991779696, "grad_norm": 2.3573195934295654, "learning_rate": 2.1947471911370937e-06, "loss": 0.3697, "step": 36820 }, { "epoch": 2.162527156361928, "grad_norm": 9.624944686889648, "learning_rate": 2.1919201136585357e-06, "loss": 0.4173, "step": 36830 }, { "epoch": 2.163114320944161, "grad_norm": 4.133410453796387, "learning_rate": 2.1890943469140353e-06, "loss": 0.3048, "step": 36840 }, { "epoch": 2.163701485526393, "grad_norm": 9.397141456604004, "learning_rate": 2.1862698922225784e-06, "loss": 0.2926, "step": 36850 }, { "epoch": 2.1642886501086256, "grad_norm": 11.99476432800293, "learning_rate": 2.183446750902544e-06, "loss": 0.3497, "step": 36860 }, { "epoch": 2.1648758146908578, "grad_norm": 7.2836594581604, "learning_rate": 2.1806249242716974e-06, "loss": 0.4266, "step": 36870 }, { "epoch": 2.1654629792730904, "grad_norm": 2.118555784225464, "learning_rate": 2.177804413647188e-06, "loss": 0.4789, "step": 36880 }, { "epoch": 2.1660501438553226, "grad_norm": 6.27738094329834, "learning_rate": 2.174985220345554e-06, "loss": 0.546, "step": 36890 }, { "epoch": 2.166637308437555, "grad_norm": 2.7736053466796875, "learning_rate": 2.1721673456827146e-06, "loss": 0.4315, "step": 36900 }, { "epoch": 2.1672244730197874, "grad_norm": 11.303709030151367, "learning_rate": 2.1693507909739773e-06, "loss": 0.3007, "step": 36910 }, { "epoch": 2.16781163760202, "grad_norm": 5.733154773712158, "learning_rate": 2.166535557534032e-06, "loss": 0.3721, "step": 36920 }, { "epoch": 2.168398802184252, "grad_norm": 12.205484390258789, "learning_rate": 2.1637216466769526e-06, "loss": 0.4056, "step": 36930 }, { "epoch": 2.168985966766485, "grad_norm": 3.438217878341675, "learning_rate": 2.1609090597161925e-06, "loss": 0.4158, "step": 36940 }, { "epoch": 2.169573131348717, "grad_norm": 5.559444427490234, "learning_rate": 2.1580977979645906e-06, "loss": 0.3681, "step": 36950 }, { "epoch": 2.1701602959309496, "grad_norm": 8.297212600708008, "learning_rate": 2.155287862734366e-06, "loss": 0.415, "step": 36960 }, { "epoch": 2.170747460513182, "grad_norm": 2.6975979804992676, "learning_rate": 2.1524792553371195e-06, "loss": 0.4694, "step": 36970 }, { "epoch": 2.1713346250954144, "grad_norm": 2.0403459072113037, "learning_rate": 2.149671977083831e-06, "loss": 0.4755, "step": 36980 }, { "epoch": 2.1719217896776466, "grad_norm": 2.4391112327575684, "learning_rate": 2.1468660292848598e-06, "loss": 0.3247, "step": 36990 }, { "epoch": 2.172508954259879, "grad_norm": 40.72965621948242, "learning_rate": 2.144061413249946e-06, "loss": 0.338, "step": 37000 }, { "epoch": 2.1730961188421114, "grad_norm": 3.107903003692627, "learning_rate": 2.1412581302882056e-06, "loss": 0.4455, "step": 37010 }, { "epoch": 2.173683283424344, "grad_norm": 2.381242275238037, "learning_rate": 2.138456181708136e-06, "loss": 0.397, "step": 37020 }, { "epoch": 2.174270448006576, "grad_norm": 3.1143476963043213, "learning_rate": 2.1356555688176057e-06, "loss": 0.3796, "step": 37030 }, { "epoch": 2.174857612588809, "grad_norm": 6.679479598999023, "learning_rate": 2.132856292923865e-06, "loss": 0.3575, "step": 37040 }, { "epoch": 2.175444777171041, "grad_norm": 10.446815490722656, "learning_rate": 2.130058355333537e-06, "loss": 0.3294, "step": 37050 }, { "epoch": 2.1760319417532736, "grad_norm": 2.6732048988342285, "learning_rate": 2.127261757352625e-06, "loss": 0.5347, "step": 37060 }, { "epoch": 2.176619106335506, "grad_norm": 6.084848880767822, "learning_rate": 2.124466500286504e-06, "loss": 0.4932, "step": 37070 }, { "epoch": 2.1772062709177384, "grad_norm": 2.218578577041626, "learning_rate": 2.1216725854399183e-06, "loss": 0.4717, "step": 37080 }, { "epoch": 2.1777934354999706, "grad_norm": 5.1573662757873535, "learning_rate": 2.1188800141169925e-06, "loss": 0.3858, "step": 37090 }, { "epoch": 2.1783806000822032, "grad_norm": 3.0396578311920166, "learning_rate": 2.1160887876212216e-06, "loss": 0.326, "step": 37100 }, { "epoch": 2.1789677646644354, "grad_norm": 3.1360182762145996, "learning_rate": 2.113298907255473e-06, "loss": 0.3702, "step": 37110 }, { "epoch": 2.179554929246668, "grad_norm": 3.9926998615264893, "learning_rate": 2.1105103743219874e-06, "loss": 0.4158, "step": 37120 }, { "epoch": 2.1801420938289002, "grad_norm": 8.623339653015137, "learning_rate": 2.1077231901223694e-06, "loss": 0.4635, "step": 37130 }, { "epoch": 2.180729258411133, "grad_norm": 8.5442533493042, "learning_rate": 2.1049373559576047e-06, "loss": 0.3477, "step": 37140 }, { "epoch": 2.181316422993365, "grad_norm": 2.4107248783111572, "learning_rate": 2.1021528731280426e-06, "loss": 0.4455, "step": 37150 }, { "epoch": 2.181903587575597, "grad_norm": 3.025336265563965, "learning_rate": 2.0993697429334044e-06, "loss": 0.4018, "step": 37160 }, { "epoch": 2.18249075215783, "grad_norm": 2.284470558166504, "learning_rate": 2.096587966672774e-06, "loss": 0.4475, "step": 37170 }, { "epoch": 2.1830779167400625, "grad_norm": 3.4435253143310547, "learning_rate": 2.0938075456446098e-06, "loss": 0.3411, "step": 37180 }, { "epoch": 2.1836650813222946, "grad_norm": 3.7285990715026855, "learning_rate": 2.0910284811467356e-06, "loss": 0.338, "step": 37190 }, { "epoch": 2.184252245904527, "grad_norm": 4.613473892211914, "learning_rate": 2.0882507744763425e-06, "loss": 0.4956, "step": 37200 }, { "epoch": 2.1848394104867594, "grad_norm": 4.326396465301514, "learning_rate": 2.085474426929986e-06, "loss": 0.4415, "step": 37210 }, { "epoch": 2.185426575068992, "grad_norm": 4.644417762756348, "learning_rate": 2.082699439803589e-06, "loss": 0.4016, "step": 37220 }, { "epoch": 2.1860137396512243, "grad_norm": 2.911057710647583, "learning_rate": 2.0799258143924384e-06, "loss": 0.3135, "step": 37230 }, { "epoch": 2.1866009042334564, "grad_norm": 3.919212579727173, "learning_rate": 2.077153551991186e-06, "loss": 0.3597, "step": 37240 }, { "epoch": 2.187188068815689, "grad_norm": 3.222130298614502, "learning_rate": 2.07438265389385e-06, "loss": 0.4926, "step": 37250 }, { "epoch": 2.1877752333979212, "grad_norm": 7.878127574920654, "learning_rate": 2.071613121393804e-06, "loss": 0.3646, "step": 37260 }, { "epoch": 2.188362397980154, "grad_norm": 3.3828237056732178, "learning_rate": 2.0688449557837915e-06, "loss": 0.4887, "step": 37270 }, { "epoch": 2.188949562562386, "grad_norm": 17.404415130615234, "learning_rate": 2.066078158355916e-06, "loss": 0.4778, "step": 37280 }, { "epoch": 2.1895367271446187, "grad_norm": 2.2943027019500732, "learning_rate": 2.063312730401642e-06, "loss": 0.3191, "step": 37290 }, { "epoch": 2.190123891726851, "grad_norm": 1.879021406173706, "learning_rate": 2.060548673211794e-06, "loss": 0.3723, "step": 37300 }, { "epoch": 2.1907110563090835, "grad_norm": 4.427684783935547, "learning_rate": 2.0577859880765587e-06, "loss": 0.5086, "step": 37310 }, { "epoch": 2.1912982208913157, "grad_norm": 3.0217628479003906, "learning_rate": 2.05502467628548e-06, "loss": 0.3657, "step": 37320 }, { "epoch": 2.1918853854735483, "grad_norm": 3.3988142013549805, "learning_rate": 2.052264739127463e-06, "loss": 0.2479, "step": 37330 }, { "epoch": 2.1924725500557805, "grad_norm": 7.6174163818359375, "learning_rate": 2.0495061778907694e-06, "loss": 0.3905, "step": 37340 }, { "epoch": 2.193059714638013, "grad_norm": 20.016321182250977, "learning_rate": 2.0467489938630207e-06, "loss": 0.3212, "step": 37350 }, { "epoch": 2.1936468792202453, "grad_norm": 2.5045650005340576, "learning_rate": 2.043993188331192e-06, "loss": 0.4904, "step": 37360 }, { "epoch": 2.194234043802478, "grad_norm": 19.12688636779785, "learning_rate": 2.0412387625816154e-06, "loss": 0.4113, "step": 37370 }, { "epoch": 2.19482120838471, "grad_norm": 3.0318470001220703, "learning_rate": 2.0384857178999854e-06, "loss": 0.3767, "step": 37380 }, { "epoch": 2.1954083729669427, "grad_norm": 5.039533615112305, "learning_rate": 2.0357340555713473e-06, "loss": 0.4387, "step": 37390 }, { "epoch": 2.195995537549175, "grad_norm": 2.299555778503418, "learning_rate": 2.0329837768800963e-06, "loss": 0.3755, "step": 37400 }, { "epoch": 2.1965827021314075, "grad_norm": 2.6325318813323975, "learning_rate": 2.0302348831099894e-06, "loss": 0.3562, "step": 37410 }, { "epoch": 2.1971698667136397, "grad_norm": 2.6853108406066895, "learning_rate": 2.0274873755441332e-06, "loss": 0.3614, "step": 37420 }, { "epoch": 2.1977570312958723, "grad_norm": 2.592128276824951, "learning_rate": 2.0247412554649894e-06, "loss": 0.4153, "step": 37430 }, { "epoch": 2.1983441958781045, "grad_norm": 5.25848913192749, "learning_rate": 2.021996524154372e-06, "loss": 0.3498, "step": 37440 }, { "epoch": 2.198931360460337, "grad_norm": 65.68345642089844, "learning_rate": 2.0192531828934403e-06, "loss": 0.4273, "step": 37450 }, { "epoch": 2.1995185250425693, "grad_norm": 1.1666713953018188, "learning_rate": 2.016511232962716e-06, "loss": 0.3492, "step": 37460 }, { "epoch": 2.200105689624802, "grad_norm": 2.9769198894500732, "learning_rate": 2.0137706756420625e-06, "loss": 0.4584, "step": 37470 }, { "epoch": 2.200692854207034, "grad_norm": 2.2189102172851562, "learning_rate": 2.0110315122106993e-06, "loss": 0.3448, "step": 37480 }, { "epoch": 2.2012800187892667, "grad_norm": 2.0068788528442383, "learning_rate": 2.008293743947189e-06, "loss": 0.4218, "step": 37490 }, { "epoch": 2.201867183371499, "grad_norm": 9.777776718139648, "learning_rate": 2.0055573721294464e-06, "loss": 0.5315, "step": 37500 }, { "epoch": 2.2024543479537315, "grad_norm": 2.1777265071868896, "learning_rate": 2.0028223980347345e-06, "loss": 0.5729, "step": 37510 }, { "epoch": 2.2030415125359637, "grad_norm": 2.7775280475616455, "learning_rate": 2.000088822939665e-06, "loss": 0.4361, "step": 37520 }, { "epoch": 2.2036286771181963, "grad_norm": 1.5512728691101074, "learning_rate": 1.9973566481201936e-06, "loss": 0.3506, "step": 37530 }, { "epoch": 2.2042158417004285, "grad_norm": 3.3558480739593506, "learning_rate": 1.994625874851624e-06, "loss": 0.2792, "step": 37540 }, { "epoch": 2.204803006282661, "grad_norm": 11.12766170501709, "learning_rate": 1.991896504408607e-06, "loss": 0.4647, "step": 37550 }, { "epoch": 2.2053901708648933, "grad_norm": 5.3832573890686035, "learning_rate": 1.989168538065136e-06, "loss": 0.402, "step": 37560 }, { "epoch": 2.205977335447126, "grad_norm": 1.463813066482544, "learning_rate": 1.9864419770945508e-06, "loss": 0.3573, "step": 37570 }, { "epoch": 2.206564500029358, "grad_norm": 3.0221753120422363, "learning_rate": 1.983716822769537e-06, "loss": 0.3698, "step": 37580 }, { "epoch": 2.2071516646115907, "grad_norm": 7.029075622558594, "learning_rate": 1.9809930763621164e-06, "loss": 0.4108, "step": 37590 }, { "epoch": 2.207738829193823, "grad_norm": 1.6764819622039795, "learning_rate": 1.978270739143661e-06, "loss": 0.4401, "step": 37600 }, { "epoch": 2.2083259937760555, "grad_norm": 7.461945533752441, "learning_rate": 1.9755498123848814e-06, "loss": 0.4363, "step": 37610 }, { "epoch": 2.2089131583582877, "grad_norm": 3.2748918533325195, "learning_rate": 1.9728302973558357e-06, "loss": 0.4024, "step": 37620 }, { "epoch": 2.2095003229405203, "grad_norm": 2.5354201793670654, "learning_rate": 1.9701121953259123e-06, "loss": 0.4166, "step": 37630 }, { "epoch": 2.2100874875227525, "grad_norm": 6.18790340423584, "learning_rate": 1.967395507563849e-06, "loss": 0.306, "step": 37640 }, { "epoch": 2.210674652104985, "grad_norm": 3.1656460762023926, "learning_rate": 1.9646802353377205e-06, "loss": 0.392, "step": 37650 }, { "epoch": 2.2112618166872173, "grad_norm": 2.6499440670013428, "learning_rate": 1.9619663799149397e-06, "loss": 0.4387, "step": 37660 }, { "epoch": 2.21184898126945, "grad_norm": 3.485534906387329, "learning_rate": 1.9592539425622618e-06, "loss": 0.4061, "step": 37670 }, { "epoch": 2.212436145851682, "grad_norm": 5.349421501159668, "learning_rate": 1.9565429245457743e-06, "loss": 0.4339, "step": 37680 }, { "epoch": 2.2130233104339148, "grad_norm": 5.738641262054443, "learning_rate": 1.9538333271309057e-06, "loss": 0.368, "step": 37690 }, { "epoch": 2.213610475016147, "grad_norm": 1.571330189704895, "learning_rate": 1.9511251515824238e-06, "loss": 0.4229, "step": 37700 }, { "epoch": 2.2141976395983796, "grad_norm": 3.5248851776123047, "learning_rate": 1.94841839916443e-06, "loss": 0.3891, "step": 37710 }, { "epoch": 2.2147848041806117, "grad_norm": 3.6743125915527344, "learning_rate": 1.945713071140359e-06, "loss": 0.3931, "step": 37720 }, { "epoch": 2.2153719687628444, "grad_norm": 2.2467172145843506, "learning_rate": 1.9430091687729837e-06, "loss": 0.4192, "step": 37730 }, { "epoch": 2.2159591333450765, "grad_norm": 2.827434539794922, "learning_rate": 1.9403066933244114e-06, "loss": 0.5003, "step": 37740 }, { "epoch": 2.216546297927309, "grad_norm": 3.8375563621520996, "learning_rate": 1.937605646056084e-06, "loss": 0.4006, "step": 37750 }, { "epoch": 2.2171334625095414, "grad_norm": 5.916222095489502, "learning_rate": 1.9349060282287757e-06, "loss": 0.414, "step": 37760 }, { "epoch": 2.217720627091774, "grad_norm": 4.353111267089844, "learning_rate": 1.932207841102589e-06, "loss": 0.4522, "step": 37770 }, { "epoch": 2.218307791674006, "grad_norm": 2.872447967529297, "learning_rate": 1.9295110859369686e-06, "loss": 0.2855, "step": 37780 }, { "epoch": 2.218894956256239, "grad_norm": 5.903806686401367, "learning_rate": 1.926815763990683e-06, "loss": 0.3788, "step": 37790 }, { "epoch": 2.219482120838471, "grad_norm": 2.330825090408325, "learning_rate": 1.9241218765218368e-06, "loss": 0.3179, "step": 37800 }, { "epoch": 2.2200692854207036, "grad_norm": 6.1784348487854, "learning_rate": 1.9214294247878566e-06, "loss": 0.4347, "step": 37810 }, { "epoch": 2.2206564500029358, "grad_norm": 7.0071210861206055, "learning_rate": 1.9187384100455087e-06, "loss": 0.5149, "step": 37820 }, { "epoch": 2.2212436145851684, "grad_norm": 6.04626989364624, "learning_rate": 1.916048833550882e-06, "loss": 0.4878, "step": 37830 }, { "epoch": 2.2218307791674006, "grad_norm": 4.377737045288086, "learning_rate": 1.9133606965593983e-06, "loss": 0.3068, "step": 37840 }, { "epoch": 2.222417943749633, "grad_norm": 6.026747703552246, "learning_rate": 1.9106740003258043e-06, "loss": 0.3097, "step": 37850 }, { "epoch": 2.2230051083318654, "grad_norm": 1.8480881452560425, "learning_rate": 1.907988746104177e-06, "loss": 0.3859, "step": 37860 }, { "epoch": 2.223592272914098, "grad_norm": 2.145122528076172, "learning_rate": 1.905304935147917e-06, "loss": 0.4, "step": 37870 }, { "epoch": 2.22417943749633, "grad_norm": 5.9864702224731445, "learning_rate": 1.9026225687097544e-06, "loss": 0.4148, "step": 37880 }, { "epoch": 2.224766602078563, "grad_norm": 4.727438926696777, "learning_rate": 1.8999416480417432e-06, "loss": 0.3868, "step": 37890 }, { "epoch": 2.225353766660795, "grad_norm": 1.9919536113739014, "learning_rate": 1.8972621743952652e-06, "loss": 0.3772, "step": 37900 }, { "epoch": 2.2259409312430276, "grad_norm": 2.3880608081817627, "learning_rate": 1.8945841490210204e-06, "loss": 0.3914, "step": 37910 }, { "epoch": 2.22652809582526, "grad_norm": 1.8595415353775024, "learning_rate": 1.8919075731690395e-06, "loss": 0.4996, "step": 37920 }, { "epoch": 2.2271152604074924, "grad_norm": 2.141350269317627, "learning_rate": 1.8892324480886715e-06, "loss": 0.4268, "step": 37930 }, { "epoch": 2.2277024249897246, "grad_norm": 4.497547626495361, "learning_rate": 1.8865587750285968e-06, "loss": 0.4685, "step": 37940 }, { "epoch": 2.228289589571957, "grad_norm": 2.5578744411468506, "learning_rate": 1.8838865552368068e-06, "loss": 0.4413, "step": 37950 }, { "epoch": 2.2288767541541894, "grad_norm": 2.901395797729492, "learning_rate": 1.8812157899606214e-06, "loss": 0.2726, "step": 37960 }, { "epoch": 2.229463918736422, "grad_norm": 2.6071743965148926, "learning_rate": 1.8785464804466802e-06, "loss": 0.4666, "step": 37970 }, { "epoch": 2.230051083318654, "grad_norm": 1.5464118719100952, "learning_rate": 1.8758786279409424e-06, "loss": 0.4923, "step": 37980 }, { "epoch": 2.2306382479008864, "grad_norm": 3.935295581817627, "learning_rate": 1.8732122336886898e-06, "loss": 0.3511, "step": 37990 }, { "epoch": 2.231225412483119, "grad_norm": 3.6874454021453857, "learning_rate": 1.8705472989345191e-06, "loss": 0.3083, "step": 38000 }, { "epoch": 2.2318125770653516, "grad_norm": 3.149425983428955, "learning_rate": 1.8678838249223464e-06, "loss": 0.4325, "step": 38010 }, { "epoch": 2.232399741647584, "grad_norm": 1.7207444906234741, "learning_rate": 1.8652218128954126e-06, "loss": 0.2763, "step": 38020 }, { "epoch": 2.232986906229816, "grad_norm": 2.694838523864746, "learning_rate": 1.8625612640962709e-06, "loss": 0.4543, "step": 38030 }, { "epoch": 2.2335740708120486, "grad_norm": 2.2426047325134277, "learning_rate": 1.8599021797667877e-06, "loss": 0.3997, "step": 38040 }, { "epoch": 2.2341612353942812, "grad_norm": 3.4302635192871094, "learning_rate": 1.8572445611481532e-06, "loss": 0.5643, "step": 38050 }, { "epoch": 2.2347483999765134, "grad_norm": 6.2454681396484375, "learning_rate": 1.854588409480869e-06, "loss": 0.427, "step": 38060 }, { "epoch": 2.2353355645587456, "grad_norm": 1.9973959922790527, "learning_rate": 1.8519337260047543e-06, "loss": 0.4691, "step": 38070 }, { "epoch": 2.2359227291409782, "grad_norm": 3.2262282371520996, "learning_rate": 1.8492805119589414e-06, "loss": 0.4899, "step": 38080 }, { "epoch": 2.236509893723211, "grad_norm": 4.803013801574707, "learning_rate": 1.8466287685818767e-06, "loss": 0.3273, "step": 38090 }, { "epoch": 2.237097058305443, "grad_norm": 6.595989227294922, "learning_rate": 1.8439784971113218e-06, "loss": 0.3932, "step": 38100 }, { "epoch": 2.237684222887675, "grad_norm": 4.1909708976745605, "learning_rate": 1.8413296987843498e-06, "loss": 0.3831, "step": 38110 }, { "epoch": 2.238271387469908, "grad_norm": 2.6920790672302246, "learning_rate": 1.8386823748373462e-06, "loss": 0.3515, "step": 38120 }, { "epoch": 2.23885855205214, "grad_norm": 2.58538556098938, "learning_rate": 1.8360365265060103e-06, "loss": 0.4694, "step": 38130 }, { "epoch": 2.2394457166343726, "grad_norm": 1.2311365604400635, "learning_rate": 1.833392155025348e-06, "loss": 0.4169, "step": 38140 }, { "epoch": 2.240032881216605, "grad_norm": 6.554409980773926, "learning_rate": 1.8307492616296807e-06, "loss": 0.4145, "step": 38150 }, { "epoch": 2.2406200457988374, "grad_norm": 9.976845741271973, "learning_rate": 1.828107847552637e-06, "loss": 0.4041, "step": 38160 }, { "epoch": 2.2412072103810696, "grad_norm": 2.524778366088867, "learning_rate": 1.825467914027157e-06, "loss": 0.3319, "step": 38170 }, { "epoch": 2.2417943749633022, "grad_norm": 5.181172847747803, "learning_rate": 1.8228294622854875e-06, "loss": 0.3739, "step": 38180 }, { "epoch": 2.2423815395455344, "grad_norm": 4.650324821472168, "learning_rate": 1.8201924935591863e-06, "loss": 0.4033, "step": 38190 }, { "epoch": 2.242968704127767, "grad_norm": 3.514399528503418, "learning_rate": 1.8175570090791162e-06, "loss": 0.4041, "step": 38200 }, { "epoch": 2.2435558687099992, "grad_norm": 3.5979511737823486, "learning_rate": 1.8149230100754488e-06, "loss": 0.3999, "step": 38210 }, { "epoch": 2.244143033292232, "grad_norm": 2.397153615951538, "learning_rate": 1.8122904977776645e-06, "loss": 0.3263, "step": 38220 }, { "epoch": 2.244730197874464, "grad_norm": 4.7002716064453125, "learning_rate": 1.8096594734145429e-06, "loss": 0.3095, "step": 38230 }, { "epoch": 2.2453173624566967, "grad_norm": 2.7389214038848877, "learning_rate": 1.8070299382141759e-06, "loss": 0.3886, "step": 38240 }, { "epoch": 2.245904527038929, "grad_norm": 2.1502561569213867, "learning_rate": 1.8044018934039553e-06, "loss": 0.3004, "step": 38250 }, { "epoch": 2.2464916916211615, "grad_norm": 5.669637680053711, "learning_rate": 1.8017753402105853e-06, "loss": 0.386, "step": 38260 }, { "epoch": 2.2470788562033936, "grad_norm": 1.4294172525405884, "learning_rate": 1.7991502798600635e-06, "loss": 0.3477, "step": 38270 }, { "epoch": 2.2476660207856263, "grad_norm": 5.441682815551758, "learning_rate": 1.7965267135776971e-06, "loss": 0.5606, "step": 38280 }, { "epoch": 2.2482531853678585, "grad_norm": 5.884393692016602, "learning_rate": 1.7939046425880941e-06, "loss": 0.4602, "step": 38290 }, { "epoch": 2.248840349950091, "grad_norm": 1.5033226013183594, "learning_rate": 1.7912840681151644e-06, "loss": 0.3546, "step": 38300 }, { "epoch": 2.2494275145323233, "grad_norm": 1.766324758529663, "learning_rate": 1.7886649913821218e-06, "loss": 0.4494, "step": 38310 }, { "epoch": 2.250014679114556, "grad_norm": 9.235917091369629, "learning_rate": 1.7860474136114753e-06, "loss": 0.4153, "step": 38320 }, { "epoch": 2.250601843696788, "grad_norm": 1.9223403930664062, "learning_rate": 1.7834313360250388e-06, "loss": 0.3794, "step": 38330 }, { "epoch": 2.2511890082790207, "grad_norm": 5.846920967102051, "learning_rate": 1.780816759843927e-06, "loss": 0.3563, "step": 38340 }, { "epoch": 2.251776172861253, "grad_norm": 3.043625831604004, "learning_rate": 1.778203686288551e-06, "loss": 0.3324, "step": 38350 }, { "epoch": 2.2523633374434855, "grad_norm": 4.010222434997559, "learning_rate": 1.7755921165786222e-06, "loss": 0.4635, "step": 38360 }, { "epoch": 2.2529505020257177, "grad_norm": 14.553357124328613, "learning_rate": 1.7729820519331464e-06, "loss": 0.4922, "step": 38370 }, { "epoch": 2.2535376666079503, "grad_norm": 3.5826234817504883, "learning_rate": 1.7703734935704315e-06, "loss": 0.4542, "step": 38380 }, { "epoch": 2.2541248311901825, "grad_norm": 3.0133728981018066, "learning_rate": 1.7677664427080798e-06, "loss": 0.3708, "step": 38390 }, { "epoch": 2.254711995772415, "grad_norm": 4.085210800170898, "learning_rate": 1.7651609005629911e-06, "loss": 0.4334, "step": 38400 }, { "epoch": 2.2552991603546473, "grad_norm": 4.459922790527344, "learning_rate": 1.76255686835136e-06, "loss": 0.4386, "step": 38410 }, { "epoch": 2.25588632493688, "grad_norm": 1.7411502599716187, "learning_rate": 1.7599543472886766e-06, "loss": 0.3588, "step": 38420 }, { "epoch": 2.256473489519112, "grad_norm": 3.294173240661621, "learning_rate": 1.7573533385897262e-06, "loss": 0.363, "step": 38430 }, { "epoch": 2.2570606541013447, "grad_norm": 8.31895637512207, "learning_rate": 1.7547538434685878e-06, "loss": 0.357, "step": 38440 }, { "epoch": 2.257647818683577, "grad_norm": 2.2880828380584717, "learning_rate": 1.7521558631386354e-06, "loss": 0.395, "step": 38450 }, { "epoch": 2.2582349832658095, "grad_norm": 1.1262118816375732, "learning_rate": 1.7495593988125308e-06, "loss": 0.3882, "step": 38460 }, { "epoch": 2.2588221478480417, "grad_norm": 4.713985919952393, "learning_rate": 1.7469644517022338e-06, "loss": 0.384, "step": 38470 }, { "epoch": 2.2594093124302743, "grad_norm": 2.4159555435180664, "learning_rate": 1.7443710230189943e-06, "loss": 0.3532, "step": 38480 }, { "epoch": 2.2599964770125065, "grad_norm": 3.5205042362213135, "learning_rate": 1.741779113973352e-06, "loss": 0.4368, "step": 38490 }, { "epoch": 2.260583641594739, "grad_norm": 3.880152702331543, "learning_rate": 1.7391887257751388e-06, "loss": 0.4579, "step": 38500 }, { "epoch": 2.2611708061769713, "grad_norm": 5.456688404083252, "learning_rate": 1.7365998596334771e-06, "loss": 0.3815, "step": 38510 }, { "epoch": 2.261757970759204, "grad_norm": 2.553893804550171, "learning_rate": 1.7340125167567773e-06, "loss": 0.3333, "step": 38520 }, { "epoch": 2.262345135341436, "grad_norm": 6.716250419616699, "learning_rate": 1.7314266983527394e-06, "loss": 0.3564, "step": 38530 }, { "epoch": 2.2629322999236687, "grad_norm": 4.988452911376953, "learning_rate": 1.7288424056283537e-06, "loss": 0.5243, "step": 38540 }, { "epoch": 2.263519464505901, "grad_norm": 4.162874221801758, "learning_rate": 1.726259639789894e-06, "loss": 0.4113, "step": 38550 }, { "epoch": 2.2641066290881335, "grad_norm": 2.278627634048462, "learning_rate": 1.7236784020429248e-06, "loss": 0.3436, "step": 38560 }, { "epoch": 2.2646937936703657, "grad_norm": 4.722484588623047, "learning_rate": 1.7210986935922951e-06, "loss": 0.4068, "step": 38570 }, { "epoch": 2.2652809582525983, "grad_norm": 1.8126823902130127, "learning_rate": 1.7185205156421453e-06, "loss": 0.4816, "step": 38580 }, { "epoch": 2.2658681228348305, "grad_norm": 2.7815849781036377, "learning_rate": 1.7159438693958964e-06, "loss": 0.4144, "step": 38590 }, { "epoch": 2.266455287417063, "grad_norm": 2.864861249923706, "learning_rate": 1.7133687560562535e-06, "loss": 0.3616, "step": 38600 }, { "epoch": 2.2670424519992953, "grad_norm": 2.8156700134277344, "learning_rate": 1.7107951768252096e-06, "loss": 0.3878, "step": 38610 }, { "epoch": 2.267629616581528, "grad_norm": 3.505453109741211, "learning_rate": 1.70822313290404e-06, "loss": 0.3973, "step": 38620 }, { "epoch": 2.26821678116376, "grad_norm": 5.27235746383667, "learning_rate": 1.7056526254933042e-06, "loss": 0.3321, "step": 38630 }, { "epoch": 2.2688039457459928, "grad_norm": 2.8733668327331543, "learning_rate": 1.7030836557928448e-06, "loss": 0.3557, "step": 38640 }, { "epoch": 2.269391110328225, "grad_norm": 11.533845901489258, "learning_rate": 1.700516225001782e-06, "loss": 0.4836, "step": 38650 }, { "epoch": 2.2699782749104576, "grad_norm": 11.863812446594238, "learning_rate": 1.697950334318526e-06, "loss": 0.476, "step": 38660 }, { "epoch": 2.2705654394926897, "grad_norm": 5.777439117431641, "learning_rate": 1.6953859849407617e-06, "loss": 0.334, "step": 38670 }, { "epoch": 2.2711526040749224, "grad_norm": 2.822814702987671, "learning_rate": 1.6928231780654586e-06, "loss": 0.3615, "step": 38680 }, { "epoch": 2.2717397686571545, "grad_norm": 3.639017343521118, "learning_rate": 1.69026191488886e-06, "loss": 0.4804, "step": 38690 }, { "epoch": 2.272326933239387, "grad_norm": 2.0564181804656982, "learning_rate": 1.6877021966064956e-06, "loss": 0.2281, "step": 38700 }, { "epoch": 2.2729140978216194, "grad_norm": 4.6329264640808105, "learning_rate": 1.68514402441317e-06, "loss": 0.4368, "step": 38710 }, { "epoch": 2.273501262403852, "grad_norm": 2.3651084899902344, "learning_rate": 1.6825873995029679e-06, "loss": 0.4156, "step": 38720 }, { "epoch": 2.274088426986084, "grad_norm": 2.7572898864746094, "learning_rate": 1.6800323230692511e-06, "loss": 0.3956, "step": 38730 }, { "epoch": 2.274675591568317, "grad_norm": 4.967609405517578, "learning_rate": 1.6774787963046578e-06, "loss": 0.4914, "step": 38740 }, { "epoch": 2.275262756150549, "grad_norm": 3.9678001403808594, "learning_rate": 1.6749268204011054e-06, "loss": 0.4836, "step": 38750 }, { "epoch": 2.2758499207327816, "grad_norm": 3.831184148788452, "learning_rate": 1.672376396549784e-06, "loss": 0.4557, "step": 38760 }, { "epoch": 2.2764370853150138, "grad_norm": 3.4250710010528564, "learning_rate": 1.6698275259411634e-06, "loss": 0.3619, "step": 38770 }, { "epoch": 2.2770242498972464, "grad_norm": 1.550204873085022, "learning_rate": 1.6672802097649826e-06, "loss": 0.3068, "step": 38780 }, { "epoch": 2.2776114144794786, "grad_norm": 13.40668773651123, "learning_rate": 1.6647344492102597e-06, "loss": 0.5049, "step": 38790 }, { "epoch": 2.278198579061711, "grad_norm": 3.0524685382843018, "learning_rate": 1.6621902454652856e-06, "loss": 0.2975, "step": 38800 }, { "epoch": 2.2787857436439434, "grad_norm": 3.578263759613037, "learning_rate": 1.6596475997176242e-06, "loss": 0.3717, "step": 38810 }, { "epoch": 2.2793729082261756, "grad_norm": 4.164820671081543, "learning_rate": 1.6571065131541126e-06, "loss": 0.2011, "step": 38820 }, { "epoch": 2.279960072808408, "grad_norm": 1.8335431814193726, "learning_rate": 1.6545669869608595e-06, "loss": 0.342, "step": 38830 }, { "epoch": 2.280547237390641, "grad_norm": 2.9237658977508545, "learning_rate": 1.652029022323245e-06, "loss": 0.3682, "step": 38840 }, { "epoch": 2.281134401972873, "grad_norm": 9.438270568847656, "learning_rate": 1.6494926204259216e-06, "loss": 0.3808, "step": 38850 }, { "epoch": 2.281721566555105, "grad_norm": 17.190963745117188, "learning_rate": 1.646957782452811e-06, "loss": 0.3336, "step": 38860 }, { "epoch": 2.282308731137338, "grad_norm": 3.444766044616699, "learning_rate": 1.6444245095871075e-06, "loss": 0.4044, "step": 38870 }, { "epoch": 2.2828958957195704, "grad_norm": 1.747310757637024, "learning_rate": 1.641892803011269e-06, "loss": 0.3858, "step": 38880 }, { "epoch": 2.2834830603018026, "grad_norm": 2.4274232387542725, "learning_rate": 1.6393626639070286e-06, "loss": 0.3976, "step": 38890 }, { "epoch": 2.2840702248840348, "grad_norm": 3.95755934715271, "learning_rate": 1.6368340934553834e-06, "loss": 0.5339, "step": 38900 }, { "epoch": 2.2846573894662674, "grad_norm": 2.7957305908203125, "learning_rate": 1.6343070928366046e-06, "loss": 0.342, "step": 38910 }, { "epoch": 2.2852445540485, "grad_norm": 2.3206896781921387, "learning_rate": 1.631781663230222e-06, "loss": 0.3665, "step": 38920 }, { "epoch": 2.285831718630732, "grad_norm": 2.8907387256622314, "learning_rate": 1.629257805815037e-06, "loss": 0.4396, "step": 38930 }, { "epoch": 2.2864188832129644, "grad_norm": 2.9139976501464844, "learning_rate": 1.6267355217691178e-06, "loss": 0.3143, "step": 38940 }, { "epoch": 2.287006047795197, "grad_norm": 4.305000305175781, "learning_rate": 1.6242148122697954e-06, "loss": 0.4786, "step": 38950 }, { "epoch": 2.2875932123774296, "grad_norm": 2.848788261413574, "learning_rate": 1.6216956784936706e-06, "loss": 0.2637, "step": 38960 }, { "epoch": 2.288180376959662, "grad_norm": 3.287628173828125, "learning_rate": 1.6191781216166002e-06, "loss": 0.4436, "step": 38970 }, { "epoch": 2.288767541541894, "grad_norm": 4.134487152099609, "learning_rate": 1.616662142813712e-06, "loss": 0.3899, "step": 38980 }, { "epoch": 2.2893547061241266, "grad_norm": 4.427136421203613, "learning_rate": 1.614147743259397e-06, "loss": 0.4185, "step": 38990 }, { "epoch": 2.2899418707063592, "grad_norm": 1.4497698545455933, "learning_rate": 1.6116349241273088e-06, "loss": 0.4212, "step": 39000 }, { "epoch": 2.2905290352885914, "grad_norm": 3.237009286880493, "learning_rate": 1.6091236865903575e-06, "loss": 0.316, "step": 39010 }, { "epoch": 2.2911161998708236, "grad_norm": 4.164917469024658, "learning_rate": 1.6066140318207217e-06, "loss": 0.4309, "step": 39020 }, { "epoch": 2.2917033644530562, "grad_norm": 4.058033466339111, "learning_rate": 1.6041059609898385e-06, "loss": 0.4597, "step": 39030 }, { "epoch": 2.2922905290352884, "grad_norm": 9.593399047851562, "learning_rate": 1.6015994752684067e-06, "loss": 0.4609, "step": 39040 }, { "epoch": 2.292877693617521, "grad_norm": 7.144499778747559, "learning_rate": 1.5990945758263848e-06, "loss": 0.514, "step": 39050 }, { "epoch": 2.293464858199753, "grad_norm": 5.10284423828125, "learning_rate": 1.5965912638329873e-06, "loss": 0.454, "step": 39060 }, { "epoch": 2.294052022781986, "grad_norm": 2.936544418334961, "learning_rate": 1.5940895404566946e-06, "loss": 0.379, "step": 39070 }, { "epoch": 2.294639187364218, "grad_norm": 2.5518364906311035, "learning_rate": 1.591589406865241e-06, "loss": 0.3827, "step": 39080 }, { "epoch": 2.2952263519464506, "grad_norm": 2.377483606338501, "learning_rate": 1.5890908642256198e-06, "loss": 0.4249, "step": 39090 }, { "epoch": 2.295813516528683, "grad_norm": 2.361121654510498, "learning_rate": 1.586593913704083e-06, "loss": 0.4299, "step": 39100 }, { "epoch": 2.2964006811109154, "grad_norm": 3.254586935043335, "learning_rate": 1.5840985564661354e-06, "loss": 0.346, "step": 39110 }, { "epoch": 2.2969878456931476, "grad_norm": 2.8788809776306152, "learning_rate": 1.5816047936765417e-06, "loss": 0.4246, "step": 39120 }, { "epoch": 2.2975750102753802, "grad_norm": 2.398514986038208, "learning_rate": 1.5791126264993218e-06, "loss": 0.4555, "step": 39130 }, { "epoch": 2.2981621748576124, "grad_norm": 3.84468412399292, "learning_rate": 1.5766220560977498e-06, "loss": 0.3305, "step": 39140 }, { "epoch": 2.298749339439845, "grad_norm": 3.4108994007110596, "learning_rate": 1.574133083634356e-06, "loss": 0.3282, "step": 39150 }, { "epoch": 2.2993365040220772, "grad_norm": 4.01094913482666, "learning_rate": 1.5716457102709236e-06, "loss": 0.3694, "step": 39160 }, { "epoch": 2.29992366860431, "grad_norm": 2.528898239135742, "learning_rate": 1.5691599371684897e-06, "loss": 0.4031, "step": 39170 }, { "epoch": 2.300510833186542, "grad_norm": 2.617249011993408, "learning_rate": 1.5666757654873443e-06, "loss": 0.4103, "step": 39180 }, { "epoch": 2.3010979977687747, "grad_norm": 3.2681539058685303, "learning_rate": 1.5641931963870317e-06, "loss": 0.3583, "step": 39190 }, { "epoch": 2.301685162351007, "grad_norm": 0.9678052663803101, "learning_rate": 1.5617122310263434e-06, "loss": 0.3449, "step": 39200 }, { "epoch": 2.3022723269332395, "grad_norm": 4.026711940765381, "learning_rate": 1.559232870563327e-06, "loss": 0.3243, "step": 39210 }, { "epoch": 2.3028594915154716, "grad_norm": 1.7566629648208618, "learning_rate": 1.556755116155278e-06, "loss": 0.51, "step": 39220 }, { "epoch": 2.3034466560977043, "grad_norm": 3.775202512741089, "learning_rate": 1.5542789689587484e-06, "loss": 0.4877, "step": 39230 }, { "epoch": 2.3040338206799365, "grad_norm": 3.8468875885009766, "learning_rate": 1.5518044301295305e-06, "loss": 0.4727, "step": 39240 }, { "epoch": 2.304620985262169, "grad_norm": 4.400033473968506, "learning_rate": 1.5493315008226717e-06, "loss": 0.368, "step": 39250 }, { "epoch": 2.3052081498444013, "grad_norm": 3.215181589126587, "learning_rate": 1.5468601821924672e-06, "loss": 0.4431, "step": 39260 }, { "epoch": 2.305795314426634, "grad_norm": 7.098243236541748, "learning_rate": 1.544390475392461e-06, "loss": 0.3333, "step": 39270 }, { "epoch": 2.306382479008866, "grad_norm": 6.396100997924805, "learning_rate": 1.5419223815754447e-06, "loss": 0.5431, "step": 39280 }, { "epoch": 2.3069696435910987, "grad_norm": 2.9004881381988525, "learning_rate": 1.5394559018934536e-06, "loss": 0.3148, "step": 39290 }, { "epoch": 2.307556808173331, "grad_norm": 1.3595175743103027, "learning_rate": 1.5369910374977715e-06, "loss": 0.3477, "step": 39300 }, { "epoch": 2.3081439727555635, "grad_norm": 5.027429103851318, "learning_rate": 1.5345277895389332e-06, "loss": 0.2819, "step": 39310 }, { "epoch": 2.3087311373377957, "grad_norm": 4.072484970092773, "learning_rate": 1.5320661591667135e-06, "loss": 0.4687, "step": 39320 }, { "epoch": 2.3093183019200283, "grad_norm": 4.626945495605469, "learning_rate": 1.5296061475301311e-06, "loss": 0.3828, "step": 39330 }, { "epoch": 2.3099054665022605, "grad_norm": 2.772517442703247, "learning_rate": 1.5271477557774533e-06, "loss": 0.3913, "step": 39340 }, { "epoch": 2.310492631084493, "grad_norm": 2.9266409873962402, "learning_rate": 1.5246909850561881e-06, "loss": 0.4169, "step": 39350 }, { "epoch": 2.3110797956667253, "grad_norm": 1.3644556999206543, "learning_rate": 1.5222358365130896e-06, "loss": 0.4147, "step": 39360 }, { "epoch": 2.311666960248958, "grad_norm": 3.830965280532837, "learning_rate": 1.5197823112941522e-06, "loss": 0.4388, "step": 39370 }, { "epoch": 2.31225412483119, "grad_norm": 2.957308053970337, "learning_rate": 1.517330410544614e-06, "loss": 0.5181, "step": 39380 }, { "epoch": 2.3128412894134227, "grad_norm": 5.023948669433594, "learning_rate": 1.5148801354089548e-06, "loss": 0.4351, "step": 39390 }, { "epoch": 2.313428453995655, "grad_norm": 4.360762119293213, "learning_rate": 1.5124314870308948e-06, "loss": 0.4788, "step": 39400 }, { "epoch": 2.3140156185778875, "grad_norm": 5.449063777923584, "learning_rate": 1.5099844665533958e-06, "loss": 0.4163, "step": 39410 }, { "epoch": 2.3146027831601197, "grad_norm": 2.532247304916382, "learning_rate": 1.5075390751186604e-06, "loss": 0.3667, "step": 39420 }, { "epoch": 2.3151899477423523, "grad_norm": 4.525873184204102, "learning_rate": 1.5050953138681269e-06, "loss": 0.5039, "step": 39430 }, { "epoch": 2.3157771123245845, "grad_norm": 2.134714365005493, "learning_rate": 1.5026531839424768e-06, "loss": 0.3354, "step": 39440 }, { "epoch": 2.316364276906817, "grad_norm": 1.5225247144699097, "learning_rate": 1.5002126864816292e-06, "loss": 0.5161, "step": 39450 }, { "epoch": 2.3169514414890493, "grad_norm": 2.589703321456909, "learning_rate": 1.4977738226247408e-06, "loss": 0.2573, "step": 39460 }, { "epoch": 2.317538606071282, "grad_norm": 6.412961483001709, "learning_rate": 1.4953365935102054e-06, "loss": 0.461, "step": 39470 }, { "epoch": 2.318125770653514, "grad_norm": 4.408931732177734, "learning_rate": 1.4929010002756543e-06, "loss": 0.4392, "step": 39480 }, { "epoch": 2.3187129352357467, "grad_norm": 3.480764150619507, "learning_rate": 1.4904670440579555e-06, "loss": 0.4014, "step": 39490 }, { "epoch": 2.319300099817979, "grad_norm": 4.499719619750977, "learning_rate": 1.488034725993212e-06, "loss": 0.4449, "step": 39500 }, { "epoch": 2.3198872644002115, "grad_norm": 8.19058895111084, "learning_rate": 1.4856040472167648e-06, "loss": 0.4119, "step": 39510 }, { "epoch": 2.3204744289824437, "grad_norm": 2.885985851287842, "learning_rate": 1.4831750088631842e-06, "loss": 0.3329, "step": 39520 }, { "epoch": 2.3210615935646763, "grad_norm": 2.417078733444214, "learning_rate": 1.4807476120662788e-06, "loss": 0.4229, "step": 39530 }, { "epoch": 2.3216487581469085, "grad_norm": 3.696495771408081, "learning_rate": 1.4783218579590898e-06, "loss": 0.4576, "step": 39540 }, { "epoch": 2.322235922729141, "grad_norm": 6.216651439666748, "learning_rate": 1.4758977476738968e-06, "loss": 0.5645, "step": 39550 }, { "epoch": 2.3228230873113733, "grad_norm": 2.9052793979644775, "learning_rate": 1.4734752823422022e-06, "loss": 0.3027, "step": 39560 }, { "epoch": 2.323410251893606, "grad_norm": 3.8212766647338867, "learning_rate": 1.4710544630947477e-06, "loss": 0.403, "step": 39570 }, { "epoch": 2.323997416475838, "grad_norm": 2.819162130355835, "learning_rate": 1.4686352910615044e-06, "loss": 0.3042, "step": 39580 }, { "epoch": 2.3245845810580708, "grad_norm": 5.797154426574707, "learning_rate": 1.466217767371676e-06, "loss": 0.2878, "step": 39590 }, { "epoch": 2.325171745640303, "grad_norm": 1.958799958229065, "learning_rate": 1.4638018931536946e-06, "loss": 0.3855, "step": 39600 }, { "epoch": 2.3257589102225356, "grad_norm": 4.555885314941406, "learning_rate": 1.4613876695352259e-06, "loss": 0.4272, "step": 39610 }, { "epoch": 2.3263460748047677, "grad_norm": 1.617831826210022, "learning_rate": 1.458975097643157e-06, "loss": 0.4256, "step": 39620 }, { "epoch": 2.3269332393870004, "grad_norm": 3.2003769874572754, "learning_rate": 1.4565641786036156e-06, "loss": 0.4033, "step": 39630 }, { "epoch": 2.3275204039692325, "grad_norm": 3.4825150966644287, "learning_rate": 1.4541549135419497e-06, "loss": 0.3163, "step": 39640 }, { "epoch": 2.328107568551465, "grad_norm": 1.809806227684021, "learning_rate": 1.4517473035827396e-06, "loss": 0.2811, "step": 39650 }, { "epoch": 2.3286947331336973, "grad_norm": 2.350465774536133, "learning_rate": 1.4493413498497877e-06, "loss": 0.3455, "step": 39660 }, { "epoch": 2.32928189771593, "grad_norm": 3.560354471206665, "learning_rate": 1.4469370534661287e-06, "loss": 0.3879, "step": 39670 }, { "epoch": 2.329869062298162, "grad_norm": 3.3459432125091553, "learning_rate": 1.4445344155540209e-06, "loss": 0.4646, "step": 39680 }, { "epoch": 2.3304562268803943, "grad_norm": 5.369804382324219, "learning_rate": 1.44213343723495e-06, "loss": 0.4664, "step": 39690 }, { "epoch": 2.331043391462627, "grad_norm": 7.572208881378174, "learning_rate": 1.4397341196296256e-06, "loss": 0.3843, "step": 39700 }, { "epoch": 2.3316305560448596, "grad_norm": 5.4727301597595215, "learning_rate": 1.4373364638579834e-06, "loss": 0.3401, "step": 39710 }, { "epoch": 2.3322177206270918, "grad_norm": 6.2276411056518555, "learning_rate": 1.4349404710391829e-06, "loss": 0.3698, "step": 39720 }, { "epoch": 2.332804885209324, "grad_norm": 3.845325231552124, "learning_rate": 1.4325461422916071e-06, "loss": 0.3379, "step": 39730 }, { "epoch": 2.3333920497915566, "grad_norm": 2.7624425888061523, "learning_rate": 1.4301534787328636e-06, "loss": 0.394, "step": 39740 }, { "epoch": 2.333979214373789, "grad_norm": 1.8670798540115356, "learning_rate": 1.4277624814797786e-06, "loss": 0.3591, "step": 39750 }, { "epoch": 2.3345663789560214, "grad_norm": 2.6922054290771484, "learning_rate": 1.425373151648406e-06, "loss": 0.4301, "step": 39760 }, { "epoch": 2.3351535435382536, "grad_norm": 2.360503911972046, "learning_rate": 1.4229854903540175e-06, "loss": 0.3192, "step": 39770 }, { "epoch": 2.335740708120486, "grad_norm": 5.3358473777771, "learning_rate": 1.4205994987111088e-06, "loss": 0.5146, "step": 39780 }, { "epoch": 2.336327872702719, "grad_norm": 7.632801055908203, "learning_rate": 1.4182151778333935e-06, "loss": 0.4734, "step": 39790 }, { "epoch": 2.336915037284951, "grad_norm": 3.223097085952759, "learning_rate": 1.4158325288338077e-06, "loss": 0.465, "step": 39800 }, { "epoch": 2.337502201867183, "grad_norm": 4.143735885620117, "learning_rate": 1.4134515528245058e-06, "loss": 0.3757, "step": 39810 }, { "epoch": 2.338089366449416, "grad_norm": 5.592953681945801, "learning_rate": 1.4110722509168611e-06, "loss": 0.3624, "step": 39820 }, { "epoch": 2.3386765310316484, "grad_norm": 3.0708248615264893, "learning_rate": 1.4086946242214677e-06, "loss": 0.4578, "step": 39830 }, { "epoch": 2.3392636956138806, "grad_norm": 2.10023832321167, "learning_rate": 1.4063186738481333e-06, "loss": 0.5241, "step": 39840 }, { "epoch": 2.3398508601961128, "grad_norm": 2.0741629600524902, "learning_rate": 1.4039444009058873e-06, "loss": 0.2965, "step": 39850 }, { "epoch": 2.3404380247783454, "grad_norm": 4.74434757232666, "learning_rate": 1.4015718065029727e-06, "loss": 0.4937, "step": 39860 }, { "epoch": 2.341025189360578, "grad_norm": 1.9505083560943604, "learning_rate": 1.3992008917468546e-06, "loss": 0.3866, "step": 39870 }, { "epoch": 2.34161235394281, "grad_norm": 1.577465295791626, "learning_rate": 1.3968316577442104e-06, "loss": 0.3894, "step": 39880 }, { "epoch": 2.3421995185250424, "grad_norm": 3.931823253631592, "learning_rate": 1.3944641056009295e-06, "loss": 0.4097, "step": 39890 }, { "epoch": 2.342786683107275, "grad_norm": 6.551455497741699, "learning_rate": 1.3920982364221213e-06, "loss": 0.4298, "step": 39900 }, { "epoch": 2.343373847689507, "grad_norm": 1.9949582815170288, "learning_rate": 1.3897340513121087e-06, "loss": 0.4534, "step": 39910 }, { "epoch": 2.34396101227174, "grad_norm": 6.698991775512695, "learning_rate": 1.3873715513744274e-06, "loss": 0.4558, "step": 39920 }, { "epoch": 2.344548176853972, "grad_norm": 1.4106544256210327, "learning_rate": 1.3850107377118289e-06, "loss": 0.3784, "step": 39930 }, { "epoch": 2.3451353414362046, "grad_norm": 1.297627568244934, "learning_rate": 1.382651611426271e-06, "loss": 0.3777, "step": 39940 }, { "epoch": 2.345722506018437, "grad_norm": 4.478550910949707, "learning_rate": 1.380294173618934e-06, "loss": 0.3414, "step": 39950 }, { "epoch": 2.3463096706006694, "grad_norm": 2.229208469390869, "learning_rate": 1.3779384253902017e-06, "loss": 0.3434, "step": 39960 }, { "epoch": 2.3468968351829016, "grad_norm": 6.264472007751465, "learning_rate": 1.3755843678396747e-06, "loss": 0.4472, "step": 39970 }, { "epoch": 2.347483999765134, "grad_norm": 7.674493312835693, "learning_rate": 1.3732320020661589e-06, "loss": 0.349, "step": 39980 }, { "epoch": 2.3480711643473664, "grad_norm": 6.14887809753418, "learning_rate": 1.3708813291676748e-06, "loss": 0.3203, "step": 39990 }, { "epoch": 2.348658328929599, "grad_norm": 6.2828369140625, "learning_rate": 1.3685323502414522e-06, "loss": 0.4113, "step": 40000 }, { "epoch": 2.349245493511831, "grad_norm": 4.366650104522705, "learning_rate": 1.3661850663839282e-06, "loss": 0.3775, "step": 40010 }, { "epoch": 2.349832658094064, "grad_norm": 1.7031978368759155, "learning_rate": 1.3638394786907516e-06, "loss": 0.4999, "step": 40020 }, { "epoch": 2.350419822676296, "grad_norm": 2.1394991874694824, "learning_rate": 1.3614955882567764e-06, "loss": 0.4596, "step": 40030 }, { "epoch": 2.3510069872585286, "grad_norm": 5.5178022384643555, "learning_rate": 1.3591533961760672e-06, "loss": 0.3661, "step": 40040 }, { "epoch": 2.351594151840761, "grad_norm": 5.014092922210693, "learning_rate": 1.3568129035418942e-06, "loss": 0.5032, "step": 40050 }, { "epoch": 2.3521813164229934, "grad_norm": 4.050394535064697, "learning_rate": 1.3544741114467352e-06, "loss": 0.4486, "step": 40060 }, { "epoch": 2.3527684810052256, "grad_norm": 19.42388916015625, "learning_rate": 1.3521370209822715e-06, "loss": 0.4672, "step": 40070 }, { "epoch": 2.3533556455874582, "grad_norm": 0.9060661196708679, "learning_rate": 1.3498016332393938e-06, "loss": 0.3481, "step": 40080 }, { "epoch": 2.3539428101696904, "grad_norm": 6.781238555908203, "learning_rate": 1.347467949308196e-06, "loss": 0.4398, "step": 40090 }, { "epoch": 2.354529974751923, "grad_norm": 6.490198612213135, "learning_rate": 1.3451359702779777e-06, "loss": 0.3133, "step": 40100 }, { "epoch": 2.3551171393341552, "grad_norm": 3.378546953201294, "learning_rate": 1.3428056972372428e-06, "loss": 0.426, "step": 40110 }, { "epoch": 2.355704303916388, "grad_norm": 4.413290500640869, "learning_rate": 1.340477131273697e-06, "loss": 0.4394, "step": 40120 }, { "epoch": 2.35629146849862, "grad_norm": 4.7196736335754395, "learning_rate": 1.3381502734742513e-06, "loss": 0.4741, "step": 40130 }, { "epoch": 2.3568786330808527, "grad_norm": 5.841373920440674, "learning_rate": 1.3358251249250187e-06, "loss": 0.474, "step": 40140 }, { "epoch": 2.357465797663085, "grad_norm": 3.681709051132202, "learning_rate": 1.3335016867113137e-06, "loss": 0.3145, "step": 40150 }, { "epoch": 2.3580529622453175, "grad_norm": 4.647074222564697, "learning_rate": 1.331179959917655e-06, "loss": 0.295, "step": 40160 }, { "epoch": 2.3586401268275496, "grad_norm": 10.104726791381836, "learning_rate": 1.3288599456277579e-06, "loss": 0.4162, "step": 40170 }, { "epoch": 2.3592272914097823, "grad_norm": 2.4880220890045166, "learning_rate": 1.32654164492454e-06, "loss": 0.4178, "step": 40180 }, { "epoch": 2.3598144559920144, "grad_norm": 5.304620265960693, "learning_rate": 1.3242250588901234e-06, "loss": 0.3482, "step": 40190 }, { "epoch": 2.360401620574247, "grad_norm": 3.1108317375183105, "learning_rate": 1.321910188605826e-06, "loss": 0.5473, "step": 40200 }, { "epoch": 2.3609887851564793, "grad_norm": 2.6628096103668213, "learning_rate": 1.319597035152163e-06, "loss": 0.511, "step": 40210 }, { "epoch": 2.361575949738712, "grad_norm": 4.115396976470947, "learning_rate": 1.3172855996088513e-06, "loss": 0.4564, "step": 40220 }, { "epoch": 2.362163114320944, "grad_norm": 2.3434808254241943, "learning_rate": 1.314975883054806e-06, "loss": 0.4524, "step": 40230 }, { "epoch": 2.3627502789031767, "grad_norm": 3.810819625854492, "learning_rate": 1.312667886568138e-06, "loss": 0.4147, "step": 40240 }, { "epoch": 2.363337443485409, "grad_norm": 2.1859233379364014, "learning_rate": 1.310361611226158e-06, "loss": 0.399, "step": 40250 }, { "epoch": 2.3639246080676415, "grad_norm": 1.7123095989227295, "learning_rate": 1.308057058105367e-06, "loss": 0.4229, "step": 40260 }, { "epoch": 2.3645117726498737, "grad_norm": 3.9401583671569824, "learning_rate": 1.3057542282814716e-06, "loss": 0.4718, "step": 40270 }, { "epoch": 2.3650989372321063, "grad_norm": 4.971997261047363, "learning_rate": 1.3034531228293673e-06, "loss": 0.457, "step": 40280 }, { "epoch": 2.3656861018143385, "grad_norm": 2.047011137008667, "learning_rate": 1.301153742823148e-06, "loss": 0.402, "step": 40290 }, { "epoch": 2.366273266396571, "grad_norm": 3.7443087100982666, "learning_rate": 1.298856089336098e-06, "loss": 0.3503, "step": 40300 }, { "epoch": 2.3668604309788033, "grad_norm": 4.060335636138916, "learning_rate": 1.2965601634406994e-06, "loss": 0.3549, "step": 40310 }, { "epoch": 2.367447595561036, "grad_norm": 1.2812294960021973, "learning_rate": 1.294265966208627e-06, "loss": 0.3489, "step": 40320 }, { "epoch": 2.368034760143268, "grad_norm": 3.4186623096466064, "learning_rate": 1.291973498710749e-06, "loss": 0.3654, "step": 40330 }, { "epoch": 2.3686219247255007, "grad_norm": 2.669339656829834, "learning_rate": 1.289682762017126e-06, "loss": 0.4766, "step": 40340 }, { "epoch": 2.369209089307733, "grad_norm": 2.1419150829315186, "learning_rate": 1.2873937571970102e-06, "loss": 0.3883, "step": 40350 }, { "epoch": 2.3697962538899655, "grad_norm": 2.033332109451294, "learning_rate": 1.2851064853188455e-06, "loss": 0.3512, "step": 40360 }, { "epoch": 2.3703834184721977, "grad_norm": 2.835803747177124, "learning_rate": 1.2828209474502678e-06, "loss": 0.336, "step": 40370 }, { "epoch": 2.3709705830544303, "grad_norm": 6.9526753425598145, "learning_rate": 1.2805371446581028e-06, "loss": 0.3928, "step": 40380 }, { "epoch": 2.3715577476366625, "grad_norm": 2.630082368850708, "learning_rate": 1.2782550780083674e-06, "loss": 0.4292, "step": 40390 }, { "epoch": 2.372144912218895, "grad_norm": 4.1992363929748535, "learning_rate": 1.2759747485662648e-06, "loss": 0.5079, "step": 40400 }, { "epoch": 2.3727320768011273, "grad_norm": 3.3793492317199707, "learning_rate": 1.2736961573961915e-06, "loss": 0.3179, "step": 40410 }, { "epoch": 2.37331924138336, "grad_norm": 5.98178243637085, "learning_rate": 1.271419305561729e-06, "loss": 0.4108, "step": 40420 }, { "epoch": 2.373906405965592, "grad_norm": 9.616621017456055, "learning_rate": 1.2691441941256532e-06, "loss": 0.5457, "step": 40430 }, { "epoch": 2.3744935705478247, "grad_norm": 4.240873336791992, "learning_rate": 1.2668708241499188e-06, "loss": 0.3127, "step": 40440 }, { "epoch": 2.375080735130057, "grad_norm": 3.9238696098327637, "learning_rate": 1.2645991966956734e-06, "loss": 0.464, "step": 40450 }, { "epoch": 2.3756678997122895, "grad_norm": 2.1196532249450684, "learning_rate": 1.2623293128232506e-06, "loss": 0.3985, "step": 40460 }, { "epoch": 2.3762550642945217, "grad_norm": 4.111268043518066, "learning_rate": 1.260061173592168e-06, "loss": 0.2111, "step": 40470 }, { "epoch": 2.3768422288767543, "grad_norm": 2.5917670726776123, "learning_rate": 1.2577947800611335e-06, "loss": 0.4876, "step": 40480 }, { "epoch": 2.3774293934589865, "grad_norm": 2.4090147018432617, "learning_rate": 1.2555301332880327e-06, "loss": 0.3907, "step": 40490 }, { "epoch": 2.378016558041219, "grad_norm": 4.806921005249023, "learning_rate": 1.2532672343299406e-06, "loss": 0.4804, "step": 40500 }, { "epoch": 2.3786037226234513, "grad_norm": 4.34047794342041, "learning_rate": 1.2510060842431188e-06, "loss": 0.3318, "step": 40510 }, { "epoch": 2.379190887205684, "grad_norm": 3.8870561122894287, "learning_rate": 1.2487466840830098e-06, "loss": 0.3905, "step": 40520 }, { "epoch": 2.379778051787916, "grad_norm": 2.2977590560913086, "learning_rate": 1.2464890349042357e-06, "loss": 0.4409, "step": 40530 }, { "epoch": 2.3803652163701488, "grad_norm": 3.2367610931396484, "learning_rate": 1.2442331377606065e-06, "loss": 0.4011, "step": 40540 }, { "epoch": 2.380952380952381, "grad_norm": 2.8251078128814697, "learning_rate": 1.241978993705113e-06, "loss": 0.5462, "step": 40550 }, { "epoch": 2.381539545534613, "grad_norm": 3.966387987136841, "learning_rate": 1.2397266037899268e-06, "loss": 0.4899, "step": 40560 }, { "epoch": 2.3821267101168457, "grad_norm": 5.0343217849731445, "learning_rate": 1.237475969066403e-06, "loss": 0.3946, "step": 40570 }, { "epoch": 2.3827138746990784, "grad_norm": 2.4470741748809814, "learning_rate": 1.2352270905850705e-06, "loss": 0.3909, "step": 40580 }, { "epoch": 2.3833010392813105, "grad_norm": 2.5356359481811523, "learning_rate": 1.232979969395649e-06, "loss": 0.3256, "step": 40590 }, { "epoch": 2.3838882038635427, "grad_norm": 3.880833148956299, "learning_rate": 1.2307346065470305e-06, "loss": 0.4015, "step": 40600 }, { "epoch": 2.3844753684457753, "grad_norm": 1.8635280132293701, "learning_rate": 1.2284910030872904e-06, "loss": 0.5278, "step": 40610 }, { "epoch": 2.385062533028008, "grad_norm": 3.3339040279388428, "learning_rate": 1.2262491600636766e-06, "loss": 0.4442, "step": 40620 }, { "epoch": 2.38564969761024, "grad_norm": 5.489765644073486, "learning_rate": 1.2240090785226222e-06, "loss": 0.4594, "step": 40630 }, { "epoch": 2.3862368621924723, "grad_norm": 2.056379556655884, "learning_rate": 1.221770759509735e-06, "loss": 0.3863, "step": 40640 }, { "epoch": 2.386824026774705, "grad_norm": 2.4954075813293457, "learning_rate": 1.2195342040697999e-06, "loss": 0.4301, "step": 40650 }, { "epoch": 2.3874111913569376, "grad_norm": 3.4221880435943604, "learning_rate": 1.2172994132467797e-06, "loss": 0.2685, "step": 40660 }, { "epoch": 2.3879983559391698, "grad_norm": 6.657191753387451, "learning_rate": 1.2150663880838131e-06, "loss": 0.4539, "step": 40670 }, { "epoch": 2.388585520521402, "grad_norm": 1.4648656845092773, "learning_rate": 1.2128351296232137e-06, "loss": 0.5555, "step": 40680 }, { "epoch": 2.3891726851036346, "grad_norm": 4.8871989250183105, "learning_rate": 1.2106056389064718e-06, "loss": 0.3761, "step": 40690 }, { "epoch": 2.389759849685867, "grad_norm": 2.6623928546905518, "learning_rate": 1.208377916974252e-06, "loss": 0.4321, "step": 40700 }, { "epoch": 2.3903470142680994, "grad_norm": 4.083249092102051, "learning_rate": 1.2061519648663949e-06, "loss": 0.4812, "step": 40710 }, { "epoch": 2.3909341788503315, "grad_norm": 2.380321979522705, "learning_rate": 1.20392778362191e-06, "loss": 0.3756, "step": 40720 }, { "epoch": 2.391521343432564, "grad_norm": 10.142267227172852, "learning_rate": 1.2017053742789858e-06, "loss": 0.4896, "step": 40730 }, { "epoch": 2.392108508014797, "grad_norm": 7.607112884521484, "learning_rate": 1.1994847378749797e-06, "loss": 0.3424, "step": 40740 }, { "epoch": 2.392695672597029, "grad_norm": 3.7299082279205322, "learning_rate": 1.1972658754464272e-06, "loss": 0.3701, "step": 40750 }, { "epoch": 2.393282837179261, "grad_norm": 3.23736310005188, "learning_rate": 1.1950487880290284e-06, "loss": 0.3968, "step": 40760 }, { "epoch": 2.393870001761494, "grad_norm": 3.084695339202881, "learning_rate": 1.19283347665766e-06, "loss": 0.445, "step": 40770 }, { "epoch": 2.394457166343726, "grad_norm": 2.0516843795776367, "learning_rate": 1.1906199423663667e-06, "loss": 0.3929, "step": 40780 }, { "epoch": 2.3950443309259586, "grad_norm": 4.250428676605225, "learning_rate": 1.1884081861883668e-06, "loss": 0.3912, "step": 40790 }, { "epoch": 2.3956314955081908, "grad_norm": 5.854362964630127, "learning_rate": 1.1861982091560475e-06, "loss": 0.3994, "step": 40800 }, { "epoch": 2.3962186600904234, "grad_norm": 2.83382511138916, "learning_rate": 1.1839900123009622e-06, "loss": 0.3144, "step": 40810 }, { "epoch": 2.3968058246726556, "grad_norm": 1.4669342041015625, "learning_rate": 1.181783596653836e-06, "loss": 0.3142, "step": 40820 }, { "epoch": 2.397392989254888, "grad_norm": 6.891708850860596, "learning_rate": 1.1795789632445665e-06, "loss": 0.4928, "step": 40830 }, { "epoch": 2.3979801538371204, "grad_norm": 4.20612907409668, "learning_rate": 1.1773761131022144e-06, "loss": 0.2728, "step": 40840 }, { "epoch": 2.398567318419353, "grad_norm": 3.2199044227600098, "learning_rate": 1.1751750472550067e-06, "loss": 0.2997, "step": 40850 }, { "epoch": 2.399154483001585, "grad_norm": 2.87660813331604, "learning_rate": 1.1729757667303415e-06, "loss": 0.4395, "step": 40860 }, { "epoch": 2.399741647583818, "grad_norm": 2.608281373977661, "learning_rate": 1.170778272554783e-06, "loss": 0.388, "step": 40870 }, { "epoch": 2.40032881216605, "grad_norm": 2.5111429691314697, "learning_rate": 1.16858256575406e-06, "loss": 0.3793, "step": 40880 }, { "epoch": 2.4009159767482826, "grad_norm": 5.536617755889893, "learning_rate": 1.1663886473530672e-06, "loss": 0.3345, "step": 40890 }, { "epoch": 2.401503141330515, "grad_norm": 2.7711234092712402, "learning_rate": 1.1641965183758663e-06, "loss": 0.316, "step": 40900 }, { "epoch": 2.4020903059127474, "grad_norm": 8.762897491455078, "learning_rate": 1.1620061798456816e-06, "loss": 0.3719, "step": 40910 }, { "epoch": 2.4026774704949796, "grad_norm": 13.00515079498291, "learning_rate": 1.1598176327849032e-06, "loss": 0.4352, "step": 40920 }, { "epoch": 2.403264635077212, "grad_norm": 7.467838764190674, "learning_rate": 1.1576308782150836e-06, "loss": 0.4115, "step": 40930 }, { "epoch": 2.4038517996594444, "grad_norm": 2.5046231746673584, "learning_rate": 1.155445917156941e-06, "loss": 0.3965, "step": 40940 }, { "epoch": 2.404438964241677, "grad_norm": 11.658387184143066, "learning_rate": 1.1532627506303524e-06, "loss": 0.4725, "step": 40950 }, { "epoch": 2.405026128823909, "grad_norm": 3.8490054607391357, "learning_rate": 1.1510813796543607e-06, "loss": 0.2701, "step": 40960 }, { "epoch": 2.405613293406142, "grad_norm": 4.341565132141113, "learning_rate": 1.1489018052471702e-06, "loss": 0.4718, "step": 40970 }, { "epoch": 2.406200457988374, "grad_norm": 9.03463077545166, "learning_rate": 1.1467240284261445e-06, "loss": 0.5104, "step": 40980 }, { "epoch": 2.4067876225706066, "grad_norm": 14.999881744384766, "learning_rate": 1.144548050207811e-06, "loss": 0.5554, "step": 40990 }, { "epoch": 2.407374787152839, "grad_norm": 3.6627817153930664, "learning_rate": 1.1423738716078552e-06, "loss": 0.4069, "step": 41000 }, { "epoch": 2.4079619517350714, "grad_norm": 2.8264858722686768, "learning_rate": 1.1402014936411249e-06, "loss": 0.271, "step": 41010 }, { "epoch": 2.4085491163173036, "grad_norm": 3.0749311447143555, "learning_rate": 1.1380309173216247e-06, "loss": 0.4443, "step": 41020 }, { "epoch": 2.4091362808995362, "grad_norm": 4.813158988952637, "learning_rate": 1.1358621436625216e-06, "loss": 0.3248, "step": 41030 }, { "epoch": 2.4097234454817684, "grad_norm": 2.412872552871704, "learning_rate": 1.133695173676136e-06, "loss": 0.3097, "step": 41040 }, { "epoch": 2.410310610064001, "grad_norm": 2.0992119312286377, "learning_rate": 1.1315300083739516e-06, "loss": 0.2795, "step": 41050 }, { "epoch": 2.4108977746462332, "grad_norm": 1.4358958005905151, "learning_rate": 1.129366648766606e-06, "loss": 0.4047, "step": 41060 }, { "epoch": 2.411484939228466, "grad_norm": 2.2696027755737305, "learning_rate": 1.1272050958638996e-06, "loss": 0.2404, "step": 41070 }, { "epoch": 2.412072103810698, "grad_norm": 1.7553448677062988, "learning_rate": 1.1250453506747822e-06, "loss": 0.4591, "step": 41080 }, { "epoch": 2.4126592683929307, "grad_norm": 2.1497654914855957, "learning_rate": 1.1228874142073631e-06, "loss": 0.3691, "step": 41090 }, { "epoch": 2.413246432975163, "grad_norm": 1.9014496803283691, "learning_rate": 1.1207312874689092e-06, "loss": 0.3121, "step": 41100 }, { "epoch": 2.4138335975573955, "grad_norm": 2.2372257709503174, "learning_rate": 1.118576971465839e-06, "loss": 0.4338, "step": 41110 }, { "epoch": 2.4144207621396276, "grad_norm": 6.228923320770264, "learning_rate": 1.116424467203729e-06, "loss": 0.3506, "step": 41120 }, { "epoch": 2.4150079267218603, "grad_norm": 6.06619930267334, "learning_rate": 1.114273775687309e-06, "loss": 0.4133, "step": 41130 }, { "epoch": 2.4155950913040924, "grad_norm": 2.400552988052368, "learning_rate": 1.1121248979204592e-06, "loss": 0.3496, "step": 41140 }, { "epoch": 2.416182255886325, "grad_norm": 1.993164300918579, "learning_rate": 1.10997783490622e-06, "loss": 0.5507, "step": 41150 }, { "epoch": 2.4167694204685573, "grad_norm": 2.8096580505371094, "learning_rate": 1.10783258764678e-06, "loss": 0.3473, "step": 41160 }, { "epoch": 2.41735658505079, "grad_norm": 6.106757164001465, "learning_rate": 1.1056891571434819e-06, "loss": 0.2792, "step": 41170 }, { "epoch": 2.417943749633022, "grad_norm": 4.727461814880371, "learning_rate": 1.103547544396818e-06, "loss": 0.4504, "step": 41180 }, { "epoch": 2.4185309142152547, "grad_norm": 6.633089065551758, "learning_rate": 1.1014077504064347e-06, "loss": 0.4309, "step": 41190 }, { "epoch": 2.419118078797487, "grad_norm": 24.460750579833984, "learning_rate": 1.0992697761711285e-06, "loss": 0.4997, "step": 41200 }, { "epoch": 2.4197052433797195, "grad_norm": 5.873194217681885, "learning_rate": 1.0971336226888475e-06, "loss": 0.5253, "step": 41210 }, { "epoch": 2.4202924079619517, "grad_norm": 3.3391854763031006, "learning_rate": 1.0949992909566882e-06, "loss": 0.3453, "step": 41220 }, { "epoch": 2.4208795725441843, "grad_norm": 6.507347106933594, "learning_rate": 1.0928667819708976e-06, "loss": 0.4244, "step": 41230 }, { "epoch": 2.4214667371264165, "grad_norm": 3.642199754714966, "learning_rate": 1.0907360967268731e-06, "loss": 0.3767, "step": 41240 }, { "epoch": 2.422053901708649, "grad_norm": 4.070453643798828, "learning_rate": 1.088607236219159e-06, "loss": 0.4985, "step": 41250 }, { "epoch": 2.4226410662908813, "grad_norm": 27.210756301879883, "learning_rate": 1.0864802014414494e-06, "loss": 0.3826, "step": 41260 }, { "epoch": 2.423228230873114, "grad_norm": 3.335416793823242, "learning_rate": 1.0843549933865831e-06, "loss": 0.6092, "step": 41270 }, { "epoch": 2.423815395455346, "grad_norm": 1.481618046760559, "learning_rate": 1.08223161304655e-06, "loss": 0.3498, "step": 41280 }, { "epoch": 2.4244025600375787, "grad_norm": 5.655845642089844, "learning_rate": 1.0801100614124843e-06, "loss": 0.3759, "step": 41290 }, { "epoch": 2.424989724619811, "grad_norm": 10.501018524169922, "learning_rate": 1.0779903394746693e-06, "loss": 0.4521, "step": 41300 }, { "epoch": 2.4255768892020435, "grad_norm": 3.35701847076416, "learning_rate": 1.0758724482225314e-06, "loss": 0.4113, "step": 41310 }, { "epoch": 2.4261640537842757, "grad_norm": 16.376535415649414, "learning_rate": 1.073756388644644e-06, "loss": 0.4427, "step": 41320 }, { "epoch": 2.4267512183665083, "grad_norm": 4.531722545623779, "learning_rate": 1.0716421617287253e-06, "loss": 0.3269, "step": 41330 }, { "epoch": 2.4273383829487405, "grad_norm": 4.074544429779053, "learning_rate": 1.0695297684616373e-06, "loss": 0.422, "step": 41340 }, { "epoch": 2.427925547530973, "grad_norm": 1.782349944114685, "learning_rate": 1.0674192098293895e-06, "loss": 0.4004, "step": 41350 }, { "epoch": 2.4285127121132053, "grad_norm": 1.838516354560852, "learning_rate": 1.0653104868171288e-06, "loss": 0.3493, "step": 41360 }, { "epoch": 2.429099876695438, "grad_norm": 4.0376129150390625, "learning_rate": 1.0632036004091495e-06, "loss": 0.566, "step": 41370 }, { "epoch": 2.42968704127767, "grad_norm": 3.422978162765503, "learning_rate": 1.0610985515888893e-06, "loss": 0.4012, "step": 41380 }, { "epoch": 2.4302742058599023, "grad_norm": 2.4508557319641113, "learning_rate": 1.0589953413389242e-06, "loss": 0.2714, "step": 41390 }, { "epoch": 2.430861370442135, "grad_norm": 3.8289401531219482, "learning_rate": 1.0568939706409798e-06, "loss": 0.3704, "step": 41400 }, { "epoch": 2.4314485350243675, "grad_norm": 5.2283196449279785, "learning_rate": 1.0547944404759124e-06, "loss": 0.4533, "step": 41410 }, { "epoch": 2.4320356996065997, "grad_norm": 6.309638977050781, "learning_rate": 1.0526967518237275e-06, "loss": 0.3929, "step": 41420 }, { "epoch": 2.432622864188832, "grad_norm": 8.644317626953125, "learning_rate": 1.0506009056635675e-06, "loss": 0.447, "step": 41430 }, { "epoch": 2.4332100287710645, "grad_norm": 2.645247459411621, "learning_rate": 1.048506902973715e-06, "loss": 0.3341, "step": 41440 }, { "epoch": 2.433797193353297, "grad_norm": 4.3066935539245605, "learning_rate": 1.0464147447315942e-06, "loss": 0.3952, "step": 41450 }, { "epoch": 2.4343843579355293, "grad_norm": 13.47848892211914, "learning_rate": 1.044324431913764e-06, "loss": 0.4649, "step": 41460 }, { "epoch": 2.4349715225177615, "grad_norm": 1.9781922101974487, "learning_rate": 1.0422359654959252e-06, "loss": 0.5003, "step": 41470 }, { "epoch": 2.435558687099994, "grad_norm": 5.2131123542785645, "learning_rate": 1.0401493464529183e-06, "loss": 0.383, "step": 41480 }, { "epoch": 2.4361458516822267, "grad_norm": 2.1923797130584717, "learning_rate": 1.0380645757587198e-06, "loss": 0.4773, "step": 41490 }, { "epoch": 2.436733016264459, "grad_norm": 3.53096079826355, "learning_rate": 1.0359816543864398e-06, "loss": 0.5109, "step": 41500 }, { "epoch": 2.437320180846691, "grad_norm": 5.424292087554932, "learning_rate": 1.0339005833083298e-06, "loss": 0.357, "step": 41510 }, { "epoch": 2.4379073454289237, "grad_norm": 4.481494426727295, "learning_rate": 1.031821363495777e-06, "loss": 0.3503, "step": 41520 }, { "epoch": 2.4384945100111564, "grad_norm": 2.2851905822753906, "learning_rate": 1.0297439959193023e-06, "loss": 0.4031, "step": 41530 }, { "epoch": 2.4390816745933885, "grad_norm": 2.3620645999908447, "learning_rate": 1.0276684815485665e-06, "loss": 0.3363, "step": 41540 }, { "epoch": 2.4396688391756207, "grad_norm": 7.040288925170898, "learning_rate": 1.025594821352357e-06, "loss": 0.5025, "step": 41550 }, { "epoch": 2.4402560037578533, "grad_norm": 7.240691184997559, "learning_rate": 1.0235230162986059e-06, "loss": 0.4379, "step": 41560 }, { "epoch": 2.440843168340086, "grad_norm": 7.8712053298950195, "learning_rate": 1.0214530673543727e-06, "loss": 0.4109, "step": 41570 }, { "epoch": 2.441430332922318, "grad_norm": 3.4142801761627197, "learning_rate": 1.0193849754858532e-06, "loss": 0.3735, "step": 41580 }, { "epoch": 2.4420174975045503, "grad_norm": 6.642743110656738, "learning_rate": 1.017318741658374e-06, "loss": 0.325, "step": 41590 }, { "epoch": 2.442604662086783, "grad_norm": 1.822106957435608, "learning_rate": 1.0152543668363969e-06, "loss": 0.4812, "step": 41600 }, { "epoch": 2.443191826669015, "grad_norm": 6.579762935638428, "learning_rate": 1.0131918519835144e-06, "loss": 0.4165, "step": 41610 }, { "epoch": 2.4437789912512478, "grad_norm": 6.7198967933654785, "learning_rate": 1.0111311980624516e-06, "loss": 0.4612, "step": 41620 }, { "epoch": 2.44436615583348, "grad_norm": 3.3195877075195312, "learning_rate": 1.0090724060350653e-06, "loss": 0.2851, "step": 41630 }, { "epoch": 2.4449533204157126, "grad_norm": 3.5688581466674805, "learning_rate": 1.0070154768623425e-06, "loss": 0.3959, "step": 41640 }, { "epoch": 2.4455404849979447, "grad_norm": 3.8660128116607666, "learning_rate": 1.0049604115044005e-06, "loss": 0.4327, "step": 41650 }, { "epoch": 2.4461276495801774, "grad_norm": 3.8122005462646484, "learning_rate": 1.0029072109204873e-06, "loss": 0.4608, "step": 41660 }, { "epoch": 2.4467148141624095, "grad_norm": 3.3033666610717773, "learning_rate": 1.00085587606898e-06, "loss": 0.2961, "step": 41670 }, { "epoch": 2.447301978744642, "grad_norm": 5.381361961364746, "learning_rate": 9.988064079073867e-07, "loss": 0.3576, "step": 41680 }, { "epoch": 2.4478891433268744, "grad_norm": 24.431764602661133, "learning_rate": 9.967588073923396e-07, "loss": 0.5056, "step": 41690 }, { "epoch": 2.448476307909107, "grad_norm": 5.536789894104004, "learning_rate": 9.94713075479603e-07, "loss": 0.4263, "step": 41700 }, { "epoch": 2.449063472491339, "grad_norm": 6.192075729370117, "learning_rate": 9.926692131240667e-07, "loss": 0.5541, "step": 41710 }, { "epoch": 2.449650637073572, "grad_norm": 5.237836837768555, "learning_rate": 9.90627221279754e-07, "loss": 0.4367, "step": 41720 }, { "epoch": 2.450237801655804, "grad_norm": 2.8887407779693604, "learning_rate": 9.885871008998054e-07, "loss": 0.3559, "step": 41730 }, { "epoch": 2.4508249662380366, "grad_norm": 1.95566987991333, "learning_rate": 9.865488529364942e-07, "loss": 0.3801, "step": 41740 }, { "epoch": 2.4514121308202688, "grad_norm": 3.3745739459991455, "learning_rate": 9.84512478341218e-07, "loss": 0.3433, "step": 41750 }, { "epoch": 2.4519992954025014, "grad_norm": 4.261983871459961, "learning_rate": 9.82477978064501e-07, "loss": 0.2957, "step": 41760 }, { "epoch": 2.4525864599847336, "grad_norm": 4.624244213104248, "learning_rate": 9.804453530559921e-07, "loss": 0.2729, "step": 41770 }, { "epoch": 2.453173624566966, "grad_norm": 8.362516403198242, "learning_rate": 9.784146042644622e-07, "loss": 0.3414, "step": 41780 }, { "epoch": 2.4537607891491984, "grad_norm": 3.0980567932128906, "learning_rate": 9.763857326378084e-07, "loss": 0.4835, "step": 41790 }, { "epoch": 2.454347953731431, "grad_norm": 3.024106740951538, "learning_rate": 9.743587391230547e-07, "loss": 0.4402, "step": 41800 }, { "epoch": 2.454935118313663, "grad_norm": 2.318167209625244, "learning_rate": 9.723336246663456e-07, "loss": 0.3911, "step": 41810 }, { "epoch": 2.455522282895896, "grad_norm": 2.3866264820098877, "learning_rate": 9.703103902129462e-07, "loss": 0.4034, "step": 41820 }, { "epoch": 2.456109447478128, "grad_norm": 2.343271493911743, "learning_rate": 9.682890367072479e-07, "loss": 0.4361, "step": 41830 }, { "epoch": 2.4566966120603606, "grad_norm": 2.0358684062957764, "learning_rate": 9.662695650927617e-07, "loss": 0.403, "step": 41840 }, { "epoch": 2.457283776642593, "grad_norm": 4.343914031982422, "learning_rate": 9.642519763121216e-07, "loss": 0.5477, "step": 41850 }, { "epoch": 2.4578709412248254, "grad_norm": 4.700606346130371, "learning_rate": 9.622362713070838e-07, "loss": 0.3481, "step": 41860 }, { "epoch": 2.4584581058070576, "grad_norm": 3.57485294342041, "learning_rate": 9.6022245101852e-07, "loss": 0.462, "step": 41870 }, { "epoch": 2.45904527038929, "grad_norm": 7.50326681137085, "learning_rate": 9.582105163864285e-07, "loss": 0.3568, "step": 41880 }, { "epoch": 2.4596324349715224, "grad_norm": 5.06368350982666, "learning_rate": 9.562004683499248e-07, "loss": 0.3571, "step": 41890 }, { "epoch": 2.460219599553755, "grad_norm": 4.506746292114258, "learning_rate": 9.54192307847243e-07, "loss": 0.3845, "step": 41900 }, { "epoch": 2.460806764135987, "grad_norm": 3.488618850708008, "learning_rate": 9.521860358157387e-07, "loss": 0.3968, "step": 41910 }, { "epoch": 2.46139392871822, "grad_norm": 2.6631979942321777, "learning_rate": 9.501816531918812e-07, "loss": 0.3674, "step": 41920 }, { "epoch": 2.461981093300452, "grad_norm": 7.942485809326172, "learning_rate": 9.481791609112623e-07, "loss": 0.4911, "step": 41930 }, { "epoch": 2.4625682578826846, "grad_norm": 4.3573102951049805, "learning_rate": 9.461785599085904e-07, "loss": 0.3958, "step": 41940 }, { "epoch": 2.463155422464917, "grad_norm": 3.3948025703430176, "learning_rate": 9.441798511176897e-07, "loss": 0.3895, "step": 41950 }, { "epoch": 2.4637425870471494, "grad_norm": 2.9729843139648438, "learning_rate": 9.421830354715034e-07, "loss": 0.6199, "step": 41960 }, { "epoch": 2.4643297516293816, "grad_norm": 7.427242279052734, "learning_rate": 9.401881139020897e-07, "loss": 0.4583, "step": 41970 }, { "epoch": 2.4649169162116142, "grad_norm": 4.808932781219482, "learning_rate": 9.381950873406221e-07, "loss": 0.3265, "step": 41980 }, { "epoch": 2.4655040807938464, "grad_norm": 6.904202461242676, "learning_rate": 9.362039567173903e-07, "loss": 0.4328, "step": 41990 }, { "epoch": 2.466091245376079, "grad_norm": 3.6689341068267822, "learning_rate": 9.342147229618015e-07, "loss": 0.3613, "step": 42000 }, { "epoch": 2.4666784099583112, "grad_norm": 2.847910165786743, "learning_rate": 9.322273870023712e-07, "loss": 0.3892, "step": 42010 }, { "epoch": 2.467265574540544, "grad_norm": 2.539646863937378, "learning_rate": 9.302419497667348e-07, "loss": 0.3845, "step": 42020 }, { "epoch": 2.467852739122776, "grad_norm": 3.6274607181549072, "learning_rate": 9.282584121816374e-07, "loss": 0.442, "step": 42030 }, { "epoch": 2.4684399037050087, "grad_norm": 10.714879035949707, "learning_rate": 9.262767751729446e-07, "loss": 0.5958, "step": 42040 }, { "epoch": 2.469027068287241, "grad_norm": 3.164512872695923, "learning_rate": 9.242970396656253e-07, "loss": 0.4584, "step": 42050 }, { "epoch": 2.4696142328694735, "grad_norm": 2.3428704738616943, "learning_rate": 9.223192065837666e-07, "loss": 0.4341, "step": 42060 }, { "epoch": 2.4702013974517056, "grad_norm": 3.4794459342956543, "learning_rate": 9.20343276850566e-07, "loss": 0.4581, "step": 42070 }, { "epoch": 2.4707885620339383, "grad_norm": 4.101227283477783, "learning_rate": 9.183692513883335e-07, "loss": 0.3918, "step": 42080 }, { "epoch": 2.4713757266161704, "grad_norm": 1.2387053966522217, "learning_rate": 9.163971311184905e-07, "loss": 0.3366, "step": 42090 }, { "epoch": 2.471962891198403, "grad_norm": 4.329237937927246, "learning_rate": 9.144269169615661e-07, "loss": 0.3051, "step": 42100 }, { "epoch": 2.4725500557806352, "grad_norm": 1.9695991277694702, "learning_rate": 9.124586098372007e-07, "loss": 0.3537, "step": 42110 }, { "epoch": 2.473137220362868, "grad_norm": 2.3115808963775635, "learning_rate": 9.104922106641496e-07, "loss": 0.4154, "step": 42120 }, { "epoch": 2.4737243849451, "grad_norm": 6.6643967628479, "learning_rate": 9.085277203602722e-07, "loss": 0.4147, "step": 42130 }, { "epoch": 2.4743115495273327, "grad_norm": 2.543058395385742, "learning_rate": 9.065651398425368e-07, "loss": 0.4004, "step": 42140 }, { "epoch": 2.474898714109565, "grad_norm": 4.41888952255249, "learning_rate": 9.046044700270224e-07, "loss": 0.3444, "step": 42150 }, { "epoch": 2.4754858786917975, "grad_norm": 4.05148458480835, "learning_rate": 9.026457118289161e-07, "loss": 0.4906, "step": 42160 }, { "epoch": 2.4760730432740297, "grad_norm": 3.606813430786133, "learning_rate": 9.006888661625113e-07, "loss": 0.4291, "step": 42170 }, { "epoch": 2.4766602078562623, "grad_norm": 1.6449079513549805, "learning_rate": 8.987339339412099e-07, "loss": 0.3722, "step": 42180 }, { "epoch": 2.4772473724384945, "grad_norm": 6.1492462158203125, "learning_rate": 8.967809160775199e-07, "loss": 0.4536, "step": 42190 }, { "epoch": 2.477834537020727, "grad_norm": 14.640966415405273, "learning_rate": 8.948298134830564e-07, "loss": 0.4773, "step": 42200 }, { "epoch": 2.4784217016029593, "grad_norm": 9.901212692260742, "learning_rate": 8.928806270685403e-07, "loss": 0.4296, "step": 42210 }, { "epoch": 2.479008866185192, "grad_norm": 3.0759379863739014, "learning_rate": 8.909333577437973e-07, "loss": 0.2607, "step": 42220 }, { "epoch": 2.479596030767424, "grad_norm": 3.1272165775299072, "learning_rate": 8.889880064177602e-07, "loss": 0.2896, "step": 42230 }, { "epoch": 2.4801831953496567, "grad_norm": 3.893935441970825, "learning_rate": 8.870445739984634e-07, "loss": 0.431, "step": 42240 }, { "epoch": 2.480770359931889, "grad_norm": 4.693424701690674, "learning_rate": 8.851030613930484e-07, "loss": 0.4991, "step": 42250 }, { "epoch": 2.481357524514121, "grad_norm": 8.659393310546875, "learning_rate": 8.83163469507759e-07, "loss": 0.4074, "step": 42260 }, { "epoch": 2.4819446890963537, "grad_norm": 5.411926746368408, "learning_rate": 8.812257992479439e-07, "loss": 0.2905, "step": 42270 }, { "epoch": 2.4825318536785863, "grad_norm": 2.2967801094055176, "learning_rate": 8.79290051518053e-07, "loss": 0.3016, "step": 42280 }, { "epoch": 2.4831190182608185, "grad_norm": 10.424676895141602, "learning_rate": 8.773562272216413e-07, "loss": 0.4364, "step": 42290 }, { "epoch": 2.4837061828430507, "grad_norm": 4.899374961853027, "learning_rate": 8.754243272613633e-07, "loss": 0.5008, "step": 42300 }, { "epoch": 2.4842933474252833, "grad_norm": 2.4898457527160645, "learning_rate": 8.734943525389766e-07, "loss": 0.4436, "step": 42310 }, { "epoch": 2.484880512007516, "grad_norm": 4.496344566345215, "learning_rate": 8.715663039553424e-07, "loss": 0.3571, "step": 42320 }, { "epoch": 2.485467676589748, "grad_norm": 4.4889349937438965, "learning_rate": 8.696401824104161e-07, "loss": 0.3053, "step": 42330 }, { "epoch": 2.4860548411719803, "grad_norm": 5.474678993225098, "learning_rate": 8.67715988803261e-07, "loss": 0.517, "step": 42340 }, { "epoch": 2.486642005754213, "grad_norm": 3.299678325653076, "learning_rate": 8.657937240320352e-07, "loss": 0.2671, "step": 42350 }, { "epoch": 2.4872291703364455, "grad_norm": 1.4562102556228638, "learning_rate": 8.638733889940026e-07, "loss": 0.413, "step": 42360 }, { "epoch": 2.4878163349186777, "grad_norm": 3.581782102584839, "learning_rate": 8.619549845855191e-07, "loss": 0.3149, "step": 42370 }, { "epoch": 2.48840349950091, "grad_norm": 3.1710426807403564, "learning_rate": 8.600385117020444e-07, "loss": 0.5045, "step": 42380 }, { "epoch": 2.4889906640831425, "grad_norm": 2.9251976013183594, "learning_rate": 8.581239712381339e-07, "loss": 0.4936, "step": 42390 }, { "epoch": 2.489577828665375, "grad_norm": 10.513270378112793, "learning_rate": 8.562113640874431e-07, "loss": 0.3218, "step": 42400 }, { "epoch": 2.4901649932476073, "grad_norm": 5.3931121826171875, "learning_rate": 8.543006911427243e-07, "loss": 0.3342, "step": 42410 }, { "epoch": 2.4907521578298395, "grad_norm": 4.265451431274414, "learning_rate": 8.523919532958274e-07, "loss": 0.4538, "step": 42420 }, { "epoch": 2.491339322412072, "grad_norm": 1.493001937866211, "learning_rate": 8.504851514376949e-07, "loss": 0.3762, "step": 42430 }, { "epoch": 2.4919264869943047, "grad_norm": 6.437669277191162, "learning_rate": 8.485802864583725e-07, "loss": 0.3333, "step": 42440 }, { "epoch": 2.492513651576537, "grad_norm": 3.7917912006378174, "learning_rate": 8.466773592469984e-07, "loss": 0.3641, "step": 42450 }, { "epoch": 2.493100816158769, "grad_norm": 2.543466329574585, "learning_rate": 8.447763706918066e-07, "loss": 0.3825, "step": 42460 }, { "epoch": 2.4936879807410017, "grad_norm": 3.9238173961639404, "learning_rate": 8.428773216801245e-07, "loss": 0.2011, "step": 42470 }, { "epoch": 2.494275145323234, "grad_norm": 3.191209077835083, "learning_rate": 8.409802130983758e-07, "loss": 0.467, "step": 42480 }, { "epoch": 2.4948623099054665, "grad_norm": 9.779369354248047, "learning_rate": 8.390850458320793e-07, "loss": 0.382, "step": 42490 }, { "epoch": 2.4954494744876987, "grad_norm": 6.02101469039917, "learning_rate": 8.371918207658458e-07, "loss": 0.3942, "step": 42500 }, { "epoch": 2.4960366390699313, "grad_norm": 2.6411640644073486, "learning_rate": 8.353005387833813e-07, "loss": 0.3801, "step": 42510 }, { "epoch": 2.4966238036521635, "grad_norm": 3.7625622749328613, "learning_rate": 8.334112007674833e-07, "loss": 0.4728, "step": 42520 }, { "epoch": 2.497210968234396, "grad_norm": 3.469398021697998, "learning_rate": 8.315238076000426e-07, "loss": 0.4028, "step": 42530 }, { "epoch": 2.4977981328166283, "grad_norm": 2.126722574234009, "learning_rate": 8.296383601620428e-07, "loss": 0.3478, "step": 42540 }, { "epoch": 2.498385297398861, "grad_norm": 3.8563361167907715, "learning_rate": 8.277548593335588e-07, "loss": 0.3597, "step": 42550 }, { "epoch": 2.498972461981093, "grad_norm": 6.942263126373291, "learning_rate": 8.258733059937552e-07, "loss": 0.4599, "step": 42560 }, { "epoch": 2.4995596265633258, "grad_norm": 2.7067646980285645, "learning_rate": 8.239937010208898e-07, "loss": 0.5172, "step": 42570 }, { "epoch": 2.500146791145558, "grad_norm": 5.2976460456848145, "learning_rate": 8.221160452923104e-07, "loss": 0.3911, "step": 42580 }, { "epoch": 2.5007339557277906, "grad_norm": 2.341871500015259, "learning_rate": 8.202403396844549e-07, "loss": 0.3994, "step": 42590 }, { "epoch": 2.5013211203100227, "grad_norm": 2.6835453510284424, "learning_rate": 8.183665850728512e-07, "loss": 0.4671, "step": 42600 }, { "epoch": 2.5019082848922554, "grad_norm": 1.964096188545227, "learning_rate": 8.164947823321151e-07, "loss": 0.4054, "step": 42610 }, { "epoch": 2.5024954494744875, "grad_norm": 1.9402228593826294, "learning_rate": 8.146249323359529e-07, "loss": 0.3142, "step": 42620 }, { "epoch": 2.50308261405672, "grad_norm": 3.4447643756866455, "learning_rate": 8.127570359571585e-07, "loss": 0.3517, "step": 42630 }, { "epoch": 2.5036697786389523, "grad_norm": 5.210703372955322, "learning_rate": 8.108910940676162e-07, "loss": 0.3973, "step": 42640 }, { "epoch": 2.504256943221185, "grad_norm": 2.8522636890411377, "learning_rate": 8.090271075382939e-07, "loss": 0.3614, "step": 42650 }, { "epoch": 2.504844107803417, "grad_norm": 2.4334025382995605, "learning_rate": 8.071650772392491e-07, "loss": 0.3309, "step": 42660 }, { "epoch": 2.5054312723856498, "grad_norm": 8.03770637512207, "learning_rate": 8.053050040396255e-07, "loss": 0.4615, "step": 42670 }, { "epoch": 2.506018436967882, "grad_norm": 2.881267547607422, "learning_rate": 8.034468888076557e-07, "loss": 0.4508, "step": 42680 }, { "epoch": 2.5066056015501146, "grad_norm": 1.413419246673584, "learning_rate": 8.015907324106576e-07, "loss": 0.3864, "step": 42690 }, { "epoch": 2.5071927661323468, "grad_norm": 6.719443321228027, "learning_rate": 7.997365357150305e-07, "loss": 0.364, "step": 42700 }, { "epoch": 2.5077799307145794, "grad_norm": 3.0059738159179688, "learning_rate": 7.978842995862635e-07, "loss": 0.412, "step": 42710 }, { "epoch": 2.5083670952968116, "grad_norm": 4.9858622550964355, "learning_rate": 7.960340248889293e-07, "loss": 0.4368, "step": 42720 }, { "epoch": 2.508954259879044, "grad_norm": 4.1652512550354, "learning_rate": 7.941857124866847e-07, "loss": 0.6185, "step": 42730 }, { "epoch": 2.5095414244612764, "grad_norm": 3.6793465614318848, "learning_rate": 7.923393632422727e-07, "loss": 0.3499, "step": 42740 }, { "epoch": 2.510128589043509, "grad_norm": 2.7557380199432373, "learning_rate": 7.904949780175137e-07, "loss": 0.4059, "step": 42750 }, { "epoch": 2.510715753625741, "grad_norm": 2.358238697052002, "learning_rate": 7.886525576733195e-07, "loss": 0.4042, "step": 42760 }, { "epoch": 2.511302918207974, "grad_norm": 2.2812447547912598, "learning_rate": 7.868121030696801e-07, "loss": 0.4021, "step": 42770 }, { "epoch": 2.511890082790206, "grad_norm": 2.812279224395752, "learning_rate": 7.849736150656695e-07, "loss": 0.4279, "step": 42780 }, { "epoch": 2.5124772473724386, "grad_norm": 3.448476791381836, "learning_rate": 7.831370945194411e-07, "loss": 0.3969, "step": 42790 }, { "epoch": 2.513064411954671, "grad_norm": 6.1669440269470215, "learning_rate": 7.813025422882336e-07, "loss": 0.4067, "step": 42800 }, { "epoch": 2.5136515765369034, "grad_norm": 3.20806884765625, "learning_rate": 7.794699592283644e-07, "loss": 0.3972, "step": 42810 }, { "epoch": 2.5142387411191356, "grad_norm": 10.313450813293457, "learning_rate": 7.776393461952325e-07, "loss": 0.4497, "step": 42820 }, { "epoch": 2.514825905701368, "grad_norm": 2.7211873531341553, "learning_rate": 7.758107040433182e-07, "loss": 0.3581, "step": 42830 }, { "epoch": 2.5154130702836004, "grad_norm": 4.525038242340088, "learning_rate": 7.739840336261811e-07, "loss": 0.4255, "step": 42840 }, { "epoch": 2.516000234865833, "grad_norm": 2.639192581176758, "learning_rate": 7.721593357964597e-07, "loss": 0.3842, "step": 42850 }, { "epoch": 2.516587399448065, "grad_norm": 5.496959686279297, "learning_rate": 7.70336611405873e-07, "loss": 0.4006, "step": 42860 }, { "epoch": 2.517174564030298, "grad_norm": 6.726895809173584, "learning_rate": 7.685158613052191e-07, "loss": 0.4758, "step": 42870 }, { "epoch": 2.51776172861253, "grad_norm": 2.5755553245544434, "learning_rate": 7.666970863443724e-07, "loss": 0.3579, "step": 42880 }, { "epoch": 2.5183488931947626, "grad_norm": 2.3982505798339844, "learning_rate": 7.648802873722871e-07, "loss": 0.3599, "step": 42890 }, { "epoch": 2.518936057776995, "grad_norm": 3.7415757179260254, "learning_rate": 7.630654652369946e-07, "loss": 0.4268, "step": 42900 }, { "epoch": 2.5195232223592274, "grad_norm": 2.5331547260284424, "learning_rate": 7.612526207856036e-07, "loss": 0.434, "step": 42910 }, { "epoch": 2.5201103869414596, "grad_norm": 1.842288613319397, "learning_rate": 7.594417548643019e-07, "loss": 0.4254, "step": 42920 }, { "epoch": 2.5206975515236922, "grad_norm": 2.322122097015381, "learning_rate": 7.576328683183492e-07, "loss": 0.4459, "step": 42930 }, { "epoch": 2.5212847161059244, "grad_norm": 5.112093448638916, "learning_rate": 7.558259619920843e-07, "loss": 0.3717, "step": 42940 }, { "epoch": 2.521871880688157, "grad_norm": 3.5551867485046387, "learning_rate": 7.540210367289214e-07, "loss": 0.4292, "step": 42950 }, { "epoch": 2.522459045270389, "grad_norm": 3.899317741394043, "learning_rate": 7.522180933713497e-07, "loss": 0.4957, "step": 42960 }, { "epoch": 2.523046209852622, "grad_norm": 2.530000925064087, "learning_rate": 7.504171327609344e-07, "loss": 0.4745, "step": 42970 }, { "epoch": 2.523633374434854, "grad_norm": 3.9180734157562256, "learning_rate": 7.486181557383115e-07, "loss": 0.2706, "step": 42980 }, { "epoch": 2.5242205390170867, "grad_norm": 4.660490036010742, "learning_rate": 7.468211631431943e-07, "loss": 0.3019, "step": 42990 }, { "epoch": 2.524807703599319, "grad_norm": 2.799351453781128, "learning_rate": 7.450261558143718e-07, "loss": 0.3209, "step": 43000 }, { "epoch": 2.525394868181551, "grad_norm": 10.327122688293457, "learning_rate": 7.432331345897026e-07, "loss": 0.4584, "step": 43010 }, { "epoch": 2.5259820327637836, "grad_norm": 4.782814025878906, "learning_rate": 7.414421003061184e-07, "loss": 0.3663, "step": 43020 }, { "epoch": 2.5265691973460163, "grad_norm": 6.561638355255127, "learning_rate": 7.39653053799625e-07, "loss": 0.4503, "step": 43030 }, { "epoch": 2.5271563619282484, "grad_norm": 2.95737624168396, "learning_rate": 7.378659959052998e-07, "loss": 0.388, "step": 43040 }, { "epoch": 2.5277435265104806, "grad_norm": 6.117272853851318, "learning_rate": 7.360809274572917e-07, "loss": 0.3529, "step": 43050 }, { "epoch": 2.5283306910927132, "grad_norm": 3.219174385070801, "learning_rate": 7.342978492888237e-07, "loss": 0.3842, "step": 43060 }, { "epoch": 2.528917855674946, "grad_norm": 1.444471001625061, "learning_rate": 7.325167622321827e-07, "loss": 0.4375, "step": 43070 }, { "epoch": 2.529505020257178, "grad_norm": 3.530967950820923, "learning_rate": 7.30737667118735e-07, "loss": 0.3574, "step": 43080 }, { "epoch": 2.5300921848394102, "grad_norm": 6.1797990798950195, "learning_rate": 7.289605647789111e-07, "loss": 0.2497, "step": 43090 }, { "epoch": 2.530679349421643, "grad_norm": 2.744497537612915, "learning_rate": 7.271854560422154e-07, "loss": 0.3113, "step": 43100 }, { "epoch": 2.5312665140038755, "grad_norm": 3.9684174060821533, "learning_rate": 7.254123417372172e-07, "loss": 0.4354, "step": 43110 }, { "epoch": 2.5318536785861077, "grad_norm": 5.556124210357666, "learning_rate": 7.236412226915574e-07, "loss": 0.3661, "step": 43120 }, { "epoch": 2.53244084316834, "grad_norm": 3.431076765060425, "learning_rate": 7.218720997319462e-07, "loss": 0.4064, "step": 43130 }, { "epoch": 2.5330280077505725, "grad_norm": 2.0647528171539307, "learning_rate": 7.201049736841615e-07, "loss": 0.3154, "step": 43140 }, { "epoch": 2.533615172332805, "grad_norm": 6.799062728881836, "learning_rate": 7.183398453730478e-07, "loss": 0.3974, "step": 43150 }, { "epoch": 2.5342023369150373, "grad_norm": 3.4611752033233643, "learning_rate": 7.165767156225195e-07, "loss": 0.3032, "step": 43160 }, { "epoch": 2.5347895014972694, "grad_norm": 24.167219161987305, "learning_rate": 7.148155852555555e-07, "loss": 0.3645, "step": 43170 }, { "epoch": 2.535376666079502, "grad_norm": 3.995938777923584, "learning_rate": 7.130564550942043e-07, "loss": 0.3433, "step": 43180 }, { "epoch": 2.5359638306617347, "grad_norm": 5.07703971862793, "learning_rate": 7.11299325959578e-07, "loss": 0.374, "step": 43190 }, { "epoch": 2.536550995243967, "grad_norm": 2.207843542098999, "learning_rate": 7.095441986718571e-07, "loss": 0.4771, "step": 43200 }, { "epoch": 2.537138159826199, "grad_norm": 6.686115264892578, "learning_rate": 7.077910740502852e-07, "loss": 0.3178, "step": 43210 }, { "epoch": 2.5377253244084317, "grad_norm": 5.887069225311279, "learning_rate": 7.060399529131723e-07, "loss": 0.6489, "step": 43220 }, { "epoch": 2.5383124889906643, "grad_norm": 6.536721229553223, "learning_rate": 7.042908360778932e-07, "loss": 0.3827, "step": 43230 }, { "epoch": 2.5388996535728965, "grad_norm": 2.143815040588379, "learning_rate": 7.025437243608901e-07, "loss": 0.3509, "step": 43240 }, { "epoch": 2.5394868181551287, "grad_norm": 2.5059103965759277, "learning_rate": 7.007986185776633e-07, "loss": 0.341, "step": 43250 }, { "epoch": 2.5400739827373613, "grad_norm": 5.42589807510376, "learning_rate": 6.990555195427812e-07, "loss": 0.3715, "step": 43260 }, { "epoch": 2.540661147319594, "grad_norm": 2.2539522647857666, "learning_rate": 6.973144280698735e-07, "loss": 0.3647, "step": 43270 }, { "epoch": 2.541248311901826, "grad_norm": 1.0280073881149292, "learning_rate": 6.955753449716345e-07, "loss": 0.4422, "step": 43280 }, { "epoch": 2.5418354764840583, "grad_norm": 8.92809009552002, "learning_rate": 6.938382710598207e-07, "loss": 0.4758, "step": 43290 }, { "epoch": 2.542422641066291, "grad_norm": 4.344889163970947, "learning_rate": 6.921032071452483e-07, "loss": 0.4044, "step": 43300 }, { "epoch": 2.5430098056485235, "grad_norm": 1.5349215269088745, "learning_rate": 6.903701540377967e-07, "loss": 0.3207, "step": 43310 }, { "epoch": 2.5435969702307557, "grad_norm": 4.1346025466918945, "learning_rate": 6.886391125464093e-07, "loss": 0.4057, "step": 43320 }, { "epoch": 2.544184134812988, "grad_norm": 8.274141311645508, "learning_rate": 6.869100834790892e-07, "loss": 0.354, "step": 43330 }, { "epoch": 2.5447712993952205, "grad_norm": 37.87571334838867, "learning_rate": 6.851830676428966e-07, "loss": 0.3835, "step": 43340 }, { "epoch": 2.545358463977453, "grad_norm": 1.782333493232727, "learning_rate": 6.83458065843956e-07, "loss": 0.362, "step": 43350 }, { "epoch": 2.5459456285596853, "grad_norm": 3.5587337017059326, "learning_rate": 6.817350788874504e-07, "loss": 0.4569, "step": 43360 }, { "epoch": 2.5465327931419175, "grad_norm": 3.048422336578369, "learning_rate": 6.800141075776223e-07, "loss": 0.4132, "step": 43370 }, { "epoch": 2.54711995772415, "grad_norm": 10.699331283569336, "learning_rate": 6.78295152717775e-07, "loss": 0.55, "step": 43380 }, { "epoch": 2.5477071223063827, "grad_norm": 4.166684150695801, "learning_rate": 6.765782151102662e-07, "loss": 0.4, "step": 43390 }, { "epoch": 2.548294286888615, "grad_norm": 7.184022903442383, "learning_rate": 6.748632955565171e-07, "loss": 0.4873, "step": 43400 }, { "epoch": 2.548881451470847, "grad_norm": 2.33111310005188, "learning_rate": 6.731503948570046e-07, "loss": 0.4085, "step": 43410 }, { "epoch": 2.5494686160530797, "grad_norm": 3.7342467308044434, "learning_rate": 6.714395138112629e-07, "loss": 0.3976, "step": 43420 }, { "epoch": 2.5500557806353124, "grad_norm": 9.098057746887207, "learning_rate": 6.697306532178854e-07, "loss": 0.4141, "step": 43430 }, { "epoch": 2.5506429452175445, "grad_norm": 3.185363292694092, "learning_rate": 6.680238138745182e-07, "loss": 0.3284, "step": 43440 }, { "epoch": 2.5512301097997767, "grad_norm": 3.590482711791992, "learning_rate": 6.663189965778683e-07, "loss": 0.3703, "step": 43450 }, { "epoch": 2.5518172743820093, "grad_norm": 2.6513540744781494, "learning_rate": 6.646162021236974e-07, "loss": 0.3535, "step": 43460 }, { "epoch": 2.5524044389642415, "grad_norm": 3.4980576038360596, "learning_rate": 6.629154313068226e-07, "loss": 0.481, "step": 43470 }, { "epoch": 2.552991603546474, "grad_norm": 4.13627815246582, "learning_rate": 6.612166849211165e-07, "loss": 0.5231, "step": 43480 }, { "epoch": 2.5535787681287063, "grad_norm": 5.225932598114014, "learning_rate": 6.595199637595073e-07, "loss": 0.5309, "step": 43490 }, { "epoch": 2.554165932710939, "grad_norm": 6.612321853637695, "learning_rate": 6.578252686139775e-07, "loss": 0.3863, "step": 43500 }, { "epoch": 2.554753097293171, "grad_norm": 2.9921016693115234, "learning_rate": 6.561326002755636e-07, "loss": 0.3282, "step": 43510 }, { "epoch": 2.5553402618754038, "grad_norm": 5.208406925201416, "learning_rate": 6.544419595343581e-07, "loss": 0.377, "step": 43520 }, { "epoch": 2.555927426457636, "grad_norm": 6.944899559020996, "learning_rate": 6.527533471795028e-07, "loss": 0.4875, "step": 43530 }, { "epoch": 2.5565145910398686, "grad_norm": 9.982535362243652, "learning_rate": 6.510667639991963e-07, "loss": 0.5181, "step": 43540 }, { "epoch": 2.5571017556221007, "grad_norm": 2.503944158554077, "learning_rate": 6.49382210780688e-07, "loss": 0.3657, "step": 43550 }, { "epoch": 2.5576889202043334, "grad_norm": 2.0955958366394043, "learning_rate": 6.476996883102837e-07, "loss": 0.408, "step": 43560 }, { "epoch": 2.5582760847865655, "grad_norm": 2.5346107482910156, "learning_rate": 6.460191973733359e-07, "loss": 0.4245, "step": 43570 }, { "epoch": 2.558863249368798, "grad_norm": 5.310306072235107, "learning_rate": 6.443407387542516e-07, "loss": 0.4481, "step": 43580 }, { "epoch": 2.5594504139510303, "grad_norm": 3.9366207122802734, "learning_rate": 6.42664313236489e-07, "loss": 0.4348, "step": 43590 }, { "epoch": 2.560037578533263, "grad_norm": 4.506864547729492, "learning_rate": 6.409899216025578e-07, "loss": 0.3653, "step": 43600 }, { "epoch": 2.560624743115495, "grad_norm": 3.0161209106445312, "learning_rate": 6.393175646340183e-07, "loss": 0.4317, "step": 43610 }, { "epoch": 2.5612119076977278, "grad_norm": 6.865502834320068, "learning_rate": 6.376472431114783e-07, "loss": 0.3803, "step": 43620 }, { "epoch": 2.56179907227996, "grad_norm": 6.772477149963379, "learning_rate": 6.359789578145991e-07, "loss": 0.4513, "step": 43630 }, { "epoch": 2.5623862368621926, "grad_norm": 4.655421257019043, "learning_rate": 6.343127095220886e-07, "loss": 0.3986, "step": 43640 }, { "epoch": 2.5629734014444248, "grad_norm": 3.8556103706359863, "learning_rate": 6.326484990117093e-07, "loss": 0.2893, "step": 43650 }, { "epoch": 2.5635605660266574, "grad_norm": 3.681333303451538, "learning_rate": 6.309863270602645e-07, "loss": 0.4259, "step": 43660 }, { "epoch": 2.5641477306088896, "grad_norm": 1.3596006631851196, "learning_rate": 6.293261944436124e-07, "loss": 0.4336, "step": 43670 }, { "epoch": 2.564734895191122, "grad_norm": 3.285193681716919, "learning_rate": 6.27668101936656e-07, "loss": 0.3973, "step": 43680 }, { "epoch": 2.5653220597733544, "grad_norm": 18.36962890625, "learning_rate": 6.260120503133477e-07, "loss": 0.4358, "step": 43690 }, { "epoch": 2.565909224355587, "grad_norm": 8.0608491897583, "learning_rate": 6.243580403466859e-07, "loss": 0.4464, "step": 43700 }, { "epoch": 2.566496388937819, "grad_norm": 3.510223150253296, "learning_rate": 6.227060728087187e-07, "loss": 0.3612, "step": 43710 }, { "epoch": 2.567083553520052, "grad_norm": 3.590001344680786, "learning_rate": 6.210561484705347e-07, "loss": 0.43, "step": 43720 }, { "epoch": 2.567670718102284, "grad_norm": 2.4861929416656494, "learning_rate": 6.194082681022768e-07, "loss": 0.5416, "step": 43730 }, { "epoch": 2.5682578826845166, "grad_norm": 2.4460394382476807, "learning_rate": 6.17762432473129e-07, "loss": 0.3749, "step": 43740 }, { "epoch": 2.568845047266749, "grad_norm": 1.9796088933944702, "learning_rate": 6.161186423513227e-07, "loss": 0.3951, "step": 43750 }, { "epoch": 2.5694322118489814, "grad_norm": 2.6666932106018066, "learning_rate": 6.144768985041316e-07, "loss": 0.4447, "step": 43760 }, { "epoch": 2.5700193764312136, "grad_norm": 4.096259593963623, "learning_rate": 6.128372016978767e-07, "loss": 0.2853, "step": 43770 }, { "epoch": 2.570606541013446, "grad_norm": 2.4330942630767822, "learning_rate": 6.111995526979248e-07, "loss": 0.443, "step": 43780 }, { "epoch": 2.5711937055956784, "grad_norm": 8.726058006286621, "learning_rate": 6.095639522686836e-07, "loss": 0.4328, "step": 43790 }, { "epoch": 2.571780870177911, "grad_norm": 1.7248620986938477, "learning_rate": 6.07930401173607e-07, "loss": 0.4297, "step": 43800 }, { "epoch": 2.572368034760143, "grad_norm": 4.424729824066162, "learning_rate": 6.062989001751912e-07, "loss": 0.4738, "step": 43810 }, { "epoch": 2.572955199342376, "grad_norm": 5.480549335479736, "learning_rate": 6.046694500349753e-07, "loss": 0.6034, "step": 43820 }, { "epoch": 2.573542363924608, "grad_norm": 6.3416337966918945, "learning_rate": 6.030420515135427e-07, "loss": 0.3491, "step": 43830 }, { "epoch": 2.5741295285068406, "grad_norm": 2.4356441497802734, "learning_rate": 6.014167053705184e-07, "loss": 0.5362, "step": 43840 }, { "epoch": 2.574716693089073, "grad_norm": 3.855304718017578, "learning_rate": 5.997934123645666e-07, "loss": 0.4208, "step": 43850 }, { "epoch": 2.5753038576713054, "grad_norm": 3.7895572185516357, "learning_rate": 5.981721732533974e-07, "loss": 0.4017, "step": 43860 }, { "epoch": 2.5758910222535376, "grad_norm": 2.3958675861358643, "learning_rate": 5.965529887937599e-07, "loss": 0.2849, "step": 43870 }, { "epoch": 2.57647818683577, "grad_norm": 3.516533374786377, "learning_rate": 5.949358597414451e-07, "loss": 0.3227, "step": 43880 }, { "epoch": 2.5770653514180024, "grad_norm": 2.9101593494415283, "learning_rate": 5.933207868512836e-07, "loss": 0.2989, "step": 43890 }, { "epoch": 2.577652516000235, "grad_norm": 6.75596284866333, "learning_rate": 5.917077708771468e-07, "loss": 0.4863, "step": 43900 }, { "epoch": 2.578239680582467, "grad_norm": 1.509232759475708, "learning_rate": 5.900968125719465e-07, "loss": 0.3697, "step": 43910 }, { "epoch": 2.5788268451646994, "grad_norm": 2.293081045150757, "learning_rate": 5.884879126876325e-07, "loss": 0.4394, "step": 43920 }, { "epoch": 2.579414009746932, "grad_norm": 2.4960880279541016, "learning_rate": 5.86881071975196e-07, "loss": 0.4459, "step": 43930 }, { "epoch": 2.5800011743291646, "grad_norm": 12.252823829650879, "learning_rate": 5.852762911846665e-07, "loss": 0.3748, "step": 43940 }, { "epoch": 2.580588338911397, "grad_norm": 9.195157051086426, "learning_rate": 5.836735710651086e-07, "loss": 0.4557, "step": 43950 }, { "epoch": 2.581175503493629, "grad_norm": 2.842294931411743, "learning_rate": 5.820729123646279e-07, "loss": 0.2887, "step": 43960 }, { "epoch": 2.5817626680758616, "grad_norm": 3.1968729496002197, "learning_rate": 5.804743158303705e-07, "loss": 0.3454, "step": 43970 }, { "epoch": 2.5823498326580943, "grad_norm": 4.354291915893555, "learning_rate": 5.788777822085162e-07, "loss": 0.3798, "step": 43980 }, { "epoch": 2.5829369972403264, "grad_norm": 2.9765002727508545, "learning_rate": 5.772833122442816e-07, "loss": 0.3622, "step": 43990 }, { "epoch": 2.5835241618225586, "grad_norm": 1.806612253189087, "learning_rate": 5.75690906681921e-07, "loss": 0.4434, "step": 44000 }, { "epoch": 2.5841113264047912, "grad_norm": 3.4576566219329834, "learning_rate": 5.741005662647265e-07, "loss": 0.383, "step": 44010 }, { "epoch": 2.584698490987024, "grad_norm": 6.851508140563965, "learning_rate": 5.72512291735024e-07, "loss": 0.3637, "step": 44020 }, { "epoch": 2.585285655569256, "grad_norm": 2.3337671756744385, "learning_rate": 5.709260838341779e-07, "loss": 0.3745, "step": 44030 }, { "epoch": 2.5858728201514882, "grad_norm": 4.804156303405762, "learning_rate": 5.693419433025826e-07, "loss": 0.3154, "step": 44040 }, { "epoch": 2.586459984733721, "grad_norm": 2.25431752204895, "learning_rate": 5.677598708796744e-07, "loss": 0.3336, "step": 44050 }, { "epoch": 2.5870471493159535, "grad_norm": 3.284146547317505, "learning_rate": 5.661798673039204e-07, "loss": 0.5384, "step": 44060 }, { "epoch": 2.5876343138981857, "grad_norm": 3.963995933532715, "learning_rate": 5.646019333128228e-07, "loss": 0.3167, "step": 44070 }, { "epoch": 2.588221478480418, "grad_norm": 4.446403980255127, "learning_rate": 5.630260696429163e-07, "loss": 0.4307, "step": 44080 }, { "epoch": 2.5888086430626505, "grad_norm": 2.534599542617798, "learning_rate": 5.614522770297709e-07, "loss": 0.3599, "step": 44090 }, { "epoch": 2.589395807644883, "grad_norm": 3.354511260986328, "learning_rate": 5.598805562079895e-07, "loss": 0.4289, "step": 44100 }, { "epoch": 2.5899829722271153, "grad_norm": 3.4188106060028076, "learning_rate": 5.583109079112082e-07, "loss": 0.4187, "step": 44110 }, { "epoch": 2.5905701368093474, "grad_norm": 4.486258029937744, "learning_rate": 5.567433328720956e-07, "loss": 0.4053, "step": 44120 }, { "epoch": 2.59115730139158, "grad_norm": 2.1986184120178223, "learning_rate": 5.551778318223521e-07, "loss": 0.3909, "step": 44130 }, { "epoch": 2.5917444659738127, "grad_norm": 3.7729923725128174, "learning_rate": 5.536144054927101e-07, "loss": 0.4389, "step": 44140 }, { "epoch": 2.592331630556045, "grad_norm": 4.507167339324951, "learning_rate": 5.520530546129344e-07, "loss": 0.335, "step": 44150 }, { "epoch": 2.592918795138277, "grad_norm": 2.638946294784546, "learning_rate": 5.50493779911821e-07, "loss": 0.4075, "step": 44160 }, { "epoch": 2.5935059597205097, "grad_norm": 3.699226140975952, "learning_rate": 5.489365821171938e-07, "loss": 0.3203, "step": 44170 }, { "epoch": 2.5940931243027423, "grad_norm": 7.207027912139893, "learning_rate": 5.47381461955912e-07, "loss": 0.4864, "step": 44180 }, { "epoch": 2.5946802888849745, "grad_norm": 4.433666229248047, "learning_rate": 5.458284201538611e-07, "loss": 0.3498, "step": 44190 }, { "epoch": 2.5952674534672067, "grad_norm": 3.2540881633758545, "learning_rate": 5.442774574359583e-07, "loss": 0.3671, "step": 44200 }, { "epoch": 2.5958546180494393, "grad_norm": 1.607490062713623, "learning_rate": 5.427285745261523e-07, "loss": 0.5184, "step": 44210 }, { "epoch": 2.596441782631672, "grad_norm": 4.586639881134033, "learning_rate": 5.411817721474166e-07, "loss": 0.3514, "step": 44220 }, { "epoch": 2.597028947213904, "grad_norm": 2.5198121070861816, "learning_rate": 5.396370510217564e-07, "loss": 0.3597, "step": 44230 }, { "epoch": 2.5976161117961363, "grad_norm": 12.710036277770996, "learning_rate": 5.380944118702042e-07, "loss": 0.4095, "step": 44240 }, { "epoch": 2.598203276378369, "grad_norm": 1.4445862770080566, "learning_rate": 5.365538554128224e-07, "loss": 0.3098, "step": 44250 }, { "epoch": 2.5987904409606015, "grad_norm": 7.93609094619751, "learning_rate": 5.350153823687004e-07, "loss": 0.4425, "step": 44260 }, { "epoch": 2.5993776055428337, "grad_norm": 1.809911847114563, "learning_rate": 5.334789934559532e-07, "loss": 0.4004, "step": 44270 }, { "epoch": 2.599964770125066, "grad_norm": 3.196472406387329, "learning_rate": 5.319446893917246e-07, "loss": 0.4188, "step": 44280 }, { "epoch": 2.6005519347072985, "grad_norm": 2.744612693786621, "learning_rate": 5.304124708921871e-07, "loss": 0.4892, "step": 44290 }, { "epoch": 2.601139099289531, "grad_norm": 3.7233786582946777, "learning_rate": 5.288823386725384e-07, "loss": 0.436, "step": 44300 }, { "epoch": 2.6017262638717633, "grad_norm": 4.673694133758545, "learning_rate": 5.273542934469994e-07, "loss": 0.3965, "step": 44310 }, { "epoch": 2.6023134284539955, "grad_norm": 12.316829681396484, "learning_rate": 5.258283359288207e-07, "loss": 0.4687, "step": 44320 }, { "epoch": 2.602900593036228, "grad_norm": 6.643340110778809, "learning_rate": 5.243044668302777e-07, "loss": 0.3653, "step": 44330 }, { "epoch": 2.6034877576184603, "grad_norm": 3.4730193614959717, "learning_rate": 5.227826868626689e-07, "loss": 0.3995, "step": 44340 }, { "epoch": 2.604074922200693, "grad_norm": 2.720271587371826, "learning_rate": 5.212629967363209e-07, "loss": 0.3232, "step": 44350 }, { "epoch": 2.604662086782925, "grad_norm": 4.005918025970459, "learning_rate": 5.197453971605804e-07, "loss": 0.3286, "step": 44360 }, { "epoch": 2.6052492513651577, "grad_norm": 3.4583089351654053, "learning_rate": 5.182298888438236e-07, "loss": 0.2773, "step": 44370 }, { "epoch": 2.60583641594739, "grad_norm": 1.8130359649658203, "learning_rate": 5.167164724934465e-07, "loss": 0.2398, "step": 44380 }, { "epoch": 2.6064235805296225, "grad_norm": 2.3778951168060303, "learning_rate": 5.152051488158716e-07, "loss": 0.4141, "step": 44390 }, { "epoch": 2.6070107451118547, "grad_norm": 4.6386003494262695, "learning_rate": 5.136959185165408e-07, "loss": 0.3707, "step": 44400 }, { "epoch": 2.6075979096940873, "grad_norm": 7.298197269439697, "learning_rate": 5.121887822999217e-07, "loss": 0.4967, "step": 44410 }, { "epoch": 2.6081850742763195, "grad_norm": 2.282346487045288, "learning_rate": 5.106837408695042e-07, "loss": 0.423, "step": 44420 }, { "epoch": 2.608772238858552, "grad_norm": 7.896353244781494, "learning_rate": 5.091807949277999e-07, "loss": 0.3438, "step": 44430 }, { "epoch": 2.6093594034407843, "grad_norm": 2.1628353595733643, "learning_rate": 5.076799451763431e-07, "loss": 0.3984, "step": 44440 }, { "epoch": 2.609946568023017, "grad_norm": 1.6922690868377686, "learning_rate": 5.061811923156879e-07, "loss": 0.4354, "step": 44450 }, { "epoch": 2.610533732605249, "grad_norm": 3.2441248893737793, "learning_rate": 5.046845370454112e-07, "loss": 0.3994, "step": 44460 }, { "epoch": 2.6111208971874817, "grad_norm": 6.021762371063232, "learning_rate": 5.031899800641104e-07, "loss": 0.3199, "step": 44470 }, { "epoch": 2.611708061769714, "grad_norm": 3.8093101978302, "learning_rate": 5.016975220694031e-07, "loss": 0.3825, "step": 44480 }, { "epoch": 2.6122952263519466, "grad_norm": 7.311063289642334, "learning_rate": 5.002071637579286e-07, "loss": 0.4652, "step": 44490 }, { "epoch": 2.6128823909341787, "grad_norm": 4.211704730987549, "learning_rate": 4.987189058253428e-07, "loss": 0.375, "step": 44500 }, { "epoch": 2.6134695555164114, "grad_norm": 4.834062576293945, "learning_rate": 4.972327489663248e-07, "loss": 0.4915, "step": 44510 }, { "epoch": 2.6140567200986435, "grad_norm": 3.6190552711486816, "learning_rate": 4.957486938745698e-07, "loss": 0.2931, "step": 44520 }, { "epoch": 2.614643884680876, "grad_norm": 5.272799015045166, "learning_rate": 4.942667412427976e-07, "loss": 0.3929, "step": 44530 }, { "epoch": 2.6152310492631083, "grad_norm": 8.234062194824219, "learning_rate": 4.927868917627387e-07, "loss": 0.3188, "step": 44540 }, { "epoch": 2.615818213845341, "grad_norm": 2.2135136127471924, "learning_rate": 4.91309146125148e-07, "loss": 0.4, "step": 44550 }, { "epoch": 2.616405378427573, "grad_norm": 2.1888508796691895, "learning_rate": 4.898335050197955e-07, "loss": 0.5009, "step": 44560 }, { "epoch": 2.6169925430098058, "grad_norm": 3.0458192825317383, "learning_rate": 4.883599691354701e-07, "loss": 0.4405, "step": 44570 }, { "epoch": 2.617579707592038, "grad_norm": 3.8098957538604736, "learning_rate": 4.868885391599781e-07, "loss": 0.4166, "step": 44580 }, { "epoch": 2.6181668721742706, "grad_norm": 7.958824157714844, "learning_rate": 4.854192157801407e-07, "loss": 0.4207, "step": 44590 }, { "epoch": 2.6187540367565028, "grad_norm": 1.6276893615722656, "learning_rate": 4.839519996817977e-07, "loss": 0.4969, "step": 44600 }, { "epoch": 2.6193412013387354, "grad_norm": 3.99003267288208, "learning_rate": 4.824868915498066e-07, "loss": 0.3598, "step": 44610 }, { "epoch": 2.6199283659209676, "grad_norm": 5.7784342765808105, "learning_rate": 4.810238920680394e-07, "loss": 0.4433, "step": 44620 }, { "epoch": 2.6205155305032, "grad_norm": 1.3505841493606567, "learning_rate": 4.79563001919382e-07, "loss": 0.3921, "step": 44630 }, { "epoch": 2.6211026950854324, "grad_norm": 11.812911033630371, "learning_rate": 4.781042217857385e-07, "loss": 0.529, "step": 44640 }, { "epoch": 2.621689859667665, "grad_norm": 6.573962688446045, "learning_rate": 4.766475523480268e-07, "loss": 0.4279, "step": 44650 }, { "epoch": 2.622277024249897, "grad_norm": 4.971717357635498, "learning_rate": 4.7519299428617994e-07, "loss": 0.5745, "step": 44660 }, { "epoch": 2.62286418883213, "grad_norm": 3.417959451675415, "learning_rate": 4.737405482791474e-07, "loss": 0.4276, "step": 44670 }, { "epoch": 2.623451353414362, "grad_norm": 2.951586961746216, "learning_rate": 4.7229021500488636e-07, "loss": 0.4941, "step": 44680 }, { "epoch": 2.6240385179965946, "grad_norm": 1.9526704549789429, "learning_rate": 4.708419951403764e-07, "loss": 0.4414, "step": 44690 }, { "epoch": 2.624625682578827, "grad_norm": 1.5670522451400757, "learning_rate": 4.6939588936160517e-07, "loss": 0.3887, "step": 44700 }, { "epoch": 2.6252128471610594, "grad_norm": 2.1209089756011963, "learning_rate": 4.679518983435749e-07, "loss": 0.4182, "step": 44710 }, { "epoch": 2.6258000117432916, "grad_norm": 9.920764923095703, "learning_rate": 4.665100227603009e-07, "loss": 0.4188, "step": 44720 }, { "epoch": 2.626387176325524, "grad_norm": 3.467280387878418, "learning_rate": 4.650702632848103e-07, "loss": 0.4205, "step": 44730 }, { "epoch": 2.6269743409077564, "grad_norm": 3.1778488159179688, "learning_rate": 4.636326205891428e-07, "loss": 0.4593, "step": 44740 }, { "epoch": 2.6275615054899886, "grad_norm": 3.6219468116760254, "learning_rate": 4.621970953443511e-07, "loss": 0.4965, "step": 44750 }, { "epoch": 2.628148670072221, "grad_norm": 6.358928203582764, "learning_rate": 4.60763688220498e-07, "loss": 0.4355, "step": 44760 }, { "epoch": 2.628735834654454, "grad_norm": 3.1122782230377197, "learning_rate": 4.5933239988665836e-07, "loss": 0.4869, "step": 44770 }, { "epoch": 2.629322999236686, "grad_norm": 2.951995849609375, "learning_rate": 4.5790323101091885e-07, "loss": 0.3476, "step": 44780 }, { "epoch": 2.629910163818918, "grad_norm": 1.7833980321884155, "learning_rate": 4.5647618226037527e-07, "loss": 0.3833, "step": 44790 }, { "epoch": 2.630497328401151, "grad_norm": 2.1888153553009033, "learning_rate": 4.550512543011343e-07, "loss": 0.5054, "step": 44800 }, { "epoch": 2.6310844929833834, "grad_norm": 3.0196657180786133, "learning_rate": 4.5362844779831436e-07, "loss": 0.4361, "step": 44810 }, { "epoch": 2.6316716575656156, "grad_norm": 1.6414161920547485, "learning_rate": 4.5220776341604044e-07, "loss": 0.3883, "step": 44820 }, { "epoch": 2.632258822147848, "grad_norm": 2.976588726043701, "learning_rate": 4.5078920181744867e-07, "loss": 0.5221, "step": 44830 }, { "epoch": 2.6328459867300804, "grad_norm": 2.1554012298583984, "learning_rate": 4.49372763664685e-07, "loss": 0.3908, "step": 44840 }, { "epoch": 2.633433151312313, "grad_norm": 6.525755882263184, "learning_rate": 4.479584496189049e-07, "loss": 0.2906, "step": 44850 }, { "epoch": 2.634020315894545, "grad_norm": 3.0056538581848145, "learning_rate": 4.4654626034026925e-07, "loss": 0.4016, "step": 44860 }, { "epoch": 2.6346074804767774, "grad_norm": 3.2420873641967773, "learning_rate": 4.451361964879497e-07, "loss": 0.3658, "step": 44870 }, { "epoch": 2.63519464505901, "grad_norm": 1.3363516330718994, "learning_rate": 4.437282587201247e-07, "loss": 0.296, "step": 44880 }, { "epoch": 2.6357818096412426, "grad_norm": 14.73280143737793, "learning_rate": 4.4232244769398134e-07, "loss": 0.4338, "step": 44890 }, { "epoch": 2.636368974223475, "grad_norm": 2.8771231174468994, "learning_rate": 4.4091876406571363e-07, "loss": 0.4403, "step": 44900 }, { "epoch": 2.636956138805707, "grad_norm": 1.73167085647583, "learning_rate": 4.3951720849052026e-07, "loss": 0.3763, "step": 44910 }, { "epoch": 2.6375433033879396, "grad_norm": 2.8727781772613525, "learning_rate": 4.3811778162260956e-07, "loss": 0.4849, "step": 44920 }, { "epoch": 2.6381304679701723, "grad_norm": 3.758859872817993, "learning_rate": 4.367204841151962e-07, "loss": 0.4239, "step": 44930 }, { "epoch": 2.6387176325524044, "grad_norm": 2.751633882522583, "learning_rate": 4.3532531662049957e-07, "loss": 0.4013, "step": 44940 }, { "epoch": 2.6393047971346366, "grad_norm": 7.741943836212158, "learning_rate": 4.339322797897466e-07, "loss": 0.3226, "step": 44950 }, { "epoch": 2.6398919617168692, "grad_norm": 3.6752233505249023, "learning_rate": 4.3254137427316547e-07, "loss": 0.4828, "step": 44960 }, { "epoch": 2.640479126299102, "grad_norm": 4.403818130493164, "learning_rate": 4.311526007199945e-07, "loss": 0.4256, "step": 44970 }, { "epoch": 2.641066290881334, "grad_norm": 3.811251640319824, "learning_rate": 4.297659597784737e-07, "loss": 0.4038, "step": 44980 }, { "epoch": 2.6416534554635662, "grad_norm": 2.595492124557495, "learning_rate": 4.2838145209584956e-07, "loss": 0.3851, "step": 44990 }, { "epoch": 2.642240620045799, "grad_norm": 6.810419082641602, "learning_rate": 4.269990783183714e-07, "loss": 0.4093, "step": 45000 }, { "epoch": 2.6428277846280315, "grad_norm": 2.1758639812469482, "learning_rate": 4.256188390912941e-07, "loss": 0.3085, "step": 45010 }, { "epoch": 2.6434149492102637, "grad_norm": 1.8840594291687012, "learning_rate": 4.242407350588734e-07, "loss": 0.4057, "step": 45020 }, { "epoch": 2.644002113792496, "grad_norm": 1.432043194770813, "learning_rate": 4.228647668643721e-07, "loss": 0.2415, "step": 45030 }, { "epoch": 2.6445892783747285, "grad_norm": 3.0482754707336426, "learning_rate": 4.214909351500535e-07, "loss": 0.3249, "step": 45040 }, { "epoch": 2.645176442956961, "grad_norm": 2.4791436195373535, "learning_rate": 4.2011924055718354e-07, "loss": 0.3602, "step": 45050 }, { "epoch": 2.6457636075391933, "grad_norm": 4.66100549697876, "learning_rate": 4.187496837260313e-07, "loss": 0.3979, "step": 45060 }, { "epoch": 2.6463507721214254, "grad_norm": 1.5698617696762085, "learning_rate": 4.1738226529586867e-07, "loss": 0.3686, "step": 45070 }, { "epoch": 2.646937936703658, "grad_norm": 5.341299533843994, "learning_rate": 4.1601698590496895e-07, "loss": 0.5017, "step": 45080 }, { "epoch": 2.6475251012858907, "grad_norm": 2.9320127964019775, "learning_rate": 4.1465384619060576e-07, "loss": 0.4658, "step": 45090 }, { "epoch": 2.648112265868123, "grad_norm": 1.868635892868042, "learning_rate": 4.1329284678905626e-07, "loss": 0.357, "step": 45100 }, { "epoch": 2.648699430450355, "grad_norm": 5.109080791473389, "learning_rate": 4.119339883355966e-07, "loss": 0.433, "step": 45110 }, { "epoch": 2.6492865950325877, "grad_norm": 3.4059600830078125, "learning_rate": 4.105772714645051e-07, "loss": 0.41, "step": 45120 }, { "epoch": 2.6498737596148203, "grad_norm": 3.4904303550720215, "learning_rate": 4.0922269680905955e-07, "loss": 0.3962, "step": 45130 }, { "epoch": 2.6504609241970525, "grad_norm": 2.5710971355438232, "learning_rate": 4.078702650015365e-07, "loss": 0.3127, "step": 45140 }, { "epoch": 2.6510480887792847, "grad_norm": 1.0972472429275513, "learning_rate": 4.065199766732153e-07, "loss": 0.4669, "step": 45150 }, { "epoch": 2.6516352533615173, "grad_norm": 2.3973751068115234, "learning_rate": 4.051718324543713e-07, "loss": 0.3189, "step": 45160 }, { "epoch": 2.65222241794375, "grad_norm": 6.054414749145508, "learning_rate": 4.0382583297428446e-07, "loss": 0.3091, "step": 45170 }, { "epoch": 2.652809582525982, "grad_norm": 5.160797119140625, "learning_rate": 4.0248197886122664e-07, "loss": 0.3669, "step": 45180 }, { "epoch": 2.6533967471082143, "grad_norm": 16.65796661376953, "learning_rate": 4.0114027074247385e-07, "loss": 0.3056, "step": 45190 }, { "epoch": 2.653983911690447, "grad_norm": 5.082331657409668, "learning_rate": 3.998007092442968e-07, "loss": 0.3379, "step": 45200 }, { "epoch": 2.654571076272679, "grad_norm": 2.1074159145355225, "learning_rate": 3.9846329499196756e-07, "loss": 0.3941, "step": 45210 }, { "epoch": 2.6551582408549117, "grad_norm": 6.427253723144531, "learning_rate": 3.971280286097523e-07, "loss": 0.385, "step": 45220 }, { "epoch": 2.655745405437144, "grad_norm": 3.1115646362304688, "learning_rate": 3.957949107209186e-07, "loss": 0.3323, "step": 45230 }, { "epoch": 2.6563325700193765, "grad_norm": 2.825413465499878, "learning_rate": 3.9446394194772584e-07, "loss": 0.3362, "step": 45240 }, { "epoch": 2.6569197346016087, "grad_norm": 3.8298022747039795, "learning_rate": 3.9313512291143665e-07, "loss": 0.3168, "step": 45250 }, { "epoch": 2.6575068991838413, "grad_norm": 17.76002311706543, "learning_rate": 3.918084542323053e-07, "loss": 0.4154, "step": 45260 }, { "epoch": 2.6580940637660735, "grad_norm": 5.920626163482666, "learning_rate": 3.904839365295854e-07, "loss": 0.4732, "step": 45270 }, { "epoch": 2.658681228348306, "grad_norm": 4.8329386711120605, "learning_rate": 3.8916157042152346e-07, "loss": 0.488, "step": 45280 }, { "epoch": 2.6592683929305383, "grad_norm": 2.227544069290161, "learning_rate": 3.878413565253647e-07, "loss": 0.3137, "step": 45290 }, { "epoch": 2.659855557512771, "grad_norm": 12.702027320861816, "learning_rate": 3.865232954573472e-07, "loss": 0.4102, "step": 45300 }, { "epoch": 2.660442722095003, "grad_norm": 4.137290000915527, "learning_rate": 3.852073878327067e-07, "loss": 0.3323, "step": 45310 }, { "epoch": 2.6610298866772357, "grad_norm": 3.4303414821624756, "learning_rate": 3.838936342656718e-07, "loss": 0.4214, "step": 45320 }, { "epoch": 2.661617051259468, "grad_norm": 3.457986354827881, "learning_rate": 3.8258203536946704e-07, "loss": 0.4535, "step": 45330 }, { "epoch": 2.6622042158417005, "grad_norm": 2.2289302349090576, "learning_rate": 3.8127259175630926e-07, "loss": 0.3855, "step": 45340 }, { "epoch": 2.6627913804239327, "grad_norm": 9.38494873046875, "learning_rate": 3.7996530403741185e-07, "loss": 0.3833, "step": 45350 }, { "epoch": 2.6633785450061653, "grad_norm": 7.8017497062683105, "learning_rate": 3.7866017282298104e-07, "loss": 0.3008, "step": 45360 }, { "epoch": 2.6639657095883975, "grad_norm": 2.712911605834961, "learning_rate": 3.773571987222141e-07, "loss": 0.3875, "step": 45370 }, { "epoch": 2.66455287417063, "grad_norm": 2.2406768798828125, "learning_rate": 3.760563823433039e-07, "loss": 0.4394, "step": 45380 }, { "epoch": 2.6651400387528623, "grad_norm": 2.81390380859375, "learning_rate": 3.7475772429343583e-07, "loss": 0.4565, "step": 45390 }, { "epoch": 2.665727203335095, "grad_norm": 3.6957716941833496, "learning_rate": 3.734612251787878e-07, "loss": 0.4001, "step": 45400 }, { "epoch": 2.666314367917327, "grad_norm": 7.403019428253174, "learning_rate": 3.7216688560452964e-07, "loss": 0.3685, "step": 45410 }, { "epoch": 2.6669015324995597, "grad_norm": 2.1387577056884766, "learning_rate": 3.708747061748236e-07, "loss": 0.4255, "step": 45420 }, { "epoch": 2.667488697081792, "grad_norm": 2.328014612197876, "learning_rate": 3.6958468749282215e-07, "loss": 0.4923, "step": 45430 }, { "epoch": 2.6680758616640246, "grad_norm": 5.951178073883057, "learning_rate": 3.6829683016067153e-07, "loss": 0.4123, "step": 45440 }, { "epoch": 2.6686630262462567, "grad_norm": 1.988975167274475, "learning_rate": 3.6701113477950747e-07, "loss": 0.3998, "step": 45450 }, { "epoch": 2.6692501908284894, "grad_norm": 3.667914867401123, "learning_rate": 3.657276019494582e-07, "loss": 0.4377, "step": 45460 }, { "epoch": 2.6698373554107215, "grad_norm": 4.292534828186035, "learning_rate": 3.6444623226963907e-07, "loss": 0.4359, "step": 45470 }, { "epoch": 2.670424519992954, "grad_norm": 1.9667692184448242, "learning_rate": 3.631670263381587e-07, "loss": 0.3422, "step": 45480 }, { "epoch": 2.6710116845751863, "grad_norm": 21.746545791625977, "learning_rate": 3.6188998475211567e-07, "loss": 0.4603, "step": 45490 }, { "epoch": 2.671598849157419, "grad_norm": 3.1082916259765625, "learning_rate": 3.6061510810759923e-07, "loss": 0.381, "step": 45500 }, { "epoch": 2.672186013739651, "grad_norm": 4.2724385261535645, "learning_rate": 3.593423969996834e-07, "loss": 0.4616, "step": 45510 }, { "epoch": 2.6727731783218838, "grad_norm": 4.0792460441589355, "learning_rate": 3.5807185202243634e-07, "loss": 0.3663, "step": 45520 }, { "epoch": 2.673360342904116, "grad_norm": 5.672051429748535, "learning_rate": 3.56803473768913e-07, "loss": 0.4771, "step": 45530 }, { "epoch": 2.6739475074863486, "grad_norm": 4.717836856842041, "learning_rate": 3.5553726283115763e-07, "loss": 0.3988, "step": 45540 }, { "epoch": 2.6745346720685808, "grad_norm": 1.3712306022644043, "learning_rate": 3.5427321980020244e-07, "loss": 0.3507, "step": 45550 }, { "epoch": 2.6751218366508134, "grad_norm": 8.602185249328613, "learning_rate": 3.530113452660661e-07, "loss": 0.5025, "step": 45560 }, { "epoch": 2.6757090012330456, "grad_norm": 1.957774043083191, "learning_rate": 3.517516398177584e-07, "loss": 0.4091, "step": 45570 }, { "epoch": 2.676296165815278, "grad_norm": 2.064645528793335, "learning_rate": 3.504941040432752e-07, "loss": 0.2743, "step": 45580 }, { "epoch": 2.6768833303975104, "grad_norm": 3.6115028858184814, "learning_rate": 3.492387385295998e-07, "loss": 0.4109, "step": 45590 }, { "epoch": 2.677470494979743, "grad_norm": 6.2818922996521, "learning_rate": 3.479855438627006e-07, "loss": 0.3157, "step": 45600 }, { "epoch": 2.678057659561975, "grad_norm": 3.348665475845337, "learning_rate": 3.4673452062753465e-07, "loss": 0.4032, "step": 45610 }, { "epoch": 2.6786448241442073, "grad_norm": 7.494595527648926, "learning_rate": 3.454856694080455e-07, "loss": 0.439, "step": 45620 }, { "epoch": 2.67923198872644, "grad_norm": 3.882127285003662, "learning_rate": 3.442389907871618e-07, "loss": 0.3516, "step": 45630 }, { "epoch": 2.6798191533086726, "grad_norm": 2.33699631690979, "learning_rate": 3.4299448534679926e-07, "loss": 0.3528, "step": 45640 }, { "epoch": 2.6804063178909048, "grad_norm": 1.4414275884628296, "learning_rate": 3.417521536678586e-07, "loss": 0.3674, "step": 45650 }, { "epoch": 2.680993482473137, "grad_norm": 14.919455528259277, "learning_rate": 3.40511996330225e-07, "loss": 0.422, "step": 45660 }, { "epoch": 2.6815806470553696, "grad_norm": 3.4001946449279785, "learning_rate": 3.392740139127709e-07, "loss": 0.3688, "step": 45670 }, { "epoch": 2.682167811637602, "grad_norm": 9.702862739562988, "learning_rate": 3.380382069933524e-07, "loss": 0.5266, "step": 45680 }, { "epoch": 2.6827549762198344, "grad_norm": 7.5273542404174805, "learning_rate": 3.3680457614880815e-07, "loss": 0.3867, "step": 45690 }, { "epoch": 2.6833421408020666, "grad_norm": 4.693478584289551, "learning_rate": 3.3557312195496403e-07, "loss": 0.4003, "step": 45700 }, { "epoch": 2.683929305384299, "grad_norm": 15.45829963684082, "learning_rate": 3.343438449866293e-07, "loss": 0.395, "step": 45710 }, { "epoch": 2.684516469966532, "grad_norm": 8.02188777923584, "learning_rate": 3.331167458175943e-07, "loss": 0.3962, "step": 45720 }, { "epoch": 2.685103634548764, "grad_norm": 9.77765941619873, "learning_rate": 3.3189182502063874e-07, "loss": 0.3981, "step": 45730 }, { "epoch": 2.685690799130996, "grad_norm": 1.9103609323501587, "learning_rate": 3.3066908316751913e-07, "loss": 0.4177, "step": 45740 }, { "epoch": 2.686277963713229, "grad_norm": 3.6989827156066895, "learning_rate": 3.294485208289777e-07, "loss": 0.4956, "step": 45750 }, { "epoch": 2.6868651282954614, "grad_norm": 1.918100357055664, "learning_rate": 3.2823013857473996e-07, "loss": 0.4536, "step": 45760 }, { "epoch": 2.6874522928776936, "grad_norm": 4.529487609863281, "learning_rate": 3.270139369735126e-07, "loss": 0.4067, "step": 45770 }, { "epoch": 2.688039457459926, "grad_norm": 3.305633544921875, "learning_rate": 3.2579991659298593e-07, "loss": 0.4195, "step": 45780 }, { "epoch": 2.6886266220421584, "grad_norm": 3.85823130607605, "learning_rate": 3.245880779998295e-07, "loss": 0.404, "step": 45790 }, { "epoch": 2.689213786624391, "grad_norm": 6.210549831390381, "learning_rate": 3.233784217596958e-07, "loss": 0.3224, "step": 45800 }, { "epoch": 2.689800951206623, "grad_norm": 1.4753973484039307, "learning_rate": 3.2217094843722097e-07, "loss": 0.3223, "step": 45810 }, { "epoch": 2.6903881157888554, "grad_norm": 3.455826997756958, "learning_rate": 3.209656585960197e-07, "loss": 0.4759, "step": 45820 }, { "epoch": 2.690975280371088, "grad_norm": 4.654003143310547, "learning_rate": 3.197625527986864e-07, "loss": 0.4448, "step": 45830 }, { "epoch": 2.6915624449533206, "grad_norm": 2.9431872367858887, "learning_rate": 3.185616316067985e-07, "loss": 0.4497, "step": 45840 }, { "epoch": 2.692149609535553, "grad_norm": 1.7521235942840576, "learning_rate": 3.1736289558091324e-07, "loss": 0.3674, "step": 45850 }, { "epoch": 2.692736774117785, "grad_norm": 1.6909534931182861, "learning_rate": 3.161663452805669e-07, "loss": 0.4677, "step": 45860 }, { "epoch": 2.6933239387000176, "grad_norm": 4.840485572814941, "learning_rate": 3.149719812642771e-07, "loss": 0.4527, "step": 45870 }, { "epoch": 2.6939111032822503, "grad_norm": 4.5099406242370605, "learning_rate": 3.137798040895379e-07, "loss": 0.385, "step": 45880 }, { "epoch": 2.6944982678644824, "grad_norm": 3.5221478939056396, "learning_rate": 3.1258981431282587e-07, "loss": 0.392, "step": 45890 }, { "epoch": 2.6950854324467146, "grad_norm": 2.9624600410461426, "learning_rate": 3.114020124895961e-07, "loss": 0.349, "step": 45900 }, { "epoch": 2.6956725970289472, "grad_norm": 1.2209521532058716, "learning_rate": 3.1021639917428126e-07, "loss": 0.3612, "step": 45910 }, { "epoch": 2.69625976161118, "grad_norm": 5.5475969314575195, "learning_rate": 3.090329749202914e-07, "loss": 0.3839, "step": 45920 }, { "epoch": 2.696846926193412, "grad_norm": 4.808139324188232, "learning_rate": 3.078517402800174e-07, "loss": 0.3332, "step": 45930 }, { "epoch": 2.6974340907756442, "grad_norm": 3.4361190795898438, "learning_rate": 3.0667269580482716e-07, "loss": 0.3882, "step": 45940 }, { "epoch": 2.698021255357877, "grad_norm": 2.5169808864593506, "learning_rate": 3.054958420450654e-07, "loss": 0.2446, "step": 45950 }, { "epoch": 2.6986084199401095, "grad_norm": 2.6564900875091553, "learning_rate": 3.043211795500556e-07, "loss": 0.3124, "step": 45960 }, { "epoch": 2.6991955845223417, "grad_norm": 2.36094331741333, "learning_rate": 3.031487088680973e-07, "loss": 0.3994, "step": 45970 }, { "epoch": 2.699782749104574, "grad_norm": 5.355538368225098, "learning_rate": 3.0197843054646736e-07, "loss": 0.5373, "step": 45980 }, { "epoch": 2.7003699136868065, "grad_norm": 6.627865791320801, "learning_rate": 3.0081034513141985e-07, "loss": 0.4063, "step": 45990 }, { "epoch": 2.700957078269039, "grad_norm": 17.83757972717285, "learning_rate": 2.996444531681841e-07, "loss": 0.4011, "step": 46000 }, { "epoch": 2.7015442428512713, "grad_norm": 4.418401718139648, "learning_rate": 2.984807552009672e-07, "loss": 0.3673, "step": 46010 }, { "epoch": 2.7021314074335034, "grad_norm": 4.660778522491455, "learning_rate": 2.973192517729495e-07, "loss": 0.467, "step": 46020 }, { "epoch": 2.702718572015736, "grad_norm": 6.839929580688477, "learning_rate": 2.9615994342629017e-07, "loss": 0.347, "step": 46030 }, { "epoch": 2.7033057365979682, "grad_norm": 5.778294563293457, "learning_rate": 2.950028307021213e-07, "loss": 0.3801, "step": 46040 }, { "epoch": 2.703892901180201, "grad_norm": 6.543869495391846, "learning_rate": 2.9384791414055205e-07, "loss": 0.418, "step": 46050 }, { "epoch": 2.704480065762433, "grad_norm": 6.626438617706299, "learning_rate": 2.9269519428066495e-07, "loss": 0.3797, "step": 46060 }, { "epoch": 2.7050672303446657, "grad_norm": 8.285542488098145, "learning_rate": 2.91544671660518e-07, "loss": 0.3755, "step": 46070 }, { "epoch": 2.705654394926898, "grad_norm": 2.7540786266326904, "learning_rate": 2.9039634681714336e-07, "loss": 0.3996, "step": 46080 }, { "epoch": 2.7062415595091305, "grad_norm": 3.605851173400879, "learning_rate": 2.8925022028654715e-07, "loss": 0.3985, "step": 46090 }, { "epoch": 2.7068287240913627, "grad_norm": 17.83403778076172, "learning_rate": 2.8810629260371034e-07, "loss": 0.4834, "step": 46100 }, { "epoch": 2.7074158886735953, "grad_norm": 3.5109665393829346, "learning_rate": 2.869645643025853e-07, "loss": 0.4491, "step": 46110 }, { "epoch": 2.7080030532558275, "grad_norm": 3.118089437484741, "learning_rate": 2.8582503591609964e-07, "loss": 0.3462, "step": 46120 }, { "epoch": 2.70859021783806, "grad_norm": 6.157377243041992, "learning_rate": 2.8468770797615395e-07, "loss": 0.4997, "step": 46130 }, { "epoch": 2.7091773824202923, "grad_norm": 1.4563615322113037, "learning_rate": 2.835525810136225e-07, "loss": 0.3629, "step": 46140 }, { "epoch": 2.709764547002525, "grad_norm": 3.684967279434204, "learning_rate": 2.824196555583503e-07, "loss": 0.4398, "step": 46150 }, { "epoch": 2.710351711584757, "grad_norm": 2.3885738849639893, "learning_rate": 2.8128893213915485e-07, "loss": 0.4663, "step": 46160 }, { "epoch": 2.7109388761669897, "grad_norm": 6.017340183258057, "learning_rate": 2.8016041128382745e-07, "loss": 0.3681, "step": 46170 }, { "epoch": 2.711526040749222, "grad_norm": 19.604856491088867, "learning_rate": 2.7903409351913045e-07, "loss": 0.3748, "step": 46180 }, { "epoch": 2.7121132053314545, "grad_norm": 2.083353042602539, "learning_rate": 2.779099793707979e-07, "loss": 0.401, "step": 46190 }, { "epoch": 2.7127003699136867, "grad_norm": 2.2295725345611572, "learning_rate": 2.7678806936353496e-07, "loss": 0.3294, "step": 46200 }, { "epoch": 2.7132875344959193, "grad_norm": 5.07538366317749, "learning_rate": 2.7566836402101724e-07, "loss": 0.3485, "step": 46210 }, { "epoch": 2.7138746990781515, "grad_norm": 2.059394121170044, "learning_rate": 2.745508638658939e-07, "loss": 0.3376, "step": 46220 }, { "epoch": 2.714461863660384, "grad_norm": 10.600929260253906, "learning_rate": 2.7343556941978213e-07, "loss": 0.3569, "step": 46230 }, { "epoch": 2.7150490282426163, "grad_norm": 3.796678066253662, "learning_rate": 2.7232248120327207e-07, "loss": 0.3059, "step": 46240 }, { "epoch": 2.715636192824849, "grad_norm": 3.8180429935455322, "learning_rate": 2.7121159973592047e-07, "loss": 0.4228, "step": 46250 }, { "epoch": 2.716223357407081, "grad_norm": 5.392240047454834, "learning_rate": 2.7010292553625697e-07, "loss": 0.3447, "step": 46260 }, { "epoch": 2.7168105219893137, "grad_norm": 2.1125240325927734, "learning_rate": 2.6899645912177973e-07, "loss": 0.423, "step": 46270 }, { "epoch": 2.717397686571546, "grad_norm": 3.1366679668426514, "learning_rate": 2.6789220100895683e-07, "loss": 0.3974, "step": 46280 }, { "epoch": 2.7179848511537785, "grad_norm": 12.372179985046387, "learning_rate": 2.66790151713226e-07, "loss": 0.4216, "step": 46290 }, { "epoch": 2.7185720157360107, "grad_norm": 2.8794052600860596, "learning_rate": 2.6569031174899286e-07, "loss": 0.3181, "step": 46300 }, { "epoch": 2.7191591803182433, "grad_norm": 4.757332801818848, "learning_rate": 2.6459268162963236e-07, "loss": 0.4911, "step": 46310 }, { "epoch": 2.7197463449004755, "grad_norm": 10.787747383117676, "learning_rate": 2.634972618674875e-07, "loss": 0.4788, "step": 46320 }, { "epoch": 2.720333509482708, "grad_norm": 3.239635705947876, "learning_rate": 2.6240405297387137e-07, "loss": 0.3271, "step": 46330 }, { "epoch": 2.7209206740649403, "grad_norm": 2.31535267829895, "learning_rate": 2.6131305545906207e-07, "loss": 0.3928, "step": 46340 }, { "epoch": 2.721507838647173, "grad_norm": 2.274751901626587, "learning_rate": 2.602242698323071e-07, "loss": 0.3784, "step": 46350 }, { "epoch": 2.722095003229405, "grad_norm": 2.5859525203704834, "learning_rate": 2.5913769660182277e-07, "loss": 0.2875, "step": 46360 }, { "epoch": 2.7226821678116377, "grad_norm": 3.3605189323425293, "learning_rate": 2.5805333627479e-07, "loss": 0.3391, "step": 46370 }, { "epoch": 2.72326933239387, "grad_norm": 2.220749616622925, "learning_rate": 2.56971189357359e-07, "loss": 0.3864, "step": 46380 }, { "epoch": 2.7238564969761025, "grad_norm": 1.936650276184082, "learning_rate": 2.558912563546462e-07, "loss": 0.3366, "step": 46390 }, { "epoch": 2.7244436615583347, "grad_norm": 1.4100666046142578, "learning_rate": 2.548135377707339e-07, "loss": 0.4045, "step": 46400 }, { "epoch": 2.7250308261405674, "grad_norm": 1.8051868677139282, "learning_rate": 2.5373803410867206e-07, "loss": 0.4061, "step": 46410 }, { "epoch": 2.7256179907227995, "grad_norm": 4.635553359985352, "learning_rate": 2.5266474587047564e-07, "loss": 0.3843, "step": 46420 }, { "epoch": 2.726205155305032, "grad_norm": 3.032679557800293, "learning_rate": 2.515936735571256e-07, "loss": 0.4592, "step": 46430 }, { "epoch": 2.7267923198872643, "grad_norm": 3.2160186767578125, "learning_rate": 2.505248176685693e-07, "loss": 0.453, "step": 46440 }, { "epoch": 2.727379484469497, "grad_norm": 7.965133190155029, "learning_rate": 2.494581787037181e-07, "loss": 0.4421, "step": 46450 }, { "epoch": 2.727966649051729, "grad_norm": 6.844544887542725, "learning_rate": 2.483937571604522e-07, "loss": 0.446, "step": 46460 }, { "epoch": 2.7285538136339618, "grad_norm": 4.861785888671875, "learning_rate": 2.4733155353561133e-07, "loss": 0.4521, "step": 46470 }, { "epoch": 2.729140978216194, "grad_norm": 2.4009995460510254, "learning_rate": 2.4627156832500386e-07, "loss": 0.4171, "step": 46480 }, { "epoch": 2.729728142798426, "grad_norm": 1.2340785264968872, "learning_rate": 2.452138020234018e-07, "loss": 0.3368, "step": 46490 }, { "epoch": 2.7303153073806588, "grad_norm": 2.203653335571289, "learning_rate": 2.4415825512454085e-07, "loss": 0.3872, "step": 46500 }, { "epoch": 2.7309024719628914, "grad_norm": 4.312838554382324, "learning_rate": 2.431049281211212e-07, "loss": 0.3142, "step": 46510 }, { "epoch": 2.7314896365451236, "grad_norm": 14.711164474487305, "learning_rate": 2.4205382150480794e-07, "loss": 0.3425, "step": 46520 }, { "epoch": 2.7320768011273557, "grad_norm": 3.959087610244751, "learning_rate": 2.4100493576622585e-07, "loss": 0.3202, "step": 46530 }, { "epoch": 2.7326639657095884, "grad_norm": 2.064349412918091, "learning_rate": 2.399582713949683e-07, "loss": 0.3055, "step": 46540 }, { "epoch": 2.733251130291821, "grad_norm": 7.070395469665527, "learning_rate": 2.389138288795878e-07, "loss": 0.4581, "step": 46550 }, { "epoch": 2.733838294874053, "grad_norm": 3.065143346786499, "learning_rate": 2.3787160870760329e-07, "loss": 0.3778, "step": 46560 }, { "epoch": 2.7344254594562853, "grad_norm": 6.13034200668335, "learning_rate": 2.368316113654917e-07, "loss": 0.3956, "step": 46570 }, { "epoch": 2.735012624038518, "grad_norm": 4.057127475738525, "learning_rate": 2.3579383733869644e-07, "loss": 0.38, "step": 46580 }, { "epoch": 2.7355997886207506, "grad_norm": 6.646522521972656, "learning_rate": 2.3475828711162163e-07, "loss": 0.3312, "step": 46590 }, { "epoch": 2.7361869532029828, "grad_norm": 3.9899351596832275, "learning_rate": 2.3372496116763343e-07, "loss": 0.4996, "step": 46600 }, { "epoch": 2.736774117785215, "grad_norm": 5.412353038787842, "learning_rate": 2.3269385998905935e-07, "loss": 0.3835, "step": 46610 }, { "epoch": 2.7373612823674476, "grad_norm": 2.4881703853607178, "learning_rate": 2.3166498405718997e-07, "loss": 0.3356, "step": 46620 }, { "epoch": 2.73794844694968, "grad_norm": 3.3253440856933594, "learning_rate": 2.3063833385227496e-07, "loss": 0.4368, "step": 46630 }, { "epoch": 2.7385356115319124, "grad_norm": 24.68531036376953, "learning_rate": 2.2961390985352717e-07, "loss": 0.5123, "step": 46640 }, { "epoch": 2.7391227761141446, "grad_norm": 5.094753265380859, "learning_rate": 2.285917125391196e-07, "loss": 0.4448, "step": 46650 }, { "epoch": 2.739709940696377, "grad_norm": 4.299410820007324, "learning_rate": 2.2757174238618396e-07, "loss": 0.4175, "step": 46660 }, { "epoch": 2.74029710527861, "grad_norm": 3.916452407836914, "learning_rate": 2.2655399987081605e-07, "loss": 0.289, "step": 46670 }, { "epoch": 2.740884269860842, "grad_norm": 2.955411434173584, "learning_rate": 2.2553848546806866e-07, "loss": 0.3944, "step": 46680 }, { "epoch": 2.741471434443074, "grad_norm": 3.2405178546905518, "learning_rate": 2.245251996519565e-07, "loss": 0.474, "step": 46690 }, { "epoch": 2.742058599025307, "grad_norm": 5.771162033081055, "learning_rate": 2.2351414289545293e-07, "loss": 0.4048, "step": 46700 }, { "epoch": 2.7426457636075394, "grad_norm": 4.392556190490723, "learning_rate": 2.2250531567049206e-07, "loss": 0.479, "step": 46710 }, { "epoch": 2.7432329281897716, "grad_norm": 3.099430561065674, "learning_rate": 2.2149871844796555e-07, "loss": 0.4179, "step": 46720 }, { "epoch": 2.743820092772004, "grad_norm": 1.364377737045288, "learning_rate": 2.2049435169772594e-07, "loss": 0.4032, "step": 46730 }, { "epoch": 2.7444072573542364, "grad_norm": 3.1084606647491455, "learning_rate": 2.1949221588858373e-07, "loss": 0.3639, "step": 46740 }, { "epoch": 2.744994421936469, "grad_norm": 7.87115478515625, "learning_rate": 2.1849231148830863e-07, "loss": 0.3859, "step": 46750 }, { "epoch": 2.745581586518701, "grad_norm": 2.747922420501709, "learning_rate": 2.1749463896362678e-07, "loss": 0.4411, "step": 46760 }, { "epoch": 2.7461687511009334, "grad_norm": 2.0525989532470703, "learning_rate": 2.1649919878022507e-07, "loss": 0.3084, "step": 46770 }, { "epoch": 2.746755915683166, "grad_norm": 2.6019961833953857, "learning_rate": 2.1550599140274798e-07, "loss": 0.29, "step": 46780 }, { "epoch": 2.7473430802653986, "grad_norm": 1.771662712097168, "learning_rate": 2.145150172947974e-07, "loss": 0.3851, "step": 46790 }, { "epoch": 2.747930244847631, "grad_norm": 4.36214542388916, "learning_rate": 2.135262769189317e-07, "loss": 0.3906, "step": 46800 }, { "epoch": 2.748517409429863, "grad_norm": 8.524425506591797, "learning_rate": 2.125397707366672e-07, "loss": 0.4868, "step": 46810 }, { "epoch": 2.7491045740120956, "grad_norm": 3.6363348960876465, "learning_rate": 2.1155549920847895e-07, "loss": 0.3473, "step": 46820 }, { "epoch": 2.7496917385943282, "grad_norm": 4.858809471130371, "learning_rate": 2.105734627937972e-07, "loss": 0.2683, "step": 46830 }, { "epoch": 2.7502789031765604, "grad_norm": 3.074327230453491, "learning_rate": 2.0959366195101027e-07, "loss": 0.5648, "step": 46840 }, { "epoch": 2.7508660677587926, "grad_norm": 8.145017623901367, "learning_rate": 2.0861609713745955e-07, "loss": 0.4637, "step": 46850 }, { "epoch": 2.7514532323410252, "grad_norm": 3.8122546672821045, "learning_rate": 2.0764076880944727e-07, "loss": 0.4001, "step": 46860 }, { "epoch": 2.752040396923258, "grad_norm": 3.1933841705322266, "learning_rate": 2.066676774222298e-07, "loss": 0.4156, "step": 46870 }, { "epoch": 2.75262756150549, "grad_norm": 1.8118032217025757, "learning_rate": 2.056968234300194e-07, "loss": 0.3154, "step": 46880 }, { "epoch": 2.753214726087722, "grad_norm": 5.562311172485352, "learning_rate": 2.0472820728598298e-07, "loss": 0.5368, "step": 46890 }, { "epoch": 2.753801890669955, "grad_norm": 2.7058284282684326, "learning_rate": 2.0376182944224388e-07, "loss": 0.371, "step": 46900 }, { "epoch": 2.754389055252187, "grad_norm": 6.94660758972168, "learning_rate": 2.0279769034988072e-07, "loss": 0.524, "step": 46910 }, { "epoch": 2.7549762198344196, "grad_norm": 10.659381866455078, "learning_rate": 2.0183579045892689e-07, "loss": 0.4469, "step": 46920 }, { "epoch": 2.755563384416652, "grad_norm": 3.829662799835205, "learning_rate": 2.008761302183715e-07, "loss": 0.4551, "step": 46930 }, { "epoch": 2.7561505489988845, "grad_norm": 1.7589218616485596, "learning_rate": 1.9991871007615627e-07, "loss": 0.3431, "step": 46940 }, { "epoch": 2.7567377135811166, "grad_norm": 3.500685214996338, "learning_rate": 1.989635304791787e-07, "loss": 0.4014, "step": 46950 }, { "epoch": 2.7573248781633493, "grad_norm": 5.986656188964844, "learning_rate": 1.9801059187329109e-07, "loss": 0.3965, "step": 46960 }, { "epoch": 2.7579120427455814, "grad_norm": 21.624977111816406, "learning_rate": 1.970598947032981e-07, "loss": 0.3543, "step": 46970 }, { "epoch": 2.758499207327814, "grad_norm": 4.344799995422363, "learning_rate": 1.961114394129593e-07, "loss": 0.3643, "step": 46980 }, { "epoch": 2.7590863719100462, "grad_norm": 8.579095840454102, "learning_rate": 1.951652264449866e-07, "loss": 0.2768, "step": 46990 }, { "epoch": 2.759673536492279, "grad_norm": 9.295198440551758, "learning_rate": 1.942212562410467e-07, "loss": 0.374, "step": 47000 }, { "epoch": 2.760260701074511, "grad_norm": 2.6919965744018555, "learning_rate": 1.9327952924175774e-07, "loss": 0.3725, "step": 47010 }, { "epoch": 2.7608478656567437, "grad_norm": 5.1558074951171875, "learning_rate": 1.923400458866942e-07, "loss": 0.407, "step": 47020 }, { "epoch": 2.761435030238976, "grad_norm": 6.934578895568848, "learning_rate": 1.9140280661437914e-07, "loss": 0.3578, "step": 47030 }, { "epoch": 2.7620221948212085, "grad_norm": 8.60124397277832, "learning_rate": 1.904678118622899e-07, "loss": 0.4069, "step": 47040 }, { "epoch": 2.7626093594034407, "grad_norm": 8.206174850463867, "learning_rate": 1.8953506206685678e-07, "loss": 0.3202, "step": 47050 }, { "epoch": 2.7631965239856733, "grad_norm": 3.2084760665893555, "learning_rate": 1.8860455766346097e-07, "loss": 0.4502, "step": 47060 }, { "epoch": 2.7637836885679055, "grad_norm": 3.878319025039673, "learning_rate": 1.876762990864378e-07, "loss": 0.508, "step": 47070 }, { "epoch": 2.764370853150138, "grad_norm": 6.738466739654541, "learning_rate": 1.867502867690707e-07, "loss": 0.4224, "step": 47080 }, { "epoch": 2.7649580177323703, "grad_norm": 5.871855735778809, "learning_rate": 1.8582652114359667e-07, "loss": 0.3458, "step": 47090 }, { "epoch": 2.765545182314603, "grad_norm": 2.4609460830688477, "learning_rate": 1.8490500264120525e-07, "loss": 0.3958, "step": 47100 }, { "epoch": 2.766132346896835, "grad_norm": 2.4164958000183105, "learning_rate": 1.8398573169203626e-07, "loss": 0.4998, "step": 47110 }, { "epoch": 2.7667195114790677, "grad_norm": 3.58469295501709, "learning_rate": 1.8306870872517813e-07, "loss": 0.5008, "step": 47120 }, { "epoch": 2.7673066760613, "grad_norm": 4.563318252563477, "learning_rate": 1.8215393416867288e-07, "loss": 0.3404, "step": 47130 }, { "epoch": 2.7678938406435325, "grad_norm": 7.692144393920898, "learning_rate": 1.8124140844951178e-07, "loss": 0.3764, "step": 47140 }, { "epoch": 2.7684810052257647, "grad_norm": 4.678208827972412, "learning_rate": 1.8033113199363627e-07, "loss": 0.3774, "step": 47150 }, { "epoch": 2.7690681698079973, "grad_norm": 2.73884916305542, "learning_rate": 1.794231052259393e-07, "loss": 0.3744, "step": 47160 }, { "epoch": 2.7696553343902295, "grad_norm": 3.46468186378479, "learning_rate": 1.7851732857026126e-07, "loss": 0.3757, "step": 47170 }, { "epoch": 2.770242498972462, "grad_norm": 2.7277493476867676, "learning_rate": 1.7761380244939454e-07, "loss": 0.4199, "step": 47180 }, { "epoch": 2.7708296635546943, "grad_norm": 2.602670431137085, "learning_rate": 1.7671252728508004e-07, "loss": 0.3441, "step": 47190 }, { "epoch": 2.771416828136927, "grad_norm": 6.13718318939209, "learning_rate": 1.758135034980085e-07, "loss": 0.2996, "step": 47200 }, { "epoch": 2.772003992719159, "grad_norm": 4.424824237823486, "learning_rate": 1.7491673150781818e-07, "loss": 0.3134, "step": 47210 }, { "epoch": 2.7725911573013917, "grad_norm": 5.401703834533691, "learning_rate": 1.740222117330981e-07, "loss": 0.4639, "step": 47220 }, { "epoch": 2.773178321883624, "grad_norm": 4.117897987365723, "learning_rate": 1.7312994459138533e-07, "loss": 0.4499, "step": 47230 }, { "epoch": 2.7737654864658565, "grad_norm": 2.1344287395477295, "learning_rate": 1.722399304991662e-07, "loss": 0.2901, "step": 47240 }, { "epoch": 2.7743526510480887, "grad_norm": 3.249616861343384, "learning_rate": 1.7135216987187342e-07, "loss": 0.4029, "step": 47250 }, { "epoch": 2.7749398156303213, "grad_norm": 1.5320883989334106, "learning_rate": 1.7046666312388993e-07, "loss": 0.4676, "step": 47260 }, { "epoch": 2.7755269802125535, "grad_norm": 3.189213275909424, "learning_rate": 1.6958341066854566e-07, "loss": 0.374, "step": 47270 }, { "epoch": 2.776114144794786, "grad_norm": 3.355142831802368, "learning_rate": 1.6870241291811805e-07, "loss": 0.36, "step": 47280 }, { "epoch": 2.7767013093770183, "grad_norm": 2.6037180423736572, "learning_rate": 1.6782367028383317e-07, "loss": 0.3293, "step": 47290 }, { "epoch": 2.777288473959251, "grad_norm": 8.115367889404297, "learning_rate": 1.66947183175864e-07, "loss": 0.2495, "step": 47300 }, { "epoch": 2.777875638541483, "grad_norm": 2.241934061050415, "learning_rate": 1.6607295200332941e-07, "loss": 0.4949, "step": 47310 }, { "epoch": 2.7784628031237157, "grad_norm": 2.6581525802612305, "learning_rate": 1.6520097717429685e-07, "loss": 0.4367, "step": 47320 }, { "epoch": 2.779049967705948, "grad_norm": 6.2687554359436035, "learning_rate": 1.6433125909577908e-07, "loss": 0.3774, "step": 47330 }, { "epoch": 2.7796371322881805, "grad_norm": 6.2441911697387695, "learning_rate": 1.6346379817373913e-07, "loss": 0.4829, "step": 47340 }, { "epoch": 2.7802242968704127, "grad_norm": 4.620938777923584, "learning_rate": 1.6259859481308148e-07, "loss": 0.4488, "step": 47350 }, { "epoch": 2.780811461452645, "grad_norm": 4.098855018615723, "learning_rate": 1.6173564941765917e-07, "loss": 0.3715, "step": 47360 }, { "epoch": 2.7813986260348775, "grad_norm": 5.599252700805664, "learning_rate": 1.6087496239027221e-07, "loss": 0.3215, "step": 47370 }, { "epoch": 2.78198579061711, "grad_norm": 3.845940113067627, "learning_rate": 1.6001653413266537e-07, "loss": 0.4098, "step": 47380 }, { "epoch": 2.7825729551993423, "grad_norm": 3.185987949371338, "learning_rate": 1.5916036504552924e-07, "loss": 0.4225, "step": 47390 }, { "epoch": 2.7831601197815745, "grad_norm": 3.934349536895752, "learning_rate": 1.583064555284991e-07, "loss": 0.3238, "step": 47400 }, { "epoch": 2.783747284363807, "grad_norm": 1.8886713981628418, "learning_rate": 1.5745480598015673e-07, "loss": 0.4696, "step": 47410 }, { "epoch": 2.7843344489460398, "grad_norm": 1.8278523683547974, "learning_rate": 1.5660541679802854e-07, "loss": 0.3585, "step": 47420 }, { "epoch": 2.784921613528272, "grad_norm": 4.317875862121582, "learning_rate": 1.557582883785874e-07, "loss": 0.2562, "step": 47430 }, { "epoch": 2.785508778110504, "grad_norm": 1.2739421129226685, "learning_rate": 1.5491342111724707e-07, "loss": 0.3954, "step": 47440 }, { "epoch": 2.7860959426927367, "grad_norm": 2.0623888969421387, "learning_rate": 1.5407081540836922e-07, "loss": 0.3912, "step": 47450 }, { "epoch": 2.7866831072749694, "grad_norm": 3.4039909839630127, "learning_rate": 1.532304716452593e-07, "loss": 0.3937, "step": 47460 }, { "epoch": 2.7872702718572016, "grad_norm": 3.8708741664886475, "learning_rate": 1.5239239022016572e-07, "loss": 0.4858, "step": 47470 }, { "epoch": 2.7878574364394337, "grad_norm": 2.9688305854797363, "learning_rate": 1.5155657152428228e-07, "loss": 0.3693, "step": 47480 }, { "epoch": 2.7884446010216664, "grad_norm": 1.1165684461593628, "learning_rate": 1.507230159477463e-07, "loss": 0.3373, "step": 47490 }, { "epoch": 2.789031765603899, "grad_norm": 3.065777540206909, "learning_rate": 1.4989172387963824e-07, "loss": 0.3215, "step": 47500 }, { "epoch": 2.789618930186131, "grad_norm": 3.1388957500457764, "learning_rate": 1.490626957079816e-07, "loss": 0.3997, "step": 47510 }, { "epoch": 2.7902060947683633, "grad_norm": 8.82332992553711, "learning_rate": 1.482359318197446e-07, "loss": 0.3868, "step": 47520 }, { "epoch": 2.790793259350596, "grad_norm": 3.6164958477020264, "learning_rate": 1.47411432600838e-07, "loss": 0.4881, "step": 47530 }, { "epoch": 2.7913804239328286, "grad_norm": 7.925108909606934, "learning_rate": 1.4658919843611452e-07, "loss": 0.4672, "step": 47540 }, { "epoch": 2.7919675885150608, "grad_norm": 16.689218521118164, "learning_rate": 1.4576922970937048e-07, "loss": 0.4107, "step": 47550 }, { "epoch": 2.792554753097293, "grad_norm": 7.0124945640563965, "learning_rate": 1.4495152680334467e-07, "loss": 0.4255, "step": 47560 }, { "epoch": 2.7931419176795256, "grad_norm": 1.8996455669403076, "learning_rate": 1.4413609009971796e-07, "loss": 0.3523, "step": 47570 }, { "epoch": 2.793729082261758, "grad_norm": 2.3597583770751953, "learning_rate": 1.4332291997911417e-07, "loss": 0.4238, "step": 47580 }, { "epoch": 2.7943162468439904, "grad_norm": 2.7263379096984863, "learning_rate": 1.4251201682109805e-07, "loss": 0.3589, "step": 47590 }, { "epoch": 2.7949034114262226, "grad_norm": 7.022460460662842, "learning_rate": 1.417033810041779e-07, "loss": 0.2881, "step": 47600 }, { "epoch": 2.795490576008455, "grad_norm": 5.53732442855835, "learning_rate": 1.4089701290580127e-07, "loss": 0.408, "step": 47610 }, { "epoch": 2.796077740590688, "grad_norm": 2.5883524417877197, "learning_rate": 1.4009291290235927e-07, "loss": 0.3577, "step": 47620 }, { "epoch": 2.79666490517292, "grad_norm": 4.106823444366455, "learning_rate": 1.3929108136918334e-07, "loss": 0.3526, "step": 47630 }, { "epoch": 2.797252069755152, "grad_norm": 13.260628700256348, "learning_rate": 1.3849151868054578e-07, "loss": 0.5162, "step": 47640 }, { "epoch": 2.797839234337385, "grad_norm": 2.6063411235809326, "learning_rate": 1.376942252096608e-07, "loss": 0.434, "step": 47650 }, { "epoch": 2.7984263989196174, "grad_norm": 2.633380651473999, "learning_rate": 1.3689920132868351e-07, "loss": 0.4654, "step": 47660 }, { "epoch": 2.7990135635018496, "grad_norm": 4.205901622772217, "learning_rate": 1.3610644740870816e-07, "loss": 0.5212, "step": 47670 }, { "epoch": 2.799600728084082, "grad_norm": 24.051530838012695, "learning_rate": 1.3531596381977097e-07, "loss": 0.4315, "step": 47680 }, { "epoch": 2.8001878926663144, "grad_norm": 4.065244197845459, "learning_rate": 1.345277509308468e-07, "loss": 0.3833, "step": 47690 }, { "epoch": 2.800775057248547, "grad_norm": 4.310999870300293, "learning_rate": 1.33741809109853e-07, "loss": 0.4373, "step": 47700 }, { "epoch": 2.801362221830779, "grad_norm": 4.925285816192627, "learning_rate": 1.329581387236456e-07, "loss": 0.4118, "step": 47710 }, { "epoch": 2.8019493864130114, "grad_norm": 2.3948466777801514, "learning_rate": 1.3217674013801863e-07, "loss": 0.2012, "step": 47720 }, { "epoch": 2.802536550995244, "grad_norm": 2.467618942260742, "learning_rate": 1.3139761371770753e-07, "loss": 0.4512, "step": 47730 }, { "epoch": 2.8031237155774766, "grad_norm": 3.070974588394165, "learning_rate": 1.3062075982638868e-07, "loss": 0.3854, "step": 47740 }, { "epoch": 2.803710880159709, "grad_norm": 3.983058452606201, "learning_rate": 1.2984617882667527e-07, "loss": 0.3774, "step": 47750 }, { "epoch": 2.804298044741941, "grad_norm": 7.028665542602539, "learning_rate": 1.290738710801198e-07, "loss": 0.4079, "step": 47760 }, { "epoch": 2.8048852093241736, "grad_norm": 1.1559240818023682, "learning_rate": 1.283038369472145e-07, "loss": 0.3865, "step": 47770 }, { "epoch": 2.805472373906406, "grad_norm": 9.440409660339355, "learning_rate": 1.2753607678738954e-07, "loss": 0.365, "step": 47780 }, { "epoch": 2.8060595384886384, "grad_norm": 6.595816135406494, "learning_rate": 1.26770590959015e-07, "loss": 0.4255, "step": 47790 }, { "epoch": 2.8066467030708706, "grad_norm": 2.3249971866607666, "learning_rate": 1.260073798193978e-07, "loss": 0.3835, "step": 47800 }, { "epoch": 2.8072338676531032, "grad_norm": 3.369647741317749, "learning_rate": 1.2524644372478412e-07, "loss": 0.536, "step": 47810 }, { "epoch": 2.8078210322353354, "grad_norm": 10.61023235321045, "learning_rate": 1.2448778303035758e-07, "loss": 0.4407, "step": 47820 }, { "epoch": 2.808408196817568, "grad_norm": 2.713524341583252, "learning_rate": 1.2373139809024048e-07, "loss": 0.419, "step": 47830 }, { "epoch": 2.8089953613998, "grad_norm": 3.1164910793304443, "learning_rate": 1.2297728925749208e-07, "loss": 0.4094, "step": 47840 }, { "epoch": 2.809582525982033, "grad_norm": 3.3423430919647217, "learning_rate": 1.2222545688411024e-07, "loss": 0.3609, "step": 47850 }, { "epoch": 2.810169690564265, "grad_norm": 2.465919256210327, "learning_rate": 1.2147590132102817e-07, "loss": 0.3792, "step": 47860 }, { "epoch": 2.8107568551464976, "grad_norm": 2.033324956893921, "learning_rate": 1.207286229181187e-07, "loss": 0.4984, "step": 47870 }, { "epoch": 2.81134401972873, "grad_norm": 8.109058380126953, "learning_rate": 1.1998362202419013e-07, "loss": 0.3859, "step": 47880 }, { "epoch": 2.8119311843109625, "grad_norm": 5.3506317138671875, "learning_rate": 1.192408989869892e-07, "loss": 0.5105, "step": 47890 }, { "epoch": 2.8125183488931946, "grad_norm": 5.547544956207275, "learning_rate": 1.1850045415319756e-07, "loss": 0.6096, "step": 47900 }, { "epoch": 2.8131055134754273, "grad_norm": 6.039951801300049, "learning_rate": 1.1776228786843424e-07, "loss": 0.474, "step": 47910 }, { "epoch": 2.8136926780576594, "grad_norm": 7.41859245300293, "learning_rate": 1.1702640047725588e-07, "loss": 0.3931, "step": 47920 }, { "epoch": 2.814279842639892, "grad_norm": 7.541845321655273, "learning_rate": 1.1629279232315382e-07, "loss": 0.5321, "step": 47930 }, { "epoch": 2.8148670072221242, "grad_norm": 3.6861584186553955, "learning_rate": 1.1556146374855636e-07, "loss": 0.3506, "step": 47940 }, { "epoch": 2.815454171804357, "grad_norm": 5.677109718322754, "learning_rate": 1.1483241509482657e-07, "loss": 0.2742, "step": 47950 }, { "epoch": 2.816041336386589, "grad_norm": 2.829342842102051, "learning_rate": 1.1410564670226499e-07, "loss": 0.4353, "step": 47960 }, { "epoch": 2.8166285009688217, "grad_norm": 2.738617420196533, "learning_rate": 1.1338115891010637e-07, "loss": 0.4161, "step": 47970 }, { "epoch": 2.817215665551054, "grad_norm": 7.121897220611572, "learning_rate": 1.12658952056523e-07, "loss": 0.4564, "step": 47980 }, { "epoch": 2.8178028301332865, "grad_norm": 2.9823436737060547, "learning_rate": 1.1193902647861966e-07, "loss": 0.3574, "step": 47990 }, { "epoch": 2.8183899947155187, "grad_norm": 3.3877785205841064, "learning_rate": 1.1122138251243808e-07, "loss": 0.3273, "step": 48000 }, { "epoch": 2.8189771592977513, "grad_norm": 2.4843342304229736, "learning_rate": 1.1050602049295478e-07, "loss": 0.5169, "step": 48010 }, { "epoch": 2.8195643238799835, "grad_norm": 3.528724431991577, "learning_rate": 1.0979294075408043e-07, "loss": 0.3829, "step": 48020 }, { "epoch": 2.820151488462216, "grad_norm": 1.5833561420440674, "learning_rate": 1.090821436286621e-07, "loss": 0.3461, "step": 48030 }, { "epoch": 2.8207386530444483, "grad_norm": 1.5058622360229492, "learning_rate": 1.083736294484794e-07, "loss": 0.2638, "step": 48040 }, { "epoch": 2.821325817626681, "grad_norm": 14.411177635192871, "learning_rate": 1.0766739854424668e-07, "loss": 0.4446, "step": 48050 }, { "epoch": 2.821912982208913, "grad_norm": 7.1584553718566895, "learning_rate": 1.0696345124561414e-07, "loss": 0.3635, "step": 48060 }, { "epoch": 2.8225001467911457, "grad_norm": 2.8750739097595215, "learning_rate": 1.0626178788116393e-07, "loss": 0.3903, "step": 48070 }, { "epoch": 2.823087311373378, "grad_norm": 4.90360689163208, "learning_rate": 1.0556240877841461e-07, "loss": 0.5023, "step": 48080 }, { "epoch": 2.8236744759556105, "grad_norm": 2.5997982025146484, "learning_rate": 1.0486531426381508e-07, "loss": 0.3944, "step": 48090 }, { "epoch": 2.8242616405378427, "grad_norm": 20.367355346679688, "learning_rate": 1.0417050466275114e-07, "loss": 0.4444, "step": 48100 }, { "epoch": 2.8248488051200753, "grad_norm": 7.854471206665039, "learning_rate": 1.0347798029954004e-07, "loss": 0.3963, "step": 48110 }, { "epoch": 2.8254359697023075, "grad_norm": 3.5750632286071777, "learning_rate": 1.0278774149743265e-07, "loss": 0.5305, "step": 48120 }, { "epoch": 2.82602313428454, "grad_norm": 2.1534154415130615, "learning_rate": 1.0209978857861458e-07, "loss": 0.397, "step": 48130 }, { "epoch": 2.8266102988667723, "grad_norm": 2.6616358757019043, "learning_rate": 1.0141412186420229e-07, "loss": 0.4466, "step": 48140 }, { "epoch": 2.827197463449005, "grad_norm": 7.224767684936523, "learning_rate": 1.0073074167424646e-07, "loss": 0.3999, "step": 48150 }, { "epoch": 2.827784628031237, "grad_norm": 5.125593185424805, "learning_rate": 1.0004964832773024e-07, "loss": 0.3989, "step": 48160 }, { "epoch": 2.8283717926134697, "grad_norm": 6.0392069816589355, "learning_rate": 9.937084214256875e-08, "loss": 0.3506, "step": 48170 }, { "epoch": 2.828958957195702, "grad_norm": 3.714036226272583, "learning_rate": 9.86943234356097e-08, "loss": 0.3549, "step": 48180 }, { "epoch": 2.8295461217779345, "grad_norm": 2.66880202293396, "learning_rate": 9.802009252263378e-08, "loss": 0.3945, "step": 48190 }, { "epoch": 2.8301332863601667, "grad_norm": 2.1798081398010254, "learning_rate": 9.734814971835316e-08, "loss": 0.3906, "step": 48200 }, { "epoch": 2.8307204509423993, "grad_norm": 3.0429201126098633, "learning_rate": 9.66784953364125e-08, "loss": 0.4429, "step": 48210 }, { "epoch": 2.8313076155246315, "grad_norm": 4.552310466766357, "learning_rate": 9.601112968938786e-08, "loss": 0.3992, "step": 48220 }, { "epoch": 2.8318947801068637, "grad_norm": 3.103877305984497, "learning_rate": 9.534605308878675e-08, "loss": 0.4563, "step": 48230 }, { "epoch": 2.8324819446890963, "grad_norm": 1.4089897871017456, "learning_rate": 9.468326584504805e-08, "loss": 0.299, "step": 48240 }, { "epoch": 2.833069109271329, "grad_norm": 11.179737091064453, "learning_rate": 9.402276826754375e-08, "loss": 0.4698, "step": 48250 }, { "epoch": 2.833656273853561, "grad_norm": 1.6557960510253906, "learning_rate": 9.336456066457556e-08, "loss": 0.36, "step": 48260 }, { "epoch": 2.8342434384357933, "grad_norm": 2.751988172531128, "learning_rate": 9.27086433433766e-08, "loss": 0.3332, "step": 48270 }, { "epoch": 2.834830603018026, "grad_norm": 3.408280611038208, "learning_rate": 9.205501661010974e-08, "loss": 0.4569, "step": 48280 }, { "epoch": 2.8354177676002585, "grad_norm": 6.282094955444336, "learning_rate": 9.140368076987094e-08, "loss": 0.4622, "step": 48290 }, { "epoch": 2.8360049321824907, "grad_norm": 2.690042734146118, "learning_rate": 9.075463612668534e-08, "loss": 0.3612, "step": 48300 }, { "epoch": 2.836592096764723, "grad_norm": 2.7675840854644775, "learning_rate": 9.010788298351059e-08, "loss": 0.4271, "step": 48310 }, { "epoch": 2.8371792613469555, "grad_norm": 2.0484280586242676, "learning_rate": 8.946342164223076e-08, "loss": 0.3827, "step": 48320 }, { "epoch": 2.837766425929188, "grad_norm": 4.205353736877441, "learning_rate": 8.882125240366413e-08, "loss": 0.3946, "step": 48330 }, { "epoch": 2.8383535905114203, "grad_norm": 4.6382975578308105, "learning_rate": 8.818137556755702e-08, "loss": 0.4968, "step": 48340 }, { "epoch": 2.8389407550936525, "grad_norm": 8.190326690673828, "learning_rate": 8.754379143258607e-08, "loss": 0.4899, "step": 48350 }, { "epoch": 2.839527919675885, "grad_norm": 2.2001419067382812, "learning_rate": 8.690850029635878e-08, "loss": 0.2707, "step": 48360 }, { "epoch": 2.8401150842581178, "grad_norm": 3.7206034660339355, "learning_rate": 8.627550245541072e-08, "loss": 0.3977, "step": 48370 }, { "epoch": 2.84070224884035, "grad_norm": 5.48492431640625, "learning_rate": 8.564479820520777e-08, "loss": 0.3622, "step": 48380 }, { "epoch": 2.841289413422582, "grad_norm": 2.652886390686035, "learning_rate": 8.501638784014554e-08, "loss": 0.3227, "step": 48390 }, { "epoch": 2.8418765780048147, "grad_norm": 4.245568752288818, "learning_rate": 8.439027165354884e-08, "loss": 0.3943, "step": 48400 }, { "epoch": 2.8424637425870474, "grad_norm": 4.496824741363525, "learning_rate": 8.376644993767058e-08, "loss": 0.4199, "step": 48410 }, { "epoch": 2.8430509071692796, "grad_norm": 3.251836061477661, "learning_rate": 8.314492298369448e-08, "loss": 0.5523, "step": 48420 }, { "epoch": 2.8436380717515117, "grad_norm": 6.4525532722473145, "learning_rate": 8.252569108173125e-08, "loss": 0.3928, "step": 48430 }, { "epoch": 2.8442252363337444, "grad_norm": 3.4014554023742676, "learning_rate": 8.190875452082191e-08, "loss": 0.3093, "step": 48440 }, { "epoch": 2.844812400915977, "grad_norm": 4.544564723968506, "learning_rate": 8.129411358893502e-08, "loss": 0.3672, "step": 48450 }, { "epoch": 2.845399565498209, "grad_norm": 6.895676612854004, "learning_rate": 8.068176857296772e-08, "loss": 0.4103, "step": 48460 }, { "epoch": 2.8459867300804413, "grad_norm": 4.4675397872924805, "learning_rate": 8.007171975874639e-08, "loss": 0.3938, "step": 48470 }, { "epoch": 2.846573894662674, "grad_norm": 1.2315785884857178, "learning_rate": 7.946396743102436e-08, "loss": 0.3967, "step": 48480 }, { "epoch": 2.8471610592449066, "grad_norm": 3.235792398452759, "learning_rate": 7.88585118734847e-08, "loss": 0.3536, "step": 48490 }, { "epoch": 2.8477482238271388, "grad_norm": 9.106057167053223, "learning_rate": 7.82553533687358e-08, "loss": 0.3855, "step": 48500 }, { "epoch": 2.848335388409371, "grad_norm": 1.7196146249771118, "learning_rate": 7.765449219831634e-08, "loss": 0.399, "step": 48510 }, { "epoch": 2.8489225529916036, "grad_norm": 6.009435176849365, "learning_rate": 7.70559286426914e-08, "loss": 0.3426, "step": 48520 }, { "epoch": 2.849509717573836, "grad_norm": 11.426352500915527, "learning_rate": 7.645966298125362e-08, "loss": 0.3897, "step": 48530 }, { "epoch": 2.8500968821560684, "grad_norm": 7.503849029541016, "learning_rate": 7.586569549232426e-08, "loss": 0.5394, "step": 48540 }, { "epoch": 2.8506840467383006, "grad_norm": 7.8046698570251465, "learning_rate": 7.527402645314985e-08, "loss": 0.2811, "step": 48550 }, { "epoch": 2.851271211320533, "grad_norm": 5.457859992980957, "learning_rate": 7.46846561399056e-08, "loss": 0.4197, "step": 48560 }, { "epoch": 2.851858375902766, "grad_norm": 1.7791637182235718, "learning_rate": 7.409758482769314e-08, "loss": 0.2798, "step": 48570 }, { "epoch": 2.852445540484998, "grad_norm": 9.434212684631348, "learning_rate": 7.351281279054156e-08, "loss": 0.3883, "step": 48580 }, { "epoch": 2.85303270506723, "grad_norm": 9.828226089477539, "learning_rate": 7.293034030140644e-08, "loss": 0.4323, "step": 48590 }, { "epoch": 2.853619869649463, "grad_norm": 4.255585193634033, "learning_rate": 7.235016763216862e-08, "loss": 0.2915, "step": 48600 }, { "epoch": 2.8542070342316954, "grad_norm": 5.642820835113525, "learning_rate": 7.177229505363814e-08, "loss": 0.421, "step": 48610 }, { "epoch": 2.8547941988139276, "grad_norm": 2.5467684268951416, "learning_rate": 7.119672283554813e-08, "loss": 0.4539, "step": 48620 }, { "epoch": 2.8553813633961598, "grad_norm": 6.007460117340088, "learning_rate": 7.0623451246562e-08, "loss": 0.2681, "step": 48630 }, { "epoch": 2.8559685279783924, "grad_norm": 5.424546241760254, "learning_rate": 7.005248055426573e-08, "loss": 0.4019, "step": 48640 }, { "epoch": 2.8565556925606246, "grad_norm": 6.772651195526123, "learning_rate": 6.948381102517221e-08, "loss": 0.3107, "step": 48650 }, { "epoch": 2.857142857142857, "grad_norm": 4.1274614334106445, "learning_rate": 6.89174429247219e-08, "loss": 0.4397, "step": 48660 }, { "epoch": 2.8577300217250894, "grad_norm": 3.6735057830810547, "learning_rate": 6.835337651727891e-08, "loss": 0.4288, "step": 48670 }, { "epoch": 2.858317186307322, "grad_norm": 7.665927886962891, "learning_rate": 6.779161206613482e-08, "loss": 0.3827, "step": 48680 }, { "epoch": 2.858904350889554, "grad_norm": 17.812593460083008, "learning_rate": 6.72321498335049e-08, "loss": 0.5087, "step": 48690 }, { "epoch": 2.859491515471787, "grad_norm": 4.304188251495361, "learning_rate": 6.66749900805297e-08, "loss": 0.3919, "step": 48700 }, { "epoch": 2.860078680054019, "grad_norm": 1.3671303987503052, "learning_rate": 6.612013306727782e-08, "loss": 0.3073, "step": 48710 }, { "epoch": 2.8606658446362516, "grad_norm": 6.29978084564209, "learning_rate": 6.556757905274103e-08, "loss": 0.3345, "step": 48720 }, { "epoch": 2.861253009218484, "grad_norm": 3.608672857284546, "learning_rate": 6.50173282948352e-08, "loss": 0.5354, "step": 48730 }, { "epoch": 2.8618401738007164, "grad_norm": 3.988312005996704, "learning_rate": 6.446938105040268e-08, "loss": 0.4118, "step": 48740 }, { "epoch": 2.8624273383829486, "grad_norm": 2.255985736846924, "learning_rate": 6.392373757520997e-08, "loss": 0.4207, "step": 48750 }, { "epoch": 2.8630145029651812, "grad_norm": 3.975806474685669, "learning_rate": 6.338039812394892e-08, "loss": 0.3846, "step": 48760 }, { "epoch": 2.8636016675474134, "grad_norm": 7.969893932342529, "learning_rate": 6.283936295023441e-08, "loss": 0.4134, "step": 48770 }, { "epoch": 2.864188832129646, "grad_norm": 3.3776917457580566, "learning_rate": 6.230063230660721e-08, "loss": 0.3679, "step": 48780 }, { "epoch": 2.864775996711878, "grad_norm": 2.2373392581939697, "learning_rate": 6.176420644453229e-08, "loss": 0.3639, "step": 48790 }, { "epoch": 2.865363161294111, "grad_norm": 9.63841724395752, "learning_rate": 6.12300856143977e-08, "loss": 0.3217, "step": 48800 }, { "epoch": 2.865950325876343, "grad_norm": 2.922652006149292, "learning_rate": 6.06982700655162e-08, "loss": 0.4341, "step": 48810 }, { "epoch": 2.8665374904585756, "grad_norm": 2.726717472076416, "learning_rate": 6.016876004612592e-08, "loss": 0.482, "step": 48820 }, { "epoch": 2.867124655040808, "grad_norm": 7.229940414428711, "learning_rate": 5.964155580338637e-08, "loss": 0.5148, "step": 48830 }, { "epoch": 2.8677118196230404, "grad_norm": 5.856103420257568, "learning_rate": 5.9116657583381256e-08, "loss": 0.3504, "step": 48840 }, { "epoch": 2.8682989842052726, "grad_norm": 2.448648452758789, "learning_rate": 5.859406563112013e-08, "loss": 0.4059, "step": 48850 }, { "epoch": 2.8688861487875053, "grad_norm": 9.148752212524414, "learning_rate": 5.807378019053345e-08, "loss": 0.325, "step": 48860 }, { "epoch": 2.8694733133697374, "grad_norm": 3.7917468547821045, "learning_rate": 5.7555801504476375e-08, "loss": 0.4246, "step": 48870 }, { "epoch": 2.87006047795197, "grad_norm": 5.405700206756592, "learning_rate": 5.7040129814727175e-08, "loss": 0.3443, "step": 48880 }, { "epoch": 2.8706476425342022, "grad_norm": 3.0179903507232666, "learning_rate": 5.6526765361986645e-08, "loss": 0.4494, "step": 48890 }, { "epoch": 2.871234807116435, "grad_norm": 1.8399113416671753, "learning_rate": 5.601570838587977e-08, "loss": 0.4608, "step": 48900 }, { "epoch": 2.871821971698667, "grad_norm": 6.865592956542969, "learning_rate": 5.550695912495352e-08, "loss": 0.4548, "step": 48910 }, { "epoch": 2.8724091362808997, "grad_norm": 4.543923854827881, "learning_rate": 5.5000517816677926e-08, "loss": 0.4419, "step": 48920 }, { "epoch": 2.872996300863132, "grad_norm": 5.552177906036377, "learning_rate": 5.4496384697445556e-08, "loss": 0.4193, "step": 48930 }, { "epoch": 2.8735834654453645, "grad_norm": 4.935827732086182, "learning_rate": 5.399456000257208e-08, "loss": 0.4419, "step": 48940 }, { "epoch": 2.8741706300275967, "grad_norm": 3.757335662841797, "learning_rate": 5.3495043966295677e-08, "loss": 0.4125, "step": 48950 }, { "epoch": 2.8747577946098293, "grad_norm": 2.1335322856903076, "learning_rate": 5.2997836821776506e-08, "loss": 0.3622, "step": 48960 }, { "epoch": 2.8753449591920615, "grad_norm": 5.91879940032959, "learning_rate": 5.250293880109614e-08, "loss": 0.4646, "step": 48970 }, { "epoch": 2.875932123774294, "grad_norm": 4.841958999633789, "learning_rate": 5.20103501352609e-08, "loss": 0.3895, "step": 48980 }, { "epoch": 2.8765192883565263, "grad_norm": 2.3097238540649414, "learning_rate": 5.152007105419632e-08, "loss": 0.4691, "step": 48990 }, { "epoch": 2.877106452938759, "grad_norm": 5.976476192474365, "learning_rate": 5.103210178675155e-08, "loss": 0.3566, "step": 49000 }, { "epoch": 2.877693617520991, "grad_norm": 3.6110265254974365, "learning_rate": 5.054644256069774e-08, "loss": 0.4604, "step": 49010 }, { "epoch": 2.8782807821032237, "grad_norm": 3.092176675796509, "learning_rate": 5.0063093602725765e-08, "loss": 0.3635, "step": 49020 }, { "epoch": 2.878867946685456, "grad_norm": 2.321337938308716, "learning_rate": 4.958205513845071e-08, "loss": 0.4552, "step": 49030 }, { "epoch": 2.8794551112676885, "grad_norm": 10.347636222839355, "learning_rate": 4.910332739240797e-08, "loss": 0.4561, "step": 49040 }, { "epoch": 2.8800422758499207, "grad_norm": 2.5315115451812744, "learning_rate": 4.862691058805436e-08, "loss": 0.3524, "step": 49050 }, { "epoch": 2.8806294404321533, "grad_norm": 4.522789001464844, "learning_rate": 4.815280494776753e-08, "loss": 0.4097, "step": 49060 }, { "epoch": 2.8812166050143855, "grad_norm": 6.883718967437744, "learning_rate": 4.768101069284714e-08, "loss": 0.422, "step": 49070 }, { "epoch": 2.881803769596618, "grad_norm": 9.380254745483398, "learning_rate": 4.7211528043514254e-08, "loss": 0.4307, "step": 49080 }, { "epoch": 2.8823909341788503, "grad_norm": 2.8385016918182373, "learning_rate": 4.6744357218909686e-08, "loss": 0.5164, "step": 49090 }, { "epoch": 2.8829780987610825, "grad_norm": 3.1939966678619385, "learning_rate": 4.627949843709567e-08, "loss": 0.3868, "step": 49100 }, { "epoch": 2.883565263343315, "grad_norm": 8.8888578414917, "learning_rate": 4.5816951915056416e-08, "loss": 0.3543, "step": 49110 }, { "epoch": 2.8841524279255477, "grad_norm": 2.503753185272217, "learning_rate": 4.535671786869533e-08, "loss": 0.4623, "step": 49120 }, { "epoch": 2.88473959250778, "grad_norm": 2.6471381187438965, "learning_rate": 4.4898796512836685e-08, "loss": 0.4094, "step": 49130 }, { "epoch": 2.885326757090012, "grad_norm": 2.2808003425598145, "learning_rate": 4.444318806122616e-08, "loss": 0.2906, "step": 49140 }, { "epoch": 2.8859139216722447, "grad_norm": 9.332298278808594, "learning_rate": 4.3989892726528095e-08, "loss": 0.3068, "step": 49150 }, { "epoch": 2.8865010862544773, "grad_norm": 3.3689565658569336, "learning_rate": 4.353891072032934e-08, "loss": 0.4003, "step": 49160 }, { "epoch": 2.8870882508367095, "grad_norm": 1.311246395111084, "learning_rate": 4.309024225313541e-08, "loss": 0.4701, "step": 49170 }, { "epoch": 2.8876754154189417, "grad_norm": 3.059788942337036, "learning_rate": 4.26438875343721e-08, "loss": 0.4327, "step": 49180 }, { "epoch": 2.8882625800011743, "grad_norm": 2.800250291824341, "learning_rate": 4.219984677238609e-08, "loss": 0.3382, "step": 49190 }, { "epoch": 2.888849744583407, "grad_norm": 1.9048861265182495, "learning_rate": 4.17581201744427e-08, "loss": 0.3876, "step": 49200 }, { "epoch": 2.889436909165639, "grad_norm": 6.4452996253967285, "learning_rate": 4.1318707946727544e-08, "loss": 0.4958, "step": 49210 }, { "epoch": 2.8900240737478713, "grad_norm": 6.0214080810546875, "learning_rate": 4.0881610294346566e-08, "loss": 0.382, "step": 49220 }, { "epoch": 2.890611238330104, "grad_norm": 2.7728607654571533, "learning_rate": 4.0446827421325443e-08, "loss": 0.327, "step": 49230 }, { "epoch": 2.8911984029123365, "grad_norm": 4.0449981689453125, "learning_rate": 4.0014359530607394e-08, "loss": 0.3548, "step": 49240 }, { "epoch": 2.8917855674945687, "grad_norm": 1.431009292602539, "learning_rate": 3.9584206824057614e-08, "loss": 0.3386, "step": 49250 }, { "epoch": 2.892372732076801, "grad_norm": 6.06682014465332, "learning_rate": 3.9156369502458825e-08, "loss": 0.4284, "step": 49260 }, { "epoch": 2.8929598966590335, "grad_norm": 2.7682173252105713, "learning_rate": 3.873084776551406e-08, "loss": 0.3643, "step": 49270 }, { "epoch": 2.893547061241266, "grad_norm": 4.374297142028809, "learning_rate": 3.8307641811844984e-08, "loss": 0.4447, "step": 49280 }, { "epoch": 2.8941342258234983, "grad_norm": 3.394930124282837, "learning_rate": 3.7886751838992484e-08, "loss": 0.4142, "step": 49290 }, { "epoch": 2.8947213904057305, "grad_norm": 2.9882636070251465, "learning_rate": 3.746817804341607e-08, "loss": 0.3435, "step": 49300 }, { "epoch": 2.895308554987963, "grad_norm": 1.9783436059951782, "learning_rate": 3.705192062049445e-08, "loss": 0.4344, "step": 49310 }, { "epoch": 2.8958957195701958, "grad_norm": 5.5590739250183105, "learning_rate": 3.663797976452554e-08, "loss": 0.4896, "step": 49320 }, { "epoch": 2.896482884152428, "grad_norm": 2.8690176010131836, "learning_rate": 3.6226355668725345e-08, "loss": 0.3309, "step": 49330 }, { "epoch": 2.89707004873466, "grad_norm": 3.1963467597961426, "learning_rate": 3.581704852522849e-08, "loss": 0.3799, "step": 49340 }, { "epoch": 2.8976572133168927, "grad_norm": 2.543874740600586, "learning_rate": 3.541005852508772e-08, "loss": 0.4308, "step": 49350 }, { "epoch": 2.8982443778991254, "grad_norm": 3.7210633754730225, "learning_rate": 3.500538585827551e-08, "loss": 0.4354, "step": 49360 }, { "epoch": 2.8988315424813575, "grad_norm": 2.478198289871216, "learning_rate": 3.4603030713681875e-08, "loss": 0.3507, "step": 49370 }, { "epoch": 2.8994187070635897, "grad_norm": 3.240339994430542, "learning_rate": 3.420299327911492e-08, "loss": 0.3309, "step": 49380 }, { "epoch": 2.9000058716458224, "grad_norm": 3.4240763187408447, "learning_rate": 3.380527374130027e-08, "loss": 0.2413, "step": 49390 }, { "epoch": 2.900593036228055, "grad_norm": 6.912575721740723, "learning_rate": 3.3409872285883326e-08, "loss": 0.4638, "step": 49400 }, { "epoch": 2.901180200810287, "grad_norm": 3.6237666606903076, "learning_rate": 3.301678909742645e-08, "loss": 0.3753, "step": 49410 }, { "epoch": 2.9017673653925193, "grad_norm": 3.2194013595581055, "learning_rate": 3.262602435940954e-08, "loss": 0.391, "step": 49420 }, { "epoch": 2.902354529974752, "grad_norm": 1.3947303295135498, "learning_rate": 3.223757825423113e-08, "loss": 0.404, "step": 49430 }, { "epoch": 2.9029416945569846, "grad_norm": 2.0163862705230713, "learning_rate": 3.18514509632073e-08, "loss": 0.3626, "step": 49440 }, { "epoch": 2.9035288591392168, "grad_norm": 3.5880062580108643, "learning_rate": 3.1467642666571095e-08, "loss": 0.5591, "step": 49450 }, { "epoch": 2.904116023721449, "grad_norm": 2.992309808731079, "learning_rate": 3.1086153543474216e-08, "loss": 0.3399, "step": 49460 }, { "epoch": 2.9047031883036816, "grad_norm": 11.792913436889648, "learning_rate": 3.070698377198478e-08, "loss": 0.4303, "step": 49470 }, { "epoch": 2.905290352885914, "grad_norm": 3.800711154937744, "learning_rate": 3.033013352908898e-08, "loss": 0.4303, "step": 49480 }, { "epoch": 2.9058775174681464, "grad_norm": 1.1604187488555908, "learning_rate": 2.9955602990690006e-08, "loss": 0.3069, "step": 49490 }, { "epoch": 2.9064646820503786, "grad_norm": 3.974208116531372, "learning_rate": 2.9583392331608006e-08, "loss": 0.4697, "step": 49500 }, { "epoch": 2.907051846632611, "grad_norm": 2.2174248695373535, "learning_rate": 2.921350172558124e-08, "loss": 0.4999, "step": 49510 }, { "epoch": 2.9076390112148434, "grad_norm": 2.411625862121582, "learning_rate": 2.884593134526381e-08, "loss": 0.4589, "step": 49520 }, { "epoch": 2.908226175797076, "grad_norm": 6.801300048828125, "learning_rate": 2.8480681362227925e-08, "loss": 0.5759, "step": 49530 }, { "epoch": 2.908813340379308, "grad_norm": 2.424172878265381, "learning_rate": 2.8117751946962203e-08, "loss": 0.3644, "step": 49540 }, { "epoch": 2.909400504961541, "grad_norm": 2.404273509979248, "learning_rate": 2.7757143268871687e-08, "loss": 0.444, "step": 49550 }, { "epoch": 2.909987669543773, "grad_norm": 3.497680187225342, "learning_rate": 2.7398855496278966e-08, "loss": 0.5452, "step": 49560 }, { "epoch": 2.9105748341260056, "grad_norm": 1.3455703258514404, "learning_rate": 2.7042888796422474e-08, "loss": 0.4629, "step": 49570 }, { "epoch": 2.9111619987082378, "grad_norm": 2.3377134799957275, "learning_rate": 2.6689243335457638e-08, "loss": 0.5173, "step": 49580 }, { "epoch": 2.9117491632904704, "grad_norm": 3.692033290863037, "learning_rate": 2.6337919278456302e-08, "loss": 0.4097, "step": 49590 }, { "epoch": 2.9123363278727026, "grad_norm": 9.547811508178711, "learning_rate": 2.5988916789407292e-08, "loss": 0.4645, "step": 49600 }, { "epoch": 2.912923492454935, "grad_norm": 2.9649908542633057, "learning_rate": 2.5642236031215296e-08, "loss": 0.4177, "step": 49610 }, { "epoch": 2.9135106570371674, "grad_norm": 5.120396137237549, "learning_rate": 2.5297877165700314e-08, "loss": 0.3231, "step": 49620 }, { "epoch": 2.9140978216194, "grad_norm": 3.9290921688079834, "learning_rate": 2.4955840353600992e-08, "loss": 0.3072, "step": 49630 }, { "epoch": 2.914684986201632, "grad_norm": 2.706728935241699, "learning_rate": 2.461612575456962e-08, "loss": 0.4913, "step": 49640 }, { "epoch": 2.915272150783865, "grad_norm": 3.921992301940918, "learning_rate": 2.4278733527176024e-08, "loss": 0.3381, "step": 49650 }, { "epoch": 2.915859315366097, "grad_norm": 8.346675872802734, "learning_rate": 2.3943663828905338e-08, "loss": 0.3143, "step": 49660 }, { "epoch": 2.9164464799483296, "grad_norm": 2.5640716552734375, "learning_rate": 2.3610916816159124e-08, "loss": 0.4549, "step": 49670 }, { "epoch": 2.917033644530562, "grad_norm": 1.5315781831741333, "learning_rate": 2.3280492644254248e-08, "loss": 0.3849, "step": 49680 }, { "epoch": 2.9176208091127944, "grad_norm": 3.047182559967041, "learning_rate": 2.295239146742345e-08, "loss": 0.3682, "step": 49690 }, { "epoch": 2.9182079736950266, "grad_norm": 3.3516271114349365, "learning_rate": 2.262661343881589e-08, "loss": 0.3076, "step": 49700 }, { "epoch": 2.9187951382772592, "grad_norm": 7.132715225219727, "learning_rate": 2.230315871049493e-08, "loss": 0.3805, "step": 49710 }, { "epoch": 2.9193823028594914, "grad_norm": 2.665755033493042, "learning_rate": 2.198202743344091e-08, "loss": 0.5204, "step": 49720 }, { "epoch": 2.919969467441724, "grad_norm": 3.9981677532196045, "learning_rate": 2.1663219757548926e-08, "loss": 0.3566, "step": 49730 }, { "epoch": 2.920556632023956, "grad_norm": 3.5649569034576416, "learning_rate": 2.1346735831629383e-08, "loss": 0.4917, "step": 49740 }, { "epoch": 2.921143796606189, "grad_norm": 5.560098171234131, "learning_rate": 2.1032575803408005e-08, "loss": 0.3657, "step": 49750 }, { "epoch": 2.921730961188421, "grad_norm": 2.0388338565826416, "learning_rate": 2.0720739819526936e-08, "loss": 0.467, "step": 49760 }, { "epoch": 2.9223181257706536, "grad_norm": 2.717149496078491, "learning_rate": 2.0411228025541408e-08, "loss": 0.4015, "step": 49770 }, { "epoch": 2.922905290352886, "grad_norm": 12.077221870422363, "learning_rate": 2.0104040565924188e-08, "loss": 0.3558, "step": 49780 }, { "epoch": 2.9234924549351184, "grad_norm": 2.5205078125, "learning_rate": 1.9799177584061134e-08, "loss": 0.363, "step": 49790 }, { "epoch": 2.9240796195173506, "grad_norm": 1.6347932815551758, "learning_rate": 1.949663922225342e-08, "loss": 0.396, "step": 49800 }, { "epoch": 2.9246667840995832, "grad_norm": 4.939041614532471, "learning_rate": 1.9196425621718638e-08, "loss": 0.4158, "step": 49810 }, { "epoch": 2.9252539486818154, "grad_norm": 6.8007707595825195, "learning_rate": 1.8898536922587473e-08, "loss": 0.3983, "step": 49820 }, { "epoch": 2.925841113264048, "grad_norm": 6.139800071716309, "learning_rate": 1.8602973263906478e-08, "loss": 0.4547, "step": 49830 }, { "epoch": 2.9264282778462802, "grad_norm": 2.046860694885254, "learning_rate": 1.83097347836364e-08, "loss": 0.3905, "step": 49840 }, { "epoch": 2.927015442428513, "grad_norm": 2.7433173656463623, "learning_rate": 1.8018821618653313e-08, "loss": 0.3823, "step": 49850 }, { "epoch": 2.927602607010745, "grad_norm": 2.2007083892822266, "learning_rate": 1.773023390474693e-08, "loss": 0.3467, "step": 49860 }, { "epoch": 2.9281897715929777, "grad_norm": 3.633848190307617, "learning_rate": 1.7443971776621717e-08, "loss": 0.4467, "step": 49870 }, { "epoch": 2.92877693617521, "grad_norm": 2.800995349884033, "learning_rate": 1.7160035367898008e-08, "loss": 0.3372, "step": 49880 }, { "epoch": 2.9293641007574425, "grad_norm": 8.077489852905273, "learning_rate": 1.687842481110813e-08, "loss": 0.4452, "step": 49890 }, { "epoch": 2.9299512653396746, "grad_norm": 2.60117244720459, "learning_rate": 1.6599140237700818e-08, "loss": 0.4713, "step": 49900 }, { "epoch": 2.9305384299219073, "grad_norm": 2.778047800064087, "learning_rate": 1.6322181778039014e-08, "loss": 0.319, "step": 49910 }, { "epoch": 2.9311255945041395, "grad_norm": 4.096695423126221, "learning_rate": 1.6047549561398202e-08, "loss": 0.3817, "step": 49920 }, { "epoch": 2.9317127590863716, "grad_norm": 10.991368293762207, "learning_rate": 1.5775243715969725e-08, "loss": 0.5081, "step": 49930 }, { "epoch": 2.9322999236686043, "grad_norm": 6.174489974975586, "learning_rate": 1.550526436885802e-08, "loss": 0.3984, "step": 49940 }, { "epoch": 2.932887088250837, "grad_norm": 2.4704527854919434, "learning_rate": 1.5237611646082284e-08, "loss": 0.3228, "step": 49950 }, { "epoch": 2.933474252833069, "grad_norm": 36.436885833740234, "learning_rate": 1.497228567257536e-08, "loss": 0.377, "step": 49960 }, { "epoch": 2.9340614174153012, "grad_norm": 3.4038798809051514, "learning_rate": 1.470928657218429e-08, "loss": 0.3334, "step": 49970 }, { "epoch": 2.934648581997534, "grad_norm": 6.639990329742432, "learning_rate": 1.4448614467668654e-08, "loss": 0.2925, "step": 49980 }, { "epoch": 2.9352357465797665, "grad_norm": 2.848494529724121, "learning_rate": 1.4190269480704455e-08, "loss": 0.2916, "step": 49990 }, { "epoch": 2.9358229111619987, "grad_norm": 7.689738750457764, "learning_rate": 1.3934251731879678e-08, "loss": 0.5371, "step": 50000 }, { "epoch": 2.936410075744231, "grad_norm": 6.398970603942871, "learning_rate": 1.3680561340695398e-08, "loss": 0.4559, "step": 50010 }, { "epoch": 2.9369972403264635, "grad_norm": 1.7140705585479736, "learning_rate": 1.3429198425568557e-08, "loss": 0.3632, "step": 50020 }, { "epoch": 2.937584404908696, "grad_norm": 3.0798637866973877, "learning_rate": 1.3180163103827525e-08, "loss": 0.2829, "step": 50030 }, { "epoch": 2.9381715694909283, "grad_norm": 2.26163911819458, "learning_rate": 1.2933455491714875e-08, "loss": 0.3292, "step": 50040 }, { "epoch": 2.9387587340731605, "grad_norm": 2.956986665725708, "learning_rate": 1.2689075704387931e-08, "loss": 0.3846, "step": 50050 }, { "epoch": 2.939345898655393, "grad_norm": 2.946484088897705, "learning_rate": 1.2447023855915452e-08, "loss": 0.4099, "step": 50060 }, { "epoch": 2.9399330632376257, "grad_norm": 1.708966612815857, "learning_rate": 1.2207300059281501e-08, "loss": 0.3842, "step": 50070 }, { "epoch": 2.940520227819858, "grad_norm": 1.3180679082870483, "learning_rate": 1.1969904426381574e-08, "loss": 0.361, "step": 50080 }, { "epoch": 2.94110739240209, "grad_norm": 2.9985523223876953, "learning_rate": 1.1734837068025917e-08, "loss": 0.3536, "step": 50090 }, { "epoch": 2.9416945569843227, "grad_norm": 2.47684907913208, "learning_rate": 1.150209809393732e-08, "loss": 0.4441, "step": 50100 }, { "epoch": 2.9422817215665553, "grad_norm": 3.619563579559326, "learning_rate": 1.1271687612752213e-08, "loss": 0.2623, "step": 50110 }, { "epoch": 2.9428688861487875, "grad_norm": 1.2511301040649414, "learning_rate": 1.1043605732019569e-08, "loss": 0.4122, "step": 50120 }, { "epoch": 2.9434560507310197, "grad_norm": 2.770665168762207, "learning_rate": 1.0817852558201448e-08, "loss": 0.4249, "step": 50130 }, { "epoch": 2.9440432153132523, "grad_norm": 5.243077278137207, "learning_rate": 1.0594428196673001e-08, "loss": 0.4786, "step": 50140 }, { "epoch": 2.944630379895485, "grad_norm": 3.3630950450897217, "learning_rate": 1.0373332751723585e-08, "loss": 0.3949, "step": 50150 }, { "epoch": 2.945217544477717, "grad_norm": 6.420507431030273, "learning_rate": 1.0154566326553983e-08, "loss": 0.3464, "step": 50160 }, { "epoch": 2.9458047090599493, "grad_norm": 3.637948513031006, "learning_rate": 9.93812902327751e-09, "loss": 0.2794, "step": 50170 }, { "epoch": 2.946391873642182, "grad_norm": 7.7607741355896, "learning_rate": 9.724020942922241e-09, "loss": 0.4086, "step": 50180 }, { "epoch": 2.9469790382244145, "grad_norm": 3.834477186203003, "learning_rate": 9.512242185427679e-09, "loss": 0.2128, "step": 50190 }, { "epoch": 2.9475662028066467, "grad_norm": 4.644526481628418, "learning_rate": 9.302792849645859e-09, "loss": 0.5503, "step": 50200 }, { "epoch": 2.948153367388879, "grad_norm": 2.0185015201568604, "learning_rate": 9.095673033341913e-09, "loss": 0.4191, "step": 50210 }, { "epoch": 2.9487405319711115, "grad_norm": 2.7599682807922363, "learning_rate": 8.890882833193503e-09, "loss": 0.4928, "step": 50220 }, { "epoch": 2.949327696553344, "grad_norm": 3.7981302738189697, "learning_rate": 8.688422344791946e-09, "loss": 0.3674, "step": 50230 }, { "epoch": 2.9499148611355763, "grad_norm": 4.238650798797607, "learning_rate": 8.48829166263998e-09, "loss": 0.3014, "step": 50240 }, { "epoch": 2.9505020257178085, "grad_norm": 3.7559871673583984, "learning_rate": 8.290490880152325e-09, "loss": 0.4165, "step": 50250 }, { "epoch": 2.951089190300041, "grad_norm": 2.4906845092773438, "learning_rate": 8.095020089657346e-09, "loss": 0.4392, "step": 50260 }, { "epoch": 2.9516763548822738, "grad_norm": 6.298901557922363, "learning_rate": 7.901879382395394e-09, "loss": 0.4155, "step": 50270 }, { "epoch": 2.952263519464506, "grad_norm": 4.212754726409912, "learning_rate": 7.711068848518799e-09, "loss": 0.456, "step": 50280 }, { "epoch": 2.952850684046738, "grad_norm": 2.3506853580474854, "learning_rate": 7.522588577093538e-09, "loss": 0.4753, "step": 50290 }, { "epoch": 2.9534378486289707, "grad_norm": 4.201270580291748, "learning_rate": 7.336438656096456e-09, "loss": 0.4078, "step": 50300 }, { "epoch": 2.9540250132112034, "grad_norm": 7.027324676513672, "learning_rate": 7.152619172417497e-09, "loss": 0.4547, "step": 50310 }, { "epoch": 2.9546121777934355, "grad_norm": 2.360142946243286, "learning_rate": 6.971130211858024e-09, "loss": 0.5189, "step": 50320 }, { "epoch": 2.9551993423756677, "grad_norm": 2.873462438583374, "learning_rate": 6.791971859131941e-09, "loss": 0.3626, "step": 50330 }, { "epoch": 2.9557865069579004, "grad_norm": 3.8957386016845703, "learning_rate": 6.615144197866241e-09, "loss": 0.2631, "step": 50340 }, { "epoch": 2.956373671540133, "grad_norm": 8.733673095703125, "learning_rate": 6.440647310598791e-09, "loss": 0.3747, "step": 50350 }, { "epoch": 2.956960836122365, "grad_norm": 7.283787727355957, "learning_rate": 6.268481278779992e-09, "loss": 0.4675, "step": 50360 }, { "epoch": 2.9575480007045973, "grad_norm": 4.261548042297363, "learning_rate": 6.098646182771673e-09, "loss": 0.5118, "step": 50370 }, { "epoch": 2.95813516528683, "grad_norm": 8.681032180786133, "learning_rate": 5.931142101848197e-09, "loss": 0.3891, "step": 50380 }, { "epoch": 2.958722329869062, "grad_norm": 3.918628454208374, "learning_rate": 5.765969114196468e-09, "loss": 0.4578, "step": 50390 }, { "epoch": 2.9593094944512948, "grad_norm": 4.615451335906982, "learning_rate": 5.603127296914812e-09, "loss": 0.4123, "step": 50400 }, { "epoch": 2.959896659033527, "grad_norm": 3.1698405742645264, "learning_rate": 5.442616726012429e-09, "loss": 0.4653, "step": 50410 }, { "epoch": 2.9604838236157596, "grad_norm": 9.367932319641113, "learning_rate": 5.284437476411608e-09, "loss": 0.3407, "step": 50420 }, { "epoch": 2.9610709881979917, "grad_norm": 4.370306491851807, "learning_rate": 5.128589621946623e-09, "loss": 0.321, "step": 50430 }, { "epoch": 2.9616581527802244, "grad_norm": 3.1897809505462646, "learning_rate": 4.975073235361505e-09, "loss": 0.4292, "step": 50440 }, { "epoch": 2.9622453173624566, "grad_norm": 3.763944625854492, "learning_rate": 4.8238883883150455e-09, "loss": 0.3362, "step": 50450 }, { "epoch": 2.962832481944689, "grad_norm": 2.2963502407073975, "learning_rate": 4.67503515137524e-09, "loss": 0.3122, "step": 50460 }, { "epoch": 2.9634196465269214, "grad_norm": 4.33125114440918, "learning_rate": 4.528513594022621e-09, "loss": 0.5076, "step": 50470 }, { "epoch": 2.964006811109154, "grad_norm": 7.221843719482422, "learning_rate": 4.384323784650257e-09, "loss": 0.4528, "step": 50480 }, { "epoch": 2.964593975691386, "grad_norm": 3.0238096714019775, "learning_rate": 4.242465790560979e-09, "loss": 0.4148, "step": 50490 }, { "epoch": 2.965181140273619, "grad_norm": 2.2194886207580566, "learning_rate": 4.102939677970707e-09, "loss": 0.3677, "step": 50500 }, { "epoch": 2.965768304855851, "grad_norm": 1.856473445892334, "learning_rate": 3.965745512005681e-09, "loss": 0.4846, "step": 50510 }, { "epoch": 2.9663554694380836, "grad_norm": 7.636158466339111, "learning_rate": 3.830883356705783e-09, "loss": 0.4426, "step": 50520 }, { "epoch": 2.9669426340203158, "grad_norm": 3.8847696781158447, "learning_rate": 3.6983532750195504e-09, "loss": 0.4093, "step": 50530 }, { "epoch": 2.9675297986025484, "grad_norm": 4.90143346786499, "learning_rate": 3.568155328808609e-09, "loss": 0.4295, "step": 50540 }, { "epoch": 2.9681169631847806, "grad_norm": 5.943915367126465, "learning_rate": 3.4402895788465675e-09, "loss": 0.5124, "step": 50550 }, { "epoch": 2.968704127767013, "grad_norm": 6.471635818481445, "learning_rate": 3.314756084816795e-09, "loss": 0.387, "step": 50560 }, { "epoch": 2.9692912923492454, "grad_norm": 2.4506523609161377, "learning_rate": 3.1915549053151972e-09, "loss": 0.4081, "step": 50570 }, { "epoch": 2.969878456931478, "grad_norm": 1.888348937034607, "learning_rate": 3.070686097848552e-09, "loss": 0.3985, "step": 50580 }, { "epoch": 2.97046562151371, "grad_norm": 3.9682023525238037, "learning_rate": 2.9521497188350625e-09, "loss": 0.3162, "step": 50590 }, { "epoch": 2.971052786095943, "grad_norm": 5.620306015014648, "learning_rate": 2.835945823604358e-09, "loss": 0.3806, "step": 50600 }, { "epoch": 2.971639950678175, "grad_norm": 4.171652793884277, "learning_rate": 2.722074466397495e-09, "loss": 0.4531, "step": 50610 }, { "epoch": 2.9722271152604076, "grad_norm": 2.545320749282837, "learning_rate": 2.610535700365846e-09, "loss": 0.4176, "step": 50620 }, { "epoch": 2.97281427984264, "grad_norm": 6.397970199584961, "learning_rate": 2.501329577572764e-09, "loss": 0.4282, "step": 50630 }, { "epoch": 2.9734014444248724, "grad_norm": 6.484560966491699, "learning_rate": 2.3944561489930296e-09, "loss": 0.3737, "step": 50640 }, { "epoch": 2.9739886090071046, "grad_norm": 1.3701145648956299, "learning_rate": 2.2899154645117383e-09, "loss": 0.3657, "step": 50650 }, { "epoch": 2.9745757735893372, "grad_norm": 7.3219828605651855, "learning_rate": 2.187707572926523e-09, "loss": 0.3487, "step": 50660 }, { "epoch": 2.9751629381715694, "grad_norm": 8.547548294067383, "learning_rate": 2.0878325219436667e-09, "loss": 0.4349, "step": 50670 }, { "epoch": 2.975750102753802, "grad_norm": 3.4046530723571777, "learning_rate": 1.9902903581831e-09, "loss": 0.5658, "step": 50680 }, { "epoch": 2.976337267336034, "grad_norm": 23.953418731689453, "learning_rate": 1.895081127175069e-09, "loss": 0.3339, "step": 50690 }, { "epoch": 2.976924431918267, "grad_norm": 12.834596633911133, "learning_rate": 1.8022048733595808e-09, "loss": 0.4227, "step": 50700 }, { "epoch": 2.977511596500499, "grad_norm": 2.312685251235962, "learning_rate": 1.7116616400897345e-09, "loss": 0.4476, "step": 50710 }, { "epoch": 2.9780987610827316, "grad_norm": 7.0056939125061035, "learning_rate": 1.6234514696278346e-09, "loss": 0.3854, "step": 50720 }, { "epoch": 2.978685925664964, "grad_norm": 8.77929401397705, "learning_rate": 1.5375744031487228e-09, "loss": 0.4075, "step": 50730 }, { "epoch": 2.9792730902471964, "grad_norm": 3.8908486366271973, "learning_rate": 1.4540304807364458e-09, "loss": 0.398, "step": 50740 }, { "epoch": 2.9798602548294286, "grad_norm": 2.434617280960083, "learning_rate": 1.3728197413881428e-09, "loss": 0.3414, "step": 50750 }, { "epoch": 2.9804474194116612, "grad_norm": 2.511126756668091, "learning_rate": 1.2939422230096033e-09, "loss": 0.3672, "step": 50760 }, { "epoch": 2.9810345839938934, "grad_norm": 1.9848135709762573, "learning_rate": 1.2173979624191534e-09, "loss": 0.4988, "step": 50770 }, { "epoch": 2.981621748576126, "grad_norm": 8.477264404296875, "learning_rate": 1.1431869953459906e-09, "loss": 0.4298, "step": 50780 }, { "epoch": 2.9822089131583582, "grad_norm": 11.847114562988281, "learning_rate": 1.0713093564290732e-09, "loss": 0.3847, "step": 50790 }, { "epoch": 2.9827960777405904, "grad_norm": 2.2989461421966553, "learning_rate": 1.0017650792193412e-09, "loss": 0.2688, "step": 50800 }, { "epoch": 2.983383242322823, "grad_norm": 10.094685554504395, "learning_rate": 9.345541961780502e-10, "loss": 0.3431, "step": 50810 }, { "epoch": 2.9839704069050557, "grad_norm": 8.261369705200195, "learning_rate": 8.696767386773274e-10, "loss": 0.4098, "step": 50820 }, { "epoch": 2.984557571487288, "grad_norm": 2.1612517833709717, "learning_rate": 8.071327370001714e-10, "loss": 0.2749, "step": 50830 }, { "epoch": 2.98514473606952, "grad_norm": 5.280360221862793, "learning_rate": 7.469222203398963e-10, "loss": 0.4143, "step": 50840 }, { "epoch": 2.9857319006517526, "grad_norm": 2.3151824474334717, "learning_rate": 6.89045216801798e-10, "loss": 0.4124, "step": 50850 }, { "epoch": 2.9863190652339853, "grad_norm": 9.957106590270996, "learning_rate": 6.335017534014887e-10, "loss": 0.3506, "step": 50860 }, { "epoch": 2.9869062298162175, "grad_norm": 8.792963027954102, "learning_rate": 5.802918560643411e-10, "loss": 0.3498, "step": 50870 }, { "epoch": 2.9874933943984496, "grad_norm": 4.40753173828125, "learning_rate": 5.294155496277098e-10, "loss": 0.4084, "step": 50880 }, { "epoch": 2.9880805589806823, "grad_norm": 2.2294137477874756, "learning_rate": 4.808728578387101e-10, "loss": 0.4442, "step": 50890 }, { "epoch": 2.988667723562915, "grad_norm": 2.063739776611328, "learning_rate": 4.346638033564388e-10, "loss": 0.3173, "step": 50900 }, { "epoch": 2.989254888145147, "grad_norm": 2.6464779376983643, "learning_rate": 3.907884077497537e-10, "loss": 0.3169, "step": 50910 }, { "epoch": 2.9898420527273792, "grad_norm": 4.137442111968994, "learning_rate": 3.4924669149893895e-10, "loss": 0.4088, "step": 50920 }, { "epoch": 2.990429217309612, "grad_norm": 4.407742500305176, "learning_rate": 3.1003867399348465e-10, "loss": 0.4317, "step": 50930 }, { "epoch": 2.9910163818918445, "grad_norm": 7.7372355461120605, "learning_rate": 2.7316437353541724e-10, "loss": 0.554, "step": 50940 }, { "epoch": 2.9916035464740767, "grad_norm": 5.642074108123779, "learning_rate": 2.3862380733596926e-10, "loss": 0.384, "step": 50950 }, { "epoch": 2.992190711056309, "grad_norm": 1.6467161178588867, "learning_rate": 2.0641699151835449e-10, "loss": 0.3214, "step": 50960 }, { "epoch": 2.9927778756385415, "grad_norm": 2.3429882526397705, "learning_rate": 1.7654394111554784e-10, "loss": 0.3312, "step": 50970 }, { "epoch": 2.993365040220774, "grad_norm": 6.82414436340332, "learning_rate": 1.490046700713954e-10, "loss": 0.4026, "step": 50980 }, { "epoch": 2.9939522048030063, "grad_norm": 2.8197221755981445, "learning_rate": 1.2379919124116958e-10, "loss": 0.4778, "step": 50990 }, { "epoch": 2.9945393693852385, "grad_norm": 2.2440719604492188, "learning_rate": 1.009275163887935e-10, "loss": 0.4096, "step": 51000 }, { "epoch": 2.995126533967471, "grad_norm": 2.2345075607299805, "learning_rate": 8.038965619128203e-11, "loss": 0.3925, "step": 51010 }, { "epoch": 2.9957136985497037, "grad_norm": 1.900291085243225, "learning_rate": 6.218562023485586e-11, "loss": 0.4834, "step": 51020 }, { "epoch": 2.996300863131936, "grad_norm": 2.9142568111419678, "learning_rate": 4.63154170160518e-11, "loss": 0.3401, "step": 51030 }, { "epoch": 2.996888027714168, "grad_norm": 2.088899850845337, "learning_rate": 3.2779053943388094e-11, "loss": 0.3474, "step": 51040 }, { "epoch": 2.9974751922964007, "grad_norm": 1.4059855937957764, "learning_rate": 2.1576537334588864e-11, "loss": 0.29, "step": 51050 }, { "epoch": 2.9980623568786333, "grad_norm": 5.014263153076172, "learning_rate": 1.2707872419359668e-11, "loss": 0.393, "step": 51060 }, { "epoch": 2.9986495214608655, "grad_norm": 3.066333532333374, "learning_rate": 6.173063337167051e-12, "loss": 0.4214, "step": 51070 }, { "epoch": 2.9992366860430977, "grad_norm": 3.870379686355591, "learning_rate": 1.9721131383487745e-12, "loss": 0.3393, "step": 51080 }, { "epoch": 2.9998238506253303, "grad_norm": 2.1822891235351562, "learning_rate": 1.0502378300358829e-13, "loss": 0.4562, "step": 51090 }, { "epoch": 3.0, "step": 51093, "total_flos": 1.1844207027075154e+19, "train_loss": 0.479163933511447, "train_runtime": 44057.776, "train_samples_per_second": 4.639, "train_steps_per_second": 1.16 } ], "logging_steps": 10, "max_steps": 51093, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1844207027075154e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }