{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1264584434721976, "eval_steps": 500, "global_step": 90000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.4050938163577509e-05, "grad_norm": 2.420112133026123, "learning_rate": 3.793733314598848e-08, "loss": 10.5041, "step": 10 }, { "epoch": 2.8101876327155018e-05, "grad_norm": 2.226503849029541, "learning_rate": 8.008992553042011e-08, "loss": 10.4971, "step": 20 }, { "epoch": 4.215281449073253e-05, "grad_norm": 2.459202527999878, "learning_rate": 1.2224251791485176e-07, "loss": 10.5017, "step": 30 }, { "epoch": 5.6203752654310035e-05, "grad_norm": 2.5928807258605957, "learning_rate": 1.6439511029928338e-07, "loss": 10.5044, "step": 40 }, { "epoch": 7.025469081788755e-05, "grad_norm": 2.4456496238708496, "learning_rate": 2.0654770268371502e-07, "loss": 10.4912, "step": 50 }, { "epoch": 8.430562898146506e-05, "grad_norm": 3.4608006477355957, "learning_rate": 2.4870029506814667e-07, "loss": 10.5029, "step": 60 }, { "epoch": 9.835656714504256e-05, "grad_norm": 2.387044668197632, "learning_rate": 2.908528874525783e-07, "loss": 10.4957, "step": 70 }, { "epoch": 0.00011240750530862007, "grad_norm": 2.4745988845825195, "learning_rate": 3.3300547983700995e-07, "loss": 10.503, "step": 80 }, { "epoch": 0.00012645844347219758, "grad_norm": 3.1475226879119873, "learning_rate": 3.751580722214416e-07, "loss": 10.4968, "step": 90 }, { "epoch": 0.0001405093816357751, "grad_norm": 2.5408034324645996, "learning_rate": 4.173106646058732e-07, "loss": 10.4923, "step": 100 }, { "epoch": 0.0001545603197993526, "grad_norm": 2.2912540435791016, "learning_rate": 4.594632569903049e-07, "loss": 10.4942, "step": 110 }, { "epoch": 0.0001686112579629301, "grad_norm": 2.3079867362976074, "learning_rate": 5.016158493747365e-07, "loss": 10.4897, "step": 120 }, { "epoch": 0.00018266219612650763, "grad_norm": 2.3974714279174805, 
"learning_rate": 5.437684417591682e-07, "loss": 10.4806, "step": 130 }, { "epoch": 0.00019671313429008513, "grad_norm": 2.180088758468628, "learning_rate": 5.859210341435998e-07, "loss": 10.4768, "step": 140 }, { "epoch": 0.00021076407245366265, "grad_norm": 2.415879249572754, "learning_rate": 6.280736265280315e-07, "loss": 10.4835, "step": 150 }, { "epoch": 0.00022481501061724014, "grad_norm": 2.506563425064087, "learning_rate": 6.70226218912463e-07, "loss": 10.4767, "step": 160 }, { "epoch": 0.00023886594878081766, "grad_norm": 2.7735683917999268, "learning_rate": 7.123788112968946e-07, "loss": 10.4738, "step": 170 }, { "epoch": 0.00025291688694439516, "grad_norm": 2.5280656814575195, "learning_rate": 7.545314036813263e-07, "loss": 10.4746, "step": 180 }, { "epoch": 0.0002669678251079727, "grad_norm": 2.373305559158325, "learning_rate": 7.966839960657579e-07, "loss": 10.4709, "step": 190 }, { "epoch": 0.0002810187632715502, "grad_norm": 2.3311121463775635, "learning_rate": 8.388365884501895e-07, "loss": 10.4571, "step": 200 }, { "epoch": 0.0002950697014351277, "grad_norm": 2.61584210395813, "learning_rate": 8.809891808346213e-07, "loss": 10.4502, "step": 210 }, { "epoch": 0.0003091206395987052, "grad_norm": 2.391726016998291, "learning_rate": 9.231417732190529e-07, "loss": 10.4474, "step": 220 }, { "epoch": 0.0003231715777622827, "grad_norm": 2.519890785217285, "learning_rate": 9.652943656034844e-07, "loss": 10.4378, "step": 230 }, { "epoch": 0.0003372225159258602, "grad_norm": 2.557464599609375, "learning_rate": 1.0074469579879162e-06, "loss": 10.4381, "step": 240 }, { "epoch": 0.00035127345408943775, "grad_norm": 2.5819175243377686, "learning_rate": 1.0495995503723478e-06, "loss": 10.4289, "step": 250 }, { "epoch": 0.00036532439225301527, "grad_norm": 2.536975622177124, "learning_rate": 1.0917521427567794e-06, "loss": 10.4208, "step": 260 }, { "epoch": 0.00037937533041659273, "grad_norm": 2.575305700302124, "learning_rate": 1.1339047351412112e-06, "loss": 
10.4069, "step": 270 }, { "epoch": 0.00039342626858017025, "grad_norm": 2.553637981414795, "learning_rate": 1.1760573275256428e-06, "loss": 10.3983, "step": 280 }, { "epoch": 0.0004074772067437478, "grad_norm": 2.40779709815979, "learning_rate": 1.2182099199100744e-06, "loss": 10.3975, "step": 290 }, { "epoch": 0.0004215281449073253, "grad_norm": 2.8702962398529053, "learning_rate": 1.260362512294506e-06, "loss": 10.3814, "step": 300 }, { "epoch": 0.0004355790830709028, "grad_norm": 2.869860887527466, "learning_rate": 1.3025151046789375e-06, "loss": 10.3902, "step": 310 }, { "epoch": 0.0004496300212344803, "grad_norm": 2.448800563812256, "learning_rate": 1.3446676970633693e-06, "loss": 10.3575, "step": 320 }, { "epoch": 0.0004636809593980578, "grad_norm": 2.3154518604278564, "learning_rate": 1.386820289447801e-06, "loss": 10.3371, "step": 330 }, { "epoch": 0.0004777318975616353, "grad_norm": 2.516812562942505, "learning_rate": 1.4289728818322327e-06, "loss": 10.3448, "step": 340 }, { "epoch": 0.0004917828357252128, "grad_norm": 2.7383711338043213, "learning_rate": 1.4711254742166643e-06, "loss": 10.3181, "step": 350 }, { "epoch": 0.0005058337738887903, "grad_norm": 2.346808910369873, "learning_rate": 1.513278066601096e-06, "loss": 10.2987, "step": 360 }, { "epoch": 0.0005198847120523679, "grad_norm": 2.4467155933380127, "learning_rate": 1.5554306589855275e-06, "loss": 10.2587, "step": 370 }, { "epoch": 0.0005339356502159454, "grad_norm": 2.229548454284668, "learning_rate": 1.597583251369959e-06, "loss": 10.2929, "step": 380 }, { "epoch": 0.0005479865883795228, "grad_norm": 2.3183753490448, "learning_rate": 1.6397358437543907e-06, "loss": 10.231, "step": 390 }, { "epoch": 0.0005620375265431004, "grad_norm": 2.497016668319702, "learning_rate": 1.6818884361388223e-06, "loss": 10.2457, "step": 400 }, { "epoch": 0.0005760884647066779, "grad_norm": 2.5405783653259277, "learning_rate": 1.724041028523254e-06, "loss": 10.2305, "step": 410 }, { "epoch": 
0.0005901394028702554, "grad_norm": 2.377484083175659, "learning_rate": 1.7661936209076857e-06, "loss": 10.176, "step": 420 }, { "epoch": 0.0006041903410338329, "grad_norm": 1.8428674936294556, "learning_rate": 1.8083462132921172e-06, "loss": 10.1656, "step": 430 }, { "epoch": 0.0006182412791974104, "grad_norm": 2.1964094638824463, "learning_rate": 1.8504988056765488e-06, "loss": 10.1992, "step": 440 }, { "epoch": 0.0006322922173609879, "grad_norm": 2.0467209815979004, "learning_rate": 1.8926513980609804e-06, "loss": 10.1928, "step": 450 }, { "epoch": 0.0006463431555245654, "grad_norm": 2.0101869106292725, "learning_rate": 1.9348039904454122e-06, "loss": 10.1389, "step": 460 }, { "epoch": 0.000660394093688143, "grad_norm": 1.808905005455017, "learning_rate": 1.976956582829844e-06, "loss": 10.1005, "step": 470 }, { "epoch": 0.0006744450318517205, "grad_norm": 2.215162515640259, "learning_rate": 2.0191091752142754e-06, "loss": 10.0665, "step": 480 }, { "epoch": 0.0006884959700152979, "grad_norm": 1.7877708673477173, "learning_rate": 2.061261767598707e-06, "loss": 10.0854, "step": 490 }, { "epoch": 0.0007025469081788755, "grad_norm": 2.141770124435425, "learning_rate": 2.1034143599831386e-06, "loss": 10.04, "step": 500 }, { "epoch": 0.000716597846342453, "grad_norm": 1.706669807434082, "learning_rate": 2.14556695236757e-06, "loss": 10.0788, "step": 510 }, { "epoch": 0.0007306487845060305, "grad_norm": 1.7067891359329224, "learning_rate": 2.1877195447520018e-06, "loss": 9.9621, "step": 520 }, { "epoch": 0.000744699722669608, "grad_norm": 1.784010410308838, "learning_rate": 2.2298721371364338e-06, "loss": 9.97, "step": 530 }, { "epoch": 0.0007587506608331855, "grad_norm": 2.3449370861053467, "learning_rate": 2.2720247295208654e-06, "loss": 9.9688, "step": 540 }, { "epoch": 0.000772801598996763, "grad_norm": 1.652551293373108, "learning_rate": 2.314177321905297e-06, "loss": 9.9469, "step": 550 }, { "epoch": 0.0007868525371603405, "grad_norm": 1.5717960596084595, 
"learning_rate": 2.356329914289729e-06, "loss": 9.9077, "step": 560 }, { "epoch": 0.0008009034753239181, "grad_norm": 1.4391708374023438, "learning_rate": 2.3984825066741606e-06, "loss": 9.9488, "step": 570 }, { "epoch": 0.0008149544134874955, "grad_norm": 1.4368293285369873, "learning_rate": 2.440635099058592e-06, "loss": 9.848, "step": 580 }, { "epoch": 0.000829005351651073, "grad_norm": 1.9761817455291748, "learning_rate": 2.4827876914430237e-06, "loss": 9.8789, "step": 590 }, { "epoch": 0.0008430562898146506, "grad_norm": 1.1829458475112915, "learning_rate": 2.5249402838274553e-06, "loss": 9.8254, "step": 600 }, { "epoch": 0.0008571072279782281, "grad_norm": 1.5284178256988525, "learning_rate": 2.567092876211887e-06, "loss": 9.8105, "step": 610 }, { "epoch": 0.0008711581661418056, "grad_norm": 1.3892128467559814, "learning_rate": 2.6092454685963185e-06, "loss": 9.8078, "step": 620 }, { "epoch": 0.0008852091043053831, "grad_norm": 1.2729390859603882, "learning_rate": 2.65139806098075e-06, "loss": 9.7646, "step": 630 }, { "epoch": 0.0008992600424689606, "grad_norm": 1.1548229455947876, "learning_rate": 2.6935506533651817e-06, "loss": 9.7455, "step": 640 }, { "epoch": 0.0009133109806325381, "grad_norm": 1.2774633169174194, "learning_rate": 2.7357032457496133e-06, "loss": 9.74, "step": 650 }, { "epoch": 0.0009273619187961156, "grad_norm": 1.1594111919403076, "learning_rate": 2.777855838134045e-06, "loss": 9.7456, "step": 660 }, { "epoch": 0.0009414128569596932, "grad_norm": 1.22251296043396, "learning_rate": 2.8200084305184765e-06, "loss": 9.7187, "step": 670 }, { "epoch": 0.0009554637951232706, "grad_norm": 1.4419326782226562, "learning_rate": 2.8621610229029085e-06, "loss": 9.6953, "step": 680 }, { "epoch": 0.0009695147332868481, "grad_norm": 1.0429575443267822, "learning_rate": 2.90431361528734e-06, "loss": 9.608, "step": 690 }, { "epoch": 0.0009835656714504257, "grad_norm": 1.1225723028182983, "learning_rate": 2.9464662076717717e-06, "loss": 9.604, "step": 700 
}, { "epoch": 0.0009976166096140032, "grad_norm": 1.0744000673294067, "learning_rate": 2.9886188000562033e-06, "loss": 9.61, "step": 710 }, { "epoch": 0.0010116675477775806, "grad_norm": 1.0817335844039917, "learning_rate": 3.030771392440635e-06, "loss": 9.533, "step": 720 }, { "epoch": 0.001025718485941158, "grad_norm": 1.5660817623138428, "learning_rate": 3.0729239848250664e-06, "loss": 9.5771, "step": 730 }, { "epoch": 0.0010397694241047358, "grad_norm": 1.141122579574585, "learning_rate": 3.115076577209498e-06, "loss": 9.5357, "step": 740 }, { "epoch": 0.0010538203622683132, "grad_norm": 0.990804135799408, "learning_rate": 3.1572291695939296e-06, "loss": 9.5077, "step": 750 }, { "epoch": 0.0010678713004318907, "grad_norm": 0.9177122116088867, "learning_rate": 3.199381761978361e-06, "loss": 9.4705, "step": 760 }, { "epoch": 0.0010819222385954682, "grad_norm": 1.214892864227295, "learning_rate": 3.241534354362793e-06, "loss": 9.4865, "step": 770 }, { "epoch": 0.0010959731767590456, "grad_norm": 0.943670392036438, "learning_rate": 3.2836869467472244e-06, "loss": 9.4527, "step": 780 }, { "epoch": 0.0011100241149226233, "grad_norm": 1.3754514455795288, "learning_rate": 3.325839539131656e-06, "loss": 9.4666, "step": 790 }, { "epoch": 0.0011240750530862008, "grad_norm": 1.0014880895614624, "learning_rate": 3.3679921315160876e-06, "loss": 9.4266, "step": 800 }, { "epoch": 0.0011381259912497783, "grad_norm": 1.1103196144104004, "learning_rate": 3.41014472390052e-06, "loss": 9.4199, "step": 810 }, { "epoch": 0.0011521769294133557, "grad_norm": 0.8306254148483276, "learning_rate": 3.4522973162849516e-06, "loss": 9.396, "step": 820 }, { "epoch": 0.0011662278675769332, "grad_norm": 1.3354564905166626, "learning_rate": 3.494449908669383e-06, "loss": 9.4007, "step": 830 }, { "epoch": 0.0011802788057405109, "grad_norm": 1.1535601615905762, "learning_rate": 3.5366025010538148e-06, "loss": 9.3241, "step": 840 }, { "epoch": 0.0011943297439040883, "grad_norm": 0.8198082447052002, 
"learning_rate": 3.5787550934382464e-06, "loss": 9.3004, "step": 850 }, { "epoch": 0.0012083806820676658, "grad_norm": 0.9945477247238159, "learning_rate": 3.620907685822678e-06, "loss": 9.2414, "step": 860 }, { "epoch": 0.0012224316202312433, "grad_norm": 0.842752993106842, "learning_rate": 3.6630602782071095e-06, "loss": 9.2984, "step": 870 }, { "epoch": 0.0012364825583948207, "grad_norm": 0.8242266774177551, "learning_rate": 3.705212870591541e-06, "loss": 9.2572, "step": 880 }, { "epoch": 0.0012505334965583984, "grad_norm": 0.8884344696998596, "learning_rate": 3.7473654629759727e-06, "loss": 9.2445, "step": 890 }, { "epoch": 0.0012645844347219759, "grad_norm": 0.8368833065032959, "learning_rate": 3.7895180553604043e-06, "loss": 9.254, "step": 900 }, { "epoch": 0.0012786353728855534, "grad_norm": 0.9276847243309021, "learning_rate": 3.831670647744836e-06, "loss": 9.2425, "step": 910 }, { "epoch": 0.0012926863110491308, "grad_norm": 0.827470064163208, "learning_rate": 3.873823240129267e-06, "loss": 9.2212, "step": 920 }, { "epoch": 0.0013067372492127083, "grad_norm": 0.9183015823364258, "learning_rate": 3.915975832513699e-06, "loss": 9.1258, "step": 930 }, { "epoch": 0.001320788187376286, "grad_norm": 0.8526141047477722, "learning_rate": 3.958128424898131e-06, "loss": 9.1636, "step": 940 }, { "epoch": 0.0013348391255398634, "grad_norm": 0.9103283286094666, "learning_rate": 4.000281017282562e-06, "loss": 9.1574, "step": 950 }, { "epoch": 0.001348890063703441, "grad_norm": 0.8416949510574341, "learning_rate": 4.042433609666994e-06, "loss": 9.1176, "step": 960 }, { "epoch": 0.0013629410018670184, "grad_norm": 0.8522294759750366, "learning_rate": 4.084586202051426e-06, "loss": 9.0875, "step": 970 }, { "epoch": 0.0013769919400305958, "grad_norm": 0.8377172946929932, "learning_rate": 4.1267387944358575e-06, "loss": 9.0616, "step": 980 }, { "epoch": 0.0013910428781941735, "grad_norm": 0.7883292436599731, "learning_rate": 4.1688913868202895e-06, "loss": 9.0931, "step": 
990 }, { "epoch": 0.001405093816357751, "grad_norm": 1.2264714241027832, "learning_rate": 4.211043979204721e-06, "loss": 9.0459, "step": 1000 }, { "epoch": 0.0014191447545213285, "grad_norm": 0.8207995295524597, "learning_rate": 4.253196571589153e-06, "loss": 9.0147, "step": 1010 }, { "epoch": 0.001433195692684906, "grad_norm": 0.751477062702179, "learning_rate": 4.295349163973584e-06, "loss": 9.0429, "step": 1020 }, { "epoch": 0.0014472466308484834, "grad_norm": 0.7454262971878052, "learning_rate": 4.337501756358016e-06, "loss": 9.0418, "step": 1030 }, { "epoch": 0.001461297569012061, "grad_norm": 0.7283434271812439, "learning_rate": 4.379654348742447e-06, "loss": 9.0124, "step": 1040 }, { "epoch": 0.0014753485071756385, "grad_norm": 0.8492335677146912, "learning_rate": 4.421806941126879e-06, "loss": 9.014, "step": 1050 }, { "epoch": 0.001489399445339216, "grad_norm": 0.7394980192184448, "learning_rate": 4.463959533511311e-06, "loss": 9.0262, "step": 1060 }, { "epoch": 0.0015034503835027935, "grad_norm": 0.8477874398231506, "learning_rate": 4.506112125895742e-06, "loss": 8.9394, "step": 1070 }, { "epoch": 0.001517501321666371, "grad_norm": 0.7641156315803528, "learning_rate": 4.548264718280174e-06, "loss": 8.9173, "step": 1080 }, { "epoch": 0.0015315522598299486, "grad_norm": 0.7645962834358215, "learning_rate": 4.590417310664605e-06, "loss": 8.897, "step": 1090 }, { "epoch": 0.001545603197993526, "grad_norm": 0.7267779111862183, "learning_rate": 4.632569903049037e-06, "loss": 8.9216, "step": 1100 }, { "epoch": 0.0015596541361571035, "grad_norm": 0.8410118818283081, "learning_rate": 4.6747224954334686e-06, "loss": 8.8682, "step": 1110 }, { "epoch": 0.001573705074320681, "grad_norm": 0.7551319003105164, "learning_rate": 4.7168750878179006e-06, "loss": 8.8911, "step": 1120 }, { "epoch": 0.0015877560124842585, "grad_norm": 1.4869343042373657, "learning_rate": 4.759027680202332e-06, "loss": 8.8781, "step": 1130 }, { "epoch": 0.0016018069506478362, "grad_norm": 
0.6931923627853394, "learning_rate": 4.801180272586764e-06, "loss": 8.8261, "step": 1140 }, { "epoch": 0.0016158578888114136, "grad_norm": 0.7531590461730957, "learning_rate": 4.843332864971195e-06, "loss": 8.8656, "step": 1150 }, { "epoch": 0.001629908826974991, "grad_norm": 0.862003743648529, "learning_rate": 4.885485457355627e-06, "loss": 8.8532, "step": 1160 }, { "epoch": 0.0016439597651385686, "grad_norm": 0.6877137422561646, "learning_rate": 4.927638049740058e-06, "loss": 8.8643, "step": 1170 }, { "epoch": 0.001658010703302146, "grad_norm": 0.6912798881530762, "learning_rate": 4.96979064212449e-06, "loss": 8.8212, "step": 1180 }, { "epoch": 0.0016720616414657237, "grad_norm": 0.7850083708763123, "learning_rate": 5.011943234508921e-06, "loss": 8.8194, "step": 1190 }, { "epoch": 0.0016861125796293012, "grad_norm": 0.7854230999946594, "learning_rate": 5.054095826893353e-06, "loss": 8.7891, "step": 1200 }, { "epoch": 0.0017001635177928786, "grad_norm": 0.6913483142852783, "learning_rate": 5.0962484192777845e-06, "loss": 8.78, "step": 1210 }, { "epoch": 0.0017142144559564561, "grad_norm": 0.881175696849823, "learning_rate": 5.1384010116622165e-06, "loss": 8.7337, "step": 1220 }, { "epoch": 0.0017282653941200336, "grad_norm": 2.36761736869812, "learning_rate": 5.180553604046648e-06, "loss": 8.7539, "step": 1230 }, { "epoch": 0.0017423163322836113, "grad_norm": 0.7986196875572205, "learning_rate": 5.2227061964310805e-06, "loss": 8.7183, "step": 1240 }, { "epoch": 0.0017563672704471887, "grad_norm": 0.6862300038337708, "learning_rate": 5.2648587888155125e-06, "loss": 8.6992, "step": 1250 }, { "epoch": 0.0017704182086107662, "grad_norm": 0.727844774723053, "learning_rate": 5.307011381199944e-06, "loss": 8.6963, "step": 1260 }, { "epoch": 0.0017844691467743437, "grad_norm": 0.6336658596992493, "learning_rate": 5.349163973584376e-06, "loss": 8.7178, "step": 1270 }, { "epoch": 0.0017985200849379211, "grad_norm": 0.6945951581001282, "learning_rate": 5.391316565968807e-06, 
"loss": 8.6921, "step": 1280 }, { "epoch": 0.0018125710231014988, "grad_norm": 1.3838273286819458, "learning_rate": 5.433469158353239e-06, "loss": 8.6852, "step": 1290 }, { "epoch": 0.0018266219612650763, "grad_norm": 1.8115507364273071, "learning_rate": 5.47562175073767e-06, "loss": 8.7111, "step": 1300 }, { "epoch": 0.0018406728994286537, "grad_norm": 0.7600476741790771, "learning_rate": 5.517774343122102e-06, "loss": 8.6884, "step": 1310 }, { "epoch": 0.0018547238375922312, "grad_norm": 0.6664397716522217, "learning_rate": 5.559926935506533e-06, "loss": 8.636, "step": 1320 }, { "epoch": 0.0018687747757558087, "grad_norm": 0.6572439074516296, "learning_rate": 5.602079527890965e-06, "loss": 8.61, "step": 1330 }, { "epoch": 0.0018828257139193864, "grad_norm": 0.6541756987571716, "learning_rate": 5.644232120275396e-06, "loss": 8.613, "step": 1340 }, { "epoch": 0.0018968766520829638, "grad_norm": 0.7731262445449829, "learning_rate": 5.686384712659828e-06, "loss": 8.5664, "step": 1350 }, { "epoch": 0.0019109275902465413, "grad_norm": 0.8053380846977234, "learning_rate": 5.72853730504426e-06, "loss": 8.6225, "step": 1360 }, { "epoch": 0.0019249785284101188, "grad_norm": 0.6197734475135803, "learning_rate": 5.770689897428692e-06, "loss": 8.564, "step": 1370 }, { "epoch": 0.0019390294665736962, "grad_norm": 0.623519241809845, "learning_rate": 5.812842489813123e-06, "loss": 8.5342, "step": 1380 }, { "epoch": 0.0019530804047372737, "grad_norm": 0.9381340742111206, "learning_rate": 5.854995082197555e-06, "loss": 8.5497, "step": 1390 }, { "epoch": 0.0019671313429008514, "grad_norm": 0.677198052406311, "learning_rate": 5.897147674581986e-06, "loss": 8.5417, "step": 1400 }, { "epoch": 0.001981182281064429, "grad_norm": 0.6278719305992126, "learning_rate": 5.939300266966418e-06, "loss": 8.5709, "step": 1410 }, { "epoch": 0.0019952332192280063, "grad_norm": 0.6248645782470703, "learning_rate": 5.981452859350849e-06, "loss": 8.5463, "step": 1420 }, { "epoch": 
0.0020092841573915838, "grad_norm": 0.6436347365379333, "learning_rate": 6.023605451735281e-06, "loss": 8.4893, "step": 1430 }, { "epoch": 0.0020233350955551612, "grad_norm": 0.6340057849884033, "learning_rate": 6.065758044119712e-06, "loss": 8.5257, "step": 1440 }, { "epoch": 0.0020373860337187387, "grad_norm": 0.715142548084259, "learning_rate": 6.107910636504144e-06, "loss": 8.4914, "step": 1450 }, { "epoch": 0.002051436971882316, "grad_norm": 0.7200644612312317, "learning_rate": 6.1500632288885755e-06, "loss": 8.4823, "step": 1460 }, { "epoch": 0.002065487910045894, "grad_norm": 0.8068726062774658, "learning_rate": 6.1922158212730075e-06, "loss": 8.4923, "step": 1470 }, { "epoch": 0.0020795388482094715, "grad_norm": 0.6273617744445801, "learning_rate": 6.2343684136574395e-06, "loss": 8.4483, "step": 1480 }, { "epoch": 0.002093589786373049, "grad_norm": 0.6604838967323303, "learning_rate": 6.2765210060418715e-06, "loss": 8.4753, "step": 1490 }, { "epoch": 0.0021076407245366265, "grad_norm": 0.6091654300689697, "learning_rate": 6.318673598426303e-06, "loss": 8.4524, "step": 1500 }, { "epoch": 0.002121691662700204, "grad_norm": 0.7857375144958496, "learning_rate": 6.360826190810735e-06, "loss": 8.4062, "step": 1510 }, { "epoch": 0.0021357426008637814, "grad_norm": 0.6728093028068542, "learning_rate": 6.402978783195166e-06, "loss": 8.4559, "step": 1520 }, { "epoch": 0.002149793539027359, "grad_norm": 0.6304994821548462, "learning_rate": 6.445131375579598e-06, "loss": 8.4665, "step": 1530 }, { "epoch": 0.0021638444771909363, "grad_norm": 0.6075509786605835, "learning_rate": 6.48728396796403e-06, "loss": 8.4142, "step": 1540 }, { "epoch": 0.002177895415354514, "grad_norm": 0.6211352348327637, "learning_rate": 6.529436560348461e-06, "loss": 8.4188, "step": 1550 }, { "epoch": 0.0021919463535180913, "grad_norm": 0.7409493327140808, "learning_rate": 6.571589152732893e-06, "loss": 8.3903, "step": 1560 }, { "epoch": 0.002205997291681669, "grad_norm": 0.6649135947227478, 
"learning_rate": 6.613741745117324e-06, "loss": 8.3788, "step": 1570 }, { "epoch": 0.0022200482298452466, "grad_norm": 0.6305505037307739, "learning_rate": 6.655894337501756e-06, "loss": 8.3711, "step": 1580 }, { "epoch": 0.002234099168008824, "grad_norm": 0.5979957580566406, "learning_rate": 6.698046929886187e-06, "loss": 8.3786, "step": 1590 }, { "epoch": 0.0022481501061724016, "grad_norm": 0.6189422607421875, "learning_rate": 6.7401995222706194e-06, "loss": 8.3388, "step": 1600 }, { "epoch": 0.002262201044335979, "grad_norm": 0.6126115322113037, "learning_rate": 6.782352114655051e-06, "loss": 8.382, "step": 1610 }, { "epoch": 0.0022762519824995565, "grad_norm": 0.6059759855270386, "learning_rate": 6.824504707039483e-06, "loss": 8.3826, "step": 1620 }, { "epoch": 0.002290302920663134, "grad_norm": 0.6775907874107361, "learning_rate": 6.866657299423914e-06, "loss": 8.3403, "step": 1630 }, { "epoch": 0.0023043538588267114, "grad_norm": 0.7798088788986206, "learning_rate": 6.908809891808346e-06, "loss": 8.3181, "step": 1640 }, { "epoch": 0.002318404796990289, "grad_norm": 0.8462784886360168, "learning_rate": 6.950962484192777e-06, "loss": 8.3389, "step": 1650 }, { "epoch": 0.0023324557351538664, "grad_norm": 0.5759812593460083, "learning_rate": 6.993115076577209e-06, "loss": 8.3245, "step": 1660 }, { "epoch": 0.0023465066733174443, "grad_norm": 0.8392948508262634, "learning_rate": 7.03526766896164e-06, "loss": 8.3226, "step": 1670 }, { "epoch": 0.0023605576114810217, "grad_norm": 0.711455762386322, "learning_rate": 7.077420261346072e-06, "loss": 8.2694, "step": 1680 }, { "epoch": 0.002374608549644599, "grad_norm": 0.6649680137634277, "learning_rate": 7.119572853730503e-06, "loss": 8.3165, "step": 1690 }, { "epoch": 0.0023886594878081767, "grad_norm": 0.5890215635299683, "learning_rate": 7.161725446114935e-06, "loss": 8.3204, "step": 1700 }, { "epoch": 0.002402710425971754, "grad_norm": 0.6956003904342651, "learning_rate": 7.2038780384993665e-06, "loss": 8.2913, 
"step": 1710 }, { "epoch": 0.0024167613641353316, "grad_norm": 0.6579176187515259, "learning_rate": 7.2460306308837985e-06, "loss": 8.2964, "step": 1720 }, { "epoch": 0.002430812302298909, "grad_norm": 0.9485933184623718, "learning_rate": 7.2881832232682305e-06, "loss": 8.304, "step": 1730 }, { "epoch": 0.0024448632404624865, "grad_norm": 0.6445179581642151, "learning_rate": 7.3303358156526625e-06, "loss": 8.2677, "step": 1740 }, { "epoch": 0.002458914178626064, "grad_norm": 0.6118063926696777, "learning_rate": 7.372488408037094e-06, "loss": 8.3056, "step": 1750 }, { "epoch": 0.0024729651167896415, "grad_norm": 0.5871592164039612, "learning_rate": 7.414641000421526e-06, "loss": 8.2554, "step": 1760 }, { "epoch": 0.0024870160549532194, "grad_norm": 0.6152786612510681, "learning_rate": 7.456793592805957e-06, "loss": 8.2661, "step": 1770 }, { "epoch": 0.002501066993116797, "grad_norm": 0.6126775145530701, "learning_rate": 7.498946185190389e-06, "loss": 8.2189, "step": 1780 }, { "epoch": 0.0025151179312803743, "grad_norm": 0.5303098559379578, "learning_rate": 7.54109877757482e-06, "loss": 8.2553, "step": 1790 }, { "epoch": 0.0025291688694439518, "grad_norm": 0.668865978717804, "learning_rate": 7.583251369959252e-06, "loss": 8.225, "step": 1800 }, { "epoch": 0.0025432198076075292, "grad_norm": 0.6807597875595093, "learning_rate": 7.625403962343683e-06, "loss": 8.2616, "step": 1810 }, { "epoch": 0.0025572707457711067, "grad_norm": 0.5899785757064819, "learning_rate": 7.667556554728115e-06, "loss": 8.2212, "step": 1820 }, { "epoch": 0.002571321683934684, "grad_norm": 0.7344907522201538, "learning_rate": 7.709709147112546e-06, "loss": 8.2091, "step": 1830 }, { "epoch": 0.0025853726220982616, "grad_norm": 0.6710782051086426, "learning_rate": 7.751861739496978e-06, "loss": 8.2347, "step": 1840 }, { "epoch": 0.002599423560261839, "grad_norm": 0.5794618725776672, "learning_rate": 7.79401433188141e-06, "loss": 8.1866, "step": 1850 }, { "epoch": 0.0026134744984254166, 
"grad_norm": 0.6005648970603943, "learning_rate": 7.836166924265842e-06, "loss": 8.2057, "step": 1860 }, { "epoch": 0.002627525436588994, "grad_norm": 0.6351447701454163, "learning_rate": 7.878319516650273e-06, "loss": 8.2069, "step": 1870 }, { "epoch": 0.002641576374752572, "grad_norm": 2.6223857402801514, "learning_rate": 7.920472109034706e-06, "loss": 8.2184, "step": 1880 }, { "epoch": 0.0026556273129161494, "grad_norm": 0.6811882257461548, "learning_rate": 7.962624701419137e-06, "loss": 8.197, "step": 1890 }, { "epoch": 0.002669678251079727, "grad_norm": 0.6392279863357544, "learning_rate": 8.004777293803568e-06, "loss": 8.212, "step": 1900 }, { "epoch": 0.0026837291892433043, "grad_norm": 0.6159211993217468, "learning_rate": 8.046929886187999e-06, "loss": 8.2124, "step": 1910 }, { "epoch": 0.002697780127406882, "grad_norm": 0.9476580619812012, "learning_rate": 8.089082478572432e-06, "loss": 8.1537, "step": 1920 }, { "epoch": 0.0027118310655704593, "grad_norm": 0.645937442779541, "learning_rate": 8.131235070956863e-06, "loss": 8.1634, "step": 1930 }, { "epoch": 0.0027258820037340367, "grad_norm": 0.6065277457237244, "learning_rate": 8.173387663341294e-06, "loss": 8.1557, "step": 1940 }, { "epoch": 0.002739932941897614, "grad_norm": 0.6352872848510742, "learning_rate": 8.215540255725726e-06, "loss": 8.1722, "step": 1950 }, { "epoch": 0.0027539838800611917, "grad_norm": 0.5992732644081116, "learning_rate": 8.257692848110158e-06, "loss": 8.1705, "step": 1960 }, { "epoch": 0.002768034818224769, "grad_norm": 0.8603838682174683, "learning_rate": 8.29984544049459e-06, "loss": 8.1483, "step": 1970 }, { "epoch": 0.002782085756388347, "grad_norm": 0.7166236042976379, "learning_rate": 8.341998032879022e-06, "loss": 8.1939, "step": 1980 }, { "epoch": 0.0027961366945519245, "grad_norm": 0.6886283755302429, "learning_rate": 8.384150625263454e-06, "loss": 8.1459, "step": 1990 }, { "epoch": 0.002810187632715502, "grad_norm": 0.6554070115089417, "learning_rate": 
8.426303217647885e-06, "loss": 8.1696, "step": 2000 }, { "epoch": 0.0028242385708790794, "grad_norm": 0.6008890271186829, "learning_rate": 8.468455810032316e-06, "loss": 8.0969, "step": 2010 }, { "epoch": 0.002838289509042657, "grad_norm": 0.5523585081100464, "learning_rate": 8.510608402416749e-06, "loss": 8.1298, "step": 2020 }, { "epoch": 0.0028523404472062344, "grad_norm": 0.616106390953064, "learning_rate": 8.55276099480118e-06, "loss": 8.1633, "step": 2030 }, { "epoch": 0.002866391385369812, "grad_norm": 0.7677391767501831, "learning_rate": 8.594913587185611e-06, "loss": 8.1168, "step": 2040 }, { "epoch": 0.0028804423235333893, "grad_norm": 1.135625958442688, "learning_rate": 8.637066179570042e-06, "loss": 8.1114, "step": 2050 }, { "epoch": 0.0028944932616969668, "grad_norm": 0.6018761396408081, "learning_rate": 8.679218771954475e-06, "loss": 8.125, "step": 2060 }, { "epoch": 0.0029085441998605442, "grad_norm": 0.6487704515457153, "learning_rate": 8.721371364338906e-06, "loss": 8.1119, "step": 2070 }, { "epoch": 0.002922595138024122, "grad_norm": 0.6532260775566101, "learning_rate": 8.763523956723337e-06, "loss": 8.0926, "step": 2080 }, { "epoch": 0.0029366460761876996, "grad_norm": 0.7966541051864624, "learning_rate": 8.805676549107769e-06, "loss": 8.1293, "step": 2090 }, { "epoch": 0.002950697014351277, "grad_norm": 0.5987772345542908, "learning_rate": 8.847829141492201e-06, "loss": 8.0792, "step": 2100 }, { "epoch": 0.0029647479525148545, "grad_norm": 0.5964518189430237, "learning_rate": 8.889981733876633e-06, "loss": 8.1097, "step": 2110 }, { "epoch": 0.002978798890678432, "grad_norm": 0.6664417386054993, "learning_rate": 8.932134326261064e-06, "loss": 8.0914, "step": 2120 }, { "epoch": 0.0029928498288420095, "grad_norm": 0.7204576730728149, "learning_rate": 8.974286918645495e-06, "loss": 8.0853, "step": 2130 }, { "epoch": 0.003006900767005587, "grad_norm": 0.698284924030304, "learning_rate": 9.016439511029928e-06, "loss": 8.1351, "step": 2140 }, { 
"epoch": 0.0030209517051691644, "grad_norm": 0.6838796138763428, "learning_rate": 9.058592103414359e-06, "loss": 8.0672, "step": 2150 }, { "epoch": 0.003035002643332742, "grad_norm": 0.6164630651473999, "learning_rate": 9.10074469579879e-06, "loss": 8.1009, "step": 2160 }, { "epoch": 0.0030490535814963193, "grad_norm": 0.7379197478294373, "learning_rate": 9.142897288183221e-06, "loss": 8.05, "step": 2170 }, { "epoch": 0.0030631045196598972, "grad_norm": 0.6812073588371277, "learning_rate": 9.185049880567654e-06, "loss": 8.0716, "step": 2180 }, { "epoch": 0.0030771554578234747, "grad_norm": 0.6673164963722229, "learning_rate": 9.227202472952085e-06, "loss": 8.1383, "step": 2190 }, { "epoch": 0.003091206395987052, "grad_norm": 0.7186004519462585, "learning_rate": 9.269355065336517e-06, "loss": 8.0792, "step": 2200 }, { "epoch": 0.0031052573341506296, "grad_norm": 0.9671910405158997, "learning_rate": 9.311507657720948e-06, "loss": 8.0954, "step": 2210 }, { "epoch": 0.003119308272314207, "grad_norm": 0.6637594103813171, "learning_rate": 9.35366025010538e-06, "loss": 8.0612, "step": 2220 }, { "epoch": 0.0031333592104777846, "grad_norm": 0.6269683837890625, "learning_rate": 9.395812842489812e-06, "loss": 8.0422, "step": 2230 }, { "epoch": 0.003147410148641362, "grad_norm": 0.6617890000343323, "learning_rate": 9.437965434874245e-06, "loss": 8.0337, "step": 2240 }, { "epoch": 0.0031614610868049395, "grad_norm": 0.6194980144500732, "learning_rate": 9.480118027258676e-06, "loss": 8.099, "step": 2250 }, { "epoch": 0.003175512024968517, "grad_norm": 0.6944716572761536, "learning_rate": 9.522270619643107e-06, "loss": 8.0398, "step": 2260 }, { "epoch": 0.0031895629631320944, "grad_norm": 0.6326583623886108, "learning_rate": 9.564423212027538e-06, "loss": 8.0368, "step": 2270 }, { "epoch": 0.0032036139012956723, "grad_norm": 0.583436906337738, "learning_rate": 9.606575804411971e-06, "loss": 8.0594, "step": 2280 }, { "epoch": 0.00321766483945925, "grad_norm": 0.6804009079933167, 
"learning_rate": 9.648728396796402e-06, "loss": 8.0141, "step": 2290 }, { "epoch": 0.0032317157776228273, "grad_norm": 0.6234678030014038, "learning_rate": 9.690880989180833e-06, "loss": 7.9864, "step": 2300 }, { "epoch": 0.0032457667157864047, "grad_norm": 0.6890107989311218, "learning_rate": 9.733033581565264e-06, "loss": 8.0474, "step": 2310 }, { "epoch": 0.003259817653949982, "grad_norm": 0.7134827375411987, "learning_rate": 9.775186173949697e-06, "loss": 8.0306, "step": 2320 }, { "epoch": 0.0032738685921135597, "grad_norm": 0.6207192540168762, "learning_rate": 9.817338766334128e-06, "loss": 7.9592, "step": 2330 }, { "epoch": 0.003287919530277137, "grad_norm": 1.1250572204589844, "learning_rate": 9.85949135871856e-06, "loss": 8.0243, "step": 2340 }, { "epoch": 0.0033019704684407146, "grad_norm": 0.7023635506629944, "learning_rate": 9.901643951102993e-06, "loss": 8.017, "step": 2350 }, { "epoch": 0.003316021406604292, "grad_norm": 0.6336594820022583, "learning_rate": 9.943796543487424e-06, "loss": 8.0084, "step": 2360 }, { "epoch": 0.0033300723447678695, "grad_norm": 0.7274377942085266, "learning_rate": 9.985949135871857e-06, "loss": 8.0152, "step": 2370 }, { "epoch": 0.0033441232829314474, "grad_norm": 0.8217412829399109, "learning_rate": 1.0028101728256286e-05, "loss": 7.9932, "step": 2380 }, { "epoch": 0.003358174221095025, "grad_norm": 0.7048346400260925, "learning_rate": 1.0070254320640719e-05, "loss": 7.9944, "step": 2390 }, { "epoch": 0.0033722251592586024, "grad_norm": 0.6946651935577393, "learning_rate": 1.011240691302515e-05, "loss": 7.9447, "step": 2400 }, { "epoch": 0.00338627609742218, "grad_norm": 0.7006087899208069, "learning_rate": 1.0154559505409583e-05, "loss": 8.0093, "step": 2410 }, { "epoch": 0.0034003270355857573, "grad_norm": 0.8661535382270813, "learning_rate": 1.0196712097794012e-05, "loss": 8.0699, "step": 2420 }, { "epoch": 0.0034143779737493348, "grad_norm": 0.8086657524108887, "learning_rate": 1.0238864690178445e-05, "loss": 7.9983, 
"step": 2430 }, { "epoch": 0.0034284289119129122, "grad_norm": 0.6144962906837463, "learning_rate": 1.0281017282562876e-05, "loss": 8.0364, "step": 2440 }, { "epoch": 0.0034424798500764897, "grad_norm": 0.7119608521461487, "learning_rate": 1.032316987494731e-05, "loss": 7.9961, "step": 2450 }, { "epoch": 0.003456530788240067, "grad_norm": 0.7629860043525696, "learning_rate": 1.0365322467331739e-05, "loss": 7.9789, "step": 2460 }, { "epoch": 0.0034705817264036446, "grad_norm": 0.6489969491958618, "learning_rate": 1.0407475059716172e-05, "loss": 7.9433, "step": 2470 }, { "epoch": 0.0034846326645672225, "grad_norm": 0.6559635400772095, "learning_rate": 1.0449627652100603e-05, "loss": 7.9848, "step": 2480 }, { "epoch": 0.0034986836027308, "grad_norm": 0.6898662447929382, "learning_rate": 1.0491780244485036e-05, "loss": 7.9388, "step": 2490 }, { "epoch": 0.0035127345408943775, "grad_norm": 0.7968945503234863, "learning_rate": 1.0533932836869465e-05, "loss": 7.987, "step": 2500 }, { "epoch": 0.003526785479057955, "grad_norm": 0.8573049306869507, "learning_rate": 1.0576085429253898e-05, "loss": 7.937, "step": 2510 }, { "epoch": 0.0035408364172215324, "grad_norm": 0.7501293420791626, "learning_rate": 1.0618238021638329e-05, "loss": 7.9119, "step": 2520 }, { "epoch": 0.00355488735538511, "grad_norm": 0.6564654111862183, "learning_rate": 1.0660390614022762e-05, "loss": 7.9062, "step": 2530 }, { "epoch": 0.0035689382935486873, "grad_norm": 0.7637408971786499, "learning_rate": 1.0702543206407191e-05, "loss": 7.9529, "step": 2540 }, { "epoch": 0.003582989231712265, "grad_norm": 0.6853562593460083, "learning_rate": 1.0744695798791624e-05, "loss": 7.9617, "step": 2550 }, { "epoch": 0.0035970401698758423, "grad_norm": 0.9160966873168945, "learning_rate": 1.0786848391176055e-05, "loss": 7.95, "step": 2560 }, { "epoch": 0.0036110911080394197, "grad_norm": 0.7625723481178284, "learning_rate": 1.0829000983560488e-05, "loss": 7.9144, "step": 2570 }, { "epoch": 0.0036251420462029976, 
"grad_norm": 0.6952919363975525, "learning_rate": 1.0871153575944918e-05, "loss": 7.9539, "step": 2580 }, { "epoch": 0.003639192984366575, "grad_norm": 0.8298730254173279, "learning_rate": 1.091330616832935e-05, "loss": 7.9589, "step": 2590 }, { "epoch": 0.0036532439225301526, "grad_norm": 0.8768827319145203, "learning_rate": 1.0955458760713784e-05, "loss": 7.9266, "step": 2600 }, { "epoch": 0.00366729486069373, "grad_norm": 0.6922865509986877, "learning_rate": 1.0997611353098215e-05, "loss": 7.9015, "step": 2610 }, { "epoch": 0.0036813457988573075, "grad_norm": 0.7246437668800354, "learning_rate": 1.1039763945482648e-05, "loss": 7.9234, "step": 2620 }, { "epoch": 0.003695396737020885, "grad_norm": 0.8041806817054749, "learning_rate": 1.1081916537867077e-05, "loss": 7.9054, "step": 2630 }, { "epoch": 0.0037094476751844624, "grad_norm": 0.7150896191596985, "learning_rate": 1.112406913025151e-05, "loss": 7.9446, "step": 2640 }, { "epoch": 0.00372349861334804, "grad_norm": 0.8792434930801392, "learning_rate": 1.1166221722635941e-05, "loss": 7.9111, "step": 2650 }, { "epoch": 0.0037375495515116174, "grad_norm": 0.7889289259910583, "learning_rate": 1.1208374315020374e-05, "loss": 7.9393, "step": 2660 }, { "epoch": 0.003751600489675195, "grad_norm": 0.8603889346122742, "learning_rate": 1.1250526907404803e-05, "loss": 7.8837, "step": 2670 }, { "epoch": 0.0037656514278387727, "grad_norm": 0.8094549179077148, "learning_rate": 1.1292679499789236e-05, "loss": 7.9193, "step": 2680 }, { "epoch": 0.00377970236600235, "grad_norm": 0.5979373455047607, "learning_rate": 1.1334832092173667e-05, "loss": 7.8922, "step": 2690 }, { "epoch": 0.0037937533041659277, "grad_norm": 0.8888489007949829, "learning_rate": 1.13769846845581e-05, "loss": 7.8884, "step": 2700 }, { "epoch": 0.003807804242329505, "grad_norm": 0.7355663180351257, "learning_rate": 1.141913727694253e-05, "loss": 7.9256, "step": 2710 }, { "epoch": 0.0038218551804930826, "grad_norm": 0.7639088034629822, "learning_rate": 
1.1461289869326963e-05, "loss": 7.8711, "step": 2720 }, { "epoch": 0.00383590611865666, "grad_norm": 1.0683914422988892, "learning_rate": 1.1503442461711394e-05, "loss": 7.8724, "step": 2730 }, { "epoch": 0.0038499570568202375, "grad_norm": 0.99086594581604, "learning_rate": 1.1545595054095827e-05, "loss": 7.8734, "step": 2740 }, { "epoch": 0.003864007994983815, "grad_norm": 0.7015689015388489, "learning_rate": 1.1587747646480256e-05, "loss": 7.893, "step": 2750 }, { "epoch": 0.0038780589331473925, "grad_norm": 0.7968684434890747, "learning_rate": 1.1629900238864689e-05, "loss": 7.8584, "step": 2760 }, { "epoch": 0.00389210987131097, "grad_norm": 0.7174790501594543, "learning_rate": 1.167205283124912e-05, "loss": 7.8578, "step": 2770 }, { "epoch": 0.0039061608094745474, "grad_norm": 0.818223237991333, "learning_rate": 1.1714205423633553e-05, "loss": 7.8421, "step": 2780 }, { "epoch": 0.003920211747638125, "grad_norm": 0.7619180083274841, "learning_rate": 1.1756358016017982e-05, "loss": 7.8614, "step": 2790 }, { "epoch": 0.003934262685801703, "grad_norm": 1.0655688047409058, "learning_rate": 1.1798510608402415e-05, "loss": 7.8461, "step": 2800 }, { "epoch": 0.00394831362396528, "grad_norm": 0.7509090304374695, "learning_rate": 1.1840663200786846e-05, "loss": 7.857, "step": 2810 }, { "epoch": 0.003962364562128858, "grad_norm": 0.7447671890258789, "learning_rate": 1.188281579317128e-05, "loss": 7.8801, "step": 2820 }, { "epoch": 0.003976415500292435, "grad_norm": 0.9486709237098694, "learning_rate": 1.1924968385555709e-05, "loss": 7.7945, "step": 2830 }, { "epoch": 0.003990466438456013, "grad_norm": 0.6755268573760986, "learning_rate": 1.1967120977940142e-05, "loss": 7.8919, "step": 2840 }, { "epoch": 0.0040045173766195905, "grad_norm": 0.8210179805755615, "learning_rate": 1.2009273570324575e-05, "loss": 7.8437, "step": 2850 }, { "epoch": 0.0040185683147831676, "grad_norm": 0.7738075256347656, "learning_rate": 1.2051426162709006e-05, "loss": 7.7473, "step": 2860 }, { 
"epoch": 0.0040326192529467455, "grad_norm": 1.1072646379470825, "learning_rate": 1.2093578755093439e-05, "loss": 7.8089, "step": 2870 }, { "epoch": 0.0040466701911103225, "grad_norm": 0.7715216279029846, "learning_rate": 1.2135731347477868e-05, "loss": 7.8353, "step": 2880 }, { "epoch": 0.0040607211292739, "grad_norm": 0.8774746656417847, "learning_rate": 1.2177883939862301e-05, "loss": 7.7942, "step": 2890 }, { "epoch": 0.004074772067437477, "grad_norm": 0.8812255263328552, "learning_rate": 1.2220036532246732e-05, "loss": 7.8712, "step": 2900 }, { "epoch": 0.004088823005601055, "grad_norm": 0.8199585676193237, "learning_rate": 1.2262189124631165e-05, "loss": 7.8129, "step": 2910 }, { "epoch": 0.004102873943764632, "grad_norm": 0.6967678666114807, "learning_rate": 1.2304341717015594e-05, "loss": 7.7619, "step": 2920 }, { "epoch": 0.00411692488192821, "grad_norm": 0.744611918926239, "learning_rate": 1.2346494309400027e-05, "loss": 7.8447, "step": 2930 }, { "epoch": 0.004130975820091788, "grad_norm": 0.8083895444869995, "learning_rate": 1.2388646901784458e-05, "loss": 7.8054, "step": 2940 }, { "epoch": 0.004145026758255365, "grad_norm": 0.8558597564697266, "learning_rate": 1.2430799494168891e-05, "loss": 7.756, "step": 2950 }, { "epoch": 0.004159077696418943, "grad_norm": 0.9740859866142273, "learning_rate": 1.247295208655332e-05, "loss": 7.7429, "step": 2960 }, { "epoch": 0.00417312863458252, "grad_norm": 0.7745287418365479, "learning_rate": 1.2515104678937754e-05, "loss": 7.7885, "step": 2970 }, { "epoch": 0.004187179572746098, "grad_norm": 0.8137255907058716, "learning_rate": 1.2557257271322185e-05, "loss": 7.7817, "step": 2980 }, { "epoch": 0.004201230510909675, "grad_norm": 0.8644803166389465, "learning_rate": 1.2599409863706618e-05, "loss": 7.8588, "step": 2990 }, { "epoch": 0.004215281449073253, "grad_norm": 0.8625914454460144, "learning_rate": 1.2641562456091047e-05, "loss": 7.728, "step": 3000 }, { "epoch": 0.00422933238723683, "grad_norm": 
1.1142210960388184, "learning_rate": 1.268371504847548e-05, "loss": 7.7437, "step": 3010 }, { "epoch": 0.004243383325400408, "grad_norm": 0.852684736251831, "learning_rate": 1.2725867640859911e-05, "loss": 7.7905, "step": 3020 }, { "epoch": 0.004257434263563985, "grad_norm": 0.8908416628837585, "learning_rate": 1.2768020233244344e-05, "loss": 7.6894, "step": 3030 }, { "epoch": 0.004271485201727563, "grad_norm": 0.8012714982032776, "learning_rate": 1.2810172825628773e-05, "loss": 7.8086, "step": 3040 }, { "epoch": 0.004285536139891141, "grad_norm": 0.6924638748168945, "learning_rate": 1.2852325418013206e-05, "loss": 7.8487, "step": 3050 }, { "epoch": 0.004299587078054718, "grad_norm": 0.8051576614379883, "learning_rate": 1.2894478010397638e-05, "loss": 7.7666, "step": 3060 }, { "epoch": 0.004313638016218296, "grad_norm": 1.3711603879928589, "learning_rate": 1.293663060278207e-05, "loss": 7.7913, "step": 3070 }, { "epoch": 0.004327688954381873, "grad_norm": 0.8338873982429504, "learning_rate": 1.2978783195166503e-05, "loss": 7.7112, "step": 3080 }, { "epoch": 0.004341739892545451, "grad_norm": 0.7337360978126526, "learning_rate": 1.3020935787550933e-05, "loss": 7.753, "step": 3090 }, { "epoch": 0.004355790830709028, "grad_norm": 0.888911247253418, "learning_rate": 1.3063088379935366e-05, "loss": 7.6947, "step": 3100 }, { "epoch": 0.0043698417688726055, "grad_norm": 0.852385401725769, "learning_rate": 1.3105240972319797e-05, "loss": 7.7105, "step": 3110 }, { "epoch": 0.0043838927070361825, "grad_norm": 0.840452253818512, "learning_rate": 1.314739356470423e-05, "loss": 7.8018, "step": 3120 }, { "epoch": 0.0043979436451997604, "grad_norm": 0.8020384311676025, "learning_rate": 1.3189546157088659e-05, "loss": 7.7558, "step": 3130 }, { "epoch": 0.004411994583363338, "grad_norm": 0.8536344170570374, "learning_rate": 1.3231698749473092e-05, "loss": 7.6887, "step": 3140 }, { "epoch": 0.004426045521526915, "grad_norm": 0.8429573178291321, "learning_rate": 
1.3273851341857523e-05, "loss": 7.7193, "step": 3150 }, { "epoch": 0.004440096459690493, "grad_norm": 0.9258587956428528, "learning_rate": 1.3316003934241956e-05, "loss": 7.7103, "step": 3160 }, { "epoch": 0.00445414739785407, "grad_norm": 0.7705568671226501, "learning_rate": 1.3358156526626385e-05, "loss": 7.707, "step": 3170 }, { "epoch": 0.004468198336017648, "grad_norm": 1.0604978799819946, "learning_rate": 1.3400309119010818e-05, "loss": 7.7287, "step": 3180 }, { "epoch": 0.004482249274181225, "grad_norm": 0.8479204177856445, "learning_rate": 1.344246171139525e-05, "loss": 7.6984, "step": 3190 }, { "epoch": 0.004496300212344803, "grad_norm": 0.8553823232650757, "learning_rate": 1.3484614303779682e-05, "loss": 7.6764, "step": 3200 }, { "epoch": 0.00451035115050838, "grad_norm": 0.9012032151222229, "learning_rate": 1.3526766896164112e-05, "loss": 7.7013, "step": 3210 }, { "epoch": 0.004524402088671958, "grad_norm": 0.8775386810302734, "learning_rate": 1.3568919488548545e-05, "loss": 7.7516, "step": 3220 }, { "epoch": 0.004538453026835535, "grad_norm": 1.1585534811019897, "learning_rate": 1.3611072080932976e-05, "loss": 7.6667, "step": 3230 }, { "epoch": 0.004552503964999113, "grad_norm": 0.9868244528770447, "learning_rate": 1.3653224673317409e-05, "loss": 7.6702, "step": 3240 }, { "epoch": 0.004566554903162691, "grad_norm": 0.9672613143920898, "learning_rate": 1.3695377265701838e-05, "loss": 7.6212, "step": 3250 }, { "epoch": 0.004580605841326268, "grad_norm": 1.0549743175506592, "learning_rate": 1.3737529858086271e-05, "loss": 7.6721, "step": 3260 }, { "epoch": 0.004594656779489846, "grad_norm": 0.8549468517303467, "learning_rate": 1.3779682450470702e-05, "loss": 7.809, "step": 3270 }, { "epoch": 0.004608707717653423, "grad_norm": 1.5186914205551147, "learning_rate": 1.3821835042855135e-05, "loss": 7.6878, "step": 3280 }, { "epoch": 0.004622758655817001, "grad_norm": 1.0272847414016724, "learning_rate": 1.3863987635239565e-05, "loss": 7.6362, "step": 3290 }, { 
"epoch": 0.004636809593980578, "grad_norm": 0.8402895927429199, "learning_rate": 1.3906140227623997e-05, "loss": 7.6875, "step": 3300 }, { "epoch": 0.004650860532144156, "grad_norm": 0.8236798644065857, "learning_rate": 1.3948292820008429e-05, "loss": 7.6046, "step": 3310 }, { "epoch": 0.004664911470307733, "grad_norm": 1.09159517288208, "learning_rate": 1.3990445412392861e-05, "loss": 7.7144, "step": 3320 }, { "epoch": 0.004678962408471311, "grad_norm": 0.9624311327934265, "learning_rate": 1.4032598004777294e-05, "loss": 7.6603, "step": 3330 }, { "epoch": 0.0046930133466348885, "grad_norm": 0.9037488698959351, "learning_rate": 1.4074750597161724e-05, "loss": 7.6887, "step": 3340 }, { "epoch": 0.004707064284798466, "grad_norm": 0.8625498414039612, "learning_rate": 1.4116903189546157e-05, "loss": 7.6297, "step": 3350 }, { "epoch": 0.0047211152229620435, "grad_norm": 0.9417233467102051, "learning_rate": 1.4159055781930588e-05, "loss": 7.6359, "step": 3360 }, { "epoch": 0.0047351661611256205, "grad_norm": 0.9447644948959351, "learning_rate": 1.420120837431502e-05, "loss": 7.6173, "step": 3370 }, { "epoch": 0.004749217099289198, "grad_norm": 0.9068923592567444, "learning_rate": 1.424336096669945e-05, "loss": 7.6694, "step": 3380 }, { "epoch": 0.0047632680374527754, "grad_norm": 0.9849382042884827, "learning_rate": 1.4285513559083883e-05, "loss": 7.6411, "step": 3390 }, { "epoch": 0.004777318975616353, "grad_norm": 0.8907585740089417, "learning_rate": 1.4327666151468314e-05, "loss": 7.6669, "step": 3400 }, { "epoch": 0.00479136991377993, "grad_norm": 1.1062923669815063, "learning_rate": 1.4369818743852747e-05, "loss": 7.6615, "step": 3410 }, { "epoch": 0.004805420851943508, "grad_norm": 0.9381359219551086, "learning_rate": 1.4411971336237176e-05, "loss": 7.6943, "step": 3420 }, { "epoch": 0.004819471790107085, "grad_norm": 1.3278305530548096, "learning_rate": 1.445412392862161e-05, "loss": 7.6195, "step": 3430 }, { "epoch": 0.004833522728270663, "grad_norm": 
0.8325286507606506, "learning_rate": 1.449627652100604e-05, "loss": 7.6601, "step": 3440 }, { "epoch": 0.004847573666434241, "grad_norm": 0.9755851030349731, "learning_rate": 1.4538429113390473e-05, "loss": 7.6727, "step": 3450 }, { "epoch": 0.004861624604597818, "grad_norm": 0.8660493493080139, "learning_rate": 1.4580581705774903e-05, "loss": 7.5598, "step": 3460 }, { "epoch": 0.004875675542761396, "grad_norm": 0.8259555101394653, "learning_rate": 1.4622734298159336e-05, "loss": 7.5946, "step": 3470 }, { "epoch": 0.004889726480924973, "grad_norm": 0.9008260369300842, "learning_rate": 1.4664886890543767e-05, "loss": 7.5731, "step": 3480 }, { "epoch": 0.004903777419088551, "grad_norm": 0.8646583557128906, "learning_rate": 1.47070394829282e-05, "loss": 7.602, "step": 3490 }, { "epoch": 0.004917828357252128, "grad_norm": 0.8853713870048523, "learning_rate": 1.474919207531263e-05, "loss": 7.5559, "step": 3500 }, { "epoch": 0.004931879295415706, "grad_norm": 0.8749480247497559, "learning_rate": 1.4791344667697062e-05, "loss": 7.5826, "step": 3510 }, { "epoch": 0.004945930233579283, "grad_norm": 0.8559566736221313, "learning_rate": 1.4833497260081493e-05, "loss": 7.6301, "step": 3520 }, { "epoch": 0.004959981171742861, "grad_norm": 0.8258165121078491, "learning_rate": 1.4875649852465926e-05, "loss": 7.5909, "step": 3530 }, { "epoch": 0.004974032109906439, "grad_norm": 0.9734731912612915, "learning_rate": 1.4917802444850356e-05, "loss": 7.5878, "step": 3540 }, { "epoch": 0.004988083048070016, "grad_norm": 0.9722371697425842, "learning_rate": 1.4959955037234788e-05, "loss": 7.5181, "step": 3550 }, { "epoch": 0.005002133986233594, "grad_norm": 1.2670505046844482, "learning_rate": 1.500210762961922e-05, "loss": 7.6094, "step": 3560 }, { "epoch": 0.005016184924397171, "grad_norm": 0.8584160804748535, "learning_rate": 1.5044260222003652e-05, "loss": 7.5955, "step": 3570 }, { "epoch": 0.005030235862560749, "grad_norm": 1.0112240314483643, "learning_rate": 
1.5086412814388085e-05, "loss": 7.6168, "step": 3580 }, { "epoch": 0.005044286800724326, "grad_norm": 4.073171138763428, "learning_rate": 1.5128565406772515e-05, "loss": 7.5669, "step": 3590 }, { "epoch": 0.0050583377388879035, "grad_norm": 0.9480034112930298, "learning_rate": 1.5170717999156948e-05, "loss": 7.505, "step": 3600 }, { "epoch": 0.005072388677051481, "grad_norm": 0.95200115442276, "learning_rate": 1.5212870591541379e-05, "loss": 7.6224, "step": 3610 }, { "epoch": 0.0050864396152150585, "grad_norm": 1.117475986480713, "learning_rate": 1.5255023183925812e-05, "loss": 7.5023, "step": 3620 }, { "epoch": 0.0051004905533786355, "grad_norm": 0.9365076422691345, "learning_rate": 1.529717577631024e-05, "loss": 7.5295, "step": 3630 }, { "epoch": 0.005114541491542213, "grad_norm": 0.9868768453598022, "learning_rate": 1.5339328368694676e-05, "loss": 7.5347, "step": 3640 }, { "epoch": 0.005128592429705791, "grad_norm": 0.860265851020813, "learning_rate": 1.5381480961079103e-05, "loss": 7.5199, "step": 3650 }, { "epoch": 0.005142643367869368, "grad_norm": 0.9999997615814209, "learning_rate": 1.5423633553463538e-05, "loss": 7.5793, "step": 3660 }, { "epoch": 0.005156694306032946, "grad_norm": 0.8633819222450256, "learning_rate": 1.546578614584797e-05, "loss": 7.6475, "step": 3670 }, { "epoch": 0.005170745244196523, "grad_norm": 0.902273416519165, "learning_rate": 1.55079387382324e-05, "loss": 7.5576, "step": 3680 }, { "epoch": 0.005184796182360101, "grad_norm": 1.04349946975708, "learning_rate": 1.555009133061683e-05, "loss": 7.5067, "step": 3690 }, { "epoch": 0.005198847120523678, "grad_norm": 0.8825836181640625, "learning_rate": 1.5592243923001263e-05, "loss": 7.5545, "step": 3700 }, { "epoch": 0.005212898058687256, "grad_norm": 0.853202760219574, "learning_rate": 1.5634396515385694e-05, "loss": 7.615, "step": 3710 }, { "epoch": 0.005226948996850833, "grad_norm": 1.0415570735931396, "learning_rate": 1.567654910777013e-05, "loss": 7.5865, "step": 3720 }, { "epoch": 
0.005240999935014411, "grad_norm": 0.943265974521637, "learning_rate": 1.5718701700154556e-05, "loss": 7.5048, "step": 3730 }, { "epoch": 0.005255050873177988, "grad_norm": 1.0362399816513062, "learning_rate": 1.576085429253899e-05, "loss": 7.5796, "step": 3740 }, { "epoch": 0.005269101811341566, "grad_norm": 1.0052247047424316, "learning_rate": 1.5803006884923422e-05, "loss": 7.4803, "step": 3750 }, { "epoch": 0.005283152749505144, "grad_norm": 1.0194339752197266, "learning_rate": 1.5845159477307853e-05, "loss": 7.5259, "step": 3760 }, { "epoch": 0.005297203687668721, "grad_norm": 1.3323463201522827, "learning_rate": 1.5887312069692284e-05, "loss": 7.5403, "step": 3770 }, { "epoch": 0.005311254625832299, "grad_norm": 0.9728202223777771, "learning_rate": 1.5929464662076715e-05, "loss": 7.526, "step": 3780 }, { "epoch": 0.005325305563995876, "grad_norm": 0.979168176651001, "learning_rate": 1.5971617254461147e-05, "loss": 7.5024, "step": 3790 }, { "epoch": 0.005339356502159454, "grad_norm": 1.0642094612121582, "learning_rate": 1.601376984684558e-05, "loss": 7.548, "step": 3800 }, { "epoch": 0.005353407440323031, "grad_norm": 0.8981004953384399, "learning_rate": 1.605592243923001e-05, "loss": 7.5229, "step": 3810 }, { "epoch": 0.005367458378486609, "grad_norm": 0.9074918031692505, "learning_rate": 1.6098075031614443e-05, "loss": 7.5607, "step": 3820 }, { "epoch": 0.005381509316650186, "grad_norm": 0.9015301465988159, "learning_rate": 1.6140227623998875e-05, "loss": 7.5874, "step": 3830 }, { "epoch": 0.005395560254813764, "grad_norm": 0.8846139907836914, "learning_rate": 1.6182380216383306e-05, "loss": 7.4998, "step": 3840 }, { "epoch": 0.0054096111929773415, "grad_norm": 1.1710816621780396, "learning_rate": 1.622453280876774e-05, "loss": 7.4512, "step": 3850 }, { "epoch": 0.0054236621311409185, "grad_norm": 1.1861531734466553, "learning_rate": 1.6266685401152168e-05, "loss": 7.4586, "step": 3860 }, { "epoch": 0.0054377130693044964, "grad_norm": 1.260762095451355, 
"learning_rate": 1.6308837993536603e-05, "loss": 7.4841, "step": 3870 }, { "epoch": 0.0054517640074680735, "grad_norm": 0.8817400932312012, "learning_rate": 1.6350990585921034e-05, "loss": 7.4975, "step": 3880 }, { "epoch": 0.005465814945631651, "grad_norm": 1.0645872354507446, "learning_rate": 1.6393143178305465e-05, "loss": 7.5029, "step": 3890 }, { "epoch": 0.005479865883795228, "grad_norm": 0.9742002487182617, "learning_rate": 1.6435295770689896e-05, "loss": 7.482, "step": 3900 }, { "epoch": 0.005493916821958806, "grad_norm": 0.9298391342163086, "learning_rate": 1.6477448363074327e-05, "loss": 7.5051, "step": 3910 }, { "epoch": 0.005507967760122383, "grad_norm": 0.9993727207183838, "learning_rate": 1.651960095545876e-05, "loss": 7.4972, "step": 3920 }, { "epoch": 0.005522018698285961, "grad_norm": 1.3055437803268433, "learning_rate": 1.6561753547843193e-05, "loss": 7.4705, "step": 3930 }, { "epoch": 0.005536069636449538, "grad_norm": 1.0684734582901, "learning_rate": 1.660390614022762e-05, "loss": 7.45, "step": 3940 }, { "epoch": 0.005550120574613116, "grad_norm": 1.1004630327224731, "learning_rate": 1.6646058732612055e-05, "loss": 7.4706, "step": 3950 }, { "epoch": 0.005564171512776694, "grad_norm": 1.3142606019973755, "learning_rate": 1.6688211324996487e-05, "loss": 7.4427, "step": 3960 }, { "epoch": 0.005578222450940271, "grad_norm": 0.9538849592208862, "learning_rate": 1.6730363917380918e-05, "loss": 7.4318, "step": 3970 }, { "epoch": 0.005592273389103849, "grad_norm": 1.0903221368789673, "learning_rate": 1.677251650976535e-05, "loss": 7.5066, "step": 3980 }, { "epoch": 0.005606324327267426, "grad_norm": 1.7417656183242798, "learning_rate": 1.681466910214978e-05, "loss": 7.5136, "step": 3990 }, { "epoch": 0.005620375265431004, "grad_norm": 1.4083300828933716, "learning_rate": 1.685682169453421e-05, "loss": 7.5295, "step": 4000 }, { "epoch": 0.005634426203594581, "grad_norm": 0.9326221942901611, "learning_rate": 1.6898974286918646e-05, "loss": 7.4544, 
"step": 4010 }, { "epoch": 0.005648477141758159, "grad_norm": 0.936113715171814, "learning_rate": 1.6941126879303074e-05, "loss": 7.4514, "step": 4020 }, { "epoch": 0.005662528079921736, "grad_norm": 0.8883742690086365, "learning_rate": 1.6983279471687508e-05, "loss": 7.4411, "step": 4030 }, { "epoch": 0.005676579018085314, "grad_norm": 1.0120148658752441, "learning_rate": 1.702543206407194e-05, "loss": 7.4305, "step": 4040 }, { "epoch": 0.005690629956248892, "grad_norm": 1.0194205045700073, "learning_rate": 1.706758465645637e-05, "loss": 7.4132, "step": 4050 }, { "epoch": 0.005704680894412469, "grad_norm": 0.8495796918869019, "learning_rate": 1.71097372488408e-05, "loss": 7.4617, "step": 4060 }, { "epoch": 0.005718731832576047, "grad_norm": 1.0692280530929565, "learning_rate": 1.7151889841225233e-05, "loss": 7.4526, "step": 4070 }, { "epoch": 0.005732782770739624, "grad_norm": 0.9888425469398499, "learning_rate": 1.7194042433609667e-05, "loss": 7.4095, "step": 4080 }, { "epoch": 0.005746833708903202, "grad_norm": 1.1134666204452515, "learning_rate": 1.72361950259941e-05, "loss": 7.4779, "step": 4090 }, { "epoch": 0.005760884647066779, "grad_norm": 0.9555155634880066, "learning_rate": 1.727834761837853e-05, "loss": 7.4583, "step": 4100 }, { "epoch": 0.0057749355852303565, "grad_norm": 1.1139003038406372, "learning_rate": 1.732050021076296e-05, "loss": 7.3599, "step": 4110 }, { "epoch": 0.0057889865233939335, "grad_norm": 0.9886006116867065, "learning_rate": 1.7362652803147392e-05, "loss": 7.4064, "step": 4120 }, { "epoch": 0.0058030374615575114, "grad_norm": 1.1849690675735474, "learning_rate": 1.7404805395531823e-05, "loss": 7.4462, "step": 4130 }, { "epoch": 0.0058170883997210885, "grad_norm": 1.1874699592590332, "learning_rate": 1.7446957987916258e-05, "loss": 7.4456, "step": 4140 }, { "epoch": 0.005831139337884666, "grad_norm": 1.0019316673278809, "learning_rate": 1.7489110580300686e-05, "loss": 7.4241, "step": 4150 }, { "epoch": 0.005845190276048244, 
"grad_norm": 1.0301438570022583, "learning_rate": 1.753126317268512e-05, "loss": 7.3951, "step": 4160 }, { "epoch": 0.005859241214211821, "grad_norm": 1.0591559410095215, "learning_rate": 1.757341576506955e-05, "loss": 7.4101, "step": 4170 }, { "epoch": 0.005873292152375399, "grad_norm": 1.3416529893875122, "learning_rate": 1.7615568357453982e-05, "loss": 7.4434, "step": 4180 }, { "epoch": 0.005887343090538976, "grad_norm": 1.153102159500122, "learning_rate": 1.7657720949838414e-05, "loss": 7.4831, "step": 4190 }, { "epoch": 0.005901394028702554, "grad_norm": 1.0388206243515015, "learning_rate": 1.7699873542222845e-05, "loss": 7.4446, "step": 4200 }, { "epoch": 0.005915444966866131, "grad_norm": 0.9760084748268127, "learning_rate": 1.7742026134607276e-05, "loss": 7.4868, "step": 4210 }, { "epoch": 0.005929495905029709, "grad_norm": 1.2405914068222046, "learning_rate": 1.778417872699171e-05, "loss": 7.429, "step": 4220 }, { "epoch": 0.005943546843193286, "grad_norm": 0.9897080659866333, "learning_rate": 1.7826331319376138e-05, "loss": 7.3724, "step": 4230 }, { "epoch": 0.005957597781356864, "grad_norm": 0.85589200258255, "learning_rate": 1.7868483911760573e-05, "loss": 7.451, "step": 4240 }, { "epoch": 0.005971648719520442, "grad_norm": 1.0381278991699219, "learning_rate": 1.7910636504145004e-05, "loss": 7.3856, "step": 4250 }, { "epoch": 0.005985699657684019, "grad_norm": 1.2100075483322144, "learning_rate": 1.7952789096529435e-05, "loss": 7.4264, "step": 4260 }, { "epoch": 0.005999750595847597, "grad_norm": 0.9492905139923096, "learning_rate": 1.7994941688913866e-05, "loss": 7.4114, "step": 4270 }, { "epoch": 0.006013801534011174, "grad_norm": 0.92913419008255, "learning_rate": 1.8037094281298297e-05, "loss": 7.4022, "step": 4280 }, { "epoch": 0.006027852472174752, "grad_norm": 1.1783087253570557, "learning_rate": 1.807924687368273e-05, "loss": 7.3281, "step": 4290 }, { "epoch": 0.006041903410338329, "grad_norm": 1.008894681930542, "learning_rate": 
1.8121399466067163e-05, "loss": 7.39, "step": 4300 }, { "epoch": 0.006055954348501907, "grad_norm": 0.9797470569610596, "learning_rate": 1.816355205845159e-05, "loss": 7.4415, "step": 4310 }, { "epoch": 0.006070005286665484, "grad_norm": 1.3586163520812988, "learning_rate": 1.8205704650836025e-05, "loss": 7.3472, "step": 4320 }, { "epoch": 0.006084056224829062, "grad_norm": 1.0115594863891602, "learning_rate": 1.8247857243220457e-05, "loss": 7.4058, "step": 4330 }, { "epoch": 0.006098107162992639, "grad_norm": 1.0275291204452515, "learning_rate": 1.8290009835604888e-05, "loss": 7.2664, "step": 4340 }, { "epoch": 0.0061121581011562166, "grad_norm": 0.9744628667831421, "learning_rate": 1.833216242798932e-05, "loss": 7.3819, "step": 4350 }, { "epoch": 0.0061262090393197945, "grad_norm": 1.1602356433868408, "learning_rate": 1.837431502037375e-05, "loss": 7.3576, "step": 4360 }, { "epoch": 0.0061402599774833715, "grad_norm": 1.18546462059021, "learning_rate": 1.8416467612758185e-05, "loss": 7.3605, "step": 4370 }, { "epoch": 0.006154310915646949, "grad_norm": 0.9172095060348511, "learning_rate": 1.8458620205142616e-05, "loss": 7.3156, "step": 4380 }, { "epoch": 0.006168361853810526, "grad_norm": 1.1265218257904053, "learning_rate": 1.8500772797527047e-05, "loss": 7.3078, "step": 4390 }, { "epoch": 0.006182412791974104, "grad_norm": 0.9350690841674805, "learning_rate": 1.8542925389911478e-05, "loss": 7.3547, "step": 4400 }, { "epoch": 0.006196463730137681, "grad_norm": 0.9449189901351929, "learning_rate": 1.858507798229591e-05, "loss": 7.3303, "step": 4410 }, { "epoch": 0.006210514668301259, "grad_norm": 1.1331335306167603, "learning_rate": 1.862723057468034e-05, "loss": 7.3691, "step": 4420 }, { "epoch": 0.006224565606464836, "grad_norm": 1.1100194454193115, "learning_rate": 1.8669383167064772e-05, "loss": 7.3479, "step": 4430 }, { "epoch": 0.006238616544628414, "grad_norm": 1.2014113664627075, "learning_rate": 1.8711535759449203e-05, "loss": 7.2566, "step": 4440 }, { 
"epoch": 0.006252667482791992, "grad_norm": 1.2027499675750732, "learning_rate": 1.8753688351833634e-05, "loss": 7.3306, "step": 4450 }, { "epoch": 0.006266718420955569, "grad_norm": 1.2113648653030396, "learning_rate": 1.879584094421807e-05, "loss": 7.2312, "step": 4460 }, { "epoch": 0.006280769359119147, "grad_norm": 1.14470636844635, "learning_rate": 1.88379935366025e-05, "loss": 7.3603, "step": 4470 }, { "epoch": 0.006294820297282724, "grad_norm": 1.0293289422988892, "learning_rate": 1.888014612898693e-05, "loss": 7.3057, "step": 4480 }, { "epoch": 0.006308871235446302, "grad_norm": 1.1022226810455322, "learning_rate": 1.8922298721371362e-05, "loss": 7.2761, "step": 4490 }, { "epoch": 0.006322922173609879, "grad_norm": 1.1800730228424072, "learning_rate": 1.8964451313755793e-05, "loss": 7.2929, "step": 4500 }, { "epoch": 0.006336973111773457, "grad_norm": 0.8201963901519775, "learning_rate": 1.9006603906140228e-05, "loss": 7.3127, "step": 4510 }, { "epoch": 0.006351024049937034, "grad_norm": 1.0580271482467651, "learning_rate": 1.904875649852466e-05, "loss": 7.2965, "step": 4520 }, { "epoch": 0.006365074988100612, "grad_norm": 1.135973572731018, "learning_rate": 1.9090909090909087e-05, "loss": 7.3115, "step": 4530 }, { "epoch": 0.006379125926264189, "grad_norm": 1.0042225122451782, "learning_rate": 1.913306168329352e-05, "loss": 7.326, "step": 4540 }, { "epoch": 0.006393176864427767, "grad_norm": 1.073927879333496, "learning_rate": 1.9175214275677952e-05, "loss": 7.2742, "step": 4550 }, { "epoch": 0.006407227802591345, "grad_norm": 1.384734034538269, "learning_rate": 1.9217366868062384e-05, "loss": 7.2784, "step": 4560 }, { "epoch": 0.006421278740754922, "grad_norm": 1.0155476331710815, "learning_rate": 1.9259519460446818e-05, "loss": 7.2515, "step": 4570 }, { "epoch": 0.0064353296789185, "grad_norm": 1.076328992843628, "learning_rate": 1.9301672052831246e-05, "loss": 7.3195, "step": 4580 }, { "epoch": 0.006449380617082077, "grad_norm": 1.0640712976455688, 
"learning_rate": 1.934382464521568e-05, "loss": 7.2912, "step": 4590 }, { "epoch": 0.0064634315552456545, "grad_norm": 0.9461340308189392, "learning_rate": 1.9385977237600112e-05, "loss": 7.3366, "step": 4600 }, { "epoch": 0.0064774824934092316, "grad_norm": 0.9778033494949341, "learning_rate": 1.9428129829984543e-05, "loss": 7.2975, "step": 4610 }, { "epoch": 0.0064915334315728095, "grad_norm": 1.0978727340698242, "learning_rate": 1.9470282422368974e-05, "loss": 7.271, "step": 4620 }, { "epoch": 0.0065055843697363865, "grad_norm": 1.1852045059204102, "learning_rate": 1.9512435014753405e-05, "loss": 7.2987, "step": 4630 }, { "epoch": 0.006519635307899964, "grad_norm": 1.0794644355773926, "learning_rate": 1.9554587607137836e-05, "loss": 7.2204, "step": 4640 }, { "epoch": 0.006533686246063541, "grad_norm": 1.1216827630996704, "learning_rate": 1.959674019952227e-05, "loss": 7.2575, "step": 4650 }, { "epoch": 0.006547737184227119, "grad_norm": 0.9647300839424133, "learning_rate": 1.96388927919067e-05, "loss": 7.3514, "step": 4660 }, { "epoch": 0.006561788122390697, "grad_norm": 1.1785202026367188, "learning_rate": 1.9681045384291133e-05, "loss": 7.2722, "step": 4670 }, { "epoch": 0.006575839060554274, "grad_norm": 1.2579607963562012, "learning_rate": 1.9723197976675564e-05, "loss": 7.2684, "step": 4680 }, { "epoch": 0.006589889998717852, "grad_norm": 1.069290280342102, "learning_rate": 1.9765350569059996e-05, "loss": 7.2563, "step": 4690 }, { "epoch": 0.006603940936881429, "grad_norm": 1.1216323375701904, "learning_rate": 1.9807503161444427e-05, "loss": 7.1934, "step": 4700 }, { "epoch": 0.006617991875045007, "grad_norm": 1.0338348150253296, "learning_rate": 1.9849655753828858e-05, "loss": 7.2595, "step": 4710 }, { "epoch": 0.006632042813208584, "grad_norm": 1.0657542943954468, "learning_rate": 1.989180834621329e-05, "loss": 7.2447, "step": 4720 }, { "epoch": 0.006646093751372162, "grad_norm": 1.0090150833129883, "learning_rate": 1.9933960938597724e-05, "loss": 7.2676, 
"step": 4730 }, { "epoch": 0.006660144689535739, "grad_norm": 1.244844913482666, "learning_rate": 1.997611353098215e-05, "loss": 7.2931, "step": 4740 }, { "epoch": 0.006674195627699317, "grad_norm": 1.4052555561065674, "learning_rate": 2.0018266123366586e-05, "loss": 7.3218, "step": 4750 }, { "epoch": 0.006688246565862895, "grad_norm": 1.0389999151229858, "learning_rate": 2.0060418715751017e-05, "loss": 7.3679, "step": 4760 }, { "epoch": 0.006702297504026472, "grad_norm": 1.2123712301254272, "learning_rate": 2.010257130813545e-05, "loss": 7.3174, "step": 4770 }, { "epoch": 0.00671634844219005, "grad_norm": 1.31826651096344, "learning_rate": 2.014472390051988e-05, "loss": 7.2676, "step": 4780 }, { "epoch": 0.006730399380353627, "grad_norm": 1.0967237949371338, "learning_rate": 2.018687649290431e-05, "loss": 7.2652, "step": 4790 }, { "epoch": 0.006744450318517205, "grad_norm": 1.1359235048294067, "learning_rate": 2.0229029085288742e-05, "loss": 7.2868, "step": 4800 }, { "epoch": 0.006758501256680782, "grad_norm": 1.052154779434204, "learning_rate": 2.0271181677673176e-05, "loss": 7.288, "step": 4810 }, { "epoch": 0.00677255219484436, "grad_norm": 1.0705679655075073, "learning_rate": 2.0313334270057608e-05, "loss": 7.2405, "step": 4820 }, { "epoch": 0.006786603133007937, "grad_norm": 0.9692743420600891, "learning_rate": 2.035548686244204e-05, "loss": 7.1738, "step": 4830 }, { "epoch": 0.006800654071171515, "grad_norm": 1.2184343338012695, "learning_rate": 2.039763945482647e-05, "loss": 7.2294, "step": 4840 }, { "epoch": 0.006814705009335092, "grad_norm": 1.4309422969818115, "learning_rate": 2.04397920472109e-05, "loss": 7.2412, "step": 4850 }, { "epoch": 0.0068287559474986695, "grad_norm": 1.538222312927246, "learning_rate": 2.0481944639595336e-05, "loss": 7.1677, "step": 4860 }, { "epoch": 0.006842806885662247, "grad_norm": 1.1325163841247559, "learning_rate": 2.0524097231979763e-05, "loss": 7.2538, "step": 4870 }, { "epoch": 0.0068568578238258245, "grad_norm": 
1.0051361322402954, "learning_rate": 2.0566249824364198e-05, "loss": 7.2056, "step": 4880 }, { "epoch": 0.006870908761989402, "grad_norm": 1.08148193359375, "learning_rate": 2.060840241674863e-05, "loss": 7.1734, "step": 4890 }, { "epoch": 0.006884959700152979, "grad_norm": 1.2696799039840698, "learning_rate": 2.065055500913306e-05, "loss": 7.2288, "step": 4900 }, { "epoch": 0.006899010638316557, "grad_norm": 1.0847587585449219, "learning_rate": 2.069270760151749e-05, "loss": 7.2802, "step": 4910 }, { "epoch": 0.006913061576480134, "grad_norm": 1.2094314098358154, "learning_rate": 2.0734860193901923e-05, "loss": 7.2064, "step": 4920 }, { "epoch": 0.006927112514643712, "grad_norm": 1.1414620876312256, "learning_rate": 2.0777012786286354e-05, "loss": 7.1366, "step": 4930 }, { "epoch": 0.006941163452807289, "grad_norm": 1.2508331537246704, "learning_rate": 2.081916537867079e-05, "loss": 7.2477, "step": 4940 }, { "epoch": 0.006955214390970867, "grad_norm": 1.1051875352859497, "learning_rate": 2.0861317971055216e-05, "loss": 7.2439, "step": 4950 }, { "epoch": 0.006969265329134445, "grad_norm": 0.9596846103668213, "learning_rate": 2.090347056343965e-05, "loss": 7.1859, "step": 4960 }, { "epoch": 0.006983316267298022, "grad_norm": 1.0876295566558838, "learning_rate": 2.0945623155824082e-05, "loss": 7.2189, "step": 4970 }, { "epoch": 0.0069973672054616, "grad_norm": 1.1610974073410034, "learning_rate": 2.0987775748208513e-05, "loss": 7.1999, "step": 4980 }, { "epoch": 0.007011418143625177, "grad_norm": 1.023998498916626, "learning_rate": 2.1029928340592944e-05, "loss": 7.1728, "step": 4990 }, { "epoch": 0.007025469081788755, "grad_norm": 1.176242470741272, "learning_rate": 2.1072080932977375e-05, "loss": 7.162, "step": 5000 }, { "epoch": 0.007039520019952332, "grad_norm": 1.1266804933547974, "learning_rate": 2.1114233525361806e-05, "loss": 7.2674, "step": 5010 }, { "epoch": 0.00705357095811591, "grad_norm": 1.0107471942901611, "learning_rate": 2.115638611774624e-05, 
"loss": 7.168, "step": 5020 }, { "epoch": 0.007067621896279487, "grad_norm": 0.9729942679405212, "learning_rate": 2.119853871013067e-05, "loss": 7.2244, "step": 5030 }, { "epoch": 0.007081672834443065, "grad_norm": 1.117859125137329, "learning_rate": 2.1240691302515103e-05, "loss": 7.1621, "step": 5040 }, { "epoch": 0.007095723772606642, "grad_norm": 1.206984281539917, "learning_rate": 2.1282843894899535e-05, "loss": 7.2517, "step": 5050 }, { "epoch": 0.00710977471077022, "grad_norm": 1.356512188911438, "learning_rate": 2.1324996487283966e-05, "loss": 7.2202, "step": 5060 }, { "epoch": 0.007123825648933798, "grad_norm": 1.0092519521713257, "learning_rate": 2.13671490796684e-05, "loss": 7.2292, "step": 5070 }, { "epoch": 0.007137876587097375, "grad_norm": 1.4104299545288086, "learning_rate": 2.1409301672052828e-05, "loss": 7.1438, "step": 5080 }, { "epoch": 0.0071519275252609526, "grad_norm": 1.2122313976287842, "learning_rate": 2.145145426443726e-05, "loss": 7.1562, "step": 5090 }, { "epoch": 0.00716597846342453, "grad_norm": 1.1602479219436646, "learning_rate": 2.1493606856821694e-05, "loss": 7.166, "step": 5100 }, { "epoch": 0.0071800294015881075, "grad_norm": 1.1590441465377808, "learning_rate": 2.1535759449206125e-05, "loss": 7.155, "step": 5110 }, { "epoch": 0.0071940803397516845, "grad_norm": 0.9332759380340576, "learning_rate": 2.1577912041590556e-05, "loss": 7.1189, "step": 5120 }, { "epoch": 0.007208131277915262, "grad_norm": 1.1661341190338135, "learning_rate": 2.1620064633974987e-05, "loss": 7.1424, "step": 5130 }, { "epoch": 0.0072221822160788395, "grad_norm": 1.2383019924163818, "learning_rate": 2.166221722635942e-05, "loss": 7.1516, "step": 5140 }, { "epoch": 0.007236233154242417, "grad_norm": 1.1211152076721191, "learning_rate": 2.1704369818743853e-05, "loss": 7.0884, "step": 5150 }, { "epoch": 0.007250284092405995, "grad_norm": 1.0935217142105103, "learning_rate": 2.174652241112828e-05, "loss": 7.1914, "step": 5160 }, { "epoch": 
0.007264335030569572, "grad_norm": 1.0398094654083252, "learning_rate": 2.1788675003512712e-05, "loss": 7.1011, "step": 5170 }, { "epoch": 0.00727838596873315, "grad_norm": 1.0619064569473267, "learning_rate": 2.1830827595897146e-05, "loss": 7.1611, "step": 5180 }, { "epoch": 0.007292436906896727, "grad_norm": 1.0957516431808472, "learning_rate": 2.1872980188281578e-05, "loss": 7.0981, "step": 5190 }, { "epoch": 0.007306487845060305, "grad_norm": 1.1831027269363403, "learning_rate": 2.191513278066601e-05, "loss": 7.1843, "step": 5200 }, { "epoch": 0.007320538783223882, "grad_norm": 1.323491096496582, "learning_rate": 2.195728537305044e-05, "loss": 7.1189, "step": 5210 }, { "epoch": 0.00733458972138746, "grad_norm": 1.022131085395813, "learning_rate": 2.199943796543487e-05, "loss": 7.1855, "step": 5220 }, { "epoch": 0.007348640659551037, "grad_norm": 1.081374168395996, "learning_rate": 2.2041590557819306e-05, "loss": 7.1447, "step": 5230 }, { "epoch": 0.007362691597714615, "grad_norm": 1.0144031047821045, "learning_rate": 2.2083743150203733e-05, "loss": 7.1438, "step": 5240 }, { "epoch": 0.007376742535878192, "grad_norm": 1.2261079549789429, "learning_rate": 2.2125895742588168e-05, "loss": 7.2035, "step": 5250 }, { "epoch": 0.00739079347404177, "grad_norm": 1.2673313617706299, "learning_rate": 2.21680483349726e-05, "loss": 7.1767, "step": 5260 }, { "epoch": 0.007404844412205348, "grad_norm": 1.063643455505371, "learning_rate": 2.221020092735703e-05, "loss": 7.1446, "step": 5270 }, { "epoch": 0.007418895350368925, "grad_norm": 1.092332124710083, "learning_rate": 2.225235351974146e-05, "loss": 7.132, "step": 5280 }, { "epoch": 0.007432946288532503, "grad_norm": 1.2160872220993042, "learning_rate": 2.2294506112125893e-05, "loss": 7.2127, "step": 5290 }, { "epoch": 0.00744699722669608, "grad_norm": 1.1070367097854614, "learning_rate": 2.2336658704510324e-05, "loss": 7.1532, "step": 5300 }, { "epoch": 0.007461048164859658, "grad_norm": 1.136398196220398, "learning_rate": 
2.237881129689476e-05, "loss": 7.1131, "step": 5310 }, { "epoch": 0.007475099103023235, "grad_norm": 1.0606321096420288, "learning_rate": 2.242096388927919e-05, "loss": 7.0342, "step": 5320 }, { "epoch": 0.007489150041186813, "grad_norm": 1.138594150543213, "learning_rate": 2.246311648166362e-05, "loss": 7.0824, "step": 5330 }, { "epoch": 0.00750320097935039, "grad_norm": 1.3768428564071655, "learning_rate": 2.2505269074048052e-05, "loss": 7.1352, "step": 5340 }, { "epoch": 0.0075172519175139675, "grad_norm": 1.2241921424865723, "learning_rate": 2.2547421666432483e-05, "loss": 7.0765, "step": 5350 }, { "epoch": 0.0075313028556775454, "grad_norm": 1.0443278551101685, "learning_rate": 2.2589574258816918e-05, "loss": 7.0452, "step": 5360 }, { "epoch": 0.0075453537938411225, "grad_norm": 1.295654296875, "learning_rate": 2.2631726851201345e-05, "loss": 7.0193, "step": 5370 }, { "epoch": 0.0075594047320047, "grad_norm": 1.6669979095458984, "learning_rate": 2.2673879443585777e-05, "loss": 7.0958, "step": 5380 }, { "epoch": 0.007573455670168277, "grad_norm": 1.039218544960022, "learning_rate": 2.271603203597021e-05, "loss": 7.1377, "step": 5390 }, { "epoch": 0.007587506608331855, "grad_norm": 1.0409983396530151, "learning_rate": 2.2758184628354642e-05, "loss": 7.0415, "step": 5400 }, { "epoch": 0.007601557546495432, "grad_norm": 1.0646721124649048, "learning_rate": 2.2800337220739073e-05, "loss": 7.0761, "step": 5410 }, { "epoch": 0.00761560848465901, "grad_norm": 1.143831729888916, "learning_rate": 2.2842489813123505e-05, "loss": 7.083, "step": 5420 }, { "epoch": 0.007629659422822587, "grad_norm": 1.0598105192184448, "learning_rate": 2.2884642405507936e-05, "loss": 7.0908, "step": 5430 }, { "epoch": 0.007643710360986165, "grad_norm": 1.2153172492980957, "learning_rate": 2.292679499789237e-05, "loss": 7.0801, "step": 5440 }, { "epoch": 0.007657761299149742, "grad_norm": 1.0428032875061035, "learning_rate": 2.2968947590276798e-05, "loss": 7.1072, "step": 5450 }, { "epoch": 
0.00767181223731332, "grad_norm": 1.0740203857421875, "learning_rate": 2.301110018266123e-05, "loss": 7.101, "step": 5460 }, { "epoch": 0.007685863175476898, "grad_norm": 1.193075180053711, "learning_rate": 2.3053252775045664e-05, "loss": 7.1157, "step": 5470 }, { "epoch": 0.007699914113640475, "grad_norm": 1.1059104204177856, "learning_rate": 2.3095405367430095e-05, "loss": 7.1466, "step": 5480 }, { "epoch": 0.007713965051804053, "grad_norm": 1.121106743812561, "learning_rate": 2.3137557959814526e-05, "loss": 7.1207, "step": 5490 }, { "epoch": 0.00772801598996763, "grad_norm": 1.1866158246994019, "learning_rate": 2.3179710552198957e-05, "loss": 7.0526, "step": 5500 }, { "epoch": 0.007742066928131208, "grad_norm": 1.1638514995574951, "learning_rate": 2.322186314458339e-05, "loss": 7.0261, "step": 5510 }, { "epoch": 0.007756117866294785, "grad_norm": 1.1464370489120483, "learning_rate": 2.3264015736967823e-05, "loss": 7.1148, "step": 5520 }, { "epoch": 0.007770168804458363, "grad_norm": 1.263282060623169, "learning_rate": 2.330616832935225e-05, "loss": 7.1217, "step": 5530 }, { "epoch": 0.00778421974262194, "grad_norm": 1.1215978860855103, "learning_rate": 2.3348320921736685e-05, "loss": 7.1119, "step": 5540 }, { "epoch": 0.007798270680785518, "grad_norm": 1.093409776687622, "learning_rate": 2.3390473514121117e-05, "loss": 7.0892, "step": 5550 }, { "epoch": 0.007812321618949095, "grad_norm": 1.2007728815078735, "learning_rate": 2.3432626106505548e-05, "loss": 7.051, "step": 5560 }, { "epoch": 0.007826372557112674, "grad_norm": 1.0814827680587769, "learning_rate": 2.3474778698889982e-05, "loss": 7.1414, "step": 5570 }, { "epoch": 0.00784042349527625, "grad_norm": 1.2719553709030151, "learning_rate": 2.351693129127441e-05, "loss": 7.0283, "step": 5580 }, { "epoch": 0.007854474433439828, "grad_norm": 1.3586655855178833, "learning_rate": 2.355908388365884e-05, "loss": 7.0621, "step": 5590 }, { "epoch": 0.007868525371603406, "grad_norm": 1.0803841352462769, 
"learning_rate": 2.3601236476043276e-05, "loss": 7.0976, "step": 5600 }, { "epoch": 0.007882576309766983, "grad_norm": 1.3272629976272583, "learning_rate": 2.3643389068427707e-05, "loss": 7.0699, "step": 5610 }, { "epoch": 0.00789662724793056, "grad_norm": 1.1906023025512695, "learning_rate": 2.3685541660812138e-05, "loss": 7.1024, "step": 5620 }, { "epoch": 0.007910678186094137, "grad_norm": 1.0353652238845825, "learning_rate": 2.372769425319657e-05, "loss": 7.1339, "step": 5630 }, { "epoch": 0.007924729124257715, "grad_norm": 1.1890437602996826, "learning_rate": 2.3769846845581e-05, "loss": 7.1143, "step": 5640 }, { "epoch": 0.007938780062421293, "grad_norm": 1.1110581159591675, "learning_rate": 2.3811999437965435e-05, "loss": 7.0033, "step": 5650 }, { "epoch": 0.00795283100058487, "grad_norm": 1.1018701791763306, "learning_rate": 2.3854152030349863e-05, "loss": 7.0789, "step": 5660 }, { "epoch": 0.007966881938748447, "grad_norm": 1.2645286321640015, "learning_rate": 2.3896304622734294e-05, "loss": 7.0673, "step": 5670 }, { "epoch": 0.007980932876912025, "grad_norm": 1.2000459432601929, "learning_rate": 2.393845721511873e-05, "loss": 7.0056, "step": 5680 }, { "epoch": 0.007994983815075603, "grad_norm": 1.5909388065338135, "learning_rate": 2.398060980750316e-05, "loss": 7.0763, "step": 5690 }, { "epoch": 0.008009034753239181, "grad_norm": 1.1488021612167358, "learning_rate": 2.402276239988759e-05, "loss": 7.0547, "step": 5700 }, { "epoch": 0.008023085691402757, "grad_norm": 1.6401759386062622, "learning_rate": 2.4064914992272022e-05, "loss": 7.0965, "step": 5710 }, { "epoch": 0.008037136629566335, "grad_norm": 1.1876939535140991, "learning_rate": 2.4107067584656453e-05, "loss": 7.0763, "step": 5720 }, { "epoch": 0.008051187567729913, "grad_norm": 1.2102808952331543, "learning_rate": 2.4149220177040888e-05, "loss": 6.9282, "step": 5730 }, { "epoch": 0.008065238505893491, "grad_norm": 1.1134921312332153, "learning_rate": 2.4191372769425316e-05, "loss": 7.0256, 
"step": 5740 }, { "epoch": 0.008079289444057067, "grad_norm": 1.1920967102050781, "learning_rate": 2.4233525361809747e-05, "loss": 7.0314, "step": 5750 }, { "epoch": 0.008093340382220645, "grad_norm": 1.218900442123413, "learning_rate": 2.427567795419418e-05, "loss": 7.0789, "step": 5760 }, { "epoch": 0.008107391320384223, "grad_norm": 1.160243034362793, "learning_rate": 2.4317830546578612e-05, "loss": 6.9588, "step": 5770 }, { "epoch": 0.0081214422585478, "grad_norm": 1.1619845628738403, "learning_rate": 2.4359983138963044e-05, "loss": 7.0157, "step": 5780 }, { "epoch": 0.008135493196711379, "grad_norm": 1.2614120244979858, "learning_rate": 2.4402135731347475e-05, "loss": 6.9232, "step": 5790 }, { "epoch": 0.008149544134874955, "grad_norm": 1.1834042072296143, "learning_rate": 2.4444288323731906e-05, "loss": 7.0801, "step": 5800 }, { "epoch": 0.008163595073038533, "grad_norm": 1.147611141204834, "learning_rate": 2.448644091611634e-05, "loss": 7.0824, "step": 5810 }, { "epoch": 0.00817764601120211, "grad_norm": 1.121339201927185, "learning_rate": 2.452859350850077e-05, "loss": 6.9956, "step": 5820 }, { "epoch": 0.008191696949365689, "grad_norm": 1.15691077709198, "learning_rate": 2.45707461008852e-05, "loss": 6.977, "step": 5830 }, { "epoch": 0.008205747887529265, "grad_norm": 1.2041352987289429, "learning_rate": 2.4612898693269634e-05, "loss": 6.9522, "step": 5840 }, { "epoch": 0.008219798825692843, "grad_norm": 1.0645039081573486, "learning_rate": 2.4655051285654065e-05, "loss": 7.0521, "step": 5850 }, { "epoch": 0.00823384976385642, "grad_norm": 1.0398435592651367, "learning_rate": 2.46972038780385e-05, "loss": 6.9993, "step": 5860 }, { "epoch": 0.008247900702019998, "grad_norm": 1.1055554151535034, "learning_rate": 2.4739356470422927e-05, "loss": 7.0278, "step": 5870 }, { "epoch": 0.008261951640183576, "grad_norm": 1.3027024269104004, "learning_rate": 2.478150906280736e-05, "loss": 7.0749, "step": 5880 }, { "epoch": 0.008276002578347152, "grad_norm": 
1.2701828479766846, "learning_rate": 2.4823661655191793e-05, "loss": 6.9991, "step": 5890 }, { "epoch": 0.00829005351651073, "grad_norm": 1.2022544145584106, "learning_rate": 2.4865814247576224e-05, "loss": 6.957, "step": 5900 }, { "epoch": 0.008304104454674308, "grad_norm": 1.1316558122634888, "learning_rate": 2.4907966839960656e-05, "loss": 6.9934, "step": 5910 }, { "epoch": 0.008318155392837886, "grad_norm": 1.539913535118103, "learning_rate": 2.4950119432345087e-05, "loss": 7.0327, "step": 5920 }, { "epoch": 0.008332206331001462, "grad_norm": 1.088806390762329, "learning_rate": 2.4992272024729518e-05, "loss": 6.9959, "step": 5930 }, { "epoch": 0.00834625726916504, "grad_norm": 1.0958342552185059, "learning_rate": 2.5034424617113952e-05, "loss": 7.0824, "step": 5940 }, { "epoch": 0.008360308207328618, "grad_norm": 1.241213321685791, "learning_rate": 2.507657720949838e-05, "loss": 7.007, "step": 5950 }, { "epoch": 0.008374359145492196, "grad_norm": 1.1807730197906494, "learning_rate": 2.511872980188281e-05, "loss": 7.0675, "step": 5960 }, { "epoch": 0.008388410083655774, "grad_norm": 1.1051346063613892, "learning_rate": 2.5160882394267246e-05, "loss": 7.0387, "step": 5970 }, { "epoch": 0.00840246102181935, "grad_norm": 1.1446903944015503, "learning_rate": 2.5203034986651677e-05, "loss": 6.9076, "step": 5980 }, { "epoch": 0.008416511959982928, "grad_norm": 1.0661766529083252, "learning_rate": 2.5245187579036108e-05, "loss": 6.9638, "step": 5990 }, { "epoch": 0.008430562898146506, "grad_norm": 1.3106430768966675, "learning_rate": 2.528734017142054e-05, "loss": 6.9139, "step": 6000 }, { "epoch": 0.008444613836310084, "grad_norm": 1.0190626382827759, "learning_rate": 2.532949276380497e-05, "loss": 7.0121, "step": 6010 }, { "epoch": 0.00845866477447366, "grad_norm": 1.2163209915161133, "learning_rate": 2.5371645356189405e-05, "loss": 6.9522, "step": 6020 }, { "epoch": 0.008472715712637238, "grad_norm": 1.3020004034042358, "learning_rate": 2.5413797948573833e-05, 
"loss": 7.0786, "step": 6030 }, { "epoch": 0.008486766650800816, "grad_norm": 1.3286876678466797, "learning_rate": 2.5455950540958264e-05, "loss": 6.9549, "step": 6040 }, { "epoch": 0.008500817588964394, "grad_norm": 1.3438900709152222, "learning_rate": 2.54981031333427e-05, "loss": 6.9829, "step": 6050 }, { "epoch": 0.00851486852712797, "grad_norm": 1.0740140676498413, "learning_rate": 2.554025572572713e-05, "loss": 6.9899, "step": 6060 }, { "epoch": 0.008528919465291548, "grad_norm": 1.1501201391220093, "learning_rate": 2.5582408318111564e-05, "loss": 7.0018, "step": 6070 }, { "epoch": 0.008542970403455126, "grad_norm": 1.1902880668640137, "learning_rate": 2.5624560910495992e-05, "loss": 7.0088, "step": 6080 }, { "epoch": 0.008557021341618704, "grad_norm": 1.0988359451293945, "learning_rate": 2.5666713502880423e-05, "loss": 6.9861, "step": 6090 }, { "epoch": 0.008571072279782281, "grad_norm": 1.1356360912322998, "learning_rate": 2.5708866095264858e-05, "loss": 7.0025, "step": 6100 }, { "epoch": 0.008585123217945858, "grad_norm": 1.2396526336669922, "learning_rate": 2.575101868764929e-05, "loss": 6.9691, "step": 6110 }, { "epoch": 0.008599174156109436, "grad_norm": 1.1919825077056885, "learning_rate": 2.5793171280033717e-05, "loss": 6.968, "step": 6120 }, { "epoch": 0.008613225094273013, "grad_norm": 1.2087693214416504, "learning_rate": 2.583532387241815e-05, "loss": 6.9081, "step": 6130 }, { "epoch": 0.008627276032436591, "grad_norm": 1.0366860628128052, "learning_rate": 2.5877476464802583e-05, "loss": 6.9781, "step": 6140 }, { "epoch": 0.008641326970600167, "grad_norm": 1.0888359546661377, "learning_rate": 2.5919629057187017e-05, "loss": 7.0216, "step": 6150 }, { "epoch": 0.008655377908763745, "grad_norm": 1.055550217628479, "learning_rate": 2.5961781649571445e-05, "loss": 6.9755, "step": 6160 }, { "epoch": 0.008669428846927323, "grad_norm": 1.300152063369751, "learning_rate": 2.6003934241955876e-05, "loss": 6.9486, "step": 6170 }, { "epoch": 
0.008683479785090901, "grad_norm": 1.2063759565353394, "learning_rate": 2.604608683434031e-05, "loss": 6.9448, "step": 6180 }, { "epoch": 0.008697530723254479, "grad_norm": 1.3412996530532837, "learning_rate": 2.6088239426724742e-05, "loss": 6.9007, "step": 6190 }, { "epoch": 0.008711581661418055, "grad_norm": 1.2316458225250244, "learning_rate": 2.613039201910917e-05, "loss": 6.842, "step": 6200 }, { "epoch": 0.008725632599581633, "grad_norm": 1.3386321067810059, "learning_rate": 2.6172544611493604e-05, "loss": 6.9621, "step": 6210 }, { "epoch": 0.008739683537745211, "grad_norm": 1.2841423749923706, "learning_rate": 2.6214697203878035e-05, "loss": 6.9877, "step": 6220 }, { "epoch": 0.008753734475908789, "grad_norm": 1.238442301750183, "learning_rate": 2.625684979626247e-05, "loss": 6.9483, "step": 6230 }, { "epoch": 0.008767785414072365, "grad_norm": 1.2642581462860107, "learning_rate": 2.6299002388646898e-05, "loss": 6.974, "step": 6240 }, { "epoch": 0.008781836352235943, "grad_norm": 1.0157082080841064, "learning_rate": 2.634115498103133e-05, "loss": 6.9708, "step": 6250 }, { "epoch": 0.008795887290399521, "grad_norm": 1.1940841674804688, "learning_rate": 2.6383307573415763e-05, "loss": 6.8949, "step": 6260 }, { "epoch": 0.008809938228563099, "grad_norm": 1.1627519130706787, "learning_rate": 2.6425460165800194e-05, "loss": 6.9352, "step": 6270 }, { "epoch": 0.008823989166726677, "grad_norm": 1.2034648656845093, "learning_rate": 2.6467612758184626e-05, "loss": 6.8588, "step": 6280 }, { "epoch": 0.008838040104890253, "grad_norm": 1.2993476390838623, "learning_rate": 2.6509765350569057e-05, "loss": 6.8985, "step": 6290 }, { "epoch": 0.00885209104305383, "grad_norm": 1.2249815464019775, "learning_rate": 2.6551917942953488e-05, "loss": 6.9706, "step": 6300 }, { "epoch": 0.008866141981217409, "grad_norm": 1.2293840646743774, "learning_rate": 2.6594070535337923e-05, "loss": 6.8822, "step": 6310 }, { "epoch": 0.008880192919380987, "grad_norm": 1.242686152458191, 
"learning_rate": 2.6636223127722354e-05, "loss": 6.8955, "step": 6320 }, { "epoch": 0.008894243857544563, "grad_norm": 1.4114511013031006, "learning_rate": 2.667837572010678e-05, "loss": 6.8935, "step": 6330 }, { "epoch": 0.00890829479570814, "grad_norm": 1.3672587871551514, "learning_rate": 2.6720528312491216e-05, "loss": 6.8912, "step": 6340 }, { "epoch": 0.008922345733871719, "grad_norm": 1.2746614217758179, "learning_rate": 2.6762680904875647e-05, "loss": 6.8686, "step": 6350 }, { "epoch": 0.008936396672035296, "grad_norm": 1.19154691696167, "learning_rate": 2.6804833497260082e-05, "loss": 6.9352, "step": 6360 }, { "epoch": 0.008950447610198874, "grad_norm": 1.0989205837249756, "learning_rate": 2.684698608964451e-05, "loss": 6.9164, "step": 6370 }, { "epoch": 0.00896449854836245, "grad_norm": 1.2119799852371216, "learning_rate": 2.688913868202894e-05, "loss": 6.9376, "step": 6380 }, { "epoch": 0.008978549486526028, "grad_norm": 1.1754270792007446, "learning_rate": 2.6931291274413375e-05, "loss": 7.0092, "step": 6390 }, { "epoch": 0.008992600424689606, "grad_norm": 1.3046258687973022, "learning_rate": 2.6973443866797806e-05, "loss": 6.9119, "step": 6400 }, { "epoch": 0.009006651362853184, "grad_norm": 1.185444712638855, "learning_rate": 2.7015596459182234e-05, "loss": 6.9164, "step": 6410 }, { "epoch": 0.00902070230101676, "grad_norm": 1.089029312133789, "learning_rate": 2.705774905156667e-05, "loss": 6.9452, "step": 6420 }, { "epoch": 0.009034753239180338, "grad_norm": 1.1562166213989258, "learning_rate": 2.70999016439511e-05, "loss": 6.9627, "step": 6430 }, { "epoch": 0.009048804177343916, "grad_norm": 1.1243866682052612, "learning_rate": 2.7142054236335534e-05, "loss": 6.9085, "step": 6440 }, { "epoch": 0.009062855115507494, "grad_norm": 1.2206236124038696, "learning_rate": 2.7184206828719962e-05, "loss": 6.9762, "step": 6450 }, { "epoch": 0.00907690605367107, "grad_norm": 1.1540404558181763, "learning_rate": 2.7226359421104393e-05, "loss": 6.9419, "step": 
6460 }, { "epoch": 0.009090956991834648, "grad_norm": 1.1724482774734497, "learning_rate": 2.7268512013488828e-05, "loss": 6.8162, "step": 6470 }, { "epoch": 0.009105007929998226, "grad_norm": 1.1414722204208374, "learning_rate": 2.731066460587326e-05, "loss": 6.85, "step": 6480 }, { "epoch": 0.009119058868161804, "grad_norm": 1.3120932579040527, "learning_rate": 2.7352817198257687e-05, "loss": 6.883, "step": 6490 }, { "epoch": 0.009133109806325382, "grad_norm": 1.4635417461395264, "learning_rate": 2.739496979064212e-05, "loss": 6.8511, "step": 6500 }, { "epoch": 0.009147160744488958, "grad_norm": 1.1401208639144897, "learning_rate": 2.7437122383026553e-05, "loss": 6.8325, "step": 6510 }, { "epoch": 0.009161211682652536, "grad_norm": 1.2455246448516846, "learning_rate": 2.7479274975410987e-05, "loss": 6.9031, "step": 6520 }, { "epoch": 0.009175262620816114, "grad_norm": 1.1095499992370605, "learning_rate": 2.752142756779542e-05, "loss": 6.9847, "step": 6530 }, { "epoch": 0.009189313558979692, "grad_norm": 1.1827448606491089, "learning_rate": 2.7563580160179846e-05, "loss": 6.8022, "step": 6540 }, { "epoch": 0.009203364497143268, "grad_norm": 1.260500431060791, "learning_rate": 2.760573275256428e-05, "loss": 6.8418, "step": 6550 }, { "epoch": 0.009217415435306846, "grad_norm": 1.2838935852050781, "learning_rate": 2.7647885344948712e-05, "loss": 6.7921, "step": 6560 }, { "epoch": 0.009231466373470424, "grad_norm": 1.320967674255371, "learning_rate": 2.7690037937333146e-05, "loss": 6.9222, "step": 6570 }, { "epoch": 0.009245517311634002, "grad_norm": 1.126835823059082, "learning_rate": 2.7732190529717574e-05, "loss": 6.8736, "step": 6580 }, { "epoch": 0.00925956824979758, "grad_norm": 1.314828634262085, "learning_rate": 2.7774343122102005e-05, "loss": 6.9342, "step": 6590 }, { "epoch": 0.009273619187961156, "grad_norm": 1.155680775642395, "learning_rate": 2.781649571448644e-05, "loss": 6.8753, "step": 6600 }, { "epoch": 0.009287670126124734, "grad_norm": 
1.1509275436401367, "learning_rate": 2.785864830687087e-05, "loss": 6.9379, "step": 6610 }, { "epoch": 0.009301721064288311, "grad_norm": 1.267380952835083, "learning_rate": 2.79008008992553e-05, "loss": 6.729, "step": 6620 }, { "epoch": 0.00931577200245189, "grad_norm": 1.0873631238937378, "learning_rate": 2.7942953491639733e-05, "loss": 6.9163, "step": 6630 }, { "epoch": 0.009329822940615465, "grad_norm": 1.1794565916061401, "learning_rate": 2.7985106084024165e-05, "loss": 6.8978, "step": 6640 }, { "epoch": 0.009343873878779043, "grad_norm": 1.2664817571640015, "learning_rate": 2.80272586764086e-05, "loss": 6.8032, "step": 6650 }, { "epoch": 0.009357924816942621, "grad_norm": 1.2982505559921265, "learning_rate": 2.8069411268793027e-05, "loss": 6.8812, "step": 6660 }, { "epoch": 0.0093719757551062, "grad_norm": 1.0752772092819214, "learning_rate": 2.8111563861177458e-05, "loss": 6.8571, "step": 6670 }, { "epoch": 0.009386026693269777, "grad_norm": 1.1509788036346436, "learning_rate": 2.8153716453561893e-05, "loss": 6.8439, "step": 6680 }, { "epoch": 0.009400077631433353, "grad_norm": 1.1949745416641235, "learning_rate": 2.8195869045946324e-05, "loss": 6.8121, "step": 6690 }, { "epoch": 0.009414128569596931, "grad_norm": 1.280206561088562, "learning_rate": 2.823802163833075e-05, "loss": 6.8162, "step": 6700 }, { "epoch": 0.009428179507760509, "grad_norm": 1.1214113235473633, "learning_rate": 2.8280174230715186e-05, "loss": 6.9153, "step": 6710 }, { "epoch": 0.009442230445924087, "grad_norm": 1.1382800340652466, "learning_rate": 2.8322326823099617e-05, "loss": 6.7568, "step": 6720 }, { "epoch": 0.009456281384087663, "grad_norm": 1.1620829105377197, "learning_rate": 2.8364479415484052e-05, "loss": 6.8636, "step": 6730 }, { "epoch": 0.009470332322251241, "grad_norm": 1.1612575054168701, "learning_rate": 2.840663200786848e-05, "loss": 6.8372, "step": 6740 }, { "epoch": 0.009484383260414819, "grad_norm": 1.1702032089233398, "learning_rate": 2.844878460025291e-05, 
"loss": 6.9363, "step": 6750 }, { "epoch": 0.009498434198578397, "grad_norm": 1.1699141263961792, "learning_rate": 2.8490937192637345e-05, "loss": 6.8231, "step": 6760 }, { "epoch": 0.009512485136741973, "grad_norm": 1.2124075889587402, "learning_rate": 2.8533089785021777e-05, "loss": 6.9048, "step": 6770 }, { "epoch": 0.009526536074905551, "grad_norm": 1.189448595046997, "learning_rate": 2.857524237740621e-05, "loss": 6.8002, "step": 6780 }, { "epoch": 0.009540587013069129, "grad_norm": 1.2542109489440918, "learning_rate": 2.861739496979064e-05, "loss": 6.7874, "step": 6790 }, { "epoch": 0.009554637951232707, "grad_norm": 1.1644352674484253, "learning_rate": 2.865954756217507e-05, "loss": 6.8212, "step": 6800 }, { "epoch": 0.009568688889396285, "grad_norm": 1.0751014947891235, "learning_rate": 2.8701700154559505e-05, "loss": 6.8566, "step": 6810 }, { "epoch": 0.00958273982755986, "grad_norm": 1.3088459968566895, "learning_rate": 2.8743852746943936e-05, "loss": 6.8754, "step": 6820 }, { "epoch": 0.009596790765723439, "grad_norm": 1.2002370357513428, "learning_rate": 2.8786005339328364e-05, "loss": 6.7577, "step": 6830 }, { "epoch": 0.009610841703887017, "grad_norm": 1.3408019542694092, "learning_rate": 2.8828157931712798e-05, "loss": 6.7329, "step": 6840 }, { "epoch": 0.009624892642050594, "grad_norm": 1.1012219190597534, "learning_rate": 2.887031052409723e-05, "loss": 6.8618, "step": 6850 }, { "epoch": 0.00963894358021417, "grad_norm": 1.610071063041687, "learning_rate": 2.8912463116481664e-05, "loss": 6.8109, "step": 6860 }, { "epoch": 0.009652994518377749, "grad_norm": 1.3259707689285278, "learning_rate": 2.895461570886609e-05, "loss": 6.8621, "step": 6870 }, { "epoch": 0.009667045456541326, "grad_norm": 1.3494741916656494, "learning_rate": 2.8996768301250523e-05, "loss": 6.8059, "step": 6880 }, { "epoch": 0.009681096394704904, "grad_norm": 1.2349156141281128, "learning_rate": 2.9038920893634957e-05, "loss": 6.7744, "step": 6890 }, { "epoch": 
0.009695147332868482, "grad_norm": 1.1511788368225098, "learning_rate": 2.908107348601939e-05, "loss": 6.8519, "step": 6900 }, { "epoch": 0.009709198271032058, "grad_norm": 1.3229706287384033, "learning_rate": 2.9123226078403816e-05, "loss": 6.9277, "step": 6910 }, { "epoch": 0.009723249209195636, "grad_norm": 1.3774425983428955, "learning_rate": 2.916537867078825e-05, "loss": 6.821, "step": 6920 }, { "epoch": 0.009737300147359214, "grad_norm": 1.2297898530960083, "learning_rate": 2.9207531263172682e-05, "loss": 6.7814, "step": 6930 }, { "epoch": 0.009751351085522792, "grad_norm": 1.274950385093689, "learning_rate": 2.9249683855557117e-05, "loss": 6.8029, "step": 6940 }, { "epoch": 0.009765402023686368, "grad_norm": 1.3338391780853271, "learning_rate": 2.9291836447941544e-05, "loss": 6.772, "step": 6950 }, { "epoch": 0.009779452961849946, "grad_norm": 1.3144457340240479, "learning_rate": 2.9333989040325975e-05, "loss": 6.8667, "step": 6960 }, { "epoch": 0.009793503900013524, "grad_norm": 1.1216480731964111, "learning_rate": 2.937614163271041e-05, "loss": 6.8348, "step": 6970 }, { "epoch": 0.009807554838177102, "grad_norm": 1.1729443073272705, "learning_rate": 2.941829422509484e-05, "loss": 6.8038, "step": 6980 }, { "epoch": 0.00982160577634068, "grad_norm": 1.1787049770355225, "learning_rate": 2.946044681747927e-05, "loss": 6.8645, "step": 6990 }, { "epoch": 0.009835656714504256, "grad_norm": 1.3254892826080322, "learning_rate": 2.9502599409863704e-05, "loss": 6.7894, "step": 7000 }, { "epoch": 0.009849707652667834, "grad_norm": 1.362343668937683, "learning_rate": 2.9544752002248135e-05, "loss": 6.7402, "step": 7010 }, { "epoch": 0.009863758590831412, "grad_norm": 1.3607126474380493, "learning_rate": 2.958690459463257e-05, "loss": 6.7859, "step": 7020 }, { "epoch": 0.00987780952899499, "grad_norm": 1.1887863874435425, "learning_rate": 2.9629057187017e-05, "loss": 6.847, "step": 7030 }, { "epoch": 0.009891860467158566, "grad_norm": 3.412855625152588, 
"learning_rate": 2.9671209779401428e-05, "loss": 6.7606, "step": 7040 }, { "epoch": 0.009905911405322144, "grad_norm": 1.3983832597732544, "learning_rate": 2.9713362371785863e-05, "loss": 6.8176, "step": 7050 }, { "epoch": 0.009919962343485722, "grad_norm": 1.4330317974090576, "learning_rate": 2.9755514964170294e-05, "loss": 6.845, "step": 7060 }, { "epoch": 0.0099340132816493, "grad_norm": 2.303821325302124, "learning_rate": 2.979766755655473e-05, "loss": 6.6737, "step": 7070 }, { "epoch": 0.009948064219812877, "grad_norm": 1.079118013381958, "learning_rate": 2.9839820148939156e-05, "loss": 6.8816, "step": 7080 }, { "epoch": 0.009962115157976454, "grad_norm": 1.1327508687973022, "learning_rate": 2.9881972741323587e-05, "loss": 6.8724, "step": 7090 }, { "epoch": 0.009976166096140032, "grad_norm": 1.1694974899291992, "learning_rate": 2.9924125333708022e-05, "loss": 6.779, "step": 7100 }, { "epoch": 0.00999021703430361, "grad_norm": 1.2839218378067017, "learning_rate": 2.9966277926092453e-05, "loss": 6.8217, "step": 7110 }, { "epoch": 0.010004267972467187, "grad_norm": 1.1525654792785645, "learning_rate": 3.000843051847688e-05, "loss": 6.798, "step": 7120 }, { "epoch": 0.010018318910630764, "grad_norm": 1.0679171085357666, "learning_rate": 3.0050583110861315e-05, "loss": 6.7938, "step": 7130 }, { "epoch": 0.010032369848794341, "grad_norm": 1.440910816192627, "learning_rate": 3.0092735703245747e-05, "loss": 6.6892, "step": 7140 }, { "epoch": 0.01004642078695792, "grad_norm": 1.1955666542053223, "learning_rate": 3.013488829563018e-05, "loss": 6.7765, "step": 7150 }, { "epoch": 0.010060471725121497, "grad_norm": 1.318375825881958, "learning_rate": 3.017704088801461e-05, "loss": 6.8462, "step": 7160 }, { "epoch": 0.010074522663285073, "grad_norm": 1.328094244003296, "learning_rate": 3.021919348039904e-05, "loss": 6.7747, "step": 7170 }, { "epoch": 0.010088573601448651, "grad_norm": 1.5209085941314697, "learning_rate": 3.0261346072783475e-05, "loss": 6.852, "step": 7180 
}, { "epoch": 0.01010262453961223, "grad_norm": 1.3846906423568726, "learning_rate": 3.0303498665167906e-05, "loss": 6.7635, "step": 7190 }, { "epoch": 0.010116675477775807, "grad_norm": 1.2889553308486938, "learning_rate": 3.0345651257552334e-05, "loss": 6.7899, "step": 7200 }, { "epoch": 0.010130726415939385, "grad_norm": 1.244968056678772, "learning_rate": 3.0387803849936768e-05, "loss": 6.8504, "step": 7210 }, { "epoch": 0.010144777354102961, "grad_norm": 1.5130319595336914, "learning_rate": 3.04299564423212e-05, "loss": 6.7033, "step": 7220 }, { "epoch": 0.010158828292266539, "grad_norm": 1.188735008239746, "learning_rate": 3.0472109034705634e-05, "loss": 6.7656, "step": 7230 }, { "epoch": 0.010172879230430117, "grad_norm": 1.2299892902374268, "learning_rate": 3.051426162709006e-05, "loss": 6.7203, "step": 7240 }, { "epoch": 0.010186930168593695, "grad_norm": 1.3426145315170288, "learning_rate": 3.055641421947449e-05, "loss": 6.7867, "step": 7250 }, { "epoch": 0.010200981106757271, "grad_norm": 1.440065860748291, "learning_rate": 3.0598566811858924e-05, "loss": 6.6989, "step": 7260 }, { "epoch": 0.010215032044920849, "grad_norm": 1.7373557090759277, "learning_rate": 3.064071940424336e-05, "loss": 6.762, "step": 7270 }, { "epoch": 0.010229082983084427, "grad_norm": 1.1723196506500244, "learning_rate": 3.068287199662779e-05, "loss": 6.7585, "step": 7280 }, { "epoch": 0.010243133921248005, "grad_norm": 1.1131891012191772, "learning_rate": 3.072502458901222e-05, "loss": 6.6601, "step": 7290 }, { "epoch": 0.010257184859411583, "grad_norm": 1.3221385478973389, "learning_rate": 3.0767177181396655e-05, "loss": 6.798, "step": 7300 }, { "epoch": 0.010271235797575159, "grad_norm": 1.1797014474868774, "learning_rate": 3.080932977378109e-05, "loss": 6.7014, "step": 7310 }, { "epoch": 0.010285286735738737, "grad_norm": 1.2688157558441162, "learning_rate": 3.085148236616552e-05, "loss": 6.7095, "step": 7320 }, { "epoch": 0.010299337673902315, "grad_norm": 1.3802592754364014, 
"learning_rate": 3.089363495854995e-05, "loss": 6.6367, "step": 7330 }, { "epoch": 0.010313388612065892, "grad_norm": 1.2973464727401733, "learning_rate": 3.093578755093438e-05, "loss": 6.6415, "step": 7340 }, { "epoch": 0.010327439550229469, "grad_norm": 1.2183587551116943, "learning_rate": 3.097794014331881e-05, "loss": 6.7612, "step": 7350 }, { "epoch": 0.010341490488393047, "grad_norm": 1.2998961210250854, "learning_rate": 3.102009273570324e-05, "loss": 6.7186, "step": 7360 }, { "epoch": 0.010355541426556624, "grad_norm": 1.2842721939086914, "learning_rate": 3.1062245328087674e-05, "loss": 6.796, "step": 7370 }, { "epoch": 0.010369592364720202, "grad_norm": 1.5530544519424438, "learning_rate": 3.1104397920472105e-05, "loss": 6.7277, "step": 7380 }, { "epoch": 0.01038364330288378, "grad_norm": 1.4051449298858643, "learning_rate": 3.1146550512856536e-05, "loss": 6.6586, "step": 7390 }, { "epoch": 0.010397694241047356, "grad_norm": 1.4194215536117554, "learning_rate": 3.1188703105240974e-05, "loss": 6.7337, "step": 7400 }, { "epoch": 0.010411745179210934, "grad_norm": 1.2050557136535645, "learning_rate": 3.12308556976254e-05, "loss": 6.7198, "step": 7410 }, { "epoch": 0.010425796117374512, "grad_norm": 1.340126872062683, "learning_rate": 3.127300829000983e-05, "loss": 6.6974, "step": 7420 }, { "epoch": 0.01043984705553809, "grad_norm": 1.363870620727539, "learning_rate": 3.131516088239427e-05, "loss": 6.653, "step": 7430 }, { "epoch": 0.010453897993701666, "grad_norm": 1.3523595333099365, "learning_rate": 3.13573134747787e-05, "loss": 6.6777, "step": 7440 }, { "epoch": 0.010467948931865244, "grad_norm": 1.3135426044464111, "learning_rate": 3.139946606716312e-05, "loss": 6.7118, "step": 7450 }, { "epoch": 0.010481999870028822, "grad_norm": 1.7983295917510986, "learning_rate": 3.144161865954756e-05, "loss": 6.5969, "step": 7460 }, { "epoch": 0.0104960508081924, "grad_norm": 1.22897469997406, "learning_rate": 3.148377125193199e-05, "loss": 6.6821, "step": 7470 }, { 
"epoch": 0.010510101746355976, "grad_norm": 1.2474480867385864, "learning_rate": 3.152592384431642e-05, "loss": 6.6954, "step": 7480 }, { "epoch": 0.010524152684519554, "grad_norm": 1.1647069454193115, "learning_rate": 3.1568076436700854e-05, "loss": 6.7303, "step": 7490 }, { "epoch": 0.010538203622683132, "grad_norm": 1.2561836242675781, "learning_rate": 3.1610229029085286e-05, "loss": 6.7037, "step": 7500 }, { "epoch": 0.01055225456084671, "grad_norm": 1.7592111825942993, "learning_rate": 3.165238162146972e-05, "loss": 6.6393, "step": 7510 }, { "epoch": 0.010566305499010288, "grad_norm": 1.4957785606384277, "learning_rate": 3.169453421385415e-05, "loss": 6.7021, "step": 7520 }, { "epoch": 0.010580356437173864, "grad_norm": 1.2453256845474243, "learning_rate": 3.1736686806238586e-05, "loss": 6.7024, "step": 7530 }, { "epoch": 0.010594407375337442, "grad_norm": 1.2314335107803345, "learning_rate": 3.177883939862301e-05, "loss": 6.6214, "step": 7540 }, { "epoch": 0.01060845831350102, "grad_norm": 1.3570759296417236, "learning_rate": 3.182099199100744e-05, "loss": 6.6482, "step": 7550 }, { "epoch": 0.010622509251664598, "grad_norm": 1.2058477401733398, "learning_rate": 3.186314458339188e-05, "loss": 6.7454, "step": 7560 }, { "epoch": 0.010636560189828174, "grad_norm": 1.3977819681167603, "learning_rate": 3.190529717577631e-05, "loss": 6.7017, "step": 7570 }, { "epoch": 0.010650611127991752, "grad_norm": 1.2504565715789795, "learning_rate": 3.1947449768160735e-05, "loss": 6.7633, "step": 7580 }, { "epoch": 0.01066466206615533, "grad_norm": 1.3335305452346802, "learning_rate": 3.198960236054517e-05, "loss": 6.6588, "step": 7590 }, { "epoch": 0.010678713004318907, "grad_norm": 1.2901426553726196, "learning_rate": 3.2031754952929604e-05, "loss": 6.6741, "step": 7600 }, { "epoch": 0.010692763942482485, "grad_norm": 1.484226107597351, "learning_rate": 3.2073907545314035e-05, "loss": 6.684, "step": 7610 }, { "epoch": 0.010706814880646062, "grad_norm": 1.2085685729980469, 
"learning_rate": 3.2116060137698466e-05, "loss": 6.7001, "step": 7620 }, { "epoch": 0.01072086581880964, "grad_norm": 1.306835412979126, "learning_rate": 3.21582127300829e-05, "loss": 6.7055, "step": 7630 }, { "epoch": 0.010734916756973217, "grad_norm": 1.3770405054092407, "learning_rate": 3.220036532246733e-05, "loss": 6.7178, "step": 7640 }, { "epoch": 0.010748967695136795, "grad_norm": 1.4259955883026123, "learning_rate": 3.224251791485176e-05, "loss": 6.6999, "step": 7650 }, { "epoch": 0.010763018633300371, "grad_norm": 1.2039424180984497, "learning_rate": 3.228467050723619e-05, "loss": 6.6817, "step": 7660 }, { "epoch": 0.01077706957146395, "grad_norm": 1.3195371627807617, "learning_rate": 3.232682309962062e-05, "loss": 6.6196, "step": 7670 }, { "epoch": 0.010791120509627527, "grad_norm": 1.228158950805664, "learning_rate": 3.236897569200505e-05, "loss": 6.6338, "step": 7680 }, { "epoch": 0.010805171447791105, "grad_norm": 1.4088062047958374, "learning_rate": 3.241112828438949e-05, "loss": 6.6519, "step": 7690 }, { "epoch": 0.010819222385954683, "grad_norm": 1.2354357242584229, "learning_rate": 3.2453280876773916e-05, "loss": 6.6397, "step": 7700 }, { "epoch": 0.01083327332411826, "grad_norm": 1.2311620712280273, "learning_rate": 3.249543346915835e-05, "loss": 6.6254, "step": 7710 }, { "epoch": 0.010847324262281837, "grad_norm": 1.1504223346710205, "learning_rate": 3.2537586061542785e-05, "loss": 6.7407, "step": 7720 }, { "epoch": 0.010861375200445415, "grad_norm": 1.3598875999450684, "learning_rate": 3.2579738653927216e-05, "loss": 6.7246, "step": 7730 }, { "epoch": 0.010875426138608993, "grad_norm": 1.3000996112823486, "learning_rate": 3.262189124631164e-05, "loss": 6.7343, "step": 7740 }, { "epoch": 0.010889477076772569, "grad_norm": 1.4005444049835205, "learning_rate": 3.266404383869608e-05, "loss": 6.6171, "step": 7750 }, { "epoch": 0.010903528014936147, "grad_norm": 1.3075186014175415, "learning_rate": 3.270619643108051e-05, "loss": 6.7218, "step": 7760 
}, { "epoch": 0.010917578953099725, "grad_norm": 1.383267879486084, "learning_rate": 3.274834902346494e-05, "loss": 6.6246, "step": 7770 }, { "epoch": 0.010931629891263303, "grad_norm": 1.3374590873718262, "learning_rate": 3.279050161584937e-05, "loss": 6.757, "step": 7780 }, { "epoch": 0.01094568082942688, "grad_norm": 1.3575273752212524, "learning_rate": 3.28326542082338e-05, "loss": 6.7405, "step": 7790 }, { "epoch": 0.010959731767590457, "grad_norm": 1.5748355388641357, "learning_rate": 3.2874806800618234e-05, "loss": 6.6514, "step": 7800 }, { "epoch": 0.010973782705754035, "grad_norm": 1.6142196655273438, "learning_rate": 3.2916959393002665e-05, "loss": 6.5779, "step": 7810 }, { "epoch": 0.010987833643917613, "grad_norm": 1.1203367710113525, "learning_rate": 3.29591119853871e-05, "loss": 6.7072, "step": 7820 }, { "epoch": 0.01100188458208119, "grad_norm": 1.2387946844100952, "learning_rate": 3.300126457777153e-05, "loss": 6.5173, "step": 7830 }, { "epoch": 0.011015935520244767, "grad_norm": 1.3735456466674805, "learning_rate": 3.304341717015596e-05, "loss": 6.7404, "step": 7840 }, { "epoch": 0.011029986458408345, "grad_norm": 1.399306297302246, "learning_rate": 3.30855697625404e-05, "loss": 6.6327, "step": 7850 }, { "epoch": 0.011044037396571922, "grad_norm": 1.3363549709320068, "learning_rate": 3.312772235492483e-05, "loss": 6.6128, "step": 7860 }, { "epoch": 0.0110580883347355, "grad_norm": 1.1382472515106201, "learning_rate": 3.316987494730925e-05, "loss": 6.6297, "step": 7870 }, { "epoch": 0.011072139272899077, "grad_norm": 1.407041072845459, "learning_rate": 3.321202753969369e-05, "loss": 6.5615, "step": 7880 }, { "epoch": 0.011086190211062654, "grad_norm": 1.3194184303283691, "learning_rate": 3.325418013207812e-05, "loss": 6.7404, "step": 7890 }, { "epoch": 0.011100241149226232, "grad_norm": 1.298169732093811, "learning_rate": 3.329633272446255e-05, "loss": 6.6713, "step": 7900 }, { "epoch": 0.01111429208738981, "grad_norm": 1.251718282699585, 
"learning_rate": 3.3338485316846984e-05, "loss": 6.623, "step": 7910 }, { "epoch": 0.011128343025553388, "grad_norm": 1.2488762140274048, "learning_rate": 3.3380637909231415e-05, "loss": 6.5778, "step": 7920 }, { "epoch": 0.011142393963716964, "grad_norm": 1.156110405921936, "learning_rate": 3.3422790501615846e-05, "loss": 6.66, "step": 7930 }, { "epoch": 0.011156444901880542, "grad_norm": 1.4375478029251099, "learning_rate": 3.346494309400028e-05, "loss": 6.5979, "step": 7940 }, { "epoch": 0.01117049584004412, "grad_norm": 1.3894151449203491, "learning_rate": 3.350709568638471e-05, "loss": 6.6524, "step": 7950 }, { "epoch": 0.011184546778207698, "grad_norm": 1.1708953380584717, "learning_rate": 3.354924827876914e-05, "loss": 6.6713, "step": 7960 }, { "epoch": 0.011198597716371274, "grad_norm": 1.281219482421875, "learning_rate": 3.359140087115357e-05, "loss": 6.6162, "step": 7970 }, { "epoch": 0.011212648654534852, "grad_norm": 1.3841021060943604, "learning_rate": 3.363355346353801e-05, "loss": 6.5542, "step": 7980 }, { "epoch": 0.01122669959269843, "grad_norm": 1.1785082817077637, "learning_rate": 3.367570605592243e-05, "loss": 6.5345, "step": 7990 }, { "epoch": 0.011240750530862008, "grad_norm": 1.361771821975708, "learning_rate": 3.3717858648306864e-05, "loss": 6.6326, "step": 8000 }, { "epoch": 0.011254801469025586, "grad_norm": 1.6702193021774292, "learning_rate": 3.37600112406913e-05, "loss": 6.6394, "step": 8010 }, { "epoch": 0.011268852407189162, "grad_norm": 1.1941388845443726, "learning_rate": 3.380216383307573e-05, "loss": 6.6372, "step": 8020 }, { "epoch": 0.01128290334535274, "grad_norm": 1.274591326713562, "learning_rate": 3.3844316425460164e-05, "loss": 6.6055, "step": 8030 }, { "epoch": 0.011296954283516318, "grad_norm": 1.3808879852294922, "learning_rate": 3.3886469017844596e-05, "loss": 6.5508, "step": 8040 }, { "epoch": 0.011311005221679896, "grad_norm": 1.2351412773132324, "learning_rate": 3.392862161022903e-05, "loss": 6.6163, "step": 8050 }, 
{ "epoch": 0.011325056159843472, "grad_norm": 1.38816237449646, "learning_rate": 3.397077420261346e-05, "loss": 6.6613, "step": 8060 }, { "epoch": 0.01133910709800705, "grad_norm": 1.3823046684265137, "learning_rate": 3.401292679499789e-05, "loss": 6.6828, "step": 8070 }, { "epoch": 0.011353158036170628, "grad_norm": 1.2434042692184448, "learning_rate": 3.405507938738232e-05, "loss": 6.5608, "step": 8080 }, { "epoch": 0.011367208974334206, "grad_norm": 1.3518249988555908, "learning_rate": 3.409723197976675e-05, "loss": 6.6077, "step": 8090 }, { "epoch": 0.011381259912497783, "grad_norm": 1.5739810466766357, "learning_rate": 3.413938457215118e-05, "loss": 6.7089, "step": 8100 }, { "epoch": 0.01139531085066136, "grad_norm": 1.2564321756362915, "learning_rate": 3.418153716453562e-05, "loss": 6.6499, "step": 8110 }, { "epoch": 0.011409361788824937, "grad_norm": 1.2737637758255005, "learning_rate": 3.4223689756920045e-05, "loss": 6.5284, "step": 8120 }, { "epoch": 0.011423412726988515, "grad_norm": 1.3004697561264038, "learning_rate": 3.4265842349304476e-05, "loss": 6.6387, "step": 8130 }, { "epoch": 0.011437463665152093, "grad_norm": 1.3879871368408203, "learning_rate": 3.4307994941688914e-05, "loss": 6.5927, "step": 8140 }, { "epoch": 0.01145151460331567, "grad_norm": 1.338653802871704, "learning_rate": 3.4350147534073345e-05, "loss": 6.5564, "step": 8150 }, { "epoch": 0.011465565541479247, "grad_norm": 1.4665417671203613, "learning_rate": 3.439230012645777e-05, "loss": 6.5217, "step": 8160 }, { "epoch": 0.011479616479642825, "grad_norm": 1.3037580251693726, "learning_rate": 3.443445271884221e-05, "loss": 6.6446, "step": 8170 }, { "epoch": 0.011493667417806403, "grad_norm": 1.2073763608932495, "learning_rate": 3.447660531122664e-05, "loss": 6.646, "step": 8180 }, { "epoch": 0.011507718355969981, "grad_norm": 1.291408896446228, "learning_rate": 3.451875790361107e-05, "loss": 6.672, "step": 8190 }, { "epoch": 0.011521769294133557, "grad_norm": 1.1927132606506348, 
"learning_rate": 3.45609104959955e-05, "loss": 6.5918, "step": 8200 }, { "epoch": 0.011535820232297135, "grad_norm": 1.3004413843154907, "learning_rate": 3.460306308837993e-05, "loss": 6.4986, "step": 8210 }, { "epoch": 0.011549871170460713, "grad_norm": 1.3016691207885742, "learning_rate": 3.4645215680764363e-05, "loss": 6.5268, "step": 8220 }, { "epoch": 0.011563922108624291, "grad_norm": 1.3215467929840088, "learning_rate": 3.4687368273148795e-05, "loss": 6.5952, "step": 8230 }, { "epoch": 0.011577973046787867, "grad_norm": 1.1828099489212036, "learning_rate": 3.4729520865533226e-05, "loss": 6.6606, "step": 8240 }, { "epoch": 0.011592023984951445, "grad_norm": 1.3190696239471436, "learning_rate": 3.477167345791766e-05, "loss": 6.5955, "step": 8250 }, { "epoch": 0.011606074923115023, "grad_norm": 1.2753030061721802, "learning_rate": 3.481382605030209e-05, "loss": 6.6255, "step": 8260 }, { "epoch": 0.0116201258612786, "grad_norm": 1.3981962203979492, "learning_rate": 3.4855978642686526e-05, "loss": 6.4703, "step": 8270 }, { "epoch": 0.011634176799442177, "grad_norm": 1.2518624067306519, "learning_rate": 3.489813123507096e-05, "loss": 6.5757, "step": 8280 }, { "epoch": 0.011648227737605755, "grad_norm": 1.1934725046157837, "learning_rate": 3.494028382745538e-05, "loss": 6.5563, "step": 8290 }, { "epoch": 0.011662278675769333, "grad_norm": 1.5110435485839844, "learning_rate": 3.498243641983982e-05, "loss": 6.565, "step": 8300 }, { "epoch": 0.01167632961393291, "grad_norm": 1.2714707851409912, "learning_rate": 3.502458901222425e-05, "loss": 6.6103, "step": 8310 }, { "epoch": 0.011690380552096489, "grad_norm": 1.3573921918869019, "learning_rate": 3.506674160460868e-05, "loss": 6.5801, "step": 8320 }, { "epoch": 0.011704431490260065, "grad_norm": 1.2519656419754028, "learning_rate": 3.510889419699311e-05, "loss": 6.503, "step": 8330 }, { "epoch": 0.011718482428423643, "grad_norm": 1.3184814453125, "learning_rate": 3.5151046789377544e-05, "loss": 6.6102, "step": 8340 }, 
{ "epoch": 0.01173253336658722, "grad_norm": 1.2902504205703735, "learning_rate": 3.5193199381761975e-05, "loss": 6.4634, "step": 8350 }, { "epoch": 0.011746584304750798, "grad_norm": 1.3389567136764526, "learning_rate": 3.5235351974146407e-05, "loss": 6.5529, "step": 8360 }, { "epoch": 0.011760635242914375, "grad_norm": 1.684384822845459, "learning_rate": 3.527750456653084e-05, "loss": 6.4738, "step": 8370 }, { "epoch": 0.011774686181077952, "grad_norm": 1.1865743398666382, "learning_rate": 3.531965715891527e-05, "loss": 6.4601, "step": 8380 }, { "epoch": 0.01178873711924153, "grad_norm": 1.2686684131622314, "learning_rate": 3.53618097512997e-05, "loss": 6.5644, "step": 8390 }, { "epoch": 0.011802788057405108, "grad_norm": 1.3220882415771484, "learning_rate": 3.540396234368414e-05, "loss": 6.5406, "step": 8400 }, { "epoch": 0.011816838995568686, "grad_norm": 1.302966833114624, "learning_rate": 3.544611493606856e-05, "loss": 6.5661, "step": 8410 }, { "epoch": 0.011830889933732262, "grad_norm": 1.270646572113037, "learning_rate": 3.5488267528452994e-05, "loss": 6.4866, "step": 8420 }, { "epoch": 0.01184494087189584, "grad_norm": 1.3006125688552856, "learning_rate": 3.553042012083743e-05, "loss": 6.5404, "step": 8430 }, { "epoch": 0.011858991810059418, "grad_norm": 1.5760924816131592, "learning_rate": 3.557257271322186e-05, "loss": 6.6259, "step": 8440 }, { "epoch": 0.011873042748222996, "grad_norm": 1.215240240097046, "learning_rate": 3.561472530560629e-05, "loss": 6.4761, "step": 8450 }, { "epoch": 0.011887093686386572, "grad_norm": 1.2556153535842896, "learning_rate": 3.5656877897990725e-05, "loss": 6.53, "step": 8460 }, { "epoch": 0.01190114462455015, "grad_norm": 1.2683357000350952, "learning_rate": 3.5699030490375156e-05, "loss": 6.4377, "step": 8470 }, { "epoch": 0.011915195562713728, "grad_norm": 1.187314748764038, "learning_rate": 3.574118308275959e-05, "loss": 6.5894, "step": 8480 }, { "epoch": 0.011929246500877306, "grad_norm": 1.3034703731536865, 
"learning_rate": 3.578333567514402e-05, "loss": 6.5753, "step": 8490 }, { "epoch": 0.011943297439040884, "grad_norm": 1.4902338981628418, "learning_rate": 3.582548826752845e-05, "loss": 6.5297, "step": 8500 }, { "epoch": 0.01195734837720446, "grad_norm": 1.3260395526885986, "learning_rate": 3.586764085991288e-05, "loss": 6.6046, "step": 8510 }, { "epoch": 0.011971399315368038, "grad_norm": 1.3407644033432007, "learning_rate": 3.590979345229731e-05, "loss": 6.4248, "step": 8520 }, { "epoch": 0.011985450253531616, "grad_norm": 1.1548830270767212, "learning_rate": 3.595194604468175e-05, "loss": 6.5296, "step": 8530 }, { "epoch": 0.011999501191695194, "grad_norm": 1.3393534421920776, "learning_rate": 3.5994098637066174e-05, "loss": 6.5889, "step": 8540 }, { "epoch": 0.01201355212985877, "grad_norm": 1.3542684316635132, "learning_rate": 3.6036251229450605e-05, "loss": 6.5508, "step": 8550 }, { "epoch": 0.012027603068022348, "grad_norm": 1.5744121074676514, "learning_rate": 3.6078403821835043e-05, "loss": 6.5049, "step": 8560 }, { "epoch": 0.012041654006185926, "grad_norm": 1.1964699029922485, "learning_rate": 3.6120556414219475e-05, "loss": 6.538, "step": 8570 }, { "epoch": 0.012055704944349504, "grad_norm": 1.3729437589645386, "learning_rate": 3.61627090066039e-05, "loss": 6.5814, "step": 8580 }, { "epoch": 0.01206975588251308, "grad_norm": 1.190537691116333, "learning_rate": 3.620486159898834e-05, "loss": 6.503, "step": 8590 }, { "epoch": 0.012083806820676658, "grad_norm": 1.2392871379852295, "learning_rate": 3.624701419137277e-05, "loss": 6.4875, "step": 8600 }, { "epoch": 0.012097857758840235, "grad_norm": 1.3132319450378418, "learning_rate": 3.62891667837572e-05, "loss": 6.4879, "step": 8610 }, { "epoch": 0.012111908697003813, "grad_norm": 1.3270124197006226, "learning_rate": 3.633131937614163e-05, "loss": 6.415, "step": 8620 }, { "epoch": 0.012125959635167391, "grad_norm": 1.2737349271774292, "learning_rate": 3.637347196852606e-05, "loss": 6.5448, "step": 8630 }, 
{ "epoch": 0.012140010573330967, "grad_norm": 1.1706392765045166, "learning_rate": 3.641562456091049e-05, "loss": 6.4907, "step": 8640 }, { "epoch": 0.012154061511494545, "grad_norm": 1.3053278923034668, "learning_rate": 3.6457777153294924e-05, "loss": 6.6704, "step": 8650 }, { "epoch": 0.012168112449658123, "grad_norm": 1.3305950164794922, "learning_rate": 3.6499929745679355e-05, "loss": 6.4386, "step": 8660 }, { "epoch": 0.012182163387821701, "grad_norm": 1.307633638381958, "learning_rate": 3.6542082338063786e-05, "loss": 6.4467, "step": 8670 }, { "epoch": 0.012196214325985277, "grad_norm": 1.3092464208602905, "learning_rate": 3.658423493044822e-05, "loss": 6.4597, "step": 8680 }, { "epoch": 0.012210265264148855, "grad_norm": 1.368456244468689, "learning_rate": 3.6626387522832655e-05, "loss": 6.5467, "step": 8690 }, { "epoch": 0.012224316202312433, "grad_norm": 1.2031217813491821, "learning_rate": 3.666854011521708e-05, "loss": 6.4379, "step": 8700 }, { "epoch": 0.012238367140476011, "grad_norm": 1.300486445426941, "learning_rate": 3.671069270760151e-05, "loss": 6.4787, "step": 8710 }, { "epoch": 0.012252418078639589, "grad_norm": 1.6316710710525513, "learning_rate": 3.675284529998595e-05, "loss": 6.4383, "step": 8720 }, { "epoch": 0.012266469016803165, "grad_norm": 1.327189564704895, "learning_rate": 3.679499789237038e-05, "loss": 6.4901, "step": 8730 }, { "epoch": 0.012280519954966743, "grad_norm": 1.4074703454971313, "learning_rate": 3.6837150484754804e-05, "loss": 6.5223, "step": 8740 }, { "epoch": 0.012294570893130321, "grad_norm": 1.3575947284698486, "learning_rate": 3.687930307713924e-05, "loss": 6.5039, "step": 8750 }, { "epoch": 0.012308621831293899, "grad_norm": 1.2397525310516357, "learning_rate": 3.6921455669523674e-05, "loss": 6.3608, "step": 8760 }, { "epoch": 0.012322672769457475, "grad_norm": 1.3196090459823608, "learning_rate": 3.6963608261908105e-05, "loss": 6.4237, "step": 8770 }, { "epoch": 0.012336723707621053, "grad_norm": 
1.4720513820648193, "learning_rate": 3.7005760854292536e-05, "loss": 6.4594, "step": 8780 }, { "epoch": 0.01235077464578463, "grad_norm": 1.3891912698745728, "learning_rate": 3.704791344667697e-05, "loss": 6.4615, "step": 8790 }, { "epoch": 0.012364825583948209, "grad_norm": 1.2116029262542725, "learning_rate": 3.70900660390614e-05, "loss": 6.4994, "step": 8800 }, { "epoch": 0.012378876522111787, "grad_norm": 1.3657456636428833, "learning_rate": 3.713221863144583e-05, "loss": 6.5264, "step": 8810 }, { "epoch": 0.012392927460275363, "grad_norm": 1.2680433988571167, "learning_rate": 3.717437122383027e-05, "loss": 6.4179, "step": 8820 }, { "epoch": 0.01240697839843894, "grad_norm": 1.2071449756622314, "learning_rate": 3.721652381621469e-05, "loss": 6.6144, "step": 8830 }, { "epoch": 0.012421029336602519, "grad_norm": 1.2629899978637695, "learning_rate": 3.725867640859912e-05, "loss": 6.5407, "step": 8840 }, { "epoch": 0.012435080274766096, "grad_norm": 1.296008586883545, "learning_rate": 3.730082900098356e-05, "loss": 6.4203, "step": 8850 }, { "epoch": 0.012449131212929673, "grad_norm": 1.4418439865112305, "learning_rate": 3.734298159336799e-05, "loss": 6.4575, "step": 8860 }, { "epoch": 0.01246318215109325, "grad_norm": 1.3441669940948486, "learning_rate": 3.7385134185752416e-05, "loss": 6.4932, "step": 8870 }, { "epoch": 0.012477233089256828, "grad_norm": 1.339735507965088, "learning_rate": 3.7427286778136854e-05, "loss": 6.5467, "step": 8880 }, { "epoch": 0.012491284027420406, "grad_norm": 1.3307244777679443, "learning_rate": 3.7469439370521285e-05, "loss": 6.5179, "step": 8890 }, { "epoch": 0.012505334965583984, "grad_norm": 1.3968795537948608, "learning_rate": 3.751159196290571e-05, "loss": 6.5239, "step": 8900 }, { "epoch": 0.01251938590374756, "grad_norm": 1.3690930604934692, "learning_rate": 3.755374455529015e-05, "loss": 6.585, "step": 8910 }, { "epoch": 0.012533436841911138, "grad_norm": 1.4065380096435547, "learning_rate": 3.759589714767458e-05, "loss": 
6.5333, "step": 8920 }, { "epoch": 0.012547487780074716, "grad_norm": 1.351392388343811, "learning_rate": 3.7638049740059e-05, "loss": 6.4869, "step": 8930 }, { "epoch": 0.012561538718238294, "grad_norm": 1.3816670179367065, "learning_rate": 3.768020233244344e-05, "loss": 6.4295, "step": 8940 }, { "epoch": 0.01257558965640187, "grad_norm": 1.369028091430664, "learning_rate": 3.772235492482787e-05, "loss": 6.504, "step": 8950 }, { "epoch": 0.012589640594565448, "grad_norm": 1.4562000036239624, "learning_rate": 3.776450751721231e-05, "loss": 6.4471, "step": 8960 }, { "epoch": 0.012603691532729026, "grad_norm": 1.8173776865005493, "learning_rate": 3.7806660109596735e-05, "loss": 6.5009, "step": 8970 }, { "epoch": 0.012617742470892604, "grad_norm": 1.3316607475280762, "learning_rate": 3.7848812701981166e-05, "loss": 6.3643, "step": 8980 }, { "epoch": 0.01263179340905618, "grad_norm": 1.2292101383209229, "learning_rate": 3.7890965294365604e-05, "loss": 6.3989, "step": 8990 }, { "epoch": 0.012645844347219758, "grad_norm": 1.34901762008667, "learning_rate": 3.793311788675003e-05, "loss": 6.4566, "step": 9000 }, { "epoch": 0.012659895285383336, "grad_norm": 1.2581123113632202, "learning_rate": 3.7975270479134466e-05, "loss": 6.4794, "step": 9010 }, { "epoch": 0.012673946223546914, "grad_norm": 1.2883479595184326, "learning_rate": 3.80174230715189e-05, "loss": 6.4325, "step": 9020 }, { "epoch": 0.012687997161710492, "grad_norm": 1.5478203296661377, "learning_rate": 3.805957566390332e-05, "loss": 6.3938, "step": 9030 }, { "epoch": 0.012702048099874068, "grad_norm": 1.318039059638977, "learning_rate": 3.810172825628776e-05, "loss": 6.5348, "step": 9040 }, { "epoch": 0.012716099038037646, "grad_norm": 1.3280646800994873, "learning_rate": 3.814388084867219e-05, "loss": 6.5114, "step": 9050 }, { "epoch": 0.012730149976201224, "grad_norm": 1.2793512344360352, "learning_rate": 3.8186033441056615e-05, "loss": 6.483, "step": 9060 }, { "epoch": 0.012744200914364802, "grad_norm": 
1.3596251010894775, "learning_rate": 3.822818603344105e-05, "loss": 6.449, "step": 9070 }, { "epoch": 0.012758251852528378, "grad_norm": 1.2819275856018066, "learning_rate": 3.8270338625825484e-05, "loss": 6.554, "step": 9080 }, { "epoch": 0.012772302790691956, "grad_norm": 1.3615672588348389, "learning_rate": 3.831249121820992e-05, "loss": 6.4931, "step": 9090 }, { "epoch": 0.012786353728855534, "grad_norm": 1.944034218788147, "learning_rate": 3.835464381059435e-05, "loss": 6.4004, "step": 9100 }, { "epoch": 0.012800404667019111, "grad_norm": 1.4013776779174805, "learning_rate": 3.839679640297878e-05, "loss": 6.3946, "step": 9110 }, { "epoch": 0.01281445560518269, "grad_norm": 1.3120478391647339, "learning_rate": 3.8438948995363216e-05, "loss": 6.3586, "step": 9120 }, { "epoch": 0.012828506543346265, "grad_norm": 1.2097243070602417, "learning_rate": 3.848110158774764e-05, "loss": 6.3219, "step": 9130 }, { "epoch": 0.012842557481509843, "grad_norm": 1.2938241958618164, "learning_rate": 3.852325418013207e-05, "loss": 6.5543, "step": 9140 }, { "epoch": 0.012856608419673421, "grad_norm": 1.4525402784347534, "learning_rate": 3.856540677251651e-05, "loss": 6.4622, "step": 9150 }, { "epoch": 0.012870659357837, "grad_norm": 1.2755237817764282, "learning_rate": 3.8607559364900934e-05, "loss": 6.3855, "step": 9160 }, { "epoch": 0.012884710296000575, "grad_norm": 1.2028285264968872, "learning_rate": 3.864971195728537e-05, "loss": 6.4834, "step": 9170 }, { "epoch": 0.012898761234164153, "grad_norm": 1.3781890869140625, "learning_rate": 3.86918645496698e-05, "loss": 6.4325, "step": 9180 }, { "epoch": 0.012912812172327731, "grad_norm": 1.3861724138259888, "learning_rate": 3.873401714205423e-05, "loss": 6.3593, "step": 9190 }, { "epoch": 0.012926863110491309, "grad_norm": 1.4471116065979004, "learning_rate": 3.8776169734438665e-05, "loss": 6.4511, "step": 9200 }, { "epoch": 0.012940914048654887, "grad_norm": 1.250730037689209, "learning_rate": 3.8818322326823096e-05, "loss": 
6.4343, "step": 9210 }, { "epoch": 0.012954964986818463, "grad_norm": 1.2801191806793213, "learning_rate": 3.886047491920752e-05, "loss": 6.3752, "step": 9220 }, { "epoch": 0.012969015924982041, "grad_norm": 1.210339069366455, "learning_rate": 3.890262751159196e-05, "loss": 6.4224, "step": 9230 }, { "epoch": 0.012983066863145619, "grad_norm": 1.3136581182479858, "learning_rate": 3.894478010397639e-05, "loss": 6.3563, "step": 9240 }, { "epoch": 0.012997117801309197, "grad_norm": 1.5053766965866089, "learning_rate": 3.898693269636083e-05, "loss": 6.3402, "step": 9250 }, { "epoch": 0.013011168739472773, "grad_norm": 1.661244511604309, "learning_rate": 3.902908528874525e-05, "loss": 6.3517, "step": 9260 }, { "epoch": 0.013025219677636351, "grad_norm": 1.3023381233215332, "learning_rate": 3.907123788112968e-05, "loss": 6.5525, "step": 9270 }, { "epoch": 0.013039270615799929, "grad_norm": 1.444373369216919, "learning_rate": 3.911339047351412e-05, "loss": 6.5172, "step": 9280 }, { "epoch": 0.013053321553963507, "grad_norm": 1.375860333442688, "learning_rate": 3.9155543065898546e-05, "loss": 6.4256, "step": 9290 }, { "epoch": 0.013067372492127083, "grad_norm": 1.346632719039917, "learning_rate": 3.9197695658282984e-05, "loss": 6.3385, "step": 9300 }, { "epoch": 0.01308142343029066, "grad_norm": 1.2784466743469238, "learning_rate": 3.9239848250667415e-05, "loss": 6.3262, "step": 9310 }, { "epoch": 0.013095474368454239, "grad_norm": 1.4308885335922241, "learning_rate": 3.928200084305184e-05, "loss": 6.3977, "step": 9320 }, { "epoch": 0.013109525306617817, "grad_norm": 1.379128336906433, "learning_rate": 3.932415343543628e-05, "loss": 6.3745, "step": 9330 }, { "epoch": 0.013123576244781394, "grad_norm": 1.3541892766952515, "learning_rate": 3.936630602782071e-05, "loss": 6.5073, "step": 9340 }, { "epoch": 0.01313762718294497, "grad_norm": 1.5439261198043823, "learning_rate": 3.940845862020513e-05, "loss": 6.4435, "step": 9350 }, { "epoch": 0.013151678121108549, "grad_norm": 
1.257303237915039, "learning_rate": 3.945061121258957e-05, "loss": 6.3503, "step": 9360 }, { "epoch": 0.013165729059272126, "grad_norm": 1.358400583267212, "learning_rate": 3.9492763804974e-05, "loss": 6.3759, "step": 9370 }, { "epoch": 0.013179779997435704, "grad_norm": 1.232875108718872, "learning_rate": 3.953491639735844e-05, "loss": 6.3632, "step": 9380 }, { "epoch": 0.01319383093559928, "grad_norm": 1.3150238990783691, "learning_rate": 3.9577068989742864e-05, "loss": 6.4415, "step": 9390 }, { "epoch": 0.013207881873762858, "grad_norm": 1.2941501140594482, "learning_rate": 3.9619221582127295e-05, "loss": 6.5106, "step": 9400 }, { "epoch": 0.013221932811926436, "grad_norm": 1.3233859539031982, "learning_rate": 3.966137417451173e-05, "loss": 6.4211, "step": 9410 }, { "epoch": 0.013235983750090014, "grad_norm": 1.309694528579712, "learning_rate": 3.970352676689616e-05, "loss": 6.4375, "step": 9420 }, { "epoch": 0.013250034688253592, "grad_norm": 1.3564949035644531, "learning_rate": 3.974567935928059e-05, "loss": 6.3014, "step": 9430 }, { "epoch": 0.013264085626417168, "grad_norm": 1.276855230331421, "learning_rate": 3.978783195166503e-05, "loss": 6.3429, "step": 9440 }, { "epoch": 0.013278136564580746, "grad_norm": 1.356063961982727, "learning_rate": 3.982998454404945e-05, "loss": 6.4402, "step": 9450 }, { "epoch": 0.013292187502744324, "grad_norm": 1.242181658744812, "learning_rate": 3.987213713643389e-05, "loss": 6.4431, "step": 9460 }, { "epoch": 0.013306238440907902, "grad_norm": 1.3604857921600342, "learning_rate": 3.991428972881832e-05, "loss": 6.3414, "step": 9470 }, { "epoch": 0.013320289379071478, "grad_norm": 1.2341139316558838, "learning_rate": 3.9956442321202745e-05, "loss": 6.4268, "step": 9480 }, { "epoch": 0.013334340317235056, "grad_norm": 1.4619051218032837, "learning_rate": 3.999859491358718e-05, "loss": 6.4312, "step": 9490 }, { "epoch": 0.013348391255398634, "grad_norm": 1.2710881233215332, "learning_rate": 4.0040747505971614e-05, "loss": 
6.3772, "step": 9500 }, { "epoch": 0.013362442193562212, "grad_norm": 1.3365315198898315, "learning_rate": 4.008290009835605e-05, "loss": 6.3996, "step": 9510 }, { "epoch": 0.01337649313172579, "grad_norm": 1.246775507926941, "learning_rate": 4.0125052690740476e-05, "loss": 6.4799, "step": 9520 }, { "epoch": 0.013390544069889366, "grad_norm": 1.3018437623977661, "learning_rate": 4.016720528312491e-05, "loss": 6.3685, "step": 9530 }, { "epoch": 0.013404595008052944, "grad_norm": 1.413640022277832, "learning_rate": 4.0209357875509345e-05, "loss": 6.3522, "step": 9540 }, { "epoch": 0.013418645946216522, "grad_norm": 1.3587901592254639, "learning_rate": 4.025151046789377e-05, "loss": 6.3307, "step": 9550 }, { "epoch": 0.0134326968843801, "grad_norm": 1.3375356197357178, "learning_rate": 4.02936630602782e-05, "loss": 6.423, "step": 9560 }, { "epoch": 0.013446747822543676, "grad_norm": 1.5337927341461182, "learning_rate": 4.033581565266264e-05, "loss": 6.3995, "step": 9570 }, { "epoch": 0.013460798760707254, "grad_norm": 1.299627661705017, "learning_rate": 4.037796824504706e-05, "loss": 6.3389, "step": 9580 }, { "epoch": 0.013474849698870832, "grad_norm": 1.1940332651138306, "learning_rate": 4.04201208374315e-05, "loss": 6.3567, "step": 9590 }, { "epoch": 0.01348890063703441, "grad_norm": 1.4569729566574097, "learning_rate": 4.046227342981593e-05, "loss": 6.3624, "step": 9600 }, { "epoch": 0.013502951575197987, "grad_norm": 1.3093900680541992, "learning_rate": 4.0504426022200357e-05, "loss": 6.4537, "step": 9610 }, { "epoch": 0.013517002513361564, "grad_norm": 1.5371428728103638, "learning_rate": 4.0546578614584795e-05, "loss": 6.414, "step": 9620 }, { "epoch": 0.013531053451525141, "grad_norm": 1.2927149534225464, "learning_rate": 4.0588731206969226e-05, "loss": 6.3938, "step": 9630 }, { "epoch": 0.01354510438968872, "grad_norm": 1.2334342002868652, "learning_rate": 4.063088379935365e-05, "loss": 6.36, "step": 9640 }, { "epoch": 0.013559155327852297, "grad_norm": 
1.3164347410202026, "learning_rate": 4.067303639173809e-05, "loss": 6.324, "step": 9650 }, { "epoch": 0.013573206266015873, "grad_norm": 1.3269118070602417, "learning_rate": 4.071518898412252e-05, "loss": 6.3407, "step": 9660 }, { "epoch": 0.013587257204179451, "grad_norm": 1.3465888500213623, "learning_rate": 4.075734157650696e-05, "loss": 6.3591, "step": 9670 }, { "epoch": 0.01360130814234303, "grad_norm": 1.2967374324798584, "learning_rate": 4.079949416889138e-05, "loss": 6.3319, "step": 9680 }, { "epoch": 0.013615359080506607, "grad_norm": 1.2995193004608154, "learning_rate": 4.084164676127581e-05, "loss": 6.3171, "step": 9690 }, { "epoch": 0.013629410018670183, "grad_norm": 1.4583133459091187, "learning_rate": 4.088379935366025e-05, "loss": 6.296, "step": 9700 }, { "epoch": 0.013643460956833761, "grad_norm": 1.3236838579177856, "learning_rate": 4.0925951946044675e-05, "loss": 6.4108, "step": 9710 }, { "epoch": 0.013657511894997339, "grad_norm": 1.3856712579727173, "learning_rate": 4.0968104538429106e-05, "loss": 6.3383, "step": 9720 }, { "epoch": 0.013671562833160917, "grad_norm": 1.4215503931045532, "learning_rate": 4.1010257130813544e-05, "loss": 6.3143, "step": 9730 }, { "epoch": 0.013685613771324495, "grad_norm": 1.3027349710464478, "learning_rate": 4.105240972319797e-05, "loss": 6.3573, "step": 9740 }, { "epoch": 0.013699664709488071, "grad_norm": 1.5820553302764893, "learning_rate": 4.1094562315582406e-05, "loss": 6.3765, "step": 9750 }, { "epoch": 0.013713715647651649, "grad_norm": 1.3663697242736816, "learning_rate": 4.113671490796684e-05, "loss": 6.3362, "step": 9760 }, { "epoch": 0.013727766585815227, "grad_norm": 1.3366793394088745, "learning_rate": 4.117886750035126e-05, "loss": 6.3561, "step": 9770 }, { "epoch": 0.013741817523978805, "grad_norm": 1.2957160472869873, "learning_rate": 4.12210200927357e-05, "loss": 6.402, "step": 9780 }, { "epoch": 0.013755868462142381, "grad_norm": 1.2606065273284912, "learning_rate": 4.126317268512013e-05, "loss": 
6.3366, "step": 9790 }, { "epoch": 0.013769919400305959, "grad_norm": 1.3806688785552979, "learning_rate": 4.130532527750457e-05, "loss": 6.3261, "step": 9800 }, { "epoch": 0.013783970338469537, "grad_norm": 1.4678658246994019, "learning_rate": 4.1347477869888993e-05, "loss": 6.3808, "step": 9810 }, { "epoch": 0.013798021276633115, "grad_norm": 1.2760154008865356, "learning_rate": 4.1389630462273425e-05, "loss": 6.3305, "step": 9820 }, { "epoch": 0.013812072214796692, "grad_norm": 1.4611419439315796, "learning_rate": 4.143178305465786e-05, "loss": 6.2193, "step": 9830 }, { "epoch": 0.013826123152960269, "grad_norm": 1.3268877267837524, "learning_rate": 4.147393564704229e-05, "loss": 6.3524, "step": 9840 }, { "epoch": 0.013840174091123847, "grad_norm": 1.3382116556167603, "learning_rate": 4.151608823942672e-05, "loss": 6.4381, "step": 9850 }, { "epoch": 0.013854225029287424, "grad_norm": 1.4644103050231934, "learning_rate": 4.1558240831811156e-05, "loss": 6.2919, "step": 9860 }, { "epoch": 0.013868275967451002, "grad_norm": 1.427890658378601, "learning_rate": 4.160039342419558e-05, "loss": 6.3372, "step": 9870 }, { "epoch": 0.013882326905614579, "grad_norm": 1.189791202545166, "learning_rate": 4.164254601658002e-05, "loss": 6.3238, "step": 9880 }, { "epoch": 0.013896377843778156, "grad_norm": 1.2701431512832642, "learning_rate": 4.168469860896445e-05, "loss": 6.3215, "step": 9890 }, { "epoch": 0.013910428781941734, "grad_norm": 1.2288933992385864, "learning_rate": 4.1726851201348874e-05, "loss": 6.2862, "step": 9900 }, { "epoch": 0.013924479720105312, "grad_norm": 1.2237406969070435, "learning_rate": 4.176900379373331e-05, "loss": 6.47, "step": 9910 }, { "epoch": 0.01393853065826889, "grad_norm": 1.292963981628418, "learning_rate": 4.181115638611774e-05, "loss": 6.4382, "step": 9920 }, { "epoch": 0.013952581596432466, "grad_norm": 1.2853929996490479, "learning_rate": 4.185330897850217e-05, "loss": 6.3467, "step": 9930 }, { "epoch": 0.013966632534596044, "grad_norm": 
1.2729337215423584, "learning_rate": 4.1895461570886605e-05, "loss": 6.3551, "step": 9940 }, { "epoch": 0.013980683472759622, "grad_norm": 1.2991608381271362, "learning_rate": 4.1937614163271037e-05, "loss": 6.3629, "step": 9950 }, { "epoch": 0.0139947344109232, "grad_norm": 1.286419153213501, "learning_rate": 4.1979766755655475e-05, "loss": 6.4067, "step": 9960 }, { "epoch": 0.014008785349086776, "grad_norm": 1.2718861103057861, "learning_rate": 4.20219193480399e-05, "loss": 6.3202, "step": 9970 }, { "epoch": 0.014022836287250354, "grad_norm": 1.3436089754104614, "learning_rate": 4.206407194042433e-05, "loss": 6.219, "step": 9980 }, { "epoch": 0.014036887225413932, "grad_norm": 1.2512117624282837, "learning_rate": 4.210622453280877e-05, "loss": 6.2775, "step": 9990 }, { "epoch": 0.01405093816357751, "grad_norm": 1.3441169261932373, "learning_rate": 4.214837712519319e-05, "loss": 6.3914, "step": 10000 }, { "epoch": 0.014064989101741088, "grad_norm": 1.3099346160888672, "learning_rate": 4.219052971757763e-05, "loss": 6.3451, "step": 10010 }, { "epoch": 0.014079040039904664, "grad_norm": 1.3395233154296875, "learning_rate": 4.223268230996206e-05, "loss": 6.3383, "step": 10020 }, { "epoch": 0.014093090978068242, "grad_norm": 1.3763487339019775, "learning_rate": 4.2274834902346486e-05, "loss": 6.289, "step": 10030 }, { "epoch": 0.01410714191623182, "grad_norm": 1.429871916770935, "learning_rate": 4.2316987494730924e-05, "loss": 6.2917, "step": 10040 }, { "epoch": 0.014121192854395398, "grad_norm": 1.394494891166687, "learning_rate": 4.235492482787691e-05, "loss": 6.3043, "step": 10050 }, { "epoch": 0.014135243792558974, "grad_norm": 1.4214140176773071, "learning_rate": 4.239707742026134e-05, "loss": 6.3363, "step": 10060 }, { "epoch": 0.014149294730722552, "grad_norm": 1.2173434495925903, "learning_rate": 4.2439230012645776e-05, "loss": 6.3484, "step": 10070 }, { "epoch": 0.01416334566888613, "grad_norm": 1.3638588190078735, "learning_rate": 4.248138260503021e-05, 
"loss": 6.3495, "step": 10080 }, { "epoch": 0.014177396607049707, "grad_norm": 1.300630807876587, "learning_rate": 4.252353519741464e-05, "loss": 6.3795, "step": 10090 }, { "epoch": 0.014191447545213284, "grad_norm": 1.279421329498291, "learning_rate": 4.256568778979907e-05, "loss": 6.3479, "step": 10100 }, { "epoch": 0.014205498483376862, "grad_norm": 1.2923552989959717, "learning_rate": 4.26078403821835e-05, "loss": 6.3892, "step": 10110 }, { "epoch": 0.01421954942154044, "grad_norm": 1.2862358093261719, "learning_rate": 4.264999297456793e-05, "loss": 6.3228, "step": 10120 }, { "epoch": 0.014233600359704017, "grad_norm": 1.3364365100860596, "learning_rate": 4.269214556695236e-05, "loss": 6.4024, "step": 10130 }, { "epoch": 0.014247651297867595, "grad_norm": 1.2868720293045044, "learning_rate": 4.27342981593368e-05, "loss": 6.2095, "step": 10140 }, { "epoch": 0.014261702236031171, "grad_norm": 1.306137204170227, "learning_rate": 4.2776450751721225e-05, "loss": 6.3935, "step": 10150 }, { "epoch": 0.01427575317419475, "grad_norm": 1.35478675365448, "learning_rate": 4.2818603344105656e-05, "loss": 6.3092, "step": 10160 }, { "epoch": 0.014289804112358327, "grad_norm": 1.340928316116333, "learning_rate": 4.2860755936490094e-05, "loss": 6.2858, "step": 10170 }, { "epoch": 0.014303855050521905, "grad_norm": 1.4878336191177368, "learning_rate": 4.290290852887452e-05, "loss": 6.297, "step": 10180 }, { "epoch": 0.014317905988685481, "grad_norm": 1.3728898763656616, "learning_rate": 4.294506112125895e-05, "loss": 6.2908, "step": 10190 }, { "epoch": 0.01433195692684906, "grad_norm": 1.3865792751312256, "learning_rate": 4.298721371364339e-05, "loss": 6.3898, "step": 10200 }, { "epoch": 0.014346007865012637, "grad_norm": 1.3288946151733398, "learning_rate": 4.302936630602782e-05, "loss": 6.2661, "step": 10210 }, { "epoch": 0.014360058803176215, "grad_norm": 1.5158910751342773, "learning_rate": 4.306730363917381e-05, "loss": 6.2648, "step": 10220 }, { "epoch": 
0.014374109741339793, "grad_norm": 1.41054368019104, "learning_rate": 4.310945623155824e-05, "loss": 6.3832, "step": 10230 }, { "epoch": 0.014388160679503369, "grad_norm": 1.5252634286880493, "learning_rate": 4.3151608823942664e-05, "loss": 6.3621, "step": 10240 }, { "epoch": 0.014402211617666947, "grad_norm": 1.2399297952651978, "learning_rate": 4.31937614163271e-05, "loss": 6.3802, "step": 10250 }, { "epoch": 0.014416262555830525, "grad_norm": 1.2246840000152588, "learning_rate": 4.323591400871153e-05, "loss": 6.3442, "step": 10260 }, { "epoch": 0.014430313493994103, "grad_norm": 1.349806785583496, "learning_rate": 4.327806660109597e-05, "loss": 6.2469, "step": 10270 }, { "epoch": 0.014444364432157679, "grad_norm": 1.199388027191162, "learning_rate": 4.3320219193480395e-05, "loss": 6.3044, "step": 10280 }, { "epoch": 0.014458415370321257, "grad_norm": 1.371137261390686, "learning_rate": 4.3362371785864826e-05, "loss": 6.2964, "step": 10290 }, { "epoch": 0.014472466308484835, "grad_norm": 1.3206599950790405, "learning_rate": 4.3404524378249264e-05, "loss": 6.3184, "step": 10300 }, { "epoch": 0.014486517246648413, "grad_norm": 1.4087791442871094, "learning_rate": 4.344667697063369e-05, "loss": 6.2876, "step": 10310 }, { "epoch": 0.01450056818481199, "grad_norm": 1.4454388618469238, "learning_rate": 4.348882956301812e-05, "loss": 6.2846, "step": 10320 }, { "epoch": 0.014514619122975567, "grad_norm": 1.414621353149414, "learning_rate": 4.353098215540256e-05, "loss": 6.2277, "step": 10330 }, { "epoch": 0.014528670061139145, "grad_norm": 1.3868155479431152, "learning_rate": 4.357313474778698e-05, "loss": 6.2959, "step": 10340 }, { "epoch": 0.014542720999302722, "grad_norm": 1.3480900526046753, "learning_rate": 4.361528734017142e-05, "loss": 6.2991, "step": 10350 }, { "epoch": 0.0145567719374663, "grad_norm": 1.3821889162063599, "learning_rate": 4.365743993255585e-05, "loss": 6.2858, "step": 10360 }, { "epoch": 0.014570822875629877, "grad_norm": 1.3947319984436035, 
"learning_rate": 4.3699592524940276e-05, "loss": 6.2764, "step": 10370 }, { "epoch": 0.014584873813793454, "grad_norm": 1.4054744243621826, "learning_rate": 4.3741745117324714e-05, "loss": 6.3224, "step": 10380 }, { "epoch": 0.014598924751957032, "grad_norm": 1.2518731355667114, "learning_rate": 4.3783897709709145e-05, "loss": 6.3417, "step": 10390 }, { "epoch": 0.01461297569012061, "grad_norm": 1.4203547239303589, "learning_rate": 4.382605030209357e-05, "loss": 6.3121, "step": 10400 }, { "epoch": 0.014627026628284186, "grad_norm": 1.4533650875091553, "learning_rate": 4.386820289447801e-05, "loss": 6.2781, "step": 10410 }, { "epoch": 0.014641077566447764, "grad_norm": 1.4527699947357178, "learning_rate": 4.391035548686244e-05, "loss": 6.3558, "step": 10420 }, { "epoch": 0.014655128504611342, "grad_norm": 1.324060082435608, "learning_rate": 4.3952508079246876e-05, "loss": 6.3507, "step": 10430 }, { "epoch": 0.01466917944277492, "grad_norm": 1.4087752103805542, "learning_rate": 4.39946606716313e-05, "loss": 6.3084, "step": 10440 }, { "epoch": 0.014683230380938498, "grad_norm": 1.2102359533309937, "learning_rate": 4.403681326401573e-05, "loss": 6.3034, "step": 10450 }, { "epoch": 0.014697281319102074, "grad_norm": 1.3251889944076538, "learning_rate": 4.407896585640017e-05, "loss": 6.2459, "step": 10460 }, { "epoch": 0.014711332257265652, "grad_norm": 1.3922446966171265, "learning_rate": 4.4121118448784594e-05, "loss": 6.3356, "step": 10470 }, { "epoch": 0.01472538319542923, "grad_norm": 1.269550085067749, "learning_rate": 4.416327104116903e-05, "loss": 6.2387, "step": 10480 }, { "epoch": 0.014739434133592808, "grad_norm": 1.3506200313568115, "learning_rate": 4.420542363355346e-05, "loss": 6.2655, "step": 10490 }, { "epoch": 0.014753485071756384, "grad_norm": 1.3239065408706665, "learning_rate": 4.424757622593789e-05, "loss": 6.267, "step": 10500 }, { "epoch": 0.014767536009919962, "grad_norm": 1.2084630727767944, "learning_rate": 4.4289728818322325e-05, "loss": 
6.3075, "step": 10510 }, { "epoch": 0.01478158694808354, "grad_norm": 1.3695273399353027, "learning_rate": 4.433188141070676e-05, "loss": 6.3784, "step": 10520 }, { "epoch": 0.014795637886247118, "grad_norm": 1.4278502464294434, "learning_rate": 4.437403400309118e-05, "loss": 6.2445, "step": 10530 }, { "epoch": 0.014809688824410696, "grad_norm": 1.606279969215393, "learning_rate": 4.441618659547562e-05, "loss": 6.2251, "step": 10540 }, { "epoch": 0.014823739762574272, "grad_norm": 1.283535122871399, "learning_rate": 4.445833918786005e-05, "loss": 6.2737, "step": 10550 }, { "epoch": 0.01483779070073785, "grad_norm": 1.2936784029006958, "learning_rate": 4.450049178024449e-05, "loss": 6.3372, "step": 10560 }, { "epoch": 0.014851841638901428, "grad_norm": 1.4122823476791382, "learning_rate": 4.454264437262891e-05, "loss": 6.3038, "step": 10570 }, { "epoch": 0.014865892577065005, "grad_norm": 1.2987902164459229, "learning_rate": 4.4584796965013344e-05, "loss": 6.2672, "step": 10580 }, { "epoch": 0.014879943515228582, "grad_norm": 1.2851322889328003, "learning_rate": 4.462694955739778e-05, "loss": 6.2606, "step": 10590 }, { "epoch": 0.01489399445339216, "grad_norm": 1.359154224395752, "learning_rate": 4.4669102149782206e-05, "loss": 6.3153, "step": 10600 }, { "epoch": 0.014908045391555737, "grad_norm": 1.3480968475341797, "learning_rate": 4.471125474216664e-05, "loss": 6.3336, "step": 10610 }, { "epoch": 0.014922096329719315, "grad_norm": 1.3098258972167969, "learning_rate": 4.4753407334551075e-05, "loss": 6.302, "step": 10620 }, { "epoch": 0.014936147267882893, "grad_norm": 1.3076599836349487, "learning_rate": 4.47955599269355e-05, "loss": 6.3044, "step": 10630 }, { "epoch": 0.01495019820604647, "grad_norm": 1.2845678329467773, "learning_rate": 4.483771251931994e-05, "loss": 6.2815, "step": 10640 }, { "epoch": 0.014964249144210047, "grad_norm": 1.2279037237167358, "learning_rate": 4.487986511170437e-05, "loss": 6.2752, "step": 10650 }, { "epoch": 0.014978300082373625, 
"grad_norm": 1.3851795196533203, "learning_rate": 4.492201770408879e-05, "loss": 6.2336, "step": 10660 }, { "epoch": 0.014992351020537203, "grad_norm": 1.5091443061828613, "learning_rate": 4.496417029647323e-05, "loss": 6.1372, "step": 10670 }, { "epoch": 0.01500640195870078, "grad_norm": 1.263030767440796, "learning_rate": 4.500632288885766e-05, "loss": 6.2136, "step": 10680 }, { "epoch": 0.015020452896864357, "grad_norm": 1.508408546447754, "learning_rate": 4.5048475481242086e-05, "loss": 6.2105, "step": 10690 }, { "epoch": 0.015034503835027935, "grad_norm": 1.5163711309432983, "learning_rate": 4.5090628073626524e-05, "loss": 6.1942, "step": 10700 }, { "epoch": 0.015048554773191513, "grad_norm": 1.3551746606826782, "learning_rate": 4.5132780666010956e-05, "loss": 6.1646, "step": 10710 }, { "epoch": 0.015062605711355091, "grad_norm": 1.3781139850616455, "learning_rate": 4.5174933258395394e-05, "loss": 6.2178, "step": 10720 }, { "epoch": 0.015076656649518667, "grad_norm": 1.3036412000656128, "learning_rate": 4.521708585077982e-05, "loss": 6.2511, "step": 10730 }, { "epoch": 0.015090707587682245, "grad_norm": 1.3681645393371582, "learning_rate": 4.525923844316425e-05, "loss": 6.1301, "step": 10740 }, { "epoch": 0.015104758525845823, "grad_norm": 1.3642160892486572, "learning_rate": 4.530139103554869e-05, "loss": 6.228, "step": 10750 }, { "epoch": 0.0151188094640094, "grad_norm": 1.410559058189392, "learning_rate": 4.534354362793311e-05, "loss": 6.2981, "step": 10760 }, { "epoch": 0.015132860402172977, "grad_norm": 1.3881288766860962, "learning_rate": 4.538569622031755e-05, "loss": 6.1561, "step": 10770 }, { "epoch": 0.015146911340336555, "grad_norm": 1.231331467628479, "learning_rate": 4.542784881270198e-05, "loss": 6.2037, "step": 10780 }, { "epoch": 0.015160962278500133, "grad_norm": 1.5481162071228027, "learning_rate": 4.5470001405086405e-05, "loss": 6.1547, "step": 10790 }, { "epoch": 0.01517501321666371, "grad_norm": 1.3833677768707275, "learning_rate": 
4.551215399747084e-05, "loss": 6.2681, "step": 10800 }, { "epoch": 0.015189064154827287, "grad_norm": 1.297667145729065, "learning_rate": 4.5554306589855274e-05, "loss": 6.1601, "step": 10810 }, { "epoch": 0.015203115092990865, "grad_norm": 1.2731614112854004, "learning_rate": 4.55964591822397e-05, "loss": 6.2338, "step": 10820 }, { "epoch": 0.015217166031154443, "grad_norm": 1.392951488494873, "learning_rate": 4.5638611774624136e-05, "loss": 6.223, "step": 10830 }, { "epoch": 0.01523121696931802, "grad_norm": 1.152622103691101, "learning_rate": 4.568076436700857e-05, "loss": 6.1884, "step": 10840 }, { "epoch": 0.015245267907481598, "grad_norm": 1.287348985671997, "learning_rate": 4.5722916959393005e-05, "loss": 6.1756, "step": 10850 }, { "epoch": 0.015259318845645175, "grad_norm": 1.3075429201126099, "learning_rate": 4.576506955177743e-05, "loss": 6.1748, "step": 10860 }, { "epoch": 0.015273369783808752, "grad_norm": 1.3519240617752075, "learning_rate": 4.580722214416186e-05, "loss": 6.2611, "step": 10870 }, { "epoch": 0.01528742072197233, "grad_norm": 1.257500410079956, "learning_rate": 4.58493747365463e-05, "loss": 6.1883, "step": 10880 }, { "epoch": 0.015301471660135908, "grad_norm": 1.276843786239624, "learning_rate": 4.589152732893072e-05, "loss": 6.1289, "step": 10890 }, { "epoch": 0.015315522598299484, "grad_norm": 1.397799015045166, "learning_rate": 4.5933679921315155e-05, "loss": 6.1006, "step": 10900 }, { "epoch": 0.015329573536463062, "grad_norm": 1.3642454147338867, "learning_rate": 4.597583251369959e-05, "loss": 6.179, "step": 10910 }, { "epoch": 0.01534362447462664, "grad_norm": 1.406509280204773, "learning_rate": 4.601798510608402e-05, "loss": 6.1252, "step": 10920 }, { "epoch": 0.015357675412790218, "grad_norm": 1.4512871503829956, "learning_rate": 4.6060137698468455e-05, "loss": 6.2659, "step": 10930 }, { "epoch": 0.015371726350953796, "grad_norm": 1.332694172859192, "learning_rate": 4.6102290290852886e-05, "loss": 6.2391, "step": 10940 }, { 
"epoch": 0.015385777289117372, "grad_norm": 1.3527559041976929, "learning_rate": 4.614444288323731e-05, "loss": 6.2161, "step": 10950 }, { "epoch": 0.01539982822728095, "grad_norm": 1.3626435995101929, "learning_rate": 4.618659547562175e-05, "loss": 6.1904, "step": 10960 }, { "epoch": 0.015413879165444528, "grad_norm": 1.4977718591690063, "learning_rate": 4.622874806800618e-05, "loss": 6.1898, "step": 10970 }, { "epoch": 0.015427930103608106, "grad_norm": 1.512209415435791, "learning_rate": 4.627090066039062e-05, "loss": 6.139, "step": 10980 }, { "epoch": 0.015441981041771682, "grad_norm": 1.363409161567688, "learning_rate": 4.631305325277504e-05, "loss": 6.1603, "step": 10990 }, { "epoch": 0.01545603197993526, "grad_norm": 1.5099941492080688, "learning_rate": 4.635520584515947e-05, "loss": 6.1706, "step": 11000 }, { "epoch": 0.015470082918098838, "grad_norm": 1.9168140888214111, "learning_rate": 4.639735843754391e-05, "loss": 6.2084, "step": 11010 }, { "epoch": 0.015484133856262416, "grad_norm": 1.2205970287322998, "learning_rate": 4.6439511029928335e-05, "loss": 6.252, "step": 11020 }, { "epoch": 0.015498184794425994, "grad_norm": 1.5673115253448486, "learning_rate": 4.6481663622312766e-05, "loss": 6.0759, "step": 11030 }, { "epoch": 0.01551223573258957, "grad_norm": 1.4149396419525146, "learning_rate": 4.6523816214697204e-05, "loss": 6.2689, "step": 11040 }, { "epoch": 0.015526286670753148, "grad_norm": 1.3954936265945435, "learning_rate": 4.656596880708163e-05, "loss": 6.2501, "step": 11050 }, { "epoch": 0.015540337608916726, "grad_norm": 1.5006779432296753, "learning_rate": 4.660812139946607e-05, "loss": 6.1785, "step": 11060 }, { "epoch": 0.015554388547080304, "grad_norm": 1.3999886512756348, "learning_rate": 4.66502739918505e-05, "loss": 6.15, "step": 11070 }, { "epoch": 0.01556843948524388, "grad_norm": 2.2167131900787354, "learning_rate": 4.669242658423492e-05, "loss": 6.132, "step": 11080 }, { "epoch": 0.015582490423407458, "grad_norm": 1.401848316192627, 
"learning_rate": 4.673457917661936e-05, "loss": 6.281, "step": 11090 }, { "epoch": 0.015596541361571035, "grad_norm": 1.3416310548782349, "learning_rate": 4.677673176900379e-05, "loss": 6.1515, "step": 11100 }, { "epoch": 0.015610592299734613, "grad_norm": 1.5161608457565308, "learning_rate": 4.6818884361388216e-05, "loss": 6.1189, "step": 11110 }, { "epoch": 0.01562464323789819, "grad_norm": 1.393735408782959, "learning_rate": 4.6861036953772654e-05, "loss": 6.2588, "step": 11120 }, { "epoch": 0.015638694176061767, "grad_norm": 1.3404139280319214, "learning_rate": 4.6903189546157085e-05, "loss": 6.234, "step": 11130 }, { "epoch": 0.015652745114225347, "grad_norm": 1.372836947441101, "learning_rate": 4.694534213854152e-05, "loss": 6.2106, "step": 11140 }, { "epoch": 0.015666796052388923, "grad_norm": 1.3898221254348755, "learning_rate": 4.698749473092595e-05, "loss": 6.0849, "step": 11150 }, { "epoch": 0.0156808469905525, "grad_norm": 1.336380124092102, "learning_rate": 4.702964732331038e-05, "loss": 6.1996, "step": 11160 }, { "epoch": 0.01569489792871608, "grad_norm": 1.478607177734375, "learning_rate": 4.7071799915694816e-05, "loss": 6.1785, "step": 11170 }, { "epoch": 0.015708948866879655, "grad_norm": 1.5594801902770996, "learning_rate": 4.711395250807924e-05, "loss": 6.2302, "step": 11180 }, { "epoch": 0.01572299980504323, "grad_norm": 1.377120852470398, "learning_rate": 4.715610510046367e-05, "loss": 6.2416, "step": 11190 }, { "epoch": 0.01573705074320681, "grad_norm": 1.5377556085586548, "learning_rate": 4.719825769284811e-05, "loss": 6.1337, "step": 11200 }, { "epoch": 0.015751101681370387, "grad_norm": 1.7049307823181152, "learning_rate": 4.7240410285232534e-05, "loss": 6.1986, "step": 11210 }, { "epoch": 0.015765152619533967, "grad_norm": 1.3457587957382202, "learning_rate": 4.728256287761697e-05, "loss": 6.0877, "step": 11220 }, { "epoch": 0.015779203557697543, "grad_norm": 1.3759392499923706, "learning_rate": 4.73247154700014e-05, "loss": 6.1598, 
"step": 11230 }, { "epoch": 0.01579325449586112, "grad_norm": 1.3922202587127686, "learning_rate": 4.736686806238583e-05, "loss": 6.2017, "step": 11240 }, { "epoch": 0.0158073054340247, "grad_norm": 1.3885234594345093, "learning_rate": 4.7409020654770266e-05, "loss": 6.2302, "step": 11250 }, { "epoch": 0.015821356372188275, "grad_norm": 1.4035016298294067, "learning_rate": 4.74511732471547e-05, "loss": 6.204, "step": 11260 }, { "epoch": 0.015835407310351855, "grad_norm": 1.3315571546554565, "learning_rate": 4.749332583953913e-05, "loss": 6.1677, "step": 11270 }, { "epoch": 0.01584945824851543, "grad_norm": 1.2895710468292236, "learning_rate": 4.753547843192356e-05, "loss": 6.2274, "step": 11280 }, { "epoch": 0.015863509186679007, "grad_norm": 1.3065532445907593, "learning_rate": 4.757763102430799e-05, "loss": 6.0836, "step": 11290 }, { "epoch": 0.015877560124842587, "grad_norm": 1.392031192779541, "learning_rate": 4.761978361669243e-05, "loss": 6.1599, "step": 11300 }, { "epoch": 0.015891611063006163, "grad_norm": 1.3048975467681885, "learning_rate": 4.766193620907685e-05, "loss": 6.1467, "step": 11310 }, { "epoch": 0.01590566200116974, "grad_norm": 1.5152511596679688, "learning_rate": 4.7704088801461284e-05, "loss": 6.0913, "step": 11320 }, { "epoch": 0.01591971293933332, "grad_norm": 1.3078099489212036, "learning_rate": 4.774624139384572e-05, "loss": 6.0885, "step": 11330 }, { "epoch": 0.015933763877496895, "grad_norm": 1.2487436532974243, "learning_rate": 4.7788393986230146e-05, "loss": 6.2689, "step": 11340 }, { "epoch": 0.015947814815660474, "grad_norm": 1.5608822107315063, "learning_rate": 4.7830546578614584e-05, "loss": 6.1847, "step": 11350 }, { "epoch": 0.01596186575382405, "grad_norm": 1.7839996814727783, "learning_rate": 4.7872699170999015e-05, "loss": 6.0849, "step": 11360 }, { "epoch": 0.015975916691987627, "grad_norm": 1.5817362070083618, "learning_rate": 4.791485176338344e-05, "loss": 6.2027, "step": 11370 }, { "epoch": 0.015989967630151206, 
"grad_norm": 1.7094651460647583, "learning_rate": 4.795700435576788e-05, "loss": 6.1865, "step": 11380 }, { "epoch": 0.016004018568314782, "grad_norm": 1.3029943704605103, "learning_rate": 4.799915694815231e-05, "loss": 6.1732, "step": 11390 }, { "epoch": 0.016018069506478362, "grad_norm": 1.6077370643615723, "learning_rate": 4.804130954053673e-05, "loss": 6.2421, "step": 11400 }, { "epoch": 0.016032120444641938, "grad_norm": 1.7310693264007568, "learning_rate": 4.808346213292117e-05, "loss": 6.0574, "step": 11410 }, { "epoch": 0.016046171382805514, "grad_norm": 1.3837238550186157, "learning_rate": 4.81256147253056e-05, "loss": 6.1435, "step": 11420 }, { "epoch": 0.016060222320969094, "grad_norm": 1.3638036251068115, "learning_rate": 4.816776731769004e-05, "loss": 6.0729, "step": 11430 }, { "epoch": 0.01607427325913267, "grad_norm": 1.391371726989746, "learning_rate": 4.8209919910074465e-05, "loss": 6.0995, "step": 11440 }, { "epoch": 0.01608832419729625, "grad_norm": 1.3891973495483398, "learning_rate": 4.8252072502458896e-05, "loss": 6.0128, "step": 11450 }, { "epoch": 0.016102375135459826, "grad_norm": 1.384169578552246, "learning_rate": 4.8294225094843334e-05, "loss": 6.2458, "step": 11460 }, { "epoch": 0.016116426073623402, "grad_norm": 1.3138374090194702, "learning_rate": 4.833637768722776e-05, "loss": 6.1883, "step": 11470 }, { "epoch": 0.016130477011786982, "grad_norm": 1.3187780380249023, "learning_rate": 4.8378530279612196e-05, "loss": 6.128, "step": 11480 }, { "epoch": 0.016144527949950558, "grad_norm": 1.3277584314346313, "learning_rate": 4.842068287199663e-05, "loss": 6.2424, "step": 11490 }, { "epoch": 0.016158578888114134, "grad_norm": 1.2720946073532104, "learning_rate": 4.846283546438105e-05, "loss": 6.2401, "step": 11500 }, { "epoch": 0.016172629826277714, "grad_norm": 1.4264894723892212, "learning_rate": 4.850498805676549e-05, "loss": 6.1807, "step": 11510 }, { "epoch": 0.01618668076444129, "grad_norm": 1.3776253461837769, "learning_rate": 
4.854714064914992e-05, "loss": 6.2413, "step": 11520 }, { "epoch": 0.01620073170260487, "grad_norm": 1.2899820804595947, "learning_rate": 4.8589293241534345e-05, "loss": 6.1676, "step": 11530 }, { "epoch": 0.016214782640768446, "grad_norm": 1.3107612133026123, "learning_rate": 4.863144583391878e-05, "loss": 6.1658, "step": 11540 }, { "epoch": 0.016228833578932022, "grad_norm": 1.4616020917892456, "learning_rate": 4.8673598426303214e-05, "loss": 6.1274, "step": 11550 }, { "epoch": 0.0162428845170956, "grad_norm": 1.3054602146148682, "learning_rate": 4.8715751018687645e-05, "loss": 6.119, "step": 11560 }, { "epoch": 0.016256935455259178, "grad_norm": 1.3119100332260132, "learning_rate": 4.8757903611072077e-05, "loss": 6.0736, "step": 11570 }, { "epoch": 0.016270986393422757, "grad_norm": 1.3544719219207764, "learning_rate": 4.880005620345651e-05, "loss": 6.1392, "step": 11580 }, { "epoch": 0.016285037331586334, "grad_norm": 1.3637272119522095, "learning_rate": 4.8842208795840946e-05, "loss": 6.0982, "step": 11590 }, { "epoch": 0.01629908826974991, "grad_norm": 1.431994915008545, "learning_rate": 4.888436138822537e-05, "loss": 6.07, "step": 11600 }, { "epoch": 0.01631313920791349, "grad_norm": 1.322766661643982, "learning_rate": 4.89265139806098e-05, "loss": 6.2458, "step": 11610 }, { "epoch": 0.016327190146077065, "grad_norm": 1.5207664966583252, "learning_rate": 4.896866657299424e-05, "loss": 6.0894, "step": 11620 }, { "epoch": 0.016341241084240645, "grad_norm": 1.950318455696106, "learning_rate": 4.9010819165378664e-05, "loss": 6.112, "step": 11630 }, { "epoch": 0.01635529202240422, "grad_norm": 1.8058013916015625, "learning_rate": 4.90529717577631e-05, "loss": 6.1553, "step": 11640 }, { "epoch": 0.016369342960567797, "grad_norm": 1.2825206518173218, "learning_rate": 4.909512435014753e-05, "loss": 6.185, "step": 11650 }, { "epoch": 0.016383393898731377, "grad_norm": 1.4293164014816284, "learning_rate": 4.913727694253196e-05, "loss": 6.2412, "step": 11660 }, { 
"epoch": 0.016397444836894953, "grad_norm": 1.2992738485336304, "learning_rate": 4.9179429534916395e-05, "loss": 6.1107, "step": 11670 }, { "epoch": 0.01641149577505853, "grad_norm": 1.3839462995529175, "learning_rate": 4.9221582127300826e-05, "loss": 6.0917, "step": 11680 }, { "epoch": 0.01642554671322211, "grad_norm": 1.3559750318527222, "learning_rate": 4.926373471968525e-05, "loss": 6.1816, "step": 11690 }, { "epoch": 0.016439597651385685, "grad_norm": 1.3381038904190063, "learning_rate": 4.930588731206969e-05, "loss": 6.2072, "step": 11700 }, { "epoch": 0.016453648589549265, "grad_norm": 1.3606219291687012, "learning_rate": 4.934803990445412e-05, "loss": 6.1239, "step": 11710 }, { "epoch": 0.01646769952771284, "grad_norm": 1.2954307794570923, "learning_rate": 4.939019249683856e-05, "loss": 6.1274, "step": 11720 }, { "epoch": 0.016481750465876417, "grad_norm": 1.3133169412612915, "learning_rate": 4.943234508922298e-05, "loss": 6.0767, "step": 11730 }, { "epoch": 0.016495801404039997, "grad_norm": 1.675938367843628, "learning_rate": 4.947449768160741e-05, "loss": 6.1603, "step": 11740 }, { "epoch": 0.016509852342203573, "grad_norm": 1.3132497072219849, "learning_rate": 4.951665027399185e-05, "loss": 6.1483, "step": 11750 }, { "epoch": 0.016523903280367153, "grad_norm": 1.491671085357666, "learning_rate": 4.9558802866376276e-05, "loss": 6.0745, "step": 11760 }, { "epoch": 0.01653795421853073, "grad_norm": 1.487144112586975, "learning_rate": 4.9600955458760713e-05, "loss": 6.0128, "step": 11770 }, { "epoch": 0.016552005156694305, "grad_norm": 1.572177767753601, "learning_rate": 4.9643108051145145e-05, "loss": 6.1975, "step": 11780 }, { "epoch": 0.016566056094857885, "grad_norm": 1.355470061302185, "learning_rate": 4.968526064352957e-05, "loss": 6.0389, "step": 11790 }, { "epoch": 0.01658010703302146, "grad_norm": 1.437559962272644, "learning_rate": 4.972741323591401e-05, "loss": 6.0687, "step": 11800 }, { "epoch": 0.016594157971185037, "grad_norm": 
1.2300171852111816, "learning_rate": 4.976956582829844e-05, "loss": 6.119, "step": 11810 }, { "epoch": 0.016608208909348617, "grad_norm": 1.3405951261520386, "learning_rate": 4.981171842068286e-05, "loss": 6.2048, "step": 11820 }, { "epoch": 0.016622259847512193, "grad_norm": 1.3226433992385864, "learning_rate": 4.98538710130673e-05, "loss": 6.0897, "step": 11830 }, { "epoch": 0.016636310785675772, "grad_norm": 1.3560805320739746, "learning_rate": 4.989602360545173e-05, "loss": 6.0075, "step": 11840 }, { "epoch": 0.01665036172383935, "grad_norm": 1.4106473922729492, "learning_rate": 4.993817619783616e-05, "loss": 5.9296, "step": 11850 }, { "epoch": 0.016664412662002925, "grad_norm": 1.2530484199523926, "learning_rate": 4.9980328790220594e-05, "loss": 6.0896, "step": 11860 }, { "epoch": 0.016678463600166504, "grad_norm": 1.28843092918396, "learning_rate": 5.0022481382605025e-05, "loss": 6.1235, "step": 11870 }, { "epoch": 0.01669251453833008, "grad_norm": 1.4062579870224, "learning_rate": 5.006463397498946e-05, "loss": 6.1136, "step": 11880 }, { "epoch": 0.01670656547649366, "grad_norm": 1.275445818901062, "learning_rate": 5.010678656737389e-05, "loss": 6.0155, "step": 11890 }, { "epoch": 0.016720616414657236, "grad_norm": 1.9027255773544312, "learning_rate": 5.014893915975832e-05, "loss": 6.1248, "step": 11900 }, { "epoch": 0.016734667352820812, "grad_norm": 1.4187486171722412, "learning_rate": 5.0191091752142757e-05, "loss": 6.0323, "step": 11910 }, { "epoch": 0.016748718290984392, "grad_norm": 1.3106961250305176, "learning_rate": 5.023324434452718e-05, "loss": 6.0743, "step": 11920 }, { "epoch": 0.016762769229147968, "grad_norm": 1.2284607887268066, "learning_rate": 5.027539693691162e-05, "loss": 6.1276, "step": 11930 }, { "epoch": 0.016776820167311548, "grad_norm": 1.403949499130249, "learning_rate": 5.031754952929605e-05, "loss": 6.0686, "step": 11940 }, { "epoch": 0.016790871105475124, "grad_norm": 1.421419382095337, "learning_rate": 5.0359702121680474e-05, 
"loss": 6.241, "step": 11950 }, { "epoch": 0.0168049220436387, "grad_norm": 1.5890074968338013, "learning_rate": 5.040185471406491e-05, "loss": 6.1183, "step": 11960 }, { "epoch": 0.01681897298180228, "grad_norm": 1.38971745967865, "learning_rate": 5.0444007306449344e-05, "loss": 6.0829, "step": 11970 }, { "epoch": 0.016833023919965856, "grad_norm": 1.4102171659469604, "learning_rate": 5.0486159898833775e-05, "loss": 6.1236, "step": 11980 }, { "epoch": 0.016847074858129432, "grad_norm": 1.258589506149292, "learning_rate": 5.0528312491218206e-05, "loss": 6.1806, "step": 11990 }, { "epoch": 0.016861125796293012, "grad_norm": 1.3471759557724, "learning_rate": 5.057046508360264e-05, "loss": 6.0465, "step": 12000 }, { "epoch": 0.016875176734456588, "grad_norm": 1.5169402360916138, "learning_rate": 5.0612617675987075e-05, "loss": 6.1002, "step": 12010 }, { "epoch": 0.016889227672620168, "grad_norm": 1.5497729778289795, "learning_rate": 5.06547702683715e-05, "loss": 6.1487, "step": 12020 }, { "epoch": 0.016903278610783744, "grad_norm": 1.364227294921875, "learning_rate": 5.069692286075593e-05, "loss": 6.0852, "step": 12030 }, { "epoch": 0.01691732954894732, "grad_norm": 1.3978562355041504, "learning_rate": 5.073907545314037e-05, "loss": 6.0646, "step": 12040 }, { "epoch": 0.0169313804871109, "grad_norm": 1.2657595872879028, "learning_rate": 5.078122804552479e-05, "loss": 6.0511, "step": 12050 }, { "epoch": 0.016945431425274476, "grad_norm": 1.472236156463623, "learning_rate": 5.082338063790923e-05, "loss": 6.1274, "step": 12060 }, { "epoch": 0.016959482363438055, "grad_norm": 1.381701946258545, "learning_rate": 5.086553323029366e-05, "loss": 6.0854, "step": 12070 }, { "epoch": 0.01697353330160163, "grad_norm": 1.3113288879394531, "learning_rate": 5.0907685822678086e-05, "loss": 6.1267, "step": 12080 }, { "epoch": 0.016987584239765208, "grad_norm": 1.3406379222869873, "learning_rate": 5.0949838415062524e-05, "loss": 6.0556, "step": 12090 }, { "epoch": 0.017001635177928787, 
"grad_norm": 1.3727220296859741, "learning_rate": 5.0991991007446956e-05, "loss": 6.0826, "step": 12100 }, { "epoch": 0.017015686116092364, "grad_norm": 1.3808116912841797, "learning_rate": 5.103414359983138e-05, "loss": 6.2429, "step": 12110 }, { "epoch": 0.01702973705425594, "grad_norm": 1.4226034879684448, "learning_rate": 5.107629619221582e-05, "loss": 5.9922, "step": 12120 }, { "epoch": 0.01704378799241952, "grad_norm": 1.3345751762390137, "learning_rate": 5.111844878460025e-05, "loss": 6.1254, "step": 12130 }, { "epoch": 0.017057838930583095, "grad_norm": 1.391182780265808, "learning_rate": 5.116060137698468e-05, "loss": 5.9991, "step": 12140 }, { "epoch": 0.017071889868746675, "grad_norm": 1.3782682418823242, "learning_rate": 5.120275396936911e-05, "loss": 6.1427, "step": 12150 }, { "epoch": 0.01708594080691025, "grad_norm": 1.4560561180114746, "learning_rate": 5.124490656175354e-05, "loss": 6.0616, "step": 12160 }, { "epoch": 0.017099991745073827, "grad_norm": 1.3621957302093506, "learning_rate": 5.128705915413798e-05, "loss": 6.099, "step": 12170 }, { "epoch": 0.017114042683237407, "grad_norm": 1.2872947454452515, "learning_rate": 5.1329211746522405e-05, "loss": 6.068, "step": 12180 }, { "epoch": 0.017128093621400983, "grad_norm": 1.3031105995178223, "learning_rate": 5.1371364338906836e-05, "loss": 6.1765, "step": 12190 }, { "epoch": 0.017142144559564563, "grad_norm": 1.4918880462646484, "learning_rate": 5.1413516931291274e-05, "loss": 6.0744, "step": 12200 }, { "epoch": 0.01715619549772814, "grad_norm": 1.66692316532135, "learning_rate": 5.14556695236757e-05, "loss": 6.1172, "step": 12210 }, { "epoch": 0.017170246435891715, "grad_norm": 1.3032985925674438, "learning_rate": 5.1497822116060136e-05, "loss": 6.0353, "step": 12220 }, { "epoch": 0.017184297374055295, "grad_norm": 1.3920645713806152, "learning_rate": 5.153997470844457e-05, "loss": 6.1914, "step": 12230 }, { "epoch": 0.01719834831221887, "grad_norm": 1.3095813989639282, "learning_rate": 
5.158212730082899e-05, "loss": 6.0989, "step": 12240 }, { "epoch": 0.01721239925038245, "grad_norm": 1.347722053527832, "learning_rate": 5.162427989321343e-05, "loss": 5.9789, "step": 12250 }, { "epoch": 0.017226450188546027, "grad_norm": 1.4624723196029663, "learning_rate": 5.166643248559786e-05, "loss": 6.0599, "step": 12260 }, { "epoch": 0.017240501126709603, "grad_norm": 1.4272193908691406, "learning_rate": 5.170858507798229e-05, "loss": 6.1182, "step": 12270 }, { "epoch": 0.017254552064873183, "grad_norm": 1.3339524269104004, "learning_rate": 5.175073767036672e-05, "loss": 6.0278, "step": 12280 }, { "epoch": 0.01726860300303676, "grad_norm": 1.505544900894165, "learning_rate": 5.1792890262751154e-05, "loss": 6.089, "step": 12290 }, { "epoch": 0.017282653941200335, "grad_norm": 1.400678277015686, "learning_rate": 5.1835042855135586e-05, "loss": 6.1141, "step": 12300 }, { "epoch": 0.017296704879363915, "grad_norm": 1.316159963607788, "learning_rate": 5.187719544752002e-05, "loss": 6.1014, "step": 12310 }, { "epoch": 0.01731075581752749, "grad_norm": 1.41743803024292, "learning_rate": 5.191934803990445e-05, "loss": 6.0915, "step": 12320 }, { "epoch": 0.01732480675569107, "grad_norm": 1.7660351991653442, "learning_rate": 5.1961500632288886e-05, "loss": 6.0905, "step": 12330 }, { "epoch": 0.017338857693854647, "grad_norm": 1.3657212257385254, "learning_rate": 5.200365322467331e-05, "loss": 6.0799, "step": 12340 }, { "epoch": 0.017352908632018223, "grad_norm": 1.3705576658248901, "learning_rate": 5.204580581705775e-05, "loss": 6.013, "step": 12350 }, { "epoch": 0.017366959570181802, "grad_norm": 1.4308192729949951, "learning_rate": 5.208795840944218e-05, "loss": 6.1158, "step": 12360 }, { "epoch": 0.01738101050834538, "grad_norm": 1.3916292190551758, "learning_rate": 5.2130111001826604e-05, "loss": 5.9591, "step": 12370 }, { "epoch": 0.017395061446508958, "grad_norm": 1.3628098964691162, "learning_rate": 5.217226359421104e-05, "loss": 6.1443, "step": 12380 }, { 
"epoch": 0.017409112384672534, "grad_norm": 1.2999829053878784, "learning_rate": 5.221441618659547e-05, "loss": 6.0797, "step": 12390 }, { "epoch": 0.01742316332283611, "grad_norm": 1.4970372915267944, "learning_rate": 5.22565687789799e-05, "loss": 6.0399, "step": 12400 }, { "epoch": 0.01743721426099969, "grad_norm": 1.466609239578247, "learning_rate": 5.2298721371364335e-05, "loss": 6.0547, "step": 12410 }, { "epoch": 0.017451265199163266, "grad_norm": 1.4185417890548706, "learning_rate": 5.2340873963748766e-05, "loss": 6.0352, "step": 12420 }, { "epoch": 0.017465316137326842, "grad_norm": 1.6092323064804077, "learning_rate": 5.23830265561332e-05, "loss": 6.0051, "step": 12430 }, { "epoch": 0.017479367075490422, "grad_norm": 1.5270646810531616, "learning_rate": 5.242517914851763e-05, "loss": 6.1214, "step": 12440 }, { "epoch": 0.017493418013653998, "grad_norm": 1.3804599046707153, "learning_rate": 5.246733174090206e-05, "loss": 6.0227, "step": 12450 }, { "epoch": 0.017507468951817578, "grad_norm": 1.4650616645812988, "learning_rate": 5.25094843332865e-05, "loss": 6.1227, "step": 12460 }, { "epoch": 0.017521519889981154, "grad_norm": 1.3534270524978638, "learning_rate": 5.255163692567092e-05, "loss": 6.0883, "step": 12470 }, { "epoch": 0.01753557082814473, "grad_norm": 1.298676609992981, "learning_rate": 5.259378951805536e-05, "loss": 6.047, "step": 12480 }, { "epoch": 0.01754962176630831, "grad_norm": 1.342682123184204, "learning_rate": 5.263594211043979e-05, "loss": 6.0541, "step": 12490 }, { "epoch": 0.017563672704471886, "grad_norm": 1.375024676322937, "learning_rate": 5.2678094702824216e-05, "loss": 6.0099, "step": 12500 }, { "epoch": 0.017577723642635466, "grad_norm": 1.4682930707931519, "learning_rate": 5.2720247295208654e-05, "loss": 6.1075, "step": 12510 }, { "epoch": 0.017591774580799042, "grad_norm": 1.4236594438552856, "learning_rate": 5.2762399887593085e-05, "loss": 6.052, "step": 12520 }, { "epoch": 0.017605825518962618, "grad_norm": 
1.344035267829895, "learning_rate": 5.280455247997751e-05, "loss": 5.9737, "step": 12530 }, { "epoch": 0.017619876457126198, "grad_norm": 1.3492854833602905, "learning_rate": 5.284670507236195e-05, "loss": 6.1154, "step": 12540 }, { "epoch": 0.017633927395289774, "grad_norm": 1.4145216941833496, "learning_rate": 5.288885766474638e-05, "loss": 6.0093, "step": 12550 }, { "epoch": 0.017647978333453353, "grad_norm": 1.3583636283874512, "learning_rate": 5.293101025713081e-05, "loss": 6.0203, "step": 12560 }, { "epoch": 0.01766202927161693, "grad_norm": 1.3437408208847046, "learning_rate": 5.297316284951524e-05, "loss": 6.0749, "step": 12570 }, { "epoch": 0.017676080209780506, "grad_norm": 1.3554259538650513, "learning_rate": 5.301531544189967e-05, "loss": 5.97, "step": 12580 }, { "epoch": 0.017690131147944085, "grad_norm": 1.4989122152328491, "learning_rate": 5.30574680342841e-05, "loss": 6.0707, "step": 12590 }, { "epoch": 0.01770418208610766, "grad_norm": 1.3016396760940552, "learning_rate": 5.3099620626668534e-05, "loss": 6.0922, "step": 12600 }, { "epoch": 0.017718233024271238, "grad_norm": 1.3750371932983398, "learning_rate": 5.3141773219052965e-05, "loss": 5.9968, "step": 12610 }, { "epoch": 0.017732283962434817, "grad_norm": 1.3117624521255493, "learning_rate": 5.31839258114374e-05, "loss": 5.9331, "step": 12620 }, { "epoch": 0.017746334900598393, "grad_norm": 1.3047480583190918, "learning_rate": 5.322607840382183e-05, "loss": 6.1537, "step": 12630 }, { "epoch": 0.017760385838761973, "grad_norm": 1.4629182815551758, "learning_rate": 5.3268230996206266e-05, "loss": 6.1317, "step": 12640 }, { "epoch": 0.01777443677692555, "grad_norm": 1.5809321403503418, "learning_rate": 5.33103835885907e-05, "loss": 6.0821, "step": 12650 }, { "epoch": 0.017788487715089125, "grad_norm": 1.3663504123687744, "learning_rate": 5.335253618097512e-05, "loss": 6.0406, "step": 12660 }, { "epoch": 0.017802538653252705, "grad_norm": 1.3569718599319458, "learning_rate": 5.339468877335956e-05, 
"loss": 6.0533, "step": 12670 }, { "epoch": 0.01781658959141628, "grad_norm": 1.3136539459228516, "learning_rate": 5.343684136574399e-05, "loss": 5.9933, "step": 12680 }, { "epoch": 0.01783064052957986, "grad_norm": 1.3268357515335083, "learning_rate": 5.347899395812842e-05, "loss": 6.0975, "step": 12690 }, { "epoch": 0.017844691467743437, "grad_norm": 1.3501018285751343, "learning_rate": 5.352114655051285e-05, "loss": 6.0835, "step": 12700 }, { "epoch": 0.017858742405907013, "grad_norm": 1.4120509624481201, "learning_rate": 5.3563299142897284e-05, "loss": 6.0582, "step": 12710 }, { "epoch": 0.017872793344070593, "grad_norm": 1.6523367166519165, "learning_rate": 5.3605451735281715e-05, "loss": 5.9865, "step": 12720 }, { "epoch": 0.01788684428223417, "grad_norm": 1.4812240600585938, "learning_rate": 5.3647604327666146e-05, "loss": 6.0202, "step": 12730 }, { "epoch": 0.01790089522039775, "grad_norm": 1.4424370527267456, "learning_rate": 5.368975692005058e-05, "loss": 6.0952, "step": 12740 }, { "epoch": 0.017914946158561325, "grad_norm": 1.420494556427002, "learning_rate": 5.3731909512435015e-05, "loss": 6.1514, "step": 12750 }, { "epoch": 0.0179289970967249, "grad_norm": 1.4344902038574219, "learning_rate": 5.377406210481944e-05, "loss": 6.0853, "step": 12760 }, { "epoch": 0.01794304803488848, "grad_norm": 1.3586077690124512, "learning_rate": 5.381621469720388e-05, "loss": 6.0114, "step": 12770 }, { "epoch": 0.017957098973052057, "grad_norm": 1.310850977897644, "learning_rate": 5.385836728958831e-05, "loss": 5.9627, "step": 12780 }, { "epoch": 0.017971149911215633, "grad_norm": 1.4529310464859009, "learning_rate": 5.390051988197273e-05, "loss": 5.9186, "step": 12790 }, { "epoch": 0.017985200849379213, "grad_norm": 1.4991663694381714, "learning_rate": 5.394267247435717e-05, "loss": 5.9598, "step": 12800 }, { "epoch": 0.01799925178754279, "grad_norm": 1.3883793354034424, "learning_rate": 5.39848250667416e-05, "loss": 6.0834, "step": 12810 }, { "epoch": 
0.01801330272570637, "grad_norm": 1.35647451877594, "learning_rate": 5.4026977659126027e-05, "loss": 6.1214, "step": 12820 }, { "epoch": 0.018027353663869945, "grad_norm": 1.3526296615600586, "learning_rate": 5.4069130251510465e-05, "loss": 6.0049, "step": 12830 }, { "epoch": 0.01804140460203352, "grad_norm": 1.364187240600586, "learning_rate": 5.4111282843894896e-05, "loss": 5.9091, "step": 12840 }, { "epoch": 0.0180554555401971, "grad_norm": 1.277514100074768, "learning_rate": 5.415343543627933e-05, "loss": 6.0787, "step": 12850 }, { "epoch": 0.018069506478360677, "grad_norm": 1.7247467041015625, "learning_rate": 5.419558802866376e-05, "loss": 6.0347, "step": 12860 }, { "epoch": 0.018083557416524256, "grad_norm": 1.2670753002166748, "learning_rate": 5.423774062104819e-05, "loss": 6.0311, "step": 12870 }, { "epoch": 0.018097608354687832, "grad_norm": 1.3634384870529175, "learning_rate": 5.427989321343262e-05, "loss": 6.0208, "step": 12880 }, { "epoch": 0.01811165929285141, "grad_norm": 1.5839406251907349, "learning_rate": 5.432204580581705e-05, "loss": 6.0122, "step": 12890 }, { "epoch": 0.018125710231014988, "grad_norm": 1.3542689085006714, "learning_rate": 5.436419839820148e-05, "loss": 5.9979, "step": 12900 }, { "epoch": 0.018139761169178564, "grad_norm": 1.4818419218063354, "learning_rate": 5.440635099058592e-05, "loss": 6.0396, "step": 12910 }, { "epoch": 0.01815381210734214, "grad_norm": 1.4347730875015259, "learning_rate": 5.4448503582970345e-05, "loss": 6.0252, "step": 12920 }, { "epoch": 0.01816786304550572, "grad_norm": 1.463624119758606, "learning_rate": 5.449065617535478e-05, "loss": 6.0238, "step": 12930 }, { "epoch": 0.018181913983669296, "grad_norm": 1.5159509181976318, "learning_rate": 5.4532808767739214e-05, "loss": 6.0117, "step": 12940 }, { "epoch": 0.018195964921832876, "grad_norm": 1.306830644607544, "learning_rate": 5.457496136012364e-05, "loss": 6.1917, "step": 12950 }, { "epoch": 0.018210015859996452, "grad_norm": 1.3079791069030762, 
"learning_rate": 5.4617113952508076e-05, "loss": 6.0244, "step": 12960 }, { "epoch": 0.018224066798160028, "grad_norm": 1.3655420541763306, "learning_rate": 5.465926654489251e-05, "loss": 5.9953, "step": 12970 }, { "epoch": 0.018238117736323608, "grad_norm": 1.5014837980270386, "learning_rate": 5.470141913727694e-05, "loss": 6.0585, "step": 12980 }, { "epoch": 0.018252168674487184, "grad_norm": 1.595160722732544, "learning_rate": 5.474357172966137e-05, "loss": 5.9723, "step": 12990 }, { "epoch": 0.018266219612650764, "grad_norm": 1.2678043842315674, "learning_rate": 5.47857243220458e-05, "loss": 6.03, "step": 13000 }, { "epoch": 0.01828027055081434, "grad_norm": 1.397788405418396, "learning_rate": 5.482787691443023e-05, "loss": 5.9627, "step": 13010 }, { "epoch": 0.018294321488977916, "grad_norm": 1.4957337379455566, "learning_rate": 5.4870029506814663e-05, "loss": 5.9015, "step": 13020 }, { "epoch": 0.018308372427141496, "grad_norm": 1.3790498971939087, "learning_rate": 5.4912182099199095e-05, "loss": 5.9694, "step": 13030 }, { "epoch": 0.018322423365305072, "grad_norm": 1.4078073501586914, "learning_rate": 5.4954334691583526e-05, "loss": 6.0007, "step": 13040 }, { "epoch": 0.01833647430346865, "grad_norm": 1.4309850931167603, "learning_rate": 5.499648728396796e-05, "loss": 5.8825, "step": 13050 }, { "epoch": 0.018350525241632228, "grad_norm": 1.3303261995315552, "learning_rate": 5.5038639876352395e-05, "loss": 5.96, "step": 13060 }, { "epoch": 0.018364576179795804, "grad_norm": 1.2958881855010986, "learning_rate": 5.5080792468736826e-05, "loss": 6.1374, "step": 13070 }, { "epoch": 0.018378627117959383, "grad_norm": 1.2930759191513062, "learning_rate": 5.512294506112125e-05, "loss": 6.0608, "step": 13080 }, { "epoch": 0.01839267805612296, "grad_norm": 1.3812742233276367, "learning_rate": 5.516509765350569e-05, "loss": 6.0503, "step": 13090 }, { "epoch": 0.018406728994286536, "grad_norm": 1.5456600189208984, "learning_rate": 5.520725024589012e-05, "loss": 5.9067, 
"step": 13100 }, { "epoch": 0.018420779932450115, "grad_norm": 1.3641170263290405, "learning_rate": 5.5249402838274544e-05, "loss": 6.0021, "step": 13110 }, { "epoch": 0.01843483087061369, "grad_norm": 1.5792025327682495, "learning_rate": 5.529155543065898e-05, "loss": 5.9245, "step": 13120 }, { "epoch": 0.01844888180877727, "grad_norm": 1.3822730779647827, "learning_rate": 5.533370802304341e-05, "loss": 5.9526, "step": 13130 }, { "epoch": 0.018462932746940847, "grad_norm": 1.744850754737854, "learning_rate": 5.5375860615427844e-05, "loss": 6.0086, "step": 13140 }, { "epoch": 0.018476983685104423, "grad_norm": 1.3281506299972534, "learning_rate": 5.5418013207812275e-05, "loss": 5.9487, "step": 13150 }, { "epoch": 0.018491034623268003, "grad_norm": 1.401555061340332, "learning_rate": 5.5460165800196707e-05, "loss": 6.0319, "step": 13160 }, { "epoch": 0.01850508556143158, "grad_norm": 1.3791970014572144, "learning_rate": 5.550231839258114e-05, "loss": 5.9704, "step": 13170 }, { "epoch": 0.01851913649959516, "grad_norm": 1.3170675039291382, "learning_rate": 5.554447098496557e-05, "loss": 5.9452, "step": 13180 }, { "epoch": 0.018533187437758735, "grad_norm": 1.3928661346435547, "learning_rate": 5.558662357735001e-05, "loss": 5.9074, "step": 13190 }, { "epoch": 0.01854723837592231, "grad_norm": 1.7144209146499634, "learning_rate": 5.562877616973444e-05, "loss": 5.9106, "step": 13200 }, { "epoch": 0.01856128931408589, "grad_norm": 1.4539275169372559, "learning_rate": 5.567092876211886e-05, "loss": 6.0976, "step": 13210 }, { "epoch": 0.018575340252249467, "grad_norm": 1.3310813903808594, "learning_rate": 5.57130813545033e-05, "loss": 6.0462, "step": 13220 }, { "epoch": 0.018589391190413043, "grad_norm": 1.4120519161224365, "learning_rate": 5.575523394688773e-05, "loss": 5.9199, "step": 13230 }, { "epoch": 0.018603442128576623, "grad_norm": 1.307513952255249, "learning_rate": 5.5797386539272156e-05, "loss": 5.9939, "step": 13240 }, { "epoch": 0.0186174930667402, 
"grad_norm": 1.2844830751419067, "learning_rate": 5.5839539131656594e-05, "loss": 5.9712, "step": 13250 }, { "epoch": 0.01863154400490378, "grad_norm": 1.4797707796096802, "learning_rate": 5.5881691724041025e-05, "loss": 6.0282, "step": 13260 }, { "epoch": 0.018645594943067355, "grad_norm": 1.3347375392913818, "learning_rate": 5.5923844316425456e-05, "loss": 5.928, "step": 13270 }, { "epoch": 0.01865964588123093, "grad_norm": 1.4141706228256226, "learning_rate": 5.596599690880989e-05, "loss": 5.965, "step": 13280 }, { "epoch": 0.01867369681939451, "grad_norm": 1.3178112506866455, "learning_rate": 5.600814950119432e-05, "loss": 5.9698, "step": 13290 }, { "epoch": 0.018687747757558087, "grad_norm": 1.3804385662078857, "learning_rate": 5.605030209357875e-05, "loss": 5.9723, "step": 13300 }, { "epoch": 0.018701798695721666, "grad_norm": 1.3772902488708496, "learning_rate": 5.609245468596318e-05, "loss": 5.9814, "step": 13310 }, { "epoch": 0.018715849633885243, "grad_norm": 1.3835526704788208, "learning_rate": 5.613460727834761e-05, "loss": 6.0272, "step": 13320 }, { "epoch": 0.01872990057204882, "grad_norm": 1.3807588815689087, "learning_rate": 5.617675987073204e-05, "loss": 6.0179, "step": 13330 }, { "epoch": 0.0187439515102124, "grad_norm": 1.3371533155441284, "learning_rate": 5.6218912463116474e-05, "loss": 6.0128, "step": 13340 }, { "epoch": 0.018758002448375975, "grad_norm": 1.4686000347137451, "learning_rate": 5.626106505550091e-05, "loss": 5.9024, "step": 13350 }, { "epoch": 0.018772053386539554, "grad_norm": 1.7148891687393188, "learning_rate": 5.6303217647885343e-05, "loss": 6.0377, "step": 13360 }, { "epoch": 0.01878610432470313, "grad_norm": 1.4156179428100586, "learning_rate": 5.634537024026977e-05, "loss": 6.0123, "step": 13370 }, { "epoch": 0.018800155262866707, "grad_norm": 1.2572938203811646, "learning_rate": 5.6387522832654206e-05, "loss": 5.8405, "step": 13380 }, { "epoch": 0.018814206201030286, "grad_norm": 1.3694168329238892, "learning_rate": 
5.642967542503864e-05, "loss": 6.0598, "step": 13390 }, { "epoch": 0.018828257139193862, "grad_norm": 1.4427056312561035, "learning_rate": 5.647182801742306e-05, "loss": 5.91, "step": 13400 }, { "epoch": 0.01884230807735744, "grad_norm": 1.3949912786483765, "learning_rate": 5.65139806098075e-05, "loss": 5.941, "step": 13410 }, { "epoch": 0.018856359015521018, "grad_norm": 2.0856781005859375, "learning_rate": 5.655613320219193e-05, "loss": 5.8934, "step": 13420 }, { "epoch": 0.018870409953684594, "grad_norm": 1.295962929725647, "learning_rate": 5.659828579457636e-05, "loss": 6.0145, "step": 13430 }, { "epoch": 0.018884460891848174, "grad_norm": 1.350246787071228, "learning_rate": 5.664043838696079e-05, "loss": 5.9026, "step": 13440 }, { "epoch": 0.01889851183001175, "grad_norm": 1.334167718887329, "learning_rate": 5.6682590979345224e-05, "loss": 6.0541, "step": 13450 }, { "epoch": 0.018912562768175326, "grad_norm": 1.4749501943588257, "learning_rate": 5.6724743571729655e-05, "loss": 5.9257, "step": 13460 }, { "epoch": 0.018926613706338906, "grad_norm": 1.4598968029022217, "learning_rate": 5.6766896164114086e-05, "loss": 5.9241, "step": 13470 }, { "epoch": 0.018940664644502482, "grad_norm": 1.5799657106399536, "learning_rate": 5.6809048756498524e-05, "loss": 5.8404, "step": 13480 }, { "epoch": 0.01895471558266606, "grad_norm": 1.455440878868103, "learning_rate": 5.6851201348882955e-05, "loss": 5.8688, "step": 13490 }, { "epoch": 0.018968766520829638, "grad_norm": 1.424730896949768, "learning_rate": 5.689335394126738e-05, "loss": 5.9688, "step": 13500 }, { "epoch": 0.018982817458993214, "grad_norm": 1.2700331211090088, "learning_rate": 5.693550653365182e-05, "loss": 5.9306, "step": 13510 }, { "epoch": 0.018996868397156794, "grad_norm": 1.3546868562698364, "learning_rate": 5.697765912603625e-05, "loss": 5.9353, "step": 13520 }, { "epoch": 0.01901091933532037, "grad_norm": 1.417958378791809, "learning_rate": 5.701981171842067e-05, "loss": 5.9517, "step": 13530 }, { 
"epoch": 0.019024970273483946, "grad_norm": 1.3052090406417847, "learning_rate": 5.706196431080511e-05, "loss": 5.8374, "step": 13540 }, { "epoch": 0.019039021211647526, "grad_norm": 1.298235297203064, "learning_rate": 5.710411690318954e-05, "loss": 5.9565, "step": 13550 }, { "epoch": 0.019053072149811102, "grad_norm": 1.4889936447143555, "learning_rate": 5.7146269495573974e-05, "loss": 5.9748, "step": 13560 }, { "epoch": 0.01906712308797468, "grad_norm": 1.4061089754104614, "learning_rate": 5.7188422087958405e-05, "loss": 5.9171, "step": 13570 }, { "epoch": 0.019081174026138258, "grad_norm": 1.4704773426055908, "learning_rate": 5.7230574680342836e-05, "loss": 6.0023, "step": 13580 }, { "epoch": 0.019095224964301834, "grad_norm": 1.344780683517456, "learning_rate": 5.727272727272727e-05, "loss": 5.9356, "step": 13590 }, { "epoch": 0.019109275902465413, "grad_norm": 1.3091164827346802, "learning_rate": 5.73148798651117e-05, "loss": 5.9137, "step": 13600 }, { "epoch": 0.01912332684062899, "grad_norm": 1.3960378170013428, "learning_rate": 5.735703245749613e-05, "loss": 5.9779, "step": 13610 }, { "epoch": 0.01913737777879257, "grad_norm": 1.5556848049163818, "learning_rate": 5.739918504988056e-05, "loss": 5.9581, "step": 13620 }, { "epoch": 0.019151428716956145, "grad_norm": 1.3245570659637451, "learning_rate": 5.744133764226499e-05, "loss": 5.9721, "step": 13630 }, { "epoch": 0.01916547965511972, "grad_norm": 1.578223705291748, "learning_rate": 5.748349023464943e-05, "loss": 5.9285, "step": 13640 }, { "epoch": 0.0191795305932833, "grad_norm": 2.247985363006592, "learning_rate": 5.752564282703386e-05, "loss": 5.9884, "step": 13650 }, { "epoch": 0.019193581531446877, "grad_norm": 1.6837748289108276, "learning_rate": 5.7567795419418285e-05, "loss": 6.0128, "step": 13660 }, { "epoch": 0.019207632469610457, "grad_norm": 1.367041826248169, "learning_rate": 5.760994801180272e-05, "loss": 5.9926, "step": 13670 }, { "epoch": 0.019221683407774033, "grad_norm": 
1.4417003393173218, "learning_rate": 5.7652100604187154e-05, "loss": 5.9076, "step": 13680 }, { "epoch": 0.01923573434593761, "grad_norm": 1.3726568222045898, "learning_rate": 5.7694253196571586e-05, "loss": 5.875, "step": 13690 }, { "epoch": 0.01924978528410119, "grad_norm": 1.3321431875228882, "learning_rate": 5.773640578895602e-05, "loss": 5.9552, "step": 13700 }, { "epoch": 0.019263836222264765, "grad_norm": 1.372376561164856, "learning_rate": 5.777855838134045e-05, "loss": 5.955, "step": 13710 }, { "epoch": 0.01927788716042834, "grad_norm": 1.4178439378738403, "learning_rate": 5.782071097372488e-05, "loss": 5.8555, "step": 13720 }, { "epoch": 0.01929193809859192, "grad_norm": 1.3034266233444214, "learning_rate": 5.786286356610931e-05, "loss": 6.0031, "step": 13730 }, { "epoch": 0.019305989036755497, "grad_norm": 1.7195543050765991, "learning_rate": 5.790501615849374e-05, "loss": 5.8395, "step": 13740 }, { "epoch": 0.019320039974919077, "grad_norm": 1.3207449913024902, "learning_rate": 5.794716875087817e-05, "loss": 5.9347, "step": 13750 }, { "epoch": 0.019334090913082653, "grad_norm": 1.3822189569473267, "learning_rate": 5.7989321343262604e-05, "loss": 5.8751, "step": 13760 }, { "epoch": 0.01934814185124623, "grad_norm": 1.5890324115753174, "learning_rate": 5.803147393564704e-05, "loss": 5.8387, "step": 13770 }, { "epoch": 0.01936219278940981, "grad_norm": 1.2956420183181763, "learning_rate": 5.807362652803147e-05, "loss": 5.8924, "step": 13780 }, { "epoch": 0.019376243727573385, "grad_norm": 1.572546362876892, "learning_rate": 5.81157791204159e-05, "loss": 5.9325, "step": 13790 }, { "epoch": 0.019390294665736964, "grad_norm": 1.4117943048477173, "learning_rate": 5.8157931712800335e-05, "loss": 5.8553, "step": 13800 }, { "epoch": 0.01940434560390054, "grad_norm": 1.48485267162323, "learning_rate": 5.8200084305184766e-05, "loss": 5.8948, "step": 13810 }, { "epoch": 0.019418396542064117, "grad_norm": 1.3272455930709839, "learning_rate": 5.824223689756919e-05, 
"loss": 5.8358, "step": 13820 }, { "epoch": 0.019432447480227696, "grad_norm": 1.4525662660598755, "learning_rate": 5.828438948995363e-05, "loss": 5.937, "step": 13830 }, { "epoch": 0.019446498418391273, "grad_norm": 1.495578646659851, "learning_rate": 5.832654208233806e-05, "loss": 5.9296, "step": 13840 }, { "epoch": 0.01946054935655485, "grad_norm": 1.4523495435714722, "learning_rate": 5.836869467472249e-05, "loss": 5.7838, "step": 13850 }, { "epoch": 0.01947460029471843, "grad_norm": 1.3701502084732056, "learning_rate": 5.841084726710692e-05, "loss": 5.9265, "step": 13860 }, { "epoch": 0.019488651232882005, "grad_norm": 1.435974359512329, "learning_rate": 5.845299985949135e-05, "loss": 5.7779, "step": 13870 }, { "epoch": 0.019502702171045584, "grad_norm": 1.507411003112793, "learning_rate": 5.8495152451875784e-05, "loss": 6.0682, "step": 13880 }, { "epoch": 0.01951675310920916, "grad_norm": 1.71627938747406, "learning_rate": 5.8537305044260216e-05, "loss": 5.8261, "step": 13890 }, { "epoch": 0.019530804047372737, "grad_norm": 1.3306772708892822, "learning_rate": 5.857945763664465e-05, "loss": 5.879, "step": 13900 }, { "epoch": 0.019544854985536316, "grad_norm": 1.433265209197998, "learning_rate": 5.862161022902908e-05, "loss": 5.8721, "step": 13910 }, { "epoch": 0.019558905923699892, "grad_norm": 1.2996599674224854, "learning_rate": 5.866376282141351e-05, "loss": 6.0022, "step": 13920 }, { "epoch": 0.019572956861863472, "grad_norm": 1.3266264200210571, "learning_rate": 5.870591541379795e-05, "loss": 5.8978, "step": 13930 }, { "epoch": 0.019587007800027048, "grad_norm": 1.3822417259216309, "learning_rate": 5.874806800618238e-05, "loss": 5.9197, "step": 13940 }, { "epoch": 0.019601058738190624, "grad_norm": 1.4425207376480103, "learning_rate": 5.87902205985668e-05, "loss": 5.8509, "step": 13950 }, { "epoch": 0.019615109676354204, "grad_norm": 1.3024529218673706, "learning_rate": 5.883237319095124e-05, "loss": 5.9852, "step": 13960 }, { "epoch": 
0.01962916061451778, "grad_norm": 1.3837809562683105, "learning_rate": 5.887452578333567e-05, "loss": 5.9071, "step": 13970 }, { "epoch": 0.01964321155268136, "grad_norm": 1.37079918384552, "learning_rate": 5.89166783757201e-05, "loss": 5.9889, "step": 13980 }, { "epoch": 0.019657262490844936, "grad_norm": 1.5206080675125122, "learning_rate": 5.8958830968104534e-05, "loss": 5.7997, "step": 13990 }, { "epoch": 0.019671313429008512, "grad_norm": 1.3415592908859253, "learning_rate": 5.9000983560488965e-05, "loss": 5.9298, "step": 14000 }, { "epoch": 0.01968536436717209, "grad_norm": 1.7559236288070679, "learning_rate": 5.9043136152873396e-05, "loss": 6.0002, "step": 14010 }, { "epoch": 0.019699415305335668, "grad_norm": 1.3365631103515625, "learning_rate": 5.908528874525783e-05, "loss": 5.8709, "step": 14020 }, { "epoch": 0.019713466243499244, "grad_norm": 1.3177831172943115, "learning_rate": 5.912744133764226e-05, "loss": 5.8005, "step": 14030 }, { "epoch": 0.019727517181662824, "grad_norm": 1.3259848356246948, "learning_rate": 5.916959393002669e-05, "loss": 5.9384, "step": 14040 }, { "epoch": 0.0197415681198264, "grad_norm": 1.5154435634613037, "learning_rate": 5.921174652241112e-05, "loss": 5.9317, "step": 14050 }, { "epoch": 0.01975561905798998, "grad_norm": 1.3720771074295044, "learning_rate": 5.925389911479556e-05, "loss": 5.8961, "step": 14060 }, { "epoch": 0.019769669996153556, "grad_norm": 1.6641851663589478, "learning_rate": 5.9296051707179983e-05, "loss": 5.9034, "step": 14070 }, { "epoch": 0.019783720934317132, "grad_norm": 1.3994899988174438, "learning_rate": 5.9338204299564415e-05, "loss": 5.8884, "step": 14080 }, { "epoch": 0.01979777187248071, "grad_norm": 1.3691990375518799, "learning_rate": 5.938035689194885e-05, "loss": 5.8671, "step": 14090 }, { "epoch": 0.019811822810644288, "grad_norm": 1.7563625574111938, "learning_rate": 5.9422509484333284e-05, "loss": 5.8736, "step": 14100 }, { "epoch": 0.019825873748807867, "grad_norm": 1.3554989099502563, 
"learning_rate": 5.946466207671771e-05, "loss": 5.9775, "step": 14110 }, { "epoch": 0.019839924686971443, "grad_norm": 1.4109904766082764, "learning_rate": 5.9506814669102146e-05, "loss": 5.9443, "step": 14120 }, { "epoch": 0.01985397562513502, "grad_norm": 1.383750081062317, "learning_rate": 5.954896726148658e-05, "loss": 5.855, "step": 14130 }, { "epoch": 0.0198680265632986, "grad_norm": 1.4050294160842896, "learning_rate": 5.959111985387101e-05, "loss": 5.9805, "step": 14140 }, { "epoch": 0.019882077501462175, "grad_norm": 1.4362891912460327, "learning_rate": 5.963327244625544e-05, "loss": 5.8024, "step": 14150 }, { "epoch": 0.019896128439625755, "grad_norm": 1.4630895853042603, "learning_rate": 5.967542503863987e-05, "loss": 5.9194, "step": 14160 }, { "epoch": 0.01991017937778933, "grad_norm": 1.3018559217453003, "learning_rate": 5.97175776310243e-05, "loss": 5.9572, "step": 14170 }, { "epoch": 0.019924230315952907, "grad_norm": 1.3787480592727661, "learning_rate": 5.975973022340873e-05, "loss": 5.9111, "step": 14180 }, { "epoch": 0.019938281254116487, "grad_norm": 1.5770620107650757, "learning_rate": 5.980188281579317e-05, "loss": 5.949, "step": 14190 }, { "epoch": 0.019952332192280063, "grad_norm": 1.32578706741333, "learning_rate": 5.9844035408177595e-05, "loss": 5.8478, "step": 14200 }, { "epoch": 0.01996638313044364, "grad_norm": 1.3311973810195923, "learning_rate": 5.9886188000562027e-05, "loss": 5.7895, "step": 14210 }, { "epoch": 0.01998043406860722, "grad_norm": 1.4675605297088623, "learning_rate": 5.9928340592946464e-05, "loss": 5.8426, "step": 14220 }, { "epoch": 0.019994485006770795, "grad_norm": 1.389006495475769, "learning_rate": 5.9970493185330896e-05, "loss": 5.8199, "step": 14230 }, { "epoch": 0.020008535944934375, "grad_norm": 1.7180744409561157, "learning_rate": 6.001264577771532e-05, "loss": 5.8948, "step": 14240 }, { "epoch": 0.02002258688309795, "grad_norm": 1.4792721271514893, "learning_rate": 6.005479837009976e-05, "loss": 5.8289, 
"step": 14250 }, { "epoch": 0.020036637821261527, "grad_norm": 1.3706181049346924, "learning_rate": 6.009695096248419e-05, "loss": 5.9004, "step": 14260 }, { "epoch": 0.020050688759425107, "grad_norm": 1.3976632356643677, "learning_rate": 6.013910355486862e-05, "loss": 5.837, "step": 14270 }, { "epoch": 0.020064739697588683, "grad_norm": 1.3973368406295776, "learning_rate": 6.018125614725305e-05, "loss": 5.9126, "step": 14280 }, { "epoch": 0.020078790635752262, "grad_norm": 1.5715432167053223, "learning_rate": 6.022340873963748e-05, "loss": 6.0113, "step": 14290 }, { "epoch": 0.02009284157391584, "grad_norm": 1.3540730476379395, "learning_rate": 6.0265561332021914e-05, "loss": 5.9477, "step": 14300 }, { "epoch": 0.020106892512079415, "grad_norm": 1.3370450735092163, "learning_rate": 6.0307713924406345e-05, "loss": 5.8446, "step": 14310 }, { "epoch": 0.020120943450242994, "grad_norm": 1.3597146272659302, "learning_rate": 6.0349866516790776e-05, "loss": 5.8455, "step": 14320 }, { "epoch": 0.02013499438840657, "grad_norm": 1.4030274152755737, "learning_rate": 6.039201910917521e-05, "loss": 5.8089, "step": 14330 }, { "epoch": 0.020149045326570147, "grad_norm": 1.470107913017273, "learning_rate": 6.043417170155964e-05, "loss": 5.8458, "step": 14340 }, { "epoch": 0.020163096264733726, "grad_norm": 1.3324445486068726, "learning_rate": 6.0476324293944076e-05, "loss": 5.9478, "step": 14350 }, { "epoch": 0.020177147202897303, "grad_norm": 1.3293122053146362, "learning_rate": 6.05184768863285e-05, "loss": 5.8075, "step": 14360 }, { "epoch": 0.020191198141060882, "grad_norm": 1.3335965871810913, "learning_rate": 6.056062947871293e-05, "loss": 6.0072, "step": 14370 }, { "epoch": 0.02020524907922446, "grad_norm": 1.4222981929779053, "learning_rate": 6.060278207109737e-05, "loss": 5.9405, "step": 14380 }, { "epoch": 0.020219300017388035, "grad_norm": 1.292315125465393, "learning_rate": 6.06449346634818e-05, "loss": 5.959, "step": 14390 }, { "epoch": 0.020233350955551614, 
"grad_norm": 1.3387219905853271, "learning_rate": 6.0687087255866225e-05, "loss": 5.9308, "step": 14400 }, { "epoch": 0.02024740189371519, "grad_norm": 1.4554129838943481, "learning_rate": 6.0729239848250663e-05, "loss": 5.8909, "step": 14410 }, { "epoch": 0.02026145283187877, "grad_norm": Infinity, "learning_rate": 6.0771392440635095e-05, "loss": 5.927, "step": 14420 }, { "epoch": 0.020275503770042346, "grad_norm": 1.4156428575515747, "learning_rate": 6.0809329773781084e-05, "loss": 5.964, "step": 14430 }, { "epoch": 0.020289554708205922, "grad_norm": 1.5934478044509888, "learning_rate": 6.0851482366165515e-05, "loss": 5.7974, "step": 14440 }, { "epoch": 0.020303605646369502, "grad_norm": 1.6974800825119019, "learning_rate": 6.089363495854994e-05, "loss": 5.913, "step": 14450 }, { "epoch": 0.020317656584533078, "grad_norm": 1.4129645824432373, "learning_rate": 6.093578755093438e-05, "loss": 5.8084, "step": 14460 }, { "epoch": 0.020331707522696658, "grad_norm": 1.3150914907455444, "learning_rate": 6.097794014331881e-05, "loss": 5.8431, "step": 14470 }, { "epoch": 0.020345758460860234, "grad_norm": 1.4848755598068237, "learning_rate": 6.1020092735703247e-05, "loss": 5.8406, "step": 14480 }, { "epoch": 0.02035980939902381, "grad_norm": 1.348905324935913, "learning_rate": 6.106224532808767e-05, "loss": 5.9378, "step": 14490 }, { "epoch": 0.02037386033718739, "grad_norm": 1.266818881034851, "learning_rate": 6.11043979204721e-05, "loss": 5.8756, "step": 14500 }, { "epoch": 0.020387911275350966, "grad_norm": 1.3369197845458984, "learning_rate": 6.114655051285653e-05, "loss": 5.8134, "step": 14510 }, { "epoch": 0.020401962213514542, "grad_norm": 1.5117855072021484, "learning_rate": 6.118870310524096e-05, "loss": 5.8981, "step": 14520 }, { "epoch": 0.02041601315167812, "grad_norm": 1.425666332244873, "learning_rate": 6.12308556976254e-05, "loss": 5.9648, "step": 14530 }, { "epoch": 0.020430064089841698, "grad_norm": 1.3908796310424805, "learning_rate": 
6.127300829000983e-05, "loss": 5.7853, "step": 14540 }, { "epoch": 0.020444115028005277, "grad_norm": 1.3612520694732666, "learning_rate": 6.131516088239426e-05, "loss": 5.7987, "step": 14550 }, { "epoch": 0.020458165966168854, "grad_norm": 1.5223125219345093, "learning_rate": 6.13573134747787e-05, "loss": 5.9002, "step": 14560 }, { "epoch": 0.02047221690433243, "grad_norm": 1.5592213869094849, "learning_rate": 6.139946606716312e-05, "loss": 5.867, "step": 14570 }, { "epoch": 0.02048626784249601, "grad_norm": 1.5414535999298096, "learning_rate": 6.144161865954755e-05, "loss": 5.7891, "step": 14580 }, { "epoch": 0.020500318780659586, "grad_norm": 1.3643969297409058, "learning_rate": 6.1483771251932e-05, "loss": 5.8342, "step": 14590 }, { "epoch": 0.020514369718823165, "grad_norm": 1.3756287097930908, "learning_rate": 6.152592384431641e-05, "loss": 5.9926, "step": 14600 }, { "epoch": 0.02052842065698674, "grad_norm": 1.461431622505188, "learning_rate": 6.156807643670086e-05, "loss": 5.8282, "step": 14610 }, { "epoch": 0.020542471595150318, "grad_norm": 1.5052664279937744, "learning_rate": 6.161022902908529e-05, "loss": 5.867, "step": 14620 }, { "epoch": 0.020556522533313897, "grad_norm": 1.815929651260376, "learning_rate": 6.165238162146971e-05, "loss": 5.875, "step": 14630 }, { "epoch": 0.020570573471477473, "grad_norm": 1.991647481918335, "learning_rate": 6.169453421385415e-05, "loss": 5.767, "step": 14640 }, { "epoch": 0.02058462440964105, "grad_norm": 1.380648136138916, "learning_rate": 6.173668680623858e-05, "loss": 5.9249, "step": 14650 }, { "epoch": 0.02059867534780463, "grad_norm": 1.3880946636199951, "learning_rate": 6.1778839398623e-05, "loss": 6.0072, "step": 14660 }, { "epoch": 0.020612726285968205, "grad_norm": 1.4181009531021118, "learning_rate": 6.182099199100745e-05, "loss": 5.7029, "step": 14670 }, { "epoch": 0.020626777224131785, "grad_norm": 1.4614509344100952, "learning_rate": 6.186314458339188e-05, "loss": 5.778, "step": 14680 }, { "epoch": 
0.02064082816229536, "grad_norm": 1.4017057418823242, "learning_rate": 6.190529717577631e-05, "loss": 5.7973, "step": 14690 }, { "epoch": 0.020654879100458937, "grad_norm": 1.4342793226242065, "learning_rate": 6.194744976816074e-05, "loss": 5.8359, "step": 14700 }, { "epoch": 0.020668930038622517, "grad_norm": 1.4066189527511597, "learning_rate": 6.198960236054517e-05, "loss": 5.8003, "step": 14710 }, { "epoch": 0.020682980976786093, "grad_norm": 1.3695813417434692, "learning_rate": 6.20317549529296e-05, "loss": 5.9072, "step": 14720 }, { "epoch": 0.020697031914949673, "grad_norm": 1.7797951698303223, "learning_rate": 6.207390754531403e-05, "loss": 5.772, "step": 14730 }, { "epoch": 0.02071108285311325, "grad_norm": 1.340478539466858, "learning_rate": 6.211606013769846e-05, "loss": 5.9631, "step": 14740 }, { "epoch": 0.020725133791276825, "grad_norm": 1.3329967260360718, "learning_rate": 6.21582127300829e-05, "loss": 5.899, "step": 14750 }, { "epoch": 0.020739184729440405, "grad_norm": 1.4106431007385254, "learning_rate": 6.220036532246733e-05, "loss": 5.9145, "step": 14760 }, { "epoch": 0.02075323566760398, "grad_norm": 1.4221521615982056, "learning_rate": 6.224251791485176e-05, "loss": 5.9208, "step": 14770 }, { "epoch": 0.02076728660576756, "grad_norm": 1.3601226806640625, "learning_rate": 6.228467050723619e-05, "loss": 5.8966, "step": 14780 }, { "epoch": 0.020781337543931137, "grad_norm": 1.3591755628585815, "learning_rate": 6.232682309962062e-05, "loss": 5.7644, "step": 14790 }, { "epoch": 0.020795388482094713, "grad_norm": 1.4168270826339722, "learning_rate": 6.236897569200505e-05, "loss": 5.8457, "step": 14800 }, { "epoch": 0.020809439420258292, "grad_norm": 1.443789005279541, "learning_rate": 6.241112828438948e-05, "loss": 5.8221, "step": 14810 }, { "epoch": 0.02082349035842187, "grad_norm": 1.4544609785079956, "learning_rate": 6.245328087677393e-05, "loss": 5.8429, "step": 14820 }, { "epoch": 0.020837541296585445, "grad_norm": 1.2243056297302246, 
"learning_rate": 6.249543346915834e-05, "loss": 5.8578, "step": 14830 }, { "epoch": 0.020851592234749024, "grad_norm": 1.3254730701446533, "learning_rate": 6.253758606154278e-05, "loss": 5.9038, "step": 14840 }, { "epoch": 0.0208656431729126, "grad_norm": 1.5144115686416626, "learning_rate": 6.257973865392722e-05, "loss": 5.81, "step": 14850 }, { "epoch": 0.02087969411107618, "grad_norm": 1.35421621799469, "learning_rate": 6.262189124631164e-05, "loss": 5.8403, "step": 14860 }, { "epoch": 0.020893745049239756, "grad_norm": 1.3334990739822388, "learning_rate": 6.266404383869607e-05, "loss": 5.7432, "step": 14870 }, { "epoch": 0.020907795987403333, "grad_norm": 1.3845977783203125, "learning_rate": 6.270619643108051e-05, "loss": 5.7457, "step": 14880 }, { "epoch": 0.020921846925566912, "grad_norm": 1.606353521347046, "learning_rate": 6.274834902346493e-05, "loss": 5.7664, "step": 14890 }, { "epoch": 0.02093589786373049, "grad_norm": 1.4412970542907715, "learning_rate": 6.279050161584938e-05, "loss": 5.8228, "step": 14900 }, { "epoch": 0.020949948801894068, "grad_norm": 1.57476007938385, "learning_rate": 6.283265420823381e-05, "loss": 5.8189, "step": 14910 }, { "epoch": 0.020963999740057644, "grad_norm": 1.4712239503860474, "learning_rate": 6.287480680061822e-05, "loss": 5.8303, "step": 14920 }, { "epoch": 0.02097805067822122, "grad_norm": 1.3110334873199463, "learning_rate": 6.291695939300267e-05, "loss": 5.8904, "step": 14930 }, { "epoch": 0.0209921016163848, "grad_norm": 1.3854535818099976, "learning_rate": 6.29591119853871e-05, "loss": 5.7378, "step": 14940 }, { "epoch": 0.021006152554548376, "grad_norm": 1.5063308477401733, "learning_rate": 6.300126457777152e-05, "loss": 5.8302, "step": 14950 }, { "epoch": 0.021020203492711952, "grad_norm": 1.347753643989563, "learning_rate": 6.304341717015596e-05, "loss": 5.8284, "step": 14960 }, { "epoch": 0.021034254430875532, "grad_norm": 1.7087639570236206, "learning_rate": 6.30855697625404e-05, "loss": 5.9404, "step": 14970 
}, { "epoch": 0.021048305369039108, "grad_norm": 1.4151090383529663, "learning_rate": 6.312772235492483e-05, "loss": 5.8551, "step": 14980 }, { "epoch": 0.021062356307202688, "grad_norm": 1.3254867792129517, "learning_rate": 6.316987494730926e-05, "loss": 5.8843, "step": 14990 }, { "epoch": 0.021076407245366264, "grad_norm": 1.3656538724899292, "learning_rate": 6.321202753969369e-05, "loss": 5.8179, "step": 15000 }, { "epoch": 0.02109045818352984, "grad_norm": 1.3731048107147217, "learning_rate": 6.325418013207812e-05, "loss": 5.776, "step": 15010 }, { "epoch": 0.02110450912169342, "grad_norm": 1.6539822816848755, "learning_rate": 6.329633272446255e-05, "loss": 5.796, "step": 15020 }, { "epoch": 0.021118560059856996, "grad_norm": 1.6321409940719604, "learning_rate": 6.333848531684698e-05, "loss": 5.7292, "step": 15030 }, { "epoch": 0.021132610998020575, "grad_norm": 1.516080379486084, "learning_rate": 6.338063790923141e-05, "loss": 5.7586, "step": 15040 }, { "epoch": 0.02114666193618415, "grad_norm": 1.3451189994812012, "learning_rate": 6.342279050161584e-05, "loss": 5.7364, "step": 15050 }, { "epoch": 0.021160712874347728, "grad_norm": 1.4270164966583252, "learning_rate": 6.346494309400027e-05, "loss": 5.7612, "step": 15060 }, { "epoch": 0.021174763812511307, "grad_norm": 1.353387475013733, "learning_rate": 6.35070956863847e-05, "loss": 5.8736, "step": 15070 }, { "epoch": 0.021188814750674884, "grad_norm": 1.4181205034255981, "learning_rate": 6.354924827876914e-05, "loss": 5.9262, "step": 15080 }, { "epoch": 0.021202865688838463, "grad_norm": 1.3350406885147095, "learning_rate": 6.359140087115357e-05, "loss": 5.843, "step": 15090 }, { "epoch": 0.02121691662700204, "grad_norm": 1.676595687866211, "learning_rate": 6.3633553463538e-05, "loss": 5.806, "step": 15100 }, { "epoch": 0.021230967565165616, "grad_norm": 1.579698920249939, "learning_rate": 6.367570605592244e-05, "loss": 5.8444, "step": 15110 }, { "epoch": 0.021245018503329195, "grad_norm": 1.5201925039291382, 
"learning_rate": 6.371785864830686e-05, "loss": 5.948, "step": 15120 }, { "epoch": 0.02125906944149277, "grad_norm": 1.3935141563415527, "learning_rate": 6.376001124069129e-05, "loss": 5.8822, "step": 15130 }, { "epoch": 0.021273120379656348, "grad_norm": 1.358180284500122, "learning_rate": 6.380216383307574e-05, "loss": 5.8469, "step": 15140 }, { "epoch": 0.021287171317819927, "grad_norm": 1.4216656684875488, "learning_rate": 6.384431642546016e-05, "loss": 5.8851, "step": 15150 }, { "epoch": 0.021301222255983503, "grad_norm": 1.40565025806427, "learning_rate": 6.388646901784459e-05, "loss": 5.7583, "step": 15160 }, { "epoch": 0.021315273194147083, "grad_norm": 1.30914306640625, "learning_rate": 6.392862161022903e-05, "loss": 5.9395, "step": 15170 }, { "epoch": 0.02132932413231066, "grad_norm": 1.4431228637695312, "learning_rate": 6.397077420261345e-05, "loss": 5.776, "step": 15180 }, { "epoch": 0.021343375070474235, "grad_norm": 1.397368311882019, "learning_rate": 6.40129267949979e-05, "loss": 5.7714, "step": 15190 }, { "epoch": 0.021357426008637815, "grad_norm": 1.303511142730713, "learning_rate": 6.405507938738232e-05, "loss": 5.8594, "step": 15200 }, { "epoch": 0.02137147694680139, "grad_norm": 1.359408974647522, "learning_rate": 6.409723197976674e-05, "loss": 5.7992, "step": 15210 }, { "epoch": 0.02138552788496497, "grad_norm": 1.2777255773544312, "learning_rate": 6.413938457215119e-05, "loss": 5.8816, "step": 15220 }, { "epoch": 0.021399578823128547, "grad_norm": 1.7330981492996216, "learning_rate": 6.418153716453562e-05, "loss": 5.7651, "step": 15230 }, { "epoch": 0.021413629761292123, "grad_norm": 1.324683427810669, "learning_rate": 6.42194744976816e-05, "loss": 5.7404, "step": 15240 }, { "epoch": 0.021427680699455703, "grad_norm": 1.3045562505722046, "learning_rate": 6.426162709006603e-05, "loss": 5.7965, "step": 15250 }, { "epoch": 0.02144173163761928, "grad_norm": 1.3371610641479492, "learning_rate": 6.430377968245046e-05, "loss": 5.8492, "step": 15260 
}, { "epoch": 0.02145578257578286, "grad_norm": 1.61046302318573, "learning_rate": 6.43459322748349e-05, "loss": 5.8089, "step": 15270 }, { "epoch": 0.021469833513946435, "grad_norm": 1.5446206331253052, "learning_rate": 6.438808486721933e-05, "loss": 5.8823, "step": 15280 }, { "epoch": 0.02148388445211001, "grad_norm": 1.337916612625122, "learning_rate": 6.443023745960376e-05, "loss": 5.8088, "step": 15290 }, { "epoch": 0.02149793539027359, "grad_norm": 1.5150680541992188, "learning_rate": 6.447239005198819e-05, "loss": 5.7181, "step": 15300 }, { "epoch": 0.021511986328437167, "grad_norm": 1.3614176511764526, "learning_rate": 6.451454264437262e-05, "loss": 5.8309, "step": 15310 }, { "epoch": 0.021526037266600743, "grad_norm": 1.357839584350586, "learning_rate": 6.455669523675706e-05, "loss": 5.8946, "step": 15320 }, { "epoch": 0.021540088204764322, "grad_norm": 1.4024747610092163, "learning_rate": 6.459884782914148e-05, "loss": 5.8192, "step": 15330 }, { "epoch": 0.0215541391429279, "grad_norm": 1.4322363138198853, "learning_rate": 6.464100042152591e-05, "loss": 5.6945, "step": 15340 }, { "epoch": 0.021568190081091478, "grad_norm": 1.4403985738754272, "learning_rate": 6.468315301391036e-05, "loss": 5.926, "step": 15350 }, { "epoch": 0.021582241019255054, "grad_norm": 1.593833565711975, "learning_rate": 6.472530560629477e-05, "loss": 5.8134, "step": 15360 }, { "epoch": 0.02159629195741863, "grad_norm": 1.4765543937683105, "learning_rate": 6.47674581986792e-05, "loss": 5.8854, "step": 15370 }, { "epoch": 0.02161034289558221, "grad_norm": 1.3382881879806519, "learning_rate": 6.480961079106365e-05, "loss": 5.8777, "step": 15380 }, { "epoch": 0.021624393833745786, "grad_norm": 1.6510887145996094, "learning_rate": 6.485176338344807e-05, "loss": 5.7864, "step": 15390 }, { "epoch": 0.021638444771909366, "grad_norm": 1.3946115970611572, "learning_rate": 6.489391597583251e-05, "loss": 5.7041, "step": 15400 }, { "epoch": 0.021652495710072942, "grad_norm": 1.3984819650650024, 
"learning_rate": 6.493606856821694e-05, "loss": 5.7186, "step": 15410 }, { "epoch": 0.02166654664823652, "grad_norm": 1.9340324401855469, "learning_rate": 6.497822116060136e-05, "loss": 5.7089, "step": 15420 }, { "epoch": 0.021680597586400098, "grad_norm": 1.4083871841430664, "learning_rate": 6.50203737529858e-05, "loss": 5.8155, "step": 15430 }, { "epoch": 0.021694648524563674, "grad_norm": 1.389730453491211, "learning_rate": 6.506252634537024e-05, "loss": 5.7463, "step": 15440 }, { "epoch": 0.02170869946272725, "grad_norm": 1.2647806406021118, "learning_rate": 6.510467893775467e-05, "loss": 5.8177, "step": 15450 }, { "epoch": 0.02172275040089083, "grad_norm": 1.4326426982879639, "learning_rate": 6.51468315301391e-05, "loss": 5.7414, "step": 15460 }, { "epoch": 0.021736801339054406, "grad_norm": 1.3881163597106934, "learning_rate": 6.518898412252353e-05, "loss": 5.624, "step": 15470 }, { "epoch": 0.021750852277217986, "grad_norm": 1.3433114290237427, "learning_rate": 6.523113671490796e-05, "loss": 5.7344, "step": 15480 }, { "epoch": 0.021764903215381562, "grad_norm": 1.3703086376190186, "learning_rate": 6.52732893072924e-05, "loss": 5.7321, "step": 15490 }, { "epoch": 0.021778954153545138, "grad_norm": 1.3004043102264404, "learning_rate": 6.531544189967682e-05, "loss": 5.7776, "step": 15500 }, { "epoch": 0.021793005091708718, "grad_norm": 1.3912044763565063, "learning_rate": 6.535759449206126e-05, "loss": 5.75, "step": 15510 }, { "epoch": 0.021807056029872294, "grad_norm": 1.3019267320632935, "learning_rate": 6.539974708444569e-05, "loss": 5.8134, "step": 15520 }, { "epoch": 0.021821106968035874, "grad_norm": 1.2605361938476562, "learning_rate": 6.544189967683012e-05, "loss": 5.7915, "step": 15530 }, { "epoch": 0.02183515790619945, "grad_norm": 1.4486380815505981, "learning_rate": 6.548405226921455e-05, "loss": 5.8089, "step": 15540 }, { "epoch": 0.021849208844363026, "grad_norm": 1.4889944791793823, "learning_rate": 6.552620486159898e-05, "loss": 5.8072, "step": 
15550 }, { "epoch": 0.021863259782526605, "grad_norm": 1.389483094215393, "learning_rate": 6.556835745398341e-05, "loss": 5.8426, "step": 15560 }, { "epoch": 0.02187731072069018, "grad_norm": 1.465795874595642, "learning_rate": 6.561051004636784e-05, "loss": 5.7026, "step": 15570 }, { "epoch": 0.02189136165885376, "grad_norm": 1.4371973276138306, "learning_rate": 6.565266263875227e-05, "loss": 5.8007, "step": 15580 }, { "epoch": 0.021905412597017337, "grad_norm": 1.3100478649139404, "learning_rate": 6.56948152311367e-05, "loss": 5.7882, "step": 15590 }, { "epoch": 0.021919463535180914, "grad_norm": 1.3720271587371826, "learning_rate": 6.573696782352114e-05, "loss": 5.7386, "step": 15600 }, { "epoch": 0.021933514473344493, "grad_norm": 1.6056287288665771, "learning_rate": 6.577912041590558e-05, "loss": 5.7643, "step": 15610 }, { "epoch": 0.02194756541150807, "grad_norm": 1.4117242097854614, "learning_rate": 6.582127300829e-05, "loss": 5.8054, "step": 15620 }, { "epoch": 0.021961616349671646, "grad_norm": 1.3101048469543457, "learning_rate": 6.586342560067443e-05, "loss": 5.8407, "step": 15630 }, { "epoch": 0.021975667287835225, "grad_norm": 1.535385251045227, "learning_rate": 6.590557819305887e-05, "loss": 5.7486, "step": 15640 }, { "epoch": 0.0219897182259988, "grad_norm": 1.3539336919784546, "learning_rate": 6.594773078544329e-05, "loss": 5.7473, "step": 15650 }, { "epoch": 0.02200376916416238, "grad_norm": 1.5434645414352417, "learning_rate": 6.598988337782774e-05, "loss": 5.8255, "step": 15660 }, { "epoch": 0.022017820102325957, "grad_norm": 1.3859068155288696, "learning_rate": 6.603203597021217e-05, "loss": 5.8002, "step": 15670 }, { "epoch": 0.022031871040489533, "grad_norm": 1.397512674331665, "learning_rate": 6.607418856259659e-05, "loss": 5.8374, "step": 15680 }, { "epoch": 0.022045921978653113, "grad_norm": 1.4506967067718506, "learning_rate": 6.611634115498103e-05, "loss": 5.8459, "step": 15690 }, { "epoch": 0.02205997291681669, "grad_norm": 
1.3882638216018677, "learning_rate": 6.615849374736546e-05, "loss": 5.8247, "step": 15700 }, { "epoch": 0.02207402385498027, "grad_norm": 1.308134913444519, "learning_rate": 6.620064633974988e-05, "loss": 5.7157, "step": 15710 }, { "epoch": 0.022088074793143845, "grad_norm": 1.4445452690124512, "learning_rate": 6.624279893213432e-05, "loss": 5.7608, "step": 15720 }, { "epoch": 0.02210212573130742, "grad_norm": 1.362390398979187, "learning_rate": 6.628495152451875e-05, "loss": 5.9047, "step": 15730 }, { "epoch": 0.022116176669471, "grad_norm": 1.2991571426391602, "learning_rate": 6.632710411690319e-05, "loss": 5.7554, "step": 15740 }, { "epoch": 0.022130227607634577, "grad_norm": 1.328895926475525, "learning_rate": 6.636925670928762e-05, "loss": 5.7317, "step": 15750 }, { "epoch": 0.022144278545798153, "grad_norm": 1.303283929824829, "learning_rate": 6.641140930167205e-05, "loss": 5.7671, "step": 15760 }, { "epoch": 0.022158329483961733, "grad_norm": 1.358275055885315, "learning_rate": 6.645356189405648e-05, "loss": 5.7322, "step": 15770 }, { "epoch": 0.02217238042212531, "grad_norm": 1.4214792251586914, "learning_rate": 6.649571448644091e-05, "loss": 5.6888, "step": 15780 }, { "epoch": 0.02218643136028889, "grad_norm": 1.3279885053634644, "learning_rate": 6.653786707882534e-05, "loss": 5.7591, "step": 15790 }, { "epoch": 0.022200482298452465, "grad_norm": 1.3507026433944702, "learning_rate": 6.658001967120977e-05, "loss": 5.7571, "step": 15800 }, { "epoch": 0.02221453323661604, "grad_norm": 1.323520541191101, "learning_rate": 6.66221722635942e-05, "loss": 5.8356, "step": 15810 }, { "epoch": 0.02222858417477962, "grad_norm": 1.2954471111297607, "learning_rate": 6.666432485597864e-05, "loss": 5.9235, "step": 15820 }, { "epoch": 0.022242635112943197, "grad_norm": 1.3912978172302246, "learning_rate": 6.670647744836307e-05, "loss": 5.7872, "step": 15830 }, { "epoch": 0.022256686051106776, "grad_norm": 1.5537766218185425, "learning_rate": 6.67486300407475e-05, "loss": 
5.7668, "step": 15840 }, { "epoch": 0.022270736989270352, "grad_norm": 1.3550326824188232, "learning_rate": 6.679078263313193e-05, "loss": 5.7059, "step": 15850 }, { "epoch": 0.02228478792743393, "grad_norm": 1.3379523754119873, "learning_rate": 6.683293522551636e-05, "loss": 5.7736, "step": 15860 }, { "epoch": 0.022298838865597508, "grad_norm": 1.4727954864501953, "learning_rate": 6.687508781790079e-05, "loss": 5.687, "step": 15870 }, { "epoch": 0.022312889803761084, "grad_norm": 1.5783716440200806, "learning_rate": 6.691724041028522e-05, "loss": 5.6273, "step": 15880 }, { "epoch": 0.022326940741924664, "grad_norm": 1.2996593713760376, "learning_rate": 6.695939300266965e-05, "loss": 5.809, "step": 15890 }, { "epoch": 0.02234099168008824, "grad_norm": 1.3592562675476074, "learning_rate": 6.70015455950541e-05, "loss": 5.7186, "step": 15900 }, { "epoch": 0.022355042618251816, "grad_norm": 1.363147258758545, "learning_rate": 6.704369818743852e-05, "loss": 5.7087, "step": 15910 }, { "epoch": 0.022369093556415396, "grad_norm": 1.3323581218719482, "learning_rate": 6.708585077982295e-05, "loss": 5.7382, "step": 15920 }, { "epoch": 0.022383144494578972, "grad_norm": 1.3843733072280884, "learning_rate": 6.712800337220739e-05, "loss": 5.7236, "step": 15930 }, { "epoch": 0.02239719543274255, "grad_norm": 1.336782455444336, "learning_rate": 6.717015596459181e-05, "loss": 5.737, "step": 15940 }, { "epoch": 0.022411246370906128, "grad_norm": 1.3597850799560547, "learning_rate": 6.721230855697625e-05, "loss": 5.6848, "step": 15950 }, { "epoch": 0.022425297309069704, "grad_norm": 1.360641360282898, "learning_rate": 6.725446114936069e-05, "loss": 5.8756, "step": 15960 }, { "epoch": 0.022439348247233284, "grad_norm": 1.4128172397613525, "learning_rate": 6.72966137417451e-05, "loss": 5.7612, "step": 15970 }, { "epoch": 0.02245339918539686, "grad_norm": 1.4090365171432495, "learning_rate": 6.733876633412955e-05, "loss": 5.6862, "step": 15980 }, { "epoch": 0.022467450123560436, 
"grad_norm": 1.555239200592041, "learning_rate": 6.738091892651398e-05, "loss": 5.6615, "step": 15990 }, { "epoch": 0.022481501061724016, "grad_norm": 1.3207861185073853, "learning_rate": 6.74230715188984e-05, "loss": 5.7863, "step": 16000 }, { "epoch": 0.022495551999887592, "grad_norm": 1.3086305856704712, "learning_rate": 6.746522411128284e-05, "loss": 5.7663, "step": 16010 }, { "epoch": 0.02250960293805117, "grad_norm": 1.5462536811828613, "learning_rate": 6.750737670366727e-05, "loss": 5.7121, "step": 16020 }, { "epoch": 0.022523653876214748, "grad_norm": 1.4813846349716187, "learning_rate": 6.75495292960517e-05, "loss": 5.7485, "step": 16030 }, { "epoch": 0.022537704814378324, "grad_norm": 1.3974696397781372, "learning_rate": 6.759168188843613e-05, "loss": 5.6604, "step": 16040 }, { "epoch": 0.022551755752541904, "grad_norm": 1.3554956912994385, "learning_rate": 6.763383448082057e-05, "loss": 5.5456, "step": 16050 }, { "epoch": 0.02256580669070548, "grad_norm": 1.4472044706344604, "learning_rate": 6.7675987073205e-05, "loss": 5.6876, "step": 16060 }, { "epoch": 0.022579857628869056, "grad_norm": 1.4171758890151978, "learning_rate": 6.771813966558943e-05, "loss": 5.7768, "step": 16070 }, { "epoch": 0.022593908567032635, "grad_norm": 1.4032776355743408, "learning_rate": 6.776029225797386e-05, "loss": 5.6466, "step": 16080 }, { "epoch": 0.02260795950519621, "grad_norm": 1.2590081691741943, "learning_rate": 6.780244485035829e-05, "loss": 5.6616, "step": 16090 }, { "epoch": 0.02262201044335979, "grad_norm": 1.3556309938430786, "learning_rate": 6.784459744274272e-05, "loss": 5.797, "step": 16100 }, { "epoch": 0.022636061381523367, "grad_norm": 1.6029269695281982, "learning_rate": 6.788675003512715e-05, "loss": 5.7445, "step": 16110 }, { "epoch": 0.022650112319686944, "grad_norm": 1.6440356969833374, "learning_rate": 6.792890262751158e-05, "loss": 5.6423, "step": 16120 }, { "epoch": 0.022664163257850523, "grad_norm": 1.447268009185791, "learning_rate": 
6.797105521989602e-05, "loss": 5.8162, "step": 16130 }, { "epoch": 0.0226782141960141, "grad_norm": 1.3291443586349487, "learning_rate": 6.801320781228045e-05, "loss": 5.822, "step": 16140 }, { "epoch": 0.02269226513417768, "grad_norm": 1.4091272354125977, "learning_rate": 6.805536040466488e-05, "loss": 5.7244, "step": 16150 }, { "epoch": 0.022706316072341255, "grad_norm": 1.579410433769226, "learning_rate": 6.809751299704932e-05, "loss": 5.7525, "step": 16160 }, { "epoch": 0.02272036701050483, "grad_norm": 1.368188500404358, "learning_rate": 6.813966558943374e-05, "loss": 5.7373, "step": 16170 }, { "epoch": 0.02273441794866841, "grad_norm": 1.3030613660812378, "learning_rate": 6.818181818181817e-05, "loss": 5.7726, "step": 16180 }, { "epoch": 0.022748468886831987, "grad_norm": 1.3315412998199463, "learning_rate": 6.822397077420262e-05, "loss": 5.7665, "step": 16190 }, { "epoch": 0.022762519824995567, "grad_norm": 1.265251636505127, "learning_rate": 6.826612336658703e-05, "loss": 5.7164, "step": 16200 }, { "epoch": 0.022776570763159143, "grad_norm": 1.3779250383377075, "learning_rate": 6.830827595897146e-05, "loss": 5.8774, "step": 16210 }, { "epoch": 0.02279062170132272, "grad_norm": 1.3391507863998413, "learning_rate": 6.835042855135591e-05, "loss": 5.6763, "step": 16220 }, { "epoch": 0.0228046726394863, "grad_norm": 1.2613348960876465, "learning_rate": 6.839258114374033e-05, "loss": 5.7929, "step": 16230 }, { "epoch": 0.022818723577649875, "grad_norm": 1.3553537130355835, "learning_rate": 6.843473373612477e-05, "loss": 5.6879, "step": 16240 }, { "epoch": 0.02283277451581345, "grad_norm": 1.3512293100357056, "learning_rate": 6.84768863285092e-05, "loss": 5.7638, "step": 16250 }, { "epoch": 0.02284682545397703, "grad_norm": 1.3155027627944946, "learning_rate": 6.851903892089362e-05, "loss": 5.7267, "step": 16260 }, { "epoch": 0.022860876392140607, "grad_norm": 1.4464770555496216, "learning_rate": 6.856119151327806e-05, "loss": 5.6172, "step": 16270 }, { "epoch": 
0.022874927330304187, "grad_norm": 1.4445135593414307, "learning_rate": 6.86033441056625e-05, "loss": 5.7, "step": 16280 }, { "epoch": 0.022888978268467763, "grad_norm": 1.5342646837234497, "learning_rate": 6.864549669804691e-05, "loss": 5.6978, "step": 16290 }, { "epoch": 0.02290302920663134, "grad_norm": 1.335935354232788, "learning_rate": 6.868764929043136e-05, "loss": 5.7446, "step": 16300 }, { "epoch": 0.02291708014479492, "grad_norm": 1.326428771018982, "learning_rate": 6.872980188281579e-05, "loss": 5.6136, "step": 16310 }, { "epoch": 0.022931131082958495, "grad_norm": 1.3541918992996216, "learning_rate": 6.877195447520022e-05, "loss": 5.7738, "step": 16320 }, { "epoch": 0.022945182021122074, "grad_norm": 1.8092317581176758, "learning_rate": 6.881410706758465e-05, "loss": 5.6128, "step": 16330 }, { "epoch": 0.02295923295928565, "grad_norm": 1.2965271472930908, "learning_rate": 6.885625965996908e-05, "loss": 5.8584, "step": 16340 }, { "epoch": 0.022973283897449227, "grad_norm": 1.3288991451263428, "learning_rate": 6.889841225235351e-05, "loss": 5.7374, "step": 16350 }, { "epoch": 0.022987334835612806, "grad_norm": 1.3659766912460327, "learning_rate": 6.894056484473795e-05, "loss": 5.6953, "step": 16360 }, { "epoch": 0.023001385773776382, "grad_norm": 1.3702479600906372, "learning_rate": 6.898271743712238e-05, "loss": 5.7156, "step": 16370 }, { "epoch": 0.023015436711939962, "grad_norm": 1.3132061958312988, "learning_rate": 6.902487002950681e-05, "loss": 5.6502, "step": 16380 }, { "epoch": 0.023029487650103538, "grad_norm": 1.3793644905090332, "learning_rate": 6.906702262189124e-05, "loss": 5.756, "step": 16390 }, { "epoch": 0.023043538588267114, "grad_norm": 1.364650011062622, "learning_rate": 6.910917521427567e-05, "loss": 5.7081, "step": 16400 }, { "epoch": 0.023057589526430694, "grad_norm": 1.303212285041809, "learning_rate": 6.91513278066601e-05, "loss": 5.7405, "step": 16410 }, { "epoch": 0.02307164046459427, "grad_norm": 1.382996678352356, 
"learning_rate": 6.919348039904453e-05, "loss": 5.6833, "step": 16420 }, { "epoch": 0.023085691402757846, "grad_norm": 1.4374713897705078, "learning_rate": 6.923563299142896e-05, "loss": 5.7699, "step": 16430 }, { "epoch": 0.023099742340921426, "grad_norm": 1.286665439605713, "learning_rate": 6.92777855838134e-05, "loss": 5.7886, "step": 16440 }, { "epoch": 0.023113793279085002, "grad_norm": 1.403110384941101, "learning_rate": 6.931993817619784e-05, "loss": 5.7172, "step": 16450 }, { "epoch": 0.023127844217248582, "grad_norm": 1.7290191650390625, "learning_rate": 6.936209076858226e-05, "loss": 5.7493, "step": 16460 }, { "epoch": 0.023141895155412158, "grad_norm": 1.2815217971801758, "learning_rate": 6.940424336096669e-05, "loss": 5.7261, "step": 16470 }, { "epoch": 0.023155946093575734, "grad_norm": 1.5143299102783203, "learning_rate": 6.944639595335113e-05, "loss": 5.7154, "step": 16480 }, { "epoch": 0.023169997031739314, "grad_norm": 1.3418786525726318, "learning_rate": 6.948854854573555e-05, "loss": 5.7851, "step": 16490 }, { "epoch": 0.02318404796990289, "grad_norm": 1.296385407447815, "learning_rate": 6.953070113811998e-05, "loss": 5.7819, "step": 16500 }, { "epoch": 0.02319809890806647, "grad_norm": 1.3354800939559937, "learning_rate": 6.957285373050443e-05, "loss": 5.7586, "step": 16510 }, { "epoch": 0.023212149846230046, "grad_norm": 1.6433796882629395, "learning_rate": 6.961500632288884e-05, "loss": 5.6559, "step": 16520 }, { "epoch": 0.023226200784393622, "grad_norm": 1.4096791744232178, "learning_rate": 6.965715891527329e-05, "loss": 5.7111, "step": 16530 }, { "epoch": 0.0232402517225572, "grad_norm": 1.3959141969680786, "learning_rate": 6.969931150765772e-05, "loss": 5.7255, "step": 16540 }, { "epoch": 0.023254302660720778, "grad_norm": 1.2790894508361816, "learning_rate": 6.974146410004214e-05, "loss": 5.6613, "step": 16550 }, { "epoch": 0.023268353598884354, "grad_norm": 1.3713908195495605, "learning_rate": 6.978361669242658e-05, "loss": 5.6093, 
"step": 16560 }, { "epoch": 0.023282404537047934, "grad_norm": 1.4066718816757202, "learning_rate": 6.982576928481101e-05, "loss": 5.5383, "step": 16570 }, { "epoch": 0.02329645547521151, "grad_norm": 1.5075857639312744, "learning_rate": 6.986792187719543e-05, "loss": 5.5824, "step": 16580 }, { "epoch": 0.02331050641337509, "grad_norm": 1.4346543550491333, "learning_rate": 6.991007446957988e-05, "loss": 5.8348, "step": 16590 }, { "epoch": 0.023324557351538665, "grad_norm": 1.4327467679977417, "learning_rate": 6.995222706196431e-05, "loss": 5.5361, "step": 16600 }, { "epoch": 0.02333860828970224, "grad_norm": 1.403082251548767, "learning_rate": 6.999437965434874e-05, "loss": 5.6565, "step": 16610 }, { "epoch": 0.02335265922786582, "grad_norm": 1.7867377996444702, "learning_rate": 7.003653224673317e-05, "loss": 5.7467, "step": 16620 }, { "epoch": 0.023366710166029397, "grad_norm": 1.3568111658096313, "learning_rate": 7.00786848391176e-05, "loss": 5.624, "step": 16630 }, { "epoch": 0.023380761104192977, "grad_norm": 1.4141385555267334, "learning_rate": 7.012083743150203e-05, "loss": 5.7333, "step": 16640 }, { "epoch": 0.023394812042356553, "grad_norm": 1.526979923248291, "learning_rate": 7.016299002388646e-05, "loss": 5.6305, "step": 16650 }, { "epoch": 0.02340886298052013, "grad_norm": 1.2974737882614136, "learning_rate": 7.02051426162709e-05, "loss": 5.7538, "step": 16660 }, { "epoch": 0.02342291391868371, "grad_norm": 1.3604271411895752, "learning_rate": 7.024729520865533e-05, "loss": 5.7628, "step": 16670 }, { "epoch": 0.023436964856847285, "grad_norm": 1.2945364713668823, "learning_rate": 7.028944780103976e-05, "loss": 5.6513, "step": 16680 }, { "epoch": 0.023451015795010865, "grad_norm": 1.8330632448196411, "learning_rate": 7.033160039342419e-05, "loss": 5.7871, "step": 16690 }, { "epoch": 0.02346506673317444, "grad_norm": 1.4950273036956787, "learning_rate": 7.037375298580862e-05, "loss": 5.7615, "step": 16700 }, { "epoch": 0.023479117671338017, "grad_norm": 
1.4327980279922485, "learning_rate": 7.041590557819305e-05, "loss": 5.6334, "step": 16710 }, { "epoch": 0.023493168609501597, "grad_norm": 1.2713170051574707, "learning_rate": 7.045805817057748e-05, "loss": 5.8051, "step": 16720 }, { "epoch": 0.023507219547665173, "grad_norm": 1.3698358535766602, "learning_rate": 7.050021076296191e-05, "loss": 5.6809, "step": 16730 }, { "epoch": 0.02352127048582875, "grad_norm": 1.40662682056427, "learning_rate": 7.054236335534636e-05, "loss": 5.6715, "step": 16740 }, { "epoch": 0.02353532142399233, "grad_norm": 1.3832154273986816, "learning_rate": 7.058451594773077e-05, "loss": 5.7687, "step": 16750 }, { "epoch": 0.023549372362155905, "grad_norm": 1.2708238363265991, "learning_rate": 7.06266685401152e-05, "loss": 5.7387, "step": 16760 }, { "epoch": 0.023563423300319485, "grad_norm": 1.5633302927017212, "learning_rate": 7.066882113249965e-05, "loss": 5.6757, "step": 16770 }, { "epoch": 0.02357747423848306, "grad_norm": 1.4342371225357056, "learning_rate": 7.071097372488407e-05, "loss": 5.7106, "step": 16780 }, { "epoch": 0.023591525176646637, "grad_norm": 1.3297955989837646, "learning_rate": 7.07531263172685e-05, "loss": 5.664, "step": 16790 }, { "epoch": 0.023605576114810217, "grad_norm": 1.9332789182662964, "learning_rate": 7.079527890965294e-05, "loss": 5.6539, "step": 16800 }, { "epoch": 0.023619627052973793, "grad_norm": 1.3228834867477417, "learning_rate": 7.083743150203736e-05, "loss": 5.6064, "step": 16810 }, { "epoch": 0.023633677991137372, "grad_norm": 1.4686253070831299, "learning_rate": 7.08795840944218e-05, "loss": 5.6387, "step": 16820 }, { "epoch": 0.02364772892930095, "grad_norm": 1.4611742496490479, "learning_rate": 7.092173668680624e-05, "loss": 5.7412, "step": 16830 }, { "epoch": 0.023661779867464525, "grad_norm": 1.2942790985107422, "learning_rate": 7.096388927919066e-05, "loss": 5.6693, "step": 16840 }, { "epoch": 0.023675830805628104, "grad_norm": 1.254348635673523, "learning_rate": 7.10060418715751e-05, 
"loss": 5.7693, "step": 16850 }, { "epoch": 0.02368988174379168, "grad_norm": 1.426842212677002, "learning_rate": 7.104819446395953e-05, "loss": 5.6919, "step": 16860 }, { "epoch": 0.023703932681955257, "grad_norm": 1.349927544593811, "learning_rate": 7.109034705634395e-05, "loss": 5.62, "step": 16870 }, { "epoch": 0.023717983620118836, "grad_norm": 1.4106230735778809, "learning_rate": 7.11324996487284e-05, "loss": 5.8021, "step": 16880 }, { "epoch": 0.023732034558282412, "grad_norm": 1.3730113506317139, "learning_rate": 7.117465224111282e-05, "loss": 5.6808, "step": 16890 }, { "epoch": 0.023746085496445992, "grad_norm": 1.320567011833191, "learning_rate": 7.121680483349726e-05, "loss": 5.6301, "step": 16900 }, { "epoch": 0.023760136434609568, "grad_norm": 1.3973300457000732, "learning_rate": 7.125895742588169e-05, "loss": 5.6409, "step": 16910 }, { "epoch": 0.023774187372773144, "grad_norm": 1.354918122291565, "learning_rate": 7.130111001826612e-05, "loss": 5.6952, "step": 16920 }, { "epoch": 0.023788238310936724, "grad_norm": 1.419696569442749, "learning_rate": 7.134326261065055e-05, "loss": 5.6368, "step": 16930 }, { "epoch": 0.0238022892491003, "grad_norm": 1.403704285621643, "learning_rate": 7.138541520303498e-05, "loss": 5.6484, "step": 16940 }, { "epoch": 0.02381634018726388, "grad_norm": 1.366337776184082, "learning_rate": 7.142756779541941e-05, "loss": 5.7407, "step": 16950 }, { "epoch": 0.023830391125427456, "grad_norm": 1.469281554222107, "learning_rate": 7.146972038780384e-05, "loss": 5.719, "step": 16960 }, { "epoch": 0.023844442063591032, "grad_norm": 1.321475625038147, "learning_rate": 7.151187298018827e-05, "loss": 5.517, "step": 16970 }, { "epoch": 0.023858493001754612, "grad_norm": 1.3294295072555542, "learning_rate": 7.15540255725727e-05, "loss": 5.6871, "step": 16980 }, { "epoch": 0.023872543939918188, "grad_norm": 1.3294674158096313, "learning_rate": 7.159617816495714e-05, "loss": 5.6561, "step": 16990 }, { "epoch": 0.023886594878081768, 
"grad_norm": 1.4403409957885742, "learning_rate": 7.163833075734157e-05, "loss": 5.5484, "step": 17000 }, { "epoch": 0.023900645816245344, "grad_norm": 1.3805781602859497, "learning_rate": 7.1680483349726e-05, "loss": 5.7715, "step": 17010 }, { "epoch": 0.02391469675440892, "grad_norm": 1.2874103784561157, "learning_rate": 7.172263594211043e-05, "loss": 5.6586, "step": 17020 }, { "epoch": 0.0239287476925725, "grad_norm": 1.2773886919021606, "learning_rate": 7.176478853449487e-05, "loss": 5.7795, "step": 17030 }, { "epoch": 0.023942798630736076, "grad_norm": 1.4012190103530884, "learning_rate": 7.180694112687929e-05, "loss": 5.686, "step": 17040 }, { "epoch": 0.023956849568899652, "grad_norm": 1.3619203567504883, "learning_rate": 7.184909371926372e-05, "loss": 5.7779, "step": 17050 }, { "epoch": 0.02397090050706323, "grad_norm": 1.6374523639678955, "learning_rate": 7.189124631164817e-05, "loss": 5.7293, "step": 17060 }, { "epoch": 0.023984951445226808, "grad_norm": 1.365061640739441, "learning_rate": 7.193339890403259e-05, "loss": 5.6493, "step": 17070 }, { "epoch": 0.023999002383390387, "grad_norm": 1.458706021308899, "learning_rate": 7.197555149641702e-05, "loss": 5.6065, "step": 17080 }, { "epoch": 0.024013053321553963, "grad_norm": 1.3380498886108398, "learning_rate": 7.201770408880146e-05, "loss": 5.575, "step": 17090 }, { "epoch": 0.02402710425971754, "grad_norm": 1.2822099924087524, "learning_rate": 7.205985668118588e-05, "loss": 5.6609, "step": 17100 }, { "epoch": 0.02404115519788112, "grad_norm": 1.3940728902816772, "learning_rate": 7.210200927357032e-05, "loss": 5.6604, "step": 17110 }, { "epoch": 0.024055206136044695, "grad_norm": 1.3046749830245972, "learning_rate": 7.214416186595475e-05, "loss": 5.6748, "step": 17120 }, { "epoch": 0.024069257074208275, "grad_norm": 1.3955029249191284, "learning_rate": 7.218631445833917e-05, "loss": 5.6872, "step": 17130 }, { "epoch": 0.02408330801237185, "grad_norm": 1.2928085327148438, "learning_rate": 
7.222846705072362e-05, "loss": 5.6749, "step": 17140 }, { "epoch": 0.024097358950535427, "grad_norm": 1.4071931838989258, "learning_rate": 7.227061964310805e-05, "loss": 5.6209, "step": 17150 }, { "epoch": 0.024111409888699007, "grad_norm": 1.4092259407043457, "learning_rate": 7.231277223549248e-05, "loss": 5.616, "step": 17160 }, { "epoch": 0.024125460826862583, "grad_norm": 1.3400150537490845, "learning_rate": 7.235492482787691e-05, "loss": 5.7729, "step": 17170 }, { "epoch": 0.02413951176502616, "grad_norm": 1.7592744827270508, "learning_rate": 7.239707742026134e-05, "loss": 5.6002, "step": 17180 }, { "epoch": 0.02415356270318974, "grad_norm": 1.4428009986877441, "learning_rate": 7.243923001264577e-05, "loss": 5.5269, "step": 17190 }, { "epoch": 0.024167613641353315, "grad_norm": 1.266127347946167, "learning_rate": 7.24813826050302e-05, "loss": 5.6309, "step": 17200 }, { "epoch": 0.024181664579516895, "grad_norm": 1.3543121814727783, "learning_rate": 7.252353519741464e-05, "loss": 5.5518, "step": 17210 }, { "epoch": 0.02419571551768047, "grad_norm": 1.3485888242721558, "learning_rate": 7.256568778979907e-05, "loss": 5.6967, "step": 17220 }, { "epoch": 0.024209766455844047, "grad_norm": 1.4433296918869019, "learning_rate": 7.26078403821835e-05, "loss": 5.7349, "step": 17230 }, { "epoch": 0.024223817394007627, "grad_norm": 1.47990882396698, "learning_rate": 7.264999297456793e-05, "loss": 5.7185, "step": 17240 }, { "epoch": 0.024237868332171203, "grad_norm": 1.3784239292144775, "learning_rate": 7.269214556695236e-05, "loss": 5.6906, "step": 17250 }, { "epoch": 0.024251919270334783, "grad_norm": 1.329148530960083, "learning_rate": 7.273429815933679e-05, "loss": 5.6781, "step": 17260 }, { "epoch": 0.02426597020849836, "grad_norm": 1.3315861225128174, "learning_rate": 7.277645075172122e-05, "loss": 5.6335, "step": 17270 }, { "epoch": 0.024280021146661935, "grad_norm": 1.4295711517333984, "learning_rate": 7.281860334410565e-05, "loss": 5.6599, "step": 17280 }, { 
"epoch": 0.024294072084825515, "grad_norm": 1.3033784627914429, "learning_rate": 7.286075593649008e-05, "loss": 5.6773, "step": 17290 }, { "epoch": 0.02430812302298909, "grad_norm": 1.5363476276397705, "learning_rate": 7.290290852887452e-05, "loss": 5.7043, "step": 17300 }, { "epoch": 0.02432217396115267, "grad_norm": 1.4859110116958618, "learning_rate": 7.294506112125895e-05, "loss": 5.5239, "step": 17310 }, { "epoch": 0.024336224899316247, "grad_norm": 1.3148119449615479, "learning_rate": 7.298721371364339e-05, "loss": 5.7742, "step": 17320 }, { "epoch": 0.024350275837479823, "grad_norm": 1.4127575159072876, "learning_rate": 7.302936630602781e-05, "loss": 5.6235, "step": 17330 }, { "epoch": 0.024364326775643402, "grad_norm": 1.7024136781692505, "learning_rate": 7.307151889841224e-05, "loss": 5.6536, "step": 17340 }, { "epoch": 0.02437837771380698, "grad_norm": 1.4915293455123901, "learning_rate": 7.311367149079669e-05, "loss": 5.5354, "step": 17350 }, { "epoch": 0.024392428651970555, "grad_norm": 1.431097149848938, "learning_rate": 7.31558240831811e-05, "loss": 5.626, "step": 17360 }, { "epoch": 0.024406479590134134, "grad_norm": 1.411797285079956, "learning_rate": 7.319797667556555e-05, "loss": 5.6458, "step": 17370 }, { "epoch": 0.02442053052829771, "grad_norm": 1.374850869178772, "learning_rate": 7.324012926794998e-05, "loss": 5.6726, "step": 17380 }, { "epoch": 0.02443458146646129, "grad_norm": 1.3310719728469849, "learning_rate": 7.32822818603344e-05, "loss": 5.6913, "step": 17390 }, { "epoch": 0.024448632404624866, "grad_norm": 1.5239213705062866, "learning_rate": 7.332443445271884e-05, "loss": 5.7034, "step": 17400 }, { "epoch": 0.024462683342788442, "grad_norm": 1.3302022218704224, "learning_rate": 7.336658704510327e-05, "loss": 5.6239, "step": 17410 }, { "epoch": 0.024476734280952022, "grad_norm": 1.499558687210083, "learning_rate": 7.340873963748769e-05, "loss": 5.7574, "step": 17420 }, { "epoch": 0.024490785219115598, "grad_norm": 1.323960304260254, 
"learning_rate": 7.345089222987213e-05, "loss": 5.6001, "step": 17430 }, { "epoch": 0.024504836157279178, "grad_norm": 1.2899998426437378, "learning_rate": 7.349304482225657e-05, "loss": 5.5648, "step": 17440 }, { "epoch": 0.024518887095442754, "grad_norm": 1.3837937116622925, "learning_rate": 7.3535197414641e-05, "loss": 5.6718, "step": 17450 }, { "epoch": 0.02453293803360633, "grad_norm": 1.2842652797698975, "learning_rate": 7.357735000702543e-05, "loss": 5.7037, "step": 17460 }, { "epoch": 0.02454698897176991, "grad_norm": 1.2789689302444458, "learning_rate": 7.361950259940986e-05, "loss": 5.6116, "step": 17470 }, { "epoch": 0.024561039909933486, "grad_norm": 1.3312838077545166, "learning_rate": 7.366165519179429e-05, "loss": 5.7832, "step": 17480 }, { "epoch": 0.024575090848097062, "grad_norm": 1.3789297342300415, "learning_rate": 7.370380778417872e-05, "loss": 5.6628, "step": 17490 }, { "epoch": 0.024589141786260642, "grad_norm": 1.552386999130249, "learning_rate": 7.374596037656315e-05, "loss": 5.7062, "step": 17500 }, { "epoch": 0.024603192724424218, "grad_norm": 1.3529428243637085, "learning_rate": 7.378811296894758e-05, "loss": 5.5797, "step": 17510 }, { "epoch": 0.024617243662587798, "grad_norm": 1.3707754611968994, "learning_rate": 7.383026556133202e-05, "loss": 5.6016, "step": 17520 }, { "epoch": 0.024631294600751374, "grad_norm": 1.6769342422485352, "learning_rate": 7.387241815371645e-05, "loss": 5.6126, "step": 17530 }, { "epoch": 0.02464534553891495, "grad_norm": 1.3697290420532227, "learning_rate": 7.391457074610088e-05, "loss": 5.5616, "step": 17540 }, { "epoch": 0.02465939647707853, "grad_norm": 1.4338105916976929, "learning_rate": 7.395672333848531e-05, "loss": 5.6251, "step": 17550 }, { "epoch": 0.024673447415242106, "grad_norm": 1.338873028755188, "learning_rate": 7.399887593086974e-05, "loss": 5.5995, "step": 17560 }, { "epoch": 0.024687498353405685, "grad_norm": 1.6078602075576782, "learning_rate": 7.404102852325417e-05, "loss": 5.7184, 
"step": 17570 }, { "epoch": 0.02470154929156926, "grad_norm": 1.374680995941162, "learning_rate": 7.40831811156386e-05, "loss": 5.54, "step": 17580 }, { "epoch": 0.024715600229732838, "grad_norm": 1.3792024850845337, "learning_rate": 7.41211184487846e-05, "loss": 5.6917, "step": 17590 }, { "epoch": 0.024729651167896417, "grad_norm": 1.3895095586776733, "learning_rate": 7.416327104116903e-05, "loss": 5.5612, "step": 17600 }, { "epoch": 0.024743702106059993, "grad_norm": 1.251436710357666, "learning_rate": 7.420542363355346e-05, "loss": 5.6356, "step": 17610 }, { "epoch": 0.024757753044223573, "grad_norm": 1.6639325618743896, "learning_rate": 7.424757622593789e-05, "loss": 5.613, "step": 17620 }, { "epoch": 0.02477180398238715, "grad_norm": 1.2654144763946533, "learning_rate": 7.428972881832232e-05, "loss": 5.6399, "step": 17630 }, { "epoch": 0.024785854920550725, "grad_norm": 1.3583436012268066, "learning_rate": 7.433188141070675e-05, "loss": 5.7586, "step": 17640 }, { "epoch": 0.024799905858714305, "grad_norm": 1.3544237613677979, "learning_rate": 7.437403400309119e-05, "loss": 5.7153, "step": 17650 }, { "epoch": 0.02481395679687788, "grad_norm": 1.3555983304977417, "learning_rate": 7.441618659547562e-05, "loss": 5.5901, "step": 17660 }, { "epoch": 0.024828007735041457, "grad_norm": 1.5493931770324707, "learning_rate": 7.445833918786005e-05, "loss": 5.5702, "step": 17670 }, { "epoch": 0.024842058673205037, "grad_norm": 1.4650983810424805, "learning_rate": 7.450049178024448e-05, "loss": 5.6018, "step": 17680 }, { "epoch": 0.024856109611368613, "grad_norm": 1.2635927200317383, "learning_rate": 7.454264437262891e-05, "loss": 5.5942, "step": 17690 }, { "epoch": 0.024870160549532193, "grad_norm": 1.5327472686767578, "learning_rate": 7.458479696501334e-05, "loss": 5.5997, "step": 17700 }, { "epoch": 0.02488421148769577, "grad_norm": 1.350309133529663, "learning_rate": 7.462694955739777e-05, "loss": 5.582, "step": 17710 }, { "epoch": 0.024898262425859345, "grad_norm": 
1.321396827697754, "learning_rate": 7.46691021497822e-05, "loss": 5.582, "step": 17720 }, { "epoch": 0.024912313364022925, "grad_norm": 1.3941783905029297, "learning_rate": 7.471125474216663e-05, "loss": 5.7505, "step": 17730 }, { "epoch": 0.0249263643021865, "grad_norm": 1.573265790939331, "learning_rate": 7.475340733455107e-05, "loss": 5.4622, "step": 17740 }, { "epoch": 0.02494041524035008, "grad_norm": 1.3758049011230469, "learning_rate": 7.47955599269355e-05, "loss": 5.5887, "step": 17750 }, { "epoch": 0.024954466178513657, "grad_norm": 1.3547019958496094, "learning_rate": 7.483771251931993e-05, "loss": 5.603, "step": 17760 }, { "epoch": 0.024968517116677233, "grad_norm": 1.464343786239624, "learning_rate": 7.487986511170436e-05, "loss": 5.5724, "step": 17770 }, { "epoch": 0.024982568054840813, "grad_norm": 1.409480333328247, "learning_rate": 7.492201770408879e-05, "loss": 5.6456, "step": 17780 }, { "epoch": 0.02499661899300439, "grad_norm": 1.4030753374099731, "learning_rate": 7.496417029647324e-05, "loss": 5.6821, "step": 17790 }, { "epoch": 0.02501066993116797, "grad_norm": 1.4202769994735718, "learning_rate": 7.500632288885765e-05, "loss": 5.6878, "step": 17800 }, { "epoch": 0.025024720869331545, "grad_norm": 1.3402527570724487, "learning_rate": 7.504847548124208e-05, "loss": 5.5967, "step": 17810 }, { "epoch": 0.02503877180749512, "grad_norm": 1.4031054973602295, "learning_rate": 7.509062807362653e-05, "loss": 5.6123, "step": 17820 }, { "epoch": 0.0250528227456587, "grad_norm": 1.5110526084899902, "learning_rate": 7.513278066601096e-05, "loss": 5.6823, "step": 17830 }, { "epoch": 0.025066873683822277, "grad_norm": 1.3663294315338135, "learning_rate": 7.517493325839538e-05, "loss": 5.6382, "step": 17840 }, { "epoch": 0.025080924621985853, "grad_norm": 1.2902328968048096, "learning_rate": 7.521708585077982e-05, "loss": 5.6346, "step": 17850 }, { "epoch": 0.025094975560149432, "grad_norm": 1.4534049034118652, "learning_rate": 7.525923844316425e-05, "loss": 
5.5818, "step": 17860 }, { "epoch": 0.02510902649831301, "grad_norm": 1.3165909051895142, "learning_rate": 7.530139103554867e-05, "loss": 5.6324, "step": 17870 }, { "epoch": 0.025123077436476588, "grad_norm": 1.443505048751831, "learning_rate": 7.534354362793312e-05, "loss": 5.6841, "step": 17880 }, { "epoch": 0.025137128374640164, "grad_norm": 1.364423155784607, "learning_rate": 7.538569622031755e-05, "loss": 5.6508, "step": 17890 }, { "epoch": 0.02515117931280374, "grad_norm": 1.3083369731903076, "learning_rate": 7.542784881270196e-05, "loss": 5.7182, "step": 17900 }, { "epoch": 0.02516523025096732, "grad_norm": 1.2806565761566162, "learning_rate": 7.547000140508641e-05, "loss": 5.6075, "step": 17910 }, { "epoch": 0.025179281189130896, "grad_norm": 1.3733397722244263, "learning_rate": 7.551215399747084e-05, "loss": 5.6118, "step": 17920 }, { "epoch": 0.025193332127294476, "grad_norm": 1.3024393320083618, "learning_rate": 7.555430658985526e-05, "loss": 5.628, "step": 17930 }, { "epoch": 0.025207383065458052, "grad_norm": 1.447492003440857, "learning_rate": 7.55964591822397e-05, "loss": 5.6351, "step": 17940 }, { "epoch": 0.025221434003621628, "grad_norm": 1.2635152339935303, "learning_rate": 7.563861177462413e-05, "loss": 5.5923, "step": 17950 }, { "epoch": 0.025235484941785208, "grad_norm": 1.2946640253067017, "learning_rate": 7.568076436700855e-05, "loss": 5.6809, "step": 17960 }, { "epoch": 0.025249535879948784, "grad_norm": 1.283579707145691, "learning_rate": 7.5722916959393e-05, "loss": 5.7118, "step": 17970 }, { "epoch": 0.02526358681811236, "grad_norm": 1.259277582168579, "learning_rate": 7.576506955177743e-05, "loss": 5.6226, "step": 17980 }, { "epoch": 0.02527763775627594, "grad_norm": 1.519494891166687, "learning_rate": 7.580722214416187e-05, "loss": 5.6467, "step": 17990 }, { "epoch": 0.025291688694439516, "grad_norm": 1.3044480085372925, "learning_rate": 7.584937473654629e-05, "loss": 5.7049, "step": 18000 }, { "epoch": 0.025305739632603096, 
"grad_norm": 1.3547728061676025, "learning_rate": 7.589152732893072e-05, "loss": 5.5962, "step": 18010 }, { "epoch": 0.025319790570766672, "grad_norm": 1.3373003005981445, "learning_rate": 7.593367992131517e-05, "loss": 5.6107, "step": 18020 }, { "epoch": 0.025333841508930248, "grad_norm": 1.3276454210281372, "learning_rate": 7.597583251369958e-05, "loss": 5.5887, "step": 18030 }, { "epoch": 0.025347892447093828, "grad_norm": 1.3012672662734985, "learning_rate": 7.601798510608401e-05, "loss": 5.6691, "step": 18040 }, { "epoch": 0.025361943385257404, "grad_norm": 1.3194389343261719, "learning_rate": 7.606013769846846e-05, "loss": 5.6852, "step": 18050 }, { "epoch": 0.025375994323420983, "grad_norm": 1.2897201776504517, "learning_rate": 7.610229029085288e-05, "loss": 5.6437, "step": 18060 }, { "epoch": 0.02539004526158456, "grad_norm": 2.91089129447937, "learning_rate": 7.614444288323731e-05, "loss": 5.6226, "step": 18070 }, { "epoch": 0.025404096199748136, "grad_norm": 1.2783751487731934, "learning_rate": 7.618659547562175e-05, "loss": 5.5288, "step": 18080 }, { "epoch": 0.025418147137911715, "grad_norm": 1.5145020484924316, "learning_rate": 7.622874806800617e-05, "loss": 5.611, "step": 18090 }, { "epoch": 0.02543219807607529, "grad_norm": 1.3364498615264893, "learning_rate": 7.62709006603906e-05, "loss": 5.5694, "step": 18100 }, { "epoch": 0.02544624901423887, "grad_norm": 1.3756706714630127, "learning_rate": 7.631305325277505e-05, "loss": 5.5867, "step": 18110 }, { "epoch": 0.025460299952402447, "grad_norm": 1.4030925035476685, "learning_rate": 7.635520584515948e-05, "loss": 5.5534, "step": 18120 }, { "epoch": 0.025474350890566023, "grad_norm": 1.3178571462631226, "learning_rate": 7.63973584375439e-05, "loss": 5.6151, "step": 18130 }, { "epoch": 0.025488401828729603, "grad_norm": 1.3697510957717896, "learning_rate": 7.643951102992834e-05, "loss": 5.648, "step": 18140 }, { "epoch": 0.02550245276689318, "grad_norm": 1.3539316654205322, "learning_rate": 
7.648166362231277e-05, "loss": 5.5328, "step": 18150 }, { "epoch": 0.025516503705056755, "grad_norm": 1.3074488639831543, "learning_rate": 7.652381621469719e-05, "loss": 5.6203, "step": 18160 }, { "epoch": 0.025530554643220335, "grad_norm": 1.3524091243743896, "learning_rate": 7.656596880708163e-05, "loss": 5.6306, "step": 18170 }, { "epoch": 0.02554460558138391, "grad_norm": 1.3374627828598022, "learning_rate": 7.660812139946606e-05, "loss": 5.6288, "step": 18180 }, { "epoch": 0.02555865651954749, "grad_norm": 1.351073145866394, "learning_rate": 7.665027399185048e-05, "loss": 5.606, "step": 18190 }, { "epoch": 0.025572707457711067, "grad_norm": 1.4369218349456787, "learning_rate": 7.669242658423493e-05, "loss": 5.5697, "step": 18200 }, { "epoch": 0.025586758395874643, "grad_norm": 1.3648687601089478, "learning_rate": 7.673457917661936e-05, "loss": 5.5595, "step": 18210 }, { "epoch": 0.025600809334038223, "grad_norm": 1.3496894836425781, "learning_rate": 7.677673176900378e-05, "loss": 5.528, "step": 18220 }, { "epoch": 0.0256148602722018, "grad_norm": 1.3187439441680908, "learning_rate": 7.681888436138822e-05, "loss": 5.6865, "step": 18230 }, { "epoch": 0.02562891121036538, "grad_norm": 1.4901741743087769, "learning_rate": 7.686103695377265e-05, "loss": 5.5185, "step": 18240 }, { "epoch": 0.025642962148528955, "grad_norm": 1.4721450805664062, "learning_rate": 7.69031895461571e-05, "loss": 5.5971, "step": 18250 }, { "epoch": 0.02565701308669253, "grad_norm": 1.3747284412384033, "learning_rate": 7.694534213854151e-05, "loss": 5.6086, "step": 18260 }, { "epoch": 0.02567106402485611, "grad_norm": 1.4034130573272705, "learning_rate": 7.698749473092594e-05, "loss": 5.585, "step": 18270 }, { "epoch": 0.025685114963019687, "grad_norm": 1.4254305362701416, "learning_rate": 7.702964732331039e-05, "loss": 5.5106, "step": 18280 }, { "epoch": 0.025699165901183263, "grad_norm": 1.4483678340911865, "learning_rate": 7.707179991569481e-05, "loss": 5.5116, "step": 18290 }, { 
"epoch": 0.025713216839346843, "grad_norm": 1.3134675025939941, "learning_rate": 7.711395250807924e-05, "loss": 5.5165, "step": 18300 }, { "epoch": 0.02572726777751042, "grad_norm": 1.3608453273773193, "learning_rate": 7.715610510046368e-05, "loss": 5.4691, "step": 18310 }, { "epoch": 0.025741318715674, "grad_norm": 1.3626971244812012, "learning_rate": 7.71982576928481e-05, "loss": 5.5634, "step": 18320 }, { "epoch": 0.025755369653837575, "grad_norm": 1.3774510622024536, "learning_rate": 7.724041028523253e-05, "loss": 5.5621, "step": 18330 }, { "epoch": 0.02576942059200115, "grad_norm": 1.361126184463501, "learning_rate": 7.728256287761698e-05, "loss": 5.5653, "step": 18340 }, { "epoch": 0.02578347153016473, "grad_norm": 1.440781593322754, "learning_rate": 7.73247154700014e-05, "loss": 5.5529, "step": 18350 }, { "epoch": 0.025797522468328307, "grad_norm": 1.3827389478683472, "learning_rate": 7.736686806238583e-05, "loss": 5.4872, "step": 18360 }, { "epoch": 0.025811573406491886, "grad_norm": 1.3901987075805664, "learning_rate": 7.740902065477027e-05, "loss": 5.6825, "step": 18370 }, { "epoch": 0.025825624344655462, "grad_norm": 1.549964189529419, "learning_rate": 7.745117324715469e-05, "loss": 5.5766, "step": 18380 }, { "epoch": 0.02583967528281904, "grad_norm": 1.3051047325134277, "learning_rate": 7.749332583953912e-05, "loss": 5.5934, "step": 18390 }, { "epoch": 0.025853726220982618, "grad_norm": 1.3041412830352783, "learning_rate": 7.753547843192356e-05, "loss": 5.5017, "step": 18400 }, { "epoch": 0.025867777159146194, "grad_norm": 1.3146135807037354, "learning_rate": 7.7577631024308e-05, "loss": 5.5809, "step": 18410 }, { "epoch": 0.025881828097309774, "grad_norm": 1.3756433725357056, "learning_rate": 7.761978361669241e-05, "loss": 5.553, "step": 18420 }, { "epoch": 0.02589587903547335, "grad_norm": 1.5293617248535156, "learning_rate": 7.766193620907686e-05, "loss": 5.6161, "step": 18430 }, { "epoch": 0.025909929973636926, "grad_norm": 1.3322265148162842, 
"learning_rate": 7.770408880146129e-05, "loss": 5.6277, "step": 18440 }, { "epoch": 0.025923980911800506, "grad_norm": 1.5310215950012207, "learning_rate": 7.77462413938457e-05, "loss": 5.7149, "step": 18450 }, { "epoch": 0.025938031849964082, "grad_norm": 1.3711066246032715, "learning_rate": 7.778839398623015e-05, "loss": 5.5819, "step": 18460 }, { "epoch": 0.025952082788127658, "grad_norm": 1.3732609748840332, "learning_rate": 7.783054657861458e-05, "loss": 5.557, "step": 18470 }, { "epoch": 0.025966133726291238, "grad_norm": 1.3032290935516357, "learning_rate": 7.7872699170999e-05, "loss": 5.6042, "step": 18480 }, { "epoch": 0.025980184664454814, "grad_norm": 1.4029079675674438, "learning_rate": 7.791485176338344e-05, "loss": 5.6057, "step": 18490 }, { "epoch": 0.025994235602618394, "grad_norm": 1.2981117963790894, "learning_rate": 7.795700435576788e-05, "loss": 5.572, "step": 18500 }, { "epoch": 0.02600828654078197, "grad_norm": 1.346895456314087, "learning_rate": 7.799915694815229e-05, "loss": 5.5795, "step": 18510 }, { "epoch": 0.026022337478945546, "grad_norm": 1.3572524785995483, "learning_rate": 7.804130954053674e-05, "loss": 5.5995, "step": 18520 }, { "epoch": 0.026036388417109126, "grad_norm": 1.4479949474334717, "learning_rate": 7.808346213292117e-05, "loss": 5.6112, "step": 18530 }, { "epoch": 0.026050439355272702, "grad_norm": 1.5404094457626343, "learning_rate": 7.812561472530561e-05, "loss": 5.6727, "step": 18540 }, { "epoch": 0.02606449029343628, "grad_norm": 1.4202011823654175, "learning_rate": 7.816776731769003e-05, "loss": 5.61, "step": 18550 }, { "epoch": 0.026078541231599858, "grad_norm": 1.4051263332366943, "learning_rate": 7.820991991007446e-05, "loss": 5.5492, "step": 18560 }, { "epoch": 0.026092592169763434, "grad_norm": 1.3520148992538452, "learning_rate": 7.82520725024589e-05, "loss": 5.6385, "step": 18570 }, { "epoch": 0.026106643107927013, "grad_norm": 1.389690637588501, "learning_rate": 7.829422509484332e-05, "loss": 5.482, "step": 
18580 }, { "epoch": 0.02612069404609059, "grad_norm": 1.3874398469924927, "learning_rate": 7.833637768722776e-05, "loss": 5.5605, "step": 18590 }, { "epoch": 0.026134744984254166, "grad_norm": 1.4007757902145386, "learning_rate": 7.83785302796122e-05, "loss": 5.5494, "step": 18600 }, { "epoch": 0.026148795922417745, "grad_norm": 1.5084589719772339, "learning_rate": 7.842068287199662e-05, "loss": 5.5122, "step": 18610 }, { "epoch": 0.02616284686058132, "grad_norm": 1.4278961420059204, "learning_rate": 7.846283546438105e-05, "loss": 5.5855, "step": 18620 }, { "epoch": 0.0261768977987449, "grad_norm": 1.4229931831359863, "learning_rate": 7.85049880567655e-05, "loss": 5.4738, "step": 18630 }, { "epoch": 0.026190948736908477, "grad_norm": 1.4617115259170532, "learning_rate": 7.854714064914991e-05, "loss": 5.6064, "step": 18640 }, { "epoch": 0.026204999675072053, "grad_norm": 1.335037350654602, "learning_rate": 7.858929324153434e-05, "loss": 5.5919, "step": 18650 }, { "epoch": 0.026219050613235633, "grad_norm": 1.3201597929000854, "learning_rate": 7.863144583391879e-05, "loss": 5.6363, "step": 18660 }, { "epoch": 0.02623310155139921, "grad_norm": 1.5056530237197876, "learning_rate": 7.86735984263032e-05, "loss": 5.5251, "step": 18670 }, { "epoch": 0.02624715248956279, "grad_norm": 1.3666268587112427, "learning_rate": 7.871575101868764e-05, "loss": 5.5708, "step": 18680 }, { "epoch": 0.026261203427726365, "grad_norm": 1.4931973218917847, "learning_rate": 7.875790361107208e-05, "loss": 5.5645, "step": 18690 }, { "epoch": 0.02627525436588994, "grad_norm": 1.3463979959487915, "learning_rate": 7.880005620345651e-05, "loss": 5.4477, "step": 18700 }, { "epoch": 0.02628930530405352, "grad_norm": 1.3805086612701416, "learning_rate": 7.884220879584093e-05, "loss": 5.6309, "step": 18710 }, { "epoch": 0.026303356242217097, "grad_norm": 1.3408585786819458, "learning_rate": 7.888436138822537e-05, "loss": 5.5943, "step": 18720 }, { "epoch": 0.026317407180380677, "grad_norm": 
1.3854987621307373, "learning_rate": 7.89265139806098e-05, "loss": 5.6367, "step": 18730 }, { "epoch": 0.026331458118544253, "grad_norm": 1.4840235710144043, "learning_rate": 7.896866657299422e-05, "loss": 5.6203, "step": 18740 }, { "epoch": 0.02634550905670783, "grad_norm": 1.4539438486099243, "learning_rate": 7.901081916537867e-05, "loss": 5.6533, "step": 18750 }, { "epoch": 0.02635955999487141, "grad_norm": 1.6064668893814087, "learning_rate": 7.90529717577631e-05, "loss": 5.57, "step": 18760 }, { "epoch": 0.026373610933034985, "grad_norm": 1.3408623933792114, "learning_rate": 7.909512435014752e-05, "loss": 5.4955, "step": 18770 }, { "epoch": 0.02638766187119856, "grad_norm": 1.9005026817321777, "learning_rate": 7.913727694253196e-05, "loss": 5.6209, "step": 18780 }, { "epoch": 0.02640171280936214, "grad_norm": 1.2707617282867432, "learning_rate": 7.917942953491639e-05, "loss": 5.6083, "step": 18790 }, { "epoch": 0.026415763747525717, "grad_norm": 1.494612216949463, "learning_rate": 7.922158212730081e-05, "loss": 5.5413, "step": 18800 }, { "epoch": 0.026429814685689296, "grad_norm": 1.4194583892822266, "learning_rate": 7.926373471968525e-05, "loss": 5.5375, "step": 18810 }, { "epoch": 0.026443865623852873, "grad_norm": 1.5205409526824951, "learning_rate": 7.930588731206969e-05, "loss": 5.5377, "step": 18820 }, { "epoch": 0.02645791656201645, "grad_norm": 1.2400825023651123, "learning_rate": 7.934803990445413e-05, "loss": 5.5744, "step": 18830 }, { "epoch": 0.02647196750018003, "grad_norm": 1.3689380884170532, "learning_rate": 7.939019249683855e-05, "loss": 5.618, "step": 18840 }, { "epoch": 0.026486018438343605, "grad_norm": 1.4133222103118896, "learning_rate": 7.943234508922298e-05, "loss": 5.7025, "step": 18850 }, { "epoch": 0.026500069376507184, "grad_norm": 1.4041091203689575, "learning_rate": 7.947449768160742e-05, "loss": 5.5289, "step": 18860 }, { "epoch": 0.02651412031467076, "grad_norm": 1.2778563499450684, "learning_rate": 7.951665027399184e-05, 
"loss": 5.504, "step": 18870 }, { "epoch": 0.026528171252834336, "grad_norm": 1.4318342208862305, "learning_rate": 7.955880286637627e-05, "loss": 5.5416, "step": 18880 }, { "epoch": 0.026542222190997916, "grad_norm": 1.3022536039352417, "learning_rate": 7.960095545876072e-05, "loss": 5.4591, "step": 18890 }, { "epoch": 0.026556273129161492, "grad_norm": 1.3003450632095337, "learning_rate": 7.964310805114514e-05, "loss": 5.5049, "step": 18900 }, { "epoch": 0.026570324067325072, "grad_norm": 1.4249634742736816, "learning_rate": 7.968526064352957e-05, "loss": 5.5521, "step": 18910 }, { "epoch": 0.026584375005488648, "grad_norm": 2.4674744606018066, "learning_rate": 7.972741323591401e-05, "loss": 5.5821, "step": 18920 }, { "epoch": 0.026598425943652224, "grad_norm": 1.4836219549179077, "learning_rate": 7.976956582829843e-05, "loss": 5.6154, "step": 18930 }, { "epoch": 0.026612476881815804, "grad_norm": 1.395345687866211, "learning_rate": 7.981171842068286e-05, "loss": 5.589, "step": 18940 }, { "epoch": 0.02662652781997938, "grad_norm": 1.3151682615280151, "learning_rate": 7.98538710130673e-05, "loss": 5.4916, "step": 18950 }, { "epoch": 0.026640578758142956, "grad_norm": 1.6428303718566895, "learning_rate": 7.989602360545172e-05, "loss": 5.6574, "step": 18960 }, { "epoch": 0.026654629696306536, "grad_norm": 1.3292784690856934, "learning_rate": 7.993817619783615e-05, "loss": 5.6062, "step": 18970 }, { "epoch": 0.026668680634470112, "grad_norm": 1.4000933170318604, "learning_rate": 7.99803287902206e-05, "loss": 5.5415, "step": 18980 }, { "epoch": 0.02668273157263369, "grad_norm": 1.428321361541748, "learning_rate": 8.002248138260503e-05, "loss": 5.5416, "step": 18990 }, { "epoch": 0.026696782510797268, "grad_norm": 1.3596489429473877, "learning_rate": 8.006463397498945e-05, "loss": 5.6136, "step": 19000 }, { "epoch": 0.026710833448960844, "grad_norm": 1.3439053297042847, "learning_rate": 8.010678656737389e-05, "loss": 5.5557, "step": 19010 }, { "epoch": 
0.026724884387124424, "grad_norm": 1.3456841707229614, "learning_rate": 8.014893915975832e-05, "loss": 5.484, "step": 19020 }, { "epoch": 0.026738935325288, "grad_norm": 1.317115306854248, "learning_rate": 8.019109175214274e-05, "loss": 5.6083, "step": 19030 }, { "epoch": 0.02675298626345158, "grad_norm": 1.845428466796875, "learning_rate": 8.023324434452719e-05, "loss": 5.5963, "step": 19040 }, { "epoch": 0.026767037201615156, "grad_norm": 1.309800386428833, "learning_rate": 8.027539693691162e-05, "loss": 5.5722, "step": 19050 }, { "epoch": 0.026781088139778732, "grad_norm": 1.389327883720398, "learning_rate": 8.031754952929603e-05, "loss": 5.4975, "step": 19060 }, { "epoch": 0.02679513907794231, "grad_norm": 1.251827359199524, "learning_rate": 8.035970212168048e-05, "loss": 5.5743, "step": 19070 }, { "epoch": 0.026809190016105888, "grad_norm": 1.2980040311813354, "learning_rate": 8.040185471406491e-05, "loss": 5.5124, "step": 19080 }, { "epoch": 0.026823240954269464, "grad_norm": 1.3532140254974365, "learning_rate": 8.044400730644933e-05, "loss": 5.5087, "step": 19090 }, { "epoch": 0.026837291892433043, "grad_norm": 1.3311684131622314, "learning_rate": 8.048615989883377e-05, "loss": 5.5773, "step": 19100 }, { "epoch": 0.02685134283059662, "grad_norm": 1.3414361476898193, "learning_rate": 8.05283124912182e-05, "loss": 5.4767, "step": 19110 }, { "epoch": 0.0268653937687602, "grad_norm": 1.47315514087677, "learning_rate": 8.057046508360265e-05, "loss": 5.5304, "step": 19120 }, { "epoch": 0.026879444706923775, "grad_norm": 1.3902984857559204, "learning_rate": 8.061261767598707e-05, "loss": 5.5592, "step": 19130 }, { "epoch": 0.02689349564508735, "grad_norm": 1.3104959726333618, "learning_rate": 8.06547702683715e-05, "loss": 5.5947, "step": 19140 }, { "epoch": 0.02690754658325093, "grad_norm": 1.377226710319519, "learning_rate": 8.069692286075594e-05, "loss": 5.6055, "step": 19150 }, { "epoch": 0.026921597521414507, "grad_norm": 1.3036943674087524, "learning_rate": 
8.073907545314036e-05, "loss": 5.5622, "step": 19160 }, { "epoch": 0.026935648459578087, "grad_norm": 1.395580768585205, "learning_rate": 8.078122804552479e-05, "loss": 5.4697, "step": 19170 }, { "epoch": 0.026949699397741663, "grad_norm": 1.2811263799667358, "learning_rate": 8.082338063790923e-05, "loss": 5.5932, "step": 19180 }, { "epoch": 0.02696375033590524, "grad_norm": 1.2819894552230835, "learning_rate": 8.086553323029365e-05, "loss": 5.5619, "step": 19190 }, { "epoch": 0.02697780127406882, "grad_norm": 1.2503257989883423, "learning_rate": 8.090768582267808e-05, "loss": 5.6324, "step": 19200 }, { "epoch": 0.026991852212232395, "grad_norm": 1.6546611785888672, "learning_rate": 8.094983841506253e-05, "loss": 5.451, "step": 19210 }, { "epoch": 0.027005903150395975, "grad_norm": 1.3742631673812866, "learning_rate": 8.099199100744695e-05, "loss": 5.6103, "step": 19220 }, { "epoch": 0.02701995408855955, "grad_norm": 1.5028876066207886, "learning_rate": 8.103414359983138e-05, "loss": 5.4252, "step": 19230 }, { "epoch": 0.027034005026723127, "grad_norm": 1.4231295585632324, "learning_rate": 8.107629619221582e-05, "loss": 5.5802, "step": 19240 }, { "epoch": 0.027048055964886707, "grad_norm": 1.3292512893676758, "learning_rate": 8.111844878460025e-05, "loss": 5.4812, "step": 19250 }, { "epoch": 0.027062106903050283, "grad_norm": 1.4668748378753662, "learning_rate": 8.116060137698467e-05, "loss": 5.5739, "step": 19260 }, { "epoch": 0.02707615784121386, "grad_norm": 1.3313186168670654, "learning_rate": 8.120275396936912e-05, "loss": 5.5789, "step": 19270 }, { "epoch": 0.02709020877937744, "grad_norm": 1.3527790307998657, "learning_rate": 8.124490656175355e-05, "loss": 5.4486, "step": 19280 }, { "epoch": 0.027104259717541015, "grad_norm": 1.3377543687820435, "learning_rate": 8.128705915413796e-05, "loss": 5.4651, "step": 19290 }, { "epoch": 0.027118310655704594, "grad_norm": 1.3352538347244263, "learning_rate": 8.132921174652241e-05, "loss": 5.4763, "step": 19300 }, { 
"epoch": 0.02713236159386817, "grad_norm": 1.3395129442214966, "learning_rate": 8.137136433890684e-05, "loss": 5.4819, "step": 19310 }, { "epoch": 0.027146412532031747, "grad_norm": 1.3543874025344849, "learning_rate": 8.141351693129126e-05, "loss": 5.5129, "step": 19320 }, { "epoch": 0.027160463470195326, "grad_norm": 1.3789215087890625, "learning_rate": 8.14556695236757e-05, "loss": 5.5339, "step": 19330 }, { "epoch": 0.027174514408358903, "grad_norm": 1.388160228729248, "learning_rate": 8.149782211606013e-05, "loss": 5.5459, "step": 19340 }, { "epoch": 0.027188565346522482, "grad_norm": 1.3371630907058716, "learning_rate": 8.153997470844455e-05, "loss": 5.6077, "step": 19350 }, { "epoch": 0.02720261628468606, "grad_norm": 1.3276069164276123, "learning_rate": 8.1582127300829e-05, "loss": 5.5951, "step": 19360 }, { "epoch": 0.027216667222849635, "grad_norm": 1.3663477897644043, "learning_rate": 8.162427989321343e-05, "loss": 5.581, "step": 19370 }, { "epoch": 0.027230718161013214, "grad_norm": 1.48804771900177, "learning_rate": 8.166643248559784e-05, "loss": 5.6145, "step": 19380 }, { "epoch": 0.02724476909917679, "grad_norm": 1.3625435829162598, "learning_rate": 8.170858507798229e-05, "loss": 5.4211, "step": 19390 }, { "epoch": 0.027258820037340366, "grad_norm": 1.324641227722168, "learning_rate": 8.175073767036672e-05, "loss": 5.429, "step": 19400 }, { "epoch": 0.027272870975503946, "grad_norm": 1.3358043432235718, "learning_rate": 8.179289026275117e-05, "loss": 5.4692, "step": 19410 }, { "epoch": 0.027286921913667522, "grad_norm": 1.48837411403656, "learning_rate": 8.183504285513558e-05, "loss": 5.5711, "step": 19420 }, { "epoch": 0.027300972851831102, "grad_norm": 1.3000835180282593, "learning_rate": 8.187719544752001e-05, "loss": 5.4646, "step": 19430 }, { "epoch": 0.027315023789994678, "grad_norm": 1.4490911960601807, "learning_rate": 8.191934803990446e-05, "loss": 5.495, "step": 19440 }, { "epoch": 0.027329074728158254, "grad_norm": 1.3494820594787598, 
"learning_rate": 8.196150063228888e-05, "loss": 5.573, "step": 19450 }, { "epoch": 0.027343125666321834, "grad_norm": 1.3359415531158447, "learning_rate": 8.200365322467331e-05, "loss": 5.6712, "step": 19460 }, { "epoch": 0.02735717660448541, "grad_norm": 1.3212742805480957, "learning_rate": 8.204580581705775e-05, "loss": 5.6058, "step": 19470 }, { "epoch": 0.02737122754264899, "grad_norm": 1.3946962356567383, "learning_rate": 8.208795840944217e-05, "loss": 5.4978, "step": 19480 }, { "epoch": 0.027385278480812566, "grad_norm": 1.3919817209243774, "learning_rate": 8.21301110018266e-05, "loss": 5.5974, "step": 19490 }, { "epoch": 0.027399329418976142, "grad_norm": 1.320351481437683, "learning_rate": 8.217226359421105e-05, "loss": 5.5079, "step": 19500 }, { "epoch": 0.02741338035713972, "grad_norm": 1.326079249382019, "learning_rate": 8.221441618659546e-05, "loss": 5.5104, "step": 19510 }, { "epoch": 0.027427431295303298, "grad_norm": 1.6833606958389282, "learning_rate": 8.22565687789799e-05, "loss": 5.5943, "step": 19520 }, { "epoch": 0.027441482233466877, "grad_norm": 1.373024344444275, "learning_rate": 8.229872137136434e-05, "loss": 5.5817, "step": 19530 }, { "epoch": 0.027455533171630454, "grad_norm": 1.2997173070907593, "learning_rate": 8.234087396374877e-05, "loss": 5.5115, "step": 19540 }, { "epoch": 0.02746958410979403, "grad_norm": 1.349067211151123, "learning_rate": 8.238302655613319e-05, "loss": 5.5306, "step": 19550 }, { "epoch": 0.02748363504795761, "grad_norm": 1.379137396812439, "learning_rate": 8.242517914851763e-05, "loss": 5.5341, "step": 19560 }, { "epoch": 0.027497685986121186, "grad_norm": 1.4701979160308838, "learning_rate": 8.246733174090206e-05, "loss": 5.4278, "step": 19570 }, { "epoch": 0.027511736924284762, "grad_norm": 1.373063087463379, "learning_rate": 8.250948433328648e-05, "loss": 5.4814, "step": 19580 }, { "epoch": 0.02752578786244834, "grad_norm": 1.3411657810211182, "learning_rate": 8.255163692567093e-05, "loss": 5.5641, "step": 
19590 }, { "epoch": 0.027539838800611918, "grad_norm": 1.2834957838058472, "learning_rate": 8.259378951805536e-05, "loss": 5.5242, "step": 19600 }, { "epoch": 0.027553889738775497, "grad_norm": 1.3774769306182861, "learning_rate": 8.263594211043978e-05, "loss": 5.6078, "step": 19610 }, { "epoch": 0.027567940676939073, "grad_norm": 1.3527755737304688, "learning_rate": 8.267809470282422e-05, "loss": 5.443, "step": 19620 }, { "epoch": 0.02758199161510265, "grad_norm": 1.8241328001022339, "learning_rate": 8.272024729520865e-05, "loss": 5.62, "step": 19630 }, { "epoch": 0.02759604255326623, "grad_norm": 1.3106392621994019, "learning_rate": 8.276239988759307e-05, "loss": 5.5135, "step": 19640 }, { "epoch": 0.027610093491429805, "grad_norm": 1.3482909202575684, "learning_rate": 8.280455247997751e-05, "loss": 5.4549, "step": 19650 }, { "epoch": 0.027624144429593385, "grad_norm": 1.3328837156295776, "learning_rate": 8.284670507236194e-05, "loss": 5.4759, "step": 19660 }, { "epoch": 0.02763819536775696, "grad_norm": 1.3374454975128174, "learning_rate": 8.288885766474636e-05, "loss": 5.5537, "step": 19670 }, { "epoch": 0.027652246305920537, "grad_norm": 1.3683466911315918, "learning_rate": 8.293101025713081e-05, "loss": 5.462, "step": 19680 }, { "epoch": 0.027666297244084117, "grad_norm": 1.3449815511703491, "learning_rate": 8.297316284951524e-05, "loss": 5.5387, "step": 19690 }, { "epoch": 0.027680348182247693, "grad_norm": 1.3451807498931885, "learning_rate": 8.301531544189968e-05, "loss": 5.5744, "step": 19700 }, { "epoch": 0.02769439912041127, "grad_norm": 1.3333075046539307, "learning_rate": 8.30574680342841e-05, "loss": 5.5967, "step": 19710 }, { "epoch": 0.02770845005857485, "grad_norm": 1.331404685974121, "learning_rate": 8.309962062666853e-05, "loss": 5.5404, "step": 19720 }, { "epoch": 0.027722500996738425, "grad_norm": 2.0781617164611816, "learning_rate": 8.314177321905298e-05, "loss": 5.4125, "step": 19730 }, { "epoch": 0.027736551934902005, "grad_norm": 
1.341507077217102, "learning_rate": 8.31839258114374e-05, "loss": 5.5681, "step": 19740 }, { "epoch": 0.02775060287306558, "grad_norm": 1.3700333833694458, "learning_rate": 8.322607840382183e-05, "loss": 5.5813, "step": 19750 }, { "epoch": 0.027764653811229157, "grad_norm": 1.3675099611282349, "learning_rate": 8.326823099620627e-05, "loss": 5.4456, "step": 19760 }, { "epoch": 0.027778704749392737, "grad_norm": 1.3649263381958008, "learning_rate": 8.331038358859069e-05, "loss": 5.5941, "step": 19770 }, { "epoch": 0.027792755687556313, "grad_norm": 1.3281179666519165, "learning_rate": 8.335253618097512e-05, "loss": 5.4776, "step": 19780 }, { "epoch": 0.027806806625719892, "grad_norm": 1.314491629600525, "learning_rate": 8.339468877335956e-05, "loss": 5.6428, "step": 19790 }, { "epoch": 0.02782085756388347, "grad_norm": 1.4704593420028687, "learning_rate": 8.343684136574398e-05, "loss": 5.5351, "step": 19800 }, { "epoch": 0.027834908502047045, "grad_norm": 1.364715337753296, "learning_rate": 8.347899395812841e-05, "loss": 5.6232, "step": 19810 }, { "epoch": 0.027848959440210624, "grad_norm": 1.2975870370864868, "learning_rate": 8.352114655051286e-05, "loss": 5.5265, "step": 19820 }, { "epoch": 0.0278630103783742, "grad_norm": 1.2619905471801758, "learning_rate": 8.356329914289729e-05, "loss": 5.5378, "step": 19830 }, { "epoch": 0.02787706131653778, "grad_norm": 1.4168630838394165, "learning_rate": 8.36054517352817e-05, "loss": 5.5177, "step": 19840 }, { "epoch": 0.027891112254701356, "grad_norm": 1.4585949182510376, "learning_rate": 8.364760432766615e-05, "loss": 5.4355, "step": 19850 }, { "epoch": 0.027905163192864933, "grad_norm": 1.356927752494812, "learning_rate": 8.368975692005058e-05, "loss": 5.5808, "step": 19860 }, { "epoch": 0.027919214131028512, "grad_norm": 1.495552659034729, "learning_rate": 8.3731909512435e-05, "loss": 5.5017, "step": 19870 }, { "epoch": 0.02793326506919209, "grad_norm": 1.2983280420303345, "learning_rate": 8.377406210481944e-05, "loss": 
5.4182, "step": 19880 }, { "epoch": 0.027947316007355665, "grad_norm": 1.3719418048858643, "learning_rate": 8.381621469720387e-05, "loss": 5.5936, "step": 19890 }, { "epoch": 0.027961366945519244, "grad_norm": 1.450944423675537, "learning_rate": 8.385836728958829e-05, "loss": 5.4513, "step": 19900 }, { "epoch": 0.02797541788368282, "grad_norm": 1.314232587814331, "learning_rate": 8.390051988197274e-05, "loss": 5.4597, "step": 19910 }, { "epoch": 0.0279894688218464, "grad_norm": 1.274675726890564, "learning_rate": 8.394267247435717e-05, "loss": 5.4734, "step": 19920 }, { "epoch": 0.028003519760009976, "grad_norm": 1.3578556776046753, "learning_rate": 8.398482506674159e-05, "loss": 5.4294, "step": 19930 }, { "epoch": 0.028017570698173552, "grad_norm": 1.3357514142990112, "learning_rate": 8.402697765912603e-05, "loss": 5.4595, "step": 19940 }, { "epoch": 0.028031621636337132, "grad_norm": 1.3431594371795654, "learning_rate": 8.406913025151046e-05, "loss": 5.5151, "step": 19950 }, { "epoch": 0.028045672574500708, "grad_norm": 1.5500048398971558, "learning_rate": 8.41112828438949e-05, "loss": 5.5227, "step": 19960 }, { "epoch": 0.028059723512664288, "grad_norm": 1.3698660135269165, "learning_rate": 8.415343543627932e-05, "loss": 5.5025, "step": 19970 }, { "epoch": 0.028073774450827864, "grad_norm": 1.332960844039917, "learning_rate": 8.419558802866376e-05, "loss": 5.5337, "step": 19980 }, { "epoch": 0.02808782538899144, "grad_norm": 1.2651346921920776, "learning_rate": 8.42377406210482e-05, "loss": 5.5576, "step": 19990 }, { "epoch": 0.02810187632715502, "grad_norm": 1.3758925199508667, "learning_rate": 8.427989321343262e-05, "loss": 5.3875, "step": 20000 }, { "epoch": 0.028115927265318596, "grad_norm": 1.412123203277588, "learning_rate": 8.432204580581705e-05, "loss": 5.6327, "step": 20010 }, { "epoch": 0.028129978203482175, "grad_norm": 1.3514657020568848, "learning_rate": 8.43641983982015e-05, "loss": 5.5142, "step": 20020 }, { "epoch": 0.02814402914164575, 
"grad_norm": 1.4179363250732422, "learning_rate": 8.440635099058591e-05, "loss": 5.5414, "step": 20030 }, { "epoch": 0.028158080079809328, "grad_norm": 1.3700520992279053, "learning_rate": 8.444850358297034e-05, "loss": 5.5191, "step": 20040 }, { "epoch": 0.028172131017972907, "grad_norm": 1.3933957815170288, "learning_rate": 8.449065617535479e-05, "loss": 5.5648, "step": 20050 }, { "epoch": 0.028186181956136484, "grad_norm": 1.2371602058410645, "learning_rate": 8.45328087677392e-05, "loss": 5.5392, "step": 20060 }, { "epoch": 0.02820023289430006, "grad_norm": 1.3389537334442139, "learning_rate": 8.457496136012364e-05, "loss": 5.4112, "step": 20070 }, { "epoch": 0.02821428383246364, "grad_norm": 1.3131827116012573, "learning_rate": 8.461711395250808e-05, "loss": 5.5364, "step": 20080 }, { "epoch": 0.028228334770627216, "grad_norm": 1.3104963302612305, "learning_rate": 8.46592665448925e-05, "loss": 5.4475, "step": 20090 }, { "epoch": 0.028242385708790795, "grad_norm": 1.3459300994873047, "learning_rate": 8.470141913727693e-05, "loss": 5.5386, "step": 20100 }, { "epoch": 0.02825643664695437, "grad_norm": 1.3658233880996704, "learning_rate": 8.474357172966137e-05, "loss": 5.5473, "step": 20110 }, { "epoch": 0.028270487585117948, "grad_norm": 1.3540921211242676, "learning_rate": 8.47857243220458e-05, "loss": 5.4997, "step": 20120 }, { "epoch": 0.028284538523281527, "grad_norm": 1.2725752592086792, "learning_rate": 8.482787691443022e-05, "loss": 5.5258, "step": 20130 }, { "epoch": 0.028298589461445103, "grad_norm": 1.3213703632354736, "learning_rate": 8.487002950681467e-05, "loss": 5.5548, "step": 20140 }, { "epoch": 0.028312640399608683, "grad_norm": 1.304655909538269, "learning_rate": 8.49121820991991e-05, "loss": 5.648, "step": 20150 }, { "epoch": 0.02832669133777226, "grad_norm": 1.2564914226531982, "learning_rate": 8.495433469158352e-05, "loss": 5.5097, "step": 20160 }, { "epoch": 0.028340742275935835, "grad_norm": 1.2632144689559937, "learning_rate": 
8.499648728396796e-05, "loss": 5.4913, "step": 20170 }, { "epoch": 0.028354793214099415, "grad_norm": 1.2769023180007935, "learning_rate": 8.503863987635239e-05, "loss": 5.504, "step": 20180 }, { "epoch": 0.02836884415226299, "grad_norm": 1.3045737743377686, "learning_rate": 8.508079246873681e-05, "loss": 5.3591, "step": 20190 }, { "epoch": 0.028382895090426567, "grad_norm": 1.3489607572555542, "learning_rate": 8.512294506112125e-05, "loss": 5.4323, "step": 20200 }, { "epoch": 0.028396946028590147, "grad_norm": 1.3132845163345337, "learning_rate": 8.516509765350569e-05, "loss": 5.5967, "step": 20210 }, { "epoch": 0.028410996966753723, "grad_norm": 1.3848180770874023, "learning_rate": 8.52072502458901e-05, "loss": 5.5421, "step": 20220 }, { "epoch": 0.028425047904917303, "grad_norm": 1.3170721530914307, "learning_rate": 8.524940283827455e-05, "loss": 5.5576, "step": 20230 }, { "epoch": 0.02843909884308088, "grad_norm": 1.2676606178283691, "learning_rate": 8.529155543065898e-05, "loss": 5.5059, "step": 20240 }, { "epoch": 0.028453149781244455, "grad_norm": 1.3508251905441284, "learning_rate": 8.533370802304342e-05, "loss": 5.3921, "step": 20250 }, { "epoch": 0.028467200719408035, "grad_norm": 1.4981266260147095, "learning_rate": 8.537586061542784e-05, "loss": 5.4407, "step": 20260 }, { "epoch": 0.02848125165757161, "grad_norm": 1.2797385454177856, "learning_rate": 8.541801320781227e-05, "loss": 5.4869, "step": 20270 }, { "epoch": 0.02849530259573519, "grad_norm": 1.394260287284851, "learning_rate": 8.546016580019672e-05, "loss": 5.5474, "step": 20280 }, { "epoch": 0.028509353533898767, "grad_norm": 1.4078445434570312, "learning_rate": 8.550231839258114e-05, "loss": 5.384, "step": 20290 }, { "epoch": 0.028523404472062343, "grad_norm": 1.7833536863327026, "learning_rate": 8.554447098496557e-05, "loss": 5.4554, "step": 20300 }, { "epoch": 0.028537455410225922, "grad_norm": 1.3692078590393066, "learning_rate": 8.558662357735001e-05, "loss": 5.4846, "step": 20310 }, { 
"epoch": 0.0285515063483895, "grad_norm": 1.3532313108444214, "learning_rate": 8.562877616973443e-05, "loss": 5.3635, "step": 20320 }, { "epoch": 0.028565557286553078, "grad_norm": 1.2888007164001465, "learning_rate": 8.567092876211886e-05, "loss": 5.5104, "step": 20330 }, { "epoch": 0.028579608224716654, "grad_norm": 1.3509674072265625, "learning_rate": 8.57130813545033e-05, "loss": 5.5451, "step": 20340 }, { "epoch": 0.02859365916288023, "grad_norm": 1.3917303085327148, "learning_rate": 8.575523394688772e-05, "loss": 5.4849, "step": 20350 }, { "epoch": 0.02860771010104381, "grad_norm": 1.3092026710510254, "learning_rate": 8.579738653927215e-05, "loss": 5.3983, "step": 20360 }, { "epoch": 0.028621761039207386, "grad_norm": 1.3765193223953247, "learning_rate": 8.58395391316566e-05, "loss": 5.5259, "step": 20370 }, { "epoch": 0.028635811977370963, "grad_norm": 1.38758385181427, "learning_rate": 8.588169172404102e-05, "loss": 5.4927, "step": 20380 }, { "epoch": 0.028649862915534542, "grad_norm": 1.3264268636703491, "learning_rate": 8.592384431642545e-05, "loss": 5.5557, "step": 20390 }, { "epoch": 0.02866391385369812, "grad_norm": 1.3711224794387817, "learning_rate": 8.596599690880989e-05, "loss": 5.4997, "step": 20400 }, { "epoch": 0.028677964791861698, "grad_norm": 1.430208683013916, "learning_rate": 8.600814950119432e-05, "loss": 5.3837, "step": 20410 }, { "epoch": 0.028692015730025274, "grad_norm": 1.2916505336761475, "learning_rate": 8.605030209357874e-05, "loss": 5.5252, "step": 20420 }, { "epoch": 0.02870606666818885, "grad_norm": 1.300502061843872, "learning_rate": 8.609245468596319e-05, "loss": 5.4593, "step": 20430 }, { "epoch": 0.02872011760635243, "grad_norm": 1.2897007465362549, "learning_rate": 8.613460727834762e-05, "loss": 5.4996, "step": 20440 }, { "epoch": 0.028734168544516006, "grad_norm": 1.3202191591262817, "learning_rate": 8.617675987073203e-05, "loss": 5.3855, "step": 20450 }, { "epoch": 0.028748219482679586, "grad_norm": 1.3272267580032349, 
"learning_rate": 8.621891246311648e-05, "loss": 5.5036, "step": 20460 }, { "epoch": 0.028762270420843162, "grad_norm": 1.308984398841858, "learning_rate": 8.626106505550091e-05, "loss": 5.5322, "step": 20470 }, { "epoch": 0.028776321359006738, "grad_norm": 1.3932908773422241, "learning_rate": 8.630321764788533e-05, "loss": 5.524, "step": 20480 }, { "epoch": 0.028790372297170318, "grad_norm": 1.3947856426239014, "learning_rate": 8.634537024026977e-05, "loss": 5.5781, "step": 20490 }, { "epoch": 0.028804423235333894, "grad_norm": 1.2810063362121582, "learning_rate": 8.63875228326542e-05, "loss": 5.4543, "step": 20500 }, { "epoch": 0.02881847417349747, "grad_norm": 1.3561229705810547, "learning_rate": 8.642967542503862e-05, "loss": 5.5105, "step": 20510 }, { "epoch": 0.02883252511166105, "grad_norm": 1.3260747194290161, "learning_rate": 8.647182801742307e-05, "loss": 5.451, "step": 20520 }, { "epoch": 0.028846576049824626, "grad_norm": 1.3854423761367798, "learning_rate": 8.65139806098075e-05, "loss": 5.4256, "step": 20530 }, { "epoch": 0.028860626987988205, "grad_norm": 1.3165987730026245, "learning_rate": 8.655613320219194e-05, "loss": 5.3924, "step": 20540 }, { "epoch": 0.02887467792615178, "grad_norm": 2.6763813495635986, "learning_rate": 8.659828579457636e-05, "loss": 5.524, "step": 20550 }, { "epoch": 0.028888728864315358, "grad_norm": 1.459052324295044, "learning_rate": 8.664043838696079e-05, "loss": 5.4794, "step": 20560 }, { "epoch": 0.028902779802478937, "grad_norm": 1.276800274848938, "learning_rate": 8.668259097934523e-05, "loss": 5.4635, "step": 20570 }, { "epoch": 0.028916830740642514, "grad_norm": 1.3238308429718018, "learning_rate": 8.672474357172965e-05, "loss": 5.5032, "step": 20580 }, { "epoch": 0.028930881678806093, "grad_norm": 1.308498740196228, "learning_rate": 8.676689616411408e-05, "loss": 5.5274, "step": 20590 }, { "epoch": 0.02894493261696967, "grad_norm": 1.2564195394515991, "learning_rate": 8.680904875649853e-05, "loss": 5.5354, "step": 
20600 }, { "epoch": 0.028958983555133246, "grad_norm": 1.276263952255249, "learning_rate": 8.685120134888295e-05, "loss": 5.4732, "step": 20610 }, { "epoch": 0.028973034493296825, "grad_norm": 1.257238745689392, "learning_rate": 8.689335394126738e-05, "loss": 5.5754, "step": 20620 }, { "epoch": 0.0289870854314604, "grad_norm": 1.402646541595459, "learning_rate": 8.693550653365182e-05, "loss": 5.5832, "step": 20630 }, { "epoch": 0.02900113636962398, "grad_norm": 1.2543669939041138, "learning_rate": 8.697765912603624e-05, "loss": 5.5227, "step": 20640 }, { "epoch": 0.029015187307787557, "grad_norm": 1.3000344038009644, "learning_rate": 8.701981171842067e-05, "loss": 5.3653, "step": 20650 }, { "epoch": 0.029029238245951133, "grad_norm": 1.3282537460327148, "learning_rate": 8.706196431080512e-05, "loss": 5.4161, "step": 20660 }, { "epoch": 0.029043289184114713, "grad_norm": 1.3235125541687012, "learning_rate": 8.710411690318953e-05, "loss": 5.5692, "step": 20670 }, { "epoch": 0.02905734012227829, "grad_norm": 1.3211613893508911, "learning_rate": 8.714626949557396e-05, "loss": 5.4011, "step": 20680 }, { "epoch": 0.029071391060441865, "grad_norm": 1.3463915586471558, "learning_rate": 8.718842208795841e-05, "loss": 5.5313, "step": 20690 }, { "epoch": 0.029085441998605445, "grad_norm": 1.304567813873291, "learning_rate": 8.723057468034284e-05, "loss": 5.3108, "step": 20700 }, { "epoch": 0.02909949293676902, "grad_norm": 1.2378467321395874, "learning_rate": 8.727272727272726e-05, "loss": 5.5691, "step": 20710 }, { "epoch": 0.0291135438749326, "grad_norm": 1.2816358804702759, "learning_rate": 8.73148798651117e-05, "loss": 5.4171, "step": 20720 }, { "epoch": 0.029127594813096177, "grad_norm": 1.467879056930542, "learning_rate": 8.735703245749613e-05, "loss": 5.424, "step": 20730 }, { "epoch": 0.029141645751259753, "grad_norm": 1.2690702676773071, "learning_rate": 8.739918504988055e-05, "loss": 5.5673, "step": 20740 }, { "epoch": 0.029155696689423333, "grad_norm": 
1.2710336446762085, "learning_rate": 8.7441337642265e-05, "loss": 5.47, "step": 20750 }, { "epoch": 0.02916974762758691, "grad_norm": 1.2570821046829224, "learning_rate": 8.748349023464943e-05, "loss": 5.5539, "step": 20760 }, { "epoch": 0.02918379856575049, "grad_norm": 1.3138800859451294, "learning_rate": 8.752564282703384e-05, "loss": 5.4017, "step": 20770 }, { "epoch": 0.029197849503914065, "grad_norm": 1.3933358192443848, "learning_rate": 8.756779541941829e-05, "loss": 5.3666, "step": 20780 }, { "epoch": 0.02921190044207764, "grad_norm": 1.499240517616272, "learning_rate": 8.760994801180272e-05, "loss": 5.4957, "step": 20790 }, { "epoch": 0.02922595138024122, "grad_norm": 1.332919955253601, "learning_rate": 8.765210060418714e-05, "loss": 5.4886, "step": 20800 }, { "epoch": 0.029240002318404797, "grad_norm": 1.2993764877319336, "learning_rate": 8.769425319657158e-05, "loss": 5.5195, "step": 20810 }, { "epoch": 0.029254053256568373, "grad_norm": 1.3067516088485718, "learning_rate": 8.773640578895601e-05, "loss": 5.5169, "step": 20820 }, { "epoch": 0.029268104194731952, "grad_norm": 1.28152596950531, "learning_rate": 8.777855838134046e-05, "loss": 5.4082, "step": 20830 }, { "epoch": 0.02928215513289553, "grad_norm": 1.3164806365966797, "learning_rate": 8.782071097372488e-05, "loss": 5.5273, "step": 20840 }, { "epoch": 0.029296206071059108, "grad_norm": 1.3631839752197266, "learning_rate": 8.786286356610931e-05, "loss": 5.4297, "step": 20850 }, { "epoch": 0.029310257009222684, "grad_norm": 1.3792098760604858, "learning_rate": 8.790501615849375e-05, "loss": 5.4204, "step": 20860 }, { "epoch": 0.02932430794738626, "grad_norm": 1.3812150955200195, "learning_rate": 8.794716875087817e-05, "loss": 5.3696, "step": 20870 }, { "epoch": 0.02933835888554984, "grad_norm": 1.2828326225280762, "learning_rate": 8.79893213432626e-05, "loss": 5.5572, "step": 20880 }, { "epoch": 0.029352409823713416, "grad_norm": 1.4241127967834473, "learning_rate": 8.803147393564705e-05, "loss": 
5.3305, "step": 20890 }, { "epoch": 0.029366460761876996, "grad_norm": 1.386412501335144, "learning_rate": 8.807362652803146e-05, "loss": 5.5092, "step": 20900 }, { "epoch": 0.029380511700040572, "grad_norm": 1.2819172143936157, "learning_rate": 8.81157791204159e-05, "loss": 5.3611, "step": 20910 }, { "epoch": 0.02939456263820415, "grad_norm": 1.305132269859314, "learning_rate": 8.815793171280034e-05, "loss": 5.5092, "step": 20920 }, { "epoch": 0.029408613576367728, "grad_norm": 1.359618902206421, "learning_rate": 8.820008430518476e-05, "loss": 5.5222, "step": 20930 }, { "epoch": 0.029422664514531304, "grad_norm": 1.3207499980926514, "learning_rate": 8.824223689756919e-05, "loss": 5.4073, "step": 20940 }, { "epoch": 0.029436715452694884, "grad_norm": 1.4006059169769287, "learning_rate": 8.828438948995363e-05, "loss": 5.4006, "step": 20950 }, { "epoch": 0.02945076639085846, "grad_norm": 1.3299939632415771, "learning_rate": 8.832654208233806e-05, "loss": 5.5144, "step": 20960 }, { "epoch": 0.029464817329022036, "grad_norm": 1.2843600511550903, "learning_rate": 8.836869467472248e-05, "loss": 5.4639, "step": 20970 }, { "epoch": 0.029478868267185616, "grad_norm": 1.2437927722930908, "learning_rate": 8.841084726710693e-05, "loss": 5.5152, "step": 20980 }, { "epoch": 0.029492919205349192, "grad_norm": 1.3445243835449219, "learning_rate": 8.845299985949136e-05, "loss": 5.5151, "step": 20990 }, { "epoch": 0.029506970143512768, "grad_norm": 1.3153207302093506, "learning_rate": 8.849515245187578e-05, "loss": 5.5303, "step": 21000 }, { "epoch": 0.029521021081676348, "grad_norm": 1.3267221450805664, "learning_rate": 8.853730504426022e-05, "loss": 5.5593, "step": 21010 }, { "epoch": 0.029535072019839924, "grad_norm": 1.3878754377365112, "learning_rate": 8.857945763664465e-05, "loss": 5.4331, "step": 21020 }, { "epoch": 0.029549122958003503, "grad_norm": 1.3253564834594727, "learning_rate": 8.862161022902907e-05, "loss": 5.4197, "step": 21030 }, { "epoch": 0.02956317389616708, 
"grad_norm": 1.2962226867675781, "learning_rate": 8.866376282141351e-05, "loss": 5.4463, "step": 21040 }, { "epoch": 0.029577224834330656, "grad_norm": 1.2892351150512695, "learning_rate": 8.870591541379794e-05, "loss": 5.4884, "step": 21050 }, { "epoch": 0.029591275772494235, "grad_norm": 1.435368537902832, "learning_rate": 8.874806800618236e-05, "loss": 5.5157, "step": 21060 }, { "epoch": 0.02960532671065781, "grad_norm": 1.3492792844772339, "learning_rate": 8.879022059856681e-05, "loss": 5.498, "step": 21070 }, { "epoch": 0.02961937764882139, "grad_norm": 1.3389153480529785, "learning_rate": 8.883237319095124e-05, "loss": 5.4208, "step": 21080 }, { "epoch": 0.029633428586984967, "grad_norm": 1.3750202655792236, "learning_rate": 8.887452578333566e-05, "loss": 5.3824, "step": 21090 }, { "epoch": 0.029647479525148544, "grad_norm": 1.3427931070327759, "learning_rate": 8.89166783757201e-05, "loss": 5.4627, "step": 21100 }, { "epoch": 0.029661530463312123, "grad_norm": 1.3169935941696167, "learning_rate": 8.895883096810453e-05, "loss": 5.4553, "step": 21110 }, { "epoch": 0.0296755814014757, "grad_norm": 1.3012065887451172, "learning_rate": 8.900098356048898e-05, "loss": 5.4464, "step": 21120 }, { "epoch": 0.029689632339639276, "grad_norm": 1.3812952041625977, "learning_rate": 8.90431361528734e-05, "loss": 5.4815, "step": 21130 }, { "epoch": 0.029703683277802855, "grad_norm": 1.336200475692749, "learning_rate": 8.908528874525782e-05, "loss": 5.3232, "step": 21140 }, { "epoch": 0.02971773421596643, "grad_norm": 1.2766376733779907, "learning_rate": 8.912744133764227e-05, "loss": 5.4727, "step": 21150 }, { "epoch": 0.02973178515413001, "grad_norm": 1.391106367111206, "learning_rate": 8.916959393002669e-05, "loss": 5.5038, "step": 21160 }, { "epoch": 0.029745836092293587, "grad_norm": 1.3515926599502563, "learning_rate": 8.921174652241112e-05, "loss": 5.4136, "step": 21170 }, { "epoch": 0.029759887030457163, "grad_norm": 1.3355499505996704, "learning_rate": 
8.925389911479556e-05, "loss": 5.3543, "step": 21180 }, { "epoch": 0.029773937968620743, "grad_norm": 1.2321969270706177, "learning_rate": 8.929605170717998e-05, "loss": 5.4438, "step": 21190 }, { "epoch": 0.02978798890678432, "grad_norm": 1.378713607788086, "learning_rate": 8.933820429956441e-05, "loss": 5.3934, "step": 21200 }, { "epoch": 0.0298020398449479, "grad_norm": 1.2393652200698853, "learning_rate": 8.938035689194886e-05, "loss": 5.5751, "step": 21210 }, { "epoch": 0.029816090783111475, "grad_norm": 1.4586329460144043, "learning_rate": 8.942250948433327e-05, "loss": 5.2181, "step": 21220 }, { "epoch": 0.02983014172127505, "grad_norm": 1.5103076696395874, "learning_rate": 8.94646620767177e-05, "loss": 5.4515, "step": 21230 }, { "epoch": 0.02984419265943863, "grad_norm": 1.4935081005096436, "learning_rate": 8.950681466910215e-05, "loss": 5.4309, "step": 21240 }, { "epoch": 0.029858243597602207, "grad_norm": 1.3122179508209229, "learning_rate": 8.954896726148658e-05, "loss": 5.3174, "step": 21250 }, { "epoch": 0.029872294535765787, "grad_norm": 1.280525803565979, "learning_rate": 8.9591119853871e-05, "loss": 5.3517, "step": 21260 }, { "epoch": 0.029886345473929363, "grad_norm": 1.3139269351959229, "learning_rate": 8.963327244625544e-05, "loss": 5.4784, "step": 21270 }, { "epoch": 0.02990039641209294, "grad_norm": 1.2973021268844604, "learning_rate": 8.967542503863987e-05, "loss": 5.3769, "step": 21280 }, { "epoch": 0.02991444735025652, "grad_norm": 1.4092621803283691, "learning_rate": 8.971757763102429e-05, "loss": 5.3972, "step": 21290 }, { "epoch": 0.029928498288420095, "grad_norm": 1.364310383796692, "learning_rate": 8.975973022340874e-05, "loss": 5.4728, "step": 21300 }, { "epoch": 0.02994254922658367, "grad_norm": 1.516514539718628, "learning_rate": 8.980188281579317e-05, "loss": 5.4473, "step": 21310 }, { "epoch": 0.02995660016474725, "grad_norm": 1.413217306137085, "learning_rate": 8.984403540817759e-05, "loss": 5.5172, "step": 21320 }, { "epoch": 
0.029970651102910827, "grad_norm": 1.724693775177002, "learning_rate": 8.988618800056203e-05, "loss": 5.3085, "step": 21330 }, { "epoch": 0.029984702041074406, "grad_norm": 1.2294498682022095, "learning_rate": 8.992834059294646e-05, "loss": 5.5309, "step": 21340 }, { "epoch": 0.029998752979237982, "grad_norm": 1.2753736972808838, "learning_rate": 8.997049318533088e-05, "loss": 5.4041, "step": 21350 }, { "epoch": 0.03001280391740156, "grad_norm": 1.3634488582611084, "learning_rate": 9.001264577771532e-05, "loss": 5.4316, "step": 21360 }, { "epoch": 0.030026854855565138, "grad_norm": 1.3544750213623047, "learning_rate": 9.005479837009976e-05, "loss": 5.4193, "step": 21370 }, { "epoch": 0.030040905793728714, "grad_norm": 1.3579610586166382, "learning_rate": 9.009695096248417e-05, "loss": 5.4289, "step": 21380 }, { "epoch": 0.030054956731892294, "grad_norm": 1.3251279592514038, "learning_rate": 9.013910355486862e-05, "loss": 5.4037, "step": 21390 }, { "epoch": 0.03006900767005587, "grad_norm": 1.4140822887420654, "learning_rate": 9.018125614725305e-05, "loss": 5.3694, "step": 21400 }, { "epoch": 0.030083058608219446, "grad_norm": 1.3545302152633667, "learning_rate": 9.02234087396375e-05, "loss": 5.4191, "step": 21410 }, { "epoch": 0.030097109546383026, "grad_norm": 1.3583157062530518, "learning_rate": 9.026556133202191e-05, "loss": 5.4124, "step": 21420 }, { "epoch": 0.030111160484546602, "grad_norm": 1.3296844959259033, "learning_rate": 9.030771392440634e-05, "loss": 5.3846, "step": 21430 }, { "epoch": 0.030125211422710182, "grad_norm": 1.287698745727539, "learning_rate": 9.034986651679079e-05, "loss": 5.3998, "step": 21440 }, { "epoch": 0.030139262360873758, "grad_norm": 1.4231420755386353, "learning_rate": 9.03920191091752e-05, "loss": 5.2575, "step": 21450 }, { "epoch": 0.030153313299037334, "grad_norm": 1.347367286682129, "learning_rate": 9.043417170155964e-05, "loss": 5.4645, "step": 21460 }, { "epoch": 0.030167364237200914, "grad_norm": 1.2664035558700562, 
"learning_rate": 9.047632429394408e-05, "loss": 5.4775, "step": 21470 }, { "epoch": 0.03018141517536449, "grad_norm": 1.3278405666351318, "learning_rate": 9.05184768863285e-05, "loss": 5.5146, "step": 21480 }, { "epoch": 0.030195466113528066, "grad_norm": 1.3342235088348389, "learning_rate": 9.056062947871293e-05, "loss": 5.438, "step": 21490 }, { "epoch": 0.030209517051691646, "grad_norm": 1.2799886465072632, "learning_rate": 9.060278207109737e-05, "loss": 5.4206, "step": 21500 }, { "epoch": 0.030223567989855222, "grad_norm": 1.3722513914108276, "learning_rate": 9.064493466348179e-05, "loss": 5.2671, "step": 21510 }, { "epoch": 0.0302376189280188, "grad_norm": 1.2341845035552979, "learning_rate": 9.068708725586622e-05, "loss": 5.4003, "step": 21520 }, { "epoch": 0.030251669866182378, "grad_norm": 1.3256713151931763, "learning_rate": 9.072923984825067e-05, "loss": 5.4125, "step": 21530 }, { "epoch": 0.030265720804345954, "grad_norm": 1.2737789154052734, "learning_rate": 9.07713924406351e-05, "loss": 5.493, "step": 21540 }, { "epoch": 0.030279771742509533, "grad_norm": 1.3018547296524048, "learning_rate": 9.081354503301952e-05, "loss": 5.4518, "step": 21550 }, { "epoch": 0.03029382268067311, "grad_norm": 1.2525168657302856, "learning_rate": 9.085569762540396e-05, "loss": 5.4166, "step": 21560 }, { "epoch": 0.03030787361883669, "grad_norm": 1.5232125520706177, "learning_rate": 9.089785021778839e-05, "loss": 5.3974, "step": 21570 }, { "epoch": 0.030321924557000265, "grad_norm": 1.332067608833313, "learning_rate": 9.094000281017281e-05, "loss": 5.2823, "step": 21580 }, { "epoch": 0.03033597549516384, "grad_norm": 1.3199515342712402, "learning_rate": 9.098215540255725e-05, "loss": 5.5286, "step": 21590 }, { "epoch": 0.03035002643332742, "grad_norm": 1.3310644626617432, "learning_rate": 9.102430799494169e-05, "loss": 5.3436, "step": 21600 }, { "epoch": 0.030364077371490997, "grad_norm": 1.285497784614563, "learning_rate": 9.10664605873261e-05, "loss": 5.3442, "step": 
21610 }, { "epoch": 0.030378128309654574, "grad_norm": 1.5494871139526367, "learning_rate": 9.110861317971055e-05, "loss": 5.5381, "step": 21620 }, { "epoch": 0.030392179247818153, "grad_norm": 1.3371659517288208, "learning_rate": 9.114655051285653e-05, "loss": 5.3604, "step": 21630 }, { "epoch": 0.03040623018598173, "grad_norm": 1.322951078414917, "learning_rate": 9.118870310524096e-05, "loss": 5.4602, "step": 21640 }, { "epoch": 0.03042028112414531, "grad_norm": 1.5651347637176514, "learning_rate": 9.123085569762539e-05, "loss": 5.4247, "step": 21650 }, { "epoch": 0.030434332062308885, "grad_norm": 1.3033775091171265, "learning_rate": 9.127300829000982e-05, "loss": 5.4838, "step": 21660 }, { "epoch": 0.03044838300047246, "grad_norm": 1.436387300491333, "learning_rate": 9.131516088239427e-05, "loss": 5.2139, "step": 21670 }, { "epoch": 0.03046243393863604, "grad_norm": 1.3395622968673706, "learning_rate": 9.135731347477869e-05, "loss": 5.4355, "step": 21680 }, { "epoch": 0.030476484876799617, "grad_norm": 1.457055926322937, "learning_rate": 9.139946606716312e-05, "loss": 5.3278, "step": 21690 }, { "epoch": 0.030490535814963197, "grad_norm": 1.3071521520614624, "learning_rate": 9.144161865954756e-05, "loss": 5.2767, "step": 21700 }, { "epoch": 0.030504586753126773, "grad_norm": 1.3810970783233643, "learning_rate": 9.148377125193198e-05, "loss": 5.4068, "step": 21710 }, { "epoch": 0.03051863769129035, "grad_norm": 1.3929694890975952, "learning_rate": 9.152592384431641e-05, "loss": 5.4296, "step": 21720 }, { "epoch": 0.03053268862945393, "grad_norm": 1.5036205053329468, "learning_rate": 9.156807643670086e-05, "loss": 5.4989, "step": 21730 }, { "epoch": 0.030546739567617505, "grad_norm": 1.315721035003662, "learning_rate": 9.161022902908527e-05, "loss": 5.4409, "step": 21740 }, { "epoch": 0.030560790505781085, "grad_norm": 1.323768138885498, "learning_rate": 9.165238162146972e-05, "loss": 5.4287, "step": 21750 }, { "epoch": 0.03057484144394466, "grad_norm": 
1.445924997329712, "learning_rate": 9.169453421385415e-05, "loss": 5.4339, "step": 21760 }, { "epoch": 0.030588892382108237, "grad_norm": 1.287317156791687, "learning_rate": 9.173668680623857e-05, "loss": 5.3386, "step": 21770 }, { "epoch": 0.030602943320271817, "grad_norm": 1.3343262672424316, "learning_rate": 9.177883939862301e-05, "loss": 5.4419, "step": 21780 }, { "epoch": 0.030616994258435393, "grad_norm": 1.309372901916504, "learning_rate": 9.182099199100744e-05, "loss": 5.3939, "step": 21790 }, { "epoch": 0.03063104519659897, "grad_norm": 1.3397578001022339, "learning_rate": 9.186314458339186e-05, "loss": 5.4396, "step": 21800 }, { "epoch": 0.03064509613476255, "grad_norm": 1.3325626850128174, "learning_rate": 9.19052971757763e-05, "loss": 5.3904, "step": 21810 }, { "epoch": 0.030659147072926125, "grad_norm": 1.2755528688430786, "learning_rate": 9.194744976816074e-05, "loss": 5.352, "step": 21820 }, { "epoch": 0.030673198011089704, "grad_norm": 1.3710780143737793, "learning_rate": 9.198960236054517e-05, "loss": 5.3976, "step": 21830 }, { "epoch": 0.03068724894925328, "grad_norm": 1.29194176197052, "learning_rate": 9.20317549529296e-05, "loss": 5.4736, "step": 21840 }, { "epoch": 0.030701299887416857, "grad_norm": 1.2661114931106567, "learning_rate": 9.207390754531403e-05, "loss": 5.5116, "step": 21850 }, { "epoch": 0.030715350825580436, "grad_norm": 1.337958574295044, "learning_rate": 9.211606013769846e-05, "loss": 5.4852, "step": 21860 }, { "epoch": 0.030729401763744012, "grad_norm": 1.3310383558273315, "learning_rate": 9.215821273008289e-05, "loss": 5.2673, "step": 21870 }, { "epoch": 0.030743452701907592, "grad_norm": 1.2530386447906494, "learning_rate": 9.220036532246732e-05, "loss": 5.4167, "step": 21880 }, { "epoch": 0.030757503640071168, "grad_norm": 1.273057460784912, "learning_rate": 9.224251791485175e-05, "loss": 5.4284, "step": 21890 }, { "epoch": 0.030771554578234744, "grad_norm": 1.264639973640442, "learning_rate": 9.228467050723619e-05, "loss": 
5.3107, "step": 21900 }, { "epoch": 0.030785605516398324, "grad_norm": 1.8401588201522827, "learning_rate": 9.232682309962062e-05, "loss": 5.3392, "step": 21910 }, { "epoch": 0.0307996564545619, "grad_norm": 1.2661328315734863, "learning_rate": 9.236897569200505e-05, "loss": 5.4274, "step": 21920 }, { "epoch": 0.030813707392725476, "grad_norm": 1.2905445098876953, "learning_rate": 9.241112828438948e-05, "loss": 5.4376, "step": 21930 }, { "epoch": 0.030827758330889056, "grad_norm": 1.395332932472229, "learning_rate": 9.245328087677391e-05, "loss": 5.3368, "step": 21940 }, { "epoch": 0.030841809269052632, "grad_norm": 1.3216753005981445, "learning_rate": 9.249543346915834e-05, "loss": 5.4493, "step": 21950 }, { "epoch": 0.030855860207216212, "grad_norm": 1.528210997581482, "learning_rate": 9.253758606154279e-05, "loss": 5.3529, "step": 21960 }, { "epoch": 0.030869911145379788, "grad_norm": 1.333594560623169, "learning_rate": 9.25797386539272e-05, "loss": 5.2689, "step": 21970 }, { "epoch": 0.030883962083543364, "grad_norm": 1.2748163938522339, "learning_rate": 9.262189124631164e-05, "loss": 5.2231, "step": 21980 }, { "epoch": 0.030898013021706944, "grad_norm": 1.4222743511199951, "learning_rate": 9.266404383869608e-05, "loss": 5.5653, "step": 21990 }, { "epoch": 0.03091206395987052, "grad_norm": 1.2896723747253418, "learning_rate": 9.27061964310805e-05, "loss": 5.4378, "step": 22000 }, { "epoch": 0.0309261148980341, "grad_norm": 1.3336145877838135, "learning_rate": 9.274834902346493e-05, "loss": 5.4317, "step": 22010 }, { "epoch": 0.030940165836197676, "grad_norm": 1.4646224975585938, "learning_rate": 9.279050161584937e-05, "loss": 5.331, "step": 22020 }, { "epoch": 0.030954216774361252, "grad_norm": 1.4956231117248535, "learning_rate": 9.283265420823379e-05, "loss": 5.4396, "step": 22030 }, { "epoch": 0.03096826771252483, "grad_norm": 1.3492366075515747, "learning_rate": 9.287480680061824e-05, "loss": 5.3148, "step": 22040 }, { "epoch": 0.030982318650688408, 
"grad_norm": 1.420286774635315, "learning_rate": 9.291695939300267e-05, "loss": 5.3644, "step": 22050 }, { "epoch": 0.030996369588851987, "grad_norm": 1.3561815023422241, "learning_rate": 9.295911198538708e-05, "loss": 5.3762, "step": 22060 }, { "epoch": 0.031010420527015563, "grad_norm": 1.2883050441741943, "learning_rate": 9.300126457777153e-05, "loss": 5.3012, "step": 22070 }, { "epoch": 0.03102447146517914, "grad_norm": 1.2738019227981567, "learning_rate": 9.304341717015596e-05, "loss": 5.4794, "step": 22080 }, { "epoch": 0.03103852240334272, "grad_norm": 1.4682445526123047, "learning_rate": 9.308556976254039e-05, "loss": 5.3829, "step": 22090 }, { "epoch": 0.031052573341506295, "grad_norm": 1.2694358825683594, "learning_rate": 9.312772235492482e-05, "loss": 5.4007, "step": 22100 }, { "epoch": 0.03106662427966987, "grad_norm": 1.3557790517807007, "learning_rate": 9.316987494730925e-05, "loss": 5.3658, "step": 22110 }, { "epoch": 0.03108067521783345, "grad_norm": 1.38999342918396, "learning_rate": 9.321202753969368e-05, "loss": 5.3173, "step": 22120 }, { "epoch": 0.031094726155997027, "grad_norm": 1.4154685735702515, "learning_rate": 9.325418013207812e-05, "loss": 5.296, "step": 22130 }, { "epoch": 0.031108777094160607, "grad_norm": 1.3089263439178467, "learning_rate": 9.329633272446255e-05, "loss": 5.3943, "step": 22140 }, { "epoch": 0.031122828032324183, "grad_norm": 1.258065104484558, "learning_rate": 9.333848531684698e-05, "loss": 5.4311, "step": 22150 }, { "epoch": 0.03113687897048776, "grad_norm": 1.329082727432251, "learning_rate": 9.338063790923141e-05, "loss": 5.3696, "step": 22160 }, { "epoch": 0.03115092990865134, "grad_norm": 1.3508650064468384, "learning_rate": 9.342279050161584e-05, "loss": 5.3758, "step": 22170 }, { "epoch": 0.031164980846814915, "grad_norm": 1.3524917364120483, "learning_rate": 9.346494309400027e-05, "loss": 5.3318, "step": 22180 }, { "epoch": 0.031179031784978495, "grad_norm": 1.2871861457824707, "learning_rate": 
9.35070956863847e-05, "loss": 5.5478, "step": 22190 }, { "epoch": 0.03119308272314207, "grad_norm": 1.3874597549438477, "learning_rate": 9.354924827876913e-05, "loss": 5.4872, "step": 22200 }, { "epoch": 0.031207133661305647, "grad_norm": 1.3094698190689087, "learning_rate": 9.359140087115357e-05, "loss": 5.3514, "step": 22210 }, { "epoch": 0.031221184599469227, "grad_norm": 1.498294711112976, "learning_rate": 9.3633553463538e-05, "loss": 5.2953, "step": 22220 }, { "epoch": 0.031235235537632803, "grad_norm": 1.7673845291137695, "learning_rate": 9.367570605592243e-05, "loss": 5.3295, "step": 22230 }, { "epoch": 0.03124928647579638, "grad_norm": 1.4196926355361938, "learning_rate": 9.371785864830686e-05, "loss": 5.3922, "step": 22240 }, { "epoch": 0.03126333741395996, "grad_norm": 1.6076236963272095, "learning_rate": 9.37600112406913e-05, "loss": 5.425, "step": 22250 }, { "epoch": 0.031277388352123535, "grad_norm": 1.2859355211257935, "learning_rate": 9.380216383307572e-05, "loss": 5.4363, "step": 22260 }, { "epoch": 0.03129143929028711, "grad_norm": 1.3375812768936157, "learning_rate": 9.384431642546015e-05, "loss": 5.5041, "step": 22270 }, { "epoch": 0.031305490228450694, "grad_norm": 1.2610328197479248, "learning_rate": 9.38864690178446e-05, "loss": 5.3793, "step": 22280 }, { "epoch": 0.03131954116661427, "grad_norm": 1.3440093994140625, "learning_rate": 9.392862161022901e-05, "loss": 5.2878, "step": 22290 }, { "epoch": 0.031333592104777847, "grad_norm": 1.3581554889678955, "learning_rate": 9.397077420261345e-05, "loss": 5.4093, "step": 22300 }, { "epoch": 0.03134764304294142, "grad_norm": 1.3433340787887573, "learning_rate": 9.401292679499789e-05, "loss": 5.391, "step": 22310 }, { "epoch": 0.031361693981105, "grad_norm": 1.3540128469467163, "learning_rate": 9.405507938738231e-05, "loss": 5.3798, "step": 22320 }, { "epoch": 0.031375744919268575, "grad_norm": 1.2961390018463135, "learning_rate": 9.409723197976674e-05, "loss": 5.316, "step": 22330 }, { "epoch": 
0.03138979585743216, "grad_norm": 1.2970017194747925, "learning_rate": 9.413938457215118e-05, "loss": 5.3238, "step": 22340 }, { "epoch": 0.031403846795595734, "grad_norm": 1.275976300239563, "learning_rate": 9.41815371645356e-05, "loss": 5.3903, "step": 22350 }, { "epoch": 0.03141789773375931, "grad_norm": 1.329797625541687, "learning_rate": 9.422368975692005e-05, "loss": 5.3756, "step": 22360 }, { "epoch": 0.03143194867192289, "grad_norm": 1.3486462831497192, "learning_rate": 9.426584234930448e-05, "loss": 5.4281, "step": 22370 }, { "epoch": 0.03144599961008646, "grad_norm": 1.3495275974273682, "learning_rate": 9.430799494168891e-05, "loss": 5.3231, "step": 22380 }, { "epoch": 0.031460050548250046, "grad_norm": 1.2206830978393555, "learning_rate": 9.435014753407334e-05, "loss": 5.3296, "step": 22390 }, { "epoch": 0.03147410148641362, "grad_norm": 1.2643572092056274, "learning_rate": 9.439230012645777e-05, "loss": 5.3465, "step": 22400 }, { "epoch": 0.0314881524245772, "grad_norm": 1.3389561176300049, "learning_rate": 9.44344527188422e-05, "loss": 5.3205, "step": 22410 }, { "epoch": 0.031502203362740774, "grad_norm": 1.2590327262878418, "learning_rate": 9.447660531122663e-05, "loss": 5.3742, "step": 22420 }, { "epoch": 0.03151625430090435, "grad_norm": 1.2709726095199585, "learning_rate": 9.451875790361106e-05, "loss": 5.4671, "step": 22430 }, { "epoch": 0.031530305239067934, "grad_norm": 1.3207013607025146, "learning_rate": 9.45609104959955e-05, "loss": 5.2865, "step": 22440 }, { "epoch": 0.03154435617723151, "grad_norm": 1.3502670526504517, "learning_rate": 9.460306308837993e-05, "loss": 5.2844, "step": 22450 }, { "epoch": 0.031558407115395086, "grad_norm": 1.3033579587936401, "learning_rate": 9.464521568076436e-05, "loss": 5.5248, "step": 22460 }, { "epoch": 0.03157245805355866, "grad_norm": 1.2336679697036743, "learning_rate": 9.468736827314879e-05, "loss": 5.4307, "step": 22470 }, { "epoch": 0.03158650899172224, "grad_norm": 1.4275672435760498, 
"learning_rate": 9.472952086553322e-05, "loss": 5.4736, "step": 22480 }, { "epoch": 0.03160055992988582, "grad_norm": 1.3759491443634033, "learning_rate": 9.477167345791765e-05, "loss": 5.38, "step": 22490 }, { "epoch": 0.0316146108680494, "grad_norm": 1.3446354866027832, "learning_rate": 9.481382605030208e-05, "loss": 5.4072, "step": 22500 }, { "epoch": 0.031628661806212974, "grad_norm": 1.3388227224349976, "learning_rate": 9.485597864268651e-05, "loss": 5.3462, "step": 22510 }, { "epoch": 0.03164271274437655, "grad_norm": 1.3788104057312012, "learning_rate": 9.489813123507095e-05, "loss": 5.4326, "step": 22520 }, { "epoch": 0.031656763682540126, "grad_norm": 1.424253225326538, "learning_rate": 9.494028382745538e-05, "loss": 5.4119, "step": 22530 }, { "epoch": 0.03167081462070371, "grad_norm": 1.2999223470687866, "learning_rate": 9.498243641983982e-05, "loss": 5.3624, "step": 22540 }, { "epoch": 0.031684865558867285, "grad_norm": 1.368601679801941, "learning_rate": 9.502458901222424e-05, "loss": 5.2332, "step": 22550 }, { "epoch": 0.03169891649703086, "grad_norm": 1.4578156471252441, "learning_rate": 9.506674160460867e-05, "loss": 5.2938, "step": 22560 }, { "epoch": 0.03171296743519444, "grad_norm": 1.274147391319275, "learning_rate": 9.510889419699311e-05, "loss": 5.3459, "step": 22570 }, { "epoch": 0.031727018373358014, "grad_norm": 1.3094562292099, "learning_rate": 9.515104678937753e-05, "loss": 5.3795, "step": 22580 }, { "epoch": 0.0317410693115216, "grad_norm": 1.2858974933624268, "learning_rate": 9.519319938176196e-05, "loss": 5.4598, "step": 22590 }, { "epoch": 0.03175512024968517, "grad_norm": 1.267670750617981, "learning_rate": 9.523535197414641e-05, "loss": 5.3513, "step": 22600 }, { "epoch": 0.03176917118784875, "grad_norm": 1.3018923997879028, "learning_rate": 9.527750456653083e-05, "loss": 5.3053, "step": 22610 }, { "epoch": 0.031783222126012325, "grad_norm": 1.5751721858978271, "learning_rate": 9.531965715891526e-05, "loss": 5.5364, "step": 22620 }, 
{ "epoch": 0.0317972730641759, "grad_norm": 1.3210186958312988, "learning_rate": 9.53618097512997e-05, "loss": 5.3663, "step": 22630 }, { "epoch": 0.03181132400233948, "grad_norm": 1.3259351253509521, "learning_rate": 9.540396234368412e-05, "loss": 5.4101, "step": 22640 }, { "epoch": 0.03182537494050306, "grad_norm": 1.2493022680282593, "learning_rate": 9.544611493606856e-05, "loss": 5.409, "step": 22650 }, { "epoch": 0.03183942587866664, "grad_norm": 1.3749110698699951, "learning_rate": 9.5488267528453e-05, "loss": 5.4479, "step": 22660 }, { "epoch": 0.03185347681683021, "grad_norm": 1.2747150659561157, "learning_rate": 9.553042012083743e-05, "loss": 5.3817, "step": 22670 }, { "epoch": 0.03186752775499379, "grad_norm": 1.2162046432495117, "learning_rate": 9.557257271322186e-05, "loss": 5.3368, "step": 22680 }, { "epoch": 0.031881578693157366, "grad_norm": 1.301879644393921, "learning_rate": 9.561472530560629e-05, "loss": 5.4536, "step": 22690 }, { "epoch": 0.03189562963132095, "grad_norm": 1.389109492301941, "learning_rate": 9.565687789799072e-05, "loss": 5.2986, "step": 22700 }, { "epoch": 0.031909680569484525, "grad_norm": 1.2711687088012695, "learning_rate": 9.569903049037515e-05, "loss": 5.488, "step": 22710 }, { "epoch": 0.0319237315076481, "grad_norm": 1.2965320348739624, "learning_rate": 9.573696782352113e-05, "loss": 5.2603, "step": 22720 }, { "epoch": 0.03193778244581168, "grad_norm": 1.325437307357788, "learning_rate": 9.577912041590558e-05, "loss": 5.4919, "step": 22730 }, { "epoch": 0.03195183338397525, "grad_norm": 1.2769930362701416, "learning_rate": 9.582127300829e-05, "loss": 5.329, "step": 22740 }, { "epoch": 0.031965884322138836, "grad_norm": 1.3971630334854126, "learning_rate": 9.586342560067443e-05, "loss": 5.3242, "step": 22750 }, { "epoch": 0.03197993526030241, "grad_norm": 1.2280455827713013, "learning_rate": 9.590557819305887e-05, "loss": 5.342, "step": 22760 }, { "epoch": 0.03199398619846599, "grad_norm": 1.3401799201965332, 
"learning_rate": 9.594773078544329e-05, "loss": 5.4924, "step": 22770 }, { "epoch": 0.032008037136629565, "grad_norm": 1.4017051458358765, "learning_rate": 9.598988337782772e-05, "loss": 5.277, "step": 22780 }, { "epoch": 0.03202208807479314, "grad_norm": 1.3228023052215576, "learning_rate": 9.603203597021217e-05, "loss": 5.3887, "step": 22790 }, { "epoch": 0.032036139012956724, "grad_norm": 1.2626895904541016, "learning_rate": 9.60741885625966e-05, "loss": 5.4167, "step": 22800 }, { "epoch": 0.0320501899511203, "grad_norm": 1.3468395471572876, "learning_rate": 9.611634115498101e-05, "loss": 5.3809, "step": 22810 }, { "epoch": 0.032064240889283877, "grad_norm": 1.2649286985397339, "learning_rate": 9.615849374736546e-05, "loss": 5.4244, "step": 22820 }, { "epoch": 0.03207829182744745, "grad_norm": 1.2764794826507568, "learning_rate": 9.620064633974989e-05, "loss": 5.3179, "step": 22830 }, { "epoch": 0.03209234276561103, "grad_norm": 1.2899025678634644, "learning_rate": 9.624279893213431e-05, "loss": 5.3826, "step": 22840 }, { "epoch": 0.03210639370377461, "grad_norm": 1.3156379461288452, "learning_rate": 9.628495152451875e-05, "loss": 5.4528, "step": 22850 }, { "epoch": 0.03212044464193819, "grad_norm": 1.3568313121795654, "learning_rate": 9.632710411690318e-05, "loss": 5.3015, "step": 22860 }, { "epoch": 0.032134495580101764, "grad_norm": 1.3608930110931396, "learning_rate": 9.63692567092876e-05, "loss": 5.3645, "step": 22870 }, { "epoch": 0.03214854651826534, "grad_norm": 1.3903223276138306, "learning_rate": 9.641140930167205e-05, "loss": 5.2674, "step": 22880 }, { "epoch": 0.03216259745642892, "grad_norm": 1.372069001197815, "learning_rate": 9.645356189405648e-05, "loss": 5.3272, "step": 22890 }, { "epoch": 0.0321766483945925, "grad_norm": 1.373261570930481, "learning_rate": 9.64957144864409e-05, "loss": 5.2367, "step": 22900 }, { "epoch": 0.032190699332756076, "grad_norm": 1.2778481245040894, "learning_rate": 9.653786707882534e-05, "loss": 5.4686, "step": 22910 
}, { "epoch": 0.03220475027091965, "grad_norm": 1.4180645942687988, "learning_rate": 9.658001967120977e-05, "loss": 5.3619, "step": 22920 }, { "epoch": 0.03221880120908323, "grad_norm": 1.3312855958938599, "learning_rate": 9.662217226359422e-05, "loss": 5.2804, "step": 22930 }, { "epoch": 0.032232852147246804, "grad_norm": 1.3726269006729126, "learning_rate": 9.666432485597863e-05, "loss": 5.428, "step": 22940 }, { "epoch": 0.03224690308541038, "grad_norm": 1.2660539150238037, "learning_rate": 9.670647744836306e-05, "loss": 5.3517, "step": 22950 }, { "epoch": 0.032260954023573964, "grad_norm": 1.3094338178634644, "learning_rate": 9.674863004074751e-05, "loss": 5.3259, "step": 22960 }, { "epoch": 0.03227500496173754, "grad_norm": 1.348771572113037, "learning_rate": 9.679078263313193e-05, "loss": 5.3373, "step": 22970 }, { "epoch": 0.032289055899901116, "grad_norm": 1.3427238464355469, "learning_rate": 9.683293522551636e-05, "loss": 5.2462, "step": 22980 }, { "epoch": 0.03230310683806469, "grad_norm": 1.2989392280578613, "learning_rate": 9.68750878179008e-05, "loss": 5.4482, "step": 22990 }, { "epoch": 0.03231715777622827, "grad_norm": 1.3329780101776123, "learning_rate": 9.691724041028522e-05, "loss": 5.2809, "step": 23000 }, { "epoch": 0.03233120871439185, "grad_norm": 1.3921016454696655, "learning_rate": 9.695939300266965e-05, "loss": 5.5561, "step": 23010 }, { "epoch": 0.03234525965255543, "grad_norm": 1.3452013731002808, "learning_rate": 9.70015455950541e-05, "loss": 5.3734, "step": 23020 }, { "epoch": 0.032359310590719004, "grad_norm": 1.2437107563018799, "learning_rate": 9.704369818743851e-05, "loss": 5.4029, "step": 23030 }, { "epoch": 0.03237336152888258, "grad_norm": 1.2906579971313477, "learning_rate": 9.708585077982294e-05, "loss": 5.1958, "step": 23040 }, { "epoch": 0.032387412467046156, "grad_norm": 1.290738821029663, "learning_rate": 9.712800337220739e-05, "loss": 5.3823, "step": 23050 }, { "epoch": 0.03240146340520974, "grad_norm": 1.3592157363891602, 
"learning_rate": 9.71701559645918e-05, "loss": 5.3865, "step": 23060 }, { "epoch": 0.032415514343373315, "grad_norm": 1.4341562986373901, "learning_rate": 9.721230855697624e-05, "loss": 5.1396, "step": 23070 }, { "epoch": 0.03242956528153689, "grad_norm": 1.7886507511138916, "learning_rate": 9.725446114936068e-05, "loss": 5.2892, "step": 23080 }, { "epoch": 0.03244361621970047, "grad_norm": 1.2833921909332275, "learning_rate": 9.729661374174511e-05, "loss": 5.408, "step": 23090 }, { "epoch": 0.032457667157864044, "grad_norm": 1.2953739166259766, "learning_rate": 9.733876633412953e-05, "loss": 5.2775, "step": 23100 }, { "epoch": 0.03247171809602763, "grad_norm": 1.264936923980713, "learning_rate": 9.738091892651398e-05, "loss": 5.3672, "step": 23110 }, { "epoch": 0.0324857690341912, "grad_norm": 1.324508786201477, "learning_rate": 9.742307151889841e-05, "loss": 5.2859, "step": 23120 }, { "epoch": 0.03249981997235478, "grad_norm": 1.2716864347457886, "learning_rate": 9.746522411128282e-05, "loss": 5.2861, "step": 23130 }, { "epoch": 0.032513870910518355, "grad_norm": 1.2742117643356323, "learning_rate": 9.750737670366727e-05, "loss": 5.3845, "step": 23140 }, { "epoch": 0.03252792184868193, "grad_norm": 1.3381208181381226, "learning_rate": 9.75495292960517e-05, "loss": 5.3024, "step": 23150 }, { "epoch": 0.032541972786845515, "grad_norm": 1.4787967205047607, "learning_rate": 9.759168188843612e-05, "loss": 5.3082, "step": 23160 }, { "epoch": 0.03255602372500909, "grad_norm": 1.301733136177063, "learning_rate": 9.763383448082056e-05, "loss": 5.4468, "step": 23170 }, { "epoch": 0.03257007466317267, "grad_norm": 1.271909236907959, "learning_rate": 9.7675987073205e-05, "loss": 5.2869, "step": 23180 }, { "epoch": 0.03258412560133624, "grad_norm": 1.2851873636245728, "learning_rate": 9.771813966558941e-05, "loss": 5.2482, "step": 23190 }, { "epoch": 0.03259817653949982, "grad_norm": 1.307614803314209, "learning_rate": 9.776029225797386e-05, "loss": 5.36, "step": 23200 }, { 
"epoch": 0.0326122274776634, "grad_norm": 1.25319504737854, "learning_rate": 9.780244485035829e-05, "loss": 5.409, "step": 23210 }, { "epoch": 0.03262627841582698, "grad_norm": 1.4005918502807617, "learning_rate": 9.784459744274273e-05, "loss": 5.3335, "step": 23220 }, { "epoch": 0.032640329353990555, "grad_norm": 1.2450411319732666, "learning_rate": 9.788675003512715e-05, "loss": 5.3397, "step": 23230 }, { "epoch": 0.03265438029215413, "grad_norm": 1.2542985677719116, "learning_rate": 9.792890262751158e-05, "loss": 5.4002, "step": 23240 }, { "epoch": 0.03266843123031771, "grad_norm": 1.2760860919952393, "learning_rate": 9.797105521989603e-05, "loss": 5.4205, "step": 23250 }, { "epoch": 0.03268248216848129, "grad_norm": 1.3349649906158447, "learning_rate": 9.801320781228044e-05, "loss": 5.3124, "step": 23260 }, { "epoch": 0.032696533106644866, "grad_norm": 1.2851274013519287, "learning_rate": 9.805536040466487e-05, "loss": 5.2979, "step": 23270 }, { "epoch": 0.03271058404480844, "grad_norm": 1.28581702709198, "learning_rate": 9.809751299704932e-05, "loss": 5.3434, "step": 23280 }, { "epoch": 0.03272463498297202, "grad_norm": 1.2845999002456665, "learning_rate": 9.813966558943374e-05, "loss": 5.3271, "step": 23290 }, { "epoch": 0.032738685921135595, "grad_norm": 1.4002037048339844, "learning_rate": 9.818181818181817e-05, "loss": 5.3637, "step": 23300 }, { "epoch": 0.03275273685929917, "grad_norm": 1.2308608293533325, "learning_rate": 9.822397077420261e-05, "loss": 5.3833, "step": 23310 }, { "epoch": 0.032766787797462754, "grad_norm": 1.3694802522659302, "learning_rate": 9.826612336658703e-05, "loss": 5.386, "step": 23320 }, { "epoch": 0.03278083873562633, "grad_norm": 1.294423222541809, "learning_rate": 9.830827595897146e-05, "loss": 5.2552, "step": 23330 }, { "epoch": 0.032794889673789906, "grad_norm": 1.3576372861862183, "learning_rate": 9.83504285513559e-05, "loss": 5.3575, "step": 23340 }, { "epoch": 0.03280894061195348, "grad_norm": 1.276759386062622, 
"learning_rate": 9.839258114374032e-05, "loss": 5.4282, "step": 23350 }, { "epoch": 0.03282299155011706, "grad_norm": 1.290276288986206, "learning_rate": 9.843473373612476e-05, "loss": 5.2632, "step": 23360 }, { "epoch": 0.03283704248828064, "grad_norm": 1.3245643377304077, "learning_rate": 9.84768863285092e-05, "loss": 5.3792, "step": 23370 }, { "epoch": 0.03285109342644422, "grad_norm": 1.3463149070739746, "learning_rate": 9.851903892089363e-05, "loss": 5.3489, "step": 23380 }, { "epoch": 0.032865144364607794, "grad_norm": 1.3925325870513916, "learning_rate": 9.856119151327805e-05, "loss": 5.2327, "step": 23390 }, { "epoch": 0.03287919530277137, "grad_norm": 1.3809847831726074, "learning_rate": 9.86033441056625e-05, "loss": 5.44, "step": 23400 }, { "epoch": 0.03289324624093495, "grad_norm": 1.2953121662139893, "learning_rate": 9.864549669804692e-05, "loss": 5.2537, "step": 23410 }, { "epoch": 0.03290729717909853, "grad_norm": 1.2769551277160645, "learning_rate": 9.868764929043134e-05, "loss": 5.4532, "step": 23420 }, { "epoch": 0.032921348117262106, "grad_norm": 1.3292839527130127, "learning_rate": 9.872980188281579e-05, "loss": 5.3824, "step": 23430 }, { "epoch": 0.03293539905542568, "grad_norm": 1.2691352367401123, "learning_rate": 9.877195447520022e-05, "loss": 5.3559, "step": 23440 }, { "epoch": 0.03294944999358926, "grad_norm": 1.4160397052764893, "learning_rate": 9.881410706758464e-05, "loss": 5.2673, "step": 23450 }, { "epoch": 0.032963500931752834, "grad_norm": 1.3387774229049683, "learning_rate": 9.885625965996908e-05, "loss": 5.3447, "step": 23460 }, { "epoch": 0.03297755186991642, "grad_norm": 1.3649985790252686, "learning_rate": 9.889841225235351e-05, "loss": 5.3222, "step": 23470 }, { "epoch": 0.032991602808079994, "grad_norm": 1.2664110660552979, "learning_rate": 9.894056484473793e-05, "loss": 5.34, "step": 23480 }, { "epoch": 0.03300565374624357, "grad_norm": 1.3583790063858032, "learning_rate": 9.898271743712237e-05, "loss": 5.2413, "step": 23490 
}, { "epoch": 0.033019704684407146, "grad_norm": 1.3465863466262817, "learning_rate": 9.90248700295068e-05, "loss": 5.245, "step": 23500 }, { "epoch": 0.03303375562257072, "grad_norm": 1.3353192806243896, "learning_rate": 9.906702262189125e-05, "loss": 5.3347, "step": 23510 }, { "epoch": 0.033047806560734305, "grad_norm": 1.3138058185577393, "learning_rate": 9.910917521427567e-05, "loss": 5.2903, "step": 23520 }, { "epoch": 0.03306185749889788, "grad_norm": 1.308861494064331, "learning_rate": 9.91513278066601e-05, "loss": 5.2704, "step": 23530 }, { "epoch": 0.03307590843706146, "grad_norm": 1.2975902557373047, "learning_rate": 9.919348039904454e-05, "loss": 5.3236, "step": 23540 }, { "epoch": 0.033089959375225034, "grad_norm": 1.2685211896896362, "learning_rate": 9.923563299142896e-05, "loss": 5.3083, "step": 23550 }, { "epoch": 0.03310401031338861, "grad_norm": 1.3065454959869385, "learning_rate": 9.927778558381339e-05, "loss": 5.3769, "step": 23560 }, { "epoch": 0.03311806125155219, "grad_norm": 1.3842145204544067, "learning_rate": 9.931993817619784e-05, "loss": 5.3922, "step": 23570 }, { "epoch": 0.03313211218971577, "grad_norm": 1.272464632987976, "learning_rate": 9.936209076858225e-05, "loss": 5.2546, "step": 23580 }, { "epoch": 0.033146163127879345, "grad_norm": 1.3734886646270752, "learning_rate": 9.940424336096669e-05, "loss": 5.4634, "step": 23590 }, { "epoch": 0.03316021406604292, "grad_norm": 1.3234834671020508, "learning_rate": 9.944639595335113e-05, "loss": 5.2879, "step": 23600 }, { "epoch": 0.0331742650042065, "grad_norm": 1.3114765882492065, "learning_rate": 9.948854854573555e-05, "loss": 5.2448, "step": 23610 }, { "epoch": 0.033188315942370074, "grad_norm": 1.239916443824768, "learning_rate": 9.953070113811998e-05, "loss": 5.1157, "step": 23620 }, { "epoch": 0.03320236688053366, "grad_norm": 1.3152765035629272, "learning_rate": 9.957285373050442e-05, "loss": 5.2116, "step": 23630 }, { "epoch": 0.03321641781869723, "grad_norm": 1.3076961040496826, 
"learning_rate": 9.961500632288884e-05, "loss": 5.3021, "step": 23640 }, { "epoch": 0.03323046875686081, "grad_norm": 1.3413221836090088, "learning_rate": 9.965715891527327e-05, "loss": 5.3721, "step": 23650 }, { "epoch": 0.033244519695024385, "grad_norm": 1.2938029766082764, "learning_rate": 9.969931150765772e-05, "loss": 5.313, "step": 23660 }, { "epoch": 0.03325857063318796, "grad_norm": 1.2781319618225098, "learning_rate": 9.974146410004215e-05, "loss": 5.2119, "step": 23670 }, { "epoch": 0.033272621571351545, "grad_norm": 1.3234959840774536, "learning_rate": 9.978361669242657e-05, "loss": 5.4493, "step": 23680 }, { "epoch": 0.03328667250951512, "grad_norm": 1.4533023834228516, "learning_rate": 9.982576928481101e-05, "loss": 5.317, "step": 23690 }, { "epoch": 0.0333007234476787, "grad_norm": 1.276374101638794, "learning_rate": 9.986792187719544e-05, "loss": 5.3605, "step": 23700 }, { "epoch": 0.03331477438584227, "grad_norm": 1.292145013809204, "learning_rate": 9.991007446957986e-05, "loss": 5.3252, "step": 23710 }, { "epoch": 0.03332882532400585, "grad_norm": 1.2901628017425537, "learning_rate": 9.99522270619643e-05, "loss": 5.2686, "step": 23720 }, { "epoch": 0.03334287626216943, "grad_norm": 1.3100521564483643, "learning_rate": 9.999437965434874e-05, "loss": 5.1247, "step": 23730 }, { "epoch": 0.03335692720033301, "grad_norm": 1.344598650932312, "learning_rate": 0.00010003653224673315, "loss": 5.2823, "step": 23740 }, { "epoch": 0.033370978138496585, "grad_norm": 1.4314061403274536, "learning_rate": 0.0001000786848391176, "loss": 5.2714, "step": 23750 }, { "epoch": 0.03338502907666016, "grad_norm": 1.2748030424118042, "learning_rate": 0.00010012083743150203, "loss": 5.3582, "step": 23760 }, { "epoch": 0.03339908001482374, "grad_norm": 1.3783010244369507, "learning_rate": 0.00010016299002388645, "loss": 5.1891, "step": 23770 }, { "epoch": 0.03341313095298732, "grad_norm": 1.3372441530227661, "learning_rate": 0.00010020514261627089, "loss": 5.3044, "step": 
23780 }, { "epoch": 0.033427181891150896, "grad_norm": 1.2305197715759277, "learning_rate": 0.00010024729520865532, "loss": 5.3672, "step": 23790 }, { "epoch": 0.03344123282931447, "grad_norm": 1.28226900100708, "learning_rate": 0.00010028944780103977, "loss": 5.3432, "step": 23800 }, { "epoch": 0.03345528376747805, "grad_norm": 1.2784920930862427, "learning_rate": 0.00010033160039342418, "loss": 5.2558, "step": 23810 }, { "epoch": 0.033469334705641625, "grad_norm": 1.298294186592102, "learning_rate": 0.00010037375298580862, "loss": 5.296, "step": 23820 }, { "epoch": 0.03348338564380521, "grad_norm": 1.3087866306304932, "learning_rate": 0.00010041590557819306, "loss": 5.2793, "step": 23830 }, { "epoch": 0.033497436581968784, "grad_norm": 1.3522417545318604, "learning_rate": 0.00010045805817057748, "loss": 5.369, "step": 23840 }, { "epoch": 0.03351148752013236, "grad_norm": 1.2974127531051636, "learning_rate": 0.00010050021076296191, "loss": 5.3775, "step": 23850 }, { "epoch": 0.033525538458295936, "grad_norm": 1.3162837028503418, "learning_rate": 0.00010054236335534635, "loss": 5.3664, "step": 23860 }, { "epoch": 0.03353958939645951, "grad_norm": 1.2961353063583374, "learning_rate": 0.00010058451594773077, "loss": 5.3546, "step": 23870 }, { "epoch": 0.033553640334623096, "grad_norm": 1.3011044263839722, "learning_rate": 0.0001006266685401152, "loss": 5.2702, "step": 23880 }, { "epoch": 0.03356769127278667, "grad_norm": 1.2920984029769897, "learning_rate": 0.00010066882113249965, "loss": 5.2699, "step": 23890 }, { "epoch": 0.03358174221095025, "grad_norm": 1.2577017545700073, "learning_rate": 0.00010071097372488407, "loss": 5.3344, "step": 23900 }, { "epoch": 0.033595793149113824, "grad_norm": 2.0838184356689453, "learning_rate": 0.0001007531263172685, "loss": 5.1927, "step": 23910 }, { "epoch": 0.0336098440872774, "grad_norm": 1.5343761444091797, "learning_rate": 0.00010079527890965294, "loss": 5.3123, "step": 23920 }, { "epoch": 0.03362389502544098, "grad_norm": 
1.3116449117660522, "learning_rate": 0.00010083743150203737, "loss": 5.3366, "step": 23930 }, { "epoch": 0.03363794596360456, "grad_norm": 1.2814358472824097, "learning_rate": 0.00010087958409442179, "loss": 5.2453, "step": 23940 }, { "epoch": 0.033651996901768136, "grad_norm": 1.2302325963974, "learning_rate": 0.00010092173668680623, "loss": 5.2506, "step": 23950 }, { "epoch": 0.03366604783993171, "grad_norm": 1.3629188537597656, "learning_rate": 0.00010096388927919067, "loss": 5.3369, "step": 23960 }, { "epoch": 0.03368009877809529, "grad_norm": 1.241894245147705, "learning_rate": 0.00010100604187157508, "loss": 5.3224, "step": 23970 }, { "epoch": 0.033694149716258864, "grad_norm": 1.3450833559036255, "learning_rate": 0.00010104819446395953, "loss": 5.2227, "step": 23980 }, { "epoch": 0.03370820065442245, "grad_norm": 1.3539717197418213, "learning_rate": 0.00010109034705634396, "loss": 5.1751, "step": 23990 }, { "epoch": 0.033722251592586024, "grad_norm": 1.5781745910644531, "learning_rate": 0.00010113249964872838, "loss": 5.2189, "step": 24000 }, { "epoch": 0.0337363025307496, "grad_norm": 1.2791818380355835, "learning_rate": 0.00010117465224111282, "loss": 5.3397, "step": 24010 }, { "epoch": 0.033750353468913176, "grad_norm": 1.318710207939148, "learning_rate": 0.00010121680483349725, "loss": 5.2535, "step": 24020 }, { "epoch": 0.03376440440707675, "grad_norm": 1.4116847515106201, "learning_rate": 0.00010125895742588167, "loss": 5.246, "step": 24030 }, { "epoch": 0.033778455345240335, "grad_norm": 1.352486491203308, "learning_rate": 0.00010130111001826612, "loss": 5.3818, "step": 24040 }, { "epoch": 0.03379250628340391, "grad_norm": 1.3490184545516968, "learning_rate": 0.00010134326261065055, "loss": 5.1459, "step": 24050 }, { "epoch": 0.03380655722156749, "grad_norm": 1.25826895236969, "learning_rate": 0.00010138541520303496, "loss": 5.2441, "step": 24060 }, { "epoch": 0.033820608159731064, "grad_norm": 1.2731109857559204, "learning_rate": 
0.00010142756779541941, "loss": 5.382, "step": 24070 }, { "epoch": 0.03383465909789464, "grad_norm": 1.2558667659759521, "learning_rate": 0.00010146972038780384, "loss": 5.2548, "step": 24080 }, { "epoch": 0.03384871003605822, "grad_norm": 1.3731662034988403, "learning_rate": 0.00010151187298018828, "loss": 5.1889, "step": 24090 }, { "epoch": 0.0338627609742218, "grad_norm": 1.2584823369979858, "learning_rate": 0.0001015540255725727, "loss": 5.2309, "step": 24100 }, { "epoch": 0.033876811912385375, "grad_norm": 1.3157720565795898, "learning_rate": 0.00010159617816495713, "loss": 5.147, "step": 24110 }, { "epoch": 0.03389086285054895, "grad_norm": 1.3942573070526123, "learning_rate": 0.00010163833075734158, "loss": 5.1946, "step": 24120 }, { "epoch": 0.03390491378871253, "grad_norm": 1.218231201171875, "learning_rate": 0.000101680483349726, "loss": 5.3256, "step": 24130 }, { "epoch": 0.03391896472687611, "grad_norm": 1.3514090776443481, "learning_rate": 0.00010172263594211043, "loss": 5.2472, "step": 24140 }, { "epoch": 0.03393301566503969, "grad_norm": 1.4769346714019775, "learning_rate": 0.00010176478853449487, "loss": 5.3211, "step": 24150 }, { "epoch": 0.03394706660320326, "grad_norm": 1.4301344156265259, "learning_rate": 0.00010180694112687929, "loss": 5.3223, "step": 24160 }, { "epoch": 0.03396111754136684, "grad_norm": 1.3295481204986572, "learning_rate": 0.00010184909371926372, "loss": 5.2742, "step": 24170 }, { "epoch": 0.033975168479530415, "grad_norm": 1.34248948097229, "learning_rate": 0.00010189124631164817, "loss": 5.3578, "step": 24180 }, { "epoch": 0.033989219417694, "grad_norm": 1.3010623455047607, "learning_rate": 0.00010193339890403258, "loss": 5.3045, "step": 24190 }, { "epoch": 0.034003270355857575, "grad_norm": 1.3236461877822876, "learning_rate": 0.00010197555149641701, "loss": 5.2843, "step": 24200 }, { "epoch": 0.03401732129402115, "grad_norm": 1.3258452415466309, "learning_rate": 0.00010201770408880146, "loss": 5.1747, "step": 24210 }, { 
"epoch": 0.03403137223218473, "grad_norm": 1.411746621131897, "learning_rate": 0.00010205985668118589, "loss": 5.2508, "step": 24220 }, { "epoch": 0.0340454231703483, "grad_norm": 1.399804949760437, "learning_rate": 0.00010210200927357031, "loss": 5.3071, "step": 24230 }, { "epoch": 0.03405947410851188, "grad_norm": 1.3025681972503662, "learning_rate": 0.00010214416186595475, "loss": 5.1783, "step": 24240 }, { "epoch": 0.03407352504667546, "grad_norm": 1.245466709136963, "learning_rate": 0.00010218631445833918, "loss": 5.2707, "step": 24250 }, { "epoch": 0.03408757598483904, "grad_norm": 1.307504415512085, "learning_rate": 0.0001022284670507236, "loss": 5.418, "step": 24260 }, { "epoch": 0.034101626923002615, "grad_norm": 1.331075668334961, "learning_rate": 0.00010227061964310805, "loss": 5.3077, "step": 24270 }, { "epoch": 0.03411567786116619, "grad_norm": 1.3218508958816528, "learning_rate": 0.00010231277223549248, "loss": 5.4505, "step": 24280 }, { "epoch": 0.03412972879932977, "grad_norm": 1.4919476509094238, "learning_rate": 0.0001023549248278769, "loss": 5.1643, "step": 24290 }, { "epoch": 0.03414377973749335, "grad_norm": 1.511077642440796, "learning_rate": 0.00010239707742026134, "loss": 5.3138, "step": 24300 }, { "epoch": 0.034157830675656926, "grad_norm": 1.2608944177627563, "learning_rate": 0.00010243923001264577, "loss": 5.353, "step": 24310 }, { "epoch": 0.0341718816138205, "grad_norm": 1.2811615467071533, "learning_rate": 0.00010248138260503019, "loss": 5.326, "step": 24320 }, { "epoch": 0.03418593255198408, "grad_norm": 1.3658688068389893, "learning_rate": 0.00010252353519741463, "loss": 5.2803, "step": 24330 }, { "epoch": 0.034199983490147655, "grad_norm": 1.2968345880508423, "learning_rate": 0.00010256568778979906, "loss": 5.2007, "step": 24340 }, { "epoch": 0.03421403442831124, "grad_norm": 1.3284701108932495, "learning_rate": 0.00010260784038218348, "loss": 5.2224, "step": 24350 }, { "epoch": 0.034228085366474814, "grad_norm": 1.2858362197875977, 
"learning_rate": 0.00010264999297456793, "loss": 5.3118, "step": 24360 }, { "epoch": 0.03424213630463839, "grad_norm": 1.242591381072998, "learning_rate": 0.00010269214556695236, "loss": 5.2242, "step": 24370 }, { "epoch": 0.034256187242801966, "grad_norm": 1.282493233680725, "learning_rate": 0.0001027342981593368, "loss": 5.4394, "step": 24380 }, { "epoch": 0.03427023818096554, "grad_norm": 1.2927570343017578, "learning_rate": 0.00010277645075172122, "loss": 5.3704, "step": 24390 }, { "epoch": 0.034284289119129126, "grad_norm": 1.2820286750793457, "learning_rate": 0.00010281860334410565, "loss": 5.2751, "step": 24400 }, { "epoch": 0.0342983400572927, "grad_norm": 1.3351532220840454, "learning_rate": 0.0001028607559364901, "loss": 5.2028, "step": 24410 }, { "epoch": 0.03431239099545628, "grad_norm": 1.3240007162094116, "learning_rate": 0.00010290290852887451, "loss": 5.1208, "step": 24420 }, { "epoch": 0.034326441933619854, "grad_norm": 1.3099170923233032, "learning_rate": 0.00010294506112125894, "loss": 5.3686, "step": 24430 }, { "epoch": 0.03434049287178343, "grad_norm": 1.3434085845947266, "learning_rate": 0.00010298721371364339, "loss": 5.2695, "step": 24440 }, { "epoch": 0.034354543809947014, "grad_norm": 1.2506746053695679, "learning_rate": 0.0001030293663060278, "loss": 5.4538, "step": 24450 }, { "epoch": 0.03436859474811059, "grad_norm": 1.2607885599136353, "learning_rate": 0.00010307151889841224, "loss": 5.2082, "step": 24460 }, { "epoch": 0.034382645686274166, "grad_norm": 1.2790932655334473, "learning_rate": 0.00010311367149079668, "loss": 5.3349, "step": 24470 }, { "epoch": 0.03439669662443774, "grad_norm": 1.4397903680801392, "learning_rate": 0.0001031558240831811, "loss": 5.2953, "step": 24480 }, { "epoch": 0.03441074756260132, "grad_norm": 1.254449725151062, "learning_rate": 0.00010319797667556553, "loss": 5.3391, "step": 24490 }, { "epoch": 0.0344247985007649, "grad_norm": 1.3138560056686401, "learning_rate": 0.00010324012926794998, "loss": 5.2934, 
"step": 24500 }, { "epoch": 0.03443884943892848, "grad_norm": 1.2442389726638794, "learning_rate": 0.00010328228186033441, "loss": 5.1722, "step": 24510 }, { "epoch": 0.034452900377092054, "grad_norm": 1.3116623163223267, "learning_rate": 0.00010332443445271882, "loss": 5.2642, "step": 24520 }, { "epoch": 0.03446695131525563, "grad_norm": 1.3116538524627686, "learning_rate": 0.00010336658704510327, "loss": 5.3113, "step": 24530 }, { "epoch": 0.034481002253419206, "grad_norm": 1.3560738563537598, "learning_rate": 0.0001034087396374877, "loss": 5.2302, "step": 24540 }, { "epoch": 0.03449505319158278, "grad_norm": 1.3531079292297363, "learning_rate": 0.00010345089222987212, "loss": 5.2305, "step": 24550 }, { "epoch": 0.034509104129746365, "grad_norm": 1.26160728931427, "learning_rate": 0.00010349304482225656, "loss": 5.2376, "step": 24560 }, { "epoch": 0.03452315506790994, "grad_norm": 1.4624823331832886, "learning_rate": 0.000103535197414641, "loss": 5.1941, "step": 24570 }, { "epoch": 0.03453720600607352, "grad_norm": 1.3396354913711548, "learning_rate": 0.00010357735000702541, "loss": 5.1731, "step": 24580 }, { "epoch": 0.034551256944237094, "grad_norm": 1.3305305242538452, "learning_rate": 0.00010361950259940986, "loss": 5.3613, "step": 24590 }, { "epoch": 0.03456530788240067, "grad_norm": 1.2732019424438477, "learning_rate": 0.00010366165519179429, "loss": 5.2624, "step": 24600 }, { "epoch": 0.03457935882056425, "grad_norm": 1.428470253944397, "learning_rate": 0.0001037038077841787, "loss": 5.2881, "step": 24610 }, { "epoch": 0.03459340975872783, "grad_norm": 1.3105640411376953, "learning_rate": 0.00010374596037656315, "loss": 5.3397, "step": 24620 }, { "epoch": 0.034607460696891405, "grad_norm": 1.4078952074050903, "learning_rate": 0.00010378811296894758, "loss": 5.4407, "step": 24630 }, { "epoch": 0.03462151163505498, "grad_norm": 1.3400802612304688, "learning_rate": 0.00010383026556133203, "loss": 5.3244, "step": 24640 }, { "epoch": 0.03463556257321856, 
"grad_norm": 1.315213918685913, "learning_rate": 0.00010387241815371644, "loss": 5.3574, "step": 24650 }, { "epoch": 0.03464961351138214, "grad_norm": 1.2948580980300903, "learning_rate": 0.00010391457074610087, "loss": 5.3071, "step": 24660 }, { "epoch": 0.03466366444954572, "grad_norm": 1.3167226314544678, "learning_rate": 0.00010395672333848532, "loss": 5.2615, "step": 24670 }, { "epoch": 0.03467771538770929, "grad_norm": 1.3595393896102905, "learning_rate": 0.00010399887593086974, "loss": 5.3395, "step": 24680 }, { "epoch": 0.03469176632587287, "grad_norm": 1.3089308738708496, "learning_rate": 0.00010404102852325417, "loss": 5.1544, "step": 24690 }, { "epoch": 0.034705817264036445, "grad_norm": 1.2565276622772217, "learning_rate": 0.00010408318111563861, "loss": 5.3002, "step": 24700 }, { "epoch": 0.03471986820220003, "grad_norm": 1.256977915763855, "learning_rate": 0.00010412533370802303, "loss": 5.287, "step": 24710 }, { "epoch": 0.034733919140363605, "grad_norm": 1.2311729192733765, "learning_rate": 0.00010416748630040746, "loss": 5.39, "step": 24720 }, { "epoch": 0.03474797007852718, "grad_norm": 1.2389764785766602, "learning_rate": 0.0001042096388927919, "loss": 5.3162, "step": 24730 }, { "epoch": 0.03476202101669076, "grad_norm": 1.2789169549942017, "learning_rate": 0.00010425179148517632, "loss": 5.3211, "step": 24740 }, { "epoch": 0.03477607195485433, "grad_norm": 1.225974202156067, "learning_rate": 0.00010429394407756076, "loss": 5.2767, "step": 24750 }, { "epoch": 0.034790122893017916, "grad_norm": 1.2819294929504395, "learning_rate": 0.0001043360966699452, "loss": 5.335, "step": 24760 }, { "epoch": 0.03480417383118149, "grad_norm": 1.329951524734497, "learning_rate": 0.00010437824926232962, "loss": 5.1979, "step": 24770 }, { "epoch": 0.03481822476934507, "grad_norm": 1.2084249258041382, "learning_rate": 0.00010442040185471405, "loss": 5.3172, "step": 24780 }, { "epoch": 0.034832275707508645, "grad_norm": 1.2773807048797607, "learning_rate": 
0.0001044625544470985, "loss": 5.2737, "step": 24790 }, { "epoch": 0.03484632664567222, "grad_norm": 1.302215576171875, "learning_rate": 0.00010450470703948292, "loss": 5.2025, "step": 24800 }, { "epoch": 0.034860377583835804, "grad_norm": 1.317597508430481, "learning_rate": 0.00010454685963186734, "loss": 5.3506, "step": 24810 }, { "epoch": 0.03487442852199938, "grad_norm": 1.2877779006958008, "learning_rate": 0.00010458901222425179, "loss": 5.2092, "step": 24820 }, { "epoch": 0.034888479460162956, "grad_norm": 1.297938585281372, "learning_rate": 0.00010463116481663622, "loss": 5.3321, "step": 24830 }, { "epoch": 0.03490253039832653, "grad_norm": 1.4365018606185913, "learning_rate": 0.00010467331740902064, "loss": 5.1708, "step": 24840 }, { "epoch": 0.03491658133649011, "grad_norm": 1.2824749946594238, "learning_rate": 0.00010471547000140508, "loss": 5.2512, "step": 24850 }, { "epoch": 0.034930632274653685, "grad_norm": 1.3265628814697266, "learning_rate": 0.00010475762259378951, "loss": 5.1575, "step": 24860 }, { "epoch": 0.03494468321281727, "grad_norm": 1.360796332359314, "learning_rate": 0.00010479977518617393, "loss": 5.2298, "step": 24870 }, { "epoch": 0.034958734150980844, "grad_norm": 1.2341952323913574, "learning_rate": 0.00010484192777855837, "loss": 5.1945, "step": 24880 }, { "epoch": 0.03497278508914442, "grad_norm": 1.2878468036651611, "learning_rate": 0.0001048840803709428, "loss": 5.2382, "step": 24890 }, { "epoch": 0.034986836027307996, "grad_norm": 1.248975157737732, "learning_rate": 0.00010492623296332722, "loss": 5.4503, "step": 24900 }, { "epoch": 0.03500088696547157, "grad_norm": 1.2369763851165771, "learning_rate": 0.00010496838555571167, "loss": 5.2011, "step": 24910 }, { "epoch": 0.035014937903635156, "grad_norm": 1.4231282472610474, "learning_rate": 0.0001050105381480961, "loss": 5.3073, "step": 24920 }, { "epoch": 0.03502898884179873, "grad_norm": 1.3121232986450195, "learning_rate": 0.00010505269074048054, "loss": 5.2135, "step": 24930 
}, { "epoch": 0.03504303977996231, "grad_norm": 1.2677346467971802, "learning_rate": 0.00010509484333286496, "loss": 5.2371, "step": 24940 }, { "epoch": 0.035057090718125884, "grad_norm": 1.359431266784668, "learning_rate": 0.00010513699592524939, "loss": 5.2875, "step": 24950 }, { "epoch": 0.03507114165628946, "grad_norm": 1.2087974548339844, "learning_rate": 0.00010517914851763384, "loss": 5.1022, "step": 24960 }, { "epoch": 0.035085192594453044, "grad_norm": 1.297215461730957, "learning_rate": 0.00010522130111001825, "loss": 5.3486, "step": 24970 }, { "epoch": 0.03509924353261662, "grad_norm": 1.2546544075012207, "learning_rate": 0.00010526345370240269, "loss": 5.2505, "step": 24980 }, { "epoch": 0.035113294470780196, "grad_norm": 1.3065543174743652, "learning_rate": 0.00010530560629478713, "loss": 5.1717, "step": 24990 }, { "epoch": 0.03512734540894377, "grad_norm": 1.2734280824661255, "learning_rate": 0.00010534775888717155, "loss": 5.1809, "step": 25000 }, { "epoch": 0.03514139634710735, "grad_norm": 1.3350660800933838, "learning_rate": 0.00010538991147955598, "loss": 5.3357, "step": 25010 }, { "epoch": 0.03515544728527093, "grad_norm": 1.3462845087051392, "learning_rate": 0.00010543206407194042, "loss": 5.266, "step": 25020 }, { "epoch": 0.03516949822343451, "grad_norm": 1.4067381620407104, "learning_rate": 0.00010547000140508639, "loss": 5.2521, "step": 25030 }, { "epoch": 0.035183549161598084, "grad_norm": 1.3267967700958252, "learning_rate": 0.00010551215399747084, "loss": 5.2519, "step": 25040 }, { "epoch": 0.03519760009976166, "grad_norm": 1.2769830226898193, "learning_rate": 0.00010555430658985527, "loss": 5.3735, "step": 25050 }, { "epoch": 0.035211651037925236, "grad_norm": 1.4258005619049072, "learning_rate": 0.0001055964591822397, "loss": 5.3553, "step": 25060 }, { "epoch": 0.03522570197608882, "grad_norm": 1.370233416557312, "learning_rate": 0.00010563861177462413, "loss": 5.2365, "step": 25070 }, { "epoch": 0.035239752914252395, "grad_norm": 
1.2431062459945679, "learning_rate": 0.00010568076436700856, "loss": 5.244, "step": 25080 }, { "epoch": 0.03525380385241597, "grad_norm": 1.4300603866577148, "learning_rate": 0.00010572291695939301, "loss": 5.1916, "step": 25090 }, { "epoch": 0.03526785479057955, "grad_norm": 1.3223384618759155, "learning_rate": 0.00010576506955177742, "loss": 5.1435, "step": 25100 }, { "epoch": 0.035281905728743124, "grad_norm": 1.2834317684173584, "learning_rate": 0.00010580722214416186, "loss": 5.3326, "step": 25110 }, { "epoch": 0.03529595666690671, "grad_norm": 1.2990752458572388, "learning_rate": 0.0001058493747365463, "loss": 5.1775, "step": 25120 }, { "epoch": 0.03531000760507028, "grad_norm": 1.2824280261993408, "learning_rate": 0.00010589152732893072, "loss": 5.2595, "step": 25130 }, { "epoch": 0.03532405854323386, "grad_norm": 1.3103786706924438, "learning_rate": 0.00010593367992131515, "loss": 5.2454, "step": 25140 }, { "epoch": 0.035338109481397435, "grad_norm": 1.2637652158737183, "learning_rate": 0.0001059758325136996, "loss": 5.1544, "step": 25150 }, { "epoch": 0.03535216041956101, "grad_norm": 1.2903791666030884, "learning_rate": 0.00010601798510608401, "loss": 5.2983, "step": 25160 }, { "epoch": 0.03536621135772459, "grad_norm": 1.268822193145752, "learning_rate": 0.00010606013769846844, "loss": 5.1807, "step": 25170 }, { "epoch": 0.03538026229588817, "grad_norm": 1.3194992542266846, "learning_rate": 0.00010610229029085289, "loss": 5.1897, "step": 25180 }, { "epoch": 0.03539431323405175, "grad_norm": 1.3022356033325195, "learning_rate": 0.0001061444428832373, "loss": 5.1931, "step": 25190 }, { "epoch": 0.03540836417221532, "grad_norm": 1.4322096109390259, "learning_rate": 0.00010618659547562174, "loss": 5.1337, "step": 25200 }, { "epoch": 0.0354224151103789, "grad_norm": 1.285513162612915, "learning_rate": 0.00010622874806800618, "loss": 5.3514, "step": 25210 }, { "epoch": 0.035436466048542475, "grad_norm": 1.3835152387619019, "learning_rate": 
0.00010627090066039061, "loss": 5.2987, "step": 25220 }, { "epoch": 0.03545051698670606, "grad_norm": 1.3201239109039307, "learning_rate": 0.00010631305325277503, "loss": 5.3481, "step": 25230 }, { "epoch": 0.035464567924869635, "grad_norm": 1.3172633647918701, "learning_rate": 0.00010635520584515947, "loss": 5.2151, "step": 25240 }, { "epoch": 0.03547861886303321, "grad_norm": 1.3208476305007935, "learning_rate": 0.0001063973584375439, "loss": 5.2665, "step": 25250 }, { "epoch": 0.03549266980119679, "grad_norm": 1.2850348949432373, "learning_rate": 0.00010643951102992832, "loss": 5.2579, "step": 25260 }, { "epoch": 0.03550672073936036, "grad_norm": 1.2918301820755005, "learning_rate": 0.00010648166362231277, "loss": 5.3202, "step": 25270 }, { "epoch": 0.035520771677523946, "grad_norm": 1.3454853296279907, "learning_rate": 0.0001065238162146972, "loss": 5.302, "step": 25280 }, { "epoch": 0.03553482261568752, "grad_norm": 1.23288893699646, "learning_rate": 0.00010656596880708162, "loss": 5.2014, "step": 25290 }, { "epoch": 0.0355488735538511, "grad_norm": 1.2703696489334106, "learning_rate": 0.00010660812139946606, "loss": 5.2891, "step": 25300 }, { "epoch": 0.035562924492014675, "grad_norm": 1.3757070302963257, "learning_rate": 0.00010665027399185049, "loss": 5.2369, "step": 25310 }, { "epoch": 0.03557697543017825, "grad_norm": 1.3169059753417969, "learning_rate": 0.00010669242658423491, "loss": 5.2194, "step": 25320 }, { "epoch": 0.035591026368341834, "grad_norm": 1.3210643529891968, "learning_rate": 0.00010673457917661935, "loss": 5.2344, "step": 25330 }, { "epoch": 0.03560507730650541, "grad_norm": 1.2993870973587036, "learning_rate": 0.00010677673176900379, "loss": 5.2018, "step": 25340 }, { "epoch": 0.035619128244668986, "grad_norm": 1.299553632736206, "learning_rate": 0.00010681888436138822, "loss": 5.2323, "step": 25350 }, { "epoch": 0.03563317918283256, "grad_norm": 1.4303969144821167, "learning_rate": 0.00010686103695377265, "loss": 5.1968, "step": 25360 
}, { "epoch": 0.03564723012099614, "grad_norm": 1.2443745136260986, "learning_rate": 0.00010690318954615708, "loss": 5.3775, "step": 25370 }, { "epoch": 0.03566128105915972, "grad_norm": 1.5378773212432861, "learning_rate": 0.00010694534213854152, "loss": 5.1952, "step": 25380 }, { "epoch": 0.0356753319973233, "grad_norm": 1.2778575420379639, "learning_rate": 0.00010698749473092594, "loss": 5.1902, "step": 25390 }, { "epoch": 0.035689382935486874, "grad_norm": 1.241885781288147, "learning_rate": 0.00010702964732331037, "loss": 5.3114, "step": 25400 }, { "epoch": 0.03570343387365045, "grad_norm": 1.250803828239441, "learning_rate": 0.00010707179991569482, "loss": 5.2635, "step": 25410 }, { "epoch": 0.035717484811814026, "grad_norm": 1.1884135007858276, "learning_rate": 0.00010711395250807924, "loss": 5.2907, "step": 25420 }, { "epoch": 0.03573153574997761, "grad_norm": 1.169700026512146, "learning_rate": 0.00010715610510046367, "loss": 5.2049, "step": 25430 }, { "epoch": 0.035745586688141186, "grad_norm": 1.2907392978668213, "learning_rate": 0.00010719825769284811, "loss": 5.1974, "step": 25440 }, { "epoch": 0.03575963762630476, "grad_norm": 1.3100101947784424, "learning_rate": 0.00010724041028523253, "loss": 5.1541, "step": 25450 }, { "epoch": 0.03577368856446834, "grad_norm": 1.3046437501907349, "learning_rate": 0.00010728256287761696, "loss": 5.2203, "step": 25460 }, { "epoch": 0.035787739502631914, "grad_norm": 1.2555994987487793, "learning_rate": 0.0001073247154700014, "loss": 5.2891, "step": 25470 }, { "epoch": 0.0358017904407955, "grad_norm": 1.315011978149414, "learning_rate": 0.00010736686806238582, "loss": 5.3418, "step": 25480 }, { "epoch": 0.035815841378959073, "grad_norm": 1.3488926887512207, "learning_rate": 0.00010740902065477025, "loss": 5.082, "step": 25490 }, { "epoch": 0.03582989231712265, "grad_norm": 1.2231671810150146, "learning_rate": 0.0001074511732471547, "loss": 5.2338, "step": 25500 }, { "epoch": 0.035843943255286226, "grad_norm": 
1.2394180297851562, "learning_rate": 0.00010749332583953913, "loss": 5.2079, "step": 25510 }, { "epoch": 0.0358579941934498, "grad_norm": 1.2652291059494019, "learning_rate": 0.00010753547843192355, "loss": 5.09, "step": 25520 }, { "epoch": 0.03587204513161338, "grad_norm": 1.3209153413772583, "learning_rate": 0.00010757763102430799, "loss": 5.1912, "step": 25530 }, { "epoch": 0.03588609606977696, "grad_norm": 1.2716671228408813, "learning_rate": 0.00010761978361669242, "loss": 5.1782, "step": 25540 }, { "epoch": 0.03590014700794054, "grad_norm": 1.2218382358551025, "learning_rate": 0.00010766193620907684, "loss": 5.3313, "step": 25550 }, { "epoch": 0.035914197946104114, "grad_norm": 1.2553699016571045, "learning_rate": 0.00010770408880146129, "loss": 5.2144, "step": 25560 }, { "epoch": 0.03592824888426769, "grad_norm": 1.3492262363433838, "learning_rate": 0.00010774624139384572, "loss": 5.2676, "step": 25570 }, { "epoch": 0.035942299822431266, "grad_norm": 1.2544468641281128, "learning_rate": 0.00010778839398623013, "loss": 5.2419, "step": 25580 }, { "epoch": 0.03595635076059485, "grad_norm": 1.228851079940796, "learning_rate": 0.00010783054657861458, "loss": 5.2684, "step": 25590 }, { "epoch": 0.035970401698758425, "grad_norm": 1.2383124828338623, "learning_rate": 0.00010787269917099901, "loss": 5.2532, "step": 25600 }, { "epoch": 0.035984452636922, "grad_norm": 1.3050917387008667, "learning_rate": 0.00010791485176338343, "loss": 5.2751, "step": 25610 }, { "epoch": 0.03599850357508558, "grad_norm": 1.26146399974823, "learning_rate": 0.00010795700435576787, "loss": 5.1677, "step": 25620 }, { "epoch": 0.036012554513249154, "grad_norm": 1.2697525024414062, "learning_rate": 0.0001079991569481523, "loss": 5.3691, "step": 25630 }, { "epoch": 0.03602660545141274, "grad_norm": 1.299674153327942, "learning_rate": 0.00010804130954053673, "loss": 5.198, "step": 25640 }, { "epoch": 0.03604065638957631, "grad_norm": 1.3521478176116943, "learning_rate": 0.00010808346213292117, 
"loss": 5.0404, "step": 25650 }, { "epoch": 0.03605470732773989, "grad_norm": 1.270524501800537, "learning_rate": 0.0001081256147253056, "loss": 5.1391, "step": 25660 }, { "epoch": 0.036068758265903465, "grad_norm": 1.2725433111190796, "learning_rate": 0.00010816776731769004, "loss": 5.3264, "step": 25670 }, { "epoch": 0.03608280920406704, "grad_norm": 1.283799171447754, "learning_rate": 0.00010820991991007446, "loss": 5.2389, "step": 25680 }, { "epoch": 0.036096860142230625, "grad_norm": 1.2880244255065918, "learning_rate": 0.00010825207250245889, "loss": 5.2285, "step": 25690 }, { "epoch": 0.0361109110803942, "grad_norm": 1.3934433460235596, "learning_rate": 0.00010829422509484334, "loss": 5.2087, "step": 25700 }, { "epoch": 0.03612496201855778, "grad_norm": 1.244972825050354, "learning_rate": 0.00010833637768722775, "loss": 5.3292, "step": 25710 }, { "epoch": 0.03613901295672135, "grad_norm": 1.2489376068115234, "learning_rate": 0.00010837853027961218, "loss": 5.4028, "step": 25720 }, { "epoch": 0.03615306389488493, "grad_norm": 1.2495671510696411, "learning_rate": 0.00010842068287199663, "loss": 5.1091, "step": 25730 }, { "epoch": 0.03616711483304851, "grad_norm": 1.2367503643035889, "learning_rate": 0.00010846283546438105, "loss": 5.3419, "step": 25740 }, { "epoch": 0.03618116577121209, "grad_norm": 1.247799038887024, "learning_rate": 0.00010850498805676548, "loss": 5.2247, "step": 25750 }, { "epoch": 0.036195216709375665, "grad_norm": 1.3112616539001465, "learning_rate": 0.00010854714064914992, "loss": 5.3467, "step": 25760 }, { "epoch": 0.03620926764753924, "grad_norm": 1.2683191299438477, "learning_rate": 0.00010858929324153435, "loss": 5.2628, "step": 25770 }, { "epoch": 0.03622331858570282, "grad_norm": 1.295248031616211, "learning_rate": 0.00010863144583391877, "loss": 5.2255, "step": 25780 }, { "epoch": 0.0362373695238664, "grad_norm": 1.3652722835540771, "learning_rate": 0.00010867359842630322, "loss": 5.1288, "step": 25790 }, { "epoch": 
0.036251420462029976, "grad_norm": 1.3258819580078125, "learning_rate": 0.00010871575101868765, "loss": 5.2706, "step": 25800 }, { "epoch": 0.03626547140019355, "grad_norm": 1.213344931602478, "learning_rate": 0.00010875790361107206, "loss": 5.2729, "step": 25810 }, { "epoch": 0.03627952233835713, "grad_norm": 1.3529800176620483, "learning_rate": 0.00010880005620345651, "loss": 5.1662, "step": 25820 }, { "epoch": 0.036293573276520705, "grad_norm": 1.239091157913208, "learning_rate": 0.00010884220879584094, "loss": 5.1821, "step": 25830 }, { "epoch": 0.03630762421468428, "grad_norm": 1.2374707460403442, "learning_rate": 0.00010888436138822536, "loss": 5.326, "step": 25840 }, { "epoch": 0.036321675152847864, "grad_norm": 1.2616337537765503, "learning_rate": 0.0001089265139806098, "loss": 5.2543, "step": 25850 }, { "epoch": 0.03633572609101144, "grad_norm": 1.256049633026123, "learning_rate": 0.00010896866657299423, "loss": 5.1544, "step": 25860 }, { "epoch": 0.036349777029175016, "grad_norm": 1.4155590534210205, "learning_rate": 0.00010901081916537865, "loss": 5.3104, "step": 25870 }, { "epoch": 0.03636382796733859, "grad_norm": 1.2644518613815308, "learning_rate": 0.0001090529717577631, "loss": 5.198, "step": 25880 }, { "epoch": 0.03637787890550217, "grad_norm": 1.264570951461792, "learning_rate": 0.00010909512435014753, "loss": 5.0051, "step": 25890 }, { "epoch": 0.03639192984366575, "grad_norm": 1.208848237991333, "learning_rate": 0.00010913727694253195, "loss": 5.2239, "step": 25900 }, { "epoch": 0.03640598078182933, "grad_norm": 1.2104336023330688, "learning_rate": 0.00010917942953491639, "loss": 5.2732, "step": 25910 }, { "epoch": 0.036420031719992904, "grad_norm": 1.298794150352478, "learning_rate": 0.00010922158212730082, "loss": 5.1825, "step": 25920 }, { "epoch": 0.03643408265815648, "grad_norm": 1.3972251415252686, "learning_rate": 0.00010926373471968525, "loss": 5.4068, "step": 25930 }, { "epoch": 0.036448133596320056, "grad_norm": 1.3141639232635498, 
"learning_rate": 0.00010930588731206968, "loss": 5.3439, "step": 25940 }, { "epoch": 0.03646218453448364, "grad_norm": 1.4204273223876953, "learning_rate": 0.00010934803990445411, "loss": 5.1509, "step": 25950 }, { "epoch": 0.036476235472647216, "grad_norm": 1.2282731533050537, "learning_rate": 0.00010939019249683855, "loss": 5.1782, "step": 25960 }, { "epoch": 0.03649028641081079, "grad_norm": 1.2781927585601807, "learning_rate": 0.00010943234508922298, "loss": 5.1435, "step": 25970 }, { "epoch": 0.03650433734897437, "grad_norm": 1.2722803354263306, "learning_rate": 0.00010947449768160741, "loss": 5.1179, "step": 25980 }, { "epoch": 0.036518388287137944, "grad_norm": 1.3081459999084473, "learning_rate": 0.00010951665027399185, "loss": 5.0402, "step": 25990 }, { "epoch": 0.03653243922530153, "grad_norm": 1.235568881034851, "learning_rate": 0.00010955880286637627, "loss": 5.22, "step": 26000 }, { "epoch": 0.036546490163465103, "grad_norm": 1.3073358535766602, "learning_rate": 0.0001096009554587607, "loss": 5.2479, "step": 26010 }, { "epoch": 0.03656054110162868, "grad_norm": 1.6806720495224, "learning_rate": 0.00010964310805114515, "loss": 5.1738, "step": 26020 }, { "epoch": 0.036574592039792256, "grad_norm": 1.2512214183807373, "learning_rate": 0.00010968526064352956, "loss": 5.2327, "step": 26030 }, { "epoch": 0.03658864297795583, "grad_norm": 1.2663812637329102, "learning_rate": 0.000109727413235914, "loss": 5.1307, "step": 26040 }, { "epoch": 0.036602693916119415, "grad_norm": 1.2181764841079712, "learning_rate": 0.00010976956582829844, "loss": 5.2383, "step": 26050 }, { "epoch": 0.03661674485428299, "grad_norm": 1.291024088859558, "learning_rate": 0.00010981171842068287, "loss": 5.1706, "step": 26060 }, { "epoch": 0.03663079579244657, "grad_norm": 1.2037990093231201, "learning_rate": 0.00010985387101306729, "loss": 5.1271, "step": 26070 }, { "epoch": 0.036644846730610144, "grad_norm": 1.1984660625457764, "learning_rate": 0.00010989602360545173, "loss": 5.291, 
"step": 26080 }, { "epoch": 0.03665889766877372, "grad_norm": 1.301593542098999, "learning_rate": 0.00010993817619783616, "loss": 5.1974, "step": 26090 }, { "epoch": 0.0366729486069373, "grad_norm": 1.249263882637024, "learning_rate": 0.00010998032879022058, "loss": 5.1413, "step": 26100 }, { "epoch": 0.03668699954510088, "grad_norm": 1.3108259439468384, "learning_rate": 0.00011002248138260503, "loss": 5.3095, "step": 26110 }, { "epoch": 0.036701050483264455, "grad_norm": 1.3736425638198853, "learning_rate": 0.00011006463397498946, "loss": 5.2484, "step": 26120 }, { "epoch": 0.03671510142142803, "grad_norm": 1.2452633380889893, "learning_rate": 0.00011010678656737388, "loss": 5.2032, "step": 26130 }, { "epoch": 0.03672915235959161, "grad_norm": 1.235262393951416, "learning_rate": 0.00011014893915975832, "loss": 5.2425, "step": 26140 }, { "epoch": 0.036743203297755184, "grad_norm": 1.2423274517059326, "learning_rate": 0.00011019109175214275, "loss": 5.1752, "step": 26150 }, { "epoch": 0.03675725423591877, "grad_norm": 1.3279187679290771, "learning_rate": 0.00011023324434452717, "loss": 5.232, "step": 26160 }, { "epoch": 0.03677130517408234, "grad_norm": 1.2280527353286743, "learning_rate": 0.00011027539693691161, "loss": 5.3038, "step": 26170 }, { "epoch": 0.03678535611224592, "grad_norm": 1.324863314628601, "learning_rate": 0.00011031754952929604, "loss": 5.131, "step": 26180 }, { "epoch": 0.036799407050409495, "grad_norm": 1.3295210599899292, "learning_rate": 0.00011035970212168046, "loss": 5.2175, "step": 26190 }, { "epoch": 0.03681345798857307, "grad_norm": 1.2481762170791626, "learning_rate": 0.00011040185471406491, "loss": 5.1572, "step": 26200 }, { "epoch": 0.036827508926736655, "grad_norm": 1.2397472858428955, "learning_rate": 0.00011044400730644934, "loss": 5.1869, "step": 26210 }, { "epoch": 0.03684155986490023, "grad_norm": 1.243333339691162, "learning_rate": 0.00011048615989883377, "loss": 5.1236, "step": 26220 }, { "epoch": 0.03685561080306381, 
"grad_norm": 1.2801663875579834, "learning_rate": 0.0001105283124912182, "loss": 5.2453, "step": 26230 }, { "epoch": 0.03686966174122738, "grad_norm": 1.3085575103759766, "learning_rate": 0.00011057046508360263, "loss": 5.131, "step": 26240 }, { "epoch": 0.03688371267939096, "grad_norm": 1.2628742456436157, "learning_rate": 0.00011061261767598706, "loss": 5.2099, "step": 26250 }, { "epoch": 0.03689776361755454, "grad_norm": 1.223900556564331, "learning_rate": 0.0001106547702683715, "loss": 5.234, "step": 26260 }, { "epoch": 0.03691181455571812, "grad_norm": 1.3309193849563599, "learning_rate": 0.00011069692286075593, "loss": 5.3202, "step": 26270 }, { "epoch": 0.036925865493881695, "grad_norm": 1.4030629396438599, "learning_rate": 0.00011073907545314037, "loss": 5.3066, "step": 26280 }, { "epoch": 0.03693991643204527, "grad_norm": 1.2538514137268066, "learning_rate": 0.00011078122804552479, "loss": 5.157, "step": 26290 }, { "epoch": 0.03695396737020885, "grad_norm": 1.26717209815979, "learning_rate": 0.00011082338063790922, "loss": 5.1852, "step": 26300 }, { "epoch": 0.03696801830837243, "grad_norm": 1.2575275897979736, "learning_rate": 0.00011086553323029366, "loss": 5.3232, "step": 26310 }, { "epoch": 0.036982069246536006, "grad_norm": 1.3039658069610596, "learning_rate": 0.00011090768582267808, "loss": 5.1781, "step": 26320 }, { "epoch": 0.03699612018469958, "grad_norm": 1.3649189472198486, "learning_rate": 0.00011094983841506251, "loss": 5.2759, "step": 26330 }, { "epoch": 0.03701017112286316, "grad_norm": 1.210462212562561, "learning_rate": 0.00011099199100744696, "loss": 5.0839, "step": 26340 }, { "epoch": 0.037024222061026735, "grad_norm": 1.2349313497543335, "learning_rate": 0.00011103414359983139, "loss": 5.1527, "step": 26350 }, { "epoch": 0.03703827299919032, "grad_norm": 1.326310634613037, "learning_rate": 0.0001110762961922158, "loss": 5.1428, "step": 26360 }, { "epoch": 0.037052323937353894, "grad_norm": 1.2547760009765625, "learning_rate": 
0.00011111844878460025, "loss": 5.1292, "step": 26370 }, { "epoch": 0.03706637487551747, "grad_norm": 1.3274054527282715, "learning_rate": 0.00011116060137698468, "loss": 5.262, "step": 26380 }, { "epoch": 0.037080425813681046, "grad_norm": 1.247420072555542, "learning_rate": 0.0001112027539693691, "loss": 5.3095, "step": 26390 }, { "epoch": 0.03709447675184462, "grad_norm": 1.4132479429244995, "learning_rate": 0.00011124490656175354, "loss": 5.2025, "step": 26400 }, { "epoch": 0.037108527690008206, "grad_norm": 1.262511968612671, "learning_rate": 0.00011128705915413798, "loss": 5.1409, "step": 26410 }, { "epoch": 0.03712257862817178, "grad_norm": 1.2406529188156128, "learning_rate": 0.00011132921174652239, "loss": 5.1738, "step": 26420 }, { "epoch": 0.03713662956633536, "grad_norm": 1.169571042060852, "learning_rate": 0.00011137136433890684, "loss": 5.2443, "step": 26430 }, { "epoch": 0.037150680504498934, "grad_norm": 1.268477201461792, "learning_rate": 0.00011141351693129127, "loss": 5.1545, "step": 26440 }, { "epoch": 0.03716473144266251, "grad_norm": 1.2400575876235962, "learning_rate": 0.00011145566952367569, "loss": 5.2186, "step": 26450 }, { "epoch": 0.037178782380826086, "grad_norm": 1.244472622871399, "learning_rate": 0.00011149782211606013, "loss": 5.1656, "step": 26460 }, { "epoch": 0.03719283331898967, "grad_norm": 1.3057937622070312, "learning_rate": 0.00011153997470844456, "loss": 5.0956, "step": 26470 }, { "epoch": 0.037206884257153246, "grad_norm": 1.222322702407837, "learning_rate": 0.00011158212730082898, "loss": 5.2298, "step": 26480 }, { "epoch": 0.03722093519531682, "grad_norm": 1.2240188121795654, "learning_rate": 0.00011162427989321342, "loss": 5.1473, "step": 26490 }, { "epoch": 0.0372349861334804, "grad_norm": 1.2890682220458984, "learning_rate": 0.00011166643248559786, "loss": 5.0835, "step": 26500 }, { "epoch": 0.037249037071643974, "grad_norm": 1.3182138204574585, "learning_rate": 0.00011170858507798229, "loss": 5.3088, "step": 26510 }, 
{ "epoch": 0.03726308800980756, "grad_norm": 1.2409218549728394, "learning_rate": 0.00011175073767036672, "loss": 5.2822, "step": 26520 }, { "epoch": 0.037277138947971133, "grad_norm": 1.2367993593215942, "learning_rate": 0.00011179289026275115, "loss": 5.1691, "step": 26530 }, { "epoch": 0.03729118988613471, "grad_norm": 1.2489789724349976, "learning_rate": 0.00011183504285513558, "loss": 5.2188, "step": 26540 }, { "epoch": 0.037305240824298286, "grad_norm": 1.2142417430877686, "learning_rate": 0.00011187719544752001, "loss": 5.2871, "step": 26550 }, { "epoch": 0.03731929176246186, "grad_norm": 1.2378629446029663, "learning_rate": 0.00011191934803990444, "loss": 5.0973, "step": 26560 }, { "epoch": 0.037333342700625445, "grad_norm": 1.2806910276412964, "learning_rate": 0.00011196150063228889, "loss": 5.3434, "step": 26570 }, { "epoch": 0.03734739363878902, "grad_norm": 1.3529186248779297, "learning_rate": 0.0001120036532246733, "loss": 5.1493, "step": 26580 }, { "epoch": 0.0373614445769526, "grad_norm": 1.2549669742584229, "learning_rate": 0.00011204580581705774, "loss": 5.0873, "step": 26590 }, { "epoch": 0.037375495515116174, "grad_norm": 1.2252702713012695, "learning_rate": 0.00011208795840944218, "loss": 5.1133, "step": 26600 }, { "epoch": 0.03738954645327975, "grad_norm": 1.2540626525878906, "learning_rate": 0.0001121301110018266, "loss": 5.2449, "step": 26610 }, { "epoch": 0.03740359739144333, "grad_norm": 1.178832769393921, "learning_rate": 0.00011217226359421103, "loss": 5.1955, "step": 26620 }, { "epoch": 0.03741764832960691, "grad_norm": 1.2755051851272583, "learning_rate": 0.00011221441618659547, "loss": 5.0336, "step": 26630 }, { "epoch": 0.037431699267770485, "grad_norm": 1.2864094972610474, "learning_rate": 0.0001122565687789799, "loss": 5.2242, "step": 26640 }, { "epoch": 0.03744575020593406, "grad_norm": 1.2537206411361694, "learning_rate": 0.00011229872137136432, "loss": 5.3039, "step": 26650 }, { "epoch": 0.03745980114409764, "grad_norm": 
1.2298046350479126, "learning_rate": 0.00011234087396374877, "loss": 5.1417, "step": 26660 }, { "epoch": 0.03747385208226122, "grad_norm": 1.2369047403335571, "learning_rate": 0.0001123830265561332, "loss": 5.2243, "step": 26670 }, { "epoch": 0.0374879030204248, "grad_norm": 1.2635163068771362, "learning_rate": 0.00011242517914851762, "loss": 5.2349, "step": 26680 }, { "epoch": 0.03750195395858837, "grad_norm": 1.3052091598510742, "learning_rate": 0.00011246733174090206, "loss": 5.1844, "step": 26690 }, { "epoch": 0.03751600489675195, "grad_norm": 1.2509127855300903, "learning_rate": 0.00011250948433328649, "loss": 5.1712, "step": 26700 }, { "epoch": 0.037530055834915525, "grad_norm": 1.2266826629638672, "learning_rate": 0.00011255163692567091, "loss": 5.1993, "step": 26710 }, { "epoch": 0.03754410677307911, "grad_norm": 1.3366031646728516, "learning_rate": 0.00011259378951805535, "loss": 5.178, "step": 26720 }, { "epoch": 0.037558157711242685, "grad_norm": 1.2389544248580933, "learning_rate": 0.00011263594211043979, "loss": 5.282, "step": 26730 }, { "epoch": 0.03757220864940626, "grad_norm": 1.2713171243667603, "learning_rate": 0.0001126780947028242, "loss": 5.1055, "step": 26740 }, { "epoch": 0.03758625958756984, "grad_norm": 1.268943190574646, "learning_rate": 0.00011272024729520865, "loss": 5.2751, "step": 26750 }, { "epoch": 0.03760031052573341, "grad_norm": 1.302060842514038, "learning_rate": 0.00011276239988759308, "loss": 5.1239, "step": 26760 }, { "epoch": 0.03761436146389699, "grad_norm": 1.2425678968429565, "learning_rate": 0.00011280455247997751, "loss": 5.2777, "step": 26770 }, { "epoch": 0.03762841240206057, "grad_norm": 1.297904133796692, "learning_rate": 0.00011284670507236194, "loss": 5.3101, "step": 26780 }, { "epoch": 0.03764246334022415, "grad_norm": 1.3182086944580078, "learning_rate": 0.00011288885766474637, "loss": 5.1886, "step": 26790 }, { "epoch": 0.037656514278387725, "grad_norm": 1.1934064626693726, "learning_rate": 
0.0001129310102571308, "loss": 5.1946, "step": 26800 }, { "epoch": 0.0376705652165513, "grad_norm": 1.2613691091537476, "learning_rate": 0.00011297316284951524, "loss": 5.2152, "step": 26810 }, { "epoch": 0.03768461615471488, "grad_norm": 1.2664833068847656, "learning_rate": 0.00011301531544189967, "loss": 5.3019, "step": 26820 }, { "epoch": 0.03769866709287846, "grad_norm": 1.687343955039978, "learning_rate": 0.0001130574680342841, "loss": 5.081, "step": 26830 }, { "epoch": 0.037712718031042036, "grad_norm": 1.2772016525268555, "learning_rate": 0.00011309962062666853, "loss": 5.1301, "step": 26840 }, { "epoch": 0.03772676896920561, "grad_norm": 1.2881845235824585, "learning_rate": 0.00011314177321905296, "loss": 5.2073, "step": 26850 }, { "epoch": 0.03774081990736919, "grad_norm": 1.4994240999221802, "learning_rate": 0.0001131839258114374, "loss": 5.2521, "step": 26860 }, { "epoch": 0.037754870845532765, "grad_norm": 1.3068636655807495, "learning_rate": 0.00011322607840382182, "loss": 5.1688, "step": 26870 }, { "epoch": 0.03776892178369635, "grad_norm": 1.2948259115219116, "learning_rate": 0.00011326823099620625, "loss": 5.1923, "step": 26880 }, { "epoch": 0.037782972721859924, "grad_norm": 1.2383815050125122, "learning_rate": 0.0001133103835885907, "loss": 5.205, "step": 26890 }, { "epoch": 0.0377970236600235, "grad_norm": 1.258082628250122, "learning_rate": 0.00011335253618097512, "loss": 5.1863, "step": 26900 }, { "epoch": 0.037811074598187076, "grad_norm": 1.2574599981307983, "learning_rate": 0.00011339468877335955, "loss": 5.1459, "step": 26910 }, { "epoch": 0.03782512553635065, "grad_norm": 1.3260610103607178, "learning_rate": 0.00011343684136574399, "loss": 5.2764, "step": 26920 }, { "epoch": 0.037839176474514236, "grad_norm": 1.1936684846878052, "learning_rate": 0.00011347899395812842, "loss": 5.2587, "step": 26930 }, { "epoch": 0.03785322741267781, "grad_norm": 1.3640984296798706, "learning_rate": 0.00011352114655051284, "loss": 5.0741, "step": 26940 }, { 
"epoch": 0.03786727835084139, "grad_norm": 1.3140736818313599, "learning_rate": 0.00011356329914289729, "loss": 5.1392, "step": 26950 }, { "epoch": 0.037881329289004964, "grad_norm": 1.268675446510315, "learning_rate": 0.00011360545173528172, "loss": 5.1514, "step": 26960 }, { "epoch": 0.03789538022716854, "grad_norm": 1.4075084924697876, "learning_rate": 0.00011364760432766613, "loss": 5.1024, "step": 26970 }, { "epoch": 0.03790943116533212, "grad_norm": 1.2317532300949097, "learning_rate": 0.00011368975692005058, "loss": 5.2554, "step": 26980 }, { "epoch": 0.0379234821034957, "grad_norm": 1.3724669218063354, "learning_rate": 0.00011373190951243501, "loss": 5.1995, "step": 26990 }, { "epoch": 0.037937533041659276, "grad_norm": 1.2524470090866089, "learning_rate": 0.00011377406210481943, "loss": 5.2892, "step": 27000 }, { "epoch": 0.03795158397982285, "grad_norm": 1.231868863105774, "learning_rate": 0.00011381621469720387, "loss": 5.2432, "step": 27010 }, { "epoch": 0.03796563491798643, "grad_norm": 1.224562406539917, "learning_rate": 0.0001138583672895883, "loss": 5.3197, "step": 27020 }, { "epoch": 0.03797968585615001, "grad_norm": 1.390925407409668, "learning_rate": 0.00011390051988197272, "loss": 5.2181, "step": 27030 }, { "epoch": 0.03799373679431359, "grad_norm": 1.3121964931488037, "learning_rate": 0.00011394267247435717, "loss": 5.1817, "step": 27040 }, { "epoch": 0.03800778773247716, "grad_norm": 1.2776087522506714, "learning_rate": 0.0001139848250667416, "loss": 5.2207, "step": 27050 }, { "epoch": 0.03802183867064074, "grad_norm": 1.2448629140853882, "learning_rate": 0.00011402697765912603, "loss": 5.1486, "step": 27060 }, { "epoch": 0.038035889608804316, "grad_norm": 1.22585129737854, "learning_rate": 0.00011406913025151046, "loss": 5.1954, "step": 27070 }, { "epoch": 0.03804994054696789, "grad_norm": 1.289219856262207, "learning_rate": 0.00011411128284389489, "loss": 5.1862, "step": 27080 }, { "epoch": 0.038063991485131475, "grad_norm": 
1.2818207740783691, "learning_rate": 0.00011415343543627932, "loss": 5.2999, "step": 27090 }, { "epoch": 0.03807804242329505, "grad_norm": 1.237623691558838, "learning_rate": 0.00011419558802866375, "loss": 5.0843, "step": 27100 }, { "epoch": 0.03809209336145863, "grad_norm": 1.233920931816101, "learning_rate": 0.00011423774062104818, "loss": 5.1438, "step": 27110 }, { "epoch": 0.038106144299622204, "grad_norm": 1.782360315322876, "learning_rate": 0.00011427989321343262, "loss": 5.1636, "step": 27120 }, { "epoch": 0.03812019523778578, "grad_norm": 1.2615609169006348, "learning_rate": 0.00011432204580581705, "loss": 5.1292, "step": 27130 }, { "epoch": 0.03813424617594936, "grad_norm": 1.2505155801773071, "learning_rate": 0.00011436419839820148, "loss": 5.1816, "step": 27140 }, { "epoch": 0.03814829711411294, "grad_norm": 1.231854796409607, "learning_rate": 0.00011440635099058592, "loss": 5.1576, "step": 27150 }, { "epoch": 0.038162348052276515, "grad_norm": 1.2065684795379639, "learning_rate": 0.00011444850358297034, "loss": 5.1434, "step": 27160 }, { "epoch": 0.03817639899044009, "grad_norm": 1.2832350730895996, "learning_rate": 0.00011449065617535477, "loss": 5.1549, "step": 27170 }, { "epoch": 0.03819044992860367, "grad_norm": 1.2158983945846558, "learning_rate": 0.00011453280876773922, "loss": 5.1136, "step": 27180 }, { "epoch": 0.03820450086676725, "grad_norm": 1.2421070337295532, "learning_rate": 0.00011457496136012363, "loss": 5.1758, "step": 27190 }, { "epoch": 0.03821855180493083, "grad_norm": 1.2263178825378418, "learning_rate": 0.00011461711395250806, "loss": 5.1627, "step": 27200 }, { "epoch": 0.0382326027430944, "grad_norm": 1.2281807661056519, "learning_rate": 0.00011465926654489251, "loss": 5.1769, "step": 27210 }, { "epoch": 0.03824665368125798, "grad_norm": 1.2824562788009644, "learning_rate": 0.00011470141913727694, "loss": 5.0437, "step": 27220 }, { "epoch": 0.038260704619421555, "grad_norm": 1.2487444877624512, "learning_rate": 
0.00011474357172966136, "loss": 5.1845, "step": 27230 }, { "epoch": 0.03827475555758514, "grad_norm": 1.223210096359253, "learning_rate": 0.0001147857243220458, "loss": 5.1532, "step": 27240 }, { "epoch": 0.038288806495748715, "grad_norm": 1.2843011617660522, "learning_rate": 0.00011482787691443023, "loss": 5.2197, "step": 27250 }, { "epoch": 0.03830285743391229, "grad_norm": 1.2639187574386597, "learning_rate": 0.00011487002950681465, "loss": 5.1132, "step": 27260 }, { "epoch": 0.03831690837207587, "grad_norm": 1.2122303247451782, "learning_rate": 0.0001149121820991991, "loss": 5.255, "step": 27270 }, { "epoch": 0.03833095931023944, "grad_norm": 1.2370988130569458, "learning_rate": 0.00011495433469158353, "loss": 5.1655, "step": 27280 }, { "epoch": 0.038345010248403026, "grad_norm": 1.2647359371185303, "learning_rate": 0.00011499648728396794, "loss": 5.2058, "step": 27290 }, { "epoch": 0.0383590611865666, "grad_norm": 1.2288408279418945, "learning_rate": 0.00011503863987635239, "loss": 5.2224, "step": 27300 }, { "epoch": 0.03837311212473018, "grad_norm": 1.3430143594741821, "learning_rate": 0.00011508079246873682, "loss": 5.1902, "step": 27310 }, { "epoch": 0.038387163062893755, "grad_norm": 1.2298165559768677, "learning_rate": 0.00011512294506112124, "loss": 5.2141, "step": 27320 }, { "epoch": 0.03840121400105733, "grad_norm": 1.2337199449539185, "learning_rate": 0.00011516509765350568, "loss": 5.1845, "step": 27330 }, { "epoch": 0.038415264939220914, "grad_norm": 1.1939727067947388, "learning_rate": 0.00011520725024589011, "loss": 5.2187, "step": 27340 }, { "epoch": 0.03842931587738449, "grad_norm": 1.3627872467041016, "learning_rate": 0.00011524940283827455, "loss": 5.0741, "step": 27350 }, { "epoch": 0.038443366815548066, "grad_norm": 1.3677321672439575, "learning_rate": 0.00011529155543065898, "loss": 5.1668, "step": 27360 }, { "epoch": 0.03845741775371164, "grad_norm": 1.2773394584655762, "learning_rate": 0.00011533370802304341, "loss": 5.1311, "step": 27370 
}, { "epoch": 0.03847146869187522, "grad_norm": 1.2625548839569092, "learning_rate": 0.00011537586061542784, "loss": 5.2052, "step": 27380 }, { "epoch": 0.038485519630038795, "grad_norm": 1.227333426475525, "learning_rate": 0.00011541801320781227, "loss": 5.3206, "step": 27390 }, { "epoch": 0.03849957056820238, "grad_norm": 1.214938759803772, "learning_rate": 0.0001154601658001967, "loss": 5.0788, "step": 27400 }, { "epoch": 0.038513621506365954, "grad_norm": 1.2600798606872559, "learning_rate": 0.00011550231839258113, "loss": 5.2539, "step": 27410 }, { "epoch": 0.03852767244452953, "grad_norm": 1.2314510345458984, "learning_rate": 0.00011554447098496556, "loss": 5.2616, "step": 27420 }, { "epoch": 0.038541723382693106, "grad_norm": 1.2492339611053467, "learning_rate": 0.00011558662357735, "loss": 5.1835, "step": 27430 }, { "epoch": 0.03855577432085668, "grad_norm": 1.3038430213928223, "learning_rate": 0.00011562877616973444, "loss": 5.1959, "step": 27440 }, { "epoch": 0.038569825259020266, "grad_norm": 1.2819589376449585, "learning_rate": 0.00011567092876211886, "loss": 5.2702, "step": 27450 }, { "epoch": 0.03858387619718384, "grad_norm": 1.2621688842773438, "learning_rate": 0.00011571308135450329, "loss": 5.2331, "step": 27460 }, { "epoch": 0.03859792713534742, "grad_norm": 1.2393717765808105, "learning_rate": 0.00011575523394688773, "loss": 5.2205, "step": 27470 }, { "epoch": 0.038611978073510994, "grad_norm": 1.407251238822937, "learning_rate": 0.00011579738653927216, "loss": 5.0831, "step": 27480 }, { "epoch": 0.03862602901167457, "grad_norm": 1.196433663368225, "learning_rate": 0.00011583953913165658, "loss": 5.2502, "step": 27490 }, { "epoch": 0.03864007994983815, "grad_norm": 1.2640856504440308, "learning_rate": 0.00011588169172404103, "loss": 5.1203, "step": 27500 }, { "epoch": 0.03865413088800173, "grad_norm": 1.2602075338363647, "learning_rate": 0.00011592384431642546, "loss": 5.1076, "step": 27510 }, { "epoch": 0.038668181826165306, "grad_norm": 
1.226286768913269, "learning_rate": 0.00011596599690880988, "loss": 5.1437, "step": 27520 }, { "epoch": 0.03868223276432888, "grad_norm": 1.2031055688858032, "learning_rate": 0.00011600814950119432, "loss": 5.1661, "step": 27530 }, { "epoch": 0.03869628370249246, "grad_norm": 1.2152131795883179, "learning_rate": 0.00011605030209357875, "loss": 5.143, "step": 27540 }, { "epoch": 0.03871033464065604, "grad_norm": 1.2977949380874634, "learning_rate": 0.00011609245468596317, "loss": 5.2466, "step": 27550 }, { "epoch": 0.03872438557881962, "grad_norm": 1.2086836099624634, "learning_rate": 0.00011613460727834761, "loss": 5.2369, "step": 27560 }, { "epoch": 0.03873843651698319, "grad_norm": 1.1972341537475586, "learning_rate": 0.00011617675987073204, "loss": 5.2176, "step": 27570 }, { "epoch": 0.03875248745514677, "grad_norm": 1.3312931060791016, "learning_rate": 0.00011621891246311646, "loss": 4.9767, "step": 27580 }, { "epoch": 0.038766538393310346, "grad_norm": 1.2363582849502563, "learning_rate": 0.00011626106505550091, "loss": 5.1467, "step": 27590 }, { "epoch": 0.03878058933147393, "grad_norm": 1.2445141077041626, "learning_rate": 0.00011630321764788534, "loss": 4.9751, "step": 27600 }, { "epoch": 0.038794640269637505, "grad_norm": 1.3403300046920776, "learning_rate": 0.00011634537024026976, "loss": 5.1634, "step": 27610 }, { "epoch": 0.03880869120780108, "grad_norm": 1.2853689193725586, "learning_rate": 0.0001163875228326542, "loss": 5.2343, "step": 27620 }, { "epoch": 0.03882274214596466, "grad_norm": 1.2790160179138184, "learning_rate": 0.00011642967542503863, "loss": 5.3136, "step": 27630 }, { "epoch": 0.038836793084128234, "grad_norm": 1.3508522510528564, "learning_rate": 0.00011647182801742306, "loss": 5.0613, "step": 27640 }, { "epoch": 0.03885084402229182, "grad_norm": 1.274295687675476, "learning_rate": 0.0001165139806098075, "loss": 5.1176, "step": 27650 }, { "epoch": 0.03886489496045539, "grad_norm": 1.3735030889511108, "learning_rate": 
0.00011655613320219193, "loss": 5.2095, "step": 27660 }, { "epoch": 0.03887894589861897, "grad_norm": 1.2918239831924438, "learning_rate": 0.00011659828579457636, "loss": 5.2064, "step": 27670 }, { "epoch": 0.038892996836782545, "grad_norm": 1.2781789302825928, "learning_rate": 0.00011664043838696079, "loss": 5.0818, "step": 27680 }, { "epoch": 0.03890704777494612, "grad_norm": 1.2447351217269897, "learning_rate": 0.00011668259097934522, "loss": 5.085, "step": 27690 }, { "epoch": 0.0389210987131097, "grad_norm": 1.2342867851257324, "learning_rate": 0.00011672474357172965, "loss": 5.1657, "step": 27700 }, { "epoch": 0.03893514965127328, "grad_norm": 1.2419430017471313, "learning_rate": 0.00011676689616411408, "loss": 5.1151, "step": 27710 }, { "epoch": 0.03894920058943686, "grad_norm": 1.261952519416809, "learning_rate": 0.00011680904875649851, "loss": 4.978, "step": 27720 }, { "epoch": 0.03896325152760043, "grad_norm": 1.2440228462219238, "learning_rate": 0.00011685120134888294, "loss": 5.079, "step": 27730 }, { "epoch": 0.03897730246576401, "grad_norm": 1.2461313009262085, "learning_rate": 0.00011689335394126737, "loss": 5.074, "step": 27740 }, { "epoch": 0.038991353403927585, "grad_norm": 1.3326444625854492, "learning_rate": 0.0001169355065336518, "loss": 5.1748, "step": 27750 }, { "epoch": 0.03900540434209117, "grad_norm": 1.1846364736557007, "learning_rate": 0.00011697765912603625, "loss": 5.1285, "step": 27760 }, { "epoch": 0.039019455280254745, "grad_norm": 1.3407005071640015, "learning_rate": 0.00011701981171842068, "loss": 5.1625, "step": 27770 }, { "epoch": 0.03903350621841832, "grad_norm": 1.1774260997772217, "learning_rate": 0.0001170619643108051, "loss": 5.1255, "step": 27780 }, { "epoch": 0.0390475571565819, "grad_norm": 1.2277191877365112, "learning_rate": 0.00011710411690318954, "loss": 5.0722, "step": 27790 }, { "epoch": 0.03906160809474547, "grad_norm": 1.2346806526184082, "learning_rate": 0.00011714626949557398, "loss": 5.3759, "step": 27800 }, { 
"epoch": 0.039075659032909056, "grad_norm": 1.2467949390411377, "learning_rate": 0.00011718842208795839, "loss": 5.0743, "step": 27810 }, { "epoch": 0.03908970997107263, "grad_norm": 1.2584148645401, "learning_rate": 0.00011723057468034284, "loss": 5.2156, "step": 27820 }, { "epoch": 0.03910376090923621, "grad_norm": 1.2830735445022583, "learning_rate": 0.00011727272727272727, "loss": 5.1202, "step": 27830 }, { "epoch": 0.039117811847399785, "grad_norm": 1.2006511688232422, "learning_rate": 0.00011731487986511169, "loss": 5.1867, "step": 27840 }, { "epoch": 0.03913186278556336, "grad_norm": 1.2319871187210083, "learning_rate": 0.00011735703245749613, "loss": 5.1403, "step": 27850 }, { "epoch": 0.039145913723726944, "grad_norm": 1.2313419580459595, "learning_rate": 0.00011739918504988056, "loss": 5.1664, "step": 27860 }, { "epoch": 0.03915996466189052, "grad_norm": 1.2943781614303589, "learning_rate": 0.00011744133764226498, "loss": 5.0664, "step": 27870 }, { "epoch": 0.039174015600054096, "grad_norm": 1.2293378114700317, "learning_rate": 0.00011748349023464942, "loss": 5.0583, "step": 27880 }, { "epoch": 0.03918806653821767, "grad_norm": 1.1826175451278687, "learning_rate": 0.00011752564282703386, "loss": 5.1246, "step": 27890 }, { "epoch": 0.03920211747638125, "grad_norm": 1.1859021186828613, "learning_rate": 0.00011756779541941827, "loss": 5.166, "step": 27900 }, { "epoch": 0.03921616841454483, "grad_norm": 1.2054551839828491, "learning_rate": 0.00011760994801180272, "loss": 5.0331, "step": 27910 }, { "epoch": 0.03923021935270841, "grad_norm": 1.2477535009384155, "learning_rate": 0.0001176478853449487, "loss": 5.1473, "step": 27920 }, { "epoch": 0.039244270290871984, "grad_norm": 1.1738812923431396, "learning_rate": 0.00011769003793733315, "loss": 5.2345, "step": 27930 }, { "epoch": 0.03925832122903556, "grad_norm": 1.2477853298187256, "learning_rate": 0.00011773219052971756, "loss": 5.1245, "step": 27940 }, { "epoch": 0.039272372167199136, "grad_norm": 
1.1837849617004395, "learning_rate": 0.000117774343122102, "loss": 5.2802, "step": 27950 }, { "epoch": 0.03928642310536272, "grad_norm": 1.2485941648483276, "learning_rate": 0.00011781649571448644, "loss": 5.1342, "step": 27960 }, { "epoch": 0.039300474043526296, "grad_norm": 1.2612384557724, "learning_rate": 0.00011785864830687086, "loss": 5.2221, "step": 27970 }, { "epoch": 0.03931452498168987, "grad_norm": 1.2098948955535889, "learning_rate": 0.00011790080089925529, "loss": 5.0834, "step": 27980 }, { "epoch": 0.03932857591985345, "grad_norm": 1.2792184352874756, "learning_rate": 0.00011794295349163973, "loss": 5.1232, "step": 27990 }, { "epoch": 0.039342626858017024, "grad_norm": 1.1822669506072998, "learning_rate": 0.00011798510608402415, "loss": 5.1502, "step": 28000 }, { "epoch": 0.03935667779618061, "grad_norm": 1.237909197807312, "learning_rate": 0.00011802725867640858, "loss": 5.0886, "step": 28010 }, { "epoch": 0.03937072873434418, "grad_norm": 1.4007855653762817, "learning_rate": 0.00011806941126879303, "loss": 5.1382, "step": 28020 }, { "epoch": 0.03938477967250776, "grad_norm": 1.2554928064346313, "learning_rate": 0.00011811156386117744, "loss": 5.1784, "step": 28030 }, { "epoch": 0.039398830610671336, "grad_norm": 1.2145435810089111, "learning_rate": 0.00011815371645356187, "loss": 5.2392, "step": 28040 }, { "epoch": 0.03941288154883491, "grad_norm": 1.194926381111145, "learning_rate": 0.00011819586904594632, "loss": 5.1088, "step": 28050 }, { "epoch": 0.03942693248699849, "grad_norm": 1.2417168617248535, "learning_rate": 0.00011823802163833075, "loss": 5.2147, "step": 28060 }, { "epoch": 0.03944098342516207, "grad_norm": 1.171698808670044, "learning_rate": 0.00011828017423071517, "loss": 5.1953, "step": 28070 }, { "epoch": 0.03945503436332565, "grad_norm": 1.2243210077285767, "learning_rate": 0.00011832232682309961, "loss": 5.243, "step": 28080 }, { "epoch": 0.03946908530148922, "grad_norm": 1.2159587144851685, "learning_rate": 
0.00011836447941548404, "loss": 5.0568, "step": 28090 }, { "epoch": 0.0394831362396528, "grad_norm": 1.2428697347640991, "learning_rate": 0.00011840663200786846, "loss": 5.0406, "step": 28100 }, { "epoch": 0.039497187177816376, "grad_norm": 1.2648569345474243, "learning_rate": 0.0001184487846002529, "loss": 5.222, "step": 28110 }, { "epoch": 0.03951123811597996, "grad_norm": 1.2005157470703125, "learning_rate": 0.00011849093719263734, "loss": 5.1258, "step": 28120 }, { "epoch": 0.039525289054143535, "grad_norm": 1.2685343027114868, "learning_rate": 0.00011853308978502177, "loss": 5.064, "step": 28130 }, { "epoch": 0.03953933999230711, "grad_norm": 1.2965656518936157, "learning_rate": 0.0001185752423774062, "loss": 5.0134, "step": 28140 }, { "epoch": 0.03955339093047069, "grad_norm": 1.264532208442688, "learning_rate": 0.00011861739496979063, "loss": 5.2289, "step": 28150 }, { "epoch": 0.039567441868634264, "grad_norm": 1.3054733276367188, "learning_rate": 0.00011865954756217506, "loss": 5.1761, "step": 28160 }, { "epoch": 0.03958149280679785, "grad_norm": 1.1878447532653809, "learning_rate": 0.0001187017001545595, "loss": 5.1396, "step": 28170 }, { "epoch": 0.03959554374496142, "grad_norm": 1.3018479347229004, "learning_rate": 0.00011874385274694392, "loss": 5.1258, "step": 28180 }, { "epoch": 0.039609594683125, "grad_norm": 1.2443947792053223, "learning_rate": 0.00011878600533932837, "loss": 5.1624, "step": 28190 }, { "epoch": 0.039623645621288575, "grad_norm": 1.3728779554367065, "learning_rate": 0.00011882815793171279, "loss": 5.2995, "step": 28200 }, { "epoch": 0.03963769655945215, "grad_norm": 1.1758084297180176, "learning_rate": 0.00011887031052409722, "loss": 5.1263, "step": 28210 }, { "epoch": 0.039651747497615734, "grad_norm": 1.2095502614974976, "learning_rate": 0.00011891246311648166, "loss": 5.0737, "step": 28220 }, { "epoch": 0.03966579843577931, "grad_norm": 1.2369705438613892, "learning_rate": 0.00011895461570886608, "loss": 5.2049, "step": 28230 }, 
{ "epoch": 0.03967984937394289, "grad_norm": 1.2536842823028564, "learning_rate": 0.00011899676830125051, "loss": 5.1596, "step": 28240 }, { "epoch": 0.03969390031210646, "grad_norm": 1.2963687181472778, "learning_rate": 0.00011903892089363496, "loss": 5.0851, "step": 28250 }, { "epoch": 0.03970795125027004, "grad_norm": 1.3036259412765503, "learning_rate": 0.00011908107348601937, "loss": 5.2885, "step": 28260 }, { "epoch": 0.03972200218843362, "grad_norm": 1.2262283563613892, "learning_rate": 0.0001191232260784038, "loss": 5.2086, "step": 28270 }, { "epoch": 0.0397360531265972, "grad_norm": 1.234108805656433, "learning_rate": 0.00011916537867078825, "loss": 5.185, "step": 28280 }, { "epoch": 0.039750104064760775, "grad_norm": 1.2334399223327637, "learning_rate": 0.00011920753126317267, "loss": 5.1078, "step": 28290 }, { "epoch": 0.03976415500292435, "grad_norm": 1.2662261724472046, "learning_rate": 0.0001192496838555571, "loss": 5.1132, "step": 28300 }, { "epoch": 0.03977820594108793, "grad_norm": 1.2096099853515625, "learning_rate": 0.00011929183644794154, "loss": 5.2598, "step": 28310 }, { "epoch": 0.03979225687925151, "grad_norm": 1.2106359004974365, "learning_rate": 0.00011933398904032596, "loss": 4.9739, "step": 28320 }, { "epoch": 0.039806307817415086, "grad_norm": 1.268173336982727, "learning_rate": 0.00011937614163271039, "loss": 5.3448, "step": 28330 }, { "epoch": 0.03982035875557866, "grad_norm": 1.2181192636489868, "learning_rate": 0.00011941829422509484, "loss": 5.0814, "step": 28340 }, { "epoch": 0.03983440969374224, "grad_norm": 1.2973514795303345, "learning_rate": 0.00011946044681747927, "loss": 4.9829, "step": 28350 }, { "epoch": 0.039848460631905815, "grad_norm": 1.1853080987930298, "learning_rate": 0.00011950259940986369, "loss": 5.1086, "step": 28360 }, { "epoch": 0.03986251157006939, "grad_norm": 1.2887145280838013, "learning_rate": 0.00011954475200224813, "loss": 5.2579, "step": 28370 }, { "epoch": 0.039876562508232974, "grad_norm": 
1.3244291543960571, "learning_rate": 0.00011958690459463256, "loss": 5.0825, "step": 28380 }, { "epoch": 0.03989061344639655, "grad_norm": 1.2618260383605957, "learning_rate": 0.00011962905718701698, "loss": 5.0181, "step": 28390 }, { "epoch": 0.039904664384560126, "grad_norm": 1.179956078529358, "learning_rate": 0.00011967120977940142, "loss": 5.2237, "step": 28400 }, { "epoch": 0.0399187153227237, "grad_norm": 1.2036820650100708, "learning_rate": 0.00011971336237178585, "loss": 5.0647, "step": 28410 }, { "epoch": 0.03993276626088728, "grad_norm": 1.2143844366073608, "learning_rate": 0.00011975551496417029, "loss": 5.0621, "step": 28420 }, { "epoch": 0.03994681719905086, "grad_norm": 1.1590876579284668, "learning_rate": 0.00011979766755655472, "loss": 5.0342, "step": 28430 }, { "epoch": 0.03996086813721444, "grad_norm": 1.2688127756118774, "learning_rate": 0.00011983982014893915, "loss": 5.2687, "step": 28440 }, { "epoch": 0.039974919075378014, "grad_norm": 1.236639142036438, "learning_rate": 0.00011988197274132358, "loss": 5.0454, "step": 28450 }, { "epoch": 0.03998897001354159, "grad_norm": 1.2120410203933716, "learning_rate": 0.00011992412533370801, "loss": 5.1101, "step": 28460 }, { "epoch": 0.040003020951705166, "grad_norm": 1.2371752262115479, "learning_rate": 0.00011996627792609244, "loss": 5.1434, "step": 28470 }, { "epoch": 0.04001707188986875, "grad_norm": 1.2628980875015259, "learning_rate": 0.00012000843051847689, "loss": 5.058, "step": 28480 }, { "epoch": 0.040031122828032326, "grad_norm": 1.2172374725341797, "learning_rate": 0.0001200505831108613, "loss": 5.1972, "step": 28490 }, { "epoch": 0.0400451737661959, "grad_norm": 1.1844308376312256, "learning_rate": 0.00012009273570324574, "loss": 5.1624, "step": 28500 }, { "epoch": 0.04005922470435948, "grad_norm": 1.2529436349868774, "learning_rate": 0.00012013488829563018, "loss": 5.0565, "step": 28510 }, { "epoch": 0.040073275642523054, "grad_norm": 1.2237712144851685, "learning_rate": 
0.0001201770408880146, "loss": 5.1909, "step": 28520 }, { "epoch": 0.04008732658068664, "grad_norm": 1.2147319316864014, "learning_rate": 0.00012021919348039903, "loss": 5.0974, "step": 28530 }, { "epoch": 0.04010137751885021, "grad_norm": 1.194334626197815, "learning_rate": 0.00012026134607278347, "loss": 5.2433, "step": 28540 }, { "epoch": 0.04011542845701379, "grad_norm": 1.2386977672576904, "learning_rate": 0.00012030349866516789, "loss": 5.2399, "step": 28550 }, { "epoch": 0.040129479395177366, "grad_norm": 1.2008346319198608, "learning_rate": 0.00012034565125755232, "loss": 5.0889, "step": 28560 }, { "epoch": 0.04014353033334094, "grad_norm": 1.1994179487228394, "learning_rate": 0.00012038780384993677, "loss": 5.1955, "step": 28570 }, { "epoch": 0.040157581271504525, "grad_norm": 1.251625895500183, "learning_rate": 0.00012042995644232118, "loss": 5.1563, "step": 28580 }, { "epoch": 0.0401716322096681, "grad_norm": 1.3374724388122559, "learning_rate": 0.00012047210903470562, "loss": 5.2259, "step": 28590 }, { "epoch": 0.04018568314783168, "grad_norm": 1.184990644454956, "learning_rate": 0.00012051426162709006, "loss": 5.1774, "step": 28600 }, { "epoch": 0.04019973408599525, "grad_norm": 1.2657346725463867, "learning_rate": 0.00012055641421947449, "loss": 5.0466, "step": 28610 }, { "epoch": 0.04021378502415883, "grad_norm": 1.301020860671997, "learning_rate": 0.00012059435155262047, "loss": 5.1848, "step": 28620 }, { "epoch": 0.04022783596232241, "grad_norm": 1.2240586280822754, "learning_rate": 0.0001206365041450049, "loss": 5.1032, "step": 28630 }, { "epoch": 0.04024188690048599, "grad_norm": 1.2186791896820068, "learning_rate": 0.00012067865673738935, "loss": 5.1814, "step": 28640 }, { "epoch": 0.040255937838649565, "grad_norm": 1.2839056253433228, "learning_rate": 0.00012072080932977377, "loss": 5.2019, "step": 28650 }, { "epoch": 0.04026998877681314, "grad_norm": 1.2120087146759033, "learning_rate": 0.0001207629619221582, "loss": 5.1894, "step": 28660 }, { 
"epoch": 0.04028403971497672, "grad_norm": 1.3091609477996826, "learning_rate": 0.00012080511451454264, "loss": 5.2507, "step": 28670 }, { "epoch": 0.040298090653140294, "grad_norm": 1.285378098487854, "learning_rate": 0.00012084726710692706, "loss": 5.1372, "step": 28680 }, { "epoch": 0.04031214159130388, "grad_norm": 1.1991231441497803, "learning_rate": 0.00012088941969931149, "loss": 5.1167, "step": 28690 }, { "epoch": 0.04032619252946745, "grad_norm": 1.2777354717254639, "learning_rate": 0.00012093157229169594, "loss": 5.0535, "step": 28700 }, { "epoch": 0.04034024346763103, "grad_norm": 1.2229773998260498, "learning_rate": 0.00012097372488408035, "loss": 5.1478, "step": 28710 }, { "epoch": 0.040354294405794605, "grad_norm": 1.2146847248077393, "learning_rate": 0.00012101587747646479, "loss": 5.1944, "step": 28720 }, { "epoch": 0.04036834534395818, "grad_norm": 1.1953580379486084, "learning_rate": 0.00012105803006884923, "loss": 5.1611, "step": 28730 }, { "epoch": 0.040382396282121764, "grad_norm": 1.2256001234054565, "learning_rate": 0.00012110018266123366, "loss": 5.316, "step": 28740 }, { "epoch": 0.04039644722028534, "grad_norm": 1.2031221389770508, "learning_rate": 0.00012114233525361808, "loss": 5.0734, "step": 28750 }, { "epoch": 0.04041049815844892, "grad_norm": 1.2694804668426514, "learning_rate": 0.00012118448784600252, "loss": 5.1379, "step": 28760 }, { "epoch": 0.04042454909661249, "grad_norm": 1.2258470058441162, "learning_rate": 0.00012122664043838696, "loss": 5.0536, "step": 28770 }, { "epoch": 0.04043860003477607, "grad_norm": 1.2044484615325928, "learning_rate": 0.00012126879303077137, "loss": 5.2435, "step": 28780 }, { "epoch": 0.04045265097293965, "grad_norm": 1.27933931350708, "learning_rate": 0.00012131094562315582, "loss": 5.1742, "step": 28790 }, { "epoch": 0.04046670191110323, "grad_norm": 1.2397048473358154, "learning_rate": 0.00012135309821554025, "loss": 5.1537, "step": 28800 }, { "epoch": 0.040480752849266805, "grad_norm": 
1.3826310634613037, "learning_rate": 0.00012139525080792467, "loss": 5.1767, "step": 28810 }, { "epoch": 0.04049480378743038, "grad_norm": 1.2300126552581787, "learning_rate": 0.00012143740340030911, "loss": 5.0833, "step": 28820 }, { "epoch": 0.04050885472559396, "grad_norm": 1.2584890127182007, "learning_rate": 0.00012147955599269354, "loss": 5.1247, "step": 28830 }, { "epoch": 0.04052290566375754, "grad_norm": 1.433069109916687, "learning_rate": 0.00012152170858507796, "loss": 5.133, "step": 28840 }, { "epoch": 0.040536956601921116, "grad_norm": 1.2440624237060547, "learning_rate": 0.0001215638611774624, "loss": 5.0757, "step": 28850 }, { "epoch": 0.04055100754008469, "grad_norm": 1.216599702835083, "learning_rate": 0.00012160601376984684, "loss": 4.9827, "step": 28860 }, { "epoch": 0.04056505847824827, "grad_norm": 1.3541535139083862, "learning_rate": 0.00012164816636223125, "loss": 5.112, "step": 28870 }, { "epoch": 0.040579109416411845, "grad_norm": 1.1753549575805664, "learning_rate": 0.0001216903189546157, "loss": 5.1629, "step": 28880 }, { "epoch": 0.04059316035457543, "grad_norm": 1.2143678665161133, "learning_rate": 0.00012173247154700013, "loss": 5.1621, "step": 28890 }, { "epoch": 0.040607211292739004, "grad_norm": 1.2164275646209717, "learning_rate": 0.00012177462413938457, "loss": 5.1983, "step": 28900 }, { "epoch": 0.04062126223090258, "grad_norm": 1.676349401473999, "learning_rate": 0.00012181677673176899, "loss": 5.1482, "step": 28910 }, { "epoch": 0.040635313169066156, "grad_norm": 1.188805341720581, "learning_rate": 0.00012185892932415342, "loss": 5.1028, "step": 28920 }, { "epoch": 0.04064936410722973, "grad_norm": 1.304349660873413, "learning_rate": 0.00012190108191653787, "loss": 5.1201, "step": 28930 }, { "epoch": 0.040663415045393315, "grad_norm": 1.256514549255371, "learning_rate": 0.00012194323450892229, "loss": 5.1092, "step": 28940 }, { "epoch": 0.04067746598355689, "grad_norm": 1.1946409940719604, "learning_rate": 
0.00012198538710130672, "loss": 5.2572, "step": 28950 }, { "epoch": 0.04069151692172047, "grad_norm": 1.2821025848388672, "learning_rate": 0.00012202753969369116, "loss": 5.16, "step": 28960 }, { "epoch": 0.040705567859884044, "grad_norm": 1.2164089679718018, "learning_rate": 0.00012206969228607558, "loss": 5.1757, "step": 28970 }, { "epoch": 0.04071961879804762, "grad_norm": 1.3499404191970825, "learning_rate": 0.00012211184487846002, "loss": 5.1214, "step": 28980 }, { "epoch": 0.040733669736211196, "grad_norm": 1.1832070350646973, "learning_rate": 0.00012215399747084445, "loss": 5.1601, "step": 28990 }, { "epoch": 0.04074772067437478, "grad_norm": 1.2199169397354126, "learning_rate": 0.00012219615006322886, "loss": 5.046, "step": 29000 }, { "epoch": 0.040761771612538356, "grad_norm": 1.2331231832504272, "learning_rate": 0.00012223830265561332, "loss": 5.0809, "step": 29010 }, { "epoch": 0.04077582255070193, "grad_norm": 1.2529569864273071, "learning_rate": 0.00012228045524799775, "loss": 5.0944, "step": 29020 }, { "epoch": 0.04078987348886551, "grad_norm": 1.2748099565505981, "learning_rate": 0.00012232260784038218, "loss": 4.9984, "step": 29030 }, { "epoch": 0.040803924427029084, "grad_norm": 1.151564121246338, "learning_rate": 0.0001223647604327666, "loss": 5.0823, "step": 29040 }, { "epoch": 0.04081797536519267, "grad_norm": 1.1809443235397339, "learning_rate": 0.00012240691302515104, "loss": 5.1298, "step": 29050 }, { "epoch": 0.04083202630335624, "grad_norm": 1.2853679656982422, "learning_rate": 0.00012244906561753547, "loss": 5.1489, "step": 29060 }, { "epoch": 0.04084607724151982, "grad_norm": 1.2451461553573608, "learning_rate": 0.0001224912182099199, "loss": 5.2294, "step": 29070 }, { "epoch": 0.040860128179683396, "grad_norm": 1.1953366994857788, "learning_rate": 0.00012253337080230434, "loss": 5.1022, "step": 29080 }, { "epoch": 0.04087417911784697, "grad_norm": 1.1801906824111938, "learning_rate": 0.00012257552339468877, "loss": 5.0872, "step": 29090 
}, { "epoch": 0.040888230056010555, "grad_norm": 1.2454745769500732, "learning_rate": 0.0001226176759870732, "loss": 5.1487, "step": 29100 }, { "epoch": 0.04090228099417413, "grad_norm": 1.5468188524246216, "learning_rate": 0.00012265982857945763, "loss": 5.094, "step": 29110 }, { "epoch": 0.04091633193233771, "grad_norm": 1.2361538410186768, "learning_rate": 0.00012270198117184206, "loss": 5.1832, "step": 29120 }, { "epoch": 0.04093038287050128, "grad_norm": 1.2000207901000977, "learning_rate": 0.0001227441337642265, "loss": 5.1678, "step": 29130 }, { "epoch": 0.04094443380866486, "grad_norm": 1.1986608505249023, "learning_rate": 0.00012278628635661092, "loss": 5.1264, "step": 29140 }, { "epoch": 0.04095848474682844, "grad_norm": 1.2702739238739014, "learning_rate": 0.00012282843894899535, "loss": 5.1283, "step": 29150 }, { "epoch": 0.04097253568499202, "grad_norm": 1.256484031677246, "learning_rate": 0.00012287059154137978, "loss": 5.1021, "step": 29160 }, { "epoch": 0.040986586623155595, "grad_norm": 1.2504194974899292, "learning_rate": 0.00012291274413376422, "loss": 5.1583, "step": 29170 }, { "epoch": 0.04100063756131917, "grad_norm": 1.193787693977356, "learning_rate": 0.00012295489672614865, "loss": 5.1885, "step": 29180 }, { "epoch": 0.04101468849948275, "grad_norm": 1.2166396379470825, "learning_rate": 0.00012299704931853308, "loss": 4.9795, "step": 29190 }, { "epoch": 0.04102873943764633, "grad_norm": 1.3542882204055786, "learning_rate": 0.0001230392019109175, "loss": 5.0044, "step": 29200 }, { "epoch": 0.04104279037580991, "grad_norm": 1.3186988830566406, "learning_rate": 0.00012308135450330194, "loss": 5.0079, "step": 29210 }, { "epoch": 0.04105684131397348, "grad_norm": 1.174622893333435, "learning_rate": 0.00012312350709568637, "loss": 5.1067, "step": 29220 }, { "epoch": 0.04107089225213706, "grad_norm": 1.1636348962783813, "learning_rate": 0.0001231656596880708, "loss": 5.1078, "step": 29230 }, { "epoch": 0.041084943190300635, "grad_norm": 
1.1850835084915161, "learning_rate": 0.00012320781228045523, "loss": 5.1331, "step": 29240 }, { "epoch": 0.04109899412846422, "grad_norm": 1.2667229175567627, "learning_rate": 0.00012324996487283966, "loss": 5.0366, "step": 29250 }, { "epoch": 0.041113045066627794, "grad_norm": 1.217487096786499, "learning_rate": 0.0001232921174652241, "loss": 5.1203, "step": 29260 }, { "epoch": 0.04112709600479137, "grad_norm": 1.2223021984100342, "learning_rate": 0.00012333427005760853, "loss": 5.0865, "step": 29270 }, { "epoch": 0.04114114694295495, "grad_norm": 1.1677926778793335, "learning_rate": 0.00012337642264999296, "loss": 5.1227, "step": 29280 }, { "epoch": 0.04115519788111852, "grad_norm": 1.197870135307312, "learning_rate": 0.0001234185752423774, "loss": 5.1118, "step": 29290 }, { "epoch": 0.0411692488192821, "grad_norm": 1.2114304304122925, "learning_rate": 0.00012346072783476182, "loss": 5.1161, "step": 29300 }, { "epoch": 0.04118329975744568, "grad_norm": 1.266849160194397, "learning_rate": 0.00012350288042714625, "loss": 5.0399, "step": 29310 }, { "epoch": 0.04119735069560926, "grad_norm": 1.3081083297729492, "learning_rate": 0.0001235450330195307, "loss": 5.1199, "step": 29320 }, { "epoch": 0.041211401633772834, "grad_norm": 1.2739002704620361, "learning_rate": 0.00012358718561191511, "loss": 5.0112, "step": 29330 }, { "epoch": 0.04122545257193641, "grad_norm": 1.150550127029419, "learning_rate": 0.00012362933820429955, "loss": 5.0035, "step": 29340 }, { "epoch": 0.04123950351009999, "grad_norm": 1.1802303791046143, "learning_rate": 0.000123671490796684, "loss": 5.0593, "step": 29350 }, { "epoch": 0.04125355444826357, "grad_norm": 1.209800124168396, "learning_rate": 0.0001237136433890684, "loss": 5.0386, "step": 29360 }, { "epoch": 0.041267605386427146, "grad_norm": 1.1998001337051392, "learning_rate": 0.00012375579598145284, "loss": 5.1109, "step": 29370 }, { "epoch": 0.04128165632459072, "grad_norm": 1.1864341497421265, "learning_rate": 0.0001237979485738373, 
"loss": 5.1374, "step": 29380 }, { "epoch": 0.0412957072627543, "grad_norm": 1.1645110845565796, "learning_rate": 0.0001238401011662217, "loss": 5.0924, "step": 29390 }, { "epoch": 0.041309758200917875, "grad_norm": 1.193508505821228, "learning_rate": 0.00012388225375860613, "loss": 5.1911, "step": 29400 }, { "epoch": 0.04132380913908146, "grad_norm": 1.2677333354949951, "learning_rate": 0.0001239244063509906, "loss": 5.1231, "step": 29410 }, { "epoch": 0.041337860077245034, "grad_norm": 1.1700749397277832, "learning_rate": 0.000123966558943375, "loss": 5.1528, "step": 29420 }, { "epoch": 0.04135191101540861, "grad_norm": 1.1878379583358765, "learning_rate": 0.00012400871153575943, "loss": 5.2438, "step": 29430 }, { "epoch": 0.041365961953572186, "grad_norm": 1.2088021039962769, "learning_rate": 0.00012405086412814388, "loss": 5.0927, "step": 29440 }, { "epoch": 0.04138001289173576, "grad_norm": 1.279140830039978, "learning_rate": 0.00012409301672052832, "loss": 5.0007, "step": 29450 }, { "epoch": 0.041394063829899345, "grad_norm": 1.22353196144104, "learning_rate": 0.00012413516931291272, "loss": 5.1005, "step": 29460 }, { "epoch": 0.04140811476806292, "grad_norm": 1.2469463348388672, "learning_rate": 0.00012417732190529718, "loss": 5.0887, "step": 29470 }, { "epoch": 0.0414221657062265, "grad_norm": 1.2041430473327637, "learning_rate": 0.0001242194744976816, "loss": 5.1795, "step": 29480 }, { "epoch": 0.041436216644390074, "grad_norm": 1.2046658992767334, "learning_rate": 0.000124261627090066, "loss": 5.1795, "step": 29490 }, { "epoch": 0.04145026758255365, "grad_norm": 1.1940624713897705, "learning_rate": 0.00012430377968245047, "loss": 5.1144, "step": 29500 }, { "epoch": 0.04146431852071723, "grad_norm": 1.2514973878860474, "learning_rate": 0.0001243459322748349, "loss": 5.0636, "step": 29510 }, { "epoch": 0.04147836945888081, "grad_norm": 1.166250228881836, "learning_rate": 0.0001243880848672193, "loss": 5.1468, "step": 29520 }, { "epoch": 
0.041492420397044386, "grad_norm": 1.201289176940918, "learning_rate": 0.00012443023745960376, "loss": 5.0477, "step": 29530 }, { "epoch": 0.04150647133520796, "grad_norm": 1.2102603912353516, "learning_rate": 0.0001244723900519882, "loss": 4.9634, "step": 29540 }, { "epoch": 0.04152052227337154, "grad_norm": 1.303805947303772, "learning_rate": 0.0001245145426443726, "loss": 5.0351, "step": 29550 }, { "epoch": 0.04153457321153512, "grad_norm": 1.160770058631897, "learning_rate": 0.00012455669523675706, "loss": 5.224, "step": 29560 }, { "epoch": 0.0415486241496987, "grad_norm": 1.1994746923446655, "learning_rate": 0.0001245988478291415, "loss": 5.1209, "step": 29570 }, { "epoch": 0.04156267508786227, "grad_norm": 1.2357219457626343, "learning_rate": 0.0001246410004215259, "loss": 5.0569, "step": 29580 }, { "epoch": 0.04157672602602585, "grad_norm": 1.1528397798538208, "learning_rate": 0.00012468315301391035, "loss": 5.1259, "step": 29590 }, { "epoch": 0.041590776964189426, "grad_norm": 1.1492503881454468, "learning_rate": 0.00012472530560629478, "loss": 5.2507, "step": 29600 }, { "epoch": 0.041604827902353, "grad_norm": 1.1577057838439941, "learning_rate": 0.00012476745819867921, "loss": 5.2534, "step": 29610 }, { "epoch": 0.041618878840516585, "grad_norm": 1.2352550029754639, "learning_rate": 0.00012480961079106365, "loss": 5.0825, "step": 29620 }, { "epoch": 0.04163292977868016, "grad_norm": 1.359626054763794, "learning_rate": 0.00012485176338344808, "loss": 5.0889, "step": 29630 }, { "epoch": 0.04164698071684374, "grad_norm": 1.2673550844192505, "learning_rate": 0.0001248939159758325, "loss": 5.0325, "step": 29640 }, { "epoch": 0.04166103165500731, "grad_norm": 1.1786859035491943, "learning_rate": 0.00012493606856821694, "loss": 5.1071, "step": 29650 }, { "epoch": 0.04167508259317089, "grad_norm": 1.209380030632019, "learning_rate": 0.00012497822116060137, "loss": 5.172, "step": 29660 }, { "epoch": 0.04168913353133447, "grad_norm": 1.3903775215148926, 
"learning_rate": 0.0001250203737529858, "loss": 5.128, "step": 29670 }, { "epoch": 0.04170318446949805, "grad_norm": 1.2083300352096558, "learning_rate": 0.00012506252634537023, "loss": 5.2088, "step": 29680 }, { "epoch": 0.041717235407661625, "grad_norm": 1.3227828741073608, "learning_rate": 0.00012510467893775466, "loss": 5.0573, "step": 29690 }, { "epoch": 0.0417312863458252, "grad_norm": 1.19940185546875, "learning_rate": 0.0001251468315301391, "loss": 5.0194, "step": 29700 }, { "epoch": 0.04174533728398878, "grad_norm": 1.232577919960022, "learning_rate": 0.00012518898412252353, "loss": 5.1452, "step": 29710 }, { "epoch": 0.04175938822215236, "grad_norm": 1.2753794193267822, "learning_rate": 0.00012523113671490796, "loss": 4.9559, "step": 29720 }, { "epoch": 0.04177343916031594, "grad_norm": 1.3034454584121704, "learning_rate": 0.0001252732893072924, "loss": 5.1681, "step": 29730 }, { "epoch": 0.04178749009847951, "grad_norm": 1.1941345930099487, "learning_rate": 0.00012531544189967682, "loss": 5.0454, "step": 29740 }, { "epoch": 0.04180154103664309, "grad_norm": 1.2708008289337158, "learning_rate": 0.00012535759449206125, "loss": 5.0656, "step": 29750 }, { "epoch": 0.041815591974806665, "grad_norm": 1.1832579374313354, "learning_rate": 0.00012539974708444568, "loss": 5.0031, "step": 29760 }, { "epoch": 0.04182964291297025, "grad_norm": 1.1988276243209839, "learning_rate": 0.0001254418996768301, "loss": 5.0703, "step": 29770 }, { "epoch": 0.041843693851133824, "grad_norm": 1.2514029741287231, "learning_rate": 0.00012548405226921454, "loss": 5.1405, "step": 29780 }, { "epoch": 0.0418577447892974, "grad_norm": 1.1804388761520386, "learning_rate": 0.00012552620486159898, "loss": 5.0957, "step": 29790 }, { "epoch": 0.04187179572746098, "grad_norm": 1.2735486030578613, "learning_rate": 0.0001255683574539834, "loss": 5.0302, "step": 29800 }, { "epoch": 0.04188584666562455, "grad_norm": 1.2295114994049072, "learning_rate": 0.00012561051004636784, "loss": 5.2056, 
"step": 29810 }, { "epoch": 0.041899897603788136, "grad_norm": 1.1553417444229126, "learning_rate": 0.00012565266263875227, "loss": 5.0882, "step": 29820 }, { "epoch": 0.04191394854195171, "grad_norm": 1.304266333580017, "learning_rate": 0.0001256948152311367, "loss": 5.2603, "step": 29830 }, { "epoch": 0.04192799948011529, "grad_norm": 1.1928671598434448, "learning_rate": 0.00012573696782352113, "loss": 5.1606, "step": 29840 }, { "epoch": 0.041942050418278864, "grad_norm": 1.2966556549072266, "learning_rate": 0.00012577912041590556, "loss": 5.1442, "step": 29850 }, { "epoch": 0.04195610135644244, "grad_norm": 1.3299918174743652, "learning_rate": 0.00012582127300829, "loss": 5.0088, "step": 29860 }, { "epoch": 0.041970152294606024, "grad_norm": 1.2035354375839233, "learning_rate": 0.00012586342560067442, "loss": 5.0605, "step": 29870 }, { "epoch": 0.0419842032327696, "grad_norm": 1.1587860584259033, "learning_rate": 0.00012590557819305886, "loss": 5.262, "step": 29880 }, { "epoch": 0.041998254170933176, "grad_norm": 1.212730884552002, "learning_rate": 0.0001259477307854433, "loss": 5.1807, "step": 29890 }, { "epoch": 0.04201230510909675, "grad_norm": 1.2755563259124756, "learning_rate": 0.00012598988337782774, "loss": 5.1815, "step": 29900 }, { "epoch": 0.04202635604726033, "grad_norm": 1.2221256494522095, "learning_rate": 0.00012603203597021215, "loss": 5.1726, "step": 29910 }, { "epoch": 0.042040406985423905, "grad_norm": 1.246488332748413, "learning_rate": 0.00012607418856259658, "loss": 5.1286, "step": 29920 }, { "epoch": 0.04205445792358749, "grad_norm": 1.2140997648239136, "learning_rate": 0.00012611634115498104, "loss": 5.1014, "step": 29930 }, { "epoch": 0.042068508861751064, "grad_norm": 1.2002910375595093, "learning_rate": 0.00012615849374736544, "loss": 5.1418, "step": 29940 }, { "epoch": 0.04208255979991464, "grad_norm": 1.1797535419464111, "learning_rate": 0.00012620064633974987, "loss": 5.2805, "step": 29950 }, { "epoch": 0.042096610738078216, 
"grad_norm": 1.2059688568115234, "learning_rate": 0.00012624279893213433, "loss": 5.1886, "step": 29960 }, { "epoch": 0.04211066167624179, "grad_norm": 1.22100031375885, "learning_rate": 0.00012628495152451874, "loss": 5.1398, "step": 29970 }, { "epoch": 0.042124712614405375, "grad_norm": 1.2340439558029175, "learning_rate": 0.00012632710411690317, "loss": 5.1082, "step": 29980 }, { "epoch": 0.04213876355256895, "grad_norm": 1.218373417854309, "learning_rate": 0.00012636925670928763, "loss": 5.1635, "step": 29990 }, { "epoch": 0.04215281449073253, "grad_norm": 1.2156459093093872, "learning_rate": 0.00012641140930167203, "loss": 5.1422, "step": 30000 }, { "epoch": 0.042166865428896104, "grad_norm": 1.1739648580551147, "learning_rate": 0.00012645356189405646, "loss": 5.2125, "step": 30010 }, { "epoch": 0.04218091636705968, "grad_norm": 1.2138736248016357, "learning_rate": 0.00012649571448644092, "loss": 5.1365, "step": 30020 }, { "epoch": 0.04219496730522326, "grad_norm": 1.1886487007141113, "learning_rate": 0.00012653786707882535, "loss": 5.0892, "step": 30030 }, { "epoch": 0.04220901824338684, "grad_norm": 1.1931877136230469, "learning_rate": 0.00012658001967120975, "loss": 5.091, "step": 30040 }, { "epoch": 0.042223069181550416, "grad_norm": 1.1976298093795776, "learning_rate": 0.0001266221722635942, "loss": 5.243, "step": 30050 }, { "epoch": 0.04223712011971399, "grad_norm": 1.1765670776367188, "learning_rate": 0.00012666432485597864, "loss": 5.1197, "step": 30060 }, { "epoch": 0.04225117105787757, "grad_norm": 1.182282567024231, "learning_rate": 0.00012670647744836305, "loss": 5.0915, "step": 30070 }, { "epoch": 0.04226522199604115, "grad_norm": 1.2112866640090942, "learning_rate": 0.0001267486300407475, "loss": 5.1765, "step": 30080 }, { "epoch": 0.04227927293420473, "grad_norm": 1.2055011987686157, "learning_rate": 0.00012679078263313194, "loss": 5.0592, "step": 30090 }, { "epoch": 0.0422933238723683, "grad_norm": 1.2951544523239136, "learning_rate": 
0.00012683293522551634, "loss": 5.0923, "step": 30100 }, { "epoch": 0.04230737481053188, "grad_norm": 1.1470328569412231, "learning_rate": 0.0001268750878179008, "loss": 5.103, "step": 30110 }, { "epoch": 0.042321425748695456, "grad_norm": 1.1909065246582031, "learning_rate": 0.00012691724041028523, "loss": 5.2148, "step": 30120 }, { "epoch": 0.04233547668685904, "grad_norm": 1.2222651243209839, "learning_rate": 0.00012695939300266963, "loss": 5.07, "step": 30130 }, { "epoch": 0.042349527625022615, "grad_norm": 1.1813335418701172, "learning_rate": 0.0001270015455950541, "loss": 5.0987, "step": 30140 }, { "epoch": 0.04236357856318619, "grad_norm": 1.1861828565597534, "learning_rate": 0.00012704369818743852, "loss": 5.1389, "step": 30150 }, { "epoch": 0.04237762950134977, "grad_norm": 1.2178916931152344, "learning_rate": 0.00012708585077982293, "loss": 5.1295, "step": 30160 }, { "epoch": 0.04239168043951334, "grad_norm": 1.1351721286773682, "learning_rate": 0.0001271280033722074, "loss": 5.069, "step": 30170 }, { "epoch": 0.042405731377676927, "grad_norm": 1.2155929803848267, "learning_rate": 0.00012717015596459182, "loss": 5.1474, "step": 30180 }, { "epoch": 0.0424197823158405, "grad_norm": 1.1644866466522217, "learning_rate": 0.00012721230855697625, "loss": 5.1563, "step": 30190 }, { "epoch": 0.04243383325400408, "grad_norm": 1.1552120447158813, "learning_rate": 0.00012725446114936068, "loss": 5.2732, "step": 30200 }, { "epoch": 0.042447884192167655, "grad_norm": 1.1935938596725464, "learning_rate": 0.0001272966137417451, "loss": 5.0557, "step": 30210 }, { "epoch": 0.04246193513033123, "grad_norm": 1.207800030708313, "learning_rate": 0.00012733876633412954, "loss": 5.0201, "step": 30220 }, { "epoch": 0.04247598606849481, "grad_norm": 1.2109053134918213, "learning_rate": 0.00012738091892651397, "loss": 5.0742, "step": 30230 }, { "epoch": 0.04249003700665839, "grad_norm": 1.20108962059021, "learning_rate": 0.0001274230715188984, "loss": 5.1348, "step": 30240 }, { 
"epoch": 0.04250408794482197, "grad_norm": 1.1453440189361572, "learning_rate": 0.00012746522411128284, "loss": 5.1966, "step": 30250 }, { "epoch": 0.04251813888298554, "grad_norm": 1.1746257543563843, "learning_rate": 0.00012750737670366727, "loss": 5.0678, "step": 30260 }, { "epoch": 0.04253218982114912, "grad_norm": 1.5763996839523315, "learning_rate": 0.0001275495292960517, "loss": 5.0624, "step": 30270 }, { "epoch": 0.042546240759312695, "grad_norm": 1.2596639394760132, "learning_rate": 0.00012759168188843613, "loss": 5.0564, "step": 30280 }, { "epoch": 0.04256029169747628, "grad_norm": 1.1996608972549438, "learning_rate": 0.00012763383448082056, "loss": 4.9513, "step": 30290 }, { "epoch": 0.042574342635639854, "grad_norm": 1.2479219436645508, "learning_rate": 0.000127675987073205, "loss": 5.0576, "step": 30300 }, { "epoch": 0.04258839357380343, "grad_norm": 1.2073161602020264, "learning_rate": 0.00012771813966558942, "loss": 5.1667, "step": 30310 }, { "epoch": 0.04260244451196701, "grad_norm": 1.2177608013153076, "learning_rate": 0.00012776029225797385, "loss": 4.9945, "step": 30320 }, { "epoch": 0.04261649545013058, "grad_norm": 1.213683843612671, "learning_rate": 0.00012780244485035829, "loss": 5.1458, "step": 30330 }, { "epoch": 0.042630546388294166, "grad_norm": 1.2280396223068237, "learning_rate": 0.00012784459744274272, "loss": 5.0936, "step": 30340 }, { "epoch": 0.04264459732645774, "grad_norm": 1.2426551580429077, "learning_rate": 0.00012788675003512715, "loss": 5.0878, "step": 30350 }, { "epoch": 0.04265864826462132, "grad_norm": 1.2090293169021606, "learning_rate": 0.00012792890262751158, "loss": 5.1355, "step": 30360 }, { "epoch": 0.042672699202784894, "grad_norm": 1.1868354082107544, "learning_rate": 0.000127971055219896, "loss": 5.0465, "step": 30370 }, { "epoch": 0.04268675014094847, "grad_norm": 1.1995588541030884, "learning_rate": 0.00012801320781228044, "loss": 5.1283, "step": 30380 }, { "epoch": 0.042700801079112054, "grad_norm": 
1.3185629844665527, "learning_rate": 0.00012805536040466487, "loss": 5.1045, "step": 30390 }, { "epoch": 0.04271485201727563, "grad_norm": 1.1966207027435303, "learning_rate": 0.0001280975129970493, "loss": 4.9284, "step": 30400 }, { "epoch": 0.042728902955439206, "grad_norm": 1.1545592546463013, "learning_rate": 0.00012813966558943373, "loss": 5.1675, "step": 30410 }, { "epoch": 0.04274295389360278, "grad_norm": 1.1954106092453003, "learning_rate": 0.00012818181818181817, "loss": 5.1079, "step": 30420 }, { "epoch": 0.04275700483176636, "grad_norm": 1.1693055629730225, "learning_rate": 0.0001282239707742026, "loss": 5.2517, "step": 30430 }, { "epoch": 0.04277105576992994, "grad_norm": 1.2295222282409668, "learning_rate": 0.00012826612336658703, "loss": 5.117, "step": 30440 }, { "epoch": 0.04278510670809352, "grad_norm": 1.2061803340911865, "learning_rate": 0.00012830827595897149, "loss": 5.0885, "step": 30450 }, { "epoch": 0.042799157646257094, "grad_norm": 1.1537740230560303, "learning_rate": 0.0001283504285513559, "loss": 5.1159, "step": 30460 }, { "epoch": 0.04281320858442067, "grad_norm": 1.150728464126587, "learning_rate": 0.00012839258114374032, "loss": 5.2855, "step": 30470 }, { "epoch": 0.042827259522584246, "grad_norm": 1.178875207901001, "learning_rate": 0.00012843473373612478, "loss": 5.2431, "step": 30480 }, { "epoch": 0.04284131046074783, "grad_norm": 1.1979405879974365, "learning_rate": 0.00012847688632850918, "loss": 5.0651, "step": 30490 }, { "epoch": 0.042855361398911405, "grad_norm": 1.142595648765564, "learning_rate": 0.00012851903892089361, "loss": 5.0605, "step": 30500 }, { "epoch": 0.04286941233707498, "grad_norm": 1.142235279083252, "learning_rate": 0.00012856119151327807, "loss": 5.1623, "step": 30510 }, { "epoch": 0.04288346327523856, "grad_norm": 1.2095253467559814, "learning_rate": 0.00012860334410566248, "loss": 5.0609, "step": 30520 }, { "epoch": 0.042897514213402134, "grad_norm": 1.1336437463760376, "learning_rate": 
0.0001286454966980469, "loss": 5.0318, "step": 30530 }, { "epoch": 0.04291156515156572, "grad_norm": 1.2415790557861328, "learning_rate": 0.00012868764929043137, "loss": 5.2027, "step": 30540 }, { "epoch": 0.04292561608972929, "grad_norm": 1.2634472846984863, "learning_rate": 0.00012872980188281577, "loss": 5.1282, "step": 30550 }, { "epoch": 0.04293966702789287, "grad_norm": 1.198366403579712, "learning_rate": 0.0001287719544752002, "loss": 5.1052, "step": 30560 }, { "epoch": 0.042953717966056446, "grad_norm": 1.2169760465621948, "learning_rate": 0.00012881410706758466, "loss": 5.0278, "step": 30570 }, { "epoch": 0.04296776890422002, "grad_norm": 1.2113124132156372, "learning_rate": 0.00012885625965996906, "loss": 5.0952, "step": 30580 }, { "epoch": 0.0429818198423836, "grad_norm": 1.247658371925354, "learning_rate": 0.0001288984122523535, "loss": 5.1624, "step": 30590 }, { "epoch": 0.04299587078054718, "grad_norm": 1.3335022926330566, "learning_rate": 0.00012894056484473795, "loss": 5.016, "step": 30600 }, { "epoch": 0.04300992171871076, "grad_norm": 1.1578710079193115, "learning_rate": 0.00012898271743712238, "loss": 5.2555, "step": 30610 }, { "epoch": 0.04302397265687433, "grad_norm": 1.1831419467926025, "learning_rate": 0.0001290248700295068, "loss": 5.2272, "step": 30620 }, { "epoch": 0.04303802359503791, "grad_norm": 1.3762239217758179, "learning_rate": 0.00012906702262189125, "loss": 5.1915, "step": 30630 }, { "epoch": 0.043052074533201486, "grad_norm": 1.204743504524231, "learning_rate": 0.00012910917521427568, "loss": 5.1658, "step": 30640 }, { "epoch": 0.04306612547136507, "grad_norm": 1.2079994678497314, "learning_rate": 0.00012915132780666008, "loss": 5.0166, "step": 30650 }, { "epoch": 0.043080176409528645, "grad_norm": 1.2461872100830078, "learning_rate": 0.00012919348039904454, "loss": 5.0831, "step": 30660 }, { "epoch": 0.04309422734769222, "grad_norm": 1.1971070766448975, "learning_rate": 0.00012923563299142897, "loss": 5.1336, "step": 30670 }, { 
"epoch": 0.0431082782858558, "grad_norm": 1.1303532123565674, "learning_rate": 0.00012927778558381338, "loss": 5.0732, "step": 30680 }, { "epoch": 0.04312232922401937, "grad_norm": 1.1667498350143433, "learning_rate": 0.00012931993817619783, "loss": 5.2159, "step": 30690 }, { "epoch": 0.043136380162182957, "grad_norm": 1.159986972808838, "learning_rate": 0.00012936209076858227, "loss": 5.1623, "step": 30700 }, { "epoch": 0.04315043110034653, "grad_norm": 1.1590168476104736, "learning_rate": 0.00012940424336096667, "loss": 5.1168, "step": 30710 }, { "epoch": 0.04316448203851011, "grad_norm": 1.156524658203125, "learning_rate": 0.00012944639595335113, "loss": 5.0177, "step": 30720 }, { "epoch": 0.043178532976673685, "grad_norm": 1.2327642440795898, "learning_rate": 0.00012948854854573556, "loss": 5.1663, "step": 30730 }, { "epoch": 0.04319258391483726, "grad_norm": 1.215132236480713, "learning_rate": 0.00012953070113812, "loss": 5.1787, "step": 30740 }, { "epoch": 0.043206634853000844, "grad_norm": 1.198944091796875, "learning_rate": 0.00012957285373050442, "loss": 5.1268, "step": 30750 }, { "epoch": 0.04322068579116442, "grad_norm": 1.1945573091506958, "learning_rate": 0.00012961500632288885, "loss": 5.1569, "step": 30760 }, { "epoch": 0.043234736729328, "grad_norm": 1.1851938962936401, "learning_rate": 0.00012965715891527328, "loss": 5.0766, "step": 30770 }, { "epoch": 0.04324878766749157, "grad_norm": 1.2323837280273438, "learning_rate": 0.00012969931150765771, "loss": 5.0651, "step": 30780 }, { "epoch": 0.04326283860565515, "grad_norm": 1.1993993520736694, "learning_rate": 0.00012974146410004215, "loss": 4.9883, "step": 30790 }, { "epoch": 0.04327688954381873, "grad_norm": 1.2613015174865723, "learning_rate": 0.00012978361669242658, "loss": 5.0884, "step": 30800 }, { "epoch": 0.04329094048198231, "grad_norm": 1.2269909381866455, "learning_rate": 0.000129825769284811, "loss": 5.1133, "step": 30810 }, { "epoch": 0.043304991420145884, "grad_norm": 
1.2280735969543457, "learning_rate": 0.00012986792187719544, "loss": 4.8907, "step": 30820 }, { "epoch": 0.04331904235830946, "grad_norm": 1.1935653686523438, "learning_rate": 0.00012991007446957987, "loss": 5.042, "step": 30830 }, { "epoch": 0.04333309329647304, "grad_norm": 1.1728026866912842, "learning_rate": 0.0001299522270619643, "loss": 5.0976, "step": 30840 }, { "epoch": 0.04334714423463662, "grad_norm": 1.1702702045440674, "learning_rate": 0.00012999437965434873, "loss": 5.0627, "step": 30850 }, { "epoch": 0.043361195172800196, "grad_norm": 1.144583821296692, "learning_rate": 0.00013003653224673316, "loss": 4.9926, "step": 30860 }, { "epoch": 0.04337524611096377, "grad_norm": 1.1992604732513428, "learning_rate": 0.0001300786848391176, "loss": 5.1402, "step": 30870 }, { "epoch": 0.04338929704912735, "grad_norm": 1.2514524459838867, "learning_rate": 0.00013012083743150203, "loss": 5.2172, "step": 30880 }, { "epoch": 0.043403347987290924, "grad_norm": 1.1470156908035278, "learning_rate": 0.00013016299002388646, "loss": 5.0893, "step": 30890 }, { "epoch": 0.0434173989254545, "grad_norm": 1.1299411058425903, "learning_rate": 0.0001302051426162709, "loss": 5.1019, "step": 30900 }, { "epoch": 0.043431449863618084, "grad_norm": 1.2057139873504639, "learning_rate": 0.00013024729520865532, "loss": 5.0939, "step": 30910 }, { "epoch": 0.04344550080178166, "grad_norm": 1.108620285987854, "learning_rate": 0.00013028944780103975, "loss": 5.1344, "step": 30920 }, { "epoch": 0.043459551739945236, "grad_norm": 1.22635817527771, "learning_rate": 0.00013033160039342418, "loss": 5.0597, "step": 30930 }, { "epoch": 0.04347360267810881, "grad_norm": 1.2512584924697876, "learning_rate": 0.0001303737529858086, "loss": 4.9751, "step": 30940 }, { "epoch": 0.04348765361627239, "grad_norm": 1.198045253753662, "learning_rate": 0.00013041590557819304, "loss": 5.0859, "step": 30950 }, { "epoch": 0.04350170455443597, "grad_norm": 1.21504545211792, "learning_rate": 0.00013045805817057748, 
"loss": 5.1187, "step": 30960 }, { "epoch": 0.04351575549259955, "grad_norm": 1.1842291355133057, "learning_rate": 0.0001305002107629619, "loss": 5.0685, "step": 30970 }, { "epoch": 0.043529806430763124, "grad_norm": 1.1496257781982422, "learning_rate": 0.00013054236335534634, "loss": 5.1684, "step": 30980 }, { "epoch": 0.0435438573689267, "grad_norm": 1.2007911205291748, "learning_rate": 0.00013058451594773077, "loss": 5.1206, "step": 30990 }, { "epoch": 0.043557908307090276, "grad_norm": 1.1559113264083862, "learning_rate": 0.0001306266685401152, "loss": 5.1135, "step": 31000 }, { "epoch": 0.04357195924525386, "grad_norm": 1.2255867719650269, "learning_rate": 0.00013066882113249963, "loss": 5.0647, "step": 31010 }, { "epoch": 0.043586010183417435, "grad_norm": 1.1707844734191895, "learning_rate": 0.00013071097372488406, "loss": 5.0317, "step": 31020 }, { "epoch": 0.04360006112158101, "grad_norm": 1.1722240447998047, "learning_rate": 0.00013075312631726852, "loss": 4.9861, "step": 31030 }, { "epoch": 0.04361411205974459, "grad_norm": 1.164713740348816, "learning_rate": 0.00013079527890965293, "loss": 5.2333, "step": 31040 }, { "epoch": 0.043628162997908164, "grad_norm": 1.2219550609588623, "learning_rate": 0.00013083743150203736, "loss": 5.118, "step": 31050 }, { "epoch": 0.04364221393607175, "grad_norm": 1.173423171043396, "learning_rate": 0.00013087958409442181, "loss": 5.1398, "step": 31060 }, { "epoch": 0.04365626487423532, "grad_norm": 1.227458119392395, "learning_rate": 0.00013092173668680622, "loss": 5.0945, "step": 31070 }, { "epoch": 0.0436703158123989, "grad_norm": 1.2771947383880615, "learning_rate": 0.00013096388927919065, "loss": 5.0949, "step": 31080 }, { "epoch": 0.043684366750562476, "grad_norm": 1.218270182609558, "learning_rate": 0.0001310060418715751, "loss": 5.1639, "step": 31090 }, { "epoch": 0.04369841768872605, "grad_norm": 1.195109248161316, "learning_rate": 0.0001310481944639595, "loss": 5.1404, "step": 31100 }, { "epoch": 
0.043712468626889635, "grad_norm": 1.2469673156738281, "learning_rate": 0.00013109034705634394, "loss": 5.086, "step": 31110 }, { "epoch": 0.04372651956505321, "grad_norm": 1.1276779174804688, "learning_rate": 0.0001311324996487284, "loss": 5.0138, "step": 31120 }, { "epoch": 0.04374057050321679, "grad_norm": 1.1672612428665161, "learning_rate": 0.0001311746522411128, "loss": 5.0668, "step": 31130 }, { "epoch": 0.04375462144138036, "grad_norm": 1.2316880226135254, "learning_rate": 0.00013121680483349724, "loss": 5.0723, "step": 31140 }, { "epoch": 0.04376867237954394, "grad_norm": 1.3245317935943604, "learning_rate": 0.0001312589574258817, "loss": 5.0082, "step": 31150 }, { "epoch": 0.04378272331770752, "grad_norm": 1.2602311372756958, "learning_rate": 0.0001313011100182661, "loss": 4.9338, "step": 31160 }, { "epoch": 0.0437967742558711, "grad_norm": 1.1890308856964111, "learning_rate": 0.00013134326261065053, "loss": 5.1929, "step": 31170 }, { "epoch": 0.043810825194034675, "grad_norm": 1.2277629375457764, "learning_rate": 0.000131385415203035, "loss": 4.9834, "step": 31180 }, { "epoch": 0.04382487613219825, "grad_norm": 1.142812728881836, "learning_rate": 0.00013142756779541942, "loss": 5.1015, "step": 31190 }, { "epoch": 0.04383892707036183, "grad_norm": 1.1541565656661987, "learning_rate": 0.00013146972038780382, "loss": 5.0397, "step": 31200 }, { "epoch": 0.0438529780085254, "grad_norm": 1.1937357187271118, "learning_rate": 0.00013151187298018828, "loss": 5.0137, "step": 31210 }, { "epoch": 0.043867028946688987, "grad_norm": 1.1668943166732788, "learning_rate": 0.0001315540255725727, "loss": 5.1796, "step": 31220 }, { "epoch": 0.04388107988485256, "grad_norm": 1.1891443729400635, "learning_rate": 0.00013159617816495712, "loss": 5.059, "step": 31230 }, { "epoch": 0.04389513082301614, "grad_norm": 1.1822489500045776, "learning_rate": 0.00013163833075734158, "loss": 5.1285, "step": 31240 }, { "epoch": 0.043909181761179715, "grad_norm": 1.1516724824905396, 
"learning_rate": 0.000131680483349726, "loss": 5.1245, "step": 31250 }, { "epoch": 0.04392323269934329, "grad_norm": 1.2096267938613892, "learning_rate": 0.0001317226359421104, "loss": 5.1178, "step": 31260 }, { "epoch": 0.043937283637506874, "grad_norm": 1.2081648111343384, "learning_rate": 0.00013176478853449487, "loss": 5.1561, "step": 31270 }, { "epoch": 0.04395133457567045, "grad_norm": 1.1660428047180176, "learning_rate": 0.0001318069411268793, "loss": 5.2086, "step": 31280 }, { "epoch": 0.04396538551383403, "grad_norm": 1.153578519821167, "learning_rate": 0.0001318490937192637, "loss": 5.156, "step": 31290 }, { "epoch": 0.0439794364519976, "grad_norm": 1.2928626537322998, "learning_rate": 0.00013189124631164816, "loss": 5.0846, "step": 31300 }, { "epoch": 0.04399348739016118, "grad_norm": 1.1599982976913452, "learning_rate": 0.0001319333989040326, "loss": 5.1444, "step": 31310 }, { "epoch": 0.04400753832832476, "grad_norm": 1.1558305025100708, "learning_rate": 0.00013197555149641702, "loss": 5.0282, "step": 31320 }, { "epoch": 0.04402158926648834, "grad_norm": 1.168467402458191, "learning_rate": 0.00013201770408880146, "loss": 4.9011, "step": 31330 }, { "epoch": 0.044035640204651914, "grad_norm": 1.1910271644592285, "learning_rate": 0.0001320598566811859, "loss": 5.0972, "step": 31340 }, { "epoch": 0.04404969114281549, "grad_norm": 1.1661889553070068, "learning_rate": 0.00013210200927357032, "loss": 5.0018, "step": 31350 }, { "epoch": 0.04406374208097907, "grad_norm": 1.1902189254760742, "learning_rate": 0.00013214416186595475, "loss": 5.1285, "step": 31360 }, { "epoch": 0.04407779301914265, "grad_norm": 1.191560983657837, "learning_rate": 0.00013218631445833918, "loss": 5.0772, "step": 31370 }, { "epoch": 0.044091843957306226, "grad_norm": 1.108267903327942, "learning_rate": 0.0001322284670507236, "loss": 5.1525, "step": 31380 }, { "epoch": 0.0441058948954698, "grad_norm": 1.2221838235855103, "learning_rate": 0.00013227061964310804, "loss": 5.2337, "step": 
31390 }, { "epoch": 0.04411994583363338, "grad_norm": 1.19728684425354, "learning_rate": 0.00013231277223549247, "loss": 5.1011, "step": 31400 }, { "epoch": 0.044133996771796954, "grad_norm": 1.1746065616607666, "learning_rate": 0.0001323549248278769, "loss": 5.1121, "step": 31410 }, { "epoch": 0.04414804770996054, "grad_norm": 1.2161784172058105, "learning_rate": 0.00013239707742026134, "loss": 5.1285, "step": 31420 }, { "epoch": 0.044162098648124114, "grad_norm": 1.2407193183898926, "learning_rate": 0.00013243923001264577, "loss": 5.0633, "step": 31430 }, { "epoch": 0.04417614958628769, "grad_norm": 1.1342822313308716, "learning_rate": 0.0001324813826050302, "loss": 5.0568, "step": 31440 }, { "epoch": 0.044190200524451266, "grad_norm": 1.2115027904510498, "learning_rate": 0.00013252353519741463, "loss": 4.9452, "step": 31450 }, { "epoch": 0.04420425146261484, "grad_norm": 1.20106840133667, "learning_rate": 0.00013256568778979906, "loss": 5.0924, "step": 31460 }, { "epoch": 0.044218302400778425, "grad_norm": 1.1099613904953003, "learning_rate": 0.0001326078403821835, "loss": 5.07, "step": 31470 }, { "epoch": 0.044232353338942, "grad_norm": 1.1473101377487183, "learning_rate": 0.00013264999297456792, "loss": 5.069, "step": 31480 }, { "epoch": 0.04424640427710558, "grad_norm": 1.1742898225784302, "learning_rate": 0.00013269214556695235, "loss": 5.1199, "step": 31490 }, { "epoch": 0.044260455215269154, "grad_norm": 1.1407841444015503, "learning_rate": 0.00013273429815933679, "loss": 5.0929, "step": 31500 }, { "epoch": 0.04427450615343273, "grad_norm": 1.17631196975708, "learning_rate": 0.00013277645075172122, "loss": 4.9963, "step": 31510 }, { "epoch": 0.044288557091596306, "grad_norm": 1.1480594873428345, "learning_rate": 0.00013281860334410565, "loss": 5.11, "step": 31520 }, { "epoch": 0.04430260802975989, "grad_norm": 1.2020182609558105, "learning_rate": 0.00013286075593649008, "loss": 5.0679, "step": 31530 }, { "epoch": 0.044316658967923465, "grad_norm": 
1.230470061302185, "learning_rate": 0.0001329029085288745, "loss": 4.9922, "step": 31540 }, { "epoch": 0.04433070990608704, "grad_norm": 1.2092961072921753, "learning_rate": 0.00013294506112125894, "loss": 5.0695, "step": 31550 }, { "epoch": 0.04434476084425062, "grad_norm": 1.2055702209472656, "learning_rate": 0.00013298721371364337, "loss": 5.1677, "step": 31560 }, { "epoch": 0.044358811782414194, "grad_norm": 1.1955968141555786, "learning_rate": 0.0001330293663060278, "loss": 5.1062, "step": 31570 }, { "epoch": 0.04437286272057778, "grad_norm": 1.1392838954925537, "learning_rate": 0.00013307151889841224, "loss": 5.0741, "step": 31580 }, { "epoch": 0.04438691365874135, "grad_norm": 1.1732559204101562, "learning_rate": 0.00013311367149079667, "loss": 5.0425, "step": 31590 }, { "epoch": 0.04440096459690493, "grad_norm": 1.1739134788513184, "learning_rate": 0.0001331558240831811, "loss": 5.0015, "step": 31600 }, { "epoch": 0.044415015535068506, "grad_norm": 1.175411581993103, "learning_rate": 0.00013319797667556556, "loss": 5.0818, "step": 31610 }, { "epoch": 0.04442906647323208, "grad_norm": 1.2318931818008423, "learning_rate": 0.00013324012926794996, "loss": 4.9621, "step": 31620 }, { "epoch": 0.044443117411395665, "grad_norm": 1.146024227142334, "learning_rate": 0.0001332822818603344, "loss": 5.061, "step": 31630 }, { "epoch": 0.04445716834955924, "grad_norm": 1.1564781665802002, "learning_rate": 0.00013332443445271885, "loss": 5.2572, "step": 31640 }, { "epoch": 0.04447121928772282, "grad_norm": 1.1591886281967163, "learning_rate": 0.00013336658704510325, "loss": 5.1789, "step": 31650 }, { "epoch": 0.04448527022588639, "grad_norm": 1.172905445098877, "learning_rate": 0.00013340873963748768, "loss": 5.0811, "step": 31660 }, { "epoch": 0.04449932116404997, "grad_norm": 1.1963396072387695, "learning_rate": 0.00013345089222987214, "loss": 5.15, "step": 31670 }, { "epoch": 0.04451337210221355, "grad_norm": 1.234837532043457, "learning_rate": 0.00013349304482225655, 
"loss": 5.1728, "step": 31680 }, { "epoch": 0.04452742304037713, "grad_norm": 1.179028868675232, "learning_rate": 0.00013353519741464098, "loss": 5.0332, "step": 31690 }, { "epoch": 0.044541473978540705, "grad_norm": 1.1915132999420166, "learning_rate": 0.00013357735000702544, "loss": 5.1575, "step": 31700 }, { "epoch": 0.04455552491670428, "grad_norm": 1.248386263847351, "learning_rate": 0.00013361950259940984, "loss": 5.1046, "step": 31710 }, { "epoch": 0.04456957585486786, "grad_norm": 1.134567141532898, "learning_rate": 0.00013366165519179427, "loss": 5.0822, "step": 31720 }, { "epoch": 0.04458362679303144, "grad_norm": 1.2228316068649292, "learning_rate": 0.00013370380778417873, "loss": 4.9389, "step": 31730 }, { "epoch": 0.044597677731195016, "grad_norm": 1.143936276435852, "learning_rate": 0.00013374596037656316, "loss": 5.0541, "step": 31740 }, { "epoch": 0.04461172866935859, "grad_norm": 1.1601719856262207, "learning_rate": 0.00013378811296894757, "loss": 5.1395, "step": 31750 }, { "epoch": 0.04462577960752217, "grad_norm": 1.1841803789138794, "learning_rate": 0.00013383026556133202, "loss": 5.0936, "step": 31760 }, { "epoch": 0.044639830545685745, "grad_norm": 1.1612204313278198, "learning_rate": 0.00013387241815371645, "loss": 5.0203, "step": 31770 }, { "epoch": 0.04465388148384933, "grad_norm": 1.2054232358932495, "learning_rate": 0.00013391457074610086, "loss": 5.0786, "step": 31780 }, { "epoch": 0.044667932422012904, "grad_norm": 1.155108094215393, "learning_rate": 0.00013395672333848532, "loss": 4.9753, "step": 31790 }, { "epoch": 0.04468198336017648, "grad_norm": 1.1976162195205688, "learning_rate": 0.00013399887593086975, "loss": 5.0861, "step": 31800 }, { "epoch": 0.04469603429834006, "grad_norm": 1.157468557357788, "learning_rate": 0.00013404102852325415, "loss": 5.1192, "step": 31810 }, { "epoch": 0.04471008523650363, "grad_norm": 1.1830439567565918, "learning_rate": 0.0001340831811156386, "loss": 5.1491, "step": 31820 }, { "epoch": 
0.04472413617466721, "grad_norm": 1.2510123252868652, "learning_rate": 0.00013412533370802304, "loss": 5.0752, "step": 31830 }, { "epoch": 0.04473818711283079, "grad_norm": 1.151624083518982, "learning_rate": 0.00013416748630040745, "loss": 5.0527, "step": 31840 }, { "epoch": 0.04475223805099437, "grad_norm": 1.1923450231552124, "learning_rate": 0.0001342096388927919, "loss": 5.0977, "step": 31850 }, { "epoch": 0.044766288989157944, "grad_norm": 1.1659468412399292, "learning_rate": 0.00013425179148517633, "loss": 4.9261, "step": 31860 }, { "epoch": 0.04478033992732152, "grad_norm": 1.139060378074646, "learning_rate": 0.00013429394407756074, "loss": 5.0291, "step": 31870 }, { "epoch": 0.0447943908654851, "grad_norm": 1.134356141090393, "learning_rate": 0.0001343360966699452, "loss": 5.1025, "step": 31880 }, { "epoch": 0.04480844180364868, "grad_norm": 1.1306984424591064, "learning_rate": 0.00013437824926232963, "loss": 5.0889, "step": 31890 }, { "epoch": 0.044822492741812256, "grad_norm": 1.1675775051116943, "learning_rate": 0.00013442040185471406, "loss": 4.949, "step": 31900 }, { "epoch": 0.04483654367997583, "grad_norm": 1.2445342540740967, "learning_rate": 0.0001344625544470985, "loss": 4.9771, "step": 31910 }, { "epoch": 0.04485059461813941, "grad_norm": 1.1660617589950562, "learning_rate": 0.00013450470703948292, "loss": 5.0768, "step": 31920 }, { "epoch": 0.044864645556302984, "grad_norm": 1.2834054231643677, "learning_rate": 0.00013454685963186735, "loss": 4.9632, "step": 31930 }, { "epoch": 0.04487869649446657, "grad_norm": 1.2516728639602661, "learning_rate": 0.00013458901222425178, "loss": 5.091, "step": 31940 }, { "epoch": 0.044892747432630144, "grad_norm": 1.1859095096588135, "learning_rate": 0.00013463116481663622, "loss": 4.9886, "step": 31950 }, { "epoch": 0.04490679837079372, "grad_norm": 1.140558123588562, "learning_rate": 0.00013467331740902065, "loss": 5.1474, "step": 31960 }, { "epoch": 0.044920849308957296, "grad_norm": 1.1825441122055054, 
"learning_rate": 0.00013471547000140508, "loss": 5.0819, "step": 31970 }, { "epoch": 0.04493490024712087, "grad_norm": 1.1421507596969604, "learning_rate": 0.0001347576225937895, "loss": 5.0934, "step": 31980 }, { "epoch": 0.044948951185284455, "grad_norm": 1.1730067729949951, "learning_rate": 0.00013479977518617394, "loss": 4.9571, "step": 31990 }, { "epoch": 0.04496300212344803, "grad_norm": 1.1735901832580566, "learning_rate": 0.00013484192777855837, "loss": 5.0852, "step": 32000 }, { "epoch": 0.04497705306161161, "grad_norm": 1.2650270462036133, "learning_rate": 0.0001348840803709428, "loss": 4.9872, "step": 32010 }, { "epoch": 0.044991103999775184, "grad_norm": 1.2169119119644165, "learning_rate": 0.00013492623296332723, "loss": 5.0531, "step": 32020 }, { "epoch": 0.04500515493793876, "grad_norm": 1.1884304285049438, "learning_rate": 0.00013496838555571166, "loss": 4.9522, "step": 32030 }, { "epoch": 0.04501920587610234, "grad_norm": 1.131784439086914, "learning_rate": 0.0001350105381480961, "loss": 5.0842, "step": 32040 }, { "epoch": 0.04503325681426592, "grad_norm": 1.1771255731582642, "learning_rate": 0.00013505269074048053, "loss": 5.1013, "step": 32050 }, { "epoch": 0.045047307752429495, "grad_norm": 1.181799054145813, "learning_rate": 0.00013509484333286496, "loss": 5.0919, "step": 32060 }, { "epoch": 0.04506135869059307, "grad_norm": 1.173443078994751, "learning_rate": 0.0001351369959252494, "loss": 4.9914, "step": 32070 }, { "epoch": 0.04507540962875665, "grad_norm": 1.1408971548080444, "learning_rate": 0.00013517914851763382, "loss": 5.0476, "step": 32080 }, { "epoch": 0.04508946056692023, "grad_norm": 1.1605677604675293, "learning_rate": 0.00013522130111001825, "loss": 5.1048, "step": 32090 }, { "epoch": 0.04510351150508381, "grad_norm": 1.1556565761566162, "learning_rate": 0.00013526345370240268, "loss": 5.0675, "step": 32100 }, { "epoch": 0.04511756244324738, "grad_norm": 1.1450446844100952, "learning_rate": 0.00013530560629478711, "loss": 5.0533, 
"step": 32110 }, { "epoch": 0.04513161338141096, "grad_norm": 1.163759708404541, "learning_rate": 0.00013534775888717155, "loss": 5.1082, "step": 32120 }, { "epoch": 0.045145664319574536, "grad_norm": 1.2103548049926758, "learning_rate": 0.00013538991147955598, "loss": 5.0448, "step": 32130 }, { "epoch": 0.04515971525773811, "grad_norm": 1.3155592679977417, "learning_rate": 0.0001354320640719404, "loss": 5.0333, "step": 32140 }, { "epoch": 0.045173766195901695, "grad_norm": 1.169601321220398, "learning_rate": 0.00013547421666432484, "loss": 5.0035, "step": 32150 }, { "epoch": 0.04518781713406527, "grad_norm": 1.1616172790527344, "learning_rate": 0.0001355163692567093, "loss": 5.0827, "step": 32160 }, { "epoch": 0.04520186807222885, "grad_norm": 1.1275705099105835, "learning_rate": 0.0001355585218490937, "loss": 5.0115, "step": 32170 }, { "epoch": 0.04521591901039242, "grad_norm": 1.1657685041427612, "learning_rate": 0.00013560067444147813, "loss": 5.0359, "step": 32180 }, { "epoch": 0.045229969948556, "grad_norm": 1.1922658681869507, "learning_rate": 0.0001356428270338626, "loss": 5.0735, "step": 32190 }, { "epoch": 0.04524402088671958, "grad_norm": 1.262093186378479, "learning_rate": 0.000135684979626247, "loss": 5.0809, "step": 32200 }, { "epoch": 0.04525807182488316, "grad_norm": 1.1995415687561035, "learning_rate": 0.00013572713221863143, "loss": 5.0154, "step": 32210 }, { "epoch": 0.045272122763046735, "grad_norm": 1.1763862371444702, "learning_rate": 0.00013576928481101588, "loss": 5.0166, "step": 32220 }, { "epoch": 0.04528617370121031, "grad_norm": 1.1900880336761475, "learning_rate": 0.0001358114374034003, "loss": 5.0693, "step": 32230 }, { "epoch": 0.04530022463937389, "grad_norm": 1.1440578699111938, "learning_rate": 0.00013585358999578472, "loss": 5.0902, "step": 32240 }, { "epoch": 0.04531427557753747, "grad_norm": 1.1583417654037476, "learning_rate": 0.00013589574258816918, "loss": 4.9809, "step": 32250 }, { "epoch": 0.045328326515701046, "grad_norm": 
1.2270220518112183, "learning_rate": 0.00013593789518055358, "loss": 5.099, "step": 32260 }, { "epoch": 0.04534237745386462, "grad_norm": 1.1560698747634888, "learning_rate": 0.000135980047772938, "loss": 5.1224, "step": 32270 }, { "epoch": 0.0453564283920282, "grad_norm": 1.1910443305969238, "learning_rate": 0.00013602220036532247, "loss": 5.0196, "step": 32280 }, { "epoch": 0.045370479330191775, "grad_norm": 1.204832673072815, "learning_rate": 0.00013606435295770688, "loss": 5.0512, "step": 32290 }, { "epoch": 0.04538453026835536, "grad_norm": 1.3012568950653076, "learning_rate": 0.0001361065055500913, "loss": 5.0248, "step": 32300 }, { "epoch": 0.045398581206518934, "grad_norm": 1.1138745546340942, "learning_rate": 0.00013614865814247576, "loss": 5.1426, "step": 32310 }, { "epoch": 0.04541263214468251, "grad_norm": 1.3530911207199097, "learning_rate": 0.0001361908107348602, "loss": 4.9985, "step": 32320 }, { "epoch": 0.04542668308284609, "grad_norm": 1.1052865982055664, "learning_rate": 0.0001362329633272446, "loss": 5.0387, "step": 32330 }, { "epoch": 0.04544073402100966, "grad_norm": 1.1460787057876587, "learning_rate": 0.00013627511591962906, "loss": 4.9708, "step": 32340 }, { "epoch": 0.045454784959173246, "grad_norm": 1.1664546728134155, "learning_rate": 0.0001363172685120135, "loss": 4.9381, "step": 32350 }, { "epoch": 0.04546883589733682, "grad_norm": 1.1577123403549194, "learning_rate": 0.0001363594211043979, "loss": 4.9815, "step": 32360 }, { "epoch": 0.0454828868355004, "grad_norm": 1.1973512172698975, "learning_rate": 0.00013640157369678235, "loss": 5.1239, "step": 32370 }, { "epoch": 0.045496937773663974, "grad_norm": 1.1406227350234985, "learning_rate": 0.00013644372628916678, "loss": 5.0569, "step": 32380 }, { "epoch": 0.04551098871182755, "grad_norm": 1.1365989446640015, "learning_rate": 0.0001364858788815512, "loss": 4.9423, "step": 32390 }, { "epoch": 0.045525039649991134, "grad_norm": 1.1738041639328003, "learning_rate": 0.00013652803147393565, 
"loss": 5.1111, "step": 32400 }, { "epoch": 0.04553909058815471, "grad_norm": 1.2123348712921143, "learning_rate": 0.00013657018406632008, "loss": 5.1021, "step": 32410 }, { "epoch": 0.045553141526318286, "grad_norm": 1.1308833360671997, "learning_rate": 0.00013661233665870448, "loss": 5.0533, "step": 32420 }, { "epoch": 0.04556719246448186, "grad_norm": 1.2196406126022339, "learning_rate": 0.00013665448925108894, "loss": 4.9591, "step": 32430 }, { "epoch": 0.04558124340264544, "grad_norm": 1.1016089916229248, "learning_rate": 0.00013669664184347337, "loss": 5.0497, "step": 32440 }, { "epoch": 0.045595294340809014, "grad_norm": 1.248906135559082, "learning_rate": 0.0001367387944358578, "loss": 5.0935, "step": 32450 }, { "epoch": 0.0456093452789726, "grad_norm": 1.1859537363052368, "learning_rate": 0.00013678094702824223, "loss": 5.1309, "step": 32460 }, { "epoch": 0.045623396217136174, "grad_norm": 1.2062934637069702, "learning_rate": 0.00013682309962062666, "loss": 5.0923, "step": 32470 }, { "epoch": 0.04563744715529975, "grad_norm": 1.4972939491271973, "learning_rate": 0.0001368652522130111, "loss": 5.009, "step": 32480 }, { "epoch": 0.045651498093463326, "grad_norm": 1.1737431287765503, "learning_rate": 0.00013690740480539553, "loss": 5.1537, "step": 32490 }, { "epoch": 0.0456655490316269, "grad_norm": 1.1139081716537476, "learning_rate": 0.00013694955739777996, "loss": 5.1087, "step": 32500 }, { "epoch": 0.045679599969790485, "grad_norm": 1.2105811834335327, "learning_rate": 0.0001369917099901644, "loss": 5.0994, "step": 32510 }, { "epoch": 0.04569365090795406, "grad_norm": 1.2313215732574463, "learning_rate": 0.00013703386258254882, "loss": 5.0346, "step": 32520 }, { "epoch": 0.04570770184611764, "grad_norm": 1.1877297163009644, "learning_rate": 0.00013707601517493325, "loss": 5.132, "step": 32530 }, { "epoch": 0.045721752784281214, "grad_norm": 1.1226810216903687, "learning_rate": 0.00013711816776731768, "loss": 5.0537, "step": 32540 }, { "epoch": 
0.04573580372244479, "grad_norm": 1.2374706268310547, "learning_rate": 0.0001371603203597021, "loss": 5.0868, "step": 32550 }, { "epoch": 0.04574985466060837, "grad_norm": 1.1252933740615845, "learning_rate": 0.00013720247295208654, "loss": 5.1987, "step": 32560 }, { "epoch": 0.04576390559877195, "grad_norm": 1.1599668264389038, "learning_rate": 0.00013724462554447097, "loss": 5.0369, "step": 32570 }, { "epoch": 0.045777956536935525, "grad_norm": 1.2881426811218262, "learning_rate": 0.0001372867781368554, "loss": 5.0175, "step": 32580 }, { "epoch": 0.0457920074750991, "grad_norm": 1.1907261610031128, "learning_rate": 0.00013732893072923984, "loss": 5.0749, "step": 32590 }, { "epoch": 0.04580605841326268, "grad_norm": 1.1891196966171265, "learning_rate": 0.00013737108332162427, "loss": 5.1202, "step": 32600 }, { "epoch": 0.04582010935142626, "grad_norm": 1.1137062311172485, "learning_rate": 0.0001374132359140087, "loss": 4.9954, "step": 32610 }, { "epoch": 0.04583416028958984, "grad_norm": 1.1719475984573364, "learning_rate": 0.00013745538850639313, "loss": 4.9482, "step": 32620 }, { "epoch": 0.04584821122775341, "grad_norm": 1.1451879739761353, "learning_rate": 0.00013749754109877756, "loss": 4.9943, "step": 32630 }, { "epoch": 0.04586226216591699, "grad_norm": 1.144889235496521, "learning_rate": 0.000137539693691162, "loss": 5.0704, "step": 32640 }, { "epoch": 0.045876313104080566, "grad_norm": 1.1692876815795898, "learning_rate": 0.00013758184628354642, "loss": 4.9091, "step": 32650 }, { "epoch": 0.04589036404224415, "grad_norm": 1.1342777013778687, "learning_rate": 0.00013762399887593086, "loss": 5.0762, "step": 32660 }, { "epoch": 0.045904414980407725, "grad_norm": 1.1293963193893433, "learning_rate": 0.0001376661514683153, "loss": 5.1474, "step": 32670 }, { "epoch": 0.0459184659185713, "grad_norm": 1.1727898120880127, "learning_rate": 0.00013770830406069972, "loss": 5.0395, "step": 32680 }, { "epoch": 0.04593251685673488, "grad_norm": 1.1893597841262817, 
"learning_rate": 0.00013775045665308415, "loss": 5.0505, "step": 32690 }, { "epoch": 0.04594656779489845, "grad_norm": 1.1680330038070679, "learning_rate": 0.00013779260924546858, "loss": 5.0726, "step": 32700 }, { "epoch": 0.045960618733062036, "grad_norm": 1.1110119819641113, "learning_rate": 0.000137834761837853, "loss": 4.9678, "step": 32710 }, { "epoch": 0.04597466967122561, "grad_norm": 1.1456838846206665, "learning_rate": 0.00013787691443023744, "loss": 5.0045, "step": 32720 }, { "epoch": 0.04598872060938919, "grad_norm": 1.1601930856704712, "learning_rate": 0.00013791906702262187, "loss": 5.044, "step": 32730 }, { "epoch": 0.046002771547552765, "grad_norm": 1.1178258657455444, "learning_rate": 0.00013796121961500633, "loss": 4.9794, "step": 32740 }, { "epoch": 0.04601682248571634, "grad_norm": 1.1255162954330444, "learning_rate": 0.00013800337220739074, "loss": 5.0032, "step": 32750 }, { "epoch": 0.046030873423879924, "grad_norm": 1.1232777833938599, "learning_rate": 0.00013804552479977517, "loss": 5.0948, "step": 32760 }, { "epoch": 0.0460449243620435, "grad_norm": 1.158825159072876, "learning_rate": 0.00013808767739215963, "loss": 5.0024, "step": 32770 }, { "epoch": 0.046058975300207076, "grad_norm": 1.132293462753296, "learning_rate": 0.00013812982998454403, "loss": 5.0553, "step": 32780 }, { "epoch": 0.04607302623837065, "grad_norm": 1.1477060317993164, "learning_rate": 0.00013817198257692846, "loss": 5.016, "step": 32790 }, { "epoch": 0.04608707717653423, "grad_norm": 1.1272825002670288, "learning_rate": 0.00013821413516931292, "loss": 5.0562, "step": 32800 }, { "epoch": 0.046101128114697805, "grad_norm": 1.1596038341522217, "learning_rate": 0.00013825628776169732, "loss": 5.0503, "step": 32810 }, { "epoch": 0.04611517905286139, "grad_norm": 1.131719708442688, "learning_rate": 0.00013829844035408175, "loss": 4.9989, "step": 32820 }, { "epoch": 0.046129229991024964, "grad_norm": 1.1962887048721313, "learning_rate": 0.0001383405929464662, "loss": 5.0298, 
"step": 32830 }, { "epoch": 0.04614328092918854, "grad_norm": 1.133915901184082, "learning_rate": 0.00013838274553885062, "loss": 4.9067, "step": 32840 }, { "epoch": 0.04615733186735212, "grad_norm": 1.2771695852279663, "learning_rate": 0.00013842489813123505, "loss": 5.0036, "step": 32850 }, { "epoch": 0.04617138280551569, "grad_norm": 1.1963088512420654, "learning_rate": 0.0001384670507236195, "loss": 5.0803, "step": 32860 }, { "epoch": 0.046185433743679276, "grad_norm": 1.0991750955581665, "learning_rate": 0.0001385092033160039, "loss": 5.0325, "step": 32870 }, { "epoch": 0.04619948468184285, "grad_norm": 1.179234504699707, "learning_rate": 0.00013855135590838834, "loss": 5.0296, "step": 32880 }, { "epoch": 0.04621353562000643, "grad_norm": 1.1082992553710938, "learning_rate": 0.0001385935085007728, "loss": 5.1175, "step": 32890 }, { "epoch": 0.046227586558170004, "grad_norm": 1.2209382057189941, "learning_rate": 0.00013863566109315723, "loss": 5.0361, "step": 32900 }, { "epoch": 0.04624163749633358, "grad_norm": 1.1588517427444458, "learning_rate": 0.00013867781368554163, "loss": 5.1443, "step": 32910 }, { "epoch": 0.046255688434497164, "grad_norm": 1.0982693433761597, "learning_rate": 0.0001387199662779261, "loss": 5.0724, "step": 32920 }, { "epoch": 0.04626973937266074, "grad_norm": 1.1611078977584839, "learning_rate": 0.00013876211887031052, "loss": 5.127, "step": 32930 }, { "epoch": 0.046283790310824316, "grad_norm": 1.1593987941741943, "learning_rate": 0.00013880427146269493, "loss": 5.1295, "step": 32940 }, { "epoch": 0.04629784124898789, "grad_norm": 1.1332095861434937, "learning_rate": 0.00013884642405507939, "loss": 5.0213, "step": 32950 }, { "epoch": 0.04631189218715147, "grad_norm": 1.1835927963256836, "learning_rate": 0.00013888857664746382, "loss": 5.0062, "step": 32960 }, { "epoch": 0.04632594312531505, "grad_norm": 1.1010524034500122, "learning_rate": 0.00013893072923984822, "loss": 5.1005, "step": 32970 }, { "epoch": 0.04633999406347863, 
"grad_norm": 1.1981157064437866, "learning_rate": 0.00013897288183223268, "loss": 4.8705, "step": 32980 }, { "epoch": 0.046354045001642204, "grad_norm": 1.2006856203079224, "learning_rate": 0.0001390150344246171, "loss": 5.0685, "step": 32990 }, { "epoch": 0.04636809593980578, "grad_norm": 1.1585785150527954, "learning_rate": 0.00013905718701700152, "loss": 5.0302, "step": 33000 }, { "epoch": 0.046382146877969356, "grad_norm": 1.1677348613739014, "learning_rate": 0.00013909933960938597, "loss": 4.9233, "step": 33010 }, { "epoch": 0.04639619781613294, "grad_norm": 1.1076332330703735, "learning_rate": 0.0001391414922017704, "loss": 5.0191, "step": 33020 }, { "epoch": 0.046410248754296515, "grad_norm": 1.0935347080230713, "learning_rate": 0.00013918364479415484, "loss": 5.1139, "step": 33030 }, { "epoch": 0.04642429969246009, "grad_norm": 1.1202341318130493, "learning_rate": 0.00013922579738653927, "loss": 5.0648, "step": 33040 }, { "epoch": 0.04643835063062367, "grad_norm": 1.1524226665496826, "learning_rate": 0.0001392679499789237, "loss": 4.8851, "step": 33050 }, { "epoch": 0.046452401568787244, "grad_norm": 1.19994056224823, "learning_rate": 0.00013931010257130813, "loss": 5.0289, "step": 33060 }, { "epoch": 0.04646645250695083, "grad_norm": 1.1779851913452148, "learning_rate": 0.00013935225516369256, "loss": 5.0094, "step": 33070 }, { "epoch": 0.0464805034451144, "grad_norm": 1.186610460281372, "learning_rate": 0.000139394407756077, "loss": 5.0195, "step": 33080 }, { "epoch": 0.04649455438327798, "grad_norm": 1.1642428636550903, "learning_rate": 0.00013943656034846142, "loss": 5.0686, "step": 33090 }, { "epoch": 0.046508605321441555, "grad_norm": 1.1461211442947388, "learning_rate": 0.00013947871294084585, "loss": 5.1077, "step": 33100 }, { "epoch": 0.04652265625960513, "grad_norm": 1.1319652795791626, "learning_rate": 0.00013952086553323029, "loss": 5.0228, "step": 33110 }, { "epoch": 0.04653670719776871, "grad_norm": 1.2022271156311035, "learning_rate": 
0.00013956301812561472, "loss": 5.0792, "step": 33120 }, { "epoch": 0.04655075813593229, "grad_norm": 1.1919790506362915, "learning_rate": 0.00013960517071799915, "loss": 5.0433, "step": 33130 }, { "epoch": 0.04656480907409587, "grad_norm": 1.1596547365188599, "learning_rate": 0.00013964732331038358, "loss": 5.0325, "step": 33140 }, { "epoch": 0.04657886001225944, "grad_norm": 1.16820228099823, "learning_rate": 0.000139689475902768, "loss": 5.0048, "step": 33150 }, { "epoch": 0.04659291095042302, "grad_norm": 1.422676682472229, "learning_rate": 0.00013973162849515244, "loss": 4.9945, "step": 33160 }, { "epoch": 0.046606961888586595, "grad_norm": 1.1482213735580444, "learning_rate": 0.00013977378108753687, "loss": 4.9878, "step": 33170 }, { "epoch": 0.04662101282675018, "grad_norm": 1.2206376791000366, "learning_rate": 0.0001398159336799213, "loss": 5.1302, "step": 33180 }, { "epoch": 0.046635063764913755, "grad_norm": 1.1734592914581299, "learning_rate": 0.00013985808627230573, "loss": 4.9858, "step": 33190 }, { "epoch": 0.04664911470307733, "grad_norm": 1.1573774814605713, "learning_rate": 0.00013990023886469017, "loss": 5.0833, "step": 33200 }, { "epoch": 0.04666316564124091, "grad_norm": 1.1239194869995117, "learning_rate": 0.0001399423914570746, "loss": 4.9798, "step": 33210 }, { "epoch": 0.04667721657940448, "grad_norm": 1.2762165069580078, "learning_rate": 0.00013998454404945903, "loss": 4.9813, "step": 33220 }, { "epoch": 0.046691267517568066, "grad_norm": 1.1590991020202637, "learning_rate": 0.00014002669664184346, "loss": 5.0678, "step": 33230 }, { "epoch": 0.04670531845573164, "grad_norm": 1.2096633911132812, "learning_rate": 0.0001400688492342279, "loss": 5.2786, "step": 33240 }, { "epoch": 0.04671936939389522, "grad_norm": 1.1752020120620728, "learning_rate": 0.00014011100182661232, "loss": 5.097, "step": 33250 }, { "epoch": 0.046733420332058795, "grad_norm": 1.1489932537078857, "learning_rate": 0.00014015315441899675, "loss": 5.1115, "step": 33260 }, { 
"epoch": 0.04674747127022237, "grad_norm": 1.2402178049087524, "learning_rate": 0.00014019530701138118, "loss": 5.0294, "step": 33270 }, { "epoch": 0.046761522208385954, "grad_norm": 1.109150767326355, "learning_rate": 0.00014023745960376561, "loss": 5.1305, "step": 33280 }, { "epoch": 0.04677557314654953, "grad_norm": 1.1309486627578735, "learning_rate": 0.00014027961219615005, "loss": 4.9889, "step": 33290 }, { "epoch": 0.046789624084713106, "grad_norm": 1.1985018253326416, "learning_rate": 0.00014032176478853448, "loss": 5.0472, "step": 33300 }, { "epoch": 0.04680367502287668, "grad_norm": 1.231369972229004, "learning_rate": 0.0001403639173809189, "loss": 5.0311, "step": 33310 }, { "epoch": 0.04681772596104026, "grad_norm": 1.1748765707015991, "learning_rate": 0.00014040606997330337, "loss": 4.9819, "step": 33320 }, { "epoch": 0.04683177689920384, "grad_norm": 1.2126888036727905, "learning_rate": 0.00014044822256568777, "loss": 4.9947, "step": 33330 }, { "epoch": 0.04684582783736742, "grad_norm": 1.2520742416381836, "learning_rate": 0.0001404903751580722, "loss": 4.9942, "step": 33340 }, { "epoch": 0.046859878775530994, "grad_norm": 1.1721147298812866, "learning_rate": 0.00014053252775045666, "loss": 5.0119, "step": 33350 }, { "epoch": 0.04687392971369457, "grad_norm": 1.1375186443328857, "learning_rate": 0.00014057468034284106, "loss": 5.1042, "step": 33360 }, { "epoch": 0.04688798065185815, "grad_norm": 1.1971328258514404, "learning_rate": 0.0001406168329352255, "loss": 5.0726, "step": 33370 }, { "epoch": 0.04690203159002173, "grad_norm": 1.4078949689865112, "learning_rate": 0.00014065898552760995, "loss": 5.0296, "step": 33380 }, { "epoch": 0.046916082528185306, "grad_norm": 1.2646558284759521, "learning_rate": 0.00014070113811999436, "loss": 5.0794, "step": 33390 }, { "epoch": 0.04693013346634888, "grad_norm": 1.107875943183899, "learning_rate": 0.0001407432907123788, "loss": 5.0813, "step": 33400 }, { "epoch": 0.04694418440451246, "grad_norm": 
1.1319985389709473, "learning_rate": 0.00014078544330476325, "loss": 5.1663, "step": 33410 }, { "epoch": 0.046958235342676034, "grad_norm": 1.0848567485809326, "learning_rate": 0.00014082759589714765, "loss": 5.1029, "step": 33420 }, { "epoch": 0.04697228628083961, "grad_norm": 1.1160449981689453, "learning_rate": 0.00014086974848953208, "loss": 4.9525, "step": 33430 }, { "epoch": 0.046986337219003194, "grad_norm": 1.1369086503982544, "learning_rate": 0.00014091190108191654, "loss": 5.0147, "step": 33440 }, { "epoch": 0.04700038815716677, "grad_norm": 1.1825052499771118, "learning_rate": 0.00014095405367430097, "loss": 5.0531, "step": 33450 }, { "epoch": 0.047014439095330346, "grad_norm": 1.1444425582885742, "learning_rate": 0.00014099620626668538, "loss": 5.0372, "step": 33460 }, { "epoch": 0.04702849003349392, "grad_norm": 1.1329076290130615, "learning_rate": 0.00014103835885906983, "loss": 5.189, "step": 33470 }, { "epoch": 0.0470425409716575, "grad_norm": 1.1967828273773193, "learning_rate": 0.00014108051145145427, "loss": 5.0807, "step": 33480 }, { "epoch": 0.04705659190982108, "grad_norm": 1.1986347436904907, "learning_rate": 0.00014112266404383867, "loss": 5.0648, "step": 33490 }, { "epoch": 0.04707064284798466, "grad_norm": 1.131181001663208, "learning_rate": 0.00014116481663622313, "loss": 5.0826, "step": 33500 }, { "epoch": 0.047084693786148234, "grad_norm": 1.1698004007339478, "learning_rate": 0.00014120696922860756, "loss": 5.0187, "step": 33510 }, { "epoch": 0.04709874472431181, "grad_norm": 1.186985731124878, "learning_rate": 0.00014124912182099196, "loss": 5.071, "step": 33520 }, { "epoch": 0.047112795662475386, "grad_norm": 1.1888569593429565, "learning_rate": 0.00014129127441337642, "loss": 4.9117, "step": 33530 }, { "epoch": 0.04712684660063897, "grad_norm": 1.1342166662216187, "learning_rate": 0.00014133342700576085, "loss": 5.0269, "step": 33540 }, { "epoch": 0.047140897538802545, "grad_norm": 1.2070249319076538, "learning_rate": 
0.00014137557959814526, "loss": 4.9468, "step": 33550 }, { "epoch": 0.04715494847696612, "grad_norm": 1.1142017841339111, "learning_rate": 0.00014141773219052971, "loss": 5.2021, "step": 33560 }, { "epoch": 0.0471689994151297, "grad_norm": 1.092371940612793, "learning_rate": 0.00014145988478291415, "loss": 5.0756, "step": 33570 }, { "epoch": 0.047183050353293274, "grad_norm": 1.1797840595245361, "learning_rate": 0.00014150203737529855, "loss": 5.0859, "step": 33580 }, { "epoch": 0.04719710129145686, "grad_norm": 1.177193522453308, "learning_rate": 0.000141544189967683, "loss": 4.9599, "step": 33590 }, { "epoch": 0.04721115222962043, "grad_norm": 1.1384317874908447, "learning_rate": 0.00014158634256006744, "loss": 5.0421, "step": 33600 }, { "epoch": 0.04722520316778401, "grad_norm": 1.1723968982696533, "learning_rate": 0.00014162849515245187, "loss": 5.0571, "step": 33610 }, { "epoch": 0.047239254105947585, "grad_norm": 1.1619266271591187, "learning_rate": 0.0001416706477448363, "loss": 5.114, "step": 33620 }, { "epoch": 0.04725330504411116, "grad_norm": 1.1768392324447632, "learning_rate": 0.00014171280033722073, "loss": 4.9927, "step": 33630 }, { "epoch": 0.047267355982274745, "grad_norm": 1.1438103914260864, "learning_rate": 0.00014175495292960516, "loss": 5.0783, "step": 33640 }, { "epoch": 0.04728140692043832, "grad_norm": 1.165272831916809, "learning_rate": 0.0001417971055219896, "loss": 4.9889, "step": 33650 }, { "epoch": 0.0472954578586019, "grad_norm": 1.2394903898239136, "learning_rate": 0.00014183925811437403, "loss": 5.0751, "step": 33660 }, { "epoch": 0.04730950879676547, "grad_norm": 1.2130417823791504, "learning_rate": 0.00014188141070675846, "loss": 5.1452, "step": 33670 }, { "epoch": 0.04732355973492905, "grad_norm": 1.100905418395996, "learning_rate": 0.0001419235632991429, "loss": 5.1668, "step": 33680 }, { "epoch": 0.04733761067309263, "grad_norm": 1.1132055521011353, "learning_rate": 0.00014196571589152732, "loss": 4.9762, "step": 33690 }, { 
"epoch": 0.04735166161125621, "grad_norm": 1.1489577293395996, "learning_rate": 0.00014200786848391175, "loss": 5.0032, "step": 33700 }, { "epoch": 0.047365712549419785, "grad_norm": 1.1104612350463867, "learning_rate": 0.00014205002107629618, "loss": 5.0617, "step": 33710 }, { "epoch": 0.04737976348758336, "grad_norm": 1.148146629333496, "learning_rate": 0.0001420921736686806, "loss": 5.0499, "step": 33720 }, { "epoch": 0.04739381442574694, "grad_norm": 1.1576919555664062, "learning_rate": 0.00014213432626106504, "loss": 5.0133, "step": 33730 }, { "epoch": 0.04740786536391051, "grad_norm": 1.1263247728347778, "learning_rate": 0.00014217647885344948, "loss": 4.8634, "step": 33740 }, { "epoch": 0.047421916302074096, "grad_norm": 1.168540358543396, "learning_rate": 0.0001422186314458339, "loss": 4.9549, "step": 33750 }, { "epoch": 0.04743596724023767, "grad_norm": 1.1476012468338013, "learning_rate": 0.00014226078403821834, "loss": 5.0192, "step": 33760 }, { "epoch": 0.04745001817840125, "grad_norm": 1.1322742700576782, "learning_rate": 0.00014230293663060277, "loss": 5.1356, "step": 33770 }, { "epoch": 0.047464069116564825, "grad_norm": 1.147298812866211, "learning_rate": 0.0001423450892229872, "loss": 5.0278, "step": 33780 }, { "epoch": 0.0474781200547284, "grad_norm": 1.1303203105926514, "learning_rate": 0.00014238724181537163, "loss": 5.0996, "step": 33790 }, { "epoch": 0.047492170992891984, "grad_norm": 1.0890021324157715, "learning_rate": 0.00014242939440775606, "loss": 5.0753, "step": 33800 }, { "epoch": 0.04750622193105556, "grad_norm": 1.163889765739441, "learning_rate": 0.0001424715470001405, "loss": 5.0723, "step": 33810 }, { "epoch": 0.047520272869219136, "grad_norm": 1.1471232175827026, "learning_rate": 0.00014251369959252492, "loss": 5.0793, "step": 33820 }, { "epoch": 0.04753432380738271, "grad_norm": 1.135847568511963, "learning_rate": 0.00014255585218490936, "loss": 5.1346, "step": 33830 }, { "epoch": 0.04754837474554629, "grad_norm": 
1.1431607007980347, "learning_rate": 0.0001425980047772938, "loss": 5.1053, "step": 33840 }, { "epoch": 0.04756242568370987, "grad_norm": 1.1063638925552368, "learning_rate": 0.00014264015736967822, "loss": 4.9736, "step": 33850 }, { "epoch": 0.04757647662187345, "grad_norm": 1.1206468343734741, "learning_rate": 0.00014268230996206265, "loss": 4.9508, "step": 33860 }, { "epoch": 0.047590527560037024, "grad_norm": 1.234711766242981, "learning_rate": 0.00014272446255444708, "loss": 4.9879, "step": 33870 }, { "epoch": 0.0476045784982006, "grad_norm": 1.2123228311538696, "learning_rate": 0.0001427666151468315, "loss": 4.8591, "step": 33880 }, { "epoch": 0.04761862943636418, "grad_norm": 1.330238938331604, "learning_rate": 0.00014280876773921594, "loss": 4.9129, "step": 33890 }, { "epoch": 0.04763268037452776, "grad_norm": 1.1890876293182373, "learning_rate": 0.0001428509203316004, "loss": 5.018, "step": 33900 }, { "epoch": 0.047646731312691336, "grad_norm": 1.2122526168823242, "learning_rate": 0.0001428930729239848, "loss": 5.1118, "step": 33910 }, { "epoch": 0.04766078225085491, "grad_norm": 1.1024436950683594, "learning_rate": 0.00014293522551636924, "loss": 4.8631, "step": 33920 }, { "epoch": 0.04767483318901849, "grad_norm": 1.12148118019104, "learning_rate": 0.0001429773781087537, "loss": 5.0444, "step": 33930 }, { "epoch": 0.047688884127182064, "grad_norm": 1.1458978652954102, "learning_rate": 0.0001430195307011381, "loss": 5.1447, "step": 33940 }, { "epoch": 0.04770293506534565, "grad_norm": 1.1465718746185303, "learning_rate": 0.00014306168329352253, "loss": 4.968, "step": 33950 }, { "epoch": 0.047716986003509224, "grad_norm": 1.162516713142395, "learning_rate": 0.000143103835885907, "loss": 5.0728, "step": 33960 }, { "epoch": 0.0477310369416728, "grad_norm": 1.1164641380310059, "learning_rate": 0.0001431459884782914, "loss": 4.9858, "step": 33970 }, { "epoch": 0.047745087879836376, "grad_norm": 1.1258015632629395, "learning_rate": 0.00014318814107067582, 
"loss": 5.0594, "step": 33980 }, { "epoch": 0.04775913881799995, "grad_norm": 1.1459895372390747, "learning_rate": 0.00014323029366306028, "loss": 4.987, "step": 33990 }, { "epoch": 0.047773189756163535, "grad_norm": 1.16275155544281, "learning_rate": 0.00014327244625544469, "loss": 5.0785, "step": 34000 }, { "epoch": 0.04778724069432711, "grad_norm": 1.1919983625411987, "learning_rate": 0.00014331459884782912, "loss": 5.0735, "step": 34010 }, { "epoch": 0.04780129163249069, "grad_norm": 1.0925780534744263, "learning_rate": 0.00014335675144021358, "loss": 5.1679, "step": 34020 }, { "epoch": 0.047815342570654264, "grad_norm": 1.1478568315505981, "learning_rate": 0.000143398904032598, "loss": 5.1252, "step": 34030 }, { "epoch": 0.04782939350881784, "grad_norm": 1.1364628076553345, "learning_rate": 0.0001434410566249824, "loss": 4.9149, "step": 34040 }, { "epoch": 0.047843444446981416, "grad_norm": 1.130101203918457, "learning_rate": 0.00014348320921736687, "loss": 4.909, "step": 34050 }, { "epoch": 0.047857495385145, "grad_norm": 1.151053547859192, "learning_rate": 0.0001435253618097513, "loss": 5.0317, "step": 34060 }, { "epoch": 0.047871546323308575, "grad_norm": 1.158276081085205, "learning_rate": 0.0001435675144021357, "loss": 5.0723, "step": 34070 }, { "epoch": 0.04788559726147215, "grad_norm": 1.2227519750595093, "learning_rate": 0.00014360966699452016, "loss": 5.0276, "step": 34080 }, { "epoch": 0.04789964819963573, "grad_norm": 1.1406713724136353, "learning_rate": 0.0001436518195869046, "loss": 5.0192, "step": 34090 }, { "epoch": 0.047913699137799304, "grad_norm": 1.1668028831481934, "learning_rate": 0.000143693972179289, "loss": 4.9819, "step": 34100 }, { "epoch": 0.04792775007596289, "grad_norm": 1.1254831552505493, "learning_rate": 0.00014373612477167346, "loss": 5.0085, "step": 34110 }, { "epoch": 0.04794180101412646, "grad_norm": 1.1782492399215698, "learning_rate": 0.0001437782773640579, "loss": 5.087, "step": 34120 }, { "epoch": 0.04795585195229004, 
"grad_norm": 1.125497817993164, "learning_rate": 0.0001438204299564423, "loss": 5.0525, "step": 34130 }, { "epoch": 0.047969902890453615, "grad_norm": 1.1250324249267578, "learning_rate": 0.00014386258254882675, "loss": 5.0964, "step": 34140 }, { "epoch": 0.04798395382861719, "grad_norm": 1.1316850185394287, "learning_rate": 0.00014390473514121118, "loss": 5.0596, "step": 34150 }, { "epoch": 0.047998004766780775, "grad_norm": 1.158501386642456, "learning_rate": 0.0001439468877335956, "loss": 5.0526, "step": 34160 }, { "epoch": 0.04801205570494435, "grad_norm": 1.1655758619308472, "learning_rate": 0.00014398904032598004, "loss": 5.0539, "step": 34170 }, { "epoch": 0.04802610664310793, "grad_norm": 1.1360056400299072, "learning_rate": 0.00014403119291836447, "loss": 5.0661, "step": 34180 }, { "epoch": 0.0480401575812715, "grad_norm": 1.1133544445037842, "learning_rate": 0.0001440733455107489, "loss": 5.0339, "step": 34190 }, { "epoch": 0.04805420851943508, "grad_norm": 1.0935070514678955, "learning_rate": 0.00014411549810313334, "loss": 4.9833, "step": 34200 }, { "epoch": 0.04806825945759866, "grad_norm": 1.0671685934066772, "learning_rate": 0.00014415765069551777, "loss": 4.9763, "step": 34210 }, { "epoch": 0.04808231039576224, "grad_norm": 1.1430456638336182, "learning_rate": 0.0001441998032879022, "loss": 4.9482, "step": 34220 }, { "epoch": 0.048096361333925815, "grad_norm": 1.1322462558746338, "learning_rate": 0.00014424195588028663, "loss": 4.9501, "step": 34230 }, { "epoch": 0.04811041227208939, "grad_norm": 1.121475100517273, "learning_rate": 0.00014428410847267106, "loss": 5.0683, "step": 34240 }, { "epoch": 0.04812446321025297, "grad_norm": 1.1033834218978882, "learning_rate": 0.0001443262610650555, "loss": 4.9089, "step": 34250 }, { "epoch": 0.04813851414841655, "grad_norm": 1.1717206239700317, "learning_rate": 0.00014436841365743992, "loss": 4.9873, "step": 34260 }, { "epoch": 0.048152565086580126, "grad_norm": 1.1643035411834717, "learning_rate": 
0.00014441056624982435, "loss": 5.0396, "step": 34270 }, { "epoch": 0.0481666160247437, "grad_norm": 1.109316110610962, "learning_rate": 0.00014445271884220879, "loss": 5.0065, "step": 34280 }, { "epoch": 0.04818066696290728, "grad_norm": 1.0868771076202393, "learning_rate": 0.00014449487143459322, "loss": 5.0783, "step": 34290 }, { "epoch": 0.048194717901070855, "grad_norm": 1.1115925312042236, "learning_rate": 0.00014453702402697765, "loss": 4.9573, "step": 34300 }, { "epoch": 0.04820876883923444, "grad_norm": 1.14295494556427, "learning_rate": 0.00014457496136012363, "loss": 5.016, "step": 34310 }, { "epoch": 0.048222819777398014, "grad_norm": 1.148398518562317, "learning_rate": 0.00014461711395250806, "loss": 5.147, "step": 34320 }, { "epoch": 0.04823687071556159, "grad_norm": 1.1277004480361938, "learning_rate": 0.0001446592665448925, "loss": 5.1473, "step": 34330 }, { "epoch": 0.048250921653725166, "grad_norm": 1.186437964439392, "learning_rate": 0.00014470141913727692, "loss": 4.9353, "step": 34340 }, { "epoch": 0.04826497259188874, "grad_norm": 1.131174921989441, "learning_rate": 0.00014474357172966136, "loss": 5.0711, "step": 34350 }, { "epoch": 0.04827902353005232, "grad_norm": 1.1172866821289062, "learning_rate": 0.0001447857243220458, "loss": 5.1223, "step": 34360 }, { "epoch": 0.0482930744682159, "grad_norm": 1.179067850112915, "learning_rate": 0.00014482787691443022, "loss": 4.9628, "step": 34370 }, { "epoch": 0.04830712540637948, "grad_norm": 1.1836308240890503, "learning_rate": 0.00014487002950681465, "loss": 5.0934, "step": 34380 }, { "epoch": 0.048321176344543054, "grad_norm": 1.1305451393127441, "learning_rate": 0.00014491218209919908, "loss": 5.0188, "step": 34390 }, { "epoch": 0.04833522728270663, "grad_norm": 1.113901138305664, "learning_rate": 0.0001449543346915835, "loss": 5.0124, "step": 34400 }, { "epoch": 0.04834927822087021, "grad_norm": 1.1200542449951172, "learning_rate": 0.00014499648728396794, "loss": 5.1159, "step": 34410 }, { 
"epoch": 0.04836332915903379, "grad_norm": 1.1390150785446167, "learning_rate": 0.00014503863987635237, "loss": 5.0467, "step": 34420 }, { "epoch": 0.048377380097197366, "grad_norm": 1.1669032573699951, "learning_rate": 0.0001450807924687368, "loss": 5.0509, "step": 34430 }, { "epoch": 0.04839143103536094, "grad_norm": 1.1723250150680542, "learning_rate": 0.00014512294506112124, "loss": 4.8566, "step": 34440 }, { "epoch": 0.04840548197352452, "grad_norm": 1.2041908502578735, "learning_rate": 0.0001451650976535057, "loss": 5.0341, "step": 34450 }, { "epoch": 0.048419532911688094, "grad_norm": 1.1292054653167725, "learning_rate": 0.0001452072502458901, "loss": 5.0321, "step": 34460 }, { "epoch": 0.04843358384985168, "grad_norm": 1.12458074092865, "learning_rate": 0.00014524940283827453, "loss": 5.0766, "step": 34470 }, { "epoch": 0.048447634788015254, "grad_norm": 1.1062582731246948, "learning_rate": 0.000145291555430659, "loss": 5.0649, "step": 34480 }, { "epoch": 0.04846168572617883, "grad_norm": 1.124700665473938, "learning_rate": 0.0001453337080230434, "loss": 4.9559, "step": 34490 }, { "epoch": 0.048475736664342406, "grad_norm": 1.1494004726409912, "learning_rate": 0.00014537586061542782, "loss": 5.0064, "step": 34500 }, { "epoch": 0.04848978760250598, "grad_norm": 1.1343144178390503, "learning_rate": 0.00014541801320781228, "loss": 5.0244, "step": 34510 }, { "epoch": 0.048503838540669565, "grad_norm": 1.0855003595352173, "learning_rate": 0.00014546016580019669, "loss": 5.0812, "step": 34520 }, { "epoch": 0.04851788947883314, "grad_norm": 1.1069141626358032, "learning_rate": 0.00014550231839258112, "loss": 5.0764, "step": 34530 }, { "epoch": 0.04853194041699672, "grad_norm": 1.470924735069275, "learning_rate": 0.00014554447098496557, "loss": 5.0076, "step": 34540 }, { "epoch": 0.048545991355160294, "grad_norm": 1.1423304080963135, "learning_rate": 0.00014558662357734998, "loss": 5.008, "step": 34550 }, { "epoch": 0.04856004229332387, "grad_norm": 
1.164551854133606, "learning_rate": 0.00014562877616973444, "loss": 4.9289, "step": 34560 }, { "epoch": 0.04857409323148745, "grad_norm": 1.0787816047668457, "learning_rate": 0.00014567092876211887, "loss": 5.0759, "step": 34570 }, { "epoch": 0.04858814416965103, "grad_norm": 1.1306999921798706, "learning_rate": 0.0001457130813545033, "loss": 5.0986, "step": 34580 }, { "epoch": 0.048602195107814605, "grad_norm": 1.1416428089141846, "learning_rate": 0.00014575523394688773, "loss": 5.0618, "step": 34590 }, { "epoch": 0.04861624604597818, "grad_norm": 1.1224125623703003, "learning_rate": 0.00014579738653927216, "loss": 5.0685, "step": 34600 }, { "epoch": 0.04863029698414176, "grad_norm": 1.1280869245529175, "learning_rate": 0.0001458395391316566, "loss": 4.9973, "step": 34610 }, { "epoch": 0.04864434792230534, "grad_norm": 1.1328657865524292, "learning_rate": 0.00014588169172404102, "loss": 5.0089, "step": 34620 }, { "epoch": 0.04865839886046892, "grad_norm": 1.183826208114624, "learning_rate": 0.00014592384431642546, "loss": 4.9232, "step": 34630 }, { "epoch": 0.04867244979863249, "grad_norm": 1.1926147937774658, "learning_rate": 0.00014596599690880989, "loss": 4.9648, "step": 34640 }, { "epoch": 0.04868650073679607, "grad_norm": 1.1898232698440552, "learning_rate": 0.00014600814950119432, "loss": 4.9844, "step": 34650 }, { "epoch": 0.048700551674959645, "grad_norm": 1.1884058713912964, "learning_rate": 0.00014605030209357875, "loss": 5.0502, "step": 34660 }, { "epoch": 0.04871460261312322, "grad_norm": 1.1177476644515991, "learning_rate": 0.00014609245468596318, "loss": 5.0569, "step": 34670 }, { "epoch": 0.048728653551286805, "grad_norm": 1.3436133861541748, "learning_rate": 0.0001461346072783476, "loss": 5.0094, "step": 34680 }, { "epoch": 0.04874270448945038, "grad_norm": 1.1321064233779907, "learning_rate": 0.00014617675987073204, "loss": 5.0142, "step": 34690 }, { "epoch": 0.04875675542761396, "grad_norm": 1.104989767074585, "learning_rate": 
0.00014621891246311647, "loss": 5.0279, "step": 34700 }, { "epoch": 0.04877080636577753, "grad_norm": 1.1625908613204956, "learning_rate": 0.0001462610650555009, "loss": 4.9406, "step": 34710 }, { "epoch": 0.04878485730394111, "grad_norm": 1.1442335844039917, "learning_rate": 0.00014630321764788534, "loss": 4.9705, "step": 34720 }, { "epoch": 0.04879890824210469, "grad_norm": 1.1111855506896973, "learning_rate": 0.00014634537024026977, "loss": 5.003, "step": 34730 }, { "epoch": 0.04881295918026827, "grad_norm": 1.1129965782165527, "learning_rate": 0.0001463875228326542, "loss": 4.8758, "step": 34740 }, { "epoch": 0.048827010118431845, "grad_norm": 1.1186314821243286, "learning_rate": 0.00014642967542503863, "loss": 5.0139, "step": 34750 }, { "epoch": 0.04884106105659542, "grad_norm": 1.150681972503662, "learning_rate": 0.00014647182801742306, "loss": 4.9994, "step": 34760 }, { "epoch": 0.048855111994759, "grad_norm": 1.1509026288986206, "learning_rate": 0.0001465139806098075, "loss": 5.0786, "step": 34770 }, { "epoch": 0.04886916293292258, "grad_norm": 1.1005713939666748, "learning_rate": 0.00014655613320219192, "loss": 4.9926, "step": 34780 }, { "epoch": 0.048883213871086156, "grad_norm": 1.176209807395935, "learning_rate": 0.00014659828579457635, "loss": 5.0281, "step": 34790 }, { "epoch": 0.04889726480924973, "grad_norm": 1.1285464763641357, "learning_rate": 0.00014664043838696078, "loss": 4.8861, "step": 34800 }, { "epoch": 0.04891131574741331, "grad_norm": 1.1435015201568604, "learning_rate": 0.00014668259097934522, "loss": 4.9688, "step": 34810 }, { "epoch": 0.048925366685576885, "grad_norm": 1.1347907781600952, "learning_rate": 0.00014672474357172965, "loss": 5.0083, "step": 34820 }, { "epoch": 0.04893941762374047, "grad_norm": 1.1737812757492065, "learning_rate": 0.00014676689616411408, "loss": 5.0881, "step": 34830 }, { "epoch": 0.048953468561904044, "grad_norm": 1.0986801385879517, "learning_rate": 0.0001468090487564985, "loss": 5.0823, "step": 34840 }, { 
"epoch": 0.04896751950006762, "grad_norm": 1.218393325805664, "learning_rate": 0.00014685120134888294, "loss": 5.0719, "step": 34850 }, { "epoch": 0.048981570438231196, "grad_norm": 1.0812214612960815, "learning_rate": 0.00014689335394126737, "loss": 5.0184, "step": 34860 }, { "epoch": 0.04899562137639477, "grad_norm": 1.2031632661819458, "learning_rate": 0.0001469355065336518, "loss": 5.0449, "step": 34870 }, { "epoch": 0.049009672314558356, "grad_norm": 1.109456181526184, "learning_rate": 0.00014697765912603623, "loss": 5.1284, "step": 34880 }, { "epoch": 0.04902372325272193, "grad_norm": 1.1223009824752808, "learning_rate": 0.00014701981171842067, "loss": 4.9705, "step": 34890 }, { "epoch": 0.04903777419088551, "grad_norm": 1.125307559967041, "learning_rate": 0.0001470619643108051, "loss": 4.9975, "step": 34900 }, { "epoch": 0.049051825129049084, "grad_norm": 1.1051353216171265, "learning_rate": 0.00014710411690318953, "loss": 4.9682, "step": 34910 }, { "epoch": 0.04906587606721266, "grad_norm": 1.0811420679092407, "learning_rate": 0.00014714626949557396, "loss": 5.0424, "step": 34920 }, { "epoch": 0.049079927005376243, "grad_norm": 1.1198779344558716, "learning_rate": 0.0001471884220879584, "loss": 5.0246, "step": 34930 }, { "epoch": 0.04909397794353982, "grad_norm": 1.1411025524139404, "learning_rate": 0.00014723057468034282, "loss": 5.0529, "step": 34940 }, { "epoch": 0.049108028881703396, "grad_norm": 1.1323539018630981, "learning_rate": 0.00014727272727272725, "loss": 4.9568, "step": 34950 }, { "epoch": 0.04912207981986697, "grad_norm": 1.1151742935180664, "learning_rate": 0.00014731487986511168, "loss": 4.9563, "step": 34960 }, { "epoch": 0.04913613075803055, "grad_norm": 1.1359392404556274, "learning_rate": 0.00014735703245749611, "loss": 5.112, "step": 34970 }, { "epoch": 0.049150181696194124, "grad_norm": 1.0861750841140747, "learning_rate": 0.00014739918504988055, "loss": 4.9891, "step": 34980 }, { "epoch": 0.04916423263435771, "grad_norm": 
1.2048242092132568, "learning_rate": 0.00014744133764226498, "loss": 5.0255, "step": 34990 }, { "epoch": 0.049178283572521284, "grad_norm": 1.1281739473342896, "learning_rate": 0.00014748349023464944, "loss": 5.1152, "step": 35000 }, { "epoch": 0.04919233451068486, "grad_norm": 1.1170549392700195, "learning_rate": 0.00014752564282703384, "loss": 4.8994, "step": 35010 }, { "epoch": 0.049206385448848436, "grad_norm": 1.1060420274734497, "learning_rate": 0.00014756779541941827, "loss": 5.1206, "step": 35020 }, { "epoch": 0.04922043638701201, "grad_norm": 1.105675458908081, "learning_rate": 0.00014760994801180273, "loss": 4.9421, "step": 35030 }, { "epoch": 0.049234487325175595, "grad_norm": 1.1171681880950928, "learning_rate": 0.00014765210060418713, "loss": 4.9546, "step": 35040 }, { "epoch": 0.04924853826333917, "grad_norm": 1.1521064043045044, "learning_rate": 0.00014769425319657156, "loss": 4.8988, "step": 35050 }, { "epoch": 0.04926258920150275, "grad_norm": 1.110176920890808, "learning_rate": 0.00014773640578895602, "loss": 4.9492, "step": 35060 }, { "epoch": 0.049276640139666324, "grad_norm": 1.0732078552246094, "learning_rate": 0.00014777855838134043, "loss": 5.0328, "step": 35070 }, { "epoch": 0.0492906910778299, "grad_norm": 1.1150749921798706, "learning_rate": 0.00014782071097372486, "loss": 5.1412, "step": 35080 }, { "epoch": 0.04930474201599348, "grad_norm": 1.1571887731552124, "learning_rate": 0.00014786286356610932, "loss": 4.9802, "step": 35090 }, { "epoch": 0.04931879295415706, "grad_norm": 1.0949782133102417, "learning_rate": 0.00014790501615849372, "loss": 4.9798, "step": 35100 }, { "epoch": 0.049332843892320635, "grad_norm": 1.1114603281021118, "learning_rate": 0.00014794716875087815, "loss": 4.9468, "step": 35110 }, { "epoch": 0.04934689483048421, "grad_norm": 1.0757004022598267, "learning_rate": 0.0001479893213432626, "loss": 4.8796, "step": 35120 }, { "epoch": 0.04936094576864779, "grad_norm": 1.1924103498458862, "learning_rate": 
0.000148031473935647, "loss": 4.932, "step": 35130 }, { "epoch": 0.04937499670681137, "grad_norm": 1.1277077198028564, "learning_rate": 0.00014807362652803147, "loss": 5.0101, "step": 35140 }, { "epoch": 0.04938904764497495, "grad_norm": 1.1275465488433838, "learning_rate": 0.0001481157791204159, "loss": 4.9974, "step": 35150 }, { "epoch": 0.04940309858313852, "grad_norm": 1.1476144790649414, "learning_rate": 0.00014815793171280033, "loss": 5.0018, "step": 35160 }, { "epoch": 0.0494171495213021, "grad_norm": 1.1097756624221802, "learning_rate": 0.00014820008430518477, "loss": 5.0057, "step": 35170 }, { "epoch": 0.049431200459465675, "grad_norm": 1.1443592309951782, "learning_rate": 0.0001482422368975692, "loss": 4.9678, "step": 35180 }, { "epoch": 0.04944525139762926, "grad_norm": 1.1339356899261475, "learning_rate": 0.00014828438948995363, "loss": 4.9923, "step": 35190 }, { "epoch": 0.049459302335792835, "grad_norm": 1.0916308164596558, "learning_rate": 0.00014832654208233806, "loss": 5.0241, "step": 35200 }, { "epoch": 0.04947335327395641, "grad_norm": 1.1227270364761353, "learning_rate": 0.0001483686946747225, "loss": 4.9788, "step": 35210 }, { "epoch": 0.04948740421211999, "grad_norm": 1.126926064491272, "learning_rate": 0.00014841084726710692, "loss": 4.8996, "step": 35220 }, { "epoch": 0.04950145515028356, "grad_norm": 1.1398160457611084, "learning_rate": 0.00014845299985949135, "loss": 4.9572, "step": 35230 }, { "epoch": 0.049515506088447146, "grad_norm": 1.1179039478302002, "learning_rate": 0.00014849515245187578, "loss": 4.9517, "step": 35240 }, { "epoch": 0.04952955702661072, "grad_norm": 1.1064491271972656, "learning_rate": 0.00014853730504426021, "loss": 4.8927, "step": 35250 }, { "epoch": 0.0495436079647743, "grad_norm": 1.128804087638855, "learning_rate": 0.00014857945763664465, "loss": 5.0498, "step": 35260 }, { "epoch": 0.049557658902937875, "grad_norm": 1.1051520109176636, "learning_rate": 0.00014862161022902908, "loss": 5.0604, "step": 35270 }, { 
"epoch": 0.04957170984110145, "grad_norm": 1.1551424264907837, "learning_rate": 0.0001486637628214135, "loss": 4.917, "step": 35280 }, { "epoch": 0.049585760779265034, "grad_norm": 1.081084132194519, "learning_rate": 0.00014870591541379794, "loss": 5.2189, "step": 35290 }, { "epoch": 0.04959981171742861, "grad_norm": 1.1549731492996216, "learning_rate": 0.00014874806800618237, "loss": 4.9709, "step": 35300 }, { "epoch": 0.049613862655592186, "grad_norm": 1.1668425798416138, "learning_rate": 0.0001487902205985668, "loss": 5.0498, "step": 35310 }, { "epoch": 0.04962791359375576, "grad_norm": 1.143610954284668, "learning_rate": 0.00014883237319095123, "loss": 5.0319, "step": 35320 }, { "epoch": 0.04964196453191934, "grad_norm": 1.1000237464904785, "learning_rate": 0.00014887452578333566, "loss": 5.0029, "step": 35330 }, { "epoch": 0.049656015470082915, "grad_norm": 1.1435863971710205, "learning_rate": 0.0001489166783757201, "loss": 4.8895, "step": 35340 }, { "epoch": 0.0496700664082465, "grad_norm": 1.1478296518325806, "learning_rate": 0.00014895883096810453, "loss": 5.0575, "step": 35350 }, { "epoch": 0.049684117346410074, "grad_norm": 1.0952467918395996, "learning_rate": 0.00014900098356048896, "loss": 5.0144, "step": 35360 }, { "epoch": 0.04969816828457365, "grad_norm": 1.0802350044250488, "learning_rate": 0.0001490431361528734, "loss": 5.0557, "step": 35370 }, { "epoch": 0.049712219222737226, "grad_norm": 1.1121940612792969, "learning_rate": 0.00014908528874525782, "loss": 5.0688, "step": 35380 }, { "epoch": 0.0497262701609008, "grad_norm": 1.1207494735717773, "learning_rate": 0.00014912744133764225, "loss": 4.9295, "step": 35390 }, { "epoch": 0.049740321099064386, "grad_norm": 1.1117680072784424, "learning_rate": 0.00014916959393002668, "loss": 5.0406, "step": 35400 }, { "epoch": 0.04975437203722796, "grad_norm": 1.2448337078094482, "learning_rate": 0.0001492117465224111, "loss": 4.9559, "step": 35410 }, { "epoch": 0.04976842297539154, "grad_norm": 
1.0946520566940308, "learning_rate": 0.00014925389911479554, "loss": 4.9064, "step": 35420 }, { "epoch": 0.049782473913555114, "grad_norm": 1.0836005210876465, "learning_rate": 0.00014929605170717998, "loss": 5.0171, "step": 35430 }, { "epoch": 0.04979652485171869, "grad_norm": 1.1312141418457031, "learning_rate": 0.0001493382042995644, "loss": 5.103, "step": 35440 }, { "epoch": 0.04981057578988227, "grad_norm": 1.2918745279312134, "learning_rate": 0.00014938035689194884, "loss": 5.056, "step": 35450 }, { "epoch": 0.04982462672804585, "grad_norm": 1.1127320528030396, "learning_rate": 0.00014942250948433327, "loss": 4.988, "step": 35460 }, { "epoch": 0.049838677666209426, "grad_norm": 1.1448490619659424, "learning_rate": 0.0001494646620767177, "loss": 5.141, "step": 35470 }, { "epoch": 0.049852728604373, "grad_norm": 1.1456868648529053, "learning_rate": 0.00014950681466910213, "loss": 5.0021, "step": 35480 }, { "epoch": 0.04986677954253658, "grad_norm": 1.1046836376190186, "learning_rate": 0.00014954896726148656, "loss": 5.0716, "step": 35490 }, { "epoch": 0.04988083048070016, "grad_norm": 1.1139931678771973, "learning_rate": 0.000149591119853871, "loss": 5.1356, "step": 35500 }, { "epoch": 0.04989488141886374, "grad_norm": 1.103615164756775, "learning_rate": 0.00014963327244625542, "loss": 4.9488, "step": 35510 }, { "epoch": 0.049908932357027314, "grad_norm": 1.1086006164550781, "learning_rate": 0.00014967542503863986, "loss": 5.1068, "step": 35520 }, { "epoch": 0.04992298329519089, "grad_norm": 1.2253687381744385, "learning_rate": 0.0001497175776310243, "loss": 4.9127, "step": 35530 }, { "epoch": 0.049937034233354466, "grad_norm": 1.0812503099441528, "learning_rate": 0.00014975973022340872, "loss": 5.0393, "step": 35540 }, { "epoch": 0.04995108517151805, "grad_norm": 1.108386516571045, "learning_rate": 0.00014980188281579315, "loss": 4.9698, "step": 35550 }, { "epoch": 0.049965136109681625, "grad_norm": 1.108074426651001, "learning_rate": 0.00014984403540817758, 
"loss": 5.0325, "step": 35560 }, { "epoch": 0.0499791870478452, "grad_norm": 1.1028242111206055, "learning_rate": 0.000149886188000562, "loss": 5.0655, "step": 35570 }, { "epoch": 0.04999323798600878, "grad_norm": 1.1405538320541382, "learning_rate": 0.00014992834059294647, "loss": 5.0859, "step": 35580 }, { "epoch": 0.050007288924172354, "grad_norm": 1.1215912103652954, "learning_rate": 0.00014997049318533087, "loss": 4.9616, "step": 35590 }, { "epoch": 0.05002133986233594, "grad_norm": 1.2131357192993164, "learning_rate": 0.0001500126457777153, "loss": 4.9392, "step": 35600 }, { "epoch": 0.05003539080049951, "grad_norm": 1.1143559217453003, "learning_rate": 0.00015005479837009974, "loss": 4.9206, "step": 35610 }, { "epoch": 0.05004944173866309, "grad_norm": 1.0911166667938232, "learning_rate": 0.00015009695096248417, "loss": 5.0549, "step": 35620 }, { "epoch": 0.050063492676826665, "grad_norm": 1.0939656496047974, "learning_rate": 0.0001501391035548686, "loss": 5.0054, "step": 35630 }, { "epoch": 0.05007754361499024, "grad_norm": 1.123145341873169, "learning_rate": 0.00015018125614725306, "loss": 4.9684, "step": 35640 }, { "epoch": 0.05009159455315382, "grad_norm": 1.072049856185913, "learning_rate": 0.0001502234087396375, "loss": 4.9857, "step": 35650 }, { "epoch": 0.0501056454913174, "grad_norm": 1.1123992204666138, "learning_rate": 0.00015026556133202192, "loss": 5.0464, "step": 35660 }, { "epoch": 0.05011969642948098, "grad_norm": 1.2838733196258545, "learning_rate": 0.00015030771392440632, "loss": 5.0244, "step": 35670 }, { "epoch": 0.05013374736764455, "grad_norm": 1.0853863954544067, "learning_rate": 0.00015034986651679075, "loss": 5.0946, "step": 35680 }, { "epoch": 0.05014779830580813, "grad_norm": 1.0802594423294067, "learning_rate": 0.00015039201910917519, "loss": 4.9547, "step": 35690 }, { "epoch": 0.050161849243971705, "grad_norm": 1.1661789417266846, "learning_rate": 0.00015043417170155964, "loss": 5.0453, "step": 35700 }, { "epoch": 
0.05017590018213529, "grad_norm": 1.0816981792449951, "learning_rate": 0.00015047632429394408, "loss": 5.1652, "step": 35710 }, { "epoch": 0.050189951120298865, "grad_norm": 1.1324831247329712, "learning_rate": 0.0001505184768863285, "loss": 4.9944, "step": 35720 }, { "epoch": 0.05020400205846244, "grad_norm": 1.1340142488479614, "learning_rate": 0.0001505606294787129, "loss": 5.0676, "step": 35730 }, { "epoch": 0.05021805299662602, "grad_norm": 1.0971640348434448, "learning_rate": 0.00015060278207109734, "loss": 4.9213, "step": 35740 }, { "epoch": 0.05023210393478959, "grad_norm": 1.1202173233032227, "learning_rate": 0.0001506449346634818, "loss": 5.0313, "step": 35750 }, { "epoch": 0.050246154872953176, "grad_norm": 1.1177239418029785, "learning_rate": 0.00015068708725586623, "loss": 5.0798, "step": 35760 }, { "epoch": 0.05026020581111675, "grad_norm": 1.1042288541793823, "learning_rate": 0.00015072923984825066, "loss": 5.1434, "step": 35770 }, { "epoch": 0.05027425674928033, "grad_norm": 1.0804433822631836, "learning_rate": 0.0001507713924406351, "loss": 5.0876, "step": 35780 }, { "epoch": 0.050288307687443905, "grad_norm": 1.0773561000823975, "learning_rate": 0.00015081354503301952, "loss": 5.0045, "step": 35790 }, { "epoch": 0.05030235862560748, "grad_norm": 1.1056697368621826, "learning_rate": 0.00015085569762540393, "loss": 5.1274, "step": 35800 }, { "epoch": 0.050316409563771064, "grad_norm": 1.1184196472167969, "learning_rate": 0.0001508978502177884, "loss": 4.973, "step": 35810 }, { "epoch": 0.05033046050193464, "grad_norm": 1.1502302885055542, "learning_rate": 0.00015094000281017282, "loss": 4.8589, "step": 35820 }, { "epoch": 0.050344511440098216, "grad_norm": 1.110656499862671, "learning_rate": 0.00015098215540255725, "loss": 5.1146, "step": 35830 }, { "epoch": 0.05035856237826179, "grad_norm": 1.0898936986923218, "learning_rate": 0.00015102430799494168, "loss": 5.0243, "step": 35840 }, { "epoch": 0.05037261331642537, "grad_norm": 1.1786738634109497, 
"learning_rate": 0.0001510664605873261, "loss": 5.0675, "step": 35850 }, { "epoch": 0.05038666425458895, "grad_norm": 1.1352401971817017, "learning_rate": 0.00015110861317971052, "loss": 5.0869, "step": 35860 }, { "epoch": 0.05040071519275253, "grad_norm": 1.2441682815551758, "learning_rate": 0.00015115076577209497, "loss": 4.9753, "step": 35870 }, { "epoch": 0.050414766130916104, "grad_norm": 1.062424898147583, "learning_rate": 0.0001511929183644794, "loss": 5.1051, "step": 35880 }, { "epoch": 0.05042881706907968, "grad_norm": 1.0863795280456543, "learning_rate": 0.00015123507095686384, "loss": 5.0136, "step": 35890 }, { "epoch": 0.050442868007243256, "grad_norm": 1.1157557964324951, "learning_rate": 0.00015127722354924827, "loss": 5.0345, "step": 35900 }, { "epoch": 0.05045691894540684, "grad_norm": 1.0928047895431519, "learning_rate": 0.0001513193761416327, "loss": 5.0341, "step": 35910 }, { "epoch": 0.050470969883570416, "grad_norm": 1.103150486946106, "learning_rate": 0.0001513615287340171, "loss": 5.0651, "step": 35920 }, { "epoch": 0.05048502082173399, "grad_norm": 1.1346688270568848, "learning_rate": 0.00015140368132640156, "loss": 4.9009, "step": 35930 }, { "epoch": 0.05049907175989757, "grad_norm": 1.1874549388885498, "learning_rate": 0.000151445833918786, "loss": 4.9587, "step": 35940 }, { "epoch": 0.050513122698061144, "grad_norm": 1.10201895236969, "learning_rate": 0.00015148798651117042, "loss": 5.0994, "step": 35950 }, { "epoch": 0.05052717363622472, "grad_norm": 1.1142152547836304, "learning_rate": 0.00015153013910355485, "loss": 4.9086, "step": 35960 }, { "epoch": 0.0505412245743883, "grad_norm": 1.1207566261291504, "learning_rate": 0.00015157229169593929, "loss": 4.9371, "step": 35970 }, { "epoch": 0.05055527551255188, "grad_norm": 1.146672248840332, "learning_rate": 0.00015161444428832374, "loss": 4.9947, "step": 35980 }, { "epoch": 0.050569326450715456, "grad_norm": 1.1421736478805542, "learning_rate": 0.00015165659688070815, "loss": 5.1101, 
"step": 35990 }, { "epoch": 0.05058337738887903, "grad_norm": 1.0799651145935059, "learning_rate": 0.00015169874947309258, "loss": 4.856, "step": 36000 }, { "epoch": 0.05059742832704261, "grad_norm": 1.1083508729934692, "learning_rate": 0.000151740902065477, "loss": 4.9631, "step": 36010 }, { "epoch": 0.05061147926520619, "grad_norm": 1.1054953336715698, "learning_rate": 0.00015178305465786144, "loss": 5.0091, "step": 36020 }, { "epoch": 0.05062553020336977, "grad_norm": 1.219901204109192, "learning_rate": 0.00015182520725024587, "loss": 4.9669, "step": 36030 }, { "epoch": 0.050639581141533344, "grad_norm": 1.1822065114974976, "learning_rate": 0.00015186735984263033, "loss": 4.9951, "step": 36040 }, { "epoch": 0.05065363207969692, "grad_norm": 1.0649558305740356, "learning_rate": 0.00015190951243501473, "loss": 4.9975, "step": 36050 }, { "epoch": 0.050667683017860496, "grad_norm": 1.1625783443450928, "learning_rate": 0.00015195166502739917, "loss": 4.9153, "step": 36060 }, { "epoch": 0.05068173395602408, "grad_norm": 1.0902538299560547, "learning_rate": 0.0001519938176197836, "loss": 4.9411, "step": 36070 }, { "epoch": 0.050695784894187655, "grad_norm": 1.12613046169281, "learning_rate": 0.00015203597021216803, "loss": 4.9862, "step": 36080 }, { "epoch": 0.05070983583235123, "grad_norm": 1.1342322826385498, "learning_rate": 0.00015207812280455246, "loss": 5.0452, "step": 36090 }, { "epoch": 0.05072388677051481, "grad_norm": 1.0751118659973145, "learning_rate": 0.00015212027539693692, "loss": 5.0084, "step": 36100 }, { "epoch": 0.050737937708678384, "grad_norm": 1.1124134063720703, "learning_rate": 0.00015216242798932135, "loss": 4.9795, "step": 36110 }, { "epoch": 0.05075198864684197, "grad_norm": 1.1308832168579102, "learning_rate": 0.00015220458058170575, "loss": 5.0751, "step": 36120 }, { "epoch": 0.05076603958500554, "grad_norm": 1.1083581447601318, "learning_rate": 0.00015224673317409018, "loss": 5.0698, "step": 36130 }, { "epoch": 0.05078009052316912, 
"grad_norm": 1.140774130821228, "learning_rate": 0.00015228888576647462, "loss": 5.041, "step": 36140 }, { "epoch": 0.050794141461332695, "grad_norm": 1.0934237241744995, "learning_rate": 0.00015233103835885905, "loss": 5.0086, "step": 36150 }, { "epoch": 0.05080819239949627, "grad_norm": 1.0771994590759277, "learning_rate": 0.0001523731909512435, "loss": 4.9932, "step": 36160 }, { "epoch": 0.050822243337659855, "grad_norm": 1.076748251914978, "learning_rate": 0.00015241534354362794, "loss": 4.8132, "step": 36170 }, { "epoch": 0.05083629427582343, "grad_norm": 1.0804756879806519, "learning_rate": 0.00015245749613601234, "loss": 4.9832, "step": 36180 }, { "epoch": 0.05085034521398701, "grad_norm": 1.0959327220916748, "learning_rate": 0.00015249964872839677, "loss": 4.9817, "step": 36190 }, { "epoch": 0.05086439615215058, "grad_norm": 1.185053825378418, "learning_rate": 0.0001525418013207812, "loss": 5.0076, "step": 36200 }, { "epoch": 0.05087844709031416, "grad_norm": 1.1760470867156982, "learning_rate": 0.00015258395391316563, "loss": 4.9851, "step": 36210 }, { "epoch": 0.05089249802847774, "grad_norm": 1.1177805662155151, "learning_rate": 0.0001526261065055501, "loss": 5.0122, "step": 36220 }, { "epoch": 0.05090654896664132, "grad_norm": 1.0969743728637695, "learning_rate": 0.00015266825909793452, "loss": 4.8199, "step": 36230 }, { "epoch": 0.050920599904804895, "grad_norm": 1.171985387802124, "learning_rate": 0.00015271041169031895, "loss": 4.9671, "step": 36240 }, { "epoch": 0.05093465084296847, "grad_norm": 1.0644655227661133, "learning_rate": 0.00015275256428270336, "loss": 5.0531, "step": 36250 }, { "epoch": 0.05094870178113205, "grad_norm": 1.0977424383163452, "learning_rate": 0.0001527947168750878, "loss": 4.9316, "step": 36260 }, { "epoch": 0.05096275271929562, "grad_norm": 1.0816428661346436, "learning_rate": 0.00015283686946747222, "loss": 5.0497, "step": 36270 }, { "epoch": 0.050976803657459206, "grad_norm": 1.1141808032989502, "learning_rate": 
0.00015287902205985668, "loss": 4.8977, "step": 36280 }, { "epoch": 0.05099085459562278, "grad_norm": 1.135312557220459, "learning_rate": 0.0001529211746522411, "loss": 5.0077, "step": 36290 }, { "epoch": 0.05100490553378636, "grad_norm": 1.0765386819839478, "learning_rate": 0.00015296332724462554, "loss": 4.9414, "step": 36300 }, { "epoch": 0.051018956471949935, "grad_norm": 1.0505393743515015, "learning_rate": 0.00015300547983700995, "loss": 4.8978, "step": 36310 }, { "epoch": 0.05103300741011351, "grad_norm": 1.127390742301941, "learning_rate": 0.00015304763242939438, "loss": 5.0173, "step": 36320 }, { "epoch": 0.051047058348277094, "grad_norm": 1.1132893562316895, "learning_rate": 0.00015308978502177883, "loss": 5.0465, "step": 36330 }, { "epoch": 0.05106110928644067, "grad_norm": 1.0964865684509277, "learning_rate": 0.00015313193761416327, "loss": 5.009, "step": 36340 }, { "epoch": 0.051075160224604246, "grad_norm": 1.0559228658676147, "learning_rate": 0.0001531740902065477, "loss": 5.136, "step": 36350 }, { "epoch": 0.05108921116276782, "grad_norm": 1.0860519409179688, "learning_rate": 0.00015321624279893213, "loss": 5.0737, "step": 36360 }, { "epoch": 0.0511032621009314, "grad_norm": 1.1100194454193115, "learning_rate": 0.00015325839539131656, "loss": 4.9814, "step": 36370 }, { "epoch": 0.05111731303909498, "grad_norm": 1.0740619897842407, "learning_rate": 0.00015330054798370096, "loss": 4.9415, "step": 36380 }, { "epoch": 0.05113136397725856, "grad_norm": 1.1333972215652466, "learning_rate": 0.00015334270057608542, "loss": 5.037, "step": 36390 }, { "epoch": 0.051145414915422134, "grad_norm": 1.1923198699951172, "learning_rate": 0.00015338485316846985, "loss": 4.9674, "step": 36400 }, { "epoch": 0.05115946585358571, "grad_norm": 1.0857388973236084, "learning_rate": 0.00015342700576085428, "loss": 5.0191, "step": 36410 }, { "epoch": 0.051173516791749286, "grad_norm": 1.1336112022399902, "learning_rate": 0.00015346494309400027, "loss": 4.9396, "step": 36420 }, 
{ "epoch": 0.05118756772991287, "grad_norm": 1.1554714441299438, "learning_rate": 0.0001535070956863847, "loss": 4.9489, "step": 36430 }, { "epoch": 0.051201618668076446, "grad_norm": 1.1246522665023804, "learning_rate": 0.00015354924827876913, "loss": 5.0433, "step": 36440 }, { "epoch": 0.05121566960624002, "grad_norm": 1.1257320642471313, "learning_rate": 0.00015359140087115356, "loss": 4.9265, "step": 36450 }, { "epoch": 0.0512297205444036, "grad_norm": 1.1546956300735474, "learning_rate": 0.00015363355346353802, "loss": 4.9957, "step": 36460 }, { "epoch": 0.051243771482567174, "grad_norm": 1.1014739274978638, "learning_rate": 0.00015367570605592242, "loss": 4.9966, "step": 36470 }, { "epoch": 0.05125782242073076, "grad_norm": 1.1065007448196411, "learning_rate": 0.00015371785864830685, "loss": 4.9691, "step": 36480 }, { "epoch": 0.05127187335889433, "grad_norm": 1.0437864065170288, "learning_rate": 0.00015376001124069128, "loss": 5.0153, "step": 36490 }, { "epoch": 0.05128592429705791, "grad_norm": 1.0865705013275146, "learning_rate": 0.00015380216383307572, "loss": 5.0422, "step": 36500 }, { "epoch": 0.051299975235221486, "grad_norm": 1.0838688611984253, "learning_rate": 0.00015384431642546015, "loss": 4.9375, "step": 36510 }, { "epoch": 0.05131402617338506, "grad_norm": 1.13599693775177, "learning_rate": 0.0001538864690178446, "loss": 4.9728, "step": 36520 }, { "epoch": 0.051328077111548645, "grad_norm": 1.183531403541565, "learning_rate": 0.00015392862161022904, "loss": 5.066, "step": 36530 }, { "epoch": 0.05134212804971222, "grad_norm": 1.1362754106521606, "learning_rate": 0.00015397077420261344, "loss": 5.0401, "step": 36540 }, { "epoch": 0.0513561789878758, "grad_norm": 1.1068400144577026, "learning_rate": 0.00015401292679499787, "loss": 4.9703, "step": 36550 }, { "epoch": 0.051370229926039374, "grad_norm": 1.1438322067260742, "learning_rate": 0.0001540550793873823, "loss": 5.0719, "step": 36560 }, { "epoch": 0.05138428086420295, "grad_norm": 
1.0228193998336792, "learning_rate": 0.00015409723197976673, "loss": 4.9806, "step": 36570 }, { "epoch": 0.051398331802366526, "grad_norm": 1.0660607814788818, "learning_rate": 0.0001541393845721512, "loss": 5.1565, "step": 36580 }, { "epoch": 0.05141238274053011, "grad_norm": 1.1452786922454834, "learning_rate": 0.00015418153716453562, "loss": 5.0448, "step": 36590 }, { "epoch": 0.051426433678693685, "grad_norm": 1.1097620725631714, "learning_rate": 0.00015422368975692003, "loss": 5.046, "step": 36600 }, { "epoch": 0.05144048461685726, "grad_norm": 1.2317959070205688, "learning_rate": 0.00015426584234930446, "loss": 4.9258, "step": 36610 }, { "epoch": 0.05145453555502084, "grad_norm": 1.0700747966766357, "learning_rate": 0.0001543079949416889, "loss": 5.0544, "step": 36620 }, { "epoch": 0.051468586493184414, "grad_norm": 1.0716843605041504, "learning_rate": 0.00015435014753407332, "loss": 5.0264, "step": 36630 }, { "epoch": 0.051482637431348, "grad_norm": 1.0859941244125366, "learning_rate": 0.00015439230012645778, "loss": 4.989, "step": 36640 }, { "epoch": 0.05149668836951157, "grad_norm": 1.0916000604629517, "learning_rate": 0.0001544344527188422, "loss": 4.971, "step": 36650 }, { "epoch": 0.05151073930767515, "grad_norm": 1.139979362487793, "learning_rate": 0.00015447660531122664, "loss": 4.9515, "step": 36660 }, { "epoch": 0.051524790245838725, "grad_norm": 1.1469497680664062, "learning_rate": 0.00015451875790361105, "loss": 4.9998, "step": 36670 }, { "epoch": 0.0515388411840023, "grad_norm": 1.1082267761230469, "learning_rate": 0.00015456091049599548, "loss": 4.9369, "step": 36680 }, { "epoch": 0.051552892122165885, "grad_norm": 1.1943933963775635, "learning_rate": 0.0001546030630883799, "loss": 4.9594, "step": 36690 }, { "epoch": 0.05156694306032946, "grad_norm": 1.0887293815612793, "learning_rate": 0.00015464521568076437, "loss": 4.9823, "step": 36700 }, { "epoch": 0.05158099399849304, "grad_norm": 1.1254092454910278, "learning_rate": 0.0001546873682731488, 
"loss": 4.9227, "step": 36710 }, { "epoch": 0.05159504493665661, "grad_norm": 1.1450250148773193, "learning_rate": 0.00015472952086553323, "loss": 4.9345, "step": 36720 }, { "epoch": 0.05160909587482019, "grad_norm": 1.0992549657821655, "learning_rate": 0.00015477167345791763, "loss": 4.9191, "step": 36730 }, { "epoch": 0.05162314681298377, "grad_norm": 1.0670653581619263, "learning_rate": 0.00015481382605030206, "loss": 4.9883, "step": 36740 }, { "epoch": 0.05163719775114735, "grad_norm": 1.074080467224121, "learning_rate": 0.0001548559786426865, "loss": 4.9899, "step": 36750 }, { "epoch": 0.051651248689310925, "grad_norm": 1.1951225996017456, "learning_rate": 0.00015489813123507095, "loss": 5.037, "step": 36760 }, { "epoch": 0.0516652996274745, "grad_norm": 1.0775877237319946, "learning_rate": 0.00015494028382745538, "loss": 5.0046, "step": 36770 }, { "epoch": 0.05167935056563808, "grad_norm": 1.087863802909851, "learning_rate": 0.00015498243641983982, "loss": 4.9907, "step": 36780 }, { "epoch": 0.05169340150380166, "grad_norm": 1.0674740076065063, "learning_rate": 0.00015502458901222425, "loss": 5.0147, "step": 36790 }, { "epoch": 0.051707452441965236, "grad_norm": 1.069732904434204, "learning_rate": 0.00015506674160460865, "loss": 4.8633, "step": 36800 }, { "epoch": 0.05172150338012881, "grad_norm": 1.0820239782333374, "learning_rate": 0.00015510889419699308, "loss": 5.0354, "step": 36810 }, { "epoch": 0.05173555431829239, "grad_norm": 1.1347521543502808, "learning_rate": 0.00015515104678937754, "loss": 4.8265, "step": 36820 }, { "epoch": 0.051749605256455965, "grad_norm": 1.1913303136825562, "learning_rate": 0.00015519319938176197, "loss": 4.9479, "step": 36830 }, { "epoch": 0.05176365619461955, "grad_norm": 1.1147087812423706, "learning_rate": 0.0001552353519741464, "loss": 4.9907, "step": 36840 }, { "epoch": 0.051777707132783124, "grad_norm": 1.076640009880066, "learning_rate": 0.00015527750456653083, "loss": 4.9654, "step": 36850 }, { "epoch": 
0.0517917580709467, "grad_norm": 1.1436219215393066, "learning_rate": 0.00015531965715891524, "loss": 4.9585, "step": 36860 }, { "epoch": 0.051805809009110276, "grad_norm": 1.0725477933883667, "learning_rate": 0.00015536180975129967, "loss": 5.0113, "step": 36870 }, { "epoch": 0.05181985994727385, "grad_norm": 1.087472677230835, "learning_rate": 0.00015540396234368413, "loss": 4.8823, "step": 36880 }, { "epoch": 0.05183391088543743, "grad_norm": 1.0537265539169312, "learning_rate": 0.00015544611493606856, "loss": 4.8053, "step": 36890 }, { "epoch": 0.05184796182360101, "grad_norm": 1.0963505506515503, "learning_rate": 0.000155488267528453, "loss": 5.0603, "step": 36900 }, { "epoch": 0.05186201276176459, "grad_norm": 1.1273267269134521, "learning_rate": 0.00015553042012083742, "loss": 5.0182, "step": 36910 }, { "epoch": 0.051876063699928164, "grad_norm": 1.0548876523971558, "learning_rate": 0.00015557257271322188, "loss": 5.1116, "step": 36920 }, { "epoch": 0.05189011463809174, "grad_norm": 1.1563364267349243, "learning_rate": 0.00015561472530560626, "loss": 4.9696, "step": 36930 }, { "epoch": 0.051904165576255316, "grad_norm": 1.0634068250656128, "learning_rate": 0.00015565687789799071, "loss": 4.9628, "step": 36940 }, { "epoch": 0.0519182165144189, "grad_norm": 1.182931900024414, "learning_rate": 0.00015569903049037515, "loss": 5.021, "step": 36950 }, { "epoch": 0.051932267452582476, "grad_norm": 1.0970754623413086, "learning_rate": 0.00015574118308275958, "loss": 5.0209, "step": 36960 }, { "epoch": 0.05194631839074605, "grad_norm": 1.10125732421875, "learning_rate": 0.000155783335675144, "loss": 4.963, "step": 36970 }, { "epoch": 0.05196036932890963, "grad_norm": 1.1242049932479858, "learning_rate": 0.00015582548826752847, "loss": 5.0308, "step": 36980 }, { "epoch": 0.051974420267073204, "grad_norm": 1.114256739616394, "learning_rate": 0.00015586764085991287, "loss": 5.0974, "step": 36990 }, { "epoch": 0.05198847120523679, "grad_norm": 1.080618143081665, 
"learning_rate": 0.0001559097934522973, "loss": 4.9911, "step": 37000 }, { "epoch": 0.05200252214340036, "grad_norm": 1.097937822341919, "learning_rate": 0.00015595194604468173, "loss": 4.9756, "step": 37010 }, { "epoch": 0.05201657308156394, "grad_norm": 1.2937126159667969, "learning_rate": 0.00015599409863706616, "loss": 5.0298, "step": 37020 }, { "epoch": 0.052030624019727516, "grad_norm": 1.1030606031417847, "learning_rate": 0.0001560362512294506, "loss": 4.9976, "step": 37030 }, { "epoch": 0.05204467495789109, "grad_norm": 1.1693426370620728, "learning_rate": 0.00015607840382183505, "loss": 4.9024, "step": 37040 }, { "epoch": 0.052058725896054675, "grad_norm": 1.1187068223953247, "learning_rate": 0.00015612055641421946, "loss": 5.0483, "step": 37050 }, { "epoch": 0.05207277683421825, "grad_norm": 1.0532891750335693, "learning_rate": 0.0001561627090066039, "loss": 4.9193, "step": 37060 }, { "epoch": 0.05208682777238183, "grad_norm": 1.1207010746002197, "learning_rate": 0.00015620486159898832, "loss": 4.9848, "step": 37070 }, { "epoch": 0.052100878710545404, "grad_norm": 1.0693873167037964, "learning_rate": 0.00015624701419137275, "loss": 4.9215, "step": 37080 }, { "epoch": 0.05211492964870898, "grad_norm": 1.0835233926773071, "learning_rate": 0.00015628916678375718, "loss": 4.7552, "step": 37090 }, { "epoch": 0.05212898058687256, "grad_norm": 1.124945878982544, "learning_rate": 0.00015633131937614164, "loss": 5.0005, "step": 37100 }, { "epoch": 0.05214303152503614, "grad_norm": 1.0725970268249512, "learning_rate": 0.00015637347196852607, "loss": 4.9894, "step": 37110 }, { "epoch": 0.052157082463199715, "grad_norm": 1.1235997676849365, "learning_rate": 0.00015641562456091048, "loss": 5.0398, "step": 37120 }, { "epoch": 0.05217113340136329, "grad_norm": 1.1638944149017334, "learning_rate": 0.0001564577771532949, "loss": 4.8523, "step": 37130 }, { "epoch": 0.05218518433952687, "grad_norm": 1.1262266635894775, "learning_rate": 0.00015649992974567934, "loss": 
4.8287, "step": 37140 }, { "epoch": 0.05219923527769045, "grad_norm": 1.1217573881149292, "learning_rate": 0.00015654208233806377, "loss": 5.0582, "step": 37150 }, { "epoch": 0.05221328621585403, "grad_norm": 1.1533286571502686, "learning_rate": 0.00015658423493044823, "loss": 4.939, "step": 37160 }, { "epoch": 0.0522273371540176, "grad_norm": 1.063332438468933, "learning_rate": 0.00015662638752283266, "loss": 5.0078, "step": 37170 }, { "epoch": 0.05224138809218118, "grad_norm": 1.0666674375534058, "learning_rate": 0.00015666854011521706, "loss": 4.8933, "step": 37180 }, { "epoch": 0.052255439030344755, "grad_norm": 1.1299502849578857, "learning_rate": 0.0001567106927076015, "loss": 4.9698, "step": 37190 }, { "epoch": 0.05226948996850833, "grad_norm": 1.0899701118469238, "learning_rate": 0.00015675284529998592, "loss": 5.1382, "step": 37200 }, { "epoch": 0.052283540906671915, "grad_norm": 1.0890862941741943, "learning_rate": 0.00015679499789237036, "loss": 4.9112, "step": 37210 }, { "epoch": 0.05229759184483549, "grad_norm": 1.1074151992797852, "learning_rate": 0.00015683715048475481, "loss": 4.9958, "step": 37220 }, { "epoch": 0.05231164278299907, "grad_norm": 1.047777771949768, "learning_rate": 0.00015687930307713925, "loss": 4.8429, "step": 37230 }, { "epoch": 0.05232569372116264, "grad_norm": 1.0324103832244873, "learning_rate": 0.00015692145566952368, "loss": 4.9171, "step": 37240 }, { "epoch": 0.05233974465932622, "grad_norm": 1.043428897857666, "learning_rate": 0.00015696360826190808, "loss": 5.0088, "step": 37250 }, { "epoch": 0.0523537955974898, "grad_norm": 1.107131004333496, "learning_rate": 0.0001570057608542925, "loss": 5.0575, "step": 37260 }, { "epoch": 0.05236784653565338, "grad_norm": 1.1451491117477417, "learning_rate": 0.00015704791344667694, "loss": 4.9536, "step": 37270 }, { "epoch": 0.052381897473816955, "grad_norm": 1.1125915050506592, "learning_rate": 0.0001570900660390614, "loss": 4.987, "step": 37280 }, { "epoch": 0.05239594841198053, 
"grad_norm": 1.1330432891845703, "learning_rate": 0.00015713221863144583, "loss": 4.9512, "step": 37290 }, { "epoch": 0.05240999935014411, "grad_norm": 1.0728533267974854, "learning_rate": 0.00015717437122383026, "loss": 5.0056, "step": 37300 }, { "epoch": 0.05242405028830769, "grad_norm": 1.1220287084579468, "learning_rate": 0.00015721652381621467, "loss": 4.8007, "step": 37310 }, { "epoch": 0.052438101226471266, "grad_norm": 1.086573839187622, "learning_rate": 0.0001572586764085991, "loss": 4.8958, "step": 37320 }, { "epoch": 0.05245215216463484, "grad_norm": 1.034942865371704, "learning_rate": 0.00015730082900098353, "loss": 4.9862, "step": 37330 }, { "epoch": 0.05246620310279842, "grad_norm": 1.2076352834701538, "learning_rate": 0.000157342981593368, "loss": 5.0167, "step": 37340 }, { "epoch": 0.052480254040961995, "grad_norm": 1.0755456686019897, "learning_rate": 0.00015738513418575242, "loss": 4.9192, "step": 37350 }, { "epoch": 0.05249430497912558, "grad_norm": 1.0850402116775513, "learning_rate": 0.00015742728677813685, "loss": 5.0214, "step": 37360 }, { "epoch": 0.052508355917289154, "grad_norm": 1.1084506511688232, "learning_rate": 0.00015746943937052128, "loss": 5.02, "step": 37370 }, { "epoch": 0.05252240685545273, "grad_norm": 1.1227412223815918, "learning_rate": 0.00015751159196290569, "loss": 5.046, "step": 37380 }, { "epoch": 0.052536457793616306, "grad_norm": 1.1785093545913696, "learning_rate": 0.00015755374455529012, "loss": 4.9965, "step": 37390 }, { "epoch": 0.05255050873177988, "grad_norm": 1.1120465993881226, "learning_rate": 0.00015759589714767458, "loss": 4.9608, "step": 37400 }, { "epoch": 0.052564559669943466, "grad_norm": 1.1275166273117065, "learning_rate": 0.000157638049740059, "loss": 4.9256, "step": 37410 }, { "epoch": 0.05257861060810704, "grad_norm": 1.0952259302139282, "learning_rate": 0.00015768020233244344, "loss": 4.9027, "step": 37420 }, { "epoch": 0.05259266154627062, "grad_norm": 1.084206223487854, "learning_rate": 
0.00015772235492482787, "loss": 4.9846, "step": 37430 }, { "epoch": 0.052606712484434194, "grad_norm": 1.1174198389053345, "learning_rate": 0.00015776450751721227, "loss": 4.8721, "step": 37440 }, { "epoch": 0.05262076342259777, "grad_norm": 1.077741265296936, "learning_rate": 0.0001578066601095967, "loss": 4.9004, "step": 37450 }, { "epoch": 0.05263481436076135, "grad_norm": 1.1274837255477905, "learning_rate": 0.00015784881270198116, "loss": 4.9527, "step": 37460 }, { "epoch": 0.05264886529892493, "grad_norm": 1.098956823348999, "learning_rate": 0.0001578909652943656, "loss": 4.9924, "step": 37470 }, { "epoch": 0.052662916237088506, "grad_norm": 1.2196323871612549, "learning_rate": 0.00015793311788675002, "loss": 4.9223, "step": 37480 }, { "epoch": 0.05267696717525208, "grad_norm": 1.065298318862915, "learning_rate": 0.00015797527047913446, "loss": 4.9821, "step": 37490 }, { "epoch": 0.05269101811341566, "grad_norm": 1.0735142230987549, "learning_rate": 0.00015801742307151891, "loss": 5.0825, "step": 37500 }, { "epoch": 0.052705069051579234, "grad_norm": 1.075454831123352, "learning_rate": 0.0001580595756639033, "loss": 4.9954, "step": 37510 }, { "epoch": 0.05271911998974282, "grad_norm": 1.0807503461837769, "learning_rate": 0.00015810172825628775, "loss": 4.9428, "step": 37520 }, { "epoch": 0.05273317092790639, "grad_norm": 1.1390740871429443, "learning_rate": 0.00015814388084867218, "loss": 4.9437, "step": 37530 }, { "epoch": 0.05274722186606997, "grad_norm": 1.0902714729309082, "learning_rate": 0.0001581860334410566, "loss": 5.0031, "step": 37540 }, { "epoch": 0.052761272804233546, "grad_norm": 1.0692888498306274, "learning_rate": 0.00015822818603344104, "loss": 4.8607, "step": 37550 }, { "epoch": 0.05277532374239712, "grad_norm": 1.0641309022903442, "learning_rate": 0.0001582703386258255, "loss": 5.0244, "step": 37560 }, { "epoch": 0.052789374680560705, "grad_norm": 1.0614022016525269, "learning_rate": 0.00015831249121820988, "loss": 4.937, "step": 37570 }, { 
"epoch": 0.05280342561872428, "grad_norm": 1.0764083862304688, "learning_rate": 0.00015835464381059434, "loss": 4.8395, "step": 37580 }, { "epoch": 0.05281747655688786, "grad_norm": 1.091915488243103, "learning_rate": 0.00015839679640297877, "loss": 5.0053, "step": 37590 }, { "epoch": 0.052831527495051434, "grad_norm": 1.3076285123825073, "learning_rate": 0.0001584389489953632, "loss": 4.8942, "step": 37600 }, { "epoch": 0.05284557843321501, "grad_norm": 1.0910054445266724, "learning_rate": 0.00015848110158774763, "loss": 4.9173, "step": 37610 }, { "epoch": 0.05285962937137859, "grad_norm": 1.0928317308425903, "learning_rate": 0.0001585232541801321, "loss": 5.0224, "step": 37620 }, { "epoch": 0.05287368030954217, "grad_norm": 1.0733320713043213, "learning_rate": 0.00015856540677251652, "loss": 4.9854, "step": 37630 }, { "epoch": 0.052887731247705745, "grad_norm": 1.0689090490341187, "learning_rate": 0.00015860755936490092, "loss": 4.9577, "step": 37640 }, { "epoch": 0.05290178218586932, "grad_norm": 1.1283597946166992, "learning_rate": 0.00015864971195728535, "loss": 5.0415, "step": 37650 }, { "epoch": 0.0529158331240329, "grad_norm": 1.1071979999542236, "learning_rate": 0.00015869186454966979, "loss": 5.0478, "step": 37660 }, { "epoch": 0.05292988406219648, "grad_norm": 1.0887236595153809, "learning_rate": 0.00015873401714205422, "loss": 5.0003, "step": 37670 }, { "epoch": 0.05294393500036006, "grad_norm": 1.1010711193084717, "learning_rate": 0.00015877616973443868, "loss": 4.8824, "step": 37680 }, { "epoch": 0.05295798593852363, "grad_norm": 1.0766741037368774, "learning_rate": 0.0001588183223268231, "loss": 5.1363, "step": 37690 }, { "epoch": 0.05297203687668721, "grad_norm": 1.0775905847549438, "learning_rate": 0.0001588604749192075, "loss": 4.9825, "step": 37700 }, { "epoch": 0.052986087814850785, "grad_norm": 1.0823009014129639, "learning_rate": 0.00015890262751159194, "loss": 5.2041, "step": 37710 }, { "epoch": 0.05300013875301437, "grad_norm": 
1.0547107458114624, "learning_rate": 0.00015894478010397637, "loss": 5.036, "step": 37720 }, { "epoch": 0.053014189691177944, "grad_norm": 1.0810598134994507, "learning_rate": 0.0001589869326963608, "loss": 4.7697, "step": 37730 }, { "epoch": 0.05302824062934152, "grad_norm": 1.0776020288467407, "learning_rate": 0.00015902908528874526, "loss": 4.95, "step": 37740 }, { "epoch": 0.0530422915675051, "grad_norm": 1.068782925605774, "learning_rate": 0.0001590712378811297, "loss": 4.9632, "step": 37750 }, { "epoch": 0.05305634250566867, "grad_norm": 1.0810242891311646, "learning_rate": 0.0001591133904735141, "loss": 5.0325, "step": 37760 }, { "epoch": 0.053070393443832256, "grad_norm": 1.0585830211639404, "learning_rate": 0.00015915554306589853, "loss": 4.9651, "step": 37770 }, { "epoch": 0.05308444438199583, "grad_norm": 1.0817153453826904, "learning_rate": 0.00015919769565828296, "loss": 5.022, "step": 37780 }, { "epoch": 0.05309849532015941, "grad_norm": 1.1465531587600708, "learning_rate": 0.0001592398482506674, "loss": 4.9242, "step": 37790 }, { "epoch": 0.053112546258322985, "grad_norm": 1.1970518827438354, "learning_rate": 0.00015928200084305185, "loss": 5.0169, "step": 37800 }, { "epoch": 0.05312659719648656, "grad_norm": 1.0447371006011963, "learning_rate": 0.00015932415343543628, "loss": 5.0174, "step": 37810 }, { "epoch": 0.053140648134650144, "grad_norm": 1.117707371711731, "learning_rate": 0.0001593663060278207, "loss": 4.9709, "step": 37820 }, { "epoch": 0.05315469907281372, "grad_norm": 1.101830005645752, "learning_rate": 0.00015940845862020512, "loss": 4.9622, "step": 37830 }, { "epoch": 0.053168750010977296, "grad_norm": 1.0951014757156372, "learning_rate": 0.00015945061121258955, "loss": 5.005, "step": 37840 }, { "epoch": 0.05318280094914087, "grad_norm": 1.0685999393463135, "learning_rate": 0.00015949276380497398, "loss": 4.9262, "step": 37850 }, { "epoch": 0.05319685188730445, "grad_norm": 1.1599674224853516, "learning_rate": 0.00015953491639735844, 
"loss": 4.9453, "step": 37860 }, { "epoch": 0.053210902825468025, "grad_norm": 1.1290907859802246, "learning_rate": 0.00015957706898974287, "loss": 4.8283, "step": 37870 }, { "epoch": 0.05322495376363161, "grad_norm": 1.1260758638381958, "learning_rate": 0.0001596192215821273, "loss": 4.8132, "step": 37880 }, { "epoch": 0.053239004701795184, "grad_norm": 1.4716039896011353, "learning_rate": 0.0001596613741745117, "loss": 5.0606, "step": 37890 }, { "epoch": 0.05325305563995876, "grad_norm": 1.08821439743042, "learning_rate": 0.00015970352676689613, "loss": 5.085, "step": 37900 }, { "epoch": 0.053267106578122336, "grad_norm": 1.0185142755508423, "learning_rate": 0.00015974567935928056, "loss": 4.9683, "step": 37910 }, { "epoch": 0.05328115751628591, "grad_norm": 1.0454940795898438, "learning_rate": 0.00015978783195166502, "loss": 5.0988, "step": 37920 }, { "epoch": 0.053295208454449496, "grad_norm": 1.085282325744629, "learning_rate": 0.00015982998454404945, "loss": 4.873, "step": 37930 }, { "epoch": 0.05330925939261307, "grad_norm": 1.1121746301651, "learning_rate": 0.00015987213713643389, "loss": 4.97, "step": 37940 }, { "epoch": 0.05332331033077665, "grad_norm": 1.072363257408142, "learning_rate": 0.00015991428972881832, "loss": 4.9087, "step": 37950 }, { "epoch": 0.053337361268940224, "grad_norm": 1.0807424783706665, "learning_rate": 0.00015995644232120272, "loss": 4.9707, "step": 37960 }, { "epoch": 0.0533514122071038, "grad_norm": 1.1155600547790527, "learning_rate": 0.00015999859491358715, "loss": 4.9794, "step": 37970 }, { "epoch": 0.05336546314526738, "grad_norm": 1.1688952445983887, "learning_rate": 0.0001600407475059716, "loss": 4.8757, "step": 37980 }, { "epoch": 0.05337951408343096, "grad_norm": 1.0695239305496216, "learning_rate": 0.00016008290009835604, "loss": 4.9425, "step": 37990 }, { "epoch": 0.053393565021594536, "grad_norm": 1.1101523637771606, "learning_rate": 0.00016012505269074047, "loss": 4.9533, "step": 38000 }, { "epoch": 
0.05340761595975811, "grad_norm": 1.0731979608535767, "learning_rate": 0.0001601672052831249, "loss": 4.9356, "step": 38010 }, { "epoch": 0.05342166689792169, "grad_norm": 1.0717893838882446, "learning_rate": 0.0001602093578755093, "loss": 4.9014, "step": 38020 }, { "epoch": 0.05343571783608527, "grad_norm": 1.1218794584274292, "learning_rate": 0.00016025151046789374, "loss": 5.0567, "step": 38030 }, { "epoch": 0.05344976877424885, "grad_norm": 1.0864797830581665, "learning_rate": 0.0001602936630602782, "loss": 4.964, "step": 38040 }, { "epoch": 0.05346381971241242, "grad_norm": 1.1077324151992798, "learning_rate": 0.00016033581565266263, "loss": 5.0164, "step": 38050 }, { "epoch": 0.053477870650576, "grad_norm": 1.0500108003616333, "learning_rate": 0.00016037796824504706, "loss": 4.9392, "step": 38060 }, { "epoch": 0.053491921588739576, "grad_norm": 1.052202820777893, "learning_rate": 0.0001604201208374315, "loss": 5.1295, "step": 38070 }, { "epoch": 0.05350597252690316, "grad_norm": 1.0734517574310303, "learning_rate": 0.00016046227342981592, "loss": 4.9639, "step": 38080 }, { "epoch": 0.053520023465066735, "grad_norm": 1.0562039613723755, "learning_rate": 0.00016050442602220033, "loss": 5.0704, "step": 38090 }, { "epoch": 0.05353407440323031, "grad_norm": 1.0819730758666992, "learning_rate": 0.00016054657861458478, "loss": 4.9824, "step": 38100 }, { "epoch": 0.05354812534139389, "grad_norm": 1.0542058944702148, "learning_rate": 0.00016058873120696922, "loss": 5.0282, "step": 38110 }, { "epoch": 0.053562176279557464, "grad_norm": 1.0968252420425415, "learning_rate": 0.00016063088379935365, "loss": 4.9974, "step": 38120 }, { "epoch": 0.05357622721772105, "grad_norm": 1.1440109014511108, "learning_rate": 0.00016067303639173808, "loss": 5.0201, "step": 38130 }, { "epoch": 0.05359027815588462, "grad_norm": 1.1223822832107544, "learning_rate": 0.00016071518898412254, "loss": 4.885, "step": 38140 }, { "epoch": 0.0536043290940482, "grad_norm": 1.0383020639419556, 
"learning_rate": 0.0001607573415765069, "loss": 5.0449, "step": 38150 }, { "epoch": 0.053618380032211775, "grad_norm": 1.1103941202163696, "learning_rate": 0.00016079949416889137, "loss": 5.1299, "step": 38160 }, { "epoch": 0.05363243097037535, "grad_norm": 1.1310505867004395, "learning_rate": 0.0001608416467612758, "loss": 4.8367, "step": 38170 }, { "epoch": 0.05364648190853893, "grad_norm": 1.1387687921524048, "learning_rate": 0.00016088379935366023, "loss": 4.9226, "step": 38180 }, { "epoch": 0.05366053284670251, "grad_norm": 1.082069993019104, "learning_rate": 0.00016092595194604466, "loss": 5.0986, "step": 38190 }, { "epoch": 0.05367458378486609, "grad_norm": 1.1030524969100952, "learning_rate": 0.00016096810453842912, "loss": 5.0223, "step": 38200 }, { "epoch": 0.05368863472302966, "grad_norm": 1.0807095766067505, "learning_rate": 0.00016101025713081355, "loss": 4.998, "step": 38210 }, { "epoch": 0.05370268566119324, "grad_norm": 1.0664376020431519, "learning_rate": 0.00016105240972319796, "loss": 5.0521, "step": 38220 }, { "epoch": 0.053716736599356815, "grad_norm": 1.1031697988510132, "learning_rate": 0.0001610945623155824, "loss": 4.9556, "step": 38230 }, { "epoch": 0.0537307875375204, "grad_norm": 1.0721416473388672, "learning_rate": 0.00016113671490796682, "loss": 5.0203, "step": 38240 }, { "epoch": 0.053744838475683974, "grad_norm": 1.0892760753631592, "learning_rate": 0.00016117886750035125, "loss": 4.9218, "step": 38250 }, { "epoch": 0.05375888941384755, "grad_norm": 1.0493428707122803, "learning_rate": 0.0001612210200927357, "loss": 5.0262, "step": 38260 }, { "epoch": 0.05377294035201113, "grad_norm": 1.0725332498550415, "learning_rate": 0.00016126317268512014, "loss": 5.0425, "step": 38270 }, { "epoch": 0.0537869912901747, "grad_norm": 1.0133819580078125, "learning_rate": 0.00016130532527750455, "loss": 5.1757, "step": 38280 }, { "epoch": 0.053801042228338286, "grad_norm": 1.0614526271820068, "learning_rate": 0.00016134747786988898, "loss": 4.8729, 
"step": 38290 }, { "epoch": 0.05381509316650186, "grad_norm": 1.0696353912353516, "learning_rate": 0.0001613896304622734, "loss": 4.9481, "step": 38300 }, { "epoch": 0.05382914410466544, "grad_norm": 1.1202436685562134, "learning_rate": 0.00016143178305465784, "loss": 4.9642, "step": 38310 }, { "epoch": 0.053843195042829015, "grad_norm": 1.1082916259765625, "learning_rate": 0.0001614739356470423, "loss": 4.9846, "step": 38320 }, { "epoch": 0.05385724598099259, "grad_norm": 1.0403755903244019, "learning_rate": 0.00016151608823942673, "loss": 5.0774, "step": 38330 }, { "epoch": 0.053871296919156174, "grad_norm": 1.1000362634658813, "learning_rate": 0.00016155824083181116, "loss": 4.9838, "step": 38340 }, { "epoch": 0.05388534785731975, "grad_norm": 1.0491472482681274, "learning_rate": 0.00016160039342419556, "loss": 4.971, "step": 38350 }, { "epoch": 0.053899398795483326, "grad_norm": 1.093478798866272, "learning_rate": 0.00016164254601658, "loss": 4.9692, "step": 38360 }, { "epoch": 0.0539134497336469, "grad_norm": 1.0910667181015015, "learning_rate": 0.00016168469860896443, "loss": 4.9924, "step": 38370 }, { "epoch": 0.05392750067181048, "grad_norm": 1.0395450592041016, "learning_rate": 0.00016172685120134888, "loss": 5.0483, "step": 38380 }, { "epoch": 0.05394155160997406, "grad_norm": 1.1088807582855225, "learning_rate": 0.00016176900379373331, "loss": 5.0126, "step": 38390 }, { "epoch": 0.05395560254813764, "grad_norm": 1.096949815750122, "learning_rate": 0.00016181115638611775, "loss": 4.8924, "step": 38400 }, { "epoch": 0.053969653486301214, "grad_norm": 1.0980435609817505, "learning_rate": 0.00016185330897850215, "loss": 4.9031, "step": 38410 }, { "epoch": 0.05398370442446479, "grad_norm": 1.1065305471420288, "learning_rate": 0.00016189546157088658, "loss": 4.9263, "step": 38420 }, { "epoch": 0.053997755362628366, "grad_norm": 1.0549814701080322, "learning_rate": 0.000161937614163271, "loss": 4.9421, "step": 38430 }, { "epoch": 0.05401180630079195, 
"grad_norm": 1.0844722986221313, "learning_rate": 0.00016197976675565547, "loss": 5.0158, "step": 38440 }, { "epoch": 0.054025857238955526, "grad_norm": 1.0721558332443237, "learning_rate": 0.0001620219193480399, "loss": 4.9792, "step": 38450 }, { "epoch": 0.0540399081771191, "grad_norm": 1.0752381086349487, "learning_rate": 0.00016206407194042433, "loss": 4.834, "step": 38460 }, { "epoch": 0.05405395911528268, "grad_norm": 1.0965216159820557, "learning_rate": 0.00016210622453280874, "loss": 4.9676, "step": 38470 }, { "epoch": 0.054068010053446254, "grad_norm": 1.094175934791565, "learning_rate": 0.00016214837712519317, "loss": 4.9671, "step": 38480 }, { "epoch": 0.05408206099160983, "grad_norm": 1.0462054014205933, "learning_rate": 0.0001621905297175776, "loss": 5.075, "step": 38490 }, { "epoch": 0.05409611192977341, "grad_norm": 1.0655064582824707, "learning_rate": 0.00016223268230996206, "loss": 4.9576, "step": 38500 }, { "epoch": 0.05411016286793699, "grad_norm": 1.075216293334961, "learning_rate": 0.0001622748349023465, "loss": 5.0349, "step": 38510 }, { "epoch": 0.054124213806100566, "grad_norm": 1.0411499738693237, "learning_rate": 0.00016231698749473092, "loss": 5.0538, "step": 38520 }, { "epoch": 0.05413826474426414, "grad_norm": 1.0397356748580933, "learning_rate": 0.00016235914008711535, "loss": 5.0188, "step": 38530 }, { "epoch": 0.05415231568242772, "grad_norm": 1.0994830131530762, "learning_rate": 0.00016240129267949976, "loss": 4.9885, "step": 38540 }, { "epoch": 0.0541663666205913, "grad_norm": 1.059415578842163, "learning_rate": 0.0001624434452718842, "loss": 4.868, "step": 38550 }, { "epoch": 0.05418041755875488, "grad_norm": 1.1089651584625244, "learning_rate": 0.00016248559786426864, "loss": 4.8518, "step": 38560 }, { "epoch": 0.05419446849691845, "grad_norm": 1.064829707145691, "learning_rate": 0.00016252775045665308, "loss": 4.9925, "step": 38570 }, { "epoch": 0.05420851943508203, "grad_norm": 1.0721412897109985, "learning_rate": 
0.0001625699030490375, "loss": 4.9596, "step": 38580 }, { "epoch": 0.054222570373245606, "grad_norm": 1.1417285203933716, "learning_rate": 0.00016261205564142194, "loss": 4.8477, "step": 38590 }, { "epoch": 0.05423662131140919, "grad_norm": 1.2103323936462402, "learning_rate": 0.00016265420823380634, "loss": 5.13, "step": 38600 }, { "epoch": 0.054250672249572765, "grad_norm": 1.1253100633621216, "learning_rate": 0.00016269636082619077, "loss": 4.9663, "step": 38610 }, { "epoch": 0.05426472318773634, "grad_norm": 1.0597470998764038, "learning_rate": 0.00016273851341857523, "loss": 4.9221, "step": 38620 }, { "epoch": 0.05427877412589992, "grad_norm": 1.0937083959579468, "learning_rate": 0.00016278066601095966, "loss": 4.8933, "step": 38630 }, { "epoch": 0.054292825064063494, "grad_norm": 1.0493961572647095, "learning_rate": 0.0001628228186033441, "loss": 5.0548, "step": 38640 }, { "epoch": 0.05430687600222708, "grad_norm": 1.0712189674377441, "learning_rate": 0.00016286497119572853, "loss": 4.8846, "step": 38650 }, { "epoch": 0.05432092694039065, "grad_norm": 1.1186167001724243, "learning_rate": 0.00016290712378811296, "loss": 5.0001, "step": 38660 }, { "epoch": 0.05433497787855423, "grad_norm": 1.1650162935256958, "learning_rate": 0.00016294927638049736, "loss": 4.8981, "step": 38670 }, { "epoch": 0.054349028816717805, "grad_norm": 1.0327341556549072, "learning_rate": 0.00016299142897288182, "loss": 5.0457, "step": 38680 }, { "epoch": 0.05436307975488138, "grad_norm": 1.051758050918579, "learning_rate": 0.00016303358156526625, "loss": 4.9659, "step": 38690 }, { "epoch": 0.054377130693044964, "grad_norm": 1.1347402334213257, "learning_rate": 0.00016307573415765068, "loss": 4.9157, "step": 38700 }, { "epoch": 0.05439118163120854, "grad_norm": 1.0630327463150024, "learning_rate": 0.0001631178867500351, "loss": 5.0936, "step": 38710 }, { "epoch": 0.05440523256937212, "grad_norm": 1.104245662689209, "learning_rate": 0.00016316003934241957, "loss": 5.0357, "step": 38720 
}, { "epoch": 0.05441928350753569, "grad_norm": 1.0906322002410889, "learning_rate": 0.00016320219193480395, "loss": 4.9834, "step": 38730 }, { "epoch": 0.05443333444569927, "grad_norm": 1.0565413236618042, "learning_rate": 0.0001632443445271884, "loss": 4.9378, "step": 38740 }, { "epoch": 0.05444738538386285, "grad_norm": 1.0998698472976685, "learning_rate": 0.00016328649711957284, "loss": 5.0077, "step": 38750 }, { "epoch": 0.05446143632202643, "grad_norm": 1.1021455526351929, "learning_rate": 0.00016332864971195727, "loss": 4.9545, "step": 38760 }, { "epoch": 0.054475487260190004, "grad_norm": 1.0735493898391724, "learning_rate": 0.0001633708023043417, "loss": 4.9852, "step": 38770 }, { "epoch": 0.05448953819835358, "grad_norm": 1.0596339702606201, "learning_rate": 0.00016341295489672616, "loss": 4.919, "step": 38780 }, { "epoch": 0.05450358913651716, "grad_norm": 1.1908012628555298, "learning_rate": 0.0001634551074891106, "loss": 4.8531, "step": 38790 }, { "epoch": 0.05451764007468073, "grad_norm": 1.1102972030639648, "learning_rate": 0.000163497260081495, "loss": 4.8526, "step": 38800 }, { "epoch": 0.054531691012844316, "grad_norm": 1.1045284271240234, "learning_rate": 0.00016353941267387942, "loss": 4.9537, "step": 38810 }, { "epoch": 0.05454574195100789, "grad_norm": 1.0729451179504395, "learning_rate": 0.00016358156526626386, "loss": 4.9667, "step": 38820 }, { "epoch": 0.05455979288917147, "grad_norm": 1.0459548234939575, "learning_rate": 0.00016362371785864829, "loss": 4.913, "step": 38830 }, { "epoch": 0.054573843827335045, "grad_norm": 1.0615431070327759, "learning_rate": 0.00016366587045103274, "loss": 4.9522, "step": 38840 }, { "epoch": 0.05458789476549862, "grad_norm": 1.1227502822875977, "learning_rate": 0.00016370802304341718, "loss": 4.9627, "step": 38850 }, { "epoch": 0.054601945703662204, "grad_norm": 1.0452730655670166, "learning_rate": 0.00016375017563580158, "loss": 5.0616, "step": 38860 }, { "epoch": 0.05461599664182578, "grad_norm": 
0.998740017414093, "learning_rate": 0.000163792328228186, "loss": 4.9609, "step": 38870 }, { "epoch": 0.054630047579989356, "grad_norm": 1.1312004327774048, "learning_rate": 0.00016383448082057044, "loss": 4.9495, "step": 38880 }, { "epoch": 0.05464409851815293, "grad_norm": 1.0797289609909058, "learning_rate": 0.00016387663341295487, "loss": 4.946, "step": 38890 }, { "epoch": 0.05465814945631651, "grad_norm": 1.0702334642410278, "learning_rate": 0.00016391878600533933, "loss": 5.0044, "step": 38900 }, { "epoch": 0.05467220039448009, "grad_norm": 1.0228101015090942, "learning_rate": 0.00016396093859772376, "loss": 4.8987, "step": 38910 }, { "epoch": 0.05468625133264367, "grad_norm": 1.0430359840393066, "learning_rate": 0.0001640030911901082, "loss": 5.0504, "step": 38920 }, { "epoch": 0.054700302270807244, "grad_norm": 1.065110206604004, "learning_rate": 0.0001640452437824926, "loss": 4.9139, "step": 38930 }, { "epoch": 0.05471435320897082, "grad_norm": 1.1111505031585693, "learning_rate": 0.00016408739637487703, "loss": 4.9611, "step": 38940 }, { "epoch": 0.054728404147134396, "grad_norm": 1.0788532495498657, "learning_rate": 0.00016412954896726146, "loss": 4.8959, "step": 38950 }, { "epoch": 0.05474245508529798, "grad_norm": 1.0466620922088623, "learning_rate": 0.00016417170155964592, "loss": 5.1461, "step": 38960 }, { "epoch": 0.054756506023461556, "grad_norm": 1.115582823753357, "learning_rate": 0.00016421385415203035, "loss": 4.9678, "step": 38970 }, { "epoch": 0.05477055696162513, "grad_norm": 1.0731269121170044, "learning_rate": 0.00016425600674441478, "loss": 4.9063, "step": 38980 }, { "epoch": 0.05478460789978871, "grad_norm": 1.081316351890564, "learning_rate": 0.00016429815933679918, "loss": 4.9726, "step": 38990 }, { "epoch": 0.054798658837952284, "grad_norm": 1.0765340328216553, "learning_rate": 0.00016434031192918362, "loss": 4.8929, "step": 39000 }, { "epoch": 0.05481270977611587, "grad_norm": 1.0560728311538696, "learning_rate": 
0.00016438246452156805, "loss": 4.9395, "step": 39010 }, { "epoch": 0.05482676071427944, "grad_norm": 1.069467306137085, "learning_rate": 0.0001644246171139525, "loss": 5.0583, "step": 39020 }, { "epoch": 0.05484081165244302, "grad_norm": 1.0423463582992554, "learning_rate": 0.00016446676970633694, "loss": 4.993, "step": 39030 }, { "epoch": 0.054854862590606596, "grad_norm": 1.064470648765564, "learning_rate": 0.00016450892229872137, "loss": 5.0129, "step": 39040 }, { "epoch": 0.05486891352877017, "grad_norm": 1.0736603736877441, "learning_rate": 0.0001645510748911058, "loss": 5.0073, "step": 39050 }, { "epoch": 0.054882964466933755, "grad_norm": 1.045296549797058, "learning_rate": 0.0001645932274834902, "loss": 4.9443, "step": 39060 }, { "epoch": 0.05489701540509733, "grad_norm": 1.1380881071090698, "learning_rate": 0.00016463538007587463, "loss": 4.9244, "step": 39070 }, { "epoch": 0.05491106634326091, "grad_norm": 1.0342426300048828, "learning_rate": 0.0001646775326682591, "loss": 4.9206, "step": 39080 }, { "epoch": 0.05492511728142448, "grad_norm": 1.0200796127319336, "learning_rate": 0.00016471968526064352, "loss": 5.0105, "step": 39090 }, { "epoch": 0.05493916821958806, "grad_norm": 1.0826342105865479, "learning_rate": 0.00016476183785302795, "loss": 5.0702, "step": 39100 }, { "epoch": 0.054953219157751636, "grad_norm": 1.0333572626113892, "learning_rate": 0.00016480399044541239, "loss": 4.8559, "step": 39110 }, { "epoch": 0.05496727009591522, "grad_norm": 1.1156673431396484, "learning_rate": 0.0001648461430377968, "loss": 5.0019, "step": 39120 }, { "epoch": 0.054981321034078795, "grad_norm": 1.07420814037323, "learning_rate": 0.00016488829563018122, "loss": 5.0687, "step": 39130 }, { "epoch": 0.05499537197224237, "grad_norm": 1.0014334917068481, "learning_rate": 0.00016493044822256568, "loss": 5.0476, "step": 39140 }, { "epoch": 0.05500942291040595, "grad_norm": 1.0669128894805908, "learning_rate": 0.0001649726008149501, "loss": 4.8751, "step": 39150 }, { 
"epoch": 0.055023473848569524, "grad_norm": 1.0790259838104248, "learning_rate": 0.00016501475340733454, "loss": 5.0306, "step": 39160 }, { "epoch": 0.05503752478673311, "grad_norm": 1.0873439311981201, "learning_rate": 0.00016505690599971897, "loss": 4.8936, "step": 39170 }, { "epoch": 0.05505157572489668, "grad_norm": 1.0448416471481323, "learning_rate": 0.00016509905859210338, "loss": 4.8749, "step": 39180 }, { "epoch": 0.05506562666306026, "grad_norm": 1.0575209856033325, "learning_rate": 0.0001651412111844878, "loss": 4.9797, "step": 39190 }, { "epoch": 0.055079677601223835, "grad_norm": 1.1213315725326538, "learning_rate": 0.00016518336377687227, "loss": 4.9521, "step": 39200 }, { "epoch": 0.05509372853938741, "grad_norm": 1.0260167121887207, "learning_rate": 0.0001652255163692567, "loss": 5.0328, "step": 39210 }, { "epoch": 0.055107779477550994, "grad_norm": 1.0591065883636475, "learning_rate": 0.00016526766896164113, "loss": 4.9218, "step": 39220 }, { "epoch": 0.05512183041571457, "grad_norm": 1.0388381481170654, "learning_rate": 0.00016530982155402556, "loss": 4.9386, "step": 39230 }, { "epoch": 0.05513588135387815, "grad_norm": 1.0415207147598267, "learning_rate": 0.00016535197414641, "loss": 4.8885, "step": 39240 }, { "epoch": 0.05514993229204172, "grad_norm": 1.0508288145065308, "learning_rate": 0.0001653941267387944, "loss": 5.0216, "step": 39250 }, { "epoch": 0.0551639832302053, "grad_norm": 1.0683437585830688, "learning_rate": 0.00016543627933117885, "loss": 4.9671, "step": 39260 }, { "epoch": 0.05517803416836888, "grad_norm": 1.0968106985092163, "learning_rate": 0.00016547843192356328, "loss": 4.9947, "step": 39270 }, { "epoch": 0.05519208510653246, "grad_norm": 1.0676803588867188, "learning_rate": 0.00016552058451594772, "loss": 4.973, "step": 39280 }, { "epoch": 0.055206136044696034, "grad_norm": 1.1953974962234497, "learning_rate": 0.00016556273710833215, "loss": 5.0037, "step": 39290 }, { "epoch": 0.05522018698285961, "grad_norm": 
1.0898711681365967, "learning_rate": 0.0001656048897007166, "loss": 4.8778, "step": 39300 }, { "epoch": 0.05523423792102319, "grad_norm": 1.0796436071395874, "learning_rate": 0.00016564282703386256, "loss": 5.0486, "step": 39310 }, { "epoch": 0.05524828885918677, "grad_norm": 1.0733755826950073, "learning_rate": 0.000165684979626247, "loss": 4.9972, "step": 39320 }, { "epoch": 0.055262339797350346, "grad_norm": 1.0305414199829102, "learning_rate": 0.00016572713221863145, "loss": 5.0652, "step": 39330 }, { "epoch": 0.05527639073551392, "grad_norm": 1.0890687704086304, "learning_rate": 0.00016576928481101588, "loss": 5.0277, "step": 39340 }, { "epoch": 0.0552904416736775, "grad_norm": 1.1217213869094849, "learning_rate": 0.00016581143740340029, "loss": 4.8927, "step": 39350 }, { "epoch": 0.055304492611841075, "grad_norm": 1.1358542442321777, "learning_rate": 0.00016585358999578472, "loss": 4.9728, "step": 39360 }, { "epoch": 0.05531854355000466, "grad_norm": 1.036201000213623, "learning_rate": 0.00016589574258816915, "loss": 4.9836, "step": 39370 }, { "epoch": 0.055332594488168234, "grad_norm": 1.0400617122650146, "learning_rate": 0.0001659378951805536, "loss": 4.9628, "step": 39380 }, { "epoch": 0.05534664542633181, "grad_norm": 1.030198097229004, "learning_rate": 0.00016598004777293804, "loss": 4.8906, "step": 39390 }, { "epoch": 0.055360696364495386, "grad_norm": 1.0506401062011719, "learning_rate": 0.00016602220036532247, "loss": 4.8948, "step": 39400 }, { "epoch": 0.05537474730265896, "grad_norm": 1.0474390983581543, "learning_rate": 0.00016606435295770687, "loss": 4.9137, "step": 39410 }, { "epoch": 0.05538879824082254, "grad_norm": 1.0166150331497192, "learning_rate": 0.0001661065055500913, "loss": 5.0177, "step": 39420 }, { "epoch": 0.05540284917898612, "grad_norm": 1.0387684106826782, "learning_rate": 0.00016614865814247573, "loss": 4.9143, "step": 39430 }, { "epoch": 0.0554169001171497, "grad_norm": 1.0397957563400269, "learning_rate": 
0.0001661908107348602, "loss": 4.9583, "step": 39440 }, { "epoch": 0.055430951055313274, "grad_norm": 1.0370898246765137, "learning_rate": 0.00016623296332724462, "loss": 4.9651, "step": 39450 }, { "epoch": 0.05544500199347685, "grad_norm": 1.0566965341567993, "learning_rate": 0.00016627511591962906, "loss": 5.0027, "step": 39460 }, { "epoch": 0.055459052931640426, "grad_norm": 1.054910659790039, "learning_rate": 0.0001663172685120135, "loss": 5.0484, "step": 39470 }, { "epoch": 0.05547310386980401, "grad_norm": 1.0622105598449707, "learning_rate": 0.0001663594211043979, "loss": 4.9111, "step": 39480 }, { "epoch": 0.055487154807967586, "grad_norm": 1.0327736139297485, "learning_rate": 0.00016640157369678232, "loss": 5.0337, "step": 39490 }, { "epoch": 0.05550120574613116, "grad_norm": 1.0651620626449585, "learning_rate": 0.00016644372628916678, "loss": 4.9398, "step": 39500 }, { "epoch": 0.05551525668429474, "grad_norm": 1.0605753660202026, "learning_rate": 0.0001664858788815512, "loss": 5.0069, "step": 39510 }, { "epoch": 0.055529307622458314, "grad_norm": 1.0352438688278198, "learning_rate": 0.00016652803147393564, "loss": 4.9148, "step": 39520 }, { "epoch": 0.0555433585606219, "grad_norm": 1.1176104545593262, "learning_rate": 0.00016657018406632007, "loss": 4.9403, "step": 39530 }, { "epoch": 0.05555740949878547, "grad_norm": 1.0405285358428955, "learning_rate": 0.00016661233665870448, "loss": 4.8954, "step": 39540 }, { "epoch": 0.05557146043694905, "grad_norm": 1.0079467296600342, "learning_rate": 0.0001666544892510889, "loss": 5.0099, "step": 39550 }, { "epoch": 0.055585511375112626, "grad_norm": 1.0810550451278687, "learning_rate": 0.00016669664184347337, "loss": 4.9254, "step": 39560 }, { "epoch": 0.0555995623132762, "grad_norm": 1.0852543115615845, "learning_rate": 0.0001667387944358578, "loss": 4.9133, "step": 39570 }, { "epoch": 0.055613613251439785, "grad_norm": 1.074148178100586, "learning_rate": 0.00016678094702824223, "loss": 4.8821, "step": 39580 }, 
{ "epoch": 0.05562766418960336, "grad_norm": 1.2053712606430054, "learning_rate": 0.00016682309962062666, "loss": 4.8644, "step": 39590 }, { "epoch": 0.05564171512776694, "grad_norm": 1.3233577013015747, "learning_rate": 0.00016686525221301106, "loss": 4.9788, "step": 39600 }, { "epoch": 0.05565576606593051, "grad_norm": 1.0858327150344849, "learning_rate": 0.0001669074048053955, "loss": 4.9615, "step": 39610 }, { "epoch": 0.05566981700409409, "grad_norm": 1.0294164419174194, "learning_rate": 0.00016694955739777995, "loss": 4.8376, "step": 39620 }, { "epoch": 0.05568386794225767, "grad_norm": 1.046635627746582, "learning_rate": 0.00016699170999016439, "loss": 4.9348, "step": 39630 }, { "epoch": 0.05569791888042125, "grad_norm": 1.0553175210952759, "learning_rate": 0.00016703386258254882, "loss": 5.0127, "step": 39640 }, { "epoch": 0.055711969818584825, "grad_norm": 1.0525593757629395, "learning_rate": 0.00016707601517493325, "loss": 4.9244, "step": 39650 }, { "epoch": 0.0557260207567484, "grad_norm": 1.049413800239563, "learning_rate": 0.00016711816776731768, "loss": 4.9705, "step": 39660 }, { "epoch": 0.05574007169491198, "grad_norm": 1.0601624250411987, "learning_rate": 0.00016716032035970208, "loss": 4.9637, "step": 39670 }, { "epoch": 0.05575412263307556, "grad_norm": 1.0123969316482544, "learning_rate": 0.00016720247295208654, "loss": 4.9764, "step": 39680 }, { "epoch": 0.05576817357123914, "grad_norm": 1.0407583713531494, "learning_rate": 0.00016724462554447097, "loss": 4.9696, "step": 39690 }, { "epoch": 0.05578222450940271, "grad_norm": 1.0655407905578613, "learning_rate": 0.0001672867781368554, "loss": 4.955, "step": 39700 }, { "epoch": 0.05579627544756629, "grad_norm": 1.0213630199432373, "learning_rate": 0.00016732893072923983, "loss": 4.8745, "step": 39710 }, { "epoch": 0.055810326385729865, "grad_norm": 1.0620572566986084, "learning_rate": 0.00016737108332162427, "loss": 4.925, "step": 39720 }, { "epoch": 0.05582437732389344, "grad_norm": 
1.0573869943618774, "learning_rate": 0.00016741323591400867, "loss": 4.9061, "step": 39730 }, { "epoch": 0.055838428262057024, "grad_norm": 1.0326967239379883, "learning_rate": 0.00016745538850639313, "loss": 5.0449, "step": 39740 }, { "epoch": 0.0558524792002206, "grad_norm": 1.082334280014038, "learning_rate": 0.00016749754109877756, "loss": 4.9577, "step": 39750 }, { "epoch": 0.05586653013838418, "grad_norm": 1.0817333459854126, "learning_rate": 0.000167539693691162, "loss": 4.9172, "step": 39760 }, { "epoch": 0.05588058107654775, "grad_norm": 1.0093368291854858, "learning_rate": 0.00016758184628354642, "loss": 4.8782, "step": 39770 }, { "epoch": 0.05589463201471133, "grad_norm": 1.0431386232376099, "learning_rate": 0.00016762399887593085, "loss": 4.9162, "step": 39780 }, { "epoch": 0.05590868295287491, "grad_norm": 1.052478313446045, "learning_rate": 0.0001676661514683153, "loss": 5.017, "step": 39790 }, { "epoch": 0.05592273389103849, "grad_norm": 1.0899916887283325, "learning_rate": 0.00016770830406069972, "loss": 4.9324, "step": 39800 }, { "epoch": 0.055936784829202064, "grad_norm": 1.0490303039550781, "learning_rate": 0.00016775045665308415, "loss": 4.9905, "step": 39810 }, { "epoch": 0.05595083576736564, "grad_norm": 1.129417896270752, "learning_rate": 0.00016779260924546858, "loss": 4.8127, "step": 39820 }, { "epoch": 0.05596488670552922, "grad_norm": 1.0525208711624146, "learning_rate": 0.000167834761837853, "loss": 4.9774, "step": 39830 }, { "epoch": 0.0559789376436928, "grad_norm": 1.0649760961532593, "learning_rate": 0.00016787691443023744, "loss": 4.8925, "step": 39840 }, { "epoch": 0.055992988581856376, "grad_norm": 1.0432103872299194, "learning_rate": 0.0001679190670226219, "loss": 5.1254, "step": 39850 }, { "epoch": 0.05600703952001995, "grad_norm": 1.046905517578125, "learning_rate": 0.0001679612196150063, "loss": 5.0319, "step": 39860 }, { "epoch": 0.05602109045818353, "grad_norm": 0.994096040725708, "learning_rate": 0.00016800337220739073, 
"loss": 5.0441, "step": 39870 }, { "epoch": 0.056035141396347105, "grad_norm": 1.0326930284500122, "learning_rate": 0.00016804552479977516, "loss": 4.9293, "step": 39880 }, { "epoch": 0.05604919233451069, "grad_norm": 1.0579875707626343, "learning_rate": 0.0001680876773921596, "loss": 4.9475, "step": 39890 }, { "epoch": 0.056063243272674264, "grad_norm": 1.1440166234970093, "learning_rate": 0.00016812982998454403, "loss": 4.8159, "step": 39900 }, { "epoch": 0.05607729421083784, "grad_norm": 1.0132042169570923, "learning_rate": 0.00016817198257692849, "loss": 5.0357, "step": 39910 }, { "epoch": 0.056091345149001416, "grad_norm": 1.0793256759643555, "learning_rate": 0.00016821413516931292, "loss": 4.8232, "step": 39920 }, { "epoch": 0.05610539608716499, "grad_norm": 1.0349791049957275, "learning_rate": 0.00016825628776169732, "loss": 5.0093, "step": 39930 }, { "epoch": 0.056119447025328575, "grad_norm": 1.054980754852295, "learning_rate": 0.00016829844035408175, "loss": 4.905, "step": 39940 }, { "epoch": 0.05613349796349215, "grad_norm": 1.1767756938934326, "learning_rate": 0.00016834059294646618, "loss": 4.9174, "step": 39950 }, { "epoch": 0.05614754890165573, "grad_norm": 1.0445729494094849, "learning_rate": 0.00016838274553885064, "loss": 4.9582, "step": 39960 }, { "epoch": 0.056161599839819304, "grad_norm": 1.0728813409805298, "learning_rate": 0.00016842489813123507, "loss": 4.8951, "step": 39970 }, { "epoch": 0.05617565077798288, "grad_norm": 1.0347257852554321, "learning_rate": 0.0001684670507236195, "loss": 4.9209, "step": 39980 }, { "epoch": 0.05618970171614646, "grad_norm": 1.0311264991760254, "learning_rate": 0.0001685092033160039, "loss": 4.9572, "step": 39990 }, { "epoch": 0.05620375265431004, "grad_norm": 1.029685378074646, "learning_rate": 0.00016855135590838834, "loss": 4.8857, "step": 40000 }, { "epoch": 0.056217803592473616, "grad_norm": 1.0317046642303467, "learning_rate": 0.00016859350850077277, "loss": 4.9465, "step": 40010 }, { "epoch": 
0.05623185453063719, "grad_norm": 1.0499831438064575, "learning_rate": 0.00016863566109315723, "loss": 4.914, "step": 40020 }, { "epoch": 0.05624590546880077, "grad_norm": 1.0336376428604126, "learning_rate": 0.00016867781368554166, "loss": 5.013, "step": 40030 }, { "epoch": 0.05625995640696435, "grad_norm": 1.024095892906189, "learning_rate": 0.0001687199662779261, "loss": 4.8756, "step": 40040 }, { "epoch": 0.05627400734512793, "grad_norm": 1.0317274332046509, "learning_rate": 0.00016876211887031052, "loss": 4.9187, "step": 40050 }, { "epoch": 0.0562880582832915, "grad_norm": 1.834438681602478, "learning_rate": 0.00016880427146269493, "loss": 4.8996, "step": 40060 }, { "epoch": 0.05630210922145508, "grad_norm": 1.0885308980941772, "learning_rate": 0.00016884642405507936, "loss": 5.0043, "step": 40070 }, { "epoch": 0.056316160159618656, "grad_norm": 1.1309927701950073, "learning_rate": 0.00016888857664746381, "loss": 4.9811, "step": 40080 }, { "epoch": 0.05633021109778223, "grad_norm": 1.034248948097229, "learning_rate": 0.00016893072923984825, "loss": 4.9089, "step": 40090 }, { "epoch": 0.056344262035945815, "grad_norm": 1.0103942155838013, "learning_rate": 0.00016897288183223268, "loss": 4.8929, "step": 40100 }, { "epoch": 0.05635831297410939, "grad_norm": 1.0718656778335571, "learning_rate": 0.0001690150344246171, "loss": 4.9839, "step": 40110 }, { "epoch": 0.05637236391227297, "grad_norm": 1.044500470161438, "learning_rate": 0.0001690571870170015, "loss": 4.8699, "step": 40120 }, { "epoch": 0.05638641485043654, "grad_norm": 1.024745225906372, "learning_rate": 0.00016909933960938594, "loss": 5.0166, "step": 40130 }, { "epoch": 0.05640046578860012, "grad_norm": 1.0210412740707397, "learning_rate": 0.0001691414922017704, "loss": 4.958, "step": 40140 }, { "epoch": 0.0564145167267637, "grad_norm": 1.0404855012893677, "learning_rate": 0.00016918364479415483, "loss": 4.9962, "step": 40150 }, { "epoch": 0.05642856766492728, "grad_norm": 1.0807390213012695, 
"learning_rate": 0.00016922579738653926, "loss": 4.9206, "step": 40160 }, { "epoch": 0.056442618603090855, "grad_norm": 1.0216072797775269, "learning_rate": 0.0001692679499789237, "loss": 4.9689, "step": 40170 }, { "epoch": 0.05645666954125443, "grad_norm": 1.0541982650756836, "learning_rate": 0.00016931010257130813, "loss": 4.9347, "step": 40180 }, { "epoch": 0.05647072047941801, "grad_norm": 1.072575569152832, "learning_rate": 0.00016935225516369253, "loss": 5.0225, "step": 40190 }, { "epoch": 0.05648477141758159, "grad_norm": 1.0966130495071411, "learning_rate": 0.000169394407756077, "loss": 4.9683, "step": 40200 }, { "epoch": 0.05649882235574517, "grad_norm": 1.0755553245544434, "learning_rate": 0.00016943656034846142, "loss": 4.8777, "step": 40210 }, { "epoch": 0.05651287329390874, "grad_norm": 1.0782699584960938, "learning_rate": 0.00016947871294084585, "loss": 4.9303, "step": 40220 }, { "epoch": 0.05652692423207232, "grad_norm": 1.0849295854568481, "learning_rate": 0.00016952086553323028, "loss": 4.9465, "step": 40230 }, { "epoch": 0.056540975170235895, "grad_norm": 1.0700656175613403, "learning_rate": 0.0001695630181256147, "loss": 4.9312, "step": 40240 }, { "epoch": 0.05655502610839948, "grad_norm": 1.053043246269226, "learning_rate": 0.00016960517071799912, "loss": 4.9405, "step": 40250 }, { "epoch": 0.056569077046563054, "grad_norm": 1.0179575681686401, "learning_rate": 0.00016964732331038358, "loss": 4.91, "step": 40260 }, { "epoch": 0.05658312798472663, "grad_norm": 1.0473299026489258, "learning_rate": 0.000169689475902768, "loss": 5.0573, "step": 40270 }, { "epoch": 0.05659717892289021, "grad_norm": 1.0085793733596802, "learning_rate": 0.00016973162849515244, "loss": 4.9596, "step": 40280 }, { "epoch": 0.05661122986105378, "grad_norm": 1.210898518562317, "learning_rate": 0.00016977378108753687, "loss": 4.9208, "step": 40290 }, { "epoch": 0.056625280799217366, "grad_norm": 1.0420124530792236, "learning_rate": 0.0001698159336799213, "loss": 4.8712, 
"step": 40300 }, { "epoch": 0.05663933173738094, "grad_norm": 1.0410534143447876, "learning_rate": 0.0001698580862723057, "loss": 4.8958, "step": 40310 }, { "epoch": 0.05665338267554452, "grad_norm": 1.0174304246902466, "learning_rate": 0.00016990023886469016, "loss": 5.0153, "step": 40320 }, { "epoch": 0.056667433613708094, "grad_norm": 1.1811450719833374, "learning_rate": 0.0001699423914570746, "loss": 4.8823, "step": 40330 }, { "epoch": 0.05668148455187167, "grad_norm": 1.0314950942993164, "learning_rate": 0.00016998454404945903, "loss": 5.0111, "step": 40340 }, { "epoch": 0.056695535490035254, "grad_norm": 1.0767461061477661, "learning_rate": 0.00017002669664184346, "loss": 5.0005, "step": 40350 }, { "epoch": 0.05670958642819883, "grad_norm": 1.0583289861679077, "learning_rate": 0.0001700688492342279, "loss": 4.9518, "step": 40360 }, { "epoch": 0.056723637366362406, "grad_norm": 1.068487286567688, "learning_rate": 0.00017011100182661235, "loss": 5.0675, "step": 40370 }, { "epoch": 0.05673768830452598, "grad_norm": 1.0626734495162964, "learning_rate": 0.00017015315441899675, "loss": 4.979, "step": 40380 }, { "epoch": 0.05675173924268956, "grad_norm": 1.0374014377593994, "learning_rate": 0.00017019530701138118, "loss": 4.7829, "step": 40390 }, { "epoch": 0.056765790180853135, "grad_norm": 1.0241491794586182, "learning_rate": 0.0001702374596037656, "loss": 4.9447, "step": 40400 }, { "epoch": 0.05677984111901672, "grad_norm": 1.0468249320983887, "learning_rate": 0.00017027961219615004, "loss": 4.8091, "step": 40410 }, { "epoch": 0.056793892057180294, "grad_norm": 1.0289359092712402, "learning_rate": 0.00017032176478853447, "loss": 4.9062, "step": 40420 }, { "epoch": 0.05680794299534387, "grad_norm": 1.02260422706604, "learning_rate": 0.00017036391738091893, "loss": 4.9701, "step": 40430 }, { "epoch": 0.056821993933507446, "grad_norm": 1.0522793531417847, "learning_rate": 0.00017040606997330334, "loss": 4.9831, "step": 40440 }, { "epoch": 0.05683604487167102, 
"grad_norm": 1.0370879173278809, "learning_rate": 0.00017044822256568777, "loss": 4.8978, "step": 40450 }, { "epoch": 0.056850095809834605, "grad_norm": 1.0785061120986938, "learning_rate": 0.0001704903751580722, "loss": 4.914, "step": 40460 }, { "epoch": 0.05686414674799818, "grad_norm": 1.0285074710845947, "learning_rate": 0.00017053252775045663, "loss": 5.0287, "step": 40470 }, { "epoch": 0.05687819768616176, "grad_norm": 1.0587258338928223, "learning_rate": 0.00017057468034284106, "loss": 4.8259, "step": 40480 }, { "epoch": 0.056892248624325334, "grad_norm": 1.0602926015853882, "learning_rate": 0.00017061683293522552, "loss": 4.9808, "step": 40490 }, { "epoch": 0.05690629956248891, "grad_norm": 1.0697758197784424, "learning_rate": 0.00017065898552760995, "loss": 5.0533, "step": 40500 }, { "epoch": 0.05692035050065249, "grad_norm": 1.0304670333862305, "learning_rate": 0.00017070113811999436, "loss": 5.0054, "step": 40510 }, { "epoch": 0.05693440143881607, "grad_norm": 1.0418838262557983, "learning_rate": 0.00017074329071237879, "loss": 4.9798, "step": 40520 }, { "epoch": 0.056948452376979646, "grad_norm": 0.9913653135299683, "learning_rate": 0.00017078544330476322, "loss": 4.9925, "step": 40530 }, { "epoch": 0.05696250331514322, "grad_norm": 0.988510012626648, "learning_rate": 0.00017082759589714768, "loss": 4.9377, "step": 40540 }, { "epoch": 0.0569765542533068, "grad_norm": 1.019424557685852, "learning_rate": 0.0001708697484895321, "loss": 5.0321, "step": 40550 }, { "epoch": 0.05699060519147038, "grad_norm": 1.0009490251541138, "learning_rate": 0.00017091190108191654, "loss": 4.9732, "step": 40560 }, { "epoch": 0.05700465612963396, "grad_norm": 1.026256799697876, "learning_rate": 0.00017095405367430094, "loss": 4.912, "step": 40570 }, { "epoch": 0.05701870706779753, "grad_norm": 1.0279102325439453, "learning_rate": 0.00017099620626668537, "loss": 4.9669, "step": 40580 }, { "epoch": 0.05703275800596111, "grad_norm": 1.028504490852356, "learning_rate": 
0.0001710383588590698, "loss": 4.9007, "step": 40590 }, { "epoch": 0.057046808944124686, "grad_norm": 1.0499134063720703, "learning_rate": 0.00017108051145145426, "loss": 4.9829, "step": 40600 }, { "epoch": 0.05706085988228827, "grad_norm": 1.0060728788375854, "learning_rate": 0.0001711226640438387, "loss": 4.9123, "step": 40610 }, { "epoch": 0.057074910820451845, "grad_norm": 1.0461713075637817, "learning_rate": 0.00017116481663622312, "loss": 5.0092, "step": 40620 }, { "epoch": 0.05708896175861542, "grad_norm": 1.0552021265029907, "learning_rate": 0.00017120696922860756, "loss": 4.8915, "step": 40630 }, { "epoch": 0.057103012696779, "grad_norm": 1.0912543535232544, "learning_rate": 0.00017124912182099196, "loss": 4.9294, "step": 40640 }, { "epoch": 0.05711706363494257, "grad_norm": 1.0685486793518066, "learning_rate": 0.0001712912744133764, "loss": 4.9328, "step": 40650 }, { "epoch": 0.057131114573106156, "grad_norm": 1.093159556388855, "learning_rate": 0.00017133342700576085, "loss": 5.0754, "step": 40660 }, { "epoch": 0.05714516551126973, "grad_norm": 1.0238051414489746, "learning_rate": 0.00017137557959814528, "loss": 4.8441, "step": 40670 }, { "epoch": 0.05715921644943331, "grad_norm": 1.0513436794281006, "learning_rate": 0.0001714177321905297, "loss": 4.9075, "step": 40680 }, { "epoch": 0.057173267387596885, "grad_norm": 1.0914078950881958, "learning_rate": 0.00017145988478291414, "loss": 4.9667, "step": 40690 }, { "epoch": 0.05718731832576046, "grad_norm": 1.0058307647705078, "learning_rate": 0.00017150203737529855, "loss": 5.0005, "step": 40700 }, { "epoch": 0.05720136926392404, "grad_norm": 0.9954103827476501, "learning_rate": 0.00017154418996768298, "loss": 5.0039, "step": 40710 }, { "epoch": 0.05721542020208762, "grad_norm": 1.0139034986495972, "learning_rate": 0.00017158634256006744, "loss": 4.9052, "step": 40720 }, { "epoch": 0.0572294711402512, "grad_norm": 1.0923491716384888, "learning_rate": 0.00017162849515245187, "loss": 4.8337, "step": 40730 }, 
{ "epoch": 0.05724352207841477, "grad_norm": 1.025458574295044, "learning_rate": 0.0001716706477448363, "loss": 4.941, "step": 40740 }, { "epoch": 0.05725757301657835, "grad_norm": 1.1214048862457275, "learning_rate": 0.00017171280033722073, "loss": 4.9764, "step": 40750 }, { "epoch": 0.057271623954741925, "grad_norm": 1.0020577907562256, "learning_rate": 0.00017175495292960516, "loss": 4.9437, "step": 40760 }, { "epoch": 0.05728567489290551, "grad_norm": 1.0177772045135498, "learning_rate": 0.00017179710552198957, "loss": 4.8961, "step": 40770 }, { "epoch": 0.057299725831069084, "grad_norm": 1.029328465461731, "learning_rate": 0.00017183925811437402, "loss": 4.8494, "step": 40780 }, { "epoch": 0.05731377676923266, "grad_norm": 1.0189824104309082, "learning_rate": 0.00017188141070675845, "loss": 4.912, "step": 40790 }, { "epoch": 0.05732782770739624, "grad_norm": 0.9939380884170532, "learning_rate": 0.00017192356329914289, "loss": 4.8773, "step": 40800 }, { "epoch": 0.05734187864555981, "grad_norm": 1.076353907585144, "learning_rate": 0.00017196571589152732, "loss": 4.9416, "step": 40810 }, { "epoch": 0.057355929583723396, "grad_norm": 1.049857258796692, "learning_rate": 0.00017200786848391175, "loss": 4.8408, "step": 40820 }, { "epoch": 0.05736998052188697, "grad_norm": 1.0945760011672974, "learning_rate": 0.00017205002107629615, "loss": 4.928, "step": 40830 }, { "epoch": 0.05738403146005055, "grad_norm": 1.0334876775741577, "learning_rate": 0.0001720921736686806, "loss": 5.0276, "step": 40840 }, { "epoch": 0.057398082398214124, "grad_norm": 1.0239782333374023, "learning_rate": 0.00017213432626106504, "loss": 5.0475, "step": 40850 }, { "epoch": 0.0574121333363777, "grad_norm": 1.0112613439559937, "learning_rate": 0.00017217647885344947, "loss": 5.0883, "step": 40860 }, { "epoch": 0.057426184274541284, "grad_norm": 1.0588569641113281, "learning_rate": 0.0001722186314458339, "loss": 4.9827, "step": 40870 }, { "epoch": 0.05744023521270486, "grad_norm": 
1.0560933351516724, "learning_rate": 0.00017226078403821834, "loss": 4.953, "step": 40880 }, { "epoch": 0.057454286150868436, "grad_norm": 1.08881413936615, "learning_rate": 0.00017230293663060274, "loss": 4.9457, "step": 40890 }, { "epoch": 0.05746833708903201, "grad_norm": 1.0113881826400757, "learning_rate": 0.0001723450892229872, "loss": 5.0579, "step": 40900 }, { "epoch": 0.05748238802719559, "grad_norm": 0.9996208548545837, "learning_rate": 0.00017238724181537163, "loss": 4.8815, "step": 40910 }, { "epoch": 0.05749643896535917, "grad_norm": 1.0179857015609741, "learning_rate": 0.00017242939440775606, "loss": 4.8752, "step": 40920 }, { "epoch": 0.05751048990352275, "grad_norm": 1.0453020334243774, "learning_rate": 0.0001724715470001405, "loss": 4.8984, "step": 40930 }, { "epoch": 0.057524540841686324, "grad_norm": 1.0377174615859985, "learning_rate": 0.00017251369959252492, "loss": 4.9946, "step": 40940 }, { "epoch": 0.0575385917798499, "grad_norm": 1.2559747695922852, "learning_rate": 0.00017255585218490938, "loss": 4.9518, "step": 40950 }, { "epoch": 0.057552642718013476, "grad_norm": 1.0141798257827759, "learning_rate": 0.00017259800477729378, "loss": 4.9145, "step": 40960 }, { "epoch": 0.05756669365617706, "grad_norm": 1.0602003335952759, "learning_rate": 0.00017264015736967822, "loss": 4.8659, "step": 40970 }, { "epoch": 0.057580744594340635, "grad_norm": 1.0627175569534302, "learning_rate": 0.00017268230996206265, "loss": 5.0545, "step": 40980 }, { "epoch": 0.05759479553250421, "grad_norm": 1.0080777406692505, "learning_rate": 0.00017272446255444708, "loss": 4.936, "step": 40990 }, { "epoch": 0.05760884647066779, "grad_norm": 1.0756409168243408, "learning_rate": 0.0001727666151468315, "loss": 4.8759, "step": 41000 }, { "epoch": 0.057622897408831364, "grad_norm": 1.0565085411071777, "learning_rate": 0.00017280876773921597, "loss": 4.9084, "step": 41010 }, { "epoch": 0.05763694834699494, "grad_norm": 0.9990535378456116, "learning_rate": 
0.00017285092033160037, "loss": 4.8277, "step": 41020 }, { "epoch": 0.05765099928515852, "grad_norm": 1.066185712814331, "learning_rate": 0.0001728930729239848, "loss": 4.906, "step": 41030 }, { "epoch": 0.0576650502233221, "grad_norm": 0.9987258315086365, "learning_rate": 0.00017293522551636923, "loss": 4.863, "step": 41040 }, { "epoch": 0.057679101161485676, "grad_norm": 1.011893391609192, "learning_rate": 0.00017297737810875367, "loss": 4.931, "step": 41050 }, { "epoch": 0.05769315209964925, "grad_norm": 1.0528898239135742, "learning_rate": 0.0001730195307011381, "loss": 5.0409, "step": 41060 }, { "epoch": 0.05770720303781283, "grad_norm": 1.0291653871536255, "learning_rate": 0.00017306168329352255, "loss": 4.87, "step": 41070 }, { "epoch": 0.05772125397597641, "grad_norm": 1.029781460762024, "learning_rate": 0.00017310383588590699, "loss": 5.0709, "step": 41080 }, { "epoch": 0.05773530491413999, "grad_norm": 1.0627702474594116, "learning_rate": 0.0001731459884782914, "loss": 4.8982, "step": 41090 }, { "epoch": 0.05774935585230356, "grad_norm": 1.027671456336975, "learning_rate": 0.00017318814107067582, "loss": 4.8947, "step": 41100 }, { "epoch": 0.05776340679046714, "grad_norm": 1.0236762762069702, "learning_rate": 0.00017323029366306025, "loss": 4.9642, "step": 41110 }, { "epoch": 0.057777457728630716, "grad_norm": 1.0269731283187866, "learning_rate": 0.00017327244625544468, "loss": 4.9839, "step": 41120 }, { "epoch": 0.0577915086667943, "grad_norm": 1.002811074256897, "learning_rate": 0.00017331459884782914, "loss": 4.96, "step": 41130 }, { "epoch": 0.057805559604957875, "grad_norm": 1.0268176794052124, "learning_rate": 0.00017335675144021357, "loss": 4.9521, "step": 41140 }, { "epoch": 0.05781961054312145, "grad_norm": 1.0485950708389282, "learning_rate": 0.00017339890403259798, "loss": 4.9922, "step": 41150 }, { "epoch": 0.05783366148128503, "grad_norm": 1.0000367164611816, "learning_rate": 0.0001734410566249824, "loss": 4.8602, "step": 41160 }, { "epoch": 
0.0578477124194486, "grad_norm": 1.0004191398620605, "learning_rate": 0.00017348320921736684, "loss": 4.8732, "step": 41170 }, { "epoch": 0.057861763357612186, "grad_norm": 1.0630460977554321, "learning_rate": 0.0001735253618097513, "loss": 5.0591, "step": 41180 }, { "epoch": 0.05787581429577576, "grad_norm": 1.069863200187683, "learning_rate": 0.00017356751440213573, "loss": 4.9227, "step": 41190 }, { "epoch": 0.05788986523393934, "grad_norm": 1.0146735906600952, "learning_rate": 0.00017360966699452016, "loss": 4.9153, "step": 41200 }, { "epoch": 0.057903916172102915, "grad_norm": 1.0278013944625854, "learning_rate": 0.0001736518195869046, "loss": 4.9406, "step": 41210 }, { "epoch": 0.05791796711026649, "grad_norm": 1.0474538803100586, "learning_rate": 0.000173693972179289, "loss": 4.8249, "step": 41220 }, { "epoch": 0.057932018048430074, "grad_norm": 1.0575395822525024, "learning_rate": 0.00017373612477167343, "loss": 4.9606, "step": 41230 }, { "epoch": 0.05794606898659365, "grad_norm": 1.0785536766052246, "learning_rate": 0.00017377827736405788, "loss": 4.9818, "step": 41240 }, { "epoch": 0.05796011992475723, "grad_norm": 1.034358024597168, "learning_rate": 0.00017382042995644232, "loss": 5.0435, "step": 41250 }, { "epoch": 0.0579741708629208, "grad_norm": 1.0407671928405762, "learning_rate": 0.00017386258254882675, "loss": 4.9362, "step": 41260 }, { "epoch": 0.05798822180108438, "grad_norm": 1.1351573467254639, "learning_rate": 0.00017390473514121118, "loss": 4.9251, "step": 41270 }, { "epoch": 0.05800227273924796, "grad_norm": 1.0201154947280884, "learning_rate": 0.00017394688773359558, "loss": 4.9085, "step": 41280 }, { "epoch": 0.05801632367741154, "grad_norm": 1.0584053993225098, "learning_rate": 0.00017398904032598, "loss": 4.9704, "step": 41290 }, { "epoch": 0.058030374615575114, "grad_norm": 1.0306909084320068, "learning_rate": 0.00017403119291836447, "loss": 4.9058, "step": 41300 }, { "epoch": 0.05804442555373869, "grad_norm": 1.0459680557250977, 
"learning_rate": 0.0001740733455107489, "loss": 4.949, "step": 41310 }, { "epoch": 0.05805847649190227, "grad_norm": 0.9976636171340942, "learning_rate": 0.00017411549810313333, "loss": 5.0384, "step": 41320 }, { "epoch": 0.05807252743006584, "grad_norm": 1.01653254032135, "learning_rate": 0.00017415765069551776, "loss": 4.9653, "step": 41330 }, { "epoch": 0.058086578368229426, "grad_norm": 1.0457319021224976, "learning_rate": 0.0001741998032879022, "loss": 4.849, "step": 41340 }, { "epoch": 0.058100629306393, "grad_norm": 1.0464872121810913, "learning_rate": 0.0001742419558802866, "loss": 4.8891, "step": 41350 }, { "epoch": 0.05811468024455658, "grad_norm": 1.0535956621170044, "learning_rate": 0.00017428410847267106, "loss": 4.988, "step": 41360 }, { "epoch": 0.058128731182720154, "grad_norm": 1.0425530672073364, "learning_rate": 0.0001743262610650555, "loss": 4.816, "step": 41370 }, { "epoch": 0.05814278212088373, "grad_norm": 1.0837699174880981, "learning_rate": 0.00017436841365743992, "loss": 4.9787, "step": 41380 }, { "epoch": 0.058156833059047314, "grad_norm": 1.025530457496643, "learning_rate": 0.00017441056624982435, "loss": 4.9268, "step": 41390 }, { "epoch": 0.05817088399721089, "grad_norm": 1.0290592908859253, "learning_rate": 0.00017445271884220878, "loss": 4.9541, "step": 41400 }, { "epoch": 0.058184934935374466, "grad_norm": 1.0412163734436035, "learning_rate": 0.0001744948714345932, "loss": 4.938, "step": 41410 }, { "epoch": 0.05819898587353804, "grad_norm": 1.013218879699707, "learning_rate": 0.00017453702402697765, "loss": 5.0052, "step": 41420 }, { "epoch": 0.05821303681170162, "grad_norm": 1.0787619352340698, "learning_rate": 0.00017457917661936208, "loss": 4.8859, "step": 41430 }, { "epoch": 0.0582270877498652, "grad_norm": 1.0490294694900513, "learning_rate": 0.0001746213292117465, "loss": 4.8447, "step": 41440 }, { "epoch": 0.05824113868802878, "grad_norm": 1.0566773414611816, "learning_rate": 0.00017466348180413094, "loss": 5.0328, "step": 
41450 }, { "epoch": 0.058255189626192354, "grad_norm": 1.0242851972579956, "learning_rate": 0.00017470563439651537, "loss": 4.9068, "step": 41460 }, { "epoch": 0.05826924056435593, "grad_norm": 1.0676864385604858, "learning_rate": 0.00017474778698889983, "loss": 4.9384, "step": 41470 }, { "epoch": 0.058283291502519506, "grad_norm": 1.0332306623458862, "learning_rate": 0.00017478993958128423, "loss": 5.0788, "step": 41480 }, { "epoch": 0.05829734244068309, "grad_norm": 0.9824172258377075, "learning_rate": 0.00017483209217366866, "loss": 4.8847, "step": 41490 }, { "epoch": 0.058311393378846665, "grad_norm": 1.071233868598938, "learning_rate": 0.0001748742447660531, "loss": 4.8921, "step": 41500 }, { "epoch": 0.05832544431701024, "grad_norm": 1.0408722162246704, "learning_rate": 0.00017491639735843753, "loss": 5.0539, "step": 41510 }, { "epoch": 0.05833949525517382, "grad_norm": 1.0348032712936401, "learning_rate": 0.00017495854995082196, "loss": 4.8807, "step": 41520 }, { "epoch": 0.058353546193337394, "grad_norm": 1.0092370510101318, "learning_rate": 0.00017500070254320642, "loss": 4.9697, "step": 41530 }, { "epoch": 0.05836759713150098, "grad_norm": 1.1517890691757202, "learning_rate": 0.00017504285513559082, "loss": 4.9005, "step": 41540 }, { "epoch": 0.05838164806966455, "grad_norm": 1.056350827217102, "learning_rate": 0.00017508500772797525, "loss": 4.9908, "step": 41550 }, { "epoch": 0.05839569900782813, "grad_norm": 1.0199683904647827, "learning_rate": 0.00017512716032035968, "loss": 4.9737, "step": 41560 }, { "epoch": 0.058409749945991705, "grad_norm": 1.0125006437301636, "learning_rate": 0.0001751693129127441, "loss": 4.989, "step": 41570 }, { "epoch": 0.05842380088415528, "grad_norm": 1.0040638446807861, "learning_rate": 0.00017521146550512854, "loss": 5.0114, "step": 41580 }, { "epoch": 0.058437851822318865, "grad_norm": 1.3754082918167114, "learning_rate": 0.000175253618097513, "loss": 5.0118, "step": 41590 }, { "epoch": 0.05845190276048244, "grad_norm": 
0.9892797470092773, "learning_rate": 0.0001752957706898974, "loss": 4.9575, "step": 41600 }, { "epoch": 0.05846595369864602, "grad_norm": 1.073715090751648, "learning_rate": 0.00017533792328228184, "loss": 4.9162, "step": 41610 }, { "epoch": 0.05848000463680959, "grad_norm": 1.0172522068023682, "learning_rate": 0.00017538007587466627, "loss": 5.0024, "step": 41620 }, { "epoch": 0.05849405557497317, "grad_norm": 1.0679402351379395, "learning_rate": 0.0001754222284670507, "loss": 4.9627, "step": 41630 }, { "epoch": 0.058508106513136746, "grad_norm": 1.0129616260528564, "learning_rate": 0.00017546438105943513, "loss": 4.9732, "step": 41640 }, { "epoch": 0.05852215745130033, "grad_norm": 1.0495364665985107, "learning_rate": 0.0001755065336518196, "loss": 4.8889, "step": 41650 }, { "epoch": 0.058536208389463905, "grad_norm": 0.9987136721611023, "learning_rate": 0.00017554868624420402, "loss": 4.9349, "step": 41660 }, { "epoch": 0.05855025932762748, "grad_norm": 1.0522116422653198, "learning_rate": 0.00017559083883658842, "loss": 4.9909, "step": 41670 }, { "epoch": 0.05856431026579106, "grad_norm": 1.0374841690063477, "learning_rate": 0.00017563299142897286, "loss": 4.9103, "step": 41680 }, { "epoch": 0.05857836120395463, "grad_norm": 1.0144661664962769, "learning_rate": 0.0001756751440213573, "loss": 4.9681, "step": 41690 }, { "epoch": 0.058592412142118216, "grad_norm": 1.0426274538040161, "learning_rate": 0.00017571729661374172, "loss": 5.0318, "step": 41700 }, { "epoch": 0.05860646308028179, "grad_norm": 1.0269166231155396, "learning_rate": 0.00017575944920612618, "loss": 5.0346, "step": 41710 }, { "epoch": 0.05862051401844537, "grad_norm": 1.0402437448501587, "learning_rate": 0.0001758016017985106, "loss": 4.8424, "step": 41720 }, { "epoch": 0.058634564956608945, "grad_norm": 0.9999310970306396, "learning_rate": 0.000175843754390895, "loss": 4.8544, "step": 41730 }, { "epoch": 0.05864861589477252, "grad_norm": 1.0584388971328735, "learning_rate": 
0.00017588590698327944, "loss": 4.976, "step": 41740 }, { "epoch": 0.058662666832936104, "grad_norm": 1.0145403146743774, "learning_rate": 0.00017592805957566387, "loss": 4.9392, "step": 41750 }, { "epoch": 0.05867671777109968, "grad_norm": 1.0221564769744873, "learning_rate": 0.00017597021216804833, "loss": 4.9297, "step": 41760 }, { "epoch": 0.05869076870926326, "grad_norm": 1.0751060247421265, "learning_rate": 0.00017601236476043276, "loss": 4.9083, "step": 41770 }, { "epoch": 0.05870481964742683, "grad_norm": 1.022549033164978, "learning_rate": 0.0001760545173528172, "loss": 4.9086, "step": 41780 }, { "epoch": 0.05871887058559041, "grad_norm": 1.0539672374725342, "learning_rate": 0.00017609666994520163, "loss": 4.8246, "step": 41790 }, { "epoch": 0.05873292152375399, "grad_norm": 1.0099142789840698, "learning_rate": 0.00017613882253758603, "loss": 4.8999, "step": 41800 }, { "epoch": 0.05874697246191757, "grad_norm": 1.0530145168304443, "learning_rate": 0.00017617675987073204, "loss": 4.9517, "step": 41810 }, { "epoch": 0.058761023400081144, "grad_norm": 1.0349324941635132, "learning_rate": 0.00017621891246311647, "loss": 4.9144, "step": 41820 }, { "epoch": 0.05877507433824472, "grad_norm": 0.9777777791023254, "learning_rate": 0.00017626106505550087, "loss": 4.8404, "step": 41830 }, { "epoch": 0.0587891252764083, "grad_norm": 1.0137258768081665, "learning_rate": 0.00017630321764788533, "loss": 4.7486, "step": 41840 }, { "epoch": 0.05880317621457188, "grad_norm": 0.9945560097694397, "learning_rate": 0.00017634537024026976, "loss": 4.9688, "step": 41850 }, { "epoch": 0.058817227152735456, "grad_norm": 1.0335863828659058, "learning_rate": 0.0001763875228326542, "loss": 4.9175, "step": 41860 }, { "epoch": 0.05883127809089903, "grad_norm": 1.0477795600891113, "learning_rate": 0.00017642967542503863, "loss": 5.0537, "step": 41870 }, { "epoch": 0.05884532902906261, "grad_norm": 1.0379626750946045, "learning_rate": 0.00017647182801742306, "loss": 4.8798, "step": 41880 
}, { "epoch": 0.058859379967226184, "grad_norm": 1.0096701383590698, "learning_rate": 0.00017651398060980752, "loss": 4.9598, "step": 41890 }, { "epoch": 0.05887343090538977, "grad_norm": 0.9835436940193176, "learning_rate": 0.00017655613320219192, "loss": 4.8876, "step": 41900 }, { "epoch": 0.058887481843553344, "grad_norm": 1.0108128786087036, "learning_rate": 0.00017659828579457635, "loss": 4.9465, "step": 41910 }, { "epoch": 0.05890153278171692, "grad_norm": 0.9913715720176697, "learning_rate": 0.00017664043838696078, "loss": 4.8998, "step": 41920 }, { "epoch": 0.058915583719880496, "grad_norm": 1.0140135288238525, "learning_rate": 0.0001766825909793452, "loss": 4.9478, "step": 41930 }, { "epoch": 0.05892963465804407, "grad_norm": 1.0244427919387817, "learning_rate": 0.00017672474357172964, "loss": 4.8803, "step": 41940 }, { "epoch": 0.05894368559620765, "grad_norm": 1.0371630191802979, "learning_rate": 0.0001767668961641141, "loss": 4.9136, "step": 41950 }, { "epoch": 0.05895773653437123, "grad_norm": 1.0342624187469482, "learning_rate": 0.0001768090487564985, "loss": 5.0633, "step": 41960 }, { "epoch": 0.05897178747253481, "grad_norm": 1.0081278085708618, "learning_rate": 0.00017685120134888294, "loss": 4.972, "step": 41970 }, { "epoch": 0.058985838410698384, "grad_norm": 1.0183650255203247, "learning_rate": 0.00017689335394126737, "loss": 4.8956, "step": 41980 }, { "epoch": 0.05899988934886196, "grad_norm": 1.0174623727798462, "learning_rate": 0.0001769355065336518, "loss": 4.7951, "step": 41990 }, { "epoch": 0.059013940287025536, "grad_norm": 1.0399575233459473, "learning_rate": 0.00017697765912603623, "loss": 4.8732, "step": 42000 }, { "epoch": 0.05902799122518912, "grad_norm": 1.0714610815048218, "learning_rate": 0.0001770198117184207, "loss": 4.8844, "step": 42010 }, { "epoch": 0.059042042163352695, "grad_norm": 1.07644522190094, "learning_rate": 0.00017706196431080512, "loss": 4.8854, "step": 42020 }, { "epoch": 0.05905609310151627, "grad_norm": 
1.018179178237915, "learning_rate": 0.00017710411690318953, "loss": 4.9124, "step": 42030 }, { "epoch": 0.05907014403967985, "grad_norm": 1.0028572082519531, "learning_rate": 0.00017714626949557396, "loss": 5.0101, "step": 42040 }, { "epoch": 0.059084194977843424, "grad_norm": 1.0452194213867188, "learning_rate": 0.0001771884220879584, "loss": 4.9129, "step": 42050 }, { "epoch": 0.05909824591600701, "grad_norm": 0.9863782525062561, "learning_rate": 0.00017723057468034282, "loss": 4.961, "step": 42060 }, { "epoch": 0.05911229685417058, "grad_norm": 1.0302046537399292, "learning_rate": 0.00017727272727272728, "loss": 4.896, "step": 42070 }, { "epoch": 0.05912634779233416, "grad_norm": 0.994310200214386, "learning_rate": 0.0001773148798651117, "loss": 4.9747, "step": 42080 }, { "epoch": 0.059140398730497735, "grad_norm": 1.021562933921814, "learning_rate": 0.0001773570324574961, "loss": 4.8488, "step": 42090 }, { "epoch": 0.05915444966866131, "grad_norm": 1.034226655960083, "learning_rate": 0.00017739918504988054, "loss": 4.8405, "step": 42100 }, { "epoch": 0.059168500606824895, "grad_norm": 1.0121209621429443, "learning_rate": 0.00017744133764226497, "loss": 4.9374, "step": 42110 }, { "epoch": 0.05918255154498847, "grad_norm": 0.9862031936645508, "learning_rate": 0.0001774834902346494, "loss": 4.8789, "step": 42120 }, { "epoch": 0.05919660248315205, "grad_norm": 1.0321632623672485, "learning_rate": 0.00017752564282703386, "loss": 4.9114, "step": 42130 }, { "epoch": 0.05921065342131562, "grad_norm": 1.0482895374298096, "learning_rate": 0.0001775677954194183, "loss": 4.8856, "step": 42140 }, { "epoch": 0.0592247043594792, "grad_norm": 1.1908639669418335, "learning_rate": 0.0001776099480118027, "loss": 4.8354, "step": 42150 }, { "epoch": 0.05923875529764278, "grad_norm": 1.1106269359588623, "learning_rate": 0.00017765210060418713, "loss": 4.8703, "step": 42160 }, { "epoch": 0.05925280623580636, "grad_norm": 1.042508840560913, "learning_rate": 0.00017769425319657156, 
"loss": 4.8856, "step": 42170 }, { "epoch": 0.059266857173969935, "grad_norm": 1.398681640625, "learning_rate": 0.000177736405788956, "loss": 4.9736, "step": 42180 }, { "epoch": 0.05928090811213351, "grad_norm": 1.0562206506729126, "learning_rate": 0.00017777855838134045, "loss": 4.9827, "step": 42190 }, { "epoch": 0.05929495905029709, "grad_norm": 0.977316677570343, "learning_rate": 0.00017782071097372488, "loss": 4.9283, "step": 42200 }, { "epoch": 0.05930900998846067, "grad_norm": 1.0145175457000732, "learning_rate": 0.0001778628635661093, "loss": 4.9636, "step": 42210 }, { "epoch": 0.059323060926624246, "grad_norm": 1.0313082933425903, "learning_rate": 0.00017790501615849372, "loss": 4.7409, "step": 42220 }, { "epoch": 0.05933711186478782, "grad_norm": 1.0246168375015259, "learning_rate": 0.00017794716875087815, "loss": 4.9663, "step": 42230 }, { "epoch": 0.0593511628029514, "grad_norm": 1.1417378187179565, "learning_rate": 0.00017798932134326258, "loss": 4.9773, "step": 42240 }, { "epoch": 0.059365213741114975, "grad_norm": 1.0039445161819458, "learning_rate": 0.00017803147393564704, "loss": 4.9977, "step": 42250 }, { "epoch": 0.05937926467927855, "grad_norm": 1.012376070022583, "learning_rate": 0.00017807362652803147, "loss": 4.8092, "step": 42260 }, { "epoch": 0.059393315617442134, "grad_norm": 1.0877492427825928, "learning_rate": 0.0001781157791204159, "loss": 4.9357, "step": 42270 }, { "epoch": 0.05940736655560571, "grad_norm": 1.0365524291992188, "learning_rate": 0.0001781579317128003, "loss": 4.8717, "step": 42280 }, { "epoch": 0.05942141749376929, "grad_norm": 0.9986197352409363, "learning_rate": 0.00017820008430518474, "loss": 4.9256, "step": 42290 }, { "epoch": 0.05943546843193286, "grad_norm": 1.0150564908981323, "learning_rate": 0.00017824223689756917, "loss": 4.8873, "step": 42300 }, { "epoch": 0.05944951937009644, "grad_norm": 1.0646934509277344, "learning_rate": 0.00017828438948995362, "loss": 4.8436, "step": 42310 }, { "epoch": 
0.05946357030826002, "grad_norm": 1.0046230554580688, "learning_rate": 0.00017832654208233806, "loss": 5.0225, "step": 42320 }, { "epoch": 0.0594776212464236, "grad_norm": 1.0435608625411987, "learning_rate": 0.0001783686946747225, "loss": 4.9509, "step": 42330 }, { "epoch": 0.059491672184587174, "grad_norm": 1.004775047302246, "learning_rate": 0.00017841084726710692, "loss": 4.8448, "step": 42340 }, { "epoch": 0.05950572312275075, "grad_norm": 1.0181119441986084, "learning_rate": 0.00017845299985949132, "loss": 4.9456, "step": 42350 }, { "epoch": 0.05951977406091433, "grad_norm": 1.0563257932662964, "learning_rate": 0.00017849515245187575, "loss": 4.938, "step": 42360 }, { "epoch": 0.05953382499907791, "grad_norm": 1.2527403831481934, "learning_rate": 0.0001785373050442602, "loss": 4.8606, "step": 42370 }, { "epoch": 0.059547875937241486, "grad_norm": 0.9892441630363464, "learning_rate": 0.00017857945763664464, "loss": 4.9537, "step": 42380 }, { "epoch": 0.05956192687540506, "grad_norm": 0.9748817086219788, "learning_rate": 0.00017862161022902907, "loss": 4.9919, "step": 42390 }, { "epoch": 0.05957597781356864, "grad_norm": 1.0507982969284058, "learning_rate": 0.0001786637628214135, "loss": 4.9252, "step": 42400 }, { "epoch": 0.059590028751732214, "grad_norm": 1.108594536781311, "learning_rate": 0.0001787059154137979, "loss": 4.9004, "step": 42410 }, { "epoch": 0.0596040796898958, "grad_norm": 1.0259724855422974, "learning_rate": 0.00017874806800618237, "loss": 4.7901, "step": 42420 }, { "epoch": 0.059618130628059374, "grad_norm": 1.0419907569885254, "learning_rate": 0.0001787902205985668, "loss": 4.9264, "step": 42430 }, { "epoch": 0.05963218156622295, "grad_norm": 1.0113157033920288, "learning_rate": 0.00017883237319095123, "loss": 4.9435, "step": 42440 }, { "epoch": 0.059646232504386526, "grad_norm": 1.0024772882461548, "learning_rate": 0.00017887452578333566, "loss": 5.0503, "step": 42450 }, { "epoch": 0.0596602834425501, "grad_norm": 1.024277687072754, 
"learning_rate": 0.0001789166783757201, "loss": 5.0154, "step": 42460 }, { "epoch": 0.059674334380713685, "grad_norm": 0.9590503573417664, "learning_rate": 0.00017895883096810455, "loss": 4.9712, "step": 42470 }, { "epoch": 0.05968838531887726, "grad_norm": 1.0252444744110107, "learning_rate": 0.00017900098356048895, "loss": 4.8889, "step": 42480 }, { "epoch": 0.05970243625704084, "grad_norm": 0.9953712821006775, "learning_rate": 0.00017904313615287339, "loss": 5.0119, "step": 42490 }, { "epoch": 0.059716487195204414, "grad_norm": 1.018610954284668, "learning_rate": 0.00017908528874525782, "loss": 4.8525, "step": 42500 }, { "epoch": 0.05973053813336799, "grad_norm": 0.9951494336128235, "learning_rate": 0.00017912744133764225, "loss": 4.9818, "step": 42510 }, { "epoch": 0.05974458907153157, "grad_norm": 1.0133929252624512, "learning_rate": 0.00017916959393002668, "loss": 5.0259, "step": 42520 }, { "epoch": 0.05975864000969515, "grad_norm": 0.9819827079772949, "learning_rate": 0.00017921174652241114, "loss": 4.9843, "step": 42530 }, { "epoch": 0.059772690947858725, "grad_norm": 0.9785075187683105, "learning_rate": 0.00017925389911479554, "loss": 4.9552, "step": 42540 }, { "epoch": 0.0597867418860223, "grad_norm": 1.0113465785980225, "learning_rate": 0.00017929605170717997, "loss": 4.9311, "step": 42550 }, { "epoch": 0.05980079282418588, "grad_norm": 0.9836959838867188, "learning_rate": 0.0001793382042995644, "loss": 4.84, "step": 42560 }, { "epoch": 0.05981484376234946, "grad_norm": 0.9870015382766724, "learning_rate": 0.00017938035689194884, "loss": 4.9337, "step": 42570 }, { "epoch": 0.05982889470051304, "grad_norm": 0.9871739745140076, "learning_rate": 0.00017942250948433327, "loss": 4.8171, "step": 42580 }, { "epoch": 0.05984294563867661, "grad_norm": 0.9959299564361572, "learning_rate": 0.00017946466207671772, "loss": 4.8464, "step": 42590 }, { "epoch": 0.05985699657684019, "grad_norm": 1.0436452627182007, "learning_rate": 0.00017950681466910216, "loss": 4.941, 
"step": 42600 }, { "epoch": 0.059871047515003765, "grad_norm": 1.0525795221328735, "learning_rate": 0.00017954896726148656, "loss": 4.8133, "step": 42610 }, { "epoch": 0.05988509845316734, "grad_norm": 1.0073230266571045, "learning_rate": 0.000179591119853871, "loss": 4.8356, "step": 42620 }, { "epoch": 0.059899149391330925, "grad_norm": 0.9857484102249146, "learning_rate": 0.00017963327244625542, "loss": 4.8508, "step": 42630 }, { "epoch": 0.0599132003294945, "grad_norm": 1.013698935508728, "learning_rate": 0.00017967542503863985, "loss": 4.806, "step": 42640 }, { "epoch": 0.05992725126765808, "grad_norm": 1.013667106628418, "learning_rate": 0.0001797175776310243, "loss": 4.9422, "step": 42650 }, { "epoch": 0.05994130220582165, "grad_norm": 1.0081895589828491, "learning_rate": 0.00017975973022340874, "loss": 4.9618, "step": 42660 }, { "epoch": 0.05995535314398523, "grad_norm": 1.0109422206878662, "learning_rate": 0.00017980188281579315, "loss": 4.8748, "step": 42670 }, { "epoch": 0.05996940408214881, "grad_norm": 1.035300374031067, "learning_rate": 0.00017984403540817758, "loss": 4.8972, "step": 42680 }, { "epoch": 0.05998345502031239, "grad_norm": 0.994915783405304, "learning_rate": 0.000179886188000562, "loss": 4.889, "step": 42690 }, { "epoch": 0.059997505958475965, "grad_norm": 1.0624393224716187, "learning_rate": 0.00017992834059294644, "loss": 4.9058, "step": 42700 }, { "epoch": 0.06001155689663954, "grad_norm": 1.0263959169387817, "learning_rate": 0.0001799704931853309, "loss": 4.8691, "step": 42710 }, { "epoch": 0.06002560783480312, "grad_norm": 1.0453038215637207, "learning_rate": 0.00018001264577771533, "loss": 4.6771, "step": 42720 }, { "epoch": 0.0600396587729667, "grad_norm": 1.070002794265747, "learning_rate": 0.00018005479837009973, "loss": 4.9346, "step": 42730 }, { "epoch": 0.060053709711130276, "grad_norm": 1.0732343196868896, "learning_rate": 0.00018009695096248417, "loss": 5.021, "step": 42740 }, { "epoch": 0.06006776064929385, "grad_norm": 
1.127801775932312, "learning_rate": 0.0001801391035548686, "loss": 4.9084, "step": 42750 }, { "epoch": 0.06008181158745743, "grad_norm": 1.0170589685440063, "learning_rate": 0.00018018125614725303, "loss": 4.9463, "step": 42760 }, { "epoch": 0.060095862525621005, "grad_norm": 1.0990118980407715, "learning_rate": 0.00018022340873963749, "loss": 4.9675, "step": 42770 }, { "epoch": 0.06010991346378459, "grad_norm": 1.0004825592041016, "learning_rate": 0.00018026556133202192, "loss": 5.0224, "step": 42780 }, { "epoch": 0.060123964401948164, "grad_norm": 1.0344843864440918, "learning_rate": 0.00018030771392440635, "loss": 4.8195, "step": 42790 }, { "epoch": 0.06013801534011174, "grad_norm": 1.0126923322677612, "learning_rate": 0.00018034986651679075, "loss": 4.9871, "step": 42800 }, { "epoch": 0.06015206627827532, "grad_norm": 1.0566537380218506, "learning_rate": 0.00018039201910917518, "loss": 4.959, "step": 42810 }, { "epoch": 0.06016611721643889, "grad_norm": 1.052660584449768, "learning_rate": 0.00018043417170155961, "loss": 4.995, "step": 42820 }, { "epoch": 0.060180168154602476, "grad_norm": 1.0248833894729614, "learning_rate": 0.00018047632429394407, "loss": 4.8879, "step": 42830 }, { "epoch": 0.06019421909276605, "grad_norm": 1.013227939605713, "learning_rate": 0.0001805184768863285, "loss": 4.9213, "step": 42840 }, { "epoch": 0.06020827003092963, "grad_norm": 1.00045645236969, "learning_rate": 0.00018056062947871294, "loss": 5.0058, "step": 42850 }, { "epoch": 0.060222320969093204, "grad_norm": 0.959171712398529, "learning_rate": 0.00018060278207109734, "loss": 4.9514, "step": 42860 }, { "epoch": 0.06023637190725678, "grad_norm": 1.0098928213119507, "learning_rate": 0.00018064493466348177, "loss": 4.9365, "step": 42870 }, { "epoch": 0.060250422845420364, "grad_norm": 1.0477204322814941, "learning_rate": 0.0001806870872558662, "loss": 4.9944, "step": 42880 }, { "epoch": 0.06026447378358394, "grad_norm": 1.0132365226745605, "learning_rate": 
0.00018072923984825066, "loss": 4.8793, "step": 42890 }, { "epoch": 0.060278524721747516, "grad_norm": 1.0186618566513062, "learning_rate": 0.0001807713924406351, "loss": 4.9529, "step": 42900 }, { "epoch": 0.06029257565991109, "grad_norm": 1.0929832458496094, "learning_rate": 0.00018081354503301952, "loss": 4.9407, "step": 42910 }, { "epoch": 0.06030662659807467, "grad_norm": 1.0314265489578247, "learning_rate": 0.00018085569762540395, "loss": 4.819, "step": 42920 }, { "epoch": 0.060320677536238244, "grad_norm": 0.9774079918861389, "learning_rate": 0.00018089785021778836, "loss": 4.9518, "step": 42930 }, { "epoch": 0.06033472847440183, "grad_norm": 0.9911931157112122, "learning_rate": 0.0001809400028101728, "loss": 5.0276, "step": 42940 }, { "epoch": 0.060348779412565404, "grad_norm": 1.0376858711242676, "learning_rate": 0.00018098215540255725, "loss": 4.8859, "step": 42950 }, { "epoch": 0.06036283035072898, "grad_norm": 0.9969538450241089, "learning_rate": 0.00018102430799494168, "loss": 4.9157, "step": 42960 }, { "epoch": 0.060376881288892556, "grad_norm": 1.0148366689682007, "learning_rate": 0.0001810664605873261, "loss": 4.942, "step": 42970 }, { "epoch": 0.06039093222705613, "grad_norm": 0.9672243595123291, "learning_rate": 0.00018110861317971054, "loss": 4.9629, "step": 42980 }, { "epoch": 0.060404983165219715, "grad_norm": 1.0580873489379883, "learning_rate": 0.00018115076577209494, "loss": 4.8622, "step": 42990 }, { "epoch": 0.06041903410338329, "grad_norm": 1.0431230068206787, "learning_rate": 0.0001811929183644794, "loss": 4.8916, "step": 43000 }, { "epoch": 0.06043308504154687, "grad_norm": 1.0647096633911133, "learning_rate": 0.00018123507095686383, "loss": 4.9537, "step": 43010 }, { "epoch": 0.060447135979710444, "grad_norm": 1.0209344625473022, "learning_rate": 0.00018127722354924826, "loss": 4.8759, "step": 43020 }, { "epoch": 0.06046118691787402, "grad_norm": 1.0193747282028198, "learning_rate": 0.0001813193761416327, "loss": 4.8229, "step": 43030 
}, { "epoch": 0.0604752378560376, "grad_norm": 1.0239225625991821, "learning_rate": 0.00018136152873401713, "loss": 4.9135, "step": 43040 }, { "epoch": 0.06048928879420118, "grad_norm": 1.0313153266906738, "learning_rate": 0.00018140368132640159, "loss": 4.9518, "step": 43050 }, { "epoch": 0.060503339732364755, "grad_norm": 1.0050175189971924, "learning_rate": 0.000181445833918786, "loss": 4.9435, "step": 43060 }, { "epoch": 0.06051739067052833, "grad_norm": 0.9589523077011108, "learning_rate": 0.00018148798651117042, "loss": 4.8134, "step": 43070 }, { "epoch": 0.06053144160869191, "grad_norm": 0.9744222164154053, "learning_rate": 0.00018153013910355485, "loss": 4.9269, "step": 43080 }, { "epoch": 0.06054549254685549, "grad_norm": 0.9929285049438477, "learning_rate": 0.00018157229169593928, "loss": 4.9625, "step": 43090 }, { "epoch": 0.06055954348501907, "grad_norm": 1.0369280576705933, "learning_rate": 0.00018161444428832371, "loss": 4.9153, "step": 43100 }, { "epoch": 0.06057359442318264, "grad_norm": 0.994658350944519, "learning_rate": 0.00018165659688070817, "loss": 4.9614, "step": 43110 }, { "epoch": 0.06058764536134622, "grad_norm": 1.1135982275009155, "learning_rate": 0.00018169874947309258, "loss": 4.897, "step": 43120 }, { "epoch": 0.060601696299509795, "grad_norm": 0.9817977547645569, "learning_rate": 0.000181740902065477, "loss": 4.9774, "step": 43130 }, { "epoch": 0.06061574723767338, "grad_norm": 1.008264422416687, "learning_rate": 0.00018178305465786144, "loss": 4.8703, "step": 43140 }, { "epoch": 0.060629798175836955, "grad_norm": 1.0090116262435913, "learning_rate": 0.00018182520725024587, "loss": 4.7993, "step": 43150 }, { "epoch": 0.06064384911400053, "grad_norm": 1.018884301185608, "learning_rate": 0.0001818673598426303, "loss": 5.0122, "step": 43160 }, { "epoch": 0.06065790005216411, "grad_norm": 1.0300415754318237, "learning_rate": 0.00018190951243501476, "loss": 4.8523, "step": 43170 }, { "epoch": 0.06067195099032768, "grad_norm": 
1.0521844625473022, "learning_rate": 0.0001819516650273992, "loss": 4.8799, "step": 43180 }, { "epoch": 0.060686001928491266, "grad_norm": 1.026895523071289, "learning_rate": 0.0001819938176197836, "loss": 4.9328, "step": 43190 }, { "epoch": 0.06070005286665484, "grad_norm": 1.0086530447006226, "learning_rate": 0.00018203597021216803, "loss": 4.8162, "step": 43200 }, { "epoch": 0.06071410380481842, "grad_norm": 1.0305287837982178, "learning_rate": 0.00018207812280455246, "loss": 4.858, "step": 43210 }, { "epoch": 0.060728154742981995, "grad_norm": 0.997772216796875, "learning_rate": 0.0001821202753969369, "loss": 4.9303, "step": 43220 }, { "epoch": 0.06074220568114557, "grad_norm": 1.0172392129898071, "learning_rate": 0.00018216242798932135, "loss": 4.9411, "step": 43230 }, { "epoch": 0.06075625661930915, "grad_norm": 0.9829245805740356, "learning_rate": 0.00018220458058170578, "loss": 5.0541, "step": 43240 }, { "epoch": 0.06077030755747273, "grad_norm": 0.9785235524177551, "learning_rate": 0.00018224673317409018, "loss": 4.953, "step": 43250 }, { "epoch": 0.060784358495636306, "grad_norm": 0.9770336747169495, "learning_rate": 0.0001822888857664746, "loss": 5.0195, "step": 43260 }, { "epoch": 0.06079840943379988, "grad_norm": 0.994461178779602, "learning_rate": 0.00018233103835885904, "loss": 4.8224, "step": 43270 }, { "epoch": 0.06081246037196346, "grad_norm": 0.9978111982345581, "learning_rate": 0.00018237319095124348, "loss": 4.8499, "step": 43280 }, { "epoch": 0.060826511310127035, "grad_norm": 1.0382758378982544, "learning_rate": 0.00018241534354362793, "loss": 4.9601, "step": 43290 }, { "epoch": 0.06084056224829062, "grad_norm": 1.0007946491241455, "learning_rate": 0.00018245749613601236, "loss": 4.8906, "step": 43300 }, { "epoch": 0.060854613186454194, "grad_norm": 0.9689368009567261, "learning_rate": 0.0001824996487283968, "loss": 4.9713, "step": 43310 }, { "epoch": 0.06086866412461777, "grad_norm": 0.9696866273880005, "learning_rate": 
0.0001825418013207812, "loss": 4.9007, "step": 43320 }, { "epoch": 0.06088271506278135, "grad_norm": 0.9870903491973877, "learning_rate": 0.00018258395391316563, "loss": 4.9312, "step": 43330 }, { "epoch": 0.06089676600094492, "grad_norm": 1.0196123123168945, "learning_rate": 0.00018262610650555006, "loss": 4.7852, "step": 43340 }, { "epoch": 0.060910816939108506, "grad_norm": 0.9788631200790405, "learning_rate": 0.00018266825909793452, "loss": 4.8479, "step": 43350 }, { "epoch": 0.06092486787727208, "grad_norm": 0.9982026219367981, "learning_rate": 0.00018271041169031895, "loss": 4.9585, "step": 43360 }, { "epoch": 0.06093891881543566, "grad_norm": 1.0699291229248047, "learning_rate": 0.00018275256428270338, "loss": 4.9751, "step": 43370 }, { "epoch": 0.060952969753599234, "grad_norm": 1.0497549772262573, "learning_rate": 0.0001827947168750878, "loss": 4.9279, "step": 43380 }, { "epoch": 0.06096702069176281, "grad_norm": 0.9782308340072632, "learning_rate": 0.00018283686946747222, "loss": 4.9041, "step": 43390 }, { "epoch": 0.060981071629926394, "grad_norm": 0.964344322681427, "learning_rate": 0.00018287902205985665, "loss": 4.9652, "step": 43400 }, { "epoch": 0.06099512256808997, "grad_norm": 0.9865180253982544, "learning_rate": 0.0001829211746522411, "loss": 5.0092, "step": 43410 }, { "epoch": 0.061009173506253546, "grad_norm": 1.071978211402893, "learning_rate": 0.00018296332724462554, "loss": 4.8976, "step": 43420 }, { "epoch": 0.06102322444441712, "grad_norm": 0.9803053736686707, "learning_rate": 0.00018300547983700997, "loss": 4.8913, "step": 43430 }, { "epoch": 0.0610372753825807, "grad_norm": 0.9943988919258118, "learning_rate": 0.00018304763242939437, "loss": 5.0052, "step": 43440 }, { "epoch": 0.06105132632074428, "grad_norm": 0.9977701306343079, "learning_rate": 0.0001830897850217788, "loss": 4.8374, "step": 43450 }, { "epoch": 0.06106537725890786, "grad_norm": 1.0006359815597534, "learning_rate": 0.00018313193761416324, "loss": 4.9586, "step": 43460 }, 
{ "epoch": 0.061079428197071434, "grad_norm": 1.0693806409835815, "learning_rate": 0.0001831740902065477, "loss": 4.932, "step": 43470 }, { "epoch": 0.06109347913523501, "grad_norm": 0.9895679950714111, "learning_rate": 0.00018321624279893213, "loss": 4.9462, "step": 43480 }, { "epoch": 0.061107530073398586, "grad_norm": 1.0478029251098633, "learning_rate": 0.00018325839539131656, "loss": 4.9418, "step": 43490 }, { "epoch": 0.06112158101156217, "grad_norm": 0.9845918416976929, "learning_rate": 0.000183300547983701, "loss": 4.9005, "step": 43500 }, { "epoch": 0.061135631949725745, "grad_norm": 1.0808178186416626, "learning_rate": 0.0001833427005760854, "loss": 4.8593, "step": 43510 }, { "epoch": 0.06114968288788932, "grad_norm": 0.9863511919975281, "learning_rate": 0.00018338485316846982, "loss": 4.9801, "step": 43520 }, { "epoch": 0.0611637338260529, "grad_norm": 1.0400804281234741, "learning_rate": 0.00018342700576085428, "loss": 4.8457, "step": 43530 }, { "epoch": 0.061177784764216474, "grad_norm": 1.01461660861969, "learning_rate": 0.0001834691583532387, "loss": 4.969, "step": 43540 }, { "epoch": 0.06119183570238005, "grad_norm": 0.9691588282585144, "learning_rate": 0.00018351131094562314, "loss": 4.8088, "step": 43550 }, { "epoch": 0.06120588664054363, "grad_norm": 1.038812279701233, "learning_rate": 0.00018355346353800757, "loss": 4.8867, "step": 43560 }, { "epoch": 0.06121993757870721, "grad_norm": 0.9811944365501404, "learning_rate": 0.00018359561613039198, "loss": 4.9723, "step": 43570 }, { "epoch": 0.061233988516870785, "grad_norm": 1.0180128812789917, "learning_rate": 0.00018363776872277644, "loss": 4.6997, "step": 43580 }, { "epoch": 0.06124803945503436, "grad_norm": 1.078423023223877, "learning_rate": 0.00018367992131516087, "loss": 4.8831, "step": 43590 }, { "epoch": 0.06126209039319794, "grad_norm": 0.9817917346954346, "learning_rate": 0.0001837220739075453, "loss": 4.9102, "step": 43600 }, { "epoch": 0.06127614133136152, "grad_norm": 
0.977243959903717, "learning_rate": 0.00018376422649992973, "loss": 4.883, "step": 43610 }, { "epoch": 0.0612901922695251, "grad_norm": 1.1097636222839355, "learning_rate": 0.00018380637909231416, "loss": 4.8, "step": 43620 }, { "epoch": 0.06130424320768867, "grad_norm": 0.9828046560287476, "learning_rate": 0.00018384853168469862, "loss": 4.8724, "step": 43630 }, { "epoch": 0.06131829414585225, "grad_norm": 0.9683628678321838, "learning_rate": 0.00018389068427708302, "loss": 4.9274, "step": 43640 }, { "epoch": 0.061332345084015825, "grad_norm": 0.989050030708313, "learning_rate": 0.00018393283686946746, "loss": 4.9037, "step": 43650 }, { "epoch": 0.06134639602217941, "grad_norm": 1.0087295770645142, "learning_rate": 0.0001839749894618519, "loss": 4.9268, "step": 43660 }, { "epoch": 0.061360446960342985, "grad_norm": 1.0443772077560425, "learning_rate": 0.00018401714205423632, "loss": 4.9625, "step": 43670 }, { "epoch": 0.06137449789850656, "grad_norm": 1.0253444910049438, "learning_rate": 0.00018405929464662075, "loss": 4.7028, "step": 43680 }, { "epoch": 0.06138854883667014, "grad_norm": 1.0143331289291382, "learning_rate": 0.0001841014472390052, "loss": 4.8486, "step": 43690 }, { "epoch": 0.06140259977483371, "grad_norm": 0.9996601939201355, "learning_rate": 0.0001841435998313896, "loss": 4.9462, "step": 43700 }, { "epoch": 0.061416650712997296, "grad_norm": 0.9822000861167908, "learning_rate": 0.00018418575242377404, "loss": 4.9844, "step": 43710 }, { "epoch": 0.06143070165116087, "grad_norm": 1.0107418298721313, "learning_rate": 0.00018422790501615847, "loss": 5.0152, "step": 43720 }, { "epoch": 0.06144475258932445, "grad_norm": 1.0787968635559082, "learning_rate": 0.0001842700576085429, "loss": 4.9467, "step": 43730 }, { "epoch": 0.061458803527488025, "grad_norm": 1.0120429992675781, "learning_rate": 0.00018431221020092734, "loss": 4.8599, "step": 43740 }, { "epoch": 0.0614728544656516, "grad_norm": 1.002541422843933, "learning_rate": 0.0001843543627933118, 
"loss": 5.0134, "step": 43750 }, { "epoch": 0.061486905403815184, "grad_norm": 1.0086458921432495, "learning_rate": 0.00018439651538569623, "loss": 4.8738, "step": 43760 }, { "epoch": 0.06150095634197876, "grad_norm": 1.0043725967407227, "learning_rate": 0.00018443866797808063, "loss": 4.9894, "step": 43770 }, { "epoch": 0.061515007280142336, "grad_norm": 0.9925393462181091, "learning_rate": 0.00018448082057046506, "loss": 4.9451, "step": 43780 }, { "epoch": 0.06152905821830591, "grad_norm": 0.9828112721443176, "learning_rate": 0.0001845229731628495, "loss": 4.8894, "step": 43790 }, { "epoch": 0.06154310915646949, "grad_norm": 1.0312516689300537, "learning_rate": 0.00018456512575523392, "loss": 4.8535, "step": 43800 }, { "epoch": 0.06155716009463307, "grad_norm": 1.0502887964248657, "learning_rate": 0.00018460727834761838, "loss": 4.8503, "step": 43810 }, { "epoch": 0.06157121103279665, "grad_norm": 0.9553511738777161, "learning_rate": 0.0001846494309400028, "loss": 4.931, "step": 43820 }, { "epoch": 0.061585261970960224, "grad_norm": 1.0687919855117798, "learning_rate": 0.00018469158353238722, "loss": 4.858, "step": 43830 }, { "epoch": 0.0615993129091238, "grad_norm": 0.9931618571281433, "learning_rate": 0.00018473373612477165, "loss": 4.9647, "step": 43840 }, { "epoch": 0.061613363847287377, "grad_norm": 1.0885789394378662, "learning_rate": 0.00018477588871715608, "loss": 4.8131, "step": 43850 }, { "epoch": 0.06162741478545095, "grad_norm": 0.9883633255958557, "learning_rate": 0.0001848180413095405, "loss": 4.8692, "step": 43860 }, { "epoch": 0.061641465723614536, "grad_norm": 0.9546381235122681, "learning_rate": 0.00018486019390192497, "loss": 4.9282, "step": 43870 }, { "epoch": 0.06165551666177811, "grad_norm": 0.9708349704742432, "learning_rate": 0.0001849023464943094, "loss": 4.9329, "step": 43880 }, { "epoch": 0.06166956759994169, "grad_norm": 0.985491156578064, "learning_rate": 0.00018494449908669383, "loss": 5.066, "step": 43890 }, { "epoch": 
0.061683618538105264, "grad_norm": 1.0057768821716309, "learning_rate": 0.00018498665167907823, "loss": 5.0658, "step": 43900 }, { "epoch": 0.06169766947626884, "grad_norm": 1.0529582500457764, "learning_rate": 0.00018502880427146267, "loss": 4.9211, "step": 43910 }, { "epoch": 0.061711720414432424, "grad_norm": 0.9484959840774536, "learning_rate": 0.0001850709568638471, "loss": 4.8917, "step": 43920 }, { "epoch": 0.061725771352596, "grad_norm": 0.9802437424659729, "learning_rate": 0.00018511310945623156, "loss": 4.853, "step": 43930 }, { "epoch": 0.061739822290759576, "grad_norm": 1.045055627822876, "learning_rate": 0.00018515526204861599, "loss": 4.8402, "step": 43940 }, { "epoch": 0.06175387322892315, "grad_norm": 1.073882818222046, "learning_rate": 0.00018519741464100042, "loss": 4.9227, "step": 43950 }, { "epoch": 0.06176792416708673, "grad_norm": 0.9608358144760132, "learning_rate": 0.00018523956723338482, "loss": 4.9701, "step": 43960 }, { "epoch": 0.06178197510525031, "grad_norm": 0.9510095119476318, "learning_rate": 0.00018528171982576925, "loss": 4.9003, "step": 43970 }, { "epoch": 0.06179602604341389, "grad_norm": 1.0726205110549927, "learning_rate": 0.00018532387241815368, "loss": 4.9263, "step": 43980 }, { "epoch": 0.061810076981577464, "grad_norm": 0.9834354519844055, "learning_rate": 0.00018536602501053814, "loss": 5.0427, "step": 43990 }, { "epoch": 0.06182412791974104, "grad_norm": 0.9882546067237854, "learning_rate": 0.00018540817760292257, "loss": 5.026, "step": 44000 }, { "epoch": 0.061838178857904616, "grad_norm": 0.9732726216316223, "learning_rate": 0.000185450330195307, "loss": 4.9425, "step": 44010 }, { "epoch": 0.0618522297960682, "grad_norm": 0.9791545271873474, "learning_rate": 0.00018549248278769144, "loss": 4.965, "step": 44020 }, { "epoch": 0.061866280734231775, "grad_norm": 1.0380457639694214, "learning_rate": 0.00018553463538007584, "loss": 4.9099, "step": 44030 }, { "epoch": 0.06188033167239535, "grad_norm": 1.010048747062683, 
"learning_rate": 0.00018557678797246027, "loss": 4.9624, "step": 44040 }, { "epoch": 0.06189438261055893, "grad_norm": 0.9912928342819214, "learning_rate": 0.00018561894056484473, "loss": 4.9728, "step": 44050 }, { "epoch": 0.061908433548722504, "grad_norm": 0.9759913682937622, "learning_rate": 0.00018566109315722916, "loss": 4.9517, "step": 44060 }, { "epoch": 0.06192248448688609, "grad_norm": 0.997549295425415, "learning_rate": 0.0001857032457496136, "loss": 4.8432, "step": 44070 }, { "epoch": 0.06193653542504966, "grad_norm": 1.0062215328216553, "learning_rate": 0.00018574539834199802, "loss": 4.9373, "step": 44080 }, { "epoch": 0.06195058636321324, "grad_norm": 1.012075424194336, "learning_rate": 0.00018578755093438243, "loss": 4.8813, "step": 44090 }, { "epoch": 0.061964637301376815, "grad_norm": 0.95697021484375, "learning_rate": 0.00018582970352676686, "loss": 4.944, "step": 44100 }, { "epoch": 0.06197868823954039, "grad_norm": 0.9985111951828003, "learning_rate": 0.00018587185611915132, "loss": 4.8227, "step": 44110 }, { "epoch": 0.061992739177703975, "grad_norm": 0.9926140308380127, "learning_rate": 0.00018591400871153575, "loss": 5.0079, "step": 44120 }, { "epoch": 0.06200679011586755, "grad_norm": 1.1736414432525635, "learning_rate": 0.00018595616130392018, "loss": 4.7146, "step": 44130 }, { "epoch": 0.06202084105403113, "grad_norm": 1.0231581926345825, "learning_rate": 0.0001859983138963046, "loss": 5.0206, "step": 44140 }, { "epoch": 0.0620348919921947, "grad_norm": 1.0800116062164307, "learning_rate": 0.00018604046648868901, "loss": 4.8657, "step": 44150 }, { "epoch": 0.06204894293035828, "grad_norm": 0.9866999387741089, "learning_rate": 0.00018608261908107347, "loss": 4.9722, "step": 44160 }, { "epoch": 0.062062993868521855, "grad_norm": 1.0231711864471436, "learning_rate": 0.0001861247716734579, "loss": 4.8777, "step": 44170 }, { "epoch": 0.06207704480668544, "grad_norm": 0.9434810876846313, "learning_rate": 0.00018616692426584233, "loss": 4.9722, 
"step": 44180 }, { "epoch": 0.062091095744849015, "grad_norm": 1.0458160638809204, "learning_rate": 0.00018620907685822677, "loss": 4.8806, "step": 44190 }, { "epoch": 0.06210514668301259, "grad_norm": 0.9809548258781433, "learning_rate": 0.0001862512294506112, "loss": 4.7962, "step": 44200 }, { "epoch": 0.06211919762117617, "grad_norm": 1.012259840965271, "learning_rate": 0.00018629338204299565, "loss": 4.9335, "step": 44210 }, { "epoch": 0.06213324855933974, "grad_norm": 0.9664605855941772, "learning_rate": 0.00018633553463538006, "loss": 4.9491, "step": 44220 }, { "epoch": 0.062147299497503326, "grad_norm": 0.9853652715682983, "learning_rate": 0.0001863776872277645, "loss": 4.9029, "step": 44230 }, { "epoch": 0.0621613504356669, "grad_norm": 1.0576822757720947, "learning_rate": 0.00018641983982014892, "loss": 4.8949, "step": 44240 }, { "epoch": 0.06217540137383048, "grad_norm": 1.0025372505187988, "learning_rate": 0.00018646199241253335, "loss": 4.9004, "step": 44250 }, { "epoch": 0.062189452311994055, "grad_norm": 1.0175806283950806, "learning_rate": 0.00018650414500491778, "loss": 5.03, "step": 44260 }, { "epoch": 0.06220350325015763, "grad_norm": 1.0149425268173218, "learning_rate": 0.00018654629759730224, "loss": 4.9339, "step": 44270 }, { "epoch": 0.062217554188321214, "grad_norm": 1.0029298067092896, "learning_rate": 0.00018658845018968665, "loss": 4.8234, "step": 44280 }, { "epoch": 0.06223160512648479, "grad_norm": 0.9579691290855408, "learning_rate": 0.00018663060278207108, "loss": 5.1016, "step": 44290 }, { "epoch": 0.062245656064648366, "grad_norm": 1.061834454536438, "learning_rate": 0.0001866727553744555, "loss": 4.9704, "step": 44300 }, { "epoch": 0.06225970700281194, "grad_norm": 1.004465103149414, "learning_rate": 0.00018671490796683994, "loss": 4.9067, "step": 44310 }, { "epoch": 0.06227375794097552, "grad_norm": 0.9837499856948853, "learning_rate": 0.00018675706055922437, "loss": 4.9309, "step": 44320 }, { "epoch": 0.0622878088791391, 
"grad_norm": 1.000868558883667, "learning_rate": 0.00018679921315160883, "loss": 4.8866, "step": 44330 }, { "epoch": 0.06230185981730268, "grad_norm": 0.9788700938224792, "learning_rate": 0.00018684136574399326, "loss": 4.9327, "step": 44340 }, { "epoch": 0.062315910755466254, "grad_norm": 0.9754438400268555, "learning_rate": 0.00018688351833637766, "loss": 4.8594, "step": 44350 }, { "epoch": 0.06232996169362983, "grad_norm": 0.951196014881134, "learning_rate": 0.0001869256709287621, "loss": 4.9354, "step": 44360 }, { "epoch": 0.062344012631793407, "grad_norm": 0.9627026915550232, "learning_rate": 0.00018696782352114653, "loss": 4.9621, "step": 44370 }, { "epoch": 0.06235806356995699, "grad_norm": 1.063202977180481, "learning_rate": 0.00018700997611353096, "loss": 4.8807, "step": 44380 }, { "epoch": 0.062372114508120566, "grad_norm": 0.9841533899307251, "learning_rate": 0.00018705212870591542, "loss": 4.9947, "step": 44390 }, { "epoch": 0.06238616544628414, "grad_norm": 0.9734062552452087, "learning_rate": 0.00018709428129829985, "loss": 4.9473, "step": 44400 }, { "epoch": 0.06240021638444772, "grad_norm": 0.9729593396186829, "learning_rate": 0.00018713643389068425, "loss": 5.1047, "step": 44410 }, { "epoch": 0.062414267322611294, "grad_norm": 1.000727891921997, "learning_rate": 0.00018717858648306868, "loss": 4.9258, "step": 44420 }, { "epoch": 0.06242831826077488, "grad_norm": 0.9832708835601807, "learning_rate": 0.0001872207390754531, "loss": 4.7362, "step": 44430 }, { "epoch": 0.062442369198938454, "grad_norm": 1.075731635093689, "learning_rate": 0.00018726289166783754, "loss": 5.0231, "step": 44440 }, { "epoch": 0.06245642013710203, "grad_norm": 0.965521514415741, "learning_rate": 0.000187305044260222, "loss": 4.8227, "step": 44450 }, { "epoch": 0.062470471075265606, "grad_norm": 1.0088485479354858, "learning_rate": 0.00018734719685260643, "loss": 4.8802, "step": 44460 }, { "epoch": 0.06248452201342918, "grad_norm": 0.9651148319244385, "learning_rate": 
0.00018738934944499087, "loss": 4.8197, "step": 44470 }, { "epoch": 0.06249857295159276, "grad_norm": 0.9971210360527039, "learning_rate": 0.00018743150203737527, "loss": 4.9242, "step": 44480 }, { "epoch": 0.06251262388975634, "grad_norm": 0.9815917015075684, "learning_rate": 0.0001874736546297597, "loss": 4.8368, "step": 44490 }, { "epoch": 0.06252667482791992, "grad_norm": 0.9839165806770325, "learning_rate": 0.00018751580722214413, "loss": 4.8778, "step": 44500 }, { "epoch": 0.0625407257660835, "grad_norm": 0.990401029586792, "learning_rate": 0.0001875579598145286, "loss": 4.9428, "step": 44510 }, { "epoch": 0.06255477670424707, "grad_norm": 0.9693237543106079, "learning_rate": 0.00018760011240691302, "loss": 4.8596, "step": 44520 }, { "epoch": 0.06256882764241065, "grad_norm": 1.0013371706008911, "learning_rate": 0.00018764226499929745, "loss": 4.9533, "step": 44530 }, { "epoch": 0.06258287858057422, "grad_norm": 0.9999648928642273, "learning_rate": 0.00018768441759168186, "loss": 4.9845, "step": 44540 }, { "epoch": 0.0625969295187378, "grad_norm": 0.982397198677063, "learning_rate": 0.0001877265701840663, "loss": 4.8334, "step": 44550 }, { "epoch": 0.06261098045690139, "grad_norm": 0.9798036217689514, "learning_rate": 0.0001877645075172123, "loss": 4.8915, "step": 44560 }, { "epoch": 0.06262503139506496, "grad_norm": 0.9902676343917847, "learning_rate": 0.0001878066601095967, "loss": 4.9485, "step": 44570 }, { "epoch": 0.06263908233322854, "grad_norm": 0.9916227459907532, "learning_rate": 0.00018784881270198113, "loss": 4.8654, "step": 44580 }, { "epoch": 0.06265313327139212, "grad_norm": 1.0029773712158203, "learning_rate": 0.0001878909652943656, "loss": 4.7969, "step": 44590 }, { "epoch": 0.06266718420955569, "grad_norm": 0.985836386680603, "learning_rate": 0.00018793311788675002, "loss": 4.9815, "step": 44600 }, { "epoch": 0.06268123514771927, "grad_norm": 1.05781888961792, "learning_rate": 0.00018797527047913445, "loss": 4.9361, "step": 44610 }, { 
"epoch": 0.06269528608588285, "grad_norm": 1.0255253314971924, "learning_rate": 0.00018801742307151888, "loss": 4.7969, "step": 44620 }, { "epoch": 0.06270933702404642, "grad_norm": 0.9807506799697876, "learning_rate": 0.00018805957566390332, "loss": 4.8693, "step": 44630 }, { "epoch": 0.06272338796221, "grad_norm": 0.9755240082740784, "learning_rate": 0.00018810172825628772, "loss": 4.8811, "step": 44640 }, { "epoch": 0.06273743890037357, "grad_norm": 0.9982534050941467, "learning_rate": 0.00018814388084867218, "loss": 5.0296, "step": 44650 }, { "epoch": 0.06275148983853715, "grad_norm": 0.9563882350921631, "learning_rate": 0.0001881860334410566, "loss": 4.9605, "step": 44660 }, { "epoch": 0.06276554077670074, "grad_norm": 0.9405322074890137, "learning_rate": 0.00018822818603344104, "loss": 4.9641, "step": 44670 }, { "epoch": 0.06277959171486432, "grad_norm": 0.9542915225028992, "learning_rate": 0.00018827033862582547, "loss": 4.99, "step": 44680 }, { "epoch": 0.06279364265302789, "grad_norm": 0.9676512479782104, "learning_rate": 0.0001883124912182099, "loss": 4.9995, "step": 44690 }, { "epoch": 0.06280769359119147, "grad_norm": 0.9878407120704651, "learning_rate": 0.0001883546438105943, "loss": 4.962, "step": 44700 }, { "epoch": 0.06282174452935504, "grad_norm": 0.9911555647850037, "learning_rate": 0.00018839679640297876, "loss": 4.8163, "step": 44710 }, { "epoch": 0.06283579546751862, "grad_norm": 1.0555427074432373, "learning_rate": 0.0001884389489953632, "loss": 5.0145, "step": 44720 }, { "epoch": 0.0628498464056822, "grad_norm": 0.9912372827529907, "learning_rate": 0.00018848110158774763, "loss": 4.8577, "step": 44730 }, { "epoch": 0.06286389734384577, "grad_norm": 0.9817824959754944, "learning_rate": 0.00018852325418013206, "loss": 4.8828, "step": 44740 }, { "epoch": 0.06287794828200935, "grad_norm": 0.9970001578330994, "learning_rate": 0.0001885654067725165, "loss": 4.8374, "step": 44750 }, { "epoch": 0.06289199922017293, "grad_norm": 1.0151931047439575, 
"learning_rate": 0.00018860755936490095, "loss": 4.9853, "step": 44760 }, { "epoch": 0.06290605015833652, "grad_norm": 0.9986475110054016, "learning_rate": 0.00018864971195728535, "loss": 4.9118, "step": 44770 }, { "epoch": 0.06292010109650009, "grad_norm": 1.008439064025879, "learning_rate": 0.00018869186454966978, "loss": 4.9462, "step": 44780 }, { "epoch": 0.06293415203466367, "grad_norm": 1.03360116481781, "learning_rate": 0.00018873401714205421, "loss": 4.8674, "step": 44790 }, { "epoch": 0.06294820297282724, "grad_norm": 0.9503140449523926, "learning_rate": 0.00018877616973443865, "loss": 4.9065, "step": 44800 }, { "epoch": 0.06296225391099082, "grad_norm": 0.9909397959709167, "learning_rate": 0.0001888183223268231, "loss": 5.0051, "step": 44810 }, { "epoch": 0.0629763048491544, "grad_norm": 0.9907909035682678, "learning_rate": 0.00018886047491920753, "loss": 4.8448, "step": 44820 }, { "epoch": 0.06299035578731797, "grad_norm": 0.962094783782959, "learning_rate": 0.00018890262751159194, "loss": 4.962, "step": 44830 }, { "epoch": 0.06300440672548155, "grad_norm": 0.9932377934455872, "learning_rate": 0.00018894478010397637, "loss": 5.0061, "step": 44840 }, { "epoch": 0.06301845766364512, "grad_norm": 0.9481516480445862, "learning_rate": 0.0001889869326963608, "loss": 4.9518, "step": 44850 }, { "epoch": 0.0630325086018087, "grad_norm": 0.9638423323631287, "learning_rate": 0.00018902908528874523, "loss": 4.9881, "step": 44860 }, { "epoch": 0.06304655953997229, "grad_norm": 1.0749502182006836, "learning_rate": 0.0001890712378811297, "loss": 4.915, "step": 44870 }, { "epoch": 0.06306061047813587, "grad_norm": 0.9810838103294373, "learning_rate": 0.00018911339047351412, "loss": 4.9, "step": 44880 }, { "epoch": 0.06307466141629944, "grad_norm": 1.0337384939193726, "learning_rate": 0.00018915554306589855, "loss": 4.845, "step": 44890 }, { "epoch": 0.06308871235446302, "grad_norm": 0.9709575772285461, "learning_rate": 0.00018919769565828296, "loss": 4.8708, "step": 
44900 }, { "epoch": 0.0631027632926266, "grad_norm": 1.0215866565704346, "learning_rate": 0.0001892398482506674, "loss": 4.939, "step": 44910 }, { "epoch": 0.06311681423079017, "grad_norm": 0.9717178344726562, "learning_rate": 0.00018928200084305182, "loss": 4.9069, "step": 44920 }, { "epoch": 0.06313086516895375, "grad_norm": 0.9674050807952881, "learning_rate": 0.00018932415343543628, "loss": 4.8942, "step": 44930 }, { "epoch": 0.06314491610711732, "grad_norm": 1.011043906211853, "learning_rate": 0.0001893663060278207, "loss": 4.9006, "step": 44940 }, { "epoch": 0.0631589670452809, "grad_norm": 0.9711986184120178, "learning_rate": 0.00018940845862020514, "loss": 4.8699, "step": 44950 }, { "epoch": 0.06317301798344448, "grad_norm": 1.0319536924362183, "learning_rate": 0.00018945061121258954, "loss": 4.8679, "step": 44960 }, { "epoch": 0.06318706892160805, "grad_norm": 1.0072388648986816, "learning_rate": 0.00018949276380497398, "loss": 4.9792, "step": 44970 }, { "epoch": 0.06320111985977164, "grad_norm": 0.9475632905960083, "learning_rate": 0.0001895349163973584, "loss": 4.8833, "step": 44980 }, { "epoch": 0.06321517079793522, "grad_norm": 1.02812922000885, "learning_rate": 0.00018957706898974286, "loss": 4.9171, "step": 44990 }, { "epoch": 0.0632292217360988, "grad_norm": 0.9555014967918396, "learning_rate": 0.0001896192215821273, "loss": 4.9232, "step": 45000 }, { "epoch": 0.06324327267426237, "grad_norm": 0.9676048755645752, "learning_rate": 0.00018966137417451173, "loss": 4.9209, "step": 45010 }, { "epoch": 0.06325732361242595, "grad_norm": 1.054318904876709, "learning_rate": 0.00018970352676689616, "loss": 4.9587, "step": 45020 }, { "epoch": 0.06327137455058952, "grad_norm": 0.9832106232643127, "learning_rate": 0.00018974567935928056, "loss": 4.8852, "step": 45030 }, { "epoch": 0.0632854254887531, "grad_norm": 0.9964709281921387, "learning_rate": 0.000189787831951665, "loss": 4.8634, "step": 45040 }, { "epoch": 0.06329947642691668, "grad_norm": 
0.959078848361969, "learning_rate": 0.00018982998454404945, "loss": 4.9014, "step": 45050 }, { "epoch": 0.06331352736508025, "grad_norm": 0.993674099445343, "learning_rate": 0.00018987213713643388, "loss": 4.7264, "step": 45060 }, { "epoch": 0.06332757830324383, "grad_norm": 0.9697414636611938, "learning_rate": 0.00018991428972881831, "loss": 4.8757, "step": 45070 }, { "epoch": 0.06334162924140742, "grad_norm": 0.9635653495788574, "learning_rate": 0.00018995644232120275, "loss": 4.9426, "step": 45080 }, { "epoch": 0.063355680179571, "grad_norm": 0.9881270527839661, "learning_rate": 0.00018999859491358715, "loss": 4.9085, "step": 45090 }, { "epoch": 0.06336973111773457, "grad_norm": 0.9849804639816284, "learning_rate": 0.00019004074750597158, "loss": 4.9223, "step": 45100 }, { "epoch": 0.06338378205589815, "grad_norm": 0.953628659248352, "learning_rate": 0.00019008290009835604, "loss": 4.9461, "step": 45110 }, { "epoch": 0.06339783299406172, "grad_norm": 0.9483082890510559, "learning_rate": 0.00019012505269074047, "loss": 4.9306, "step": 45120 }, { "epoch": 0.0634118839322253, "grad_norm": 1.0407912731170654, "learning_rate": 0.0001901672052831249, "loss": 4.7811, "step": 45130 }, { "epoch": 0.06342593487038888, "grad_norm": 0.9633585214614868, "learning_rate": 0.00019020935787550933, "loss": 4.8575, "step": 45140 }, { "epoch": 0.06343998580855245, "grad_norm": 0.9573111534118652, "learning_rate": 0.00019025151046789376, "loss": 4.873, "step": 45150 }, { "epoch": 0.06345403674671603, "grad_norm": 1.0147194862365723, "learning_rate": 0.00019029366306027817, "loss": 4.7867, "step": 45160 }, { "epoch": 0.0634680876848796, "grad_norm": 0.9639556407928467, "learning_rate": 0.00019033581565266263, "loss": 4.8389, "step": 45170 }, { "epoch": 0.0634821386230432, "grad_norm": 0.9909672141075134, "learning_rate": 0.00019037796824504706, "loss": 4.9519, "step": 45180 }, { "epoch": 0.06349618956120677, "grad_norm": 0.9403594732284546, "learning_rate": 0.0001904201208374315, 
"loss": 5.012, "step": 45190 }, { "epoch": 0.06351024049937035, "grad_norm": 1.0044161081314087, "learning_rate": 0.00019046227342981592, "loss": 4.8788, "step": 45200 }, { "epoch": 0.06352429143753392, "grad_norm": 0.9971259832382202, "learning_rate": 0.00019050442602220035, "loss": 4.9838, "step": 45210 }, { "epoch": 0.0635383423756975, "grad_norm": 0.9722863435745239, "learning_rate": 0.00019054657861458475, "loss": 4.9108, "step": 45220 }, { "epoch": 0.06355239331386107, "grad_norm": 0.967170774936676, "learning_rate": 0.0001905887312069692, "loss": 4.8078, "step": 45230 }, { "epoch": 0.06356644425202465, "grad_norm": 0.9585663676261902, "learning_rate": 0.00019063088379935364, "loss": 4.9564, "step": 45240 }, { "epoch": 0.06358049519018823, "grad_norm": 1.025834083557129, "learning_rate": 0.00019067303639173807, "loss": 4.8575, "step": 45250 }, { "epoch": 0.0635945461283518, "grad_norm": 0.9579908847808838, "learning_rate": 0.0001907151889841225, "loss": 4.8684, "step": 45260 }, { "epoch": 0.06360859706651538, "grad_norm": 1.0075056552886963, "learning_rate": 0.00019075734157650694, "loss": 4.9019, "step": 45270 }, { "epoch": 0.06362264800467896, "grad_norm": 0.9414964914321899, "learning_rate": 0.00019079949416889134, "loss": 4.8507, "step": 45280 }, { "epoch": 0.06363669894284255, "grad_norm": 0.947604775428772, "learning_rate": 0.0001908416467612758, "loss": 4.9015, "step": 45290 }, { "epoch": 0.06365074988100612, "grad_norm": 0.961848795413971, "learning_rate": 0.00019088379935366023, "loss": 4.9397, "step": 45300 }, { "epoch": 0.0636648008191697, "grad_norm": 0.9270375370979309, "learning_rate": 0.00019092595194604466, "loss": 4.8831, "step": 45310 }, { "epoch": 0.06367885175733327, "grad_norm": 0.9623255729675293, "learning_rate": 0.0001909681045384291, "loss": 4.9062, "step": 45320 }, { "epoch": 0.06369290269549685, "grad_norm": 0.9620530605316162, "learning_rate": 0.00019101025713081352, "loss": 4.9139, "step": 45330 }, { "epoch": 0.06370695363366043, 
"grad_norm": 0.9391373991966248, "learning_rate": 0.00019105240972319798, "loss": 4.9787, "step": 45340 }, { "epoch": 0.063721004571824, "grad_norm": 1.0350898504257202, "learning_rate": 0.0001910945623155824, "loss": 4.7686, "step": 45350 }, { "epoch": 0.06373505550998758, "grad_norm": 0.9722337126731873, "learning_rate": 0.00019113671490796682, "loss": 4.8961, "step": 45360 }, { "epoch": 0.06374910644815115, "grad_norm": 0.9319054484367371, "learning_rate": 0.00019117886750035125, "loss": 5.0057, "step": 45370 }, { "epoch": 0.06376315738631473, "grad_norm": 0.9770829081535339, "learning_rate": 0.00019122102009273568, "loss": 4.9013, "step": 45380 }, { "epoch": 0.06377720832447832, "grad_norm": 0.9620832204818726, "learning_rate": 0.00019126317268512014, "loss": 4.974, "step": 45390 }, { "epoch": 0.0637912592626419, "grad_norm": 1.0096343755722046, "learning_rate": 0.00019130532527750457, "loss": 4.8912, "step": 45400 }, { "epoch": 0.06380531020080547, "grad_norm": 0.962091326713562, "learning_rate": 0.00019134747786988897, "loss": 4.9313, "step": 45410 }, { "epoch": 0.06381936113896905, "grad_norm": 0.9614576697349548, "learning_rate": 0.0001913896304622734, "loss": 4.9008, "step": 45420 }, { "epoch": 0.06383341207713263, "grad_norm": 0.9405627846717834, "learning_rate": 0.00019143178305465784, "loss": 4.8902, "step": 45430 }, { "epoch": 0.0638474630152962, "grad_norm": 1.075329065322876, "learning_rate": 0.00019147393564704227, "loss": 4.7951, "step": 45440 }, { "epoch": 0.06386151395345978, "grad_norm": 0.9246218800544739, "learning_rate": 0.00019151608823942673, "loss": 4.8835, "step": 45450 }, { "epoch": 0.06387556489162335, "grad_norm": 0.9820383191108704, "learning_rate": 0.00019155824083181116, "loss": 4.9678, "step": 45460 }, { "epoch": 0.06388961582978693, "grad_norm": 0.9855993390083313, "learning_rate": 0.0001916003934241956, "loss": 4.9499, "step": 45470 }, { "epoch": 0.0639036667679505, "grad_norm": 0.9557892084121704, "learning_rate": 
0.00019164254601658, "loss": 4.9888, "step": 45480 }, { "epoch": 0.0639177177061141, "grad_norm": 0.9590747356414795, "learning_rate": 0.00019168469860896442, "loss": 4.9599, "step": 45490 }, { "epoch": 0.06393176864427767, "grad_norm": 0.9951782822608948, "learning_rate": 0.00019172685120134885, "loss": 4.8487, "step": 45500 }, { "epoch": 0.06394581958244125, "grad_norm": 0.9474301338195801, "learning_rate": 0.0001917690037937333, "loss": 4.9257, "step": 45510 }, { "epoch": 0.06395987052060483, "grad_norm": 0.9949396252632141, "learning_rate": 0.00019181115638611774, "loss": 4.8466, "step": 45520 }, { "epoch": 0.0639739214587684, "grad_norm": 0.9865704774856567, "learning_rate": 0.00019185330897850217, "loss": 4.9049, "step": 45530 }, { "epoch": 0.06398797239693198, "grad_norm": 1.0222458839416504, "learning_rate": 0.00019189546157088658, "loss": 4.8955, "step": 45540 }, { "epoch": 0.06400202333509555, "grad_norm": 0.9480180144309998, "learning_rate": 0.000191937614163271, "loss": 4.9021, "step": 45550 }, { "epoch": 0.06401607427325913, "grad_norm": 0.9689090847969055, "learning_rate": 0.00019197976675565544, "loss": 5.0263, "step": 45560 }, { "epoch": 0.0640301252114227, "grad_norm": 0.9644342064857483, "learning_rate": 0.0001920219193480399, "loss": 4.9652, "step": 45570 }, { "epoch": 0.06404417614958628, "grad_norm": 0.9961534738540649, "learning_rate": 0.00019206407194042433, "loss": 4.8413, "step": 45580 }, { "epoch": 0.06405822708774986, "grad_norm": 0.9438686966896057, "learning_rate": 0.00019210622453280876, "loss": 4.9344, "step": 45590 }, { "epoch": 0.06407227802591345, "grad_norm": 0.9871533513069153, "learning_rate": 0.0001921483771251932, "loss": 4.9071, "step": 45600 }, { "epoch": 0.06408632896407702, "grad_norm": 0.9871090650558472, "learning_rate": 0.0001921905297175776, "loss": 4.8008, "step": 45610 }, { "epoch": 0.0641003799022406, "grad_norm": 1.0225098133087158, "learning_rate": 0.00019223268230996203, "loss": 5.0841, "step": 45620 }, { 
"epoch": 0.06411443084040418, "grad_norm": 0.9514864087104797, "learning_rate": 0.00019227483490234649, "loss": 4.8433, "step": 45630 }, { "epoch": 0.06412848177856775, "grad_norm": 0.9759644865989685, "learning_rate": 0.00019231698749473092, "loss": 4.9319, "step": 45640 }, { "epoch": 0.06414253271673133, "grad_norm": 0.9882025122642517, "learning_rate": 0.00019235914008711535, "loss": 4.9566, "step": 45650 }, { "epoch": 0.0641565836548949, "grad_norm": 0.9645729660987854, "learning_rate": 0.00019240129267949978, "loss": 4.963, "step": 45660 }, { "epoch": 0.06417063459305848, "grad_norm": 0.9985837936401367, "learning_rate": 0.00019244344527188418, "loss": 4.968, "step": 45670 }, { "epoch": 0.06418468553122206, "grad_norm": 0.9520775675773621, "learning_rate": 0.00019248559786426862, "loss": 4.9343, "step": 45680 }, { "epoch": 0.06419873646938563, "grad_norm": 0.947435736656189, "learning_rate": 0.00019252775045665307, "loss": 4.8316, "step": 45690 }, { "epoch": 0.06421278740754922, "grad_norm": 1.099055528640747, "learning_rate": 0.0001925699030490375, "loss": 4.7128, "step": 45700 }, { "epoch": 0.0642268383457128, "grad_norm": 0.9727609753608704, "learning_rate": 0.00019261205564142194, "loss": 4.8179, "step": 45710 }, { "epoch": 0.06424088928387638, "grad_norm": 0.9793810248374939, "learning_rate": 0.00019265420823380637, "loss": 4.9058, "step": 45720 }, { "epoch": 0.06425494022203995, "grad_norm": 0.9617207646369934, "learning_rate": 0.0001926963608261908, "loss": 4.8955, "step": 45730 }, { "epoch": 0.06426899116020353, "grad_norm": 0.9949877262115479, "learning_rate": 0.0001927385134185752, "loss": 4.9032, "step": 45740 }, { "epoch": 0.0642830420983671, "grad_norm": 0.9807795882225037, "learning_rate": 0.00019278066601095966, "loss": 4.9258, "step": 45750 }, { "epoch": 0.06429709303653068, "grad_norm": 0.9766243696212769, "learning_rate": 0.0001928228186033441, "loss": 4.8925, "step": 45760 }, { "epoch": 0.06431114397469426, "grad_norm": 0.971154510974884, 
"learning_rate": 0.00019286497119572852, "loss": 5.0002, "step": 45770 }, { "epoch": 0.06432519491285783, "grad_norm": 0.966464102268219, "learning_rate": 0.00019290712378811295, "loss": 4.8888, "step": 45780 }, { "epoch": 0.06433924585102141, "grad_norm": 0.9556533694267273, "learning_rate": 0.00019294927638049738, "loss": 4.8204, "step": 45790 }, { "epoch": 0.064353296789185, "grad_norm": 0.9517030715942383, "learning_rate": 0.0001929914289728818, "loss": 4.8391, "step": 45800 }, { "epoch": 0.06436734772734858, "grad_norm": 1.0709158182144165, "learning_rate": 0.00019303358156526625, "loss": 4.8775, "step": 45810 }, { "epoch": 0.06438139866551215, "grad_norm": 0.9663071036338806, "learning_rate": 0.00019307573415765068, "loss": 4.7677, "step": 45820 }, { "epoch": 0.06439544960367573, "grad_norm": 0.9344285726547241, "learning_rate": 0.0001931178867500351, "loss": 4.9828, "step": 45830 }, { "epoch": 0.0644095005418393, "grad_norm": 0.9900237917900085, "learning_rate": 0.00019316003934241954, "loss": 4.7965, "step": 45840 }, { "epoch": 0.06442355148000288, "grad_norm": 0.9743140935897827, "learning_rate": 0.00019320219193480397, "loss": 4.963, "step": 45850 }, { "epoch": 0.06443760241816646, "grad_norm": 0.9134804606437683, "learning_rate": 0.00019324434452718843, "loss": 4.8572, "step": 45860 }, { "epoch": 0.06445165335633003, "grad_norm": 0.9742643237113953, "learning_rate": 0.00019328649711957283, "loss": 4.8383, "step": 45870 }, { "epoch": 0.06446570429449361, "grad_norm": 0.9616056680679321, "learning_rate": 0.00019332864971195727, "loss": 4.7778, "step": 45880 }, { "epoch": 0.06447975523265718, "grad_norm": 0.9398157596588135, "learning_rate": 0.0001933708023043417, "loss": 4.9049, "step": 45890 }, { "epoch": 0.06449380617082076, "grad_norm": 1.0289732217788696, "learning_rate": 0.00019341295489672613, "loss": 4.9867, "step": 45900 }, { "epoch": 0.06450785710898435, "grad_norm": 0.9898787140846252, "learning_rate": 0.00019345510748911056, "loss": 4.9202, 
"step": 45910 }, { "epoch": 0.06452190804714793, "grad_norm": 0.9590668082237244, "learning_rate": 0.00019349726008149502, "loss": 4.8323, "step": 45920 }, { "epoch": 0.0645359589853115, "grad_norm": 0.9680622220039368, "learning_rate": 0.00019353941267387942, "loss": 4.9099, "step": 45930 }, { "epoch": 0.06455000992347508, "grad_norm": 0.9715376496315002, "learning_rate": 0.00019358156526626385, "loss": 4.8441, "step": 45940 }, { "epoch": 0.06456406086163866, "grad_norm": 1.0132158994674683, "learning_rate": 0.00019362371785864828, "loss": 4.9372, "step": 45950 }, { "epoch": 0.06457811179980223, "grad_norm": 1.0213428735733032, "learning_rate": 0.00019366587045103271, "loss": 4.9999, "step": 45960 }, { "epoch": 0.06459216273796581, "grad_norm": 0.9593140482902527, "learning_rate": 0.00019370802304341717, "loss": 4.9006, "step": 45970 }, { "epoch": 0.06460621367612938, "grad_norm": 0.9583016037940979, "learning_rate": 0.0001937501756358016, "loss": 4.9547, "step": 45980 }, { "epoch": 0.06462026461429296, "grad_norm": 1.043340802192688, "learning_rate": 0.000193792328228186, "loss": 4.8028, "step": 45990 }, { "epoch": 0.06463431555245654, "grad_norm": 0.9959349036216736, "learning_rate": 0.00019383448082057044, "loss": 4.8156, "step": 46000 }, { "epoch": 0.06464836649062013, "grad_norm": 0.9628585577011108, "learning_rate": 0.00019387663341295487, "loss": 4.9479, "step": 46010 }, { "epoch": 0.0646624174287837, "grad_norm": 0.9335030317306519, "learning_rate": 0.0001939187860053393, "loss": 4.8707, "step": 46020 }, { "epoch": 0.06467646836694728, "grad_norm": 0.9447938203811646, "learning_rate": 0.00019396093859772376, "loss": 4.8819, "step": 46030 }, { "epoch": 0.06469051930511086, "grad_norm": 0.9535121917724609, "learning_rate": 0.0001940030911901082, "loss": 4.9952, "step": 46040 }, { "epoch": 0.06470457024327443, "grad_norm": 0.9345034956932068, "learning_rate": 0.00019404524378249262, "loss": 4.8327, "step": 46050 }, { "epoch": 0.06471862118143801, "grad_norm": 
0.9597700238227844, "learning_rate": 0.00019408739637487703, "loss": 4.9953, "step": 46060 }, { "epoch": 0.06473267211960158, "grad_norm": 0.9878246188163757, "learning_rate": 0.00019412954896726146, "loss": 4.9171, "step": 46070 }, { "epoch": 0.06474672305776516, "grad_norm": 0.9477070569992065, "learning_rate": 0.0001941717015596459, "loss": 4.8915, "step": 46080 }, { "epoch": 0.06476077399592874, "grad_norm": 1.2421875, "learning_rate": 0.00019421385415203035, "loss": 4.9158, "step": 46090 }, { "epoch": 0.06477482493409231, "grad_norm": 0.9720420837402344, "learning_rate": 0.00019425600674441478, "loss": 5.008, "step": 46100 }, { "epoch": 0.0647888758722559, "grad_norm": 0.9742789268493652, "learning_rate": 0.0001942981593367992, "loss": 4.8, "step": 46110 }, { "epoch": 0.06480292681041948, "grad_norm": 0.9673479795455933, "learning_rate": 0.0001943403119291836, "loss": 4.8455, "step": 46120 }, { "epoch": 0.06481697774858305, "grad_norm": 0.9082801938056946, "learning_rate": 0.00019438246452156804, "loss": 4.8671, "step": 46130 }, { "epoch": 0.06483102868674663, "grad_norm": 0.9455775022506714, "learning_rate": 0.00019442461711395248, "loss": 4.8722, "step": 46140 }, { "epoch": 0.0648450796249102, "grad_norm": 0.9338746666908264, "learning_rate": 0.00019446676970633693, "loss": 4.8412, "step": 46150 }, { "epoch": 0.06485913056307378, "grad_norm": 0.976826012134552, "learning_rate": 0.00019450892229872137, "loss": 4.7848, "step": 46160 }, { "epoch": 0.06487318150123736, "grad_norm": 0.9593846797943115, "learning_rate": 0.0001945510748911058, "loss": 4.8292, "step": 46170 }, { "epoch": 0.06488723243940094, "grad_norm": 0.9429964423179626, "learning_rate": 0.00019459322748349023, "loss": 4.8929, "step": 46180 }, { "epoch": 0.06490128337756451, "grad_norm": 0.9602091312408447, "learning_rate": 0.00019463538007587463, "loss": 4.8737, "step": 46190 }, { "epoch": 0.06491533431572809, "grad_norm": 0.9551581740379333, "learning_rate": 0.00019467753266825906, "loss": 
4.8948, "step": 46200 }, { "epoch": 0.06492938525389168, "grad_norm": 0.9924933314323425, "learning_rate": 0.00019471968526064352, "loss": 4.9313, "step": 46210 }, { "epoch": 0.06494343619205525, "grad_norm": 0.9613593220710754, "learning_rate": 0.00019476183785302795, "loss": 4.8588, "step": 46220 }, { "epoch": 0.06495748713021883, "grad_norm": 0.9572134017944336, "learning_rate": 0.00019480399044541238, "loss": 4.8414, "step": 46230 }, { "epoch": 0.0649715380683824, "grad_norm": 0.9708147048950195, "learning_rate": 0.00019484614303779681, "loss": 4.8422, "step": 46240 }, { "epoch": 0.06498558900654598, "grad_norm": 0.9796114563941956, "learning_rate": 0.00019488829563018122, "loss": 4.843, "step": 46250 }, { "epoch": 0.06499963994470956, "grad_norm": 0.9643402099609375, "learning_rate": 0.00019493044822256565, "loss": 4.9446, "step": 46260 }, { "epoch": 0.06501369088287313, "grad_norm": 0.9505442976951599, "learning_rate": 0.0001949726008149501, "loss": 5.0311, "step": 46270 }, { "epoch": 0.06502774182103671, "grad_norm": 0.9346354007720947, "learning_rate": 0.00019501475340733454, "loss": 4.8329, "step": 46280 }, { "epoch": 0.06504179275920029, "grad_norm": 1.1094235181808472, "learning_rate": 0.00019505690599971897, "loss": 4.9727, "step": 46290 }, { "epoch": 0.06505584369736386, "grad_norm": 0.9481832385063171, "learning_rate": 0.0001950990585921034, "loss": 4.9542, "step": 46300 }, { "epoch": 0.06506989463552744, "grad_norm": 0.9947605729103088, "learning_rate": 0.00019514121118448783, "loss": 4.979, "step": 46310 }, { "epoch": 0.06508394557369103, "grad_norm": 0.9515612125396729, "learning_rate": 0.00019518336377687224, "loss": 4.9198, "step": 46320 }, { "epoch": 0.0650979965118546, "grad_norm": 0.9424987435340881, "learning_rate": 0.0001952255163692567, "loss": 4.8822, "step": 46330 }, { "epoch": 0.06511204745001818, "grad_norm": 1.0159428119659424, "learning_rate": 0.00019526766896164113, "loss": 4.7915, "step": 46340 }, { "epoch": 0.06512609838818176, 
"grad_norm": 0.993810772895813, "learning_rate": 0.00019530982155402556, "loss": 4.9791, "step": 46350 }, { "epoch": 0.06514014932634533, "grad_norm": 1.029266357421875, "learning_rate": 0.00019535197414641, "loss": 4.7561, "step": 46360 }, { "epoch": 0.06515420026450891, "grad_norm": 0.9550025463104248, "learning_rate": 0.00019539412673879442, "loss": 4.9486, "step": 46370 }, { "epoch": 0.06516825120267249, "grad_norm": 0.9962372183799744, "learning_rate": 0.00019543627933117882, "loss": 4.8646, "step": 46380 }, { "epoch": 0.06518230214083606, "grad_norm": 0.9664082527160645, "learning_rate": 0.00019547843192356328, "loss": 4.9335, "step": 46390 }, { "epoch": 0.06519635307899964, "grad_norm": 1.0118764638900757, "learning_rate": 0.0001955205845159477, "loss": 4.8674, "step": 46400 }, { "epoch": 0.06521040401716321, "grad_norm": 1.034801959991455, "learning_rate": 0.00019556273710833214, "loss": 4.8237, "step": 46410 }, { "epoch": 0.0652244549553268, "grad_norm": 0.9353872537612915, "learning_rate": 0.00019560488970071658, "loss": 4.9462, "step": 46420 }, { "epoch": 0.06523850589349038, "grad_norm": 1.0210577249526978, "learning_rate": 0.000195647042293101, "loss": 4.914, "step": 46430 }, { "epoch": 0.06525255683165396, "grad_norm": 0.9545332789421082, "learning_rate": 0.00019568919488548547, "loss": 4.9091, "step": 46440 }, { "epoch": 0.06526660776981753, "grad_norm": 0.9154708981513977, "learning_rate": 0.00019573134747786987, "loss": 4.88, "step": 46450 }, { "epoch": 0.06528065870798111, "grad_norm": 0.9885104894638062, "learning_rate": 0.0001957735000702543, "loss": 4.8147, "step": 46460 }, { "epoch": 0.06529470964614469, "grad_norm": 1.0183314085006714, "learning_rate": 0.00019581565266263873, "loss": 4.8397, "step": 46470 }, { "epoch": 0.06530876058430826, "grad_norm": 0.969292163848877, "learning_rate": 0.00019585780525502316, "loss": 4.82, "step": 46480 }, { "epoch": 0.06532281152247184, "grad_norm": 0.9941924810409546, "learning_rate": 
0.0001958999578474076, "loss": 4.8408, "step": 46490 }, { "epoch": 0.06533686246063541, "grad_norm": 0.9590076804161072, "learning_rate": 0.00019594211043979205, "loss": 4.8889, "step": 46500 }, { "epoch": 0.06535091339879899, "grad_norm": 0.9533814787864685, "learning_rate": 0.00019598426303217646, "loss": 4.8636, "step": 46510 }, { "epoch": 0.06536496433696258, "grad_norm": 0.9973676800727844, "learning_rate": 0.0001960264156245609, "loss": 4.8238, "step": 46520 }, { "epoch": 0.06537901527512616, "grad_norm": 0.9768134951591492, "learning_rate": 0.00019606856821694532, "loss": 4.8206, "step": 46530 }, { "epoch": 0.06539306621328973, "grad_norm": 0.9430971145629883, "learning_rate": 0.00019611072080932975, "loss": 4.919, "step": 46540 }, { "epoch": 0.06540711715145331, "grad_norm": 0.9878526329994202, "learning_rate": 0.0001961528734017142, "loss": 4.9512, "step": 46550 }, { "epoch": 0.06542116808961689, "grad_norm": 0.9809000492095947, "learning_rate": 0.00019619502599409864, "loss": 4.8933, "step": 46560 }, { "epoch": 0.06543521902778046, "grad_norm": 0.957922637462616, "learning_rate": 0.00019623717858648307, "loss": 4.8238, "step": 46570 }, { "epoch": 0.06544926996594404, "grad_norm": 0.9714646935462952, "learning_rate": 0.00019627511591962905, "loss": 4.928, "step": 46580 }, { "epoch": 0.06546332090410761, "grad_norm": 0.9626190662384033, "learning_rate": 0.00019631726851201348, "loss": 4.7571, "step": 46590 }, { "epoch": 0.06547737184227119, "grad_norm": 0.9654691815376282, "learning_rate": 0.00019635942110439792, "loss": 4.9651, "step": 46600 }, { "epoch": 0.06549142278043477, "grad_norm": 0.9982614517211914, "learning_rate": 0.00019640157369678232, "loss": 5.0936, "step": 46610 }, { "epoch": 0.06550547371859834, "grad_norm": 0.9625030159950256, "learning_rate": 0.00019644372628916675, "loss": 4.9313, "step": 46620 }, { "epoch": 0.06551952465676193, "grad_norm": 0.9751782417297363, "learning_rate": 0.0001964858788815512, "loss": 4.9248, "step": 46630 }, { 
"epoch": 0.06553357559492551, "grad_norm": 0.9599964022636414, "learning_rate": 0.00019652803147393564, "loss": 4.9034, "step": 46640 }, { "epoch": 0.06554762653308908, "grad_norm": 0.9146353602409363, "learning_rate": 0.00019657018406632007, "loss": 4.9831, "step": 46650 }, { "epoch": 0.06556167747125266, "grad_norm": 0.9709069728851318, "learning_rate": 0.0001966123366587045, "loss": 4.8672, "step": 46660 }, { "epoch": 0.06557572840941624, "grad_norm": 0.9541670083999634, "learning_rate": 0.0001966544892510889, "loss": 4.9094, "step": 46670 }, { "epoch": 0.06558977934757981, "grad_norm": 0.9834936857223511, "learning_rate": 0.00019669664184347334, "loss": 4.7472, "step": 46680 }, { "epoch": 0.06560383028574339, "grad_norm": 0.919543445110321, "learning_rate": 0.0001967387944358578, "loss": 5.0209, "step": 46690 }, { "epoch": 0.06561788122390697, "grad_norm": 0.9145947694778442, "learning_rate": 0.00019678094702824223, "loss": 4.7879, "step": 46700 }, { "epoch": 0.06563193216207054, "grad_norm": 0.9651200771331787, "learning_rate": 0.00019682309962062666, "loss": 4.756, "step": 46710 }, { "epoch": 0.06564598310023412, "grad_norm": 0.9358447790145874, "learning_rate": 0.0001968652522130111, "loss": 4.9653, "step": 46720 }, { "epoch": 0.06566003403839771, "grad_norm": 0.9291307330131531, "learning_rate": 0.00019690740480539552, "loss": 4.8989, "step": 46730 }, { "epoch": 0.06567408497656128, "grad_norm": 0.9250601530075073, "learning_rate": 0.00019694955739777992, "loss": 4.7435, "step": 46740 }, { "epoch": 0.06568813591472486, "grad_norm": 0.9375638961791992, "learning_rate": 0.00019699170999016438, "loss": 4.8347, "step": 46750 }, { "epoch": 0.06570218685288844, "grad_norm": 0.9673593044281006, "learning_rate": 0.00019703386258254881, "loss": 4.8107, "step": 46760 }, { "epoch": 0.06571623779105201, "grad_norm": 0.9529427886009216, "learning_rate": 0.00019707601517493324, "loss": 4.8632, "step": 46770 }, { "epoch": 0.06573028872921559, "grad_norm": 
0.9514247179031372, "learning_rate": 0.00019711816776731768, "loss": 4.9264, "step": 46780 }, { "epoch": 0.06574433966737916, "grad_norm": 1.0021986961364746, "learning_rate": 0.0001971603203597021, "loss": 4.8666, "step": 46790 }, { "epoch": 0.06575839060554274, "grad_norm": 1.545295000076294, "learning_rate": 0.0001972024729520865, "loss": 4.8099, "step": 46800 }, { "epoch": 0.06577244154370632, "grad_norm": 0.9640814065933228, "learning_rate": 0.00019724462554447097, "loss": 4.9203, "step": 46810 }, { "epoch": 0.0657864924818699, "grad_norm": 0.9138042330741882, "learning_rate": 0.0001972867781368554, "loss": 5.0355, "step": 46820 }, { "epoch": 0.06580054342003348, "grad_norm": 0.9734580516815186, "learning_rate": 0.00019732893072923983, "loss": 4.8711, "step": 46830 }, { "epoch": 0.06581459435819706, "grad_norm": 1.0923768281936646, "learning_rate": 0.00019737108332162426, "loss": 4.9167, "step": 46840 }, { "epoch": 0.06582864529636064, "grad_norm": 1.087605595588684, "learning_rate": 0.0001974132359140087, "loss": 4.9004, "step": 46850 }, { "epoch": 0.06584269623452421, "grad_norm": 0.9761277437210083, "learning_rate": 0.00019745538850639315, "loss": 4.9228, "step": 46860 }, { "epoch": 0.06585674717268779, "grad_norm": 1.0201919078826904, "learning_rate": 0.00019749754109877756, "loss": 4.9418, "step": 46870 }, { "epoch": 0.06587079811085136, "grad_norm": 0.9499254822731018, "learning_rate": 0.000197539693691162, "loss": 4.9016, "step": 46880 }, { "epoch": 0.06588484904901494, "grad_norm": 0.9977640509605408, "learning_rate": 0.00019758184628354642, "loss": 4.8839, "step": 46890 }, { "epoch": 0.06589889998717852, "grad_norm": 0.9540080428123474, "learning_rate": 0.00019762399887593085, "loss": 4.8135, "step": 46900 }, { "epoch": 0.06591295092534209, "grad_norm": 0.9515107274055481, "learning_rate": 0.00019766615146831528, "loss": 4.7421, "step": 46910 }, { "epoch": 0.06592700186350567, "grad_norm": 0.9759949445724487, "learning_rate": 0.00019770830406069974, 
"loss": 4.9691, "step": 46920 }, { "epoch": 0.06594105280166924, "grad_norm": 0.9368551969528198, "learning_rate": 0.00019775045665308414, "loss": 4.9767, "step": 46930 }, { "epoch": 0.06595510373983283, "grad_norm": 0.9469462037086487, "learning_rate": 0.00019779260924546857, "loss": 4.8049, "step": 46940 }, { "epoch": 0.06596915467799641, "grad_norm": 0.979142427444458, "learning_rate": 0.000197834761837853, "loss": 4.8257, "step": 46950 }, { "epoch": 0.06598320561615999, "grad_norm": 0.9656456112861633, "learning_rate": 0.00019787691443023744, "loss": 4.9793, "step": 46960 }, { "epoch": 0.06599725655432356, "grad_norm": 0.9597137570381165, "learning_rate": 0.00019791906702262187, "loss": 4.882, "step": 46970 }, { "epoch": 0.06601130749248714, "grad_norm": 0.9437071084976196, "learning_rate": 0.00019796121961500633, "loss": 4.8506, "step": 46980 }, { "epoch": 0.06602535843065072, "grad_norm": 0.9833372235298157, "learning_rate": 0.00019800337220739076, "loss": 4.8654, "step": 46990 }, { "epoch": 0.06603940936881429, "grad_norm": 0.9324950575828552, "learning_rate": 0.00019804552479977516, "loss": 4.8477, "step": 47000 }, { "epoch": 0.06605346030697787, "grad_norm": 0.9253446459770203, "learning_rate": 0.0001980876773921596, "loss": 4.831, "step": 47010 }, { "epoch": 0.06606751124514144, "grad_norm": 0.9337993264198303, "learning_rate": 0.00019812982998454402, "loss": 4.8415, "step": 47020 }, { "epoch": 0.06608156218330502, "grad_norm": 0.9470283389091492, "learning_rate": 0.00019817198257692846, "loss": 4.9581, "step": 47030 }, { "epoch": 0.06609561312146861, "grad_norm": 0.9741782546043396, "learning_rate": 0.00019821413516931291, "loss": 4.9539, "step": 47040 }, { "epoch": 0.06610966405963219, "grad_norm": 0.9608268141746521, "learning_rate": 0.00019825628776169734, "loss": 4.8291, "step": 47050 }, { "epoch": 0.06612371499779576, "grad_norm": 1.0971790552139282, "learning_rate": 0.00019829844035408175, "loss": 4.8117, "step": 47060 }, { "epoch": 
0.06613776593595934, "grad_norm": 0.9461948871612549, "learning_rate": 0.00019834059294646618, "loss": 4.9276, "step": 47070 }, { "epoch": 0.06615181687412292, "grad_norm": 0.9624106287956238, "learning_rate": 0.0001983827455388506, "loss": 4.8393, "step": 47080 }, { "epoch": 0.06616586781228649, "grad_norm": 0.9574129581451416, "learning_rate": 0.00019842489813123504, "loss": 4.7859, "step": 47090 }, { "epoch": 0.06617991875045007, "grad_norm": 0.9906713962554932, "learning_rate": 0.0001984670507236195, "loss": 4.9673, "step": 47100 }, { "epoch": 0.06619396968861364, "grad_norm": 0.9565755128860474, "learning_rate": 0.00019850920331600393, "loss": 4.9318, "step": 47110 }, { "epoch": 0.06620802062677722, "grad_norm": 0.9409468770027161, "learning_rate": 0.00019855135590838834, "loss": 4.7968, "step": 47120 }, { "epoch": 0.0662220715649408, "grad_norm": 0.9226107001304626, "learning_rate": 0.00019859350850077277, "loss": 4.7978, "step": 47130 }, { "epoch": 0.06623612250310439, "grad_norm": 0.919445812702179, "learning_rate": 0.0001986356610931572, "loss": 4.9299, "step": 47140 }, { "epoch": 0.06625017344126796, "grad_norm": 0.9550936818122864, "learning_rate": 0.00019867781368554163, "loss": 4.8339, "step": 47150 }, { "epoch": 0.06626422437943154, "grad_norm": 0.9679076075553894, "learning_rate": 0.0001987199662779261, "loss": 4.912, "step": 47160 }, { "epoch": 0.06627827531759511, "grad_norm": 0.9844717979431152, "learning_rate": 0.00019876211887031052, "loss": 4.9034, "step": 47170 }, { "epoch": 0.06629232625575869, "grad_norm": 0.9533633589744568, "learning_rate": 0.00019880427146269495, "loss": 4.9254, "step": 47180 }, { "epoch": 0.06630637719392227, "grad_norm": 0.9623854160308838, "learning_rate": 0.00019884642405507935, "loss": 4.9148, "step": 47190 }, { "epoch": 0.06632042813208584, "grad_norm": 0.9844478964805603, "learning_rate": 0.00019888857664746379, "loss": 4.901, "step": 47200 }, { "epoch": 0.06633447907024942, "grad_norm": 0.9062860012054443, 
"learning_rate": 0.00019893072923984824, "loss": 4.8345, "step": 47210 }, { "epoch": 0.066348530008413, "grad_norm": 0.9490434527397156, "learning_rate": 0.00019897288183223267, "loss": 4.8597, "step": 47220 }, { "epoch": 0.06636258094657657, "grad_norm": 0.9500983357429504, "learning_rate": 0.0001990150344246171, "loss": 4.8869, "step": 47230 }, { "epoch": 0.06637663188474015, "grad_norm": 0.9309141039848328, "learning_rate": 0.00019905718701700154, "loss": 4.864, "step": 47240 }, { "epoch": 0.06639068282290374, "grad_norm": 1.0119329690933228, "learning_rate": 0.00019909933960938594, "loss": 4.8581, "step": 47250 }, { "epoch": 0.06640473376106731, "grad_norm": 0.9545915126800537, "learning_rate": 0.00019914149220177037, "loss": 5.0142, "step": 47260 }, { "epoch": 0.06641878469923089, "grad_norm": 0.948043167591095, "learning_rate": 0.00019918364479415483, "loss": 4.9292, "step": 47270 }, { "epoch": 0.06643283563739447, "grad_norm": 0.9219819903373718, "learning_rate": 0.00019922579738653926, "loss": 4.9284, "step": 47280 }, { "epoch": 0.06644688657555804, "grad_norm": 0.9398238658905029, "learning_rate": 0.0001992679499789237, "loss": 4.8902, "step": 47290 }, { "epoch": 0.06646093751372162, "grad_norm": 0.9383578896522522, "learning_rate": 0.00019931010257130812, "loss": 4.827, "step": 47300 }, { "epoch": 0.0664749884518852, "grad_norm": 0.9550478458404541, "learning_rate": 0.00019935225516369256, "loss": 5.0553, "step": 47310 }, { "epoch": 0.06648903939004877, "grad_norm": 0.9616899490356445, "learning_rate": 0.00019939440775607696, "loss": 4.8483, "step": 47320 }, { "epoch": 0.06650309032821235, "grad_norm": 0.9487091898918152, "learning_rate": 0.00019943656034846142, "loss": 4.9295, "step": 47330 }, { "epoch": 0.06651714126637592, "grad_norm": 0.939850926399231, "learning_rate": 0.00019947871294084585, "loss": 4.7803, "step": 47340 }, { "epoch": 0.06653119220453951, "grad_norm": 0.9599955081939697, "learning_rate": 0.00019952086553323028, "loss": 4.8776, 
"step": 47350 }, { "epoch": 0.06654524314270309, "grad_norm": 0.9502904415130615, "learning_rate": 0.0001995630181256147, "loss": 4.8173, "step": 47360 }, { "epoch": 0.06655929408086667, "grad_norm": 0.9396896958351135, "learning_rate": 0.00019960517071799914, "loss": 4.9296, "step": 47370 }, { "epoch": 0.06657334501903024, "grad_norm": 0.9196685552597046, "learning_rate": 0.00019964732331038355, "loss": 5.0329, "step": 47380 }, { "epoch": 0.06658739595719382, "grad_norm": 0.9280641078948975, "learning_rate": 0.000199689475902768, "loss": 4.8225, "step": 47390 }, { "epoch": 0.0666014468953574, "grad_norm": 0.9672068357467651, "learning_rate": 0.00019973162849515244, "loss": 4.8844, "step": 47400 }, { "epoch": 0.06661549783352097, "grad_norm": 0.9293968677520752, "learning_rate": 0.00019977378108753687, "loss": 4.828, "step": 47410 }, { "epoch": 0.06662954877168455, "grad_norm": 0.9214613437652588, "learning_rate": 0.0001998159336799213, "loss": 4.8509, "step": 47420 }, { "epoch": 0.06664359970984812, "grad_norm": 0.9712655544281006, "learning_rate": 0.00019985808627230573, "loss": 4.8579, "step": 47430 }, { "epoch": 0.0666576506480117, "grad_norm": 0.9182402491569519, "learning_rate": 0.0001999002388646902, "loss": 4.789, "step": 47440 }, { "epoch": 0.06667170158617529, "grad_norm": 0.9156343936920166, "learning_rate": 0.0001999423914570746, "loss": 4.8536, "step": 47450 }, { "epoch": 0.06668575252433886, "grad_norm": 0.9341609477996826, "learning_rate": 0.00019998454404945902, "loss": 4.989, "step": 47460 }, { "epoch": 0.06669980346250244, "grad_norm": 0.9106461405754089, "learning_rate": 0.00020002669664184345, "loss": 4.9627, "step": 47470 }, { "epoch": 0.06671385440066602, "grad_norm": 1.2454720735549927, "learning_rate": 0.00020006884923422788, "loss": 4.8649, "step": 47480 }, { "epoch": 0.0667279053388296, "grad_norm": 0.9427316784858704, "learning_rate": 0.00020011100182661232, "loss": 4.8691, "step": 47490 }, { "epoch": 0.06674195627699317, "grad_norm": 
0.9439139366149902, "learning_rate": 0.00020015315441899677, "loss": 4.9893, "step": 47500 }, { "epoch": 0.06675600721515675, "grad_norm": 0.9804021120071411, "learning_rate": 0.00020019530701138118, "loss": 5.011, "step": 47510 }, { "epoch": 0.06677005815332032, "grad_norm": 0.9366146922111511, "learning_rate": 0.0002002374596037656, "loss": 4.9722, "step": 47520 }, { "epoch": 0.0667841090914839, "grad_norm": 0.9678098559379578, "learning_rate": 0.00020027961219615004, "loss": 4.91, "step": 47530 }, { "epoch": 0.06679816002964747, "grad_norm": 0.97898268699646, "learning_rate": 0.00020032176478853447, "loss": 4.8503, "step": 47540 }, { "epoch": 0.06681221096781105, "grad_norm": 0.9911873936653137, "learning_rate": 0.0002003639173809189, "loss": 4.8557, "step": 47550 }, { "epoch": 0.06682626190597464, "grad_norm": 0.9423503875732422, "learning_rate": 0.00020040606997330336, "loss": 4.783, "step": 47560 }, { "epoch": 0.06684031284413822, "grad_norm": 0.956548810005188, "learning_rate": 0.0002004482225656878, "loss": 5.0057, "step": 47570 }, { "epoch": 0.06685436378230179, "grad_norm": 0.9642254114151001, "learning_rate": 0.0002004903751580722, "loss": 4.9451, "step": 47580 }, { "epoch": 0.06686841472046537, "grad_norm": 0.9153860211372375, "learning_rate": 0.00020053252775045663, "loss": 4.8504, "step": 47590 }, { "epoch": 0.06688246565862895, "grad_norm": 0.9987194538116455, "learning_rate": 0.00020057468034284106, "loss": 4.7961, "step": 47600 }, { "epoch": 0.06689651659679252, "grad_norm": 0.8986924290657043, "learning_rate": 0.0002006168329352255, "loss": 4.8971, "step": 47610 }, { "epoch": 0.0669105675349561, "grad_norm": 0.9505689740180969, "learning_rate": 0.00020065898552760995, "loss": 4.9268, "step": 47620 }, { "epoch": 0.06692461847311967, "grad_norm": 0.9821462631225586, "learning_rate": 0.00020070113811999438, "loss": 4.8518, "step": 47630 }, { "epoch": 0.06693866941128325, "grad_norm": 0.9475671648979187, "learning_rate": 0.00020074329071237878, 
"loss": 4.9195, "step": 47640 }, { "epoch": 0.06695272034944683, "grad_norm": 0.9619015455245972, "learning_rate": 0.00020078544330476321, "loss": 4.8156, "step": 47650 }, { "epoch": 0.06696677128761042, "grad_norm": 0.9441516995429993, "learning_rate": 0.00020082759589714765, "loss": 4.9198, "step": 47660 }, { "epoch": 0.06698082222577399, "grad_norm": 0.923410177230835, "learning_rate": 0.00020086974848953208, "loss": 4.9305, "step": 47670 }, { "epoch": 0.06699487316393757, "grad_norm": 0.9332021474838257, "learning_rate": 0.00020091190108191654, "loss": 4.7413, "step": 47680 }, { "epoch": 0.06700892410210114, "grad_norm": 0.9328610897064209, "learning_rate": 0.00020095405367430097, "loss": 4.847, "step": 47690 }, { "epoch": 0.06702297504026472, "grad_norm": 0.9511117339134216, "learning_rate": 0.0002009962062666854, "loss": 4.8878, "step": 47700 }, { "epoch": 0.0670370259784283, "grad_norm": 0.9161847829818726, "learning_rate": 0.0002010383588590698, "loss": 4.9897, "step": 47710 }, { "epoch": 0.06705107691659187, "grad_norm": 0.9360916614532471, "learning_rate": 0.00020108051145145423, "loss": 4.9512, "step": 47720 }, { "epoch": 0.06706512785475545, "grad_norm": 1.0616661310195923, "learning_rate": 0.00020112266404383866, "loss": 4.9006, "step": 47730 }, { "epoch": 0.06707917879291903, "grad_norm": 0.9333809614181519, "learning_rate": 0.00020116481663622312, "loss": 4.7788, "step": 47740 }, { "epoch": 0.0670932297310826, "grad_norm": 0.9299507737159729, "learning_rate": 0.00020120696922860755, "loss": 4.9081, "step": 47750 }, { "epoch": 0.06710728066924619, "grad_norm": 0.961584746837616, "learning_rate": 0.00020124912182099198, "loss": 4.8686, "step": 47760 }, { "epoch": 0.06712133160740977, "grad_norm": 0.9510294198989868, "learning_rate": 0.0002012912744133764, "loss": 4.8925, "step": 47770 }, { "epoch": 0.06713538254557334, "grad_norm": 0.9562880396842957, "learning_rate": 0.00020133342700576082, "loss": 4.8545, "step": 47780 }, { "epoch": 
0.06714943348373692, "grad_norm": 0.9377868175506592, "learning_rate": 0.00020137557959814528, "loss": 4.8412, "step": 47790 }, { "epoch": 0.0671634844219005, "grad_norm": 0.9249438643455505, "learning_rate": 0.0002014177321905297, "loss": 4.9385, "step": 47800 }, { "epoch": 0.06717753536006407, "grad_norm": 0.9281932711601257, "learning_rate": 0.00020145988478291414, "loss": 4.8762, "step": 47810 }, { "epoch": 0.06719158629822765, "grad_norm": 0.9970051050186157, "learning_rate": 0.00020150203737529857, "loss": 4.7625, "step": 47820 }, { "epoch": 0.06720563723639122, "grad_norm": 0.9395272731781006, "learning_rate": 0.00020154418996768298, "loss": 4.753, "step": 47830 }, { "epoch": 0.0672196881745548, "grad_norm": 1.0339237451553345, "learning_rate": 0.0002015863425600674, "loss": 4.8091, "step": 47840 }, { "epoch": 0.06723373911271838, "grad_norm": 0.9100208282470703, "learning_rate": 0.00020162849515245187, "loss": 5.0206, "step": 47850 }, { "epoch": 0.06724779005088195, "grad_norm": 0.9560271501541138, "learning_rate": 0.0002016706477448363, "loss": 4.9883, "step": 47860 }, { "epoch": 0.06726184098904554, "grad_norm": 0.9476458430290222, "learning_rate": 0.00020171280033722073, "loss": 4.7588, "step": 47870 }, { "epoch": 0.06727589192720912, "grad_norm": 0.9497449398040771, "learning_rate": 0.00020175495292960516, "loss": 4.8974, "step": 47880 }, { "epoch": 0.0672899428653727, "grad_norm": 0.9444364309310913, "learning_rate": 0.0002017971055219896, "loss": 4.884, "step": 47890 }, { "epoch": 0.06730399380353627, "grad_norm": 0.9414916634559631, "learning_rate": 0.000201839258114374, "loss": 4.9842, "step": 47900 }, { "epoch": 0.06731804474169985, "grad_norm": 0.9677934646606445, "learning_rate": 0.00020188141070675845, "loss": 4.8616, "step": 47910 }, { "epoch": 0.06733209567986342, "grad_norm": 0.9179354906082153, "learning_rate": 0.00020192356329914288, "loss": 4.9397, "step": 47920 }, { "epoch": 0.067346146618027, "grad_norm": 0.9381024241447449, 
"learning_rate": 0.00020196571589152731, "loss": 4.957, "step": 47930 }, { "epoch": 0.06736019755619058, "grad_norm": 0.9283067584037781, "learning_rate": 0.00020200786848391175, "loss": 4.6955, "step": 47940 }, { "epoch": 0.06737424849435415, "grad_norm": 0.9692529439926147, "learning_rate": 0.00020205002107629618, "loss": 4.8843, "step": 47950 }, { "epoch": 0.06738829943251773, "grad_norm": 0.9457141160964966, "learning_rate": 0.00020209217366868058, "loss": 4.955, "step": 47960 }, { "epoch": 0.06740235037068132, "grad_norm": 0.9691393375396729, "learning_rate": 0.00020213432626106504, "loss": 4.8771, "step": 47970 }, { "epoch": 0.0674164013088449, "grad_norm": 0.9532297253608704, "learning_rate": 0.00020217647885344947, "loss": 4.7826, "step": 47980 }, { "epoch": 0.06743045224700847, "grad_norm": 0.9230175614356995, "learning_rate": 0.0002022186314458339, "loss": 4.8278, "step": 47990 }, { "epoch": 0.06744450318517205, "grad_norm": 0.9743686318397522, "learning_rate": 0.00020226078403821833, "loss": 4.9041, "step": 48000 }, { "epoch": 0.06745855412333562, "grad_norm": 0.942179799079895, "learning_rate": 0.00020230293663060276, "loss": 4.8774, "step": 48010 }, { "epoch": 0.0674726050614992, "grad_norm": 0.9336681962013245, "learning_rate": 0.00020234508922298722, "loss": 4.9366, "step": 48020 }, { "epoch": 0.06748665599966278, "grad_norm": 1.0104656219482422, "learning_rate": 0.00020238724181537163, "loss": 4.9661, "step": 48030 }, { "epoch": 0.06750070693782635, "grad_norm": 0.9617270827293396, "learning_rate": 0.00020242939440775606, "loss": 4.8729, "step": 48040 }, { "epoch": 0.06751475787598993, "grad_norm": 0.8925577402114868, "learning_rate": 0.0002024715470001405, "loss": 4.9942, "step": 48050 }, { "epoch": 0.0675288088141535, "grad_norm": 0.9957881569862366, "learning_rate": 0.00020251369959252492, "loss": 4.8822, "step": 48060 }, { "epoch": 0.0675428597523171, "grad_norm": 0.8969820737838745, "learning_rate": 0.00020255585218490935, "loss": 4.8443, 
"step": 48070 }, { "epoch": 0.06755691069048067, "grad_norm": 0.8763751983642578, "learning_rate": 0.0002025980047772938, "loss": 4.8714, "step": 48080 }, { "epoch": 0.06757096162864425, "grad_norm": 0.9305700659751892, "learning_rate": 0.0002026401573696782, "loss": 4.8867, "step": 48090 }, { "epoch": 0.06758501256680782, "grad_norm": 0.9393258690834045, "learning_rate": 0.00020268230996206264, "loss": 4.9085, "step": 48100 }, { "epoch": 0.0675990635049714, "grad_norm": 0.9406274557113647, "learning_rate": 0.00020272446255444708, "loss": 4.9558, "step": 48110 }, { "epoch": 0.06761311444313498, "grad_norm": 0.9472900032997131, "learning_rate": 0.0002027666151468315, "loss": 4.8328, "step": 48120 }, { "epoch": 0.06762716538129855, "grad_norm": 0.9359706044197083, "learning_rate": 0.00020280876773921594, "loss": 4.9482, "step": 48130 }, { "epoch": 0.06764121631946213, "grad_norm": 0.9522212743759155, "learning_rate": 0.0002028509203316004, "loss": 4.8838, "step": 48140 }, { "epoch": 0.0676552672576257, "grad_norm": 0.9575254917144775, "learning_rate": 0.00020289307292398483, "loss": 4.9737, "step": 48150 }, { "epoch": 0.06766931819578928, "grad_norm": 0.9331732988357544, "learning_rate": 0.00020293522551636923, "loss": 4.7875, "step": 48160 }, { "epoch": 0.06768336913395286, "grad_norm": 0.9446621537208557, "learning_rate": 0.00020297737810875366, "loss": 4.8592, "step": 48170 }, { "epoch": 0.06769742007211645, "grad_norm": 0.9311910271644592, "learning_rate": 0.0002030195307011381, "loss": 4.8407, "step": 48180 }, { "epoch": 0.06771147101028002, "grad_norm": 0.9321035146713257, "learning_rate": 0.00020306168329352252, "loss": 4.9385, "step": 48190 }, { "epoch": 0.0677255219484436, "grad_norm": 0.9516061544418335, "learning_rate": 0.00020310383588590698, "loss": 4.9021, "step": 48200 }, { "epoch": 0.06773957288660717, "grad_norm": 0.9016589522361755, "learning_rate": 0.00020314598847829141, "loss": 5.0427, "step": 48210 }, { "epoch": 0.06775362382477075, "grad_norm": 
0.9039486646652222, "learning_rate": 0.00020318814107067582, "loss": 4.9687, "step": 48220 }, { "epoch": 0.06776767476293433, "grad_norm": 0.9151666164398193, "learning_rate": 0.00020323029366306025, "loss": 4.9319, "step": 48230 }, { "epoch": 0.0677817257010979, "grad_norm": 0.9722932577133179, "learning_rate": 0.00020327244625544468, "loss": 4.9052, "step": 48240 }, { "epoch": 0.06779577663926148, "grad_norm": 0.9459034204483032, "learning_rate": 0.0002033145988478291, "loss": 4.7892, "step": 48250 }, { "epoch": 0.06780982757742506, "grad_norm": 0.9252526760101318, "learning_rate": 0.00020335675144021357, "loss": 4.8437, "step": 48260 }, { "epoch": 0.06782387851558863, "grad_norm": 0.9979690909385681, "learning_rate": 0.000203398904032598, "loss": 4.9336, "step": 48270 }, { "epoch": 0.06783792945375222, "grad_norm": 1.0930742025375366, "learning_rate": 0.00020344105662498243, "loss": 4.8809, "step": 48280 }, { "epoch": 0.0678519803919158, "grad_norm": 0.9363275766372681, "learning_rate": 0.00020348320921736684, "loss": 4.829, "step": 48290 }, { "epoch": 0.06786603133007937, "grad_norm": 0.9431924223899841, "learning_rate": 0.00020352536180975127, "loss": 4.8887, "step": 48300 }, { "epoch": 0.06788008226824295, "grad_norm": 0.949468731880188, "learning_rate": 0.0002035675144021357, "loss": 4.9473, "step": 48310 }, { "epoch": 0.06789413320640653, "grad_norm": 0.9422009587287903, "learning_rate": 0.00020360966699452016, "loss": 4.8516, "step": 48320 }, { "epoch": 0.0679081841445701, "grad_norm": 0.9336867928504944, "learning_rate": 0.0002036518195869046, "loss": 4.8706, "step": 48330 }, { "epoch": 0.06792223508273368, "grad_norm": 0.9072600603103638, "learning_rate": 0.00020369397217928902, "loss": 4.9884, "step": 48340 }, { "epoch": 0.06793628602089725, "grad_norm": 0.9766586422920227, "learning_rate": 0.00020373612477167342, "loss": 4.8165, "step": 48350 }, { "epoch": 0.06795033695906083, "grad_norm": 0.9324946403503418, "learning_rate": 0.00020377827736405785, 
"loss": 4.8802, "step": 48360 }, { "epoch": 0.06796438789722441, "grad_norm": 0.949245035648346, "learning_rate": 0.00020382042995644229, "loss": 4.8723, "step": 48370 }, { "epoch": 0.067978438835388, "grad_norm": 0.9230749011039734, "learning_rate": 0.00020386258254882674, "loss": 4.8103, "step": 48380 }, { "epoch": 0.06799248977355157, "grad_norm": 0.9191190004348755, "learning_rate": 0.00020390473514121118, "loss": 4.8711, "step": 48390 }, { "epoch": 0.06800654071171515, "grad_norm": 0.9459436535835266, "learning_rate": 0.0002039468877335956, "loss": 4.8038, "step": 48400 }, { "epoch": 0.06802059164987873, "grad_norm": 0.9071534872055054, "learning_rate": 0.00020398904032598004, "loss": 4.8562, "step": 48410 }, { "epoch": 0.0680346425880423, "grad_norm": 0.9326480627059937, "learning_rate": 0.00020403119291836444, "loss": 4.9268, "step": 48420 }, { "epoch": 0.06804869352620588, "grad_norm": 0.9467244744300842, "learning_rate": 0.0002040733455107489, "loss": 4.92, "step": 48430 }, { "epoch": 0.06806274446436945, "grad_norm": 0.9642537832260132, "learning_rate": 0.00020411549810313333, "loss": 4.7312, "step": 48440 }, { "epoch": 0.06807679540253303, "grad_norm": 0.9437421560287476, "learning_rate": 0.00020415765069551776, "loss": 4.8087, "step": 48450 }, { "epoch": 0.0680908463406966, "grad_norm": 0.9311556816101074, "learning_rate": 0.0002041998032879022, "loss": 4.8312, "step": 48460 }, { "epoch": 0.06810489727886018, "grad_norm": 1.035953164100647, "learning_rate": 0.00020424195588028662, "loss": 4.9495, "step": 48470 }, { "epoch": 0.06811894821702376, "grad_norm": 0.9514090418815613, "learning_rate": 0.00020428410847267103, "loss": 4.8289, "step": 48480 }, { "epoch": 0.06813299915518735, "grad_norm": 1.0147558450698853, "learning_rate": 0.0002043262610650555, "loss": 4.793, "step": 48490 }, { "epoch": 0.06814705009335092, "grad_norm": 0.947508692741394, "learning_rate": 0.00020436841365743992, "loss": 4.836, "step": 48500 }, { "epoch": 0.0681611010315145, 
"grad_norm": 0.9145636558532715, "learning_rate": 0.00020441056624982435, "loss": 4.7227, "step": 48510 }, { "epoch": 0.06817515196967808, "grad_norm": 0.9304710030555725, "learning_rate": 0.00020445271884220878, "loss": 4.9376, "step": 48520 }, { "epoch": 0.06818920290784165, "grad_norm": 0.9380136728286743, "learning_rate": 0.0002044948714345932, "loss": 4.9731, "step": 48530 }, { "epoch": 0.06820325384600523, "grad_norm": 0.9320715665817261, "learning_rate": 0.00020453702402697762, "loss": 4.7998, "step": 48540 }, { "epoch": 0.0682173047841688, "grad_norm": 0.939115047454834, "learning_rate": 0.00020457917661936207, "loss": 4.9202, "step": 48550 }, { "epoch": 0.06823135572233238, "grad_norm": 0.9177758693695068, "learning_rate": 0.0002046213292117465, "loss": 4.865, "step": 48560 }, { "epoch": 0.06824540666049596, "grad_norm": 0.9489643573760986, "learning_rate": 0.00020466348180413094, "loss": 4.8896, "step": 48570 }, { "epoch": 0.06825945759865953, "grad_norm": 0.9503336548805237, "learning_rate": 0.00020470563439651537, "loss": 4.7754, "step": 48580 }, { "epoch": 0.06827350853682312, "grad_norm": 0.920091986656189, "learning_rate": 0.0002047477869888998, "loss": 4.9028, "step": 48590 }, { "epoch": 0.0682875594749867, "grad_norm": 0.9541656970977783, "learning_rate": 0.00020478993958128426, "loss": 4.8475, "step": 48600 }, { "epoch": 0.06830161041315028, "grad_norm": 0.9243221879005432, "learning_rate": 0.00020483209217366866, "loss": 4.8828, "step": 48610 }, { "epoch": 0.06831566135131385, "grad_norm": 0.9231835603713989, "learning_rate": 0.0002048742447660531, "loss": 4.9098, "step": 48620 }, { "epoch": 0.06832971228947743, "grad_norm": 0.9024877548217773, "learning_rate": 0.00020491639735843752, "loss": 4.8797, "step": 48630 }, { "epoch": 0.068343763227641, "grad_norm": 0.9409641623497009, "learning_rate": 0.00020495854995082195, "loss": 4.864, "step": 48640 }, { "epoch": 0.06835781416580458, "grad_norm": 0.9287962317466736, "learning_rate": 
0.00020500070254320639, "loss": 4.8829, "step": 48650 }, { "epoch": 0.06837186510396816, "grad_norm": 0.9256884455680847, "learning_rate": 0.00020504285513559084, "loss": 4.7937, "step": 48660 }, { "epoch": 0.06838591604213173, "grad_norm": 0.9251719117164612, "learning_rate": 0.00020508500772797525, "loss": 4.9763, "step": 48670 }, { "epoch": 0.06839996698029531, "grad_norm": 0.9523289203643799, "learning_rate": 0.00020512716032035968, "loss": 4.9023, "step": 48680 }, { "epoch": 0.0684140179184589, "grad_norm": 0.9629274606704712, "learning_rate": 0.0002051693129127441, "loss": 4.9088, "step": 48690 }, { "epoch": 0.06842806885662248, "grad_norm": 0.9680508971214294, "learning_rate": 0.00020521146550512854, "loss": 4.9068, "step": 48700 }, { "epoch": 0.06844211979478605, "grad_norm": 0.989648699760437, "learning_rate": 0.00020525361809751297, "loss": 4.7496, "step": 48710 }, { "epoch": 0.06845617073294963, "grad_norm": 0.9049251079559326, "learning_rate": 0.00020529577068989743, "loss": 4.9653, "step": 48720 }, { "epoch": 0.0684702216711132, "grad_norm": 0.9711537957191467, "learning_rate": 0.00020533792328228186, "loss": 4.9024, "step": 48730 }, { "epoch": 0.06848427260927678, "grad_norm": 0.987939178943634, "learning_rate": 0.00020538007587466627, "loss": 4.8989, "step": 48740 }, { "epoch": 0.06849832354744036, "grad_norm": 0.9508666396141052, "learning_rate": 0.0002054222284670507, "loss": 4.8888, "step": 48750 }, { "epoch": 0.06851237448560393, "grad_norm": 1.0984941720962524, "learning_rate": 0.00020546438105943513, "loss": 4.8828, "step": 48760 }, { "epoch": 0.06852642542376751, "grad_norm": 0.9066498875617981, "learning_rate": 0.00020550653365181956, "loss": 4.7879, "step": 48770 }, { "epoch": 0.06854047636193109, "grad_norm": 0.957650899887085, "learning_rate": 0.00020554868624420402, "loss": 4.9034, "step": 48780 }, { "epoch": 0.06855452730009466, "grad_norm": 0.9184542894363403, "learning_rate": 0.00020559083883658845, "loss": 4.9151, "step": 48790 }, { 
"epoch": 0.06856857823825825, "grad_norm": 0.9331673383712769, "learning_rate": 0.00020563299142897285, "loss": 4.764, "step": 48800 }, { "epoch": 0.06858262917642183, "grad_norm": 0.9273619055747986, "learning_rate": 0.00020567514402135728, "loss": 4.8304, "step": 48810 }, { "epoch": 0.0685966801145854, "grad_norm": 0.962047815322876, "learning_rate": 0.00020571729661374172, "loss": 4.7868, "step": 48820 }, { "epoch": 0.06861073105274898, "grad_norm": 0.9674386382102966, "learning_rate": 0.00020575944920612615, "loss": 4.8832, "step": 48830 }, { "epoch": 0.06862478199091256, "grad_norm": 0.925841212272644, "learning_rate": 0.0002058016017985106, "loss": 4.8394, "step": 48840 }, { "epoch": 0.06863883292907613, "grad_norm": 0.9452566504478455, "learning_rate": 0.00020584375439089504, "loss": 4.8387, "step": 48850 }, { "epoch": 0.06865288386723971, "grad_norm": 1.0907858610153198, "learning_rate": 0.00020588590698327947, "loss": 4.873, "step": 48860 }, { "epoch": 0.06866693480540328, "grad_norm": 0.9650761485099792, "learning_rate": 0.00020592805957566387, "loss": 4.9087, "step": 48870 }, { "epoch": 0.06868098574356686, "grad_norm": 0.8910475373268127, "learning_rate": 0.0002059702121680483, "loss": 4.8725, "step": 48880 }, { "epoch": 0.06869503668173044, "grad_norm": 0.9114874005317688, "learning_rate": 0.00020601236476043273, "loss": 4.9084, "step": 48890 }, { "epoch": 0.06870908761989403, "grad_norm": 0.9370610117912292, "learning_rate": 0.0002060545173528172, "loss": 4.8315, "step": 48900 }, { "epoch": 0.0687231385580576, "grad_norm": 0.9323891401290894, "learning_rate": 0.00020609666994520162, "loss": 4.7051, "step": 48910 }, { "epoch": 0.06873718949622118, "grad_norm": 1.0027904510498047, "learning_rate": 0.00020613882253758605, "loss": 4.87, "step": 48920 }, { "epoch": 0.06875124043438476, "grad_norm": 0.9343486428260803, "learning_rate": 0.00020618097512997046, "loss": 4.8427, "step": 48930 }, { "epoch": 0.06876529137254833, "grad_norm": 0.9870249032974243, 
"learning_rate": 0.0002062231277223549, "loss": 4.8214, "step": 48940 }, { "epoch": 0.06877934231071191, "grad_norm": 0.9833707213401794, "learning_rate": 0.00020626528031473932, "loss": 4.9394, "step": 48950 }, { "epoch": 0.06879339324887548, "grad_norm": 0.9330481290817261, "learning_rate": 0.00020630743290712378, "loss": 4.8596, "step": 48960 }, { "epoch": 0.06880744418703906, "grad_norm": 0.9530215859413147, "learning_rate": 0.0002063495854995082, "loss": 4.892, "step": 48970 }, { "epoch": 0.06882149512520264, "grad_norm": 0.9184706807136536, "learning_rate": 0.00020639173809189264, "loss": 4.9649, "step": 48980 }, { "epoch": 0.06883554606336621, "grad_norm": 0.9311974048614502, "learning_rate": 0.00020643389068427707, "loss": 4.9251, "step": 48990 }, { "epoch": 0.0688495970015298, "grad_norm": 0.9431549310684204, "learning_rate": 0.00020647604327666148, "loss": 4.8339, "step": 49000 }, { "epoch": 0.06886364793969338, "grad_norm": 1.0781751871109009, "learning_rate": 0.00020651819586904593, "loss": 4.915, "step": 49010 }, { "epoch": 0.06887769887785695, "grad_norm": 0.9329824447631836, "learning_rate": 0.00020656034846143037, "loss": 4.8891, "step": 49020 }, { "epoch": 0.06889174981602053, "grad_norm": 0.9006100296974182, "learning_rate": 0.0002066025010538148, "loss": 4.9299, "step": 49030 }, { "epoch": 0.06890580075418411, "grad_norm": 0.8924393057823181, "learning_rate": 0.00020664465364619923, "loss": 4.7221, "step": 49040 }, { "epoch": 0.06891985169234768, "grad_norm": 0.9394928216934204, "learning_rate": 0.00020668680623858366, "loss": 4.8564, "step": 49050 }, { "epoch": 0.06893390263051126, "grad_norm": 0.9694632291793823, "learning_rate": 0.00020672895883096806, "loss": 4.8552, "step": 49060 }, { "epoch": 0.06894795356867484, "grad_norm": 0.9108564853668213, "learning_rate": 0.00020677111142335252, "loss": 4.8766, "step": 49070 }, { "epoch": 0.06896200450683841, "grad_norm": 0.9487257599830627, "learning_rate": 0.00020681326401573695, "loss": 4.7678, 
"step": 49080 }, { "epoch": 0.06897605544500199, "grad_norm": 0.9499151706695557, "learning_rate": 0.00020685541660812138, "loss": 4.822, "step": 49090 }, { "epoch": 0.06899010638316556, "grad_norm": 1.0061317682266235, "learning_rate": 0.00020689756920050582, "loss": 4.8902, "step": 49100 }, { "epoch": 0.06900415732132915, "grad_norm": 0.9726933240890503, "learning_rate": 0.00020693972179289025, "loss": 4.8946, "step": 49110 }, { "epoch": 0.06901820825949273, "grad_norm": 0.8891914486885071, "learning_rate": 0.00020698187438527465, "loss": 4.9583, "step": 49120 }, { "epoch": 0.0690322591976563, "grad_norm": 0.9320970773696899, "learning_rate": 0.0002070240269776591, "loss": 4.825, "step": 49130 }, { "epoch": 0.06904631013581988, "grad_norm": 0.8775174021720886, "learning_rate": 0.00020706617957004354, "loss": 4.8636, "step": 49140 }, { "epoch": 0.06906036107398346, "grad_norm": 0.9681304097175598, "learning_rate": 0.00020710411690318955, "loss": 4.8397, "step": 49150 }, { "epoch": 0.06907441201214704, "grad_norm": 0.9166766405105591, "learning_rate": 0.00020714626949557395, "loss": 4.8822, "step": 49160 }, { "epoch": 0.06908846295031061, "grad_norm": 0.9451488852500916, "learning_rate": 0.00020718842208795838, "loss": 4.9481, "step": 49170 }, { "epoch": 0.06910251388847419, "grad_norm": 1.025514006614685, "learning_rate": 0.00020723057468034282, "loss": 4.7886, "step": 49180 }, { "epoch": 0.06911656482663776, "grad_norm": 1.0833834409713745, "learning_rate": 0.00020727272727272725, "loss": 4.8875, "step": 49190 }, { "epoch": 0.06913061576480134, "grad_norm": 0.9185973405838013, "learning_rate": 0.0002073148798651117, "loss": 4.9811, "step": 49200 }, { "epoch": 0.06914466670296493, "grad_norm": 0.9156882762908936, "learning_rate": 0.00020735703245749614, "loss": 4.8988, "step": 49210 }, { "epoch": 0.0691587176411285, "grad_norm": 0.9855953454971313, "learning_rate": 0.00020739918504988054, "loss": 4.7664, "step": 49220 }, { "epoch": 0.06917276857929208, 
"grad_norm": 0.9379180073738098, "learning_rate": 0.00020744133764226497, "loss": 4.7998, "step": 49230 }, { "epoch": 0.06918681951745566, "grad_norm": 0.901465892791748, "learning_rate": 0.0002074834902346494, "loss": 4.9366, "step": 49240 }, { "epoch": 0.06920087045561923, "grad_norm": 0.9280766844749451, "learning_rate": 0.00020752564282703383, "loss": 4.9037, "step": 49250 }, { "epoch": 0.06921492139378281, "grad_norm": 1.0117437839508057, "learning_rate": 0.0002075677954194183, "loss": 4.8711, "step": 49260 }, { "epoch": 0.06922897233194639, "grad_norm": 0.936126708984375, "learning_rate": 0.00020760994801180272, "loss": 4.8852, "step": 49270 }, { "epoch": 0.06924302327010996, "grad_norm": 0.907888650894165, "learning_rate": 0.00020765210060418715, "loss": 4.8348, "step": 49280 }, { "epoch": 0.06925707420827354, "grad_norm": 0.9258394241333008, "learning_rate": 0.00020769425319657156, "loss": 4.912, "step": 49290 }, { "epoch": 0.06927112514643712, "grad_norm": 0.9378911852836609, "learning_rate": 0.000207736405788956, "loss": 4.9453, "step": 49300 }, { "epoch": 0.0692851760846007, "grad_norm": 0.9445339441299438, "learning_rate": 0.00020777855838134042, "loss": 4.7425, "step": 49310 }, { "epoch": 0.06929922702276428, "grad_norm": 0.9408323168754578, "learning_rate": 0.00020782071097372488, "loss": 4.8162, "step": 49320 }, { "epoch": 0.06931327796092786, "grad_norm": 0.9732528328895569, "learning_rate": 0.0002078628635661093, "loss": 4.8715, "step": 49330 }, { "epoch": 0.06932732889909143, "grad_norm": 0.9590655565261841, "learning_rate": 0.00020790501615849374, "loss": 4.7933, "step": 49340 }, { "epoch": 0.06934137983725501, "grad_norm": 0.9126473069190979, "learning_rate": 0.00020794716875087815, "loss": 4.8097, "step": 49350 }, { "epoch": 0.06935543077541859, "grad_norm": 0.9477401375770569, "learning_rate": 0.00020798932134326258, "loss": 4.9911, "step": 49360 }, { "epoch": 0.06936948171358216, "grad_norm": 0.897299587726593, "learning_rate": 
0.000208031473935647, "loss": 4.9375, "step": 49370 }, { "epoch": 0.06938353265174574, "grad_norm": 0.951884388923645, "learning_rate": 0.00020807362652803147, "loss": 4.78, "step": 49380 }, { "epoch": 0.06939758358990931, "grad_norm": 0.9042193293571472, "learning_rate": 0.0002081157791204159, "loss": 4.8414, "step": 49390 }, { "epoch": 0.06941163452807289, "grad_norm": 0.9469078183174133, "learning_rate": 0.00020815793171280033, "loss": 4.9658, "step": 49400 }, { "epoch": 0.06942568546623647, "grad_norm": 0.9061809778213501, "learning_rate": 0.00020820008430518476, "loss": 4.7509, "step": 49410 }, { "epoch": 0.06943973640440006, "grad_norm": 0.9353413581848145, "learning_rate": 0.00020824223689756916, "loss": 4.8421, "step": 49420 }, { "epoch": 0.06945378734256363, "grad_norm": 0.9837194085121155, "learning_rate": 0.0002082843894899536, "loss": 4.8281, "step": 49430 }, { "epoch": 0.06946783828072721, "grad_norm": 0.9294975996017456, "learning_rate": 0.00020832654208233805, "loss": 4.7543, "step": 49440 }, { "epoch": 0.06948188921889079, "grad_norm": 0.9024710059165955, "learning_rate": 0.00020836869467472248, "loss": 4.8502, "step": 49450 }, { "epoch": 0.06949594015705436, "grad_norm": 0.9258474707603455, "learning_rate": 0.00020841084726710692, "loss": 4.9781, "step": 49460 }, { "epoch": 0.06950999109521794, "grad_norm": 1.0224697589874268, "learning_rate": 0.00020845299985949135, "loss": 4.766, "step": 49470 }, { "epoch": 0.06952404203338151, "grad_norm": 0.9513781070709229, "learning_rate": 0.00020849515245187575, "loss": 4.879, "step": 49480 }, { "epoch": 0.06953809297154509, "grad_norm": 0.9261512160301208, "learning_rate": 0.00020853730504426018, "loss": 4.7467, "step": 49490 }, { "epoch": 0.06955214390970867, "grad_norm": 0.9599387049674988, "learning_rate": 0.00020857945763664464, "loss": 4.9011, "step": 49500 }, { "epoch": 0.06956619484787224, "grad_norm": 0.908785343170166, "learning_rate": 0.00020862161022902907, "loss": 4.8237, "step": 49510 }, { 
"epoch": 0.06958024578603583, "grad_norm": 0.8983234167098999, "learning_rate": 0.0002086637628214135, "loss": 4.9282, "step": 49520 }, { "epoch": 0.06959429672419941, "grad_norm": 0.8948048949241638, "learning_rate": 0.00020870591541379793, "loss": 4.9866, "step": 49530 }, { "epoch": 0.06960834766236298, "grad_norm": 0.8868510127067566, "learning_rate": 0.00020874806800618237, "loss": 4.8474, "step": 49540 }, { "epoch": 0.06962239860052656, "grad_norm": 1.2011126279830933, "learning_rate": 0.00020879022059856677, "loss": 4.9062, "step": 49550 }, { "epoch": 0.06963644953869014, "grad_norm": 0.9646673202514648, "learning_rate": 0.00020883237319095123, "loss": 4.7145, "step": 49560 }, { "epoch": 0.06965050047685371, "grad_norm": 0.9322654604911804, "learning_rate": 0.00020887452578333566, "loss": 4.9656, "step": 49570 }, { "epoch": 0.06966455141501729, "grad_norm": 0.9516728520393372, "learning_rate": 0.0002089166783757201, "loss": 4.8092, "step": 49580 }, { "epoch": 0.06967860235318087, "grad_norm": 0.9191582202911377, "learning_rate": 0.00020895883096810452, "loss": 4.8314, "step": 49590 }, { "epoch": 0.06969265329134444, "grad_norm": 0.9767720699310303, "learning_rate": 0.00020900098356048898, "loss": 4.9208, "step": 49600 }, { "epoch": 0.06970670422950802, "grad_norm": 0.9100189208984375, "learning_rate": 0.00020904313615287336, "loss": 4.8623, "step": 49610 }, { "epoch": 0.06972075516767161, "grad_norm": 0.9272692203521729, "learning_rate": 0.00020908528874525781, "loss": 4.7475, "step": 49620 }, { "epoch": 0.06973480610583518, "grad_norm": 0.9022219181060791, "learning_rate": 0.00020912744133764225, "loss": 4.8501, "step": 49630 }, { "epoch": 0.06974885704399876, "grad_norm": 0.9093071222305298, "learning_rate": 0.00020916959393002668, "loss": 4.9728, "step": 49640 }, { "epoch": 0.06976290798216234, "grad_norm": 0.9411072134971619, "learning_rate": 0.0002092117465224111, "loss": 4.7739, "step": 49650 }, { "epoch": 0.06977695892032591, "grad_norm": 
0.9116350412368774, "learning_rate": 0.00020925389911479557, "loss": 4.8922, "step": 49660 }, { "epoch": 0.06979100985848949, "grad_norm": 0.9344385266304016, "learning_rate": 0.00020929605170717997, "loss": 4.8928, "step": 49670 }, { "epoch": 0.06980506079665307, "grad_norm": 0.9087563157081604, "learning_rate": 0.0002093382042995644, "loss": 4.818, "step": 49680 }, { "epoch": 0.06981911173481664, "grad_norm": 0.8912515044212341, "learning_rate": 0.00020938035689194883, "loss": 4.8745, "step": 49690 }, { "epoch": 0.06983316267298022, "grad_norm": 0.9308001399040222, "learning_rate": 0.00020942250948433326, "loss": 4.8263, "step": 49700 }, { "epoch": 0.0698472136111438, "grad_norm": 0.910260021686554, "learning_rate": 0.0002094646620767177, "loss": 4.753, "step": 49710 }, { "epoch": 0.06986126454930737, "grad_norm": 0.9524611830711365, "learning_rate": 0.00020950681466910215, "loss": 4.8124, "step": 49720 }, { "epoch": 0.06987531548747096, "grad_norm": 0.8876780271530151, "learning_rate": 0.00020954896726148658, "loss": 4.934, "step": 49730 }, { "epoch": 0.06988936642563454, "grad_norm": 0.9967767596244812, "learning_rate": 0.000209591119853871, "loss": 4.8565, "step": 49740 }, { "epoch": 0.06990341736379811, "grad_norm": 0.9247635006904602, "learning_rate": 0.000209629057187017, "loss": 4.8617, "step": 49750 }, { "epoch": 0.06991746830196169, "grad_norm": 0.886182427406311, "learning_rate": 0.00020967120977940143, "loss": 4.9107, "step": 49760 }, { "epoch": 0.06993151924012526, "grad_norm": 0.963778555393219, "learning_rate": 0.00020971336237178583, "loss": 4.8586, "step": 49770 }, { "epoch": 0.06994557017828884, "grad_norm": 0.9098008275032043, "learning_rate": 0.00020975551496417026, "loss": 4.8441, "step": 49780 }, { "epoch": 0.06995962111645242, "grad_norm": 0.9119469523429871, "learning_rate": 0.0002097976675565547, "loss": 4.8072, "step": 49790 }, { "epoch": 0.06997367205461599, "grad_norm": 0.920755922794342, "learning_rate": 0.00020983982014893915, "loss": 
4.8332, "step": 49800 }, { "epoch": 0.06998772299277957, "grad_norm": 0.9371573328971863, "learning_rate": 0.00020988197274132359, "loss": 4.7674, "step": 49810 }, { "epoch": 0.07000177393094315, "grad_norm": 0.926888108253479, "learning_rate": 0.00020992412533370802, "loss": 4.7776, "step": 49820 }, { "epoch": 0.07001582486910674, "grad_norm": 0.9252434968948364, "learning_rate": 0.00020996627792609245, "loss": 4.8627, "step": 49830 }, { "epoch": 0.07002987580727031, "grad_norm": 0.8703329563140869, "learning_rate": 0.00021000843051847685, "loss": 5.0105, "step": 49840 }, { "epoch": 0.07004392674543389, "grad_norm": 0.9439218640327454, "learning_rate": 0.00021005058311086128, "loss": 4.7864, "step": 49850 }, { "epoch": 0.07005797768359746, "grad_norm": 0.9055263996124268, "learning_rate": 0.00021009273570324574, "loss": 4.8109, "step": 49860 }, { "epoch": 0.07007202862176104, "grad_norm": 0.9342459440231323, "learning_rate": 0.00021013488829563017, "loss": 4.8661, "step": 49870 }, { "epoch": 0.07008607955992462, "grad_norm": 0.910137414932251, "learning_rate": 0.0002101770408880146, "loss": 4.8083, "step": 49880 }, { "epoch": 0.07010013049808819, "grad_norm": 0.9078051447868347, "learning_rate": 0.00021021919348039903, "loss": 5.0483, "step": 49890 }, { "epoch": 0.07011418143625177, "grad_norm": 0.9174194931983948, "learning_rate": 0.00021026134607278344, "loss": 4.9562, "step": 49900 }, { "epoch": 0.07012823237441534, "grad_norm": 0.9276567101478577, "learning_rate": 0.00021030349866516787, "loss": 4.8575, "step": 49910 }, { "epoch": 0.07014228331257892, "grad_norm": 0.9092664122581482, "learning_rate": 0.00021034565125755233, "loss": 4.673, "step": 49920 }, { "epoch": 0.07015633425074251, "grad_norm": 0.9359709620475769, "learning_rate": 0.00021038780384993676, "loss": 4.8121, "step": 49930 }, { "epoch": 0.07017038518890609, "grad_norm": 0.9264384508132935, "learning_rate": 0.0002104299564423212, "loss": 4.825, "step": 49940 }, { "epoch": 0.07018443612706966, 
"grad_norm": 0.881436824798584, "learning_rate": 0.00021047210903470562, "loss": 4.7949, "step": 49950 }, { "epoch": 0.07019848706523324, "grad_norm": 0.9295757412910461, "learning_rate": 0.00021051426162709005, "loss": 4.8378, "step": 49960 }, { "epoch": 0.07021253800339682, "grad_norm": 0.9156924486160278, "learning_rate": 0.00021055641421947446, "loss": 4.9212, "step": 49970 }, { "epoch": 0.07022658894156039, "grad_norm": 0.9250329732894897, "learning_rate": 0.00021059856681185892, "loss": 4.8159, "step": 49980 }, { "epoch": 0.07024063987972397, "grad_norm": 0.88548743724823, "learning_rate": 0.00021064071940424335, "loss": 4.858, "step": 49990 }, { "epoch": 0.07025469081788754, "grad_norm": 0.9146063327789307, "learning_rate": 0.00021068287199662778, "loss": 4.8865, "step": 50000 }, { "epoch": 0.07026874175605112, "grad_norm": 0.9075859785079956, "learning_rate": 0.0002107250245890122, "loss": 4.8735, "step": 50010 }, { "epoch": 0.0702827926942147, "grad_norm": 0.9209066033363342, "learning_rate": 0.00021076717718139664, "loss": 4.8269, "step": 50020 }, { "epoch": 0.07029684363237827, "grad_norm": 0.9381097555160522, "learning_rate": 0.00021080932977378104, "loss": 4.9601, "step": 50030 }, { "epoch": 0.07031089457054186, "grad_norm": 0.8956311941146851, "learning_rate": 0.0002108514823661655, "loss": 4.9086, "step": 50040 }, { "epoch": 0.07032494550870544, "grad_norm": 0.9148889780044556, "learning_rate": 0.00021089363495854993, "loss": 4.7701, "step": 50050 }, { "epoch": 0.07033899644686901, "grad_norm": 0.9117963314056396, "learning_rate": 0.00021093578755093436, "loss": 4.9762, "step": 50060 }, { "epoch": 0.07035304738503259, "grad_norm": 0.9547762870788574, "learning_rate": 0.0002109779401433188, "loss": 4.8947, "step": 50070 }, { "epoch": 0.07036709832319617, "grad_norm": 0.9054858088493347, "learning_rate": 0.00021102009273570323, "loss": 4.8054, "step": 50080 }, { "epoch": 0.07038114926135974, "grad_norm": 0.8751288652420044, "learning_rate": 
0.00021106224532808763, "loss": 4.8845, "step": 50090 }, { "epoch": 0.07039520019952332, "grad_norm": 0.9098647236824036, "learning_rate": 0.0002111043979204721, "loss": 4.7945, "step": 50100 }, { "epoch": 0.0704092511376869, "grad_norm": 0.9443636536598206, "learning_rate": 0.00021114655051285652, "loss": 4.8985, "step": 50110 }, { "epoch": 0.07042330207585047, "grad_norm": 0.9238690137863159, "learning_rate": 0.00021118870310524095, "loss": 4.78, "step": 50120 }, { "epoch": 0.07043735301401405, "grad_norm": 0.9531887173652649, "learning_rate": 0.00021123085569762538, "loss": 4.7975, "step": 50130 }, { "epoch": 0.07045140395217764, "grad_norm": 0.9312260150909424, "learning_rate": 0.00021127300829000981, "loss": 4.8473, "step": 50140 }, { "epoch": 0.07046545489034121, "grad_norm": 0.9800652861595154, "learning_rate": 0.00021131516088239427, "loss": 4.8799, "step": 50150 }, { "epoch": 0.07047950582850479, "grad_norm": 0.8918007016181946, "learning_rate": 0.00021135731347477868, "loss": 4.967, "step": 50160 }, { "epoch": 0.07049355676666837, "grad_norm": 0.9277655482292175, "learning_rate": 0.0002113994660671631, "loss": 4.7375, "step": 50170 }, { "epoch": 0.07050760770483194, "grad_norm": 0.9381228089332581, "learning_rate": 0.00021144161865954754, "loss": 4.8449, "step": 50180 }, { "epoch": 0.07052165864299552, "grad_norm": 0.9662410616874695, "learning_rate": 0.00021148377125193197, "loss": 4.8809, "step": 50190 }, { "epoch": 0.0705357095811591, "grad_norm": 0.9184268116950989, "learning_rate": 0.0002115259238443164, "loss": 4.9633, "step": 50200 }, { "epoch": 0.07054976051932267, "grad_norm": 0.9069191217422485, "learning_rate": 0.00021156807643670086, "loss": 4.8526, "step": 50210 }, { "epoch": 0.07056381145748625, "grad_norm": 0.908988356590271, "learning_rate": 0.00021161022902908526, "loss": 4.7715, "step": 50220 }, { "epoch": 0.07057786239564982, "grad_norm": 0.9809160828590393, "learning_rate": 0.0002116523816214697, "loss": 4.9594, "step": 50230 }, { 
"epoch": 0.07059191333381341, "grad_norm": 0.9043667912483215, "learning_rate": 0.00021169453421385413, "loss": 4.8035, "step": 50240 }, { "epoch": 0.07060596427197699, "grad_norm": 1.00430166721344, "learning_rate": 0.00021173668680623856, "loss": 4.8976, "step": 50250 }, { "epoch": 0.07062001521014057, "grad_norm": 0.9381590485572815, "learning_rate": 0.00021177883939862301, "loss": 4.8531, "step": 50260 }, { "epoch": 0.07063406614830414, "grad_norm": 0.8797604441642761, "learning_rate": 0.00021182099199100745, "loss": 4.8615, "step": 50270 }, { "epoch": 0.07064811708646772, "grad_norm": 0.8880553245544434, "learning_rate": 0.00021186314458339188, "loss": 4.8398, "step": 50280 }, { "epoch": 0.0706621680246313, "grad_norm": 0.9292892217636108, "learning_rate": 0.00021190529717577628, "loss": 4.9145, "step": 50290 }, { "epoch": 0.07067621896279487, "grad_norm": 0.9727832078933716, "learning_rate": 0.0002119474497681607, "loss": 4.7891, "step": 50300 }, { "epoch": 0.07069026990095845, "grad_norm": 0.9402398467063904, "learning_rate": 0.00021198960236054514, "loss": 4.8532, "step": 50310 }, { "epoch": 0.07070432083912202, "grad_norm": 0.9445501565933228, "learning_rate": 0.0002120317549529296, "loss": 4.8188, "step": 50320 }, { "epoch": 0.0707183717772856, "grad_norm": 0.9223546385765076, "learning_rate": 0.00021207390754531403, "loss": 4.9161, "step": 50330 }, { "epoch": 0.07073242271544918, "grad_norm": 0.9386568069458008, "learning_rate": 0.00021211606013769846, "loss": 4.9415, "step": 50340 }, { "epoch": 0.07074647365361277, "grad_norm": 0.9313529133796692, "learning_rate": 0.00021215821273008287, "loss": 4.8792, "step": 50350 }, { "epoch": 0.07076052459177634, "grad_norm": 0.9341327548027039, "learning_rate": 0.0002122003653224673, "loss": 4.8473, "step": 50360 }, { "epoch": 0.07077457552993992, "grad_norm": 0.8974583745002747, "learning_rate": 0.00021224251791485173, "loss": 4.919, "step": 50370 }, { "epoch": 0.0707886264681035, "grad_norm": 0.9477220177650452, 
"learning_rate": 0.0002122846705072362, "loss": 4.9454, "step": 50380 }, { "epoch": 0.07080267740626707, "grad_norm": 0.9393336176872253, "learning_rate": 0.00021232682309962062, "loss": 5.068, "step": 50390 }, { "epoch": 0.07081672834443065, "grad_norm": 0.9357321858406067, "learning_rate": 0.00021236897569200505, "loss": 4.794, "step": 50400 }, { "epoch": 0.07083077928259422, "grad_norm": 0.9102789163589478, "learning_rate": 0.00021241112828438948, "loss": 4.9233, "step": 50410 }, { "epoch": 0.0708448302207578, "grad_norm": 0.9070646166801453, "learning_rate": 0.00021245328087677389, "loss": 4.9097, "step": 50420 }, { "epoch": 0.07085888115892137, "grad_norm": 0.8896940350532532, "learning_rate": 0.00021249543346915832, "loss": 4.9443, "step": 50430 }, { "epoch": 0.07087293209708495, "grad_norm": 0.9510523080825806, "learning_rate": 0.00021253758606154278, "loss": 4.7866, "step": 50440 }, { "epoch": 0.07088698303524854, "grad_norm": 0.9242532253265381, "learning_rate": 0.0002125797386539272, "loss": 4.8039, "step": 50450 }, { "epoch": 0.07090103397341212, "grad_norm": 0.9695941209793091, "learning_rate": 0.00021262189124631164, "loss": 4.859, "step": 50460 }, { "epoch": 0.0709150849115757, "grad_norm": 0.8837727904319763, "learning_rate": 0.00021266404383869607, "loss": 4.9475, "step": 50470 }, { "epoch": 0.07092913584973927, "grad_norm": 0.9693347811698914, "learning_rate": 0.00021270619643108047, "loss": 4.8531, "step": 50480 }, { "epoch": 0.07094318678790285, "grad_norm": 0.9365562796592712, "learning_rate": 0.0002127483490234649, "loss": 4.8912, "step": 50490 }, { "epoch": 0.07095723772606642, "grad_norm": 0.8954295516014099, "learning_rate": 0.00021279050161584936, "loss": 4.8255, "step": 50500 }, { "epoch": 0.07097128866423, "grad_norm": 0.9566641449928284, "learning_rate": 0.0002128326542082338, "loss": 4.9466, "step": 50510 }, { "epoch": 0.07098533960239357, "grad_norm": 0.8944385647773743, "learning_rate": 0.00021287480680061823, "loss": 4.9177, "step": 
50520 }, { "epoch": 0.07099939054055715, "grad_norm": 0.8918579816818237, "learning_rate": 0.00021291695939300266, "loss": 4.7941, "step": 50530 }, { "epoch": 0.07101344147872073, "grad_norm": 0.8937610387802124, "learning_rate": 0.0002129591119853871, "loss": 4.8411, "step": 50540 }, { "epoch": 0.07102749241688432, "grad_norm": 0.9060569405555725, "learning_rate": 0.0002130012645777715, "loss": 4.7955, "step": 50550 }, { "epoch": 0.07104154335504789, "grad_norm": 0.9308838248252869, "learning_rate": 0.00021304341717015595, "loss": 4.8368, "step": 50560 }, { "epoch": 0.07105559429321147, "grad_norm": 0.8938664793968201, "learning_rate": 0.00021308556976254038, "loss": 4.8399, "step": 50570 }, { "epoch": 0.07106964523137504, "grad_norm": 0.8650916814804077, "learning_rate": 0.0002131277223549248, "loss": 4.9222, "step": 50580 }, { "epoch": 0.07108369616953862, "grad_norm": 0.9728739857673645, "learning_rate": 0.00021316987494730924, "loss": 4.7345, "step": 50590 }, { "epoch": 0.0710977471077022, "grad_norm": 0.912001371383667, "learning_rate": 0.00021321202753969367, "loss": 4.88, "step": 50600 }, { "epoch": 0.07111179804586577, "grad_norm": 0.9506505131721497, "learning_rate": 0.00021325418013207808, "loss": 4.8297, "step": 50610 }, { "epoch": 0.07112584898402935, "grad_norm": 0.9795990586280823, "learning_rate": 0.00021329633272446254, "loss": 4.8647, "step": 50620 }, { "epoch": 0.07113989992219293, "grad_norm": 0.9694528579711914, "learning_rate": 0.00021333848531684697, "loss": 4.8235, "step": 50630 }, { "epoch": 0.0711539508603565, "grad_norm": 0.9510214924812317, "learning_rate": 0.0002133806379092314, "loss": 4.9937, "step": 50640 }, { "epoch": 0.07116800179852008, "grad_norm": 0.9015481472015381, "learning_rate": 0.00021342279050161583, "loss": 4.8967, "step": 50650 }, { "epoch": 0.07118205273668367, "grad_norm": 0.8986161947250366, "learning_rate": 0.00021346494309400026, "loss": 4.9995, "step": 50660 }, { "epoch": 0.07119610367484724, "grad_norm": 
0.8803780674934387, "learning_rate": 0.00021350709568638472, "loss": 4.8779, "step": 50670 }, { "epoch": 0.07121015461301082, "grad_norm": 0.893484354019165, "learning_rate": 0.00021354924827876912, "loss": 5.0183, "step": 50680 }, { "epoch": 0.0712242055511744, "grad_norm": 0.9707047343254089, "learning_rate": 0.00021359140087115355, "loss": 4.8389, "step": 50690 }, { "epoch": 0.07123825648933797, "grad_norm": 0.8995065689086914, "learning_rate": 0.00021363355346353799, "loss": 4.7205, "step": 50700 }, { "epoch": 0.07125230742750155, "grad_norm": 1.0342682600021362, "learning_rate": 0.00021367570605592242, "loss": 4.7475, "step": 50710 }, { "epoch": 0.07126635836566513, "grad_norm": 0.9065954685211182, "learning_rate": 0.00021371785864830685, "loss": 4.8589, "step": 50720 }, { "epoch": 0.0712804093038287, "grad_norm": 0.9274217486381531, "learning_rate": 0.0002137600112406913, "loss": 4.9241, "step": 50730 }, { "epoch": 0.07129446024199228, "grad_norm": 0.9372475743293762, "learning_rate": 0.0002138021638330757, "loss": 4.7475, "step": 50740 }, { "epoch": 0.07130851118015585, "grad_norm": 0.8814763426780701, "learning_rate": 0.00021384431642546014, "loss": 4.9215, "step": 50750 }, { "epoch": 0.07132256211831944, "grad_norm": 0.9730595946311951, "learning_rate": 0.00021388646901784457, "loss": 4.9178, "step": 50760 }, { "epoch": 0.07133661305648302, "grad_norm": 0.9046145081520081, "learning_rate": 0.000213928621610229, "loss": 4.7846, "step": 50770 }, { "epoch": 0.0713506639946466, "grad_norm": 0.8996490240097046, "learning_rate": 0.00021397077420261344, "loss": 4.8804, "step": 50780 }, { "epoch": 0.07136471493281017, "grad_norm": 0.9104477763175964, "learning_rate": 0.0002140129267949979, "loss": 5.0551, "step": 50790 }, { "epoch": 0.07137876587097375, "grad_norm": 0.9244865775108337, "learning_rate": 0.0002140550793873823, "loss": 4.9711, "step": 50800 }, { "epoch": 0.07139281680913732, "grad_norm": 0.9118911623954773, "learning_rate": 0.00021409723197976673, 
"loss": 4.9435, "step": 50810 }, { "epoch": 0.0714068677473009, "grad_norm": 0.8913031816482544, "learning_rate": 0.00021413938457215116, "loss": 4.7714, "step": 50820 }, { "epoch": 0.07142091868546448, "grad_norm": 0.9230485558509827, "learning_rate": 0.0002141815371645356, "loss": 4.8122, "step": 50830 }, { "epoch": 0.07143496962362805, "grad_norm": 0.9266636967658997, "learning_rate": 0.00021422368975692005, "loss": 4.8106, "step": 50840 }, { "epoch": 0.07144902056179163, "grad_norm": 0.9038143157958984, "learning_rate": 0.00021426584234930448, "loss": 4.8459, "step": 50850 }, { "epoch": 0.07146307149995522, "grad_norm": 0.9343311190605164, "learning_rate": 0.0002143079949416889, "loss": 4.8616, "step": 50860 }, { "epoch": 0.0714771224381188, "grad_norm": 0.8671455979347229, "learning_rate": 0.00021435014753407332, "loss": 4.8417, "step": 50870 }, { "epoch": 0.07149117337628237, "grad_norm": 0.9161444306373596, "learning_rate": 0.00021439230012645775, "loss": 4.7486, "step": 50880 }, { "epoch": 0.07150522431444595, "grad_norm": 0.8902353644371033, "learning_rate": 0.00021443445271884218, "loss": 4.9153, "step": 50890 }, { "epoch": 0.07151927525260952, "grad_norm": 0.9097145199775696, "learning_rate": 0.00021447660531122664, "loss": 4.8412, "step": 50900 }, { "epoch": 0.0715333261907731, "grad_norm": 0.9682596921920776, "learning_rate": 0.00021451875790361107, "loss": 4.7094, "step": 50910 }, { "epoch": 0.07154737712893668, "grad_norm": 0.9312680959701538, "learning_rate": 0.0002145609104959955, "loss": 4.8971, "step": 50920 }, { "epoch": 0.07156142806710025, "grad_norm": 0.8974460959434509, "learning_rate": 0.0002146030630883799, "loss": 4.7942, "step": 50930 }, { "epoch": 0.07157547900526383, "grad_norm": 0.9163692593574524, "learning_rate": 0.00021464521568076433, "loss": 4.8219, "step": 50940 }, { "epoch": 0.0715895299434274, "grad_norm": 0.9357579946517944, "learning_rate": 0.00021468736827314877, "loss": 4.865, "step": 50950 }, { "epoch": 0.071603580881591, 
"grad_norm": 0.9122851490974426, "learning_rate": 0.00021472952086553322, "loss": 4.9114, "step": 50960 }, { "epoch": 0.07161763181975457, "grad_norm": 0.8831797242164612, "learning_rate": 0.00021477167345791765, "loss": 4.8798, "step": 50970 }, { "epoch": 0.07163168275791815, "grad_norm": 0.9927180409431458, "learning_rate": 0.00021481382605030209, "loss": 4.7515, "step": 50980 }, { "epoch": 0.07164573369608172, "grad_norm": 0.9619544148445129, "learning_rate": 0.00021485597864268652, "loss": 4.7979, "step": 50990 }, { "epoch": 0.0716597846342453, "grad_norm": 0.92621248960495, "learning_rate": 0.00021489813123507092, "loss": 4.8098, "step": 51000 }, { "epoch": 0.07167383557240888, "grad_norm": 0.9142638444900513, "learning_rate": 0.00021494028382745535, "loss": 4.8371, "step": 51010 }, { "epoch": 0.07168788651057245, "grad_norm": 0.9437655210494995, "learning_rate": 0.0002149824364198398, "loss": 4.8507, "step": 51020 }, { "epoch": 0.07170193744873603, "grad_norm": 0.9370262026786804, "learning_rate": 0.00021502458901222424, "loss": 4.8096, "step": 51030 }, { "epoch": 0.0717159883868996, "grad_norm": 0.8940436244010925, "learning_rate": 0.00021506674160460867, "loss": 4.7918, "step": 51040 }, { "epoch": 0.07173003932506318, "grad_norm": 0.9073984026908875, "learning_rate": 0.0002151088941969931, "loss": 4.8127, "step": 51050 }, { "epoch": 0.07174409026322676, "grad_norm": 0.9012202024459839, "learning_rate": 0.0002151510467893775, "loss": 4.7126, "step": 51060 }, { "epoch": 0.07175814120139035, "grad_norm": 0.8825278282165527, "learning_rate": 0.00021519319938176194, "loss": 4.8573, "step": 51070 }, { "epoch": 0.07177219213955392, "grad_norm": 0.9400650262832642, "learning_rate": 0.0002152353519741464, "loss": 4.8074, "step": 51080 }, { "epoch": 0.0717862430777175, "grad_norm": 0.8937569260597229, "learning_rate": 0.00021527750456653083, "loss": 4.7746, "step": 51090 }, { "epoch": 0.07180029401588107, "grad_norm": 0.9248559474945068, "learning_rate": 
0.00021531965715891526, "loss": 4.889, "step": 51100 }, { "epoch": 0.07181434495404465, "grad_norm": 0.8893438577651978, "learning_rate": 0.0002153618097512997, "loss": 4.7216, "step": 51110 }, { "epoch": 0.07182839589220823, "grad_norm": 0.9111591577529907, "learning_rate": 0.00021540396234368412, "loss": 4.8645, "step": 51120 }, { "epoch": 0.0718424468303718, "grad_norm": 1.1774877309799194, "learning_rate": 0.00021544611493606853, "loss": 4.8673, "step": 51130 }, { "epoch": 0.07185649776853538, "grad_norm": 0.8930349946022034, "learning_rate": 0.00021548826752845298, "loss": 4.9113, "step": 51140 }, { "epoch": 0.07187054870669896, "grad_norm": 0.9083362221717834, "learning_rate": 0.00021553042012083742, "loss": 4.9166, "step": 51150 }, { "epoch": 0.07188459964486253, "grad_norm": 0.9521656632423401, "learning_rate": 0.00021557257271322185, "loss": 4.8467, "step": 51160 }, { "epoch": 0.07189865058302612, "grad_norm": 0.8885960578918457, "learning_rate": 0.00021561472530560628, "loss": 4.9143, "step": 51170 }, { "epoch": 0.0719127015211897, "grad_norm": 0.9528680443763733, "learning_rate": 0.0002156568778979907, "loss": 4.936, "step": 51180 }, { "epoch": 0.07192675245935327, "grad_norm": 0.9279723763465881, "learning_rate": 0.0002156990304903751, "loss": 4.9369, "step": 51190 }, { "epoch": 0.07194080339751685, "grad_norm": 0.8902644515037537, "learning_rate": 0.00021574118308275957, "loss": 4.9081, "step": 51200 }, { "epoch": 0.07195485433568043, "grad_norm": 0.8919149041175842, "learning_rate": 0.000215783335675144, "loss": 4.7189, "step": 51210 }, { "epoch": 0.071968905273844, "grad_norm": 0.8641528487205505, "learning_rate": 0.00021582548826752843, "loss": 4.9004, "step": 51220 }, { "epoch": 0.07198295621200758, "grad_norm": 0.9722802639007568, "learning_rate": 0.00021586764085991287, "loss": 4.8593, "step": 51230 }, { "epoch": 0.07199700715017116, "grad_norm": 0.9114046692848206, "learning_rate": 0.0002159097934522973, "loss": 4.8033, "step": 51240 }, { 
"epoch": 0.07201105808833473, "grad_norm": 0.9105363488197327, "learning_rate": 0.00021595194604468175, "loss": 4.929, "step": 51250 }, { "epoch": 0.07202510902649831, "grad_norm": 0.9299967288970947, "learning_rate": 0.00021599409863706616, "loss": 4.8742, "step": 51260 }, { "epoch": 0.0720391599646619, "grad_norm": 0.905307948589325, "learning_rate": 0.0002160362512294506, "loss": 4.9712, "step": 51270 }, { "epoch": 0.07205321090282547, "grad_norm": 0.8946942090988159, "learning_rate": 0.00021607840382183502, "loss": 4.8168, "step": 51280 }, { "epoch": 0.07206726184098905, "grad_norm": 0.8690630197525024, "learning_rate": 0.00021612055641421945, "loss": 4.9673, "step": 51290 }, { "epoch": 0.07208131277915263, "grad_norm": 0.9104512333869934, "learning_rate": 0.00021616270900660388, "loss": 4.8752, "step": 51300 }, { "epoch": 0.0720953637173162, "grad_norm": 0.9049569368362427, "learning_rate": 0.00021620486159898834, "loss": 4.8247, "step": 51310 }, { "epoch": 0.07210941465547978, "grad_norm": 0.9620559215545654, "learning_rate": 0.00021624701419137275, "loss": 4.8299, "step": 51320 }, { "epoch": 0.07212346559364335, "grad_norm": 0.925479531288147, "learning_rate": 0.00021628916678375718, "loss": 4.9163, "step": 51330 }, { "epoch": 0.07213751653180693, "grad_norm": 0.9238383769989014, "learning_rate": 0.0002163313193761416, "loss": 4.7718, "step": 51340 }, { "epoch": 0.0721515674699705, "grad_norm": 0.9379529356956482, "learning_rate": 0.00021637347196852604, "loss": 4.8578, "step": 51350 }, { "epoch": 0.07216561840813408, "grad_norm": 0.9757288694381714, "learning_rate": 0.00021641562456091047, "loss": 4.7786, "step": 51360 }, { "epoch": 0.07217966934629766, "grad_norm": 0.9008384943008423, "learning_rate": 0.00021645777715329493, "loss": 4.7713, "step": 51370 }, { "epoch": 0.07219372028446125, "grad_norm": 0.9475241899490356, "learning_rate": 0.00021649992974567936, "loss": 4.7283, "step": 51380 }, { "epoch": 0.07220777122262483, "grad_norm": 
0.8943010568618774, "learning_rate": 0.00021654208233806376, "loss": 4.9106, "step": 51390 }, { "epoch": 0.0722218221607884, "grad_norm": 0.9169595241546631, "learning_rate": 0.0002165842349304482, "loss": 4.8022, "step": 51400 }, { "epoch": 0.07223587309895198, "grad_norm": 0.9076150059700012, "learning_rate": 0.00021662638752283263, "loss": 4.8884, "step": 51410 }, { "epoch": 0.07224992403711555, "grad_norm": 0.8879525661468506, "learning_rate": 0.00021666854011521708, "loss": 4.8106, "step": 51420 }, { "epoch": 0.07226397497527913, "grad_norm": 0.9336953163146973, "learning_rate": 0.00021671069270760152, "loss": 4.8502, "step": 51430 }, { "epoch": 0.0722780259134427, "grad_norm": 0.9571032524108887, "learning_rate": 0.00021675284529998595, "loss": 4.8924, "step": 51440 }, { "epoch": 0.07229207685160628, "grad_norm": 0.8778558969497681, "learning_rate": 0.00021679499789237035, "loss": 4.7895, "step": 51450 }, { "epoch": 0.07230612778976986, "grad_norm": 0.9267377853393555, "learning_rate": 0.00021683715048475478, "loss": 4.8497, "step": 51460 }, { "epoch": 0.07232017872793343, "grad_norm": 0.8599752187728882, "learning_rate": 0.0002168793030771392, "loss": 4.7963, "step": 51470 }, { "epoch": 0.07233422966609702, "grad_norm": 0.9141520261764526, "learning_rate": 0.00021692145566952367, "loss": 4.889, "step": 51480 }, { "epoch": 0.0723482806042606, "grad_norm": 0.9507299065589905, "learning_rate": 0.0002169636082619081, "loss": 4.9028, "step": 51490 }, { "epoch": 0.07236233154242418, "grad_norm": 0.9014940857887268, "learning_rate": 0.00021700576085429253, "loss": 4.8435, "step": 51500 }, { "epoch": 0.07237638248058775, "grad_norm": 0.9214157462120056, "learning_rate": 0.00021704791344667694, "loss": 4.8943, "step": 51510 }, { "epoch": 0.07239043341875133, "grad_norm": 0.8904531598091125, "learning_rate": 0.00021709006603906137, "loss": 4.788, "step": 51520 }, { "epoch": 0.0724044843569149, "grad_norm": 0.8769239187240601, "learning_rate": 0.0002171322186314458, 
"loss": 4.8987, "step": 51530 }, { "epoch": 0.07241853529507848, "grad_norm": 0.8964927196502686, "learning_rate": 0.00021717437122383026, "loss": 4.7834, "step": 51540 }, { "epoch": 0.07243258623324206, "grad_norm": 0.9342873096466064, "learning_rate": 0.0002172165238162147, "loss": 4.8952, "step": 51550 }, { "epoch": 0.07244663717140563, "grad_norm": 0.9120386838912964, "learning_rate": 0.00021725867640859912, "loss": 4.927, "step": 51560 }, { "epoch": 0.07246068810956921, "grad_norm": 0.8827990889549255, "learning_rate": 0.00021730082900098355, "loss": 4.9028, "step": 51570 }, { "epoch": 0.0724747390477328, "grad_norm": 0.8882848620414734, "learning_rate": 0.00021734298159336796, "loss": 4.8229, "step": 51580 }, { "epoch": 0.07248878998589638, "grad_norm": 0.9415622353553772, "learning_rate": 0.0002173851341857524, "loss": 4.851, "step": 51590 }, { "epoch": 0.07250284092405995, "grad_norm": 0.936032772064209, "learning_rate": 0.00021742728677813685, "loss": 4.7324, "step": 51600 }, { "epoch": 0.07251689186222353, "grad_norm": 0.9080641865730286, "learning_rate": 0.00021746943937052128, "loss": 4.8478, "step": 51610 }, { "epoch": 0.0725309428003871, "grad_norm": 0.8853969573974609, "learning_rate": 0.0002175115919629057, "loss": 4.8263, "step": 51620 }, { "epoch": 0.07254499373855068, "grad_norm": 0.9154071807861328, "learning_rate": 0.00021755374455529014, "loss": 4.9162, "step": 51630 }, { "epoch": 0.07255904467671426, "grad_norm": 0.894340991973877, "learning_rate": 0.00021759589714767454, "loss": 4.8624, "step": 51640 }, { "epoch": 0.07257309561487783, "grad_norm": 0.9048279523849487, "learning_rate": 0.00021763804974005897, "loss": 4.9428, "step": 51650 }, { "epoch": 0.07258714655304141, "grad_norm": 0.8889136910438538, "learning_rate": 0.00021768020233244343, "loss": 4.8428, "step": 51660 }, { "epoch": 0.07260119749120499, "grad_norm": 1.0818347930908203, "learning_rate": 0.00021772235492482786, "loss": 4.9329, "step": 51670 }, { "epoch": 
0.07261524842936856, "grad_norm": 0.9882572889328003, "learning_rate": 0.0002177645075172123, "loss": 4.8468, "step": 51680 }, { "epoch": 0.07262929936753215, "grad_norm": 0.8729943633079529, "learning_rate": 0.00021780666010959673, "loss": 5.0247, "step": 51690 }, { "epoch": 0.07264335030569573, "grad_norm": 0.8866937160491943, "learning_rate": 0.00021784881270198116, "loss": 4.7614, "step": 51700 }, { "epoch": 0.0726574012438593, "grad_norm": 0.9050795435905457, "learning_rate": 0.00021789096529436556, "loss": 4.9629, "step": 51710 }, { "epoch": 0.07267145218202288, "grad_norm": 0.9173886775970459, "learning_rate": 0.00021793311788675002, "loss": 4.7605, "step": 51720 }, { "epoch": 0.07268550312018646, "grad_norm": 0.8916087746620178, "learning_rate": 0.00021797527047913445, "loss": 4.8462, "step": 51730 }, { "epoch": 0.07269955405835003, "grad_norm": 0.8887311220169067, "learning_rate": 0.00021801742307151888, "loss": 4.9066, "step": 51740 }, { "epoch": 0.07271360499651361, "grad_norm": 0.871232271194458, "learning_rate": 0.0002180595756639033, "loss": 4.9546, "step": 51750 }, { "epoch": 0.07272765593467719, "grad_norm": 0.8541275858879089, "learning_rate": 0.00021810172825628774, "loss": 4.8691, "step": 51760 }, { "epoch": 0.07274170687284076, "grad_norm": 0.8941138982772827, "learning_rate": 0.00021814388084867215, "loss": 4.8881, "step": 51770 }, { "epoch": 0.07275575781100434, "grad_norm": 0.9860665202140808, "learning_rate": 0.0002181860334410566, "loss": 4.7749, "step": 51780 }, { "epoch": 0.07276980874916793, "grad_norm": 0.8882085680961609, "learning_rate": 0.00021822818603344104, "loss": 4.8333, "step": 51790 }, { "epoch": 0.0727838596873315, "grad_norm": 0.9299302697181702, "learning_rate": 0.00021827033862582547, "loss": 4.8661, "step": 51800 }, { "epoch": 0.07279791062549508, "grad_norm": 0.8716930747032166, "learning_rate": 0.0002183124912182099, "loss": 4.8118, "step": 51810 }, { "epoch": 0.07281196156365866, "grad_norm": 0.9178931713104248, 
"learning_rate": 0.00021835464381059433, "loss": 4.867, "step": 51820 }, { "epoch": 0.07282601250182223, "grad_norm": 0.9466065764427185, "learning_rate": 0.0002183967964029788, "loss": 4.8518, "step": 51830 }, { "epoch": 0.07284006343998581, "grad_norm": 0.9389024972915649, "learning_rate": 0.0002184389489953632, "loss": 4.7342, "step": 51840 }, { "epoch": 0.07285411437814938, "grad_norm": 0.9677282571792603, "learning_rate": 0.00021848110158774762, "loss": 4.8186, "step": 51850 }, { "epoch": 0.07286816531631296, "grad_norm": 0.9958227872848511, "learning_rate": 0.00021852325418013206, "loss": 4.9577, "step": 51860 }, { "epoch": 0.07288221625447654, "grad_norm": 0.8936029076576233, "learning_rate": 0.0002185654067725165, "loss": 4.9129, "step": 51870 }, { "epoch": 0.07289626719264011, "grad_norm": 0.9088529944419861, "learning_rate": 0.00021860755936490092, "loss": 4.8115, "step": 51880 }, { "epoch": 0.0729103181308037, "grad_norm": 0.9063525795936584, "learning_rate": 0.00021864971195728538, "loss": 4.9166, "step": 51890 }, { "epoch": 0.07292436906896728, "grad_norm": 0.9061357378959656, "learning_rate": 0.00021869186454966978, "loss": 4.8306, "step": 51900 }, { "epoch": 0.07293842000713086, "grad_norm": 0.9639674425125122, "learning_rate": 0.0002187340171420542, "loss": 4.8523, "step": 51910 }, { "epoch": 0.07295247094529443, "grad_norm": 0.9000791311264038, "learning_rate": 0.00021877616973443864, "loss": 4.7562, "step": 51920 }, { "epoch": 0.07296652188345801, "grad_norm": 0.9156561493873596, "learning_rate": 0.00021881832232682307, "loss": 5.0079, "step": 51930 }, { "epoch": 0.07298057282162158, "grad_norm": 0.9364044666290283, "learning_rate": 0.0002188604749192075, "loss": 4.9567, "step": 51940 }, { "epoch": 0.07299462375978516, "grad_norm": 0.8726660013198853, "learning_rate": 0.00021890262751159196, "loss": 4.9127, "step": 51950 }, { "epoch": 0.07300867469794874, "grad_norm": 0.9417910575866699, "learning_rate": 0.0002189447801039764, "loss": 4.8765, 
"step": 51960 }, { "epoch": 0.07302272563611231, "grad_norm": 0.8728827238082886, "learning_rate": 0.0002189869326963608, "loss": 4.7915, "step": 51970 }, { "epoch": 0.07303677657427589, "grad_norm": 0.9182146787643433, "learning_rate": 0.00021902908528874523, "loss": 4.9794, "step": 51980 }, { "epoch": 0.07305082751243946, "grad_norm": 0.8773520588874817, "learning_rate": 0.00021907123788112966, "loss": 4.863, "step": 51990 }, { "epoch": 0.07306487845060305, "grad_norm": 0.9333748817443848, "learning_rate": 0.0002191133904735141, "loss": 4.7767, "step": 52000 }, { "epoch": 0.07307892938876663, "grad_norm": 0.9405140280723572, "learning_rate": 0.00021915554306589855, "loss": 4.8579, "step": 52010 }, { "epoch": 0.07309298032693021, "grad_norm": 0.8939602971076965, "learning_rate": 0.00021919769565828298, "loss": 4.9197, "step": 52020 }, { "epoch": 0.07310703126509378, "grad_norm": 1.0516551733016968, "learning_rate": 0.00021923984825066739, "loss": 4.7807, "step": 52030 }, { "epoch": 0.07312108220325736, "grad_norm": 0.9042669534683228, "learning_rate": 0.00021928200084305182, "loss": 4.8805, "step": 52040 }, { "epoch": 0.07313513314142094, "grad_norm": 0.8705670833587646, "learning_rate": 0.00021932415343543625, "loss": 4.8592, "step": 52050 }, { "epoch": 0.07314918407958451, "grad_norm": 0.9007607698440552, "learning_rate": 0.0002193663060278207, "loss": 4.9571, "step": 52060 }, { "epoch": 0.07316323501774809, "grad_norm": 0.9101800322532654, "learning_rate": 0.00021940845862020514, "loss": 4.8341, "step": 52070 }, { "epoch": 0.07317728595591166, "grad_norm": 0.8472366333007812, "learning_rate": 0.00021945061121258957, "loss": 5.002, "step": 52080 }, { "epoch": 0.07319133689407524, "grad_norm": 0.8725908994674683, "learning_rate": 0.00021949276380497397, "loss": 4.8479, "step": 52090 }, { "epoch": 0.07320538783223883, "grad_norm": 0.8859214782714844, "learning_rate": 0.0002195349163973584, "loss": 4.8376, "step": 52100 }, { "epoch": 0.0732194387704024, 
"grad_norm": 0.9071083068847656, "learning_rate": 0.00021957706898974283, "loss": 4.7523, "step": 52110 }, { "epoch": 0.07323348970856598, "grad_norm": 0.8878831267356873, "learning_rate": 0.0002196192215821273, "loss": 4.937, "step": 52120 }, { "epoch": 0.07324754064672956, "grad_norm": 0.924068033695221, "learning_rate": 0.00021966137417451172, "loss": 4.6908, "step": 52130 }, { "epoch": 0.07326159158489313, "grad_norm": 0.9516233205795288, "learning_rate": 0.00021970352676689616, "loss": 4.8146, "step": 52140 }, { "epoch": 0.07327564252305671, "grad_norm": 0.8874931931495667, "learning_rate": 0.0002197456793592806, "loss": 4.799, "step": 52150 }, { "epoch": 0.07328969346122029, "grad_norm": 0.8723164200782776, "learning_rate": 0.000219787831951665, "loss": 4.6445, "step": 52160 }, { "epoch": 0.07330374439938386, "grad_norm": 0.8742164373397827, "learning_rate": 0.00021982998454404942, "loss": 5.0286, "step": 52170 }, { "epoch": 0.07331779533754744, "grad_norm": 1.0268549919128418, "learning_rate": 0.00021987213713643388, "loss": 4.7709, "step": 52180 }, { "epoch": 0.07333184627571102, "grad_norm": 0.8598684668540955, "learning_rate": 0.0002199142897288183, "loss": 4.9128, "step": 52190 }, { "epoch": 0.0733458972138746, "grad_norm": 0.9072014689445496, "learning_rate": 0.00021995644232120274, "loss": 4.8774, "step": 52200 }, { "epoch": 0.07335994815203818, "grad_norm": 0.9579162001609802, "learning_rate": 0.00021999859491358717, "loss": 4.8299, "step": 52210 }, { "epoch": 0.07337399909020176, "grad_norm": 0.9101182222366333, "learning_rate": 0.00022004074750597158, "loss": 4.7819, "step": 52220 }, { "epoch": 0.07338805002836533, "grad_norm": 0.8982588052749634, "learning_rate": 0.000220082900098356, "loss": 4.8396, "step": 52230 }, { "epoch": 0.07340210096652891, "grad_norm": 0.9071044921875, "learning_rate": 0.00022012505269074047, "loss": 4.8595, "step": 52240 }, { "epoch": 0.07341615190469249, "grad_norm": 0.952265739440918, "learning_rate": 
0.0002201672052831249, "loss": 4.8164, "step": 52250 }, { "epoch": 0.07343020284285606, "grad_norm": 0.9162461161613464, "learning_rate": 0.00022020935787550933, "loss": 4.8448, "step": 52260 }, { "epoch": 0.07344425378101964, "grad_norm": 0.8878207802772522, "learning_rate": 0.00022025151046789376, "loss": 4.8139, "step": 52270 }, { "epoch": 0.07345830471918322, "grad_norm": 0.8891446590423584, "learning_rate": 0.0002202936630602782, "loss": 4.9729, "step": 52280 }, { "epoch": 0.07347235565734679, "grad_norm": 0.9611853957176208, "learning_rate": 0.0002203358156526626, "loss": 4.9082, "step": 52290 }, { "epoch": 0.07348640659551037, "grad_norm": 0.8403993248939514, "learning_rate": 0.00022037796824504705, "loss": 4.904, "step": 52300 }, { "epoch": 0.07350045753367396, "grad_norm": 0.873360276222229, "learning_rate": 0.00022042012083743149, "loss": 4.9462, "step": 52310 }, { "epoch": 0.07351450847183753, "grad_norm": 0.8688433766365051, "learning_rate": 0.00022046227342981592, "loss": 5.0245, "step": 52320 }, { "epoch": 0.07352855941000111, "grad_norm": 0.899304986000061, "learning_rate": 0.00022050442602220035, "loss": 4.7631, "step": 52330 }, { "epoch": 0.07354261034816469, "grad_norm": 0.8732203841209412, "learning_rate": 0.00022054657861458478, "loss": 4.9718, "step": 52340 }, { "epoch": 0.07355666128632826, "grad_norm": 0.8838686943054199, "learning_rate": 0.00022058873120696918, "loss": 4.9258, "step": 52350 }, { "epoch": 0.07357071222449184, "grad_norm": 0.9056248068809509, "learning_rate": 0.00022063088379935364, "loss": 4.789, "step": 52360 }, { "epoch": 0.07358476316265541, "grad_norm": 0.8867168426513672, "learning_rate": 0.00022067303639173807, "loss": 4.8421, "step": 52370 }, { "epoch": 0.07359881410081899, "grad_norm": 1.1361435651779175, "learning_rate": 0.0002207151889841225, "loss": 4.8528, "step": 52380 }, { "epoch": 0.07361286503898257, "grad_norm": 0.897068977355957, "learning_rate": 0.00022075734157650693, "loss": 4.7161, "step": 52390 }, { 
"epoch": 0.07362691597714614, "grad_norm": 0.9281494617462158, "learning_rate": 0.00022079949416889137, "loss": 4.7663, "step": 52400 }, { "epoch": 0.07364096691530973, "grad_norm": 0.9386702179908752, "learning_rate": 0.00022084164676127582, "loss": 4.8699, "step": 52410 }, { "epoch": 0.07365501785347331, "grad_norm": 0.8745991587638855, "learning_rate": 0.00022088379935366023, "loss": 4.7858, "step": 52420 }, { "epoch": 0.07366906879163689, "grad_norm": 0.9027533531188965, "learning_rate": 0.00022092595194604466, "loss": 4.8626, "step": 52430 }, { "epoch": 0.07368311972980046, "grad_norm": 0.8828858733177185, "learning_rate": 0.0002209681045384291, "loss": 4.6567, "step": 52440 }, { "epoch": 0.07369717066796404, "grad_norm": 0.8733296990394592, "learning_rate": 0.00022101025713081352, "loss": 4.8987, "step": 52450 }, { "epoch": 0.07371122160612761, "grad_norm": 0.8760149478912354, "learning_rate": 0.00022105240972319795, "loss": 4.8488, "step": 52460 }, { "epoch": 0.07372527254429119, "grad_norm": 0.9006288647651672, "learning_rate": 0.0002210945623155824, "loss": 4.8752, "step": 52470 }, { "epoch": 0.07373932348245477, "grad_norm": 0.8950282335281372, "learning_rate": 0.00022113671490796682, "loss": 4.9373, "step": 52480 }, { "epoch": 0.07375337442061834, "grad_norm": 0.8998293876647949, "learning_rate": 0.00022117886750035125, "loss": 4.8634, "step": 52490 }, { "epoch": 0.07376742535878192, "grad_norm": 0.8735572099685669, "learning_rate": 0.00022122102009273568, "loss": 4.869, "step": 52500 }, { "epoch": 0.07378147629694551, "grad_norm": 0.9324967265129089, "learning_rate": 0.0002212631726851201, "loss": 4.7894, "step": 52510 }, { "epoch": 0.07379552723510908, "grad_norm": 0.8685128092765808, "learning_rate": 0.00022130532527750454, "loss": 4.8566, "step": 52520 }, { "epoch": 0.07380957817327266, "grad_norm": 0.916084885597229, "learning_rate": 0.000221347477869889, "loss": 4.6707, "step": 52530 }, { "epoch": 0.07382362911143624, "grad_norm": 
0.8901728987693787, "learning_rate": 0.00022138963046227343, "loss": 4.8058, "step": 52540 }, { "epoch": 0.07383768004959981, "grad_norm": 0.871108889579773, "learning_rate": 0.00022143178305465783, "loss": 4.9019, "step": 52550 }, { "epoch": 0.07385173098776339, "grad_norm": 0.8851538300514221, "learning_rate": 0.00022147393564704226, "loss": 4.8358, "step": 52560 }, { "epoch": 0.07386578192592697, "grad_norm": 0.9221569895744324, "learning_rate": 0.0002215160882394267, "loss": 4.9551, "step": 52570 }, { "epoch": 0.07387983286409054, "grad_norm": 0.9061564803123474, "learning_rate": 0.00022155824083181113, "loss": 4.6869, "step": 52580 }, { "epoch": 0.07389388380225412, "grad_norm": 0.8911418914794922, "learning_rate": 0.00022160039342419559, "loss": 4.8726, "step": 52590 }, { "epoch": 0.0739079347404177, "grad_norm": 0.9063213467597961, "learning_rate": 0.00022164254601658002, "loss": 4.7479, "step": 52600 }, { "epoch": 0.07392198567858127, "grad_norm": 0.8305318355560303, "learning_rate": 0.00022168469860896442, "loss": 4.9001, "step": 52610 }, { "epoch": 0.07393603661674486, "grad_norm": 0.871112585067749, "learning_rate": 0.00022172685120134885, "loss": 4.8549, "step": 52620 }, { "epoch": 0.07395008755490844, "grad_norm": 0.8946985006332397, "learning_rate": 0.00022176900379373328, "loss": 4.8635, "step": 52630 }, { "epoch": 0.07396413849307201, "grad_norm": 0.9011940360069275, "learning_rate": 0.00022181115638611774, "loss": 4.8687, "step": 52640 }, { "epoch": 0.07397818943123559, "grad_norm": 0.8744351267814636, "learning_rate": 0.00022185330897850217, "loss": 4.9532, "step": 52650 }, { "epoch": 0.07399224036939916, "grad_norm": 0.886094331741333, "learning_rate": 0.0002218954615708866, "loss": 4.9025, "step": 52660 }, { "epoch": 0.07400629130756274, "grad_norm": 0.8723426461219788, "learning_rate": 0.00022193761416327103, "loss": 4.8615, "step": 52670 }, { "epoch": 0.07402034224572632, "grad_norm": 0.8950220942497253, "learning_rate": 
0.00022197976675565544, "loss": 4.825, "step": 52680 }, { "epoch": 0.0740343931838899, "grad_norm": 0.8471283316612244, "learning_rate": 0.00022202191934803987, "loss": 4.7089, "step": 52690 }, { "epoch": 0.07404844412205347, "grad_norm": 0.8990341424942017, "learning_rate": 0.00022206407194042433, "loss": 4.9334, "step": 52700 }, { "epoch": 0.07406249506021705, "grad_norm": 0.9074385762214661, "learning_rate": 0.00022210622453280876, "loss": 4.8409, "step": 52710 }, { "epoch": 0.07407654599838064, "grad_norm": 0.8630465865135193, "learning_rate": 0.0002221483771251932, "loss": 4.8109, "step": 52720 }, { "epoch": 0.07409059693654421, "grad_norm": 0.9616298675537109, "learning_rate": 0.00022219052971757762, "loss": 4.8484, "step": 52730 }, { "epoch": 0.07410464787470779, "grad_norm": 0.9482759237289429, "learning_rate": 0.00022223268230996203, "loss": 4.886, "step": 52740 }, { "epoch": 0.07411869881287136, "grad_norm": 0.8867536187171936, "learning_rate": 0.00022227483490234646, "loss": 4.8773, "step": 52750 }, { "epoch": 0.07413274975103494, "grad_norm": 0.9389952421188354, "learning_rate": 0.00022231698749473091, "loss": 4.8021, "step": 52760 }, { "epoch": 0.07414680068919852, "grad_norm": 0.8756046891212463, "learning_rate": 0.00022235914008711535, "loss": 4.708, "step": 52770 }, { "epoch": 0.07416085162736209, "grad_norm": 0.8724471926689148, "learning_rate": 0.00022240129267949978, "loss": 4.794, "step": 52780 }, { "epoch": 0.07417490256552567, "grad_norm": 0.895776629447937, "learning_rate": 0.0002224434452718842, "loss": 4.8679, "step": 52790 }, { "epoch": 0.07418895350368925, "grad_norm": 0.9033660292625427, "learning_rate": 0.0002224855978642686, "loss": 4.6832, "step": 52800 }, { "epoch": 0.07420300444185282, "grad_norm": 0.9038524627685547, "learning_rate": 0.00022252775045665304, "loss": 4.7711, "step": 52810 }, { "epoch": 0.07421705538001641, "grad_norm": 0.8903810381889343, "learning_rate": 0.0002225699030490375, "loss": 4.8756, "step": 52820 }, { 
"epoch": 0.07423110631817999, "grad_norm": 0.9286917448043823, "learning_rate": 0.00022261205564142193, "loss": 4.8495, "step": 52830 }, { "epoch": 0.07424515725634356, "grad_norm": 0.8560378551483154, "learning_rate": 0.00022265420823380636, "loss": 4.8827, "step": 52840 }, { "epoch": 0.07425920819450714, "grad_norm": 1.5267508029937744, "learning_rate": 0.0002226963608261908, "loss": 4.7531, "step": 52850 }, { "epoch": 0.07427325913267072, "grad_norm": 0.8825668096542358, "learning_rate": 0.00022273851341857523, "loss": 4.9219, "step": 52860 }, { "epoch": 0.07428731007083429, "grad_norm": 0.8723922967910767, "learning_rate": 0.00022278066601095963, "loss": 4.7371, "step": 52870 }, { "epoch": 0.07430136100899787, "grad_norm": 0.8615224361419678, "learning_rate": 0.0002228228186033441, "loss": 4.7961, "step": 52880 }, { "epoch": 0.07431541194716144, "grad_norm": 0.8599326610565186, "learning_rate": 0.00022286497119572852, "loss": 4.9175, "step": 52890 }, { "epoch": 0.07432946288532502, "grad_norm": 0.9164808988571167, "learning_rate": 0.00022290712378811295, "loss": 4.7722, "step": 52900 }, { "epoch": 0.0743435138234886, "grad_norm": 0.8620802760124207, "learning_rate": 0.00022294927638049738, "loss": 4.8847, "step": 52910 }, { "epoch": 0.07435756476165217, "grad_norm": 0.88958340883255, "learning_rate": 0.0002229914289728818, "loss": 4.8875, "step": 52920 }, { "epoch": 0.07437161569981576, "grad_norm": 0.894120454788208, "learning_rate": 0.00022303358156526622, "loss": 4.8375, "step": 52930 }, { "epoch": 0.07438566663797934, "grad_norm": 0.8956342935562134, "learning_rate": 0.00022307573415765068, "loss": 4.9163, "step": 52940 }, { "epoch": 0.07439971757614292, "grad_norm": 0.8793826699256897, "learning_rate": 0.0002231178867500351, "loss": 4.8802, "step": 52950 }, { "epoch": 0.07441376851430649, "grad_norm": 0.8765672445297241, "learning_rate": 0.00022316003934241954, "loss": 4.8051, "step": 52960 }, { "epoch": 0.07442781945247007, "grad_norm": 
0.9064963459968567, "learning_rate": 0.00022320219193480397, "loss": 4.804, "step": 52970 }, { "epoch": 0.07444187039063364, "grad_norm": 0.879390299320221, "learning_rate": 0.0002232443445271884, "loss": 5.0118, "step": 52980 }, { "epoch": 0.07445592132879722, "grad_norm": 0.8609398603439331, "learning_rate": 0.00022328649711957286, "loss": 4.8256, "step": 52990 }, { "epoch": 0.0744699722669608, "grad_norm": 0.9296293258666992, "learning_rate": 0.00022332864971195726, "loss": 4.8331, "step": 53000 }, { "epoch": 0.07448402320512437, "grad_norm": 1.0869863033294678, "learning_rate": 0.0002233708023043417, "loss": 4.7803, "step": 53010 }, { "epoch": 0.07449807414328795, "grad_norm": 0.9066222906112671, "learning_rate": 0.00022341295489672613, "loss": 4.868, "step": 53020 }, { "epoch": 0.07451212508145154, "grad_norm": 0.889058530330658, "learning_rate": 0.00022345510748911056, "loss": 4.7838, "step": 53030 }, { "epoch": 0.07452617601961511, "grad_norm": 1.2822096347808838, "learning_rate": 0.000223497260081495, "loss": 4.7383, "step": 53040 }, { "epoch": 0.07454022695777869, "grad_norm": 0.8843401074409485, "learning_rate": 0.00022353941267387945, "loss": 4.8443, "step": 53050 }, { "epoch": 0.07455427789594227, "grad_norm": 0.9470616579055786, "learning_rate": 0.00022358156526626385, "loss": 4.836, "step": 53060 }, { "epoch": 0.07456832883410584, "grad_norm": 0.878521740436554, "learning_rate": 0.00022362371785864828, "loss": 4.9428, "step": 53070 }, { "epoch": 0.07458237977226942, "grad_norm": 0.894644558429718, "learning_rate": 0.0002236658704510327, "loss": 4.911, "step": 53080 }, { "epoch": 0.074596430710433, "grad_norm": 0.8709197044372559, "learning_rate": 0.00022370802304341714, "loss": 4.8799, "step": 53090 }, { "epoch": 0.07461048164859657, "grad_norm": 0.8657320141792297, "learning_rate": 0.00022375017563580157, "loss": 4.9005, "step": 53100 }, { "epoch": 0.07462453258676015, "grad_norm": 0.9332450032234192, "learning_rate": 0.00022379232822818603, "loss": 
4.994, "step": 53110 }, { "epoch": 0.07463858352492372, "grad_norm": 0.9028719067573547, "learning_rate": 0.00022383448082057046, "loss": 4.9586, "step": 53120 }, { "epoch": 0.07465263446308731, "grad_norm": 0.9070411920547485, "learning_rate": 0.00022387663341295487, "loss": 5.001, "step": 53130 }, { "epoch": 0.07466668540125089, "grad_norm": 0.8787438869476318, "learning_rate": 0.0002239187860053393, "loss": 4.9275, "step": 53140 }, { "epoch": 0.07468073633941447, "grad_norm": 0.901603102684021, "learning_rate": 0.00022396093859772373, "loss": 4.9226, "step": 53150 }, { "epoch": 0.07469478727757804, "grad_norm": 0.9032551646232605, "learning_rate": 0.00022400309119010816, "loss": 4.7562, "step": 53160 }, { "epoch": 0.07470883821574162, "grad_norm": 0.9277701377868652, "learning_rate": 0.00022404524378249262, "loss": 4.9293, "step": 53170 }, { "epoch": 0.0747228891539052, "grad_norm": 0.9006163477897644, "learning_rate": 0.00022408739637487705, "loss": 4.7839, "step": 53180 }, { "epoch": 0.07473694009206877, "grad_norm": 0.9267420768737793, "learning_rate": 0.00022412954896726146, "loss": 4.7228, "step": 53190 }, { "epoch": 0.07475099103023235, "grad_norm": 0.8903605937957764, "learning_rate": 0.00022417170155964589, "loss": 4.7873, "step": 53200 }, { "epoch": 0.07476504196839592, "grad_norm": 0.8836460113525391, "learning_rate": 0.00022421385415203032, "loss": 4.9616, "step": 53210 }, { "epoch": 0.0747790929065595, "grad_norm": 0.8987323045730591, "learning_rate": 0.00022425600674441478, "loss": 4.8712, "step": 53220 }, { "epoch": 0.07479314384472308, "grad_norm": 0.8787208199501038, "learning_rate": 0.0002242981593367992, "loss": 4.8302, "step": 53230 }, { "epoch": 0.07480719478288667, "grad_norm": 0.8872378468513489, "learning_rate": 0.00022434031192918364, "loss": 4.7994, "step": 53240 }, { "epoch": 0.07482124572105024, "grad_norm": 0.8948333263397217, "learning_rate": 0.00022438246452156807, "loss": 4.9177, "step": 53250 }, { "epoch": 0.07483529665921382, 
"grad_norm": 0.8879920840263367, "learning_rate": 0.00022442461711395247, "loss": 4.8703, "step": 53260 }, { "epoch": 0.0748493475973774, "grad_norm": 0.897305965423584, "learning_rate": 0.0002244667697063369, "loss": 4.8631, "step": 53270 }, { "epoch": 0.07486339853554097, "grad_norm": 0.8983936905860901, "learning_rate": 0.00022450892229872136, "loss": 4.7904, "step": 53280 }, { "epoch": 0.07487744947370455, "grad_norm": 0.9298288822174072, "learning_rate": 0.0002245510748911058, "loss": 4.9048, "step": 53290 }, { "epoch": 0.07489150041186812, "grad_norm": 0.9018476009368896, "learning_rate": 0.00022459322748349022, "loss": 4.8721, "step": 53300 }, { "epoch": 0.0749055513500317, "grad_norm": 0.8535480499267578, "learning_rate": 0.00022463538007587466, "loss": 4.9276, "step": 53310 }, { "epoch": 0.07491960228819528, "grad_norm": 0.8795814514160156, "learning_rate": 0.00022467753266825906, "loss": 4.8589, "step": 53320 }, { "epoch": 0.07493365322635885, "grad_norm": 0.9002819061279297, "learning_rate": 0.0002247196852606435, "loss": 5.0146, "step": 53330 }, { "epoch": 0.07494770416452244, "grad_norm": 0.9197555780410767, "learning_rate": 0.00022476183785302795, "loss": 4.9211, "step": 53340 }, { "epoch": 0.07496175510268602, "grad_norm": 0.9005763530731201, "learning_rate": 0.00022480399044541238, "loss": 4.9638, "step": 53350 }, { "epoch": 0.0749758060408496, "grad_norm": 0.8709673881530762, "learning_rate": 0.0002248461430377968, "loss": 4.8595, "step": 53360 }, { "epoch": 0.07498985697901317, "grad_norm": 0.8983860611915588, "learning_rate": 0.00022488829563018124, "loss": 4.7567, "step": 53370 }, { "epoch": 0.07500390791717675, "grad_norm": 0.8729245066642761, "learning_rate": 0.00022493044822256567, "loss": 4.8404, "step": 53380 }, { "epoch": 0.07501795885534032, "grad_norm": 0.8825687170028687, "learning_rate": 0.00022497260081495008, "loss": 4.8511, "step": 53390 }, { "epoch": 0.0750320097935039, "grad_norm": 1.1445651054382324, "learning_rate": 
0.00022501475340733454, "loss": 4.8355, "step": 53400 }, { "epoch": 0.07504606073166747, "grad_norm": 0.8848526477813721, "learning_rate": 0.00022505690599971897, "loss": 4.8923, "step": 53410 }, { "epoch": 0.07506011166983105, "grad_norm": 0.9172847867012024, "learning_rate": 0.0002250990585921034, "loss": 4.9146, "step": 53420 }, { "epoch": 0.07507416260799463, "grad_norm": 0.8731652498245239, "learning_rate": 0.00022514121118448783, "loss": 5.0147, "step": 53430 }, { "epoch": 0.07508821354615822, "grad_norm": 0.8790530562400818, "learning_rate": 0.00022518336377687226, "loss": 4.8628, "step": 53440 }, { "epoch": 0.07510226448432179, "grad_norm": 0.8736057281494141, "learning_rate": 0.00022522551636925667, "loss": 4.7773, "step": 53450 }, { "epoch": 0.07511631542248537, "grad_norm": 0.9131947755813599, "learning_rate": 0.00022526766896164112, "loss": 4.7888, "step": 53460 }, { "epoch": 0.07513036636064895, "grad_norm": 0.8734030723571777, "learning_rate": 0.00022530982155402555, "loss": 4.6335, "step": 53470 }, { "epoch": 0.07514441729881252, "grad_norm": 0.8606353402137756, "learning_rate": 0.00022535197414640999, "loss": 4.7898, "step": 53480 }, { "epoch": 0.0751584682369761, "grad_norm": 0.9181352853775024, "learning_rate": 0.00022539412673879442, "loss": 4.7451, "step": 53490 }, { "epoch": 0.07517251917513967, "grad_norm": 0.8751580119132996, "learning_rate": 0.00022543627933117885, "loss": 4.9376, "step": 53500 }, { "epoch": 0.07518657011330325, "grad_norm": 0.8582903742790222, "learning_rate": 0.00022547843192356325, "loss": 4.8904, "step": 53510 }, { "epoch": 0.07520062105146683, "grad_norm": 0.9157120585441589, "learning_rate": 0.0002255205845159477, "loss": 4.8151, "step": 53520 }, { "epoch": 0.0752146719896304, "grad_norm": 0.866913914680481, "learning_rate": 0.00022556273710833214, "loss": 4.8619, "step": 53530 }, { "epoch": 0.07522872292779398, "grad_norm": 0.9174304008483887, "learning_rate": 0.00022560488970071657, "loss": 4.8327, "step": 53540 }, { 
"epoch": 0.07524277386595757, "grad_norm": 0.8768737316131592, "learning_rate": 0.000225647042293101, "loss": 4.7639, "step": 53550 }, { "epoch": 0.07525682480412114, "grad_norm": 0.8870129585266113, "learning_rate": 0.00022568919488548544, "loss": 4.7549, "step": 53560 }, { "epoch": 0.07527087574228472, "grad_norm": 0.8497852087020874, "learning_rate": 0.0002257313474778699, "loss": 4.6357, "step": 53570 }, { "epoch": 0.0752849266804483, "grad_norm": 0.8594181537628174, "learning_rate": 0.0002257735000702543, "loss": 4.9172, "step": 53580 }, { "epoch": 0.07529897761861187, "grad_norm": 0.8665555715560913, "learning_rate": 0.00022581565266263873, "loss": 4.8535, "step": 53590 }, { "epoch": 0.07531302855677545, "grad_norm": 0.8849197030067444, "learning_rate": 0.00022585780525502316, "loss": 4.8296, "step": 53600 }, { "epoch": 0.07532707949493903, "grad_norm": 0.8826219439506531, "learning_rate": 0.0002258999578474076, "loss": 4.8087, "step": 53610 }, { "epoch": 0.0753411304331026, "grad_norm": 0.9253659248352051, "learning_rate": 0.00022594211043979202, "loss": 4.8322, "step": 53620 }, { "epoch": 0.07535518137126618, "grad_norm": 0.9128340482711792, "learning_rate": 0.00022598426303217648, "loss": 4.6608, "step": 53630 }, { "epoch": 0.07536923230942975, "grad_norm": 0.8583227396011353, "learning_rate": 0.00022602641562456088, "loss": 4.7733, "step": 53640 }, { "epoch": 0.07538328324759334, "grad_norm": 0.8400426506996155, "learning_rate": 0.00022606856821694532, "loss": 4.799, "step": 53650 }, { "epoch": 0.07539733418575692, "grad_norm": 0.9156240224838257, "learning_rate": 0.00022611072080932975, "loss": 4.7519, "step": 53660 }, { "epoch": 0.0754113851239205, "grad_norm": 0.902900218963623, "learning_rate": 0.00022615287340171418, "loss": 4.6597, "step": 53670 }, { "epoch": 0.07542543606208407, "grad_norm": 0.8428563475608826, "learning_rate": 0.0002261950259940986, "loss": 4.8529, "step": 53680 }, { "epoch": 0.07543948700024765, "grad_norm": 0.9369030594825745, 
"learning_rate": 0.00022623717858648307, "loss": 4.6786, "step": 53690 }, { "epoch": 0.07545353793841122, "grad_norm": 0.8624921441078186, "learning_rate": 0.0002262793311788675, "loss": 4.8759, "step": 53700 }, { "epoch": 0.0754675888765748, "grad_norm": 0.864963710308075, "learning_rate": 0.0002263214837712519, "loss": 4.8758, "step": 53710 }, { "epoch": 0.07548163981473838, "grad_norm": 0.8992347121238708, "learning_rate": 0.00022636363636363633, "loss": 4.7599, "step": 53720 }, { "epoch": 0.07549569075290195, "grad_norm": 0.9037030935287476, "learning_rate": 0.00022640578895602077, "loss": 4.8639, "step": 53730 }, { "epoch": 0.07550974169106553, "grad_norm": 0.917029857635498, "learning_rate": 0.0002264479415484052, "loss": 4.7925, "step": 53740 }, { "epoch": 0.07552379262922912, "grad_norm": 0.8903194665908813, "learning_rate": 0.00022649009414078965, "loss": 4.7699, "step": 53750 }, { "epoch": 0.0755378435673927, "grad_norm": 0.8409320712089539, "learning_rate": 0.00022653224673317409, "loss": 4.7887, "step": 53760 }, { "epoch": 0.07555189450555627, "grad_norm": 0.9254723787307739, "learning_rate": 0.0002265743993255585, "loss": 4.7081, "step": 53770 }, { "epoch": 0.07556594544371985, "grad_norm": 0.9033946990966797, "learning_rate": 0.00022661655191794292, "loss": 4.8571, "step": 53780 }, { "epoch": 0.07557999638188342, "grad_norm": 0.8677847385406494, "learning_rate": 0.00022665870451032735, "loss": 4.8679, "step": 53790 }, { "epoch": 0.075594047320047, "grad_norm": 0.871347963809967, "learning_rate": 0.0002267008571027118, "loss": 4.7151, "step": 53800 }, { "epoch": 0.07560809825821058, "grad_norm": 0.8525243997573853, "learning_rate": 0.00022674300969509624, "loss": 4.9235, "step": 53810 }, { "epoch": 0.07562214919637415, "grad_norm": 0.850013256072998, "learning_rate": 0.00022678516228748067, "loss": 4.9236, "step": 53820 }, { "epoch": 0.07563620013453773, "grad_norm": 0.8850587606430054, "learning_rate": 0.0002268273148798651, "loss": 4.8146, "step": 
53830 }, { "epoch": 0.0756502510727013, "grad_norm": 0.9525415301322937, "learning_rate": 0.0002268694674722495, "loss": 4.6886, "step": 53840 }, { "epoch": 0.07566430201086488, "grad_norm": 0.8798964619636536, "learning_rate": 0.00022691162006463394, "loss": 4.7848, "step": 53850 }, { "epoch": 0.07567835294902847, "grad_norm": 0.8539289236068726, "learning_rate": 0.0002269537726570184, "loss": 4.7355, "step": 53860 }, { "epoch": 0.07569240388719205, "grad_norm": 0.8648083806037903, "learning_rate": 0.00022699592524940283, "loss": 4.7876, "step": 53870 }, { "epoch": 0.07570645482535562, "grad_norm": 0.906799852848053, "learning_rate": 0.00022703807784178726, "loss": 4.8246, "step": 53880 }, { "epoch": 0.0757205057635192, "grad_norm": 0.9276502132415771, "learning_rate": 0.0002270802304341717, "loss": 4.8731, "step": 53890 }, { "epoch": 0.07573455670168278, "grad_norm": 0.8921993970870972, "learning_rate": 0.0002271223830265561, "loss": 4.8887, "step": 53900 }, { "epoch": 0.07574860763984635, "grad_norm": 0.8780595064163208, "learning_rate": 0.00022716453561894053, "loss": 4.7841, "step": 53910 }, { "epoch": 0.07576265857800993, "grad_norm": 0.9392471313476562, "learning_rate": 0.00022720668821132498, "loss": 4.8724, "step": 53920 }, { "epoch": 0.0757767095161735, "grad_norm": 0.89900803565979, "learning_rate": 0.00022724884080370942, "loss": 4.7855, "step": 53930 }, { "epoch": 0.07579076045433708, "grad_norm": 0.9714470505714417, "learning_rate": 0.00022729099339609385, "loss": 4.8981, "step": 53940 }, { "epoch": 0.07580481139250066, "grad_norm": 0.8538759350776672, "learning_rate": 0.00022733314598847828, "loss": 4.9213, "step": 53950 }, { "epoch": 0.07581886233066425, "grad_norm": 0.879152238368988, "learning_rate": 0.0002273752985808627, "loss": 4.8243, "step": 53960 }, { "epoch": 0.07583291326882782, "grad_norm": 0.8458477258682251, "learning_rate": 0.0002274174511732471, "loss": 4.8997, "step": 53970 }, { "epoch": 0.0758469642069914, "grad_norm": 
0.9155076146125793, "learning_rate": 0.00022745960376563157, "loss": 4.8447, "step": 53980 }, { "epoch": 0.07586101514515498, "grad_norm": 0.9187341332435608, "learning_rate": 0.000227501756358016, "loss": 4.9255, "step": 53990 }, { "epoch": 0.07587506608331855, "grad_norm": 0.8751149773597717, "learning_rate": 0.00022754390895040043, "loss": 4.8512, "step": 54000 }, { "epoch": 0.07588911702148213, "grad_norm": 0.8937033414840698, "learning_rate": 0.00022758606154278486, "loss": 4.8653, "step": 54010 }, { "epoch": 0.0759031679596457, "grad_norm": 0.862657904624939, "learning_rate": 0.0002276282141351693, "loss": 4.748, "step": 54020 }, { "epoch": 0.07591721889780928, "grad_norm": 0.8705819845199585, "learning_rate": 0.0002276703667275537, "loss": 4.8872, "step": 54030 }, { "epoch": 0.07593126983597286, "grad_norm": 0.8772851824760437, "learning_rate": 0.00022771251931993816, "loss": 4.9373, "step": 54040 }, { "epoch": 0.07594532077413643, "grad_norm": 0.8797933459281921, "learning_rate": 0.0002277546719123226, "loss": 4.8121, "step": 54050 }, { "epoch": 0.07595937171230002, "grad_norm": 0.8654122948646545, "learning_rate": 0.00022779682450470702, "loss": 4.76, "step": 54060 }, { "epoch": 0.0759734226504636, "grad_norm": 0.875860869884491, "learning_rate": 0.00022783897709709145, "loss": 4.7232, "step": 54070 }, { "epoch": 0.07598747358862717, "grad_norm": 0.9174049496650696, "learning_rate": 0.00022788112968947588, "loss": 4.7754, "step": 54080 }, { "epoch": 0.07600152452679075, "grad_norm": 0.8807012438774109, "learning_rate": 0.00022792328228186034, "loss": 4.8693, "step": 54090 }, { "epoch": 0.07601557546495433, "grad_norm": 0.8439174294471741, "learning_rate": 0.00022796543487424475, "loss": 4.8432, "step": 54100 }, { "epoch": 0.0760296264031179, "grad_norm": 0.9406492710113525, "learning_rate": 0.00022800758746662918, "loss": 4.8385, "step": 54110 }, { "epoch": 0.07604367734128148, "grad_norm": 0.8603804707527161, "learning_rate": 0.0002280497400590136, 
"loss": 4.7904, "step": 54120 }, { "epoch": 0.07605772827944506, "grad_norm": 0.8673272132873535, "learning_rate": 0.0002280876773921596, "loss": 4.8706, "step": 54130 }, { "epoch": 0.07607177921760863, "grad_norm": 0.8606926202774048, "learning_rate": 0.00022812982998454402, "loss": 4.7967, "step": 54140 }, { "epoch": 0.07608583015577221, "grad_norm": 0.8870376348495483, "learning_rate": 0.00022817198257692845, "loss": 4.8095, "step": 54150 }, { "epoch": 0.07609988109393578, "grad_norm": 0.8442344665527344, "learning_rate": 0.00022821413516931288, "loss": 4.8743, "step": 54160 }, { "epoch": 0.07611393203209937, "grad_norm": 0.8706654906272888, "learning_rate": 0.00022825628776169734, "loss": 4.839, "step": 54170 }, { "epoch": 0.07612798297026295, "grad_norm": 0.8698878288269043, "learning_rate": 0.00022829844035408177, "loss": 4.8972, "step": 54180 }, { "epoch": 0.07614203390842653, "grad_norm": 0.8605942726135254, "learning_rate": 0.00022834059294646618, "loss": 4.7414, "step": 54190 }, { "epoch": 0.0761560848465901, "grad_norm": 0.8853992819786072, "learning_rate": 0.0002283827455388506, "loss": 4.7935, "step": 54200 }, { "epoch": 0.07617013578475368, "grad_norm": 1.3746801614761353, "learning_rate": 0.00022842489813123504, "loss": 4.8538, "step": 54210 }, { "epoch": 0.07618418672291725, "grad_norm": 0.8895174264907837, "learning_rate": 0.00022846705072361947, "loss": 4.8864, "step": 54220 }, { "epoch": 0.07619823766108083, "grad_norm": 0.9199739098548889, "learning_rate": 0.00022850920331600393, "loss": 4.7924, "step": 54230 }, { "epoch": 0.07621228859924441, "grad_norm": 0.9062716364860535, "learning_rate": 0.00022855135590838836, "loss": 4.8258, "step": 54240 }, { "epoch": 0.07622633953740798, "grad_norm": 0.8298748731613159, "learning_rate": 0.0002285935085007728, "loss": 4.8469, "step": 54250 }, { "epoch": 0.07624039047557156, "grad_norm": 0.866485059261322, "learning_rate": 0.0002286356610931572, "loss": 4.8599, "step": 54260 }, { "epoch": 
0.07625444141373515, "grad_norm": 0.8970810174942017, "learning_rate": 0.00022867781368554163, "loss": 4.8573, "step": 54270 }, { "epoch": 0.07626849235189873, "grad_norm": 0.8525856137275696, "learning_rate": 0.00022871996627792606, "loss": 4.7595, "step": 54280 }, { "epoch": 0.0762825432900623, "grad_norm": 0.9844761490821838, "learning_rate": 0.00022876211887031052, "loss": 4.829, "step": 54290 }, { "epoch": 0.07629659422822588, "grad_norm": 0.8804373741149902, "learning_rate": 0.00022880427146269495, "loss": 4.8698, "step": 54300 }, { "epoch": 0.07631064516638945, "grad_norm": 0.9130948781967163, "learning_rate": 0.00022884642405507938, "loss": 4.8689, "step": 54310 }, { "epoch": 0.07632469610455303, "grad_norm": 0.8662232160568237, "learning_rate": 0.00022888857664746378, "loss": 4.9434, "step": 54320 }, { "epoch": 0.0763387470427166, "grad_norm": 0.8689405918121338, "learning_rate": 0.00022893072923984821, "loss": 4.892, "step": 54330 }, { "epoch": 0.07635279798088018, "grad_norm": 0.8666510581970215, "learning_rate": 0.00022897288183223264, "loss": 4.9568, "step": 54340 }, { "epoch": 0.07636684891904376, "grad_norm": 0.885850191116333, "learning_rate": 0.0002290150344246171, "loss": 4.8736, "step": 54350 }, { "epoch": 0.07638089985720733, "grad_norm": 0.8362758159637451, "learning_rate": 0.00022905718701700153, "loss": 4.7962, "step": 54360 }, { "epoch": 0.07639495079537093, "grad_norm": 0.8462679982185364, "learning_rate": 0.00022909933960938597, "loss": 4.7975, "step": 54370 }, { "epoch": 0.0764090017335345, "grad_norm": 0.8839713335037231, "learning_rate": 0.0002291414922017704, "loss": 4.7665, "step": 54380 }, { "epoch": 0.07642305267169808, "grad_norm": 0.8993695974349976, "learning_rate": 0.0002291836447941548, "loss": 4.8324, "step": 54390 }, { "epoch": 0.07643710360986165, "grad_norm": 0.8608269095420837, "learning_rate": 0.00022922579738653923, "loss": 4.7931, "step": 54400 }, { "epoch": 0.07645115454802523, "grad_norm": 0.8435550928115845, 
"learning_rate": 0.0002292679499789237, "loss": 4.7409, "step": 54410 }, { "epoch": 0.0764652054861888, "grad_norm": 0.9165601134300232, "learning_rate": 0.00022931010257130812, "loss": 4.7406, "step": 54420 }, { "epoch": 0.07647925642435238, "grad_norm": 0.8588644862174988, "learning_rate": 0.00022935225516369255, "loss": 4.7544, "step": 54430 }, { "epoch": 0.07649330736251596, "grad_norm": 0.9699477553367615, "learning_rate": 0.00022939440775607698, "loss": 4.8401, "step": 54440 }, { "epoch": 0.07650735830067953, "grad_norm": 0.822721004486084, "learning_rate": 0.0002294365603484614, "loss": 4.9051, "step": 54450 }, { "epoch": 0.07652140923884311, "grad_norm": 0.8678251504898071, "learning_rate": 0.00022947871294084585, "loss": 4.9236, "step": 54460 }, { "epoch": 0.07653546017700669, "grad_norm": 0.8811358213424683, "learning_rate": 0.00022952086553323028, "loss": 4.9256, "step": 54470 }, { "epoch": 0.07654951111517028, "grad_norm": 0.8711695075035095, "learning_rate": 0.0002295630181256147, "loss": 4.8998, "step": 54480 }, { "epoch": 0.07656356205333385, "grad_norm": 0.8371874094009399, "learning_rate": 0.00022960517071799914, "loss": 4.7928, "step": 54490 }, { "epoch": 0.07657761299149743, "grad_norm": 0.8641277551651001, "learning_rate": 0.00022964732331038357, "loss": 4.8052, "step": 54500 }, { "epoch": 0.076591663929661, "grad_norm": 0.8634895086288452, "learning_rate": 0.00022968947590276803, "loss": 4.9161, "step": 54510 }, { "epoch": 0.07660571486782458, "grad_norm": 0.9361057281494141, "learning_rate": 0.00022973162849515243, "loss": 4.8, "step": 54520 }, { "epoch": 0.07661976580598816, "grad_norm": 0.8911561965942383, "learning_rate": 0.00022977378108753686, "loss": 4.8536, "step": 54530 }, { "epoch": 0.07663381674415173, "grad_norm": 0.8985777497291565, "learning_rate": 0.0002298159336799213, "loss": 4.6861, "step": 54540 }, { "epoch": 0.07664786768231531, "grad_norm": 0.8521196246147156, "learning_rate": 0.00022985808627230573, "loss": 4.8514, "step": 
54550 }, { "epoch": 0.07666191862047889, "grad_norm": 0.8784716129302979, "learning_rate": 0.00022990023886469016, "loss": 4.8231, "step": 54560 }, { "epoch": 0.07667596955864246, "grad_norm": 0.8700553774833679, "learning_rate": 0.00022994239145707462, "loss": 4.8225, "step": 54570 }, { "epoch": 0.07669002049680605, "grad_norm": 0.8594143986701965, "learning_rate": 0.00022998454404945902, "loss": 4.8293, "step": 54580 }, { "epoch": 0.07670407143496963, "grad_norm": 0.8826280832290649, "learning_rate": 0.00023002669664184345, "loss": 4.8003, "step": 54590 }, { "epoch": 0.0767181223731332, "grad_norm": 0.9344685673713684, "learning_rate": 0.00023006884923422788, "loss": 4.688, "step": 54600 }, { "epoch": 0.07673217331129678, "grad_norm": 0.8549621105194092, "learning_rate": 0.00023010678656737386, "loss": 4.8083, "step": 54610 }, { "epoch": 0.07674622424946036, "grad_norm": 0.8863023519515991, "learning_rate": 0.0002301489391597583, "loss": 4.681, "step": 54620 }, { "epoch": 0.07676027518762393, "grad_norm": 0.8559615015983582, "learning_rate": 0.00023019109175214273, "loss": 4.8223, "step": 54630 }, { "epoch": 0.07677432612578751, "grad_norm": 0.8779832720756531, "learning_rate": 0.00023023324434452716, "loss": 4.8405, "step": 54640 }, { "epoch": 0.07678837706395109, "grad_norm": 0.8753707408905029, "learning_rate": 0.00023027539693691162, "loss": 4.8169, "step": 54650 }, { "epoch": 0.07680242800211466, "grad_norm": 0.8928582668304443, "learning_rate": 0.00023031754952929605, "loss": 4.7668, "step": 54660 }, { "epoch": 0.07681647894027824, "grad_norm": 0.8623060584068298, "learning_rate": 0.00023035970212168048, "loss": 4.7727, "step": 54670 }, { "epoch": 0.07683052987844183, "grad_norm": 0.9036771655082703, "learning_rate": 0.00023040185471406488, "loss": 4.7807, "step": 54680 }, { "epoch": 0.0768445808166054, "grad_norm": 0.8482112884521484, "learning_rate": 0.00023044400730644931, "loss": 4.7243, "step": 54690 }, { "epoch": 0.07685863175476898, "grad_norm": 
0.8324148058891296, "learning_rate": 0.00023048615989883375, "loss": 4.8233, "step": 54700 }, { "epoch": 0.07687268269293256, "grad_norm": 0.8993374109268188, "learning_rate": 0.0002305283124912182, "loss": 4.9474, "step": 54710 }, { "epoch": 0.07688673363109613, "grad_norm": 0.8752866387367249, "learning_rate": 0.00023057046508360263, "loss": 4.8148, "step": 54720 }, { "epoch": 0.07690078456925971, "grad_norm": 0.884166419506073, "learning_rate": 0.00023061261767598707, "loss": 4.9107, "step": 54730 }, { "epoch": 0.07691483550742328, "grad_norm": 0.8490076661109924, "learning_rate": 0.00023065477026837147, "loss": 4.7861, "step": 54740 }, { "epoch": 0.07692888644558686, "grad_norm": 0.8822079300880432, "learning_rate": 0.0002306969228607559, "loss": 4.959, "step": 54750 }, { "epoch": 0.07694293738375044, "grad_norm": 0.8657336831092834, "learning_rate": 0.00023073907545314033, "loss": 4.7831, "step": 54760 }, { "epoch": 0.07695698832191401, "grad_norm": 0.8511672019958496, "learning_rate": 0.0002307812280455248, "loss": 4.7675, "step": 54770 }, { "epoch": 0.07697103926007759, "grad_norm": 0.8640127778053284, "learning_rate": 0.00023082338063790922, "loss": 4.8313, "step": 54780 }, { "epoch": 0.07698509019824118, "grad_norm": 0.8668428659439087, "learning_rate": 0.00023086553323029365, "loss": 4.9068, "step": 54790 }, { "epoch": 0.07699914113640476, "grad_norm": 0.8341074585914612, "learning_rate": 0.00023090768582267808, "loss": 4.8259, "step": 54800 }, { "epoch": 0.07701319207456833, "grad_norm": 0.8660095930099487, "learning_rate": 0.0002309498384150625, "loss": 4.7503, "step": 54810 }, { "epoch": 0.07702724301273191, "grad_norm": 0.8701623678207397, "learning_rate": 0.00023099199100744692, "loss": 4.7477, "step": 54820 }, { "epoch": 0.07704129395089548, "grad_norm": 0.8704190850257874, "learning_rate": 0.00023103414359983138, "loss": 4.7662, "step": 54830 }, { "epoch": 0.07705534488905906, "grad_norm": 0.8704859614372253, "learning_rate": 0.0002310762961922158, 
"loss": 4.8061, "step": 54840 }, { "epoch": 0.07706939582722264, "grad_norm": 0.8688639998435974, "learning_rate": 0.00023111844878460024, "loss": 4.6981, "step": 54850 }, { "epoch": 0.07708344676538621, "grad_norm": 0.8302347660064697, "learning_rate": 0.00023116060137698467, "loss": 4.7741, "step": 54860 }, { "epoch": 0.07709749770354979, "grad_norm": 0.8849836587905884, "learning_rate": 0.00023120275396936908, "loss": 4.8139, "step": 54870 }, { "epoch": 0.07711154864171336, "grad_norm": 0.8696654438972473, "learning_rate": 0.0002312449065617535, "loss": 4.8226, "step": 54880 }, { "epoch": 0.07712559957987696, "grad_norm": 0.8935073018074036, "learning_rate": 0.00023128705915413796, "loss": 4.7954, "step": 54890 }, { "epoch": 0.07713965051804053, "grad_norm": 0.8748064637184143, "learning_rate": 0.0002313292117465224, "loss": 4.8052, "step": 54900 }, { "epoch": 0.07715370145620411, "grad_norm": 0.8696599006652832, "learning_rate": 0.00023137136433890683, "loss": 4.8104, "step": 54910 }, { "epoch": 0.07716775239436768, "grad_norm": 0.852148175239563, "learning_rate": 0.00023141351693129126, "loss": 4.7914, "step": 54920 }, { "epoch": 0.07718180333253126, "grad_norm": 0.849692702293396, "learning_rate": 0.0002314556695236757, "loss": 4.8389, "step": 54930 }, { "epoch": 0.07719585427069484, "grad_norm": 0.8747760057449341, "learning_rate": 0.0002314978221160601, "loss": 4.8114, "step": 54940 }, { "epoch": 0.07720990520885841, "grad_norm": 0.972870409488678, "learning_rate": 0.00023153997470844455, "loss": 4.9658, "step": 54950 }, { "epoch": 0.07722395614702199, "grad_norm": 0.883500337600708, "learning_rate": 0.00023158212730082898, "loss": 4.915, "step": 54960 }, { "epoch": 0.07723800708518556, "grad_norm": 0.860719621181488, "learning_rate": 0.00023162427989321341, "loss": 4.7428, "step": 54970 }, { "epoch": 0.07725205802334914, "grad_norm": 0.8673244118690491, "learning_rate": 0.00023166643248559785, "loss": 4.9087, "step": 54980 }, { "epoch": 
0.07726610896151273, "grad_norm": 0.862847626209259, "learning_rate": 0.00023170858507798228, "loss": 4.7927, "step": 54990 }, { "epoch": 0.0772801598996763, "grad_norm": 0.873631477355957, "learning_rate": 0.00023175073767036668, "loss": 4.7431, "step": 55000 }, { "epoch": 0.07729421083783988, "grad_norm": 0.8591784834861755, "learning_rate": 0.00023179289026275114, "loss": 4.8536, "step": 55010 }, { "epoch": 0.07730826177600346, "grad_norm": 0.8631607890129089, "learning_rate": 0.00023183504285513557, "loss": 4.7317, "step": 55020 }, { "epoch": 0.07732231271416704, "grad_norm": 0.8369123339653015, "learning_rate": 0.00023187719544752, "loss": 4.7036, "step": 55030 }, { "epoch": 0.07733636365233061, "grad_norm": 0.8408652544021606, "learning_rate": 0.00023191934803990443, "loss": 4.9028, "step": 55040 }, { "epoch": 0.07735041459049419, "grad_norm": 0.8339114189147949, "learning_rate": 0.0002319615006322889, "loss": 4.7352, "step": 55050 }, { "epoch": 0.07736446552865776, "grad_norm": 0.8363640308380127, "learning_rate": 0.00023200365322467327, "loss": 4.7126, "step": 55060 }, { "epoch": 0.07737851646682134, "grad_norm": 0.8503546714782715, "learning_rate": 0.00023204580581705773, "loss": 4.8552, "step": 55070 }, { "epoch": 0.07739256740498492, "grad_norm": 0.8623889088630676, "learning_rate": 0.00023208795840944216, "loss": 4.7625, "step": 55080 }, { "epoch": 0.07740661834314849, "grad_norm": 0.8672446608543396, "learning_rate": 0.0002321301110018266, "loss": 4.703, "step": 55090 }, { "epoch": 0.07742066928131208, "grad_norm": 0.83352130651474, "learning_rate": 0.00023217226359421102, "loss": 4.9044, "step": 55100 }, { "epoch": 0.07743472021947566, "grad_norm": 0.8357213139533997, "learning_rate": 0.00023221441618659548, "loss": 4.7765, "step": 55110 }, { "epoch": 0.07744877115763923, "grad_norm": 0.8617483973503113, "learning_rate": 0.0002322565687789799, "loss": 4.8928, "step": 55120 }, { "epoch": 0.07746282209580281, "grad_norm": 0.8757510185241699, 
"learning_rate": 0.0002322987213713643, "loss": 4.9618, "step": 55130 }, { "epoch": 0.07747687303396639, "grad_norm": 0.8584406971931458, "learning_rate": 0.00023234087396374874, "loss": 4.7733, "step": 55140 }, { "epoch": 0.07749092397212996, "grad_norm": 0.8585185408592224, "learning_rate": 0.00023238302655613318, "loss": 4.8477, "step": 55150 }, { "epoch": 0.07750497491029354, "grad_norm": 0.9038747549057007, "learning_rate": 0.0002324251791485176, "loss": 4.9543, "step": 55160 }, { "epoch": 0.07751902584845712, "grad_norm": 0.8479253649711609, "learning_rate": 0.00023246733174090206, "loss": 4.7465, "step": 55170 }, { "epoch": 0.07753307678662069, "grad_norm": 0.8460027575492859, "learning_rate": 0.0002325094843332865, "loss": 4.8782, "step": 55180 }, { "epoch": 0.07754712772478427, "grad_norm": 0.8895059823989868, "learning_rate": 0.0002325516369256709, "loss": 4.9051, "step": 55190 }, { "epoch": 0.07756117866294786, "grad_norm": 0.8584068417549133, "learning_rate": 0.00023259378951805533, "loss": 4.8017, "step": 55200 }, { "epoch": 0.07757522960111143, "grad_norm": 0.8654706478118896, "learning_rate": 0.00023263594211043976, "loss": 4.8685, "step": 55210 }, { "epoch": 0.07758928053927501, "grad_norm": 0.8444214463233948, "learning_rate": 0.0002326780947028242, "loss": 4.7856, "step": 55220 }, { "epoch": 0.07760333147743859, "grad_norm": 0.9347013235092163, "learning_rate": 0.00023272024729520865, "loss": 4.8631, "step": 55230 }, { "epoch": 0.07761738241560216, "grad_norm": 0.8600069880485535, "learning_rate": 0.00023276239988759308, "loss": 4.953, "step": 55240 }, { "epoch": 0.07763143335376574, "grad_norm": 0.8897565007209778, "learning_rate": 0.00023280455247997751, "loss": 4.8058, "step": 55250 }, { "epoch": 0.07764548429192931, "grad_norm": 0.868504524230957, "learning_rate": 0.00023284670507236192, "loss": 4.7797, "step": 55260 }, { "epoch": 0.07765953523009289, "grad_norm": 0.8592216372489929, "learning_rate": 0.00023288885766474635, "loss": 4.7845, 
"step": 55270 }, { "epoch": 0.07767358616825647, "grad_norm": 0.8826098442077637, "learning_rate": 0.00023293101025713078, "loss": 4.7159, "step": 55280 }, { "epoch": 0.07768763710642004, "grad_norm": 0.8514786958694458, "learning_rate": 0.00023297316284951524, "loss": 4.7917, "step": 55290 }, { "epoch": 0.07770168804458363, "grad_norm": 0.9170621037483215, "learning_rate": 0.00023301531544189967, "loss": 4.7785, "step": 55300 }, { "epoch": 0.07771573898274721, "grad_norm": 0.8372135162353516, "learning_rate": 0.0002330574680342841, "loss": 4.8629, "step": 55310 }, { "epoch": 0.07772978992091079, "grad_norm": 0.867605447769165, "learning_rate": 0.0002330996206266685, "loss": 4.8731, "step": 55320 }, { "epoch": 0.07774384085907436, "grad_norm": 0.8326741456985474, "learning_rate": 0.00023314177321905294, "loss": 4.9606, "step": 55330 }, { "epoch": 0.07775789179723794, "grad_norm": 0.8772097229957581, "learning_rate": 0.00023318392581143737, "loss": 4.7784, "step": 55340 }, { "epoch": 0.07777194273540151, "grad_norm": 0.8476175665855408, "learning_rate": 0.00023322607840382183, "loss": 4.9128, "step": 55350 }, { "epoch": 0.07778599367356509, "grad_norm": 0.8802474737167358, "learning_rate": 0.00023326823099620626, "loss": 4.7922, "step": 55360 }, { "epoch": 0.07780004461172867, "grad_norm": 0.8601409792900085, "learning_rate": 0.0002333103835885907, "loss": 4.8804, "step": 55370 }, { "epoch": 0.07781409554989224, "grad_norm": 0.8463119864463806, "learning_rate": 0.00023335253618097512, "loss": 4.8471, "step": 55380 }, { "epoch": 0.07782814648805582, "grad_norm": 0.8972960114479065, "learning_rate": 0.00023339468877335952, "loss": 4.8792, "step": 55390 }, { "epoch": 0.0778421974262194, "grad_norm": 0.9296767711639404, "learning_rate": 0.00023343684136574395, "loss": 4.7318, "step": 55400 }, { "epoch": 0.07785624836438299, "grad_norm": 0.9014337062835693, "learning_rate": 0.0002334789939581284, "loss": 4.8008, "step": 55410 }, { "epoch": 0.07787029930254656, 
"grad_norm": 0.8471907377243042, "learning_rate": 0.00023352114655051284, "loss": 4.9053, "step": 55420 }, { "epoch": 0.07788435024071014, "grad_norm": 0.8656927347183228, "learning_rate": 0.00023356329914289727, "loss": 4.8065, "step": 55430 }, { "epoch": 0.07789840117887371, "grad_norm": 0.8613098859786987, "learning_rate": 0.0002336054517352817, "loss": 5.0078, "step": 55440 }, { "epoch": 0.07791245211703729, "grad_norm": 0.8597536683082581, "learning_rate": 0.0002336476043276661, "loss": 4.8672, "step": 55450 }, { "epoch": 0.07792650305520087, "grad_norm": 0.8532637357711792, "learning_rate": 0.00023368975692005054, "loss": 4.7714, "step": 55460 }, { "epoch": 0.07794055399336444, "grad_norm": 0.9325604438781738, "learning_rate": 0.000233731909512435, "loss": 4.6782, "step": 55470 }, { "epoch": 0.07795460493152802, "grad_norm": 0.8740974068641663, "learning_rate": 0.00023377406210481943, "loss": 4.7074, "step": 55480 }, { "epoch": 0.0779686558696916, "grad_norm": 0.8521115183830261, "learning_rate": 0.00023381621469720386, "loss": 4.8376, "step": 55490 }, { "epoch": 0.07798270680785517, "grad_norm": 0.8538252711296082, "learning_rate": 0.0002338583672895883, "loss": 4.8655, "step": 55500 }, { "epoch": 0.07799675774601876, "grad_norm": 0.8827087879180908, "learning_rate": 0.00023390051988197272, "loss": 4.9127, "step": 55510 }, { "epoch": 0.07801080868418234, "grad_norm": 0.8596060872077942, "learning_rate": 0.00023394267247435713, "loss": 4.7259, "step": 55520 }, { "epoch": 0.07802485962234591, "grad_norm": 0.8459311723709106, "learning_rate": 0.0002339848250667416, "loss": 4.9033, "step": 55530 }, { "epoch": 0.07803891056050949, "grad_norm": 0.8380910754203796, "learning_rate": 0.00023402697765912602, "loss": 4.7834, "step": 55540 }, { "epoch": 0.07805296149867307, "grad_norm": 0.8815401792526245, "learning_rate": 0.00023406913025151045, "loss": 4.8435, "step": 55550 }, { "epoch": 0.07806701243683664, "grad_norm": 0.8380628228187561, "learning_rate": 
0.00023411128284389488, "loss": 4.8127, "step": 55560 }, { "epoch": 0.07808106337500022, "grad_norm": 0.8409835696220398, "learning_rate": 0.0002341534354362793, "loss": 4.8411, "step": 55570 }, { "epoch": 0.0780951143131638, "grad_norm": 0.8814390897750854, "learning_rate": 0.00023419558802866372, "loss": 4.8431, "step": 55580 }, { "epoch": 0.07810916525132737, "grad_norm": 0.8419836163520813, "learning_rate": 0.00023423774062104817, "loss": 4.8255, "step": 55590 }, { "epoch": 0.07812321618949095, "grad_norm": 0.8572084903717041, "learning_rate": 0.0002342798932134326, "loss": 4.727, "step": 55600 }, { "epoch": 0.07813726712765454, "grad_norm": 0.8860167860984802, "learning_rate": 0.00023432204580581704, "loss": 4.9694, "step": 55610 }, { "epoch": 0.07815131806581811, "grad_norm": 0.8654281497001648, "learning_rate": 0.00023436419839820147, "loss": 4.8643, "step": 55620 }, { "epoch": 0.07816536900398169, "grad_norm": 0.848604142665863, "learning_rate": 0.0002344063509905859, "loss": 4.9082, "step": 55630 }, { "epoch": 0.07817941994214526, "grad_norm": 0.8291561007499695, "learning_rate": 0.00023444850358297036, "loss": 4.9041, "step": 55640 }, { "epoch": 0.07819347088030884, "grad_norm": 0.8431739807128906, "learning_rate": 0.00023449065617535476, "loss": 4.8418, "step": 55650 }, { "epoch": 0.07820752181847242, "grad_norm": 0.8272885084152222, "learning_rate": 0.0002345328087677392, "loss": 4.7878, "step": 55660 }, { "epoch": 0.078221572756636, "grad_norm": 0.8414096236228943, "learning_rate": 0.00023457496136012362, "loss": 4.9341, "step": 55670 }, { "epoch": 0.07823562369479957, "grad_norm": 0.8919362425804138, "learning_rate": 0.00023461711395250805, "loss": 4.9505, "step": 55680 }, { "epoch": 0.07824967463296315, "grad_norm": 0.8313416242599487, "learning_rate": 0.0002346592665448925, "loss": 4.9542, "step": 55690 }, { "epoch": 0.07826372557112672, "grad_norm": 0.902398943901062, "learning_rate": 0.00023470141913727694, "loss": 4.8456, "step": 55700 }, { 
"epoch": 0.07827777650929031, "grad_norm": 0.8654336333274841, "learning_rate": 0.00023474357172966135, "loss": 4.7756, "step": 55710 }, { "epoch": 0.07829182744745389, "grad_norm": 0.848102867603302, "learning_rate": 0.00023478572432204578, "loss": 4.9353, "step": 55720 }, { "epoch": 0.07830587838561746, "grad_norm": 0.8511942028999329, "learning_rate": 0.0002348278769144302, "loss": 4.7061, "step": 55730 }, { "epoch": 0.07831992932378104, "grad_norm": 0.8495092988014221, "learning_rate": 0.00023487002950681464, "loss": 4.8499, "step": 55740 }, { "epoch": 0.07833398026194462, "grad_norm": 0.8534985780715942, "learning_rate": 0.0002349121820991991, "loss": 4.7557, "step": 55750 }, { "epoch": 0.07834803120010819, "grad_norm": 0.8438852429389954, "learning_rate": 0.00023495433469158353, "loss": 4.7647, "step": 55760 }, { "epoch": 0.07836208213827177, "grad_norm": 0.8534382581710815, "learning_rate": 0.00023499648728396793, "loss": 4.7887, "step": 55770 }, { "epoch": 0.07837613307643534, "grad_norm": 0.8537501096725464, "learning_rate": 0.00023503863987635237, "loss": 4.8719, "step": 55780 }, { "epoch": 0.07839018401459892, "grad_norm": 0.862980306148529, "learning_rate": 0.0002350807924687368, "loss": 4.8047, "step": 55790 }, { "epoch": 0.0784042349527625, "grad_norm": 0.8612253069877625, "learning_rate": 0.00023512294506112123, "loss": 4.8798, "step": 55800 }, { "epoch": 0.07841828589092607, "grad_norm": 0.8723065257072449, "learning_rate": 0.00023516509765350569, "loss": 4.8036, "step": 55810 }, { "epoch": 0.07843233682908966, "grad_norm": 0.8549647331237793, "learning_rate": 0.00023520725024589012, "loss": 4.9236, "step": 55820 }, { "epoch": 0.07844638776725324, "grad_norm": 0.8528120517730713, "learning_rate": 0.00023524940283827455, "loss": 4.8222, "step": 55830 }, { "epoch": 0.07846043870541682, "grad_norm": 0.8351045250892639, "learning_rate": 0.00023529155543065895, "loss": 4.8075, "step": 55840 }, { "epoch": 0.07847448964358039, "grad_norm": 
0.8468853831291199, "learning_rate": 0.00023533370802304338, "loss": 4.7578, "step": 55850 }, { "epoch": 0.07848854058174397, "grad_norm": 0.9013736844062805, "learning_rate": 0.00023537586061542781, "loss": 4.7043, "step": 55860 }, { "epoch": 0.07850259151990754, "grad_norm": 0.8511090874671936, "learning_rate": 0.00023541801320781227, "loss": 4.8785, "step": 55870 }, { "epoch": 0.07851664245807112, "grad_norm": 0.8878459930419922, "learning_rate": 0.0002354601658001967, "loss": 4.8643, "step": 55880 }, { "epoch": 0.0785306933962347, "grad_norm": 0.873618483543396, "learning_rate": 0.00023550231839258114, "loss": 4.7879, "step": 55890 }, { "epoch": 0.07854474433439827, "grad_norm": 0.829479992389679, "learning_rate": 0.00023554447098496554, "loss": 4.6622, "step": 55900 }, { "epoch": 0.07855879527256185, "grad_norm": 0.8406625986099243, "learning_rate": 0.00023558662357734997, "loss": 4.972, "step": 55910 }, { "epoch": 0.07857284621072544, "grad_norm": 0.8553104400634766, "learning_rate": 0.0002356287761697344, "loss": 4.7771, "step": 55920 }, { "epoch": 0.07858689714888902, "grad_norm": 0.8392889499664307, "learning_rate": 0.00023567092876211886, "loss": 4.7373, "step": 55930 }, { "epoch": 0.07860094808705259, "grad_norm": 0.8430394530296326, "learning_rate": 0.0002357130813545033, "loss": 4.9143, "step": 55940 }, { "epoch": 0.07861499902521617, "grad_norm": 0.8386268615722656, "learning_rate": 0.00023575523394688772, "loss": 4.8825, "step": 55950 }, { "epoch": 0.07862904996337974, "grad_norm": 0.8521925210952759, "learning_rate": 0.00023579738653927215, "loss": 4.7222, "step": 55960 }, { "epoch": 0.07864310090154332, "grad_norm": 0.8255799412727356, "learning_rate": 0.00023583953913165656, "loss": 4.8464, "step": 55970 }, { "epoch": 0.0786571518397069, "grad_norm": 0.9146137237548828, "learning_rate": 0.000235881691724041, "loss": 4.7756, "step": 55980 }, { "epoch": 0.07867120277787047, "grad_norm": 0.9210695624351501, "learning_rate": 0.00023592384431642545, 
"loss": 4.7982, "step": 55990 }, { "epoch": 0.07868525371603405, "grad_norm": 0.8495543003082275, "learning_rate": 0.00023596599690880988, "loss": 4.6518, "step": 56000 }, { "epoch": 0.07869930465419762, "grad_norm": 0.8346798419952393, "learning_rate": 0.0002360081495011943, "loss": 4.8465, "step": 56010 }, { "epoch": 0.07871335559236121, "grad_norm": 0.8985382914543152, "learning_rate": 0.00023605030209357874, "loss": 4.7423, "step": 56020 }, { "epoch": 0.07872740653052479, "grad_norm": 0.8527060747146606, "learning_rate": 0.00023609245468596314, "loss": 4.9118, "step": 56030 }, { "epoch": 0.07874145746868837, "grad_norm": 0.8444516658782959, "learning_rate": 0.00023613460727834758, "loss": 4.9117, "step": 56040 }, { "epoch": 0.07875550840685194, "grad_norm": 0.879189133644104, "learning_rate": 0.00023617675987073203, "loss": 4.6753, "step": 56050 }, { "epoch": 0.07876955934501552, "grad_norm": 0.8668951392173767, "learning_rate": 0.00023621891246311647, "loss": 4.8826, "step": 56060 }, { "epoch": 0.0787836102831791, "grad_norm": 0.8374396562576294, "learning_rate": 0.0002362610650555009, "loss": 4.8564, "step": 56070 }, { "epoch": 0.07879766122134267, "grad_norm": 0.8434193730354309, "learning_rate": 0.00023630321764788533, "loss": 4.85, "step": 56080 }, { "epoch": 0.07881171215950625, "grad_norm": 0.8440457582473755, "learning_rate": 0.00023634537024026976, "loss": 4.8728, "step": 56090 }, { "epoch": 0.07882576309766982, "grad_norm": 0.8732414245605469, "learning_rate": 0.00023638752283265416, "loss": 4.9149, "step": 56100 }, { "epoch": 0.0788398140358334, "grad_norm": 0.8345431685447693, "learning_rate": 0.00023642967542503862, "loss": 4.9031, "step": 56110 }, { "epoch": 0.07885386497399698, "grad_norm": 0.8340375423431396, "learning_rate": 0.00023647182801742305, "loss": 4.8705, "step": 56120 }, { "epoch": 0.07886791591216057, "grad_norm": 0.8727152943611145, "learning_rate": 0.00023651398060980748, "loss": 4.8331, "step": 56130 }, { "epoch": 
0.07888196685032414, "grad_norm": 0.8571959733963013, "learning_rate": 0.00023655613320219191, "loss": 4.8701, "step": 56140 }, { "epoch": 0.07889601778848772, "grad_norm": 0.8859211802482605, "learning_rate": 0.00023659828579457635, "loss": 4.7979, "step": 56150 }, { "epoch": 0.0789100687266513, "grad_norm": 0.8285291790962219, "learning_rate": 0.00023664043838696075, "loss": 4.862, "step": 56160 }, { "epoch": 0.07892411966481487, "grad_norm": 0.8508478403091431, "learning_rate": 0.0002366825909793452, "loss": 4.8707, "step": 56170 }, { "epoch": 0.07893817060297845, "grad_norm": 0.8419012427330017, "learning_rate": 0.00023672474357172964, "loss": 4.8833, "step": 56180 }, { "epoch": 0.07895222154114202, "grad_norm": 0.9277992844581604, "learning_rate": 0.00023676689616411407, "loss": 4.8056, "step": 56190 }, { "epoch": 0.0789662724793056, "grad_norm": 0.879784882068634, "learning_rate": 0.0002368090487564985, "loss": 4.9411, "step": 56200 }, { "epoch": 0.07898032341746918, "grad_norm": 0.8691972494125366, "learning_rate": 0.00023685120134888293, "loss": 4.7991, "step": 56210 }, { "epoch": 0.07899437435563275, "grad_norm": 0.8471894860267639, "learning_rate": 0.0002368933539412674, "loss": 4.6985, "step": 56220 }, { "epoch": 0.07900842529379634, "grad_norm": 0.8567607402801514, "learning_rate": 0.0002369355065336518, "loss": 4.8068, "step": 56230 }, { "epoch": 0.07902247623195992, "grad_norm": 0.8540003895759583, "learning_rate": 0.00023697765912603623, "loss": 4.6384, "step": 56240 }, { "epoch": 0.0790365271701235, "grad_norm": 0.886030912399292, "learning_rate": 0.00023701981171842066, "loss": 4.8265, "step": 56250 }, { "epoch": 0.07905057810828707, "grad_norm": 0.9057062268257141, "learning_rate": 0.0002370619643108051, "loss": 4.881, "step": 56260 }, { "epoch": 0.07906462904645065, "grad_norm": 0.8884521126747131, "learning_rate": 0.00023710411690318955, "loss": 4.7785, "step": 56270 }, { "epoch": 0.07907867998461422, "grad_norm": 0.8333665132522583, 
"learning_rate": 0.00023714626949557398, "loss": 4.83, "step": 56280 }, { "epoch": 0.0790927309227778, "grad_norm": 0.8531104922294617, "learning_rate": 0.00023718842208795838, "loss": 4.7392, "step": 56290 }, { "epoch": 0.07910678186094137, "grad_norm": 0.853764533996582, "learning_rate": 0.0002372305746803428, "loss": 4.8426, "step": 56300 }, { "epoch": 0.07912083279910495, "grad_norm": 0.8597157597541809, "learning_rate": 0.00023727272727272724, "loss": 4.9028, "step": 56310 }, { "epoch": 0.07913488373726853, "grad_norm": 0.8901702165603638, "learning_rate": 0.00023731487986511168, "loss": 4.7871, "step": 56320 }, { "epoch": 0.07914893467543212, "grad_norm": 0.8476951122283936, "learning_rate": 0.00023735703245749613, "loss": 4.7935, "step": 56330 }, { "epoch": 0.0791629856135957, "grad_norm": 0.8251456022262573, "learning_rate": 0.00023739918504988057, "loss": 4.8088, "step": 56340 }, { "epoch": 0.07917703655175927, "grad_norm": 0.8737718462944031, "learning_rate": 0.000237441337642265, "loss": 4.8629, "step": 56350 }, { "epoch": 0.07919108748992285, "grad_norm": 0.847468912601471, "learning_rate": 0.0002374834902346494, "loss": 4.8208, "step": 56360 }, { "epoch": 0.07920513842808642, "grad_norm": 0.8241137266159058, "learning_rate": 0.00023752564282703383, "loss": 4.7132, "step": 56370 }, { "epoch": 0.07921918936625, "grad_norm": 0.8659803867340088, "learning_rate": 0.00023756779541941826, "loss": 4.7646, "step": 56380 }, { "epoch": 0.07923324030441357, "grad_norm": 0.876525342464447, "learning_rate": 0.00023760994801180272, "loss": 4.8045, "step": 56390 }, { "epoch": 0.07924729124257715, "grad_norm": 0.8721202611923218, "learning_rate": 0.00023765210060418715, "loss": 4.8596, "step": 56400 }, { "epoch": 0.07926134218074073, "grad_norm": 0.8766302466392517, "learning_rate": 0.00023769425319657158, "loss": 4.8604, "step": 56410 }, { "epoch": 0.0792753931189043, "grad_norm": 0.8864064812660217, "learning_rate": 0.000237736405788956, "loss": 4.7736, "step": 56420 
}, { "epoch": 0.07928944405706788, "grad_norm": 0.8010143041610718, "learning_rate": 0.00023777855838134042, "loss": 4.8483, "step": 56430 }, { "epoch": 0.07930349499523147, "grad_norm": 0.8585301637649536, "learning_rate": 0.00023782071097372485, "loss": 4.8176, "step": 56440 }, { "epoch": 0.07931754593339504, "grad_norm": 0.8217198848724365, "learning_rate": 0.0002378628635661093, "loss": 4.8178, "step": 56450 }, { "epoch": 0.07933159687155862, "grad_norm": 0.8594200015068054, "learning_rate": 0.00023790501615849374, "loss": 4.7902, "step": 56460 }, { "epoch": 0.0793456478097222, "grad_norm": 0.8369775414466858, "learning_rate": 0.00023794716875087817, "loss": 4.6128, "step": 56470 }, { "epoch": 0.07935969874788577, "grad_norm": 0.8443807363510132, "learning_rate": 0.00023798932134326257, "loss": 4.9548, "step": 56480 }, { "epoch": 0.07937374968604935, "grad_norm": 0.921536386013031, "learning_rate": 0.000238031473935647, "loss": 4.9101, "step": 56490 }, { "epoch": 0.07938780062421293, "grad_norm": 0.842056155204773, "learning_rate": 0.00023807362652803144, "loss": 4.825, "step": 56500 }, { "epoch": 0.0794018515623765, "grad_norm": 0.8614702224731445, "learning_rate": 0.0002381157791204159, "loss": 4.7713, "step": 56510 }, { "epoch": 0.07941590250054008, "grad_norm": 0.8810839056968689, "learning_rate": 0.00023815793171280033, "loss": 4.9506, "step": 56520 }, { "epoch": 0.07942995343870365, "grad_norm": 0.8495205640792847, "learning_rate": 0.00023820008430518476, "loss": 4.8596, "step": 56530 }, { "epoch": 0.07944400437686724, "grad_norm": 0.829397439956665, "learning_rate": 0.0002382422368975692, "loss": 4.7906, "step": 56540 }, { "epoch": 0.07945805531503082, "grad_norm": 0.8445591926574707, "learning_rate": 0.0002382843894899536, "loss": 4.8183, "step": 56550 }, { "epoch": 0.0794721062531944, "grad_norm": 0.8689804673194885, "learning_rate": 0.00023832654208233802, "loss": 4.9633, "step": 56560 }, { "epoch": 0.07948615719135797, "grad_norm": 
0.8556300401687622, "learning_rate": 0.00023836869467472248, "loss": 4.8468, "step": 56570 }, { "epoch": 0.07950020812952155, "grad_norm": 0.9189597964286804, "learning_rate": 0.0002384108472671069, "loss": 4.7865, "step": 56580 }, { "epoch": 0.07951425906768513, "grad_norm": 0.8929227590560913, "learning_rate": 0.00023845299985949134, "loss": 4.9381, "step": 56590 }, { "epoch": 0.0795283100058487, "grad_norm": 0.8549538254737854, "learning_rate": 0.00023849515245187578, "loss": 4.8449, "step": 56600 }, { "epoch": 0.07954236094401228, "grad_norm": 0.8399584293365479, "learning_rate": 0.00023853730504426018, "loss": 4.8057, "step": 56610 }, { "epoch": 0.07955641188217585, "grad_norm": 0.8562041521072388, "learning_rate": 0.0002385794576366446, "loss": 4.7622, "step": 56620 }, { "epoch": 0.07957046282033943, "grad_norm": 0.8549632430076599, "learning_rate": 0.00023862161022902907, "loss": 4.7727, "step": 56630 }, { "epoch": 0.07958451375850302, "grad_norm": 0.8222755789756775, "learning_rate": 0.0002386637628214135, "loss": 4.8657, "step": 56640 }, { "epoch": 0.0795985646966666, "grad_norm": 0.8658560514450073, "learning_rate": 0.00023870591541379793, "loss": 4.8183, "step": 56650 }, { "epoch": 0.07961261563483017, "grad_norm": 0.8258546590805054, "learning_rate": 0.00023874806800618236, "loss": 4.8242, "step": 56660 }, { "epoch": 0.07962666657299375, "grad_norm": 0.8417488932609558, "learning_rate": 0.0002387902205985668, "loss": 4.7378, "step": 56670 }, { "epoch": 0.07964071751115732, "grad_norm": 0.8733723759651184, "learning_rate": 0.0002388323731909512, "loss": 4.7247, "step": 56680 }, { "epoch": 0.0796547684493209, "grad_norm": 0.8495579361915588, "learning_rate": 0.00023887452578333566, "loss": 4.8123, "step": 56690 }, { "epoch": 0.07966881938748448, "grad_norm": 0.8252209424972534, "learning_rate": 0.0002389166783757201, "loss": 4.791, "step": 56700 }, { "epoch": 0.07968287032564805, "grad_norm": 0.8873644471168518, "learning_rate": 0.00023895883096810452, 
"loss": 4.7452, "step": 56710 }, { "epoch": 0.07969692126381163, "grad_norm": 0.8369097113609314, "learning_rate": 0.00023900098356048895, "loss": 4.6592, "step": 56720 }, { "epoch": 0.0797109722019752, "grad_norm": 0.9035923480987549, "learning_rate": 0.00023904313615287338, "loss": 4.646, "step": 56730 }, { "epoch": 0.07972502314013878, "grad_norm": 0.87725430727005, "learning_rate": 0.00023908528874525778, "loss": 4.6869, "step": 56740 }, { "epoch": 0.07973907407830237, "grad_norm": 0.8985244631767273, "learning_rate": 0.00023912744133764224, "loss": 4.7142, "step": 56750 }, { "epoch": 0.07975312501646595, "grad_norm": 0.8276535868644714, "learning_rate": 0.00023916959393002667, "loss": 4.8682, "step": 56760 }, { "epoch": 0.07976717595462952, "grad_norm": 0.8182491660118103, "learning_rate": 0.0002392117465224111, "loss": 4.9084, "step": 56770 }, { "epoch": 0.0797812268927931, "grad_norm": 0.8240747451782227, "learning_rate": 0.00023925389911479554, "loss": 4.957, "step": 56780 }, { "epoch": 0.07979527783095668, "grad_norm": 0.8549389243125916, "learning_rate": 0.00023929605170717997, "loss": 4.8275, "step": 56790 }, { "epoch": 0.07980932876912025, "grad_norm": 0.8908678293228149, "learning_rate": 0.00023933820429956443, "loss": 4.9102, "step": 56800 }, { "epoch": 0.07982337970728383, "grad_norm": 0.8527509570121765, "learning_rate": 0.00023938035689194883, "loss": 4.9285, "step": 56810 }, { "epoch": 0.0798374306454474, "grad_norm": 0.8254045248031616, "learning_rate": 0.00023942250948433326, "loss": 4.8112, "step": 56820 }, { "epoch": 0.07985148158361098, "grad_norm": 0.850629448890686, "learning_rate": 0.0002394646620767177, "loss": 4.8258, "step": 56830 }, { "epoch": 0.07986553252177456, "grad_norm": 0.8825765252113342, "learning_rate": 0.00023950681466910212, "loss": 4.8338, "step": 56840 }, { "epoch": 0.07987958345993815, "grad_norm": 0.8650710582733154, "learning_rate": 0.00023954896726148658, "loss": 4.8267, "step": 56850 }, { "epoch": 
0.07989363439810172, "grad_norm": 0.8440834283828735, "learning_rate": 0.000239591119853871, "loss": 4.9396, "step": 56860 }, { "epoch": 0.0799076853362653, "grad_norm": 0.8128529191017151, "learning_rate": 0.00023963327244625542, "loss": 4.9086, "step": 56870 }, { "epoch": 0.07992173627442888, "grad_norm": 0.9077975749969482, "learning_rate": 0.00023967542503863985, "loss": 4.7803, "step": 56880 }, { "epoch": 0.07993578721259245, "grad_norm": 0.9005282521247864, "learning_rate": 0.00023971757763102428, "loss": 4.8147, "step": 56890 }, { "epoch": 0.07994983815075603, "grad_norm": 0.8272696137428284, "learning_rate": 0.0002397597302234087, "loss": 4.7913, "step": 56900 }, { "epoch": 0.0799638890889196, "grad_norm": 0.8787935972213745, "learning_rate": 0.00023980188281579317, "loss": 4.817, "step": 56910 }, { "epoch": 0.07997794002708318, "grad_norm": 0.8999579548835754, "learning_rate": 0.0002398440354081776, "loss": 4.8105, "step": 56920 }, { "epoch": 0.07999199096524676, "grad_norm": 0.8166282773017883, "learning_rate": 0.00023988618800056203, "loss": 4.8415, "step": 56930 }, { "epoch": 0.08000604190341033, "grad_norm": 0.8887068033218384, "learning_rate": 0.00023992834059294644, "loss": 4.8513, "step": 56940 }, { "epoch": 0.08002009284157392, "grad_norm": 0.8588322997093201, "learning_rate": 0.00023997049318533087, "loss": 4.7216, "step": 56950 }, { "epoch": 0.0800341437797375, "grad_norm": 0.8577817678451538, "learning_rate": 0.0002400126457777153, "loss": 4.8628, "step": 56960 }, { "epoch": 0.08004819471790107, "grad_norm": 0.8334136605262756, "learning_rate": 0.00024005479837009976, "loss": 4.8069, "step": 56970 }, { "epoch": 0.08006224565606465, "grad_norm": 0.8446940779685974, "learning_rate": 0.0002400969509624842, "loss": 4.771, "step": 56980 }, { "epoch": 0.08007629659422823, "grad_norm": 0.817052960395813, "learning_rate": 0.00024013910355486862, "loss": 4.8963, "step": 56990 }, { "epoch": 0.0800903475323918, "grad_norm": 0.8921146392822266, 
"learning_rate": 0.00024018125614725302, "loss": 4.8283, "step": 57000 }, { "epoch": 0.08010439847055538, "grad_norm": 0.8406715989112854, "learning_rate": 0.00024022340873963745, "loss": 4.759, "step": 57010 }, { "epoch": 0.08011844940871896, "grad_norm": 0.8852546811103821, "learning_rate": 0.00024026556133202188, "loss": 4.8704, "step": 57020 }, { "epoch": 0.08013250034688253, "grad_norm": 0.8223075866699219, "learning_rate": 0.00024030771392440634, "loss": 4.7321, "step": 57030 }, { "epoch": 0.08014655128504611, "grad_norm": 0.8500157594680786, "learning_rate": 0.00024034986651679077, "loss": 4.9298, "step": 57040 }, { "epoch": 0.08016060222320968, "grad_norm": 0.8550832867622375, "learning_rate": 0.0002403920191091752, "loss": 4.8613, "step": 57050 }, { "epoch": 0.08017465316137327, "grad_norm": 0.9191502928733826, "learning_rate": 0.00024043417170155964, "loss": 4.6645, "step": 57060 }, { "epoch": 0.08018870409953685, "grad_norm": 0.8604241013526917, "learning_rate": 0.00024047632429394404, "loss": 4.7982, "step": 57070 }, { "epoch": 0.08020275503770043, "grad_norm": 0.8823716640472412, "learning_rate": 0.00024051847688632847, "loss": 4.7106, "step": 57080 }, { "epoch": 0.080216805975864, "grad_norm": 0.8595419526100159, "learning_rate": 0.00024056062947871293, "loss": 4.835, "step": 57090 }, { "epoch": 0.08023085691402758, "grad_norm": 0.8353159427642822, "learning_rate": 0.00024060278207109736, "loss": 4.7358, "step": 57100 }, { "epoch": 0.08024490785219116, "grad_norm": 0.8417918682098389, "learning_rate": 0.0002406449346634818, "loss": 4.9533, "step": 57110 }, { "epoch": 0.08025895879035473, "grad_norm": 0.8293863534927368, "learning_rate": 0.00024068708725586622, "loss": 4.9028, "step": 57120 }, { "epoch": 0.08027300972851831, "grad_norm": 0.8590785264968872, "learning_rate": 0.00024072923984825063, "loss": 4.8322, "step": 57130 }, { "epoch": 0.08028706066668188, "grad_norm": 0.8248816728591919, "learning_rate": 0.00024077139244063506, "loss": 4.7921, 
"step": 57140 }, { "epoch": 0.08030111160484546, "grad_norm": 0.8333460688591003, "learning_rate": 0.00024081354503301952, "loss": 4.9145, "step": 57150 }, { "epoch": 0.08031516254300905, "grad_norm": 0.8591793179512024, "learning_rate": 0.00024085569762540395, "loss": 4.8857, "step": 57160 }, { "epoch": 0.08032921348117263, "grad_norm": 0.8233816623687744, "learning_rate": 0.00024089785021778838, "loss": 4.8737, "step": 57170 }, { "epoch": 0.0803432644193362, "grad_norm": 0.8355622887611389, "learning_rate": 0.0002409400028101728, "loss": 4.7534, "step": 57180 }, { "epoch": 0.08035731535749978, "grad_norm": 0.8862990140914917, "learning_rate": 0.00024098215540255721, "loss": 4.952, "step": 57190 }, { "epoch": 0.08037136629566335, "grad_norm": 0.8540744781494141, "learning_rate": 0.00024102430799494165, "loss": 4.8611, "step": 57200 }, { "epoch": 0.08038541723382693, "grad_norm": 0.8337244987487793, "learning_rate": 0.0002410664605873261, "loss": 4.8597, "step": 57210 }, { "epoch": 0.0803994681719905, "grad_norm": 0.859093189239502, "learning_rate": 0.00024110861317971053, "loss": 4.7506, "step": 57220 }, { "epoch": 0.08041351911015408, "grad_norm": 0.8373620510101318, "learning_rate": 0.00024115076577209497, "loss": 4.7512, "step": 57230 }, { "epoch": 0.08042757004831766, "grad_norm": 0.9458513855934143, "learning_rate": 0.0002411929183644794, "loss": 4.849, "step": 57240 }, { "epoch": 0.08044162098648124, "grad_norm": 0.830441415309906, "learning_rate": 0.00024123507095686383, "loss": 4.8426, "step": 57250 }, { "epoch": 0.08045567192464483, "grad_norm": 0.8454274535179138, "learning_rate": 0.00024127722354924823, "loss": 4.931, "step": 57260 }, { "epoch": 0.0804697228628084, "grad_norm": 0.8949636816978455, "learning_rate": 0.0002413193761416327, "loss": 4.8015, "step": 57270 }, { "epoch": 0.08048377380097198, "grad_norm": 0.8301366567611694, "learning_rate": 0.00024136152873401712, "loss": 4.7237, "step": 57280 }, { "epoch": 0.08049782473913555, "grad_norm": 
0.8346551060676575, "learning_rate": 0.00024140368132640155, "loss": 4.8849, "step": 57290 }, { "epoch": 0.08051187567729913, "grad_norm": 0.8517528772354126, "learning_rate": 0.00024144583391878598, "loss": 4.9193, "step": 57300 }, { "epoch": 0.0805259266154627, "grad_norm": 0.8681390285491943, "learning_rate": 0.00024148798651117042, "loss": 4.8459, "step": 57310 }, { "epoch": 0.08053997755362628, "grad_norm": 0.8334530591964722, "learning_rate": 0.00024153013910355482, "loss": 4.7559, "step": 57320 }, { "epoch": 0.08055402849178986, "grad_norm": 0.844562828540802, "learning_rate": 0.00024157229169593928, "loss": 4.7291, "step": 57330 }, { "epoch": 0.08056807942995343, "grad_norm": 0.8150333166122437, "learning_rate": 0.0002416144442883237, "loss": 4.8139, "step": 57340 }, { "epoch": 0.08058213036811701, "grad_norm": 0.8444922566413879, "learning_rate": 0.00024165659688070814, "loss": 4.8298, "step": 57350 }, { "epoch": 0.08059618130628059, "grad_norm": 0.832202672958374, "learning_rate": 0.00024169874947309257, "loss": 4.6788, "step": 57360 }, { "epoch": 0.08061023224444418, "grad_norm": 0.819246232509613, "learning_rate": 0.000241740902065477, "loss": 4.8065, "step": 57370 }, { "epoch": 0.08062428318260775, "grad_norm": 0.8348022103309631, "learning_rate": 0.00024178305465786146, "loss": 4.8509, "step": 57380 }, { "epoch": 0.08063833412077133, "grad_norm": 0.85079425573349, "learning_rate": 0.00024182520725024586, "loss": 4.7028, "step": 57390 }, { "epoch": 0.0806523850589349, "grad_norm": 0.8373984098434448, "learning_rate": 0.0002418673598426303, "loss": 4.7505, "step": 57400 }, { "epoch": 0.08066643599709848, "grad_norm": 0.8485515713691711, "learning_rate": 0.00024190951243501473, "loss": 4.8502, "step": 57410 }, { "epoch": 0.08068048693526206, "grad_norm": 0.8618393540382385, "learning_rate": 0.00024195166502739916, "loss": 4.8078, "step": 57420 }, { "epoch": 0.08069453787342563, "grad_norm": 0.8273009657859802, "learning_rate": 0.00024199381761978362, 
"loss": 4.8212, "step": 57430 }, { "epoch": 0.08070858881158921, "grad_norm": 0.8302605748176575, "learning_rate": 0.00024203597021216805, "loss": 4.6817, "step": 57440 }, { "epoch": 0.08072263974975279, "grad_norm": 0.8489684462547302, "learning_rate": 0.00024207812280455245, "loss": 4.79, "step": 57450 }, { "epoch": 0.08073669068791636, "grad_norm": 0.842799186706543, "learning_rate": 0.00024212027539693688, "loss": 4.8192, "step": 57460 }, { "epoch": 0.08075074162607995, "grad_norm": 0.8465150594711304, "learning_rate": 0.00024216242798932131, "loss": 4.8832, "step": 57470 }, { "epoch": 0.08076479256424353, "grad_norm": 0.8409611582756042, "learning_rate": 0.00024220458058170575, "loss": 4.7202, "step": 57480 }, { "epoch": 0.0807788435024071, "grad_norm": 0.8487250804901123, "learning_rate": 0.0002422467331740902, "loss": 4.748, "step": 57490 }, { "epoch": 0.08079289444057068, "grad_norm": 0.845812976360321, "learning_rate": 0.00024228888576647463, "loss": 4.7638, "step": 57500 }, { "epoch": 0.08080694537873426, "grad_norm": 0.8647302985191345, "learning_rate": 0.00024233103835885907, "loss": 4.8452, "step": 57510 }, { "epoch": 0.08082099631689783, "grad_norm": 0.8373607993125916, "learning_rate": 0.00024237319095124347, "loss": 4.8014, "step": 57520 }, { "epoch": 0.08083504725506141, "grad_norm": 0.8152607679367065, "learning_rate": 0.0002424153435436279, "loss": 4.8544, "step": 57530 }, { "epoch": 0.08084909819322499, "grad_norm": 0.8385706543922424, "learning_rate": 0.00024245749613601233, "loss": 4.8766, "step": 57540 }, { "epoch": 0.08086314913138856, "grad_norm": 0.8651167154312134, "learning_rate": 0.0002424996487283968, "loss": 4.8027, "step": 57550 }, { "epoch": 0.08087720006955214, "grad_norm": 0.837224543094635, "learning_rate": 0.00024254180132078122, "loss": 4.7422, "step": 57560 }, { "epoch": 0.08089125100771573, "grad_norm": 0.8273807168006897, "learning_rate": 0.00024258395391316565, "loss": 4.7841, "step": 57570 }, { "epoch": 0.0809053019458793, 
"grad_norm": 0.8563796281814575, "learning_rate": 0.00024262610650555006, "loss": 4.825, "step": 57580 }, { "epoch": 0.08091935288404288, "grad_norm": 0.8224353790283203, "learning_rate": 0.0002426682590979345, "loss": 4.8982, "step": 57590 }, { "epoch": 0.08093340382220646, "grad_norm": 0.8756199479103088, "learning_rate": 0.00024271041169031892, "loss": 4.7504, "step": 57600 }, { "epoch": 0.08094745476037003, "grad_norm": 0.8563529253005981, "learning_rate": 0.00024275256428270338, "loss": 4.8682, "step": 57610 }, { "epoch": 0.08096150569853361, "grad_norm": 0.8547507524490356, "learning_rate": 0.0002427947168750878, "loss": 4.8426, "step": 57620 }, { "epoch": 0.08097555663669719, "grad_norm": 0.8033429980278015, "learning_rate": 0.00024283686946747224, "loss": 4.8511, "step": 57630 }, { "epoch": 0.08098960757486076, "grad_norm": 0.8401547074317932, "learning_rate": 0.00024287902205985667, "loss": 4.7927, "step": 57640 }, { "epoch": 0.08100365851302434, "grad_norm": 0.8384739756584167, "learning_rate": 0.00024292117465224108, "loss": 4.8009, "step": 57650 }, { "epoch": 0.08101770945118791, "grad_norm": 0.840840220451355, "learning_rate": 0.0002429633272446255, "loss": 4.7733, "step": 57660 }, { "epoch": 0.08103176038935149, "grad_norm": 0.8522785902023315, "learning_rate": 0.00024300547983700996, "loss": 4.7992, "step": 57670 }, { "epoch": 0.08104581132751508, "grad_norm": 0.8566286563873291, "learning_rate": 0.0002430476324293944, "loss": 4.8891, "step": 57680 }, { "epoch": 0.08105986226567866, "grad_norm": 0.8864015340805054, "learning_rate": 0.00024308978502177883, "loss": 4.8248, "step": 57690 }, { "epoch": 0.08107391320384223, "grad_norm": 0.8461414575576782, "learning_rate": 0.00024313193761416326, "loss": 4.8703, "step": 57700 }, { "epoch": 0.08108796414200581, "grad_norm": 0.8322480916976929, "learning_rate": 0.00024317409020654766, "loss": 4.872, "step": 57710 }, { "epoch": 0.08110201508016938, "grad_norm": 0.8064298033714294, "learning_rate": 
0.0002432162427989321, "loss": 4.9469, "step": 57720 }, { "epoch": 0.08111606601833296, "grad_norm": 0.8545445799827576, "learning_rate": 0.00024325839539131655, "loss": 4.8237, "step": 57730 }, { "epoch": 0.08113011695649654, "grad_norm": 0.8561162948608398, "learning_rate": 0.00024330054798370098, "loss": 4.8889, "step": 57740 }, { "epoch": 0.08114416789466011, "grad_norm": 0.862308919429779, "learning_rate": 0.00024334270057608541, "loss": 4.6817, "step": 57750 }, { "epoch": 0.08115821883282369, "grad_norm": 0.832358717918396, "learning_rate": 0.00024338485316846985, "loss": 4.819, "step": 57760 }, { "epoch": 0.08117226977098727, "grad_norm": 0.8506022095680237, "learning_rate": 0.00024342700576085428, "loss": 4.7971, "step": 57770 }, { "epoch": 0.08118632070915086, "grad_norm": 0.8205908536911011, "learning_rate": 0.00024346915835323868, "loss": 4.7174, "step": 57780 }, { "epoch": 0.08120037164731443, "grad_norm": 0.843183159828186, "learning_rate": 0.00024351131094562314, "loss": 4.8403, "step": 57790 }, { "epoch": 0.08121442258547801, "grad_norm": 0.8414803147315979, "learning_rate": 0.00024355346353800757, "loss": 4.7169, "step": 57800 }, { "epoch": 0.08122847352364158, "grad_norm": 0.8241091370582581, "learning_rate": 0.000243595616130392, "loss": 4.7469, "step": 57810 }, { "epoch": 0.08124252446180516, "grad_norm": 0.8284342288970947, "learning_rate": 0.00024363776872277643, "loss": 4.8104, "step": 57820 }, { "epoch": 0.08125657539996874, "grad_norm": 0.8515377044677734, "learning_rate": 0.00024367992131516086, "loss": 4.7973, "step": 57830 }, { "epoch": 0.08127062633813231, "grad_norm": 0.8532301187515259, "learning_rate": 0.00024372207390754527, "loss": 4.7666, "step": 57840 }, { "epoch": 0.08128467727629589, "grad_norm": 0.8152916431427002, "learning_rate": 0.00024376422649992973, "loss": 4.797, "step": 57850 }, { "epoch": 0.08129872821445946, "grad_norm": 0.834984540939331, "learning_rate": 0.00024380637909231416, "loss": 4.7428, "step": 57860 }, { 
"epoch": 0.08131277915262304, "grad_norm": 0.8661616444587708, "learning_rate": 0.0002438485316846986, "loss": 4.8754, "step": 57870 }, { "epoch": 0.08132683009078663, "grad_norm": 0.843447744846344, "learning_rate": 0.00024389068427708302, "loss": 5.0731, "step": 57880 }, { "epoch": 0.08134088102895021, "grad_norm": 0.8291600346565247, "learning_rate": 0.00024393283686946745, "loss": 4.8219, "step": 57890 }, { "epoch": 0.08135493196711378, "grad_norm": 0.8383052349090576, "learning_rate": 0.00024397077420261343, "loss": 4.7993, "step": 57900 }, { "epoch": 0.08136898290527736, "grad_norm": 0.8022859692573547, "learning_rate": 0.00024401292679499786, "loss": 4.8967, "step": 57910 }, { "epoch": 0.08138303384344094, "grad_norm": 0.8225985169410706, "learning_rate": 0.00024405507938738232, "loss": 4.7647, "step": 57920 }, { "epoch": 0.08139708478160451, "grad_norm": 0.8517218828201294, "learning_rate": 0.00024409723197976675, "loss": 4.7826, "step": 57930 }, { "epoch": 0.08141113571976809, "grad_norm": 0.8240352869033813, "learning_rate": 0.00024413938457215116, "loss": 4.7381, "step": 57940 }, { "epoch": 0.08142518665793166, "grad_norm": 0.8103241324424744, "learning_rate": 0.0002441815371645356, "loss": 4.7835, "step": 57950 }, { "epoch": 0.08143923759609524, "grad_norm": 0.8353750705718994, "learning_rate": 0.00024422368975692005, "loss": 4.8407, "step": 57960 }, { "epoch": 0.08145328853425882, "grad_norm": 0.8388403058052063, "learning_rate": 0.0002442658423493045, "loss": 4.8119, "step": 57970 }, { "epoch": 0.08146733947242239, "grad_norm": 0.8061336278915405, "learning_rate": 0.0002443079949416889, "loss": 4.7948, "step": 57980 }, { "epoch": 0.08148139041058598, "grad_norm": 0.9352821707725525, "learning_rate": 0.00024435014753407334, "loss": 4.8472, "step": 57990 }, { "epoch": 0.08149544134874956, "grad_norm": 0.8587878346443176, "learning_rate": 0.0002443923001264577, "loss": 4.8216, "step": 58000 }, { "epoch": 0.08150949228691313, "grad_norm": 
0.8197136521339417, "learning_rate": 0.0002444344527188422, "loss": 4.7632, "step": 58010 }, { "epoch": 0.08152354322507671, "grad_norm": 0.8324862718582153, "learning_rate": 0.00024447660531122663, "loss": 4.8624, "step": 58020 }, { "epoch": 0.08153759416324029, "grad_norm": 0.8072025179862976, "learning_rate": 0.00024451875790361107, "loss": 4.8684, "step": 58030 }, { "epoch": 0.08155164510140386, "grad_norm": 0.8620959520339966, "learning_rate": 0.0002445609104959955, "loss": 4.8949, "step": 58040 }, { "epoch": 0.08156569603956744, "grad_norm": 0.8675084114074707, "learning_rate": 0.00024460306308837993, "loss": 4.7576, "step": 58050 }, { "epoch": 0.08157974697773102, "grad_norm": 0.8540964126586914, "learning_rate": 0.00024464521568076436, "loss": 4.7649, "step": 58060 }, { "epoch": 0.08159379791589459, "grad_norm": 0.8450266122817993, "learning_rate": 0.0002446873682731488, "loss": 4.8923, "step": 58070 }, { "epoch": 0.08160784885405817, "grad_norm": 0.8445958495140076, "learning_rate": 0.0002447295208655332, "loss": 4.7243, "step": 58080 }, { "epoch": 0.08162189979222176, "grad_norm": 0.823774516582489, "learning_rate": 0.00024477167345791765, "loss": 4.7552, "step": 58090 }, { "epoch": 0.08163595073038533, "grad_norm": 0.8373647928237915, "learning_rate": 0.0002448138260503021, "loss": 4.7746, "step": 58100 }, { "epoch": 0.08165000166854891, "grad_norm": 0.8526290655136108, "learning_rate": 0.0002448559786426865, "loss": 4.7328, "step": 58110 }, { "epoch": 0.08166405260671249, "grad_norm": 0.8044569492340088, "learning_rate": 0.00024489813123507095, "loss": 4.8296, "step": 58120 }, { "epoch": 0.08167810354487606, "grad_norm": 0.8177915811538696, "learning_rate": 0.0002449402838274554, "loss": 4.8325, "step": 58130 }, { "epoch": 0.08169215448303964, "grad_norm": 0.8297937512397766, "learning_rate": 0.0002449824364198398, "loss": 4.8649, "step": 58140 }, { "epoch": 0.08170620542120322, "grad_norm": 0.8474819660186768, "learning_rate": 0.00024502458901222424, 
"loss": 4.7662, "step": 58150 }, { "epoch": 0.08172025635936679, "grad_norm": 0.876619815826416, "learning_rate": 0.00024506674160460867, "loss": 4.8893, "step": 58160 }, { "epoch": 0.08173430729753037, "grad_norm": 0.8410592675209045, "learning_rate": 0.0002451088941969931, "loss": 4.7708, "step": 58170 }, { "epoch": 0.08174835823569394, "grad_norm": 0.8169021010398865, "learning_rate": 0.00024515104678937753, "loss": 4.892, "step": 58180 }, { "epoch": 0.08176240917385753, "grad_norm": 0.8390880823135376, "learning_rate": 0.00024519319938176196, "loss": 4.8694, "step": 58190 }, { "epoch": 0.08177646011202111, "grad_norm": 0.8502917885780334, "learning_rate": 0.0002452353519741464, "loss": 4.7237, "step": 58200 }, { "epoch": 0.08179051105018469, "grad_norm": 0.8233117461204529, "learning_rate": 0.0002452775045665308, "loss": 4.7619, "step": 58210 }, { "epoch": 0.08180456198834826, "grad_norm": 0.8351155519485474, "learning_rate": 0.00024531965715891526, "loss": 4.8656, "step": 58220 }, { "epoch": 0.08181861292651184, "grad_norm": 0.8331232666969299, "learning_rate": 0.0002453618097512997, "loss": 4.7766, "step": 58230 }, { "epoch": 0.08183266386467541, "grad_norm": 0.8282060623168945, "learning_rate": 0.0002454039623436841, "loss": 4.7699, "step": 58240 }, { "epoch": 0.08184671480283899, "grad_norm": 0.8700007200241089, "learning_rate": 0.00024544611493606855, "loss": 4.7489, "step": 58250 }, { "epoch": 0.08186076574100257, "grad_norm": 0.8399953842163086, "learning_rate": 0.000245488267528453, "loss": 4.7302, "step": 58260 }, { "epoch": 0.08187481667916614, "grad_norm": 0.8028297424316406, "learning_rate": 0.0002455304201208374, "loss": 4.9267, "step": 58270 }, { "epoch": 0.08188886761732972, "grad_norm": 0.8101634979248047, "learning_rate": 0.00024557257271322184, "loss": 4.8224, "step": 58280 }, { "epoch": 0.0819029185554933, "grad_norm": 0.8356708884239197, "learning_rate": 0.0002456147253056063, "loss": 4.7565, "step": 58290 }, { "epoch": 0.08191696949365689, 
"grad_norm": 0.8344118595123291, "learning_rate": 0.0002456568778979907, "loss": 4.812, "step": 58300 }, { "epoch": 0.08193102043182046, "grad_norm": 0.8281126022338867, "learning_rate": 0.00024569903049037514, "loss": 4.8933, "step": 58310 }, { "epoch": 0.08194507136998404, "grad_norm": 0.8315061926841736, "learning_rate": 0.00024574118308275957, "loss": 4.6364, "step": 58320 }, { "epoch": 0.08195912230814761, "grad_norm": 0.8570355772972107, "learning_rate": 0.000245783335675144, "loss": 4.8018, "step": 58330 }, { "epoch": 0.08197317324631119, "grad_norm": 0.8422298431396484, "learning_rate": 0.00024582548826752843, "loss": 4.7245, "step": 58340 }, { "epoch": 0.08198722418447477, "grad_norm": 0.8204920887947083, "learning_rate": 0.00024586764085991286, "loss": 4.7966, "step": 58350 }, { "epoch": 0.08200127512263834, "grad_norm": 0.8395097851753235, "learning_rate": 0.0002459097934522973, "loss": 4.7934, "step": 58360 }, { "epoch": 0.08201532606080192, "grad_norm": 0.8092932105064392, "learning_rate": 0.0002459519460446817, "loss": 4.812, "step": 58370 }, { "epoch": 0.0820293769989655, "grad_norm": 0.818016529083252, "learning_rate": 0.00024599409863706616, "loss": 4.8381, "step": 58380 }, { "epoch": 0.08204342793712907, "grad_norm": 0.866174042224884, "learning_rate": 0.0002460362512294506, "loss": 4.8195, "step": 58390 }, { "epoch": 0.08205747887529266, "grad_norm": 0.8038418292999268, "learning_rate": 0.000246078403821835, "loss": 4.8522, "step": 58400 }, { "epoch": 0.08207152981345624, "grad_norm": 0.8310818672180176, "learning_rate": 0.00024612055641421945, "loss": 4.8415, "step": 58410 }, { "epoch": 0.08208558075161981, "grad_norm": 0.8277154564857483, "learning_rate": 0.0002461627090066039, "loss": 4.738, "step": 58420 }, { "epoch": 0.08209963168978339, "grad_norm": 0.8364769220352173, "learning_rate": 0.0002462048615989883, "loss": 4.8246, "step": 58430 }, { "epoch": 0.08211368262794697, "grad_norm": 0.8868595361709595, "learning_rate": 
0.00024624701419137274, "loss": 4.8204, "step": 58440 }, { "epoch": 0.08212773356611054, "grad_norm": 0.8325658440589905, "learning_rate": 0.0002462891667837572, "loss": 4.7931, "step": 58450 }, { "epoch": 0.08214178450427412, "grad_norm": 0.8351956605911255, "learning_rate": 0.0002463313193761416, "loss": 4.8991, "step": 58460 }, { "epoch": 0.0821558354424377, "grad_norm": 0.8221379518508911, "learning_rate": 0.00024637347196852604, "loss": 4.867, "step": 58470 }, { "epoch": 0.08216988638060127, "grad_norm": 0.827820897102356, "learning_rate": 0.00024641562456091047, "loss": 4.793, "step": 58480 }, { "epoch": 0.08218393731876485, "grad_norm": 0.902683436870575, "learning_rate": 0.0002464577771532949, "loss": 4.7763, "step": 58490 }, { "epoch": 0.08219798825692844, "grad_norm": 0.8596385717391968, "learning_rate": 0.00024649992974567933, "loss": 4.7973, "step": 58500 }, { "epoch": 0.08221203919509201, "grad_norm": 0.8017410635948181, "learning_rate": 0.00024654208233806376, "loss": 4.8217, "step": 58510 }, { "epoch": 0.08222609013325559, "grad_norm": 0.8284733891487122, "learning_rate": 0.0002465842349304482, "loss": 4.8301, "step": 58520 }, { "epoch": 0.08224014107141916, "grad_norm": 0.8326720595359802, "learning_rate": 0.0002466263875228326, "loss": 4.7582, "step": 58530 }, { "epoch": 0.08225419200958274, "grad_norm": 0.8211853504180908, "learning_rate": 0.00024666854011521705, "loss": 4.8276, "step": 58540 }, { "epoch": 0.08226824294774632, "grad_norm": 0.8203056454658508, "learning_rate": 0.0002467106927076015, "loss": 4.8199, "step": 58550 }, { "epoch": 0.0822822938859099, "grad_norm": 0.8489205837249756, "learning_rate": 0.0002467528452999859, "loss": 4.8442, "step": 58560 }, { "epoch": 0.08229634482407347, "grad_norm": 0.8325367569923401, "learning_rate": 0.00024679499789237035, "loss": 4.8388, "step": 58570 }, { "epoch": 0.08231039576223705, "grad_norm": 0.8521776795387268, "learning_rate": 0.0002468371504847548, "loss": 4.9109, "step": 58580 }, { "epoch": 
0.08232444670040062, "grad_norm": 0.8650569915771484, "learning_rate": 0.0002468793030771392, "loss": 4.8815, "step": 58590 }, { "epoch": 0.0823384976385642, "grad_norm": 0.8365474343299866, "learning_rate": 0.00024692145566952364, "loss": 4.8285, "step": 58600 }, { "epoch": 0.08235254857672779, "grad_norm": 0.8498002886772156, "learning_rate": 0.00024696360826190807, "loss": 4.8788, "step": 58610 }, { "epoch": 0.08236659951489136, "grad_norm": 0.8518571257591248, "learning_rate": 0.0002470057608542925, "loss": 4.7712, "step": 58620 }, { "epoch": 0.08238065045305494, "grad_norm": 0.8263545632362366, "learning_rate": 0.00024704791344667694, "loss": 4.9409, "step": 58630 }, { "epoch": 0.08239470139121852, "grad_norm": 0.8218898177146912, "learning_rate": 0.0002470900660390614, "loss": 4.6849, "step": 58640 }, { "epoch": 0.08240875232938209, "grad_norm": 0.8200307488441467, "learning_rate": 0.0002471322186314458, "loss": 4.7979, "step": 58650 }, { "epoch": 0.08242280326754567, "grad_norm": 0.8574158549308777, "learning_rate": 0.00024717437122383023, "loss": 4.8704, "step": 58660 }, { "epoch": 0.08243685420570925, "grad_norm": 0.8805510401725769, "learning_rate": 0.00024721652381621466, "loss": 4.8024, "step": 58670 }, { "epoch": 0.08245090514387282, "grad_norm": 0.8428569436073303, "learning_rate": 0.0002472586764085991, "loss": 4.7948, "step": 58680 }, { "epoch": 0.0824649560820364, "grad_norm": 0.8235595226287842, "learning_rate": 0.0002473008290009835, "loss": 4.709, "step": 58690 }, { "epoch": 0.08247900702019997, "grad_norm": 0.8152335286140442, "learning_rate": 0.000247342981593368, "loss": 4.8758, "step": 58700 }, { "epoch": 0.08249305795836356, "grad_norm": 0.8103764653205872, "learning_rate": 0.0002473851341857524, "loss": 4.7552, "step": 58710 }, { "epoch": 0.08250710889652714, "grad_norm": 0.8165539503097534, "learning_rate": 0.0002474272867781368, "loss": 4.8179, "step": 58720 }, { "epoch": 0.08252115983469072, "grad_norm": 0.851290225982666, 
"learning_rate": 0.00024746943937052125, "loss": 4.9018, "step": 58730 }, { "epoch": 0.08253521077285429, "grad_norm": 0.8036884665489197, "learning_rate": 0.0002475115919629057, "loss": 4.8221, "step": 58740 }, { "epoch": 0.08254926171101787, "grad_norm": 0.8429344296455383, "learning_rate": 0.0002475537445552901, "loss": 4.8122, "step": 58750 }, { "epoch": 0.08256331264918144, "grad_norm": 0.8305610418319702, "learning_rate": 0.0002475958971476746, "loss": 4.7718, "step": 58760 }, { "epoch": 0.08257736358734502, "grad_norm": 0.8201099038124084, "learning_rate": 0.000247638049740059, "loss": 4.9241, "step": 58770 }, { "epoch": 0.0825914145255086, "grad_norm": 0.8135986328125, "learning_rate": 0.0002476802023324434, "loss": 4.8023, "step": 58780 }, { "epoch": 0.08260546546367217, "grad_norm": 0.8189361095428467, "learning_rate": 0.00024772235492482783, "loss": 4.8384, "step": 58790 }, { "epoch": 0.08261951640183575, "grad_norm": 0.8032187223434448, "learning_rate": 0.00024776450751721226, "loss": 4.8052, "step": 58800 }, { "epoch": 0.08263356733999934, "grad_norm": 0.8336382508277893, "learning_rate": 0.0002478066601095967, "loss": 4.755, "step": 58810 }, { "epoch": 0.08264761827816292, "grad_norm": 0.8608211278915405, "learning_rate": 0.0002478488127019812, "loss": 4.9017, "step": 58820 }, { "epoch": 0.08266166921632649, "grad_norm": 0.8575011491775513, "learning_rate": 0.0002478909652943656, "loss": 4.7826, "step": 58830 }, { "epoch": 0.08267572015449007, "grad_norm": 0.8528128266334534, "learning_rate": 0.00024793311788675, "loss": 4.8539, "step": 58840 }, { "epoch": 0.08268977109265364, "grad_norm": 0.8594315052032471, "learning_rate": 0.0002479752704791344, "loss": 4.7169, "step": 58850 }, { "epoch": 0.08270382203081722, "grad_norm": 0.8435611128807068, "learning_rate": 0.00024801742307151885, "loss": 4.7116, "step": 58860 }, { "epoch": 0.0827178729689808, "grad_norm": 0.8331279158592224, "learning_rate": 0.0002480595756639033, "loss": 4.933, "step": 58870 }, 
{ "epoch": 0.08273192390714437, "grad_norm": 0.8534746766090393, "learning_rate": 0.00024810172825628777, "loss": 4.8714, "step": 58880 }, { "epoch": 0.08274597484530795, "grad_norm": 0.8149212002754211, "learning_rate": 0.0002481438808486722, "loss": 4.917, "step": 58890 }, { "epoch": 0.08276002578347152, "grad_norm": 0.8224017024040222, "learning_rate": 0.00024818603344105663, "loss": 4.7634, "step": 58900 }, { "epoch": 0.0827740767216351, "grad_norm": 0.8118835687637329, "learning_rate": 0.000248228186033441, "loss": 4.782, "step": 58910 }, { "epoch": 0.08278812765979869, "grad_norm": 0.8306914567947388, "learning_rate": 0.00024827033862582544, "loss": 4.8674, "step": 58920 }, { "epoch": 0.08280217859796227, "grad_norm": 0.8186638355255127, "learning_rate": 0.00024831249121820987, "loss": 4.8705, "step": 58930 }, { "epoch": 0.08281622953612584, "grad_norm": 0.8312268257141113, "learning_rate": 0.00024835464381059436, "loss": 4.8295, "step": 58940 }, { "epoch": 0.08283028047428942, "grad_norm": 0.8370024561882019, "learning_rate": 0.0002483967964029788, "loss": 4.8434, "step": 58950 }, { "epoch": 0.082844331412453, "grad_norm": 0.8207914233207703, "learning_rate": 0.0002484389489953632, "loss": 4.7712, "step": 58960 }, { "epoch": 0.08285838235061657, "grad_norm": 0.8279985785484314, "learning_rate": 0.0002484811015877476, "loss": 4.8821, "step": 58970 }, { "epoch": 0.08287243328878015, "grad_norm": 0.8251729011535645, "learning_rate": 0.000248523254180132, "loss": 4.6615, "step": 58980 }, { "epoch": 0.08288648422694372, "grad_norm": 0.8180922269821167, "learning_rate": 0.0002485654067725165, "loss": 4.778, "step": 58990 }, { "epoch": 0.0829005351651073, "grad_norm": 0.833755612373352, "learning_rate": 0.00024860755936490094, "loss": 4.8855, "step": 59000 }, { "epoch": 0.08291458610327088, "grad_norm": 0.8239709734916687, "learning_rate": 0.0002486497119572854, "loss": 4.7605, "step": 59010 }, { "epoch": 0.08292863704143447, "grad_norm": 0.8077985644340515, 
"learning_rate": 0.0002486918645496698, "loss": 4.8525, "step": 59020 }, { "epoch": 0.08294268797959804, "grad_norm": 0.795622706413269, "learning_rate": 0.0002487340171420542, "loss": 4.7966, "step": 59030 }, { "epoch": 0.08295673891776162, "grad_norm": 0.8494805693626404, "learning_rate": 0.0002487761697344386, "loss": 4.8679, "step": 59040 }, { "epoch": 0.0829707898559252, "grad_norm": 0.8523160219192505, "learning_rate": 0.0002488183223268231, "loss": 4.824, "step": 59050 }, { "epoch": 0.08298484079408877, "grad_norm": 0.8548047542572021, "learning_rate": 0.00024886047491920753, "loss": 4.7349, "step": 59060 }, { "epoch": 0.08299889173225235, "grad_norm": 0.8346095681190491, "learning_rate": 0.00024890262751159196, "loss": 4.858, "step": 59070 }, { "epoch": 0.08301294267041592, "grad_norm": 0.8644514679908752, "learning_rate": 0.0002489447801039764, "loss": 4.897, "step": 59080 }, { "epoch": 0.0830269936085795, "grad_norm": 0.8482093811035156, "learning_rate": 0.0002489869326963608, "loss": 4.8537, "step": 59090 }, { "epoch": 0.08304104454674308, "grad_norm": 0.8546515703201294, "learning_rate": 0.0002490290852887452, "loss": 4.7443, "step": 59100 }, { "epoch": 0.08305509548490665, "grad_norm": 0.8156818151473999, "learning_rate": 0.0002490712378811297, "loss": 4.7512, "step": 59110 }, { "epoch": 0.08306914642307024, "grad_norm": 0.8068788647651672, "learning_rate": 0.0002491133904735141, "loss": 4.8865, "step": 59120 }, { "epoch": 0.08308319736123382, "grad_norm": 0.8569709658622742, "learning_rate": 0.00024915554306589855, "loss": 4.8543, "step": 59130 }, { "epoch": 0.0830972482993974, "grad_norm": 0.8595927357673645, "learning_rate": 0.000249197695658283, "loss": 4.9016, "step": 59140 }, { "epoch": 0.08311129923756097, "grad_norm": 0.8086974024772644, "learning_rate": 0.0002492398482506674, "loss": 4.8776, "step": 59150 }, { "epoch": 0.08312535017572455, "grad_norm": 0.8013572096824646, "learning_rate": 0.0002492820008430518, "loss": 4.7825, "step": 59160 }, 
{ "epoch": 0.08313940111388812, "grad_norm": 0.8331977725028992, "learning_rate": 0.00024932415343543627, "loss": 4.8848, "step": 59170 }, { "epoch": 0.0831534520520517, "grad_norm": 0.8577580451965332, "learning_rate": 0.0002493663060278207, "loss": 4.8004, "step": 59180 }, { "epoch": 0.08316750299021528, "grad_norm": 0.8222034573554993, "learning_rate": 0.00024940845862020513, "loss": 4.7706, "step": 59190 }, { "epoch": 0.08318155392837885, "grad_norm": 0.8116173148155212, "learning_rate": 0.00024945061121258957, "loss": 4.925, "step": 59200 }, { "epoch": 0.08319560486654243, "grad_norm": 0.8104360699653625, "learning_rate": 0.000249492763804974, "loss": 4.8693, "step": 59210 }, { "epoch": 0.083209655804706, "grad_norm": 0.8252411484718323, "learning_rate": 0.00024953491639735843, "loss": 4.7542, "step": 59220 }, { "epoch": 0.0832237067428696, "grad_norm": 0.8130877614021301, "learning_rate": 0.00024957706898974286, "loss": 4.8668, "step": 59230 }, { "epoch": 0.08323775768103317, "grad_norm": 0.8746052980422974, "learning_rate": 0.0002496192215821273, "loss": 4.732, "step": 59240 }, { "epoch": 0.08325180861919675, "grad_norm": 0.8369506001472473, "learning_rate": 0.0002496613741745117, "loss": 4.6947, "step": 59250 }, { "epoch": 0.08326585955736032, "grad_norm": 0.8340523838996887, "learning_rate": 0.00024970352676689615, "loss": 4.8168, "step": 59260 }, { "epoch": 0.0832799104955239, "grad_norm": 0.8198563456535339, "learning_rate": 0.0002497456793592806, "loss": 4.8669, "step": 59270 }, { "epoch": 0.08329396143368747, "grad_norm": 0.7938926219940186, "learning_rate": 0.000249787831951665, "loss": 4.9119, "step": 59280 }, { "epoch": 0.08330801237185105, "grad_norm": 0.8195042610168457, "learning_rate": 0.00024982998454404945, "loss": 4.7753, "step": 59290 }, { "epoch": 0.08332206331001463, "grad_norm": 0.786556601524353, "learning_rate": 0.0002498721371364339, "loss": 4.8474, "step": 59300 }, { "epoch": 0.0833361142481782, "grad_norm": 0.7999930381774902, 
"learning_rate": 0.0002499142897288183, "loss": 4.8823, "step": 59310 }, { "epoch": 0.08335016518634178, "grad_norm": 0.8049028515815735, "learning_rate": 0.00024995644232120274, "loss": 4.758, "step": 59320 }, { "epoch": 0.08336421612450537, "grad_norm": 0.8140138983726501, "learning_rate": 0.00024999859491358717, "loss": 4.8163, "step": 59330 }, { "epoch": 0.08337826706266895, "grad_norm": 0.7999250292778015, "learning_rate": 0.0002500407475059716, "loss": 4.8743, "step": 59340 }, { "epoch": 0.08339231800083252, "grad_norm": 0.8304258584976196, "learning_rate": 0.00025008290009835603, "loss": 4.8675, "step": 59350 }, { "epoch": 0.0834063689389961, "grad_norm": 0.8012201189994812, "learning_rate": 0.00025012505269074046, "loss": 4.8525, "step": 59360 }, { "epoch": 0.08342041987715967, "grad_norm": 0.7982044219970703, "learning_rate": 0.0002501672052831249, "loss": 4.8047, "step": 59370 }, { "epoch": 0.08343447081532325, "grad_norm": 0.8110559582710266, "learning_rate": 0.0002502093578755093, "loss": 4.83, "step": 59380 }, { "epoch": 0.08344852175348683, "grad_norm": 0.8042994141578674, "learning_rate": 0.00025025151046789376, "loss": 4.6822, "step": 59390 }, { "epoch": 0.0834625726916504, "grad_norm": 0.9221730828285217, "learning_rate": 0.0002502936630602782, "loss": 4.8834, "step": 59400 }, { "epoch": 0.08347662362981398, "grad_norm": 0.8320198059082031, "learning_rate": 0.0002503358156526626, "loss": 4.7958, "step": 59410 }, { "epoch": 0.08349067456797755, "grad_norm": 0.8428821563720703, "learning_rate": 0.00025037796824504705, "loss": 4.7707, "step": 59420 }, { "epoch": 0.08350472550614114, "grad_norm": 0.7867874503135681, "learning_rate": 0.0002504201208374315, "loss": 4.8291, "step": 59430 }, { "epoch": 0.08351877644430472, "grad_norm": 0.8074051737785339, "learning_rate": 0.0002504622734298159, "loss": 4.8084, "step": 59440 }, { "epoch": 0.0835328273824683, "grad_norm": 0.8142467141151428, "learning_rate": 0.00025050442602220034, "loss": 4.8289, "step": 
59450 }, { "epoch": 0.08354687832063187, "grad_norm": 0.8267741203308105, "learning_rate": 0.0002505465786145848, "loss": 4.734, "step": 59460 }, { "epoch": 0.08356092925879545, "grad_norm": 0.786417543888092, "learning_rate": 0.0002505887312069692, "loss": 4.7357, "step": 59470 }, { "epoch": 0.08357498019695903, "grad_norm": 0.8515885472297668, "learning_rate": 0.00025063088379935364, "loss": 4.7698, "step": 59480 }, { "epoch": 0.0835890311351226, "grad_norm": 0.8577207922935486, "learning_rate": 0.00025067303639173807, "loss": 4.7342, "step": 59490 }, { "epoch": 0.08360308207328618, "grad_norm": 0.8472533226013184, "learning_rate": 0.0002507151889841225, "loss": 4.7212, "step": 59500 }, { "epoch": 0.08361713301144975, "grad_norm": 0.8497911691665649, "learning_rate": 0.00025075734157650693, "loss": 4.6883, "step": 59510 }, { "epoch": 0.08363118394961333, "grad_norm": 0.7981038689613342, "learning_rate": 0.00025079949416889136, "loss": 4.7996, "step": 59520 }, { "epoch": 0.0836452348877769, "grad_norm": 0.8073769807815552, "learning_rate": 0.0002508416467612758, "loss": 4.8362, "step": 59530 }, { "epoch": 0.0836592858259405, "grad_norm": 0.8505818247795105, "learning_rate": 0.0002508837993536602, "loss": 4.7964, "step": 59540 }, { "epoch": 0.08367333676410407, "grad_norm": 0.822891891002655, "learning_rate": 0.00025092595194604466, "loss": 4.9098, "step": 59550 }, { "epoch": 0.08368738770226765, "grad_norm": 0.8498794436454773, "learning_rate": 0.0002509681045384291, "loss": 4.8038, "step": 59560 }, { "epoch": 0.08370143864043122, "grad_norm": 0.8308042883872986, "learning_rate": 0.0002510102571308135, "loss": 4.8735, "step": 59570 }, { "epoch": 0.0837154895785948, "grad_norm": 0.8018273711204529, "learning_rate": 0.00025105240972319795, "loss": 4.908, "step": 59580 }, { "epoch": 0.08372954051675838, "grad_norm": 0.8206508159637451, "learning_rate": 0.0002510945623155824, "loss": 4.7587, "step": 59590 }, { "epoch": 0.08374359145492195, "grad_norm": 
0.841090977191925, "learning_rate": 0.0002511367149079668, "loss": 4.7037, "step": 59600 }, { "epoch": 0.08375764239308553, "grad_norm": 0.7994279265403748, "learning_rate": 0.00025117886750035124, "loss": 4.7894, "step": 59610 }, { "epoch": 0.0837716933312491, "grad_norm": 0.7760536074638367, "learning_rate": 0.0002512210200927357, "loss": 4.8517, "step": 59620 }, { "epoch": 0.08378574426941268, "grad_norm": 0.8113006949424744, "learning_rate": 0.0002512631726851201, "loss": 4.9551, "step": 59630 }, { "epoch": 0.08379979520757627, "grad_norm": 0.8211884498596191, "learning_rate": 0.00025130532527750454, "loss": 4.9197, "step": 59640 }, { "epoch": 0.08381384614573985, "grad_norm": 0.8168864846229553, "learning_rate": 0.00025134747786988897, "loss": 4.8356, "step": 59650 }, { "epoch": 0.08382789708390342, "grad_norm": 0.8315194845199585, "learning_rate": 0.0002513896304622734, "loss": 4.7761, "step": 59660 }, { "epoch": 0.083841948022067, "grad_norm": 0.8301543593406677, "learning_rate": 0.00025143178305465783, "loss": 4.8829, "step": 59670 }, { "epoch": 0.08385599896023058, "grad_norm": 0.8475062251091003, "learning_rate": 0.00025147393564704226, "loss": 4.7885, "step": 59680 }, { "epoch": 0.08387004989839415, "grad_norm": 0.8310939073562622, "learning_rate": 0.0002515160882394267, "loss": 4.7185, "step": 59690 }, { "epoch": 0.08388410083655773, "grad_norm": 0.8392112851142883, "learning_rate": 0.0002515582408318111, "loss": 4.7873, "step": 59700 }, { "epoch": 0.0838981517747213, "grad_norm": 0.8230659365653992, "learning_rate": 0.00025160039342419556, "loss": 4.7909, "step": 59710 }, { "epoch": 0.08391220271288488, "grad_norm": 0.834247350692749, "learning_rate": 0.00025164254601658, "loss": 4.7701, "step": 59720 }, { "epoch": 0.08392625365104846, "grad_norm": 0.8113618493080139, "learning_rate": 0.0002516846986089644, "loss": 4.8853, "step": 59730 }, { "epoch": 0.08394030458921205, "grad_norm": 0.8198868632316589, "learning_rate": 0.00025172685120134885, "loss": 
4.8926, "step": 59740 }, { "epoch": 0.08395435552737562, "grad_norm": 0.8316317200660706, "learning_rate": 0.0002517690037937333, "loss": 4.8605, "step": 59750 }, { "epoch": 0.0839684064655392, "grad_norm": 0.9213061332702637, "learning_rate": 0.0002518111563861177, "loss": 4.8161, "step": 59760 }, { "epoch": 0.08398245740370278, "grad_norm": 0.8067527413368225, "learning_rate": 0.00025185330897850214, "loss": 4.8305, "step": 59770 }, { "epoch": 0.08399650834186635, "grad_norm": 0.8328394293785095, "learning_rate": 0.0002518954615708866, "loss": 4.8528, "step": 59780 }, { "epoch": 0.08401055928002993, "grad_norm": 0.8063852190971375, "learning_rate": 0.000251937614163271, "loss": 4.7661, "step": 59790 }, { "epoch": 0.0840246102181935, "grad_norm": 0.8150149583816528, "learning_rate": 0.0002519797667556555, "loss": 4.6904, "step": 59800 }, { "epoch": 0.08403866115635708, "grad_norm": 0.8086451292037964, "learning_rate": 0.00025202191934803987, "loss": 4.8242, "step": 59810 }, { "epoch": 0.08405271209452066, "grad_norm": 0.8651347160339355, "learning_rate": 0.0002520640719404243, "loss": 4.7535, "step": 59820 }, { "epoch": 0.08406676303268423, "grad_norm": 0.8142220377922058, "learning_rate": 0.00025210622453280873, "loss": 4.8362, "step": 59830 }, { "epoch": 0.08408081397084781, "grad_norm": 0.8657679557800293, "learning_rate": 0.00025214837712519316, "loss": 4.7861, "step": 59840 }, { "epoch": 0.0840948649090114, "grad_norm": 0.87697833776474, "learning_rate": 0.0002521905297175776, "loss": 4.739, "step": 59850 }, { "epoch": 0.08410891584717498, "grad_norm": 0.870245099067688, "learning_rate": 0.0002522326823099621, "loss": 4.7173, "step": 59860 }, { "epoch": 0.08412296678533855, "grad_norm": 0.8389632701873779, "learning_rate": 0.00025227483490234645, "loss": 4.7693, "step": 59870 }, { "epoch": 0.08413701772350213, "grad_norm": 0.8237364888191223, "learning_rate": 0.0002523169874947309, "loss": 4.8892, "step": 59880 }, { "epoch": 0.0841510686616657, "grad_norm": 
0.8023500442504883, "learning_rate": 0.0002523591400871153, "loss": 4.7923, "step": 59890 }, { "epoch": 0.08416511959982928, "grad_norm": 0.8338358402252197, "learning_rate": 0.00025240129267949975, "loss": 4.8156, "step": 59900 }, { "epoch": 0.08417917053799286, "grad_norm": 0.7917497158050537, "learning_rate": 0.00025243923001264573, "loss": 4.7169, "step": 59910 }, { "epoch": 0.08419322147615643, "grad_norm": 0.8231458067893982, "learning_rate": 0.00025248138260503016, "loss": 4.8951, "step": 59920 }, { "epoch": 0.08420727241432001, "grad_norm": 0.8315156102180481, "learning_rate": 0.0002525235351974146, "loss": 4.8144, "step": 59930 }, { "epoch": 0.08422132335248358, "grad_norm": 0.8496831059455872, "learning_rate": 0.0002525656877897991, "loss": 4.9156, "step": 59940 }, { "epoch": 0.08423537429064717, "grad_norm": 0.8502749800682068, "learning_rate": 0.0002526078403821835, "loss": 4.9349, "step": 59950 }, { "epoch": 0.08424942522881075, "grad_norm": 0.8212767243385315, "learning_rate": 0.00025264999297456794, "loss": 4.801, "step": 59960 }, { "epoch": 0.08426347616697433, "grad_norm": 0.8079051971435547, "learning_rate": 0.0002526921455669523, "loss": 4.821, "step": 59970 }, { "epoch": 0.0842775271051379, "grad_norm": 0.7926346063613892, "learning_rate": 0.00025273429815933675, "loss": 4.7747, "step": 59980 }, { "epoch": 0.08429157804330148, "grad_norm": 0.8248520493507385, "learning_rate": 0.0002527764507517212, "loss": 4.8311, "step": 59990 }, { "epoch": 0.08430562898146506, "grad_norm": 0.8126881122589111, "learning_rate": 0.00025281860334410566, "loss": 4.8472, "step": 60000 }, { "epoch": 0.08431967991962863, "grad_norm": 0.8101698160171509, "learning_rate": 0.0002528607559364901, "loss": 4.7401, "step": 60010 }, { "epoch": 0.08433373085779221, "grad_norm": 0.8176490664482117, "learning_rate": 0.00025290290852887453, "loss": 4.7447, "step": 60020 }, { "epoch": 0.08434778179595578, "grad_norm": 0.8406776189804077, "learning_rate": 0.00025294506112125896, 
"loss": 4.7898, "step": 60030 }, { "epoch": 0.08436183273411936, "grad_norm": 0.8245927691459656, "learning_rate": 0.00025298721371364334, "loss": 4.6811, "step": 60040 }, { "epoch": 0.08437588367228295, "grad_norm": 0.8441291451454163, "learning_rate": 0.00025302936630602777, "loss": 4.7632, "step": 60050 }, { "epoch": 0.08438993461044653, "grad_norm": 0.8327770829200745, "learning_rate": 0.00025307151889841225, "loss": 4.8536, "step": 60060 }, { "epoch": 0.0844039855486101, "grad_norm": 0.850288987159729, "learning_rate": 0.0002531136714907967, "loss": 4.8284, "step": 60070 }, { "epoch": 0.08441803648677368, "grad_norm": 0.7964315414428711, "learning_rate": 0.0002531558240831811, "loss": 4.9407, "step": 60080 }, { "epoch": 0.08443208742493725, "grad_norm": 0.8162838220596313, "learning_rate": 0.00025319797667556555, "loss": 4.8588, "step": 60090 }, { "epoch": 0.08444613836310083, "grad_norm": 0.8507556319236755, "learning_rate": 0.0002532401292679499, "loss": 4.7351, "step": 60100 }, { "epoch": 0.08446018930126441, "grad_norm": 0.7655229568481445, "learning_rate": 0.00025328228186033435, "loss": 4.9912, "step": 60110 }, { "epoch": 0.08447424023942798, "grad_norm": 0.8107126951217651, "learning_rate": 0.00025332443445271884, "loss": 4.7462, "step": 60120 }, { "epoch": 0.08448829117759156, "grad_norm": 0.8109584450721741, "learning_rate": 0.00025336658704510327, "loss": 4.8539, "step": 60130 }, { "epoch": 0.08450234211575514, "grad_norm": 0.8400088548660278, "learning_rate": 0.0002534087396374877, "loss": 4.9024, "step": 60140 }, { "epoch": 0.08451639305391871, "grad_norm": 0.8335424065589905, "learning_rate": 0.00025345089222987213, "loss": 4.8311, "step": 60150 }, { "epoch": 0.0845304439920823, "grad_norm": 0.8169661164283752, "learning_rate": 0.0002534930448222565, "loss": 4.8582, "step": 60160 }, { "epoch": 0.08454449493024588, "grad_norm": 0.7944623827934265, "learning_rate": 0.00025353519741464094, "loss": 4.7601, "step": 60170 }, { "epoch": 
0.08455854586840945, "grad_norm": 0.8325213193893433, "learning_rate": 0.0002535773500070254, "loss": 4.819, "step": 60180 }, { "epoch": 0.08457259680657303, "grad_norm": 0.8035892844200134, "learning_rate": 0.00025361950259940986, "loss": 4.8387, "step": 60190 }, { "epoch": 0.0845866477447366, "grad_norm": 0.8531663417816162, "learning_rate": 0.0002536616551917943, "loss": 4.9727, "step": 60200 }, { "epoch": 0.08460069868290018, "grad_norm": 0.8064233660697937, "learning_rate": 0.0002537038077841787, "loss": 4.739, "step": 60210 }, { "epoch": 0.08461474962106376, "grad_norm": 0.8612556457519531, "learning_rate": 0.00025374596037656315, "loss": 4.763, "step": 60220 }, { "epoch": 0.08462880055922734, "grad_norm": 0.8533815145492554, "learning_rate": 0.0002537881129689476, "loss": 4.7813, "step": 60230 }, { "epoch": 0.08464285149739091, "grad_norm": 0.7971927523612976, "learning_rate": 0.000253830265561332, "loss": 4.7823, "step": 60240 }, { "epoch": 0.08465690243555449, "grad_norm": 0.8081861734390259, "learning_rate": 0.00025387241815371644, "loss": 4.7512, "step": 60250 }, { "epoch": 0.08467095337371808, "grad_norm": 0.8018068075180054, "learning_rate": 0.0002539145707461009, "loss": 4.7706, "step": 60260 }, { "epoch": 0.08468500431188165, "grad_norm": 0.8136337995529175, "learning_rate": 0.0002539567233384853, "loss": 4.7486, "step": 60270 }, { "epoch": 0.08469905525004523, "grad_norm": 0.837989091873169, "learning_rate": 0.00025399887593086974, "loss": 4.7813, "step": 60280 }, { "epoch": 0.0847131061882088, "grad_norm": 0.8240730166435242, "learning_rate": 0.00025404102852325417, "loss": 4.8329, "step": 60290 }, { "epoch": 0.08472715712637238, "grad_norm": 0.8388275504112244, "learning_rate": 0.0002540831811156386, "loss": 4.8039, "step": 60300 }, { "epoch": 0.08474120806453596, "grad_norm": 0.8347736597061157, "learning_rate": 0.00025412533370802303, "loss": 4.6845, "step": 60310 }, { "epoch": 0.08475525900269953, "grad_norm": 0.8188750147819519, 
"learning_rate": 0.00025416748630040746, "loss": 4.7612, "step": 60320 }, { "epoch": 0.08476930994086311, "grad_norm": 0.8329417109489441, "learning_rate": 0.0002542096388927919, "loss": 4.8708, "step": 60330 }, { "epoch": 0.08478336087902669, "grad_norm": 0.8301446437835693, "learning_rate": 0.0002542517914851763, "loss": 4.8245, "step": 60340 }, { "epoch": 0.08479741181719026, "grad_norm": 0.8130600452423096, "learning_rate": 0.00025429394407756076, "loss": 4.8549, "step": 60350 }, { "epoch": 0.08481146275535385, "grad_norm": 0.8038048148155212, "learning_rate": 0.0002543360966699452, "loss": 4.8175, "step": 60360 }, { "epoch": 0.08482551369351743, "grad_norm": 0.7973541021347046, "learning_rate": 0.0002543782492623296, "loss": 4.852, "step": 60370 }, { "epoch": 0.084839564631681, "grad_norm": 0.8406749367713928, "learning_rate": 0.00025442040185471405, "loss": 4.8618, "step": 60380 }, { "epoch": 0.08485361556984458, "grad_norm": 0.8773977160453796, "learning_rate": 0.0002544625544470985, "loss": 4.7474, "step": 60390 }, { "epoch": 0.08486766650800816, "grad_norm": 0.7995550632476807, "learning_rate": 0.0002545047070394829, "loss": 4.7697, "step": 60400 }, { "epoch": 0.08488171744617173, "grad_norm": 0.8282455801963806, "learning_rate": 0.00025454685963186734, "loss": 4.8029, "step": 60410 }, { "epoch": 0.08489576838433531, "grad_norm": 0.8432449102401733, "learning_rate": 0.0002545890122242518, "loss": 4.872, "step": 60420 }, { "epoch": 0.08490981932249889, "grad_norm": 0.8184887170791626, "learning_rate": 0.0002546311648166362, "loss": 4.7083, "step": 60430 }, { "epoch": 0.08492387026066246, "grad_norm": 0.7973591685295105, "learning_rate": 0.00025467331740902064, "loss": 4.8724, "step": 60440 }, { "epoch": 0.08493792119882604, "grad_norm": 0.8287274837493896, "learning_rate": 0.00025471547000140507, "loss": 4.9017, "step": 60450 }, { "epoch": 0.08495197213698961, "grad_norm": 0.8042613863945007, "learning_rate": 0.0002547576225937895, "loss": 4.7804, "step": 
60460 }, { "epoch": 0.0849660230751532, "grad_norm": 0.8201180100440979, "learning_rate": 0.00025479977518617393, "loss": 4.8187, "step": 60470 }, { "epoch": 0.08498007401331678, "grad_norm": 0.8212829232215881, "learning_rate": 0.00025484192777855836, "loss": 4.8745, "step": 60480 }, { "epoch": 0.08499412495148036, "grad_norm": 0.8027827143669128, "learning_rate": 0.0002548840803709428, "loss": 4.7172, "step": 60490 }, { "epoch": 0.08500817588964393, "grad_norm": 1.7494938373565674, "learning_rate": 0.0002549262329633272, "loss": 4.827, "step": 60500 }, { "epoch": 0.08502222682780751, "grad_norm": 0.8321921825408936, "learning_rate": 0.00025496838555571165, "loss": 4.7602, "step": 60510 }, { "epoch": 0.08503627776597109, "grad_norm": 0.8253518342971802, "learning_rate": 0.0002550105381480961, "loss": 4.7594, "step": 60520 }, { "epoch": 0.08505032870413466, "grad_norm": 0.8793212175369263, "learning_rate": 0.0002550526907404805, "loss": 4.7981, "step": 60530 }, { "epoch": 0.08506437964229824, "grad_norm": 0.8195023536682129, "learning_rate": 0.00025509484333286495, "loss": 4.7146, "step": 60540 }, { "epoch": 0.08507843058046181, "grad_norm": 0.8089900016784668, "learning_rate": 0.0002551369959252494, "loss": 4.835, "step": 60550 }, { "epoch": 0.08509248151862539, "grad_norm": 0.798321008682251, "learning_rate": 0.0002551791485176338, "loss": 4.9411, "step": 60560 }, { "epoch": 0.08510653245678898, "grad_norm": 0.7749428153038025, "learning_rate": 0.00025522130111001824, "loss": 4.7919, "step": 60570 }, { "epoch": 0.08512058339495256, "grad_norm": 0.832476019859314, "learning_rate": 0.00025526345370240267, "loss": 4.8104, "step": 60580 }, { "epoch": 0.08513463433311613, "grad_norm": 0.8051169514656067, "learning_rate": 0.0002553056062947871, "loss": 4.7459, "step": 60590 }, { "epoch": 0.08514868527127971, "grad_norm": 0.8199285268783569, "learning_rate": 0.00025534775888717153, "loss": 4.7434, "step": 60600 }, { "epoch": 0.08516273620944328, "grad_norm": 
0.820178210735321, "learning_rate": 0.00025538991147955597, "loss": 4.8498, "step": 60610 }, { "epoch": 0.08517678714760686, "grad_norm": 0.8192396759986877, "learning_rate": 0.0002554320640719404, "loss": 4.8455, "step": 60620 }, { "epoch": 0.08519083808577044, "grad_norm": 0.8456838130950928, "learning_rate": 0.00025547421666432483, "loss": 4.9442, "step": 60630 }, { "epoch": 0.08520488902393401, "grad_norm": 0.7862314581871033, "learning_rate": 0.00025551636925670926, "loss": 4.8109, "step": 60640 }, { "epoch": 0.08521893996209759, "grad_norm": 0.781730592250824, "learning_rate": 0.0002555585218490937, "loss": 4.8463, "step": 60650 }, { "epoch": 0.08523299090026117, "grad_norm": 0.8605624437332153, "learning_rate": 0.0002556006744414781, "loss": 4.7406, "step": 60660 }, { "epoch": 0.08524704183842476, "grad_norm": 0.798784613609314, "learning_rate": 0.00025564282703386255, "loss": 4.9813, "step": 60670 }, { "epoch": 0.08526109277658833, "grad_norm": 0.8223904967308044, "learning_rate": 0.000255684979626247, "loss": 4.9145, "step": 60680 }, { "epoch": 0.08527514371475191, "grad_norm": 0.8261276483535767, "learning_rate": 0.0002557271322186314, "loss": 4.815, "step": 60690 }, { "epoch": 0.08528919465291548, "grad_norm": 0.7942939400672913, "learning_rate": 0.00025576928481101585, "loss": 4.7418, "step": 60700 }, { "epoch": 0.08530324559107906, "grad_norm": 0.7955240607261658, "learning_rate": 0.0002558114374034003, "loss": 4.7581, "step": 60710 }, { "epoch": 0.08531729652924264, "grad_norm": 0.8077800273895264, "learning_rate": 0.0002558535899957847, "loss": 4.8254, "step": 60720 }, { "epoch": 0.08533134746740621, "grad_norm": 0.8321365714073181, "learning_rate": 0.00025589574258816914, "loss": 4.8636, "step": 60730 }, { "epoch": 0.08534539840556979, "grad_norm": 0.8244563341140747, "learning_rate": 0.0002559378951805536, "loss": 4.8733, "step": 60740 }, { "epoch": 0.08535944934373337, "grad_norm": 0.8096931576728821, "learning_rate": 0.000255980047772938, "loss": 
4.8622, "step": 60750 }, { "epoch": 0.08537350028189694, "grad_norm": 0.8346720337867737, "learning_rate": 0.00025602220036532243, "loss": 4.8147, "step": 60760 }, { "epoch": 0.08538755122006053, "grad_norm": 0.7969753742218018, "learning_rate": 0.00025606435295770686, "loss": 4.803, "step": 60770 }, { "epoch": 0.08540160215822411, "grad_norm": 0.7898983359336853, "learning_rate": 0.0002561065055500913, "loss": 4.7337, "step": 60780 }, { "epoch": 0.08541565309638768, "grad_norm": 0.815297544002533, "learning_rate": 0.0002561486581424757, "loss": 4.8547, "step": 60790 }, { "epoch": 0.08542970403455126, "grad_norm": 0.8263185620307922, "learning_rate": 0.0002561908107348602, "loss": 4.7106, "step": 60800 }, { "epoch": 0.08544375497271484, "grad_norm": 0.7924615740776062, "learning_rate": 0.0002562329633272446, "loss": 4.628, "step": 60810 }, { "epoch": 0.08545780591087841, "grad_norm": 0.8533799052238464, "learning_rate": 0.000256275115919629, "loss": 4.8976, "step": 60820 }, { "epoch": 0.08547185684904199, "grad_norm": 0.7679029703140259, "learning_rate": 0.00025631726851201345, "loss": 4.8269, "step": 60830 }, { "epoch": 0.08548590778720556, "grad_norm": 0.9749653935432434, "learning_rate": 0.0002563594211043979, "loss": 4.8443, "step": 60840 }, { "epoch": 0.08549995872536914, "grad_norm": 0.8148539066314697, "learning_rate": 0.0002564015736967823, "loss": 4.8212, "step": 60850 }, { "epoch": 0.08551400966353272, "grad_norm": 0.8042306900024414, "learning_rate": 0.0002564437262891668, "loss": 4.9523, "step": 60860 }, { "epoch": 0.08552806060169629, "grad_norm": 0.8610725998878479, "learning_rate": 0.0002564858788815512, "loss": 4.822, "step": 60870 }, { "epoch": 0.08554211153985988, "grad_norm": 0.8803948760032654, "learning_rate": 0.0002565280314739356, "loss": 4.7024, "step": 60880 }, { "epoch": 0.08555616247802346, "grad_norm": 0.805877149105072, "learning_rate": 0.00025657018406632004, "loss": 4.7908, "step": 60890 }, { "epoch": 0.08557021341618704, "grad_norm": 
0.8154386878013611, "learning_rate": 0.00025661233665870447, "loss": 4.9642, "step": 60900 }, { "epoch": 0.08558426435435061, "grad_norm": 0.8559610843658447, "learning_rate": 0.0002566544892510889, "loss": 4.7765, "step": 60910 }, { "epoch": 0.08559831529251419, "grad_norm": 0.8591551184654236, "learning_rate": 0.0002566966418434734, "loss": 4.7464, "step": 60920 }, { "epoch": 0.08561236623067776, "grad_norm": 0.8162509202957153, "learning_rate": 0.0002567387944358578, "loss": 4.7748, "step": 60930 }, { "epoch": 0.08562641716884134, "grad_norm": 0.8140776753425598, "learning_rate": 0.0002567809470282422, "loss": 4.888, "step": 60940 }, { "epoch": 0.08564046810700492, "grad_norm": 0.8785784244537354, "learning_rate": 0.0002568230996206266, "loss": 4.9443, "step": 60950 }, { "epoch": 0.08565451904516849, "grad_norm": 0.7831446528434753, "learning_rate": 0.00025686525221301106, "loss": 4.9672, "step": 60960 }, { "epoch": 0.08566856998333207, "grad_norm": 0.8083533048629761, "learning_rate": 0.0002569074048053955, "loss": 4.7751, "step": 60970 }, { "epoch": 0.08568262092149566, "grad_norm": 0.8388922810554504, "learning_rate": 0.00025694955739778, "loss": 4.8792, "step": 60980 }, { "epoch": 0.08569667185965923, "grad_norm": 0.8060362339019775, "learning_rate": 0.0002569917099901644, "loss": 4.796, "step": 60990 }, { "epoch": 0.08571072279782281, "grad_norm": 0.8279473185539246, "learning_rate": 0.0002570338625825488, "loss": 4.7601, "step": 61000 }, { "epoch": 0.08572477373598639, "grad_norm": 0.8248935341835022, "learning_rate": 0.0002570760151749332, "loss": 4.8548, "step": 61010 }, { "epoch": 0.08573882467414996, "grad_norm": 0.8512712717056274, "learning_rate": 0.00025711816776731764, "loss": 4.7709, "step": 61020 }, { "epoch": 0.08575287561231354, "grad_norm": 0.7810263633728027, "learning_rate": 0.0002571603203597021, "loss": 4.8958, "step": 61030 }, { "epoch": 0.08576692655047712, "grad_norm": 0.8753952980041504, "learning_rate": 0.00025720247295208656, "loss": 
4.6734, "step": 61040 }, { "epoch": 0.08578097748864069, "grad_norm": 0.8158033490180969, "learning_rate": 0.000257244625544471, "loss": 4.8609, "step": 61050 }, { "epoch": 0.08579502842680427, "grad_norm": 0.8370834589004517, "learning_rate": 0.0002572867781368554, "loss": 4.8195, "step": 61060 }, { "epoch": 0.08580907936496784, "grad_norm": 0.792068362236023, "learning_rate": 0.0002573289307292398, "loss": 4.7818, "step": 61070 }, { "epoch": 0.08582313030313143, "grad_norm": 0.8027522563934326, "learning_rate": 0.00025737108332162423, "loss": 4.7769, "step": 61080 }, { "epoch": 0.08583718124129501, "grad_norm": 0.8090192079544067, "learning_rate": 0.00025741323591400866, "loss": 4.6341, "step": 61090 }, { "epoch": 0.08585123217945859, "grad_norm": 0.7874997854232788, "learning_rate": 0.00025745538850639315, "loss": 4.9727, "step": 61100 }, { "epoch": 0.08586528311762216, "grad_norm": 0.7994272708892822, "learning_rate": 0.0002574975410987776, "loss": 4.7984, "step": 61110 }, { "epoch": 0.08587933405578574, "grad_norm": 0.8127740025520325, "learning_rate": 0.000257539693691162, "loss": 4.8371, "step": 61120 }, { "epoch": 0.08589338499394931, "grad_norm": 0.7839046120643616, "learning_rate": 0.0002575818462835464, "loss": 4.9259, "step": 61130 }, { "epoch": 0.08590743593211289, "grad_norm": 0.812605619430542, "learning_rate": 0.0002576239988759308, "loss": 4.7804, "step": 61140 }, { "epoch": 0.08592148687027647, "grad_norm": 0.8251674175262451, "learning_rate": 0.00025766615146831525, "loss": 4.7948, "step": 61150 }, { "epoch": 0.08593553780844004, "grad_norm": 0.7972586750984192, "learning_rate": 0.00025770830406069973, "loss": 4.7815, "step": 61160 }, { "epoch": 0.08594958874660362, "grad_norm": 0.8652247190475464, "learning_rate": 0.00025775045665308417, "loss": 4.9054, "step": 61170 }, { "epoch": 0.0859636396847672, "grad_norm": 0.7779868245124817, "learning_rate": 0.00025778839398623015, "loss": 4.8678, "step": 61180 }, { "epoch": 0.08597769062293079, 
"grad_norm": 0.8390598893165588, "learning_rate": 0.0002578305465786146, "loss": 4.796, "step": 61190 }, { "epoch": 0.08599174156109436, "grad_norm": 0.7976048588752747, "learning_rate": 0.000257872699170999, "loss": 4.8985, "step": 61200 }, { "epoch": 0.08600579249925794, "grad_norm": 0.8002855777740479, "learning_rate": 0.00025791485176338344, "loss": 4.8251, "step": 61210 }, { "epoch": 0.08601984343742151, "grad_norm": 0.787406861782074, "learning_rate": 0.00025795700435576787, "loss": 4.8762, "step": 61220 }, { "epoch": 0.08603389437558509, "grad_norm": 0.8275022506713867, "learning_rate": 0.00025799915694815225, "loss": 4.8124, "step": 61230 }, { "epoch": 0.08604794531374867, "grad_norm": 0.8134986162185669, "learning_rate": 0.00025804130954053674, "loss": 4.6739, "step": 61240 }, { "epoch": 0.08606199625191224, "grad_norm": 0.8022597432136536, "learning_rate": 0.00025808346213292117, "loss": 4.8794, "step": 61250 }, { "epoch": 0.08607604719007582, "grad_norm": 0.789439857006073, "learning_rate": 0.0002581256147253056, "loss": 4.8165, "step": 61260 }, { "epoch": 0.0860900981282394, "grad_norm": 0.7730726003646851, "learning_rate": 0.00025816776731769003, "loss": 4.8367, "step": 61270 }, { "epoch": 0.08610414906640297, "grad_norm": 0.8278162479400635, "learning_rate": 0.00025820991991007446, "loss": 4.6802, "step": 61280 }, { "epoch": 0.08611820000456656, "grad_norm": 0.8260532021522522, "learning_rate": 0.00025825207250245884, "loss": 4.7331, "step": 61290 }, { "epoch": 0.08613225094273014, "grad_norm": 0.8389633297920227, "learning_rate": 0.0002582942250948433, "loss": 4.8219, "step": 61300 }, { "epoch": 0.08614630188089371, "grad_norm": 0.7943553328514099, "learning_rate": 0.00025833637768722775, "loss": 4.9142, "step": 61310 }, { "epoch": 0.08616035281905729, "grad_norm": 0.815380334854126, "learning_rate": 0.0002583785302796122, "loss": 4.7978, "step": 61320 }, { "epoch": 0.08617440375722087, "grad_norm": 0.7899944186210632, "learning_rate": 
0.0002584206828719966, "loss": 4.7829, "step": 61330 }, { "epoch": 0.08618845469538444, "grad_norm": 0.8026753664016724, "learning_rate": 0.00025846283546438105, "loss": 4.7086, "step": 61340 }, { "epoch": 0.08620250563354802, "grad_norm": 0.8403603434562683, "learning_rate": 0.0002585049880567655, "loss": 4.8447, "step": 61350 }, { "epoch": 0.0862165565717116, "grad_norm": 0.7885310649871826, "learning_rate": 0.0002585471406491499, "loss": 4.818, "step": 61360 }, { "epoch": 0.08623060750987517, "grad_norm": 0.7821835279464722, "learning_rate": 0.00025858929324153434, "loss": 4.7951, "step": 61370 }, { "epoch": 0.08624465844803875, "grad_norm": 0.8090482354164124, "learning_rate": 0.00025863144583391877, "loss": 4.7686, "step": 61380 }, { "epoch": 0.08625870938620234, "grad_norm": 0.8011608719825745, "learning_rate": 0.0002586735984263032, "loss": 4.8789, "step": 61390 }, { "epoch": 0.08627276032436591, "grad_norm": 0.7893279194831848, "learning_rate": 0.00025871575101868763, "loss": 4.8815, "step": 61400 }, { "epoch": 0.08628681126252949, "grad_norm": 0.8958577513694763, "learning_rate": 0.00025875790361107206, "loss": 4.6991, "step": 61410 }, { "epoch": 0.08630086220069307, "grad_norm": 0.8177340030670166, "learning_rate": 0.0002588000562034565, "loss": 4.8222, "step": 61420 }, { "epoch": 0.08631491313885664, "grad_norm": 0.8018973469734192, "learning_rate": 0.00025884220879584093, "loss": 4.9089, "step": 61430 }, { "epoch": 0.08632896407702022, "grad_norm": 0.7807876467704773, "learning_rate": 0.00025888436138822536, "loss": 4.826, "step": 61440 }, { "epoch": 0.0863430150151838, "grad_norm": 0.8257559537887573, "learning_rate": 0.0002589265139806098, "loss": 4.8224, "step": 61450 }, { "epoch": 0.08635706595334737, "grad_norm": 0.8308781385421753, "learning_rate": 0.0002589686665729942, "loss": 4.6974, "step": 61460 }, { "epoch": 0.08637111689151095, "grad_norm": 0.7814127206802368, "learning_rate": 0.00025901081916537865, "loss": 4.8122, "step": 61470 }, { 
"epoch": 0.08638516782967452, "grad_norm": 0.7803528904914856, "learning_rate": 0.0002590529717577631, "loss": 4.8447, "step": 61480 }, { "epoch": 0.0863992187678381, "grad_norm": 0.8149372935295105, "learning_rate": 0.0002590951243501475, "loss": 4.7555, "step": 61490 }, { "epoch": 0.08641326970600169, "grad_norm": 0.8497858047485352, "learning_rate": 0.00025913727694253195, "loss": 4.7493, "step": 61500 }, { "epoch": 0.08642732064416526, "grad_norm": 0.8034449219703674, "learning_rate": 0.0002591794295349164, "loss": 4.8812, "step": 61510 }, { "epoch": 0.08644137158232884, "grad_norm": 0.8024821877479553, "learning_rate": 0.0002592215821273008, "loss": 4.928, "step": 61520 }, { "epoch": 0.08645542252049242, "grad_norm": 0.7921434640884399, "learning_rate": 0.00025926373471968524, "loss": 4.751, "step": 61530 }, { "epoch": 0.086469473458656, "grad_norm": 0.8643134832382202, "learning_rate": 0.00025930588731206967, "loss": 4.8568, "step": 61540 }, { "epoch": 0.08648352439681957, "grad_norm": 0.7830716967582703, "learning_rate": 0.0002593480399044541, "loss": 4.853, "step": 61550 }, { "epoch": 0.08649757533498315, "grad_norm": 0.7742080092430115, "learning_rate": 0.00025939019249683853, "loss": 4.8559, "step": 61560 }, { "epoch": 0.08651162627314672, "grad_norm": 0.8120469450950623, "learning_rate": 0.00025943234508922296, "loss": 4.856, "step": 61570 }, { "epoch": 0.0865256772113103, "grad_norm": 0.827467679977417, "learning_rate": 0.0002594744976816074, "loss": 4.7491, "step": 61580 }, { "epoch": 0.08653972814947387, "grad_norm": 0.7826569080352783, "learning_rate": 0.0002595166502739918, "loss": 4.7901, "step": 61590 }, { "epoch": 0.08655377908763746, "grad_norm": 0.7950142621994019, "learning_rate": 0.00025955880286637626, "loss": 4.7976, "step": 61600 }, { "epoch": 0.08656783002580104, "grad_norm": 0.821763277053833, "learning_rate": 0.0002596009554587607, "loss": 4.821, "step": 61610 }, { "epoch": 0.08658188096396462, "grad_norm": 0.8673090934753418, 
"learning_rate": 0.0002596431080511451, "loss": 4.7369, "step": 61620 }, { "epoch": 0.08659593190212819, "grad_norm": 0.7934688925743103, "learning_rate": 0.00025968526064352955, "loss": 4.6503, "step": 61630 }, { "epoch": 0.08660998284029177, "grad_norm": 0.7949123978614807, "learning_rate": 0.000259727413235914, "loss": 4.8444, "step": 61640 }, { "epoch": 0.08662403377845534, "grad_norm": 0.7871426939964294, "learning_rate": 0.0002597695658282984, "loss": 4.8069, "step": 61650 }, { "epoch": 0.08663808471661892, "grad_norm": 0.8564624786376953, "learning_rate": 0.00025981171842068284, "loss": 4.863, "step": 61660 }, { "epoch": 0.0866521356547825, "grad_norm": 0.8065398931503296, "learning_rate": 0.0002598538710130673, "loss": 4.9085, "step": 61670 }, { "epoch": 0.08666618659294607, "grad_norm": 0.79127436876297, "learning_rate": 0.0002598960236054517, "loss": 4.7051, "step": 61680 }, { "epoch": 0.08668023753110965, "grad_norm": 0.7989571690559387, "learning_rate": 0.00025993817619783614, "loss": 4.6685, "step": 61690 }, { "epoch": 0.08669428846927324, "grad_norm": 0.794864296913147, "learning_rate": 0.00025998032879022057, "loss": 4.8343, "step": 61700 }, { "epoch": 0.08670833940743682, "grad_norm": 0.803548276424408, "learning_rate": 0.000260022481382605, "loss": 4.7731, "step": 61710 }, { "epoch": 0.08672239034560039, "grad_norm": 0.8012824654579163, "learning_rate": 0.00026006463397498943, "loss": 4.8466, "step": 61720 }, { "epoch": 0.08673644128376397, "grad_norm": 0.792219340801239, "learning_rate": 0.00026010678656737386, "loss": 4.7721, "step": 61730 }, { "epoch": 0.08675049222192754, "grad_norm": 0.7680630087852478, "learning_rate": 0.0002601489391597583, "loss": 4.7905, "step": 61740 }, { "epoch": 0.08676454316009112, "grad_norm": 0.7854152917861938, "learning_rate": 0.0002601910917521427, "loss": 4.7413, "step": 61750 }, { "epoch": 0.0867785940982547, "grad_norm": 0.8030341267585754, "learning_rate": 0.00026023324434452716, "loss": 4.8224, "step": 61760 
}, { "epoch": 0.08679264503641827, "grad_norm": 0.807295560836792, "learning_rate": 0.0002602753969369116, "loss": 4.8628, "step": 61770 }, { "epoch": 0.08680669597458185, "grad_norm": 0.7911593317985535, "learning_rate": 0.000260317549529296, "loss": 4.6736, "step": 61780 }, { "epoch": 0.08682074691274543, "grad_norm": 0.8324936032295227, "learning_rate": 0.00026035970212168045, "loss": 4.8149, "step": 61790 }, { "epoch": 0.086834797850909, "grad_norm": 0.8072502017021179, "learning_rate": 0.0002604018547140649, "loss": 4.8918, "step": 61800 }, { "epoch": 0.08684884878907259, "grad_norm": 0.8104373216629028, "learning_rate": 0.0002604440073064493, "loss": 4.8896, "step": 61810 }, { "epoch": 0.08686289972723617, "grad_norm": 1.0080177783966064, "learning_rate": 0.00026048615989883374, "loss": 4.8292, "step": 61820 }, { "epoch": 0.08687695066539974, "grad_norm": 0.8252009749412537, "learning_rate": 0.0002605283124912182, "loss": 4.7111, "step": 61830 }, { "epoch": 0.08689100160356332, "grad_norm": 0.8061153888702393, "learning_rate": 0.0002605704650836026, "loss": 4.781, "step": 61840 }, { "epoch": 0.0869050525417269, "grad_norm": 0.8102174401283264, "learning_rate": 0.00026061261767598704, "loss": 4.8626, "step": 61850 }, { "epoch": 0.08691910347989047, "grad_norm": 0.8249738812446594, "learning_rate": 0.00026065477026837147, "loss": 4.9965, "step": 61860 }, { "epoch": 0.08693315441805405, "grad_norm": 0.8087854385375977, "learning_rate": 0.00026069692286075595, "loss": 4.8119, "step": 61870 }, { "epoch": 0.08694720535621762, "grad_norm": 0.7888401746749878, "learning_rate": 0.00026073907545314033, "loss": 4.7734, "step": 61880 }, { "epoch": 0.0869612562943812, "grad_norm": 0.7908345460891724, "learning_rate": 0.00026078122804552476, "loss": 4.792, "step": 61890 }, { "epoch": 0.08697530723254478, "grad_norm": 0.8059920072555542, "learning_rate": 0.0002608233806379092, "loss": 4.8439, "step": 61900 }, { "epoch": 0.08698935817070837, "grad_norm": 0.8107774257659912, 
"learning_rate": 0.0002608655332302936, "loss": 4.8511, "step": 61910 }, { "epoch": 0.08700340910887194, "grad_norm": 0.7921923995018005, "learning_rate": 0.00026090768582267805, "loss": 4.6943, "step": 61920 }, { "epoch": 0.08701746004703552, "grad_norm": 0.8148934245109558, "learning_rate": 0.00026094983841506254, "loss": 4.7229, "step": 61930 }, { "epoch": 0.0870315109851991, "grad_norm": 0.7990559935569763, "learning_rate": 0.0002609919910074469, "loss": 4.7258, "step": 61940 }, { "epoch": 0.08704556192336267, "grad_norm": 0.7900142073631287, "learning_rate": 0.00026103414359983135, "loss": 4.7876, "step": 61950 }, { "epoch": 0.08705961286152625, "grad_norm": 0.7979053258895874, "learning_rate": 0.0002610762961922158, "loss": 4.7788, "step": 61960 }, { "epoch": 0.08707366379968982, "grad_norm": 0.8106703162193298, "learning_rate": 0.0002611184487846002, "loss": 4.7476, "step": 61970 }, { "epoch": 0.0870877147378534, "grad_norm": 0.8180921673774719, "learning_rate": 0.00026116060137698464, "loss": 4.6959, "step": 61980 }, { "epoch": 0.08710176567601698, "grad_norm": 0.7843952178955078, "learning_rate": 0.0002612027539693691, "loss": 4.7037, "step": 61990 }, { "epoch": 0.08711581661418055, "grad_norm": 0.7980206608772278, "learning_rate": 0.0002612449065617535, "loss": 4.8514, "step": 62000 }, { "epoch": 0.08712986755234414, "grad_norm": 0.8227792382240295, "learning_rate": 0.00026128705915413793, "loss": 4.7442, "step": 62010 }, { "epoch": 0.08714391849050772, "grad_norm": 0.8404500484466553, "learning_rate": 0.00026132921174652237, "loss": 4.89, "step": 62020 }, { "epoch": 0.0871579694286713, "grad_norm": 0.8126966953277588, "learning_rate": 0.0002613713643389068, "loss": 4.8632, "step": 62030 }, { "epoch": 0.08717202036683487, "grad_norm": 0.817398726940155, "learning_rate": 0.0002614135169312913, "loss": 4.8315, "step": 62040 }, { "epoch": 0.08718607130499845, "grad_norm": 0.81786048412323, "learning_rate": 0.0002614556695236757, "loss": 4.7713, "step": 62050 
}, { "epoch": 0.08720012224316202, "grad_norm": 0.8178216814994812, "learning_rate": 0.00026149782211606014, "loss": 4.7856, "step": 62060 }, { "epoch": 0.0872141731813256, "grad_norm": 0.8115580081939697, "learning_rate": 0.0002615399747084445, "loss": 4.7314, "step": 62070 }, { "epoch": 0.08722822411948918, "grad_norm": 0.7866354584693909, "learning_rate": 0.00026158212730082895, "loss": 4.839, "step": 62080 }, { "epoch": 0.08724227505765275, "grad_norm": 0.808581531047821, "learning_rate": 0.0002616242798932134, "loss": 4.8308, "step": 62090 }, { "epoch": 0.08725632599581633, "grad_norm": 0.7981185913085938, "learning_rate": 0.00026166643248559787, "loss": 4.7796, "step": 62100 }, { "epoch": 0.0872703769339799, "grad_norm": 0.8379929065704346, "learning_rate": 0.0002617085850779823, "loss": 4.737, "step": 62110 }, { "epoch": 0.0872844278721435, "grad_norm": 0.8175838589668274, "learning_rate": 0.00026175073767036673, "loss": 4.8416, "step": 62120 }, { "epoch": 0.08729847881030707, "grad_norm": 0.8132398724555969, "learning_rate": 0.0002617928902627511, "loss": 4.8088, "step": 62130 }, { "epoch": 0.08731252974847065, "grad_norm": 0.8097195625305176, "learning_rate": 0.00026183504285513554, "loss": 4.8075, "step": 62140 }, { "epoch": 0.08732658068663422, "grad_norm": 0.8951746225357056, "learning_rate": 0.00026187719544751997, "loss": 4.8201, "step": 62150 }, { "epoch": 0.0873406316247978, "grad_norm": 0.8372669219970703, "learning_rate": 0.00026191934803990446, "loss": 4.8856, "step": 62160 }, { "epoch": 0.08735468256296137, "grad_norm": 0.7923863530158997, "learning_rate": 0.0002619615006322889, "loss": 4.8744, "step": 62170 }, { "epoch": 0.08736873350112495, "grad_norm": 0.7726845741271973, "learning_rate": 0.0002620036532246733, "loss": 4.8476, "step": 62180 }, { "epoch": 0.08738278443928853, "grad_norm": 0.7641924619674683, "learning_rate": 0.00026204580581705775, "loss": 4.8731, "step": 62190 }, { "epoch": 0.0873968353774521, "grad_norm": 0.8116229772567749, 
"learning_rate": 0.00026208795840944213, "loss": 4.8149, "step": 62200 }, { "epoch": 0.08741088631561568, "grad_norm": 0.7808107137680054, "learning_rate": 0.00026213011100182656, "loss": 4.7596, "step": 62210 }, { "epoch": 0.08742493725377927, "grad_norm": 0.7787986397743225, "learning_rate": 0.00026217226359421104, "loss": 4.7875, "step": 62220 }, { "epoch": 0.08743898819194285, "grad_norm": 0.7925445437431335, "learning_rate": 0.0002622144161865955, "loss": 4.7452, "step": 62230 }, { "epoch": 0.08745303913010642, "grad_norm": 0.8389007449150085, "learning_rate": 0.0002622565687789799, "loss": 4.7547, "step": 62240 }, { "epoch": 0.08746709006827, "grad_norm": 0.7930507659912109, "learning_rate": 0.00026229872137136434, "loss": 4.7961, "step": 62250 }, { "epoch": 0.08748114100643357, "grad_norm": 0.7937973737716675, "learning_rate": 0.0002623408739637487, "loss": 4.7547, "step": 62260 }, { "epoch": 0.08749519194459715, "grad_norm": 0.797887921333313, "learning_rate": 0.00026238302655613315, "loss": 4.9243, "step": 62270 }, { "epoch": 0.08750924288276073, "grad_norm": 0.7737236618995667, "learning_rate": 0.00026242517914851763, "loss": 4.8146, "step": 62280 }, { "epoch": 0.0875232938209243, "grad_norm": 0.8101477026939392, "learning_rate": 0.00026246733174090206, "loss": 4.7729, "step": 62290 }, { "epoch": 0.08753734475908788, "grad_norm": 0.8078815340995789, "learning_rate": 0.0002625094843332865, "loss": 4.6863, "step": 62300 }, { "epoch": 0.08755139569725146, "grad_norm": 0.7906801700592041, "learning_rate": 0.0002625516369256709, "loss": 4.7801, "step": 62310 }, { "epoch": 0.08756544663541505, "grad_norm": 0.8077771663665771, "learning_rate": 0.00026259378951805536, "loss": 4.7214, "step": 62320 }, { "epoch": 0.08757949757357862, "grad_norm": 0.8051345348358154, "learning_rate": 0.00026263594211043973, "loss": 4.8615, "step": 62330 }, { "epoch": 0.0875935485117422, "grad_norm": 0.798250675201416, "learning_rate": 0.0002626780947028242, "loss": 4.7257, "step": 
62340 }, { "epoch": 0.08760759944990577, "grad_norm": 0.7771165370941162, "learning_rate": 0.00026272024729520865, "loss": 4.7493, "step": 62350 }, { "epoch": 0.08762165038806935, "grad_norm": 0.7755945920944214, "learning_rate": 0.0002627623998875931, "loss": 4.9485, "step": 62360 }, { "epoch": 0.08763570132623293, "grad_norm": 0.7809829115867615, "learning_rate": 0.0002628045524799775, "loss": 4.7587, "step": 62370 }, { "epoch": 0.0876497522643965, "grad_norm": 0.80907142162323, "learning_rate": 0.00026284670507236194, "loss": 4.7375, "step": 62380 }, { "epoch": 0.08766380320256008, "grad_norm": 0.8076225519180298, "learning_rate": 0.0002628888576647463, "loss": 4.898, "step": 62390 }, { "epoch": 0.08767785414072365, "grad_norm": 0.7816614508628845, "learning_rate": 0.0002629310102571308, "loss": 4.689, "step": 62400 }, { "epoch": 0.08769190507888723, "grad_norm": 0.8814823031425476, "learning_rate": 0.00026297316284951524, "loss": 4.7298, "step": 62410 }, { "epoch": 0.0877059560170508, "grad_norm": 0.8320509791374207, "learning_rate": 0.00026301531544189967, "loss": 4.7035, "step": 62420 }, { "epoch": 0.0877200069552144, "grad_norm": 0.8074789643287659, "learning_rate": 0.0002630574680342841, "loss": 4.8431, "step": 62430 }, { "epoch": 0.08773405789337797, "grad_norm": 0.7870790362358093, "learning_rate": 0.00026309962062666853, "loss": 4.8224, "step": 62440 }, { "epoch": 0.08774810883154155, "grad_norm": 0.8190731406211853, "learning_rate": 0.00026314177321905296, "loss": 4.7692, "step": 62450 }, { "epoch": 0.08776215976970513, "grad_norm": 0.8550110459327698, "learning_rate": 0.0002631839258114374, "loss": 4.8563, "step": 62460 }, { "epoch": 0.0877762107078687, "grad_norm": 0.7676360607147217, "learning_rate": 0.0002632260784038218, "loss": 4.7988, "step": 62470 }, { "epoch": 0.08779026164603228, "grad_norm": 0.8471920490264893, "learning_rate": 0.00026326823099620625, "loss": 4.7623, "step": 62480 }, { "epoch": 0.08780431258419585, "grad_norm": 
0.7891804575920105, "learning_rate": 0.0002633103835885907, "loss": 4.8314, "step": 62490 }, { "epoch": 0.08781836352235943, "grad_norm": 0.8159024119377136, "learning_rate": 0.0002633525361809751, "loss": 4.8448, "step": 62500 }, { "epoch": 0.087832414460523, "grad_norm": 0.7949129939079285, "learning_rate": 0.00026339468877335955, "loss": 4.7038, "step": 62510 }, { "epoch": 0.08784646539868658, "grad_norm": 0.8131535649299622, "learning_rate": 0.000263436841365744, "loss": 4.8487, "step": 62520 }, { "epoch": 0.08786051633685017, "grad_norm": 0.856124222278595, "learning_rate": 0.0002634789939581284, "loss": 4.7969, "step": 62530 }, { "epoch": 0.08787456727501375, "grad_norm": 0.7902341485023499, "learning_rate": 0.00026352114655051284, "loss": 4.7348, "step": 62540 }, { "epoch": 0.08788861821317732, "grad_norm": 0.7894250154495239, "learning_rate": 0.00026356329914289727, "loss": 4.7934, "step": 62550 }, { "epoch": 0.0879026691513409, "grad_norm": 0.7873814702033997, "learning_rate": 0.0002636054517352817, "loss": 4.8012, "step": 62560 }, { "epoch": 0.08791672008950448, "grad_norm": 0.8066341876983643, "learning_rate": 0.00026364760432766613, "loss": 4.8533, "step": 62570 }, { "epoch": 0.08793077102766805, "grad_norm": 0.7610378861427307, "learning_rate": 0.00026368975692005057, "loss": 4.7305, "step": 62580 }, { "epoch": 0.08794482196583163, "grad_norm": 0.8013112545013428, "learning_rate": 0.000263731909512435, "loss": 4.7925, "step": 62590 }, { "epoch": 0.0879588729039952, "grad_norm": 0.7849205732345581, "learning_rate": 0.00026377406210481943, "loss": 4.793, "step": 62600 }, { "epoch": 0.08797292384215878, "grad_norm": 0.8190938234329224, "learning_rate": 0.00026381621469720386, "loss": 4.7288, "step": 62610 }, { "epoch": 0.08798697478032236, "grad_norm": 0.8454349040985107, "learning_rate": 0.0002638583672895883, "loss": 4.8616, "step": 62620 }, { "epoch": 0.08800102571848595, "grad_norm": 0.7780125737190247, "learning_rate": 0.0002639005198819727, "loss": 
4.7913, "step": 62630 }, { "epoch": 0.08801507665664952, "grad_norm": 0.8072920441627502, "learning_rate": 0.00026394267247435715, "loss": 4.8829, "step": 62640 }, { "epoch": 0.0880291275948131, "grad_norm": 0.8076897263526917, "learning_rate": 0.0002639848250667416, "loss": 4.7353, "step": 62650 }, { "epoch": 0.08804317853297668, "grad_norm": 0.7699461579322815, "learning_rate": 0.000264026977659126, "loss": 4.8364, "step": 62660 }, { "epoch": 0.08805722947114025, "grad_norm": 0.8059861063957214, "learning_rate": 0.00026406913025151045, "loss": 4.779, "step": 62670 }, { "epoch": 0.08807128040930383, "grad_norm": 0.7961968779563904, "learning_rate": 0.0002641112828438949, "loss": 4.8785, "step": 62680 }, { "epoch": 0.0880853313474674, "grad_norm": 0.8171684741973877, "learning_rate": 0.0002641534354362793, "loss": 4.7068, "step": 62690 }, { "epoch": 0.08809938228563098, "grad_norm": 0.7811867594718933, "learning_rate": 0.00026419558802866374, "loss": 4.971, "step": 62700 }, { "epoch": 0.08811343322379456, "grad_norm": 0.7805197834968567, "learning_rate": 0.00026423774062104817, "loss": 4.9694, "step": 62710 }, { "epoch": 0.08812748416195813, "grad_norm": 0.8082363605499268, "learning_rate": 0.0002642798932134326, "loss": 4.7084, "step": 62720 }, { "epoch": 0.08814153510012171, "grad_norm": 0.8304884433746338, "learning_rate": 0.00026432204580581703, "loss": 4.7155, "step": 62730 }, { "epoch": 0.0881555860382853, "grad_norm": 0.8257015943527222, "learning_rate": 0.00026436419839820146, "loss": 4.8118, "step": 62740 }, { "epoch": 0.08816963697644888, "grad_norm": 0.7988607883453369, "learning_rate": 0.0002644063509905859, "loss": 4.7102, "step": 62750 }, { "epoch": 0.08818368791461245, "grad_norm": 0.7881811857223511, "learning_rate": 0.0002644485035829703, "loss": 4.8997, "step": 62760 }, { "epoch": 0.08819773885277603, "grad_norm": 0.859298825263977, "learning_rate": 0.00026449065617535476, "loss": 4.8189, "step": 62770 }, { "epoch": 0.0882117897909396, 
"grad_norm": 0.781113862991333, "learning_rate": 0.0002645328087677392, "loss": 4.7552, "step": 62780 }, { "epoch": 0.08822584072910318, "grad_norm": 0.7915051579475403, "learning_rate": 0.0002645749613601236, "loss": 4.7368, "step": 62790 }, { "epoch": 0.08823989166726676, "grad_norm": 0.8124852180480957, "learning_rate": 0.00026461711395250805, "loss": 4.8497, "step": 62800 }, { "epoch": 0.08825394260543033, "grad_norm": 0.8054794073104858, "learning_rate": 0.0002646592665448925, "loss": 4.7871, "step": 62810 }, { "epoch": 0.08826799354359391, "grad_norm": 0.8467390537261963, "learning_rate": 0.0002647014191372769, "loss": 4.6806, "step": 62820 }, { "epoch": 0.08828204448175749, "grad_norm": 0.8043726682662964, "learning_rate": 0.00026474357172966134, "loss": 4.8889, "step": 62830 }, { "epoch": 0.08829609541992108, "grad_norm": 0.7874152660369873, "learning_rate": 0.0002647857243220458, "loss": 4.8625, "step": 62840 }, { "epoch": 0.08831014635808465, "grad_norm": 0.8490023016929626, "learning_rate": 0.0002648278769144302, "loss": 4.7956, "step": 62850 }, { "epoch": 0.08832419729624823, "grad_norm": 0.8019155263900757, "learning_rate": 0.00026487002950681464, "loss": 4.7038, "step": 62860 }, { "epoch": 0.0883382482344118, "grad_norm": 0.7915338277816772, "learning_rate": 0.00026491218209919907, "loss": 4.8408, "step": 62870 }, { "epoch": 0.08835229917257538, "grad_norm": 0.7774483561515808, "learning_rate": 0.0002649543346915835, "loss": 4.8746, "step": 62880 }, { "epoch": 0.08836635011073896, "grad_norm": 0.7984682321548462, "learning_rate": 0.00026499648728396793, "loss": 4.7794, "step": 62890 }, { "epoch": 0.08838040104890253, "grad_norm": 0.803041398525238, "learning_rate": 0.00026503863987635236, "loss": 4.7, "step": 62900 }, { "epoch": 0.08839445198706611, "grad_norm": 0.7951576113700867, "learning_rate": 0.0002650807924687368, "loss": 4.7453, "step": 62910 }, { "epoch": 0.08840850292522968, "grad_norm": 0.7813413143157959, "learning_rate": 
0.0002651229450611212, "loss": 4.7845, "step": 62920 }, { "epoch": 0.08842255386339326, "grad_norm": 0.7739083170890808, "learning_rate": 0.00026516509765350566, "loss": 4.7668, "step": 62930 }, { "epoch": 0.08843660480155685, "grad_norm": 0.8090055584907532, "learning_rate": 0.0002652072502458901, "loss": 4.8, "step": 62940 }, { "epoch": 0.08845065573972043, "grad_norm": 0.7967948317527771, "learning_rate": 0.0002652494028382745, "loss": 4.9125, "step": 62950 }, { "epoch": 0.088464706677884, "grad_norm": 0.7919458150863647, "learning_rate": 0.00026529155543065895, "loss": 4.8298, "step": 62960 }, { "epoch": 0.08847875761604758, "grad_norm": 0.7871581315994263, "learning_rate": 0.0002653337080230434, "loss": 4.8399, "step": 62970 }, { "epoch": 0.08849280855421116, "grad_norm": 0.7785304188728333, "learning_rate": 0.0002653758606154278, "loss": 4.856, "step": 62980 }, { "epoch": 0.08850685949237473, "grad_norm": 0.7989861369132996, "learning_rate": 0.00026541801320781224, "loss": 4.7617, "step": 62990 }, { "epoch": 0.08852091043053831, "grad_norm": 0.7797976732254028, "learning_rate": 0.0002654601658001967, "loss": 4.9164, "step": 63000 }, { "epoch": 0.08853496136870188, "grad_norm": 0.766815721988678, "learning_rate": 0.0002655023183925811, "loss": 4.809, "step": 63010 }, { "epoch": 0.08854901230686546, "grad_norm": 0.8306536078453064, "learning_rate": 0.00026554447098496554, "loss": 4.8476, "step": 63020 }, { "epoch": 0.08856306324502904, "grad_norm": 0.8039864897727966, "learning_rate": 0.00026558662357735, "loss": 4.7684, "step": 63030 }, { "epoch": 0.08857711418319261, "grad_norm": 0.8326762914657593, "learning_rate": 0.0002656287761697344, "loss": 4.7807, "step": 63040 }, { "epoch": 0.0885911651213562, "grad_norm": 0.7910907864570618, "learning_rate": 0.00026567092876211883, "loss": 4.7499, "step": 63050 }, { "epoch": 0.08860521605951978, "grad_norm": 0.7897930145263672, "learning_rate": 0.00026571308135450326, "loss": 4.8467, "step": 63060 }, { "epoch": 
0.08861926699768335, "grad_norm": 0.7836660146713257, "learning_rate": 0.0002657552339468877, "loss": 4.7958, "step": 63070 }, { "epoch": 0.08863331793584693, "grad_norm": 0.7851306796073914, "learning_rate": 0.0002657973865392721, "loss": 4.7329, "step": 63080 }, { "epoch": 0.08864736887401051, "grad_norm": 0.7947384715080261, "learning_rate": 0.0002658395391316566, "loss": 4.8723, "step": 63090 }, { "epoch": 0.08866141981217408, "grad_norm": 0.8200597167015076, "learning_rate": 0.000265881691724041, "loss": 4.8698, "step": 63100 }, { "epoch": 0.08867547075033766, "grad_norm": 0.7728275060653687, "learning_rate": 0.0002659238443164254, "loss": 4.7902, "step": 63110 }, { "epoch": 0.08868952168850124, "grad_norm": 0.7961565852165222, "learning_rate": 0.00026596599690880985, "loss": 4.736, "step": 63120 }, { "epoch": 0.08870357262666481, "grad_norm": 0.7639411091804504, "learning_rate": 0.0002660081495011943, "loss": 4.9944, "step": 63130 }, { "epoch": 0.08871762356482839, "grad_norm": 0.8078753352165222, "learning_rate": 0.0002660503020935787, "loss": 4.7914, "step": 63140 }, { "epoch": 0.08873167450299198, "grad_norm": 0.8076030611991882, "learning_rate": 0.0002660924546859632, "loss": 4.9144, "step": 63150 }, { "epoch": 0.08874572544115555, "grad_norm": 0.8135642409324646, "learning_rate": 0.00026613460727834763, "loss": 4.7739, "step": 63160 }, { "epoch": 0.08875977637931913, "grad_norm": 0.81586754322052, "learning_rate": 0.000266176759870732, "loss": 4.7433, "step": 63170 }, { "epoch": 0.0887738273174827, "grad_norm": 0.8089296817779541, "learning_rate": 0.00026621891246311644, "loss": 4.8924, "step": 63180 }, { "epoch": 0.08878787825564628, "grad_norm": 0.7918955683708191, "learning_rate": 0.00026626106505550087, "loss": 4.7475, "step": 63190 }, { "epoch": 0.08880192919380986, "grad_norm": 0.7879622578620911, "learning_rate": 0.00026630321764788535, "loss": 4.8674, "step": 63200 }, { "epoch": 0.08881598013197343, "grad_norm": 0.7970558404922485, 
"learning_rate": 0.0002663453702402698, "loss": 4.8004, "step": 63210 }, { "epoch": 0.08883003107013701, "grad_norm": 0.7629973292350769, "learning_rate": 0.0002663875228326542, "loss": 4.715, "step": 63220 }, { "epoch": 0.08884408200830059, "grad_norm": 0.8390489816665649, "learning_rate": 0.0002664296754250386, "loss": 4.7549, "step": 63230 }, { "epoch": 0.08885813294646416, "grad_norm": 0.7946515679359436, "learning_rate": 0.000266471828017423, "loss": 4.846, "step": 63240 }, { "epoch": 0.08887218388462775, "grad_norm": 0.790104866027832, "learning_rate": 0.00026651398060980745, "loss": 5.0164, "step": 63250 }, { "epoch": 0.08888623482279133, "grad_norm": 0.7630163431167603, "learning_rate": 0.00026655613320219194, "loss": 4.895, "step": 63260 }, { "epoch": 0.0889002857609549, "grad_norm": 0.8079057931900024, "learning_rate": 0.00026659828579457637, "loss": 4.8883, "step": 63270 }, { "epoch": 0.08891433669911848, "grad_norm": 0.7824141383171082, "learning_rate": 0.0002666404383869608, "loss": 4.7367, "step": 63280 }, { "epoch": 0.08892838763728206, "grad_norm": 1.028652310371399, "learning_rate": 0.0002666825909793452, "loss": 4.8906, "step": 63290 }, { "epoch": 0.08894243857544563, "grad_norm": 0.8163354396820068, "learning_rate": 0.0002667247435717296, "loss": 4.8534, "step": 63300 }, { "epoch": 0.08895648951360921, "grad_norm": 0.7730701565742493, "learning_rate": 0.00026676689616411404, "loss": 4.7245, "step": 63310 }, { "epoch": 0.08897054045177279, "grad_norm": 0.7872650623321533, "learning_rate": 0.0002668090487564985, "loss": 4.799, "step": 63320 }, { "epoch": 0.08898459138993636, "grad_norm": 0.7956480979919434, "learning_rate": 0.00026685120134888296, "loss": 4.8097, "step": 63330 }, { "epoch": 0.08899864232809994, "grad_norm": 0.7865951061248779, "learning_rate": 0.0002668933539412674, "loss": 4.9028, "step": 63340 }, { "epoch": 0.08901269326626352, "grad_norm": 0.7755113244056702, "learning_rate": 0.0002669355065336518, "loss": 4.8193, "step": 63350 
}, { "epoch": 0.0890267442044271, "grad_norm": 0.7839175462722778, "learning_rate": 0.0002669776591260362, "loss": 4.7572, "step": 63360 }, { "epoch": 0.08904079514259068, "grad_norm": 0.801013708114624, "learning_rate": 0.00026701981171842063, "loss": 4.7782, "step": 63370 }, { "epoch": 0.08905484608075426, "grad_norm": 0.8248993754386902, "learning_rate": 0.0002670619643108051, "loss": 4.8563, "step": 63380 }, { "epoch": 0.08906889701891783, "grad_norm": 0.8062582612037659, "learning_rate": 0.00026710411690318954, "loss": 4.8467, "step": 63390 }, { "epoch": 0.08908294795708141, "grad_norm": 0.7893478274345398, "learning_rate": 0.000267146269495574, "loss": 4.8678, "step": 63400 }, { "epoch": 0.08909699889524499, "grad_norm": 0.8045288920402527, "learning_rate": 0.0002671884220879584, "loss": 4.7405, "step": 63410 }, { "epoch": 0.08911104983340856, "grad_norm": 0.7915014028549194, "learning_rate": 0.0002672305746803428, "loss": 4.7565, "step": 63420 }, { "epoch": 0.08912510077157214, "grad_norm": 0.8055747747421265, "learning_rate": 0.0002672727272727272, "loss": 4.8634, "step": 63430 }, { "epoch": 0.08913915170973571, "grad_norm": 0.8131148815155029, "learning_rate": 0.0002673148798651117, "loss": 4.7419, "step": 63440 }, { "epoch": 0.08915320264789929, "grad_norm": 0.7881912589073181, "learning_rate": 0.00026735703245749613, "loss": 4.8812, "step": 63450 }, { "epoch": 0.08916725358606288, "grad_norm": 0.7696599960327148, "learning_rate": 0.00026739918504988056, "loss": 4.7284, "step": 63460 }, { "epoch": 0.08918130452422646, "grad_norm": 0.7833225727081299, "learning_rate": 0.000267441337642265, "loss": 4.8861, "step": 63470 }, { "epoch": 0.08919535546239003, "grad_norm": 0.7945125102996826, "learning_rate": 0.0002674834902346494, "loss": 4.8175, "step": 63480 }, { "epoch": 0.08920940640055361, "grad_norm": 0.8102738857269287, "learning_rate": 0.0002675256428270338, "loss": 4.8052, "step": 63490 }, { "epoch": 0.08922345733871719, "grad_norm": 0.7833125591278076, 
"learning_rate": 0.0002675677954194183, "loss": 4.8178, "step": 63500 }, { "epoch": 0.08923750827688076, "grad_norm": 0.7774162888526917, "learning_rate": 0.0002676099480118027, "loss": 4.9522, "step": 63510 }, { "epoch": 0.08925155921504434, "grad_norm": 0.7914357781410217, "learning_rate": 0.00026765210060418715, "loss": 4.7583, "step": 63520 }, { "epoch": 0.08926561015320791, "grad_norm": 0.780647337436676, "learning_rate": 0.0002676942531965716, "loss": 4.7975, "step": 63530 }, { "epoch": 0.08927966109137149, "grad_norm": 0.7892720699310303, "learning_rate": 0.000267736405788956, "loss": 4.8141, "step": 63540 }, { "epoch": 0.08929371202953507, "grad_norm": 0.7910761833190918, "learning_rate": 0.0002677785583813404, "loss": 4.7316, "step": 63550 }, { "epoch": 0.08930776296769866, "grad_norm": 0.7864344716072083, "learning_rate": 0.0002678207109737249, "loss": 4.6778, "step": 63560 }, { "epoch": 0.08932181390586223, "grad_norm": 0.8073357343673706, "learning_rate": 0.0002678628635661093, "loss": 4.7731, "step": 63570 }, { "epoch": 0.08933586484402581, "grad_norm": 0.784953236579895, "learning_rate": 0.00026790501615849374, "loss": 4.7903, "step": 63580 }, { "epoch": 0.08934991578218938, "grad_norm": 0.7666168808937073, "learning_rate": 0.00026794716875087817, "loss": 4.8284, "step": 63590 }, { "epoch": 0.08936396672035296, "grad_norm": 0.8397361636161804, "learning_rate": 0.0002679893213432626, "loss": 4.872, "step": 63600 }, { "epoch": 0.08937801765851654, "grad_norm": 0.801026463508606, "learning_rate": 0.00026803147393564703, "loss": 4.9165, "step": 63610 }, { "epoch": 0.08939206859668011, "grad_norm": 0.7671746015548706, "learning_rate": 0.00026807362652803146, "loss": 4.9064, "step": 63620 }, { "epoch": 0.08940611953484369, "grad_norm": 0.791368305683136, "learning_rate": 0.0002681157791204159, "loss": 4.8467, "step": 63630 }, { "epoch": 0.08942017047300727, "grad_norm": 0.7889533638954163, "learning_rate": 0.0002681579317128003, "loss": 4.7742, "step": 
63640 }, { "epoch": 0.08943422141117084, "grad_norm": 0.8022910356521606, "learning_rate": 0.00026820008430518475, "loss": 4.7715, "step": 63650 }, { "epoch": 0.08944827234933442, "grad_norm": 0.8055729866027832, "learning_rate": 0.0002682422368975692, "loss": 4.8834, "step": 63660 }, { "epoch": 0.08946232328749801, "grad_norm": 0.8192464709281921, "learning_rate": 0.0002682843894899536, "loss": 4.8719, "step": 63670 }, { "epoch": 0.08947637422566158, "grad_norm": 0.7914919257164001, "learning_rate": 0.00026832654208233805, "loss": 4.8239, "step": 63680 }, { "epoch": 0.08949042516382516, "grad_norm": 0.7857105135917664, "learning_rate": 0.0002683686946747225, "loss": 4.798, "step": 63690 }, { "epoch": 0.08950447610198874, "grad_norm": 0.7843837738037109, "learning_rate": 0.0002684108472671069, "loss": 4.8482, "step": 63700 }, { "epoch": 0.08951852704015231, "grad_norm": 0.7536832690238953, "learning_rate": 0.00026845299985949134, "loss": 4.9153, "step": 63710 }, { "epoch": 0.08953257797831589, "grad_norm": 0.8098011016845703, "learning_rate": 0.00026849515245187577, "loss": 4.7243, "step": 63720 }, { "epoch": 0.08954662891647946, "grad_norm": 0.7772601842880249, "learning_rate": 0.0002685373050442602, "loss": 4.7708, "step": 63730 }, { "epoch": 0.08956067985464304, "grad_norm": 0.7769831418991089, "learning_rate": 0.00026857945763664464, "loss": 4.8495, "step": 63740 }, { "epoch": 0.08957473079280662, "grad_norm": 0.7833921909332275, "learning_rate": 0.00026862161022902907, "loss": 4.7753, "step": 63750 }, { "epoch": 0.0895887817309702, "grad_norm": 0.7920560836791992, "learning_rate": 0.0002686637628214135, "loss": 4.892, "step": 63760 }, { "epoch": 0.08960283266913378, "grad_norm": 0.7888614535331726, "learning_rate": 0.00026870591541379793, "loss": 4.8269, "step": 63770 }, { "epoch": 0.08961688360729736, "grad_norm": 0.7918998599052429, "learning_rate": 0.00026874806800618236, "loss": 4.7976, "step": 63780 }, { "epoch": 0.08963093454546094, "grad_norm": 
0.8044359683990479, "learning_rate": 0.0002687902205985668, "loss": 4.6607, "step": 63790 }, { "epoch": 0.08964498548362451, "grad_norm": 0.8149663209915161, "learning_rate": 0.0002688323731909512, "loss": 4.9424, "step": 63800 }, { "epoch": 0.08965903642178809, "grad_norm": 0.8094214797019958, "learning_rate": 0.00026887452578333565, "loss": 4.7464, "step": 63810 }, { "epoch": 0.08967308735995166, "grad_norm": 0.8028861284255981, "learning_rate": 0.0002689166783757201, "loss": 4.7546, "step": 63820 }, { "epoch": 0.08968713829811524, "grad_norm": 0.7879270315170288, "learning_rate": 0.0002689588309681045, "loss": 4.7557, "step": 63830 }, { "epoch": 0.08970118923627882, "grad_norm": 0.802869439125061, "learning_rate": 0.00026900098356048895, "loss": 4.7707, "step": 63840 }, { "epoch": 0.08971524017444239, "grad_norm": 0.825872004032135, "learning_rate": 0.0002690431361528734, "loss": 4.7376, "step": 63850 }, { "epoch": 0.08972929111260597, "grad_norm": 0.7914631366729736, "learning_rate": 0.0002690852887452578, "loss": 4.7492, "step": 63860 }, { "epoch": 0.08974334205076956, "grad_norm": 0.8055576682090759, "learning_rate": 0.00026912744133764224, "loss": 4.8859, "step": 63870 }, { "epoch": 0.08975739298893314, "grad_norm": 0.7824854254722595, "learning_rate": 0.00026916959393002667, "loss": 4.6911, "step": 63880 }, { "epoch": 0.08977144392709671, "grad_norm": 0.7842464447021484, "learning_rate": 0.0002692117465224111, "loss": 4.8765, "step": 63890 }, { "epoch": 0.08978549486526029, "grad_norm": 0.7833605408668518, "learning_rate": 0.00026925389911479553, "loss": 4.9597, "step": 63900 }, { "epoch": 0.08979954580342386, "grad_norm": 0.8475800156593323, "learning_rate": 0.00026929605170717997, "loss": 4.8712, "step": 63910 }, { "epoch": 0.08981359674158744, "grad_norm": 0.7852799296379089, "learning_rate": 0.0002693382042995644, "loss": 4.7098, "step": 63920 }, { "epoch": 0.08982764767975102, "grad_norm": 0.7918947339057922, "learning_rate": 0.00026938035689194883, 
"loss": 4.7586, "step": 63930 }, { "epoch": 0.08984169861791459, "grad_norm": 0.7760900855064392, "learning_rate": 0.00026942250948433326, "loss": 4.6896, "step": 63940 }, { "epoch": 0.08985574955607817, "grad_norm": 0.7669195532798767, "learning_rate": 0.0002694646620767177, "loss": 4.7557, "step": 63950 }, { "epoch": 0.08986980049424174, "grad_norm": 0.7910531163215637, "learning_rate": 0.0002695068146691021, "loss": 4.7617, "step": 63960 }, { "epoch": 0.08988385143240532, "grad_norm": 0.7762371301651001, "learning_rate": 0.00026954896726148655, "loss": 4.8493, "step": 63970 }, { "epoch": 0.08989790237056891, "grad_norm": 0.7670621275901794, "learning_rate": 0.000269591119853871, "loss": 4.8319, "step": 63980 }, { "epoch": 0.08991195330873249, "grad_norm": 0.7959233522415161, "learning_rate": 0.0002696332724462554, "loss": 4.8357, "step": 63990 }, { "epoch": 0.08992600424689606, "grad_norm": 0.7903863191604614, "learning_rate": 0.00026967542503863985, "loss": 4.9134, "step": 64000 }, { "epoch": 0.08994005518505964, "grad_norm": 0.7668089866638184, "learning_rate": 0.0002697175776310243, "loss": 4.8917, "step": 64010 }, { "epoch": 0.08995410612322322, "grad_norm": 0.7648235559463501, "learning_rate": 0.0002697597302234087, "loss": 4.7716, "step": 64020 }, { "epoch": 0.08996815706138679, "grad_norm": 0.7938455939292908, "learning_rate": 0.00026980188281579314, "loss": 4.8386, "step": 64030 }, { "epoch": 0.08998220799955037, "grad_norm": 0.7752029895782471, "learning_rate": 0.00026984403540817757, "loss": 4.7078, "step": 64040 }, { "epoch": 0.08999625893771394, "grad_norm": 0.7659651041030884, "learning_rate": 0.000269886188000562, "loss": 4.83, "step": 64050 }, { "epoch": 0.09001030987587752, "grad_norm": 0.7914566993713379, "learning_rate": 0.00026992834059294643, "loss": 4.8551, "step": 64060 }, { "epoch": 0.0900243608140411, "grad_norm": 0.7842085957527161, "learning_rate": 0.00026997049318533086, "loss": 4.7521, "step": 64070 }, { "epoch": 0.09003841175220469, 
"grad_norm": 0.8064524531364441, "learning_rate": 0.0002700126457777153, "loss": 4.6835, "step": 64080 }, { "epoch": 0.09005246269036826, "grad_norm": 0.7767420411109924, "learning_rate": 0.0002700547983700997, "loss": 4.8526, "step": 64090 }, { "epoch": 0.09006651362853184, "grad_norm": 0.8287174105644226, "learning_rate": 0.00027009695096248416, "loss": 4.8894, "step": 64100 }, { "epoch": 0.09008056456669541, "grad_norm": 0.7760927677154541, "learning_rate": 0.0002701391035548686, "loss": 4.8314, "step": 64110 }, { "epoch": 0.09009461550485899, "grad_norm": 0.7992680072784424, "learning_rate": 0.000270181256147253, "loss": 4.6976, "step": 64120 }, { "epoch": 0.09010866644302257, "grad_norm": 0.7589333057403564, "learning_rate": 0.00027022340873963745, "loss": 4.7346, "step": 64130 }, { "epoch": 0.09012271738118614, "grad_norm": 0.8577151298522949, "learning_rate": 0.0002702655613320219, "loss": 4.8193, "step": 64140 }, { "epoch": 0.09013676831934972, "grad_norm": 0.8156781792640686, "learning_rate": 0.0002703077139244063, "loss": 4.8104, "step": 64150 }, { "epoch": 0.0901508192575133, "grad_norm": 0.780096709728241, "learning_rate": 0.00027034986651679074, "loss": 4.8663, "step": 64160 }, { "epoch": 0.09016487019567687, "grad_norm": 0.798434853553772, "learning_rate": 0.0002703920191091752, "loss": 4.971, "step": 64170 }, { "epoch": 0.09017892113384046, "grad_norm": 0.7590034008026123, "learning_rate": 0.0002704341717015596, "loss": 4.7796, "step": 64180 }, { "epoch": 0.09019297207200404, "grad_norm": 0.7929971814155579, "learning_rate": 0.0002704763242939441, "loss": 4.8087, "step": 64190 }, { "epoch": 0.09020702301016761, "grad_norm": 0.8228213787078857, "learning_rate": 0.00027051847688632847, "loss": 4.7715, "step": 64200 }, { "epoch": 0.09022107394833119, "grad_norm": 0.7692803144454956, "learning_rate": 0.0002705606294787129, "loss": 4.7081, "step": 64210 }, { "epoch": 0.09023512488649477, "grad_norm": 0.7841417789459229, "learning_rate": 
0.00027060278207109733, "loss": 4.8686, "step": 64220 }, { "epoch": 0.09024917582465834, "grad_norm": 0.7873497009277344, "learning_rate": 0.00027064493466348176, "loss": 4.8794, "step": 64230 }, { "epoch": 0.09026322676282192, "grad_norm": 0.7846882343292236, "learning_rate": 0.0002706870872558662, "loss": 4.671, "step": 64240 }, { "epoch": 0.0902772777009855, "grad_norm": 0.7908580303192139, "learning_rate": 0.0002707292398482507, "loss": 4.7099, "step": 64250 }, { "epoch": 0.09029132863914907, "grad_norm": 0.8587797284126282, "learning_rate": 0.00027077139244063506, "loss": 4.7138, "step": 64260 }, { "epoch": 0.09030537957731265, "grad_norm": 0.8411611914634705, "learning_rate": 0.0002708135450330195, "loss": 4.8517, "step": 64270 }, { "epoch": 0.09031943051547622, "grad_norm": 0.8374800086021423, "learning_rate": 0.0002708556976254039, "loss": 4.7404, "step": 64280 }, { "epoch": 0.09033348145363981, "grad_norm": 0.74876469373703, "learning_rate": 0.00027089785021778835, "loss": 4.8666, "step": 64290 }, { "epoch": 0.09034753239180339, "grad_norm": 0.7894213199615479, "learning_rate": 0.0002709400028101728, "loss": 4.6872, "step": 64300 }, { "epoch": 0.09036158332996697, "grad_norm": 0.7926090955734253, "learning_rate": 0.00027098215540255727, "loss": 4.7816, "step": 64310 }, { "epoch": 0.09037563426813054, "grad_norm": 0.7763543725013733, "learning_rate": 0.0002710243079949417, "loss": 4.8655, "step": 64320 }, { "epoch": 0.09038968520629412, "grad_norm": 0.7839246988296509, "learning_rate": 0.0002710664605873261, "loss": 4.9012, "step": 64330 }, { "epoch": 0.0904037361444577, "grad_norm": 0.8784212470054626, "learning_rate": 0.0002711086131797105, "loss": 4.8065, "step": 64340 }, { "epoch": 0.09041778708262127, "grad_norm": 0.7844157218933105, "learning_rate": 0.00027115076577209494, "loss": 4.7653, "step": 64350 }, { "epoch": 0.09043183802078485, "grad_norm": 0.7894060015678406, "learning_rate": 0.00027119291836447937, "loss": 4.7516, "step": 64360 }, { 
"epoch": 0.09044588895894842, "grad_norm": 0.7836933732032776, "learning_rate": 0.00027123085569762535, "loss": 4.7689, "step": 64370 }, { "epoch": 0.090459939897112, "grad_norm": 0.7689288854598999, "learning_rate": 0.0002712730082900098, "loss": 4.8232, "step": 64380 }, { "epoch": 0.09047399083527559, "grad_norm": 0.83738112449646, "learning_rate": 0.00027131516088239427, "loss": 4.8224, "step": 64390 }, { "epoch": 0.09048804177343917, "grad_norm": 0.7634382843971252, "learning_rate": 0.0002713573134747787, "loss": 4.9363, "step": 64400 }, { "epoch": 0.09050209271160274, "grad_norm": 0.7892853617668152, "learning_rate": 0.00027139946606716313, "loss": 4.8402, "step": 64410 }, { "epoch": 0.09051614364976632, "grad_norm": 0.7666093111038208, "learning_rate": 0.0002714416186595475, "loss": 4.8348, "step": 64420 }, { "epoch": 0.0905301945879299, "grad_norm": 0.780320405960083, "learning_rate": 0.00027148377125193194, "loss": 4.695, "step": 64430 }, { "epoch": 0.09054424552609347, "grad_norm": 0.7658125758171082, "learning_rate": 0.00027152592384431637, "loss": 4.8615, "step": 64440 }, { "epoch": 0.09055829646425705, "grad_norm": 0.7964998483657837, "learning_rate": 0.00027156807643670085, "loss": 4.7311, "step": 64450 }, { "epoch": 0.09057234740242062, "grad_norm": 0.7794137001037598, "learning_rate": 0.0002716102290290853, "loss": 4.8909, "step": 64460 }, { "epoch": 0.0905863983405842, "grad_norm": 0.763118326663971, "learning_rate": 0.0002716523816214697, "loss": 4.6842, "step": 64470 }, { "epoch": 0.09060044927874777, "grad_norm": 0.758752167224884, "learning_rate": 0.00027169453421385415, "loss": 4.7289, "step": 64480 }, { "epoch": 0.09061450021691136, "grad_norm": 0.8015297055244446, "learning_rate": 0.0002717366868062385, "loss": 4.8431, "step": 64490 }, { "epoch": 0.09062855115507494, "grad_norm": 0.7830663323402405, "learning_rate": 0.000271778839398623, "loss": 4.8457, "step": 64500 }, { "epoch": 0.09064260209323852, "grad_norm": 0.7865242958068848, 
"learning_rate": 0.00027182099199100744, "loss": 4.7674, "step": 64510 }, { "epoch": 0.09065665303140209, "grad_norm": 0.829883337020874, "learning_rate": 0.00027186314458339187, "loss": 4.7804, "step": 64520 }, { "epoch": 0.09067070396956567, "grad_norm": 0.7968289852142334, "learning_rate": 0.0002719052971757763, "loss": 4.8857, "step": 64530 }, { "epoch": 0.09068475490772925, "grad_norm": 0.7666770815849304, "learning_rate": 0.00027194744976816073, "loss": 4.7485, "step": 64540 }, { "epoch": 0.09069880584589282, "grad_norm": 0.7687329053878784, "learning_rate": 0.0002719896023605451, "loss": 4.8631, "step": 64550 }, { "epoch": 0.0907128567840564, "grad_norm": 0.8181332945823669, "learning_rate": 0.0002720317549529296, "loss": 4.8318, "step": 64560 }, { "epoch": 0.09072690772221997, "grad_norm": 0.7772801518440247, "learning_rate": 0.00027207390754531403, "loss": 4.8091, "step": 64570 }, { "epoch": 0.09074095866038355, "grad_norm": 0.7849490642547607, "learning_rate": 0.00027211606013769846, "loss": 4.8445, "step": 64580 }, { "epoch": 0.09075500959854713, "grad_norm": 0.7753223180770874, "learning_rate": 0.0002721582127300829, "loss": 4.9714, "step": 64590 }, { "epoch": 0.09076906053671072, "grad_norm": 0.7721008062362671, "learning_rate": 0.0002722003653224673, "loss": 4.8823, "step": 64600 }, { "epoch": 0.09078311147487429, "grad_norm": 0.7952477335929871, "learning_rate": 0.00027224251791485175, "loss": 4.8502, "step": 64610 }, { "epoch": 0.09079716241303787, "grad_norm": 0.7919043302536011, "learning_rate": 0.0002722846705072362, "loss": 4.757, "step": 64620 }, { "epoch": 0.09081121335120144, "grad_norm": 0.7945954203605652, "learning_rate": 0.0002723268230996206, "loss": 4.8094, "step": 64630 }, { "epoch": 0.09082526428936502, "grad_norm": 0.768685519695282, "learning_rate": 0.00027236897569200505, "loss": 4.796, "step": 64640 }, { "epoch": 0.0908393152275286, "grad_norm": 0.7738785743713379, "learning_rate": 0.0002724111282843895, "loss": 4.8588, "step": 
64650 }, { "epoch": 0.09085336616569217, "grad_norm": 0.76154625415802, "learning_rate": 0.0002724532808767739, "loss": 4.8627, "step": 64660 }, { "epoch": 0.09086741710385575, "grad_norm": 0.7614012956619263, "learning_rate": 0.00027249543346915834, "loss": 4.9136, "step": 64670 }, { "epoch": 0.09088146804201933, "grad_norm": 0.7839372754096985, "learning_rate": 0.00027253758606154277, "loss": 4.8921, "step": 64680 }, { "epoch": 0.0908955189801829, "grad_norm": 0.8074344396591187, "learning_rate": 0.0002725797386539272, "loss": 4.8253, "step": 64690 }, { "epoch": 0.09090956991834649, "grad_norm": 0.7729949951171875, "learning_rate": 0.00027262189124631163, "loss": 4.6527, "step": 64700 }, { "epoch": 0.09092362085651007, "grad_norm": 0.7652543187141418, "learning_rate": 0.00027266404383869606, "loss": 4.7971, "step": 64710 }, { "epoch": 0.09093767179467364, "grad_norm": 0.7983514070510864, "learning_rate": 0.0002727061964310805, "loss": 4.7203, "step": 64720 }, { "epoch": 0.09095172273283722, "grad_norm": 0.7539568543434143, "learning_rate": 0.0002727483490234649, "loss": 4.8282, "step": 64730 }, { "epoch": 0.0909657736710008, "grad_norm": 0.7831068634986877, "learning_rate": 0.00027279050161584936, "loss": 4.7831, "step": 64740 }, { "epoch": 0.09097982460916437, "grad_norm": 0.7707744836807251, "learning_rate": 0.0002728326542082338, "loss": 4.8327, "step": 64750 }, { "epoch": 0.09099387554732795, "grad_norm": 0.7709292769432068, "learning_rate": 0.0002728748068006182, "loss": 4.7731, "step": 64760 }, { "epoch": 0.09100792648549152, "grad_norm": 0.8126934766769409, "learning_rate": 0.00027291695939300265, "loss": 4.7199, "step": 64770 }, { "epoch": 0.0910219774236551, "grad_norm": 0.7724310755729675, "learning_rate": 0.0002729591119853871, "loss": 4.9449, "step": 64780 }, { "epoch": 0.09103602836181868, "grad_norm": 0.787074089050293, "learning_rate": 0.0002730012645777715, "loss": 4.8887, "step": 64790 }, { "epoch": 0.09105007929998227, "grad_norm": 
0.8085893988609314, "learning_rate": 0.00027304341717015594, "loss": 4.6988, "step": 64800 }, { "epoch": 0.09106413023814584, "grad_norm": 0.7805407047271729, "learning_rate": 0.0002730855697625404, "loss": 4.752, "step": 64810 }, { "epoch": 0.09107818117630942, "grad_norm": 0.8128443360328674, "learning_rate": 0.0002731277223549248, "loss": 4.8374, "step": 64820 }, { "epoch": 0.091092232114473, "grad_norm": 0.745376467704773, "learning_rate": 0.00027316987494730924, "loss": 4.684, "step": 64830 }, { "epoch": 0.09110628305263657, "grad_norm": 0.7663626670837402, "learning_rate": 0.00027321202753969367, "loss": 4.897, "step": 64840 }, { "epoch": 0.09112033399080015, "grad_norm": 0.7975637316703796, "learning_rate": 0.0002732541801320781, "loss": 4.8247, "step": 64850 }, { "epoch": 0.09113438492896372, "grad_norm": 0.7657324075698853, "learning_rate": 0.00027329633272446253, "loss": 4.7546, "step": 64860 }, { "epoch": 0.0911484358671273, "grad_norm": 0.7604333162307739, "learning_rate": 0.00027333848531684696, "loss": 4.6362, "step": 64870 }, { "epoch": 0.09116248680529088, "grad_norm": 0.8322310447692871, "learning_rate": 0.0002733806379092314, "loss": 4.7038, "step": 64880 }, { "epoch": 0.09117653774345445, "grad_norm": 0.7785294055938721, "learning_rate": 0.0002734227905016158, "loss": 4.7769, "step": 64890 }, { "epoch": 0.09119058868161803, "grad_norm": 0.763119637966156, "learning_rate": 0.00027346494309400026, "loss": 4.8997, "step": 64900 }, { "epoch": 0.09120463961978162, "grad_norm": 0.7625258564949036, "learning_rate": 0.0002735070956863847, "loss": 4.8272, "step": 64910 }, { "epoch": 0.0912186905579452, "grad_norm": 0.777725100517273, "learning_rate": 0.0002735492482787691, "loss": 4.7371, "step": 64920 }, { "epoch": 0.09123274149610877, "grad_norm": 0.8185275793075562, "learning_rate": 0.00027359140087115355, "loss": 4.7327, "step": 64930 }, { "epoch": 0.09124679243427235, "grad_norm": 0.7645296454429626, "learning_rate": 0.000273633553463538, "loss": 
4.7193, "step": 64940 }, { "epoch": 0.09126084337243592, "grad_norm": 0.7671161890029907, "learning_rate": 0.0002736757060559224, "loss": 4.8717, "step": 64950 }, { "epoch": 0.0912748943105995, "grad_norm": 0.7883368730545044, "learning_rate": 0.00027371785864830684, "loss": 4.8019, "step": 64960 }, { "epoch": 0.09128894524876308, "grad_norm": 0.7796012759208679, "learning_rate": 0.0002737600112406913, "loss": 4.8924, "step": 64970 }, { "epoch": 0.09130299618692665, "grad_norm": 0.7437483072280884, "learning_rate": 0.0002738021638330757, "loss": 4.7397, "step": 64980 }, { "epoch": 0.09131704712509023, "grad_norm": 0.7617444396018982, "learning_rate": 0.00027384431642546014, "loss": 4.7948, "step": 64990 }, { "epoch": 0.0913310980632538, "grad_norm": 0.7927044034004211, "learning_rate": 0.00027388646901784457, "loss": 4.7253, "step": 65000 }, { "epoch": 0.0913451490014174, "grad_norm": 0.8112998604774475, "learning_rate": 0.000273928621610229, "loss": 4.7289, "step": 65010 }, { "epoch": 0.09135919993958097, "grad_norm": 0.7460951805114746, "learning_rate": 0.00027397077420261343, "loss": 4.6933, "step": 65020 }, { "epoch": 0.09137325087774455, "grad_norm": 0.7493196129798889, "learning_rate": 0.00027401292679499786, "loss": 4.8195, "step": 65030 }, { "epoch": 0.09138730181590812, "grad_norm": 0.7835866808891296, "learning_rate": 0.0002740550793873823, "loss": 4.7021, "step": 65040 }, { "epoch": 0.0914013527540717, "grad_norm": 0.7718499302864075, "learning_rate": 0.0002740972319797667, "loss": 4.8433, "step": 65050 }, { "epoch": 0.09141540369223528, "grad_norm": 0.7729384899139404, "learning_rate": 0.00027413938457215115, "loss": 4.6984, "step": 65060 }, { "epoch": 0.09142945463039885, "grad_norm": 0.7586498260498047, "learning_rate": 0.0002741815371645356, "loss": 4.836, "step": 65070 }, { "epoch": 0.09144350556856243, "grad_norm": 0.8164992332458496, "learning_rate": 0.00027422368975692, "loss": 4.7512, "step": 65080 }, { "epoch": 0.091457556506726, "grad_norm": 
0.7848567962646484, "learning_rate": 0.00027426584234930445, "loss": 4.8578, "step": 65090 }, { "epoch": 0.09147160744488958, "grad_norm": 0.7660877704620361, "learning_rate": 0.0002743079949416889, "loss": 4.8616, "step": 65100 }, { "epoch": 0.09148565838305317, "grad_norm": 0.7506645321846008, "learning_rate": 0.0002743501475340733, "loss": 4.6539, "step": 65110 }, { "epoch": 0.09149970932121675, "grad_norm": 0.7743892669677734, "learning_rate": 0.00027439230012645774, "loss": 4.7529, "step": 65120 }, { "epoch": 0.09151376025938032, "grad_norm": 0.7679518461227417, "learning_rate": 0.0002744344527188422, "loss": 4.768, "step": 65130 }, { "epoch": 0.0915278111975439, "grad_norm": 0.7692804336547852, "learning_rate": 0.0002744766053112266, "loss": 4.8296, "step": 65140 }, { "epoch": 0.09154186213570747, "grad_norm": 0.7586258053779602, "learning_rate": 0.00027451875790361104, "loss": 4.7978, "step": 65150 }, { "epoch": 0.09155591307387105, "grad_norm": 0.799343466758728, "learning_rate": 0.00027456091049599547, "loss": 4.6653, "step": 65160 }, { "epoch": 0.09156996401203463, "grad_norm": 0.8051402568817139, "learning_rate": 0.0002746030630883799, "loss": 4.8429, "step": 65170 }, { "epoch": 0.0915840149501982, "grad_norm": 0.7682332396507263, "learning_rate": 0.00027464521568076433, "loss": 4.7701, "step": 65180 }, { "epoch": 0.09159806588836178, "grad_norm": 0.7532578110694885, "learning_rate": 0.0002746873682731488, "loss": 4.9643, "step": 65190 }, { "epoch": 0.09161211682652536, "grad_norm": 0.7631083130836487, "learning_rate": 0.0002747295208655332, "loss": 4.8076, "step": 65200 }, { "epoch": 0.09162616776468893, "grad_norm": 0.7771109342575073, "learning_rate": 0.0002747716734579176, "loss": 4.894, "step": 65210 }, { "epoch": 0.09164021870285252, "grad_norm": 0.8161442279815674, "learning_rate": 0.00027481382605030205, "loss": 4.7906, "step": 65220 }, { "epoch": 0.0916542696410161, "grad_norm": 0.7733320593833923, "learning_rate": 0.0002748559786426865, "loss": 
4.7649, "step": 65230 }, { "epoch": 0.09166832057917967, "grad_norm": 0.7682567238807678, "learning_rate": 0.0002748981312350709, "loss": 4.6973, "step": 65240 }, { "epoch": 0.09168237151734325, "grad_norm": 0.7778883576393127, "learning_rate": 0.0002749402838274554, "loss": 4.7464, "step": 65250 }, { "epoch": 0.09169642245550683, "grad_norm": 0.7723760008811951, "learning_rate": 0.0002749824364198398, "loss": 4.8092, "step": 65260 }, { "epoch": 0.0917104733936704, "grad_norm": 0.7590900659561157, "learning_rate": 0.0002750245890122242, "loss": 4.7138, "step": 65270 }, { "epoch": 0.09172452433183398, "grad_norm": 0.7957313656806946, "learning_rate": 0.00027506674160460864, "loss": 4.7174, "step": 65280 }, { "epoch": 0.09173857526999755, "grad_norm": 0.7801516056060791, "learning_rate": 0.00027510889419699307, "loss": 4.8837, "step": 65290 }, { "epoch": 0.09175262620816113, "grad_norm": 0.7635098099708557, "learning_rate": 0.0002751510467893775, "loss": 4.7774, "step": 65300 }, { "epoch": 0.09176667714632471, "grad_norm": 0.8163349032402039, "learning_rate": 0.000275193199381762, "loss": 4.902, "step": 65310 }, { "epoch": 0.0917807280844883, "grad_norm": 0.7679600715637207, "learning_rate": 0.0002752353519741464, "loss": 4.8026, "step": 65320 }, { "epoch": 0.09179477902265187, "grad_norm": 0.794219970703125, "learning_rate": 0.0002752775045665308, "loss": 4.8016, "step": 65330 }, { "epoch": 0.09180882996081545, "grad_norm": 0.8230299353599548, "learning_rate": 0.00027531965715891523, "loss": 4.7612, "step": 65340 }, { "epoch": 0.09182288089897903, "grad_norm": 0.7828049659729004, "learning_rate": 0.00027536180975129966, "loss": 4.8781, "step": 65350 }, { "epoch": 0.0918369318371426, "grad_norm": 0.7481945753097534, "learning_rate": 0.0002754039623436841, "loss": 4.818, "step": 65360 }, { "epoch": 0.09185098277530618, "grad_norm": 0.8005225658416748, "learning_rate": 0.0002754461149360686, "loss": 4.7186, "step": 65370 }, { "epoch": 0.09186503371346975, "grad_norm": 
0.8023760318756104, "learning_rate": 0.000275488267528453, "loss": 4.7263, "step": 65380 }, { "epoch": 0.09187908465163333, "grad_norm": 0.8292908072471619, "learning_rate": 0.0002755304201208374, "loss": 4.9251, "step": 65390 }, { "epoch": 0.0918931355897969, "grad_norm": 0.7676294445991516, "learning_rate": 0.0002755725727132218, "loss": 4.7774, "step": 65400 }, { "epoch": 0.09190718652796048, "grad_norm": 0.7877136468887329, "learning_rate": 0.00027561472530560625, "loss": 4.8057, "step": 65410 }, { "epoch": 0.09192123746612407, "grad_norm": 0.7636573314666748, "learning_rate": 0.0002756568778979907, "loss": 4.9751, "step": 65420 }, { "epoch": 0.09193528840428765, "grad_norm": 0.7771406173706055, "learning_rate": 0.00027569903049037516, "loss": 4.785, "step": 65430 }, { "epoch": 0.09194933934245123, "grad_norm": 0.7473503351211548, "learning_rate": 0.0002757411830827596, "loss": 4.7884, "step": 65440 }, { "epoch": 0.0919633902806148, "grad_norm": 0.7925262451171875, "learning_rate": 0.000275783335675144, "loss": 4.7761, "step": 65450 }, { "epoch": 0.09197744121877838, "grad_norm": 0.7829357385635376, "learning_rate": 0.0002758254882675284, "loss": 4.7649, "step": 65460 }, { "epoch": 0.09199149215694195, "grad_norm": 0.8279735445976257, "learning_rate": 0.00027586764085991283, "loss": 4.7296, "step": 65470 }, { "epoch": 0.09200554309510553, "grad_norm": 0.7664202451705933, "learning_rate": 0.00027590979345229726, "loss": 4.8287, "step": 65480 }, { "epoch": 0.0920195940332691, "grad_norm": 0.7685601711273193, "learning_rate": 0.00027595194604468175, "loss": 4.7864, "step": 65490 }, { "epoch": 0.09203364497143268, "grad_norm": 0.784343421459198, "learning_rate": 0.0002759940986370662, "loss": 4.817, "step": 65500 }, { "epoch": 0.09204769590959626, "grad_norm": 0.8135119080543518, "learning_rate": 0.0002760362512294506, "loss": 4.8879, "step": 65510 }, { "epoch": 0.09206174684775985, "grad_norm": 0.7757298350334167, "learning_rate": 0.000276078403821835, "loss": 
4.793, "step": 65520 }, { "epoch": 0.09207579778592342, "grad_norm": 0.8407139778137207, "learning_rate": 0.0002761205564142194, "loss": 4.8615, "step": 65530 }, { "epoch": 0.092089848724087, "grad_norm": 0.7666981220245361, "learning_rate": 0.00027616270900660385, "loss": 4.721, "step": 65540 }, { "epoch": 0.09210389966225058, "grad_norm": 0.781695544719696, "learning_rate": 0.00027620486159898834, "loss": 4.7766, "step": 65550 }, { "epoch": 0.09211795060041415, "grad_norm": 0.7683060169219971, "learning_rate": 0.00027624701419137277, "loss": 4.8538, "step": 65560 }, { "epoch": 0.09213200153857773, "grad_norm": 0.7697662115097046, "learning_rate": 0.0002762891667837572, "loss": 4.8836, "step": 65570 }, { "epoch": 0.0921460524767413, "grad_norm": 0.7881425023078918, "learning_rate": 0.00027633131937614163, "loss": 4.7934, "step": 65580 }, { "epoch": 0.09216010341490488, "grad_norm": 0.7787525653839111, "learning_rate": 0.000276373471968526, "loss": 4.8257, "step": 65590 }, { "epoch": 0.09217415435306846, "grad_norm": 0.757805585861206, "learning_rate": 0.00027641562456091044, "loss": 4.8827, "step": 65600 }, { "epoch": 0.09218820529123203, "grad_norm": 0.7534720301628113, "learning_rate": 0.0002764577771532949, "loss": 4.7524, "step": 65610 }, { "epoch": 0.09220225622939561, "grad_norm": 0.7800748348236084, "learning_rate": 0.00027649992974567935, "loss": 4.8452, "step": 65620 }, { "epoch": 0.0922163071675592, "grad_norm": 0.7849797010421753, "learning_rate": 0.0002765420823380638, "loss": 4.7981, "step": 65630 }, { "epoch": 0.09223035810572278, "grad_norm": 0.7599306106567383, "learning_rate": 0.0002765842349304482, "loss": 4.7777, "step": 65640 }, { "epoch": 0.09224440904388635, "grad_norm": 0.789528489112854, "learning_rate": 0.0002766263875228326, "loss": 4.7558, "step": 65650 }, { "epoch": 0.09225845998204993, "grad_norm": 0.7765876650810242, "learning_rate": 0.0002766685401152171, "loss": 4.8782, "step": 65660 }, { "epoch": 0.0922725109202135, "grad_norm": 
0.7817826271057129, "learning_rate": 0.0002767106927076015, "loss": 4.8516, "step": 65670 }, { "epoch": 0.09228656185837708, "grad_norm": 0.7477931380271912, "learning_rate": 0.00027675284529998594, "loss": 4.9344, "step": 65680 }, { "epoch": 0.09230061279654066, "grad_norm": 0.7491680979728699, "learning_rate": 0.00027679499789237037, "loss": 4.8218, "step": 65690 }, { "epoch": 0.09231466373470423, "grad_norm": 0.7636551260948181, "learning_rate": 0.0002768371504847548, "loss": 4.8828, "step": 65700 }, { "epoch": 0.09232871467286781, "grad_norm": 0.7482210993766785, "learning_rate": 0.00027687930307713923, "loss": 4.7725, "step": 65710 }, { "epoch": 0.09234276561103139, "grad_norm": 0.7747960090637207, "learning_rate": 0.00027692145566952367, "loss": 4.7205, "step": 65720 }, { "epoch": 0.09235681654919498, "grad_norm": 0.7718808650970459, "learning_rate": 0.0002769636082619081, "loss": 4.7258, "step": 65730 }, { "epoch": 0.09237086748735855, "grad_norm": 0.7397331595420837, "learning_rate": 0.00027700576085429253, "loss": 4.7103, "step": 65740 }, { "epoch": 0.09238491842552213, "grad_norm": 0.7860981822013855, "learning_rate": 0.00027704791344667696, "loss": 4.752, "step": 65750 }, { "epoch": 0.0923989693636857, "grad_norm": 0.7629745006561279, "learning_rate": 0.0002770900660390614, "loss": 4.8679, "step": 65760 }, { "epoch": 0.09241302030184928, "grad_norm": 0.8002822995185852, "learning_rate": 0.0002771322186314458, "loss": 4.6234, "step": 65770 }, { "epoch": 0.09242707124001286, "grad_norm": 0.7651919722557068, "learning_rate": 0.00027717437122383025, "loss": 4.8368, "step": 65780 }, { "epoch": 0.09244112217817643, "grad_norm": 0.7951807379722595, "learning_rate": 0.0002772165238162147, "loss": 4.7385, "step": 65790 }, { "epoch": 0.09245517311634001, "grad_norm": 0.7759065628051758, "learning_rate": 0.0002772586764085991, "loss": 4.8701, "step": 65800 }, { "epoch": 0.09246922405450358, "grad_norm": 0.753464937210083, "learning_rate": 0.00027730082900098355, 
"loss": 4.7545, "step": 65810 }, { "epoch": 0.09248327499266716, "grad_norm": 0.7959068417549133, "learning_rate": 0.000277342981593368, "loss": 4.8609, "step": 65820 }, { "epoch": 0.09249732593083075, "grad_norm": 0.7836405038833618, "learning_rate": 0.0002773851341857524, "loss": 4.768, "step": 65830 }, { "epoch": 0.09251137686899433, "grad_norm": 0.7547979950904846, "learning_rate": 0.00027742728677813684, "loss": 4.7868, "step": 65840 }, { "epoch": 0.0925254278071579, "grad_norm": 0.793201744556427, "learning_rate": 0.00027746943937052127, "loss": 4.7997, "step": 65850 }, { "epoch": 0.09253947874532148, "grad_norm": 0.7729663252830505, "learning_rate": 0.0002775115919629057, "loss": 4.7825, "step": 65860 }, { "epoch": 0.09255352968348506, "grad_norm": 0.8203871846199036, "learning_rate": 0.00027755374455529013, "loss": 4.7756, "step": 65870 }, { "epoch": 0.09256758062164863, "grad_norm": 0.8373938798904419, "learning_rate": 0.00027759589714767456, "loss": 4.7466, "step": 65880 }, { "epoch": 0.09258163155981221, "grad_norm": 0.77327960729599, "learning_rate": 0.000277638049740059, "loss": 4.812, "step": 65890 }, { "epoch": 0.09259568249797578, "grad_norm": 0.7683082818984985, "learning_rate": 0.0002776802023324434, "loss": 4.8128, "step": 65900 }, { "epoch": 0.09260973343613936, "grad_norm": 0.7626813650131226, "learning_rate": 0.00027772235492482786, "loss": 4.8501, "step": 65910 }, { "epoch": 0.09262378437430294, "grad_norm": 0.7827718257904053, "learning_rate": 0.0002777645075172123, "loss": 4.7699, "step": 65920 }, { "epoch": 0.09263783531246651, "grad_norm": 0.7622679471969604, "learning_rate": 0.0002778066601095967, "loss": 4.6728, "step": 65930 }, { "epoch": 0.0926518862506301, "grad_norm": 0.7683987021446228, "learning_rate": 0.00027784881270198115, "loss": 4.9069, "step": 65940 }, { "epoch": 0.09266593718879368, "grad_norm": 0.7734614610671997, "learning_rate": 0.0002778909652943656, "loss": 4.8696, "step": 65950 }, { "epoch": 0.09267998812695726, 
"grad_norm": 1.3228286504745483, "learning_rate": 0.00027793311788675, "loss": 4.7319, "step": 65960 }, { "epoch": 0.09269403906512083, "grad_norm": 0.7824644446372986, "learning_rate": 0.00027797527047913445, "loss": 4.7125, "step": 65970 }, { "epoch": 0.09270809000328441, "grad_norm": 0.7849623560905457, "learning_rate": 0.0002780174230715189, "loss": 4.8618, "step": 65980 }, { "epoch": 0.09272214094144798, "grad_norm": 0.7580630779266357, "learning_rate": 0.0002780595756639033, "loss": 4.7867, "step": 65990 }, { "epoch": 0.09273619187961156, "grad_norm": 0.7988112568855286, "learning_rate": 0.00027810172825628774, "loss": 4.7798, "step": 66000 }, { "epoch": 0.09275024281777514, "grad_norm": 0.7714882493019104, "learning_rate": 0.00027814388084867217, "loss": 4.8462, "step": 66010 }, { "epoch": 0.09276429375593871, "grad_norm": 0.7701422572135925, "learning_rate": 0.0002781860334410566, "loss": 4.7374, "step": 66020 }, { "epoch": 0.09277834469410229, "grad_norm": 0.7310877442359924, "learning_rate": 0.00027822818603344103, "loss": 4.8362, "step": 66030 }, { "epoch": 0.09279239563226588, "grad_norm": 0.7519302368164062, "learning_rate": 0.00027827033862582546, "loss": 4.8217, "step": 66040 }, { "epoch": 0.09280644657042945, "grad_norm": 0.7439327239990234, "learning_rate": 0.0002783124912182099, "loss": 4.899, "step": 66050 }, { "epoch": 0.09282049750859303, "grad_norm": 0.7650001645088196, "learning_rate": 0.0002783546438105943, "loss": 4.8668, "step": 66060 }, { "epoch": 0.0928345484467566, "grad_norm": 0.7906643152236938, "learning_rate": 0.00027839679640297876, "loss": 4.85, "step": 66070 }, { "epoch": 0.09284859938492018, "grad_norm": 0.772290825843811, "learning_rate": 0.0002784389489953632, "loss": 4.8879, "step": 66080 }, { "epoch": 0.09286265032308376, "grad_norm": 0.7907188534736633, "learning_rate": 0.0002784811015877476, "loss": 4.7498, "step": 66090 }, { "epoch": 0.09287670126124734, "grad_norm": 0.7830239534378052, "learning_rate": 
0.00027852325418013205, "loss": 4.8478, "step": 66100 }, { "epoch": 0.09289075219941091, "grad_norm": 0.7706902027130127, "learning_rate": 0.0002785654067725165, "loss": 4.8905, "step": 66110 }, { "epoch": 0.09290480313757449, "grad_norm": 0.7300134301185608, "learning_rate": 0.0002786075593649009, "loss": 4.7552, "step": 66120 }, { "epoch": 0.09291885407573806, "grad_norm": 0.7618225812911987, "learning_rate": 0.00027864971195728534, "loss": 4.7571, "step": 66130 }, { "epoch": 0.09293290501390165, "grad_norm": 0.8037118911743164, "learning_rate": 0.0002786918645496698, "loss": 4.749, "step": 66140 }, { "epoch": 0.09294695595206523, "grad_norm": 0.7755313515663147, "learning_rate": 0.0002787340171420542, "loss": 4.8419, "step": 66150 }, { "epoch": 0.0929610068902288, "grad_norm": 0.753610372543335, "learning_rate": 0.00027877616973443864, "loss": 4.8936, "step": 66160 }, { "epoch": 0.09297505782839238, "grad_norm": 0.8028730750083923, "learning_rate": 0.00027881832232682307, "loss": 4.8275, "step": 66170 }, { "epoch": 0.09298910876655596, "grad_norm": 0.7614244818687439, "learning_rate": 0.0002788604749192075, "loss": 4.8163, "step": 66180 }, { "epoch": 0.09300315970471953, "grad_norm": 0.7953831553459167, "learning_rate": 0.00027890262751159193, "loss": 4.9137, "step": 66190 }, { "epoch": 0.09301721064288311, "grad_norm": 0.7856250405311584, "learning_rate": 0.00027894478010397636, "loss": 4.8262, "step": 66200 }, { "epoch": 0.09303126158104669, "grad_norm": 0.749713659286499, "learning_rate": 0.0002789869326963608, "loss": 4.8025, "step": 66210 }, { "epoch": 0.09304531251921026, "grad_norm": 0.7905395030975342, "learning_rate": 0.0002790290852887452, "loss": 4.8134, "step": 66220 }, { "epoch": 0.09305936345737384, "grad_norm": 0.7537453174591064, "learning_rate": 0.00027907123788112966, "loss": 4.6562, "step": 66230 }, { "epoch": 0.09307341439553742, "grad_norm": 0.7715054154396057, "learning_rate": 0.0002791133904735141, "loss": 4.8059, "step": 66240 }, { 
"epoch": 0.093087465333701, "grad_norm": 0.7600757479667664, "learning_rate": 0.0002791555430658985, "loss": 4.7877, "step": 66250 }, { "epoch": 0.09310151627186458, "grad_norm": 0.7605622410774231, "learning_rate": 0.00027919769565828295, "loss": 4.8044, "step": 66260 }, { "epoch": 0.09311556721002816, "grad_norm": 0.7521841526031494, "learning_rate": 0.0002792398482506674, "loss": 4.771, "step": 66270 }, { "epoch": 0.09312961814819173, "grad_norm": 0.751024067401886, "learning_rate": 0.0002792820008430518, "loss": 4.7984, "step": 66280 }, { "epoch": 0.09314366908635531, "grad_norm": 0.7688322067260742, "learning_rate": 0.0002793241534354363, "loss": 4.8356, "step": 66290 }, { "epoch": 0.09315772002451889, "grad_norm": 0.7705650329589844, "learning_rate": 0.0002793663060278207, "loss": 4.7877, "step": 66300 }, { "epoch": 0.09317177096268246, "grad_norm": 0.769523024559021, "learning_rate": 0.0002794084586202051, "loss": 4.8034, "step": 66310 }, { "epoch": 0.09318582190084604, "grad_norm": 0.7682741284370422, "learning_rate": 0.00027945061121258954, "loss": 4.6817, "step": 66320 }, { "epoch": 0.09319987283900961, "grad_norm": 0.765863835811615, "learning_rate": 0.00027949276380497397, "loss": 4.7184, "step": 66330 }, { "epoch": 0.09321392377717319, "grad_norm": 0.7482745051383972, "learning_rate": 0.0002795349163973584, "loss": 4.766, "step": 66340 }, { "epoch": 0.09322797471533678, "grad_norm": 0.7587314248085022, "learning_rate": 0.0002795770689897429, "loss": 4.8509, "step": 66350 }, { "epoch": 0.09324202565350036, "grad_norm": 0.7830142378807068, "learning_rate": 0.00027961922158212726, "loss": 4.6339, "step": 66360 }, { "epoch": 0.09325607659166393, "grad_norm": 0.7715080380439758, "learning_rate": 0.0002796613741745117, "loss": 4.7213, "step": 66370 }, { "epoch": 0.09327012752982751, "grad_norm": 0.7741284966468811, "learning_rate": 0.0002797035267668961, "loss": 4.7524, "step": 66380 }, { "epoch": 0.09328417846799109, "grad_norm": 0.7751002907752991, 
"learning_rate": 0.00027974567935928055, "loss": 4.8013, "step": 66390 }, { "epoch": 0.09329822940615466, "grad_norm": 0.8361513614654541, "learning_rate": 0.000279787831951665, "loss": 4.8237, "step": 66400 }, { "epoch": 0.09331228034431824, "grad_norm": 0.7796192169189453, "learning_rate": 0.00027982998454404947, "loss": 4.8101, "step": 66410 }, { "epoch": 0.09332633128248181, "grad_norm": 0.8053950071334839, "learning_rate": 0.0002798721371364339, "loss": 4.6938, "step": 66420 }, { "epoch": 0.09334038222064539, "grad_norm": 0.8083071112632751, "learning_rate": 0.0002799142897288183, "loss": 4.8032, "step": 66430 }, { "epoch": 0.09335443315880897, "grad_norm": 0.8004527688026428, "learning_rate": 0.0002799564423212027, "loss": 4.7322, "step": 66440 }, { "epoch": 0.09336848409697256, "grad_norm": 0.7978747487068176, "learning_rate": 0.00027999859491358714, "loss": 4.8168, "step": 66450 }, { "epoch": 0.09338253503513613, "grad_norm": 0.8221896290779114, "learning_rate": 0.00028004074750597157, "loss": 4.5916, "step": 66460 }, { "epoch": 0.09339658597329971, "grad_norm": 0.8084535002708435, "learning_rate": 0.00028008290009835606, "loss": 4.8748, "step": 66470 }, { "epoch": 0.09341063691146329, "grad_norm": 0.8202959895133972, "learning_rate": 0.0002801250526907405, "loss": 4.7996, "step": 66480 }, { "epoch": 0.09342468784962686, "grad_norm": 0.7502172589302063, "learning_rate": 0.00028016720528312487, "loss": 4.9115, "step": 66490 }, { "epoch": 0.09343873878779044, "grad_norm": 0.7529955506324768, "learning_rate": 0.0002802093578755093, "loss": 4.8029, "step": 66500 }, { "epoch": 0.09345278972595401, "grad_norm": 0.7608353495597839, "learning_rate": 0.00028025151046789373, "loss": 4.7967, "step": 66510 }, { "epoch": 0.09346684066411759, "grad_norm": 0.8318527936935425, "learning_rate": 0.00028029366306027816, "loss": 4.7791, "step": 66520 }, { "epoch": 0.09348089160228117, "grad_norm": 0.7857316732406616, "learning_rate": 0.00028033581565266264, "loss": 4.7374, 
"step": 66530 }, { "epoch": 0.09349494254044474, "grad_norm": 0.9135415554046631, "learning_rate": 0.0002803779682450471, "loss": 4.761, "step": 66540 }, { "epoch": 0.09350899347860832, "grad_norm": 0.7974305748939514, "learning_rate": 0.00028042012083743145, "loss": 4.6771, "step": 66550 }, { "epoch": 0.09352304441677191, "grad_norm": 0.7421619892120361, "learning_rate": 0.0002804622734298159, "loss": 4.7538, "step": 66560 }, { "epoch": 0.09353709535493548, "grad_norm": 0.796992838382721, "learning_rate": 0.0002805044260222003, "loss": 4.8697, "step": 66570 }, { "epoch": 0.09355114629309906, "grad_norm": 1.0140798091888428, "learning_rate": 0.00028054657861458475, "loss": 4.8219, "step": 66580 }, { "epoch": 0.09356519723126264, "grad_norm": 0.8030684590339661, "learning_rate": 0.00028058873120696923, "loss": 4.7397, "step": 66590 }, { "epoch": 0.09357924816942621, "grad_norm": 0.8484904766082764, "learning_rate": 0.00028063088379935366, "loss": 4.7481, "step": 66600 }, { "epoch": 0.09359329910758979, "grad_norm": 0.7419153451919556, "learning_rate": 0.0002806730363917381, "loss": 4.8027, "step": 66610 }, { "epoch": 0.09360735004575337, "grad_norm": 0.7505769729614258, "learning_rate": 0.00028071518898412247, "loss": 4.7095, "step": 66620 }, { "epoch": 0.09362140098391694, "grad_norm": 0.7895868420600891, "learning_rate": 0.0002807573415765069, "loss": 4.7332, "step": 66630 }, { "epoch": 0.09363545192208052, "grad_norm": 0.7834476232528687, "learning_rate": 0.00028079949416889133, "loss": 4.6707, "step": 66640 }, { "epoch": 0.0936495028602441, "grad_norm": 0.7624843716621399, "learning_rate": 0.0002808416467612758, "loss": 4.7925, "step": 66650 }, { "epoch": 0.09366355379840768, "grad_norm": 0.8170449733734131, "learning_rate": 0.00028088379935366025, "loss": 4.7729, "step": 66660 }, { "epoch": 0.09367760473657126, "grad_norm": 0.8319101929664612, "learning_rate": 0.0002809259519460447, "loss": 4.933, "step": 66670 }, { "epoch": 0.09369165567473484, "grad_norm": 
0.7560040354728699, "learning_rate": 0.00028096810453842906, "loss": 4.7946, "step": 66680 }, { "epoch": 0.09370570661289841, "grad_norm": 0.7837918996810913, "learning_rate": 0.0002810102571308135, "loss": 4.7895, "step": 66690 }, { "epoch": 0.09371975755106199, "grad_norm": 0.7507804036140442, "learning_rate": 0.0002810524097231979, "loss": 4.7504, "step": 66700 }, { "epoch": 0.09373380848922556, "grad_norm": 0.771360456943512, "learning_rate": 0.0002810945623155824, "loss": 4.8561, "step": 66710 }, { "epoch": 0.09374785942738914, "grad_norm": 0.7771738171577454, "learning_rate": 0.00028113671490796684, "loss": 4.874, "step": 66720 }, { "epoch": 0.09376191036555272, "grad_norm": 0.7817769646644592, "learning_rate": 0.00028117886750035127, "loss": 4.8408, "step": 66730 }, { "epoch": 0.0937759613037163, "grad_norm": 0.9140200018882751, "learning_rate": 0.0002812210200927357, "loss": 4.8285, "step": 66740 }, { "epoch": 0.09379001224187987, "grad_norm": 0.7840200662612915, "learning_rate": 0.0002812631726851201, "loss": 4.8193, "step": 66750 }, { "epoch": 0.09380406318004346, "grad_norm": 0.7683082818984985, "learning_rate": 0.0002813053252775045, "loss": 4.5404, "step": 66760 }, { "epoch": 0.09381811411820704, "grad_norm": 0.7743772268295288, "learning_rate": 0.000281347477869889, "loss": 4.9332, "step": 66770 }, { "epoch": 0.09383216505637061, "grad_norm": 0.7601823210716248, "learning_rate": 0.0002813896304622734, "loss": 4.8454, "step": 66780 }, { "epoch": 0.09384621599453419, "grad_norm": 0.7685533761978149, "learning_rate": 0.00028143178305465786, "loss": 4.8704, "step": 66790 }, { "epoch": 0.09386026693269776, "grad_norm": 0.7852855324745178, "learning_rate": 0.0002814739356470423, "loss": 4.7243, "step": 66800 }, { "epoch": 0.09387431787086134, "grad_norm": 0.7344205379486084, "learning_rate": 0.00028151608823942666, "loss": 4.6807, "step": 66810 }, { "epoch": 0.09388836880902492, "grad_norm": 0.7859446406364441, "learning_rate": 0.00028155824083181115, 
"loss": 4.6914, "step": 66820 }, { "epoch": 0.09390241974718849, "grad_norm": 0.769825279712677, "learning_rate": 0.0002816003934241956, "loss": 4.795, "step": 66830 }, { "epoch": 0.09391647068535207, "grad_norm": 0.7718509435653687, "learning_rate": 0.00028164254601658, "loss": 4.7383, "step": 66840 }, { "epoch": 0.09393052162351564, "grad_norm": 0.7945657968521118, "learning_rate": 0.00028168469860896444, "loss": 4.8164, "step": 66850 }, { "epoch": 0.09394457256167922, "grad_norm": 0.9426447749137878, "learning_rate": 0.0002817268512013489, "loss": 4.674, "step": 66860 }, { "epoch": 0.09395862349984281, "grad_norm": 0.7499502301216125, "learning_rate": 0.0002817690037937333, "loss": 4.7949, "step": 66870 }, { "epoch": 0.09397267443800639, "grad_norm": 0.7673668265342712, "learning_rate": 0.00028181115638611774, "loss": 4.8771, "step": 66880 }, { "epoch": 0.09398672537616996, "grad_norm": 0.7585043907165527, "learning_rate": 0.00028185330897850217, "loss": 4.8944, "step": 66890 }, { "epoch": 0.09400077631433354, "grad_norm": 0.7782924175262451, "learning_rate": 0.0002818954615708866, "loss": 4.7822, "step": 66900 }, { "epoch": 0.09401482725249712, "grad_norm": 0.7514545917510986, "learning_rate": 0.00028193761416327103, "loss": 4.6599, "step": 66910 }, { "epoch": 0.09402887819066069, "grad_norm": 0.7437326312065125, "learning_rate": 0.00028197976675565546, "loss": 4.8388, "step": 66920 }, { "epoch": 0.09404292912882427, "grad_norm": 0.7777213454246521, "learning_rate": 0.0002820219193480399, "loss": 4.7727, "step": 66930 }, { "epoch": 0.09405698006698784, "grad_norm": 0.7418848276138306, "learning_rate": 0.0002820640719404243, "loss": 4.7165, "step": 66940 }, { "epoch": 0.09407103100515142, "grad_norm": 0.7748672962188721, "learning_rate": 0.00028210622453280875, "loss": 4.7824, "step": 66950 }, { "epoch": 0.094085081943315, "grad_norm": 0.7600992321968079, "learning_rate": 0.0002821483771251932, "loss": 4.8642, "step": 66960 }, { "epoch": 0.09409913288147859, 
"grad_norm": 0.7563418745994568, "learning_rate": 0.0002821905297175776, "loss": 4.8016, "step": 66970 }, { "epoch": 0.09411318381964216, "grad_norm": 0.7561931014060974, "learning_rate": 0.00028223268230996205, "loss": 4.8931, "step": 66980 }, { "epoch": 0.09412723475780574, "grad_norm": 0.8095070719718933, "learning_rate": 0.0002822748349023465, "loss": 4.6954, "step": 66990 }, { "epoch": 0.09414128569596932, "grad_norm": 0.7557608485221863, "learning_rate": 0.0002823169874947309, "loss": 4.9589, "step": 67000 }, { "epoch": 0.09415533663413289, "grad_norm": 0.7877722978591919, "learning_rate": 0.00028235914008711534, "loss": 4.8427, "step": 67010 }, { "epoch": 0.09416938757229647, "grad_norm": 0.7584842443466187, "learning_rate": 0.00028240129267949977, "loss": 4.8917, "step": 67020 }, { "epoch": 0.09418343851046004, "grad_norm": 0.7679328322410583, "learning_rate": 0.0002824434452718842, "loss": 4.684, "step": 67030 }, { "epoch": 0.09419748944862362, "grad_norm": 0.7546048760414124, "learning_rate": 0.00028248559786426863, "loss": 4.6522, "step": 67040 }, { "epoch": 0.0942115403867872, "grad_norm": 0.7862738966941833, "learning_rate": 0.00028252775045665307, "loss": 4.7412, "step": 67050 }, { "epoch": 0.09422559132495077, "grad_norm": 0.7737675905227661, "learning_rate": 0.0002825699030490375, "loss": 4.7039, "step": 67060 }, { "epoch": 0.09423964226311436, "grad_norm": 0.7403519153594971, "learning_rate": 0.00028261205564142193, "loss": 4.8726, "step": 67070 }, { "epoch": 0.09425369320127794, "grad_norm": 0.7542514204978943, "learning_rate": 0.00028265420823380636, "loss": 4.8402, "step": 67080 }, { "epoch": 0.09426774413944151, "grad_norm": 0.7455792427062988, "learning_rate": 0.0002826963608261908, "loss": 4.8167, "step": 67090 }, { "epoch": 0.09428179507760509, "grad_norm": 0.7721514701843262, "learning_rate": 0.0002827385134185752, "loss": 4.8949, "step": 67100 }, { "epoch": 0.09429584601576867, "grad_norm": 0.7985861301422119, "learning_rate": 
0.00028278066601095965, "loss": 4.8274, "step": 67110 }, { "epoch": 0.09430989695393224, "grad_norm": 0.7794066071510315, "learning_rate": 0.0002828228186033441, "loss": 4.7993, "step": 67120 }, { "epoch": 0.09432394789209582, "grad_norm": 0.7830890417098999, "learning_rate": 0.0002828649711957285, "loss": 4.7902, "step": 67130 }, { "epoch": 0.0943379988302594, "grad_norm": 0.7577866911888123, "learning_rate": 0.00028290712378811295, "loss": 4.8907, "step": 67140 }, { "epoch": 0.09435204976842297, "grad_norm": 0.8127627968788147, "learning_rate": 0.0002829492763804974, "loss": 4.7554, "step": 67150 }, { "epoch": 0.09436610070658655, "grad_norm": 0.7625774145126343, "learning_rate": 0.0002829914289728818, "loss": 4.7444, "step": 67160 }, { "epoch": 0.09438015164475012, "grad_norm": 0.7992131114006042, "learning_rate": 0.00028303358156526624, "loss": 4.8071, "step": 67170 }, { "epoch": 0.09439420258291371, "grad_norm": 0.7639414668083191, "learning_rate": 0.00028307573415765067, "loss": 4.7122, "step": 67180 }, { "epoch": 0.09440825352107729, "grad_norm": 0.7498297691345215, "learning_rate": 0.0002831178867500351, "loss": 4.7293, "step": 67190 }, { "epoch": 0.09442230445924087, "grad_norm": 0.7553703188896179, "learning_rate": 0.00028316003934241953, "loss": 4.9202, "step": 67200 }, { "epoch": 0.09443635539740444, "grad_norm": 0.7734369039535522, "learning_rate": 0.00028320219193480396, "loss": 4.7912, "step": 67210 }, { "epoch": 0.09445040633556802, "grad_norm": 0.7736976146697998, "learning_rate": 0.0002832443445271884, "loss": 4.8836, "step": 67220 }, { "epoch": 0.0944644572737316, "grad_norm": 0.7649384140968323, "learning_rate": 0.0002832864971195728, "loss": 4.8357, "step": 67230 }, { "epoch": 0.09447850821189517, "grad_norm": 0.7400626540184021, "learning_rate": 0.00028332864971195726, "loss": 4.8263, "step": 67240 }, { "epoch": 0.09449255915005875, "grad_norm": 0.8864115476608276, "learning_rate": 0.0002833708023043417, "loss": 4.7784, "step": 67250 }, { 
"epoch": 0.09450661008822232, "grad_norm": 0.7719464302062988, "learning_rate": 0.0002834129548967261, "loss": 4.927, "step": 67260 }, { "epoch": 0.0945206610263859, "grad_norm": 0.7647191286087036, "learning_rate": 0.00028345510748911055, "loss": 4.776, "step": 67270 }, { "epoch": 0.09453471196454949, "grad_norm": 0.7322827577590942, "learning_rate": 0.000283497260081495, "loss": 4.8069, "step": 67280 }, { "epoch": 0.09454876290271307, "grad_norm": 0.7592594623565674, "learning_rate": 0.0002835394126738794, "loss": 4.7898, "step": 67290 }, { "epoch": 0.09456281384087664, "grad_norm": 0.7448276281356812, "learning_rate": 0.00028358156526626384, "loss": 4.8365, "step": 67300 }, { "epoch": 0.09457686477904022, "grad_norm": 0.786535382270813, "learning_rate": 0.0002836237178586483, "loss": 4.7254, "step": 67310 }, { "epoch": 0.0945909157172038, "grad_norm": 0.7626109719276428, "learning_rate": 0.0002836658704510327, "loss": 4.9295, "step": 67320 }, { "epoch": 0.09460496665536737, "grad_norm": 0.8014910817146301, "learning_rate": 0.00028370802304341714, "loss": 4.8296, "step": 67330 }, { "epoch": 0.09461901759353095, "grad_norm": 0.7696790099143982, "learning_rate": 0.00028375017563580157, "loss": 4.7296, "step": 67340 }, { "epoch": 0.09463306853169452, "grad_norm": 0.770531952381134, "learning_rate": 0.000283792328228186, "loss": 4.8663, "step": 67350 }, { "epoch": 0.0946471194698581, "grad_norm": 0.7845657467842102, "learning_rate": 0.00028383448082057043, "loss": 4.7573, "step": 67360 }, { "epoch": 0.09466117040802167, "grad_norm": 0.798072099685669, "learning_rate": 0.00028387663341295486, "loss": 4.7633, "step": 67370 }, { "epoch": 0.09467522134618526, "grad_norm": 0.7655891180038452, "learning_rate": 0.0002839187860053393, "loss": 4.729, "step": 67380 }, { "epoch": 0.09468927228434884, "grad_norm": 0.7528164386749268, "learning_rate": 0.0002839609385977237, "loss": 4.7863, "step": 67390 }, { "epoch": 0.09470332322251242, "grad_norm": 0.7557967305183411, 
"learning_rate": 0.00028400309119010816, "loss": 4.8202, "step": 67400 }, { "epoch": 0.094717374160676, "grad_norm": 0.7755447626113892, "learning_rate": 0.0002840452437824926, "loss": 4.7798, "step": 67410 }, { "epoch": 0.09473142509883957, "grad_norm": 0.7739848494529724, "learning_rate": 0.000284087396374877, "loss": 4.8155, "step": 67420 }, { "epoch": 0.09474547603700315, "grad_norm": 0.7496004700660706, "learning_rate": 0.00028412954896726145, "loss": 4.579, "step": 67430 }, { "epoch": 0.09475952697516672, "grad_norm": 0.7430173754692078, "learning_rate": 0.0002841717015596459, "loss": 4.7696, "step": 67440 }, { "epoch": 0.0947735779133303, "grad_norm": 0.7696523666381836, "learning_rate": 0.00028421385415203037, "loss": 4.9943, "step": 67450 }, { "epoch": 0.09478762885149387, "grad_norm": 0.8024923801422119, "learning_rate": 0.00028425600674441474, "loss": 4.7865, "step": 67460 }, { "epoch": 0.09480167978965745, "grad_norm": 0.7850974202156067, "learning_rate": 0.0002842981593367992, "loss": 4.813, "step": 67470 }, { "epoch": 0.09481573072782103, "grad_norm": 0.7672629952430725, "learning_rate": 0.0002843403119291836, "loss": 4.8848, "step": 67480 }, { "epoch": 0.09482978166598462, "grad_norm": 0.7520509958267212, "learning_rate": 0.00028438246452156804, "loss": 4.821, "step": 67490 }, { "epoch": 0.09484383260414819, "grad_norm": 0.7907251715660095, "learning_rate": 0.00028442461711395247, "loss": 4.7222, "step": 67500 }, { "epoch": 0.09485788354231177, "grad_norm": 0.7532843947410583, "learning_rate": 0.00028446676970633695, "loss": 4.7545, "step": 67510 }, { "epoch": 0.09487193448047535, "grad_norm": 0.7584605813026428, "learning_rate": 0.00028450892229872133, "loss": 4.7687, "step": 67520 }, { "epoch": 0.09488598541863892, "grad_norm": 0.7673341035842896, "learning_rate": 0.00028455107489110576, "loss": 4.7871, "step": 67530 }, { "epoch": 0.0949000363568025, "grad_norm": 0.7831926345825195, "learning_rate": 0.0002845932274834902, "loss": 4.7859, "step": 
67540 }, { "epoch": 0.09491408729496607, "grad_norm": 0.7618937492370605, "learning_rate": 0.0002846353800758746, "loss": 4.8277, "step": 67550 }, { "epoch": 0.09492813823312965, "grad_norm": 0.813814640045166, "learning_rate": 0.00028467753266825905, "loss": 4.7525, "step": 67560 }, { "epoch": 0.09494218917129323, "grad_norm": 0.7494709491729736, "learning_rate": 0.00028471968526064354, "loss": 4.8525, "step": 67570 }, { "epoch": 0.0949562401094568, "grad_norm": 0.7520213723182678, "learning_rate": 0.00028476183785302797, "loss": 4.7942, "step": 67580 }, { "epoch": 0.09497029104762039, "grad_norm": 0.7849497199058533, "learning_rate": 0.00028480399044541235, "loss": 4.7749, "step": 67590 }, { "epoch": 0.09498434198578397, "grad_norm": 0.7550547122955322, "learning_rate": 0.0002848461430377968, "loss": 4.7389, "step": 67600 }, { "epoch": 0.09499839292394754, "grad_norm": 0.7957075834274292, "learning_rate": 0.0002848882956301812, "loss": 4.7788, "step": 67610 }, { "epoch": 0.09501244386211112, "grad_norm": 0.7405850291252136, "learning_rate": 0.00028493044822256564, "loss": 4.7592, "step": 67620 }, { "epoch": 0.0950264948002747, "grad_norm": 0.7549129128456116, "learning_rate": 0.00028497260081495013, "loss": 4.8282, "step": 67630 }, { "epoch": 0.09504054573843827, "grad_norm": 0.7701460123062134, "learning_rate": 0.00028501475340733456, "loss": 4.8901, "step": 67640 }, { "epoch": 0.09505459667660185, "grad_norm": 0.7383482456207275, "learning_rate": 0.00028505690599971894, "loss": 4.7539, "step": 67650 }, { "epoch": 0.09506864761476543, "grad_norm": 0.7618653774261475, "learning_rate": 0.00028509905859210337, "loss": 4.812, "step": 67660 }, { "epoch": 0.095082698552929, "grad_norm": 0.7711530327796936, "learning_rate": 0.0002851412111844878, "loss": 4.7898, "step": 67670 }, { "epoch": 0.09509674949109258, "grad_norm": 0.7828205227851868, "learning_rate": 0.00028518336377687223, "loss": 4.8524, "step": 67680 }, { "epoch": 0.09511080042925617, "grad_norm": 
0.7714737057685852, "learning_rate": 0.0002852255163692567, "loss": 4.731, "step": 67690 }, { "epoch": 0.09512485136741974, "grad_norm": 0.7553906440734863, "learning_rate": 0.00028526766896164115, "loss": 4.7764, "step": 67700 }, { "epoch": 0.09513890230558332, "grad_norm": 0.7760165333747864, "learning_rate": 0.0002853098215540256, "loss": 4.7859, "step": 67710 }, { "epoch": 0.0951529532437469, "grad_norm": 0.7785241603851318, "learning_rate": 0.00028535197414640995, "loss": 4.7499, "step": 67720 }, { "epoch": 0.09516700418191047, "grad_norm": 0.7613420486450195, "learning_rate": 0.0002853941267387944, "loss": 4.806, "step": 67730 }, { "epoch": 0.09518105512007405, "grad_norm": 0.7879531979560852, "learning_rate": 0.0002854362793311788, "loss": 4.8668, "step": 67740 }, { "epoch": 0.09519510605823762, "grad_norm": 0.7439324855804443, "learning_rate": 0.0002854784319235633, "loss": 4.8048, "step": 67750 }, { "epoch": 0.0952091569964012, "grad_norm": 0.8519731163978577, "learning_rate": 0.00028552058451594773, "loss": 4.6315, "step": 67760 }, { "epoch": 0.09522320793456478, "grad_norm": 0.7999274134635925, "learning_rate": 0.00028556273710833216, "loss": 4.8338, "step": 67770 }, { "epoch": 0.09523725887272835, "grad_norm": 0.7844228148460388, "learning_rate": 0.00028560488970071654, "loss": 4.7579, "step": 67780 }, { "epoch": 0.09525130981089193, "grad_norm": 0.7809820175170898, "learning_rate": 0.00028564704229310097, "loss": 4.7089, "step": 67790 }, { "epoch": 0.09526536074905552, "grad_norm": 0.759044885635376, "learning_rate": 0.0002856891948854854, "loss": 4.7492, "step": 67800 }, { "epoch": 0.0952794116872191, "grad_norm": 0.755091667175293, "learning_rate": 0.0002857313474778699, "loss": 4.6888, "step": 67810 }, { "epoch": 0.09529346262538267, "grad_norm": 0.7296857237815857, "learning_rate": 0.0002857735000702543, "loss": 4.7357, "step": 67820 }, { "epoch": 0.09530751356354625, "grad_norm": 0.7496383190155029, "learning_rate": 0.00028581565266263875, "loss": 
4.7345, "step": 67830 }, { "epoch": 0.09532156450170982, "grad_norm": 0.7716061472892761, "learning_rate": 0.00028585780525502313, "loss": 4.8233, "step": 67840 }, { "epoch": 0.0953356154398734, "grad_norm": 0.7615830302238464, "learning_rate": 0.00028589995784740756, "loss": 4.7433, "step": 67850 }, { "epoch": 0.09534966637803698, "grad_norm": 0.7765911817550659, "learning_rate": 0.000285942110439792, "loss": 4.7496, "step": 67860 }, { "epoch": 0.09536371731620055, "grad_norm": 0.7490736842155457, "learning_rate": 0.0002859842630321765, "loss": 4.8747, "step": 67870 }, { "epoch": 0.09537776825436413, "grad_norm": 0.7704018354415894, "learning_rate": 0.0002860264156245609, "loss": 4.8234, "step": 67880 }, { "epoch": 0.0953918191925277, "grad_norm": 0.7510921359062195, "learning_rate": 0.00028606856821694534, "loss": 4.7961, "step": 67890 }, { "epoch": 0.0954058701306913, "grad_norm": 0.7316580414772034, "learning_rate": 0.00028611072080932977, "loss": 4.7473, "step": 67900 }, { "epoch": 0.09541992106885487, "grad_norm": 0.7586633563041687, "learning_rate": 0.00028615287340171415, "loss": 4.8093, "step": 67910 }, { "epoch": 0.09543397200701845, "grad_norm": 0.7565239071846008, "learning_rate": 0.0002861950259940986, "loss": 4.7894, "step": 67920 }, { "epoch": 0.09544802294518202, "grad_norm": 0.7565003037452698, "learning_rate": 0.00028623717858648306, "loss": 4.8842, "step": 67930 }, { "epoch": 0.0954620738833456, "grad_norm": 0.7603064179420471, "learning_rate": 0.0002862793311788675, "loss": 4.7224, "step": 67940 }, { "epoch": 0.09547612482150918, "grad_norm": 0.7563630938529968, "learning_rate": 0.0002863214837712519, "loss": 4.7289, "step": 67950 }, { "epoch": 0.09549017575967275, "grad_norm": 0.7232897281646729, "learning_rate": 0.00028636363636363636, "loss": 4.718, "step": 67960 }, { "epoch": 0.09550422669783633, "grad_norm": 0.747141420841217, "learning_rate": 0.00028640578895602073, "loss": 4.7984, "step": 67970 }, { "epoch": 0.0955182776359999, 
"grad_norm": 0.7786388397216797, "learning_rate": 0.00028644794154840516, "loss": 4.6962, "step": 67980 }, { "epoch": 0.09553232857416348, "grad_norm": 0.7422512769699097, "learning_rate": 0.00028649009414078965, "loss": 4.8789, "step": 67990 }, { "epoch": 0.09554637951232707, "grad_norm": 0.8502141833305359, "learning_rate": 0.0002865322467331741, "loss": 4.8269, "step": 68000 }, { "epoch": 0.09556043045049065, "grad_norm": 0.7409288287162781, "learning_rate": 0.0002865743993255585, "loss": 4.7592, "step": 68010 }, { "epoch": 0.09557448138865422, "grad_norm": 0.7689024209976196, "learning_rate": 0.00028661655191794294, "loss": 4.7512, "step": 68020 }, { "epoch": 0.0955885323268178, "grad_norm": 0.7674072980880737, "learning_rate": 0.0002866587045103274, "loss": 4.9744, "step": 68030 }, { "epoch": 0.09560258326498138, "grad_norm": 0.7291126251220703, "learning_rate": 0.0002867008571027118, "loss": 4.7998, "step": 68040 }, { "epoch": 0.09561663420314495, "grad_norm": 0.7502152323722839, "learning_rate": 0.00028674300969509624, "loss": 4.7427, "step": 68050 }, { "epoch": 0.09563068514130853, "grad_norm": 0.744105875492096, "learning_rate": 0.00028678516228748067, "loss": 4.8043, "step": 68060 }, { "epoch": 0.0956447360794721, "grad_norm": 0.7957160472869873, "learning_rate": 0.0002868273148798651, "loss": 4.8657, "step": 68070 }, { "epoch": 0.09565878701763568, "grad_norm": 0.857458233833313, "learning_rate": 0.00028686946747224953, "loss": 4.685, "step": 68080 }, { "epoch": 0.09567283795579926, "grad_norm": 0.7583727240562439, "learning_rate": 0.00028691162006463396, "loss": 4.8969, "step": 68090 }, { "epoch": 0.09568688889396283, "grad_norm": 0.7256913781166077, "learning_rate": 0.0002869537726570184, "loss": 4.8367, "step": 68100 }, { "epoch": 0.09570093983212642, "grad_norm": 0.7758738994598389, "learning_rate": 0.0002869959252494028, "loss": 4.8103, "step": 68110 }, { "epoch": 0.09571499077029, "grad_norm": 0.7613289952278137, "learning_rate": 
0.00028703807784178725, "loss": 4.8833, "step": 68120 }, { "epoch": 0.09572904170845357, "grad_norm": 0.7221300601959229, "learning_rate": 0.0002870802304341717, "loss": 4.7747, "step": 68130 }, { "epoch": 0.09574309264661715, "grad_norm": 0.7547207474708557, "learning_rate": 0.0002871223830265561, "loss": 4.8013, "step": 68140 }, { "epoch": 0.09575714358478073, "grad_norm": 0.7506118416786194, "learning_rate": 0.00028716453561894055, "loss": 4.8435, "step": 68150 }, { "epoch": 0.0957711945229443, "grad_norm": 0.7563244700431824, "learning_rate": 0.000287206688211325, "loss": 4.7599, "step": 68160 }, { "epoch": 0.09578524546110788, "grad_norm": 0.7807770371437073, "learning_rate": 0.0002872488408037094, "loss": 4.8858, "step": 68170 }, { "epoch": 0.09579929639927146, "grad_norm": 0.7905330657958984, "learning_rate": 0.00028729099339609384, "loss": 4.8305, "step": 68180 }, { "epoch": 0.09581334733743503, "grad_norm": 0.7471740245819092, "learning_rate": 0.00028733314598847827, "loss": 4.7711, "step": 68190 }, { "epoch": 0.09582739827559861, "grad_norm": 0.7244470119476318, "learning_rate": 0.0002873752985808627, "loss": 4.9656, "step": 68200 }, { "epoch": 0.0958414492137622, "grad_norm": 0.7297165989875793, "learning_rate": 0.00028741745117324713, "loss": 4.783, "step": 68210 }, { "epoch": 0.09585550015192577, "grad_norm": 0.7688151597976685, "learning_rate": 0.00028745960376563157, "loss": 4.7866, "step": 68220 }, { "epoch": 0.09586955109008935, "grad_norm": 0.7747588157653809, "learning_rate": 0.000287501756358016, "loss": 4.7084, "step": 68230 }, { "epoch": 0.09588360202825293, "grad_norm": 0.766656219959259, "learning_rate": 0.00028754390895040043, "loss": 4.7271, "step": 68240 }, { "epoch": 0.0958976529664165, "grad_norm": 0.7433441281318665, "learning_rate": 0.00028758606154278486, "loss": 4.8076, "step": 68250 }, { "epoch": 0.09591170390458008, "grad_norm": 0.7474486231803894, "learning_rate": 0.0002876282141351693, "loss": 4.7575, "step": 68260 }, { "epoch": 
0.09592575484274365, "grad_norm": 0.7397088408470154, "learning_rate": 0.0002876703667275537, "loss": 4.8102, "step": 68270 }, { "epoch": 0.09593980578090723, "grad_norm": 0.7497064471244812, "learning_rate": 0.00028771251931993815, "loss": 4.8034, "step": 68280 }, { "epoch": 0.0959538567190708, "grad_norm": 0.7426905632019043, "learning_rate": 0.0002877546719123226, "loss": 4.7192, "step": 68290 }, { "epoch": 0.09596790765723438, "grad_norm": 0.7634294629096985, "learning_rate": 0.000287796824504707, "loss": 4.7343, "step": 68300 }, { "epoch": 0.09598195859539797, "grad_norm": 0.7681379318237305, "learning_rate": 0.00028783897709709145, "loss": 4.7821, "step": 68310 }, { "epoch": 0.09599600953356155, "grad_norm": 0.7707437872886658, "learning_rate": 0.0002878811296894759, "loss": 4.839, "step": 68320 }, { "epoch": 0.09601006047172513, "grad_norm": 0.7358832359313965, "learning_rate": 0.0002879232822818603, "loss": 4.7412, "step": 68330 }, { "epoch": 0.0960241114098887, "grad_norm": 0.7570725083351135, "learning_rate": 0.00028796543487424474, "loss": 4.7562, "step": 68340 }, { "epoch": 0.09603816234805228, "grad_norm": 0.7852720022201538, "learning_rate": 0.00028800758746662917, "loss": 4.6697, "step": 68350 }, { "epoch": 0.09605221328621585, "grad_norm": 0.7732431888580322, "learning_rate": 0.0002880497400590136, "loss": 4.7449, "step": 68360 }, { "epoch": 0.09606626422437943, "grad_norm": 0.7763086557388306, "learning_rate": 0.00028809189265139803, "loss": 4.7639, "step": 68370 }, { "epoch": 0.096080315162543, "grad_norm": 0.7549687623977661, "learning_rate": 0.00028813404524378246, "loss": 4.7652, "step": 68380 }, { "epoch": 0.09609436610070658, "grad_norm": 0.7978414297103882, "learning_rate": 0.0002881761978361669, "loss": 4.8397, "step": 68390 }, { "epoch": 0.09610841703887016, "grad_norm": 0.7623574137687683, "learning_rate": 0.00028821835042855133, "loss": 4.7205, "step": 68400 }, { "epoch": 0.09612246797703373, "grad_norm": 0.7838755249977112, 
"learning_rate": 0.00028826050302093576, "loss": 4.8583, "step": 68410 }, { "epoch": 0.09613651891519732, "grad_norm": 0.7429123520851135, "learning_rate": 0.0002883026556133202, "loss": 4.9437, "step": 68420 }, { "epoch": 0.0961505698533609, "grad_norm": 0.7363834381103516, "learning_rate": 0.0002883448082057046, "loss": 4.8511, "step": 68430 }, { "epoch": 0.09616462079152448, "grad_norm": 0.7411858439445496, "learning_rate": 0.00028838696079808905, "loss": 4.8572, "step": 68440 }, { "epoch": 0.09617867172968805, "grad_norm": 0.8027516007423401, "learning_rate": 0.0002884291133904735, "loss": 4.7389, "step": 68450 }, { "epoch": 0.09619272266785163, "grad_norm": 0.7942211031913757, "learning_rate": 0.0002884712659828579, "loss": 4.8992, "step": 68460 }, { "epoch": 0.0962067736060152, "grad_norm": 0.7656008005142212, "learning_rate": 0.00028851341857524235, "loss": 4.9126, "step": 68470 }, { "epoch": 0.09622082454417878, "grad_norm": 0.7987953424453735, "learning_rate": 0.0002885555711676268, "loss": 4.7477, "step": 68480 }, { "epoch": 0.09623487548234236, "grad_norm": 0.7650423645973206, "learning_rate": 0.0002885977237600112, "loss": 4.8695, "step": 68490 }, { "epoch": 0.09624892642050593, "grad_norm": 0.7581927180290222, "learning_rate": 0.00028863987635239564, "loss": 4.9698, "step": 68500 }, { "epoch": 0.09626297735866951, "grad_norm": 0.7571653127670288, "learning_rate": 0.00028868202894478007, "loss": 4.7919, "step": 68510 }, { "epoch": 0.0962770282968331, "grad_norm": 0.752564549446106, "learning_rate": 0.0002887241815371645, "loss": 4.7388, "step": 68520 }, { "epoch": 0.09629107923499668, "grad_norm": 0.7321598529815674, "learning_rate": 0.00028876633412954893, "loss": 4.7807, "step": 68530 }, { "epoch": 0.09630513017316025, "grad_norm": 0.7438421845436096, "learning_rate": 0.00028880848672193336, "loss": 4.8229, "step": 68540 }, { "epoch": 0.09631918111132383, "grad_norm": 0.7429989576339722, "learning_rate": 0.0002888506393143178, "loss": 4.7067, "step": 
68550 }, { "epoch": 0.0963332320494874, "grad_norm": 0.749879777431488, "learning_rate": 0.0002888927919067022, "loss": 4.8433, "step": 68560 }, { "epoch": 0.09634728298765098, "grad_norm": 0.8272457718849182, "learning_rate": 0.00028893494449908666, "loss": 4.6546, "step": 68570 }, { "epoch": 0.09636133392581456, "grad_norm": 0.7289469242095947, "learning_rate": 0.0002889770970914711, "loss": 4.8699, "step": 68580 }, { "epoch": 0.09637538486397813, "grad_norm": 0.7849155068397522, "learning_rate": 0.0002890192496838555, "loss": 4.5681, "step": 68590 }, { "epoch": 0.09638943580214171, "grad_norm": 0.7392459511756897, "learning_rate": 0.00028906140227623995, "loss": 4.8062, "step": 68600 }, { "epoch": 0.09640348674030529, "grad_norm": 0.771606981754303, "learning_rate": 0.00028910355486862444, "loss": 4.7789, "step": 68610 }, { "epoch": 0.09641753767846888, "grad_norm": 0.7495245337486267, "learning_rate": 0.0002891457074610088, "loss": 4.725, "step": 68620 }, { "epoch": 0.09643158861663245, "grad_norm": 0.758076012134552, "learning_rate": 0.00028918786005339324, "loss": 4.8515, "step": 68630 }, { "epoch": 0.09644563955479603, "grad_norm": 0.7334091663360596, "learning_rate": 0.0002892300126457777, "loss": 5.0253, "step": 68640 }, { "epoch": 0.0964596904929596, "grad_norm": 0.7746126651763916, "learning_rate": 0.0002892721652381621, "loss": 4.7502, "step": 68650 }, { "epoch": 0.09647374143112318, "grad_norm": 0.7248702645301819, "learning_rate": 0.00028931431783054654, "loss": 4.9443, "step": 68660 }, { "epoch": 0.09648779236928676, "grad_norm": 0.8012595772743225, "learning_rate": 0.000289356470422931, "loss": 4.8209, "step": 68670 }, { "epoch": 0.09650184330745033, "grad_norm": 0.7670885324478149, "learning_rate": 0.0002893986230153154, "loss": 4.7838, "step": 68680 }, { "epoch": 0.09651589424561391, "grad_norm": 0.7456730008125305, "learning_rate": 0.00028944077560769983, "loss": 4.7761, "step": 68690 }, { "epoch": 0.09652994518377749, "grad_norm": 
0.727116584777832, "learning_rate": 0.00028948292820008426, "loss": 4.7292, "step": 68700 }, { "epoch": 0.09654399612194106, "grad_norm": 0.758237898349762, "learning_rate": 0.0002895250807924687, "loss": 4.7278, "step": 68710 }, { "epoch": 0.09655804706010464, "grad_norm": 0.7621706128120422, "learning_rate": 0.0002895672333848531, "loss": 4.7813, "step": 68720 }, { "epoch": 0.09657209799826823, "grad_norm": 0.7740551829338074, "learning_rate": 0.0002896093859772376, "loss": 4.8008, "step": 68730 }, { "epoch": 0.0965861489364318, "grad_norm": 0.7670646905899048, "learning_rate": 0.00028965153856962204, "loss": 4.762, "step": 68740 }, { "epoch": 0.09660019987459538, "grad_norm": 0.76221764087677, "learning_rate": 0.0002896936911620064, "loss": 4.6957, "step": 68750 }, { "epoch": 0.09661425081275896, "grad_norm": 0.7280772924423218, "learning_rate": 0.00028973584375439085, "loss": 4.8566, "step": 68760 }, { "epoch": 0.09662830175092253, "grad_norm": 0.7589055299758911, "learning_rate": 0.0002897779963467753, "loss": 4.7532, "step": 68770 }, { "epoch": 0.09664235268908611, "grad_norm": 0.8092201948165894, "learning_rate": 0.0002898201489391597, "loss": 4.7692, "step": 68780 }, { "epoch": 0.09665640362724968, "grad_norm": 0.7353342175483704, "learning_rate": 0.0002898623015315442, "loss": 4.8688, "step": 68790 }, { "epoch": 0.09667045456541326, "grad_norm": 0.7660097479820251, "learning_rate": 0.00028990445412392863, "loss": 4.809, "step": 68800 }, { "epoch": 0.09668450550357684, "grad_norm": 0.7710687518119812, "learning_rate": 0.000289946606716313, "loss": 4.7622, "step": 68810 }, { "epoch": 0.09669855644174041, "grad_norm": 0.7906742095947266, "learning_rate": 0.00028998875930869744, "loss": 4.8203, "step": 68820 }, { "epoch": 0.096712607379904, "grad_norm": 0.7781643271446228, "learning_rate": 0.00029003091190108187, "loss": 4.8479, "step": 68830 }, { "epoch": 0.09672665831806758, "grad_norm": 0.7606348991394043, "learning_rate": 0.0002900730644934663, "loss": 
4.8669, "step": 68840 }, { "epoch": 0.09674070925623116, "grad_norm": 0.7359606623649597, "learning_rate": 0.0002901152170858508, "loss": 4.861, "step": 68850 }, { "epoch": 0.09675476019439473, "grad_norm": 0.7380146384239197, "learning_rate": 0.0002901573696782352, "loss": 4.7876, "step": 68860 }, { "epoch": 0.09676881113255831, "grad_norm": 0.7908768057823181, "learning_rate": 0.00029019952227061965, "loss": 4.765, "step": 68870 }, { "epoch": 0.09678286207072188, "grad_norm": 0.7726009488105774, "learning_rate": 0.000290241674863004, "loss": 4.6684, "step": 68880 }, { "epoch": 0.09679691300888546, "grad_norm": 0.7417333126068115, "learning_rate": 0.00029028382745538845, "loss": 4.8782, "step": 68890 }, { "epoch": 0.09681096394704904, "grad_norm": 0.7703368067741394, "learning_rate": 0.0002903259800477729, "loss": 4.8342, "step": 68900 }, { "epoch": 0.09682501488521261, "grad_norm": 0.774889349937439, "learning_rate": 0.00029036813264015737, "loss": 4.8245, "step": 68910 }, { "epoch": 0.09683906582337619, "grad_norm": 0.7258095145225525, "learning_rate": 0.0002904102852325418, "loss": 4.7255, "step": 68920 }, { "epoch": 0.09685311676153978, "grad_norm": 0.8192217946052551, "learning_rate": 0.00029045243782492623, "loss": 4.7965, "step": 68930 }, { "epoch": 0.09686716769970335, "grad_norm": 0.7315029501914978, "learning_rate": 0.0002904945904173106, "loss": 4.6835, "step": 68940 }, { "epoch": 0.09688121863786693, "grad_norm": 0.7798453569412231, "learning_rate": 0.00029053674300969504, "loss": 4.8065, "step": 68950 }, { "epoch": 0.09689526957603051, "grad_norm": 0.7461909651756287, "learning_rate": 0.00029057889560207947, "loss": 4.8489, "step": 68960 }, { "epoch": 0.09690932051419408, "grad_norm": 0.7602362036705017, "learning_rate": 0.00029062104819446396, "loss": 4.7368, "step": 68970 }, { "epoch": 0.09692337145235766, "grad_norm": 0.7881399989128113, "learning_rate": 0.0002906632007868484, "loss": 4.8113, "step": 68980 }, { "epoch": 0.09693742239052124, 
"grad_norm": 0.7365151643753052, "learning_rate": 0.0002907053533792328, "loss": 4.873, "step": 68990 }, { "epoch": 0.09695147332868481, "grad_norm": 0.7622225284576416, "learning_rate": 0.00029074750597161725, "loss": 4.7345, "step": 69000 }, { "epoch": 0.09696552426684839, "grad_norm": 0.7960168123245239, "learning_rate": 0.00029078965856400163, "loss": 4.6768, "step": 69010 }, { "epoch": 0.09697957520501196, "grad_norm": 0.7706853747367859, "learning_rate": 0.00029083181115638606, "loss": 4.8081, "step": 69020 }, { "epoch": 0.09699362614317554, "grad_norm": 0.7176666259765625, "learning_rate": 0.00029087396374877054, "loss": 4.8542, "step": 69030 }, { "epoch": 0.09700767708133913, "grad_norm": 0.7501884698867798, "learning_rate": 0.000290916116341155, "loss": 4.7623, "step": 69040 }, { "epoch": 0.0970217280195027, "grad_norm": 0.7589862942695618, "learning_rate": 0.0002909582689335394, "loss": 4.8451, "step": 69050 }, { "epoch": 0.09703577895766628, "grad_norm": 0.7779807448387146, "learning_rate": 0.00029100042152592384, "loss": 4.7506, "step": 69060 }, { "epoch": 0.09704982989582986, "grad_norm": 0.745506227016449, "learning_rate": 0.0002910425741183082, "loss": 4.7947, "step": 69070 }, { "epoch": 0.09706388083399344, "grad_norm": 0.7517091035842896, "learning_rate": 0.00029108472671069265, "loss": 4.7302, "step": 69080 }, { "epoch": 0.09707793177215701, "grad_norm": 0.7594832181930542, "learning_rate": 0.00029112687930307713, "loss": 4.7494, "step": 69090 }, { "epoch": 0.09709198271032059, "grad_norm": 0.7366914749145508, "learning_rate": 0.00029116903189546156, "loss": 4.7735, "step": 69100 }, { "epoch": 0.09710603364848416, "grad_norm": 0.7551494836807251, "learning_rate": 0.000291211184487846, "loss": 4.8079, "step": 69110 }, { "epoch": 0.09712008458664774, "grad_norm": 0.7677338123321533, "learning_rate": 0.0002912533370802304, "loss": 4.7275, "step": 69120 }, { "epoch": 0.09713413552481132, "grad_norm": 0.7898170948028564, "learning_rate": 
0.00029129548967261486, "loss": 4.7946, "step": 69130 }, { "epoch": 0.0971481864629749, "grad_norm": 0.7417213320732117, "learning_rate": 0.00029133764226499923, "loss": 4.7658, "step": 69140 }, { "epoch": 0.09716223740113848, "grad_norm": 0.7414938807487488, "learning_rate": 0.0002913797948573837, "loss": 4.8031, "step": 69150 }, { "epoch": 0.09717628833930206, "grad_norm": 0.7222784161567688, "learning_rate": 0.00029142194744976815, "loss": 4.7848, "step": 69160 }, { "epoch": 0.09719033927746563, "grad_norm": 0.7901402115821838, "learning_rate": 0.0002914641000421526, "loss": 4.7291, "step": 69170 }, { "epoch": 0.09720439021562921, "grad_norm": 0.7619618773460388, "learning_rate": 0.000291506252634537, "loss": 4.772, "step": 69180 }, { "epoch": 0.09721844115379279, "grad_norm": 0.7558301687240601, "learning_rate": 0.00029154840522692144, "loss": 4.6799, "step": 69190 }, { "epoch": 0.09723249209195636, "grad_norm": 0.7339478731155396, "learning_rate": 0.0002915905578193059, "loss": 4.699, "step": 69200 }, { "epoch": 0.09724654303011994, "grad_norm": 0.7412484288215637, "learning_rate": 0.0002916327104116903, "loss": 4.7946, "step": 69210 }, { "epoch": 0.09726059396828352, "grad_norm": 0.7897161245346069, "learning_rate": 0.00029167486300407474, "loss": 4.7929, "step": 69220 }, { "epoch": 0.09727464490644709, "grad_norm": 0.7667302489280701, "learning_rate": 0.00029171701559645917, "loss": 4.7734, "step": 69230 }, { "epoch": 0.09728869584461068, "grad_norm": 0.7448514699935913, "learning_rate": 0.0002917591681888436, "loss": 4.7505, "step": 69240 }, { "epoch": 0.09730274678277426, "grad_norm": 0.7377836108207703, "learning_rate": 0.00029180132078122803, "loss": 4.6303, "step": 69250 }, { "epoch": 0.09731679772093783, "grad_norm": 0.742834746837616, "learning_rate": 0.00029184347337361246, "loss": 4.8095, "step": 69260 }, { "epoch": 0.09733084865910141, "grad_norm": 0.7830805778503418, "learning_rate": 0.0002918856259659969, "loss": 4.7535, "step": 69270 }, { 
"epoch": 0.09734489959726499, "grad_norm": 0.7334023118019104, "learning_rate": 0.0002919277785583813, "loss": 4.8267, "step": 69280 }, { "epoch": 0.09735895053542856, "grad_norm": 0.7822585105895996, "learning_rate": 0.00029196993115076576, "loss": 4.743, "step": 69290 }, { "epoch": 0.09737300147359214, "grad_norm": 0.7647944092750549, "learning_rate": 0.0002920120837431502, "loss": 4.7674, "step": 69300 }, { "epoch": 0.09738705241175571, "grad_norm": 0.7947744727134705, "learning_rate": 0.0002920542363355346, "loss": 4.7308, "step": 69310 }, { "epoch": 0.09740110334991929, "grad_norm": 0.7814948558807373, "learning_rate": 0.0002920921736686806, "loss": 4.8563, "step": 69320 }, { "epoch": 0.09741515428808287, "grad_norm": 0.7444278001785278, "learning_rate": 0.00029213432626106503, "loss": 4.7429, "step": 69330 }, { "epoch": 0.09742920522624644, "grad_norm": 0.7391352653503418, "learning_rate": 0.00029217647885344946, "loss": 4.6809, "step": 69340 }, { "epoch": 0.09744325616441003, "grad_norm": 0.7300538420677185, "learning_rate": 0.0002922186314458339, "loss": 4.8616, "step": 69350 }, { "epoch": 0.09745730710257361, "grad_norm": 0.7259089350700378, "learning_rate": 0.0002922607840382183, "loss": 4.7652, "step": 69360 }, { "epoch": 0.09747135804073719, "grad_norm": 0.7582126259803772, "learning_rate": 0.00029230293663060276, "loss": 4.7776, "step": 69370 }, { "epoch": 0.09748540897890076, "grad_norm": 0.7297702431678772, "learning_rate": 0.0002923450892229872, "loss": 4.7676, "step": 69380 }, { "epoch": 0.09749945991706434, "grad_norm": 0.753105878829956, "learning_rate": 0.0002923872418153716, "loss": 4.7513, "step": 69390 }, { "epoch": 0.09751351085522791, "grad_norm": 0.7406522631645203, "learning_rate": 0.00029242939440775605, "loss": 4.8253, "step": 69400 }, { "epoch": 0.09752756179339149, "grad_norm": 0.8035889863967896, "learning_rate": 0.0002924715470001405, "loss": 4.7889, "step": 69410 }, { "epoch": 0.09754161273155507, "grad_norm": 0.7537881731987, 
"learning_rate": 0.0002925136995925249, "loss": 4.8387, "step": 69420 }, { "epoch": 0.09755566366971864, "grad_norm": 0.7590875625610352, "learning_rate": 0.00029255585218490934, "loss": 4.8179, "step": 69430 }, { "epoch": 0.09756971460788222, "grad_norm": 0.7489945292472839, "learning_rate": 0.0002925980047772938, "loss": 4.8359, "step": 69440 }, { "epoch": 0.09758376554604581, "grad_norm": 0.7501441240310669, "learning_rate": 0.0002926401573696782, "loss": 4.7967, "step": 69450 }, { "epoch": 0.09759781648420938, "grad_norm": 0.7316177487373352, "learning_rate": 0.00029268230996206264, "loss": 4.772, "step": 69460 }, { "epoch": 0.09761186742237296, "grad_norm": 0.7441616654396057, "learning_rate": 0.00029272446255444707, "loss": 4.7145, "step": 69470 }, { "epoch": 0.09762591836053654, "grad_norm": 0.733923077583313, "learning_rate": 0.0002927666151468315, "loss": 4.8215, "step": 69480 }, { "epoch": 0.09763996929870011, "grad_norm": 0.7444280385971069, "learning_rate": 0.00029280876773921593, "loss": 4.8405, "step": 69490 }, { "epoch": 0.09765402023686369, "grad_norm": 0.7458165287971497, "learning_rate": 0.00029285092033160036, "loss": 4.7594, "step": 69500 }, { "epoch": 0.09766807117502727, "grad_norm": 0.7323271632194519, "learning_rate": 0.0002928930729239848, "loss": 4.6669, "step": 69510 }, { "epoch": 0.09768212211319084, "grad_norm": 0.7334503531455994, "learning_rate": 0.0002929352255163692, "loss": 4.8641, "step": 69520 }, { "epoch": 0.09769617305135442, "grad_norm": 0.747018039226532, "learning_rate": 0.00029297737810875365, "loss": 4.8795, "step": 69530 }, { "epoch": 0.097710223989518, "grad_norm": 0.7300179600715637, "learning_rate": 0.0002930195307011381, "loss": 4.6616, "step": 69540 }, { "epoch": 0.09772427492768158, "grad_norm": 0.7466294765472412, "learning_rate": 0.0002930616832935225, "loss": 4.8153, "step": 69550 }, { "epoch": 0.09773832586584516, "grad_norm": 0.7623385787010193, "learning_rate": 0.00029310383588590695, "loss": 4.7444, "step": 
69560 }, { "epoch": 0.09775237680400874, "grad_norm": 0.7668961882591248, "learning_rate": 0.0002931459884782914, "loss": 4.8579, "step": 69570 }, { "epoch": 0.09776642774217231, "grad_norm": 0.7659717202186584, "learning_rate": 0.0002931881410706758, "loss": 4.6188, "step": 69580 }, { "epoch": 0.09778047868033589, "grad_norm": 0.7393763065338135, "learning_rate": 0.00029323029366306024, "loss": 4.8071, "step": 69590 }, { "epoch": 0.09779452961849947, "grad_norm": 0.7585105299949646, "learning_rate": 0.00029327244625544467, "loss": 4.7279, "step": 69600 }, { "epoch": 0.09780858055666304, "grad_norm": 0.9059628844261169, "learning_rate": 0.0002933145988478291, "loss": 4.931, "step": 69610 }, { "epoch": 0.09782263149482662, "grad_norm": 0.7281200289726257, "learning_rate": 0.00029335675144021354, "loss": 4.8205, "step": 69620 }, { "epoch": 0.0978366824329902, "grad_norm": 0.7609696984291077, "learning_rate": 0.00029339890403259797, "loss": 4.8716, "step": 69630 }, { "epoch": 0.09785073337115377, "grad_norm": 0.7602007389068604, "learning_rate": 0.0002934410566249824, "loss": 4.8642, "step": 69640 }, { "epoch": 0.09786478430931735, "grad_norm": 0.7322178483009338, "learning_rate": 0.00029348320921736683, "loss": 4.8271, "step": 69650 }, { "epoch": 0.09787883524748094, "grad_norm": 0.757675290107727, "learning_rate": 0.00029352536180975126, "loss": 4.8059, "step": 69660 }, { "epoch": 0.09789288618564451, "grad_norm": 0.770977258682251, "learning_rate": 0.0002935675144021357, "loss": 4.7335, "step": 69670 }, { "epoch": 0.09790693712380809, "grad_norm": 0.7639873623847961, "learning_rate": 0.0002936096669945201, "loss": 4.7993, "step": 69680 }, { "epoch": 0.09792098806197166, "grad_norm": 0.7296125888824463, "learning_rate": 0.00029365181958690455, "loss": 4.9388, "step": 69690 }, { "epoch": 0.09793503900013524, "grad_norm": 0.7537563443183899, "learning_rate": 0.000293693972179289, "loss": 4.8099, "step": 69700 }, { "epoch": 0.09794908993829882, "grad_norm": 
0.7601600289344788, "learning_rate": 0.0002937361247716734, "loss": 4.7219, "step": 69710 }, { "epoch": 0.09796314087646239, "grad_norm": 0.7119511365890503, "learning_rate": 0.00029377827736405785, "loss": 4.7607, "step": 69720 }, { "epoch": 0.09797719181462597, "grad_norm": 0.7661904096603394, "learning_rate": 0.0002938204299564423, "loss": 4.7596, "step": 69730 }, { "epoch": 0.09799124275278955, "grad_norm": 0.7884121537208557, "learning_rate": 0.00029386258254882676, "loss": 4.7573, "step": 69740 }, { "epoch": 0.09800529369095312, "grad_norm": 0.7419071793556213, "learning_rate": 0.00029390473514121114, "loss": 4.852, "step": 69750 }, { "epoch": 0.09801934462911671, "grad_norm": 0.8159481287002563, "learning_rate": 0.00029394688773359557, "loss": 4.6828, "step": 69760 }, { "epoch": 0.09803339556728029, "grad_norm": 0.755358874797821, "learning_rate": 0.00029398904032598, "loss": 4.7223, "step": 69770 }, { "epoch": 0.09804744650544386, "grad_norm": 0.7439599633216858, "learning_rate": 0.00029403119291836443, "loss": 4.7543, "step": 69780 }, { "epoch": 0.09806149744360744, "grad_norm": 0.7651283740997314, "learning_rate": 0.0002940733455107489, "loss": 4.8595, "step": 69790 }, { "epoch": 0.09807554838177102, "grad_norm": 0.7742921113967896, "learning_rate": 0.00029411549810313335, "loss": 4.6861, "step": 69800 }, { "epoch": 0.09808959931993459, "grad_norm": 0.7510424256324768, "learning_rate": 0.00029415765069551773, "loss": 4.7856, "step": 69810 }, { "epoch": 0.09810365025809817, "grad_norm": 0.7651505470275879, "learning_rate": 0.00029419980328790216, "loss": 4.7194, "step": 69820 }, { "epoch": 0.09811770119626174, "grad_norm": 0.7609485387802124, "learning_rate": 0.0002942419558802866, "loss": 4.7036, "step": 69830 }, { "epoch": 0.09813175213442532, "grad_norm": 0.7432272434234619, "learning_rate": 0.000294284108472671, "loss": 4.7723, "step": 69840 }, { "epoch": 0.0981458030725889, "grad_norm": 0.7429347038269043, "learning_rate": 0.0002943262610650555, 
"loss": 4.7675, "step": 69850 }, { "epoch": 0.09815985401075249, "grad_norm": 0.7958447933197021, "learning_rate": 0.00029436841365743994, "loss": 4.7454, "step": 69860 }, { "epoch": 0.09817390494891606, "grad_norm": 0.7396420836448669, "learning_rate": 0.00029441056624982437, "loss": 4.6856, "step": 69870 }, { "epoch": 0.09818795588707964, "grad_norm": 0.7574613690376282, "learning_rate": 0.00029445271884220875, "loss": 4.8366, "step": 69880 }, { "epoch": 0.09820200682524322, "grad_norm": 0.7506481409072876, "learning_rate": 0.0002944948714345932, "loss": 4.6661, "step": 69890 }, { "epoch": 0.09821605776340679, "grad_norm": 0.7857638001441956, "learning_rate": 0.0002945370240269776, "loss": 4.7884, "step": 69900 }, { "epoch": 0.09823010870157037, "grad_norm": 0.7581502199172974, "learning_rate": 0.0002945791766193621, "loss": 4.7728, "step": 69910 }, { "epoch": 0.09824415963973394, "grad_norm": 0.7380070090293884, "learning_rate": 0.0002946213292117465, "loss": 4.7034, "step": 69920 }, { "epoch": 0.09825821057789752, "grad_norm": 0.719098687171936, "learning_rate": 0.00029466348180413096, "loss": 4.8379, "step": 69930 }, { "epoch": 0.0982722615160611, "grad_norm": 0.7294281125068665, "learning_rate": 0.00029470563439651533, "loss": 4.8387, "step": 69940 }, { "epoch": 0.09828631245422467, "grad_norm": 0.7245777249336243, "learning_rate": 0.00029474778698889976, "loss": 4.9148, "step": 69950 }, { "epoch": 0.09830036339238825, "grad_norm": 0.7560135126113892, "learning_rate": 0.0002947899395812842, "loss": 4.8186, "step": 69960 }, { "epoch": 0.09831441433055184, "grad_norm": 0.7155381441116333, "learning_rate": 0.0002948320921736687, "loss": 4.8529, "step": 69970 }, { "epoch": 0.09832846526871541, "grad_norm": 0.751598060131073, "learning_rate": 0.0002948742447660531, "loss": 4.6971, "step": 69980 }, { "epoch": 0.09834251620687899, "grad_norm": 0.7454250454902649, "learning_rate": 0.00029491639735843754, "loss": 4.8887, "step": 69990 }, { "epoch": 
0.09835656714504257, "grad_norm": 0.745444118976593, "learning_rate": 0.000294958549950822, "loss": 4.8025, "step": 70000 }, { "epoch": 0.09837061808320614, "grad_norm": 0.7454139590263367, "learning_rate": 0.00029500070254320635, "loss": 4.8269, "step": 70010 }, { "epoch": 0.09838466902136972, "grad_norm": 0.7288814187049866, "learning_rate": 0.0002950428551355908, "loss": 4.8821, "step": 70020 }, { "epoch": 0.0983987199595333, "grad_norm": 0.7203482985496521, "learning_rate": 0.00029508500772797527, "loss": 4.7518, "step": 70030 }, { "epoch": 0.09841277089769687, "grad_norm": 0.7820789813995361, "learning_rate": 0.0002951271603203597, "loss": 4.7813, "step": 70040 }, { "epoch": 0.09842682183586045, "grad_norm": 0.7446337938308716, "learning_rate": 0.00029516931291274413, "loss": 4.7992, "step": 70050 }, { "epoch": 0.09844087277402402, "grad_norm": 0.7281479835510254, "learning_rate": 0.00029521146550512856, "loss": 4.8715, "step": 70060 }, { "epoch": 0.09845492371218761, "grad_norm": 0.7503718137741089, "learning_rate": 0.00029525361809751294, "loss": 4.7856, "step": 70070 }, { "epoch": 0.09846897465035119, "grad_norm": 0.7504997849464417, "learning_rate": 0.00029529577068989737, "loss": 4.7455, "step": 70080 }, { "epoch": 0.09848302558851477, "grad_norm": 0.755368709564209, "learning_rate": 0.00029533792328228185, "loss": 4.7809, "step": 70090 }, { "epoch": 0.09849707652667834, "grad_norm": 0.7570355534553528, "learning_rate": 0.0002953800758746663, "loss": 4.7163, "step": 70100 }, { "epoch": 0.09851112746484192, "grad_norm": 0.7263005971908569, "learning_rate": 0.0002954222284670507, "loss": 4.8162, "step": 70110 }, { "epoch": 0.0985251784030055, "grad_norm": 0.7369113564491272, "learning_rate": 0.00029546438105943515, "loss": 4.7484, "step": 70120 }, { "epoch": 0.09853922934116907, "grad_norm": 0.7835990190505981, "learning_rate": 0.0002955065336518196, "loss": 4.7219, "step": 70130 }, { "epoch": 0.09855328027933265, "grad_norm": 0.7315928936004639, 
"learning_rate": 0.00029554868624420396, "loss": 4.7446, "step": 70140 }, { "epoch": 0.09856733121749622, "grad_norm": 0.7778264284133911, "learning_rate": 0.00029559083883658844, "loss": 4.7838, "step": 70150 }, { "epoch": 0.0985813821556598, "grad_norm": 0.7490993142127991, "learning_rate": 0.00029563299142897287, "loss": 4.7523, "step": 70160 }, { "epoch": 0.09859543309382339, "grad_norm": 0.728198766708374, "learning_rate": 0.0002956751440213573, "loss": 4.7791, "step": 70170 }, { "epoch": 0.09860948403198697, "grad_norm": 0.7419266700744629, "learning_rate": 0.00029571729661374173, "loss": 4.7178, "step": 70180 }, { "epoch": 0.09862353497015054, "grad_norm": 0.7586491703987122, "learning_rate": 0.00029575944920612617, "loss": 4.7541, "step": 70190 }, { "epoch": 0.09863758590831412, "grad_norm": 0.7712236046791077, "learning_rate": 0.00029580160179851054, "loss": 4.7473, "step": 70200 }, { "epoch": 0.0986516368464777, "grad_norm": 0.7614474892616272, "learning_rate": 0.00029584375439089503, "loss": 4.7263, "step": 70210 }, { "epoch": 0.09866568778464127, "grad_norm": 0.7652673125267029, "learning_rate": 0.00029588590698327946, "loss": 4.7051, "step": 70220 }, { "epoch": 0.09867973872280485, "grad_norm": 0.7385839223861694, "learning_rate": 0.0002959280595756639, "loss": 4.7849, "step": 70230 }, { "epoch": 0.09869378966096842, "grad_norm": 0.8170595765113831, "learning_rate": 0.0002959702121680483, "loss": 4.8655, "step": 70240 }, { "epoch": 0.098707840599132, "grad_norm": 0.7230207324028015, "learning_rate": 0.00029601236476043275, "loss": 4.8548, "step": 70250 }, { "epoch": 0.09872189153729558, "grad_norm": 0.7606765031814575, "learning_rate": 0.0002960545173528172, "loss": 4.7995, "step": 70260 }, { "epoch": 0.09873594247545917, "grad_norm": 0.7488109469413757, "learning_rate": 0.0002960966699452016, "loss": 4.827, "step": 70270 }, { "epoch": 0.09874999341362274, "grad_norm": 0.7458518743515015, "learning_rate": 0.00029613882253758605, "loss": 4.8889, "step": 
70280 }, { "epoch": 0.09876404435178632, "grad_norm": 0.7448275089263916, "learning_rate": 0.0002961809751299705, "loss": 4.7753, "step": 70290 }, { "epoch": 0.0987780952899499, "grad_norm": 0.7387959957122803, "learning_rate": 0.0002962231277223549, "loss": 4.738, "step": 70300 }, { "epoch": 0.09879214622811347, "grad_norm": 0.7459929585456848, "learning_rate": 0.00029626528031473934, "loss": 4.6784, "step": 70310 }, { "epoch": 0.09880619716627705, "grad_norm": 0.7338137030601501, "learning_rate": 0.00029630743290712377, "loss": 4.8043, "step": 70320 }, { "epoch": 0.09882024810444062, "grad_norm": 0.7475922107696533, "learning_rate": 0.0002963495854995082, "loss": 4.8699, "step": 70330 }, { "epoch": 0.0988342990426042, "grad_norm": 0.7375343441963196, "learning_rate": 0.00029639173809189263, "loss": 4.7544, "step": 70340 }, { "epoch": 0.09884834998076777, "grad_norm": 0.7328280210494995, "learning_rate": 0.00029643389068427706, "loss": 4.7604, "step": 70350 }, { "epoch": 0.09886240091893135, "grad_norm": 0.723840594291687, "learning_rate": 0.0002964760432766615, "loss": 4.888, "step": 70360 }, { "epoch": 0.09887645185709493, "grad_norm": 0.7332075238227844, "learning_rate": 0.0002965181958690459, "loss": 4.7549, "step": 70370 }, { "epoch": 0.09889050279525852, "grad_norm": 0.7340726852416992, "learning_rate": 0.00029656034846143036, "loss": 4.7467, "step": 70380 }, { "epoch": 0.0989045537334221, "grad_norm": 0.7343766093254089, "learning_rate": 0.0002966025010538148, "loss": 4.7236, "step": 70390 }, { "epoch": 0.09891860467158567, "grad_norm": 0.7564096450805664, "learning_rate": 0.0002966446536461992, "loss": 4.6312, "step": 70400 }, { "epoch": 0.09893265560974925, "grad_norm": 0.7509664297103882, "learning_rate": 0.00029668680623858365, "loss": 4.7879, "step": 70410 }, { "epoch": 0.09894670654791282, "grad_norm": 0.7478966116905212, "learning_rate": 0.0002967289588309681, "loss": 4.8726, "step": 70420 }, { "epoch": 0.0989607574860764, "grad_norm": 
0.7956492304801941, "learning_rate": 0.0002967711114233525, "loss": 4.8496, "step": 70430 }, { "epoch": 0.09897480842423997, "grad_norm": 0.7533037662506104, "learning_rate": 0.00029681326401573694, "loss": 4.8447, "step": 70440 }, { "epoch": 0.09898885936240355, "grad_norm": 0.7333579063415527, "learning_rate": 0.0002968554166081214, "loss": 4.7172, "step": 70450 }, { "epoch": 0.09900291030056713, "grad_norm": 0.745343029499054, "learning_rate": 0.0002968975692005058, "loss": 4.7113, "step": 70460 }, { "epoch": 0.0990169612387307, "grad_norm": 0.7377544045448303, "learning_rate": 0.00029693972179289024, "loss": 5.0384, "step": 70470 }, { "epoch": 0.09903101217689429, "grad_norm": 0.756903886795044, "learning_rate": 0.00029698187438527467, "loss": 4.6786, "step": 70480 }, { "epoch": 0.09904506311505787, "grad_norm": 0.7782666087150574, "learning_rate": 0.0002970240269776591, "loss": 4.8214, "step": 70490 }, { "epoch": 0.09905911405322144, "grad_norm": 0.7528187036514282, "learning_rate": 0.00029706617957004353, "loss": 4.7897, "step": 70500 }, { "epoch": 0.09907316499138502, "grad_norm": 0.7484343647956848, "learning_rate": 0.00029710833216242796, "loss": 4.7155, "step": 70510 }, { "epoch": 0.0990872159295486, "grad_norm": 0.7250393629074097, "learning_rate": 0.0002971504847548124, "loss": 4.8713, "step": 70520 }, { "epoch": 0.09910126686771217, "grad_norm": 0.7773247957229614, "learning_rate": 0.0002971926373471968, "loss": 4.7164, "step": 70530 }, { "epoch": 0.09911531780587575, "grad_norm": 0.729132890701294, "learning_rate": 0.00029723478993958126, "loss": 4.7736, "step": 70540 }, { "epoch": 0.09912936874403933, "grad_norm": 0.7351367473602295, "learning_rate": 0.0002972769425319657, "loss": 4.9012, "step": 70550 }, { "epoch": 0.0991434196822029, "grad_norm": 0.7388243675231934, "learning_rate": 0.0002973190951243501, "loss": 4.6448, "step": 70560 }, { "epoch": 0.09915747062036648, "grad_norm": 0.7806434631347656, "learning_rate": 0.00029736124771673455, 
"loss": 4.7922, "step": 70570 }, { "epoch": 0.09917152155853007, "grad_norm": 0.788178026676178, "learning_rate": 0.000297403400309119, "loss": 4.7924, "step": 70580 }, { "epoch": 0.09918557249669364, "grad_norm": 0.7238157391548157, "learning_rate": 0.0002974455529015034, "loss": 4.8356, "step": 70590 }, { "epoch": 0.09919962343485722, "grad_norm": 0.7454667687416077, "learning_rate": 0.00029748770549388784, "loss": 4.7332, "step": 70600 }, { "epoch": 0.0992136743730208, "grad_norm": 0.7305548191070557, "learning_rate": 0.0002975298580862723, "loss": 4.8754, "step": 70610 }, { "epoch": 0.09922772531118437, "grad_norm": 0.7459691762924194, "learning_rate": 0.0002975720106786567, "loss": 4.7751, "step": 70620 }, { "epoch": 0.09924177624934795, "grad_norm": 0.7162081599235535, "learning_rate": 0.00029761416327104114, "loss": 4.8116, "step": 70630 }, { "epoch": 0.09925582718751153, "grad_norm": 0.7404140830039978, "learning_rate": 0.00029765631586342557, "loss": 4.8285, "step": 70640 }, { "epoch": 0.0992698781256751, "grad_norm": 0.7442372441291809, "learning_rate": 0.00029769846845581, "loss": 4.7367, "step": 70650 }, { "epoch": 0.09928392906383868, "grad_norm": 0.7542151212692261, "learning_rate": 0.00029774062104819443, "loss": 4.7881, "step": 70660 }, { "epoch": 0.09929798000200225, "grad_norm": 0.7247451543807983, "learning_rate": 0.00029778277364057886, "loss": 4.7453, "step": 70670 }, { "epoch": 0.09931203094016583, "grad_norm": 0.7268667221069336, "learning_rate": 0.0002978249262329633, "loss": 4.7807, "step": 70680 }, { "epoch": 0.09932608187832942, "grad_norm": 0.7007578611373901, "learning_rate": 0.0002978670788253477, "loss": 4.8148, "step": 70690 }, { "epoch": 0.099340132816493, "grad_norm": 0.7512852549552917, "learning_rate": 0.00029790923141773216, "loss": 4.7867, "step": 70700 }, { "epoch": 0.09935418375465657, "grad_norm": 0.7661580443382263, "learning_rate": 0.0002979513840101166, "loss": 4.721, "step": 70710 }, { "epoch": 0.09936823469282015, 
"grad_norm": 0.7639215588569641, "learning_rate": 0.000297993536602501, "loss": 4.7987, "step": 70720 }, { "epoch": 0.09938228563098372, "grad_norm": 0.7315471172332764, "learning_rate": 0.00029803568919488545, "loss": 4.9177, "step": 70730 }, { "epoch": 0.0993963365691473, "grad_norm": 0.7605272531509399, "learning_rate": 0.0002980778417872699, "loss": 4.8585, "step": 70740 }, { "epoch": 0.09941038750731088, "grad_norm": 0.71612948179245, "learning_rate": 0.0002981199943796543, "loss": 4.7394, "step": 70750 }, { "epoch": 0.09942443844547445, "grad_norm": 0.7437877058982849, "learning_rate": 0.00029816214697203874, "loss": 4.7909, "step": 70760 }, { "epoch": 0.09943848938363803, "grad_norm": 0.7186173796653748, "learning_rate": 0.0002982042995644232, "loss": 4.7169, "step": 70770 }, { "epoch": 0.0994525403218016, "grad_norm": 0.7542218565940857, "learning_rate": 0.0002982464521568076, "loss": 4.7704, "step": 70780 }, { "epoch": 0.0994665912599652, "grad_norm": 0.7415812611579895, "learning_rate": 0.00029828860474919204, "loss": 4.8173, "step": 70790 }, { "epoch": 0.09948064219812877, "grad_norm": 0.7335118651390076, "learning_rate": 0.00029833075734157647, "loss": 4.8941, "step": 70800 }, { "epoch": 0.09949469313629235, "grad_norm": 0.7257421612739563, "learning_rate": 0.0002983729099339609, "loss": 4.8061, "step": 70810 }, { "epoch": 0.09950874407445592, "grad_norm": 0.7506051659584045, "learning_rate": 0.00029841506252634533, "loss": 4.7745, "step": 70820 }, { "epoch": 0.0995227950126195, "grad_norm": 0.765600323677063, "learning_rate": 0.00029845721511872976, "loss": 4.7226, "step": 70830 }, { "epoch": 0.09953684595078308, "grad_norm": 0.7521647214889526, "learning_rate": 0.00029849936771111425, "loss": 4.8212, "step": 70840 }, { "epoch": 0.09955089688894665, "grad_norm": 0.7769646644592285, "learning_rate": 0.0002985415203034986, "loss": 4.7042, "step": 70850 }, { "epoch": 0.09956494782711023, "grad_norm": 0.751598060131073, "learning_rate": 
0.00029858367289588305, "loss": 4.7392, "step": 70860 }, { "epoch": 0.0995789987652738, "grad_norm": 0.7508914470672607, "learning_rate": 0.0002986258254882675, "loss": 4.8002, "step": 70870 }, { "epoch": 0.09959304970343738, "grad_norm": 0.728714644908905, "learning_rate": 0.0002986679780806519, "loss": 4.7734, "step": 70880 }, { "epoch": 0.09960710064160097, "grad_norm": 0.7441499829292297, "learning_rate": 0.00029871013067303635, "loss": 4.8044, "step": 70890 }, { "epoch": 0.09962115157976455, "grad_norm": 0.74562007188797, "learning_rate": 0.00029875228326542083, "loss": 4.8104, "step": 70900 }, { "epoch": 0.09963520251792812, "grad_norm": 0.7518746256828308, "learning_rate": 0.0002987944358578052, "loss": 4.7614, "step": 70910 }, { "epoch": 0.0996492534560917, "grad_norm": 0.7399574518203735, "learning_rate": 0.00029883658845018964, "loss": 4.7511, "step": 70920 }, { "epoch": 0.09966330439425528, "grad_norm": 0.7430614233016968, "learning_rate": 0.00029887874104257407, "loss": 4.7823, "step": 70930 }, { "epoch": 0.09967735533241885, "grad_norm": 0.7251525521278381, "learning_rate": 0.0002989208936349585, "loss": 4.8084, "step": 70940 }, { "epoch": 0.09969140627058243, "grad_norm": 0.8306521773338318, "learning_rate": 0.000298963046227343, "loss": 4.7226, "step": 70950 }, { "epoch": 0.099705457208746, "grad_norm": 0.7492359280586243, "learning_rate": 0.0002990051988197274, "loss": 4.7497, "step": 70960 }, { "epoch": 0.09971950814690958, "grad_norm": 0.736562967300415, "learning_rate": 0.00029904735141211185, "loss": 4.6997, "step": 70970 }, { "epoch": 0.09973355908507316, "grad_norm": 0.740244448184967, "learning_rate": 0.00029908950400449623, "loss": 4.7356, "step": 70980 }, { "epoch": 0.09974761002323673, "grad_norm": 0.7707760334014893, "learning_rate": 0.00029913165659688066, "loss": 4.6801, "step": 70990 }, { "epoch": 0.09976166096140032, "grad_norm": 0.7498148679733276, "learning_rate": 0.0002991738091892651, "loss": 4.6808, "step": 71000 }, { "epoch": 
0.0997757118995639, "grad_norm": 0.7346001863479614, "learning_rate": 0.0002992159617816496, "loss": 4.7796, "step": 71010 }, { "epoch": 0.09978976283772747, "grad_norm": 0.7446746826171875, "learning_rate": 0.000299258114374034, "loss": 4.7906, "step": 71020 }, { "epoch": 0.09980381377589105, "grad_norm": 0.7236793637275696, "learning_rate": 0.00029930026696641844, "loss": 4.7742, "step": 71030 }, { "epoch": 0.09981786471405463, "grad_norm": 0.7329582571983337, "learning_rate": 0.0002993424195588028, "loss": 4.6221, "step": 71040 }, { "epoch": 0.0998319156522182, "grad_norm": 0.7298672199249268, "learning_rate": 0.00029938457215118725, "loss": 4.8417, "step": 71050 }, { "epoch": 0.09984596659038178, "grad_norm": 0.7621816396713257, "learning_rate": 0.0002994267247435717, "loss": 4.8465, "step": 71060 }, { "epoch": 0.09986001752854536, "grad_norm": 0.7588155269622803, "learning_rate": 0.00029946887733595616, "loss": 4.7579, "step": 71070 }, { "epoch": 0.09987406846670893, "grad_norm": 0.7053050398826599, "learning_rate": 0.0002995110299283406, "loss": 4.7375, "step": 71080 }, { "epoch": 0.09988811940487251, "grad_norm": 0.7316890954971313, "learning_rate": 0.000299553182520725, "loss": 4.8224, "step": 71090 }, { "epoch": 0.0999021703430361, "grad_norm": 0.7458178400993347, "learning_rate": 0.0002995953351131094, "loss": 4.9259, "step": 71100 }, { "epoch": 0.09991622128119967, "grad_norm": 0.7310707569122314, "learning_rate": 0.00029963748770549383, "loss": 4.7937, "step": 71110 }, { "epoch": 0.09993027221936325, "grad_norm": 0.7549211978912354, "learning_rate": 0.00029967964029787826, "loss": 4.7397, "step": 71120 }, { "epoch": 0.09994432315752683, "grad_norm": 0.743523120880127, "learning_rate": 0.00029972179289026275, "loss": 4.7553, "step": 71130 }, { "epoch": 0.0999583740956904, "grad_norm": 0.7451346516609192, "learning_rate": 0.0002997639454826472, "loss": 4.7489, "step": 71140 }, { "epoch": 0.09997242503385398, "grad_norm": 0.7571463584899902, 
"learning_rate": 0.0002998060980750316, "loss": 4.7289, "step": 71150 }, { "epoch": 0.09998647597201755, "grad_norm": 0.7282206416130066, "learning_rate": 0.00029984825066741604, "loss": 4.8128, "step": 71160 }, { "epoch": 0.10000052691018113, "grad_norm": 0.7786749601364136, "learning_rate": 0.0002998904032598004, "loss": 4.928, "step": 71170 }, { "epoch": 0.10001457784834471, "grad_norm": 0.7948459982872009, "learning_rate": 0.00029993255585218485, "loss": 4.7095, "step": 71180 }, { "epoch": 0.10002862878650828, "grad_norm": 0.7715887427330017, "learning_rate": 0.00029997470844456934, "loss": 4.8289, "step": 71190 }, { "epoch": 0.10004267972467187, "grad_norm": 0.7413100004196167, "learning_rate": 0.0002999999999711326, "loss": 4.6136, "step": 71200 }, { "epoch": 0.10005673066283545, "grad_norm": 0.8110563158988953, "learning_rate": 0.00029999999964637453, "loss": 4.7762, "step": 71210 }, { "epoch": 0.10007078160099903, "grad_norm": 0.7453218698501587, "learning_rate": 0.0002999999989607741, "loss": 4.7854, "step": 71220 }, { "epoch": 0.1000848325391626, "grad_norm": 0.787434995174408, "learning_rate": 0.0002999999979143313, "loss": 4.7152, "step": 71230 }, { "epoch": 0.10009888347732618, "grad_norm": 0.7521796226501465, "learning_rate": 0.00029999999650704627, "loss": 4.863, "step": 71240 }, { "epoch": 0.10011293441548975, "grad_norm": 0.7553433179855347, "learning_rate": 0.00029999999473891885, "loss": 4.7434, "step": 71250 }, { "epoch": 0.10012698535365333, "grad_norm": 0.7574853301048279, "learning_rate": 0.00029999999260994916, "loss": 4.8025, "step": 71260 }, { "epoch": 0.1001410362918169, "grad_norm": 0.7284955978393555, "learning_rate": 0.0002999999901201371, "loss": 4.7283, "step": 71270 }, { "epoch": 0.10015508722998048, "grad_norm": 0.7206137776374817, "learning_rate": 0.00029999998726948276, "loss": 4.6774, "step": 71280 }, { "epoch": 0.10016913816814406, "grad_norm": 0.7361604571342468, "learning_rate": 0.00029999998405798615, "loss": 4.8914, "step": 
71290 }, { "epoch": 0.10018318910630764, "grad_norm": 0.7506524920463562, "learning_rate": 0.0002999999804856472, "loss": 4.7574, "step": 71300 }, { "epoch": 0.10019724004447123, "grad_norm": 0.7192894220352173, "learning_rate": 0.00029999997655246597, "loss": 4.7978, "step": 71310 }, { "epoch": 0.1002112909826348, "grad_norm": 0.6896945238113403, "learning_rate": 0.0002999999722584425, "loss": 4.7519, "step": 71320 }, { "epoch": 0.10022534192079838, "grad_norm": 0.7413153648376465, "learning_rate": 0.00029999996760357676, "loss": 4.878, "step": 71330 }, { "epoch": 0.10023939285896195, "grad_norm": 0.7378984689712524, "learning_rate": 0.0002999999625878688, "loss": 4.7571, "step": 71340 }, { "epoch": 0.10025344379712553, "grad_norm": 0.7499397397041321, "learning_rate": 0.0002999999572113185, "loss": 4.7636, "step": 71350 }, { "epoch": 0.1002674947352891, "grad_norm": 0.725183367729187, "learning_rate": 0.0002999999514739261, "loss": 4.8262, "step": 71360 }, { "epoch": 0.10028154567345268, "grad_norm": 0.783112645149231, "learning_rate": 0.0002999999453756914, "loss": 4.7547, "step": 71370 }, { "epoch": 0.10029559661161626, "grad_norm": 0.7308584451675415, "learning_rate": 0.00029999993891661455, "loss": 4.7689, "step": 71380 }, { "epoch": 0.10030964754977983, "grad_norm": 0.7266840934753418, "learning_rate": 0.0002999999320966955, "loss": 4.7863, "step": 71390 }, { "epoch": 0.10032369848794341, "grad_norm": 0.7103331089019775, "learning_rate": 0.00029999992491593427, "loss": 4.6985, "step": 71400 }, { "epoch": 0.100337749426107, "grad_norm": 0.7648615837097168, "learning_rate": 0.0002999999173743309, "loss": 4.8235, "step": 71410 }, { "epoch": 0.10035180036427058, "grad_norm": 0.7262908220291138, "learning_rate": 0.0002999999094718854, "loss": 4.8409, "step": 71420 }, { "epoch": 0.10036585130243415, "grad_norm": 0.7537401914596558, "learning_rate": 0.0002999999012085978, "loss": 4.7755, "step": 71430 }, { "epoch": 0.10037990224059773, "grad_norm": 
0.7262635827064514, "learning_rate": 0.0002999998925844681, "loss": 4.7393, "step": 71440 }, { "epoch": 0.1003939531787613, "grad_norm": 0.7246768474578857, "learning_rate": 0.0002999998835994963, "loss": 4.747, "step": 71450 }, { "epoch": 0.10040800411692488, "grad_norm": 0.7420998811721802, "learning_rate": 0.0002999998742536825, "loss": 4.7245, "step": 71460 }, { "epoch": 0.10042205505508846, "grad_norm": 0.7534216046333313, "learning_rate": 0.0002999998645470266, "loss": 4.7235, "step": 71470 }, { "epoch": 0.10043610599325203, "grad_norm": 0.7178711891174316, "learning_rate": 0.00029999985447952876, "loss": 4.6908, "step": 71480 }, { "epoch": 0.10045015693141561, "grad_norm": 0.7422319054603577, "learning_rate": 0.0002999998440511889, "loss": 4.8748, "step": 71490 }, { "epoch": 0.10046420786957919, "grad_norm": 0.7381262183189392, "learning_rate": 0.00029999983326200715, "loss": 4.7584, "step": 71500 }, { "epoch": 0.10047825880774278, "grad_norm": 0.726177453994751, "learning_rate": 0.0002999998232432237, "loss": 4.7548, "step": 71510 }, { "epoch": 0.10049230974590635, "grad_norm": 0.732631266117096, "learning_rate": 0.00029999981176844225, "loss": 4.8067, "step": 71520 }, { "epoch": 0.10050636068406993, "grad_norm": 0.7357652187347412, "learning_rate": 0.00029999979993281894, "loss": 4.8273, "step": 71530 }, { "epoch": 0.1005204116222335, "grad_norm": 0.7101837396621704, "learning_rate": 0.0002999997877363538, "loss": 4.6746, "step": 71540 }, { "epoch": 0.10053446256039708, "grad_norm": 0.7089740633964539, "learning_rate": 0.0002999997751790468, "loss": 4.8879, "step": 71550 }, { "epoch": 0.10054851349856066, "grad_norm": 0.7296656966209412, "learning_rate": 0.000299999762260898, "loss": 4.7599, "step": 71560 }, { "epoch": 0.10056256443672423, "grad_norm": 0.7465136051177979, "learning_rate": 0.00029999974898190754, "loss": 4.8796, "step": 71570 }, { "epoch": 0.10057661537488781, "grad_norm": 0.7571253776550293, "learning_rate": 0.0002999997353420753, "loss": 
4.7633, "step": 71580 }, { "epoch": 0.10059066631305139, "grad_norm": 0.7460293173789978, "learning_rate": 0.00029999972134140134, "loss": 4.9302, "step": 71590 }, { "epoch": 0.10060471725121496, "grad_norm": 0.7485179305076599, "learning_rate": 0.0002999997069798857, "loss": 4.8744, "step": 71600 }, { "epoch": 0.10061876818937854, "grad_norm": 0.7496930956840515, "learning_rate": 0.0002999996922575285, "loss": 4.7929, "step": 71610 }, { "epoch": 0.10063281912754213, "grad_norm": 0.7394983768463135, "learning_rate": 0.0002999996771743297, "loss": 4.7198, "step": 71620 }, { "epoch": 0.1006468700657057, "grad_norm": 0.7388569712638855, "learning_rate": 0.00029999966173028934, "loss": 4.7331, "step": 71630 }, { "epoch": 0.10066092100386928, "grad_norm": 0.7156569361686707, "learning_rate": 0.00029999964592540747, "loss": 4.8351, "step": 71640 }, { "epoch": 0.10067497194203286, "grad_norm": 0.7694973349571228, "learning_rate": 0.0002999996297596841, "loss": 4.8795, "step": 71650 }, { "epoch": 0.10068902288019643, "grad_norm": 0.8083022236824036, "learning_rate": 0.0002999996132331193, "loss": 4.7547, "step": 71660 }, { "epoch": 0.10070307381836001, "grad_norm": 0.7387674450874329, "learning_rate": 0.00029999959634571306, "loss": 4.6957, "step": 71670 }, { "epoch": 0.10071712475652358, "grad_norm": 0.7180886268615723, "learning_rate": 0.0002999995790974655, "loss": 4.9078, "step": 71680 }, { "epoch": 0.10073117569468716, "grad_norm": 0.7135482430458069, "learning_rate": 0.00029999956148837664, "loss": 4.8903, "step": 71690 }, { "epoch": 0.10074522663285074, "grad_norm": 0.7912961840629578, "learning_rate": 0.00029999954351844646, "loss": 4.7291, "step": 71700 }, { "epoch": 0.10075927757101431, "grad_norm": 0.7425175905227661, "learning_rate": 0.0002999995251876751, "loss": 4.8633, "step": 71710 }, { "epoch": 0.1007733285091779, "grad_norm": 0.741531491279602, "learning_rate": 0.00029999950649606255, "loss": 4.8959, "step": 71720 }, { "epoch": 0.10078737944734148, 
"grad_norm": 0.7444692254066467, "learning_rate": 0.0002999994874436088, "loss": 4.7001, "step": 71730 }, { "epoch": 0.10080143038550506, "grad_norm": 0.7535449862480164, "learning_rate": 0.000299999468030314, "loss": 4.7645, "step": 71740 }, { "epoch": 0.10081548132366863, "grad_norm": 0.7291529178619385, "learning_rate": 0.0002999994482561781, "loss": 4.8059, "step": 71750 }, { "epoch": 0.10082953226183221, "grad_norm": 0.7530328035354614, "learning_rate": 0.0002999994281212013, "loss": 4.7958, "step": 71760 }, { "epoch": 0.10084358319999578, "grad_norm": 0.7359017133712769, "learning_rate": 0.0002999994076253835, "loss": 4.7982, "step": 71770 }, { "epoch": 0.10085763413815936, "grad_norm": 0.7342458367347717, "learning_rate": 0.00029999938676872475, "loss": 4.7397, "step": 71780 }, { "epoch": 0.10087168507632294, "grad_norm": 0.7454365491867065, "learning_rate": 0.0002999993655512252, "loss": 4.7391, "step": 71790 }, { "epoch": 0.10088573601448651, "grad_norm": 0.771918773651123, "learning_rate": 0.0002999993439728848, "loss": 4.8666, "step": 71800 }, { "epoch": 0.10089978695265009, "grad_norm": 0.7269641160964966, "learning_rate": 0.00029999932203370364, "loss": 4.8573, "step": 71810 }, { "epoch": 0.10091383789081368, "grad_norm": 0.7324143648147583, "learning_rate": 0.00029999929973368184, "loss": 4.6735, "step": 71820 }, { "epoch": 0.10092788882897726, "grad_norm": 0.7486067414283752, "learning_rate": 0.0002999992770728194, "loss": 4.6773, "step": 71830 }, { "epoch": 0.10094193976714083, "grad_norm": 0.7198307514190674, "learning_rate": 0.0002999992540511163, "loss": 4.7123, "step": 71840 }, { "epoch": 0.10095599070530441, "grad_norm": 0.7431245446205139, "learning_rate": 0.00029999923066857274, "loss": 4.7798, "step": 71850 }, { "epoch": 0.10097004164346798, "grad_norm": 0.7345250844955444, "learning_rate": 0.00029999920692518864, "loss": 4.8772, "step": 71860 }, { "epoch": 0.10098409258163156, "grad_norm": 0.7660800814628601, "learning_rate": 
0.0002999991828209641, "loss": 4.8468, "step": 71870 }, { "epoch": 0.10099814351979514, "grad_norm": 0.7611793279647827, "learning_rate": 0.0002999991583558993, "loss": 4.8363, "step": 71880 }, { "epoch": 0.10101219445795871, "grad_norm": 0.7366256713867188, "learning_rate": 0.0002999991335299941, "loss": 4.7184, "step": 71890 }, { "epoch": 0.10102624539612229, "grad_norm": 0.7112072706222534, "learning_rate": 0.0002999991083432487, "loss": 4.724, "step": 71900 }, { "epoch": 0.10104029633428586, "grad_norm": 0.7414090633392334, "learning_rate": 0.0002999990827956631, "loss": 4.8933, "step": 71910 }, { "epoch": 0.10105434727244944, "grad_norm": 0.7189602851867676, "learning_rate": 0.0002999990568872374, "loss": 4.7637, "step": 71920 }, { "epoch": 0.10106839821061303, "grad_norm": 0.7293269038200378, "learning_rate": 0.0002999990306179716, "loss": 4.8199, "step": 71930 }, { "epoch": 0.1010824491487766, "grad_norm": 0.7632038593292236, "learning_rate": 0.0002999990039878658, "loss": 4.6565, "step": 71940 }, { "epoch": 0.10109650008694018, "grad_norm": 0.7255117297172546, "learning_rate": 0.0002999989769969201, "loss": 4.6914, "step": 71950 }, { "epoch": 0.10111055102510376, "grad_norm": 0.7196149230003357, "learning_rate": 0.0002999989496451345, "loss": 4.7191, "step": 71960 }, { "epoch": 0.10112460196326734, "grad_norm": 0.7434830665588379, "learning_rate": 0.00029999892193250917, "loss": 4.7101, "step": 71970 }, { "epoch": 0.10113865290143091, "grad_norm": 0.7188177108764648, "learning_rate": 0.000299998893859044, "loss": 4.8254, "step": 71980 }, { "epoch": 0.10115270383959449, "grad_norm": 0.7340826988220215, "learning_rate": 0.00029999886542473925, "loss": 4.7947, "step": 71990 }, { "epoch": 0.10116675477775806, "grad_norm": 0.7305575609207153, "learning_rate": 0.0002999988366295948, "loss": 4.5447, "step": 72000 }, { "epoch": 0.10118080571592164, "grad_norm": 0.7468332052230835, "learning_rate": 0.0002999988074736109, "loss": 4.7055, "step": 72010 }, { "epoch": 
0.10119485665408522, "grad_norm": 0.7357817888259888, "learning_rate": 0.00029999877795678753, "loss": 4.8088, "step": 72020 }, { "epoch": 0.1012089075922488, "grad_norm": 0.7364190816879272, "learning_rate": 0.0002999987480791248, "loss": 4.6873, "step": 72030 }, { "epoch": 0.10122295853041238, "grad_norm": 0.7412238717079163, "learning_rate": 0.00029999871784062266, "loss": 4.7859, "step": 72040 }, { "epoch": 0.10123700946857596, "grad_norm": 0.7454258799552917, "learning_rate": 0.00029999868724128135, "loss": 4.7342, "step": 72050 }, { "epoch": 0.10125106040673953, "grad_norm": 0.7320020794868469, "learning_rate": 0.0002999986562811008, "loss": 4.7076, "step": 72060 }, { "epoch": 0.10126511134490311, "grad_norm": 0.7472354769706726, "learning_rate": 0.00029999862496008124, "loss": 4.7221, "step": 72070 }, { "epoch": 0.10127916228306669, "grad_norm": 0.7363531589508057, "learning_rate": 0.0002999985932782226, "loss": 4.681, "step": 72080 }, { "epoch": 0.10129321322123026, "grad_norm": 0.7143122553825378, "learning_rate": 0.000299998561235525, "loss": 4.7911, "step": 72090 }, { "epoch": 0.10130726415939384, "grad_norm": 0.7466495037078857, "learning_rate": 0.0002999985288319886, "loss": 4.9298, "step": 72100 }, { "epoch": 0.10132131509755742, "grad_norm": 0.7105987668037415, "learning_rate": 0.00029999849606761336, "loss": 4.8656, "step": 72110 }, { "epoch": 0.10133536603572099, "grad_norm": 0.7323604822158813, "learning_rate": 0.0002999984629423994, "loss": 4.8375, "step": 72120 }, { "epoch": 0.10134941697388458, "grad_norm": 0.7545437812805176, "learning_rate": 0.00029999842945634684, "loss": 4.7479, "step": 72130 }, { "epoch": 0.10136346791204816, "grad_norm": 0.732039213180542, "learning_rate": 0.0002999983956094557, "loss": 4.8971, "step": 72140 }, { "epoch": 0.10137751885021173, "grad_norm": 0.7388688325881958, "learning_rate": 0.0002999983614017261, "loss": 4.7152, "step": 72150 }, { "epoch": 0.10139156978837531, "grad_norm": 0.7269540429115295, 
"learning_rate": 0.0002999983268331582, "loss": 4.8155, "step": 72160 }, { "epoch": 0.10140562072653889, "grad_norm": 0.7393106818199158, "learning_rate": 0.00029999829190375187, "loss": 4.7825, "step": 72170 }, { "epoch": 0.10141967166470246, "grad_norm": 0.7458699941635132, "learning_rate": 0.00029999825661350735, "loss": 4.696, "step": 72180 }, { "epoch": 0.10143372260286604, "grad_norm": 0.7346116900444031, "learning_rate": 0.00029999822096242473, "loss": 4.7549, "step": 72190 }, { "epoch": 0.10144777354102961, "grad_norm": 0.7251469492912292, "learning_rate": 0.000299998184950504, "loss": 4.7406, "step": 72200 }, { "epoch": 0.10146182447919319, "grad_norm": 0.7549038529396057, "learning_rate": 0.0002999981485777453, "loss": 4.7226, "step": 72210 }, { "epoch": 0.10147587541735677, "grad_norm": 0.7311290502548218, "learning_rate": 0.0002999981118441488, "loss": 4.7761, "step": 72220 }, { "epoch": 0.10148992635552034, "grad_norm": 0.7395356297492981, "learning_rate": 0.0002999980747497145, "loss": 4.8128, "step": 72230 }, { "epoch": 0.10150397729368393, "grad_norm": 0.7352813482284546, "learning_rate": 0.0002999980372944425, "loss": 4.7336, "step": 72240 }, { "epoch": 0.10151802823184751, "grad_norm": 0.7374472618103027, "learning_rate": 0.00029999799947833287, "loss": 4.6786, "step": 72250 }, { "epoch": 0.10153207917001109, "grad_norm": 0.7531014680862427, "learning_rate": 0.0002999979613013857, "loss": 4.7935, "step": 72260 }, { "epoch": 0.10154613010817466, "grad_norm": 0.7118738889694214, "learning_rate": 0.00029999792276360113, "loss": 4.7567, "step": 72270 }, { "epoch": 0.10156018104633824, "grad_norm": 0.7456701397895813, "learning_rate": 0.00029999788386497923, "loss": 4.7232, "step": 72280 }, { "epoch": 0.10157423198450181, "grad_norm": 0.7232758402824402, "learning_rate": 0.00029999784460552005, "loss": 4.7032, "step": 72290 }, { "epoch": 0.10158828292266539, "grad_norm": 0.7644693851470947, "learning_rate": 0.0002999978049852238, "loss": 4.8459, 
"step": 72300 }, { "epoch": 0.10160233386082897, "grad_norm": 0.7518143057823181, "learning_rate": 0.00029999776500409044, "loss": 4.7958, "step": 72310 }, { "epoch": 0.10161638479899254, "grad_norm": 0.7484323382377625, "learning_rate": 0.00029999772466212017, "loss": 4.7845, "step": 72320 }, { "epoch": 0.10163043573715612, "grad_norm": 0.7363878488540649, "learning_rate": 0.000299997683959313, "loss": 4.8805, "step": 72330 }, { "epoch": 0.10164448667531971, "grad_norm": 0.771102786064148, "learning_rate": 0.00029999764289566914, "loss": 4.766, "step": 72340 }, { "epoch": 0.10165853761348329, "grad_norm": 0.7849813103675842, "learning_rate": 0.00029999760147118854, "loss": 4.7616, "step": 72350 }, { "epoch": 0.10167258855164686, "grad_norm": 0.7605536580085754, "learning_rate": 0.0002999975596858714, "loss": 4.8137, "step": 72360 }, { "epoch": 0.10168663948981044, "grad_norm": 0.7546150088310242, "learning_rate": 0.0002999975175397178, "loss": 4.7202, "step": 72370 }, { "epoch": 0.10170069042797401, "grad_norm": 0.7308611869812012, "learning_rate": 0.0002999974750327279, "loss": 4.7466, "step": 72380 }, { "epoch": 0.10171474136613759, "grad_norm": 0.7356964349746704, "learning_rate": 0.0002999974321649017, "loss": 4.8318, "step": 72390 }, { "epoch": 0.10172879230430117, "grad_norm": 0.73519366979599, "learning_rate": 0.00029999738893623937, "loss": 4.7815, "step": 72400 }, { "epoch": 0.10174284324246474, "grad_norm": 0.6988383531570435, "learning_rate": 0.000299997345346741, "loss": 4.7789, "step": 72410 }, { "epoch": 0.10175689418062832, "grad_norm": 0.745654284954071, "learning_rate": 0.0002999973013964067, "loss": 4.8238, "step": 72420 }, { "epoch": 0.1017709451187919, "grad_norm": 0.7207648158073425, "learning_rate": 0.00029999725708523643, "loss": 4.7722, "step": 72430 }, { "epoch": 0.10178499605695548, "grad_norm": 0.8427481651306152, "learning_rate": 0.00029999721241323054, "loss": 4.7605, "step": 72440 }, { "epoch": 0.10179904699511906, "grad_norm": 
0.7269243597984314, "learning_rate": 0.00029999716738038904, "loss": 4.8267, "step": 72450 }, { "epoch": 0.10181309793328264, "grad_norm": 0.7501411437988281, "learning_rate": 0.00029999712198671195, "loss": 4.7538, "step": 72460 }, { "epoch": 0.10182714887144621, "grad_norm": 0.7097275853157043, "learning_rate": 0.0002999970762321995, "loss": 4.7286, "step": 72470 }, { "epoch": 0.10184119980960979, "grad_norm": 0.7278454303741455, "learning_rate": 0.00029999703011685176, "loss": 4.8126, "step": 72480 }, { "epoch": 0.10185525074777337, "grad_norm": 0.7060046195983887, "learning_rate": 0.00029999698364066883, "loss": 4.715, "step": 72490 }, { "epoch": 0.10186930168593694, "grad_norm": 0.7554420828819275, "learning_rate": 0.0002999969368036508, "loss": 4.9029, "step": 72500 }, { "epoch": 0.10188335262410052, "grad_norm": 0.733871340751648, "learning_rate": 0.00029999688960579785, "loss": 4.808, "step": 72510 }, { "epoch": 0.1018974035622641, "grad_norm": 0.7300422191619873, "learning_rate": 0.00029999684204711006, "loss": 4.8701, "step": 72520 }, { "epoch": 0.10191145450042767, "grad_norm": 0.7577276825904846, "learning_rate": 0.0002999967941275875, "loss": 4.6814, "step": 72530 }, { "epoch": 0.10192550543859125, "grad_norm": 0.7170461416244507, "learning_rate": 0.00029999674584723035, "loss": 4.7992, "step": 72540 }, { "epoch": 0.10193955637675484, "grad_norm": 0.7378881573677063, "learning_rate": 0.00029999669720603864, "loss": 4.7144, "step": 72550 }, { "epoch": 0.10195360731491841, "grad_norm": 0.7653374671936035, "learning_rate": 0.0002999966482040126, "loss": 4.8943, "step": 72560 }, { "epoch": 0.10196765825308199, "grad_norm": 0.7405737042427063, "learning_rate": 0.0002999965988411523, "loss": 4.7858, "step": 72570 }, { "epoch": 0.10198170919124556, "grad_norm": 0.712135910987854, "learning_rate": 0.0002999965491174578, "loss": 4.9162, "step": 72580 }, { "epoch": 0.10199576012940914, "grad_norm": 0.742899477481842, "learning_rate": 0.0002999964990329293, 
"loss": 4.7942, "step": 72590 }, { "epoch": 0.10200981106757272, "grad_norm": 0.7559585571289062, "learning_rate": 0.0002999964485875669, "loss": 4.6854, "step": 72600 }, { "epoch": 0.1020238620057363, "grad_norm": 0.7186324000358582, "learning_rate": 0.00029999639778137074, "loss": 4.7303, "step": 72610 }, { "epoch": 0.10203791294389987, "grad_norm": 0.7546454071998596, "learning_rate": 0.0002999963466143408, "loss": 4.6544, "step": 72620 }, { "epoch": 0.10205196388206345, "grad_norm": 0.6856681108474731, "learning_rate": 0.00029999629508647746, "loss": 4.7369, "step": 72630 }, { "epoch": 0.10206601482022702, "grad_norm": 0.7506888508796692, "learning_rate": 0.00029999624319778067, "loss": 4.7411, "step": 72640 }, { "epoch": 0.10208006575839061, "grad_norm": 0.7311187982559204, "learning_rate": 0.00029999619094825056, "loss": 4.6478, "step": 72650 }, { "epoch": 0.10209411669655419, "grad_norm": 0.7421939969062805, "learning_rate": 0.0002999961383378873, "loss": 4.7328, "step": 72660 }, { "epoch": 0.10210816763471776, "grad_norm": 0.7084130644798279, "learning_rate": 0.00029999608536669095, "loss": 4.6734, "step": 72670 }, { "epoch": 0.10212221857288134, "grad_norm": 0.7495949864387512, "learning_rate": 0.0002999960320346617, "loss": 4.7531, "step": 72680 }, { "epoch": 0.10213626951104492, "grad_norm": 0.7394850254058838, "learning_rate": 0.0002999959783417997, "loss": 4.7793, "step": 72690 }, { "epoch": 0.10215032044920849, "grad_norm": 0.7716978788375854, "learning_rate": 0.00029999592428810504, "loss": 4.7758, "step": 72700 }, { "epoch": 0.10216437138737207, "grad_norm": 0.7250087857246399, "learning_rate": 0.0002999958698735779, "loss": 4.8447, "step": 72710 }, { "epoch": 0.10217842232553564, "grad_norm": 0.7172320485115051, "learning_rate": 0.0002999958150982183, "loss": 4.9669, "step": 72720 }, { "epoch": 0.10219247326369922, "grad_norm": 0.7347210049629211, "learning_rate": 0.00029999575996202647, "loss": 4.7817, "step": 72730 }, { "epoch": 
0.1022065242018628, "grad_norm": 0.7101386785507202, "learning_rate": 0.00029999570446500245, "loss": 4.7991, "step": 72740 }, { "epoch": 0.10222057514002639, "grad_norm": 0.7327938079833984, "learning_rate": 0.0002999956486071465, "loss": 4.707, "step": 72750 }, { "epoch": 0.10223462607818996, "grad_norm": 0.7212610244750977, "learning_rate": 0.0002999955923884587, "loss": 4.7766, "step": 72760 }, { "epoch": 0.10224867701635354, "grad_norm": 0.720736563205719, "learning_rate": 0.0002999955358089391, "loss": 4.7198, "step": 72770 }, { "epoch": 0.10226272795451712, "grad_norm": 0.7179093360900879, "learning_rate": 0.000299995478868588, "loss": 4.7682, "step": 72780 }, { "epoch": 0.10227677889268069, "grad_norm": 0.7311738133430481, "learning_rate": 0.0002999954215674054, "loss": 4.8445, "step": 72790 }, { "epoch": 0.10229082983084427, "grad_norm": 0.7069105505943298, "learning_rate": 0.00029999536390539145, "loss": 4.7873, "step": 72800 }, { "epoch": 0.10230488076900784, "grad_norm": 0.7458084225654602, "learning_rate": 0.00029999530588254635, "loss": 4.6617, "step": 72810 }, { "epoch": 0.10231893170717142, "grad_norm": 0.7396774291992188, "learning_rate": 0.00029999524749887025, "loss": 4.8425, "step": 72820 }, { "epoch": 0.102332982645335, "grad_norm": 0.7057560682296753, "learning_rate": 0.0002999951887543632, "loss": 4.8336, "step": 72830 }, { "epoch": 0.10234703358349857, "grad_norm": 0.716718852519989, "learning_rate": 0.0002999951296490254, "loss": 4.6897, "step": 72840 }, { "epoch": 0.10236108452166215, "grad_norm": 0.7290700674057007, "learning_rate": 0.000299995070182857, "loss": 4.8415, "step": 72850 }, { "epoch": 0.10237513545982574, "grad_norm": 0.7181606292724609, "learning_rate": 0.00029999501035585816, "loss": 4.8283, "step": 72860 }, { "epoch": 0.10238918639798932, "grad_norm": 0.7337809801101685, "learning_rate": 0.00029999495016802893, "loss": 4.8772, "step": 72870 }, { "epoch": 0.10240323733615289, "grad_norm": 0.7575526833534241, 
"learning_rate": 0.0002999948896193696, "loss": 4.787, "step": 72880 }, { "epoch": 0.10241728827431647, "grad_norm": null, "learning_rate": 0.00029999482870988017, "loss": 4.7359, "step": 72890 }, { "epoch": 0.10243133921248004, "grad_norm": 0.7764653563499451, "learning_rate": 0.0002999947735828301, "loss": 4.7426, "step": 72900 }, { "epoch": 0.10244539015064362, "grad_norm": 0.7185347676277161, "learning_rate": 0.00029999471198776404, "loss": 4.867, "step": 72910 }, { "epoch": 0.1024594410888072, "grad_norm": 0.755716860294342, "learning_rate": 0.0002999946500318684, "loss": 4.7958, "step": 72920 }, { "epoch": 0.10247349202697077, "grad_norm": 0.7544811367988586, "learning_rate": 0.0002999945877151432, "loss": 4.9392, "step": 72930 }, { "epoch": 0.10248754296513435, "grad_norm": 0.7307287454605103, "learning_rate": 0.00029999452503758875, "loss": 4.8479, "step": 72940 }, { "epoch": 0.10250159390329792, "grad_norm": 0.7607468366622925, "learning_rate": 0.00029999446199920516, "loss": 4.6962, "step": 72950 }, { "epoch": 0.10251564484146151, "grad_norm": 0.721833348274231, "learning_rate": 0.00029999439859999257, "loss": 4.7497, "step": 72960 }, { "epoch": 0.10252969577962509, "grad_norm": 0.7424848079681396, "learning_rate": 0.00029999433483995106, "loss": 4.7419, "step": 72970 }, { "epoch": 0.10254374671778867, "grad_norm": 0.721015453338623, "learning_rate": 0.0002999942707190809, "loss": 4.5932, "step": 72980 }, { "epoch": 0.10255779765595224, "grad_norm": 0.7474051713943481, "learning_rate": 0.00029999420623738214, "loss": 4.6855, "step": 72990 }, { "epoch": 0.10257184859411582, "grad_norm": 0.7426432967185974, "learning_rate": 0.000299994141394855, "loss": 4.7178, "step": 73000 }, { "epoch": 0.1025858995322794, "grad_norm": 0.7148788571357727, "learning_rate": 0.00029999407619149966, "loss": 4.8307, "step": 73010 }, { "epoch": 0.10259995047044297, "grad_norm": 0.7621682286262512, "learning_rate": 0.00029999401062731623, "loss": 4.7439, "step": 73020 }, { 
"epoch": 0.10261400140860655, "grad_norm": 0.7121155858039856, "learning_rate": 0.00029999394470230485, "loss": 4.6666, "step": 73030 }, { "epoch": 0.10262805234677012, "grad_norm": 0.7072491645812988, "learning_rate": 0.0002999938784164658, "loss": 4.6901, "step": 73040 }, { "epoch": 0.1026421032849337, "grad_norm": 0.7037854790687561, "learning_rate": 0.00029999381176979903, "loss": 4.7861, "step": 73050 }, { "epoch": 0.10265615422309729, "grad_norm": 0.7043249011039734, "learning_rate": 0.0002999937447623049, "loss": 4.8758, "step": 73060 }, { "epoch": 0.10267020516126087, "grad_norm": 0.7356353998184204, "learning_rate": 0.00029999367739398343, "loss": 4.7525, "step": 73070 }, { "epoch": 0.10268425609942444, "grad_norm": 0.7560482025146484, "learning_rate": 0.00029999360966483486, "loss": 4.642, "step": 73080 }, { "epoch": 0.10269830703758802, "grad_norm": 0.7081610560417175, "learning_rate": 0.00029999354157485937, "loss": 4.6725, "step": 73090 }, { "epoch": 0.1027123579757516, "grad_norm": 0.7189093828201294, "learning_rate": 0.00029999347312405705, "loss": 4.852, "step": 73100 }, { "epoch": 0.10272640891391517, "grad_norm": 0.7496188879013062, "learning_rate": 0.0002999934043124281, "loss": 4.7643, "step": 73110 }, { "epoch": 0.10274045985207875, "grad_norm": 0.8219110369682312, "learning_rate": 0.0002999933351399727, "loss": 4.8383, "step": 73120 }, { "epoch": 0.10275451079024232, "grad_norm": 0.714171826839447, "learning_rate": 0.00029999326560669096, "loss": 4.6523, "step": 73130 }, { "epoch": 0.1027685617284059, "grad_norm": 0.7769915461540222, "learning_rate": 0.00029999319571258316, "loss": 4.7178, "step": 73140 }, { "epoch": 0.10278261266656948, "grad_norm": 0.7325097918510437, "learning_rate": 0.00029999312545764934, "loss": 4.7205, "step": 73150 }, { "epoch": 0.10279666360473305, "grad_norm": 0.7231411933898926, "learning_rate": 0.00029999305484188974, "loss": 4.8554, "step": 73160 }, { "epoch": 0.10281071454289664, "grad_norm": 0.7304064035415649, 
"learning_rate": 0.00029999298386530456, "loss": 4.7141, "step": 73170 }, { "epoch": 0.10282476548106022, "grad_norm": 0.7381361722946167, "learning_rate": 0.0002999929125278939, "loss": 4.8076, "step": 73180 }, { "epoch": 0.1028388164192238, "grad_norm": 0.7440902590751648, "learning_rate": 0.00029999284082965797, "loss": 4.7352, "step": 73190 }, { "epoch": 0.10285286735738737, "grad_norm": 0.7400135397911072, "learning_rate": 0.0002999927687705969, "loss": 4.8078, "step": 73200 }, { "epoch": 0.10286691829555095, "grad_norm": 0.6989032626152039, "learning_rate": 0.00029999269635071094, "loss": 4.8402, "step": 73210 }, { "epoch": 0.10288096923371452, "grad_norm": 0.7391857504844666, "learning_rate": 0.00029999262357000017, "loss": 4.8794, "step": 73220 }, { "epoch": 0.1028950201718781, "grad_norm": 0.7222636342048645, "learning_rate": 0.00029999255042846486, "loss": 4.6988, "step": 73230 }, { "epoch": 0.10290907111004167, "grad_norm": 0.7194198369979858, "learning_rate": 0.0002999924769261051, "loss": 4.649, "step": 73240 }, { "epoch": 0.10292312204820525, "grad_norm": 0.7942532300949097, "learning_rate": 0.00029999240306292116, "loss": 4.7988, "step": 73250 }, { "epoch": 0.10293717298636883, "grad_norm": 0.7117690443992615, "learning_rate": 0.00029999232883891314, "loss": 4.6962, "step": 73260 }, { "epoch": 0.10295122392453242, "grad_norm": 0.7358763813972473, "learning_rate": 0.00029999225425408123, "loss": 4.7445, "step": 73270 }, { "epoch": 0.102965274862696, "grad_norm": 0.7358459234237671, "learning_rate": 0.00029999217930842565, "loss": 4.7767, "step": 73280 }, { "epoch": 0.10297932580085957, "grad_norm": 0.7460939884185791, "learning_rate": 0.0002999921040019465, "loss": 4.7906, "step": 73290 }, { "epoch": 0.10299337673902315, "grad_norm": 0.7220245599746704, "learning_rate": 0.0002999920283346441, "loss": 4.818, "step": 73300 }, { "epoch": 0.10300742767718672, "grad_norm": 0.7411149740219116, "learning_rate": 0.00029999195230651847, "loss": 4.7419, "step": 
73310 }, { "epoch": 0.1030214786153503, "grad_norm": 0.7259693145751953, "learning_rate": 0.0002999918759175699, "loss": 4.7345, "step": 73320 }, { "epoch": 0.10303552955351387, "grad_norm": 0.7284397482872009, "learning_rate": 0.00029999179916779856, "loss": 4.873, "step": 73330 }, { "epoch": 0.10304958049167745, "grad_norm": 0.7188897132873535, "learning_rate": 0.0002999917220572046, "loss": 4.8309, "step": 73340 }, { "epoch": 0.10306363142984103, "grad_norm": 0.7184074521064758, "learning_rate": 0.0002999916445857882, "loss": 4.7298, "step": 73350 }, { "epoch": 0.1030776823680046, "grad_norm": 0.7120581269264221, "learning_rate": 0.00029999156675354963, "loss": 4.8255, "step": 73360 }, { "epoch": 0.10309173330616819, "grad_norm": 0.733837902545929, "learning_rate": 0.000299991488560489, "loss": 4.7718, "step": 73370 }, { "epoch": 0.10310578424433177, "grad_norm": 0.7166063785552979, "learning_rate": 0.00029999141000660646, "loss": 4.801, "step": 73380 }, { "epoch": 0.10311983518249535, "grad_norm": 0.7153290510177612, "learning_rate": 0.0002999913310919023, "loss": 4.7146, "step": 73390 }, { "epoch": 0.10313388612065892, "grad_norm": 0.7479586005210876, "learning_rate": 0.00029999125181637666, "loss": 4.8948, "step": 73400 }, { "epoch": 0.1031479370588225, "grad_norm": 0.7473878860473633, "learning_rate": 0.0002999911721800297, "loss": 4.7084, "step": 73410 }, { "epoch": 0.10316198799698607, "grad_norm": 0.7283495664596558, "learning_rate": 0.0002999910921828617, "loss": 4.7356, "step": 73420 }, { "epoch": 0.10317603893514965, "grad_norm": 0.7387956976890564, "learning_rate": 0.0002999910118248728, "loss": 4.8818, "step": 73430 }, { "epoch": 0.10319008987331323, "grad_norm": 0.7675484418869019, "learning_rate": 0.0002999909311060632, "loss": 4.8588, "step": 73440 }, { "epoch": 0.1032041408114768, "grad_norm": 0.7122921943664551, "learning_rate": 0.00029999085002643306, "loss": 4.7725, "step": 73450 }, { "epoch": 0.10321819174964038, "grad_norm": 
0.7225001454353333, "learning_rate": 0.0002999907685859826, "loss": 4.6845, "step": 73460 }, { "epoch": 0.10323224268780395, "grad_norm": 0.7381240725517273, "learning_rate": 0.00029999068678471203, "loss": 4.7242, "step": 73470 }, { "epoch": 0.10324629362596754, "grad_norm": 0.7431597709655762, "learning_rate": 0.00029999060462262153, "loss": 4.8332, "step": 73480 }, { "epoch": 0.10326034456413112, "grad_norm": 0.7094483971595764, "learning_rate": 0.00029999052209971126, "loss": 4.812, "step": 73490 }, { "epoch": 0.1032743955022947, "grad_norm": 0.7248532176017761, "learning_rate": 0.0002999904392159815, "loss": 4.734, "step": 73500 }, { "epoch": 0.10328844644045827, "grad_norm": 0.7661448121070862, "learning_rate": 0.00029999035597143243, "loss": 4.8685, "step": 73510 }, { "epoch": 0.10330249737862185, "grad_norm": 0.7161885499954224, "learning_rate": 0.0002999902723660642, "loss": 4.7242, "step": 73520 }, { "epoch": 0.10331654831678543, "grad_norm": 0.8056687116622925, "learning_rate": 0.00029999018839987706, "loss": 4.7779, "step": 73530 }, { "epoch": 0.103330599254949, "grad_norm": 0.7297375798225403, "learning_rate": 0.0002999901040728712, "loss": 4.699, "step": 73540 }, { "epoch": 0.10334465019311258, "grad_norm": 0.7313802242279053, "learning_rate": 0.00029999001938504677, "loss": 4.7828, "step": 73550 }, { "epoch": 0.10335870113127615, "grad_norm": 0.7233887314796448, "learning_rate": 0.000299989934336404, "loss": 4.7871, "step": 73560 }, { "epoch": 0.10337275206943973, "grad_norm": 0.7726687788963318, "learning_rate": 0.0002999898489269432, "loss": 4.7328, "step": 73570 }, { "epoch": 0.10338680300760332, "grad_norm": 0.7139991521835327, "learning_rate": 0.00029998976315666445, "loss": 4.6724, "step": 73580 }, { "epoch": 0.1034008539457669, "grad_norm": 0.7393600940704346, "learning_rate": 0.00029998967702556795, "loss": 4.7427, "step": 73590 }, { "epoch": 0.10341490488393047, "grad_norm": 0.7399834394454956, "learning_rate": 0.00029998959053365406, 
"loss": 4.7802, "step": 73600 }, { "epoch": 0.10342895582209405, "grad_norm": 0.7233583331108093, "learning_rate": 0.0002999895036809228, "loss": 4.8864, "step": 73610 }, { "epoch": 0.10344300676025762, "grad_norm": 0.7222065925598145, "learning_rate": 0.0002999894164673745, "loss": 4.771, "step": 73620 }, { "epoch": 0.1034570576984212, "grad_norm": 0.7875381112098694, "learning_rate": 0.0002999893288930093, "loss": 4.6923, "step": 73630 }, { "epoch": 0.10347110863658478, "grad_norm": 0.7450695037841797, "learning_rate": 0.00029998924095782744, "loss": 4.7587, "step": 73640 }, { "epoch": 0.10348515957474835, "grad_norm": 0.7188162803649902, "learning_rate": 0.00029998915266182916, "loss": 4.8141, "step": 73650 }, { "epoch": 0.10349921051291193, "grad_norm": 0.7098068594932556, "learning_rate": 0.00029998906400501463, "loss": 4.7548, "step": 73660 }, { "epoch": 0.1035132614510755, "grad_norm": 0.730060875415802, "learning_rate": 0.00029998897498738407, "loss": 4.788, "step": 73670 }, { "epoch": 0.1035273123892391, "grad_norm": 0.7018395662307739, "learning_rate": 0.00029998888560893775, "loss": 4.8347, "step": 73680 }, { "epoch": 0.10354136332740267, "grad_norm": 0.7120745778083801, "learning_rate": 0.0002999887958696758, "loss": 4.8095, "step": 73690 }, { "epoch": 0.10355541426556625, "grad_norm": 0.7242351174354553, "learning_rate": 0.00029998870576959847, "loss": 4.6808, "step": 73700 }, { "epoch": 0.10356946520372982, "grad_norm": 0.7491177320480347, "learning_rate": 0.00029998861530870595, "loss": 4.7441, "step": 73710 }, { "epoch": 0.1035835161418934, "grad_norm": 0.7404350638389587, "learning_rate": 0.00029998852448699854, "loss": 4.7472, "step": 73720 }, { "epoch": 0.10359756708005698, "grad_norm": 0.7084073424339294, "learning_rate": 0.0002999884333044764, "loss": 4.7107, "step": 73730 }, { "epoch": 0.10361161801822055, "grad_norm": 0.7354574799537659, "learning_rate": 0.00029998834176113973, "loss": 4.7334, "step": 73740 }, { "epoch": 0.10362566895638413, 
"grad_norm": 0.7333629131317139, "learning_rate": 0.0002999882498569888, "loss": 4.7645, "step": 73750 }, { "epoch": 0.1036397198945477, "grad_norm": 0.7341249585151672, "learning_rate": 0.0002999881575920238, "loss": 4.8465, "step": 73760 }, { "epoch": 0.10365377083271128, "grad_norm": 0.7545302510261536, "learning_rate": 0.00029998806496624495, "loss": 4.7715, "step": 73770 }, { "epoch": 0.10366782177087486, "grad_norm": 0.7346596121788025, "learning_rate": 0.00029998797197965246, "loss": 4.7478, "step": 73780 }, { "epoch": 0.10368187270903845, "grad_norm": 0.7493293285369873, "learning_rate": 0.0002999878786322466, "loss": 4.6977, "step": 73790 }, { "epoch": 0.10369592364720202, "grad_norm": 0.713477611541748, "learning_rate": 0.00029998778492402756, "loss": 4.7183, "step": 73800 }, { "epoch": 0.1037099745853656, "grad_norm": 0.7505142688751221, "learning_rate": 0.0002999876908549956, "loss": 4.7706, "step": 73810 }, { "epoch": 0.10372402552352918, "grad_norm": 0.7234914898872375, "learning_rate": 0.0002999875964251509, "loss": 4.7092, "step": 73820 }, { "epoch": 0.10373807646169275, "grad_norm": 0.7134677171707153, "learning_rate": 0.0002999875016344937, "loss": 4.8585, "step": 73830 }, { "epoch": 0.10375212739985633, "grad_norm": 0.7248352766036987, "learning_rate": 0.00029998740648302425, "loss": 4.7321, "step": 73840 }, { "epoch": 0.1037661783380199, "grad_norm": 0.729803204536438, "learning_rate": 0.0002999873109707428, "loss": 4.8516, "step": 73850 }, { "epoch": 0.10378022927618348, "grad_norm": 0.7194018363952637, "learning_rate": 0.0002999872150976495, "loss": 4.7418, "step": 73860 }, { "epoch": 0.10379428021434706, "grad_norm": 0.7106111645698547, "learning_rate": 0.0002999871188637446, "loss": 4.7227, "step": 73870 }, { "epoch": 0.10380833115251063, "grad_norm": 0.724184513092041, "learning_rate": 0.0002999870222690284, "loss": 4.7338, "step": 73880 }, { "epoch": 0.10382238209067422, "grad_norm": 0.7321856617927551, "learning_rate": 
0.0002999869253135011, "loss": 4.7236, "step": 73890 }, { "epoch": 0.1038364330288378, "grad_norm": 0.7175441384315491, "learning_rate": 0.00029998682799716294, "loss": 4.8629, "step": 73900 }, { "epoch": 0.10385048396700138, "grad_norm": 0.7437195181846619, "learning_rate": 0.00029998673032001407, "loss": 4.7403, "step": 73910 }, { "epoch": 0.10386453490516495, "grad_norm": 0.7287278771400452, "learning_rate": 0.0002999866322820548, "loss": 4.7742, "step": 73920 }, { "epoch": 0.10387858584332853, "grad_norm": 0.7289449572563171, "learning_rate": 0.00029998653388328537, "loss": 4.7351, "step": 73930 }, { "epoch": 0.1038926367814921, "grad_norm": 0.7480982542037964, "learning_rate": 0.0002999864351237061, "loss": 4.7372, "step": 73940 }, { "epoch": 0.10390668771965568, "grad_norm": 0.7479860186576843, "learning_rate": 0.00029998633600331705, "loss": 4.7979, "step": 73950 }, { "epoch": 0.10392073865781926, "grad_norm": 0.756632387638092, "learning_rate": 0.0002999862365221185, "loss": 4.7318, "step": 73960 }, { "epoch": 0.10393478959598283, "grad_norm": 0.722648561000824, "learning_rate": 0.0002999861366801108, "loss": 4.7916, "step": 73970 }, { "epoch": 0.10394884053414641, "grad_norm": 0.7514876127243042, "learning_rate": 0.0002999860364772941, "loss": 4.8139, "step": 73980 }, { "epoch": 0.10396289147231, "grad_norm": 0.7162560224533081, "learning_rate": 0.0002999859359136686, "loss": 4.7387, "step": 73990 }, { "epoch": 0.10397694241047357, "grad_norm": 0.7170782089233398, "learning_rate": 0.0002999858349892347, "loss": 4.8234, "step": 74000 }, { "epoch": 0.10399099334863715, "grad_norm": 0.7494903206825256, "learning_rate": 0.0002999857337039925, "loss": 4.7603, "step": 74010 }, { "epoch": 0.10400504428680073, "grad_norm": 0.7144988775253296, "learning_rate": 0.00029998563205794223, "loss": 4.7661, "step": 74020 }, { "epoch": 0.1040190952249643, "grad_norm": 0.7347742319107056, "learning_rate": 0.0002999855300510843, "loss": 4.8141, "step": 74030 }, { "epoch": 
0.10403314616312788, "grad_norm": 0.7606138586997986, "learning_rate": 0.0002999854276834188, "loss": 4.7885, "step": 74040 }, { "epoch": 0.10404719710129146, "grad_norm": 0.7385061979293823, "learning_rate": 0.00029998532495494604, "loss": 4.8078, "step": 74050 }, { "epoch": 0.10406124803945503, "grad_norm": 0.7367041110992432, "learning_rate": 0.00029998522186566626, "loss": 4.802, "step": 74060 }, { "epoch": 0.10407529897761861, "grad_norm": 0.7521949410438538, "learning_rate": 0.0002999851184155797, "loss": 4.7509, "step": 74070 }, { "epoch": 0.10408934991578218, "grad_norm": 0.746960461139679, "learning_rate": 0.00029998501460468656, "loss": 4.7074, "step": 74080 }, { "epoch": 0.10410340085394576, "grad_norm": 0.7234981060028076, "learning_rate": 0.0002999849104329872, "loss": 4.8579, "step": 74090 }, { "epoch": 0.10411745179210935, "grad_norm": 0.6993427872657776, "learning_rate": 0.00029998480590048176, "loss": 4.7195, "step": 74100 }, { "epoch": 0.10413150273027293, "grad_norm": 0.7664044499397278, "learning_rate": 0.0002999847010071706, "loss": 4.8389, "step": 74110 }, { "epoch": 0.1041455536684365, "grad_norm": 0.7423094511032104, "learning_rate": 0.0002999845957530538, "loss": 4.7013, "step": 74120 }, { "epoch": 0.10415960460660008, "grad_norm": 0.7351399660110474, "learning_rate": 0.00029998449013813184, "loss": 4.8115, "step": 74130 }, { "epoch": 0.10417365554476365, "grad_norm": 0.7567272186279297, "learning_rate": 0.00029998438416240484, "loss": 4.7673, "step": 74140 }, { "epoch": 0.10418770648292723, "grad_norm": 0.7193549275398254, "learning_rate": 0.00029998427782587303, "loss": 4.7971, "step": 74150 }, { "epoch": 0.10420175742109081, "grad_norm": 0.7518367767333984, "learning_rate": 0.0002999841711285368, "loss": 4.8342, "step": 74160 }, { "epoch": 0.10421580835925438, "grad_norm": 0.7178200483322144, "learning_rate": 0.0002999840640703963, "loss": 4.7677, "step": 74170 }, { "epoch": 0.10422985929741796, "grad_norm": 0.8699411749839783, 
"learning_rate": 0.0002999839566514517, "loss": 4.6964, "step": 74180 }, { "epoch": 0.10424391023558154, "grad_norm": 0.7391669750213623, "learning_rate": 0.0002999838488717035, "loss": 4.7222, "step": 74190 }, { "epoch": 0.10425796117374513, "grad_norm": 0.7199887633323669, "learning_rate": 0.0002999837407311517, "loss": 4.7128, "step": 74200 }, { "epoch": 0.1042720121119087, "grad_norm": 0.7740340232849121, "learning_rate": 0.00029998363222979676, "loss": 4.6918, "step": 74210 }, { "epoch": 0.10428606305007228, "grad_norm": 0.7272385954856873, "learning_rate": 0.0002999835233676388, "loss": 4.6641, "step": 74220 }, { "epoch": 0.10430011398823585, "grad_norm": 0.7720179557800293, "learning_rate": 0.0002999834141446782, "loss": 4.8111, "step": 74230 }, { "epoch": 0.10431416492639943, "grad_norm": 0.7326408624649048, "learning_rate": 0.0002999833045609152, "loss": 4.7686, "step": 74240 }, { "epoch": 0.104328215864563, "grad_norm": 0.708678126335144, "learning_rate": 0.00029998319461635, "loss": 4.7862, "step": 74250 }, { "epoch": 0.10434226680272658, "grad_norm": 0.7066006660461426, "learning_rate": 0.00029998308431098287, "loss": 4.7004, "step": 74260 }, { "epoch": 0.10435631774089016, "grad_norm": 0.7312318682670593, "learning_rate": 0.00029998297364481415, "loss": 4.8703, "step": 74270 }, { "epoch": 0.10437036867905373, "grad_norm": 0.7320531606674194, "learning_rate": 0.00029998286261784403, "loss": 4.694, "step": 74280 }, { "epoch": 0.10438441961721731, "grad_norm": 0.743951141834259, "learning_rate": 0.0002999827512300728, "loss": 4.8147, "step": 74290 }, { "epoch": 0.1043984705553809, "grad_norm": 0.7393697500228882, "learning_rate": 0.00029998263948150075, "loss": 4.8336, "step": 74300 }, { "epoch": 0.10441252149354448, "grad_norm": 0.7255837917327881, "learning_rate": 0.00029998252737212813, "loss": 4.7326, "step": 74310 }, { "epoch": 0.10442657243170805, "grad_norm": 0.7354263067245483, "learning_rate": 0.0002999824149019552, "loss": 4.7204, "step": 74320 
}, { "epoch": 0.10444062336987163, "grad_norm": 0.7385523915290833, "learning_rate": 0.0002999823020709823, "loss": 4.7987, "step": 74330 }, { "epoch": 0.1044546743080352, "grad_norm": 0.7048619985580444, "learning_rate": 0.0002999821888792096, "loss": 4.7279, "step": 74340 }, { "epoch": 0.10446872524619878, "grad_norm": 0.7030783295631409, "learning_rate": 0.00029998207532663736, "loss": 4.8038, "step": 74350 }, { "epoch": 0.10448277618436236, "grad_norm": 0.7411651611328125, "learning_rate": 0.000299981961413266, "loss": 4.9361, "step": 74360 }, { "epoch": 0.10449682712252593, "grad_norm": 0.7514051198959351, "learning_rate": 0.00029998184713909563, "loss": 4.7624, "step": 74370 }, { "epoch": 0.10451087806068951, "grad_norm": 0.7228174209594727, "learning_rate": 0.0002999817325041267, "loss": 4.8236, "step": 74380 }, { "epoch": 0.10452492899885309, "grad_norm": 0.7124325633049011, "learning_rate": 0.0002999816175083593, "loss": 4.8186, "step": 74390 }, { "epoch": 0.10453897993701666, "grad_norm": 0.7320361137390137, "learning_rate": 0.00029998150215179383, "loss": 4.8853, "step": 74400 }, { "epoch": 0.10455303087518025, "grad_norm": 0.7228125333786011, "learning_rate": 0.0002999813864344305, "loss": 4.6899, "step": 74410 }, { "epoch": 0.10456708181334383, "grad_norm": 0.7609881162643433, "learning_rate": 0.00029998127035626965, "loss": 4.6897, "step": 74420 }, { "epoch": 0.1045811327515074, "grad_norm": 0.7024797797203064, "learning_rate": 0.0002999811539173115, "loss": 4.756, "step": 74430 }, { "epoch": 0.10459518368967098, "grad_norm": 0.7212551832199097, "learning_rate": 0.00029998103711755637, "loss": 4.8677, "step": 74440 }, { "epoch": 0.10460923462783456, "grad_norm": 0.7069899439811707, "learning_rate": 0.00029998091995700455, "loss": 4.8609, "step": 74450 }, { "epoch": 0.10462328556599813, "grad_norm": 0.7125039100646973, "learning_rate": 0.00029998080243565626, "loss": 4.6833, "step": 74460 }, { "epoch": 0.10463733650416171, "grad_norm": 
0.7615266442298889, "learning_rate": 0.00029998068455351184, "loss": 4.6864, "step": 74470 }, { "epoch": 0.10465138744232529, "grad_norm": 0.7260366082191467, "learning_rate": 0.00029998056631057155, "loss": 4.7931, "step": 74480 }, { "epoch": 0.10466543838048886, "grad_norm": 0.7347187995910645, "learning_rate": 0.00029998044770683567, "loss": 4.797, "step": 74490 }, { "epoch": 0.10467948931865244, "grad_norm": 0.7333105206489563, "learning_rate": 0.0002999803287423045, "loss": 4.7057, "step": 74500 }, { "epoch": 0.10469354025681603, "grad_norm": 0.7438333630561829, "learning_rate": 0.0002999802094169783, "loss": 4.8193, "step": 74510 }, { "epoch": 0.1047075911949796, "grad_norm": 0.836247980594635, "learning_rate": 0.00029998008973085744, "loss": 4.6534, "step": 74520 }, { "epoch": 0.10472164213314318, "grad_norm": 0.7281263470649719, "learning_rate": 0.00029997996968394216, "loss": 4.7365, "step": 74530 }, { "epoch": 0.10473569307130676, "grad_norm": 0.7797707319259644, "learning_rate": 0.0002999798492762327, "loss": 4.6875, "step": 74540 }, { "epoch": 0.10474974400947033, "grad_norm": 0.7314170598983765, "learning_rate": 0.00029997972850772937, "loss": 4.7991, "step": 74550 }, { "epoch": 0.10476379494763391, "grad_norm": 0.8902124762535095, "learning_rate": 0.00029997960737843246, "loss": 4.7621, "step": 74560 }, { "epoch": 0.10477784588579749, "grad_norm": 0.7190029621124268, "learning_rate": 0.00029997948588834233, "loss": 4.8784, "step": 74570 }, { "epoch": 0.10479189682396106, "grad_norm": 0.7397128343582153, "learning_rate": 0.00029997936403745916, "loss": 4.8362, "step": 74580 }, { "epoch": 0.10480594776212464, "grad_norm": 0.7313927412033081, "learning_rate": 0.0002999792418257833, "loss": 4.788, "step": 74590 }, { "epoch": 0.10481999870028821, "grad_norm": 0.7316232919692993, "learning_rate": 0.00029997911925331514, "loss": 4.6484, "step": 74600 }, { "epoch": 0.1048340496384518, "grad_norm": 0.7240854501724243, "learning_rate": 0.00029997899632005484, 
"loss": 4.6801, "step": 74610 }, { "epoch": 0.10484810057661538, "grad_norm": 0.7367461323738098, "learning_rate": 0.0002999788730260027, "loss": 4.7329, "step": 74620 }, { "epoch": 0.10486215151477896, "grad_norm": 0.7352172136306763, "learning_rate": 0.00029997874937115905, "loss": 4.6665, "step": 74630 }, { "epoch": 0.10487620245294253, "grad_norm": 0.7014811635017395, "learning_rate": 0.0002999786253555242, "loss": 4.7753, "step": 74640 }, { "epoch": 0.10489025339110611, "grad_norm": 0.7095927596092224, "learning_rate": 0.0002999785009790985, "loss": 4.7157, "step": 74650 }, { "epoch": 0.10490430432926968, "grad_norm": 0.7442272305488586, "learning_rate": 0.00029997837624188206, "loss": 4.7303, "step": 74660 }, { "epoch": 0.10491835526743326, "grad_norm": 0.7289188504219055, "learning_rate": 0.00029997825114387545, "loss": 4.8395, "step": 74670 }, { "epoch": 0.10493240620559684, "grad_norm": 0.7389072179794312, "learning_rate": 0.0002999781256850788, "loss": 4.7305, "step": 74680 }, { "epoch": 0.10494645714376041, "grad_norm": 0.7263525128364563, "learning_rate": 0.00029997799986549237, "loss": 4.8211, "step": 74690 }, { "epoch": 0.10496050808192399, "grad_norm": 0.7205453515052795, "learning_rate": 0.00029997787368511654, "loss": 4.8692, "step": 74700 }, { "epoch": 0.10497455902008757, "grad_norm": 0.7371070384979248, "learning_rate": 0.0002999777471439517, "loss": 4.6703, "step": 74710 }, { "epoch": 0.10498860995825116, "grad_norm": 0.7681359648704529, "learning_rate": 0.000299977620241998, "loss": 4.6302, "step": 74720 }, { "epoch": 0.10500266089641473, "grad_norm": 0.7186245322227478, "learning_rate": 0.00029997749297925577, "loss": 4.8669, "step": 74730 }, { "epoch": 0.10501671183457831, "grad_norm": 0.7410948872566223, "learning_rate": 0.00029997736535572543, "loss": 4.8782, "step": 74740 }, { "epoch": 0.10503076277274188, "grad_norm": 0.7192717790603638, "learning_rate": 0.00029997723737140713, "loss": 4.6931, "step": 74750 }, { "epoch": 
0.10504481371090546, "grad_norm": 0.6977980732917786, "learning_rate": 0.0002999771090263013, "loss": 4.8123, "step": 74760 }, { "epoch": 0.10505886464906904, "grad_norm": 0.7145591378211975, "learning_rate": 0.00029997698032040827, "loss": 4.7737, "step": 74770 }, { "epoch": 0.10507291558723261, "grad_norm": 0.7509664297103882, "learning_rate": 0.0002999768512537282, "loss": 4.6107, "step": 74780 }, { "epoch": 0.10508696652539619, "grad_norm": 0.7300460338592529, "learning_rate": 0.0002999767218262615, "loss": 4.6742, "step": 74790 }, { "epoch": 0.10510101746355976, "grad_norm": 0.711122453212738, "learning_rate": 0.0002999765920380085, "loss": 4.8037, "step": 74800 }, { "epoch": 0.10511506840172334, "grad_norm": 0.6955252289772034, "learning_rate": 0.0002999764618889695, "loss": 4.9123, "step": 74810 }, { "epoch": 0.10512911933988693, "grad_norm": 0.7196218967437744, "learning_rate": 0.00029997633137914475, "loss": 4.7226, "step": 74820 }, { "epoch": 0.10514317027805051, "grad_norm": 0.7192176580429077, "learning_rate": 0.0002999762005085346, "loss": 4.5709, "step": 74830 }, { "epoch": 0.10515722121621408, "grad_norm": 0.7230987548828125, "learning_rate": 0.0002999760692771394, "loss": 4.7651, "step": 74840 }, { "epoch": 0.10517127215437766, "grad_norm": 0.705202579498291, "learning_rate": 0.0002999759376849594, "loss": 4.6931, "step": 74850 }, { "epoch": 0.10518532309254124, "grad_norm": 0.7315492630004883, "learning_rate": 0.000299975805731995, "loss": 4.7949, "step": 74860 }, { "epoch": 0.10519937403070481, "grad_norm": 0.7159463763237, "learning_rate": 0.0002999756734182464, "loss": 4.72, "step": 74870 }, { "epoch": 0.10521342496886839, "grad_norm": 0.7130573987960815, "learning_rate": 0.00029997554074371414, "loss": 4.7058, "step": 74880 }, { "epoch": 0.10522747590703196, "grad_norm": 0.714362621307373, "learning_rate": 0.00029997540770839825, "loss": 4.7595, "step": 74890 }, { "epoch": 0.10524152684519554, "grad_norm": 0.7358441352844238, "learning_rate": 
0.00029997527431229923, "loss": 4.69, "step": 74900 }, { "epoch": 0.10525557778335912, "grad_norm": 0.7290864586830139, "learning_rate": 0.00029997514055541736, "loss": 4.7994, "step": 74910 }, { "epoch": 0.1052696287215227, "grad_norm": 0.7372201085090637, "learning_rate": 0.000299975006437753, "loss": 4.8169, "step": 74920 }, { "epoch": 0.10528367965968628, "grad_norm": 0.7162683010101318, "learning_rate": 0.0002999748719593064, "loss": 4.7578, "step": 74930 }, { "epoch": 0.10529773059784986, "grad_norm": 0.7158368825912476, "learning_rate": 0.0002999747371200779, "loss": 4.8043, "step": 74940 }, { "epoch": 0.10531178153601344, "grad_norm": 0.7241293787956238, "learning_rate": 0.00029997460192006793, "loss": 4.7606, "step": 74950 }, { "epoch": 0.10532583247417701, "grad_norm": 0.7109899520874023, "learning_rate": 0.00029997446635927666, "loss": 4.6894, "step": 74960 }, { "epoch": 0.10533988341234059, "grad_norm": 0.7288115620613098, "learning_rate": 0.0002999743304377045, "loss": 4.7761, "step": 74970 }, { "epoch": 0.10535393435050416, "grad_norm": 0.7241887450218201, "learning_rate": 0.00029997419415535173, "loss": 4.726, "step": 74980 }, { "epoch": 0.10536798528866774, "grad_norm": 0.7299656271934509, "learning_rate": 0.0002999740575122187, "loss": 4.766, "step": 74990 }, { "epoch": 0.10538203622683132, "grad_norm": 0.9255989789962769, "learning_rate": 0.0002999739205083058, "loss": 4.792, "step": 75000 }, { "epoch": 0.10539608716499489, "grad_norm": 0.7332300543785095, "learning_rate": 0.0002999737831436133, "loss": 4.7162, "step": 75010 }, { "epoch": 0.10541013810315847, "grad_norm": 0.7358729839324951, "learning_rate": 0.00029997364541814155, "loss": 4.8229, "step": 75020 }, { "epoch": 0.10542418904132206, "grad_norm": 0.7899588346481323, "learning_rate": 0.00029997350733189084, "loss": 4.6358, "step": 75030 }, { "epoch": 0.10543823997948563, "grad_norm": 0.7667545676231384, "learning_rate": 0.0002999733688848615, "loss": 4.8529, "step": 75040 }, { "epoch": 
0.10545229091764921, "grad_norm": 0.7302919626235962, "learning_rate": 0.00029997323007705396, "loss": 4.6845, "step": 75050 }, { "epoch": 0.10546634185581279, "grad_norm": 0.7035412788391113, "learning_rate": 0.0002999730909084684, "loss": 4.7805, "step": 75060 }, { "epoch": 0.10548039279397636, "grad_norm": 0.7011492848396301, "learning_rate": 0.00029997295137910535, "loss": 4.6031, "step": 75070 }, { "epoch": 0.10549444373213994, "grad_norm": 0.7378577589988708, "learning_rate": 0.00029997281148896497, "loss": 4.8141, "step": 75080 }, { "epoch": 0.10550849467030352, "grad_norm": 0.7121955156326294, "learning_rate": 0.0002999726712380477, "loss": 4.7603, "step": 75090 }, { "epoch": 0.10552254560846709, "grad_norm": 0.7434091567993164, "learning_rate": 0.00029997253062635386, "loss": 4.9379, "step": 75100 }, { "epoch": 0.10553659654663067, "grad_norm": 0.7290818095207214, "learning_rate": 0.0002999723896538837, "loss": 4.7528, "step": 75110 }, { "epoch": 0.10555064748479424, "grad_norm": 0.7576467394828796, "learning_rate": 0.0002999722483206377, "loss": 4.8125, "step": 75120 }, { "epoch": 0.10556469842295783, "grad_norm": 0.7280282974243164, "learning_rate": 0.0002999721066266161, "loss": 4.7116, "step": 75130 }, { "epoch": 0.10557874936112141, "grad_norm": 0.7176198959350586, "learning_rate": 0.0002999719645718193, "loss": 4.8024, "step": 75140 }, { "epoch": 0.10559280029928499, "grad_norm": 0.7369096279144287, "learning_rate": 0.00029997182215624755, "loss": 4.7124, "step": 75150 }, { "epoch": 0.10560685123744856, "grad_norm": 0.7322725653648376, "learning_rate": 0.00029997167937990125, "loss": 4.7161, "step": 75160 }, { "epoch": 0.10562090217561214, "grad_norm": 0.7204955816268921, "learning_rate": 0.0002999715362427808, "loss": 4.7834, "step": 75170 }, { "epoch": 0.10563495311377571, "grad_norm": 0.7370813488960266, "learning_rate": 0.0002999713927448865, "loss": 4.7807, "step": 75180 }, { "epoch": 0.10564900405193929, "grad_norm": 0.737962543964386, 
"learning_rate": 0.00029997124888621866, "loss": 4.7968, "step": 75190 }, { "epoch": 0.10566305499010287, "grad_norm": 0.7260681986808777, "learning_rate": 0.0002999711046667777, "loss": 4.6648, "step": 75200 }, { "epoch": 0.10567710592826644, "grad_norm": 0.7455945014953613, "learning_rate": 0.00029997096008656384, "loss": 4.7407, "step": 75210 }, { "epoch": 0.10569115686643002, "grad_norm": 0.710437536239624, "learning_rate": 0.0002999708151455776, "loss": 4.7474, "step": 75220 }, { "epoch": 0.10570520780459361, "grad_norm": 0.7501201033592224, "learning_rate": 0.0002999706698438192, "loss": 4.6896, "step": 75230 }, { "epoch": 0.10571925874275719, "grad_norm": 0.7423140406608582, "learning_rate": 0.00029997052418128897, "loss": 4.8219, "step": 75240 }, { "epoch": 0.10573330968092076, "grad_norm": 0.8542389869689941, "learning_rate": 0.0002999703781579874, "loss": 4.7801, "step": 75250 }, { "epoch": 0.10574736061908434, "grad_norm": 0.736545979976654, "learning_rate": 0.0002999702317739147, "loss": 4.8571, "step": 75260 }, { "epoch": 0.10576141155724791, "grad_norm": 0.7311723828315735, "learning_rate": 0.00029997008502907134, "loss": 4.8457, "step": 75270 }, { "epoch": 0.10577546249541149, "grad_norm": 0.7157546877861023, "learning_rate": 0.00029996993792345757, "loss": 4.827, "step": 75280 }, { "epoch": 0.10578951343357507, "grad_norm": 0.7155654430389404, "learning_rate": 0.0002999697904570738, "loss": 4.9051, "step": 75290 }, { "epoch": 0.10580356437173864, "grad_norm": 0.7166204452514648, "learning_rate": 0.0002999696426299204, "loss": 4.7967, "step": 75300 }, { "epoch": 0.10581761530990222, "grad_norm": 0.7281050086021423, "learning_rate": 0.0002999694944419976, "loss": 4.7054, "step": 75310 }, { "epoch": 0.1058316662480658, "grad_norm": 0.7242557406425476, "learning_rate": 0.00029996934589330594, "loss": 4.7892, "step": 75320 }, { "epoch": 0.10584571718622938, "grad_norm": 0.7574252486228943, "learning_rate": 0.0002999691969838457, "loss": 4.7271, "step": 
75330 }, { "epoch": 0.10585976812439296, "grad_norm": 0.7095407843589783, "learning_rate": 0.0002999690477136172, "loss": 4.7575, "step": 75340 }, { "epoch": 0.10587381906255654, "grad_norm": 0.7685112953186035, "learning_rate": 0.00029996889808262087, "loss": 4.8053, "step": 75350 }, { "epoch": 0.10588787000072011, "grad_norm": 0.7352884411811829, "learning_rate": 0.000299968748090857, "loss": 4.7534, "step": 75360 }, { "epoch": 0.10590192093888369, "grad_norm": 0.7439702153205872, "learning_rate": 0.000299968597738326, "loss": 4.7675, "step": 75370 }, { "epoch": 0.10591597187704727, "grad_norm": 0.7124572396278381, "learning_rate": 0.00029996844702502817, "loss": 4.8264, "step": 75380 }, { "epoch": 0.10593002281521084, "grad_norm": 0.7124819159507751, "learning_rate": 0.00029996829595096394, "loss": 4.764, "step": 75390 }, { "epoch": 0.10594407375337442, "grad_norm": 0.7227981686592102, "learning_rate": 0.00029996814451613366, "loss": 4.672, "step": 75400 }, { "epoch": 0.105958124691538, "grad_norm": 0.7123106718063354, "learning_rate": 0.00029996799272053766, "loss": 4.8143, "step": 75410 }, { "epoch": 0.10597217562970157, "grad_norm": 0.7216246724128723, "learning_rate": 0.00029996784056417636, "loss": 4.8079, "step": 75420 }, { "epoch": 0.10598622656786515, "grad_norm": 0.7304632067680359, "learning_rate": 0.00029996768804705005, "loss": 4.7621, "step": 75430 }, { "epoch": 0.10600027750602874, "grad_norm": 0.7460058927536011, "learning_rate": 0.00029996753516915915, "loss": 4.7263, "step": 75440 }, { "epoch": 0.10601432844419231, "grad_norm": 0.7226387858390808, "learning_rate": 0.00029996738193050404, "loss": 4.8106, "step": 75450 }, { "epoch": 0.10602837938235589, "grad_norm": 0.7301837801933289, "learning_rate": 0.00029996722833108505, "loss": 4.8293, "step": 75460 }, { "epoch": 0.10604243032051947, "grad_norm": 0.7361141443252563, "learning_rate": 0.00029996707437090255, "loss": 4.8255, "step": 75470 }, { "epoch": 0.10605648125868304, "grad_norm": 
0.726216733455658, "learning_rate": 0.000299966920049957, "loss": 4.7971, "step": 75480 }, { "epoch": 0.10607053219684662, "grad_norm": 0.7054420113563538, "learning_rate": 0.0002999667653682486, "loss": 4.8014, "step": 75490 }, { "epoch": 0.1060845831350102, "grad_norm": 0.7391892075538635, "learning_rate": 0.00029996661032577785, "loss": 4.8289, "step": 75500 }, { "epoch": 0.10609863407317377, "grad_norm": 0.7168707251548767, "learning_rate": 0.00029996645492254514, "loss": 4.6734, "step": 75510 }, { "epoch": 0.10611268501133735, "grad_norm": 0.7321151494979858, "learning_rate": 0.0002999662991585507, "loss": 4.7675, "step": 75520 }, { "epoch": 0.10612673594950092, "grad_norm": 0.7170401215553284, "learning_rate": 0.00029996614303379513, "loss": 4.6391, "step": 75530 }, { "epoch": 0.10614078688766451, "grad_norm": 0.6838593482971191, "learning_rate": 0.00029996598654827864, "loss": 4.7761, "step": 75540 }, { "epoch": 0.10615483782582809, "grad_norm": 0.7038040161132812, "learning_rate": 0.00029996582970200157, "loss": 4.8264, "step": 75550 }, { "epoch": 0.10616888876399166, "grad_norm": 0.7399590015411377, "learning_rate": 0.00029996567249496447, "loss": 4.7957, "step": 75560 }, { "epoch": 0.10618293970215524, "grad_norm": 0.7103729248046875, "learning_rate": 0.0002999655149271675, "loss": 4.8062, "step": 75570 }, { "epoch": 0.10619699064031882, "grad_norm": 0.7498480081558228, "learning_rate": 0.0002999653569986113, "loss": 4.7943, "step": 75580 }, { "epoch": 0.10621104157848239, "grad_norm": 0.730226993560791, "learning_rate": 0.000299965198709296, "loss": 4.7162, "step": 75590 }, { "epoch": 0.10622509251664597, "grad_norm": 0.7429940700531006, "learning_rate": 0.0002999650400592221, "loss": 4.7841, "step": 75600 }, { "epoch": 0.10623914345480955, "grad_norm": 0.7538554668426514, "learning_rate": 0.00029996488104839, "loss": 4.7471, "step": 75610 }, { "epoch": 0.10625319439297312, "grad_norm": 0.7402827739715576, "learning_rate": 0.0002999647216768001, "loss": 
4.6182, "step": 75620 }, { "epoch": 0.1062672453311367, "grad_norm": 0.7203502058982849, "learning_rate": 0.0002999645619444526, "loss": 4.7311, "step": 75630 }, { "epoch": 0.10628129626930029, "grad_norm": 0.729570209980011, "learning_rate": 0.00029996440185134815, "loss": 4.7514, "step": 75640 }, { "epoch": 0.10629534720746386, "grad_norm": 0.8635588884353638, "learning_rate": 0.00029996424139748696, "loss": 4.7753, "step": 75650 }, { "epoch": 0.10630939814562744, "grad_norm": 0.7464445233345032, "learning_rate": 0.00029996408058286943, "loss": 4.6894, "step": 75660 }, { "epoch": 0.10632344908379102, "grad_norm": 0.7225930094718933, "learning_rate": 0.0002999639194074961, "loss": 4.7286, "step": 75670 }, { "epoch": 0.10633750002195459, "grad_norm": 0.7263953685760498, "learning_rate": 0.0002999637578713671, "loss": 4.7748, "step": 75680 }, { "epoch": 0.10635155096011817, "grad_norm": 0.7385531663894653, "learning_rate": 0.000299963595974483, "loss": 4.7724, "step": 75690 }, { "epoch": 0.10636560189828174, "grad_norm": 0.6999334096908569, "learning_rate": 0.0002999634337168441, "loss": 4.8438, "step": 75700 }, { "epoch": 0.10637965283644532, "grad_norm": 0.7192821502685547, "learning_rate": 0.0002999632710984509, "loss": 4.7399, "step": 75710 }, { "epoch": 0.1063937037746089, "grad_norm": 0.7049062252044678, "learning_rate": 0.0002999631081193037, "loss": 4.7818, "step": 75720 }, { "epoch": 0.10640775471277247, "grad_norm": 0.7248371839523315, "learning_rate": 0.00029996294477940296, "loss": 4.624, "step": 75730 }, { "epoch": 0.10642180565093605, "grad_norm": 0.6994637250900269, "learning_rate": 0.000299962781078749, "loss": 4.7474, "step": 75740 }, { "epoch": 0.10643585658909964, "grad_norm": 0.744132936000824, "learning_rate": 0.0002999626170173422, "loss": 4.7988, "step": 75750 }, { "epoch": 0.10644990752726322, "grad_norm": 0.7178496718406677, "learning_rate": 0.00029996245259518304, "loss": 4.7616, "step": 75760 }, { "epoch": 0.10646395846542679, "grad_norm": 
0.7215403318405151, "learning_rate": 0.0002999622878122719, "loss": 4.7007, "step": 75770 }, { "epoch": 0.10647800940359037, "grad_norm": 0.7298187613487244, "learning_rate": 0.00029996212266860913, "loss": 4.7248, "step": 75780 }, { "epoch": 0.10649206034175394, "grad_norm": 0.7294317483901978, "learning_rate": 0.00029996195716419514, "loss": 4.6084, "step": 75790 }, { "epoch": 0.10650611127991752, "grad_norm": 0.7263827919960022, "learning_rate": 0.0002999617912990303, "loss": 4.7366, "step": 75800 }, { "epoch": 0.1065201622180811, "grad_norm": 0.768453061580658, "learning_rate": 0.00029996162507311513, "loss": 4.8619, "step": 75810 }, { "epoch": 0.10653421315624467, "grad_norm": 0.7260246276855469, "learning_rate": 0.00029996145848644993, "loss": 4.7207, "step": 75820 }, { "epoch": 0.10654826409440825, "grad_norm": 0.7110903859138489, "learning_rate": 0.00029996129153903506, "loss": 4.7166, "step": 75830 }, { "epoch": 0.10656231503257182, "grad_norm": 0.7710065245628357, "learning_rate": 0.00029996112423087104, "loss": 4.7668, "step": 75840 }, { "epoch": 0.10657636597073541, "grad_norm": 0.7256232500076294, "learning_rate": 0.00029996095656195816, "loss": 4.7379, "step": 75850 }, { "epoch": 0.10659041690889899, "grad_norm": 0.7331588864326477, "learning_rate": 0.0002999607885322969, "loss": 4.7213, "step": 75860 }, { "epoch": 0.10660446784706257, "grad_norm": 0.7234667539596558, "learning_rate": 0.0002999606201418876, "loss": 4.9289, "step": 75870 }, { "epoch": 0.10661851878522614, "grad_norm": 0.7169747352600098, "learning_rate": 0.0002999604513907308, "loss": 4.804, "step": 75880 }, { "epoch": 0.10663256972338972, "grad_norm": 0.7151200771331787, "learning_rate": 0.0002999602822788267, "loss": 4.7486, "step": 75890 }, { "epoch": 0.1066466206615533, "grad_norm": 0.7405186295509338, "learning_rate": 0.00029996011280617585, "loss": 4.7887, "step": 75900 }, { "epoch": 0.10666067159971687, "grad_norm": 0.712820291519165, "learning_rate": 0.0002999599429727786, 
"loss": 4.7063, "step": 75910 }, { "epoch": 0.10667472253788045, "grad_norm": 0.733707845211029, "learning_rate": 0.00029995977277863544, "loss": 4.7957, "step": 75920 }, { "epoch": 0.10668877347604402, "grad_norm": 0.7222506403923035, "learning_rate": 0.0002999596022237467, "loss": 4.6025, "step": 75930 }, { "epoch": 0.1067028244142076, "grad_norm": 0.7260400056838989, "learning_rate": 0.0002999594313081128, "loss": 4.7407, "step": 75940 }, { "epoch": 0.10671687535237119, "grad_norm": 0.7056910395622253, "learning_rate": 0.0002999592600317342, "loss": 4.8546, "step": 75950 }, { "epoch": 0.10673092629053477, "grad_norm": 0.7579191327095032, "learning_rate": 0.0002999590883946112, "loss": 4.6756, "step": 75960 }, { "epoch": 0.10674497722869834, "grad_norm": 0.7058764100074768, "learning_rate": 0.00029995891639674437, "loss": 4.7785, "step": 75970 }, { "epoch": 0.10675902816686192, "grad_norm": 0.7291330099105835, "learning_rate": 0.000299958744038134, "loss": 4.7122, "step": 75980 }, { "epoch": 0.1067730791050255, "grad_norm": 0.7083228230476379, "learning_rate": 0.00029995857131878055, "loss": 4.8688, "step": 75990 }, { "epoch": 0.10678713004318907, "grad_norm": 0.7070737481117249, "learning_rate": 0.00029995839823868445, "loss": 4.7306, "step": 76000 }, { "epoch": 0.10680118098135265, "grad_norm": 0.7116759419441223, "learning_rate": 0.00029995822479784605, "loss": 4.6925, "step": 76010 }, { "epoch": 0.10681523191951622, "grad_norm": 0.7482094764709473, "learning_rate": 0.0002999580509962658, "loss": 4.6478, "step": 76020 }, { "epoch": 0.1068292828576798, "grad_norm": 0.7246078252792358, "learning_rate": 0.00029995787683394427, "loss": 4.7802, "step": 76030 }, { "epoch": 0.10684333379584338, "grad_norm": 0.7256546020507812, "learning_rate": 0.0002999577023108816, "loss": 4.7196, "step": 76040 }, { "epoch": 0.10685738473400695, "grad_norm": 0.7039734721183777, "learning_rate": 0.0002999575274270784, "loss": 4.7635, "step": 76050 }, { "epoch": 0.10687143567217054, 
"grad_norm": 0.6815078258514404, "learning_rate": 0.0002999573521825351, "loss": 4.8917, "step": 76060 }, { "epoch": 0.10688548661033412, "grad_norm": 0.7078894972801208, "learning_rate": 0.00029995717657725195, "loss": 4.7096, "step": 76070 }, { "epoch": 0.1068995375484977, "grad_norm": 0.7273938655853271, "learning_rate": 0.0002999570006112296, "loss": 4.644, "step": 76080 }, { "epoch": 0.10691358848666127, "grad_norm": 0.7119658589363098, "learning_rate": 0.0002999568242844683, "loss": 4.788, "step": 76090 }, { "epoch": 0.10692763942482485, "grad_norm": 0.7011669874191284, "learning_rate": 0.00029995664759696855, "loss": 4.5298, "step": 76100 }, { "epoch": 0.10694169036298842, "grad_norm": 0.7396077513694763, "learning_rate": 0.0002999564705487307, "loss": 4.6668, "step": 76110 }, { "epoch": 0.106955741301152, "grad_norm": 0.7068682312965393, "learning_rate": 0.00029995629313975535, "loss": 4.7548, "step": 76120 }, { "epoch": 0.10696979223931558, "grad_norm": 0.7131781578063965, "learning_rate": 0.0002999561153700427, "loss": 4.8285, "step": 76130 }, { "epoch": 0.10698384317747915, "grad_norm": 0.7171995639801025, "learning_rate": 0.00029995593723959336, "loss": 4.6529, "step": 76140 }, { "epoch": 0.10699789411564273, "grad_norm": 0.6993269920349121, "learning_rate": 0.0002999557587484077, "loss": 4.6662, "step": 76150 }, { "epoch": 0.10701194505380632, "grad_norm": 0.7163041234016418, "learning_rate": 0.0002999555798964861, "loss": 4.8221, "step": 76160 }, { "epoch": 0.1070259959919699, "grad_norm": 0.6855682134628296, "learning_rate": 0.0002999554006838291, "loss": 4.7832, "step": 76170 }, { "epoch": 0.10704004693013347, "grad_norm": 0.7540141940116882, "learning_rate": 0.00029995522111043695, "loss": 4.7666, "step": 76180 }, { "epoch": 0.10705409786829705, "grad_norm": 0.7331625819206238, "learning_rate": 0.0002999550411763102, "loss": 4.7275, "step": 76190 }, { "epoch": 0.10706814880646062, "grad_norm": 0.7579591870307922, "learning_rate": 
0.0002999548608814493, "loss": 4.6774, "step": 76200 }, { "epoch": 0.1070821997446242, "grad_norm": 0.7343604564666748, "learning_rate": 0.0002999546802258547, "loss": 4.716, "step": 76210 }, { "epoch": 0.10709625068278777, "grad_norm": 0.7140637040138245, "learning_rate": 0.00029995449920952675, "loss": 4.6961, "step": 76220 }, { "epoch": 0.10711030162095135, "grad_norm": 0.7120891213417053, "learning_rate": 0.000299954317832466, "loss": 4.8529, "step": 76230 }, { "epoch": 0.10712435255911493, "grad_norm": 0.7201858758926392, "learning_rate": 0.0002999541360946727, "loss": 4.7678, "step": 76240 }, { "epoch": 0.1071384034972785, "grad_norm": 0.7303602695465088, "learning_rate": 0.0002999539539961475, "loss": 4.8727, "step": 76250 }, { "epoch": 0.1071524544354421, "grad_norm": 0.7096266150474548, "learning_rate": 0.00029995377153689063, "loss": 4.7656, "step": 76260 }, { "epoch": 0.10716650537360567, "grad_norm": 0.6913262009620667, "learning_rate": 0.00029995358871690275, "loss": 4.7481, "step": 76270 }, { "epoch": 0.10718055631176925, "grad_norm": 0.7402442693710327, "learning_rate": 0.0002999534055361841, "loss": 4.7493, "step": 76280 }, { "epoch": 0.10719460724993282, "grad_norm": 0.7259013652801514, "learning_rate": 0.00029995322199473525, "loss": 4.7733, "step": 76290 }, { "epoch": 0.1072086581880964, "grad_norm": 0.7574719786643982, "learning_rate": 0.0002999530380925566, "loss": 4.65, "step": 76300 }, { "epoch": 0.10722270912625997, "grad_norm": 0.7451621294021606, "learning_rate": 0.00029995285382964856, "loss": 4.6954, "step": 76310 }, { "epoch": 0.10723676006442355, "grad_norm": 0.704237163066864, "learning_rate": 0.00029995266920601165, "loss": 4.7913, "step": 76320 }, { "epoch": 0.10725081100258713, "grad_norm": 0.7237526774406433, "learning_rate": 0.0002999524842216463, "loss": 4.7553, "step": 76330 }, { "epoch": 0.1072648619407507, "grad_norm": 0.724061131477356, "learning_rate": 0.0002999522988765529, "loss": 4.7573, "step": 76340 }, { "epoch": 
0.10727891287891428, "grad_norm": 0.6993366479873657, "learning_rate": 0.0002999521131707318, "loss": 4.6088, "step": 76350 }, { "epoch": 0.10729296381707785, "grad_norm": 0.6915683746337891, "learning_rate": 0.00029995192710418375, "loss": 4.8146, "step": 76360 }, { "epoch": 0.10730701475524144, "grad_norm": 0.7251079678535461, "learning_rate": 0.0002999517406769089, "loss": 4.6804, "step": 76370 }, { "epoch": 0.10732106569340502, "grad_norm": 0.7160983681678772, "learning_rate": 0.00029995155388890785, "loss": 4.8018, "step": 76380 }, { "epoch": 0.1073351166315686, "grad_norm": 0.6960489153862, "learning_rate": 0.00029995136674018105, "loss": 4.6292, "step": 76390 }, { "epoch": 0.10734916756973217, "grad_norm": 0.7182583808898926, "learning_rate": 0.00029995117923072885, "loss": 4.7744, "step": 76400 }, { "epoch": 0.10736321850789575, "grad_norm": 0.7371787428855896, "learning_rate": 0.0002999509913605518, "loss": 4.7254, "step": 76410 }, { "epoch": 0.10737726944605933, "grad_norm": 0.7134677171707153, "learning_rate": 0.0002999508031296503, "loss": 4.8213, "step": 76420 }, { "epoch": 0.1073913203842229, "grad_norm": 0.7464840412139893, "learning_rate": 0.00029995061453802485, "loss": 4.722, "step": 76430 }, { "epoch": 0.10740537132238648, "grad_norm": 0.7147547602653503, "learning_rate": 0.0002999504255856758, "loss": 4.8029, "step": 76440 }, { "epoch": 0.10741942226055005, "grad_norm": 0.7210128307342529, "learning_rate": 0.00029995023627260375, "loss": 4.7076, "step": 76450 }, { "epoch": 0.10743347319871363, "grad_norm": 0.6943991184234619, "learning_rate": 0.00029995004659880907, "loss": 4.7665, "step": 76460 }, { "epoch": 0.10744752413687722, "grad_norm": 0.7254976034164429, "learning_rate": 0.0002999498565642922, "loss": 4.6883, "step": 76470 }, { "epoch": 0.1074615750750408, "grad_norm": 0.733951210975647, "learning_rate": 0.00029994966616905366, "loss": 4.6997, "step": 76480 }, { "epoch": 0.10747562601320437, "grad_norm": 0.7102512121200562, 
"learning_rate": 0.00029994947541309386, "loss": 4.6974, "step": 76490 }, { "epoch": 0.10748967695136795, "grad_norm": 0.7103749513626099, "learning_rate": 0.00029994928429641327, "loss": 4.677, "step": 76500 }, { "epoch": 0.10750372788953153, "grad_norm": 0.7255563735961914, "learning_rate": 0.00029994909281901233, "loss": 4.7501, "step": 76510 }, { "epoch": 0.1075177788276951, "grad_norm": 0.7186533808708191, "learning_rate": 0.00029994890098089153, "loss": 4.6903, "step": 76520 }, { "epoch": 0.10753182976585868, "grad_norm": 0.7188341617584229, "learning_rate": 0.00029994870878205136, "loss": 4.7485, "step": 76530 }, { "epoch": 0.10754588070402225, "grad_norm": 0.7477817535400391, "learning_rate": 0.00029994851622249224, "loss": 4.7999, "step": 76540 }, { "epoch": 0.10755993164218583, "grad_norm": 0.7210610508918762, "learning_rate": 0.0002999483233022146, "loss": 4.7705, "step": 76550 }, { "epoch": 0.1075739825803494, "grad_norm": 0.7165090441703796, "learning_rate": 0.000299948130021219, "loss": 4.7389, "step": 76560 }, { "epoch": 0.107588033518513, "grad_norm": 0.6880157589912415, "learning_rate": 0.00029994793637950584, "loss": 4.7931, "step": 76570 }, { "epoch": 0.10760208445667657, "grad_norm": 0.719469428062439, "learning_rate": 0.00029994774237707556, "loss": 4.751, "step": 76580 }, { "epoch": 0.10761613539484015, "grad_norm": 0.7153851389884949, "learning_rate": 0.0002999475480139287, "loss": 4.6964, "step": 76590 }, { "epoch": 0.10763018633300372, "grad_norm": 0.7175468802452087, "learning_rate": 0.00029994735329006567, "loss": 4.6719, "step": 76600 }, { "epoch": 0.1076442372711673, "grad_norm": 0.7065703868865967, "learning_rate": 0.0002999471582054869, "loss": 4.7861, "step": 76610 }, { "epoch": 0.10765828820933088, "grad_norm": 0.7555844187736511, "learning_rate": 0.00029994696276019307, "loss": 4.7068, "step": 76620 }, { "epoch": 0.10767233914749445, "grad_norm": 0.7342779040336609, "learning_rate": 0.0002999467669541844, "loss": 4.7435, "step": 
76630 }, { "epoch": 0.10768639008565803, "grad_norm": 0.7292844653129578, "learning_rate": 0.00029994657078746145, "loss": 4.6379, "step": 76640 }, { "epoch": 0.1077004410238216, "grad_norm": 0.7343292832374573, "learning_rate": 0.0002999463742600247, "loss": 4.6573, "step": 76650 }, { "epoch": 0.10771449196198518, "grad_norm": 0.7275290489196777, "learning_rate": 0.0002999461773718746, "loss": 4.7651, "step": 76660 }, { "epoch": 0.10772854290014876, "grad_norm": 0.7052959203720093, "learning_rate": 0.0002999459801230117, "loss": 4.806, "step": 76670 }, { "epoch": 0.10774259383831235, "grad_norm": 0.7254387140274048, "learning_rate": 0.0002999457825134364, "loss": 4.5679, "step": 76680 }, { "epoch": 0.10775664477647592, "grad_norm": 0.7066879272460938, "learning_rate": 0.0002999455845431492, "loss": 4.6913, "step": 76690 }, { "epoch": 0.1077706957146395, "grad_norm": 0.716200590133667, "learning_rate": 0.0002999453862121506, "loss": 4.567, "step": 76700 }, { "epoch": 0.10778474665280308, "grad_norm": 0.7112560868263245, "learning_rate": 0.00029994518752044104, "loss": 4.7576, "step": 76710 }, { "epoch": 0.10779879759096665, "grad_norm": 0.7039072513580322, "learning_rate": 0.000299944988468021, "loss": 4.6923, "step": 76720 }, { "epoch": 0.10781284852913023, "grad_norm": 0.7240103483200073, "learning_rate": 0.00029994478905489094, "loss": 4.8887, "step": 76730 }, { "epoch": 0.1078268994672938, "grad_norm": 0.6706738471984863, "learning_rate": 0.0002999445892810514, "loss": 4.848, "step": 76740 }, { "epoch": 0.10784095040545738, "grad_norm": 0.7257563471794128, "learning_rate": 0.0002999443891465028, "loss": 4.7367, "step": 76750 }, { "epoch": 0.10785500134362096, "grad_norm": 0.7115849256515503, "learning_rate": 0.0002999441886512456, "loss": 4.7701, "step": 76760 }, { "epoch": 0.10786905228178453, "grad_norm": 0.7439100742340088, "learning_rate": 0.00029994398779528047, "loss": 4.7311, "step": 76770 }, { "epoch": 0.10788310321994812, "grad_norm": 
0.7145756483078003, "learning_rate": 0.00029994378657860764, "loss": 4.7226, "step": 76780 }, { "epoch": 0.1078971541581117, "grad_norm": 0.7000148892402649, "learning_rate": 0.00029994358500122774, "loss": 4.8696, "step": 76790 }, { "epoch": 0.10791120509627528, "grad_norm": 0.7268564105033875, "learning_rate": 0.00029994338306314126, "loss": 4.7751, "step": 76800 }, { "epoch": 0.10792525603443885, "grad_norm": 0.6927092671394348, "learning_rate": 0.00029994318076434864, "loss": 4.6979, "step": 76810 }, { "epoch": 0.10793930697260243, "grad_norm": 0.7400745749473572, "learning_rate": 0.0002999429781048503, "loss": 4.7819, "step": 76820 }, { "epoch": 0.107953357910766, "grad_norm": 0.7177280783653259, "learning_rate": 0.00029994277508464684, "loss": 4.7256, "step": 76830 }, { "epoch": 0.10796740884892958, "grad_norm": 0.7067402601242065, "learning_rate": 0.00029994257170373875, "loss": 4.8645, "step": 76840 }, { "epoch": 0.10798145978709316, "grad_norm": 0.7211511731147766, "learning_rate": 0.0002999423679621265, "loss": 4.768, "step": 76850 }, { "epoch": 0.10799551072525673, "grad_norm": 0.717394232749939, "learning_rate": 0.0002999421638598105, "loss": 4.7647, "step": 76860 }, { "epoch": 0.10800956166342031, "grad_norm": 0.741280734539032, "learning_rate": 0.0002999419593967913, "loss": 4.6856, "step": 76870 }, { "epoch": 0.1080236126015839, "grad_norm": 0.7437717318534851, "learning_rate": 0.0002999417545730694, "loss": 4.7239, "step": 76880 }, { "epoch": 0.10803766353974747, "grad_norm": 0.7169762849807739, "learning_rate": 0.00029994154938864533, "loss": 4.7881, "step": 76890 }, { "epoch": 0.10805171447791105, "grad_norm": 0.7102547287940979, "learning_rate": 0.0002999413438435195, "loss": 4.6919, "step": 76900 }, { "epoch": 0.10806576541607463, "grad_norm": 0.7318692803382874, "learning_rate": 0.00029994113793769243, "loss": 4.8037, "step": 76910 }, { "epoch": 0.1080798163542382, "grad_norm": 0.7535057663917542, "learning_rate": 0.0002999409316711647, "loss": 
4.7232, "step": 76920 }, { "epoch": 0.10809386729240178, "grad_norm": 0.7213916182518005, "learning_rate": 0.0002999407250439367, "loss": 4.6257, "step": 76930 }, { "epoch": 0.10810791823056536, "grad_norm": 0.7251282930374146, "learning_rate": 0.00029994051805600896, "loss": 4.6825, "step": 76940 }, { "epoch": 0.10812196916872893, "grad_norm": 0.8058227300643921, "learning_rate": 0.0002999403107073819, "loss": 4.6347, "step": 76950 }, { "epoch": 0.10813602010689251, "grad_norm": 0.7237033843994141, "learning_rate": 0.0002999401029980562, "loss": 4.7505, "step": 76960 }, { "epoch": 0.10815007104505608, "grad_norm": 0.7178425192832947, "learning_rate": 0.0002999398949280323, "loss": 4.7016, "step": 76970 }, { "epoch": 0.10816412198321966, "grad_norm": 0.7172357439994812, "learning_rate": 0.0002999396864973106, "loss": 4.8938, "step": 76980 }, { "epoch": 0.10817817292138325, "grad_norm": 0.7308405637741089, "learning_rate": 0.0002999394777058916, "loss": 4.7532, "step": 76990 }, { "epoch": 0.10819222385954683, "grad_norm": 0.715327799320221, "learning_rate": 0.00029993926855377594, "loss": 4.7328, "step": 77000 }, { "epoch": 0.1082062747977104, "grad_norm": 0.6993787288665771, "learning_rate": 0.000299939059040964, "loss": 4.7433, "step": 77010 }, { "epoch": 0.10822032573587398, "grad_norm": 0.7134409546852112, "learning_rate": 0.0002999388491674564, "loss": 4.6656, "step": 77020 }, { "epoch": 0.10823437667403756, "grad_norm": 0.7323780655860901, "learning_rate": 0.00029993863893325355, "loss": 4.7395, "step": 77030 }, { "epoch": 0.10824842761220113, "grad_norm": 0.7384430170059204, "learning_rate": 0.00029993844941407694, "loss": 4.701, "step": 77040 }, { "epoch": 0.10826247855036471, "grad_norm": 0.7190428972244263, "learning_rate": 0.00029993823849455454, "loss": 4.8667, "step": 77050 }, { "epoch": 0.10827652948852828, "grad_norm": 0.7207445502281189, "learning_rate": 0.0002999380272143384, "loss": 4.8387, "step": 77060 }, { "epoch": 0.10829058042669186, 
"grad_norm": 0.7112932205200195, "learning_rate": 0.000299937815573429, "loss": 4.7327, "step": 77070 }, { "epoch": 0.10830463136485544, "grad_norm": 0.7273050546646118, "learning_rate": 0.00029993760357182686, "loss": 4.7299, "step": 77080 }, { "epoch": 0.10831868230301903, "grad_norm": 0.7051961421966553, "learning_rate": 0.0002999373912095325, "loss": 4.7734, "step": 77090 }, { "epoch": 0.1083327332411826, "grad_norm": 0.7308077216148376, "learning_rate": 0.0002999371784865464, "loss": 4.859, "step": 77100 }, { "epoch": 0.10834678417934618, "grad_norm": 0.7368705868721008, "learning_rate": 0.00029993696540286913, "loss": 4.7365, "step": 77110 }, { "epoch": 0.10836083511750975, "grad_norm": 0.7260712385177612, "learning_rate": 0.00029993675195850116, "loss": 4.822, "step": 77120 }, { "epoch": 0.10837488605567333, "grad_norm": 0.7681806087493896, "learning_rate": 0.000299936538153443, "loss": 4.7356, "step": 77130 }, { "epoch": 0.1083889369938369, "grad_norm": 0.7338926196098328, "learning_rate": 0.0002999363239876952, "loss": 4.8259, "step": 77140 }, { "epoch": 0.10840298793200048, "grad_norm": 0.6925027370452881, "learning_rate": 0.0002999361094612582, "loss": 4.723, "step": 77150 }, { "epoch": 0.10841703887016406, "grad_norm": 0.7058929800987244, "learning_rate": 0.0002999358945741326, "loss": 4.7304, "step": 77160 }, { "epoch": 0.10843108980832764, "grad_norm": 0.7068637013435364, "learning_rate": 0.00029993567932631886, "loss": 4.9076, "step": 77170 }, { "epoch": 0.10844514074649121, "grad_norm": 0.7397764921188354, "learning_rate": 0.00029993546371781754, "loss": 4.7557, "step": 77180 }, { "epoch": 0.1084591916846548, "grad_norm": 0.7128493189811707, "learning_rate": 0.0002999352477486291, "loss": 4.7219, "step": 77190 }, { "epoch": 0.10847324262281838, "grad_norm": 0.7162886261940002, "learning_rate": 0.00029993503141875413, "loss": 4.7888, "step": 77200 }, { "epoch": 0.10848729356098195, "grad_norm": 0.7667913436889648, "learning_rate": 
0.00029993481472819313, "loss": 4.8192, "step": 77210 }, { "epoch": 0.10850134449914553, "grad_norm": 0.707908034324646, "learning_rate": 0.0002999345976769466, "loss": 4.7827, "step": 77220 }, { "epoch": 0.1085153954373091, "grad_norm": 0.7176185250282288, "learning_rate": 0.00029993438026501513, "loss": 4.6943, "step": 77230 }, { "epoch": 0.10852944637547268, "grad_norm": 0.7139223217964172, "learning_rate": 0.0002999341624923991, "loss": 4.7028, "step": 77240 }, { "epoch": 0.10854349731363626, "grad_norm": 0.7057112455368042, "learning_rate": 0.00029993394435909917, "loss": 4.7016, "step": 77250 }, { "epoch": 0.10855754825179983, "grad_norm": 0.7334666848182678, "learning_rate": 0.0002999337258651158, "loss": 4.7411, "step": 77260 }, { "epoch": 0.10857159918996341, "grad_norm": 0.7039465308189392, "learning_rate": 0.0002999335070104496, "loss": 4.8021, "step": 77270 }, { "epoch": 0.10858565012812699, "grad_norm": 0.716081976890564, "learning_rate": 0.0002999332877951009, "loss": 4.7026, "step": 77280 }, { "epoch": 0.10859970106629056, "grad_norm": 0.725543737411499, "learning_rate": 0.00029993306821907037, "loss": 4.7908, "step": 77290 }, { "epoch": 0.10861375200445415, "grad_norm": 0.7131525874137878, "learning_rate": 0.0002999328482823586, "loss": 4.7378, "step": 77300 }, { "epoch": 0.10862780294261773, "grad_norm": 0.7272675037384033, "learning_rate": 0.00029993262798496604, "loss": 4.6323, "step": 77310 }, { "epoch": 0.1086418538807813, "grad_norm": 0.8117364645004272, "learning_rate": 0.0002999324073268931, "loss": 4.7449, "step": 77320 }, { "epoch": 0.10865590481894488, "grad_norm": 0.7216888666152954, "learning_rate": 0.00029993218630814056, "loss": 4.6925, "step": 77330 }, { "epoch": 0.10866995575710846, "grad_norm": 0.7425556778907776, "learning_rate": 0.0002999319649287088, "loss": 4.8035, "step": 77340 }, { "epoch": 0.10868400669527203, "grad_norm": 0.7141205668449402, "learning_rate": 0.00029993174318859836, "loss": 4.7453, "step": 77350 }, { 
"epoch": 0.10869805763343561, "grad_norm": 0.7087002992630005, "learning_rate": 0.0002999315210878098, "loss": 4.6944, "step": 77360 }, { "epoch": 0.10871210857159919, "grad_norm": 0.7184770107269287, "learning_rate": 0.0002999312986263436, "loss": 4.8394, "step": 77370 }, { "epoch": 0.10872615950976276, "grad_norm": 0.7024494409561157, "learning_rate": 0.0002999310758042004, "loss": 4.7444, "step": 77380 }, { "epoch": 0.10874021044792634, "grad_norm": 0.723019540309906, "learning_rate": 0.0002999308526213806, "loss": 4.7096, "step": 77390 }, { "epoch": 0.10875426138608993, "grad_norm": 0.731758713722229, "learning_rate": 0.0002999306290778849, "loss": 4.7041, "step": 77400 }, { "epoch": 0.1087683123242535, "grad_norm": 0.7360407710075378, "learning_rate": 0.0002999304051737137, "loss": 4.7408, "step": 77410 }, { "epoch": 0.10878236326241708, "grad_norm": 0.9013959169387817, "learning_rate": 0.00029993018090886765, "loss": 4.8065, "step": 77420 }, { "epoch": 0.10879641420058066, "grad_norm": 0.716956377029419, "learning_rate": 0.00029992995628334715, "loss": 4.7094, "step": 77430 }, { "epoch": 0.10881046513874423, "grad_norm": 0.7028824090957642, "learning_rate": 0.0002999297312971529, "loss": 4.7542, "step": 77440 }, { "epoch": 0.10882451607690781, "grad_norm": 0.7272639274597168, "learning_rate": 0.00029992950595028524, "loss": 4.7021, "step": 77450 }, { "epoch": 0.10883856701507139, "grad_norm": 0.769668459892273, "learning_rate": 0.0002999292802427449, "loss": 4.7857, "step": 77460 }, { "epoch": 0.10885261795323496, "grad_norm": 0.7413691878318787, "learning_rate": 0.00029992905417453237, "loss": 4.7665, "step": 77470 }, { "epoch": 0.10886666889139854, "grad_norm": 0.7249032258987427, "learning_rate": 0.0002999288277456482, "loss": 4.6758, "step": 77480 }, { "epoch": 0.10888071982956211, "grad_norm": 0.7224403023719788, "learning_rate": 0.00029992860095609286, "loss": 4.696, "step": 77490 }, { "epoch": 0.1088947707677257, "grad_norm": 0.708949863910675, 
"learning_rate": 0.00029992837380586694, "loss": 4.7162, "step": 77500 }, { "epoch": 0.10890882170588928, "grad_norm": 0.7245976328849792, "learning_rate": 0.000299928146294971, "loss": 4.6874, "step": 77510 }, { "epoch": 0.10892287264405286, "grad_norm": 1.1022865772247314, "learning_rate": 0.00029992791842340565, "loss": 4.7513, "step": 77520 }, { "epoch": 0.10893692358221643, "grad_norm": 0.7656620740890503, "learning_rate": 0.0002999276901911713, "loss": 4.6487, "step": 77530 }, { "epoch": 0.10895097452038001, "grad_norm": 0.698890209197998, "learning_rate": 0.00029992746159826863, "loss": 4.7322, "step": 77540 }, { "epoch": 0.10896502545854359, "grad_norm": 0.7267652153968811, "learning_rate": 0.00029992723264469807, "loss": 4.7124, "step": 77550 }, { "epoch": 0.10897907639670716, "grad_norm": 0.7437712550163269, "learning_rate": 0.0002999270033304603, "loss": 4.7506, "step": 77560 }, { "epoch": 0.10899312733487074, "grad_norm": 0.7425222396850586, "learning_rate": 0.0002999267736555557, "loss": 4.6336, "step": 77570 }, { "epoch": 0.10900717827303431, "grad_norm": 0.735388994216919, "learning_rate": 0.00029992654361998497, "loss": 4.7389, "step": 77580 }, { "epoch": 0.10902122921119789, "grad_norm": 0.7018815279006958, "learning_rate": 0.00029992631322374864, "loss": 4.7882, "step": 77590 }, { "epoch": 0.10903528014936147, "grad_norm": 0.7219749689102173, "learning_rate": 0.00029992608246684726, "loss": 4.773, "step": 77600 }, { "epoch": 0.10904933108752506, "grad_norm": 0.7270717024803162, "learning_rate": 0.00029992585134928137, "loss": 4.7005, "step": 77610 }, { "epoch": 0.10906338202568863, "grad_norm": 0.6950241327285767, "learning_rate": 0.00029992561987105145, "loss": 4.7569, "step": 77620 }, { "epoch": 0.10907743296385221, "grad_norm": 0.711341142654419, "learning_rate": 0.0002999253880321582, "loss": 4.7915, "step": 77630 }, { "epoch": 0.10909148390201578, "grad_norm": 0.7145726084709167, "learning_rate": 0.00029992515583260207, "loss": 4.6679, 
"step": 77640 }, { "epoch": 0.10910553484017936, "grad_norm": 0.7017688155174255, "learning_rate": 0.0002999249232723837, "loss": 4.897, "step": 77650 }, { "epoch": 0.10911958577834294, "grad_norm": 0.7016028165817261, "learning_rate": 0.0002999246903515035, "loss": 4.7972, "step": 77660 }, { "epoch": 0.10913363671650651, "grad_norm": 0.739503026008606, "learning_rate": 0.00029992445706996225, "loss": 4.7323, "step": 77670 }, { "epoch": 0.10914768765467009, "grad_norm": 0.7204867601394653, "learning_rate": 0.00029992422342776037, "loss": 4.7506, "step": 77680 }, { "epoch": 0.10916173859283367, "grad_norm": 0.713930070400238, "learning_rate": 0.0002999239894248984, "loss": 4.8078, "step": 77690 }, { "epoch": 0.10917578953099724, "grad_norm": 0.7046233415603638, "learning_rate": 0.00029992375506137704, "loss": 4.6674, "step": 77700 }, { "epoch": 0.10918984046916083, "grad_norm": 0.731946587562561, "learning_rate": 0.0002999235203371967, "loss": 4.7776, "step": 77710 }, { "epoch": 0.10920389140732441, "grad_norm": 0.7064023017883301, "learning_rate": 0.00029992328525235804, "loss": 4.749, "step": 77720 }, { "epoch": 0.10921794234548798, "grad_norm": 0.693246066570282, "learning_rate": 0.00029992304980686155, "loss": 4.7499, "step": 77730 }, { "epoch": 0.10923199328365156, "grad_norm": 0.7170222997665405, "learning_rate": 0.0002999228140007079, "loss": 4.6803, "step": 77740 }, { "epoch": 0.10924604422181514, "grad_norm": 0.7952947616577148, "learning_rate": 0.0002999225778338976, "loss": 4.6546, "step": 77750 }, { "epoch": 0.10926009515997871, "grad_norm": 0.7248690128326416, "learning_rate": 0.0002999223413064312, "loss": 4.6298, "step": 77760 }, { "epoch": 0.10927414609814229, "grad_norm": 0.7593909502029419, "learning_rate": 0.0002999221044183093, "loss": 4.7174, "step": 77770 }, { "epoch": 0.10928819703630586, "grad_norm": 0.7026767730712891, "learning_rate": 0.0002999218671695324, "loss": 4.5835, "step": 77780 }, { "epoch": 0.10930224797446944, "grad_norm": 
0.7110097408294678, "learning_rate": 0.0002999216295601012, "loss": 4.8037, "step": 77790 }, { "epoch": 0.10931629891263302, "grad_norm": 0.7158304452896118, "learning_rate": 0.00029992139159001614, "loss": 4.7756, "step": 77800 }, { "epoch": 0.10933034985079661, "grad_norm": 0.7022632360458374, "learning_rate": 0.0002999211532592779, "loss": 4.7069, "step": 77810 }, { "epoch": 0.10934440078896018, "grad_norm": 0.7358798384666443, "learning_rate": 0.000299920914567887, "loss": 4.8487, "step": 77820 }, { "epoch": 0.10935845172712376, "grad_norm": 0.721831738948822, "learning_rate": 0.00029992067551584396, "loss": 4.7852, "step": 77830 }, { "epoch": 0.10937250266528734, "grad_norm": 0.7131444811820984, "learning_rate": 0.0002999204361031495, "loss": 4.8763, "step": 77840 }, { "epoch": 0.10938655360345091, "grad_norm": 0.7156862616539001, "learning_rate": 0.00029992019632980405, "loss": 4.7805, "step": 77850 }, { "epoch": 0.10940060454161449, "grad_norm": 0.7103162407875061, "learning_rate": 0.00029991995619580824, "loss": 4.7374, "step": 77860 }, { "epoch": 0.10941465547977806, "grad_norm": 0.6913536190986633, "learning_rate": 0.0002999197157011627, "loss": 4.6408, "step": 77870 }, { "epoch": 0.10942870641794164, "grad_norm": 0.7195391058921814, "learning_rate": 0.0002999194748458679, "loss": 4.8017, "step": 77880 }, { "epoch": 0.10944275735610522, "grad_norm": 0.7326173186302185, "learning_rate": 0.0002999192336299246, "loss": 4.729, "step": 77890 }, { "epoch": 0.10945680829426879, "grad_norm": 0.721824049949646, "learning_rate": 0.0002999189920533332, "loss": 4.7534, "step": 77900 }, { "epoch": 0.10947085923243237, "grad_norm": 0.719916582107544, "learning_rate": 0.00029991875011609433, "loss": 4.7614, "step": 77910 }, { "epoch": 0.10948491017059596, "grad_norm": 0.7365716099739075, "learning_rate": 0.0002999185078182086, "loss": 4.6417, "step": 77920 }, { "epoch": 0.10949896110875953, "grad_norm": 0.7198102474212646, "learning_rate": 0.00029991826515967653, 
"loss": 4.6395, "step": 77930 }, { "epoch": 0.10951301204692311, "grad_norm": 0.7074626088142395, "learning_rate": 0.0002999180221404988, "loss": 4.8106, "step": 77940 }, { "epoch": 0.10952706298508669, "grad_norm": 0.7036644220352173, "learning_rate": 0.000299917778760676, "loss": 4.6494, "step": 77950 }, { "epoch": 0.10954111392325026, "grad_norm": 0.7054417729377747, "learning_rate": 0.00029991753502020855, "loss": 4.8741, "step": 77960 }, { "epoch": 0.10955516486141384, "grad_norm": 0.7201747298240662, "learning_rate": 0.0002999172909190972, "loss": 4.8208, "step": 77970 }, { "epoch": 0.10956921579957742, "grad_norm": 0.7417471408843994, "learning_rate": 0.0002999170464573425, "loss": 4.6939, "step": 77980 }, { "epoch": 0.10958326673774099, "grad_norm": 0.7059158086776733, "learning_rate": 0.000299916801634945, "loss": 4.6488, "step": 77990 }, { "epoch": 0.10959731767590457, "grad_norm": 0.7165995836257935, "learning_rate": 0.00029991655645190534, "loss": 4.6077, "step": 78000 }, { "epoch": 0.10961136861406814, "grad_norm": 0.6998836398124695, "learning_rate": 0.0002999163109082241, "loss": 4.7637, "step": 78010 }, { "epoch": 0.10962541955223173, "grad_norm": 0.7285990715026855, "learning_rate": 0.00029991606500390184, "loss": 4.7262, "step": 78020 }, { "epoch": 0.10963947049039531, "grad_norm": 0.7066509127616882, "learning_rate": 0.0002999158187389391, "loss": 4.8026, "step": 78030 }, { "epoch": 0.10965352142855889, "grad_norm": 0.7119672894477844, "learning_rate": 0.0002999155721133366, "loss": 4.7852, "step": 78040 }, { "epoch": 0.10966757236672246, "grad_norm": 0.7337003946304321, "learning_rate": 0.0002999153251270949, "loss": 4.7541, "step": 78050 }, { "epoch": 0.10968162330488604, "grad_norm": 0.7333500981330872, "learning_rate": 0.0002999150777802145, "loss": 4.8055, "step": 78060 }, { "epoch": 0.10969567424304962, "grad_norm": 0.7334858775138855, "learning_rate": 0.0002999148300726961, "loss": 4.719, "step": 78070 }, { "epoch": 0.10970972518121319, 
"grad_norm": 0.7448481917381287, "learning_rate": 0.00029991458200454024, "loss": 4.591, "step": 78080 }, { "epoch": 0.10972377611937677, "grad_norm": 0.7082262635231018, "learning_rate": 0.0002999143335757475, "loss": 4.6573, "step": 78090 }, { "epoch": 0.10973782705754034, "grad_norm": 0.752067506313324, "learning_rate": 0.00029991408478631857, "loss": 4.801, "step": 78100 }, { "epoch": 0.10975187799570392, "grad_norm": 0.7344422936439514, "learning_rate": 0.000299913835636254, "loss": 4.6786, "step": 78110 }, { "epoch": 0.10976592893386751, "grad_norm": 0.7434372901916504, "learning_rate": 0.00029991358612555435, "loss": 4.7506, "step": 78120 }, { "epoch": 0.10977997987203109, "grad_norm": 0.7248328924179077, "learning_rate": 0.00029991333625422023, "loss": 4.766, "step": 78130 }, { "epoch": 0.10979403081019466, "grad_norm": 0.7229834794998169, "learning_rate": 0.00029991308602225227, "loss": 4.7095, "step": 78140 }, { "epoch": 0.10980808174835824, "grad_norm": 0.718464732170105, "learning_rate": 0.0002999128354296511, "loss": 4.731, "step": 78150 }, { "epoch": 0.10982213268652181, "grad_norm": 0.7103954553604126, "learning_rate": 0.00029991258447641724, "loss": 4.7028, "step": 78160 }, { "epoch": 0.10983618362468539, "grad_norm": 0.7132537961006165, "learning_rate": 0.00029991233316255136, "loss": 4.5993, "step": 78170 }, { "epoch": 0.10985023456284897, "grad_norm": 0.7273306846618652, "learning_rate": 0.00029991208148805404, "loss": 4.7597, "step": 78180 }, { "epoch": 0.10986428550101254, "grad_norm": 0.7169373035430908, "learning_rate": 0.00029991182945292587, "loss": 4.821, "step": 78190 }, { "epoch": 0.10987833643917612, "grad_norm": 0.7016663551330566, "learning_rate": 0.0002999115770571675, "loss": 4.7387, "step": 78200 }, { "epoch": 0.1098923873773397, "grad_norm": 0.7153110504150391, "learning_rate": 0.00029991132430077946, "loss": 4.6402, "step": 78210 }, { "epoch": 0.10990643831550327, "grad_norm": 0.7164067625999451, "learning_rate": 
0.0002999110711837625, "loss": 4.7666, "step": 78220 }, { "epoch": 0.10992048925366686, "grad_norm": 0.7064258456230164, "learning_rate": 0.0002999108177061171, "loss": 4.7787, "step": 78230 }, { "epoch": 0.10993454019183044, "grad_norm": 0.7285356521606445, "learning_rate": 0.0002999105638678439, "loss": 4.8512, "step": 78240 }, { "epoch": 0.10994859112999401, "grad_norm": 0.7336903810501099, "learning_rate": 0.00029991030966894345, "loss": 4.7763, "step": 78250 }, { "epoch": 0.10996264206815759, "grad_norm": 0.7162684798240662, "learning_rate": 0.0002999100551094165, "loss": 4.755, "step": 78260 }, { "epoch": 0.10997669300632117, "grad_norm": 0.7184701561927795, "learning_rate": 0.0002999098001892636, "loss": 4.7169, "step": 78270 }, { "epoch": 0.10999074394448474, "grad_norm": 0.7117341756820679, "learning_rate": 0.00029990954490848536, "loss": 4.7958, "step": 78280 }, { "epoch": 0.11000479488264832, "grad_norm": 0.7427325248718262, "learning_rate": 0.00029990928926708235, "loss": 4.7711, "step": 78290 }, { "epoch": 0.1100188458208119, "grad_norm": 0.7277485728263855, "learning_rate": 0.00029990903326505527, "loss": 4.7021, "step": 78300 }, { "epoch": 0.11003289675897547, "grad_norm": 0.7296305894851685, "learning_rate": 0.0002999087769024047, "loss": 4.73, "step": 78310 }, { "epoch": 0.11004694769713905, "grad_norm": 0.7299262285232544, "learning_rate": 0.0002999085201791312, "loss": 4.8023, "step": 78320 }, { "epoch": 0.11006099863530264, "grad_norm": 0.7146306037902832, "learning_rate": 0.0002999082630952354, "loss": 4.6505, "step": 78330 }, { "epoch": 0.11007504957346621, "grad_norm": 0.7327890396118164, "learning_rate": 0.00029990800565071806, "loss": 4.7601, "step": 78340 }, { "epoch": 0.11008910051162979, "grad_norm": 0.716513454914093, "learning_rate": 0.00029990774784557965, "loss": 4.7456, "step": 78350 }, { "epoch": 0.11010315144979337, "grad_norm": 0.7176522612571716, "learning_rate": 0.0002999074896798208, "loss": 4.856, "step": 78360 }, { "epoch": 
0.11011720238795694, "grad_norm": 0.7281580567359924, "learning_rate": 0.0002999072311534422, "loss": 4.7127, "step": 78370 }, { "epoch": 0.11013125332612052, "grad_norm": 0.7641294002532959, "learning_rate": 0.00029990697226644445, "loss": 4.6719, "step": 78380 }, { "epoch": 0.1101453042642841, "grad_norm": 0.7364638447761536, "learning_rate": 0.00029990671301882813, "loss": 4.672, "step": 78390 }, { "epoch": 0.11015935520244767, "grad_norm": 0.7401412725448608, "learning_rate": 0.0002999064534105939, "loss": 4.8599, "step": 78400 }, { "epoch": 0.11017340614061125, "grad_norm": 0.7288405895233154, "learning_rate": 0.0002999061934417424, "loss": 4.7307, "step": 78410 }, { "epoch": 0.11018745707877482, "grad_norm": 0.7121359705924988, "learning_rate": 0.0002999059331122742, "loss": 4.7776, "step": 78420 }, { "epoch": 0.11020150801693841, "grad_norm": 0.6956414580345154, "learning_rate": 0.00029990567242219, "loss": 4.6886, "step": 78430 }, { "epoch": 0.11021555895510199, "grad_norm": 0.7270907163619995, "learning_rate": 0.00029990541137149036, "loss": 4.6126, "step": 78440 }, { "epoch": 0.11022960989326556, "grad_norm": 0.731920063495636, "learning_rate": 0.0002999051499601759, "loss": 4.7614, "step": 78450 }, { "epoch": 0.11024366083142914, "grad_norm": 0.7504376769065857, "learning_rate": 0.0002999048881882473, "loss": 4.6018, "step": 78460 }, { "epoch": 0.11025771176959272, "grad_norm": 0.7335795164108276, "learning_rate": 0.0002999046260557052, "loss": 4.6259, "step": 78470 }, { "epoch": 0.1102717627077563, "grad_norm": 0.7405069470405579, "learning_rate": 0.0002999043635625502, "loss": 4.7949, "step": 78480 }, { "epoch": 0.11028581364591987, "grad_norm": 0.7165108323097229, "learning_rate": 0.00029990410070878293, "loss": 4.8312, "step": 78490 }, { "epoch": 0.11029986458408345, "grad_norm": 0.7114459872245789, "learning_rate": 0.00029990383749440403, "loss": 4.6969, "step": 78500 }, { "epoch": 0.11031391552224702, "grad_norm": 0.7313840389251709, 
"learning_rate": 0.00029990357391941413, "loss": 4.7718, "step": 78510 }, { "epoch": 0.1103279664604106, "grad_norm": 0.7081531882286072, "learning_rate": 0.0002999033099838138, "loss": 4.7343, "step": 78520 }, { "epoch": 0.11034201739857417, "grad_norm": 0.7249537706375122, "learning_rate": 0.00029990304568760384, "loss": 4.7134, "step": 78530 }, { "epoch": 0.11035606833673776, "grad_norm": 0.7393092513084412, "learning_rate": 0.00029990278103078475, "loss": 4.7877, "step": 78540 }, { "epoch": 0.11037011927490134, "grad_norm": 0.7348365783691406, "learning_rate": 0.00029990251601335715, "loss": 4.693, "step": 78550 }, { "epoch": 0.11038417021306492, "grad_norm": 0.7228670120239258, "learning_rate": 0.00029990225063532185, "loss": 4.7112, "step": 78560 }, { "epoch": 0.11039822115122849, "grad_norm": 0.7742883563041687, "learning_rate": 0.0002999019848966793, "loss": 4.7404, "step": 78570 }, { "epoch": 0.11041227208939207, "grad_norm": 0.7462913393974304, "learning_rate": 0.00029990171879743015, "loss": 4.699, "step": 78580 }, { "epoch": 0.11042632302755565, "grad_norm": 0.7195731997489929, "learning_rate": 0.00029990145233757515, "loss": 4.745, "step": 78590 }, { "epoch": 0.11044037396571922, "grad_norm": 0.7176849246025085, "learning_rate": 0.00029990118551711484, "loss": 4.7519, "step": 78600 }, { "epoch": 0.1104544249038828, "grad_norm": 0.7326333522796631, "learning_rate": 0.00029990091833604997, "loss": 4.6879, "step": 78610 }, { "epoch": 0.11046847584204637, "grad_norm": 0.7096735239028931, "learning_rate": 0.00029990065079438114, "loss": 4.7295, "step": 78620 }, { "epoch": 0.11048252678020995, "grad_norm": 0.7012742757797241, "learning_rate": 0.00029990038289210894, "loss": 4.5788, "step": 78630 }, { "epoch": 0.11049657771837354, "grad_norm": 0.7373076677322388, "learning_rate": 0.00029990011462923403, "loss": 4.7283, "step": 78640 }, { "epoch": 0.11051062865653712, "grad_norm": 0.7383387684822083, "learning_rate": 0.0002998998460057571, "loss": 4.718, 
"step": 78650 }, { "epoch": 0.11052467959470069, "grad_norm": 0.7086196541786194, "learning_rate": 0.00029989957702167877, "loss": 4.7736, "step": 78660 }, { "epoch": 0.11053873053286427, "grad_norm": 0.7198120951652527, "learning_rate": 0.0002998993076769997, "loss": 4.7757, "step": 78670 }, { "epoch": 0.11055278147102784, "grad_norm": 0.7587843537330627, "learning_rate": 0.00029989903797172056, "loss": 4.801, "step": 78680 }, { "epoch": 0.11056683240919142, "grad_norm": 0.7173702716827393, "learning_rate": 0.0002998987679058419, "loss": 4.7577, "step": 78690 }, { "epoch": 0.110580883347355, "grad_norm": 0.7113364934921265, "learning_rate": 0.0002998984974793645, "loss": 4.7938, "step": 78700 }, { "epoch": 0.11059493428551857, "grad_norm": 0.7075303196907043, "learning_rate": 0.00029989822669228885, "loss": 4.8154, "step": 78710 }, { "epoch": 0.11060898522368215, "grad_norm": 0.749019205570221, "learning_rate": 0.00029989795554461574, "loss": 4.6428, "step": 78720 }, { "epoch": 0.11062303616184573, "grad_norm": 0.7221829295158386, "learning_rate": 0.00029989768403634583, "loss": 4.7743, "step": 78730 }, { "epoch": 0.11063708710000932, "grad_norm": 0.7469913959503174, "learning_rate": 0.0002998974121674797, "loss": 4.7479, "step": 78740 }, { "epoch": 0.11065113803817289, "grad_norm": 0.7294034361839294, "learning_rate": 0.000299897139938018, "loss": 4.6667, "step": 78750 }, { "epoch": 0.11066518897633647, "grad_norm": 0.7280092239379883, "learning_rate": 0.00029989686734796143, "loss": 4.8348, "step": 78760 }, { "epoch": 0.11067923991450004, "grad_norm": 0.7117518782615662, "learning_rate": 0.0002998965943973106, "loss": 4.7427, "step": 78770 }, { "epoch": 0.11069329085266362, "grad_norm": 0.7216711640357971, "learning_rate": 0.0002998963210860662, "loss": 4.6675, "step": 78780 }, { "epoch": 0.1107073417908272, "grad_norm": 0.7047522664070129, "learning_rate": 0.00029989604741422897, "loss": 4.7768, "step": 78790 }, { "epoch": 0.11072139272899077, "grad_norm": 
0.7834486961364746, "learning_rate": 0.0002998957733817994, "loss": 4.5577, "step": 78800 }, { "epoch": 0.11073544366715435, "grad_norm": 0.6964229345321655, "learning_rate": 0.00029989549898877825, "loss": 4.7269, "step": 78810 }, { "epoch": 0.11074949460531792, "grad_norm": 0.7209952473640442, "learning_rate": 0.00029989522423516614, "loss": 4.6683, "step": 78820 }, { "epoch": 0.1107635455434815, "grad_norm": 0.7614384293556213, "learning_rate": 0.0002998949491209638, "loss": 4.6271, "step": 78830 }, { "epoch": 0.11077759648164508, "grad_norm": 0.706362247467041, "learning_rate": 0.00029989467364617176, "loss": 4.7318, "step": 78840 }, { "epoch": 0.11079164741980867, "grad_norm": 0.7691898941993713, "learning_rate": 0.0002998943978107908, "loss": 4.7161, "step": 78850 }, { "epoch": 0.11080569835797224, "grad_norm": 0.6833986043930054, "learning_rate": 0.00029989412161482155, "loss": 4.7538, "step": 78860 }, { "epoch": 0.11081974929613582, "grad_norm": 0.7353855967521667, "learning_rate": 0.0002998938450582647, "loss": 4.8051, "step": 78870 }, { "epoch": 0.1108338002342994, "grad_norm": 0.7171785831451416, "learning_rate": 0.00029989356814112087, "loss": 4.7027, "step": 78880 }, { "epoch": 0.11084785117246297, "grad_norm": 0.6899160146713257, "learning_rate": 0.00029989329086339074, "loss": 4.7299, "step": 78890 }, { "epoch": 0.11086190211062655, "grad_norm": 0.6934748888015747, "learning_rate": 0.000299893013225075, "loss": 4.7226, "step": 78900 }, { "epoch": 0.11087595304879012, "grad_norm": 0.7488577961921692, "learning_rate": 0.0002998927352261743, "loss": 4.7633, "step": 78910 }, { "epoch": 0.1108900039869537, "grad_norm": 0.6993347406387329, "learning_rate": 0.0002998924568666893, "loss": 4.6352, "step": 78920 }, { "epoch": 0.11090405492511728, "grad_norm": 0.7227916717529297, "learning_rate": 0.00029989217814662067, "loss": 4.7705, "step": 78930 }, { "epoch": 0.11091810586328085, "grad_norm": 0.7275491952896118, "learning_rate": 0.00029989189906596907, 
"loss": 4.7141, "step": 78940 }, { "epoch": 0.11093215680144444, "grad_norm": 0.7532659769058228, "learning_rate": 0.0002998916196247352, "loss": 4.6849, "step": 78950 }, { "epoch": 0.11094620773960802, "grad_norm": 0.7284137606620789, "learning_rate": 0.00029989133982291974, "loss": 4.7539, "step": 78960 }, { "epoch": 0.1109602586777716, "grad_norm": 0.7159876823425293, "learning_rate": 0.0002998910596605233, "loss": 4.7526, "step": 78970 }, { "epoch": 0.11097430961593517, "grad_norm": 0.7106112837791443, "learning_rate": 0.00029989077913754667, "loss": 4.7353, "step": 78980 }, { "epoch": 0.11098836055409875, "grad_norm": 0.7276374697685242, "learning_rate": 0.0002998904982539904, "loss": 4.7902, "step": 78990 }, { "epoch": 0.11100241149226232, "grad_norm": 0.7346372604370117, "learning_rate": 0.0002998902170098552, "loss": 4.8002, "step": 79000 }, { "epoch": 0.1110164624304259, "grad_norm": 0.7175330519676208, "learning_rate": 0.00029988993540514187, "loss": 4.7478, "step": 79010 }, { "epoch": 0.11103051336858948, "grad_norm": 0.7902714014053345, "learning_rate": 0.00029988965343985085, "loss": 4.6789, "step": 79020 }, { "epoch": 0.11104456430675305, "grad_norm": 0.7199444770812988, "learning_rate": 0.00029988937111398304, "loss": 4.7025, "step": 79030 }, { "epoch": 0.11105861524491663, "grad_norm": 0.7127149105072021, "learning_rate": 0.000299889088427539, "loss": 4.6838, "step": 79040 }, { "epoch": 0.11107266618308022, "grad_norm": 0.700092613697052, "learning_rate": 0.0002998888053805195, "loss": 4.7037, "step": 79050 }, { "epoch": 0.1110867171212438, "grad_norm": 0.704186737537384, "learning_rate": 0.0002998885219729251, "loss": 4.9441, "step": 79060 }, { "epoch": 0.11110076805940737, "grad_norm": 0.714832603931427, "learning_rate": 0.00029988823820475654, "loss": 4.8514, "step": 79070 }, { "epoch": 0.11111481899757095, "grad_norm": 0.7268180251121521, "learning_rate": 0.0002998879540760145, "loss": 4.708, "step": 79080 }, { "epoch": 0.11112886993573452, 
"grad_norm": 0.7234380841255188, "learning_rate": 0.0002998876695866997, "loss": 4.7114, "step": 79090 }, { "epoch": 0.1111429208738981, "grad_norm": 0.7124701738357544, "learning_rate": 0.0002998873847368128, "loss": 4.7032, "step": 79100 }, { "epoch": 0.11115697181206168, "grad_norm": 0.7144205570220947, "learning_rate": 0.0002998870995263545, "loss": 4.7765, "step": 79110 }, { "epoch": 0.11117102275022525, "grad_norm": 0.7110472917556763, "learning_rate": 0.00029988684252865396, "loss": 4.7136, "step": 79120 }, { "epoch": 0.11118507368838883, "grad_norm": 0.7378934025764465, "learning_rate": 0.0002998865566331118, "loss": 4.8435, "step": 79130 }, { "epoch": 0.1111991246265524, "grad_norm": 0.7095884084701538, "learning_rate": 0.00029988627037700027, "loss": 4.8469, "step": 79140 }, { "epoch": 0.11121317556471598, "grad_norm": 0.7001661062240601, "learning_rate": 0.00029988598376031996, "loss": 4.8903, "step": 79150 }, { "epoch": 0.11122722650287957, "grad_norm": 0.7070997357368469, "learning_rate": 0.00029988569678307157, "loss": 4.8263, "step": 79160 }, { "epoch": 0.11124127744104315, "grad_norm": 0.7273538112640381, "learning_rate": 0.00029988540944525586, "loss": 4.758, "step": 79170 }, { "epoch": 0.11125532837920672, "grad_norm": 0.7323654890060425, "learning_rate": 0.0002998851217468735, "loss": 4.645, "step": 79180 }, { "epoch": 0.1112693793173703, "grad_norm": 0.6991866230964661, "learning_rate": 0.0002998848336879251, "loss": 4.687, "step": 79190 }, { "epoch": 0.11128343025553387, "grad_norm": 0.7104476094245911, "learning_rate": 0.0002998845452684114, "loss": 4.7952, "step": 79200 }, { "epoch": 0.11129748119369745, "grad_norm": 0.7157678604125977, "learning_rate": 0.0002998842564883332, "loss": 4.7413, "step": 79210 }, { "epoch": 0.11131153213186103, "grad_norm": 0.7549179196357727, "learning_rate": 0.0002998839673476911, "loss": 4.6288, "step": 79220 }, { "epoch": 0.1113255830700246, "grad_norm": 0.7367541193962097, "learning_rate": 
0.00029988367784648573, "loss": 4.8033, "step": 79230 }, { "epoch": 0.11133963400818818, "grad_norm": 0.7407969832420349, "learning_rate": 0.0002998833879847179, "loss": 4.7142, "step": 79240 }, { "epoch": 0.11135368494635176, "grad_norm": 0.7445090413093567, "learning_rate": 0.0002998830977623883, "loss": 4.6799, "step": 79250 }, { "epoch": 0.11136773588451535, "grad_norm": 0.7357890605926514, "learning_rate": 0.0002998828071794975, "loss": 4.6795, "step": 79260 }, { "epoch": 0.11138178682267892, "grad_norm": 0.7026675343513489, "learning_rate": 0.00029988251623604633, "loss": 4.7248, "step": 79270 }, { "epoch": 0.1113958377608425, "grad_norm": 0.7261384129524231, "learning_rate": 0.00029988222493203547, "loss": 4.8116, "step": 79280 }, { "epoch": 0.11140988869900607, "grad_norm": 0.7369160056114197, "learning_rate": 0.0002998819332674656, "loss": 4.6674, "step": 79290 }, { "epoch": 0.11142393963716965, "grad_norm": 0.7252683043479919, "learning_rate": 0.00029988164124233744, "loss": 4.7722, "step": 79300 }, { "epoch": 0.11143799057533323, "grad_norm": 0.694120466709137, "learning_rate": 0.0002998813488566516, "loss": 4.7246, "step": 79310 }, { "epoch": 0.1114520415134968, "grad_norm": 0.7220812439918518, "learning_rate": 0.0002998810561104089, "loss": 4.7342, "step": 79320 }, { "epoch": 0.11146609245166038, "grad_norm": 0.7195317149162292, "learning_rate": 0.00029988076300361004, "loss": 4.5972, "step": 79330 }, { "epoch": 0.11148014338982395, "grad_norm": 0.7166000008583069, "learning_rate": 0.00029988046953625567, "loss": 4.5616, "step": 79340 }, { "epoch": 0.11149419432798753, "grad_norm": 0.7502967715263367, "learning_rate": 0.0002998801757083465, "loss": 4.8734, "step": 79350 }, { "epoch": 0.11150824526615112, "grad_norm": 0.7110496163368225, "learning_rate": 0.00029987988151988326, "loss": 4.7498, "step": 79360 }, { "epoch": 0.1115222962043147, "grad_norm": 0.7079729437828064, "learning_rate": 0.0002998795869708666, "loss": 4.7823, "step": 79370 }, { 
"epoch": 0.11153634714247827, "grad_norm": 0.7025529146194458, "learning_rate": 0.00029987929206129737, "loss": 4.7594, "step": 79380 }, { "epoch": 0.11155039808064185, "grad_norm": 0.6984058022499084, "learning_rate": 0.00029987899679117613, "loss": 4.8166, "step": 79390 }, { "epoch": 0.11156444901880543, "grad_norm": 0.7246695160865784, "learning_rate": 0.0002998787011605037, "loss": 4.7495, "step": 79400 }, { "epoch": 0.111578499956969, "grad_norm": 0.7176758050918579, "learning_rate": 0.0002998784051692807, "loss": 4.661, "step": 79410 }, { "epoch": 0.11159255089513258, "grad_norm": 0.719412624835968, "learning_rate": 0.00029987810881750785, "loss": 4.7986, "step": 79420 }, { "epoch": 0.11160660183329615, "grad_norm": 0.7081352472305298, "learning_rate": 0.00029987781210518593, "loss": 4.6177, "step": 79430 }, { "epoch": 0.11162065277145973, "grad_norm": 0.7022585272789001, "learning_rate": 0.0002998775150323156, "loss": 4.7622, "step": 79440 }, { "epoch": 0.1116347037096233, "grad_norm": 0.7345691919326782, "learning_rate": 0.0002998772175988976, "loss": 4.7247, "step": 79450 }, { "epoch": 0.11164875464778688, "grad_norm": 0.7096884250640869, "learning_rate": 0.00029987691980493266, "loss": 4.6959, "step": 79460 }, { "epoch": 0.11166280558595047, "grad_norm": 0.7393691539764404, "learning_rate": 0.00029987662165042143, "loss": 4.6315, "step": 79470 }, { "epoch": 0.11167685652411405, "grad_norm": 0.7320327758789062, "learning_rate": 0.0002998763231353647, "loss": 4.7236, "step": 79480 }, { "epoch": 0.11169090746227762, "grad_norm": 0.730957567691803, "learning_rate": 0.00029987602425976317, "loss": 4.5586, "step": 79490 }, { "epoch": 0.1117049584004412, "grad_norm": 0.7240452766418457, "learning_rate": 0.00029987572502361754, "loss": 4.7775, "step": 79500 }, { "epoch": 0.11171900933860478, "grad_norm": 0.7169708013534546, "learning_rate": 0.0002998754254269285, "loss": 4.8488, "step": 79510 }, { "epoch": 0.11173306027676835, "grad_norm": 0.7280612587928772, 
"learning_rate": 0.00029987512546969687, "loss": 4.6919, "step": 79520 }, { "epoch": 0.11174711121493193, "grad_norm": 0.7089032530784607, "learning_rate": 0.0002998748251519233, "loss": 4.7049, "step": 79530 }, { "epoch": 0.1117611621530955, "grad_norm": 0.7215970158576965, "learning_rate": 0.0002998745244736085, "loss": 4.8457, "step": 79540 }, { "epoch": 0.11177521309125908, "grad_norm": 0.7137922048568726, "learning_rate": 0.00029987422343475317, "loss": 4.6256, "step": 79550 }, { "epoch": 0.11178926402942266, "grad_norm": 0.768875241279602, "learning_rate": 0.00029987392203535815, "loss": 4.7273, "step": 79560 }, { "epoch": 0.11180331496758625, "grad_norm": 0.7097663879394531, "learning_rate": 0.0002998736202754241, "loss": 4.7569, "step": 79570 }, { "epoch": 0.11181736590574982, "grad_norm": 0.7040906548500061, "learning_rate": 0.00029987331815495175, "loss": 4.8235, "step": 79580 }, { "epoch": 0.1118314168439134, "grad_norm": 0.734401285648346, "learning_rate": 0.0002998730156739418, "loss": 4.7113, "step": 79590 }, { "epoch": 0.11184546778207698, "grad_norm": 0.7665032148361206, "learning_rate": 0.0002998727128323949, "loss": 4.7411, "step": 79600 }, { "epoch": 0.11185951872024055, "grad_norm": 0.7201694846153259, "learning_rate": 0.000299872409630312, "loss": 4.5992, "step": 79610 }, { "epoch": 0.11187356965840413, "grad_norm": 0.7197522521018982, "learning_rate": 0.00029987210606769365, "loss": 4.7049, "step": 79620 }, { "epoch": 0.1118876205965677, "grad_norm": 0.734275221824646, "learning_rate": 0.0002998718021445406, "loss": 4.761, "step": 79630 }, { "epoch": 0.11190167153473128, "grad_norm": 0.7246469259262085, "learning_rate": 0.0002998714978608537, "loss": 4.6607, "step": 79640 }, { "epoch": 0.11191572247289486, "grad_norm": 0.7006386518478394, "learning_rate": 0.00029987119321663356, "loss": 4.6202, "step": 79650 }, { "epoch": 0.11192977341105843, "grad_norm": 0.7070479989051819, "learning_rate": 0.00029987088821188093, "loss": 4.817, "step": 79660 
}, { "epoch": 0.11194382434922202, "grad_norm": 0.7431702613830566, "learning_rate": 0.00029987058284659655, "loss": 4.9736, "step": 79670 }, { "epoch": 0.1119578752873856, "grad_norm": 0.7081708908081055, "learning_rate": 0.0002998702771207812, "loss": 4.7651, "step": 79680 }, { "epoch": 0.11197192622554918, "grad_norm": 0.7065561413764954, "learning_rate": 0.0002998699710344356, "loss": 4.8145, "step": 79690 }, { "epoch": 0.11198597716371275, "grad_norm": 0.7250800728797913, "learning_rate": 0.00029986966458756046, "loss": 4.7197, "step": 79700 }, { "epoch": 0.11200002810187633, "grad_norm": 0.7182665467262268, "learning_rate": 0.0002998693577801565, "loss": 4.6853, "step": 79710 }, { "epoch": 0.1120140790400399, "grad_norm": 0.721691906452179, "learning_rate": 0.0002998690506122245, "loss": 4.7432, "step": 79720 }, { "epoch": 0.11202812997820348, "grad_norm": 0.7138781547546387, "learning_rate": 0.0002998687430837652, "loss": 4.8657, "step": 79730 }, { "epoch": 0.11204218091636706, "grad_norm": 0.7386598587036133, "learning_rate": 0.0002998684351947793, "loss": 4.5605, "step": 79740 }, { "epoch": 0.11205623185453063, "grad_norm": 0.72069251537323, "learning_rate": 0.0002998681269452676, "loss": 4.7569, "step": 79750 }, { "epoch": 0.11207028279269421, "grad_norm": 0.7185590863227844, "learning_rate": 0.0002998678183352308, "loss": 4.8125, "step": 79760 }, { "epoch": 0.11208433373085779, "grad_norm": 0.71347576379776, "learning_rate": 0.00029986750936466963, "loss": 4.77, "step": 79770 }, { "epoch": 0.11209838466902138, "grad_norm": 0.7057868242263794, "learning_rate": 0.00029986720003358485, "loss": 4.7322, "step": 79780 }, { "epoch": 0.11211243560718495, "grad_norm": 0.7145351767539978, "learning_rate": 0.0002998668903419772, "loss": 4.7988, "step": 79790 }, { "epoch": 0.11212648654534853, "grad_norm": 0.7262371778488159, "learning_rate": 0.00029986658028984746, "loss": 4.7664, "step": 79800 }, { "epoch": 0.1121405374835121, "grad_norm": 0.702280580997467, 
"learning_rate": 0.0002998662698771963, "loss": 4.8152, "step": 79810 }, { "epoch": 0.11215458842167568, "grad_norm": 0.7286804914474487, "learning_rate": 0.00029986595910402457, "loss": 4.7686, "step": 79820 }, { "epoch": 0.11216863935983926, "grad_norm": 0.7360864281654358, "learning_rate": 0.0002998656479703329, "loss": 4.7821, "step": 79830 }, { "epoch": 0.11218269029800283, "grad_norm": 0.7273938059806824, "learning_rate": 0.0002998653364761221, "loss": 4.8064, "step": 79840 }, { "epoch": 0.11219674123616641, "grad_norm": 0.7493408918380737, "learning_rate": 0.000299865024621393, "loss": 4.7691, "step": 79850 }, { "epoch": 0.11221079217432998, "grad_norm": 0.7027277946472168, "learning_rate": 0.00029986471240614623, "loss": 4.5972, "step": 79860 }, { "epoch": 0.11222484311249356, "grad_norm": 0.7099219560623169, "learning_rate": 0.00029986439983038256, "loss": 4.8091, "step": 79870 }, { "epoch": 0.11223889405065715, "grad_norm": 0.726952314376831, "learning_rate": 0.00029986408689410275, "loss": 4.6214, "step": 79880 }, { "epoch": 0.11225294498882073, "grad_norm": 0.7291332483291626, "learning_rate": 0.0002998637735973076, "loss": 4.7047, "step": 79890 }, { "epoch": 0.1122669959269843, "grad_norm": 0.7252445220947266, "learning_rate": 0.00029986345993999786, "loss": 4.7274, "step": 79900 }, { "epoch": 0.11228104686514788, "grad_norm": 0.7106611132621765, "learning_rate": 0.0002998631459221742, "loss": 4.7364, "step": 79910 }, { "epoch": 0.11229509780331146, "grad_norm": 0.7336950898170471, "learning_rate": 0.0002998628315438374, "loss": 4.8461, "step": 79920 }, { "epoch": 0.11230914874147503, "grad_norm": 0.7143675088882446, "learning_rate": 0.00029986251680498826, "loss": 4.6871, "step": 79930 }, { "epoch": 0.11232319967963861, "grad_norm": 0.7315747737884521, "learning_rate": 0.0002998622017056276, "loss": 4.7463, "step": 79940 }, { "epoch": 0.11233725061780218, "grad_norm": 0.7288627028465271, "learning_rate": 0.00029986188624575604, "loss": 4.7344, "step": 
79950 }, { "epoch": 0.11235130155596576, "grad_norm": 0.7390293478965759, "learning_rate": 0.0002998615704253744, "loss": 4.7487, "step": 79960 }, { "epoch": 0.11236535249412934, "grad_norm": 0.7393094301223755, "learning_rate": 0.00029986125424448344, "loss": 4.6628, "step": 79970 }, { "epoch": 0.11237940343229293, "grad_norm": 0.7108803391456604, "learning_rate": 0.0002998609377030839, "loss": 4.6948, "step": 79980 }, { "epoch": 0.1123934543704565, "grad_norm": 0.7048096656799316, "learning_rate": 0.00029986062080117657, "loss": 4.7815, "step": 79990 }, { "epoch": 0.11240750530862008, "grad_norm": 0.722067654132843, "learning_rate": 0.00029986030353876226, "loss": 4.6608, "step": 80000 }, { "epoch": 0.11242155624678365, "grad_norm": 0.6960885524749756, "learning_rate": 0.0002998599859158416, "loss": 4.7304, "step": 80010 }, { "epoch": 0.11243560718494723, "grad_norm": 0.7393103241920471, "learning_rate": 0.0002998596679324155, "loss": 4.6923, "step": 80020 }, { "epoch": 0.11244965812311081, "grad_norm": 0.7443263530731201, "learning_rate": 0.0002998593495884846, "loss": 4.7715, "step": 80030 }, { "epoch": 0.11246370906127438, "grad_norm": 0.7111883163452148, "learning_rate": 0.00029985903088404974, "loss": 4.6946, "step": 80040 }, { "epoch": 0.11247775999943796, "grad_norm": 0.7264695763587952, "learning_rate": 0.00029985871181911164, "loss": 4.6478, "step": 80050 }, { "epoch": 0.11249181093760154, "grad_norm": 0.7133517265319824, "learning_rate": 0.0002998583923936711, "loss": 4.7459, "step": 80060 }, { "epoch": 0.11250586187576511, "grad_norm": 0.7024343609809875, "learning_rate": 0.0002998580726077289, "loss": 4.7401, "step": 80070 }, { "epoch": 0.1125199128139287, "grad_norm": 0.7126244306564331, "learning_rate": 0.0002998577524612858, "loss": 4.8324, "step": 80080 }, { "epoch": 0.11253396375209228, "grad_norm": 0.750248908996582, "learning_rate": 0.0002998574319543426, "loss": 4.6754, "step": 80090 }, { "epoch": 0.11254801469025585, "grad_norm": 
0.6958588361740112, "learning_rate": 0.0002998571110869, "loss": 4.7199, "step": 80100 }, { "epoch": 0.11256206562841943, "grad_norm": 0.7012098431587219, "learning_rate": 0.0002998567898589588, "loss": 4.5606, "step": 80110 }, { "epoch": 0.112576116566583, "grad_norm": 0.739078938961029, "learning_rate": 0.0002998564682705198, "loss": 4.8491, "step": 80120 }, { "epoch": 0.11259016750474658, "grad_norm": 0.745468258857727, "learning_rate": 0.00029985614632158374, "loss": 4.7074, "step": 80130 }, { "epoch": 0.11260421844291016, "grad_norm": 0.7302658557891846, "learning_rate": 0.0002998558240121514, "loss": 4.8606, "step": 80140 }, { "epoch": 0.11261826938107374, "grad_norm": 0.7549187541007996, "learning_rate": 0.00029985550134222356, "loss": 4.7794, "step": 80150 }, { "epoch": 0.11263232031923731, "grad_norm": 0.7186284065246582, "learning_rate": 0.000299855178311801, "loss": 4.6571, "step": 80160 }, { "epoch": 0.11264637125740089, "grad_norm": 0.7297990918159485, "learning_rate": 0.0002998548549208845, "loss": 4.691, "step": 80170 }, { "epoch": 0.11266042219556446, "grad_norm": 0.6997066140174866, "learning_rate": 0.00029985453116947484, "loss": 4.7066, "step": 80180 }, { "epoch": 0.11267447313372805, "grad_norm": 0.7195040583610535, "learning_rate": 0.00029985420705757275, "loss": 4.7252, "step": 80190 }, { "epoch": 0.11268852407189163, "grad_norm": 0.699199914932251, "learning_rate": 0.0002998538825851791, "loss": 4.7883, "step": 80200 }, { "epoch": 0.1127025750100552, "grad_norm": 0.6951486468315125, "learning_rate": 0.00029985355775229464, "loss": 4.8286, "step": 80210 }, { "epoch": 0.11271662594821878, "grad_norm": 0.7475593686103821, "learning_rate": 0.0002998532325589201, "loss": 4.7873, "step": 80220 }, { "epoch": 0.11273067688638236, "grad_norm": 0.7170931100845337, "learning_rate": 0.00029985290700505627, "loss": 4.6795, "step": 80230 }, { "epoch": 0.11274472782454593, "grad_norm": 0.7242776155471802, "learning_rate": 0.000299852581090704, "loss": 
4.7406, "step": 80240 }, { "epoch": 0.11275877876270951, "grad_norm": 0.6967573165893555, "learning_rate": 0.000299852254815864, "loss": 4.8199, "step": 80250 }, { "epoch": 0.11277282970087309, "grad_norm": 0.7147936820983887, "learning_rate": 0.0002998519281805371, "loss": 4.7201, "step": 80260 }, { "epoch": 0.11278688063903666, "grad_norm": 0.6991884708404541, "learning_rate": 0.0002998516011847241, "loss": 4.6309, "step": 80270 }, { "epoch": 0.11280093157720024, "grad_norm": 0.7079862356185913, "learning_rate": 0.0002998512738284257, "loss": 4.7821, "step": 80280 }, { "epoch": 0.11281498251536383, "grad_norm": 0.7197107076644897, "learning_rate": 0.00029985094611164286, "loss": 4.8384, "step": 80290 }, { "epoch": 0.1128290334535274, "grad_norm": 0.7062892317771912, "learning_rate": 0.0002998506180343762, "loss": 4.6175, "step": 80300 }, { "epoch": 0.11284308439169098, "grad_norm": 0.7397512793540955, "learning_rate": 0.0002998502895966265, "loss": 4.7381, "step": 80310 }, { "epoch": 0.11285713532985456, "grad_norm": 0.7265103459358215, "learning_rate": 0.0002998499607983947, "loss": 4.7411, "step": 80320 }, { "epoch": 0.11287118626801813, "grad_norm": 0.7303242087364197, "learning_rate": 0.00029984963163968146, "loss": 4.6545, "step": 80330 }, { "epoch": 0.11288523720618171, "grad_norm": 0.6990224719047546, "learning_rate": 0.0002998493021204877, "loss": 4.715, "step": 80340 }, { "epoch": 0.11289928814434529, "grad_norm": 0.7091715931892395, "learning_rate": 0.000299848972240814, "loss": 4.7124, "step": 80350 }, { "epoch": 0.11291333908250886, "grad_norm": 0.718712329864502, "learning_rate": 0.0002998486420006614, "loss": 4.6979, "step": 80360 }, { "epoch": 0.11292739002067244, "grad_norm": 0.7297937870025635, "learning_rate": 0.00029984831140003056, "loss": 4.7455, "step": 80370 }, { "epoch": 0.11294144095883601, "grad_norm": 0.7224186062812805, "learning_rate": 0.0002998479804389222, "loss": 4.6676, "step": 80380 }, { "epoch": 0.1129554918969996, "grad_norm": 
0.72234046459198, "learning_rate": 0.0002998476491173373, "loss": 4.7358, "step": 80390 }, { "epoch": 0.11296954283516318, "grad_norm": 0.7080051898956299, "learning_rate": 0.0002998473174352766, "loss": 4.6412, "step": 80400 }, { "epoch": 0.11298359377332676, "grad_norm": 0.7055094242095947, "learning_rate": 0.00029984698539274075, "loss": 4.7382, "step": 80410 }, { "epoch": 0.11299764471149033, "grad_norm": 0.7096608281135559, "learning_rate": 0.00029984665298973074, "loss": 4.6255, "step": 80420 }, { "epoch": 0.11301169564965391, "grad_norm": 0.7352139949798584, "learning_rate": 0.0002998463202262473, "loss": 4.6637, "step": 80430 }, { "epoch": 0.11302574658781749, "grad_norm": 0.692638635635376, "learning_rate": 0.0002998459871022912, "loss": 4.7391, "step": 80440 }, { "epoch": 0.11303979752598106, "grad_norm": 0.7054898738861084, "learning_rate": 0.00029984565361786326, "loss": 4.7407, "step": 80450 }, { "epoch": 0.11305384846414464, "grad_norm": 0.7301256656646729, "learning_rate": 0.0002998453197729643, "loss": 4.8302, "step": 80460 }, { "epoch": 0.11306789940230821, "grad_norm": 0.7004954814910889, "learning_rate": 0.00029984498556759516, "loss": 4.7458, "step": 80470 }, { "epoch": 0.11308195034047179, "grad_norm": 0.7302805781364441, "learning_rate": 0.00029984465100175653, "loss": 4.6252, "step": 80480 }, { "epoch": 0.11309600127863537, "grad_norm": 0.7323416471481323, "learning_rate": 0.0002998443160754493, "loss": 4.7379, "step": 80490 }, { "epoch": 0.11311005221679896, "grad_norm": 0.7046477198600769, "learning_rate": 0.00029984398078867423, "loss": 4.7576, "step": 80500 }, { "epoch": 0.11312410315496253, "grad_norm": 0.719993531703949, "learning_rate": 0.00029984364514143224, "loss": 4.7161, "step": 80510 }, { "epoch": 0.11313815409312611, "grad_norm": 0.7029352784156799, "learning_rate": 0.000299843309133724, "loss": 4.7524, "step": 80520 }, { "epoch": 0.11315220503128968, "grad_norm": 0.7699235677719116, "learning_rate": 0.00029984297276555033, 
"loss": 4.7803, "step": 80530 }, { "epoch": 0.11316625596945326, "grad_norm": 0.7614515423774719, "learning_rate": 0.0002998426360369121, "loss": 4.6891, "step": 80540 }, { "epoch": 0.11318030690761684, "grad_norm": 0.7042279243469238, "learning_rate": 0.00029984229894781006, "loss": 4.7154, "step": 80550 }, { "epoch": 0.11319435784578041, "grad_norm": 0.7094720602035522, "learning_rate": 0.00029984196149824514, "loss": 4.7491, "step": 80560 }, { "epoch": 0.11320840878394399, "grad_norm": 0.722616970539093, "learning_rate": 0.000299841623688218, "loss": 4.6867, "step": 80570 }, { "epoch": 0.11322245972210757, "grad_norm": 0.719673216342926, "learning_rate": 0.0002998412855177296, "loss": 4.6543, "step": 80580 }, { "epoch": 0.11323651066027114, "grad_norm": 0.7150025963783264, "learning_rate": 0.0002998409469867806, "loss": 4.7076, "step": 80590 }, { "epoch": 0.11325056159843473, "grad_norm": 0.7299203276634216, "learning_rate": 0.0002998406080953719, "loss": 4.7068, "step": 80600 }, { "epoch": 0.11326461253659831, "grad_norm": 0.7360802292823792, "learning_rate": 0.00029984026884350437, "loss": 4.6402, "step": 80610 }, { "epoch": 0.11327866347476188, "grad_norm": 0.7183154225349426, "learning_rate": 0.0002998399292311787, "loss": 4.6114, "step": 80620 }, { "epoch": 0.11329271441292546, "grad_norm": 0.6950372457504272, "learning_rate": 0.0002998395892583958, "loss": 4.7284, "step": 80630 }, { "epoch": 0.11330676535108904, "grad_norm": 0.7128282785415649, "learning_rate": 0.0002998392489251564, "loss": 4.8155, "step": 80640 }, { "epoch": 0.11332081628925261, "grad_norm": 0.7069471478462219, "learning_rate": 0.00029983890823146147, "loss": 4.7038, "step": 80650 }, { "epoch": 0.11333486722741619, "grad_norm": 0.7087200880050659, "learning_rate": 0.00029983856717731165, "loss": 4.5934, "step": 80660 }, { "epoch": 0.11334891816557977, "grad_norm": 0.7316790819168091, "learning_rate": 0.0002998382257627079, "loss": 4.6918, "step": 80670 }, { "epoch": 0.11336296910374334, 
"grad_norm": 0.7532024383544922, "learning_rate": 0.00029983788398765094, "loss": 4.667, "step": 80680 }, { "epoch": 0.11337702004190692, "grad_norm": 0.7689909338951111, "learning_rate": 0.00029983754185214167, "loss": 4.6237, "step": 80690 }, { "epoch": 0.11339107098007051, "grad_norm": 0.7248286604881287, "learning_rate": 0.0002998371993561809, "loss": 4.7349, "step": 80700 }, { "epoch": 0.11340512191823408, "grad_norm": 0.7164115309715271, "learning_rate": 0.00029983685649976943, "loss": 4.7656, "step": 80710 }, { "epoch": 0.11341917285639766, "grad_norm": 0.6937947273254395, "learning_rate": 0.0002998365132829081, "loss": 4.7744, "step": 80720 }, { "epoch": 0.11343322379456124, "grad_norm": 0.700322687625885, "learning_rate": 0.00029983616970559766, "loss": 4.7036, "step": 80730 }, { "epoch": 0.11344727473272481, "grad_norm": 0.7567490935325623, "learning_rate": 0.00029983582576783907, "loss": 4.7032, "step": 80740 }, { "epoch": 0.11346132567088839, "grad_norm": 0.7181823253631592, "learning_rate": 0.0002998354814696331, "loss": 4.8266, "step": 80750 }, { "epoch": 0.11347537660905196, "grad_norm": 0.7397735714912415, "learning_rate": 0.0002998351368109805, "loss": 4.6778, "step": 80760 }, { "epoch": 0.11348942754721554, "grad_norm": 0.698970377445221, "learning_rate": 0.0002998347917918822, "loss": 4.6136, "step": 80770 }, { "epoch": 0.11350347848537912, "grad_norm": 0.6962791085243225, "learning_rate": 0.00029983444641233897, "loss": 4.687, "step": 80780 }, { "epoch": 0.11351752942354269, "grad_norm": 0.7418192028999329, "learning_rate": 0.0002998341006723517, "loss": 4.7033, "step": 80790 }, { "epoch": 0.11353158036170627, "grad_norm": 0.7161971926689148, "learning_rate": 0.00029983375457192117, "loss": 4.8678, "step": 80800 }, { "epoch": 0.11354563129986986, "grad_norm": 0.711134672164917, "learning_rate": 0.0002998334081110483, "loss": 4.7424, "step": 80810 }, { "epoch": 0.11355968223803344, "grad_norm": 0.7014956474304199, "learning_rate": 
0.00029983306128973376, "loss": 4.7054, "step": 80820 }, { "epoch": 0.11357373317619701, "grad_norm": 0.7056500911712646, "learning_rate": 0.00029983271410797857, "loss": 4.7097, "step": 80830 }, { "epoch": 0.11358778411436059, "grad_norm": 0.7953014373779297, "learning_rate": 0.0002998323665657834, "loss": 4.7563, "step": 80840 }, { "epoch": 0.11360183505252416, "grad_norm": 0.7236987948417664, "learning_rate": 0.00029983201866314924, "loss": 4.6917, "step": 80850 }, { "epoch": 0.11361588599068774, "grad_norm": 0.7303849458694458, "learning_rate": 0.0002998316704000768, "loss": 4.6084, "step": 80860 }, { "epoch": 0.11362993692885132, "grad_norm": 0.6903831958770752, "learning_rate": 0.00029983132177656695, "loss": 4.768, "step": 80870 }, { "epoch": 0.11364398786701489, "grad_norm": 0.744113564491272, "learning_rate": 0.0002998309727926206, "loss": 4.6886, "step": 80880 }, { "epoch": 0.11365803880517847, "grad_norm": 0.7207052707672119, "learning_rate": 0.0002998306234482385, "loss": 4.7185, "step": 80890 }, { "epoch": 0.11367208974334204, "grad_norm": 0.7463589906692505, "learning_rate": 0.00029983027374342155, "loss": 4.7078, "step": 80900 }, { "epoch": 0.11368614068150563, "grad_norm": 1.0752589702606201, "learning_rate": 0.0002998299236781705, "loss": 4.6764, "step": 80910 }, { "epoch": 0.11370019161966921, "grad_norm": 0.7272537350654602, "learning_rate": 0.0002998295732524863, "loss": 4.8209, "step": 80920 }, { "epoch": 0.11371424255783279, "grad_norm": 0.7111218571662903, "learning_rate": 0.00029982922246636975, "loss": 4.71, "step": 80930 }, { "epoch": 0.11372829349599636, "grad_norm": 0.7262582182884216, "learning_rate": 0.0002998288713198217, "loss": 4.7863, "step": 80940 }, { "epoch": 0.11374234443415994, "grad_norm": 0.7486706972122192, "learning_rate": 0.00029982851981284303, "loss": 4.7294, "step": 80950 }, { "epoch": 0.11375639537232352, "grad_norm": 0.764664351940155, "learning_rate": 0.00029982816794543445, "loss": 4.714, "step": 80960 }, { 
"epoch": 0.11377044631048709, "grad_norm": 0.701084554195404, "learning_rate": 0.000299827815717597, "loss": 4.8014, "step": 80970 }, { "epoch": 0.11378449724865067, "grad_norm": 1.806844711303711, "learning_rate": 0.0002998274631293314, "loss": 4.7518, "step": 80980 }, { "epoch": 0.11379854818681424, "grad_norm": 0.7623975276947021, "learning_rate": 0.0002998271101806385, "loss": 4.8233, "step": 80990 }, { "epoch": 0.11381259912497782, "grad_norm": 0.7140116095542908, "learning_rate": 0.0002998267568715192, "loss": 4.7421, "step": 81000 }, { "epoch": 0.11382665006314141, "grad_norm": 0.7124093174934387, "learning_rate": 0.0002998264032019743, "loss": 4.7544, "step": 81010 }, { "epoch": 0.11384070100130499, "grad_norm": 0.7354893088340759, "learning_rate": 0.0002998260491720047, "loss": 4.8484, "step": 81020 }, { "epoch": 0.11385475193946856, "grad_norm": 0.7834903001785278, "learning_rate": 0.0002998256947816112, "loss": 4.7304, "step": 81030 }, { "epoch": 0.11386880287763214, "grad_norm": 0.7008675336837769, "learning_rate": 0.00029982534003079474, "loss": 4.7707, "step": 81040 }, { "epoch": 0.11388285381579571, "grad_norm": 0.7236028909683228, "learning_rate": 0.0002998249849195561, "loss": 4.7268, "step": 81050 }, { "epoch": 0.11389690475395929, "grad_norm": 0.7143322825431824, "learning_rate": 0.0002998246294478961, "loss": 4.7511, "step": 81060 }, { "epoch": 0.11391095569212287, "grad_norm": 0.7715073227882385, "learning_rate": 0.0002998242736158157, "loss": 4.7602, "step": 81070 }, { "epoch": 0.11392500663028644, "grad_norm": 0.7184918522834778, "learning_rate": 0.00029982391742331565, "loss": 4.7352, "step": 81080 }, { "epoch": 0.11393905756845002, "grad_norm": 0.6928876638412476, "learning_rate": 0.0002998235608703969, "loss": 4.5818, "step": 81090 }, { "epoch": 0.1139531085066136, "grad_norm": 0.7220043540000916, "learning_rate": 0.00029982320395706023, "loss": 4.7761, "step": 81100 }, { "epoch": 0.11396715944477717, "grad_norm": 0.6904178261756897, 
"learning_rate": 0.00029982284668330657, "loss": 4.8029, "step": 81110 }, { "epoch": 0.11398121038294076, "grad_norm": 0.7251768708229065, "learning_rate": 0.00029982248904913673, "loss": 4.7282, "step": 81120 }, { "epoch": 0.11399526132110434, "grad_norm": 0.6947658658027649, "learning_rate": 0.00029982213105455153, "loss": 4.7695, "step": 81130 }, { "epoch": 0.11400931225926791, "grad_norm": 0.7100722193717957, "learning_rate": 0.000299821772699552, "loss": 4.8002, "step": 81140 }, { "epoch": 0.11402336319743149, "grad_norm": 0.7027919292449951, "learning_rate": 0.0002998214139841388, "loss": 4.598, "step": 81150 }, { "epoch": 0.11403741413559507, "grad_norm": 0.693475604057312, "learning_rate": 0.0002998210549083129, "loss": 4.7001, "step": 81160 }, { "epoch": 0.11405146507375864, "grad_norm": 0.720072329044342, "learning_rate": 0.0002998206954720751, "loss": 4.7215, "step": 81170 }, { "epoch": 0.11406551601192222, "grad_norm": 0.6978077292442322, "learning_rate": 0.0002998203356754264, "loss": 4.7458, "step": 81180 }, { "epoch": 0.1140795669500858, "grad_norm": 0.721059262752533, "learning_rate": 0.0002998199755183675, "loss": 4.6789, "step": 81190 }, { "epoch": 0.11409361788824937, "grad_norm": 0.7521209716796875, "learning_rate": 0.0002998196150008994, "loss": 4.5958, "step": 81200 }, { "epoch": 0.11410766882641295, "grad_norm": 0.7243664860725403, "learning_rate": 0.00029981925412302286, "loss": 4.7493, "step": 81210 }, { "epoch": 0.11412171976457654, "grad_norm": 0.6969353556632996, "learning_rate": 0.0002998189290247855, "loss": 4.8096, "step": 81220 }, { "epoch": 0.11413577070274011, "grad_norm": 0.7307901978492737, "learning_rate": 0.0002998185674621355, "loss": 4.582, "step": 81230 }, { "epoch": 0.11414982164090369, "grad_norm": 0.7377418875694275, "learning_rate": 0.00029981820553907944, "loss": 4.7947, "step": 81240 }, { "epoch": 0.11416387257906727, "grad_norm": 0.7029874324798584, "learning_rate": 0.00029981784325561856, "loss": 4.6826, "step": 
81250 }, { "epoch": 0.11417792351723084, "grad_norm": 0.7139830589294434, "learning_rate": 0.00029981748061175343, "loss": 4.7922, "step": 81260 }, { "epoch": 0.11419197445539442, "grad_norm": 0.7318224906921387, "learning_rate": 0.00029981711760748513, "loss": 4.6779, "step": 81270 }, { "epoch": 0.114206025393558, "grad_norm": 0.7297080159187317, "learning_rate": 0.00029981675424281437, "loss": 4.7369, "step": 81280 }, { "epoch": 0.11422007633172157, "grad_norm": 0.7171475887298584, "learning_rate": 0.0002998163905177421, "loss": 4.8174, "step": 81290 }, { "epoch": 0.11423412726988515, "grad_norm": 0.710830569267273, "learning_rate": 0.0002998160264322693, "loss": 4.7841, "step": 81300 }, { "epoch": 0.11424817820804872, "grad_norm": 0.7462754845619202, "learning_rate": 0.0002998156619863966, "loss": 4.7658, "step": 81310 }, { "epoch": 0.11426222914621231, "grad_norm": 0.7007805705070496, "learning_rate": 0.0002998152971801251, "loss": 4.8825, "step": 81320 }, { "epoch": 0.11427628008437589, "grad_norm": 0.6981909275054932, "learning_rate": 0.0002998149320134556, "loss": 4.7546, "step": 81330 }, { "epoch": 0.11429033102253947, "grad_norm": 0.7107502818107605, "learning_rate": 0.0002998145664863889, "loss": 4.7203, "step": 81340 }, { "epoch": 0.11430438196070304, "grad_norm": 0.7196722030639648, "learning_rate": 0.000299814200598926, "loss": 4.7632, "step": 81350 }, { "epoch": 0.11431843289886662, "grad_norm": 0.7067022919654846, "learning_rate": 0.00029981383435106774, "loss": 4.7363, "step": 81360 }, { "epoch": 0.1143324838370302, "grad_norm": 0.7224134802818298, "learning_rate": 0.000299813467742815, "loss": 4.6321, "step": 81370 }, { "epoch": 0.11434653477519377, "grad_norm": 0.722435712814331, "learning_rate": 0.00029981310077416864, "loss": 4.7949, "step": 81380 }, { "epoch": 0.11436058571335735, "grad_norm": 0.7146986722946167, "learning_rate": 0.00029981273344512955, "loss": 4.7668, "step": 81390 }, { "epoch": 0.11437463665152092, "grad_norm": 
0.7286592125892639, "learning_rate": 0.0002998123657556986, "loss": 4.7215, "step": 81400 }, { "epoch": 0.1143886875896845, "grad_norm": 0.7215551137924194, "learning_rate": 0.00029981199770587677, "loss": 4.7592, "step": 81410 }, { "epoch": 0.11440273852784807, "grad_norm": 0.737603485584259, "learning_rate": 0.0002998116292956648, "loss": 4.651, "step": 81420 }, { "epoch": 0.11441678946601166, "grad_norm": 0.7101167440414429, "learning_rate": 0.00029981126052506374, "loss": 4.8422, "step": 81430 }, { "epoch": 0.11443084040417524, "grad_norm": 0.7299492955207825, "learning_rate": 0.0002998108913940743, "loss": 4.885, "step": 81440 }, { "epoch": 0.11444489134233882, "grad_norm": 0.6973890662193298, "learning_rate": 0.00029981052190269745, "loss": 4.8612, "step": 81450 }, { "epoch": 0.1144589422805024, "grad_norm": 0.7072581648826599, "learning_rate": 0.0002998101520509341, "loss": 4.683, "step": 81460 }, { "epoch": 0.11447299321866597, "grad_norm": 0.7035022974014282, "learning_rate": 0.0002998097818387851, "loss": 4.6592, "step": 81470 }, { "epoch": 0.11448704415682955, "grad_norm": 0.7320752143859863, "learning_rate": 0.0002998094112662514, "loss": 4.6978, "step": 81480 }, { "epoch": 0.11450109509499312, "grad_norm": 0.6897929906845093, "learning_rate": 0.00029980904033333384, "loss": 4.9475, "step": 81490 }, { "epoch": 0.1145151460331567, "grad_norm": 0.6888123750686646, "learning_rate": 0.0002998086690400334, "loss": 4.6484, "step": 81500 }, { "epoch": 0.11452919697132027, "grad_norm": 0.7391230463981628, "learning_rate": 0.0002998082973863508, "loss": 4.7175, "step": 81510 }, { "epoch": 0.11454324790948385, "grad_norm": 0.7109723091125488, "learning_rate": 0.000299807925372287, "loss": 4.7042, "step": 81520 }, { "epoch": 0.11455729884764744, "grad_norm": 0.6957715749740601, "learning_rate": 0.000299807552997843, "loss": 4.7355, "step": 81530 }, { "epoch": 0.11457134978581102, "grad_norm": 0.7242121696472168, "learning_rate": 0.00029980718026301965, "loss": 
4.8314, "step": 81540 }, { "epoch": 0.11458540072397459, "grad_norm": 0.7289703488349915, "learning_rate": 0.0002998068071678178, "loss": 4.709, "step": 81550 }, { "epoch": 0.11459945166213817, "grad_norm": 0.7176446914672852, "learning_rate": 0.00029980643371223833, "loss": 4.7353, "step": 81560 }, { "epoch": 0.11461350260030174, "grad_norm": 0.7373208403587341, "learning_rate": 0.0002998060598962822, "loss": 4.754, "step": 81570 }, { "epoch": 0.11462755353846532, "grad_norm": 0.7444453835487366, "learning_rate": 0.00029980568571995027, "loss": 4.6487, "step": 81580 }, { "epoch": 0.1146416044766289, "grad_norm": 0.7543371915817261, "learning_rate": 0.00029980531118324345, "loss": 4.8064, "step": 81590 }, { "epoch": 0.11465565541479247, "grad_norm": 2.069936513900757, "learning_rate": 0.0002998049362861627, "loss": 4.6117, "step": 81600 }, { "epoch": 0.11466970635295605, "grad_norm": 0.7130931615829468, "learning_rate": 0.0002998045610287088, "loss": 4.7338, "step": 81610 }, { "epoch": 0.11468375729111963, "grad_norm": 0.7223381400108337, "learning_rate": 0.0002998041854108828, "loss": 4.8274, "step": 81620 }, { "epoch": 0.11469780822928322, "grad_norm": 0.7344392538070679, "learning_rate": 0.0002998038094326855, "loss": 4.7107, "step": 81630 }, { "epoch": 0.11471185916744679, "grad_norm": 0.735009491443634, "learning_rate": 0.00029980343309411777, "loss": 4.7712, "step": 81640 }, { "epoch": 0.11472591010561037, "grad_norm": 0.7493471503257751, "learning_rate": 0.0002998030563951806, "loss": 4.692, "step": 81650 }, { "epoch": 0.11473996104377394, "grad_norm": 0.7441632747650146, "learning_rate": 0.0002998026793358749, "loss": 4.7691, "step": 81660 }, { "epoch": 0.11475401198193752, "grad_norm": 0.7013207674026489, "learning_rate": 0.00029980230191620157, "loss": 4.6536, "step": 81670 }, { "epoch": 0.1147680629201011, "grad_norm": 0.697334349155426, "learning_rate": 0.00029980192413616146, "loss": 4.6575, "step": 81680 }, { "epoch": 0.11478211385826467, "grad_norm": 
0.6946876645088196, "learning_rate": 0.0002998015459957555, "loss": 4.7414, "step": 81690 }, { "epoch": 0.11479616479642825, "grad_norm": 0.7325066924095154, "learning_rate": 0.0002998011674949846, "loss": 4.7229, "step": 81700 }, { "epoch": 0.11481021573459183, "grad_norm": 0.6877426505088806, "learning_rate": 0.00029980078863384976, "loss": 4.7216, "step": 81710 }, { "epoch": 0.1148242666727554, "grad_norm": 0.7010763883590698, "learning_rate": 0.00029980040941235175, "loss": 4.77, "step": 81720 }, { "epoch": 0.11483831761091898, "grad_norm": 0.7128811478614807, "learning_rate": 0.00029980002983049156, "loss": 4.555, "step": 81730 }, { "epoch": 0.11485236854908257, "grad_norm": 0.6962786316871643, "learning_rate": 0.0002997996498882701, "loss": 4.699, "step": 81740 }, { "epoch": 0.11486641948724614, "grad_norm": 0.7425155639648438, "learning_rate": 0.0002997992695856883, "loss": 4.7556, "step": 81750 }, { "epoch": 0.11488047042540972, "grad_norm": 0.7460845112800598, "learning_rate": 0.000299798888922747, "loss": 4.7343, "step": 81760 }, { "epoch": 0.1148945213635733, "grad_norm": 0.7301716804504395, "learning_rate": 0.0002997985078994472, "loss": 4.8511, "step": 81770 }, { "epoch": 0.11490857230173687, "grad_norm": 0.7091270089149475, "learning_rate": 0.00029979812651578975, "loss": 4.6521, "step": 81780 }, { "epoch": 0.11492262323990045, "grad_norm": 0.7062439918518066, "learning_rate": 0.00029979774477177564, "loss": 4.7847, "step": 81790 }, { "epoch": 0.11493667417806402, "grad_norm": 0.6802224516868591, "learning_rate": 0.00029979736266740574, "loss": 4.7821, "step": 81800 }, { "epoch": 0.1149507251162276, "grad_norm": 0.7181935906410217, "learning_rate": 0.00029979698020268097, "loss": 4.7611, "step": 81810 }, { "epoch": 0.11496477605439118, "grad_norm": 0.6938358545303345, "learning_rate": 0.0002997965973776023, "loss": 4.7246, "step": 81820 }, { "epoch": 0.11497882699255475, "grad_norm": 0.6967886686325073, "learning_rate": 0.0002997962141921706, "loss": 
4.7603, "step": 81830 }, { "epoch": 0.11499287793071834, "grad_norm": 0.7130746841430664, "learning_rate": 0.0002997958306463867, "loss": 4.7692, "step": 81840 }, { "epoch": 0.11500692886888192, "grad_norm": 0.7128005027770996, "learning_rate": 0.0002997954467402517, "loss": 4.7132, "step": 81850 }, { "epoch": 0.1150209798070455, "grad_norm": 0.7023301124572754, "learning_rate": 0.00029979506247376645, "loss": 4.7823, "step": 81860 }, { "epoch": 0.11503503074520907, "grad_norm": 0.7159565091133118, "learning_rate": 0.0002997946778469319, "loss": 4.7408, "step": 81870 }, { "epoch": 0.11504908168337265, "grad_norm": 0.6996265649795532, "learning_rate": 0.00029979429285974886, "loss": 4.681, "step": 81880 }, { "epoch": 0.11506313262153622, "grad_norm": 0.6762425899505615, "learning_rate": 0.0002997939075122184, "loss": 4.6677, "step": 81890 }, { "epoch": 0.1150771835596998, "grad_norm": 0.6953459978103638, "learning_rate": 0.0002997935218043414, "loss": 4.7948, "step": 81900 }, { "epoch": 0.11509123449786338, "grad_norm": 0.7166847586631775, "learning_rate": 0.00029979313573611876, "loss": 4.7171, "step": 81910 }, { "epoch": 0.11510528543602695, "grad_norm": 0.7089852094650269, "learning_rate": 0.0002997927493075514, "loss": 4.6225, "step": 81920 }, { "epoch": 0.11511933637419053, "grad_norm": 0.7163013815879822, "learning_rate": 0.00029979236251864033, "loss": 4.8264, "step": 81930 }, { "epoch": 0.11513338731235412, "grad_norm": 0.7131022810935974, "learning_rate": 0.0002997919753693864, "loss": 4.7408, "step": 81940 }, { "epoch": 0.1151474382505177, "grad_norm": 0.7089284658432007, "learning_rate": 0.0002997915878597905, "loss": 4.7225, "step": 81950 }, { "epoch": 0.11516148918868127, "grad_norm": 0.770504355430603, "learning_rate": 0.00029979119998985377, "loss": 4.5838, "step": 81960 }, { "epoch": 0.11517554012684485, "grad_norm": 0.7346895933151245, "learning_rate": 0.0002997908117595769, "loss": 4.6957, "step": 81970 }, { "epoch": 0.11518959106500842, 
"grad_norm": 0.7275788187980652, "learning_rate": 0.00029979042316896095, "loss": 4.7569, "step": 81980 }, { "epoch": 0.115203642003172, "grad_norm": 0.7080996036529541, "learning_rate": 0.0002997900342180068, "loss": 4.6806, "step": 81990 }, { "epoch": 0.11521769294133558, "grad_norm": 0.6918423175811768, "learning_rate": 0.0002997896449067155, "loss": 4.6623, "step": 82000 }, { "epoch": 0.11523174387949915, "grad_norm": 0.7253255844116211, "learning_rate": 0.00029978925523508785, "loss": 4.7897, "step": 82010 }, { "epoch": 0.11524579481766273, "grad_norm": 0.750601053237915, "learning_rate": 0.00029978886520312485, "loss": 4.7144, "step": 82020 }, { "epoch": 0.1152598457558263, "grad_norm": 0.7183839678764343, "learning_rate": 0.0002997884748108274, "loss": 4.8058, "step": 82030 }, { "epoch": 0.11527389669398988, "grad_norm": 0.7069101333618164, "learning_rate": 0.00029978808405819654, "loss": 4.6957, "step": 82040 }, { "epoch": 0.11528794763215347, "grad_norm": 0.7078718543052673, "learning_rate": 0.000299787692945233, "loss": 4.6645, "step": 82050 }, { "epoch": 0.11530199857031705, "grad_norm": 0.7066739797592163, "learning_rate": 0.000299787301471938, "loss": 4.7598, "step": 82060 }, { "epoch": 0.11531604950848062, "grad_norm": 0.7062867283821106, "learning_rate": 0.0002997869096383123, "loss": 4.6227, "step": 82070 }, { "epoch": 0.1153301004466442, "grad_norm": 0.686747670173645, "learning_rate": 0.00029978651744435686, "loss": 4.6724, "step": 82080 }, { "epoch": 0.11534415138480777, "grad_norm": 0.729067862033844, "learning_rate": 0.0002997861248900727, "loss": 4.7635, "step": 82090 }, { "epoch": 0.11535820232297135, "grad_norm": 0.7246768474578857, "learning_rate": 0.00029978573197546066, "loss": 4.662, "step": 82100 }, { "epoch": 0.11537225326113493, "grad_norm": 0.743782103061676, "learning_rate": 0.0002997853387005218, "loss": 4.8008, "step": 82110 }, { "epoch": 0.1153863041992985, "grad_norm": 0.6988118290901184, "learning_rate": 0.00029978494506525694, 
"loss": 4.8016, "step": 82120 }, { "epoch": 0.11540035513746208, "grad_norm": 0.7115128040313721, "learning_rate": 0.0002997845510696671, "loss": 4.6794, "step": 82130 }, { "epoch": 0.11541440607562566, "grad_norm": 0.7527329921722412, "learning_rate": 0.0002997841567137532, "loss": 4.7338, "step": 82140 }, { "epoch": 0.11542845701378925, "grad_norm": 0.738523006439209, "learning_rate": 0.0002997837619975162, "loss": 4.6762, "step": 82150 }, { "epoch": 0.11544250795195282, "grad_norm": 0.7189664244651794, "learning_rate": 0.00029978336692095713, "loss": 4.5602, "step": 82160 }, { "epoch": 0.1154565588901164, "grad_norm": 0.7112995386123657, "learning_rate": 0.00029978297148407684, "loss": 4.7785, "step": 82170 }, { "epoch": 0.11547060982827997, "grad_norm": 0.6832894682884216, "learning_rate": 0.00029978257568687627, "loss": 4.6804, "step": 82180 }, { "epoch": 0.11548466076644355, "grad_norm": 0.6907115578651428, "learning_rate": 0.00029978217952935645, "loss": 4.6355, "step": 82190 }, { "epoch": 0.11549871170460713, "grad_norm": 0.6926548480987549, "learning_rate": 0.00029978178301151825, "loss": 4.6094, "step": 82200 }, { "epoch": 0.1155127626427707, "grad_norm": 0.7217580676078796, "learning_rate": 0.0002997813861333627, "loss": 4.6527, "step": 82210 }, { "epoch": 0.11552681358093428, "grad_norm": 0.7224411368370056, "learning_rate": 0.0002997809888948907, "loss": 4.7685, "step": 82220 }, { "epoch": 0.11554086451909786, "grad_norm": 0.7286561131477356, "learning_rate": 0.00029978059129610326, "loss": 4.5463, "step": 82230 }, { "epoch": 0.11555491545726143, "grad_norm": 0.7249482870101929, "learning_rate": 0.0002997801933370013, "loss": 4.8454, "step": 82240 }, { "epoch": 0.11556896639542502, "grad_norm": 0.7178618907928467, "learning_rate": 0.00029977979501758574, "loss": 4.7182, "step": 82250 }, { "epoch": 0.1155830173335886, "grad_norm": 0.7384633421897888, "learning_rate": 0.00029977939633785757, "loss": 4.7321, "step": 82260 }, { "epoch": 
0.11559706827175217, "grad_norm": 0.7196593284606934, "learning_rate": 0.0002997789972978178, "loss": 4.7556, "step": 82270 }, { "epoch": 0.11561111920991575, "grad_norm": 0.7066304087638855, "learning_rate": 0.0002997785978974674, "loss": 4.7843, "step": 82280 }, { "epoch": 0.11562517014807933, "grad_norm": 0.736116886138916, "learning_rate": 0.00029977819813680715, "loss": 4.8727, "step": 82290 }, { "epoch": 0.1156392210862429, "grad_norm": 0.708854079246521, "learning_rate": 0.00029977779801583824, "loss": 4.7171, "step": 82300 }, { "epoch": 0.11565327202440648, "grad_norm": 0.7159163355827332, "learning_rate": 0.00029977739753456146, "loss": 4.7331, "step": 82310 }, { "epoch": 0.11566732296257005, "grad_norm": 0.7191676497459412, "learning_rate": 0.0002997769966929779, "loss": 4.7406, "step": 82320 }, { "epoch": 0.11568137390073363, "grad_norm": 0.7123490571975708, "learning_rate": 0.0002997765954910885, "loss": 4.7577, "step": 82330 }, { "epoch": 0.1156954248388972, "grad_norm": 0.8625441193580627, "learning_rate": 0.0002997761939288941, "loss": 4.661, "step": 82340 }, { "epoch": 0.11570947577706078, "grad_norm": 0.6980680227279663, "learning_rate": 0.0002997757920063958, "loss": 4.7982, "step": 82350 }, { "epoch": 0.11572352671522437, "grad_norm": 0.7026267647743225, "learning_rate": 0.00029977538972359455, "loss": 4.6444, "step": 82360 }, { "epoch": 0.11573757765338795, "grad_norm": 0.7010255455970764, "learning_rate": 0.0002997749870804913, "loss": 4.7762, "step": 82370 }, { "epoch": 0.11575162859155153, "grad_norm": 0.7031188011169434, "learning_rate": 0.000299774584077087, "loss": 4.7665, "step": 82380 }, { "epoch": 0.1157656795297151, "grad_norm": 0.6966001391410828, "learning_rate": 0.0002997741807133827, "loss": 4.6379, "step": 82390 }, { "epoch": 0.11577973046787868, "grad_norm": 0.7564621567726135, "learning_rate": 0.00029977381737799296, "loss": 4.7131, "step": 82400 }, { "epoch": 0.11579378140604225, "grad_norm": 0.7084454298019409, 
"learning_rate": 0.0002997734133297211, "loss": 4.6699, "step": 82410 }, { "epoch": 0.11580783234420583, "grad_norm": 0.6875171661376953, "learning_rate": 0.0002997730089211521, "loss": 4.7788, "step": 82420 }, { "epoch": 0.1158218832823694, "grad_norm": 0.706990122795105, "learning_rate": 0.00029977260415228675, "loss": 4.6794, "step": 82430 }, { "epoch": 0.11583593422053298, "grad_norm": 0.6801334023475647, "learning_rate": 0.0002997721990231261, "loss": 4.6598, "step": 82440 }, { "epoch": 0.11584998515869656, "grad_norm": 0.7689122557640076, "learning_rate": 0.0002997717935336711, "loss": 4.5744, "step": 82450 }, { "epoch": 0.11586403609686015, "grad_norm": 0.7127279043197632, "learning_rate": 0.00029977138768392283, "loss": 4.6187, "step": 82460 }, { "epoch": 0.11587808703502372, "grad_norm": 0.7756818532943726, "learning_rate": 0.0002997709814738821, "loss": 4.7612, "step": 82470 }, { "epoch": 0.1158921379731873, "grad_norm": 0.7288581132888794, "learning_rate": 0.00029977057490355004, "loss": 4.7149, "step": 82480 }, { "epoch": 0.11590618891135088, "grad_norm": 0.7076622843742371, "learning_rate": 0.0002997701679729275, "loss": 4.6853, "step": 82490 }, { "epoch": 0.11592023984951445, "grad_norm": 0.7319123148918152, "learning_rate": 0.00029976976068201556, "loss": 4.5778, "step": 82500 }, { "epoch": 0.11593429078767803, "grad_norm": 0.719047486782074, "learning_rate": 0.0002997693530308151, "loss": 4.6536, "step": 82510 }, { "epoch": 0.1159483417258416, "grad_norm": 0.7204477190971375, "learning_rate": 0.00029976894501932723, "loss": 4.7233, "step": 82520 }, { "epoch": 0.11596239266400518, "grad_norm": 0.7214565277099609, "learning_rate": 0.0002997685366475528, "loss": 4.7507, "step": 82530 }, { "epoch": 0.11597644360216876, "grad_norm": 0.7362378239631653, "learning_rate": 0.0002997681279154929, "loss": 4.8398, "step": 82540 }, { "epoch": 0.11599049454033233, "grad_norm": 0.7328546643257141, "learning_rate": 0.0002997677188231484, "loss": 4.7098, "step": 
82550 }, { "epoch": 0.11600454547849592, "grad_norm": 0.7075091004371643, "learning_rate": 0.0002997673093705204, "loss": 4.7569, "step": 82560 }, { "epoch": 0.1160185964166595, "grad_norm": 0.7006434202194214, "learning_rate": 0.00029976689955760985, "loss": 4.725, "step": 82570 }, { "epoch": 0.11603264735482308, "grad_norm": 0.70038241147995, "learning_rate": 0.0002997664893844177, "loss": 4.6548, "step": 82580 }, { "epoch": 0.11604669829298665, "grad_norm": 0.7156614065170288, "learning_rate": 0.00029976607885094495, "loss": 4.7975, "step": 82590 }, { "epoch": 0.11606074923115023, "grad_norm": 0.7034133076667786, "learning_rate": 0.0002997656679571926, "loss": 4.7715, "step": 82600 }, { "epoch": 0.1160748001693138, "grad_norm": 0.7613126635551453, "learning_rate": 0.0002997652567031616, "loss": 4.8339, "step": 82610 }, { "epoch": 0.11608885110747738, "grad_norm": 0.7240241765975952, "learning_rate": 0.000299764845088853, "loss": 4.6923, "step": 82620 }, { "epoch": 0.11610290204564096, "grad_norm": 0.6997239589691162, "learning_rate": 0.00029976443311426776, "loss": 4.7306, "step": 82630 }, { "epoch": 0.11611695298380453, "grad_norm": 0.7083427309989929, "learning_rate": 0.0002997640207794069, "loss": 4.6962, "step": 82640 }, { "epoch": 0.11613100392196811, "grad_norm": 0.7667652368545532, "learning_rate": 0.00029976360808427134, "loss": 4.6436, "step": 82650 }, { "epoch": 0.11614505486013169, "grad_norm": 0.6891388297080994, "learning_rate": 0.0002997631950288622, "loss": 4.6205, "step": 82660 }, { "epoch": 0.11615910579829528, "grad_norm": 0.7161304354667664, "learning_rate": 0.00029976278161318034, "loss": 4.7486, "step": 82670 }, { "epoch": 0.11617315673645885, "grad_norm": 0.715480625629425, "learning_rate": 0.00029976236783722674, "loss": 4.6952, "step": 82680 }, { "epoch": 0.11618720767462243, "grad_norm": 0.7926939725875854, "learning_rate": 0.00029976195370100256, "loss": 4.7267, "step": 82690 }, { "epoch": 0.116201258612786, "grad_norm": 
0.7394651770591736, "learning_rate": 0.00029976153920450863, "loss": 4.6537, "step": 82700 }, { "epoch": 0.11621530955094958, "grad_norm": 0.7032037973403931, "learning_rate": 0.0002997611243477461, "loss": 4.8165, "step": 82710 }, { "epoch": 0.11622936048911316, "grad_norm": 0.7072632908821106, "learning_rate": 0.00029976070913071583, "loss": 4.6947, "step": 82720 }, { "epoch": 0.11624341142727673, "grad_norm": 0.7013282179832458, "learning_rate": 0.0002997602935534188, "loss": 4.6732, "step": 82730 }, { "epoch": 0.11625746236544031, "grad_norm": 0.7069587111473083, "learning_rate": 0.0002997598776158562, "loss": 4.6504, "step": 82740 }, { "epoch": 0.11627151330360389, "grad_norm": 0.7177491188049316, "learning_rate": 0.0002997594613180289, "loss": 4.5903, "step": 82750 }, { "epoch": 0.11628556424176746, "grad_norm": 0.7011220455169678, "learning_rate": 0.0002997590446599379, "loss": 4.8592, "step": 82760 }, { "epoch": 0.11629961517993105, "grad_norm": 0.7004333138465881, "learning_rate": 0.00029975862764158417, "loss": 4.8258, "step": 82770 }, { "epoch": 0.11631366611809463, "grad_norm": 0.7188377976417542, "learning_rate": 0.0002997582102629688, "loss": 4.7412, "step": 82780 }, { "epoch": 0.1163277170562582, "grad_norm": 0.7212134003639221, "learning_rate": 0.00029975779252409274, "loss": 4.7791, "step": 82790 }, { "epoch": 0.11634176799442178, "grad_norm": 0.7132065892219543, "learning_rate": 0.00029975737442495696, "loss": 4.8067, "step": 82800 }, { "epoch": 0.11635581893258536, "grad_norm": 0.7281680107116699, "learning_rate": 0.0002997569559655626, "loss": 4.7152, "step": 82810 }, { "epoch": 0.11636986987074893, "grad_norm": 0.6972888708114624, "learning_rate": 0.0002997565371459105, "loss": 4.7821, "step": 82820 }, { "epoch": 0.11638392080891251, "grad_norm": 1.2375811338424683, "learning_rate": 0.0002997561179660018, "loss": 4.7507, "step": 82830 }, { "epoch": 0.11639797174707608, "grad_norm": 0.6999583840370178, "learning_rate": 0.0002997556984258375, 
"loss": 4.7406, "step": 82840 }, { "epoch": 0.11641202268523966, "grad_norm": 0.7113127708435059, "learning_rate": 0.0002997552785254185, "loss": 4.7202, "step": 82850 }, { "epoch": 0.11642607362340324, "grad_norm": 0.6991801857948303, "learning_rate": 0.0002997548582647459, "loss": 4.6655, "step": 82860 }, { "epoch": 0.11644012456156683, "grad_norm": 0.7328364849090576, "learning_rate": 0.00029975443764382066, "loss": 4.7223, "step": 82870 }, { "epoch": 0.1164541754997304, "grad_norm": 0.7123426795005798, "learning_rate": 0.0002997540166626438, "loss": 4.6433, "step": 82880 }, { "epoch": 0.11646822643789398, "grad_norm": 0.7378365397453308, "learning_rate": 0.0002997535953212164, "loss": 4.8043, "step": 82890 }, { "epoch": 0.11648227737605756, "grad_norm": 0.7234169840812683, "learning_rate": 0.0002997531736195394, "loss": 4.726, "step": 82900 }, { "epoch": 0.11649632831422113, "grad_norm": 0.709543764591217, "learning_rate": 0.0002997527515576139, "loss": 4.6436, "step": 82910 }, { "epoch": 0.11651037925238471, "grad_norm": 0.7029217481613159, "learning_rate": 0.0002997523291354408, "loss": 4.7416, "step": 82920 }, { "epoch": 0.11652443019054828, "grad_norm": 0.7221389412879944, "learning_rate": 0.0002997519063530212, "loss": 4.8052, "step": 82930 }, { "epoch": 0.11653848112871186, "grad_norm": 0.7333707809448242, "learning_rate": 0.00029975148321035607, "loss": 4.6743, "step": 82940 }, { "epoch": 0.11655253206687544, "grad_norm": 0.712275505065918, "learning_rate": 0.00029975105970744646, "loss": 4.7026, "step": 82950 }, { "epoch": 0.11656658300503901, "grad_norm": 0.7303036451339722, "learning_rate": 0.00029975063584429334, "loss": 4.7575, "step": 82960 }, { "epoch": 0.11658063394320259, "grad_norm": 0.7000608444213867, "learning_rate": 0.00029975021162089783, "loss": 4.8079, "step": 82970 }, { "epoch": 0.11659468488136618, "grad_norm": 0.7333692312240601, "learning_rate": 0.0002997497870372608, "loss": 4.5493, "step": 82980 }, { "epoch": 0.11660873581952975, 
"grad_norm": 0.7260574102401733, "learning_rate": 0.0002997493620933834, "loss": 4.6226, "step": 82990 }, { "epoch": 0.11662278675769333, "grad_norm": 0.712442934513092, "learning_rate": 0.0002997489367892666, "loss": 4.8145, "step": 83000 }, { "epoch": 0.11663683769585691, "grad_norm": 0.7070791125297546, "learning_rate": 0.00029974851112491146, "loss": 4.6327, "step": 83010 }, { "epoch": 0.11665088863402048, "grad_norm": 0.7452596426010132, "learning_rate": 0.0002997480851003189, "loss": 4.6218, "step": 83020 }, { "epoch": 0.11666493957218406, "grad_norm": 0.7247107625007629, "learning_rate": 0.0002997476587154901, "loss": 4.7157, "step": 83030 }, { "epoch": 0.11667899051034764, "grad_norm": 0.7191839814186096, "learning_rate": 0.000299747231970426, "loss": 4.5815, "step": 83040 }, { "epoch": 0.11669304144851121, "grad_norm": 0.7884148955345154, "learning_rate": 0.0002997468048651276, "loss": 4.5333, "step": 83050 }, { "epoch": 0.11670709238667479, "grad_norm": 0.7189491987228394, "learning_rate": 0.00029974637739959594, "loss": 4.7521, "step": 83060 }, { "epoch": 0.11672114332483836, "grad_norm": 0.7118692994117737, "learning_rate": 0.00029974594957383213, "loss": 4.7522, "step": 83070 }, { "epoch": 0.11673519426300195, "grad_norm": 0.7117485404014587, "learning_rate": 0.0002997455213878371, "loss": 4.76, "step": 83080 }, { "epoch": 0.11674924520116553, "grad_norm": 0.7011753916740417, "learning_rate": 0.00029974509284161197, "loss": 4.6493, "step": 83090 }, { "epoch": 0.1167632961393291, "grad_norm": 0.7218371033668518, "learning_rate": 0.0002997446639351576, "loss": 4.6864, "step": 83100 }, { "epoch": 0.11677734707749268, "grad_norm": 0.7155550718307495, "learning_rate": 0.0002997442346684753, "loss": 4.7834, "step": 83110 }, { "epoch": 0.11679139801565626, "grad_norm": 0.7008214592933655, "learning_rate": 0.00029974380504156585, "loss": 4.6983, "step": 83120 }, { "epoch": 0.11680544895381983, "grad_norm": 0.697800874710083, "learning_rate": 
0.0002997433750544304, "loss": 4.7975, "step": 83130 }, { "epoch": 0.11681949989198341, "grad_norm": 0.7439045310020447, "learning_rate": 0.00029974294470706993, "loss": 4.7438, "step": 83140 }, { "epoch": 0.11683355083014699, "grad_norm": 0.7132053971290588, "learning_rate": 0.00029974251399948553, "loss": 4.7585, "step": 83150 }, { "epoch": 0.11684760176831056, "grad_norm": 0.6998278498649597, "learning_rate": 0.0002997420829316782, "loss": 4.7226, "step": 83160 }, { "epoch": 0.11686165270647414, "grad_norm": 0.6966537833213806, "learning_rate": 0.000299741651503649, "loss": 4.6966, "step": 83170 }, { "epoch": 0.11687570364463773, "grad_norm": 0.6955662965774536, "learning_rate": 0.00029974121971539894, "loss": 4.6016, "step": 83180 }, { "epoch": 0.1168897545828013, "grad_norm": 0.6958626508712769, "learning_rate": 0.0002997407875669291, "loss": 4.6607, "step": 83190 }, { "epoch": 0.11690380552096488, "grad_norm": 0.7100691199302673, "learning_rate": 0.00029974035505824046, "loss": 4.7568, "step": 83200 }, { "epoch": 0.11691785645912846, "grad_norm": 0.7105290293693542, "learning_rate": 0.00029973992218933416, "loss": 4.8082, "step": 83210 }, { "epoch": 0.11693190739729203, "grad_norm": 0.7148688435554504, "learning_rate": 0.00029973948896021117, "loss": 4.6506, "step": 83220 }, { "epoch": 0.11694595833545561, "grad_norm": 0.7135083079338074, "learning_rate": 0.0002997390553708725, "loss": 4.7098, "step": 83230 }, { "epoch": 0.11696000927361919, "grad_norm": 0.6871196627616882, "learning_rate": 0.0002997386214213192, "loss": 4.7369, "step": 83240 }, { "epoch": 0.11697406021178276, "grad_norm": 0.7220321297645569, "learning_rate": 0.00029973818711155246, "loss": 4.7927, "step": 83250 }, { "epoch": 0.11698811114994634, "grad_norm": 0.6984358429908752, "learning_rate": 0.0002997377524415731, "loss": 4.7761, "step": 83260 }, { "epoch": 0.11700216208810992, "grad_norm": 0.7106263637542725, "learning_rate": 0.00029973731741138234, "loss": 4.7305, "step": 83270 }, { 
"epoch": 0.11701621302627349, "grad_norm": 0.715644121170044, "learning_rate": 0.00029973688202098115, "loss": 4.781, "step": 83280 }, { "epoch": 0.11703026396443708, "grad_norm": 0.7058255076408386, "learning_rate": 0.0002997364462703706, "loss": 4.6378, "step": 83290 }, { "epoch": 0.11704431490260066, "grad_norm": 0.6990760564804077, "learning_rate": 0.0002997360101595517, "loss": 4.7514, "step": 83300 }, { "epoch": 0.11705836584076423, "grad_norm": 0.7147160172462463, "learning_rate": 0.00029973557368852553, "loss": 4.6166, "step": 83310 }, { "epoch": 0.11707241677892781, "grad_norm": 0.712631344795227, "learning_rate": 0.00029973513685729316, "loss": 4.6634, "step": 83320 }, { "epoch": 0.11708646771709139, "grad_norm": 0.7080478668212891, "learning_rate": 0.0002997346996658556, "loss": 4.6785, "step": 83330 }, { "epoch": 0.11710051865525496, "grad_norm": 0.6984758973121643, "learning_rate": 0.0002997342621142139, "loss": 4.7836, "step": 83340 }, { "epoch": 0.11711456959341854, "grad_norm": 0.7084048986434937, "learning_rate": 0.00029973382420236917, "loss": 4.8846, "step": 83350 }, { "epoch": 0.11712862053158211, "grad_norm": 0.7941184043884277, "learning_rate": 0.0002997333859303225, "loss": 4.5988, "step": 83360 }, { "epoch": 0.11714267146974569, "grad_norm": 0.7254170775413513, "learning_rate": 0.0002997329472980747, "loss": 4.7335, "step": 83370 }, { "epoch": 0.11715672240790927, "grad_norm": 0.7091358304023743, "learning_rate": 0.0002997325083056271, "loss": 4.7572, "step": 83380 }, { "epoch": 0.11717077334607286, "grad_norm": 0.7274218797683716, "learning_rate": 0.0002997320689529807, "loss": 4.6113, "step": 83390 }, { "epoch": 0.11718482428423643, "grad_norm": 0.699985146522522, "learning_rate": 0.0002997316292401364, "loss": 4.7306, "step": 83400 }, { "epoch": 0.11719887522240001, "grad_norm": 0.7258591055870056, "learning_rate": 0.0002997311891670954, "loss": 4.7404, "step": 83410 }, { "epoch": 0.11721292616056359, "grad_norm": 0.7040428519248962, 
"learning_rate": 0.00029973074873385877, "loss": 4.8231, "step": 83420 }, { "epoch": 0.11722697709872716, "grad_norm": 0.7231871485710144, "learning_rate": 0.0002997303079404275, "loss": 4.7028, "step": 83430 }, { "epoch": 0.11724102803689074, "grad_norm": 0.701480507850647, "learning_rate": 0.0002997298667868027, "loss": 4.9066, "step": 83440 }, { "epoch": 0.11725507897505431, "grad_norm": 0.739446759223938, "learning_rate": 0.0002997294252729854, "loss": 4.8486, "step": 83450 }, { "epoch": 0.11726912991321789, "grad_norm": 0.7062489986419678, "learning_rate": 0.00029972898339897664, "loss": 4.7646, "step": 83460 }, { "epoch": 0.11728318085138147, "grad_norm": 0.7041696906089783, "learning_rate": 0.0002997285411647775, "loss": 4.7471, "step": 83470 }, { "epoch": 0.11729723178954504, "grad_norm": 0.7124684453010559, "learning_rate": 0.00029972809857038913, "loss": 4.719, "step": 83480 }, { "epoch": 0.11731128272770863, "grad_norm": 0.7017519474029541, "learning_rate": 0.0002997276556158124, "loss": 4.6908, "step": 83490 }, { "epoch": 0.11732533366587221, "grad_norm": 0.7156013250350952, "learning_rate": 0.0002997272123010486, "loss": 4.7906, "step": 83500 }, { "epoch": 0.11733938460403578, "grad_norm": 0.715263307094574, "learning_rate": 0.0002997267686260987, "loss": 4.6222, "step": 83510 }, { "epoch": 0.11735343554219936, "grad_norm": 0.7254456877708435, "learning_rate": 0.0002997263245909637, "loss": 4.585, "step": 83520 }, { "epoch": 0.11736748648036294, "grad_norm": 0.7293530106544495, "learning_rate": 0.00029972588019564476, "loss": 4.6986, "step": 83530 }, { "epoch": 0.11738153741852651, "grad_norm": 0.7089353799819946, "learning_rate": 0.00029972543544014285, "loss": 4.6565, "step": 83540 }, { "epoch": 0.11739558835669009, "grad_norm": 0.7219105958938599, "learning_rate": 0.0002997249903244592, "loss": 4.6934, "step": 83550 }, { "epoch": 0.11740963929485367, "grad_norm": 0.7276551723480225, "learning_rate": 0.00029972454484859475, "loss": 4.6889, "step": 
83560 }, { "epoch": 0.11742369023301724, "grad_norm": 0.6939069628715515, "learning_rate": 0.00029972409901255056, "loss": 4.7927, "step": 83570 }, { "epoch": 0.11743774117118082, "grad_norm": 0.7165120244026184, "learning_rate": 0.0002997236528163278, "loss": 4.757, "step": 83580 }, { "epoch": 0.1174517921093444, "grad_norm": 0.774430513381958, "learning_rate": 0.00029972320625992754, "loss": 4.657, "step": 83590 }, { "epoch": 0.11746584304750798, "grad_norm": 0.7265297770500183, "learning_rate": 0.00029972275934335077, "loss": 4.6596, "step": 83600 }, { "epoch": 0.11747989398567156, "grad_norm": 0.7017237544059753, "learning_rate": 0.00029972231206659857, "loss": 4.7227, "step": 83610 }, { "epoch": 0.11749394492383514, "grad_norm": 0.703671395778656, "learning_rate": 0.00029972186442967213, "loss": 4.6661, "step": 83620 }, { "epoch": 0.11750799586199871, "grad_norm": 0.7623944878578186, "learning_rate": 0.00029972141643257237, "loss": 4.6292, "step": 83630 }, { "epoch": 0.11752204680016229, "grad_norm": 0.73209547996521, "learning_rate": 0.00029972096807530043, "loss": 4.6943, "step": 83640 }, { "epoch": 0.11753609773832586, "grad_norm": 0.7071471810340881, "learning_rate": 0.0002997205193578575, "loss": 4.6732, "step": 83650 }, { "epoch": 0.11755014867648944, "grad_norm": 0.7387067675590515, "learning_rate": 0.00029972007028024447, "loss": 4.6494, "step": 83660 }, { "epoch": 0.11756419961465302, "grad_norm": 0.7155556678771973, "learning_rate": 0.00029971962084246256, "loss": 4.6376, "step": 83670 }, { "epoch": 0.1175782505528166, "grad_norm": 0.7402138113975525, "learning_rate": 0.0002997191710445128, "loss": 4.6761, "step": 83680 }, { "epoch": 0.11759230149098017, "grad_norm": 0.7761551141738892, "learning_rate": 0.0002997187208863963, "loss": 4.6724, "step": 83690 }, { "epoch": 0.11760635242914376, "grad_norm": 0.7482668161392212, "learning_rate": 0.0002997182703681141, "loss": 4.7362, "step": 83700 }, { "epoch": 0.11762040336730734, "grad_norm": 
0.7007404565811157, "learning_rate": 0.00029971781948966733, "loss": 4.8267, "step": 83710 }, { "epoch": 0.11763445430547091, "grad_norm": 0.6935082077980042, "learning_rate": 0.000299717368251057, "loss": 4.6847, "step": 83720 }, { "epoch": 0.11764850524363449, "grad_norm": 0.7338686585426331, "learning_rate": 0.00029971691665228424, "loss": 4.8848, "step": 83730 }, { "epoch": 0.11766255618179806, "grad_norm": 0.7095941305160522, "learning_rate": 0.00029971646469335015, "loss": 4.7018, "step": 83740 }, { "epoch": 0.11767660711996164, "grad_norm": 0.7719713449478149, "learning_rate": 0.00029971601237425586, "loss": 4.6187, "step": 83750 }, { "epoch": 0.11769065805812522, "grad_norm": 0.7228941321372986, "learning_rate": 0.00029971555969500235, "loss": 4.6907, "step": 83760 }, { "epoch": 0.11770470899628879, "grad_norm": 0.7366059422492981, "learning_rate": 0.0002997151066555908, "loss": 4.7233, "step": 83770 }, { "epoch": 0.11771875993445237, "grad_norm": 0.8631296157836914, "learning_rate": 0.0002997146532560222, "loss": 4.7148, "step": 83780 }, { "epoch": 0.11773281087261595, "grad_norm": 0.7031307816505432, "learning_rate": 0.00029971419949629776, "loss": 4.7728, "step": 83790 }, { "epoch": 0.11774686181077954, "grad_norm": 0.7433533668518066, "learning_rate": 0.00029971374537641853, "loss": 4.6781, "step": 83800 }, { "epoch": 0.11776091274894311, "grad_norm": 0.760563850402832, "learning_rate": 0.00029971329089638555, "loss": 4.7035, "step": 83810 }, { "epoch": 0.11777496368710669, "grad_norm": 0.7019788026809692, "learning_rate": 0.00029971283605619996, "loss": 4.6762, "step": 83820 }, { "epoch": 0.11778901462527026, "grad_norm": 0.6909261345863342, "learning_rate": 0.00029971238085586284, "loss": 4.6912, "step": 83830 }, { "epoch": 0.11780306556343384, "grad_norm": 0.7148104310035706, "learning_rate": 0.00029971192529537534, "loss": 4.7071, "step": 83840 }, { "epoch": 0.11781711650159742, "grad_norm": 0.7196040749549866, "learning_rate": 
0.00029971146937473844, "loss": 4.7632, "step": 83850 }, { "epoch": 0.11783116743976099, "grad_norm": 0.7324225902557373, "learning_rate": 0.0002997110130939533, "loss": 4.5944, "step": 83860 }, { "epoch": 0.11784521837792457, "grad_norm": 0.7406251430511475, "learning_rate": 0.00029971055645302115, "loss": 4.7681, "step": 83870 }, { "epoch": 0.11785926931608814, "grad_norm": 0.6971359252929688, "learning_rate": 0.00029971009945194284, "loss": 4.5862, "step": 83880 }, { "epoch": 0.11787332025425172, "grad_norm": 0.7004740834236145, "learning_rate": 0.00029970964209071957, "loss": 4.6547, "step": 83890 }, { "epoch": 0.1178873711924153, "grad_norm": 0.736107349395752, "learning_rate": 0.0002997091843693525, "loss": 4.5973, "step": 83900 }, { "epoch": 0.11790142213057889, "grad_norm": 0.7069373726844788, "learning_rate": 0.00029970872628784273, "loss": 4.6779, "step": 83910 }, { "epoch": 0.11791547306874246, "grad_norm": 0.7382087707519531, "learning_rate": 0.0002997082678461913, "loss": 4.5928, "step": 83920 }, { "epoch": 0.11792952400690604, "grad_norm": 0.7047857642173767, "learning_rate": 0.0002997078090443993, "loss": 4.7603, "step": 83930 }, { "epoch": 0.11794357494506962, "grad_norm": 0.7913258075714111, "learning_rate": 0.0002997073498824679, "loss": 4.5967, "step": 83940 }, { "epoch": 0.11795762588323319, "grad_norm": 0.7597509026527405, "learning_rate": 0.0002997068903603982, "loss": 4.6874, "step": 83950 }, { "epoch": 0.11797167682139677, "grad_norm": 0.7429311871528625, "learning_rate": 0.00029970643047819124, "loss": 4.6269, "step": 83960 }, { "epoch": 0.11798572775956034, "grad_norm": 0.7132638692855835, "learning_rate": 0.00029970597023584814, "loss": 4.6915, "step": 83970 }, { "epoch": 0.11799977869772392, "grad_norm": 0.7404999136924744, "learning_rate": 0.0002997055096333701, "loss": 4.7002, "step": 83980 }, { "epoch": 0.1180138296358875, "grad_norm": 0.7566960453987122, "learning_rate": 0.0002997050486707581, "loss": 4.6367, "step": 83990 }, { 
"epoch": 0.11802788057405107, "grad_norm": 0.7191115617752075, "learning_rate": 0.0002997045873480134, "loss": 4.6247, "step": 84000 }, { "epoch": 0.11804193151221466, "grad_norm": 0.7127372026443481, "learning_rate": 0.0002997041256651369, "loss": 4.5988, "step": 84010 }, { "epoch": 0.11805598245037824, "grad_norm": 0.7138316035270691, "learning_rate": 0.0002997036636221299, "loss": 4.694, "step": 84020 }, { "epoch": 0.11807003338854181, "grad_norm": 0.7099950313568115, "learning_rate": 0.0002997032012189934, "loss": 4.7058, "step": 84030 }, { "epoch": 0.11808408432670539, "grad_norm": 0.7554578185081482, "learning_rate": 0.00029970273845572864, "loss": 4.6664, "step": 84040 }, { "epoch": 0.11809813526486897, "grad_norm": 0.7338352799415588, "learning_rate": 0.00029970227533233657, "loss": 4.6666, "step": 84050 }, { "epoch": 0.11811218620303254, "grad_norm": 0.7148082852363586, "learning_rate": 0.0002997018118488184, "loss": 4.6799, "step": 84060 }, { "epoch": 0.11812623714119612, "grad_norm": 0.7274507284164429, "learning_rate": 0.0002997013480051753, "loss": 4.6302, "step": 84070 }, { "epoch": 0.1181402880793597, "grad_norm": 0.7034341096878052, "learning_rate": 0.0002997008838014082, "loss": 4.743, "step": 84080 }, { "epoch": 0.11815433901752327, "grad_norm": 0.7157617807388306, "learning_rate": 0.00029970041923751837, "loss": 4.6052, "step": 84090 }, { "epoch": 0.11816838995568685, "grad_norm": 0.8156205415725708, "learning_rate": 0.00029969995431350686, "loss": 4.7544, "step": 84100 }, { "epoch": 0.11818244089385044, "grad_norm": 0.7375916838645935, "learning_rate": 0.0002996994890293749, "loss": 4.7907, "step": 84110 }, { "epoch": 0.11819649183201401, "grad_norm": 0.6925612092018127, "learning_rate": 0.00029969902338512343, "loss": 4.7709, "step": 84120 }, { "epoch": 0.11821054277017759, "grad_norm": 0.7296881079673767, "learning_rate": 0.0002996985573807537, "loss": 4.7123, "step": 84130 }, { "epoch": 0.11822459370834117, "grad_norm": 0.717957079410553, 
"learning_rate": 0.0002996980910162668, "loss": 4.6673, "step": 84140 }, { "epoch": 0.11823864464650474, "grad_norm": 0.6915928721427917, "learning_rate": 0.0002996976242916638, "loss": 4.6409, "step": 84150 }, { "epoch": 0.11825269558466832, "grad_norm": 0.69838547706604, "learning_rate": 0.00029969715720694595, "loss": 4.8507, "step": 84160 }, { "epoch": 0.1182667465228319, "grad_norm": 0.701752245426178, "learning_rate": 0.0002996966897621143, "loss": 4.5703, "step": 84170 }, { "epoch": 0.11828079746099547, "grad_norm": 0.7115376591682434, "learning_rate": 0.0002996962219571699, "loss": 4.7654, "step": 84180 }, { "epoch": 0.11829484839915905, "grad_norm": 0.7013298869132996, "learning_rate": 0.00029969575379211396, "loss": 4.6597, "step": 84190 }, { "epoch": 0.11830889933732262, "grad_norm": 0.7234477996826172, "learning_rate": 0.0002996952852669476, "loss": 4.6087, "step": 84200 }, { "epoch": 0.1183229502754862, "grad_norm": 0.6927001476287842, "learning_rate": 0.00029969481638167193, "loss": 4.622, "step": 84210 }, { "epoch": 0.11833700121364979, "grad_norm": 0.7288459539413452, "learning_rate": 0.00029969434713628807, "loss": 4.6142, "step": 84220 }, { "epoch": 0.11835105215181337, "grad_norm": 0.7043154239654541, "learning_rate": 0.0002996938775307972, "loss": 4.6994, "step": 84230 }, { "epoch": 0.11836510308997694, "grad_norm": 0.7041778564453125, "learning_rate": 0.0002996934075652004, "loss": 4.6914, "step": 84240 }, { "epoch": 0.11837915402814052, "grad_norm": 0.7131189703941345, "learning_rate": 0.00029969293723949885, "loss": 4.6957, "step": 84250 }, { "epoch": 0.1183932049663041, "grad_norm": 0.7089633345603943, "learning_rate": 0.0002996924665536936, "loss": 4.7275, "step": 84260 }, { "epoch": 0.11840725590446767, "grad_norm": 0.7021405696868896, "learning_rate": 0.00029969199550778584, "loss": 4.7756, "step": 84270 }, { "epoch": 0.11842130684263125, "grad_norm": 0.706847071647644, "learning_rate": 0.00029969152410177667, "loss": 4.792, "step": 84280 
}, { "epoch": 0.11843535778079482, "grad_norm": 0.7018424272537231, "learning_rate": 0.0002996910523356672, "loss": 4.6697, "step": 84290 }, { "epoch": 0.1184494087189584, "grad_norm": 0.7254888415336609, "learning_rate": 0.0002996905802094587, "loss": 4.81, "step": 84300 }, { "epoch": 0.11846345965712198, "grad_norm": 0.7102382779121399, "learning_rate": 0.0002996901077231522, "loss": 4.6677, "step": 84310 }, { "epoch": 0.11847751059528557, "grad_norm": 0.6817678213119507, "learning_rate": 0.00029968963487674884, "loss": 4.6627, "step": 84320 }, { "epoch": 0.11849156153344914, "grad_norm": 0.7572276592254639, "learning_rate": 0.00029968916167024973, "loss": 4.8695, "step": 84330 }, { "epoch": 0.11850561247161272, "grad_norm": 0.7169764041900635, "learning_rate": 0.00029968868810365613, "loss": 4.6564, "step": 84340 }, { "epoch": 0.1185196634097763, "grad_norm": 0.708909273147583, "learning_rate": 0.000299688214176969, "loss": 4.5852, "step": 84350 }, { "epoch": 0.11853371434793987, "grad_norm": 0.7388038039207458, "learning_rate": 0.0002996877398901896, "loss": 4.7354, "step": 84360 }, { "epoch": 0.11854776528610345, "grad_norm": 0.7097702622413635, "learning_rate": 0.00029968726524331915, "loss": 4.7295, "step": 84370 }, { "epoch": 0.11856181622426702, "grad_norm": 0.7070819735527039, "learning_rate": 0.00029968679023635857, "loss": 4.6223, "step": 84380 }, { "epoch": 0.1185758671624306, "grad_norm": 0.7111035585403442, "learning_rate": 0.0002996863148693092, "loss": 4.5126, "step": 84390 }, { "epoch": 0.11858991810059417, "grad_norm": 0.7140163779258728, "learning_rate": 0.00029968583914217204, "loss": 4.7595, "step": 84400 }, { "epoch": 0.11860396903875775, "grad_norm": 0.7231415510177612, "learning_rate": 0.00029968536305494836, "loss": 4.7312, "step": 84410 }, { "epoch": 0.11861801997692134, "grad_norm": 0.74226975440979, "learning_rate": 0.00029968488660763923, "loss": 4.6488, "step": 84420 }, { "epoch": 0.11863207091508492, "grad_norm": 0.7267938852310181, 
"learning_rate": 0.0002996844098002458, "loss": 4.5626, "step": 84430 }, { "epoch": 0.11864612185324849, "grad_norm": 0.7126814126968384, "learning_rate": 0.00029968393263276927, "loss": 4.6538, "step": 84440 }, { "epoch": 0.11866017279141207, "grad_norm": 0.7250803709030151, "learning_rate": 0.0002996834551052107, "loss": 4.6957, "step": 84450 }, { "epoch": 0.11867422372957565, "grad_norm": 0.7476853728294373, "learning_rate": 0.0002996829772175713, "loss": 4.6687, "step": 84460 }, { "epoch": 0.11868827466773922, "grad_norm": 0.7315259575843811, "learning_rate": 0.0002996824989698522, "loss": 4.7679, "step": 84470 }, { "epoch": 0.1187023256059028, "grad_norm": 0.7134382128715515, "learning_rate": 0.00029968202036205455, "loss": 4.7786, "step": 84480 }, { "epoch": 0.11871637654406637, "grad_norm": 0.7032891511917114, "learning_rate": 0.0002996815413941795, "loss": 4.6831, "step": 84490 }, { "epoch": 0.11873042748222995, "grad_norm": 0.708054780960083, "learning_rate": 0.0002996810620662282, "loss": 4.7639, "step": 84500 }, { "epoch": 0.11874447842039353, "grad_norm": 0.7284858822822571, "learning_rate": 0.00029968058237820185, "loss": 4.7167, "step": 84510 }, { "epoch": 0.1187585293585571, "grad_norm": 0.7195180058479309, "learning_rate": 0.0002996801023301015, "loss": 4.7123, "step": 84520 }, { "epoch": 0.11877258029672069, "grad_norm": 0.7547293305397034, "learning_rate": 0.00029967962192192847, "loss": 4.7145, "step": 84530 }, { "epoch": 0.11878663123488427, "grad_norm": 0.7729194760322571, "learning_rate": 0.00029967914115368375, "loss": 4.7879, "step": 84540 }, { "epoch": 0.11880068217304784, "grad_norm": 0.7287982702255249, "learning_rate": 0.0002996786600253686, "loss": 4.7128, "step": 84550 }, { "epoch": 0.11881473311121142, "grad_norm": 0.7168266177177429, "learning_rate": 0.00029967817853698413, "loss": 4.6013, "step": 84560 }, { "epoch": 0.118828784049375, "grad_norm": 0.6961681246757507, "learning_rate": 0.0002996776966885315, "loss": 4.7291, "step": 
84570 }, { "epoch": 0.11884283498753857, "grad_norm": 0.7002440690994263, "learning_rate": 0.00029967721448001186, "loss": 4.6898, "step": 84580 }, { "epoch": 0.11885688592570215, "grad_norm": 0.7198019027709961, "learning_rate": 0.00029967673191142645, "loss": 4.6502, "step": 84590 }, { "epoch": 0.11887093686386573, "grad_norm": 0.7356411814689636, "learning_rate": 0.0002996762489827763, "loss": 4.7287, "step": 84600 }, { "epoch": 0.1188849878020293, "grad_norm": 0.7238462567329407, "learning_rate": 0.0002996757656940627, "loss": 4.6479, "step": 84610 }, { "epoch": 0.11889903874019288, "grad_norm": 0.7430039644241333, "learning_rate": 0.0002996752820452867, "loss": 4.7358, "step": 84620 }, { "epoch": 0.11891308967835647, "grad_norm": 0.6926167011260986, "learning_rate": 0.0002996747980364496, "loss": 4.6281, "step": 84630 }, { "epoch": 0.11892714061652004, "grad_norm": 0.7018167972564697, "learning_rate": 0.00029967431366755237, "loss": 4.7712, "step": 84640 }, { "epoch": 0.11894119155468362, "grad_norm": 0.6989160180091858, "learning_rate": 0.00029967382893859634, "loss": 4.7982, "step": 84650 }, { "epoch": 0.1189552424928472, "grad_norm": 0.7366704940795898, "learning_rate": 0.0002996733438495826, "loss": 4.7934, "step": 84660 }, { "epoch": 0.11896929343101077, "grad_norm": 0.698725163936615, "learning_rate": 0.00029967285840051236, "loss": 4.7432, "step": 84670 }, { "epoch": 0.11898334436917435, "grad_norm": 0.7053683400154114, "learning_rate": 0.0002996723725913868, "loss": 4.7379, "step": 84680 }, { "epoch": 0.11899739530733792, "grad_norm": 0.7268297076225281, "learning_rate": 0.000299671886422207, "loss": 4.6208, "step": 84690 }, { "epoch": 0.1190114462455015, "grad_norm": 0.7235490679740906, "learning_rate": 0.0002996713998929742, "loss": 4.6095, "step": 84700 }, { "epoch": 0.11902549718366508, "grad_norm": 0.706933319568634, "learning_rate": 0.00029967091300368956, "loss": 4.6821, "step": 84710 }, { "epoch": 0.11903954812182865, "grad_norm": 
0.7260755300521851, "learning_rate": 0.0002996704257543542, "loss": 4.6393, "step": 84720 }, { "epoch": 0.11905359905999224, "grad_norm": 0.7088666558265686, "learning_rate": 0.00029966993814496944, "loss": 4.7713, "step": 84730 }, { "epoch": 0.11906764999815582, "grad_norm": 0.6888231635093689, "learning_rate": 0.0002996694501755363, "loss": 4.6284, "step": 84740 }, { "epoch": 0.1190817009363194, "grad_norm": 0.7280553579330444, "learning_rate": 0.000299668961846056, "loss": 4.6492, "step": 84750 }, { "epoch": 0.11909575187448297, "grad_norm": 0.7076670527458191, "learning_rate": 0.00029966847315652975, "loss": 4.7118, "step": 84760 }, { "epoch": 0.11910980281264655, "grad_norm": 0.7475929856300354, "learning_rate": 0.0002996679841069587, "loss": 4.6426, "step": 84770 }, { "epoch": 0.11912385375081012, "grad_norm": 0.6810241937637329, "learning_rate": 0.00029966749469734395, "loss": 4.7599, "step": 84780 }, { "epoch": 0.1191379046889737, "grad_norm": 0.7051819562911987, "learning_rate": 0.00029966700492768683, "loss": 4.7097, "step": 84790 }, { "epoch": 0.11915195562713728, "grad_norm": 0.7109501361846924, "learning_rate": 0.0002996665147979884, "loss": 4.625, "step": 84800 }, { "epoch": 0.11916600656530085, "grad_norm": 0.6987961530685425, "learning_rate": 0.0002996660243082499, "loss": 4.6492, "step": 84810 }, { "epoch": 0.11918005750346443, "grad_norm": 0.7196264863014221, "learning_rate": 0.00029966553345847245, "loss": 4.5642, "step": 84820 }, { "epoch": 0.11919410844162802, "grad_norm": 0.6904036998748779, "learning_rate": 0.0002996650422486573, "loss": 4.7637, "step": 84830 }, { "epoch": 0.1192081593797916, "grad_norm": 0.7117659449577332, "learning_rate": 0.0002996645506788056, "loss": 4.7709, "step": 84840 }, { "epoch": 0.11922221031795517, "grad_norm": 0.7033788561820984, "learning_rate": 0.0002996640587489185, "loss": 4.654, "step": 84850 }, { "epoch": 0.11923626125611875, "grad_norm": 0.710970938205719, "learning_rate": 0.00029966356645899726, "loss": 
4.6868, "step": 84860 }, { "epoch": 0.11925031219428232, "grad_norm": 0.7019822597503662, "learning_rate": 0.00029966307380904295, "loss": 4.6512, "step": 84870 }, { "epoch": 0.1192643631324459, "grad_norm": 0.7262449860572815, "learning_rate": 0.00029966258079905687, "loss": 4.6789, "step": 84880 }, { "epoch": 0.11927841407060948, "grad_norm": 0.7337659001350403, "learning_rate": 0.0002996620874290402, "loss": 4.7618, "step": 84890 }, { "epoch": 0.11929246500877305, "grad_norm": 0.7070848941802979, "learning_rate": 0.00029966159369899405, "loss": 4.7406, "step": 84900 }, { "epoch": 0.11930651594693663, "grad_norm": 0.7408710718154907, "learning_rate": 0.00029966109960891964, "loss": 4.8364, "step": 84910 }, { "epoch": 0.1193205668851002, "grad_norm": 0.6921396851539612, "learning_rate": 0.0002996606051588182, "loss": 4.767, "step": 84920 }, { "epoch": 0.11933461782326378, "grad_norm": 0.7204297780990601, "learning_rate": 0.00029966011034869084, "loss": 4.6362, "step": 84930 }, { "epoch": 0.11934866876142737, "grad_norm": 0.7283856868743896, "learning_rate": 0.00029965961517853883, "loss": 4.6364, "step": 84940 }, { "epoch": 0.11936271969959095, "grad_norm": 0.7073463201522827, "learning_rate": 0.0002996591196483633, "loss": 4.7698, "step": 84950 }, { "epoch": 0.11937677063775452, "grad_norm": 0.7231873869895935, "learning_rate": 0.00029965862375816546, "loss": 4.7165, "step": 84960 }, { "epoch": 0.1193908215759181, "grad_norm": 0.7474141716957092, "learning_rate": 0.00029965812750794654, "loss": 4.7594, "step": 84970 }, { "epoch": 0.11940487251408168, "grad_norm": 0.752204179763794, "learning_rate": 0.00029965763089770766, "loss": 4.7098, "step": 84980 }, { "epoch": 0.11941892345224525, "grad_norm": 0.6901721954345703, "learning_rate": 0.0002996571339274501, "loss": 4.5758, "step": 84990 }, { "epoch": 0.11943297439040883, "grad_norm": 0.711621105670929, "learning_rate": 0.000299656636597175, "loss": 4.7077, "step": 85000 }, { "epoch": 0.1194470253285724, 
"grad_norm": 0.7142300605773926, "learning_rate": 0.0002996561389068836, "loss": 4.7274, "step": 85010 }, { "epoch": 0.11946107626673598, "grad_norm": 0.7306178212165833, "learning_rate": 0.000299655640856577, "loss": 4.7378, "step": 85020 }, { "epoch": 0.11947512720489956, "grad_norm": 0.7129437923431396, "learning_rate": 0.0002996551424462565, "loss": 4.679, "step": 85030 }, { "epoch": 0.11948917814306315, "grad_norm": 0.7050901055335999, "learning_rate": 0.00029965464367592327, "loss": 4.7632, "step": 85040 }, { "epoch": 0.11950322908122672, "grad_norm": 0.6919633150100708, "learning_rate": 0.0002996541445455785, "loss": 4.629, "step": 85050 }, { "epoch": 0.1195172800193903, "grad_norm": 0.705011785030365, "learning_rate": 0.0002996536450552234, "loss": 4.639, "step": 85060 }, { "epoch": 0.11953133095755387, "grad_norm": 0.7281025648117065, "learning_rate": 0.0002996531452048592, "loss": 4.7754, "step": 85070 }, { "epoch": 0.11954538189571745, "grad_norm": 0.7474626898765564, "learning_rate": 0.00029965264499448704, "loss": 4.6266, "step": 85080 }, { "epoch": 0.11955943283388103, "grad_norm": 0.7219833731651306, "learning_rate": 0.00029965214442410817, "loss": 4.6425, "step": 85090 }, { "epoch": 0.1195734837720446, "grad_norm": 0.7083914875984192, "learning_rate": 0.0002996516434937237, "loss": 4.7466, "step": 85100 }, { "epoch": 0.11958753471020818, "grad_norm": 0.7148147225379944, "learning_rate": 0.000299651142203335, "loss": 4.5847, "step": 85110 }, { "epoch": 0.11960158564837176, "grad_norm": 0.7254843711853027, "learning_rate": 0.00029965064055294315, "loss": 4.6418, "step": 85120 }, { "epoch": 0.11961563658653533, "grad_norm": 0.7078420519828796, "learning_rate": 0.0002996501385425494, "loss": 4.6431, "step": 85130 }, { "epoch": 0.11962968752469892, "grad_norm": 0.7228267192840576, "learning_rate": 0.00029964963617215497, "loss": 4.6071, "step": 85140 }, { "epoch": 0.1196437384628625, "grad_norm": 0.700187623500824, "learning_rate": 
0.00029964913344176104, "loss": 4.765, "step": 85150 }, { "epoch": 0.11965778940102607, "grad_norm": 0.723201334476471, "learning_rate": 0.00029964863035136883, "loss": 4.7408, "step": 85160 }, { "epoch": 0.11967184033918965, "grad_norm": 0.7586082220077515, "learning_rate": 0.00029964812690097954, "loss": 4.7016, "step": 85170 }, { "epoch": 0.11968589127735323, "grad_norm": 0.7144497632980347, "learning_rate": 0.0002996476230905944, "loss": 4.6846, "step": 85180 }, { "epoch": 0.1196999422155168, "grad_norm": 0.7355106472969055, "learning_rate": 0.0002996471189202146, "loss": 4.7114, "step": 85190 }, { "epoch": 0.11971399315368038, "grad_norm": 0.7208760380744934, "learning_rate": 0.0002996466143898414, "loss": 4.6776, "step": 85200 }, { "epoch": 0.11972804409184395, "grad_norm": 0.724746823310852, "learning_rate": 0.00029964610949947593, "loss": 4.8292, "step": 85210 }, { "epoch": 0.11974209503000753, "grad_norm": 0.6953820586204529, "learning_rate": 0.0002996456042491195, "loss": 4.7256, "step": 85220 }, { "epoch": 0.11975614596817111, "grad_norm": 0.7167063355445862, "learning_rate": 0.0002996450986387733, "loss": 4.6268, "step": 85230 }, { "epoch": 0.11977019690633468, "grad_norm": 0.7081119418144226, "learning_rate": 0.0002996445926684385, "loss": 4.644, "step": 85240 }, { "epoch": 0.11978424784449827, "grad_norm": 0.7091720700263977, "learning_rate": 0.0002996440863381163, "loss": 4.7266, "step": 85250 }, { "epoch": 0.11979829878266185, "grad_norm": 0.7209839820861816, "learning_rate": 0.000299643579647808, "loss": 4.7454, "step": 85260 }, { "epoch": 0.11981234972082543, "grad_norm": 0.6966519951820374, "learning_rate": 0.00029964307259751474, "loss": 4.6377, "step": 85270 }, { "epoch": 0.119826400658989, "grad_norm": 0.701129674911499, "learning_rate": 0.00029964256518723786, "loss": 4.6073, "step": 85280 }, { "epoch": 0.11984045159715258, "grad_norm": 0.6923808455467224, "learning_rate": 0.0002996420574169785, "loss": 4.6931, "step": 85290 }, { "epoch": 
0.11985450253531615, "grad_norm": 0.7123744487762451, "learning_rate": 0.0002996415492867378, "loss": 4.6669, "step": 85300 }, { "epoch": 0.11986855347347973, "grad_norm": 0.7286718487739563, "learning_rate": 0.0002996410407965171, "loss": 4.6736, "step": 85310 }, { "epoch": 0.1198826044116433, "grad_norm": 0.7242770791053772, "learning_rate": 0.0002996405319463176, "loss": 4.6789, "step": 85320 }, { "epoch": 0.11989665534980688, "grad_norm": 0.7120243310928345, "learning_rate": 0.0002996400227361405, "loss": 4.6072, "step": 85330 }, { "epoch": 0.11991070628797046, "grad_norm": 0.7126664519309998, "learning_rate": 0.00029963951316598706, "loss": 4.7034, "step": 85340 }, { "epoch": 0.11992475722613405, "grad_norm": 0.7311067581176758, "learning_rate": 0.00029963900323585845, "loss": 4.7734, "step": 85350 }, { "epoch": 0.11993880816429763, "grad_norm": 0.736724317073822, "learning_rate": 0.00029963849294575596, "loss": 4.6918, "step": 85360 }, { "epoch": 0.1199528591024612, "grad_norm": 0.7146928310394287, "learning_rate": 0.0002996379822956808, "loss": 4.5878, "step": 85370 }, { "epoch": 0.11996691004062478, "grad_norm": 0.6940208673477173, "learning_rate": 0.00029963747128563417, "loss": 4.7649, "step": 85380 }, { "epoch": 0.11998096097878835, "grad_norm": 0.7303279638290405, "learning_rate": 0.0002996369599156173, "loss": 4.6806, "step": 85390 }, { "epoch": 0.11999501191695193, "grad_norm": 0.7200376391410828, "learning_rate": 0.00029963644818563145, "loss": 4.7124, "step": 85400 }, { "epoch": 0.1200090628551155, "grad_norm": 0.6997661590576172, "learning_rate": 0.00029963593609567783, "loss": 4.7171, "step": 85410 }, { "epoch": 0.12002311379327908, "grad_norm": 0.7147961854934692, "learning_rate": 0.00029963542364575766, "loss": 4.5216, "step": 85420 }, { "epoch": 0.12003716473144266, "grad_norm": 0.7196703553199768, "learning_rate": 0.0002996349108358722, "loss": 4.7912, "step": 85430 }, { "epoch": 0.12005121566960623, "grad_norm": 0.7229997515678406, 
"learning_rate": 0.0002996343976660227, "loss": 4.6595, "step": 85440 }, { "epoch": 0.12006526660776982, "grad_norm": 0.7053361535072327, "learning_rate": 0.00029963388413621037, "loss": 4.6721, "step": 85450 }, { "epoch": 0.1200793175459334, "grad_norm": 0.7070173621177673, "learning_rate": 0.00029963337024643637, "loss": 4.6046, "step": 85460 }, { "epoch": 0.12009336848409698, "grad_norm": 0.6948744654655457, "learning_rate": 0.0002996328559967021, "loss": 4.7345, "step": 85470 }, { "epoch": 0.12010741942226055, "grad_norm": 0.7141450643539429, "learning_rate": 0.00029963234138700865, "loss": 4.6867, "step": 85480 }, { "epoch": 0.12012147036042413, "grad_norm": 0.7317810654640198, "learning_rate": 0.0002996318264173573, "loss": 4.5998, "step": 85490 }, { "epoch": 0.1201355212985877, "grad_norm": 0.7293758392333984, "learning_rate": 0.0002996313110877494, "loss": 4.7669, "step": 85500 }, { "epoch": 0.12014957223675128, "grad_norm": 0.7065700888633728, "learning_rate": 0.000299630795398186, "loss": 4.6155, "step": 85510 }, { "epoch": 0.12016362317491486, "grad_norm": 0.7263256907463074, "learning_rate": 0.0002996302793486685, "loss": 4.6669, "step": 85520 }, { "epoch": 0.12017767411307843, "grad_norm": 0.7085826396942139, "learning_rate": 0.000299629762939198, "loss": 4.6789, "step": 85530 }, { "epoch": 0.12019172505124201, "grad_norm": 0.7257848381996155, "learning_rate": 0.0002996292461697759, "loss": 4.7586, "step": 85540 }, { "epoch": 0.12020577598940559, "grad_norm": 0.7008543610572815, "learning_rate": 0.0002996287290404033, "loss": 4.7479, "step": 85550 }, { "epoch": 0.12021982692756918, "grad_norm": 0.6934198141098022, "learning_rate": 0.00029962821155108153, "loss": 4.7428, "step": 85560 }, { "epoch": 0.12023387786573275, "grad_norm": 0.6932348012924194, "learning_rate": 0.0002996276937018119, "loss": 4.701, "step": 85570 }, { "epoch": 0.12024792880389633, "grad_norm": 0.6966341733932495, "learning_rate": 0.0002996271754925954, "loss": 4.7036, "step": 
85580 }, { "epoch": 0.1202619797420599, "grad_norm": 0.7221991419792175, "learning_rate": 0.00029962665692343356, "loss": 4.6009, "step": 85590 }, { "epoch": 0.12027603068022348, "grad_norm": 0.7265183925628662, "learning_rate": 0.00029962613799432744, "loss": 4.6958, "step": 85600 }, { "epoch": 0.12029008161838706, "grad_norm": 0.7104468941688538, "learning_rate": 0.0002996256187052784, "loss": 4.6442, "step": 85610 }, { "epoch": 0.12030413255655063, "grad_norm": 0.725537896156311, "learning_rate": 0.0002996250990562877, "loss": 4.7405, "step": 85620 }, { "epoch": 0.12031818349471421, "grad_norm": 0.7155194282531738, "learning_rate": 0.0002996245790473564, "loss": 4.7107, "step": 85630 }, { "epoch": 0.12033223443287779, "grad_norm": 0.7233233451843262, "learning_rate": 0.00029962405867848597, "loss": 4.6326, "step": 85640 }, { "epoch": 0.12034628537104136, "grad_norm": 0.7190825939178467, "learning_rate": 0.0002996235379496776, "loss": 4.7711, "step": 85650 }, { "epoch": 0.12036033630920495, "grad_norm": 0.7331857681274414, "learning_rate": 0.00029962301686093246, "loss": 4.7669, "step": 85660 }, { "epoch": 0.12037438724736853, "grad_norm": 0.7344217896461487, "learning_rate": 0.00029962249541225195, "loss": 4.7563, "step": 85670 }, { "epoch": 0.1203884381855321, "grad_norm": 0.706200122833252, "learning_rate": 0.0002996219736036372, "loss": 4.7661, "step": 85680 }, { "epoch": 0.12040248912369568, "grad_norm": 0.7147502899169922, "learning_rate": 0.0002996214514350895, "loss": 4.5101, "step": 85690 }, { "epoch": 0.12041654006185926, "grad_norm": 0.7082406878471375, "learning_rate": 0.0002996209289066101, "loss": 4.6323, "step": 85700 }, { "epoch": 0.12043059100002283, "grad_norm": 0.7128416299819946, "learning_rate": 0.0002996204060182003, "loss": 4.6708, "step": 85710 }, { "epoch": 0.12044464193818641, "grad_norm": 0.7045454382896423, "learning_rate": 0.0002996198827698613, "loss": 4.6902, "step": 85720 }, { "epoch": 0.12045869287634998, "grad_norm": 
0.7640816569328308, "learning_rate": 0.0002996193591615944, "loss": 4.7689, "step": 85730 }, { "epoch": 0.12047274381451356, "grad_norm": 0.7236823439598083, "learning_rate": 0.00029961883519340085, "loss": 4.7993, "step": 85740 }, { "epoch": 0.12048679475267714, "grad_norm": 0.7274718880653381, "learning_rate": 0.0002996183108652819, "loss": 4.7037, "step": 85750 }, { "epoch": 0.12050084569084073, "grad_norm": 0.7061973810195923, "learning_rate": 0.00029961778617723883, "loss": 4.6896, "step": 85760 }, { "epoch": 0.1205148966290043, "grad_norm": 0.7118090391159058, "learning_rate": 0.00029961726112927284, "loss": 4.6545, "step": 85770 }, { "epoch": 0.12052894756716788, "grad_norm": 0.6875846982002258, "learning_rate": 0.00029961673572138534, "loss": 4.6925, "step": 85780 }, { "epoch": 0.12054299850533146, "grad_norm": 0.7023685574531555, "learning_rate": 0.00029961620995357747, "loss": 4.6063, "step": 85790 }, { "epoch": 0.12055704944349503, "grad_norm": 0.7327830195426941, "learning_rate": 0.0002996156838258505, "loss": 4.7115, "step": 85800 }, { "epoch": 0.12057110038165861, "grad_norm": 0.7080143690109253, "learning_rate": 0.00029961515733820565, "loss": 4.7993, "step": 85810 }, { "epoch": 0.12058515131982218, "grad_norm": 0.7189124822616577, "learning_rate": 0.00029961463049064436, "loss": 4.6521, "step": 85820 }, { "epoch": 0.12059920225798576, "grad_norm": 0.7159058451652527, "learning_rate": 0.0002996141032831677, "loss": 4.798, "step": 85830 }, { "epoch": 0.12061325319614934, "grad_norm": 0.7728943228721619, "learning_rate": 0.0002996135757157771, "loss": 4.6605, "step": 85840 }, { "epoch": 0.12062730413431291, "grad_norm": 0.9222893118858337, "learning_rate": 0.0002996130477884737, "loss": 4.6658, "step": 85850 }, { "epoch": 0.12064135507247649, "grad_norm": 0.7004067301750183, "learning_rate": 0.00029961251950125884, "loss": 4.7422, "step": 85860 }, { "epoch": 0.12065540601064008, "grad_norm": 0.7140782475471497, "learning_rate": 0.00029961199085413385, 
"loss": 4.6833, "step": 85870 }, { "epoch": 0.12066945694880366, "grad_norm": 0.7055903077125549, "learning_rate": 0.00029961146184709987, "loss": 4.7995, "step": 85880 }, { "epoch": 0.12068350788696723, "grad_norm": 0.7646137475967407, "learning_rate": 0.00029961093248015826, "loss": 4.7584, "step": 85890 }, { "epoch": 0.12069755882513081, "grad_norm": 0.710187554359436, "learning_rate": 0.0002996104027533102, "loss": 4.5475, "step": 85900 }, { "epoch": 0.12071160976329438, "grad_norm": 0.7333691120147705, "learning_rate": 0.00029960987266655707, "loss": 4.7007, "step": 85910 }, { "epoch": 0.12072566070145796, "grad_norm": 0.7043735384941101, "learning_rate": 0.00029960934221990015, "loss": 4.7965, "step": 85920 }, { "epoch": 0.12073971163962154, "grad_norm": 0.7109283804893494, "learning_rate": 0.00029960881141334063, "loss": 4.6432, "step": 85930 }, { "epoch": 0.12075376257778511, "grad_norm": 0.6858183145523071, "learning_rate": 0.00029960828024687983, "loss": 4.7364, "step": 85940 }, { "epoch": 0.12076781351594869, "grad_norm": 0.7360689043998718, "learning_rate": 0.0002996077487205191, "loss": 4.5901, "step": 85950 }, { "epoch": 0.12078186445411226, "grad_norm": 0.6898038387298584, "learning_rate": 0.0002996072700390809, "loss": 4.6589, "step": 85960 }, { "epoch": 0.12079591539227585, "grad_norm": 1.0662989616394043, "learning_rate": 0.00029960673782891364, "loss": 4.5829, "step": 85970 }, { "epoch": 0.12080996633043943, "grad_norm": 0.7156269550323486, "learning_rate": 0.0002996062052588501, "loss": 4.722, "step": 85980 }, { "epoch": 0.120824017268603, "grad_norm": 0.7196338176727295, "learning_rate": 0.0002996056723288915, "loss": 4.655, "step": 85990 }, { "epoch": 0.12083806820676658, "grad_norm": 0.6985352039337158, "learning_rate": 0.0002996051390390392, "loss": 4.7494, "step": 86000 }, { "epoch": 0.12085211914493016, "grad_norm": 0.7045134902000427, "learning_rate": 0.0002996046053892945, "loss": 4.7251, "step": 86010 }, { "epoch": 0.12086617008309374, 
"grad_norm": 0.7500072121620178, "learning_rate": 0.00029960407137965856, "loss": 4.732, "step": 86020 }, { "epoch": 0.12088022102125731, "grad_norm": 0.7060306668281555, "learning_rate": 0.0002996035370101327, "loss": 4.7745, "step": 86030 }, { "epoch": 0.12089427195942089, "grad_norm": 0.7595226764678955, "learning_rate": 0.0002996030022807184, "loss": 4.6405, "step": 86040 }, { "epoch": 0.12090832289758446, "grad_norm": 0.696519136428833, "learning_rate": 0.0002996024671914167, "loss": 4.724, "step": 86050 }, { "epoch": 0.12092237383574804, "grad_norm": 0.7316704988479614, "learning_rate": 0.000299601931742229, "loss": 4.5628, "step": 86060 }, { "epoch": 0.12093642477391163, "grad_norm": 0.6876169443130493, "learning_rate": 0.0002996013959331566, "loss": 4.7434, "step": 86070 }, { "epoch": 0.1209504757120752, "grad_norm": 0.7389705777168274, "learning_rate": 0.00029960085976420066, "loss": 4.7681, "step": 86080 }, { "epoch": 0.12096452665023878, "grad_norm": 0.702412486076355, "learning_rate": 0.00029960032323536265, "loss": 4.7209, "step": 86090 }, { "epoch": 0.12097857758840236, "grad_norm": 0.6958481669425964, "learning_rate": 0.0002995997863466437, "loss": 4.8133, "step": 86100 }, { "epoch": 0.12099262852656593, "grad_norm": 0.717162013053894, "learning_rate": 0.00029959924909804525, "loss": 4.7312, "step": 86110 }, { "epoch": 0.12100667946472951, "grad_norm": 0.7261740565299988, "learning_rate": 0.00029959871148956853, "loss": 4.6485, "step": 86120 }, { "epoch": 0.12102073040289309, "grad_norm": 0.7102240324020386, "learning_rate": 0.00029959817352121485, "loss": 4.8627, "step": 86130 }, { "epoch": 0.12103478134105666, "grad_norm": 0.7106133699417114, "learning_rate": 0.0002995976351929854, "loss": 4.7682, "step": 86140 }, { "epoch": 0.12104883227922024, "grad_norm": 0.6954272389411926, "learning_rate": 0.00029959709650488164, "loss": 4.6691, "step": 86150 }, { "epoch": 0.12106288321738382, "grad_norm": 0.7176921367645264, "learning_rate": 
0.0002995965574569047, "loss": 4.6487, "step": 86160 }, { "epoch": 0.12107693415554739, "grad_norm": 0.8005315661430359, "learning_rate": 0.000299596018049056, "loss": 4.735, "step": 86170 }, { "epoch": 0.12109098509371098, "grad_norm": 0.7331059575080872, "learning_rate": 0.0002995954782813368, "loss": 4.7956, "step": 86180 }, { "epoch": 0.12110503603187456, "grad_norm": 0.7373118996620178, "learning_rate": 0.0002995949381537484, "loss": 4.584, "step": 86190 }, { "epoch": 0.12111908697003813, "grad_norm": 0.7049098610877991, "learning_rate": 0.00029959439766629207, "loss": 4.6658, "step": 86200 }, { "epoch": 0.12113313790820171, "grad_norm": 0.7197480797767639, "learning_rate": 0.00029959385681896914, "loss": 4.8936, "step": 86210 }, { "epoch": 0.12114718884636529, "grad_norm": 0.7270209193229675, "learning_rate": 0.00029959331561178087, "loss": 4.6427, "step": 86220 }, { "epoch": 0.12116123978452886, "grad_norm": 0.7451220750808716, "learning_rate": 0.0002995927740447287, "loss": 4.7832, "step": 86230 }, { "epoch": 0.12117529072269244, "grad_norm": 0.7173677682876587, "learning_rate": 0.00029959223211781374, "loss": 4.6468, "step": 86240 }, { "epoch": 0.12118934166085601, "grad_norm": 0.7185055613517761, "learning_rate": 0.0002995916898310374, "loss": 4.7216, "step": 86250 }, { "epoch": 0.12120339259901959, "grad_norm": 0.729455828666687, "learning_rate": 0.00029959114718440094, "loss": 4.7083, "step": 86260 }, { "epoch": 0.12121744353718317, "grad_norm": 0.70658940076828, "learning_rate": 0.0002995906041779057, "loss": 4.6727, "step": 86270 }, { "epoch": 0.12123149447534676, "grad_norm": 0.7007445693016052, "learning_rate": 0.00029959006081155303, "loss": 4.7122, "step": 86280 }, { "epoch": 0.12124554541351033, "grad_norm": 0.7127397656440735, "learning_rate": 0.00029958951708534417, "loss": 4.6473, "step": 86290 }, { "epoch": 0.12125959635167391, "grad_norm": 0.7260735630989075, "learning_rate": 0.0002995889729992805, "loss": 4.6155, "step": 86300 }, { "epoch": 
0.12127364728983749, "grad_norm": 0.7390902638435364, "learning_rate": 0.00029958842855336314, "loss": 4.6914, "step": 86310 }, { "epoch": 0.12128769822800106, "grad_norm": 0.7290669083595276, "learning_rate": 0.00029958788374759364, "loss": 4.6071, "step": 86320 }, { "epoch": 0.12130174916616464, "grad_norm": 0.7232752442359924, "learning_rate": 0.00029958733858197314, "loss": 4.5553, "step": 86330 }, { "epoch": 0.12131580010432821, "grad_norm": 0.7251487970352173, "learning_rate": 0.0002995867930565031, "loss": 4.6294, "step": 86340 }, { "epoch": 0.12132985104249179, "grad_norm": 0.7375115156173706, "learning_rate": 0.00029958624717118463, "loss": 4.7586, "step": 86350 }, { "epoch": 0.12134390198065537, "grad_norm": 0.7383313775062561, "learning_rate": 0.0002995857009260192, "loss": 4.7191, "step": 86360 }, { "epoch": 0.12135795291881894, "grad_norm": 0.730225145816803, "learning_rate": 0.0002995851543210081, "loss": 4.6134, "step": 86370 }, { "epoch": 0.12137200385698253, "grad_norm": 1.1462152004241943, "learning_rate": 0.00029958460735615266, "loss": 4.6346, "step": 86380 }, { "epoch": 0.12138605479514611, "grad_norm": 0.7488716840744019, "learning_rate": 0.00029958406003145413, "loss": 4.7212, "step": 86390 }, { "epoch": 0.12140010573330969, "grad_norm": 0.7324610352516174, "learning_rate": 0.0002995835123469139, "loss": 4.6655, "step": 86400 }, { "epoch": 0.12141415667147326, "grad_norm": 0.7316756248474121, "learning_rate": 0.0002995829643025332, "loss": 4.5674, "step": 86410 }, { "epoch": 0.12142820760963684, "grad_norm": 0.7131539583206177, "learning_rate": 0.0002995824158983134, "loss": 4.7464, "step": 86420 }, { "epoch": 0.12144225854780041, "grad_norm": 0.7157332897186279, "learning_rate": 0.0002995818671342559, "loss": 4.7049, "step": 86430 }, { "epoch": 0.12145630948596399, "grad_norm": 0.7550666332244873, "learning_rate": 0.0002995813180103618, "loss": 4.5843, "step": 86440 }, { "epoch": 0.12147036042412757, "grad_norm": 0.7143164277076721, 
"learning_rate": 0.00029958076852663266, "loss": 4.6484, "step": 86450 }, { "epoch": 0.12148441136229114, "grad_norm": 0.7037560343742371, "learning_rate": 0.0002995802186830697, "loss": 4.806, "step": 86460 }, { "epoch": 0.12149846230045472, "grad_norm": 0.710610032081604, "learning_rate": 0.0002995796684796742, "loss": 4.7, "step": 86470 }, { "epoch": 0.1215125132386183, "grad_norm": 0.7024252414703369, "learning_rate": 0.0002995791179164475, "loss": 4.6566, "step": 86480 }, { "epoch": 0.12152656417678188, "grad_norm": 0.7062261700630188, "learning_rate": 0.00029957856699339104, "loss": 4.69, "step": 86490 }, { "epoch": 0.12154061511494546, "grad_norm": 0.7243465185165405, "learning_rate": 0.00029957801571050597, "loss": 4.7999, "step": 86500 }, { "epoch": 0.12155466605310904, "grad_norm": 0.7086811661720276, "learning_rate": 0.0002995774640677937, "loss": 4.6722, "step": 86510 }, { "epoch": 0.12156871699127261, "grad_norm": 0.7045823931694031, "learning_rate": 0.0002995769672817016, "loss": 4.8183, "step": 86520 }, { "epoch": 0.12158276792943619, "grad_norm": 0.7064098119735718, "learning_rate": 0.0002995764149553213, "loss": 4.7609, "step": 86530 }, { "epoch": 0.12159681886759977, "grad_norm": 0.7484224438667297, "learning_rate": 0.0002995758622691177, "loss": 4.681, "step": 86540 }, { "epoch": 0.12161086980576334, "grad_norm": 0.6937485933303833, "learning_rate": 0.0002995753092230921, "loss": 4.7115, "step": 86550 }, { "epoch": 0.12162492074392692, "grad_norm": 0.7272822856903076, "learning_rate": 0.00029957475581724586, "loss": 4.5993, "step": 86560 }, { "epoch": 0.1216389716820905, "grad_norm": 0.696310818195343, "learning_rate": 0.0002995742020515802, "loss": 4.7079, "step": 86570 }, { "epoch": 0.12165302262025407, "grad_norm": 0.7192110419273376, "learning_rate": 0.0002995736479260965, "loss": 4.7158, "step": 86580 }, { "epoch": 0.12166707355841766, "grad_norm": 0.7095620632171631, "learning_rate": 0.0002995730934407961, "loss": 4.6992, "step": 86590 }, { 
"epoch": 0.12168112449658124, "grad_norm": 0.6891617178916931, "learning_rate": 0.0002995725385956804, "loss": 4.7227, "step": 86600 }, { "epoch": 0.12169517543474481, "grad_norm": 0.7383580207824707, "learning_rate": 0.0002995719833907506, "loss": 4.7221, "step": 86610 }, { "epoch": 0.12170922637290839, "grad_norm": 0.7053825259208679, "learning_rate": 0.0002995714278260082, "loss": 4.6793, "step": 86620 }, { "epoch": 0.12172327731107196, "grad_norm": 0.7410792708396912, "learning_rate": 0.0002995708719014544, "loss": 4.769, "step": 86630 }, { "epoch": 0.12173732824923554, "grad_norm": 0.711789071559906, "learning_rate": 0.0002995703156170906, "loss": 4.7196, "step": 86640 }, { "epoch": 0.12175137918739912, "grad_norm": 0.6985942125320435, "learning_rate": 0.0002995697589729181, "loss": 4.6477, "step": 86650 }, { "epoch": 0.1217654301255627, "grad_norm": 0.6995700001716614, "learning_rate": 0.00029956920196893834, "loss": 4.5672, "step": 86660 }, { "epoch": 0.12177948106372627, "grad_norm": 0.7413903474807739, "learning_rate": 0.0002995686446051525, "loss": 4.7654, "step": 86670 }, { "epoch": 0.12179353200188985, "grad_norm": 0.7320995926856995, "learning_rate": 0.00029956808688156206, "loss": 4.7567, "step": 86680 }, { "epoch": 0.12180758294005344, "grad_norm": 0.7222046852111816, "learning_rate": 0.0002995675287981683, "loss": 4.6457, "step": 86690 }, { "epoch": 0.12182163387821701, "grad_norm": 0.711199939250946, "learning_rate": 0.00029956697035497256, "loss": 4.6399, "step": 86700 }, { "epoch": 0.12183568481638059, "grad_norm": 0.7224550247192383, "learning_rate": 0.00029956641155197617, "loss": 4.6731, "step": 86710 }, { "epoch": 0.12184973575454416, "grad_norm": 0.6902201175689697, "learning_rate": 0.00029956585238918057, "loss": 4.6802, "step": 86720 }, { "epoch": 0.12186378669270774, "grad_norm": 0.7383785247802734, "learning_rate": 0.00029956529286658695, "loss": 4.7232, "step": 86730 }, { "epoch": 0.12187783763087132, "grad_norm": 0.7981114387512207, 
"learning_rate": 0.00029956473298419677, "loss": 4.7792, "step": 86740 }, { "epoch": 0.12189188856903489, "grad_norm": 0.7090650200843811, "learning_rate": 0.00029956417274201134, "loss": 4.7199, "step": 86750 }, { "epoch": 0.12190593950719847, "grad_norm": 0.7152296900749207, "learning_rate": 0.00029956361214003205, "loss": 4.6483, "step": 86760 }, { "epoch": 0.12191999044536204, "grad_norm": 0.7130936980247498, "learning_rate": 0.0002995630511782602, "loss": 4.6469, "step": 86770 }, { "epoch": 0.12193404138352562, "grad_norm": 0.7285060286521912, "learning_rate": 0.0002995624898566971, "loss": 4.7693, "step": 86780 }, { "epoch": 0.1219480923216892, "grad_norm": 0.7463589906692505, "learning_rate": 0.0002995619281753442, "loss": 4.8208, "step": 86790 }, { "epoch": 0.12196214325985279, "grad_norm": 0.7308253049850464, "learning_rate": 0.00029956136613420276, "loss": 4.7281, "step": 86800 }, { "epoch": 0.12197619419801636, "grad_norm": 0.7179209589958191, "learning_rate": 0.0002995608037332742, "loss": 4.672, "step": 86810 }, { "epoch": 0.12199024513617994, "grad_norm": 0.731372594833374, "learning_rate": 0.00029956024097255987, "loss": 4.8004, "step": 86820 }, { "epoch": 0.12200429607434352, "grad_norm": 0.7444266080856323, "learning_rate": 0.00029955967785206107, "loss": 4.5729, "step": 86830 }, { "epoch": 0.12201834701250709, "grad_norm": 0.7302293181419373, "learning_rate": 0.0002995591143717792, "loss": 4.662, "step": 86840 }, { "epoch": 0.12203239795067067, "grad_norm": 0.7154730558395386, "learning_rate": 0.0002995585505317156, "loss": 4.5486, "step": 86850 }, { "epoch": 0.12204644888883424, "grad_norm": 0.701775074005127, "learning_rate": 0.00029955798633187164, "loss": 4.5835, "step": 86860 }, { "epoch": 0.12206049982699782, "grad_norm": 0.6990428566932678, "learning_rate": 0.0002995574217722487, "loss": 4.6875, "step": 86870 }, { "epoch": 0.1220745507651614, "grad_norm": 0.7048460245132446, "learning_rate": 0.000299556856852848, "loss": 4.6615, "step": 
86880 }, { "epoch": 0.12208860170332497, "grad_norm": 0.7063455581665039, "learning_rate": 0.00029955629157367105, "loss": 4.8373, "step": 86890 }, { "epoch": 0.12210265264148856, "grad_norm": 0.7515528202056885, "learning_rate": 0.0002995557259347192, "loss": 4.7006, "step": 86900 }, { "epoch": 0.12211670357965214, "grad_norm": 0.7269037365913391, "learning_rate": 0.0002995551599359937, "loss": 4.7502, "step": 86910 }, { "epoch": 0.12213075451781572, "grad_norm": 0.7405303716659546, "learning_rate": 0.0002995545935774961, "loss": 4.6159, "step": 86920 }, { "epoch": 0.12214480545597929, "grad_norm": 0.7235696315765381, "learning_rate": 0.00029955402685922757, "loss": 4.7358, "step": 86930 }, { "epoch": 0.12215885639414287, "grad_norm": 0.711112916469574, "learning_rate": 0.0002995534597811895, "loss": 4.7783, "step": 86940 }, { "epoch": 0.12217290733230644, "grad_norm": 0.7069167494773865, "learning_rate": 0.0002995528923433834, "loss": 4.4935, "step": 86950 }, { "epoch": 0.12218695827047002, "grad_norm": 0.7162838578224182, "learning_rate": 0.0002995523245458105, "loss": 4.5865, "step": 86960 }, { "epoch": 0.1222010092086336, "grad_norm": 0.7079212069511414, "learning_rate": 0.0002995517563884722, "loss": 4.6902, "step": 86970 }, { "epoch": 0.12221506014679717, "grad_norm": 0.7136911749839783, "learning_rate": 0.0002995511878713699, "loss": 4.7081, "step": 86980 }, { "epoch": 0.12222911108496075, "grad_norm": 0.7026124000549316, "learning_rate": 0.0002995506189945049, "loss": 4.7789, "step": 86990 }, { "epoch": 0.12224316202312434, "grad_norm": 0.8226521611213684, "learning_rate": 0.00029955004975787863, "loss": 4.6564, "step": 87000 }, { "epoch": 0.12225721296128791, "grad_norm": 0.7236669659614563, "learning_rate": 0.0002995494801614924, "loss": 4.785, "step": 87010 }, { "epoch": 0.12227126389945149, "grad_norm": 0.7194319367408752, "learning_rate": 0.0002995489102053477, "loss": 4.7189, "step": 87020 }, { "epoch": 0.12228531483761507, "grad_norm": 
0.710686445236206, "learning_rate": 0.00029954833988944576, "loss": 4.5753, "step": 87030 }, { "epoch": 0.12229936577577864, "grad_norm": 0.7050729393959045, "learning_rate": 0.000299547769213788, "loss": 4.4799, "step": 87040 }, { "epoch": 0.12231341671394222, "grad_norm": 0.7316957712173462, "learning_rate": 0.00029954719817837583, "loss": 4.772, "step": 87050 }, { "epoch": 0.1223274676521058, "grad_norm": 0.6963942050933838, "learning_rate": 0.0002995466267832106, "loss": 4.8665, "step": 87060 }, { "epoch": 0.12234151859026937, "grad_norm": 0.7359170913696289, "learning_rate": 0.00029954605502829364, "loss": 4.6619, "step": 87070 }, { "epoch": 0.12235556952843295, "grad_norm": 0.6984687447547913, "learning_rate": 0.0002995454829136264, "loss": 4.7002, "step": 87080 }, { "epoch": 0.12236962046659652, "grad_norm": 0.7410992383956909, "learning_rate": 0.00029954491043921025, "loss": 4.7953, "step": 87090 }, { "epoch": 0.1223836714047601, "grad_norm": 0.7628210186958313, "learning_rate": 0.0002995443376050465, "loss": 4.5949, "step": 87100 }, { "epoch": 0.12239772234292369, "grad_norm": 0.7284057140350342, "learning_rate": 0.0002995437644111366, "loss": 4.6736, "step": 87110 }, { "epoch": 0.12241177328108727, "grad_norm": 0.706147313117981, "learning_rate": 0.0002995431908574819, "loss": 4.7252, "step": 87120 }, { "epoch": 0.12242582421925084, "grad_norm": 0.7244818806648254, "learning_rate": 0.00029954261694408374, "loss": 4.6662, "step": 87130 }, { "epoch": 0.12243987515741442, "grad_norm": 0.7074465155601501, "learning_rate": 0.0002995420426709436, "loss": 4.7113, "step": 87140 }, { "epoch": 0.122453926095578, "grad_norm": 0.7343944311141968, "learning_rate": 0.0002995414680380627, "loss": 4.6968, "step": 87150 }, { "epoch": 0.12246797703374157, "grad_norm": 0.7110148668289185, "learning_rate": 0.0002995408930454426, "loss": 4.7478, "step": 87160 }, { "epoch": 0.12248202797190515, "grad_norm": 0.6801456809043884, "learning_rate": 0.0002995403176930846, "loss": 
4.6153, "step": 87170 }, { "epoch": 0.12249607891006872, "grad_norm": 0.7084956169128418, "learning_rate": 0.0002995397419809901, "loss": 4.7813, "step": 87180 }, { "epoch": 0.1225101298482323, "grad_norm": 0.7134016156196594, "learning_rate": 0.0002995391659091604, "loss": 4.7072, "step": 87190 }, { "epoch": 0.12252418078639588, "grad_norm": 0.7109416723251343, "learning_rate": 0.000299538589477597, "loss": 4.6814, "step": 87200 }, { "epoch": 0.12253823172455947, "grad_norm": 0.7339860200881958, "learning_rate": 0.00029953801268630125, "loss": 4.7426, "step": 87210 }, { "epoch": 0.12255228266272304, "grad_norm": 0.7281706929206848, "learning_rate": 0.00029953743553527457, "loss": 4.6708, "step": 87220 }, { "epoch": 0.12256633360088662, "grad_norm": 0.7278706431388855, "learning_rate": 0.0002995368580245183, "loss": 4.6032, "step": 87230 }, { "epoch": 0.1225803845390502, "grad_norm": 0.6926674842834473, "learning_rate": 0.00029953628015403375, "loss": 4.7332, "step": 87240 }, { "epoch": 0.12259443547721377, "grad_norm": 0.6893281936645508, "learning_rate": 0.00029953570192382253, "loss": 4.6901, "step": 87250 }, { "epoch": 0.12260848641537735, "grad_norm": 0.7695411443710327, "learning_rate": 0.0002995351233338858, "loss": 4.6268, "step": 87260 }, { "epoch": 0.12262253735354092, "grad_norm": 0.7140504121780396, "learning_rate": 0.00029953454438422515, "loss": 4.6299, "step": 87270 }, { "epoch": 0.1226365882917045, "grad_norm": 0.7018819451332092, "learning_rate": 0.0002995339650748418, "loss": 4.7092, "step": 87280 }, { "epoch": 0.12265063922986807, "grad_norm": 0.7678191065788269, "learning_rate": 0.00029953338540573727, "loss": 4.7711, "step": 87290 }, { "epoch": 0.12266469016803165, "grad_norm": 0.7389617562294006, "learning_rate": 0.00029953280537691287, "loss": 4.7441, "step": 87300 }, { "epoch": 0.12267874110619524, "grad_norm": 0.7068891525268555, "learning_rate": 0.00029953222498837003, "loss": 4.6663, "step": 87310 }, { "epoch": 0.12269279204435882, 
"grad_norm": 0.7148218750953674, "learning_rate": 0.00029953164424011017, "loss": 4.7392, "step": 87320 }, { "epoch": 0.1227068429825224, "grad_norm": 0.68925541639328, "learning_rate": 0.0002995310631321347, "loss": 4.5429, "step": 87330 }, { "epoch": 0.12272089392068597, "grad_norm": 0.7171045541763306, "learning_rate": 0.00029953048166444493, "loss": 4.7119, "step": 87340 }, { "epoch": 0.12273494485884955, "grad_norm": 0.7084298133850098, "learning_rate": 0.0002995298998370423, "loss": 4.7834, "step": 87350 }, { "epoch": 0.12274899579701312, "grad_norm": 0.7399872541427612, "learning_rate": 0.00029952931764992826, "loss": 4.6349, "step": 87360 }, { "epoch": 0.1227630467351767, "grad_norm": 0.7395424246788025, "learning_rate": 0.00029952873510310417, "loss": 4.6944, "step": 87370 }, { "epoch": 0.12277709767334027, "grad_norm": 0.7185593843460083, "learning_rate": 0.00029952815219657145, "loss": 4.6208, "step": 87380 }, { "epoch": 0.12279114861150385, "grad_norm": 0.7182762622833252, "learning_rate": 0.0002995275689303314, "loss": 4.536, "step": 87390 }, { "epoch": 0.12280519954966743, "grad_norm": 0.7268804311752319, "learning_rate": 0.0002995269853043856, "loss": 4.6515, "step": 87400 }, { "epoch": 0.122819250487831, "grad_norm": 0.7242562174797058, "learning_rate": 0.0002995264013187353, "loss": 4.7664, "step": 87410 }, { "epoch": 0.12283330142599459, "grad_norm": 0.7390630841255188, "learning_rate": 0.000299525816973382, "loss": 4.7246, "step": 87420 }, { "epoch": 0.12284735236415817, "grad_norm": 0.7184797525405884, "learning_rate": 0.00029952523226832705, "loss": 4.6634, "step": 87430 }, { "epoch": 0.12286140330232175, "grad_norm": 0.7052783370018005, "learning_rate": 0.0002995246472035719, "loss": 4.6488, "step": 87440 }, { "epoch": 0.12287545424048532, "grad_norm": 0.7244933247566223, "learning_rate": 0.0002995240617791179, "loss": 4.655, "step": 87450 }, { "epoch": 0.1228895051786489, "grad_norm": 0.6974210739135742, "learning_rate": 
0.00029952347599496653, "loss": 4.6001, "step": 87460 }, { "epoch": 0.12290355611681247, "grad_norm": 0.7236213088035583, "learning_rate": 0.00029952288985111915, "loss": 4.7134, "step": 87470 }, { "epoch": 0.12291760705497605, "grad_norm": 0.7080331444740295, "learning_rate": 0.0002995223033475772, "loss": 4.6978, "step": 87480 }, { "epoch": 0.12293165799313963, "grad_norm": 0.7316338419914246, "learning_rate": 0.00029952171648434204, "loss": 4.601, "step": 87490 }, { "epoch": 0.1229457089313032, "grad_norm": 0.687455415725708, "learning_rate": 0.0002995211292614151, "loss": 4.5823, "step": 87500 }, { "epoch": 0.12295975986946678, "grad_norm": 0.7364456653594971, "learning_rate": 0.0002995205416787979, "loss": 4.6627, "step": 87510 }, { "epoch": 0.12297381080763037, "grad_norm": 0.7389670014381409, "learning_rate": 0.00029951995373649174, "loss": 4.6651, "step": 87520 }, { "epoch": 0.12298786174579394, "grad_norm": 0.6907218098640442, "learning_rate": 0.000299519365434498, "loss": 4.6806, "step": 87530 }, { "epoch": 0.12300191268395752, "grad_norm": 0.7170994281768799, "learning_rate": 0.0002995187767728182, "loss": 4.6415, "step": 87540 }, { "epoch": 0.1230159636221211, "grad_norm": 0.7091425061225891, "learning_rate": 0.0002995181877514537, "loss": 4.669, "step": 87550 }, { "epoch": 0.12303001456028467, "grad_norm": 0.7479362487792969, "learning_rate": 0.0002995175983704059, "loss": 4.6114, "step": 87560 }, { "epoch": 0.12304406549844825, "grad_norm": 0.7046723961830139, "learning_rate": 0.00029951700862967625, "loss": 4.6076, "step": 87570 }, { "epoch": 0.12305811643661183, "grad_norm": 0.7411142587661743, "learning_rate": 0.0002995164185292662, "loss": 4.6606, "step": 87580 }, { "epoch": 0.1230721673747754, "grad_norm": 0.7122248411178589, "learning_rate": 0.00029951582806917707, "loss": 4.8045, "step": 87590 }, { "epoch": 0.12308621831293898, "grad_norm": 0.7476603984832764, "learning_rate": 0.00029951523724941037, "loss": 4.6634, "step": 87600 }, { "epoch": 
0.12310026925110255, "grad_norm": 0.7404193878173828, "learning_rate": 0.0002995146460699675, "loss": 4.6486, "step": 87610 }, { "epoch": 0.12311432018926614, "grad_norm": 0.707002580165863, "learning_rate": 0.0002995140545308499, "loss": 4.7272, "step": 87620 }, { "epoch": 0.12312837112742972, "grad_norm": 0.762656569480896, "learning_rate": 0.0002995134626320589, "loss": 4.6558, "step": 87630 }, { "epoch": 0.1231424220655933, "grad_norm": 0.7162299752235413, "learning_rate": 0.00029951287037359605, "loss": 4.6148, "step": 87640 }, { "epoch": 0.12315647300375687, "grad_norm": 0.7232511043548584, "learning_rate": 0.0002995122777554627, "loss": 4.6304, "step": 87650 }, { "epoch": 0.12317052394192045, "grad_norm": 0.7200028896331787, "learning_rate": 0.00029951168477766033, "loss": 4.7234, "step": 87660 }, { "epoch": 0.12318457488008402, "grad_norm": 0.705317497253418, "learning_rate": 0.00029951109144019024, "loss": 4.6568, "step": 87670 }, { "epoch": 0.1231986258182476, "grad_norm": 0.7253167033195496, "learning_rate": 0.000299510497743054, "loss": 4.7056, "step": 87680 }, { "epoch": 0.12321267675641118, "grad_norm": 0.7323938012123108, "learning_rate": 0.000299509903686253, "loss": 4.6481, "step": 87690 }, { "epoch": 0.12322672769457475, "grad_norm": 0.6936154961585999, "learning_rate": 0.00029950930926978865, "loss": 4.7263, "step": 87700 }, { "epoch": 0.12324077863273833, "grad_norm": 0.7268804311752319, "learning_rate": 0.0002995087144936624, "loss": 4.6468, "step": 87710 }, { "epoch": 0.1232548295709019, "grad_norm": 0.7117732167243958, "learning_rate": 0.0002995081193578756, "loss": 4.7268, "step": 87720 }, { "epoch": 0.1232688805090655, "grad_norm": 0.7191809415817261, "learning_rate": 0.0002995075238624298, "loss": 4.7574, "step": 87730 }, { "epoch": 0.12328293144722907, "grad_norm": 0.7095280885696411, "learning_rate": 0.0002995069280073263, "loss": 4.689, "step": 87740 }, { "epoch": 0.12329698238539265, "grad_norm": 0.7109695076942444, "learning_rate": 
0.0002995063317925667, "loss": 4.6289, "step": 87750 }, { "epoch": 0.12331103332355622, "grad_norm": 0.7158311605453491, "learning_rate": 0.0002995057352181523, "loss": 4.6884, "step": 87760 }, { "epoch": 0.1233250842617198, "grad_norm": 0.7213734984397888, "learning_rate": 0.00029950513828408464, "loss": 4.8162, "step": 87770 }, { "epoch": 0.12333913519988338, "grad_norm": 0.7377204895019531, "learning_rate": 0.00029950454099036505, "loss": 4.6634, "step": 87780 }, { "epoch": 0.12335318613804695, "grad_norm": 0.7066074013710022, "learning_rate": 0.000299503943336995, "loss": 4.6969, "step": 87790 }, { "epoch": 0.12336723707621053, "grad_norm": 0.7790802121162415, "learning_rate": 0.00029950334532397597, "loss": 4.7552, "step": 87800 }, { "epoch": 0.1233812880143741, "grad_norm": 0.7286075949668884, "learning_rate": 0.0002995027469513094, "loss": 4.6512, "step": 87810 }, { "epoch": 0.12339533895253768, "grad_norm": 0.7531540989875793, "learning_rate": 0.00029950214821899664, "loss": 4.7612, "step": 87820 }, { "epoch": 0.12340938989070127, "grad_norm": 0.7376166582107544, "learning_rate": 0.00029950154912703923, "loss": 4.6591, "step": 87830 }, { "epoch": 0.12342344082886485, "grad_norm": 0.7482818365097046, "learning_rate": 0.0002995009496754385, "loss": 4.5681, "step": 87840 }, { "epoch": 0.12343749176702842, "grad_norm": 0.7085266709327698, "learning_rate": 0.00029950034986419606, "loss": 4.7676, "step": 87850 }, { "epoch": 0.123451542705192, "grad_norm": 0.7395931482315063, "learning_rate": 0.0002994997496933132, "loss": 4.6275, "step": 87860 }, { "epoch": 0.12346559364335558, "grad_norm": 0.7464209198951721, "learning_rate": 0.0002994991491627914, "loss": 4.746, "step": 87870 }, { "epoch": 0.12347964458151915, "grad_norm": 0.7421188950538635, "learning_rate": 0.0002994985482726322, "loss": 4.6564, "step": 87880 }, { "epoch": 0.12349369551968273, "grad_norm": 0.7147485017776489, "learning_rate": 0.0002994979470228369, "loss": 4.6841, "step": 87890 }, { "epoch": 
0.1235077464578463, "grad_norm": 0.7064347267150879, "learning_rate": 0.0002994973454134071, "loss": 4.6705, "step": 87900 }, { "epoch": 0.12352179739600988, "grad_norm": 0.6962974071502686, "learning_rate": 0.00029949674344434407, "loss": 4.5298, "step": 87910 }, { "epoch": 0.12353584833417346, "grad_norm": 0.7260435223579407, "learning_rate": 0.0002994961411156494, "loss": 4.8508, "step": 87920 }, { "epoch": 0.12354989927233705, "grad_norm": 0.7157175540924072, "learning_rate": 0.00029949553842732447, "loss": 4.5779, "step": 87930 }, { "epoch": 0.12356395021050062, "grad_norm": 0.7105233669281006, "learning_rate": 0.0002994949353793708, "loss": 4.6836, "step": 87940 }, { "epoch": 0.1235780011486642, "grad_norm": 0.7243243455886841, "learning_rate": 0.0002994943319717897, "loss": 4.6184, "step": 87950 }, { "epoch": 0.12359205208682777, "grad_norm": 0.7264546751976013, "learning_rate": 0.0002994937282045828, "loss": 4.6754, "step": 87960 }, { "epoch": 0.12360610302499135, "grad_norm": 0.7493976354598999, "learning_rate": 0.00029949312407775145, "loss": 4.709, "step": 87970 }, { "epoch": 0.12362015396315493, "grad_norm": 0.7562281489372253, "learning_rate": 0.0002994925195912971, "loss": 4.6625, "step": 87980 }, { "epoch": 0.1236342049013185, "grad_norm": 0.7009782195091248, "learning_rate": 0.00029949191474522123, "loss": 4.6659, "step": 87990 }, { "epoch": 0.12364825583948208, "grad_norm": 0.7240691781044006, "learning_rate": 0.0002994913095395253, "loss": 4.6298, "step": 88000 }, { "epoch": 0.12366230677764566, "grad_norm": 0.7109857797622681, "learning_rate": 0.0002994907039742107, "loss": 4.829, "step": 88010 }, { "epoch": 0.12367635771580923, "grad_norm": 0.734104335308075, "learning_rate": 0.000299490098049279, "loss": 4.7854, "step": 88020 }, { "epoch": 0.12369040865397281, "grad_norm": 0.707338273525238, "learning_rate": 0.00029948949176473153, "loss": 4.4639, "step": 88030 }, { "epoch": 0.1237044595921364, "grad_norm": 0.7034321427345276, "learning_rate": 
0.0002994888851205699, "loss": 4.5313, "step": 88040 }, { "epoch": 0.12371851053029997, "grad_norm": 0.7377339005470276, "learning_rate": 0.0002994882781167955, "loss": 4.6773, "step": 88050 }, { "epoch": 0.12373256146846355, "grad_norm": 0.7189347147941589, "learning_rate": 0.0002994876707534097, "loss": 4.7458, "step": 88060 }, { "epoch": 0.12374661240662713, "grad_norm": 0.7128537893295288, "learning_rate": 0.00029948706303041403, "loss": 4.7733, "step": 88070 }, { "epoch": 0.1237606633447907, "grad_norm": 0.7069502472877502, "learning_rate": 0.00029948645494781, "loss": 4.6594, "step": 88080 }, { "epoch": 0.12377471428295428, "grad_norm": 0.7175171375274658, "learning_rate": 0.00029948584650559907, "loss": 4.8268, "step": 88090 }, { "epoch": 0.12378876522111786, "grad_norm": 0.7087175846099854, "learning_rate": 0.0002994852377037826, "loss": 4.7501, "step": 88100 }, { "epoch": 0.12380281615928143, "grad_norm": 0.7500482201576233, "learning_rate": 0.00029948462854236213, "loss": 4.6788, "step": 88110 }, { "epoch": 0.12381686709744501, "grad_norm": 0.7136823534965515, "learning_rate": 0.00029948401902133917, "loss": 4.6901, "step": 88120 }, { "epoch": 0.12383091803560858, "grad_norm": 0.7292098999023438, "learning_rate": 0.0002994834091407151, "loss": 4.7064, "step": 88130 }, { "epoch": 0.12384496897377217, "grad_norm": 0.698124349117279, "learning_rate": 0.0002994827989004914, "loss": 4.8037, "step": 88140 }, { "epoch": 0.12385901991193575, "grad_norm": 0.7261769771575928, "learning_rate": 0.00029948218830066955, "loss": 4.5345, "step": 88150 }, { "epoch": 0.12387307085009933, "grad_norm": 0.706026554107666, "learning_rate": 0.000299481577341251, "loss": 4.6582, "step": 88160 }, { "epoch": 0.1238871217882629, "grad_norm": 0.7198874354362488, "learning_rate": 0.0002994809660222373, "loss": 4.7463, "step": 88170 }, { "epoch": 0.12390117272642648, "grad_norm": 0.7321640849113464, "learning_rate": 0.0002994803543436299, "loss": 4.6111, "step": 88180 }, { "epoch": 
0.12391522366459005, "grad_norm": 0.7120465040206909, "learning_rate": 0.0002994797423054302, "loss": 4.7361, "step": 88190 }, { "epoch": 0.12392927460275363, "grad_norm": 0.718183159828186, "learning_rate": 0.0002994791299076397, "loss": 4.6667, "step": 88200 }, { "epoch": 0.1239433255409172, "grad_norm": 0.7051864266395569, "learning_rate": 0.0002994785171502599, "loss": 4.7235, "step": 88210 }, { "epoch": 0.12395737647908078, "grad_norm": 0.71971595287323, "learning_rate": 0.0002994779040332922, "loss": 4.7439, "step": 88220 }, { "epoch": 0.12397142741724436, "grad_norm": 0.6985116004943848, "learning_rate": 0.0002994772905567382, "loss": 4.6536, "step": 88230 }, { "epoch": 0.12398547835540795, "grad_norm": 0.7011168003082275, "learning_rate": 0.0002994766767205993, "loss": 4.7449, "step": 88240 }, { "epoch": 0.12399952929357153, "grad_norm": 0.7223268747329712, "learning_rate": 0.00029947606252487695, "loss": 4.7109, "step": 88250 }, { "epoch": 0.1240135802317351, "grad_norm": 0.7336310744285583, "learning_rate": 0.0002994754479695727, "loss": 4.5966, "step": 88260 }, { "epoch": 0.12402763116989868, "grad_norm": 0.7206735014915466, "learning_rate": 0.000299474833054688, "loss": 4.5686, "step": 88270 }, { "epoch": 0.12404168210806225, "grad_norm": 0.7099548578262329, "learning_rate": 0.0002994742177802243, "loss": 4.7035, "step": 88280 }, { "epoch": 0.12405573304622583, "grad_norm": 0.7359626293182373, "learning_rate": 0.0002994736021461831, "loss": 4.6886, "step": 88290 }, { "epoch": 0.1240697839843894, "grad_norm": 0.7080744504928589, "learning_rate": 0.00029947298615256587, "loss": 4.7505, "step": 88300 }, { "epoch": 0.12408383492255298, "grad_norm": 0.7350786924362183, "learning_rate": 0.00029947236979937413, "loss": 4.5652, "step": 88310 }, { "epoch": 0.12409788586071656, "grad_norm": 1.0457093715667725, "learning_rate": 0.0002994717530866093, "loss": 4.7684, "step": 88320 }, { "epoch": 0.12411193679888013, "grad_norm": 0.7474514245986938, "learning_rate": 
0.00029947113601427286, "loss": 4.7522, "step": 88330 }, { "epoch": 0.12412598773704371, "grad_norm": 0.7411333322525024, "learning_rate": 0.0002994705185823664, "loss": 4.7131, "step": 88340 }, { "epoch": 0.1241400386752073, "grad_norm": 0.7307577133178711, "learning_rate": 0.0002994699007908914, "loss": 4.5768, "step": 88350 }, { "epoch": 0.12415408961337088, "grad_norm": 0.7115901708602905, "learning_rate": 0.00029946928263984915, "loss": 4.6929, "step": 88360 }, { "epoch": 0.12416814055153445, "grad_norm": 0.7410994172096252, "learning_rate": 0.00029946866412924135, "loss": 4.7275, "step": 88370 }, { "epoch": 0.12418219148969803, "grad_norm": 0.7032224535942078, "learning_rate": 0.0002994680452590694, "loss": 4.7629, "step": 88380 }, { "epoch": 0.1241962424278616, "grad_norm": 0.6944802403450012, "learning_rate": 0.00029946742602933484, "loss": 4.7391, "step": 88390 }, { "epoch": 0.12421029336602518, "grad_norm": 0.7015830874443054, "learning_rate": 0.000299466806440039, "loss": 4.5912, "step": 88400 }, { "epoch": 0.12422434430418876, "grad_norm": 0.7076965570449829, "learning_rate": 0.0002994661864911836, "loss": 4.7955, "step": 88410 }, { "epoch": 0.12423839524235233, "grad_norm": 0.7263289093971252, "learning_rate": 0.00029946556618277, "loss": 4.7882, "step": 88420 }, { "epoch": 0.12425244618051591, "grad_norm": 0.7141406536102295, "learning_rate": 0.0002994649455147997, "loss": 4.7778, "step": 88430 }, { "epoch": 0.12426649711867949, "grad_norm": 0.7139621376991272, "learning_rate": 0.00029946432448727417, "loss": 4.7299, "step": 88440 }, { "epoch": 0.12428054805684308, "grad_norm": 0.7191937565803528, "learning_rate": 0.000299463703100195, "loss": 4.6505, "step": 88450 }, { "epoch": 0.12429459899500665, "grad_norm": 0.7122807502746582, "learning_rate": 0.00029946308135356356, "loss": 4.7084, "step": 88460 }, { "epoch": 0.12430864993317023, "grad_norm": 0.707273006439209, "learning_rate": 0.00029946245924738146, "loss": 4.6373, "step": 88470 }, { "epoch": 
0.1243227008713338, "grad_norm": 0.6925734281539917, "learning_rate": 0.00029946183678165015, "loss": 4.7202, "step": 88480 }, { "epoch": 0.12433675180949738, "grad_norm": 0.6993895769119263, "learning_rate": 0.0002994612139563711, "loss": 4.7537, "step": 88490 }, { "epoch": 0.12435080274766096, "grad_norm": 0.7005565166473389, "learning_rate": 0.00029946059077154587, "loss": 4.7321, "step": 88500 }, { "epoch": 0.12436485368582453, "grad_norm": 0.7049751281738281, "learning_rate": 0.00029945996722717587, "loss": 4.7177, "step": 88510 }, { "epoch": 0.12437890462398811, "grad_norm": 0.7042818069458008, "learning_rate": 0.00029945934332326264, "loss": 4.5704, "step": 88520 }, { "epoch": 0.12439295556215169, "grad_norm": 0.6895237565040588, "learning_rate": 0.00029945871905980774, "loss": 4.6741, "step": 88530 }, { "epoch": 0.12440700650031526, "grad_norm": 0.7173465490341187, "learning_rate": 0.0002994580944368126, "loss": 4.7058, "step": 88540 }, { "epoch": 0.12442105743847885, "grad_norm": 0.7089383006095886, "learning_rate": 0.00029945746945427877, "loss": 4.7225, "step": 88550 }, { "epoch": 0.12443510837664243, "grad_norm": 0.7033636569976807, "learning_rate": 0.00029945684411220773, "loss": 4.7401, "step": 88560 }, { "epoch": 0.124449159314806, "grad_norm": 0.7240288257598877, "learning_rate": 0.00029945621841060094, "loss": 4.7217, "step": 88570 }, { "epoch": 0.12446321025296958, "grad_norm": 0.7067498564720154, "learning_rate": 0.00029945559234945997, "loss": 4.6413, "step": 88580 }, { "epoch": 0.12447726119113316, "grad_norm": 0.7195616364479065, "learning_rate": 0.00029945496592878633, "loss": 4.7053, "step": 88590 }, { "epoch": 0.12449131212929673, "grad_norm": 0.7112637162208557, "learning_rate": 0.0002994543391485815, "loss": 4.6635, "step": 88600 }, { "epoch": 0.12450536306746031, "grad_norm": 0.6900596618652344, "learning_rate": 0.000299453712008847, "loss": 4.7249, "step": 88610 }, { "epoch": 0.12451941400562389, "grad_norm": 0.6978746652603149, 
"learning_rate": 0.00029945308450958426, "loss": 4.6273, "step": 88620 }, { "epoch": 0.12453346494378746, "grad_norm": 0.707542359828949, "learning_rate": 0.00029945245665079494, "loss": 4.648, "step": 88630 }, { "epoch": 0.12454751588195104, "grad_norm": 0.7132956981658936, "learning_rate": 0.00029945182843248044, "loss": 4.6548, "step": 88640 }, { "epoch": 0.12456156682011461, "grad_norm": 0.7130081057548523, "learning_rate": 0.0002994511998546423, "loss": 4.666, "step": 88650 }, { "epoch": 0.1245756177582782, "grad_norm": 0.7389416098594666, "learning_rate": 0.000299450570917282, "loss": 4.6872, "step": 88660 }, { "epoch": 0.12458966869644178, "grad_norm": 0.7738654613494873, "learning_rate": 0.00029944994162040114, "loss": 4.6537, "step": 88670 }, { "epoch": 0.12460371963460536, "grad_norm": 0.7259090542793274, "learning_rate": 0.00029944931196400115, "loss": 4.6315, "step": 88680 }, { "epoch": 0.12461777057276893, "grad_norm": 0.7047815322875977, "learning_rate": 0.00029944868194808357, "loss": 4.7483, "step": 88690 }, { "epoch": 0.12463182151093251, "grad_norm": 0.722084641456604, "learning_rate": 0.0002994480515726499, "loss": 4.7039, "step": 88700 }, { "epoch": 0.12464587244909608, "grad_norm": 0.717406153678894, "learning_rate": 0.00029944742083770176, "loss": 4.622, "step": 88710 }, { "epoch": 0.12465992338725966, "grad_norm": 0.7036492228507996, "learning_rate": 0.00029944678974324045, "loss": 4.7124, "step": 88720 }, { "epoch": 0.12467397432542324, "grad_norm": 0.7213016748428345, "learning_rate": 0.0002994461582892678, "loss": 4.6973, "step": 88730 }, { "epoch": 0.12468802526358681, "grad_norm": 0.775976300239563, "learning_rate": 0.000299445526475785, "loss": 4.7035, "step": 88740 }, { "epoch": 0.12470207620175039, "grad_norm": 0.7207806706428528, "learning_rate": 0.00029944489430279377, "loss": 4.7464, "step": 88750 }, { "epoch": 0.12471612713991398, "grad_norm": 0.6966215968132019, "learning_rate": 0.00029944426177029555, "loss": 4.7001, "step": 
88760 }, { "epoch": 0.12473017807807756, "grad_norm": 0.7105135321617126, "learning_rate": 0.000299443628878292, "loss": 4.778, "step": 88770 }, { "epoch": 0.12474422901624113, "grad_norm": 0.7118408679962158, "learning_rate": 0.0002994429956267844, "loss": 4.7942, "step": 88780 }, { "epoch": 0.12475827995440471, "grad_norm": 0.7115956544876099, "learning_rate": 0.0002994423620157745, "loss": 4.7289, "step": 88790 }, { "epoch": 0.12477233089256828, "grad_norm": 0.8141958117485046, "learning_rate": 0.0002994417280452637, "loss": 4.7197, "step": 88800 }, { "epoch": 0.12478638183073186, "grad_norm": 0.6964655518531799, "learning_rate": 0.00029944109371525356, "loss": 4.6832, "step": 88810 }, { "epoch": 0.12480043276889544, "grad_norm": 0.6905677318572998, "learning_rate": 0.0002994404590257456, "loss": 4.6532, "step": 88820 }, { "epoch": 0.12481448370705901, "grad_norm": 0.7176385521888733, "learning_rate": 0.0002994398239767413, "loss": 4.6367, "step": 88830 }, { "epoch": 0.12482853464522259, "grad_norm": 0.723534107208252, "learning_rate": 0.0002994391885682423, "loss": 4.6079, "step": 88840 }, { "epoch": 0.12484258558338616, "grad_norm": 0.7028889060020447, "learning_rate": 0.0002994385528002501, "loss": 4.7336, "step": 88850 }, { "epoch": 0.12485663652154975, "grad_norm": 0.7260593771934509, "learning_rate": 0.0002994379166727661, "loss": 4.5638, "step": 88860 }, { "epoch": 0.12487068745971333, "grad_norm": 0.7041234374046326, "learning_rate": 0.00029943728018579205, "loss": 4.7209, "step": 88870 }, { "epoch": 0.12488473839787691, "grad_norm": 0.6976323127746582, "learning_rate": 0.0002994366433393293, "loss": 4.6149, "step": 88880 }, { "epoch": 0.12489878933604048, "grad_norm": 0.7066828012466431, "learning_rate": 0.00029943600613337943, "loss": 4.6027, "step": 88890 }, { "epoch": 0.12491284027420406, "grad_norm": 0.7120361328125, "learning_rate": 0.000299435368567944, "loss": 4.5695, "step": 88900 }, { "epoch": 0.12492689121236764, "grad_norm": 
0.7178036570549011, "learning_rate": 0.0002994347306430245, "loss": 4.7551, "step": 88910 }, { "epoch": 0.12494094215053121, "grad_norm": 1.0115084648132324, "learning_rate": 0.0002994340923586225, "loss": 4.5328, "step": 88920 }, { "epoch": 0.12495499308869479, "grad_norm": 0.7211881279945374, "learning_rate": 0.0002994334537147395, "loss": 4.753, "step": 88930 }, { "epoch": 0.12496904402685836, "grad_norm": 0.72914057970047, "learning_rate": 0.00029943281471137706, "loss": 4.6328, "step": 88940 }, { "epoch": 0.12498309496502194, "grad_norm": 0.7203587889671326, "learning_rate": 0.00029943217534853677, "loss": 4.7458, "step": 88950 }, { "epoch": 0.12499714590318552, "grad_norm": 0.7104395031929016, "learning_rate": 0.0002994315356262201, "loss": 4.6597, "step": 88960 }, { "epoch": 0.1250111968413491, "grad_norm": 0.7179931998252869, "learning_rate": 0.0002994308955444286, "loss": 4.6605, "step": 88970 }, { "epoch": 0.12502524777951268, "grad_norm": 0.7127233743667603, "learning_rate": 0.00029943025510316385, "loss": 4.7703, "step": 88980 }, { "epoch": 0.12503929871767624, "grad_norm": 0.7128562331199646, "learning_rate": 0.00029942961430242727, "loss": 4.8032, "step": 88990 }, { "epoch": 0.12505334965583983, "grad_norm": 0.7086876630783081, "learning_rate": 0.0002994289731422206, "loss": 4.6504, "step": 89000 }, { "epoch": 0.1250674005940034, "grad_norm": 0.7091412544250488, "learning_rate": 0.0002994283316225452, "loss": 4.6319, "step": 89010 }, { "epoch": 0.125081451532167, "grad_norm": 0.7031800746917725, "learning_rate": 0.0002994276897434027, "loss": 4.6304, "step": 89020 }, { "epoch": 0.12509550247033058, "grad_norm": 0.7421759963035583, "learning_rate": 0.00029942704750479464, "loss": 4.7184, "step": 89030 }, { "epoch": 0.12510955340849414, "grad_norm": 0.7047469019889832, "learning_rate": 0.00029942640490672256, "loss": 4.6966, "step": 89040 }, { "epoch": 0.12512360434665773, "grad_norm": 0.717984139919281, "learning_rate": 0.00029942576194918796, "loss": 
4.6959, "step": 89050 }, { "epoch": 0.1251376552848213, "grad_norm": 0.674823522567749, "learning_rate": 0.0002994251186321925, "loss": 4.7184, "step": 89060 }, { "epoch": 0.12515170622298488, "grad_norm": 0.725084662437439, "learning_rate": 0.0002994244749557376, "loss": 4.5896, "step": 89070 }, { "epoch": 0.12516575716114844, "grad_norm": 0.7343477010726929, "learning_rate": 0.00029942383091982486, "loss": 4.6372, "step": 89080 }, { "epoch": 0.12517980809931203, "grad_norm": 0.6985862255096436, "learning_rate": 0.0002994231865244558, "loss": 4.7568, "step": 89090 }, { "epoch": 0.1251938590374756, "grad_norm": 0.7344580292701721, "learning_rate": 0.00029942254176963207, "loss": 4.774, "step": 89100 }, { "epoch": 0.1252079099756392, "grad_norm": 0.714970588684082, "learning_rate": 0.0002994218966553551, "loss": 4.7269, "step": 89110 }, { "epoch": 0.12522196091380278, "grad_norm": 0.7142235636711121, "learning_rate": 0.00029942125118162657, "loss": 4.7118, "step": 89120 }, { "epoch": 0.12523601185196634, "grad_norm": 0.7200468182563782, "learning_rate": 0.00029942060534844786, "loss": 4.5759, "step": 89130 }, { "epoch": 0.12525006279012993, "grad_norm": 0.7273971438407898, "learning_rate": 0.0002994199591558207, "loss": 4.596, "step": 89140 }, { "epoch": 0.1252641137282935, "grad_norm": 0.708501398563385, "learning_rate": 0.00029941931260374653, "loss": 4.7371, "step": 89150 }, { "epoch": 0.12527816466645708, "grad_norm": 0.6819218397140503, "learning_rate": 0.00029941866569222695, "loss": 4.6924, "step": 89160 }, { "epoch": 0.12529221560462064, "grad_norm": 0.7276637554168701, "learning_rate": 0.0002994180184212635, "loss": 4.5807, "step": 89170 }, { "epoch": 0.12530626654278423, "grad_norm": 0.8149365186691284, "learning_rate": 0.00029941737079085777, "loss": 4.7547, "step": 89180 }, { "epoch": 0.1253203174809478, "grad_norm": 0.7019021511077881, "learning_rate": 0.00029941672280101126, "loss": 4.7022, "step": 89190 }, { "epoch": 0.12533436841911139, "grad_norm": 
0.7072144150733948, "learning_rate": 0.00029941607445172554, "loss": 4.6415, "step": 89200 }, { "epoch": 0.12534841935727495, "grad_norm": 0.6896094083786011, "learning_rate": 0.00029941542574300225, "loss": 4.5062, "step": 89210 }, { "epoch": 0.12536247029543854, "grad_norm": 0.7020169496536255, "learning_rate": 0.00029941477667484284, "loss": 4.8094, "step": 89220 }, { "epoch": 0.12537652123360213, "grad_norm": 0.6777108311653137, "learning_rate": 0.000299414127247249, "loss": 4.6478, "step": 89230 }, { "epoch": 0.1253905721717657, "grad_norm": 0.7213195562362671, "learning_rate": 0.00029941347746022213, "loss": 4.7103, "step": 89240 }, { "epoch": 0.12540462310992928, "grad_norm": 0.6935251951217651, "learning_rate": 0.00029941282731376387, "loss": 4.3878, "step": 89250 }, { "epoch": 0.12541867404809284, "grad_norm": 0.8068158626556396, "learning_rate": 0.00029941217680787583, "loss": 4.6871, "step": 89260 }, { "epoch": 0.12543272498625643, "grad_norm": 0.7309907078742981, "learning_rate": 0.00029941152594255955, "loss": 4.7632, "step": 89270 }, { "epoch": 0.12544677592442, "grad_norm": 0.7052793502807617, "learning_rate": 0.0002994108747178166, "loss": 4.8095, "step": 89280 }, { "epoch": 0.12546082686258359, "grad_norm": 0.7147111892700195, "learning_rate": 0.00029941022313364845, "loss": 4.6167, "step": 89290 }, { "epoch": 0.12547487780074715, "grad_norm": 0.7007347345352173, "learning_rate": 0.00029940957119005683, "loss": 4.7428, "step": 89300 }, { "epoch": 0.12548892873891074, "grad_norm": 0.6927651166915894, "learning_rate": 0.0002994089188870432, "loss": 4.7348, "step": 89310 }, { "epoch": 0.1255029796770743, "grad_norm": 0.7285720109939575, "learning_rate": 0.0002994082662246091, "loss": 4.6294, "step": 89320 }, { "epoch": 0.1255170306152379, "grad_norm": 0.7167104482650757, "learning_rate": 0.0002994076132027563, "loss": 4.6708, "step": 89330 }, { "epoch": 0.12553108155340148, "grad_norm": 0.7202353477478027, "learning_rate": 0.00029940695982148607, 
"loss": 4.7455, "step": 89340 }, { "epoch": 0.12554513249156504, "grad_norm": 0.7299073934555054, "learning_rate": 0.0002994063060808002, "loss": 4.6263, "step": 89350 }, { "epoch": 0.12555918342972863, "grad_norm": 0.7049494981765747, "learning_rate": 0.00029940565198070015, "loss": 4.6685, "step": 89360 }, { "epoch": 0.1255732343678922, "grad_norm": 0.7378440499305725, "learning_rate": 0.00029940499752118764, "loss": 4.628, "step": 89370 }, { "epoch": 0.12558728530605578, "grad_norm": 0.705937922000885, "learning_rate": 0.00029940434270226405, "loss": 4.6245, "step": 89380 }, { "epoch": 0.12560133624421935, "grad_norm": 0.7085596323013306, "learning_rate": 0.00029940368752393113, "loss": 4.607, "step": 89390 }, { "epoch": 0.12561538718238294, "grad_norm": 0.7117762565612793, "learning_rate": 0.00029940303198619034, "loss": 4.5603, "step": 89400 }, { "epoch": 0.1256294381205465, "grad_norm": 0.7034866809844971, "learning_rate": 0.0002994023760890433, "loss": 4.6884, "step": 89410 }, { "epoch": 0.1256434890587101, "grad_norm": 0.6858923435211182, "learning_rate": 0.00029940171983249155, "loss": 4.6888, "step": 89420 }, { "epoch": 0.12565753999687368, "grad_norm": 0.7072145938873291, "learning_rate": 0.00029940106321653674, "loss": 4.6249, "step": 89430 }, { "epoch": 0.12567159093503724, "grad_norm": 0.730768620967865, "learning_rate": 0.0002994004062411804, "loss": 4.5767, "step": 89440 }, { "epoch": 0.12568564187320083, "grad_norm": 0.7527211308479309, "learning_rate": 0.0002993997489064242, "loss": 4.6003, "step": 89450 }, { "epoch": 0.1256996928113644, "grad_norm": 0.7296200394630432, "learning_rate": 0.0002993990912122695, "loss": 4.7804, "step": 89460 }, { "epoch": 0.12571374374952798, "grad_norm": 0.7212024927139282, "learning_rate": 0.00029939843315871813, "loss": 4.6649, "step": 89470 }, { "epoch": 0.12572779468769155, "grad_norm": 0.7408531308174133, "learning_rate": 0.00029939777474577157, "loss": 4.7058, "step": 89480 }, { "epoch": 0.12574184562585514, 
"grad_norm": 0.7241307497024536, "learning_rate": 0.00029939711597343135, "loss": 4.7249, "step": 89490 }, { "epoch": 0.1257558965640187, "grad_norm": 0.7100182175636292, "learning_rate": 0.00029939645684169913, "loss": 4.5997, "step": 89500 }, { "epoch": 0.1257699475021823, "grad_norm": 0.7237468957901001, "learning_rate": 0.00029939579735057645, "loss": 4.698, "step": 89510 }, { "epoch": 0.12578399844034585, "grad_norm": 0.6810537576675415, "learning_rate": 0.0002993951375000649, "loss": 4.6484, "step": 89520 }, { "epoch": 0.12579804937850944, "grad_norm": 0.7313446402549744, "learning_rate": 0.0002993944772901662, "loss": 4.5818, "step": 89530 }, { "epoch": 0.12581210031667303, "grad_norm": 0.7293576598167419, "learning_rate": 0.00029939381672088175, "loss": 4.6622, "step": 89540 }, { "epoch": 0.1258261512548366, "grad_norm": 0.7267341613769531, "learning_rate": 0.0002993931557922132, "loss": 4.5724, "step": 89550 }, { "epoch": 0.12584020219300018, "grad_norm": 0.722690999507904, "learning_rate": 0.00029939249450416217, "loss": 4.7577, "step": 89560 }, { "epoch": 0.12585425313116375, "grad_norm": 0.7288978695869446, "learning_rate": 0.00029939183285673023, "loss": 4.6926, "step": 89570 }, { "epoch": 0.12586830406932734, "grad_norm": 0.7535728216171265, "learning_rate": 0.000299391170849919, "loss": 4.6465, "step": 89580 }, { "epoch": 0.1258823550074909, "grad_norm": 0.7304617166519165, "learning_rate": 0.00029939050848373, "loss": 4.6726, "step": 89590 }, { "epoch": 0.1258964059456545, "grad_norm": 0.7804543972015381, "learning_rate": 0.00029938984575816495, "loss": 4.7101, "step": 89600 }, { "epoch": 0.12591045688381805, "grad_norm": 0.7636072635650635, "learning_rate": 0.0002993891826732253, "loss": 4.7559, "step": 89610 }, { "epoch": 0.12592450782198164, "grad_norm": 0.8159419298171997, "learning_rate": 0.0002993885192289128, "loss": 4.5094, "step": 89620 }, { "epoch": 0.1259385587601452, "grad_norm": 0.7560939192771912, "learning_rate": 
0.00029938785542522883, "loss": 4.7041, "step": 89630 }, { "epoch": 0.1259526096983088, "grad_norm": 0.7355748414993286, "learning_rate": 0.0002993871912621752, "loss": 4.7708, "step": 89640 }, { "epoch": 0.12596666063647238, "grad_norm": 0.7174279689788818, "learning_rate": 0.0002993865267397534, "loss": 4.6629, "step": 89650 }, { "epoch": 0.12598071157463595, "grad_norm": 0.7072028517723083, "learning_rate": 0.00029938586185796504, "loss": 4.708, "step": 89660 }, { "epoch": 0.12599476251279954, "grad_norm": 0.7467581033706665, "learning_rate": 0.00029938519661681175, "loss": 4.6189, "step": 89670 }, { "epoch": 0.1260088134509631, "grad_norm": 0.724288821220398, "learning_rate": 0.0002993845310162951, "loss": 4.6856, "step": 89680 }, { "epoch": 0.1260228643891267, "grad_norm": 0.7614923715591431, "learning_rate": 0.0002993838650564167, "loss": 4.6472, "step": 89690 }, { "epoch": 0.12603691532729025, "grad_norm": 0.7412780523300171, "learning_rate": 0.00029938319873717815, "loss": 4.6539, "step": 89700 }, { "epoch": 0.12605096626545384, "grad_norm": 0.7168459892272949, "learning_rate": 0.00029938253205858106, "loss": 4.775, "step": 89710 }, { "epoch": 0.1260650172036174, "grad_norm": 0.7272344827651978, "learning_rate": 0.00029938186502062705, "loss": 4.698, "step": 89720 }, { "epoch": 0.126079068141781, "grad_norm": 0.6984730958938599, "learning_rate": 0.0002993811976233177, "loss": 4.5482, "step": 89730 }, { "epoch": 0.12609311907994458, "grad_norm": 0.698017418384552, "learning_rate": 0.0002993805298666546, "loss": 4.6122, "step": 89740 }, { "epoch": 0.12610717001810814, "grad_norm": 0.7286360263824463, "learning_rate": 0.0002993798617506394, "loss": 4.7628, "step": 89750 }, { "epoch": 0.12612122095627173, "grad_norm": 0.6975075006484985, "learning_rate": 0.00029937919327527367, "loss": 4.7699, "step": 89760 }, { "epoch": 0.1261352718944353, "grad_norm": 0.7229320406913757, "learning_rate": 0.000299378524440559, "loss": 4.7071, "step": 89770 }, { "epoch": 
0.1261493228325989, "grad_norm": 0.7221950888633728, "learning_rate": 0.00029937785524649713, "loss": 4.6989, "step": 89780 }, { "epoch": 0.12616337377076245, "grad_norm": 0.6790987849235535, "learning_rate": 0.00029937718569308946, "loss": 4.7405, "step": 89790 }, { "epoch": 0.12617742470892604, "grad_norm": 0.7140920162200928, "learning_rate": 0.0002993765157803378, "loss": 4.7422, "step": 89800 }, { "epoch": 0.1261914756470896, "grad_norm": 0.7047672867774963, "learning_rate": 0.0002993758455082436, "loss": 4.6582, "step": 89810 }, { "epoch": 0.1262055265852532, "grad_norm": 0.7307078242301941, "learning_rate": 0.0002993751748768086, "loss": 4.6722, "step": 89820 }, { "epoch": 0.12621957752341675, "grad_norm": 0.7230780124664307, "learning_rate": 0.0002993745038860344, "loss": 4.7186, "step": 89830 }, { "epoch": 0.12623362846158034, "grad_norm": 0.7278953790664673, "learning_rate": 0.00029937383253592244, "loss": 4.7258, "step": 89840 }, { "epoch": 0.12624767939974393, "grad_norm": 0.7198377251625061, "learning_rate": 0.00029937316082647456, "loss": 4.7229, "step": 89850 }, { "epoch": 0.1262617303379075, "grad_norm": 0.6939035058021545, "learning_rate": 0.00029937248875769226, "loss": 4.6455, "step": 89860 }, { "epoch": 0.1262757812760711, "grad_norm": 0.7266935706138611, "learning_rate": 0.00029937181632957717, "loss": 4.5993, "step": 89870 }, { "epoch": 0.12628983221423465, "grad_norm": 0.7101845145225525, "learning_rate": 0.00029937114354213097, "loss": 4.6855, "step": 89880 }, { "epoch": 0.12630388315239824, "grad_norm": 0.7384141087532043, "learning_rate": 0.0002993704703953552, "loss": 4.7441, "step": 89890 }, { "epoch": 0.1263179340905618, "grad_norm": 0.6860256791114807, "learning_rate": 0.0002993697968892515, "loss": 4.6038, "step": 89900 }, { "epoch": 0.1263319850287254, "grad_norm": 0.7191735506057739, "learning_rate": 0.0002993691230238215, "loss": 4.6831, "step": 89910 }, { "epoch": 0.12634603596688895, "grad_norm": 0.7158761620521545, 
"learning_rate": 0.0002993684487990668, "loss": 4.5944, "step": 89920 }, { "epoch": 0.12636008690505254, "grad_norm": 0.7162553071975708, "learning_rate": 0.0002993677742149891, "loss": 4.6579, "step": 89930 }, { "epoch": 0.1263741378432161, "grad_norm": 0.7150312662124634, "learning_rate": 0.0002993670992715899, "loss": 4.7404, "step": 89940 }, { "epoch": 0.1263881887813797, "grad_norm": 0.7158754467964172, "learning_rate": 0.0002993664239688709, "loss": 4.6764, "step": 89950 }, { "epoch": 0.12640223971954329, "grad_norm": 0.7216680645942688, "learning_rate": 0.0002993657483068337, "loss": 4.6328, "step": 89960 }, { "epoch": 0.12641629065770685, "grad_norm": 0.7339308261871338, "learning_rate": 0.00029936507228547996, "loss": 4.6651, "step": 89970 }, { "epoch": 0.12643034159587044, "grad_norm": 0.700171709060669, "learning_rate": 0.00029936439590481126, "loss": 4.6854, "step": 89980 }, { "epoch": 0.126444392534034, "grad_norm": 0.7425507307052612, "learning_rate": 0.0002993637191648293, "loss": 4.5799, "step": 89990 }, { "epoch": 0.1264584434721976, "grad_norm": 0.7096763849258423, "learning_rate": 0.0002993630420655356, "loss": 4.5681, "step": 90000 } ], "logging_steps": 10, "max_steps": 711696, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.7405109256192e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }