{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9040063466878223,
  "eval_steps": 300,
  "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01586671955573185,
      "grad_norm": 0.6832552552223206,
      "learning_rate": 2.6455026455026455e-06,
      "loss": 1.3171,
      "step": 10
    },
    {
      "epoch": 0.0317334391114637,
      "grad_norm": 0.9994391798973083,
      "learning_rate": 5.291005291005291e-06,
      "loss": 1.1922,
      "step": 20
    },
    {
      "epoch": 0.04760015866719556,
      "grad_norm": 0.9905434846878052,
      "learning_rate": 7.936507936507936e-06,
      "loss": 1.2352,
      "step": 30
    },
    {
      "epoch": 0.0634668782229274,
      "grad_norm": 1.0371067523956299,
      "learning_rate": 1.0582010582010582e-05,
      "loss": 1.1191,
      "step": 40
    },
    {
      "epoch": 0.07933359777865927,
      "grad_norm": 0.9686773419380188,
      "learning_rate": 1.3227513227513228e-05,
      "loss": 1.0697,
      "step": 50
    },
    {
      "epoch": 0.09520031733439112,
      "grad_norm": 0.5632955431938171,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 0.741,
      "step": 60
    },
    {
      "epoch": 0.11106703689012297,
      "grad_norm": 0.4180806875228882,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.6942,
      "step": 70
    },
    {
      "epoch": 0.1269337564458548,
      "grad_norm": 0.3943168520927429,
      "learning_rate": 2.1164021164021164e-05,
      "loss": 0.6314,
      "step": 80
    },
    {
      "epoch": 0.14280047600158668,
      "grad_norm": 0.3465676009654999,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.6257,
      "step": 90
    },
    {
      "epoch": 0.15866719555731854,
      "grad_norm": 0.3200153708457947,
      "learning_rate": 2.6455026455026456e-05,
      "loss": 0.5426,
      "step": 100
    },
    {
      "epoch": 0.1745339151130504,
      "grad_norm": 0.28294724225997925,
      "learning_rate": 2.91005291005291e-05,
      "loss": 0.5209,
      "step": 110
    },
    {
      "epoch": 0.19040063466878224,
      "grad_norm": 0.3477802574634552,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.4633,
      "step": 120
    },
    {
      "epoch": 0.2062673542245141,
      "grad_norm": 0.36890918016433716,
      "learning_rate": 3.439153439153439e-05,
      "loss": 0.3798,
      "step": 130
    },
    {
      "epoch": 0.22213407378024594,
      "grad_norm": 0.38711702823638916,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.4397,
      "step": 140
    },
    {
      "epoch": 0.2380007933359778,
      "grad_norm": 0.4116644859313965,
      "learning_rate": 3.968253968253968e-05,
      "loss": 0.4021,
      "step": 150
    },
    {
      "epoch": 0.2538675128917096,
      "grad_norm": 0.402497798204422,
      "learning_rate": 4.232804232804233e-05,
      "loss": 0.3796,
      "step": 160
    },
    {
      "epoch": 0.2697342324474415,
      "grad_norm": 0.6916673183441162,
      "learning_rate": 4.4973544973544974e-05,
      "loss": 0.4462,
      "step": 170
    },
    {
      "epoch": 0.28560095200317337,
      "grad_norm": 0.49015486240386963,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.3902,
      "step": 180
    },
    {
      "epoch": 0.3014676715589052,
      "grad_norm": 0.4605613648891449,
      "learning_rate": 4.999995736158938e-05,
      "loss": 0.3751,
      "step": 190
    },
    {
      "epoch": 0.31733439111463707,
      "grad_norm": 0.3663236200809479,
      "learning_rate": 4.999484092829756e-05,
      "loss": 0.3399,
      "step": 200
    },
    {
      "epoch": 0.3332011106703689,
      "grad_norm": 0.6052913069725037,
      "learning_rate": 4.998119881260576e-05,
      "loss": 0.3738,
      "step": 210
    },
    {
      "epoch": 0.3490678302261008,
      "grad_norm": 0.5979182720184326,
      "learning_rate": 4.995903566780805e-05,
      "loss": 0.3732,
      "step": 220
    },
    {
      "epoch": 0.3649345497818326,
      "grad_norm": 0.7640148401260376,
      "learning_rate": 4.992835905370186e-05,
      "loss": 0.4339,
      "step": 230
    },
    {
      "epoch": 0.3808012693375645,
      "grad_norm": 0.5569157600402832,
      "learning_rate": 4.988917943400924e-05,
      "loss": 0.3893,
      "step": 240
    },
    {
      "epoch": 0.3966679888932963,
      "grad_norm": 0.6464380621910095,
      "learning_rate": 4.9841510172807834e-05,
      "loss": 0.3642,
      "step": 250
    },
    {
      "epoch": 0.4125347084490282,
      "grad_norm": 0.5494194030761719,
      "learning_rate": 4.97853675299723e-05,
      "loss": 0.3481,
      "step": 260
    },
    {
      "epoch": 0.42840142800476,
      "grad_norm": 0.5208620429039001,
      "learning_rate": 4.972077065562821e-05,
      "loss": 0.3968,
      "step": 270
    },
    {
      "epoch": 0.4442681475604919,
      "grad_norm": 0.6078989505767822,
      "learning_rate": 4.964774158361991e-05,
      "loss": 0.337,
      "step": 280
    },
    {
      "epoch": 0.4601348671162237,
      "grad_norm": 0.5791096687316895,
      "learning_rate": 4.956630522399487e-05,
      "loss": 0.3495,
      "step": 290
    },
    {
      "epoch": 0.4760015866719556,
      "grad_norm": 0.5267443060874939,
      "learning_rate": 4.947648935450689e-05,
      "loss": 0.3191,
      "step": 300
    },
    {
      "epoch": 0.4760015866719556,
      "eval_loss": 0.38255831599235535,
      "eval_runtime": 93.6436,
      "eval_samples_per_second": 5.991,
      "eval_steps_per_second": 5.991,
      "step": 300
    },
    {
      "epoch": 0.4918683062276874,
      "grad_norm": 0.42828086018562317,
      "learning_rate": 4.937832461114123e-05,
      "loss": 0.3164,
      "step": 310
    },
    {
      "epoch": 0.5077350257834192,
      "grad_norm": 0.5684987306594849,
      "learning_rate": 4.927184447766467e-05,
      "loss": 0.3047,
      "step": 320
    },
    {
      "epoch": 0.5236017453391512,
      "grad_norm": 0.6015241742134094,
      "learning_rate": 4.915708527420435e-05,
      "loss": 0.2568,
      "step": 330
    },
    {
      "epoch": 0.539468464894883,
      "grad_norm": 0.7351698279380798,
      "learning_rate": 4.903408614485899e-05,
      "loss": 0.3554,
      "step": 340
    },
    {
      "epoch": 0.5553351844506148,
      "grad_norm": 0.5476783514022827,
      "learning_rate": 4.890288904434699e-05,
      "loss": 0.2934,
      "step": 350
    },
    {
      "epoch": 0.5712019040063467,
      "grad_norm": 0.7653456330299377,
      "learning_rate": 4.8763538723695726e-05,
      "loss": 0.3548,
      "step": 360
    },
    {
      "epoch": 0.5870686235620786,
      "grad_norm": 0.5909974575042725,
      "learning_rate": 4.8616082714977097e-05,
      "loss": 0.2958,
      "step": 370
    },
    {
      "epoch": 0.6029353431178104,
      "grad_norm": 0.8618314266204834,
      "learning_rate": 4.8460571315094456e-05,
      "loss": 0.3323,
      "step": 380
    },
    {
      "epoch": 0.6188020626735422,
      "grad_norm": 0.43425798416137695,
      "learning_rate": 4.829705756862642e-05,
      "loss": 0.3256,
      "step": 390
    },
    {
      "epoch": 0.6346687822292741,
      "grad_norm": 0.6728402972221375,
      "learning_rate": 4.812559724973355e-05,
      "loss": 0.3289,
      "step": 400
    },
    {
      "epoch": 0.650535501785006,
      "grad_norm": 0.6804693341255188,
      "learning_rate": 4.79462488431338e-05,
      "loss": 0.3494,
      "step": 410
    },
    {
      "epoch": 0.6664022213407378,
      "grad_norm": 0.7322263121604919,
      "learning_rate": 4.775907352415367e-05,
      "loss": 0.286,
      "step": 420
    },
    {
      "epoch": 0.6822689408964696,
      "grad_norm": 0.5389537215232849,
      "learning_rate": 4.75641351378613e-05,
      "loss": 0.2597,
      "step": 430
    },
    {
      "epoch": 0.6981356604522015,
      "grad_norm": 0.7679384350776672,
      "learning_rate": 4.7361500177289156e-05,
      "loss": 0.3265,
      "step": 440
    },
    {
      "epoch": 0.7140023800079334,
      "grad_norm": 0.9662689566612244,
      "learning_rate": 4.715123776075336e-05,
      "loss": 0.3493,
      "step": 450
    },
    {
      "epoch": 0.7298690995636652,
      "grad_norm": 0.7264727354049683,
      "learning_rate": 4.693341960827764e-05,
      "loss": 0.3412,
      "step": 460
    },
    {
      "epoch": 0.745735819119397,
      "grad_norm": 0.6046968698501587,
      "learning_rate": 4.670812001712973e-05,
      "loss": 0.349,
      "step": 470
    },
    {
      "epoch": 0.761602538675129,
      "grad_norm": 0.7867446541786194,
      "learning_rate": 4.647541583647883e-05,
      "loss": 0.3394,
      "step": 480
    },
    {
      "epoch": 0.7774692582308608,
      "grad_norm": 0.7447336912155151,
      "learning_rate": 4.623538644118244e-05,
      "loss": 0.3738,
      "step": 490
    },
    {
      "epoch": 0.7933359777865926,
      "grad_norm": 0.5049989819526672,
      "learning_rate": 4.5988113704711846e-05,
      "loss": 0.2899,
      "step": 500
    },
    {
      "epoch": 0.8092026973423245,
      "grad_norm": 0.8148102164268494,
      "learning_rate": 4.573368197122524e-05,
      "loss": 0.3144,
      "step": 510
    },
    {
      "epoch": 0.8250694168980564,
      "grad_norm": 0.7628346681594849,
      "learning_rate": 4.547217802679814e-05,
      "loss": 0.2996,
      "step": 520
    },
    {
      "epoch": 0.8409361364537882,
      "grad_norm": 0.6373751759529114,
      "learning_rate": 4.520369106982084e-05,
      "loss": 0.2887,
      "step": 530
    },
    {
      "epoch": 0.85680285600952,
      "grad_norm": 0.6852309107780457,
      "learning_rate": 4.4928312680573064e-05,
      "loss": 0.2862,
      "step": 540
    },
    {
      "epoch": 0.8726695755652519,
      "grad_norm": 0.7369856238365173,
      "learning_rate": 4.464613678998612e-05,
      "loss": 0.3386,
      "step": 550
    },
    {
      "epoch": 0.8885362951209838,
      "grad_norm": 0.6538604497909546,
      "learning_rate": 4.435725964760331e-05,
      "loss": 0.3225,
      "step": 560
    },
    {
      "epoch": 0.9044030146767156,
      "grad_norm": 0.8635338544845581,
      "learning_rate": 4.406177978874941e-05,
      "loss": 0.3328,
      "step": 570
    },
    {
      "epoch": 0.9202697342324474,
      "grad_norm": 0.9363442063331604,
      "learning_rate": 4.3759798000920496e-05,
      "loss": 0.3315,
      "step": 580
    },
    {
      "epoch": 0.9361364537881793,
      "grad_norm": 0.7191005349159241,
      "learning_rate": 4.3451417289405586e-05,
      "loss": 0.271,
      "step": 590
    },
    {
      "epoch": 0.9520031733439112,
      "grad_norm": 0.7281237244606018,
      "learning_rate": 4.313674284215176e-05,
      "loss": 0.2945,
      "step": 600
    },
    {
      "epoch": 0.9520031733439112,
      "eval_loss": 0.3320656418800354,
      "eval_runtime": 93.5162,
      "eval_samples_per_second": 5.999,
      "eval_steps_per_second": 5.999,
      "step": 600
    },
    {
      "epoch": 0.967869892899643,
      "grad_norm": 0.6777219176292419,
      "learning_rate": 4.281588199388476e-05,
      "loss": 0.2844,
      "step": 610
    },
    {
      "epoch": 0.9837366124553748,
      "grad_norm": 0.7066994905471802,
      "learning_rate": 4.248894418949746e-05,
      "loss": 0.3348,
      "step": 620
    },
    {
      "epoch": 0.9996033320111067,
      "grad_norm": 0.5948446989059448,
      "learning_rate": 4.215604094671835e-05,
      "loss": 0.2532,
      "step": 630
    },
    {
      "epoch": 1.0154700515668384,
      "grad_norm": 0.8124284744262695,
      "learning_rate": 4.181728581807316e-05,
      "loss": 0.3068,
      "step": 640
    },
    {
      "epoch": 1.0313367711225705,
      "grad_norm": 0.6020251512527466,
      "learning_rate": 4.1472794352152366e-05,
      "loss": 0.2912,
      "step": 650
    },
    {
      "epoch": 1.0472034906783023,
      "grad_norm": 0.9106693863868713,
      "learning_rate": 4.112268405419782e-05,
      "loss": 0.2739,
      "step": 660
    },
    {
      "epoch": 1.0630702102340341,
      "grad_norm": 0.9437083005905151,
      "learning_rate": 4.076707434602194e-05,
      "loss": 0.2563,
      "step": 670
    },
    {
      "epoch": 1.078936929789766,
      "grad_norm": 0.7824203968048096,
      "learning_rate": 4.040608652527328e-05,
      "loss": 0.2898,
      "step": 680
    },
    {
      "epoch": 1.0948036493454978,
      "grad_norm": 0.8340286612510681,
      "learning_rate": 4.003984372406212e-05,
      "loss": 0.2665,
      "step": 690
    },
    {
      "epoch": 1.1106703689012296,
      "grad_norm": 0.7771459817886353,
      "learning_rate": 3.966847086696045e-05,
      "loss": 0.2711,
      "step": 700
    },
    {
      "epoch": 1.1265370884569614,
      "grad_norm": 0.6131294369697571,
      "learning_rate": 3.929209462839041e-05,
      "loss": 0.2825,
      "step": 710
    },
    {
      "epoch": 1.1424038080126935,
      "grad_norm": 0.8090108633041382,
      "learning_rate": 3.891084338941603e-05,
      "loss": 0.2725,
      "step": 720
    },
    {
      "epoch": 1.1582705275684253,
      "grad_norm": 0.6462133526802063,
      "learning_rate": 3.852484719395264e-05,
      "loss": 0.2406,
      "step": 730
    },
    {
      "epoch": 1.1741372471241571,
      "grad_norm": 0.7676876783370972,
      "learning_rate": 3.8134237704409295e-05,
      "loss": 0.2648,
      "step": 740
    },
    {
      "epoch": 1.190003966679889,
      "grad_norm": 0.8399226665496826,
      "learning_rate": 3.773914815677897e-05,
      "loss": 0.2693,
      "step": 750
    },
    {
      "epoch": 1.2058706862356208,
      "grad_norm": 0.7439931631088257,
      "learning_rate": 3.733971331519206e-05,
      "loss": 0.2602,
      "step": 760
    },
    {
      "epoch": 1.2217374057913526,
      "grad_norm": 0.7992270588874817,
      "learning_rate": 3.693606942594873e-05,
      "loss": 0.2647,
      "step": 770
    },
    {
      "epoch": 1.2376041253470844,
      "grad_norm": 0.8335320949554443,
      "learning_rate": 3.65283541710455e-05,
      "loss": 0.2723,
      "step": 780
    },
    {
      "epoch": 1.2534708449028162,
      "grad_norm": 0.813829779624939,
      "learning_rate": 3.611670662121234e-05,
      "loss": 0.2285,
      "step": 790
    },
    {
      "epoch": 1.269337564458548,
      "grad_norm": 1.047428846359253,
      "learning_rate": 3.570126718847589e-05,
      "loss": 0.2788,
      "step": 800
    },
    {
      "epoch": 1.28520428401428,
      "grad_norm": 1.1291059255599976,
      "learning_rate": 3.5282177578265296e-05,
      "loss": 0.3008,
      "step": 810
    },
    {
      "epoch": 1.301071003570012,
      "grad_norm": 0.6732133626937866,
      "learning_rate": 3.485958074107677e-05,
      "loss": 0.2446,
      "step": 820
    },
    {
      "epoch": 1.3169377231257438,
      "grad_norm": 0.8102436065673828,
      "learning_rate": 3.4433620823713564e-05,
      "loss": 0.2646,
      "step": 830
    },
    {
      "epoch": 1.3328044426814756,
      "grad_norm": 0.8745511770248413,
      "learning_rate": 3.400444312011776e-05,
      "loss": 0.2606,
      "step": 840
    },
    {
      "epoch": 1.3486711622372074,
      "grad_norm": 0.8574343323707581,
      "learning_rate": 3.3572194021810896e-05,
      "loss": 0.2294,
      "step": 850
    },
    {
      "epoch": 1.3645378817929394,
      "grad_norm": 0.9338182806968689,
      "learning_rate": 3.3137020967960154e-05,
      "loss": 0.2551,
      "step": 860
    },
    {
      "epoch": 1.3804046013486713,
      "grad_norm": 0.7893859148025513,
      "learning_rate": 3.269907239508714e-05,
      "loss": 0.231,
      "step": 870
    },
    {
      "epoch": 1.396271320904403,
      "grad_norm": 0.9416842460632324,
      "learning_rate": 3.2258497686436606e-05,
      "loss": 0.2528,
      "step": 880
    },
    {
      "epoch": 1.412138040460135,
      "grad_norm": 0.7524838447570801,
      "learning_rate": 3.181544712102216e-05,
      "loss": 0.2669,
      "step": 890
    },
    {
      "epoch": 1.4280047600158667,
      "grad_norm": 0.9508939981460571,
      "learning_rate": 3.137007182236637e-05,
      "loss": 0.2436,
      "step": 900
    },
    {
      "epoch": 1.4280047600158667,
      "eval_loss": 0.31376519799232483,
      "eval_runtime": 93.4332,
      "eval_samples_per_second": 6.004,
      "eval_steps_per_second": 6.004,
      "step": 900
    },
    {
      "epoch": 1.4438714795715986,
      "grad_norm": 1.3880516290664673,
      "learning_rate": 3.092252370695298e-05,
      "loss": 0.2781,
      "step": 910
    },
    {
      "epoch": 1.4597381991273304,
      "grad_norm": 0.7659589648246765,
      "learning_rate": 3.0472955432408485e-05,
      "loss": 0.294,
      "step": 920
    },
    {
      "epoch": 1.4756049186830622,
      "grad_norm": 0.7849363088607788,
      "learning_rate": 3.002152034543098e-05,
      "loss": 0.2768,
      "step": 930
    },
    {
      "epoch": 1.491471638238794,
      "grad_norm": 0.666022777557373,
      "learning_rate": 2.9568372429483966e-05,
      "loss": 0.2526,
      "step": 940
    },
    {
      "epoch": 1.5073383577945259,
      "grad_norm": 0.9934321641921997,
      "learning_rate": 2.9113666252272943e-05,
      "loss": 0.2524,
      "step": 950
    },
    {
      "epoch": 1.5232050773502577,
      "grad_norm": 0.8849703073501587,
      "learning_rate": 2.865755691302272e-05,
      "loss": 0.2905,
      "step": 960
    },
    {
      "epoch": 1.5390717969059897,
      "grad_norm": 0.7805452942848206,
      "learning_rate": 2.8200199989573432e-05,
      "loss": 0.242,
      "step": 970
    },
    {
      "epoch": 1.5549385164617215,
      "grad_norm": 0.9101535081863403,
      "learning_rate": 2.7741751485313296e-05,
      "loss": 0.2178,
      "step": 980
    },
    {
      "epoch": 1.5708052360174534,
      "grad_norm": 1.0016871690750122,
      "learning_rate": 2.728236777596621e-05,
      "loss": 0.2738,
      "step": 990
    },
    {
      "epoch": 1.5866719555731852,
      "grad_norm": 0.7558535933494568,
      "learning_rate": 2.6822205556252383e-05,
      "loss": 0.2363,
      "step": 1000
    },
    {
      "epoch": 1.6025386751289172,
      "grad_norm": 0.886492908000946,
      "learning_rate": 2.636142178644009e-05,
      "loss": 0.2645,
      "step": 1010
    },
    {
      "epoch": 1.618405394684649,
      "grad_norm": 0.9074681997299194,
      "learning_rate": 2.590017363880691e-05,
      "loss": 0.2616,
      "step": 1020
    },
    {
      "epoch": 1.6342721142403809,
      "grad_norm": 0.7710486054420471,
      "learning_rate": 2.5438618444028627e-05,
      "loss": 0.2776,
      "step": 1030
    },
    {
      "epoch": 1.6501388337961127,
      "grad_norm": 0.5658883452415466,
      "learning_rate": 2.4976913637514103e-05,
      "loss": 0.2259,
      "step": 1040
    },
    {
      "epoch": 1.6660055533518445,
      "grad_norm": 0.871059000492096,
      "learning_rate": 2.4515216705704395e-05,
      "loss": 0.2106,
      "step": 1050
    },
    {
      "epoch": 1.6818722729075763,
      "grad_norm": 0.6641427874565125,
      "learning_rate": 2.405368513235453e-05,
      "loss": 0.2242,
      "step": 1060
    },
    {
      "epoch": 1.6977389924633082,
      "grad_norm": 0.8793797492980957,
      "learning_rate": 2.359247634481615e-05,
      "loss": 0.2555,
      "step": 1070
    },
    {
      "epoch": 1.71360571201904,
      "grad_norm": 0.9106749296188354,
      "learning_rate": 2.3131747660339394e-05,
      "loss": 0.2903,
      "step": 1080
    },
    {
      "epoch": 1.7294724315747718,
      "grad_norm": 0.8714670538902283,
      "learning_rate": 2.2671656232412378e-05,
      "loss": 0.2885,
      "step": 1090
    },
    {
      "epoch": 1.7453391511305036,
      "grad_norm": 0.7964282631874084,
      "learning_rate": 2.2212358997156445e-05,
      "loss": 0.2579,
      "step": 1100
    },
    {
      "epoch": 1.7612058706862355,
      "grad_norm": 0.855613648891449,
      "learning_rate": 2.175401261979569e-05,
      "loss": 0.2926,
      "step": 1110
    },
    {
      "epoch": 1.7770725902419675,
      "grad_norm": 0.8496893048286438,
      "learning_rate": 2.1296773441218787e-05,
      "loss": 0.2404,
      "step": 1120
    },
    {
      "epoch": 1.7929393097976993,
      "grad_norm": 0.7642855644226074,
      "learning_rate": 2.084079742465142e-05,
      "loss": 0.2471,
      "step": 1130
    },
    {
      "epoch": 1.8088060293534312,
      "grad_norm": 0.8110942244529724,
      "learning_rate": 2.0386240102457682e-05,
      "loss": 0.236,
      "step": 1140
    },
    {
      "epoch": 1.824672748909163,
      "grad_norm": 0.9976955056190491,
      "learning_rate": 1.993325652308828e-05,
      "loss": 0.2609,
      "step": 1150
    },
    {
      "epoch": 1.840539468464895,
      "grad_norm": 0.8436864614486694,
      "learning_rate": 1.9482001198193882e-05,
      "loss": 0.2771,
      "step": 1160
    },
    {
      "epoch": 1.8564061880206268,
      "grad_norm": 0.8823544979095459,
      "learning_rate": 1.903262804992156e-05,
      "loss": 0.2399,
      "step": 1170
    },
    {
      "epoch": 1.8722729075763587,
      "grad_norm": 0.6062878370285034,
      "learning_rate": 1.8585290358412297e-05,
      "loss": 0.2344,
      "step": 1180
    },
    {
      "epoch": 1.8881396271320905,
      "grad_norm": 0.7493007183074951,
      "learning_rate": 1.8140140709517465e-05,
      "loss": 0.2311,
      "step": 1190
    },
    {
      "epoch": 1.9040063466878223,
      "grad_norm": 0.8461095094680786,
      "learning_rate": 1.7697330942752193e-05,
      "loss": 0.2414,
      "step": 1200
    },
    {
      "epoch": 1.9040063466878223,
      "eval_loss": 0.3031667470932007,
      "eval_runtime": 93.4886,
      "eval_samples_per_second": 6.001,
      "eval_steps_per_second": 6.001,
      "step": 1200
    }
  ],
  "logging_steps": 10,
  "max_steps": 1890,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7498295064425267e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}