{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 2215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "contrastive_loss": 1.7576, "epoch": 0.002257336343115124, "grad_norm": 83.0724105834961, "learning_rate": 2.0000000000000002e-07, "lm_loss": 18.2929, "loss": 5.5681, "step": 1, "text_contrastive_loss": 3.9624 }, { "contrastive_loss": 1.7382, "epoch": 0.004514672686230248, "grad_norm": 80.65425109863281, "learning_rate": 4.0000000000000003e-07, "lm_loss": 18.5957, "loss": 5.1631, "step": 2, "text_contrastive_loss": 3.1305 }, { "contrastive_loss": 1.4854, "epoch": 0.006772009029345372, "grad_norm": 73.6375961303711, "learning_rate": 6.000000000000001e-07, "lm_loss": 17.8023, "loss": 4.828, "step": 3, "text_contrastive_loss": 3.1249 }, { "contrastive_loss": 1.4299, "epoch": 0.009029345372460496, "grad_norm": 68.13858032226562, "learning_rate": 8.000000000000001e-07, "lm_loss": 17.6495, "loss": 4.8285, "step": 4, "text_contrastive_loss": 3.2673 }, { "contrastive_loss": 1.4406, "epoch": 0.011286681715575621, "grad_norm": 57.55910110473633, "learning_rate": 1.0000000000000002e-06, "lm_loss": 17.7182, "loss": 4.4308, "step": 5, "text_contrastive_loss": 2.4367 }, { "contrastive_loss": 1.6318, "epoch": 0.013544018058690745, "grad_norm": 68.49832153320312, "learning_rate": 1.2000000000000002e-06, "lm_loss": 17.6306, "loss": 5.1547, "step": 6, "text_contrastive_loss": 3.5197 }, { "contrastive_loss": 1.4941, "epoch": 0.01580135440180587, "grad_norm": 63.47561264038086, "learning_rate": 1.4000000000000001e-06, "lm_loss": 17.8862, "loss": 4.6936, "step": 7, "text_contrastive_loss": 2.8219 }, { "contrastive_loss": 1.3978, "epoch": 0.01805869074492099, "grad_norm": 52.5750617980957, "learning_rate": 1.6000000000000001e-06, "lm_loss": 17.6667, "loss": 4.4269, "step": 8, "text_contrastive_loss": 2.5248 }, { "contrastive_loss": 1.3932, "epoch": 0.020316027088036117, "grad_norm": 58.56637954711914, "learning_rate": 1.8000000000000001e-06, "lm_loss": 17.3383, "loss": 4.5758, "step": 9, "text_contrastive_loss": 2.8975 }, { "contrastive_loss": 1.388, "epoch": 0.022573363431151242, "grad_norm": 51.45430374145508, "learning_rate": 2.0000000000000003e-06, "lm_loss": 17.4333, "loss": 4.1629, "step": 10, "text_contrastive_loss": 2.063 }, { "contrastive_loss": 1.453, "epoch": 0.024830699774266364, "grad_norm": 52.92259979248047, "learning_rate": 2.2e-06, "lm_loss": 16.8459, "loss": 4.4766, "step": 11, "text_contrastive_loss": 2.6781 }, { "contrastive_loss": 1.4529, "epoch": 0.02708803611738149, "grad_norm": 47.33721923828125, "learning_rate": 2.4000000000000003e-06, "lm_loss": 16.5753, "loss": 4.1433, "step": 12, "text_contrastive_loss": 2.0658 }, { "contrastive_loss": 1.3384, "epoch": 0.029345372460496615, "grad_norm": 43.125755310058594, "learning_rate": 2.6e-06, "lm_loss": 16.4345, "loss": 4.1263, "step": 13, "text_contrastive_loss": 2.289 }, { "contrastive_loss": 1.2533, "epoch": 0.03160270880361174, "grad_norm": 40.11970138549805, "learning_rate": 2.8000000000000003e-06, "lm_loss": 16.2871, "loss": 3.9051, "step": 14, "text_contrastive_loss": 2.0462 }, { "contrastive_loss": 1.3104, "epoch": 0.033860045146726865, "grad_norm": 36.58595275878906, "learning_rate": 3e-06, "lm_loss": 15.9117, "loss": 3.8372, "step": 15, "text_contrastive_loss": 1.8711 }, { "contrastive_loss": 1.3056, "epoch": 0.03611738148984198, "grad_norm": 35.10764694213867, "learning_rate": 3.2000000000000003e-06, "lm_loss": 15.4712, "loss": 3.7313, "step": 16, "text_contrastive_loss": 1.7571 }, { "contrastive_loss": 1.3351, "epoch": 0.03837471783295711, "grad_norm": 32.6670036315918, "learning_rate": 3.4000000000000005e-06, "lm_loss": 15.0543, "loss": 3.6613, "step": 17, "text_contrastive_loss": 1.6414 }, { "contrastive_loss": 1.4445, "epoch": 0.040632054176072234, "grad_norm": 37.94453811645508, "learning_rate": 3.6000000000000003e-06, "lm_loss": 14.7468, "loss": 3.8162, "step": 18, "text_contrastive_loss": 1.794 }, { "contrastive_loss": 1.3024, "epoch": 0.04288939051918736, "grad_norm": 30.52195930480957, "learning_rate": 3.8000000000000005e-06, "lm_loss": 14.5291, "loss": 3.5248, "step": 19, "text_contrastive_loss": 1.539 }, { "contrastive_loss": 1.3973, "epoch": 0.045146726862302484, "grad_norm": 28.206356048583984, "learning_rate": 4.000000000000001e-06, "lm_loss": 14.1728, "loss": 3.5596, "step": 20, "text_contrastive_loss": 1.49 }, { "contrastive_loss": 1.3414, "epoch": 0.04740406320541761, "grad_norm": 28.01343536376953, "learning_rate": 4.2000000000000004e-06, "lm_loss": 13.6672, "loss": 3.5034, "step": 21, "text_contrastive_loss": 1.5904 }, { "contrastive_loss": 1.4096, "epoch": 0.04966139954853273, "grad_norm": 30.40696907043457, "learning_rate": 4.4e-06, "lm_loss": 13.4134, "loss": 3.4848, "step": 22, "text_contrastive_loss": 1.4676 }, { "contrastive_loss": 1.3485, "epoch": 0.05191873589164785, "grad_norm": 27.267833709716797, "learning_rate": 4.600000000000001e-06, "lm_loss": 13.0796, "loss": 3.3649, "step": 23, "text_contrastive_loss": 1.4167 }, { "contrastive_loss": 1.2928, "epoch": 0.05417607223476298, "grad_norm": 26.30367660522461, "learning_rate": 4.800000000000001e-06, "lm_loss": 12.588, "loss": 3.2157, "step": 24, "text_contrastive_loss": 1.3283 }, { "contrastive_loss": 1.1886, "epoch": 0.056433408577878104, "grad_norm": 25.767690658569336, "learning_rate": 5e-06, "lm_loss": 12.3093, "loss": 3.0822, "step": 25, "text_contrastive_loss": 1.3254 }, { "contrastive_loss": 1.3105, "epoch": 0.05869074492099323, "grad_norm": 26.667863845825195, "learning_rate": 5.2e-06, "lm_loss": 11.9948, "loss": 3.1951, "step": 26, "text_contrastive_loss": 1.3703 }, { "contrastive_loss": 1.3686, "epoch": 0.060948081264108354, "grad_norm": 24.12013816833496, "learning_rate": 5.400000000000001e-06, "lm_loss": 11.6429, "loss": 3.1517, "step": 27, "text_contrastive_loss": 1.2377 }, { "contrastive_loss": 1.2975, "epoch": 0.06320541760722348, "grad_norm": 21.883312225341797, "learning_rate": 5.600000000000001e-06, "lm_loss": 11.3803, "loss": 3.1306, "step": 28, "text_contrastive_loss": 1.3901 }, { "contrastive_loss": 1.3535, "epoch": 0.0654627539503386, "grad_norm": 25.86800765991211, "learning_rate": 5.8e-06, "lm_loss": 10.9448, "loss": 3.0264, "step": 29, "text_contrastive_loss": 1.1568 }, { "contrastive_loss": 1.3445, "epoch": 0.06772009029345373, "grad_norm": 23.156831741333008, "learning_rate": 6e-06, "lm_loss": 10.6367, "loss": 3.1278, "step": 30, "text_contrastive_loss": 1.4392 }, { "contrastive_loss": 1.1378, "epoch": 0.06997742663656885, "grad_norm": 21.37388038635254, "learning_rate": 6.200000000000001e-06, "lm_loss": 10.5811, "loss": 2.7304, "step": 31, "text_contrastive_loss": 1.069 }, { "contrastive_loss": 1.2668, "epoch": 0.07223476297968397, "grad_norm": 22.608537673950195, "learning_rate": 6.4000000000000006e-06, "lm_loss": 10.1412, "loss": 2.9049, "step": 32, "text_contrastive_loss": 1.248 }, { "contrastive_loss": 1.1991, "epoch": 0.0744920993227991, "grad_norm": 20.33625030517578, "learning_rate": 6.600000000000001e-06, "lm_loss": 10.1149, "loss": 2.7904, "step": 33, "text_contrastive_loss": 1.1597 }, { "contrastive_loss": 1.2407, "epoch": 0.07674943566591422, "grad_norm": 22.88743019104004, "learning_rate": 6.800000000000001e-06, "lm_loss": 9.8612, "loss": 2.8439, "step": 34, "text_contrastive_loss": 1.2343 }, { "contrastive_loss": 1.1218, "epoch": 0.07900677200902935, "grad_norm": 22.587696075439453, "learning_rate": 7e-06, "lm_loss": 9.5444, "loss": 2.6075, "step": 35, "text_contrastive_loss": 1.0625 }, { "contrastive_loss": 1.2272, "epoch": 0.08126410835214447, "grad_norm": 22.853036880493164, "learning_rate": 7.2000000000000005e-06, "lm_loss": 9.3847, "loss": 2.7395, "step": 36, "text_contrastive_loss": 1.1476 }, { "contrastive_loss": 1.2236, "epoch": 0.0835214446952596, "grad_norm": 23.60159683227539, "learning_rate": 7.4e-06, "lm_loss": 9.1248, "loss": 2.7053, "step": 37, "text_contrastive_loss": 1.1384 }, { "contrastive_loss": 1.1706, "epoch": 0.08577878103837472, "grad_norm": 25.40546417236328, "learning_rate": 7.600000000000001e-06, "lm_loss": 9.15, "loss": 2.7461, "step": 38, "text_contrastive_loss": 1.321 }, { "contrastive_loss": 1.0864, "epoch": 0.08803611738148984, "grad_norm": 25.925676345825195, "learning_rate": 7.800000000000002e-06, "lm_loss": 9.0864, "loss": 2.5104, "step": 39, "text_contrastive_loss": 1.0307 }, { "contrastive_loss": 0.999, "epoch": 0.09029345372460497, "grad_norm": 23.36672592163086, "learning_rate": 8.000000000000001e-06, "lm_loss": 8.905, "loss": 2.4223, "step": 40, "text_contrastive_loss": 1.0656 }, { "contrastive_loss": 1.1439, "epoch": 0.09255079006772009, "grad_norm": 24.166732788085938, "learning_rate": 8.2e-06, "lm_loss": 8.8927, "loss": 2.6421, "step": 41, "text_contrastive_loss": 1.218 }, { "contrastive_loss": 1.2004, "epoch": 0.09480812641083522, "grad_norm": 25.256484985351562, "learning_rate": 8.400000000000001e-06, "lm_loss": 8.6496, "loss": 2.656, "step": 42, "text_contrastive_loss": 1.1813 }, { "contrastive_loss": 1.0696, "epoch": 0.09706546275395034, "grad_norm": 22.554882049560547, "learning_rate": 8.6e-06, "lm_loss": 8.5634, "loss": 2.4508, "step": 43, "text_contrastive_loss": 1.0498 }, { "contrastive_loss": 1.1158, "epoch": 0.09932279909706546, "grad_norm": 23.315597534179688, "learning_rate": 8.8e-06, "lm_loss": 8.4833, "loss": 2.5364, "step": 44, "text_contrastive_loss": 1.1445 }, { "contrastive_loss": 1.169, "epoch": 0.10158013544018059, "grad_norm": 26.591489791870117, "learning_rate": 9e-06, "lm_loss": 8.4115, "loss": 2.6611, "step": 45, "text_contrastive_loss": 1.302 }, { "contrastive_loss": 1.0582, "epoch": 0.1038374717832957, "grad_norm": 25.223093032836914, "learning_rate": 9.200000000000002e-06, "lm_loss": 8.3529, "loss": 2.4645, "step": 46, "text_contrastive_loss": 1.142 }, { "contrastive_loss": 1.0643, "epoch": 0.10609480812641084, "grad_norm": 21.448993682861328, "learning_rate": 9.4e-06, "lm_loss": 8.3398, "loss": 2.4296, "step": 47, "text_contrastive_loss": 1.0627 }, { "contrastive_loss": 1.1898, "epoch": 0.10835214446952596, "grad_norm": 22.973129272460938, "learning_rate": 9.600000000000001e-06, "lm_loss": 8.2453, "loss": 2.5315, "step": 48, "text_contrastive_loss": 1.0344 }, { "contrastive_loss": 1.1033, "epoch": 0.11060948081264109, "grad_norm": 24.533485412597656, "learning_rate": 9.800000000000001e-06, "lm_loss": 8.211, "loss": 2.4239, "step": 49, "text_contrastive_loss": 0.9989 }, { "contrastive_loss": 0.9491, "epoch": 0.11286681715575621, "grad_norm": 20.077064514160156, "learning_rate": 1e-05, "lm_loss": 8.1512, "loss": 2.283, "step": 50, "text_contrastive_loss": 1.0376 }, { "contrastive_loss": 1.137, "epoch": 0.11512415349887133, "grad_norm": 21.466537475585938, "learning_rate": 9.999994735903083e-06, "lm_loss": 8.0854, "loss": 2.4723, "step": 51, "text_contrastive_loss": 1.0535 }, { "contrastive_loss": 1.0502, "epoch": 0.11738148984198646, "grad_norm": 23.226255416870117, "learning_rate": 9.999978943623417e-06, "lm_loss": 7.9951, "loss": 2.4097, "step": 52, "text_contrastive_loss": 1.12 }, { "contrastive_loss": 0.9545, "epoch": 0.11963882618510158, "grad_norm": 20.001995086669922, "learning_rate": 9.999952623194252e-06, "lm_loss": 7.9469, "loss": 2.2316, "step": 53, "text_contrastive_loss": 0.9648 }, { "contrastive_loss": 1.0112, "epoch": 0.12189616252821671, "grad_norm": 25.90329360961914, "learning_rate": 9.999915774671009e-06, "lm_loss": 7.933, "loss": 2.3686, "step": 54, "text_contrastive_loss": 1.1281 }, { "contrastive_loss": 1.0171, "epoch": 0.12415349887133183, "grad_norm": 21.86749839782715, "learning_rate": 9.999868398131282e-06, "lm_loss": 8.0448, "loss": 2.396, "step": 55, "text_contrastive_loss": 1.1488 }, { "contrastive_loss": 0.8968, "epoch": 0.12641083521444696, "grad_norm": 22.552684783935547, "learning_rate": 9.999810493674826e-06, "lm_loss": 7.9022, "loss": 2.1881, "step": 56, "text_contrastive_loss": 1.0021 }, { "contrastive_loss": 0.9698, "epoch": 0.12866817155756208, "grad_norm": 23.647756576538086, "learning_rate": 9.999742061423567e-06, "lm_loss": 7.9914, "loss": 2.3512, "step": 57, "text_contrastive_loss": 1.1645 }, { "contrastive_loss": 1.0828, "epoch": 0.1309255079006772, "grad_norm": 20.83641815185547, "learning_rate": 9.999663101521599e-06, "lm_loss": 7.8775, "loss": 2.401, "step": 58, "text_contrastive_loss": 1.0609 }, { "contrastive_loss": 0.9098, "epoch": 0.13318284424379231, "grad_norm": 19.888952255249023, "learning_rate": 9.999573614135183e-06, "lm_loss": 7.8223, "loss": 2.1864, "step": 59, "text_contrastive_loss": 0.9888 }, { "contrastive_loss": 1.0804, "epoch": 0.13544018058690746, "grad_norm": 23.106822967529297, "learning_rate": 9.999473599452746e-06, "lm_loss": 7.6884, "loss": 2.411, "step": 60, "text_contrastive_loss": 1.1234 }, { "contrastive_loss": 0.962, "epoch": 0.13769751693002258, "grad_norm": 21.09849739074707, "learning_rate": 9.999363057684885e-06, "lm_loss": 7.7574, "loss": 2.2024, "step": 61, "text_contrastive_loss": 0.9293 }, { "contrastive_loss": 1.1026, "epoch": 0.1399548532731377, "grad_norm": 22.664743423461914, "learning_rate": 9.999241989064358e-06, "lm_loss": 7.6629, "loss": 2.3475, "step": 62, "text_contrastive_loss": 0.9571 }, { "contrastive_loss": 1.0655, "epoch": 0.14221218961625282, "grad_norm": 21.019250869750977, "learning_rate": 9.999110393846097e-06, "lm_loss": 7.6433, "loss": 2.4061, "step": 63, "text_contrastive_loss": 1.1526 }, { "contrastive_loss": 0.94, "epoch": 0.14446952595936793, "grad_norm": 22.08234977722168, "learning_rate": 9.998968272307187e-06, "lm_loss": 7.6577, "loss": 2.18, "step": 64, "text_contrastive_loss": 0.9484 }, { "contrastive_loss": 1.045, "epoch": 0.14672686230248308, "grad_norm": 20.573204040527344, "learning_rate": 9.99881562474689e-06, "lm_loss": 7.5849, "loss": 2.3881, "step": 65, "text_contrastive_loss": 1.1692 }, { "contrastive_loss": 0.9304, "epoch": 0.1489841986455982, "grad_norm": 19.680683135986328, "learning_rate": 9.998652451486626e-06, "lm_loss": 7.5698, "loss": 2.2469, "step": 66, "text_contrastive_loss": 1.119 }, { "contrastive_loss": 1.0139, "epoch": 0.15124153498871332, "grad_norm": 21.903793334960938, "learning_rate": 9.998478752869976e-06, "lm_loss": 7.5142, "loss": 2.2693, "step": 67, "text_contrastive_loss": 1.0078 }, { "contrastive_loss": 1.0315, "epoch": 0.15349887133182843, "grad_norm": 21.78392219543457, "learning_rate": 9.998294529262688e-06, "lm_loss": 7.4581, "loss": 2.3674, "step": 68, "text_contrastive_loss": 1.1802 }, { "contrastive_loss": 1.0517, "epoch": 0.15575620767494355, "grad_norm": 22.221651077270508, "learning_rate": 9.998099781052673e-06, "lm_loss": 7.4321, "loss": 2.34, "step": 69, "text_contrastive_loss": 1.0903 }, { "contrastive_loss": 0.8489, "epoch": 0.1580135440180587, "grad_norm": 18.843486785888672, "learning_rate": 9.997894508649995e-06, "lm_loss": 7.5177, "loss": 2.0465, "step": 70, "text_contrastive_loss": 0.8917 }, { "contrastive_loss": 0.9347, "epoch": 0.16027088036117382, "grad_norm": 21.679800033569336, "learning_rate": 9.997678712486889e-06, "lm_loss": 7.4389, "loss": 2.1878, "step": 71, "text_contrastive_loss": 1.0185 }, { "contrastive_loss": 0.9222, "epoch": 0.16252821670428894, "grad_norm": 21.853797912597656, "learning_rate": 9.99745239301774e-06, "lm_loss": 7.4099, "loss": 2.1896, "step": 72, "text_contrastive_loss": 1.0527 }, { "contrastive_loss": 0.936, "epoch": 0.16478555304740405, "grad_norm": 19.19109344482422, "learning_rate": 9.997215550719097e-06, "lm_loss": 7.504, "loss": 2.1866, "step": 73, "text_contrastive_loss": 1.0003 }, { "contrastive_loss": 1.017, "epoch": 0.1670428893905192, "grad_norm": 23.202587127685547, "learning_rate": 9.996968186089664e-06, "lm_loss": 7.5318, "loss": 2.3209, "step": 74, "text_contrastive_loss": 1.1015 }, { "contrastive_loss": 0.8918, "epoch": 0.16930022573363432, "grad_norm": 18.873233795166016, "learning_rate": 9.996710299650302e-06, "lm_loss": 7.3373, "loss": 2.1512, "step": 75, "text_contrastive_loss": 1.0514 }, { "contrastive_loss": 1.0035, "epoch": 0.17155756207674944, "grad_norm": 22.463476181030273, "learning_rate": 9.996441891944023e-06, "lm_loss": 7.4114, "loss": 2.3061, "step": 76, "text_contrastive_loss": 1.1229 }, { "contrastive_loss": 1.0282, "epoch": 0.17381489841986456, "grad_norm": 21.54694366455078, "learning_rate": 9.996162963536004e-06, "lm_loss": 7.3556, "loss": 2.2261, "step": 77, "text_contrastive_loss": 0.9248 }, { "contrastive_loss": 1.0397, "epoch": 0.17607223476297967, "grad_norm": 22.61308479309082, "learning_rate": 9.995873515013562e-06, "lm_loss": 7.2681, "loss": 2.382, "step": 78, "text_contrastive_loss": 1.2308 }, { "contrastive_loss": 0.9218, "epoch": 0.17832957110609482, "grad_norm": 20.243572235107422, "learning_rate": 9.99557354698617e-06, "lm_loss": 7.3342, "loss": 2.2013, "step": 79, "text_contrastive_loss": 1.0922 }, { "contrastive_loss": 0.9145, "epoch": 0.18058690744920994, "grad_norm": 20.536954879760742, "learning_rate": 9.995263060085456e-06, "lm_loss": 7.3494, "loss": 2.1959, "step": 80, "text_contrastive_loss": 1.0929 }, { "contrastive_loss": 0.957, "epoch": 0.18284424379232506, "grad_norm": 21.177587509155273, "learning_rate": 9.99494205496519e-06, "lm_loss": 7.2084, "loss": 2.2445, "step": 81, "text_contrastive_loss": 1.1331 }, { "contrastive_loss": 0.7592, "epoch": 0.18510158013544017, "grad_norm": 18.126768112182617, "learning_rate": 9.994610532301296e-06, "lm_loss": 7.3328, "loss": 1.9956, "step": 82, "text_contrastive_loss": 1.0064 }, { "contrastive_loss": 1.0863, "epoch": 0.1873589164785553, "grad_norm": 20.58598518371582, "learning_rate": 9.99426849279184e-06, "lm_loss": 7.1922, "loss": 2.3667, "step": 83, "text_contrastive_loss": 1.1222 }, { "contrastive_loss": 1.0172, "epoch": 0.18961625282167044, "grad_norm": 21.558956146240234, "learning_rate": 9.993915937157033e-06, "lm_loss": 7.2709, "loss": 2.2399, "step": 84, "text_contrastive_loss": 0.9911 }, { "contrastive_loss": 1.0567, "epoch": 0.19187358916478556, "grad_norm": 20.75426483154297, "learning_rate": 9.99355286613923e-06, "lm_loss": 7.1795, "loss": 2.407, "step": 85, "text_contrastive_loss": 1.2648 }, { "contrastive_loss": 0.8501, "epoch": 0.19413092550790068, "grad_norm": 18.426937103271484, "learning_rate": 9.993179280502926e-06, "lm_loss": 7.1365, "loss": 2.0173, "step": 86, "text_contrastive_loss": 0.9071 }, { "contrastive_loss": 0.9691, "epoch": 0.1963882618510158, "grad_norm": 24.029739379882812, "learning_rate": 9.99279518103476e-06, "lm_loss": 7.3336, "loss": 2.2099, "step": 87, "text_contrastive_loss": 1.0147 }, { "contrastive_loss": 1.0107, "epoch": 0.1986455981941309, "grad_norm": 22.55620765686035, "learning_rate": 9.992400568543506e-06, "lm_loss": 7.1243, "loss": 2.2311, "step": 88, "text_contrastive_loss": 1.0161 }, { "contrastive_loss": 0.9813, "epoch": 0.20090293453724606, "grad_norm": 23.12877082824707, "learning_rate": 9.991995443860074e-06, "lm_loss": 7.0373, "loss": 2.1753, "step": 89, "text_contrastive_loss": 0.9805 }, { "contrastive_loss": 1.0067, "epoch": 0.20316027088036118, "grad_norm": 20.94490623474121, "learning_rate": 9.991579807837511e-06, "lm_loss": 7.0666, "loss": 2.2587, "step": 90, "text_contrastive_loss": 1.0907 }, { "contrastive_loss": 0.8441, "epoch": 0.2054176072234763, "grad_norm": 17.67084503173828, "learning_rate": 9.991153661350996e-06, "lm_loss": 7.0421, "loss": 2.0348, "step": 91, "text_contrastive_loss": 0.9731 }, { "contrastive_loss": 0.8157, "epoch": 0.2076749435665914, "grad_norm": 19.95044708251953, "learning_rate": 9.990717005297841e-06, "lm_loss": 7.139, "loss": 2.1037, "step": 92, "text_contrastive_loss": 1.1482 }, { "contrastive_loss": 0.9876, "epoch": 0.20993227990970656, "grad_norm": 19.68552017211914, "learning_rate": 9.990269840597484e-06, "lm_loss": 7.0277, "loss": 2.1938, "step": 93, "text_contrastive_loss": 1.0068 }, { "contrastive_loss": 0.8988, "epoch": 0.21218961625282168, "grad_norm": 18.531797409057617, "learning_rate": 9.989812168191495e-06, "lm_loss": 7.0713, "loss": 2.0347, "step": 94, "text_contrastive_loss": 0.8575 }, { "contrastive_loss": 1.0284, "epoch": 0.2144469525959368, "grad_norm": 21.68889045715332, "learning_rate": 9.989343989043563e-06, "lm_loss": 6.9658, "loss": 2.2725, "step": 95, "text_contrastive_loss": 1.095 }, { "contrastive_loss": 0.9913, "epoch": 0.21670428893905191, "grad_norm": 23.28238868713379, "learning_rate": 9.988865304139509e-06, "lm_loss": 7.0698, "loss": 2.2637, "step": 96, "text_contrastive_loss": 1.1309 }, { "contrastive_loss": 1.0535, "epoch": 0.21896162528216703, "grad_norm": 21.559371948242188, "learning_rate": 9.988376114487264e-06, "lm_loss": 7.0501, "loss": 2.3147, "step": 97, "text_contrastive_loss": 1.1124 }, { "contrastive_loss": 0.7835, "epoch": 0.22121896162528218, "grad_norm": 17.92148780822754, "learning_rate": 9.98787642111689e-06, "lm_loss": 6.9048, "loss": 1.9187, "step": 98, "text_contrastive_loss": 0.8894 }, { "contrastive_loss": 1.0026, "epoch": 0.2234762979683973, "grad_norm": 20.566707611083984, "learning_rate": 9.98736622508056e-06, "lm_loss": 7.0112, "loss": 2.1804, "step": 99, "text_contrastive_loss": 0.9533 }, { "contrastive_loss": 1.1049, "epoch": 0.22573363431151242, "grad_norm": 22.571447372436523, "learning_rate": 9.98684552745256e-06, "lm_loss": 6.8937, "loss": 2.3282, "step": 100, "text_contrastive_loss": 1.0679 }, { "contrastive_loss": 0.9926, "epoch": 0.22799097065462753, "grad_norm": 22.88062858581543, "learning_rate": 9.986314329329294e-06, "lm_loss": 6.9328, "loss": 2.2112, "step": 101, "text_contrastive_loss": 1.0507 }, { "contrastive_loss": 0.926, "epoch": 0.23024830699774265, "grad_norm": 23.333864212036133, "learning_rate": 9.985772631829272e-06, "lm_loss": 7.0156, "loss": 2.0837, "step": 102, "text_contrastive_loss": 0.9122 }, { "contrastive_loss": 0.8629, "epoch": 0.2325056433408578, "grad_norm": 19.736459732055664, "learning_rate": 9.985220436093112e-06, "lm_loss": 6.8497, "loss": 2.1504, "step": 103, "text_contrastive_loss": 1.2051 }, { "contrastive_loss": 0.9073, "epoch": 0.23476297968397292, "grad_norm": 22.719139099121094, "learning_rate": 9.984657743283543e-06, "lm_loss": 6.9904, "loss": 2.1453, "step": 104, "text_contrastive_loss": 1.0779 }, { "contrastive_loss": 1.0299, "epoch": 0.23702031602708803, "grad_norm": 24.38851547241211, "learning_rate": 9.984084554585387e-06, "lm_loss": 6.9032, "loss": 2.2046, "step": 105, "text_contrastive_loss": 0.9688 }, { "contrastive_loss": 0.7721, "epoch": 0.23927765237020315, "grad_norm": 19.937759399414062, "learning_rate": 9.983500871205577e-06, "lm_loss": 6.8874, "loss": 1.8608, "step": 106, "text_contrastive_loss": 0.8 }, { "contrastive_loss": 0.9991, "epoch": 0.24153498871331827, "grad_norm": 22.109214782714844, "learning_rate": 9.982906694373136e-06, "lm_loss": 6.8954, "loss": 2.2567, "step": 107, "text_contrastive_loss": 1.1362 }, { "contrastive_loss": 1.0091, "epoch": 0.24379232505643342, "grad_norm": 23.73514175415039, "learning_rate": 9.98230202533919e-06, "lm_loss": 6.8097, "loss": 2.2244, "step": 108, "text_contrastive_loss": 1.0687 }, { "contrastive_loss": 0.8579, "epoch": 0.24604966139954854, "grad_norm": 19.403186798095703, "learning_rate": 9.98168686537695e-06, "lm_loss": 6.8249, "loss": 1.9528, "step": 109, "text_contrastive_loss": 0.8249 }, { "contrastive_loss": 0.8255, "epoch": 0.24830699774266365, "grad_norm": 22.000633239746094, "learning_rate": 9.98106121578172e-06, "lm_loss": 6.841, "loss": 2.0006, "step": 110, "text_contrastive_loss": 0.982 }, { "contrastive_loss": 0.9317, "epoch": 0.2505643340857788, "grad_norm": 22.940288543701172, "learning_rate": 9.980425077870895e-06, "lm_loss": 6.8525, "loss": 2.1067, "step": 111, "text_contrastive_loss": 0.9794 }, { "contrastive_loss": 0.8398, "epoch": 0.2528216704288939, "grad_norm": 19.426469802856445, "learning_rate": 9.979778452983949e-06, "lm_loss": 6.8473, "loss": 2.0275, "step": 112, "text_contrastive_loss": 1.006 }, { "contrastive_loss": 0.9049, "epoch": 0.255079006772009, "grad_norm": 21.40607452392578, "learning_rate": 9.979121342482442e-06, "lm_loss": 6.7444, "loss": 2.1078, "step": 113, "text_contrastive_loss": 1.0568 }, { "contrastive_loss": 0.8558, "epoch": 0.25733634311512416, "grad_norm": 22.809553146362305, "learning_rate": 9.978453747750012e-06, "lm_loss": 6.8781, "loss": 1.9855, "step": 114, "text_contrastive_loss": 0.8837 }, { "contrastive_loss": 0.8186, "epoch": 0.2595936794582393, "grad_norm": 17.59353256225586, "learning_rate": 9.977775670192373e-06, "lm_loss": 6.7933, "loss": 1.9924, "step": 115, "text_contrastive_loss": 0.989 }, { "contrastive_loss": 0.9305, "epoch": 0.2618510158013544, "grad_norm": 21.62824249267578, "learning_rate": 9.977087111237307e-06, "lm_loss": 6.7723, "loss": 2.145, "step": 116, "text_contrastive_loss": 1.0745 }, { "contrastive_loss": 0.9281, "epoch": 0.26410835214446954, "grad_norm": 24.133895874023438, "learning_rate": 9.976388072334674e-06, "lm_loss": 6.7408, "loss": 2.0355, "step": 117, "text_contrastive_loss": 0.8666 }, { "contrastive_loss": 0.949, "epoch": 0.26636568848758463, "grad_norm": 20.17030906677246, "learning_rate": 9.975678554956397e-06, "lm_loss": 6.7308, "loss": 2.0432, "step": 118, "text_contrastive_loss": 0.8423 }, { "contrastive_loss": 0.8287, "epoch": 0.2686230248306998, "grad_norm": 19.101581573486328, "learning_rate": 9.974958560596464e-06, "lm_loss": 6.7608, "loss": 1.9723, "step": 119, "text_contrastive_loss": 0.935 }, { "contrastive_loss": 0.94, "epoch": 0.2708803611738149, "grad_norm": 20.302513122558594, "learning_rate": 9.97422809077092e-06, "lm_loss": 6.6856, "loss": 2.1868, "step": 120, "text_contrastive_loss": 1.1564 }, { "contrastive_loss": 0.836, "epoch": 0.27313769751693, "grad_norm": 18.526567459106445, "learning_rate": 9.973487147017874e-06, "lm_loss": 6.7732, "loss": 1.998, "step": 121, "text_contrastive_loss": 0.9693 }, { "contrastive_loss": 0.8308, "epoch": 0.27539503386004516, "grad_norm": 20.648765563964844, "learning_rate": 9.972735730897484e-06, "lm_loss": 6.7667, "loss": 2.0166, "step": 122, "text_contrastive_loss": 1.0183 }, { "contrastive_loss": 0.9621, "epoch": 0.27765237020316025, "grad_norm": 20.575937271118164, "learning_rate": 9.97197384399196e-06, "lm_loss": 6.7598, "loss": 2.1682, "step": 123, "text_contrastive_loss": 1.0602 }, { "contrastive_loss": 0.7394, "epoch": 0.2799097065462754, "grad_norm": 19.688508987426758, "learning_rate": 9.971201487905563e-06, "lm_loss": 6.7787, "loss": 1.8177, "step": 124, "text_contrastive_loss": 0.8007 }, { "contrastive_loss": 0.8498, "epoch": 0.28216704288939054, "grad_norm": 19.605121612548828, "learning_rate": 9.970418664264596e-06, "lm_loss": 6.7351, "loss": 2.0701, "step": 125, "text_contrastive_loss": 1.0937 }, { "contrastive_loss": 0.9438, "epoch": 0.28442437923250563, "grad_norm": 20.848880767822266, "learning_rate": 9.969625374717401e-06, "lm_loss": 6.6819, "loss": 2.1431, "step": 126, "text_contrastive_loss": 1.0621 }, { "contrastive_loss": 0.9114, "epoch": 0.2866817155756208, "grad_norm": 21.030580520629883, "learning_rate": 9.96882162093436e-06, "lm_loss": 6.6468, "loss": 2.0173, "step": 127, "text_contrastive_loss": 0.8825 }, { "contrastive_loss": 0.8366, "epoch": 0.28893905191873587, "grad_norm": 20.475706100463867, "learning_rate": 9.968007404607887e-06, "lm_loss": 6.6789, "loss": 2.0272, "step": 128, "text_contrastive_loss": 1.0453 }, { "contrastive_loss": 0.8921, "epoch": 0.291196388261851, "grad_norm": 20.923486709594727, "learning_rate": 9.96718272745243e-06, "lm_loss": 6.5704, "loss": 2.0007, "step": 129, "text_contrastive_loss": 0.903 }, { "contrastive_loss": 0.9031, "epoch": 0.29345372460496616, "grad_norm": 23.098770141601562, "learning_rate": 9.966347591204459e-06, "lm_loss": 6.6826, "loss": 2.0489, "step": 130, "text_contrastive_loss": 0.9552 }, { "contrastive_loss": 0.7868, "epoch": 0.29571106094808125, "grad_norm": 18.975332260131836, "learning_rate": 9.96550199762247e-06, "lm_loss": 6.602, "loss": 1.9859, "step": 131, "text_contrastive_loss": 1.0778 }, { "contrastive_loss": 0.8502, "epoch": 0.2979683972911964, "grad_norm": 19.30949592590332, "learning_rate": 9.964645948486978e-06, "lm_loss": 6.5967, "loss": 1.9755, "step": 132, "text_contrastive_loss": 0.9313 }, { "contrastive_loss": 0.9409, "epoch": 0.3002257336343115, "grad_norm": 19.818702697753906, "learning_rate": 9.963779445600512e-06, "lm_loss": 6.655, "loss": 2.1949, "step": 133, "text_contrastive_loss": 1.1771 }, { "contrastive_loss": 0.7461, "epoch": 0.30248306997742663, "grad_norm": 17.95615005493164, "learning_rate": 9.962902490787616e-06, "lm_loss": 6.7079, "loss": 1.914, "step": 134, "text_contrastive_loss": 0.9942 }, { "contrastive_loss": 0.8042, "epoch": 0.3047404063205418, "grad_norm": 18.942901611328125, "learning_rate": 9.962015085894838e-06, "lm_loss": 6.6384, "loss": 1.9746, "step": 135, "text_contrastive_loss": 1.0131 }, { "contrastive_loss": 0.9972, "epoch": 0.30699774266365687, "grad_norm": 24.471101760864258, "learning_rate": 9.961117232790734e-06, "lm_loss": 6.6302, "loss": 2.1493, "step": 136, "text_contrastive_loss": 0.9783 }, { "contrastive_loss": 0.9711, "epoch": 0.309255079006772, "grad_norm": 20.733802795410156, "learning_rate": 9.960208933365857e-06, "lm_loss": 6.7176, "loss": 2.1471, "step": 137, "text_contrastive_loss": 1.0086 }, { "contrastive_loss": 0.9169, "epoch": 0.3115124153498871, "grad_norm": 20.384777069091797, "learning_rate": 9.959290189532757e-06, "lm_loss": 6.5893, "loss": 2.0997, "step": 138, "text_contrastive_loss": 1.0476 }, { "contrastive_loss": 0.7578, "epoch": 0.31376975169300225, "grad_norm": 19.190717697143555, "learning_rate": 9.958361003225979e-06, "lm_loss": 6.5529, "loss": 1.8754, "step": 139, "text_contrastive_loss": 0.9248 }, { "contrastive_loss": 0.8863, "epoch": 0.3160270880361174, "grad_norm": 19.11601448059082, "learning_rate": 9.957421376402053e-06, "lm_loss": 6.575, "loss": 2.0286, "step": 140, "text_contrastive_loss": 0.9696 }, { "contrastive_loss": 0.7891, "epoch": 0.3182844243792325, "grad_norm": 19.93131446838379, "learning_rate": 9.956471311039491e-06, "lm_loss": 6.571, "loss": 1.9056, "step": 141, "text_contrastive_loss": 0.9187 }, { "contrastive_loss": 0.9544, "epoch": 0.32054176072234764, "grad_norm": 20.333568572998047, "learning_rate": 9.95551080913879e-06, "lm_loss": 6.5818, "loss": 2.1021, "step": 142, "text_contrastive_loss": 0.979 }, { "contrastive_loss": 0.8276, "epoch": 0.3227990970654628, "grad_norm": 19.743680953979492, "learning_rate": 9.954539872722417e-06, "lm_loss": 6.6458, "loss": 1.8983, "step": 143, "text_contrastive_loss": 0.8123 }, { "contrastive_loss": 0.9204, "epoch": 0.32505643340857787, "grad_norm": 20.866682052612305, "learning_rate": 9.953558503834819e-06, "lm_loss": 6.6503, "loss": 2.1714, "step": 144, "text_contrastive_loss": 1.172 }, { "contrastive_loss": 0.9538, "epoch": 0.327313769751693, "grad_norm": 19.932788848876953, "learning_rate": 9.9525667045424e-06, "lm_loss": 6.5406, "loss": 2.0964, "step": 145, "text_contrastive_loss": 0.9772 }, { "contrastive_loss": 0.7554, "epoch": 0.3295711060948081, "grad_norm": 18.704795837402344, "learning_rate": 9.951564476933534e-06, "lm_loss": 6.6267, "loss": 1.8488, "step": 146, "text_contrastive_loss": 0.8613 }, { "contrastive_loss": 0.8672, "epoch": 0.33182844243792325, "grad_norm": 18.536409378051758, "learning_rate": 9.950551823118544e-06, "lm_loss": 6.5201, "loss": 1.9945, "step": 147, "text_contrastive_loss": 0.9506 }, { "contrastive_loss": 0.926, "epoch": 0.3340857787810384, "grad_norm": 21.630870819091797, "learning_rate": 9.949528745229721e-06, "lm_loss": 6.5951, "loss": 2.1363, "step": 148, "text_contrastive_loss": 1.1015 }, { "contrastive_loss": 0.8198, "epoch": 0.3363431151241535, "grad_norm": 20.16887664794922, "learning_rate": 9.948495245421294e-06, "lm_loss": 6.6474, "loss": 2.0023, "step": 149, "text_contrastive_loss": 1.0355 }, { "contrastive_loss": 0.9098, "epoch": 0.33860045146726864, "grad_norm": 21.951305389404297, "learning_rate": 9.94745132586944e-06, "lm_loss": 6.5247, "loss": 2.0433, "step": 150, "text_contrastive_loss": 0.962 }, { "contrastive_loss": 0.7625, "epoch": 0.34085778781038373, "grad_norm": 17.890987396240234, "learning_rate": 9.946396988772275e-06, "lm_loss": 6.5289, "loss": 1.8931, "step": 151, "text_contrastive_loss": 0.9554 }, { "contrastive_loss": 0.8016, "epoch": 0.3431151241534989, "grad_norm": 21.804550170898438, "learning_rate": 9.945332236349857e-06, "lm_loss": 6.6814, "loss": 1.8688, "step": 152, "text_contrastive_loss": 0.7981 }, { "contrastive_loss": 0.8857, "epoch": 0.345372460496614, "grad_norm": 21.859079360961914, "learning_rate": 9.944257070844165e-06, "lm_loss": 6.5121, "loss": 2.0385, "step": 153, "text_contrastive_loss": 1.0031 }, { "contrastive_loss": 0.8998, "epoch": 0.3476297968397291, "grad_norm": 20.89244270324707, "learning_rate": 9.943171494519111e-06, "lm_loss": 6.4703, "loss": 2.0058, "step": 154, "text_contrastive_loss": 0.9181 }, { "contrastive_loss": 0.779, "epoch": 0.34988713318284426, "grad_norm": 20.04449462890625, "learning_rate": 9.942075509660527e-06, "lm_loss": 6.4538, "loss": 1.9743, "step": 155, "text_contrastive_loss": 1.0998 }, { "contrastive_loss": 0.7987, "epoch": 0.35214446952595935, "grad_norm": 19.918045043945312, "learning_rate": 9.94096911857616e-06, "lm_loss": 6.6398, "loss": 1.956, "step": 156, "text_contrastive_loss": 0.9865 }, { "contrastive_loss": 0.8872, "epoch": 0.3544018058690745, "grad_norm": 22.130380630493164, "learning_rate": 9.939852323595671e-06, "lm_loss": 6.5633, "loss": 1.9952, "step": 157, "text_contrastive_loss": 0.9034 }, { "contrastive_loss": 0.8786, "epoch": 0.35665914221218964, "grad_norm": 22.071300506591797, "learning_rate": 9.938725127070628e-06, "lm_loss": 6.5936, "loss": 2.0969, "step": 158, "text_contrastive_loss": 1.118 }, { "contrastive_loss": 0.8317, "epoch": 0.35891647855530473, "grad_norm": 20.15006446838379, "learning_rate": 9.937587531374497e-06, "lm_loss": 6.5906, "loss": 1.9588, "step": 159, "text_contrastive_loss": 0.9361 }, { "contrastive_loss": 0.7938, "epoch": 0.3611738148984199, "grad_norm": 20.539600372314453, "learning_rate": 9.936439538902644e-06, "lm_loss": 6.4727, "loss": 1.8619, "step": 160, "text_contrastive_loss": 0.8417 }, { "contrastive_loss": 0.7934, "epoch": 0.36343115124153497, "grad_norm": 19.92677116394043, "learning_rate": 9.935281152072329e-06, "lm_loss": 6.5515, "loss": 1.9428, "step": 161, "text_contrastive_loss": 0.9885 }, { "contrastive_loss": 0.8887, "epoch": 0.3656884875846501, "grad_norm": 19.666276931762695, "learning_rate": 9.934112373322695e-06, "lm_loss": 6.5895, "loss": 2.1052, "step": 162, "text_contrastive_loss": 1.1152 }, { "contrastive_loss": 0.8653, "epoch": 0.36794582392776526, "grad_norm": 24.452041625976562, "learning_rate": 9.932933205114766e-06, "lm_loss": 6.45, "loss": 1.9736, "step": 163, "text_contrastive_loss": 0.9265 }, { "contrastive_loss": 0.7489, "epoch": 0.37020316027088035, "grad_norm": 18.748714447021484, "learning_rate": 9.931743649931446e-06, "lm_loss": 6.4978, "loss": 1.8218, "step": 164, "text_contrastive_loss": 0.8463 }, { "contrastive_loss": 0.8158, "epoch": 0.3724604966139955, "grad_norm": 20.521987915039062, "learning_rate": 9.93054371027751e-06, "lm_loss": 6.465, "loss": 2.0003, "step": 165, "text_contrastive_loss": 1.076 }, { "contrastive_loss": 0.7755, "epoch": 0.3747178329571106, "grad_norm": 18.84111976623535, "learning_rate": 9.929333388679593e-06, "lm_loss": 6.5212, "loss": 1.8329, "step": 166, "text_contrastive_loss": 0.8105 }, { "contrastive_loss": 0.8448, "epoch": 0.37697516930022573, "grad_norm": 18.741506576538086, "learning_rate": 9.928112687686197e-06, "lm_loss": 6.5177, "loss": 1.946, "step": 167, "text_contrastive_loss": 0.8989 }, { "contrastive_loss": 0.7236, "epoch": 0.3792325056433409, "grad_norm": 17.72517204284668, "learning_rate": 9.92688160986768e-06, "lm_loss": 6.3992, "loss": 1.7728, "step": 168, "text_contrastive_loss": 0.8185 }, { "contrastive_loss": 0.8869, "epoch": 0.38148984198645597, "grad_norm": 21.22738265991211, "learning_rate": 9.925640157816246e-06, "lm_loss": 6.4493, "loss": 1.9893, "step": 169, "text_contrastive_loss": 0.9149 }, { "contrastive_loss": 0.8058, "epoch": 0.3837471783295711, "grad_norm": 19.492149353027344, "learning_rate": 9.924388334145943e-06, "lm_loss": 6.3983, "loss": 1.9272, "step": 170, "text_contrastive_loss": 0.9632 }, { "contrastive_loss": 0.7366, "epoch": 0.3860045146726862, "grad_norm": 19.459230422973633, "learning_rate": 9.92312614149266e-06, "lm_loss": 6.3751, "loss": 1.8532, "step": 171, "text_contrastive_loss": 0.9582 }, { "contrastive_loss": 0.8346, "epoch": 0.38826185101580135, "grad_norm": 20.554122924804688, "learning_rate": 9.92185358251412e-06, "lm_loss": 6.3351, "loss": 1.9061, "step": 172, "text_contrastive_loss": 0.8758 }, { "contrastive_loss": 0.7572, "epoch": 0.3905191873589165, "grad_norm": 19.220319747924805, "learning_rate": 9.92057065988987e-06, "lm_loss": 6.493, "loss": 1.8178, "step": 173, "text_contrastive_loss": 0.8226 }, { "contrastive_loss": 0.8348, "epoch": 0.3927765237020316, "grad_norm": 21.056917190551758, "learning_rate": 9.919277376321284e-06, "lm_loss": 6.4358, "loss": 1.947, "step": 174, "text_contrastive_loss": 0.9373 }, { "contrastive_loss": 0.8956, "epoch": 0.39503386004514673, "grad_norm": 19.383411407470703, "learning_rate": 9.917973734531549e-06, "lm_loss": 6.4578, "loss": 1.9908, "step": 175, "text_contrastive_loss": 0.8988 }, { "contrastive_loss": 0.7699, "epoch": 0.3972911963882618, "grad_norm": 19.435169219970703, "learning_rate": 9.916659737265664e-06, "lm_loss": 6.4342, "loss": 1.917, "step": 176, "text_contrastive_loss": 1.0074 }, { "contrastive_loss": 0.757, "epoch": 0.39954853273137697, "grad_norm": 21.105255126953125, "learning_rate": 9.915335387290432e-06, "lm_loss": 6.2528, "loss": 1.7563, "step": 177, "text_contrastive_loss": 0.7481 }, { "contrastive_loss": 0.7856, "epoch": 0.4018058690744921, "grad_norm": 19.341381072998047, "learning_rate": 9.914000687394457e-06, "lm_loss": 6.4932, "loss": 1.8719, "step": 178, "text_contrastive_loss": 0.874 }, { "contrastive_loss": 0.7868, "epoch": 0.4040632054176072, "grad_norm": 19.426422119140625, "learning_rate": 9.912655640388134e-06, "lm_loss": 6.3566, "loss": 1.8835, "step": 179, "text_contrastive_loss": 0.9222 }, { "contrastive_loss": 0.8502, "epoch": 0.40632054176072235, "grad_norm": 21.145036697387695, "learning_rate": 9.911300249103646e-06, "lm_loss": 6.4195, "loss": 1.9542, "step": 180, "text_contrastive_loss": 0.9241 }, { "contrastive_loss": 0.8322, "epoch": 0.40857787810383744, "grad_norm": 18.03723907470703, "learning_rate": 9.909934516394957e-06, "lm_loss": 6.2954, "loss": 2.0169, "step": 181, "text_contrastive_loss": 1.1104 }, { "contrastive_loss": 1.003, "epoch": 0.4108352144469526, "grad_norm": 20.614885330200195, "learning_rate": 9.908558445137807e-06, "lm_loss": 6.4824, "loss": 2.1626, "step": 182, "text_contrastive_loss": 1.0227 }, { "contrastive_loss": 0.7777, "epoch": 0.41309255079006774, "grad_norm": 20.063800811767578, "learning_rate": 9.907172038229706e-06, "lm_loss": 6.3171, "loss": 1.8316, "step": 183, "text_contrastive_loss": 0.8445 }, { "contrastive_loss": 0.8547, "epoch": 0.4153498871331828, "grad_norm": 18.237977981567383, "learning_rate": 9.905775298589923e-06, "lm_loss": 6.4206, "loss": 1.9172, "step": 184, "text_contrastive_loss": 0.8408 }, { "contrastive_loss": 0.8484, "epoch": 0.417607223476298, "grad_norm": 20.95216941833496, "learning_rate": 9.904368229159494e-06, "lm_loss": 6.3509, "loss": 1.945, "step": 185, "text_contrastive_loss": 0.9229 }, { "contrastive_loss": 0.8764, "epoch": 0.4198645598194131, "grad_norm": 19.93537712097168, "learning_rate": 9.90295083290119e-06, "lm_loss": 6.6118, "loss": 2.0261, "step": 186, "text_contrastive_loss": 0.977 }, { "contrastive_loss": 1.0481, "epoch": 0.4221218961625282, "grad_norm": 22.335296630859375, "learning_rate": 9.901523112799543e-06, "lm_loss": 6.3017, "loss": 2.2153, "step": 187, "text_contrastive_loss": 1.0739 }, { "contrastive_loss": 0.9157, "epoch": 0.42437923250564336, "grad_norm": 21.24988555908203, "learning_rate": 9.90008507186081e-06, "lm_loss": 6.2699, "loss": 2.0187, "step": 188, "text_contrastive_loss": 0.952 }, { "contrastive_loss": 0.7626, "epoch": 0.42663656884875845, "grad_norm": 17.65534782409668, "learning_rate": 9.898636713112992e-06, "lm_loss": 6.3991, "loss": 1.8415, "step": 189, "text_contrastive_loss": 0.878 }, { "contrastive_loss": 0.8812, "epoch": 0.4288939051918736, "grad_norm": 20.629322052001953, "learning_rate": 9.897178039605803e-06, "lm_loss": 6.3606, "loss": 1.9729, "step": 190, "text_contrastive_loss": 0.9112 }, { "contrastive_loss": 0.8356, "epoch": 0.43115124153498874, "grad_norm": 19.353788375854492, "learning_rate": 9.895709054410686e-06, "lm_loss": 6.3502, "loss": 1.911, "step": 191, "text_contrastive_loss": 0.8808 }, { "contrastive_loss": 0.8544, "epoch": 0.43340857787810383, "grad_norm": 21.310510635375977, "learning_rate": 9.894229760620793e-06, "lm_loss": 6.31, "loss": 1.9133, "step": 192, "text_contrastive_loss": 0.8558 }, { "contrastive_loss": 0.8522, "epoch": 0.435665914221219, "grad_norm": 18.96470069885254, "learning_rate": 9.892740161350981e-06, "lm_loss": 6.3418, "loss": 1.9353, "step": 193, "text_contrastive_loss": 0.8978 }, { "contrastive_loss": 0.7986, "epoch": 0.43792325056433407, "grad_norm": 20.11381721496582, "learning_rate": 9.891240259737809e-06, "lm_loss": 6.3733, "loss": 1.9381, "step": 194, "text_contrastive_loss": 1.0043 }, { "contrastive_loss": 0.8889, "epoch": 0.4401805869074492, "grad_norm": 20.309608459472656, "learning_rate": 9.889730058939529e-06, "lm_loss": 6.4461, "loss": 1.96, "step": 195, "text_contrastive_loss": 0.8529 }, { "contrastive_loss": 0.8012, "epoch": 0.44243792325056436, "grad_norm": 20.747764587402344, "learning_rate": 9.888209562136074e-06, "lm_loss": 6.3646, "loss": 1.9054, "step": 196, "text_contrastive_loss": 0.9354 }, { "contrastive_loss": 0.841, "epoch": 0.44469525959367945, "grad_norm": 21.405588150024414, "learning_rate": 9.886678772529069e-06, "lm_loss": 6.2747, "loss": 2.0179, "step": 197, "text_contrastive_loss": 1.0988 }, { "contrastive_loss": 0.8754, "epoch": 0.4469525959367946, "grad_norm": 19.198837280273438, "learning_rate": 9.885137693341795e-06, "lm_loss": 6.328, "loss": 2.1004, "step": 198, "text_contrastive_loss": 1.1845 }, { "contrastive_loss": 0.8876, "epoch": 0.4492099322799097, "grad_norm": 18.697420120239258, "learning_rate": 9.883586327819214e-06, "lm_loss": 6.3426, "loss": 1.9726, "step": 199, "text_contrastive_loss": 0.9014 }, { "contrastive_loss": 0.8075, "epoch": 0.45146726862302483, "grad_norm": 19.029743194580078, "learning_rate": 9.88202467922794e-06, "lm_loss": 6.3034, "loss": 1.9712, "step": 200, "text_contrastive_loss": 1.0667 }, { "contrastive_loss": 0.8803, "epoch": 0.45372460496614, "grad_norm": 19.263851165771484, "learning_rate": 9.880452750856239e-06, "lm_loss": 6.3328, "loss": 2.043, "step": 201, "text_contrastive_loss": 1.0589 }, { "contrastive_loss": 0.8736, "epoch": 0.45598194130925507, "grad_norm": 19.789085388183594, "learning_rate": 9.878870546014025e-06, "lm_loss": 6.297, "loss": 1.9871, "step": 202, "text_contrastive_loss": 0.9676 }, { "contrastive_loss": 0.8993, "epoch": 0.4582392776523702, "grad_norm": 23.019350051879883, "learning_rate": 9.877278068032852e-06, "lm_loss": 6.2742, "loss": 2.0623, "step": 203, "text_contrastive_loss": 1.0711 }, { "contrastive_loss": 0.7822, "epoch": 0.4604966139954853, "grad_norm": 19.80655288696289, "learning_rate": 9.875675320265903e-06, "lm_loss": 6.2757, "loss": 1.8815, "step": 204, "text_contrastive_loss": 0.9435 }, { "contrastive_loss": 0.8734, "epoch": 0.46275395033860045, "grad_norm": 20.55428123474121, "learning_rate": 9.874062306087983e-06, "lm_loss": 6.2592, "loss": 1.9863, "step": 205, "text_contrastive_loss": 0.9739 }, { "contrastive_loss": 0.921, "epoch": 0.4650112866817156, "grad_norm": 21.780397415161133, "learning_rate": 9.872439028895518e-06, "lm_loss": 6.2387, "loss": 2.0729, "step": 206, "text_contrastive_loss": 1.056 }, { "contrastive_loss": 0.723, "epoch": 0.4672686230248307, "grad_norm": 17.902149200439453, "learning_rate": 9.870805492106546e-06, "lm_loss": 6.2261, "loss": 1.7971, "step": 207, "text_contrastive_loss": 0.9029 }, { "contrastive_loss": 0.8151, "epoch": 0.46952595936794583, "grad_norm": 17.340742111206055, "learning_rate": 9.869161699160704e-06, "lm_loss": 6.1862, "loss": 1.8445, "step": 208, "text_contrastive_loss": 0.8216 }, { "contrastive_loss": 0.7565, "epoch": 0.4717832957110609, "grad_norm": 17.048093795776367, "learning_rate": 9.867507653519225e-06, "lm_loss": 6.2923, "loss": 1.8102, "step": 209, "text_contrastive_loss": 0.8489 }, { "contrastive_loss": 0.6876, "epoch": 0.47404063205417607, "grad_norm": 17.589111328125, "learning_rate": 9.865843358664933e-06, "lm_loss": 6.2506, "loss": 1.753, "step": 210, "text_contrastive_loss": 0.8807 }, { "contrastive_loss": 0.8278, "epoch": 0.4762979683972912, "grad_norm": 18.502525329589844, "learning_rate": 9.86416881810223e-06, "lm_loss": 6.2913, "loss": 1.9284, "step": 211, "text_contrastive_loss": 0.943 }, { "contrastive_loss": 0.7451, "epoch": 0.4785553047404063, "grad_norm": 18.441038131713867, "learning_rate": 9.862484035357095e-06, "lm_loss": 6.3486, "loss": 1.7617, "step": 212, "text_contrastive_loss": 0.7634 }, { "contrastive_loss": 0.8968, "epoch": 0.48081264108352145, "grad_norm": 24.60144805908203, "learning_rate": 9.860789013977074e-06, "lm_loss": 6.3089, "loss": 2.0082, "step": 213, "text_contrastive_loss": 0.961 }, { "contrastive_loss": 0.7364, "epoch": 0.48306997742663654, "grad_norm": 18.438432693481445, "learning_rate": 9.859083757531265e-06, "lm_loss": 6.2724, "loss": 1.8209, "step": 214, "text_contrastive_loss": 0.9144 }, { "contrastive_loss": 0.8151, "epoch": 0.4853273137697517, "grad_norm": 19.14459228515625, "learning_rate": 9.857368269610325e-06, "lm_loss": 6.2602, "loss": 1.9227, "step": 215, "text_contrastive_loss": 0.9632 }, { "contrastive_loss": 0.8037, "epoch": 0.48758465011286684, "grad_norm": 19.237321853637695, "learning_rate": 9.85564255382645e-06, "lm_loss": 6.2941, "loss": 1.8994, "step": 216, "text_contrastive_loss": 0.9326 }, { "contrastive_loss": 0.7454, "epoch": 0.4898419864559819, "grad_norm": 18.339815139770508, "learning_rate": 9.853906613813378e-06, "lm_loss": 6.2437, "loss": 1.7913, "step": 217, "text_contrastive_loss": 0.8431 }, { "contrastive_loss": 0.7796, "epoch": 0.49209932279909707, "grad_norm": 20.978559494018555, "learning_rate": 9.852160453226367e-06, "lm_loss": 6.0887, "loss": 1.9323, "step": 218, "text_contrastive_loss": 1.0878 }, { "contrastive_loss": 0.8093, "epoch": 0.49435665914221216, "grad_norm": 19.603336334228516, "learning_rate": 9.850404075742204e-06, "lm_loss": 6.2458, "loss": 1.9098, "step": 219, "text_contrastive_loss": 0.9519 }, { "contrastive_loss": 0.8294, "epoch": 0.4966139954853273, "grad_norm": 19.92656707763672, "learning_rate": 9.848637485059183e-06, "lm_loss": 6.2273, "loss": 1.929, "step": 220, "text_contrastive_loss": 0.9537 }, { "contrastive_loss": 0.8109, "epoch": 0.49887133182844245, "grad_norm": 23.059837341308594, "learning_rate": 9.846860684897107e-06, "lm_loss": 6.2547, "loss": 2.0089, "step": 221, "text_contrastive_loss": 1.1451 }, { "contrastive_loss": 0.713, "epoch": 0.5011286681715575, "grad_norm": 18.783077239990234, "learning_rate": 9.845073678997275e-06, "lm_loss": 6.1863, "loss": 1.722, "step": 222, "text_contrastive_loss": 0.7807 }, { "contrastive_loss": 0.7074, "epoch": 0.5033860045146726, "grad_norm": 17.86538314819336, "learning_rate": 9.843276471122473e-06, "lm_loss": 6.1494, "loss": 1.7629, "step": 223, "text_contrastive_loss": 0.8812 }, { "contrastive_loss": 0.8604, "epoch": 0.5056433408577878, "grad_norm": 18.984697341918945, "learning_rate": 9.84146906505698e-06, "lm_loss": 6.3609, "loss": 1.9277, "step": 224, "text_contrastive_loss": 0.8625 }, { "contrastive_loss": 0.7167, "epoch": 0.5079006772009029, "grad_norm": 17.270008087158203, "learning_rate": 9.83965146460653e-06, "lm_loss": 6.2559, "loss": 1.8291, "step": 225, "text_contrastive_loss": 0.9736 }, { "contrastive_loss": 0.71, "epoch": 0.510158013544018, "grad_norm": 17.672595977783203, "learning_rate": 9.83782367359834e-06, "lm_loss": 6.0805, "loss": 1.6965, "step": 226, "text_contrastive_loss": 0.7568 }, { "contrastive_loss": 0.7668, "epoch": 0.5124153498871332, "grad_norm": 18.119373321533203, "learning_rate": 9.835985695881076e-06, "lm_loss": 6.2149, "loss": 1.846, "step": 227, "text_contrastive_loss": 0.9155 }, { "contrastive_loss": 0.6243, "epoch": 0.5146726862302483, "grad_norm": 17.03944969177246, "learning_rate": 9.834137535324852e-06, "lm_loss": 6.2418, "loss": 1.6596, "step": 228, "text_contrastive_loss": 0.8222 }, { "contrastive_loss": 0.8295, "epoch": 0.5169300225733634, "grad_norm": 20.065959930419922, "learning_rate": 9.83227919582123e-06, "lm_loss": 6.1313, "loss": 1.9263, "step": 229, "text_contrastive_loss": 0.9675 }, { "contrastive_loss": 0.7715, "epoch": 0.5191873589164786, "grad_norm": 18.148019790649414, "learning_rate": 9.830410681283203e-06, "lm_loss": 6.3683, "loss": 1.9036, "step": 230, "text_contrastive_loss": 0.9904 }, { "contrastive_loss": 0.7624, "epoch": 0.5214446952595937, "grad_norm": 20.754575729370117, "learning_rate": 9.828531995645183e-06, "lm_loss": 6.2368, "loss": 1.7355, "step": 231, "text_contrastive_loss": 0.6987 }, { "contrastive_loss": 0.9046, "epoch": 0.5237020316027088, "grad_norm": 23.928089141845703, "learning_rate": 9.826643142863006e-06, "lm_loss": 6.2934, "loss": 2.1454, "step": 232, "text_contrastive_loss": 1.2228 }, { "contrastive_loss": 0.848, "epoch": 0.5259593679458239, "grad_norm": 24.783737182617188, "learning_rate": 9.824744126913914e-06, "lm_loss": 6.2529, "loss": 2.0252, "step": 233, "text_contrastive_loss": 1.104 }, { "contrastive_loss": 0.7888, "epoch": 0.5282167042889391, "grad_norm": 17.51032257080078, "learning_rate": 9.822834951796547e-06, "lm_loss": 6.3361, "loss": 1.9283, "step": 234, "text_contrastive_loss": 1.0118 }, { "contrastive_loss": 0.8544, "epoch": 0.5304740406320542, "grad_norm": 19.135835647583008, "learning_rate": 9.820915621530939e-06, "lm_loss": 6.27, "loss": 1.9822, "step": 235, "text_contrastive_loss": 1.0017 }, { "contrastive_loss": 0.8286, "epoch": 0.5327313769751693, "grad_norm": 19.796823501586914, "learning_rate": 9.818986140158507e-06, "lm_loss": 6.1489, "loss": 1.862, "step": 236, "text_contrastive_loss": 0.8369 }, { "contrastive_loss": 0.7975, "epoch": 0.5349887133182845, "grad_norm": 18.388835906982422, "learning_rate": 9.817046511742042e-06, "lm_loss": 6.1075, "loss": 1.854, "step": 237, "text_contrastive_loss": 0.8914 }, { "contrastive_loss": 0.7875, "epoch": 0.5372460496613995, "grad_norm": 18.588680267333984, "learning_rate": 9.815096740365698e-06, "lm_loss": 6.1547, "loss": 1.8664, "step": 238, "text_contrastive_loss": 0.927 }, { "contrastive_loss": 0.7999, "epoch": 0.5395033860045146, "grad_norm": 19.11373519897461, "learning_rate": 9.81313683013499e-06, "lm_loss": 6.1818, "loss": 1.8483, "step": 239, "text_contrastive_loss": 0.8605 }, { "contrastive_loss": 0.8682, "epoch": 0.5417607223476298, "grad_norm": 17.680660247802734, "learning_rate": 9.811166785176785e-06, "lm_loss": 6.1513, "loss": 1.9229, "step": 240, "text_contrastive_loss": 0.8791 }, { "contrastive_loss": 0.7493, "epoch": 0.5440180586907449, "grad_norm": 17.710893630981445, "learning_rate": 9.809186609639281e-06, "lm_loss": 6.2055, "loss": 1.7842, "step": 241, "text_contrastive_loss": 0.8287 }, { "contrastive_loss": 0.8647, "epoch": 0.54627539503386, "grad_norm": 19.41048812866211, "learning_rate": 9.807196307692015e-06, "lm_loss": 6.1424, "loss": 1.9948, "step": 242, "text_contrastive_loss": 1.0316 }, { "contrastive_loss": 0.7898, "epoch": 0.5485327313769752, "grad_norm": 19.4019832611084, "learning_rate": 9.805195883525844e-06, "lm_loss": 6.178, "loss": 1.8574, "step": 243, "text_contrastive_loss": 0.8996 }, { "contrastive_loss": 0.7414, "epoch": 0.5507900677200903, "grad_norm": 18.429107666015625, "learning_rate": 9.803185341352936e-06, "lm_loss": 6.1879, "loss": 1.7764, "step": 244, "text_contrastive_loss": 0.8325 }, { "contrastive_loss": 0.8691, "epoch": 0.5530474040632054, "grad_norm": 19.68091583251953, "learning_rate": 9.80116468540677e-06, "lm_loss": 6.1057, "loss": 1.9459, "step": 245, "text_contrastive_loss": 0.9324 }, { "contrastive_loss": 0.8321, "epoch": 0.5553047404063205, "grad_norm": 19.439861297607422, "learning_rate": 9.799133919942117e-06, "lm_loss": 6.1777, "loss": 1.9379, "step": 246, "text_contrastive_loss": 0.9762 }, { "contrastive_loss": 0.6576, "epoch": 0.5575620767494357, "grad_norm": 18.352218627929688, "learning_rate": 9.797093049235034e-06, "lm_loss": 6.1988, "loss": 1.7478, "step": 247, "text_contrastive_loss": 0.9408 }, { "contrastive_loss": 0.7989, "epoch": 0.5598194130925508, "grad_norm": 18.497886657714844, "learning_rate": 9.795042077582856e-06, "lm_loss": 6.2574, "loss": 1.8518, "step": 248, "text_contrastive_loss": 0.8545 }, { "contrastive_loss": 0.6566, "epoch": 0.5620767494356659, "grad_norm": 16.71062660217285, "learning_rate": 9.792981009304192e-06, "lm_loss": 6.1203, "loss": 1.7234, "step": 249, "text_contrastive_loss": 0.9095 }, { "contrastive_loss": 0.7675, "epoch": 0.5643340857787811, "grad_norm": 19.55929946899414, "learning_rate": 9.790909848738907e-06, "lm_loss": 6.1617, "loss": 1.9057, "step": 250, "text_contrastive_loss": 1.044 }, { "contrastive_loss": 0.8264, "epoch": 0.5665914221218962, "grad_norm": 19.731369018554688, "learning_rate": 9.788828600248114e-06, "lm_loss": 6.1771, "loss": 1.9228, "step": 251, "text_contrastive_loss": 0.9573 }, { "contrastive_loss": 0.7257, "epoch": 0.5688487584650113, "grad_norm": 16.746164321899414, "learning_rate": 9.786737268214172e-06, "lm_loss": 6.1749, "loss": 1.8192, "step": 252, "text_contrastive_loss": 0.9519 }, { "contrastive_loss": 0.8816, "epoch": 0.5711060948081265, "grad_norm": 19.127241134643555, "learning_rate": 9.784635857040672e-06, "lm_loss": 6.086, "loss": 1.9611, "step": 253, "text_contrastive_loss": 0.9417 }, { "contrastive_loss": 0.8804, "epoch": 0.5733634311512416, "grad_norm": 20.695344924926758, "learning_rate": 9.782524371152425e-06, "lm_loss": 6.1488, "loss": 2.0119, "step": 254, "text_contrastive_loss": 1.0332 }, { "contrastive_loss": 0.844, "epoch": 0.5756207674943566, "grad_norm": 20.949626922607422, "learning_rate": 9.780402814995458e-06, "lm_loss": 6.2809, "loss": 1.8781, "step": 255, "text_contrastive_loss": 0.8119 }, { "contrastive_loss": 0.7005, "epoch": 0.5778781038374717, "grad_norm": 18.161090850830078, "learning_rate": 9.778271193037003e-06, "lm_loss": 6.1164, "loss": 1.8367, "step": 256, "text_contrastive_loss": 1.0491 }, { "contrastive_loss": 0.741, "epoch": 0.5801354401805869, "grad_norm": 17.87932777404785, "learning_rate": 9.776129509765487e-06, "lm_loss": 6.1365, "loss": 1.7805, "step": 257, "text_contrastive_loss": 0.8517 }, { "contrastive_loss": 0.8227, "epoch": 0.582392776523702, "grad_norm": 19.600927352905273, "learning_rate": 9.773977769690517e-06, "lm_loss": 6.1083, "loss": 1.9466, "step": 258, "text_contrastive_loss": 1.0261 }, { "contrastive_loss": 0.8353, "epoch": 0.5846501128668171, "grad_norm": 17.820899963378906, "learning_rate": 9.771815977342882e-06, "lm_loss": 6.0494, "loss": 1.9368, "step": 259, "text_contrastive_loss": 0.9931 }, { "contrastive_loss": 0.8439, "epoch": 0.5869074492099323, "grad_norm": 19.391754150390625, "learning_rate": 9.76964413727454e-06, "lm_loss": 6.1206, "loss": 1.9207, "step": 260, "text_contrastive_loss": 0.9295 }, { "contrastive_loss": 0.7734, "epoch": 0.5891647855530474, "grad_norm": 19.11463165283203, "learning_rate": 9.767462254058593e-06, "lm_loss": 6.0388, "loss": 1.8577, "step": 261, "text_contrastive_loss": 0.961 }, { "contrastive_loss": 0.8034, "epoch": 0.5914221218961625, "grad_norm": 19.316638946533203, "learning_rate": 9.765270332289307e-06, "lm_loss": 6.0882, "loss": 1.8743, "step": 262, "text_contrastive_loss": 0.9242 }, { "contrastive_loss": 0.84, "epoch": 0.5936794582392777, "grad_norm": 20.758302688598633, "learning_rate": 9.763068376582075e-06, "lm_loss": 6.033, "loss": 1.8907, "step": 263, "text_contrastive_loss": 0.8948 }, { "contrastive_loss": 0.6596, "epoch": 0.5959367945823928, "grad_norm": 17.379722595214844, "learning_rate": 9.76085639157342e-06, "lm_loss": 6.1246, "loss": 1.7109, "step": 264, "text_contrastive_loss": 0.8776 }, { "contrastive_loss": 0.7387, "epoch": 0.5981941309255079, "grad_norm": 19.40483283996582, "learning_rate": 9.758634381920982e-06, "lm_loss": 6.2125, "loss": 1.7771, "step": 265, "text_contrastive_loss": 0.8343 }, { "contrastive_loss": 0.9388, "epoch": 0.600451467268623, "grad_norm": 22.724979400634766, "learning_rate": 9.756402352303513e-06, "lm_loss": 6.1672, "loss": 2.1706, "step": 266, "text_contrastive_loss": 1.2302 }, { "contrastive_loss": 0.7913, "epoch": 0.6027088036117382, "grad_norm": 19.11801528930664, "learning_rate": 9.754160307420858e-06, "lm_loss": 6.2058, "loss": 1.8721, "step": 267, "text_contrastive_loss": 0.9203 }, { "contrastive_loss": 0.8368, "epoch": 0.6049661399548533, "grad_norm": 18.960634231567383, "learning_rate": 9.751908251993956e-06, "lm_loss": 6.153, "loss": 1.9811, "step": 268, "text_contrastive_loss": 1.058 }, { "contrastive_loss": 0.722, "epoch": 0.6072234762979684, "grad_norm": 18.81309700012207, "learning_rate": 9.749646190764823e-06, "lm_loss": 6.0044, "loss": 1.737, "step": 269, "text_contrastive_loss": 0.8293 }, { "contrastive_loss": 0.665, "epoch": 0.6094808126410836, "grad_norm": 18.15359115600586, "learning_rate": 9.747374128496541e-06, "lm_loss": 6.225, "loss": 1.7902, "step": 270, "text_contrastive_loss": 1.0054 }, { "contrastive_loss": 0.8478, "epoch": 0.6117381489841986, "grad_norm": 18.519039154052734, "learning_rate": 9.745092069973254e-06, "lm_loss": 6.1183, "loss": 1.9146, "step": 271, "text_contrastive_loss": 0.9099 }, { "contrastive_loss": 0.6826, "epoch": 0.6139954853273137, "grad_norm": 18.80630874633789, "learning_rate": 9.74280002000015e-06, "lm_loss": 6.1514, "loss": 1.736, "step": 272, "text_contrastive_loss": 0.8765 }, { "contrastive_loss": 0.8552, "epoch": 0.6162528216704289, "grad_norm": 22.33182144165039, "learning_rate": 9.74049798340346e-06, "lm_loss": 6.0026, "loss": 1.942, "step": 273, "text_contrastive_loss": 0.9731 }, { "contrastive_loss": 0.7087, "epoch": 0.618510158013544, "grad_norm": 18.393476486206055, "learning_rate": 9.738185965030444e-06, "lm_loss": 6.1862, "loss": 1.777, "step": 274, "text_contrastive_loss": 0.8993 }, { "contrastive_loss": 0.9343, "epoch": 0.6207674943566591, "grad_norm": 25.709531784057617, "learning_rate": 9.735863969749373e-06, "lm_loss": 5.9993, "loss": 1.9874, "step": 275, "text_contrastive_loss": 0.9063 }, { "contrastive_loss": 0.7387, "epoch": 0.6230248306997742, "grad_norm": 19.612539291381836, "learning_rate": 9.733532002449533e-06, "lm_loss": 6.1208, "loss": 1.8223, "step": 276, "text_contrastive_loss": 0.943 }, { "contrastive_loss": 0.8726, "epoch": 0.6252821670428894, "grad_norm": 18.62133026123047, "learning_rate": 9.731190068041205e-06, "lm_loss": 6.0839, "loss": 2.0505, "step": 277, "text_contrastive_loss": 1.1389 }, { "contrastive_loss": 0.7972, "epoch": 0.6275395033860045, "grad_norm": 18.76290512084961, "learning_rate": 9.728838171455655e-06, "lm_loss": 6.1786, "loss": 1.8659, "step": 278, "text_contrastive_loss": 0.9017 }, { "contrastive_loss": 0.7623, "epoch": 0.6297968397291196, "grad_norm": 19.95505714416504, "learning_rate": 9.72647631764513e-06, "lm_loss": 6.1835, "loss": 1.8325, "step": 279, "text_contrastive_loss": 0.9037 }, { "contrastive_loss": 0.7498, "epoch": 0.6320541760722348, "grad_norm": 17.03273582458496, "learning_rate": 9.724104511582838e-06, "lm_loss": 6.0583, "loss": 1.8232, "step": 280, "text_contrastive_loss": 0.9351 }, { "contrastive_loss": 0.8429, "epoch": 0.6343115124153499, "grad_norm": 21.37076187133789, "learning_rate": 9.721722758262948e-06, "lm_loss": 6.0827, "loss": 1.896, "step": 281, "text_contrastive_loss": 0.8896 }, { "contrastive_loss": 0.8617, "epoch": 0.636568848758465, "grad_norm": 19.734153747558594, "learning_rate": 9.719331062700572e-06, "lm_loss": 6.0058, "loss": 1.9611, "step": 282, "text_contrastive_loss": 0.9976 }, { "contrastive_loss": 0.8787, "epoch": 0.6388261851015802, "grad_norm": 21.746551513671875, "learning_rate": 9.716929429931757e-06, "lm_loss": 6.0947, "loss": 1.9843, "step": 283, "text_contrastive_loss": 0.9922 }, { "contrastive_loss": 0.832, "epoch": 0.6410835214446953, "grad_norm": 18.039628982543945, "learning_rate": 9.714517865013473e-06, "lm_loss": 6.1092, "loss": 1.9548, "step": 284, "text_contrastive_loss": 1.0239 }, { "contrastive_loss": 0.764, "epoch": 0.6433408577878104, "grad_norm": 18.836318969726562, "learning_rate": 9.712096373023603e-06, "lm_loss": 6.0648, "loss": 1.832, "step": 285, "text_contrastive_loss": 0.9231 }, { "contrastive_loss": 0.9148, "epoch": 0.6455981941309256, "grad_norm": 20.60382652282715, "learning_rate": 9.70966495906094e-06, "lm_loss": 6.0534, "loss": 2.0786, "step": 286, "text_contrastive_loss": 1.1169 }, { "contrastive_loss": 0.6971, "epoch": 0.6478555304740407, "grad_norm": 16.91483497619629, "learning_rate": 9.707223628245157e-06, "lm_loss": 6.0742, "loss": 1.7743, "step": 287, "text_contrastive_loss": 0.9396 }, { "contrastive_loss": 0.7757, "epoch": 0.6501128668171557, "grad_norm": 17.441919326782227, "learning_rate": 9.70477238571682e-06, "lm_loss": 6.1147, "loss": 1.8067, "step": 288, "text_contrastive_loss": 0.8391 }, { "contrastive_loss": 0.825, "epoch": 0.6523702031602708, "grad_norm": 18.200672149658203, "learning_rate": 9.702311236637357e-06, "lm_loss": 6.0121, "loss": 1.9316, "step": 289, "text_contrastive_loss": 1.0108 }, { "contrastive_loss": 0.764, "epoch": 0.654627539503386, "grad_norm": 18.553850173950195, "learning_rate": 9.699840186189061e-06, "lm_loss": 6.0134, "loss": 1.8213, "step": 290, "text_contrastive_loss": 0.9119 }, { "contrastive_loss": 0.876, "epoch": 0.6568848758465011, "grad_norm": 18.69213104248047, "learning_rate": 9.697359239575069e-06, "lm_loss": 6.1142, "loss": 1.904, "step": 291, "text_contrastive_loss": 0.8331 }, { "contrastive_loss": 0.7976, "epoch": 0.6591422121896162, "grad_norm": 19.904512405395508, "learning_rate": 9.694868402019362e-06, "lm_loss": 6.0478, "loss": 1.8918, "step": 292, "text_contrastive_loss": 0.9788 }, { "contrastive_loss": 0.8514, "epoch": 0.6613995485327314, "grad_norm": 20.151090621948242, "learning_rate": 9.69236767876674e-06, "lm_loss": 5.9712, "loss": 2.0123, "step": 293, "text_contrastive_loss": 1.1275 }, { "contrastive_loss": 0.7869, "epoch": 0.6636568848758465, "grad_norm": 19.191810607910156, "learning_rate": 9.689857075082828e-06, "lm_loss": 6.0753, "loss": 1.8907, "step": 294, "text_contrastive_loss": 0.9925 }, { "contrastive_loss": 0.9015, "epoch": 0.6659142212189616, "grad_norm": 18.698793411254883, "learning_rate": 9.687336596254045e-06, "lm_loss": 6.009, "loss": 2.0058, "step": 295, "text_contrastive_loss": 1.0067 }, { "contrastive_loss": 0.7328, "epoch": 0.6681715575620768, "grad_norm": 18.889925003051758, "learning_rate": 9.68480624758761e-06, "lm_loss": 6.1208, "loss": 1.748, "step": 296, "text_contrastive_loss": 0.8062 }, { "contrastive_loss": 0.7373, "epoch": 0.6704288939051919, "grad_norm": 17.773231506347656, "learning_rate": 9.682266034411527e-06, "lm_loss": 6.0026, "loss": 1.7348, "step": 297, "text_contrastive_loss": 0.7945 }, { "contrastive_loss": 0.8293, "epoch": 0.672686230248307, "grad_norm": 17.689512252807617, "learning_rate": 9.679715962074566e-06, "lm_loss": 6.0864, "loss": 1.8706, "step": 298, "text_contrastive_loss": 0.8653 }, { "contrastive_loss": 0.7081, "epoch": 0.6749435665914221, "grad_norm": 16.270689010620117, "learning_rate": 9.677156035946253e-06, "lm_loss": 6.1083, "loss": 1.8451, "step": 299, "text_contrastive_loss": 1.0523 }, { "contrastive_loss": 0.6502, "epoch": 0.6772009029345373, "grad_norm": 16.979660034179688, "learning_rate": 9.674586261416874e-06, "lm_loss": 6.0773, "loss": 1.6506, "step": 300, "text_contrastive_loss": 0.7853 }, { "contrastive_loss": 0.7074, "epoch": 0.6794582392776524, "grad_norm": 17.27545928955078, "learning_rate": 9.672006643897444e-06, "lm_loss": 6.1153, "loss": 1.8001, "step": 301, "text_contrastive_loss": 0.9624 }, { "contrastive_loss": 0.8176, "epoch": 0.6817155756207675, "grad_norm": 18.559659957885742, "learning_rate": 9.669417188819704e-06, "lm_loss": 6.0473, "loss": 1.9087, "step": 302, "text_contrastive_loss": 0.9728 }, { "contrastive_loss": 0.741, "epoch": 0.6839729119638827, "grad_norm": 16.704532623291016, "learning_rate": 9.666817901636115e-06, "lm_loss": 6.0039, "loss": 1.7943, "step": 303, "text_contrastive_loss": 0.9059 }, { "contrastive_loss": 0.8288, "epoch": 0.6862302483069977, "grad_norm": 20.246355056762695, "learning_rate": 9.664208787819833e-06, "lm_loss": 6.0545, "loss": 1.8484, "step": 304, "text_contrastive_loss": 0.8284 }, { "contrastive_loss": 0.6943, "epoch": 0.6884875846501128, "grad_norm": 17.586076736450195, "learning_rate": 9.66158985286471e-06, "lm_loss": 5.9785, "loss": 1.7344, "step": 305, "text_contrastive_loss": 0.8846 }, { "contrastive_loss": 0.6383, "epoch": 0.690744920993228, "grad_norm": 16.763389587402344, "learning_rate": 9.658961102285276e-06, "lm_loss": 6.1017, "loss": 1.6815, "step": 306, "text_contrastive_loss": 0.8661 }, { "contrastive_loss": 0.7344, "epoch": 0.6930022573363431, "grad_norm": 17.52794647216797, "learning_rate": 9.656322541616734e-06, "lm_loss": 5.981, "loss": 1.7619, "step": 307, "text_contrastive_loss": 0.8589 }, { "contrastive_loss": 0.9579, "epoch": 0.6952595936794582, "grad_norm": 21.528606414794922, "learning_rate": 9.653674176414936e-06, "lm_loss": 6.0425, "loss": 2.0525, "step": 308, "text_contrastive_loss": 0.9807 }, { "contrastive_loss": 0.803, "epoch": 0.6975169300225733, "grad_norm": 19.41025161743164, "learning_rate": 9.651016012256382e-06, "lm_loss": 5.9865, "loss": 1.9478, "step": 309, "text_contrastive_loss": 1.0923 }, { "contrastive_loss": 0.7292, "epoch": 0.6997742663656885, "grad_norm": 19.212434768676758, "learning_rate": 9.648348054738208e-06, "lm_loss": 6.2198, "loss": 1.8035, "step": 310, "text_contrastive_loss": 0.9047 }, { "contrastive_loss": 0.7602, "epoch": 0.7020316027088036, "grad_norm": 20.256837844848633, "learning_rate": 9.64567030947817e-06, "lm_loss": 6.1579, "loss": 1.8317, "step": 311, "text_contrastive_loss": 0.9115 }, { "contrastive_loss": 0.7533, "epoch": 0.7042889390519187, "grad_norm": 17.484106063842773, "learning_rate": 9.642982782114628e-06, "lm_loss": 6.0454, "loss": 1.8201, "step": 312, "text_contrastive_loss": 0.9245 }, { "contrastive_loss": 0.8264, "epoch": 0.7065462753950339, "grad_norm": 18.091968536376953, "learning_rate": 9.640285478306546e-06, "lm_loss": 6.1478, "loss": 1.8968, "step": 313, "text_contrastive_loss": 0.9113 }, { "contrastive_loss": 0.7541, "epoch": 0.708803611738149, "grad_norm": 24.659730911254883, "learning_rate": 9.63757840373347e-06, "lm_loss": 6.1216, "loss": 1.8043, "step": 314, "text_contrastive_loss": 0.876 }, { "contrastive_loss": 0.765, "epoch": 0.7110609480812641, "grad_norm": 19.639833450317383, "learning_rate": 9.634861564095525e-06, "lm_loss": 6.1299, "loss": 1.9027, "step": 315, "text_contrastive_loss": 1.0496 }, { "contrastive_loss": 0.8109, "epoch": 0.7133182844243793, "grad_norm": 17.972936630249023, "learning_rate": 9.632134965113389e-06, "lm_loss": 5.9087, "loss": 1.9626, "step": 316, "text_contrastive_loss": 1.1217 }, { "contrastive_loss": 0.7516, "epoch": 0.7155756207674944, "grad_norm": 18.55760383605957, "learning_rate": 9.629398612528299e-06, "lm_loss": 6.0409, "loss": 1.8372, "step": 317, "text_contrastive_loss": 0.963 }, { "contrastive_loss": 0.7369, "epoch": 0.7178329571106095, "grad_norm": 16.857271194458008, "learning_rate": 9.626652512102021e-06, "lm_loss": 6.049, "loss": 1.8467, "step": 318, "text_contrastive_loss": 1.0097 }, { "contrastive_loss": 0.7255, "epoch": 0.7200902934537246, "grad_norm": 17.906700134277344, "learning_rate": 9.623896669616855e-06, "lm_loss": 6.1112, "loss": 1.791, "step": 319, "text_contrastive_loss": 0.9087 }, { "contrastive_loss": 0.8361, "epoch": 0.7223476297968398, "grad_norm": 18.165489196777344, "learning_rate": 9.621131090875603e-06, "lm_loss": 6.0386, "loss": 1.8247, "step": 320, "text_contrastive_loss": 0.7695 }, { "contrastive_loss": 0.9146, "epoch": 0.7246049661399548, "grad_norm": 18.603918075561523, "learning_rate": 9.618355781701584e-06, "lm_loss": 5.9065, "loss": 1.9471, "step": 321, "text_contrastive_loss": 0.8838 }, { "contrastive_loss": 0.77, "epoch": 0.7268623024830699, "grad_norm": 18.002338409423828, "learning_rate": 9.61557074793859e-06, "lm_loss": 6.0749, "loss": 1.8642, "step": 322, "text_contrastive_loss": 0.9735 }, { "contrastive_loss": 0.8282, "epoch": 0.7291196388261851, "grad_norm": 18.4736385345459, "learning_rate": 9.612775995450896e-06, "lm_loss": 6.0022, "loss": 1.899, "step": 323, "text_contrastive_loss": 0.9412 }, { "contrastive_loss": 0.7157, "epoch": 0.7313769751693002, "grad_norm": 16.921104431152344, "learning_rate": 9.609971530123243e-06, "lm_loss": 5.9862, "loss": 1.7669, "step": 324, "text_contrastive_loss": 0.9052 }, { "contrastive_loss": 0.7339, "epoch": 0.7336343115124153, "grad_norm": 18.970903396606445, "learning_rate": 9.607157357860823e-06, "lm_loss": 6.0996, "loss": 1.7801, "step": 325, "text_contrastive_loss": 0.8727 }, { "contrastive_loss": 0.7823, "epoch": 0.7358916478555305, "grad_norm": 19.708547592163086, "learning_rate": 9.604333484589266e-06, "lm_loss": 5.9082, "loss": 1.8611, "step": 326, "text_contrastive_loss": 0.976 }, { "contrastive_loss": 0.7769, "epoch": 0.7381489841986456, "grad_norm": 18.057262420654297, "learning_rate": 9.601499916254626e-06, "lm_loss": 5.992, "loss": 1.9043, "step": 327, "text_contrastive_loss": 1.0565 }, { "contrastive_loss": 0.7535, "epoch": 0.7404063205417607, "grad_norm": 19.701824188232422, "learning_rate": 9.598656658823378e-06, "lm_loss": 6.1302, "loss": 1.8041, "step": 328, "text_contrastive_loss": 0.8751 }, { "contrastive_loss": 0.75, "epoch": 0.7426636568848759, "grad_norm": 18.57602310180664, "learning_rate": 9.595803718282391e-06, "lm_loss": 6.0646, "loss": 1.7876, "step": 329, "text_contrastive_loss": 0.8623 }, { "contrastive_loss": 0.7559, "epoch": 0.744920993227991, "grad_norm": 17.691495895385742, "learning_rate": 9.59294110063893e-06, "lm_loss": 5.971, "loss": 1.8275, "step": 330, "text_contrastive_loss": 0.9489 }, { "contrastive_loss": 0.7651, "epoch": 0.7471783295711061, "grad_norm": 19.426851272583008, "learning_rate": 9.590068811920637e-06, "lm_loss": 5.9845, "loss": 1.8219, "step": 331, "text_contrastive_loss": 0.9168 }, { "contrastive_loss": 0.7343, "epoch": 0.7494356659142212, "grad_norm": 18.151290893554688, "learning_rate": 9.587186858175507e-06, "lm_loss": 5.9706, "loss": 1.7612, "step": 332, "text_contrastive_loss": 0.8596 }, { "contrastive_loss": 0.7179, "epoch": 0.7516930022573364, "grad_norm": 16.147844314575195, "learning_rate": 9.584295245471898e-06, "lm_loss": 6.0191, "loss": 1.7633, "step": 333, "text_contrastive_loss": 0.887 }, { "contrastive_loss": 0.696, "epoch": 0.7539503386004515, "grad_norm": 16.67496681213379, "learning_rate": 9.581393979898502e-06, "lm_loss": 6.0286, "loss": 1.7429, "step": 334, "text_contrastive_loss": 0.8882 }, { "contrastive_loss": 0.7027, "epoch": 0.7562076749435666, "grad_norm": 17.9287166595459, "learning_rate": 9.578483067564335e-06, "lm_loss": 6.037, "loss": 1.8146, "step": 335, "text_contrastive_loss": 1.0164 }, { "contrastive_loss": 0.8264, "epoch": 0.7584650112866818, "grad_norm": 18.266008377075195, "learning_rate": 9.575562514598727e-06, "lm_loss": 5.93, "loss": 1.8494, "step": 336, "text_contrastive_loss": 0.86 }, { "contrastive_loss": 0.6572, "epoch": 0.7607223476297968, "grad_norm": 17.233238220214844, "learning_rate": 9.572632327151309e-06, "lm_loss": 6.0427, "loss": 1.7353, "step": 337, "text_contrastive_loss": 0.9478 }, { "contrastive_loss": 0.8304, "epoch": 0.7629796839729119, "grad_norm": 18.098194122314453, "learning_rate": 9.569692511391995e-06, "lm_loss": 5.9944, "loss": 1.8887, "step": 338, "text_contrastive_loss": 0.9178 }, { "contrastive_loss": 0.8, "epoch": 0.7652370203160271, "grad_norm": 17.877992630004883, "learning_rate": 9.566743073510976e-06, "lm_loss": 5.9982, "loss": 1.8333, "step": 339, "text_contrastive_loss": 0.867 }, { "contrastive_loss": 0.725, "epoch": 0.7674943566591422, "grad_norm": 17.114086151123047, "learning_rate": 9.563784019718704e-06, "lm_loss": 6.0061, "loss": 1.6668, "step": 340, "text_contrastive_loss": 0.6822 }, { "contrastive_loss": 0.7336, "epoch": 0.7697516930022573, "grad_norm": 17.13675308227539, "learning_rate": 9.560815356245875e-06, "lm_loss": 5.9689, "loss": 1.7971, "step": 341, "text_contrastive_loss": 0.9332 }, { "contrastive_loss": 0.6919, "epoch": 0.7720090293453724, "grad_norm": 17.996566772460938, "learning_rate": 9.557837089343424e-06, "lm_loss": 6.0123, "loss": 1.7188, "step": 342, "text_contrastive_loss": 0.8514 }, { "contrastive_loss": 0.795, "epoch": 0.7742663656884876, "grad_norm": 18.684648513793945, "learning_rate": 9.554849225282503e-06, "lm_loss": 5.9597, "loss": 1.8536, "step": 343, "text_contrastive_loss": 0.9251 }, { "contrastive_loss": 0.7855, "epoch": 0.7765237020316027, "grad_norm": 20.135141372680664, "learning_rate": 9.551851770354477e-06, "lm_loss": 6.0397, "loss": 1.8296, "step": 344, "text_contrastive_loss": 0.8803 }, { "contrastive_loss": 0.7646, "epoch": 0.7787810383747178, "grad_norm": 18.024913787841797, "learning_rate": 9.548844730870903e-06, "lm_loss": 5.933, "loss": 1.82, "step": 345, "text_contrastive_loss": 0.9243 }, { "contrastive_loss": 0.8148, "epoch": 0.781038374717833, "grad_norm": 19.366764068603516, "learning_rate": 9.545828113163516e-06, "lm_loss": 6.0345, "loss": 1.9181, "step": 346, "text_contrastive_loss": 0.9997 }, { "contrastive_loss": 0.6809, "epoch": 0.7832957110609481, "grad_norm": 16.887325286865234, "learning_rate": 9.542801923584228e-06, "lm_loss": 6.0031, "loss": 1.7064, "step": 347, "text_contrastive_loss": 0.8503 }, { "contrastive_loss": 0.8188, "epoch": 0.7855530474040632, "grad_norm": 19.915956497192383, "learning_rate": 9.5397661685051e-06, "lm_loss": 5.9961, "loss": 1.8996, "step": 348, "text_contrastive_loss": 0.9623 }, { "contrastive_loss": 0.8719, "epoch": 0.7878103837471784, "grad_norm": 17.26915168762207, "learning_rate": 9.536720854318333e-06, "lm_loss": 5.9717, "loss": 1.9456, "step": 349, "text_contrastive_loss": 0.9532 }, { "contrastive_loss": 0.7545, "epoch": 0.7900677200902935, "grad_norm": 19.145780563354492, "learning_rate": 9.533665987436262e-06, "lm_loss": 6.1077, "loss": 1.7845, "step": 350, "text_contrastive_loss": 0.8383 }, { "contrastive_loss": 0.71, "epoch": 0.7923250564334086, "grad_norm": 18.116910934448242, "learning_rate": 9.530601574291331e-06, "lm_loss": 5.972, "loss": 1.7411, "step": 351, "text_contrastive_loss": 0.8678 }, { "contrastive_loss": 0.8342, "epoch": 0.7945823927765236, "grad_norm": 22.180986404418945, "learning_rate": 9.527527621336087e-06, "lm_loss": 6.0789, "loss": 1.9307, "step": 352, "text_contrastive_loss": 0.9772 }, { "contrastive_loss": 0.8068, "epoch": 0.7968397291196389, "grad_norm": 17.729379653930664, "learning_rate": 9.524444135043168e-06, "lm_loss": 6.0396, "loss": 1.9457, "step": 353, "text_contrastive_loss": 1.0698 }, { "contrastive_loss": 0.6485, "epoch": 0.7990970654627539, "grad_norm": 16.314897537231445, "learning_rate": 9.521351121905278e-06, "lm_loss": 5.8872, "loss": 1.6582, "step": 354, "text_contrastive_loss": 0.842 }, { "contrastive_loss": 0.819, "epoch": 0.801354401805869, "grad_norm": 18.633771896362305, "learning_rate": 9.518248588435185e-06, "lm_loss": 6.0862, "loss": 1.8484, "step": 355, "text_contrastive_loss": 0.8414 }, { "contrastive_loss": 0.8041, "epoch": 0.8036117381489842, "grad_norm": 17.356449127197266, "learning_rate": 9.515136541165708e-06, "lm_loss": 5.9713, "loss": 1.9172, "step": 356, "text_contrastive_loss": 1.0319 }, { "contrastive_loss": 0.7013, "epoch": 0.8058690744920993, "grad_norm": 17.476306915283203, "learning_rate": 9.512014986649691e-06, "lm_loss": 6.0015, "loss": 1.6744, "step": 357, "text_contrastive_loss": 0.7459 }, { "contrastive_loss": 0.6759, "epoch": 0.8081264108352144, "grad_norm": 17.530406951904297, "learning_rate": 9.50888393146e-06, "lm_loss": 5.9793, "loss": 1.7313, "step": 358, "text_contrastive_loss": 0.9151 }, { "contrastive_loss": 0.7349, "epoch": 0.8103837471783296, "grad_norm": 17.47347640991211, "learning_rate": 9.50574338218951e-06, "lm_loss": 5.8924, "loss": 1.7656, "step": 359, "text_contrastive_loss": 0.883 }, { "contrastive_loss": 0.7602, "epoch": 0.8126410835214447, "grad_norm": 17.209794998168945, "learning_rate": 9.502593345451078e-06, "lm_loss": 5.8913, "loss": 1.8603, "step": 360, "text_contrastive_loss": 1.0221 }, { "contrastive_loss": 0.5707, "epoch": 0.8148984198645598, "grad_norm": 15.557596206665039, "learning_rate": 9.499433827877547e-06, "lm_loss": 5.9123, "loss": 1.5769, "step": 361, "text_contrastive_loss": 0.83 }, { "contrastive_loss": 0.7045, "epoch": 0.8171557562076749, "grad_norm": 17.717817306518555, "learning_rate": 9.49626483612172e-06, "lm_loss": 5.971, "loss": 1.7505, "step": 362, "text_contrastive_loss": 0.8978 }, { "contrastive_loss": 0.7613, "epoch": 0.8194130925507901, "grad_norm": 18.573936462402344, "learning_rate": 9.493086376856346e-06, "lm_loss": 5.97, "loss": 1.8467, "step": 363, "text_contrastive_loss": 0.9768 }, { "contrastive_loss": 0.7302, "epoch": 0.8216704288939052, "grad_norm": 19.44474983215332, "learning_rate": 9.489898456774116e-06, "lm_loss": 5.9779, "loss": 1.751, "step": 364, "text_contrastive_loss": 0.846 }, { "contrastive_loss": 0.7228, "epoch": 0.8239277652370203, "grad_norm": 16.996919631958008, "learning_rate": 9.486701082587635e-06, "lm_loss": 5.9198, "loss": 1.6931, "step": 365, "text_contrastive_loss": 0.7567 }, { "contrastive_loss": 0.7575, "epoch": 0.8261851015801355, "grad_norm": 19.95162010192871, "learning_rate": 9.483494261029418e-06, "lm_loss": 5.9313, "loss": 1.8519, "step": 366, "text_contrastive_loss": 1.0027 }, { "contrastive_loss": 0.7983, "epoch": 0.8284424379232506, "grad_norm": 21.023330688476562, "learning_rate": 9.480277998851875e-06, "lm_loss": 5.941, "loss": 1.8368, "step": 367, "text_contrastive_loss": 0.8888 }, { "contrastive_loss": 1.0023, "epoch": 0.8306997742663657, "grad_norm": 24.805105209350586, "learning_rate": 9.47705230282729e-06, "lm_loss": 5.915, "loss": 2.1282, "step": 368, "text_contrastive_loss": 1.0688 }, { "contrastive_loss": 0.6559, "epoch": 0.8329571106094809, "grad_norm": 17.166223526000977, "learning_rate": 9.473817179747815e-06, "lm_loss": 5.921, "loss": 1.6759, "step": 369, "text_contrastive_loss": 0.8558 }, { "contrastive_loss": 0.8761, "epoch": 0.835214446952596, "grad_norm": 19.827442169189453, "learning_rate": 9.470572636425451e-06, "lm_loss": 5.8163, "loss": 1.934, "step": 370, "text_contrastive_loss": 0.9525 }, { "contrastive_loss": 0.7552, "epoch": 0.837471783295711, "grad_norm": 18.570728302001953, "learning_rate": 9.467318679692031e-06, "lm_loss": 6.1239, "loss": 1.7529, "step": 371, "text_contrastive_loss": 0.7706 }, { "contrastive_loss": 0.6568, "epoch": 0.8397291196388262, "grad_norm": 15.911295890808105, "learning_rate": 9.464055316399217e-06, "lm_loss": 6.0501, "loss": 1.6502, "step": 372, "text_contrastive_loss": 0.7767 }, { "contrastive_loss": 0.6174, "epoch": 0.8419864559819413, "grad_norm": 17.889629364013672, "learning_rate": 9.46078255341847e-06, "lm_loss": 5.9044, "loss": 1.5875, "step": 373, "text_contrastive_loss": 0.7594 }, { "contrastive_loss": 0.8546, "epoch": 0.8442437923250564, "grad_norm": 20.568992614746094, "learning_rate": 9.457500397641049e-06, "lm_loss": 6.0276, "loss": 1.9056, "step": 374, "text_contrastive_loss": 0.8965 }, { "contrastive_loss": 0.6998, "epoch": 0.8465011286681715, "grad_norm": 19.322673797607422, "learning_rate": 9.454208855977986e-06, "lm_loss": 5.9043, "loss": 1.657, "step": 375, "text_contrastive_loss": 0.7336 }, { "contrastive_loss": 0.7738, "epoch": 0.8487584650112867, "grad_norm": 20.86590576171875, "learning_rate": 9.450907935360081e-06, "lm_loss": 5.931, "loss": 1.7915, "step": 376, "text_contrastive_loss": 0.8493 }, { "contrastive_loss": 0.7651, "epoch": 0.8510158013544018, "grad_norm": 17.621313095092773, "learning_rate": 9.447597642737878e-06, "lm_loss": 6.0016, "loss": 1.8215, "step": 377, "text_contrastive_loss": 0.9126 }, { "contrastive_loss": 0.7374, "epoch": 0.8532731376975169, "grad_norm": 18.98619842529297, "learning_rate": 9.44427798508166e-06, "lm_loss": 5.9121, "loss": 1.8098, "step": 378, "text_contrastive_loss": 0.9625 }, { "contrastive_loss": 0.7457, "epoch": 0.8555304740406321, "grad_norm": 19.90561866760254, "learning_rate": 9.440948969381425e-06, "lm_loss": 5.9216, "loss": 1.7157, "step": 379, "text_contrastive_loss": 0.7558 }, { "contrastive_loss": 0.7678, "epoch": 0.8577878103837472, "grad_norm": 18.853580474853516, "learning_rate": 9.437610602646878e-06, "lm_loss": 5.9362, "loss": 1.8166, "step": 380, "text_contrastive_loss": 0.9103 }, { "contrastive_loss": 0.6174, "epoch": 0.8600451467268623, "grad_norm": 16.593408584594727, "learning_rate": 9.434262891907413e-06, "lm_loss": 5.9121, "loss": 1.6502, "step": 381, "text_contrastive_loss": 0.883 }, { "contrastive_loss": 0.7372, "epoch": 0.8623024830699775, "grad_norm": 19.423614501953125, "learning_rate": 9.430905844212102e-06, "lm_loss": 5.9725, "loss": 1.7701, "step": 382, "text_contrastive_loss": 0.8712 }, { "contrastive_loss": 0.7376, "epoch": 0.8645598194130926, "grad_norm": 18.79985809326172, "learning_rate": 9.427539466629672e-06, "lm_loss": 5.8906, "loss": 1.7281, "step": 383, "text_contrastive_loss": 0.8029 }, { "contrastive_loss": 0.6192, "epoch": 0.8668171557562077, "grad_norm": 16.10624122619629, "learning_rate": 9.424163766248499e-06, "lm_loss": 5.9218, "loss": 1.6325, "step": 384, "text_contrastive_loss": 0.8423 }, { "contrastive_loss": 0.8226, "epoch": 0.8690744920993227, "grad_norm": 19.454587936401367, "learning_rate": 9.420778750176588e-06, "lm_loss": 5.9127, "loss": 1.8905, "step": 385, "text_contrastive_loss": 0.9533 }, { "contrastive_loss": 0.8053, "epoch": 0.871331828442438, "grad_norm": 19.90314483642578, "learning_rate": 9.41738442554156e-06, "lm_loss": 5.8353, "loss": 1.8316, "step": 386, "text_contrastive_loss": 0.8856 }, { "contrastive_loss": 0.9067, "epoch": 0.873589164785553, "grad_norm": 20.5092830657959, "learning_rate": 9.41398079949064e-06, "lm_loss": 5.97, "loss": 1.9714, "step": 387, "text_contrastive_loss": 0.9354 }, { "contrastive_loss": 0.7665, "epoch": 0.8758465011286681, "grad_norm": 19.935422897338867, "learning_rate": 9.41056787919063e-06, "lm_loss": 5.7969, "loss": 1.7965, "step": 388, "text_contrastive_loss": 0.9006 }, { "contrastive_loss": 0.7677, "epoch": 0.8781038374717833, "grad_norm": 18.127853393554688, "learning_rate": 9.407145671827909e-06, "lm_loss": 5.9307, "loss": 1.7778, "step": 389, "text_contrastive_loss": 0.8342 }, { "contrastive_loss": 0.7298, "epoch": 0.8803611738148984, "grad_norm": 17.655113220214844, "learning_rate": 9.403714184608411e-06, "lm_loss": 5.7757, "loss": 1.7349, "step": 390, "text_contrastive_loss": 0.8551 }, { "contrastive_loss": 0.8102, "epoch": 0.8826185101580135, "grad_norm": 18.176137924194336, "learning_rate": 9.400273424757607e-06, "lm_loss": 6.0719, "loss": 1.8673, "step": 391, "text_contrastive_loss": 0.8999 }, { "contrastive_loss": 0.6804, "epoch": 0.8848758465011287, "grad_norm": 19.85501480102539, "learning_rate": 9.396823399520495e-06, "lm_loss": 5.861, "loss": 1.6786, "step": 392, "text_contrastive_loss": 0.8241 }, { "contrastive_loss": 0.7525, "epoch": 0.8871331828442438, "grad_norm": 18.777780532836914, "learning_rate": 9.393364116161582e-06, "lm_loss": 5.9507, "loss": 1.8802, "step": 393, "text_contrastive_loss": 1.0652 }, { "contrastive_loss": 0.6989, "epoch": 0.8893905191873589, "grad_norm": 17.526351928710938, "learning_rate": 9.38989558196487e-06, "lm_loss": 5.8805, "loss": 1.7488, "step": 394, "text_contrastive_loss": 0.9238 }, { "contrastive_loss": 0.8578, "epoch": 0.891647855530474, "grad_norm": 19.801944732666016, "learning_rate": 9.386417804233836e-06, "lm_loss": 5.9629, "loss": 1.9293, "step": 395, "text_contrastive_loss": 0.9504 }, { "contrastive_loss": 0.7496, "epoch": 0.8939051918735892, "grad_norm": 18.684749603271484, "learning_rate": 9.382930790291426e-06, "lm_loss": 5.8636, "loss": 1.7431, "step": 396, "text_contrastive_loss": 0.8142 }, { "contrastive_loss": 0.6452, "epoch": 0.8961625282167043, "grad_norm": 14.384578704833984, "learning_rate": 9.37943454748003e-06, "lm_loss": 6.0057, "loss": 1.6747, "step": 397, "text_contrastive_loss": 0.8579 }, { "contrastive_loss": 0.6706, "epoch": 0.8984198645598194, "grad_norm": 16.447463989257812, "learning_rate": 9.375929083161475e-06, "lm_loss": 5.9143, "loss": 1.6825, "step": 398, "text_contrastive_loss": 0.8411 }, { "contrastive_loss": 0.6437, "epoch": 0.9006772009029346, "grad_norm": 16.03485870361328, "learning_rate": 9.372414404717001e-06, "lm_loss": 5.8768, "loss": 1.6824, "step": 399, "text_contrastive_loss": 0.902 }, { "contrastive_loss": 0.7504, "epoch": 0.9029345372460497, "grad_norm": 17.238916397094727, "learning_rate": 9.36889051954725e-06, "lm_loss": 5.8913, "loss": 1.7632, "step": 400, "text_contrastive_loss": 0.8474 }, { "contrastive_loss": 0.7562, "epoch": 0.9051918735891648, "grad_norm": 18.555673599243164, "learning_rate": 9.365357435072255e-06, "lm_loss": 5.9769, "loss": 1.8466, "step": 401, "text_contrastive_loss": 0.9854 }, { "contrastive_loss": 0.7598, "epoch": 0.90744920993228, "grad_norm": 17.139881134033203, "learning_rate": 9.361815158731413e-06, "lm_loss": 5.8767, "loss": 1.7825, "step": 402, "text_contrastive_loss": 0.8701 }, { "contrastive_loss": 0.7891, "epoch": 0.909706546275395, "grad_norm": 19.237648010253906, "learning_rate": 9.358263697983479e-06, "lm_loss": 5.974, "loss": 1.849, "step": 403, "text_contrastive_loss": 0.9252 }, { "contrastive_loss": 0.73, "epoch": 0.9119638826185101, "grad_norm": 17.79875373840332, "learning_rate": 9.354703060306546e-06, "lm_loss": 5.8671, "loss": 1.7131, "step": 404, "text_contrastive_loss": 0.7927 }, { "contrastive_loss": 0.7431, "epoch": 0.9142212189616253, "grad_norm": 17.295480728149414, "learning_rate": 9.351133253198027e-06, "lm_loss": 5.941, "loss": 1.8208, "step": 405, "text_contrastive_loss": 0.9671 }, { "contrastive_loss": 0.7145, "epoch": 0.9164785553047404, "grad_norm": 17.430517196655273, "learning_rate": 9.347554284174654e-06, "lm_loss": 5.9689, "loss": 1.8182, "step": 406, "text_contrastive_loss": 1.0136 }, { "contrastive_loss": 0.7077, "epoch": 0.9187358916478555, "grad_norm": 16.849336624145508, "learning_rate": 9.343966160772438e-06, "lm_loss": 5.9726, "loss": 1.722, "step": 407, "text_contrastive_loss": 0.8341 }, { "contrastive_loss": 0.7493, "epoch": 0.9209932279909706, "grad_norm": 15.629685401916504, "learning_rate": 9.340368890546672e-06, "lm_loss": 5.8878, "loss": 1.7794, "step": 408, "text_contrastive_loss": 0.8826 }, { "contrastive_loss": 0.652, "epoch": 0.9232505643340858, "grad_norm": 16.84798812866211, "learning_rate": 9.336762481071906e-06, "lm_loss": 5.9996, "loss": 1.6227, "step": 409, "text_contrastive_loss": 0.7415 }, { "contrastive_loss": 0.868, "epoch": 0.9255079006772009, "grad_norm": 19.487653732299805, "learning_rate": 9.333146939941938e-06, "lm_loss": 5.8464, "loss": 1.9711, "step": 410, "text_contrastive_loss": 1.037 }, { "contrastive_loss": 0.6776, "epoch": 0.927765237020316, "grad_norm": 16.648977279663086, "learning_rate": 9.329522274769791e-06, "lm_loss": 5.8577, "loss": 1.6416, "step": 411, "text_contrastive_loss": 0.7565 }, { "contrastive_loss": 0.7495, "epoch": 0.9300225733634312, "grad_norm": 18.50621223449707, "learning_rate": 9.325888493187699e-06, "lm_loss": 5.8654, "loss": 1.7748, "step": 412, "text_contrastive_loss": 0.8775 }, { "contrastive_loss": 0.6992, "epoch": 0.9322799097065463, "grad_norm": 19.02260971069336, "learning_rate": 9.322245602847094e-06, "lm_loss": 5.8865, "loss": 1.734, "step": 413, "text_contrastive_loss": 0.8923 }, { "contrastive_loss": 0.6929, "epoch": 0.9345372460496614, "grad_norm": 19.366676330566406, "learning_rate": 9.31859361141859e-06, "lm_loss": 5.9832, "loss": 1.7528, "step": 414, "text_contrastive_loss": 0.9231 }, { "contrastive_loss": 0.6418, "epoch": 0.9367945823927766, "grad_norm": 16.030540466308594, "learning_rate": 9.314932526591956e-06, "lm_loss": 5.9664, "loss": 1.6605, "step": 415, "text_contrastive_loss": 0.8441 }, { "contrastive_loss": 0.6763, "epoch": 0.9390519187358917, "grad_norm": 16.432239532470703, "learning_rate": 9.311262356076118e-06, "lm_loss": 5.9031, "loss": 1.7403, "step": 416, "text_contrastive_loss": 0.9474 }, { "contrastive_loss": 0.7221, "epoch": 0.9413092550790068, "grad_norm": 19.767297744750977, "learning_rate": 9.30758310759913e-06, "lm_loss": 5.8982, "loss": 1.7693, "step": 417, "text_contrastive_loss": 0.9148 }, { "contrastive_loss": 0.7386, "epoch": 0.9435665914221218, "grad_norm": 18.91975212097168, "learning_rate": 9.303894788908158e-06, "lm_loss": 5.7443, "loss": 1.7289, "step": 418, "text_contrastive_loss": 0.8318 }, { "contrastive_loss": 0.9391, "epoch": 0.945823927765237, "grad_norm": 20.29686737060547, "learning_rate": 9.300197407769472e-06, "lm_loss": 6.0597, "loss": 1.9885, "step": 419, "text_contrastive_loss": 0.887 }, { "contrastive_loss": 0.6991, "epoch": 0.9480812641083521, "grad_norm": 18.48418617248535, "learning_rate": 9.296490971968416e-06, "lm_loss": 5.9284, "loss": 1.6989, "step": 420, "text_contrastive_loss": 0.8139 }, { "contrastive_loss": 0.7461, "epoch": 0.9503386004514672, "grad_norm": 17.921653747558594, "learning_rate": 9.292775489309409e-06, "lm_loss": 5.8958, "loss": 1.7633, "step": 421, "text_contrastive_loss": 0.8552 }, { "contrastive_loss": 0.6148, "epoch": 0.9525959367945824, "grad_norm": 16.497175216674805, "learning_rate": 9.289050967615914e-06, "lm_loss": 5.9521, "loss": 1.6272, "step": 422, "text_contrastive_loss": 0.8344 }, { "contrastive_loss": 0.7885, "epoch": 0.9548532731376975, "grad_norm": 16.786640167236328, "learning_rate": 9.285317414730427e-06, "lm_loss": 5.8784, "loss": 1.7938, "step": 423, "text_contrastive_loss": 0.8349 }, { "contrastive_loss": 0.7501, "epoch": 0.9571106094808126, "grad_norm": 17.82651710510254, "learning_rate": 9.281574838514464e-06, "lm_loss": 5.9497, "loss": 1.7509, "step": 424, "text_contrastive_loss": 0.8116 }, { "contrastive_loss": 0.7176, "epoch": 0.9593679458239278, "grad_norm": 16.84232521057129, "learning_rate": 9.277823246848537e-06, "lm_loss": 5.9303, "loss": 1.7595, "step": 425, "text_contrastive_loss": 0.8978 }, { "contrastive_loss": 0.8188, "epoch": 0.9616252821670429, "grad_norm": 19.891075134277344, "learning_rate": 9.274062647632144e-06, "lm_loss": 5.8945, "loss": 1.9416, "step": 426, "text_contrastive_loss": 1.0667 }, { "contrastive_loss": 0.7146, "epoch": 0.963882618510158, "grad_norm": 17.92348289489746, "learning_rate": 9.270293048783747e-06, "lm_loss": 5.9633, "loss": 1.6935, "step": 427, "text_contrastive_loss": 0.7651 }, { "contrastive_loss": 0.6258, "epoch": 0.9661399548532731, "grad_norm": 17.62327003479004, "learning_rate": 9.266514458240762e-06, "lm_loss": 5.9171, "loss": 1.5839, "step": 428, "text_contrastive_loss": 0.7328 }, { "contrastive_loss": 0.7873, "epoch": 0.9683972911963883, "grad_norm": 19.740936279296875, "learning_rate": 9.262726883959535e-06, "lm_loss": 5.8752, "loss": 1.773, "step": 429, "text_contrastive_loss": 0.7965 }, { "contrastive_loss": 0.7144, "epoch": 0.9706546275395034, "grad_norm": 16.983598709106445, "learning_rate": 9.258930333915325e-06, "lm_loss": 5.8795, "loss": 1.7912, "step": 430, "text_contrastive_loss": 0.9778 }, { "contrastive_loss": 0.6559, "epoch": 0.9729119638826185, "grad_norm": 17.392724990844727, "learning_rate": 9.2551248161023e-06, "lm_loss": 5.762, "loss": 1.6471, "step": 431, "text_contrastive_loss": 0.8301 }, { "contrastive_loss": 0.6605, "epoch": 0.9751693002257337, "grad_norm": 17.272245407104492, "learning_rate": 9.251310338533504e-06, "lm_loss": 5.8766, "loss": 1.7141, "step": 432, "text_contrastive_loss": 0.9318 }, { "contrastive_loss": 0.7755, "epoch": 0.9774266365688488, "grad_norm": 18.896547317504883, "learning_rate": 9.247486909240849e-06, "lm_loss": 6.0408, "loss": 1.8467, "step": 433, "text_contrastive_loss": 0.9341 }, { "contrastive_loss": 0.745, "epoch": 0.9796839729119639, "grad_norm": 20.267478942871094, "learning_rate": 9.243654536275095e-06, "lm_loss": 5.9013, "loss": 1.7646, "step": 434, "text_contrastive_loss": 0.859 }, { "contrastive_loss": 0.7923, "epoch": 0.981941309255079, "grad_norm": 18.046537399291992, "learning_rate": 9.23981322770584e-06, "lm_loss": 5.8208, "loss": 1.8683, "step": 435, "text_contrastive_loss": 0.9878 }, { "contrastive_loss": 0.7119, "epoch": 0.9841986455981941, "grad_norm": 17.922704696655273, "learning_rate": 9.235962991621484e-06, "lm_loss": 5.933, "loss": 1.8701, "step": 436, "text_contrastive_loss": 1.1298 }, { "contrastive_loss": 0.8629, "epoch": 0.9864559819413092, "grad_norm": 18.518964767456055, "learning_rate": 9.232103836129239e-06, "lm_loss": 5.8936, "loss": 1.8983, "step": 437, "text_contrastive_loss": 0.8921 }, { "contrastive_loss": 0.8594, "epoch": 0.9887133182844243, "grad_norm": 20.471054077148438, "learning_rate": 9.22823576935509e-06, "lm_loss": 5.873, "loss": 1.8687, "step": 438, "text_contrastive_loss": 0.844 }, { "contrastive_loss": 0.6949, "epoch": 0.9909706546275395, "grad_norm": 19.415773391723633, "learning_rate": 9.224358799443791e-06, "lm_loss": 5.7932, "loss": 1.6119, "step": 439, "text_contrastive_loss": 0.6754 }, { "contrastive_loss": 0.7808, "epoch": 0.9932279909706546, "grad_norm": 17.44244384765625, "learning_rate": 9.220472934558838e-06, "lm_loss": 5.8916, "loss": 1.8226, "step": 440, "text_contrastive_loss": 0.9054 }, { "contrastive_loss": 0.5842, "epoch": 0.9954853273137697, "grad_norm": 15.602773666381836, "learning_rate": 9.216578182882459e-06, "lm_loss": 5.8873, "loss": 1.6194, "step": 441, "text_contrastive_loss": 0.8929 }, { "contrastive_loss": 0.7421, "epoch": 0.9977426636568849, "grad_norm": 18.081762313842773, "learning_rate": 9.212674552615594e-06, "lm_loss": 5.8661, "loss": 1.7894, "step": 442, "text_contrastive_loss": 0.9215 }, { "contrastive_loss": 0.5515, "epoch": 1.0, "grad_norm": 24.072240829467773, "learning_rate": 9.208762051977879e-06, "lm_loss": 5.969, "loss": 1.4913, "step": 443, "text_contrastive_loss": 0.6858 }, { "contrastive_loss": 0.6564, "epoch": 1.002257336343115, "grad_norm": 16.146533966064453, "learning_rate": 9.204840689207626e-06, "lm_loss": 5.8738, "loss": 1.679, "step": 444, "text_contrastive_loss": 0.8705 }, { "contrastive_loss": 0.7222, "epoch": 1.0045146726862302, "grad_norm": 16.364309310913086, "learning_rate": 9.20091047256181e-06, "lm_loss": 5.7891, "loss": 1.7542, "step": 445, "text_contrastive_loss": 0.9063 }, { "contrastive_loss": 0.7453, "epoch": 1.0067720090293453, "grad_norm": 18.12066650390625, "learning_rate": 9.196971410316047e-06, "lm_loss": 5.8606, "loss": 1.7919, "step": 446, "text_contrastive_loss": 0.921 }, { "contrastive_loss": 0.6281, "epoch": 1.0090293453724606, "grad_norm": 15.592330932617188, "learning_rate": 9.193023510764578e-06, "lm_loss": 5.7501, "loss": 1.6208, "step": 447, "text_contrastive_loss": 0.8353 }, { "contrastive_loss": 0.6958, "epoch": 1.0112866817155757, "grad_norm": 16.594594955444336, "learning_rate": 9.189066782220253e-06, "lm_loss": 5.9351, "loss": 1.781, "step": 448, "text_contrastive_loss": 0.9834 }, { "contrastive_loss": 0.6019, "epoch": 1.0135440180586908, "grad_norm": 14.94572925567627, "learning_rate": 9.185101233014516e-06, "lm_loss": 5.8615, "loss": 1.6711, "step": 449, "text_contrastive_loss": 0.966 }, { "contrastive_loss": 0.6496, "epoch": 1.0158013544018059, "grad_norm": 15.78742790222168, "learning_rate": 9.181126871497378e-06, "lm_loss": 5.9094, "loss": 1.6736, "step": 450, "text_contrastive_loss": 0.8662 }, { "contrastive_loss": 0.5917, "epoch": 1.018058690744921, "grad_norm": 16.283035278320312, "learning_rate": 9.177143706037411e-06, "lm_loss": 5.8957, "loss": 1.6316, "step": 451, "text_contrastive_loss": 0.9006 }, { "contrastive_loss": 0.5687, "epoch": 1.020316027088036, "grad_norm": 16.268436431884766, "learning_rate": 9.173151745021722e-06, "lm_loss": 5.8449, "loss": 1.5149, "step": 452, "text_contrastive_loss": 0.7233 }, { "contrastive_loss": 0.597, "epoch": 1.0225733634311513, "grad_norm": 17.817989349365234, "learning_rate": 9.169150996855939e-06, "lm_loss": 5.9054, "loss": 1.5376, "step": 453, "text_contrastive_loss": 0.7 }, { "contrastive_loss": 0.6642, "epoch": 1.0248306997742664, "grad_norm": 17.480836868286133, "learning_rate": 9.16514146996419e-06, "lm_loss": 5.8982, "loss": 1.6826, "step": 454, "text_contrastive_loss": 0.8571 }, { "contrastive_loss": 0.5013, "epoch": 1.0270880361173815, "grad_norm": 13.058002471923828, "learning_rate": 9.161123172789091e-06, "lm_loss": 5.8368, "loss": 1.4934, "step": 455, "text_contrastive_loss": 0.8169 }, { "contrastive_loss": 0.6949, "epoch": 1.0293453724604966, "grad_norm": 17.163982391357422, "learning_rate": 9.157096113791727e-06, "lm_loss": 5.8088, "loss": 1.7878, "step": 456, "text_contrastive_loss": 1.0242 }, { "contrastive_loss": 0.7183, "epoch": 1.0316027088036117, "grad_norm": 15.776236534118652, "learning_rate": 9.153060301451629e-06, "lm_loss": 5.8989, "loss": 1.7657, "step": 457, "text_contrastive_loss": 0.9149 }, { "contrastive_loss": 0.7033, "epoch": 1.0338600451467268, "grad_norm": 16.365798950195312, "learning_rate": 9.149015744266759e-06, "lm_loss": 5.8877, "loss": 1.697, "step": 458, "text_contrastive_loss": 0.8098 }, { "contrastive_loss": 0.7502, "epoch": 1.036117381489842, "grad_norm": 20.798824310302734, "learning_rate": 9.144962450753491e-06, "lm_loss": 5.8439, "loss": 1.7898, "step": 459, "text_contrastive_loss": 0.9104 }, { "contrastive_loss": 0.8053, "epoch": 1.0383747178329572, "grad_norm": 19.01142120361328, "learning_rate": 9.140900429446601e-06, "lm_loss": 5.7618, "loss": 1.8092, "step": 460, "text_contrastive_loss": 0.8555 }, { "contrastive_loss": 0.7128, "epoch": 1.0406320541760723, "grad_norm": 17.179027557373047, "learning_rate": 9.136829688899236e-06, "lm_loss": 5.8604, "loss": 1.7158, "step": 461, "text_contrastive_loss": 0.8338 }, { "contrastive_loss": 0.6353, "epoch": 1.0428893905191874, "grad_norm": 17.52668571472168, "learning_rate": 9.132750237682907e-06, "lm_loss": 5.8812, "loss": 1.578, "step": 462, "text_contrastive_loss": 0.7092 }, { "contrastive_loss": 0.7385, "epoch": 1.0451467268623025, "grad_norm": 17.666667938232422, "learning_rate": 9.128662084387462e-06, "lm_loss": 5.7808, "loss": 1.7409, "step": 463, "text_contrastive_loss": 0.8487 }, { "contrastive_loss": 0.6852, "epoch": 1.0474040632054176, "grad_norm": 17.20553207397461, "learning_rate": 9.12456523762108e-06, "lm_loss": 5.8279, "loss": 1.6443, "step": 464, "text_contrastive_loss": 0.7527 }, { "contrastive_loss": 0.6127, "epoch": 1.0496613995485327, "grad_norm": 15.813065528869629, "learning_rate": 9.120459706010233e-06, "lm_loss": 5.9009, "loss": 1.5875, "step": 465, "text_contrastive_loss": 0.7696 }, { "contrastive_loss": 0.579, "epoch": 1.0519187358916477, "grad_norm": 15.512067794799805, "learning_rate": 9.116345498199693e-06, "lm_loss": 5.8277, "loss": 1.5727, "step": 466, "text_contrastive_loss": 0.8218 }, { "contrastive_loss": 0.6469, "epoch": 1.054176072234763, "grad_norm": 17.455263137817383, "learning_rate": 9.112222622852494e-06, "lm_loss": 5.9135, "loss": 1.7021, "step": 467, "text_contrastive_loss": 0.9276 }, { "contrastive_loss": 0.6732, "epoch": 1.0564334085778782, "grad_norm": 15.996644020080566, "learning_rate": 9.108091088649922e-06, "lm_loss": 5.8312, "loss": 1.6661, "step": 468, "text_contrastive_loss": 0.8195 }, { "contrastive_loss": 0.6767, "epoch": 1.0586907449209932, "grad_norm": 18.72588348388672, "learning_rate": 9.103950904291496e-06, "lm_loss": 5.6876, "loss": 1.7071, "step": 469, "text_contrastive_loss": 0.9233 }, { "contrastive_loss": 0.5908, "epoch": 1.0609480812641083, "grad_norm": 15.662790298461914, "learning_rate": 9.099802078494947e-06, "lm_loss": 5.7395, "loss": 1.5848, "step": 470, "text_contrastive_loss": 0.8401 }, { "contrastive_loss": 0.5719, "epoch": 1.0632054176072234, "grad_norm": 14.050433158874512, "learning_rate": 9.095644619996206e-06, "lm_loss": 5.9394, "loss": 1.6242, "step": 471, "text_contrastive_loss": 0.9167 }, { "contrastive_loss": 0.6776, "epoch": 1.0654627539503385, "grad_norm": 16.951717376708984, "learning_rate": 9.09147853754938e-06, "lm_loss": 5.8241, "loss": 1.7083, "step": 472, "text_contrastive_loss": 0.8966 }, { "contrastive_loss": 0.6857, "epoch": 1.0677200902934538, "grad_norm": 18.096969604492188, "learning_rate": 9.087303839926727e-06, "lm_loss": 5.7433, "loss": 1.7767, "step": 473, "text_contrastive_loss": 1.0333 }, { "contrastive_loss": 0.6358, "epoch": 1.069977426636569, "grad_norm": 15.680360794067383, "learning_rate": 9.08312053591866e-06, "lm_loss": 5.8963, "loss": 1.6427, "step": 474, "text_contrastive_loss": 0.8345 }, { "contrastive_loss": 0.647, "epoch": 1.072234762979684, "grad_norm": 16.32209014892578, "learning_rate": 9.0789286343337e-06, "lm_loss": 5.8095, "loss": 1.7041, "step": 475, "text_contrastive_loss": 0.9523 }, { "contrastive_loss": 0.6131, "epoch": 1.074492099322799, "grad_norm": 15.483674049377441, "learning_rate": 9.07472814399848e-06, "lm_loss": 5.8696, "loss": 1.6418, "step": 476, "text_contrastive_loss": 0.8836 }, { "contrastive_loss": 0.7275, "epoch": 1.0767494356659142, "grad_norm": 16.682987213134766, "learning_rate": 9.070519073757717e-06, "lm_loss": 5.8161, "loss": 1.7329, "step": 477, "text_contrastive_loss": 0.8474 }, { "contrastive_loss": 0.7091, "epoch": 1.0790067720090293, "grad_norm": 17.670696258544922, "learning_rate": 9.06630143247419e-06, "lm_loss": 5.8142, "loss": 1.6948, "step": 478, "text_contrastive_loss": 0.8085 }, { "contrastive_loss": 0.6821, "epoch": 1.0812641083521444, "grad_norm": 17.2860107421875, "learning_rate": 9.062075229028728e-06, "lm_loss": 5.8454, "loss": 1.7093, "step": 479, "text_contrastive_loss": 0.8853 }, { "contrastive_loss": 0.7009, "epoch": 1.0835214446952597, "grad_norm": 16.963953018188477, "learning_rate": 9.057840472320192e-06, "lm_loss": 5.7601, "loss": 1.6516, "step": 480, "text_contrastive_loss": 0.7494 }, { "contrastive_loss": 0.603, "epoch": 1.0857787810383748, "grad_norm": 16.2852783203125, "learning_rate": 9.053597171265447e-06, "lm_loss": 5.8464, "loss": 1.6351, "step": 481, "text_contrastive_loss": 0.8949 }, { "contrastive_loss": 0.62, "epoch": 1.0880361173814899, "grad_norm": 17.021404266357422, "learning_rate": 9.04934533479935e-06, "lm_loss": 5.8477, "loss": 1.6446, "step": 482, "text_contrastive_loss": 0.8795 }, { "contrastive_loss": 0.6784, "epoch": 1.090293453724605, "grad_norm": 19.252443313598633, "learning_rate": 9.045084971874738e-06, "lm_loss": 5.8896, "loss": 1.6798, "step": 483, "text_contrastive_loss": 0.825 }, { "contrastive_loss": 0.5168, "epoch": 1.09255079006772, "grad_norm": 16.297832489013672, "learning_rate": 9.040816091462393e-06, "lm_loss": 5.8755, "loss": 1.4908, "step": 484, "text_contrastive_loss": 0.7729 }, { "contrastive_loss": 0.6637, "epoch": 1.0948081264108351, "grad_norm": 16.924846649169922, "learning_rate": 9.036538702551037e-06, "lm_loss": 5.8421, "loss": 1.6557, "step": 485, "text_contrastive_loss": 0.8156 }, { "contrastive_loss": 0.7114, "epoch": 1.0970654627539504, "grad_norm": 18.735641479492188, "learning_rate": 9.032252814147302e-06, "lm_loss": 5.8253, "loss": 1.7244, "step": 486, "text_contrastive_loss": 0.861 }, { "contrastive_loss": 0.7342, "epoch": 1.0993227990970655, "grad_norm": 18.187530517578125, "learning_rate": 9.027958435275726e-06, "lm_loss": 5.9273, "loss": 1.7577, "step": 487, "text_contrastive_loss": 0.8616 }, { "contrastive_loss": 0.618, "epoch": 1.1015801354401806, "grad_norm": 17.452129364013672, "learning_rate": 9.023655574978716e-06, "lm_loss": 5.8096, "loss": 1.6569, "step": 488, "text_contrastive_loss": 0.9159 }, { "contrastive_loss": 0.549, "epoch": 1.1038374717832957, "grad_norm": 15.237523078918457, "learning_rate": 9.019344242316542e-06, "lm_loss": 5.7269, "loss": 1.509, "step": 489, "text_contrastive_loss": 0.7746 }, { "contrastive_loss": 0.6338, "epoch": 1.1060948081264108, "grad_norm": 19.214378356933594, "learning_rate": 9.015024446367315e-06, "lm_loss": 5.9157, "loss": 1.5921, "step": 490, "text_contrastive_loss": 0.7335 }, { "contrastive_loss": 0.6884, "epoch": 1.108352144469526, "grad_norm": 17.360498428344727, "learning_rate": 9.010696196226963e-06, "lm_loss": 5.852, "loss": 1.7197, "step": 491, "text_contrastive_loss": 0.8923 }, { "contrastive_loss": 0.7024, "epoch": 1.110609480812641, "grad_norm": 18.37981414794922, "learning_rate": 9.00635950100922e-06, "lm_loss": 5.7854, "loss": 1.7987, "step": 492, "text_contrastive_loss": 1.0355 }, { "contrastive_loss": 0.688, "epoch": 1.1128668171557563, "grad_norm": 17.151948928833008, "learning_rate": 9.002014369845592e-06, "lm_loss": 5.7959, "loss": 1.7285, "step": 493, "text_contrastive_loss": 0.9218 }, { "contrastive_loss": 0.5828, "epoch": 1.1151241534988714, "grad_norm": 16.890501022338867, "learning_rate": 8.997660811885367e-06, "lm_loss": 5.8493, "loss": 1.6414, "step": 494, "text_contrastive_loss": 0.9474 }, { "contrastive_loss": 0.681, "epoch": 1.1173814898419865, "grad_norm": 16.833452224731445, "learning_rate": 8.993298836295556e-06, "lm_loss": 5.8133, "loss": 1.6957, "step": 495, "text_contrastive_loss": 0.8668 }, { "contrastive_loss": 0.5756, "epoch": 1.1196388261851016, "grad_norm": 15.70335578918457, "learning_rate": 8.988928452260909e-06, "lm_loss": 5.7206, "loss": 1.5213, "step": 496, "text_contrastive_loss": 0.7472 }, { "contrastive_loss": 0.5968, "epoch": 1.1218961625282167, "grad_norm": 18.256250381469727, "learning_rate": 8.984549668983875e-06, "lm_loss": 5.8204, "loss": 1.5841, "step": 497, "text_contrastive_loss": 0.8105 }, { "contrastive_loss": 0.6764, "epoch": 1.1241534988713318, "grad_norm": 19.21333885192871, "learning_rate": 8.980162495684587e-06, "lm_loss": 5.8301, "loss": 1.6476, "step": 498, "text_contrastive_loss": 0.7764 }, { "contrastive_loss": 0.688, "epoch": 1.1264108352144468, "grad_norm": 20.066160202026367, "learning_rate": 8.975766941600852e-06, "lm_loss": 5.7774, "loss": 1.6899, "step": 499, "text_contrastive_loss": 0.8483 }, { "contrastive_loss": 0.6225, "epoch": 1.1286681715575622, "grad_norm": 18.304738998413086, "learning_rate": 8.971363015988115e-06, "lm_loss": 5.7485, "loss": 1.6332, "step": 500, "text_contrastive_loss": 0.8717 }, { "contrastive_loss": 0.5801, "epoch": 1.1309255079006773, "grad_norm": 16.779212951660156, "learning_rate": 8.966950728119453e-06, "lm_loss": 5.7688, "loss": 1.5785, "step": 501, "text_contrastive_loss": 0.843 }, { "contrastive_loss": 0.6416, "epoch": 1.1331828442437923, "grad_norm": 16.1234130859375, "learning_rate": 8.962530087285552e-06, "lm_loss": 5.8799, "loss": 1.6927, "step": 502, "text_contrastive_loss": 0.9262 }, { "contrastive_loss": 0.6148, "epoch": 1.1354401805869074, "grad_norm": 17.34881591796875, "learning_rate": 8.958101102794686e-06, "lm_loss": 5.7185, "loss": 1.6059, "step": 503, "text_contrastive_loss": 0.8385 }, { "contrastive_loss": 0.7299, "epoch": 1.1376975169300225, "grad_norm": 20.895261764526367, "learning_rate": 8.953663783972692e-06, "lm_loss": 5.6982, "loss": 1.7532, "step": 504, "text_contrastive_loss": 0.907 }, { "contrastive_loss": 0.6176, "epoch": 1.1399548532731376, "grad_norm": 19.33408546447754, "learning_rate": 8.949218140162965e-06, "lm_loss": 5.8697, "loss": 1.6283, "step": 505, "text_contrastive_loss": 0.8474 }, { "contrastive_loss": 0.6625, "epoch": 1.1422121896162527, "grad_norm": 16.830759048461914, "learning_rate": 8.944764180726423e-06, "lm_loss": 5.8084, "loss": 1.7575, "step": 506, "text_contrastive_loss": 1.0283 }, { "contrastive_loss": 0.7758, "epoch": 1.144469525959368, "grad_norm": 20.973735809326172, "learning_rate": 8.940301915041496e-06, "lm_loss": 5.7669, "loss": 1.8436, "step": 507, "text_contrastive_loss": 0.9822 }, { "contrastive_loss": 0.7011, "epoch": 1.146726862302483, "grad_norm": 17.06911849975586, "learning_rate": 8.935831352504103e-06, "lm_loss": 5.7935, "loss": 1.7367, "step": 508, "text_contrastive_loss": 0.9125 }, { "contrastive_loss": 0.6511, "epoch": 1.1489841986455982, "grad_norm": 16.534908294677734, "learning_rate": 8.931352502527633e-06, "lm_loss": 5.8002, "loss": 1.656, "step": 509, "text_contrastive_loss": 0.8496 }, { "contrastive_loss": 0.6919, "epoch": 1.1512415349887133, "grad_norm": 17.590503692626953, "learning_rate": 8.926865374542928e-06, "lm_loss": 5.8197, "loss": 1.7221, "step": 510, "text_contrastive_loss": 0.8965 }, { "contrastive_loss": 0.6852, "epoch": 1.1534988713318284, "grad_norm": 17.848155975341797, "learning_rate": 8.922369977998257e-06, "lm_loss": 5.7688, "loss": 1.827, "step": 511, "text_contrastive_loss": 1.1299 }, { "contrastive_loss": 0.6549, "epoch": 1.1557562076749435, "grad_norm": 16.373695373535156, "learning_rate": 8.917866322359303e-06, "lm_loss": 5.7991, "loss": 1.6557, "step": 512, "text_contrastive_loss": 0.8419 }, { "contrastive_loss": 0.6713, "epoch": 1.1580135440180588, "grad_norm": 16.844192504882812, "learning_rate": 8.913354417109136e-06, "lm_loss": 5.7205, "loss": 1.605, "step": 513, "text_contrastive_loss": 0.7233 }, { "contrastive_loss": 0.5913, "epoch": 1.1602708803611739, "grad_norm": 16.347095489501953, "learning_rate": 8.908834271748202e-06, "lm_loss": 5.7978, "loss": 1.5581, "step": 514, "text_contrastive_loss": 0.7742 }, { "contrastive_loss": 0.6158, "epoch": 1.162528216704289, "grad_norm": 16.738037109375, "learning_rate": 8.904305895794292e-06, "lm_loss": 5.7616, "loss": 1.5996, "step": 515, "text_contrastive_loss": 0.8154 }, { "contrastive_loss": 0.7822, "epoch": 1.164785553047404, "grad_norm": 19.219724655151367, "learning_rate": 8.899769298782528e-06, "lm_loss": 5.8163, "loss": 1.823, "step": 516, "text_contrastive_loss": 0.9184 }, { "contrastive_loss": 0.6589, "epoch": 1.1670428893905191, "grad_norm": 18.149572372436523, "learning_rate": 8.895224490265346e-06, "lm_loss": 5.8049, "loss": 1.6915, "step": 517, "text_contrastive_loss": 0.9041 }, { "contrastive_loss": 0.58, "epoch": 1.1693002257336342, "grad_norm": 15.932313919067383, "learning_rate": 8.890671479812472e-06, "lm_loss": 5.6878, "loss": 1.5722, "step": 518, "text_contrastive_loss": 0.8469 }, { "contrastive_loss": 0.6245, "epoch": 1.1715575620767495, "grad_norm": 17.646120071411133, "learning_rate": 8.886110277010902e-06, "lm_loss": 5.8012, "loss": 1.7384, "step": 519, "text_contrastive_loss": 1.0677 }, { "contrastive_loss": 0.6592, "epoch": 1.1738148984198646, "grad_norm": 17.75513458251953, "learning_rate": 8.88154089146488e-06, "lm_loss": 5.7272, "loss": 1.7283, "step": 520, "text_contrastive_loss": 0.9926 }, { "contrastive_loss": 0.7284, "epoch": 1.1760722347629797, "grad_norm": 18.321510314941406, "learning_rate": 8.876963332795881e-06, "lm_loss": 5.8515, "loss": 1.8513, "step": 521, "text_contrastive_loss": 1.0755 }, { "contrastive_loss": 0.6343, "epoch": 1.1783295711060948, "grad_norm": 16.36499786376953, "learning_rate": 8.87237761064259e-06, "lm_loss": 5.7822, "loss": 1.6614, "step": 522, "text_contrastive_loss": 0.8979 }, { "contrastive_loss": 0.6272, "epoch": 1.18058690744921, "grad_norm": 16.643741607666016, "learning_rate": 8.867783734660883e-06, "lm_loss": 5.6926, "loss": 1.6266, "step": 523, "text_contrastive_loss": 0.8603 }, { "contrastive_loss": 0.6937, "epoch": 1.182844243792325, "grad_norm": 16.587600708007812, "learning_rate": 8.8631817145238e-06, "lm_loss": 5.7304, "loss": 1.6685, "step": 524, "text_contrastive_loss": 0.8036 }, { "contrastive_loss": 0.5257, "epoch": 1.18510158013544, "grad_norm": 16.325925827026367, "learning_rate": 8.858571559921539e-06, "lm_loss": 5.7365, "loss": 1.4816, "step": 525, "text_contrastive_loss": 0.7644 }, { "contrastive_loss": 0.6224, "epoch": 1.1873589164785554, "grad_norm": 15.408103942871094, "learning_rate": 8.853953280561412e-06, "lm_loss": 5.6735, "loss": 1.6471, "step": 526, "text_contrastive_loss": 0.9147 }, { "contrastive_loss": 0.552, "epoch": 1.1896162528216705, "grad_norm": 14.61215877532959, "learning_rate": 8.849326886167854e-06, "lm_loss": 5.8261, "loss": 1.4629, "step": 527, "text_contrastive_loss": 0.6564 }, { "contrastive_loss": 0.5832, "epoch": 1.1918735891647856, "grad_norm": 14.496379852294922, "learning_rate": 8.844692386482379e-06, "lm_loss": 5.6996, "loss": 1.5809, "step": 528, "text_contrastive_loss": 0.8553 }, { "contrastive_loss": 0.5086, "epoch": 1.1941309255079007, "grad_norm": 14.251022338867188, "learning_rate": 8.840049791263567e-06, "lm_loss": 5.7484, "loss": 1.5166, "step": 529, "text_contrastive_loss": 0.8663 }, { "contrastive_loss": 0.6403, "epoch": 1.1963882618510158, "grad_norm": 16.765060424804688, "learning_rate": 8.835399110287046e-06, "lm_loss": 5.8704, "loss": 1.7473, "step": 530, "text_contrastive_loss": 1.0399 }, { "contrastive_loss": 0.7202, "epoch": 1.1986455981941309, "grad_norm": 16.017635345458984, "learning_rate": 8.830740353345475e-06, "lm_loss": 5.7155, "loss": 1.7729, "step": 531, "text_contrastive_loss": 0.9623 }, { "contrastive_loss": 0.6431, "epoch": 1.200902934537246, "grad_norm": 16.272201538085938, "learning_rate": 8.826073530248508e-06, "lm_loss": 5.7467, "loss": 1.6205, "step": 532, "text_contrastive_loss": 0.8054 }, { "contrastive_loss": 0.6697, "epoch": 1.2031602708803613, "grad_norm": 19.03203773498535, "learning_rate": 8.82139865082279e-06, "lm_loss": 5.8501, "loss": 1.6495, "step": 533, "text_contrastive_loss": 0.7896 }, { "contrastive_loss": 0.7965, "epoch": 1.2054176072234764, "grad_norm": 17.053361892700195, "learning_rate": 8.81671572491193e-06, "lm_loss": 5.6792, "loss": 1.7992, "step": 534, "text_contrastive_loss": 0.8696 }, { "contrastive_loss": 0.6738, "epoch": 1.2076749435665914, "grad_norm": 17.29465103149414, "learning_rate": 8.812024762376477e-06, "lm_loss": 5.682, "loss": 1.6524, "step": 535, "text_contrastive_loss": 0.8209 }, { "contrastive_loss": 0.7326, "epoch": 1.2099322799097065, "grad_norm": 18.285282135009766, "learning_rate": 8.807325773093904e-06, "lm_loss": 5.7431, "loss": 1.7493, "step": 536, "text_contrastive_loss": 0.8848 }, { "contrastive_loss": 0.7514, "epoch": 1.2121896162528216, "grad_norm": 18.895790100097656, "learning_rate": 8.802618766958586e-06, "lm_loss": 5.7001, "loss": 1.8127, "step": 537, "text_contrastive_loss": 0.9826 }, { "contrastive_loss": 0.7019, "epoch": 1.2144469525959367, "grad_norm": 17.55310821533203, "learning_rate": 8.797903753881775e-06, "lm_loss": 5.701, "loss": 1.806, "step": 538, "text_contrastive_loss": 1.0679 }, { "contrastive_loss": 0.6178, "epoch": 1.2167042889390518, "grad_norm": 18.120283126831055, "learning_rate": 8.793180743791587e-06, "lm_loss": 5.7682, "loss": 1.6386, "step": 539, "text_contrastive_loss": 0.8878 }, { "contrastive_loss": 0.7747, "epoch": 1.2189616252821671, "grad_norm": 17.4415340423584, "learning_rate": 8.788449746632976e-06, "lm_loss": 5.6951, "loss": 1.796, "step": 540, "text_contrastive_loss": 0.9037 }, { "contrastive_loss": 0.6149, "epoch": 1.2212189616252822, "grad_norm": 16.051456451416016, "learning_rate": 8.78371077236771e-06, "lm_loss": 5.8233, "loss": 1.5976, "step": 541, "text_contrastive_loss": 0.8006 }, { "contrastive_loss": 0.5918, "epoch": 1.2234762979683973, "grad_norm": 16.21177864074707, "learning_rate": 8.778963830974362e-06, "lm_loss": 5.8637, "loss": 1.7496, "step": 542, "text_contrastive_loss": 1.1429 }, { "contrastive_loss": 0.6083, "epoch": 1.2257336343115124, "grad_norm": 16.857582092285156, "learning_rate": 8.77420893244827e-06, "lm_loss": 5.7729, "loss": 1.6038, "step": 543, "text_contrastive_loss": 0.8365 }, { "contrastive_loss": 0.5756, "epoch": 1.2279909706546275, "grad_norm": 16.42177963256836, "learning_rate": 8.769446086801536e-06, "lm_loss": 5.7586, "loss": 1.5623, "step": 544, "text_contrastive_loss": 0.8217 }, { "contrastive_loss": 0.6637, "epoch": 1.2302483069977426, "grad_norm": 17.359819412231445, "learning_rate": 8.764675304062992e-06, "lm_loss": 5.6912, "loss": 1.6575, "step": 545, "text_contrastive_loss": 0.8493 }, { "contrastive_loss": 0.6592, "epoch": 1.2325056433408579, "grad_norm": 17.608606338500977, "learning_rate": 8.759896594278183e-06, "lm_loss": 5.767, "loss": 1.6739, "step": 546, "text_contrastive_loss": 0.8761 }, { "contrastive_loss": 0.6723, "epoch": 1.234762979683973, "grad_norm": 16.28169822692871, "learning_rate": 8.755109967509345e-06, "lm_loss": 5.732, "loss": 1.6321, "step": 547, "text_contrastive_loss": 0.773 }, { "contrastive_loss": 0.78, "epoch": 1.237020316027088, "grad_norm": 17.190608978271484, "learning_rate": 8.750315433835387e-06, "lm_loss": 5.744, "loss": 1.8254, "step": 548, "text_contrastive_loss": 0.9421 }, { "contrastive_loss": 0.6585, "epoch": 1.2392776523702032, "grad_norm": 16.766374588012695, "learning_rate": 8.745513003351862e-06, "lm_loss": 5.6844, "loss": 1.6865, "step": 549, "text_contrastive_loss": 0.9191 }, { "contrastive_loss": 0.8379, "epoch": 1.2415349887133182, "grad_norm": 19.849149703979492, "learning_rate": 8.740702686170955e-06, "lm_loss": 5.7946, "loss": 1.9411, "step": 550, "text_contrastive_loss": 1.0475 }, { "contrastive_loss": 0.6246, "epoch": 1.2437923250564333, "grad_norm": 16.94928550720215, "learning_rate": 8.735884492421457e-06, "lm_loss": 5.6652, "loss": 1.513, "step": 551, "text_contrastive_loss": 0.6438 }, { "contrastive_loss": 0.5589, "epoch": 1.2460496613995486, "grad_norm": 14.843914985656738, "learning_rate": 8.731058432248743e-06, "lm_loss": 5.7704, "loss": 1.5717, "step": 552, "text_contrastive_loss": 0.8714 }, { "contrastive_loss": 0.6603, "epoch": 1.2483069977426637, "grad_norm": 17.591907501220703, "learning_rate": 8.726224515814752e-06, "lm_loss": 5.6885, "loss": 1.6171, "step": 553, "text_contrastive_loss": 0.7759 }, { "contrastive_loss": 0.6484, "epoch": 1.2505643340857788, "grad_norm": 17.86832618713379, "learning_rate": 8.721382753297967e-06, "lm_loss": 5.6409, "loss": 1.67, "step": 554, "text_contrastive_loss": 0.915 }, { "contrastive_loss": 0.6211, "epoch": 1.252821670428894, "grad_norm": 17.385845184326172, "learning_rate": 8.71653315489339e-06, "lm_loss": 5.8337, "loss": 1.6136, "step": 555, "text_contrastive_loss": 0.8183 }, { "contrastive_loss": 0.7168, "epoch": 1.255079006772009, "grad_norm": 18.187938690185547, "learning_rate": 8.711675730812522e-06, "lm_loss": 5.7761, "loss": 1.746, "step": 556, "text_contrastive_loss": 0.9032 }, { "contrastive_loss": 0.7436, "epoch": 1.257336343115124, "grad_norm": 20.165138244628906, "learning_rate": 8.706810491283346e-06, "lm_loss": 5.7146, "loss": 1.803, "step": 557, "text_contrastive_loss": 0.9759 }, { "contrastive_loss": 0.7407, "epoch": 1.2595936794582392, "grad_norm": 20.50693130493164, "learning_rate": 8.701937446550298e-06, "lm_loss": 5.7585, "loss": 1.7661, "step": 558, "text_contrastive_loss": 0.8991 }, { "contrastive_loss": 0.6625, "epoch": 1.2618510158013545, "grad_norm": 15.074134826660156, "learning_rate": 8.69705660687425e-06, "lm_loss": 5.7851, "loss": 1.7489, "step": 559, "text_contrastive_loss": 1.0158 }, { "contrastive_loss": 0.7133, "epoch": 1.2641083521444696, "grad_norm": 17.727819442749023, "learning_rate": 8.692167982532487e-06, "lm_loss": 5.7226, "loss": 1.7565, "step": 560, "text_contrastive_loss": 0.9418 }, { "contrastive_loss": 0.7011, "epoch": 1.2663656884875847, "grad_norm": 17.928321838378906, "learning_rate": 8.687271583818687e-06, "lm_loss": 5.6885, "loss": 1.7219, "step": 561, "text_contrastive_loss": 0.904 }, { "contrastive_loss": 0.659, "epoch": 1.2686230248306998, "grad_norm": 16.586898803710938, "learning_rate": 8.682367421042895e-06, "lm_loss": 5.8365, "loss": 1.6248, "step": 562, "text_contrastive_loss": 0.7643 }, { "contrastive_loss": 0.6807, "epoch": 1.2708803611738149, "grad_norm": 16.01237678527832, "learning_rate": 8.677455504531507e-06, "lm_loss": 5.6571, "loss": 1.6628, "step": 563, "text_contrastive_loss": 0.8328 }, { "contrastive_loss": 0.6099, "epoch": 1.27313769751693, "grad_norm": 14.771806716918945, "learning_rate": 8.672535844627243e-06, "lm_loss": 5.7449, "loss": 1.5882, "step": 564, "text_contrastive_loss": 0.8076 }, { "contrastive_loss": 0.6119, "epoch": 1.275395033860045, "grad_norm": 17.364458084106445, "learning_rate": 8.667608451689135e-06, "lm_loss": 5.8589, "loss": 1.6081, "step": 565, "text_contrastive_loss": 0.8206 }, { "contrastive_loss": 0.6576, "epoch": 1.2776523702031604, "grad_norm": 17.16614532470703, "learning_rate": 8.662673336092487e-06, "lm_loss": 5.8289, "loss": 1.6548, "step": 566, "text_contrastive_loss": 0.8286 }, { "contrastive_loss": 0.6646, "epoch": 1.2799097065462754, "grad_norm": 17.29311752319336, "learning_rate": 8.657730508228874e-06, "lm_loss": 5.8073, "loss": 1.6869, "step": 567, "text_contrastive_loss": 0.883 }, { "contrastive_loss": 0.628, "epoch": 1.2821670428893905, "grad_norm": 15.421435356140137, "learning_rate": 8.652779978506103e-06, "lm_loss": 5.91, "loss": 1.6394, "step": 568, "text_contrastive_loss": 0.8408 }, { "contrastive_loss": 0.6373, "epoch": 1.2844243792325056, "grad_norm": 15.888031005859375, "learning_rate": 8.647821757348202e-06, "lm_loss": 5.8405, "loss": 1.5627, "step": 569, "text_contrastive_loss": 0.6827 }, { "contrastive_loss": 0.7228, "epoch": 1.2866817155756207, "grad_norm": 17.23069953918457, "learning_rate": 8.642855855195394e-06, "lm_loss": 5.6198, "loss": 1.7024, "step": 570, "text_contrastive_loss": 0.8352 }, { "contrastive_loss": 0.5868, "epoch": 1.2889390519187358, "grad_norm": 15.839852333068848, "learning_rate": 8.637882282504075e-06, "lm_loss": 5.8711, "loss": 1.6117, "step": 571, "text_contrastive_loss": 0.8756 }, { "contrastive_loss": 0.6876, "epoch": 1.291196388261851, "grad_norm": 18.078350067138672, "learning_rate": 8.632901049746793e-06, "lm_loss": 5.8506, "loss": 1.7769, "step": 572, "text_contrastive_loss": 1.0084 }, { "contrastive_loss": 0.7239, "epoch": 1.2934537246049662, "grad_norm": 19.60801124572754, "learning_rate": 8.627912167412222e-06, "lm_loss": 5.7687, "loss": 1.7269, "step": 573, "text_contrastive_loss": 0.8523 }, { "contrastive_loss": 0.6649, "epoch": 1.2957110609480813, "grad_norm": 17.82904052734375, "learning_rate": 8.622915646005152e-06, "lm_loss": 5.7369, "loss": 1.6693, "step": 574, "text_contrastive_loss": 0.8615 }, { "contrastive_loss": 0.5279, "epoch": 1.2979683972911964, "grad_norm": 16.017885208129883, "learning_rate": 8.617911496046446e-06, "lm_loss": 5.6855, "loss": 1.4724, "step": 575, "text_contrastive_loss": 0.752 }, { "contrastive_loss": 0.6755, "epoch": 1.3002257336343115, "grad_norm": 17.49553871154785, "learning_rate": 8.612899728073039e-06, "lm_loss": 5.8032, "loss": 1.6608, "step": 576, "text_contrastive_loss": 0.8099 }, { "contrastive_loss": 0.6034, "epoch": 1.3024830699774266, "grad_norm": 16.62548065185547, "learning_rate": 8.607880352637905e-06, "lm_loss": 5.651, "loss": 1.6213, "step": 577, "text_contrastive_loss": 0.9056 }, { "contrastive_loss": 0.6879, "epoch": 1.304740406320542, "grad_norm": 18.7608699798584, "learning_rate": 8.602853380310033e-06, "lm_loss": 5.8627, "loss": 1.7153, "step": 578, "text_contrastive_loss": 0.8822 }, { "contrastive_loss": 0.5729, "epoch": 1.3069977426636568, "grad_norm": 16.681779861450195, "learning_rate": 8.59781882167441e-06, "lm_loss": 5.7276, "loss": 1.6316, "step": 579, "text_contrastive_loss": 0.9718 }, { "contrastive_loss": 0.6994, "epoch": 1.309255079006772, "grad_norm": 18.027572631835938, "learning_rate": 8.592776687332003e-06, "lm_loss": 5.7858, "loss": 1.6982, "step": 580, "text_contrastive_loss": 0.8403 }, { "contrastive_loss": 0.643, "epoch": 1.3115124153498872, "grad_norm": 15.352033615112305, "learning_rate": 8.58772698789972e-06, "lm_loss": 5.6869, "loss": 1.669, "step": 581, "text_contrastive_loss": 0.9146 }, { "contrastive_loss": 0.6563, "epoch": 1.3137697516930023, "grad_norm": 18.501941680908203, "learning_rate": 8.582669734010407e-06, "lm_loss": 5.8005, "loss": 1.6796, "step": 582, "text_contrastive_loss": 0.8866 }, { "contrastive_loss": 0.5751, "epoch": 1.3160270880361173, "grad_norm": 15.410369873046875, "learning_rate": 8.577604936312813e-06, "lm_loss": 5.6843, "loss": 1.5664, "step": 583, "text_contrastive_loss": 0.8458 }, { "contrastive_loss": 0.648, "epoch": 1.3182844243792324, "grad_norm": 15.552810668945312, "learning_rate": 8.572532605471572e-06, "lm_loss": 5.7188, "loss": 1.5767, "step": 584, "text_contrastive_loss": 0.7137 }, { "contrastive_loss": 0.6221, "epoch": 1.3205417607223477, "grad_norm": 18.540748596191406, "learning_rate": 8.567452752167183e-06, "lm_loss": 5.6481, "loss": 1.5526, "step": 585, "text_contrastive_loss": 0.7313 }, { "contrastive_loss": 0.5366, "epoch": 1.3227990970654628, "grad_norm": 16.199588775634766, "learning_rate": 8.562365387095977e-06, "lm_loss": 5.6682, "loss": 1.4805, "step": 586, "text_contrastive_loss": 0.7541 }, { "contrastive_loss": 0.6598, "epoch": 1.325056433408578, "grad_norm": 16.728755950927734, "learning_rate": 8.557270520970111e-06, "lm_loss": 5.6622, "loss": 1.6551, "step": 587, "text_contrastive_loss": 0.8582 }, { "contrastive_loss": 0.6284, "epoch": 1.327313769751693, "grad_norm": 16.7345027923584, "learning_rate": 8.552168164517532e-06, "lm_loss": 5.6653, "loss": 1.6524, "step": 588, "text_contrastive_loss": 0.9149 }, { "contrastive_loss": 0.5527, "epoch": 1.329571106094808, "grad_norm": 14.206117630004883, "learning_rate": 8.547058328481959e-06, "lm_loss": 5.6429, "loss": 1.5285, "step": 589, "text_contrastive_loss": 0.823 }, { "contrastive_loss": 0.6591, "epoch": 1.3318284424379232, "grad_norm": 15.768681526184082, "learning_rate": 8.54194102362286e-06, "lm_loss": 5.7572, "loss": 1.6542, "step": 590, "text_contrastive_loss": 0.8387 }, { "contrastive_loss": 0.7248, "epoch": 1.3340857787810383, "grad_norm": 19.255586624145508, "learning_rate": 8.536816260715433e-06, "lm_loss": 5.6369, "loss": 1.7682, "step": 591, "text_contrastive_loss": 0.9594 }, { "contrastive_loss": 0.6456, "epoch": 1.3363431151241536, "grad_norm": 16.265714645385742, "learning_rate": 8.531684050550575e-06, "lm_loss": 5.7152, "loss": 1.6206, "step": 592, "text_contrastive_loss": 0.8069 }, { "contrastive_loss": 0.6153, "epoch": 1.3386004514672687, "grad_norm": 16.676197052001953, "learning_rate": 8.526544403934868e-06, "lm_loss": 5.7057, "loss": 1.5796, "step": 593, "text_contrastive_loss": 0.7875 }, { "contrastive_loss": 0.6598, "epoch": 1.3408577878103838, "grad_norm": 17.399089813232422, "learning_rate": 8.521397331690551e-06, "lm_loss": 5.6061, "loss": 1.6512, "step": 594, "text_contrastive_loss": 0.8617 }, { "contrastive_loss": 0.6508, "epoch": 1.3431151241534989, "grad_norm": 18.223529815673828, "learning_rate": 8.516242844655498e-06, "lm_loss": 5.7351, "loss": 1.5835, "step": 595, "text_contrastive_loss": 0.7186 }, { "contrastive_loss": 0.6331, "epoch": 1.345372460496614, "grad_norm": 16.342121124267578, "learning_rate": 8.5110809536832e-06, "lm_loss": 5.7773, "loss": 1.6419, "step": 596, "text_contrastive_loss": 0.862 }, { "contrastive_loss": 0.6344, "epoch": 1.347629796839729, "grad_norm": 17.7054443359375, "learning_rate": 8.50591166964273e-06, "lm_loss": 5.7044, "loss": 1.6997, "step": 597, "text_contrastive_loss": 0.9899 }, { "contrastive_loss": 0.7845, "epoch": 1.3498871331828441, "grad_norm": 15.945330619812012, "learning_rate": 8.500735003418734e-06, "lm_loss": 5.8071, "loss": 1.8835, "step": 598, "text_contrastive_loss": 1.0364 }, { "contrastive_loss": 0.6898, "epoch": 1.3521444695259595, "grad_norm": 17.745105743408203, "learning_rate": 8.495550965911403e-06, "lm_loss": 5.8617, "loss": 1.731, "step": 599, "text_contrastive_loss": 0.9101 }, { "contrastive_loss": 0.6985, "epoch": 1.3544018058690745, "grad_norm": 18.3575382232666, "learning_rate": 8.490359568036446e-06, "lm_loss": 5.7694, "loss": 1.7739, "step": 600, "text_contrastive_loss": 0.997 }, { "contrastive_loss": 0.6558, "epoch": 1.3566591422121896, "grad_norm": 16.287246704101562, "learning_rate": 8.485160820725073e-06, "lm_loss": 5.794, "loss": 1.672, "step": 601, "text_contrastive_loss": 0.8737 }, { "contrastive_loss": 0.5926, "epoch": 1.3589164785553047, "grad_norm": 15.705453872680664, "learning_rate": 8.479954734923967e-06, "lm_loss": 5.7311, "loss": 1.609, "step": 602, "text_contrastive_loss": 0.8867 }, { "contrastive_loss": 0.6082, "epoch": 1.3611738148984198, "grad_norm": 15.387367248535156, "learning_rate": 8.474741321595263e-06, "lm_loss": 5.7703, "loss": 1.605, "step": 603, "text_contrastive_loss": 0.8396 }, { "contrastive_loss": 0.6713, "epoch": 1.363431151241535, "grad_norm": 19.237621307373047, "learning_rate": 8.46952059171653e-06, "lm_loss": 5.6506, "loss": 1.7068, "step": 604, "text_contrastive_loss": 0.9411 }, { "contrastive_loss": 0.5959, "epoch": 1.36568848758465, "grad_norm": 15.905624389648438, "learning_rate": 8.464292556280734e-06, "lm_loss": 5.6875, "loss": 1.5965, "step": 605, "text_contrastive_loss": 0.8635 }, { "contrastive_loss": 0.4923, "epoch": 1.3679458239277653, "grad_norm": 14.173022270202637, "learning_rate": 8.459057226296232e-06, "lm_loss": 5.8231, "loss": 1.4332, "step": 606, "text_contrastive_loss": 0.7171 }, { "contrastive_loss": 0.6652, "epoch": 1.3702031602708804, "grad_norm": 17.57984161376953, "learning_rate": 8.453814612786736e-06, "lm_loss": 5.7891, "loss": 1.6656, "step": 607, "text_contrastive_loss": 0.8429 }, { "contrastive_loss": 0.6025, "epoch": 1.3724604966139955, "grad_norm": 15.989808082580566, "learning_rate": 8.4485647267913e-06, "lm_loss": 5.7487, "loss": 1.5859, "step": 608, "text_contrastive_loss": 0.8172 }, { "contrastive_loss": 0.6286, "epoch": 1.3747178329571106, "grad_norm": 17.961231231689453, "learning_rate": 8.443307579364282e-06, "lm_loss": 5.7026, "loss": 1.608, "step": 609, "text_contrastive_loss": 0.8183 }, { "contrastive_loss": 0.6308, "epoch": 1.3769751693002257, "grad_norm": 17.083267211914062, "learning_rate": 8.43804318157534e-06, "lm_loss": 5.6939, "loss": 1.6289, "step": 610, "text_contrastive_loss": 0.8573 }, { "contrastive_loss": 0.6925, "epoch": 1.379232505643341, "grad_norm": 17.16958999633789, "learning_rate": 8.432771544509395e-06, "lm_loss": 5.7154, "loss": 1.7076, "step": 611, "text_contrastive_loss": 0.8871 }, { "contrastive_loss": 0.5918, "epoch": 1.3814898419864559, "grad_norm": 16.935251235961914, "learning_rate": 8.427492679266605e-06, "lm_loss": 5.7298, "loss": 1.5374, "step": 612, "text_contrastive_loss": 0.7451 }, { "contrastive_loss": 0.73, "epoch": 1.3837471783295712, "grad_norm": 18.650781631469727, "learning_rate": 8.422206596962357e-06, "lm_loss": 5.7431, "loss": 1.7484, "step": 613, "text_contrastive_loss": 0.8882 }, { "contrastive_loss": 0.7085, "epoch": 1.3860045146726863, "grad_norm": 17.921417236328125, "learning_rate": 8.416913308727229e-06, "lm_loss": 5.6926, "loss": 1.6725, "step": 614, "text_contrastive_loss": 0.7894 }, { "contrastive_loss": 0.5775, "epoch": 1.3882618510158014, "grad_norm": 17.152624130249023, "learning_rate": 8.411612825706976e-06, "lm_loss": 5.7791, "loss": 1.598, "step": 615, "text_contrastive_loss": 0.8852 }, { "contrastive_loss": 0.6252, "epoch": 1.3905191873589164, "grad_norm": 16.69524574279785, "learning_rate": 8.4063051590625e-06, "lm_loss": 5.6092, "loss": 1.6112, "step": 616, "text_contrastive_loss": 0.8503 }, { "contrastive_loss": 0.6031, "epoch": 1.3927765237020315, "grad_norm": 18.74748992919922, "learning_rate": 8.400990319969829e-06, "lm_loss": 5.6256, "loss": 1.6014, "step": 617, "text_contrastive_loss": 0.8716 }, { "contrastive_loss": 0.6992, "epoch": 1.3950338600451468, "grad_norm": 16.87995147705078, "learning_rate": 8.395668319620092e-06, "lm_loss": 5.8057, "loss": 1.7324, "step": 618, "text_contrastive_loss": 0.9052 }, { "contrastive_loss": 0.5642, "epoch": 1.3972911963882617, "grad_norm": 17.030317306518555, "learning_rate": 8.390339169219504e-06, "lm_loss": 5.8441, "loss": 1.611, "step": 619, "text_contrastive_loss": 0.9249 }, { "contrastive_loss": 0.6297, "epoch": 1.399548532731377, "grad_norm": 16.5139102935791, "learning_rate": 8.385002879989328e-06, "lm_loss": 5.7669, "loss": 1.6511, "step": 620, "text_contrastive_loss": 0.8894 }, { "contrastive_loss": 0.6669, "epoch": 1.4018058690744921, "grad_norm": 17.75321388244629, "learning_rate": 8.37965946316586e-06, "lm_loss": 5.5939, "loss": 1.6587, "step": 621, "text_contrastive_loss": 0.8647 }, { "contrastive_loss": 0.5724, "epoch": 1.4040632054176072, "grad_norm": 17.158180236816406, "learning_rate": 8.37430893000041e-06, "lm_loss": 5.6086, "loss": 1.5737, "step": 622, "text_contrastive_loss": 0.8808 }, { "contrastive_loss": 0.667, "epoch": 1.4063205417607223, "grad_norm": 16.76217269897461, "learning_rate": 8.368951291759264e-06, "lm_loss": 5.6943, "loss": 1.6317, "step": 623, "text_contrastive_loss": 0.7906 }, { "contrastive_loss": 0.7416, "epoch": 1.4085778781038374, "grad_norm": 18.57181167602539, "learning_rate": 8.363586559723675e-06, "lm_loss": 5.8373, "loss": 1.7799, "step": 624, "text_contrastive_loss": 0.9091 }, { "contrastive_loss": 0.6005, "epoch": 1.4108352144469527, "grad_norm": 16.08402442932129, "learning_rate": 8.35821474518983e-06, "lm_loss": 5.6551, "loss": 1.4957, "step": 625, "text_contrastive_loss": 0.6593 }, { "contrastive_loss": 0.6548, "epoch": 1.4130925507900678, "grad_norm": 16.66714096069336, "learning_rate": 8.352835859468829e-06, "lm_loss": 5.7476, "loss": 1.6845, "step": 626, "text_contrastive_loss": 0.9099 }, { "contrastive_loss": 0.7207, "epoch": 1.4153498871331829, "grad_norm": 18.51789093017578, "learning_rate": 8.347449913886662e-06, "lm_loss": 5.6149, "loss": 1.7858, "step": 627, "text_contrastive_loss": 1.0074 }, { "contrastive_loss": 0.6197, "epoch": 1.417607223476298, "grad_norm": 14.465909004211426, "learning_rate": 8.34205691978419e-06, "lm_loss": 5.6513, "loss": 1.6331, "step": 628, "text_contrastive_loss": 0.8966 }, { "contrastive_loss": 0.5238, "epoch": 1.419864559819413, "grad_norm": 15.883209228515625, "learning_rate": 8.336656888517103e-06, "lm_loss": 5.7084, "loss": 1.4716, "step": 629, "text_contrastive_loss": 0.7539 }, { "contrastive_loss": 0.5395, "epoch": 1.4221218961625282, "grad_norm": 15.386760711669922, "learning_rate": 8.331249831455921e-06, "lm_loss": 5.7436, "loss": 1.4861, "step": 630, "text_contrastive_loss": 0.7443 }, { "contrastive_loss": 0.6283, "epoch": 1.4243792325056432, "grad_norm": 15.466524124145508, "learning_rate": 8.325835759985951e-06, "lm_loss": 5.8448, "loss": 1.6198, "step": 631, "text_contrastive_loss": 0.814 }, { "contrastive_loss": 0.6742, "epoch": 1.4266365688487586, "grad_norm": 17.011396408081055, "learning_rate": 8.320414685507272e-06, "lm_loss": 5.753, "loss": 1.6581, "step": 632, "text_contrastive_loss": 0.8171 }, { "contrastive_loss": 0.7357, "epoch": 1.4288939051918736, "grad_norm": 17.184545516967773, "learning_rate": 8.31498661943471e-06, "lm_loss": 5.6525, "loss": 1.7411, "step": 633, "text_contrastive_loss": 0.8803 }, { "contrastive_loss": 0.6741, "epoch": 1.4311512415349887, "grad_norm": 16.616487503051758, "learning_rate": 8.309551573197809e-06, "lm_loss": 5.6614, "loss": 1.6868, "step": 634, "text_contrastive_loss": 0.8931 }, { "contrastive_loss": 0.5647, "epoch": 1.4334085778781038, "grad_norm": 15.058182716369629, "learning_rate": 8.304109558240817e-06, "lm_loss": 5.7895, "loss": 1.5024, "step": 635, "text_contrastive_loss": 0.7175 }, { "contrastive_loss": 0.6847, "epoch": 1.435665914221219, "grad_norm": 17.552400588989258, "learning_rate": 8.298660586022646e-06, "lm_loss": 5.6282, "loss": 1.6699, "step": 636, "text_contrastive_loss": 0.8448 }, { "contrastive_loss": 0.7085, "epoch": 1.437923250564334, "grad_norm": 16.694761276245117, "learning_rate": 8.293204668016867e-06, "lm_loss": 5.7376, "loss": 1.7699, "step": 637, "text_contrastive_loss": 0.9752 }, { "contrastive_loss": 0.7224, "epoch": 1.440180586907449, "grad_norm": 18.381868362426758, "learning_rate": 8.287741815711674e-06, "lm_loss": 5.8248, "loss": 1.8135, "step": 638, "text_contrastive_loss": 1.0172 }, { "contrastive_loss": 0.7324, "epoch": 1.4424379232505644, "grad_norm": 17.921268463134766, "learning_rate": 8.282272040609855e-06, "lm_loss": 5.6446, "loss": 1.7582, "step": 639, "text_contrastive_loss": 0.9228 }, { "contrastive_loss": 0.6363, "epoch": 1.4446952595936795, "grad_norm": 18.61540985107422, "learning_rate": 8.276795354228785e-06, "lm_loss": 5.7035, "loss": 1.6168, "step": 640, "text_contrastive_loss": 0.8203 }, { "contrastive_loss": 0.5566, "epoch": 1.4469525959367946, "grad_norm": 15.693979263305664, "learning_rate": 8.271311768100386e-06, "lm_loss": 5.74, "loss": 1.5628, "step": 641, "text_contrastive_loss": 0.8644 }, { "contrastive_loss": 0.6698, "epoch": 1.4492099322799097, "grad_norm": 17.22825813293457, "learning_rate": 8.26582129377111e-06, "lm_loss": 5.789, "loss": 1.7538, "step": 642, "text_contrastive_loss": 1.0104 }, { "contrastive_loss": 0.5938, "epoch": 1.4514672686230248, "grad_norm": 16.227344512939453, "learning_rate": 8.26032394280191e-06, "lm_loss": 5.8488, "loss": 1.6316, "step": 643, "text_contrastive_loss": 0.9059 }, { "contrastive_loss": 0.7882, "epoch": 1.45372460496614, "grad_norm": 18.474050521850586, "learning_rate": 8.254819726768224e-06, "lm_loss": 5.6374, "loss": 1.8157, "step": 644, "text_contrastive_loss": 0.9275 }, { "contrastive_loss": 0.6401, "epoch": 1.455981941309255, "grad_norm": 16.156492233276367, "learning_rate": 8.249308657259943e-06, "lm_loss": 5.662, "loss": 1.6219, "step": 645, "text_contrastive_loss": 0.8312 }, { "contrastive_loss": 0.5753, "epoch": 1.4582392776523703, "grad_norm": 15.180570602416992, "learning_rate": 8.243790745881389e-06, "lm_loss": 5.6963, "loss": 1.5059, "step": 646, "text_contrastive_loss": 0.722 }, { "contrastive_loss": 0.7943, "epoch": 1.4604966139954854, "grad_norm": 18.923797607421875, "learning_rate": 8.238266004251284e-06, "lm_loss": 5.6449, "loss": 1.8869, "step": 647, "text_contrastive_loss": 1.0563 }, { "contrastive_loss": 0.6139, "epoch": 1.4627539503386005, "grad_norm": 16.359981536865234, "learning_rate": 8.232734444002748e-06, "lm_loss": 5.7505, "loss": 1.5844, "step": 648, "text_contrastive_loss": 0.7908 }, { "contrastive_loss": 0.6666, "epoch": 1.4650112866817155, "grad_norm": 17.091907501220703, "learning_rate": 8.22719607678324e-06, "lm_loss": 5.576, "loss": 1.6539, "step": 649, "text_contrastive_loss": 0.8594 }, { "contrastive_loss": 0.5477, "epoch": 1.4672686230248306, "grad_norm": 15.386693000793457, "learning_rate": 8.221650914254566e-06, "lm_loss": 5.6408, "loss": 1.5554, "step": 650, "text_contrastive_loss": 0.8871 }, { "contrastive_loss": 0.6767, "epoch": 1.469525959367946, "grad_norm": 17.18435287475586, "learning_rate": 8.216098968092833e-06, "lm_loss": 5.6699, "loss": 1.6436, "step": 651, "text_contrastive_loss": 0.7998 }, { "contrastive_loss": 0.5608, "epoch": 1.4717832957110608, "grad_norm": 14.66658878326416, "learning_rate": 8.210540249988435e-06, "lm_loss": 5.8803, "loss": 1.5156, "step": 652, "text_contrastive_loss": 0.7336 }, { "contrastive_loss": 0.6009, "epoch": 1.4740406320541761, "grad_norm": 16.17041778564453, "learning_rate": 8.204974771646023e-06, "lm_loss": 5.6205, "loss": 1.5734, "step": 653, "text_contrastive_loss": 0.8209 }, { "contrastive_loss": 0.6915, "epoch": 1.4762979683972912, "grad_norm": 16.177255630493164, "learning_rate": 8.199402544784485e-06, "lm_loss": 5.6258, "loss": 1.7184, "step": 654, "text_contrastive_loss": 0.9286 }, { "contrastive_loss": 0.7241, "epoch": 1.4785553047404063, "grad_norm": 17.70816421508789, "learning_rate": 8.193823581136919e-06, "lm_loss": 5.7242, "loss": 1.7506, "step": 655, "text_contrastive_loss": 0.9082 }, { "contrastive_loss": 0.6462, "epoch": 1.4808126410835214, "grad_norm": 15.968859672546387, "learning_rate": 8.188237892450603e-06, "lm_loss": 5.6605, "loss": 1.626, "step": 656, "text_contrastive_loss": 0.8276 }, { "contrastive_loss": 0.6915, "epoch": 1.4830699774266365, "grad_norm": 16.231510162353516, "learning_rate": 8.182645490486986e-06, "lm_loss": 5.6709, "loss": 1.6858, "step": 657, "text_contrastive_loss": 0.8545 }, { "contrastive_loss": 0.6668, "epoch": 1.4853273137697518, "grad_norm": 17.567447662353516, "learning_rate": 8.177046387021641e-06, "lm_loss": 5.8608, "loss": 1.6953, "step": 658, "text_contrastive_loss": 0.8848 }, { "contrastive_loss": 0.6871, "epoch": 1.487584650112867, "grad_norm": 17.185976028442383, "learning_rate": 8.17144059384426e-06, "lm_loss": 5.7217, "loss": 1.613, "step": 659, "text_contrastive_loss": 0.7075 }, { "contrastive_loss": 0.6068, "epoch": 1.489841986455982, "grad_norm": 17.20939064025879, "learning_rate": 8.165828122758615e-06, "lm_loss": 5.6503, "loss": 1.6265, "step": 660, "text_contrastive_loss": 0.9093 }, { "contrastive_loss": 0.6366, "epoch": 1.492099322799097, "grad_norm": 17.39378547668457, "learning_rate": 8.160208985582547e-06, "lm_loss": 5.7519, "loss": 1.7123, "step": 661, "text_contrastive_loss": 1.0011 }, { "contrastive_loss": 0.5162, "epoch": 1.4943566591422122, "grad_norm": 15.506391525268555, "learning_rate": 8.154583194147929e-06, "lm_loss": 5.7635, "loss": 1.438, "step": 662, "text_contrastive_loss": 0.6908 }, { "contrastive_loss": 0.4868, "epoch": 1.4966139954853273, "grad_norm": 15.273134231567383, "learning_rate": 8.148950760300642e-06, "lm_loss": 5.7494, "loss": 1.4756, "step": 663, "text_contrastive_loss": 0.8277 }, { "contrastive_loss": 0.7131, "epoch": 1.4988713318284423, "grad_norm": 18.11568260192871, "learning_rate": 8.14331169590056e-06, "lm_loss": 5.667, "loss": 1.7176, "step": 664, "text_contrastive_loss": 0.8756 }, { "contrastive_loss": 0.562, "epoch": 1.5011286681715577, "grad_norm": 17.426877975463867, "learning_rate": 8.137666012821514e-06, "lm_loss": 5.7174, "loss": 1.5592, "step": 665, "text_contrastive_loss": 0.8509 }, { "contrastive_loss": 0.6659, "epoch": 1.5033860045146725, "grad_norm": 18.524707794189453, "learning_rate": 8.132013722951275e-06, "lm_loss": 5.6328, "loss": 1.6976, "step": 666, "text_contrastive_loss": 0.9368 }, { "contrastive_loss": 0.6454, "epoch": 1.5056433408577878, "grad_norm": 16.65372657775879, "learning_rate": 8.12635483819152e-06, "lm_loss": 5.7965, "loss": 1.6676, "step": 667, "text_contrastive_loss": 0.8852 }, { "contrastive_loss": 0.6684, "epoch": 1.507900677200903, "grad_norm": 18.827136993408203, "learning_rate": 8.12068937045782e-06, "lm_loss": 5.638, "loss": 1.6413, "step": 668, "text_contrastive_loss": 0.8183 }, { "contrastive_loss": 0.5611, "epoch": 1.510158013544018, "grad_norm": 15.873318672180176, "learning_rate": 8.115017331679602e-06, "lm_loss": 5.7432, "loss": 1.56, "step": 669, "text_contrastive_loss": 0.8492 }, { "contrastive_loss": 0.6918, "epoch": 1.5124153498871333, "grad_norm": 18.114233016967773, "learning_rate": 8.109338733800132e-06, "lm_loss": 5.7047, "loss": 1.7948, "step": 670, "text_contrastive_loss": 1.0651 }, { "contrastive_loss": 0.5811, "epoch": 1.5146726862302482, "grad_norm": 15.506185531616211, "learning_rate": 8.103653588776483e-06, "lm_loss": 5.7547, "loss": 1.6146, "step": 671, "text_contrastive_loss": 0.9161 }, { "contrastive_loss": 0.5985, "epoch": 1.5169300225733635, "grad_norm": 15.577679634094238, "learning_rate": 8.09796190857952e-06, "lm_loss": 5.713, "loss": 1.594, "step": 672, "text_contrastive_loss": 0.8484 }, { "contrastive_loss": 0.6494, "epoch": 1.5191873589164786, "grad_norm": 15.215145111083984, "learning_rate": 8.09226370519386e-06, "lm_loss": 5.5607, "loss": 1.649, "step": 673, "text_contrastive_loss": 0.8871 }, { "contrastive_loss": 0.4342, "epoch": 1.5214446952595937, "grad_norm": 13.164642333984375, "learning_rate": 8.08655899061787e-06, "lm_loss": 5.6408, "loss": 1.4285, "step": 674, "text_contrastive_loss": 0.8605 }, { "contrastive_loss": 0.6813, "epoch": 1.5237020316027088, "grad_norm": 17.442340850830078, "learning_rate": 8.080847776863609e-06, "lm_loss": 5.6544, "loss": 1.6585, "step": 675, "text_contrastive_loss": 0.8235 }, { "contrastive_loss": 0.6757, "epoch": 1.5259593679458239, "grad_norm": 17.141281127929688, "learning_rate": 8.075130075956836e-06, "lm_loss": 5.6888, "loss": 1.7499, "step": 676, "text_contrastive_loss": 1.0107 }, { "contrastive_loss": 0.6188, "epoch": 1.5282167042889392, "grad_norm": 16.08421516418457, "learning_rate": 8.069405899936961e-06, "lm_loss": 5.6586, "loss": 1.6152, "step": 677, "text_contrastive_loss": 0.8611 }, { "contrastive_loss": 0.5577, "epoch": 1.530474040632054, "grad_norm": 14.082602500915527, "learning_rate": 8.06367526085703e-06, "lm_loss": 5.7302, "loss": 1.5262, "step": 678, "text_contrastive_loss": 0.791 }, { "contrastive_loss": 0.6431, "epoch": 1.5327313769751694, "grad_norm": 15.874601364135742, "learning_rate": 8.057938170783704e-06, "lm_loss": 5.7184, "loss": 1.6087, "step": 679, "text_contrastive_loss": 0.7874 }, { "contrastive_loss": 0.5947, "epoch": 1.5349887133182845, "grad_norm": 15.756805419921875, "learning_rate": 8.052194641797217e-06, "lm_loss": 5.6824, "loss": 1.5455, "step": 680, "text_contrastive_loss": 0.7652 }, { "contrastive_loss": 0.5992, "epoch": 1.5372460496613995, "grad_norm": 15.742182731628418, "learning_rate": 8.046444685991369e-06, "lm_loss": 5.6288, "loss": 1.5945, "step": 681, "text_contrastive_loss": 0.8649 }, { "contrastive_loss": 0.5685, "epoch": 1.5395033860045146, "grad_norm": 16.00629234313965, "learning_rate": 8.040688315473489e-06, "lm_loss": 5.6869, "loss": 1.5184, "step": 682, "text_contrastive_loss": 0.7625 }, { "contrastive_loss": 0.6884, "epoch": 1.5417607223476297, "grad_norm": 17.776254653930664, "learning_rate": 8.034925542364412e-06, "lm_loss": 5.6575, "loss": 1.636, "step": 683, "text_contrastive_loss": 0.7636 }, { "contrastive_loss": 0.5358, "epoch": 1.544018058690745, "grad_norm": 14.859524726867676, "learning_rate": 8.029156378798459e-06, "lm_loss": 5.677, "loss": 1.5058, "step": 684, "text_contrastive_loss": 0.8047 }, { "contrastive_loss": 0.5631, "epoch": 1.54627539503386, "grad_norm": 15.648707389831543, "learning_rate": 8.023380836923404e-06, "lm_loss": 5.6694, "loss": 1.5223, "step": 685, "text_contrastive_loss": 0.7845 }, { "contrastive_loss": 0.5461, "epoch": 1.5485327313769752, "grad_norm": 16.77898597717285, "learning_rate": 8.017598928900452e-06, "lm_loss": 5.6663, "loss": 1.4575, "step": 686, "text_contrastive_loss": 0.6896 }, { "contrastive_loss": 0.6517, "epoch": 1.5507900677200903, "grad_norm": 17.933149337768555, "learning_rate": 8.011810666904212e-06, "lm_loss": 5.6912, "loss": 1.668, "step": 687, "text_contrastive_loss": 0.8943 }, { "contrastive_loss": 0.7144, "epoch": 1.5530474040632054, "grad_norm": 19.620168685913086, "learning_rate": 8.006016063122672e-06, "lm_loss": 5.634, "loss": 1.7312, "step": 688, "text_contrastive_loss": 0.9069 }, { "contrastive_loss": 0.7824, "epoch": 1.5553047404063205, "grad_norm": 19.481861114501953, "learning_rate": 8.000215129757178e-06, "lm_loss": 5.5779, "loss": 1.8526, "step": 689, "text_contrastive_loss": 1.0248 }, { "contrastive_loss": 0.6186, "epoch": 1.5575620767494356, "grad_norm": 17.900890350341797, "learning_rate": 7.994407879022397e-06, "lm_loss": 5.6997, "loss": 1.5957, "step": 690, "text_contrastive_loss": 0.8142 }, { "contrastive_loss": 0.69, "epoch": 1.559819413092551, "grad_norm": 18.235824584960938, "learning_rate": 7.9885943231463e-06, "lm_loss": 5.6989, "loss": 1.7303, "step": 691, "text_contrastive_loss": 0.9408 }, { "contrastive_loss": 0.6137, "epoch": 1.5620767494356658, "grad_norm": 16.703012466430664, "learning_rate": 7.98277447437014e-06, "lm_loss": 5.6882, "loss": 1.6007, "step": 692, "text_contrastive_loss": 0.8362 }, { "contrastive_loss": 0.6348, "epoch": 1.564334085778781, "grad_norm": 16.685638427734375, "learning_rate": 7.976948344948412e-06, "lm_loss": 5.5761, "loss": 1.6125, "step": 693, "text_contrastive_loss": 0.8402 }, { "contrastive_loss": 0.6494, "epoch": 1.5665914221218962, "grad_norm": 16.68299102783203, "learning_rate": 7.971115947148842e-06, "lm_loss": 5.6636, "loss": 1.6867, "step": 694, "text_contrastive_loss": 0.942 }, { "contrastive_loss": 0.7732, "epoch": 1.5688487584650113, "grad_norm": 18.180322647094727, "learning_rate": 7.965277293252354e-06, "lm_loss": 5.6107, "loss": 1.7923, "step": 695, "text_contrastive_loss": 0.916 }, { "contrastive_loss": 0.6828, "epoch": 1.5711060948081266, "grad_norm": 17.642742156982422, "learning_rate": 7.95943239555304e-06, "lm_loss": 5.6807, "loss": 1.7629, "step": 696, "text_contrastive_loss": 1.0239 }, { "contrastive_loss": 0.6149, "epoch": 1.5733634311512414, "grad_norm": 15.9694242477417, "learning_rate": 7.953581266358148e-06, "lm_loss": 5.6692, "loss": 1.5975, "step": 697, "text_contrastive_loss": 0.8312 }, { "contrastive_loss": 0.5002, "epoch": 1.5756207674943568, "grad_norm": 15.842578887939453, "learning_rate": 7.94772391798804e-06, "lm_loss": 5.564, "loss": 1.382, "step": 698, "text_contrastive_loss": 0.6508 }, { "contrastive_loss": 0.7284, "epoch": 1.5778781038374716, "grad_norm": 15.847796440124512, "learning_rate": 7.941860362776176e-06, "lm_loss": 5.622, "loss": 1.7816, "step": 699, "text_contrastive_loss": 0.982 }, { "contrastive_loss": 0.6855, "epoch": 1.580135440180587, "grad_norm": 18.121475219726562, "learning_rate": 7.935990613069087e-06, "lm_loss": 5.6488, "loss": 1.6833, "step": 700, "text_contrastive_loss": 0.8658 }, { "contrastive_loss": 0.6365, "epoch": 1.582392776523702, "grad_norm": 16.283931732177734, "learning_rate": 7.930114681226341e-06, "lm_loss": 5.6791, "loss": 1.6332, "step": 701, "text_contrastive_loss": 0.8576 }, { "contrastive_loss": 0.6198, "epoch": 1.5846501128668171, "grad_norm": 17.043893814086914, "learning_rate": 7.924232579620533e-06, "lm_loss": 5.5998, "loss": 1.5759, "step": 702, "text_contrastive_loss": 0.7922 }, { "contrastive_loss": 0.6537, "epoch": 1.5869074492099324, "grad_norm": 16.453500747680664, "learning_rate": 7.91834432063724e-06, "lm_loss": 5.574, "loss": 1.6571, "step": 703, "text_contrastive_loss": 0.8919 }, { "contrastive_loss": 0.6257, "epoch": 1.5891647855530473, "grad_norm": 16.873483657836914, "learning_rate": 7.912449916675008e-06, "lm_loss": 5.7479, "loss": 1.6281, "step": 704, "text_contrastive_loss": 0.8553 }, { "contrastive_loss": 0.5225, "epoch": 1.5914221218961626, "grad_norm": 14.994361877441406, "learning_rate": 7.90654938014533e-06, "lm_loss": 5.7882, "loss": 1.5076, "step": 705, "text_contrastive_loss": 0.8125 }, { "contrastive_loss": 0.5905, "epoch": 1.5936794582392777, "grad_norm": 16.01613426208496, "learning_rate": 7.900642723472596e-06, "lm_loss": 5.7974, "loss": 1.5573, "step": 706, "text_contrastive_loss": 0.7741 }, { "contrastive_loss": 0.5499, "epoch": 1.5959367945823928, "grad_norm": 15.973322868347168, "learning_rate": 7.894729959094097e-06, "lm_loss": 5.5895, "loss": 1.5522, "step": 707, "text_contrastive_loss": 0.8867 }, { "contrastive_loss": 0.7159, "epoch": 1.5981941309255079, "grad_norm": 17.146879196166992, "learning_rate": 7.888811099459974e-06, "lm_loss": 5.6006, "loss": 1.7478, "step": 708, "text_contrastive_loss": 0.9437 }, { "contrastive_loss": 0.5521, "epoch": 1.600451467268623, "grad_norm": 15.732077598571777, "learning_rate": 7.882886157033209e-06, "lm_loss": 5.6624, "loss": 1.5276, "step": 709, "text_contrastive_loss": 0.8185 }, { "contrastive_loss": 0.5883, "epoch": 1.6027088036117383, "grad_norm": 16.416379928588867, "learning_rate": 7.876955144289594e-06, "lm_loss": 5.7265, "loss": 1.6427, "step": 710, "text_contrastive_loss": 0.9633 }, { "contrastive_loss": 0.6321, "epoch": 1.6049661399548532, "grad_norm": 17.86478614807129, "learning_rate": 7.871018073717693e-06, "lm_loss": 5.7549, "loss": 1.5937, "step": 711, "text_contrastive_loss": 0.7723 }, { "contrastive_loss": 0.5866, "epoch": 1.6072234762979685, "grad_norm": 17.412540435791016, "learning_rate": 7.865074957818839e-06, "lm_loss": 5.7178, "loss": 1.5264, "step": 712, "text_contrastive_loss": 0.736 }, { "contrastive_loss": 0.6745, "epoch": 1.6094808126410836, "grad_norm": 17.987707138061523, "learning_rate": 7.859125809107082e-06, "lm_loss": 5.6346, "loss": 1.6675, "step": 713, "text_contrastive_loss": 0.8591 }, { "contrastive_loss": 0.6714, "epoch": 1.6117381489841986, "grad_norm": 17.884342193603516, "learning_rate": 7.853170640109182e-06, "lm_loss": 5.7804, "loss": 1.6289, "step": 714, "text_contrastive_loss": 0.759 }, { "contrastive_loss": 0.6766, "epoch": 1.6139954853273137, "grad_norm": 16.94292449951172, "learning_rate": 7.847209463364574e-06, "lm_loss": 5.7075, "loss": 1.7594, "step": 715, "text_contrastive_loss": 1.024 }, { "contrastive_loss": 0.6867, "epoch": 1.6162528216704288, "grad_norm": 19.232624053955078, "learning_rate": 7.841242291425342e-06, "lm_loss": 5.69, "loss": 1.6586, "step": 716, "text_contrastive_loss": 0.8058 }, { "contrastive_loss": 0.7001, "epoch": 1.6185101580135441, "grad_norm": 17.87067222595215, "learning_rate": 7.835269136856194e-06, "lm_loss": 5.544, "loss": 1.7777, "step": 717, "text_contrastive_loss": 1.0465 }, { "contrastive_loss": 0.5075, "epoch": 1.620767494356659, "grad_norm": 20.154897689819336, "learning_rate": 7.829290012234438e-06, "lm_loss": 5.6526, "loss": 1.4962, "step": 718, "text_contrastive_loss": 0.8469 }, { "contrastive_loss": 0.5362, "epoch": 1.6230248306997743, "grad_norm": 15.40334415435791, "learning_rate": 7.823304930149949e-06, "lm_loss": 5.6081, "loss": 1.4894, "step": 719, "text_contrastive_loss": 0.7848 }, { "contrastive_loss": 0.5925, "epoch": 1.6252821670428894, "grad_norm": 15.82955265045166, "learning_rate": 7.817313903205148e-06, "lm_loss": 5.6713, "loss": 1.5427, "step": 720, "text_contrastive_loss": 0.7662 }, { "contrastive_loss": 0.6818, "epoch": 1.6275395033860045, "grad_norm": 16.279958724975586, "learning_rate": 7.811316944014974e-06, "lm_loss": 5.6306, "loss": 1.6209, "step": 721, "text_contrastive_loss": 0.7521 }, { "contrastive_loss": 0.5416, "epoch": 1.6297968397291196, "grad_norm": 16.718181610107422, "learning_rate": 7.805314065206857e-06, "lm_loss": 5.5601, "loss": 1.4453, "step": 722, "text_contrastive_loss": 0.6954 }, { "contrastive_loss": 0.7102, "epoch": 1.6320541760722347, "grad_norm": 17.937105178833008, "learning_rate": 7.799305279420691e-06, "lm_loss": 5.5872, "loss": 1.6505, "step": 723, "text_contrastive_loss": 0.7631 }, { "contrastive_loss": 0.5706, "epoch": 1.63431151241535, "grad_norm": 16.313173294067383, "learning_rate": 7.793290599308807e-06, "lm_loss": 5.6602, "loss": 1.5522, "step": 724, "text_contrastive_loss": 0.8312 }, { "contrastive_loss": 0.7123, "epoch": 1.6365688487584649, "grad_norm": 20.161495208740234, "learning_rate": 7.78727003753595e-06, "lm_loss": 5.5927, "loss": 1.7213, "step": 725, "text_contrastive_loss": 0.8995 }, { "contrastive_loss": 0.5938, "epoch": 1.6388261851015802, "grad_norm": 16.399438858032227, "learning_rate": 7.78124360677925e-06, "lm_loss": 5.7231, "loss": 1.5695, "step": 726, "text_contrastive_loss": 0.8068 }, { "contrastive_loss": 0.6285, "epoch": 1.6410835214446953, "grad_norm": 15.218472480773926, "learning_rate": 7.775211319728191e-06, "lm_loss": 5.5433, "loss": 1.5815, "step": 727, "text_contrastive_loss": 0.7972 }, { "contrastive_loss": 0.5776, "epoch": 1.6433408577878104, "grad_norm": 15.522934913635254, "learning_rate": 7.769173189084589e-06, "lm_loss": 5.7274, "loss": 1.556, "step": 728, "text_contrastive_loss": 0.8112 }, { "contrastive_loss": 0.7049, "epoch": 1.6455981941309257, "grad_norm": 16.525672912597656, "learning_rate": 7.763129227562568e-06, "lm_loss": 5.6235, "loss": 1.7948, "step": 729, "text_contrastive_loss": 1.055 }, { "contrastive_loss": 0.6319, "epoch": 1.6478555304740405, "grad_norm": 21.969823837280273, "learning_rate": 7.757079447888529e-06, "lm_loss": 5.6324, "loss": 1.5885, "step": 730, "text_contrastive_loss": 0.7868 }, { "contrastive_loss": 0.6082, "epoch": 1.6501128668171559, "grad_norm": 16.272430419921875, "learning_rate": 7.75102386280112e-06, "lm_loss": 5.581, "loss": 1.6565, "step": 731, "text_contrastive_loss": 0.9803 }, { "contrastive_loss": 0.5209, "epoch": 1.6523702031602707, "grad_norm": 15.750184059143066, "learning_rate": 7.744962485051217e-06, "lm_loss": 5.6793, "loss": 1.4867, "step": 732, "text_contrastive_loss": 0.7959 }, { "contrastive_loss": 0.8521, "epoch": 1.654627539503386, "grad_norm": 19.615497589111328, "learning_rate": 7.738895327401891e-06, "lm_loss": 5.6483, "loss": 1.9015, "step": 733, "text_contrastive_loss": 0.9692 }, { "contrastive_loss": 0.5763, "epoch": 1.6568848758465011, "grad_norm": 14.801111221313477, "learning_rate": 7.732822402628385e-06, "lm_loss": 5.5696, "loss": 1.5111, "step": 734, "text_contrastive_loss": 0.7557 }, { "contrastive_loss": 0.7067, "epoch": 1.6591422121896162, "grad_norm": 16.893308639526367, "learning_rate": 7.726743723518087e-06, "lm_loss": 5.6091, "loss": 1.7366, "step": 735, "text_contrastive_loss": 0.9378 }, { "contrastive_loss": 0.7603, "epoch": 1.6613995485327315, "grad_norm": 16.60019302368164, "learning_rate": 7.720659302870496e-06, "lm_loss": 5.5871, "loss": 1.7595, "step": 736, "text_contrastive_loss": 0.881 }, { "contrastive_loss": 0.6263, "epoch": 1.6636568848758464, "grad_norm": 15.957043647766113, "learning_rate": 7.714569153497204e-06, "lm_loss": 5.6781, "loss": 1.6366, "step": 737, "text_contrastive_loss": 0.885 }, { "contrastive_loss": 0.5992, "epoch": 1.6659142212189617, "grad_norm": 14.994237899780273, "learning_rate": 7.708473288221868e-06, "lm_loss": 5.5369, "loss": 1.4997, "step": 738, "text_contrastive_loss": 0.6935 }, { "contrastive_loss": 0.5831, "epoch": 1.6681715575620768, "grad_norm": 15.554744720458984, "learning_rate": 7.702371719880178e-06, "lm_loss": 5.6742, "loss": 1.5097, "step": 739, "text_contrastive_loss": 0.7185 }, { "contrastive_loss": 0.657, "epoch": 1.670428893905192, "grad_norm": 14.496306419372559, "learning_rate": 7.696264461319831e-06, "lm_loss": 5.6038, "loss": 1.6738, "step": 740, "text_contrastive_loss": 0.913 }, { "contrastive_loss": 0.633, "epoch": 1.672686230248307, "grad_norm": 16.079214096069336, "learning_rate": 7.69015152540051e-06, "lm_loss": 5.7092, "loss": 1.6277, "step": 741, "text_contrastive_loss": 0.8476 }, { "contrastive_loss": 0.5856, "epoch": 1.674943566591422, "grad_norm": 16.214691162109375, "learning_rate": 7.684032924993845e-06, "lm_loss": 5.6346, "loss": 1.5279, "step": 742, "text_contrastive_loss": 0.7577 }, { "contrastive_loss": 0.6277, "epoch": 1.6772009029345374, "grad_norm": 15.978594779968262, "learning_rate": 7.677908672983404e-06, "lm_loss": 5.5738, "loss": 1.5994, "step": 743, "text_contrastive_loss": 0.8286 }, { "contrastive_loss": 0.6228, "epoch": 1.6794582392776523, "grad_norm": 14.20463752746582, "learning_rate": 7.671778782264647e-06, "lm_loss": 5.635, "loss": 1.6288, "step": 744, "text_contrastive_loss": 0.8849 }, { "contrastive_loss": 0.787, "epoch": 1.6817155756207676, "grad_norm": 17.111305236816406, "learning_rate": 7.66564326574491e-06, "lm_loss": 5.6514, "loss": 1.8412, "step": 745, "text_contrastive_loss": 0.978 }, { "contrastive_loss": 0.6928, "epoch": 1.6839729119638827, "grad_norm": 19.419923782348633, "learning_rate": 7.65950213634337e-06, "lm_loss": 5.6392, "loss": 1.6916, "step": 746, "text_contrastive_loss": 0.8696 }, { "contrastive_loss": 0.5187, "epoch": 1.6862302483069977, "grad_norm": 15.22336196899414, "learning_rate": 7.653355406991034e-06, "lm_loss": 5.6101, "loss": 1.4763, "step": 747, "text_contrastive_loss": 0.7933 }, { "contrastive_loss": 0.5471, "epoch": 1.6884875846501128, "grad_norm": 16.713504791259766, "learning_rate": 7.64720309063069e-06, "lm_loss": 5.5356, "loss": 1.5305, "step": 748, "text_contrastive_loss": 0.8597 }, { "contrastive_loss": 0.6765, "epoch": 1.690744920993228, "grad_norm": 15.930522918701172, "learning_rate": 7.641045200216896e-06, "lm_loss": 5.597, "loss": 1.647, "step": 749, "text_contrastive_loss": 0.8216 }, { "contrastive_loss": 0.7218, "epoch": 1.6930022573363432, "grad_norm": 18.222816467285156, "learning_rate": 7.634881748715941e-06, "lm_loss": 5.74, "loss": 1.6928, "step": 750, "text_contrastive_loss": 0.7941 }, { "contrastive_loss": 0.6519, "epoch": 1.695259593679458, "grad_norm": 17.758466720581055, "learning_rate": 7.628712749105831e-06, "lm_loss": 5.6711, "loss": 1.7348, "step": 751, "text_contrastive_loss": 1.0316 }, { "contrastive_loss": 0.5155, "epoch": 1.6975169300225734, "grad_norm": 16.984390258789062, "learning_rate": 7.622538214376248e-06, "lm_loss": 5.53, "loss": 1.4664, "step": 752, "text_contrastive_loss": 0.7957 }, { "contrastive_loss": 0.5652, "epoch": 1.6997742663656885, "grad_norm": 15.909612655639648, "learning_rate": 7.616358157528535e-06, "lm_loss": 5.6624, "loss": 1.5215, "step": 753, "text_contrastive_loss": 0.7802 }, { "contrastive_loss": 0.5905, "epoch": 1.7020316027088036, "grad_norm": 16.675256729125977, "learning_rate": 7.610172591575656e-06, "lm_loss": 5.6161, "loss": 1.5576, "step": 754, "text_contrastive_loss": 0.811 }, { "contrastive_loss": 0.6756, "epoch": 1.7042889390519187, "grad_norm": 18.014429092407227, "learning_rate": 7.60398152954218e-06, "lm_loss": 5.6102, "loss": 1.6643, "step": 755, "text_contrastive_loss": 0.8554 }, { "contrastive_loss": 0.5153, "epoch": 1.7065462753950338, "grad_norm": 15.344369888305664, "learning_rate": 7.597784984464248e-06, "lm_loss": 5.6551, "loss": 1.4603, "step": 756, "text_contrastive_loss": 0.7589 }, { "contrastive_loss": 0.708, "epoch": 1.708803611738149, "grad_norm": 19.520374298095703, "learning_rate": 7.5915829693895435e-06, "lm_loss": 5.5414, "loss": 1.7017, "step": 757, "text_contrastive_loss": 0.879 }, { "contrastive_loss": 0.7059, "epoch": 1.711060948081264, "grad_norm": 16.9716796875, "learning_rate": 7.585375497377271e-06, "lm_loss": 5.6469, "loss": 1.778, "step": 758, "text_contrastive_loss": 1.0148 }, { "contrastive_loss": 0.5584, "epoch": 1.7133182844243793, "grad_norm": 14.892501831054688, "learning_rate": 7.579162581498125e-06, "lm_loss": 5.5184, "loss": 1.5484, "step": 759, "text_contrastive_loss": 0.8765 }, { "contrastive_loss": 0.6526, "epoch": 1.7155756207674944, "grad_norm": 17.0418643951416, "learning_rate": 7.572944234834261e-06, "lm_loss": 5.5582, "loss": 1.6884, "step": 760, "text_contrastive_loss": 0.9599 }, { "contrastive_loss": 0.6604, "epoch": 1.7178329571106095, "grad_norm": 15.868025779724121, "learning_rate": 7.5667204704792706e-06, "lm_loss": 5.6248, "loss": 1.6557, "step": 761, "text_contrastive_loss": 0.8656 }, { "contrastive_loss": 0.6528, "epoch": 1.7200902934537246, "grad_norm": 16.643239974975586, "learning_rate": 7.5604913015381535e-06, "lm_loss": 5.6341, "loss": 1.6089, "step": 762, "text_contrastive_loss": 0.7854 }, { "contrastive_loss": 0.6528, "epoch": 1.7223476297968396, "grad_norm": 15.834339141845703, "learning_rate": 7.554256741127291e-06, "lm_loss": 5.6096, "loss": 1.6176, "step": 763, "text_contrastive_loss": 0.8078 }, { "contrastive_loss": 0.6601, "epoch": 1.724604966139955, "grad_norm": 17.527952194213867, "learning_rate": 7.548016802374412e-06, "lm_loss": 5.5987, "loss": 1.6024, "step": 764, "text_contrastive_loss": 0.7649 }, { "contrastive_loss": 0.6799, "epoch": 1.7268623024830698, "grad_norm": 16.515607833862305, "learning_rate": 7.541771498418575e-06, "lm_loss": 5.7484, "loss": 1.7105, "step": 765, "text_contrastive_loss": 0.9116 }, { "contrastive_loss": 0.6125, "epoch": 1.7291196388261851, "grad_norm": 18.52354621887207, "learning_rate": 7.535520842410136e-06, "lm_loss": 5.6428, "loss": 1.5753, "step": 766, "text_contrastive_loss": 0.7972 }, { "contrastive_loss": 0.6454, "epoch": 1.7313769751693002, "grad_norm": 17.109241485595703, "learning_rate": 7.529264847510715e-06, "lm_loss": 5.6524, "loss": 1.6544, "step": 767, "text_contrastive_loss": 0.8875 }, { "contrastive_loss": 0.6188, "epoch": 1.7336343115124153, "grad_norm": 17.155672073364258, "learning_rate": 7.52300352689318e-06, "lm_loss": 5.6825, "loss": 1.6844, "step": 768, "text_contrastive_loss": 0.9947 }, { "contrastive_loss": 0.6882, "epoch": 1.7358916478555306, "grad_norm": 17.156185150146484, "learning_rate": 7.516736893741611e-06, "lm_loss": 5.6875, "loss": 1.6641, "step": 769, "text_contrastive_loss": 0.8142 }, { "contrastive_loss": 0.5441, "epoch": 1.7381489841986455, "grad_norm": 15.328943252563477, "learning_rate": 7.510464961251271e-06, "lm_loss": 5.6166, "loss": 1.4909, "step": 770, "text_contrastive_loss": 0.7704 }, { "contrastive_loss": 0.5575, "epoch": 1.7404063205417608, "grad_norm": 15.422810554504395, "learning_rate": 7.5041877426285856e-06, "lm_loss": 5.6112, "loss": 1.5464, "step": 771, "text_contrastive_loss": 0.8555 }, { "contrastive_loss": 0.6555, "epoch": 1.742663656884876, "grad_norm": 17.159561157226562, "learning_rate": 7.49790525109111e-06, "lm_loss": 5.6352, "loss": 1.6591, "step": 772, "text_contrastive_loss": 0.8802 }, { "contrastive_loss": 0.5397, "epoch": 1.744920993227991, "grad_norm": 14.259885787963867, "learning_rate": 7.491617499867502e-06, "lm_loss": 5.5852, "loss": 1.4501, "step": 773, "text_contrastive_loss": 0.7039 }, { "contrastive_loss": 0.5738, "epoch": 1.747178329571106, "grad_norm": 14.911514282226562, "learning_rate": 7.485324502197494e-06, "lm_loss": 5.6263, "loss": 1.5204, "step": 774, "text_contrastive_loss": 0.768 }, { "contrastive_loss": 0.4498, "epoch": 1.7494356659142212, "grad_norm": 13.753421783447266, "learning_rate": 7.479026271331864e-06, "lm_loss": 5.561, "loss": 1.4023, "step": 775, "text_contrastive_loss": 0.7929 }, { "contrastive_loss": 0.7496, "epoch": 1.7516930022573365, "grad_norm": 19.0356388092041, "learning_rate": 7.472722820532414e-06, "lm_loss": 5.5895, "loss": 1.7852, "step": 776, "text_contrastive_loss": 0.9534 }, { "contrastive_loss": 0.5782, "epoch": 1.7539503386004514, "grad_norm": 16.46868324279785, "learning_rate": 7.466414163071934e-06, "lm_loss": 5.5661, "loss": 1.6188, "step": 777, "text_contrastive_loss": 0.968 }, { "contrastive_loss": 0.732, "epoch": 1.7562076749435667, "grad_norm": 18.876466751098633, "learning_rate": 7.460100312234176e-06, "lm_loss": 5.6912, "loss": 1.7865, "step": 778, "text_contrastive_loss": 0.9707 }, { "contrastive_loss": 0.6535, "epoch": 1.7584650112866818, "grad_norm": 17.19675636291504, "learning_rate": 7.453781281313831e-06, "lm_loss": 5.6308, "loss": 1.6226, "step": 779, "text_contrastive_loss": 0.8121 }, { "contrastive_loss": 0.6757, "epoch": 1.7607223476297968, "grad_norm": 16.143611907958984, "learning_rate": 7.447457083616494e-06, "lm_loss": 5.6272, "loss": 1.7421, "step": 780, "text_contrastive_loss": 1.0074 }, { "contrastive_loss": 0.572, "epoch": 1.762979683972912, "grad_norm": 13.709511756896973, "learning_rate": 7.441127732458642e-06, "lm_loss": 5.5948, "loss": 1.5837, "step": 781, "text_contrastive_loss": 0.9045 }, { "contrastive_loss": 0.5823, "epoch": 1.765237020316027, "grad_norm": 15.926129341125488, "learning_rate": 7.434793241167601e-06, "lm_loss": 5.6045, "loss": 1.5605, "step": 782, "text_contrastive_loss": 0.8355 }, { "contrastive_loss": 0.5187, "epoch": 1.7674943566591423, "grad_norm": 14.050180435180664, "learning_rate": 7.428453623081522e-06, "lm_loss": 5.5293, "loss": 1.5309, "step": 783, "text_contrastive_loss": 0.9185 }, { "contrastive_loss": 0.6622, "epoch": 1.7697516930022572, "grad_norm": 15.827361106872559, "learning_rate": 7.422108891549349e-06, "lm_loss": 5.7322, "loss": 1.6548, "step": 784, "text_contrastive_loss": 0.8389 }, { "contrastive_loss": 0.5779, "epoch": 1.7720090293453725, "grad_norm": 16.51972770690918, "learning_rate": 7.415759059930799e-06, "lm_loss": 5.6133, "loss": 1.4832, "step": 785, "text_contrastive_loss": 0.688 }, { "contrastive_loss": 0.5768, "epoch": 1.7742663656884876, "grad_norm": 15.352350234985352, "learning_rate": 7.409404141596319e-06, "lm_loss": 5.4875, "loss": 1.5152, "step": 786, "text_contrastive_loss": 0.7793 }, { "contrastive_loss": 0.4536, "epoch": 1.7765237020316027, "grad_norm": 15.709349632263184, "learning_rate": 7.403044149927074e-06, "lm_loss": 5.617, "loss": 1.3583, "step": 787, "text_contrastive_loss": 0.6859 }, { "contrastive_loss": 0.5617, "epoch": 1.7787810383747178, "grad_norm": 15.498834609985352, "learning_rate": 7.396679098314908e-06, "lm_loss": 5.4998, "loss": 1.5357, "step": 788, "text_contrastive_loss": 0.8482 }, { "contrastive_loss": 0.6754, "epoch": 1.7810383747178329, "grad_norm": 17.250186920166016, "learning_rate": 7.390309000162321e-06, "lm_loss": 5.6479, "loss": 1.6909, "step": 789, "text_contrastive_loss": 0.9013 }, { "contrastive_loss": 0.6052, "epoch": 1.7832957110609482, "grad_norm": 17.019081115722656, "learning_rate": 7.383933868882438e-06, "lm_loss": 5.72, "loss": 1.5785, "step": 790, "text_contrastive_loss": 0.8025 }, { "contrastive_loss": 0.6701, "epoch": 1.785553047404063, "grad_norm": 18.17104148864746, "learning_rate": 7.377553717898983e-06, "lm_loss": 5.5142, "loss": 1.6041, "step": 791, "text_contrastive_loss": 0.7652 }, { "contrastive_loss": 0.5827, "epoch": 1.7878103837471784, "grad_norm": 17.872495651245117, "learning_rate": 7.37116856064625e-06, "lm_loss": 5.7807, "loss": 1.5565, "step": 792, "text_contrastive_loss": 0.7916 }, { "contrastive_loss": 0.7069, "epoch": 1.7900677200902935, "grad_norm": 18.860488891601562, "learning_rate": 7.364778410569071e-06, "lm_loss": 5.5773, "loss": 1.7215, "step": 793, "text_contrastive_loss": 0.9137 }, { "contrastive_loss": 0.5601, "epoch": 1.7923250564334086, "grad_norm": 16.680278778076172, "learning_rate": 7.358383281122797e-06, "lm_loss": 5.5869, "loss": 1.4951, "step": 794, "text_contrastive_loss": 0.7525 }, { "contrastive_loss": 0.693, "epoch": 1.7945823927765236, "grad_norm": 16.43022918701172, "learning_rate": 7.351983185773259e-06, "lm_loss": 5.6024, "loss": 1.7592, "step": 795, "text_contrastive_loss": 1.0118 }, { "contrastive_loss": 0.6788, "epoch": 1.7968397291196387, "grad_norm": 16.972368240356445, "learning_rate": 7.345578137996745e-06, "lm_loss": 5.5441, "loss": 1.6168, "step": 796, "text_contrastive_loss": 0.7673 }, { "contrastive_loss": 0.6206, "epoch": 1.799097065462754, "grad_norm": 15.841362953186035, "learning_rate": 7.339168151279974e-06, "lm_loss": 5.7047, "loss": 1.6207, "step": 797, "text_contrastive_loss": 0.8593 }, { "contrastive_loss": 0.5893, "epoch": 1.801354401805869, "grad_norm": 16.630756378173828, "learning_rate": 7.332753239120061e-06, "lm_loss": 5.6058, "loss": 1.5894, "step": 798, "text_contrastive_loss": 0.8792 }, { "contrastive_loss": 0.6043, "epoch": 1.8036117381489842, "grad_norm": 15.179779052734375, "learning_rate": 7.326333415024494e-06, "lm_loss": 5.5835, "loss": 1.5948, "step": 799, "text_contrastive_loss": 0.8643 }, { "contrastive_loss": 0.5845, "epoch": 1.8058690744920993, "grad_norm": 15.992825508117676, "learning_rate": 7.319908692511103e-06, "lm_loss": 5.6918, "loss": 1.5861, "step": 800, "text_contrastive_loss": 0.8648 }, { "contrastive_loss": 0.6359, "epoch": 1.8081264108352144, "grad_norm": 16.51412582397461, "learning_rate": 7.313479085108033e-06, "lm_loss": 5.6701, "loss": 1.6496, "step": 801, "text_contrastive_loss": 0.8935 }, { "contrastive_loss": 0.6834, "epoch": 1.8103837471783297, "grad_norm": 16.19455337524414, "learning_rate": 7.307044606353715e-06, "lm_loss": 5.5504, "loss": 1.6494, "step": 802, "text_contrastive_loss": 0.8219 }, { "contrastive_loss": 0.6577, "epoch": 1.8126410835214446, "grad_norm": 16.510953903198242, "learning_rate": 7.300605269796839e-06, "lm_loss": 5.5902, "loss": 1.6321, "step": 803, "text_contrastive_loss": 0.8306 }, { "contrastive_loss": 0.6756, "epoch": 1.81489841986456, "grad_norm": 18.364416122436523, "learning_rate": 7.2941610889963164e-06, "lm_loss": 5.6027, "loss": 1.7232, "step": 804, "text_contrastive_loss": 0.9745 }, { "contrastive_loss": 0.6775, "epoch": 1.8171557562076748, "grad_norm": 17.008562088012695, "learning_rate": 7.2877120775212685e-06, "lm_loss": 5.7175, "loss": 1.7378, "step": 805, "text_contrastive_loss": 0.9769 }, { "contrastive_loss": 0.6402, "epoch": 1.81941309255079, "grad_norm": 16.21780014038086, "learning_rate": 7.2812582489509844e-06, "lm_loss": 5.6571, "loss": 1.5613, "step": 806, "text_contrastive_loss": 0.7108 }, { "contrastive_loss": 0.6218, "epoch": 1.8216704288939052, "grad_norm": 17.749752044677734, "learning_rate": 7.2747996168748915e-06, "lm_loss": 5.5993, "loss": 1.5797, "step": 807, "text_contrastive_loss": 0.7959 }, { "contrastive_loss": 0.5546, "epoch": 1.8239277652370203, "grad_norm": 14.439248085021973, "learning_rate": 7.26833619489254e-06, "lm_loss": 5.5922, "loss": 1.4468, "step": 808, "text_contrastive_loss": 0.666 }, { "contrastive_loss": 0.6521, "epoch": 1.8261851015801356, "grad_norm": 17.860071182250977, "learning_rate": 7.261867996613559e-06, "lm_loss": 5.7373, "loss": 1.6656, "step": 809, "text_contrastive_loss": 0.8796 }, { "contrastive_loss": 0.5127, "epoch": 1.8284424379232505, "grad_norm": 16.678512573242188, "learning_rate": 7.255395035657639e-06, "lm_loss": 5.6824, "loss": 1.4757, "step": 810, "text_contrastive_loss": 0.7894 }, { "contrastive_loss": 0.5386, "epoch": 1.8306997742663658, "grad_norm": 16.262500762939453, "learning_rate": 7.2489173256544975e-06, "lm_loss": 5.5996, "loss": 1.5711, "step": 811, "text_contrastive_loss": 0.9452 }, { "contrastive_loss": 0.5726, "epoch": 1.8329571106094809, "grad_norm": 15.10853099822998, "learning_rate": 7.242434880243851e-06, "lm_loss": 5.57, "loss": 1.5025, "step": 812, "text_contrastive_loss": 0.7457 }, { "contrastive_loss": 0.5137, "epoch": 1.835214446952596, "grad_norm": 16.65679931640625, "learning_rate": 7.235947713075389e-06, "lm_loss": 5.5702, "loss": 1.3889, "step": 813, "text_contrastive_loss": 0.6364 }, { "contrastive_loss": 0.6597, "epoch": 1.837471783295711, "grad_norm": 18.37490463256836, "learning_rate": 7.229455837808741e-06, "lm_loss": 5.6616, "loss": 1.6702, "step": 814, "text_contrastive_loss": 0.8886 }, { "contrastive_loss": 0.5845, "epoch": 1.8397291196388261, "grad_norm": 16.170198440551758, "learning_rate": 7.222959268113452e-06, "lm_loss": 5.5558, "loss": 1.5941, "step": 815, "text_contrastive_loss": 0.908 }, { "contrastive_loss": 0.5638, "epoch": 1.8419864559819414, "grad_norm": 16.061973571777344, "learning_rate": 7.216458017668951e-06, "lm_loss": 5.6042, "loss": 1.4835, "step": 816, "text_contrastive_loss": 0.7185 }, { "contrastive_loss": 0.5483, "epoch": 1.8442437923250563, "grad_norm": 16.66253662109375, "learning_rate": 7.2099521001645225e-06, "lm_loss": 5.5736, "loss": 1.4765, "step": 817, "text_contrastive_loss": 0.7417 }, { "contrastive_loss": 0.6933, "epoch": 1.8465011286681716, "grad_norm": 16.928455352783203, "learning_rate": 7.20344152929928e-06, "lm_loss": 5.7431, "loss": 1.7452, "step": 818, "text_contrastive_loss": 0.9552 }, { "contrastive_loss": 0.4993, "epoch": 1.8487584650112867, "grad_norm": 15.764592170715332, "learning_rate": 7.19692631878213e-06, "lm_loss": 5.6099, "loss": 1.4026, "step": 819, "text_contrastive_loss": 0.6845 }, { "contrastive_loss": 0.6099, "epoch": 1.8510158013544018, "grad_norm": 15.19287395477295, "learning_rate": 7.190406482331757e-06, "lm_loss": 5.6551, "loss": 1.5898, "step": 820, "text_contrastive_loss": 0.8288 }, { "contrastive_loss": 0.6181, "epoch": 1.853273137697517, "grad_norm": 17.772113800048828, "learning_rate": 7.183882033676579e-06, "lm_loss": 5.5202, "loss": 1.538, "step": 821, "text_contrastive_loss": 0.7357 }, { "contrastive_loss": 0.6082, "epoch": 1.855530474040632, "grad_norm": 17.984954833984375, "learning_rate": 7.177352986554729e-06, "lm_loss": 5.5946, "loss": 1.6758, "step": 822, "text_contrastive_loss": 1.0162 }, { "contrastive_loss": 0.5879, "epoch": 1.8577878103837473, "grad_norm": 16.008399963378906, "learning_rate": 7.1708193547140205e-06, "lm_loss": 5.7365, "loss": 1.5873, "step": 823, "text_contrastive_loss": 0.8514 }, { "contrastive_loss": 0.5878, "epoch": 1.8600451467268622, "grad_norm": 15.425915718078613, "learning_rate": 7.164281151911923e-06, "lm_loss": 5.6353, "loss": 1.5688, "step": 824, "text_contrastive_loss": 0.8348 }, { "contrastive_loss": 0.673, "epoch": 1.8623024830699775, "grad_norm": 15.963248252868652, "learning_rate": 7.157738391915531e-06, "lm_loss": 5.5391, "loss": 1.6485, "step": 825, "text_contrastive_loss": 0.8431 }, { "contrastive_loss": 0.6093, "epoch": 1.8645598194130926, "grad_norm": 16.09975242614746, "learning_rate": 7.151191088501531e-06, "lm_loss": 5.5798, "loss": 1.591, "step": 826, "text_contrastive_loss": 0.8473 }, { "contrastive_loss": 0.6162, "epoch": 1.8668171557562077, "grad_norm": 15.717811584472656, "learning_rate": 7.14463925545618e-06, "lm_loss": 5.6483, "loss": 1.6201, "step": 827, "text_contrastive_loss": 0.878 }, { "contrastive_loss": 0.5907, "epoch": 1.8690744920993227, "grad_norm": 15.425777435302734, "learning_rate": 7.138082906575271e-06, "lm_loss": 5.5912, "loss": 1.5954, "step": 828, "text_contrastive_loss": 0.8913 }, { "contrastive_loss": 0.6175, "epoch": 1.8713318284424378, "grad_norm": 16.30082893371582, "learning_rate": 7.131522055664109e-06, "lm_loss": 5.4914, "loss": 1.5953, "step": 829, "text_contrastive_loss": 0.8574 }, { "contrastive_loss": 0.5407, "epoch": 1.8735891647855532, "grad_norm": 14.981559753417969, "learning_rate": 7.124956716537471e-06, "lm_loss": 5.6236, "loss": 1.5297, "step": 830, "text_contrastive_loss": 0.8534 }, { "contrastive_loss": 0.5908, "epoch": 1.875846501128668, "grad_norm": 15.349632263183594, "learning_rate": 7.118386903019594e-06, "lm_loss": 5.5637, "loss": 1.5616, "step": 831, "text_contrastive_loss": 0.8288 }, { "contrastive_loss": 0.664, "epoch": 1.8781038374717833, "grad_norm": 16.663297653198242, "learning_rate": 7.111812628944132e-06, "lm_loss": 5.703, "loss": 1.6699, "step": 832, "text_contrastive_loss": 0.8711 }, { "contrastive_loss": 0.6863, "epoch": 1.8803611738148984, "grad_norm": 17.10484504699707, "learning_rate": 7.105233908154128e-06, "lm_loss": 5.494, "loss": 1.7709, "step": 833, "text_contrastive_loss": 1.0703 }, { "contrastive_loss": 0.6986, "epoch": 1.8826185101580135, "grad_norm": 16.48059844970703, "learning_rate": 7.098650754501994e-06, "lm_loss": 5.614, "loss": 1.6907, "step": 834, "text_contrastive_loss": 0.8613 }, { "contrastive_loss": 0.5455, "epoch": 1.8848758465011288, "grad_norm": 15.246926307678223, "learning_rate": 7.0920631818494745e-06, "lm_loss": 5.749, "loss": 1.5203, "step": 835, "text_contrastive_loss": 0.7998 }, { "contrastive_loss": 0.6555, "epoch": 1.8871331828442437, "grad_norm": 16.64633560180664, "learning_rate": 7.085471204067616e-06, "lm_loss": 5.5486, "loss": 1.5612, "step": 836, "text_contrastive_loss": 0.7016 }, { "contrastive_loss": 0.6638, "epoch": 1.889390519187359, "grad_norm": 16.877185821533203, "learning_rate": 7.078874835036742e-06, "lm_loss": 5.5712, "loss": 1.6556, "step": 837, "text_contrastive_loss": 0.8694 }, { "contrastive_loss": 0.5417, "epoch": 1.8916478555304739, "grad_norm": 15.430839538574219, "learning_rate": 7.072274088646425e-06, "lm_loss": 5.5435, "loss": 1.5126, "step": 838, "text_contrastive_loss": 0.8332 }, { "contrastive_loss": 0.6148, "epoch": 1.8939051918735892, "grad_norm": 14.822012901306152, "learning_rate": 7.065668978795449e-06, "lm_loss": 5.5579, "loss": 1.6278, "step": 839, "text_contrastive_loss": 0.9145 }, { "contrastive_loss": 0.5328, "epoch": 1.8961625282167043, "grad_norm": 14.90259838104248, "learning_rate": 7.059059519391794e-06, "lm_loss": 5.6553, "loss": 1.482, "step": 840, "text_contrastive_loss": 0.7674 }, { "contrastive_loss": 0.6179, "epoch": 1.8984198645598194, "grad_norm": 15.720355987548828, "learning_rate": 7.05244572435259e-06, "lm_loss": 5.5941, "loss": 1.5282, "step": 841, "text_contrastive_loss": 0.7018 }, { "contrastive_loss": 0.5856, "epoch": 1.9006772009029347, "grad_norm": 18.02252769470215, "learning_rate": 7.045827607604103e-06, "lm_loss": 5.5812, "loss": 1.4887, "step": 842, "text_contrastive_loss": 0.6899 }, { "contrastive_loss": 0.6467, "epoch": 1.9029345372460496, "grad_norm": 16.064016342163086, "learning_rate": 7.039205183081694e-06, "lm_loss": 5.6405, "loss": 1.6037, "step": 843, "text_contrastive_loss": 0.7858 }, { "contrastive_loss": 0.6753, "epoch": 1.9051918735891649, "grad_norm": 17.041418075561523, "learning_rate": 7.0325784647298e-06, "lm_loss": 5.65, "loss": 1.6561, "step": 844, "text_contrastive_loss": 0.8316 }, { "contrastive_loss": 0.5641, "epoch": 1.90744920993228, "grad_norm": 16.694120407104492, "learning_rate": 7.0259474665018915e-06, "lm_loss": 5.6036, "loss": 1.5535, "step": 845, "text_contrastive_loss": 0.8581 }, { "contrastive_loss": 0.5545, "epoch": 1.909706546275395, "grad_norm": 15.350920677185059, "learning_rate": 7.019312202360457e-06, "lm_loss": 5.5084, "loss": 1.4644, "step": 846, "text_contrastive_loss": 0.718 }, { "contrastive_loss": 0.7071, "epoch": 1.9119638826185101, "grad_norm": 17.851167678833008, "learning_rate": 7.012672686276969e-06, "lm_loss": 5.565, "loss": 1.6721, "step": 847, "text_contrastive_loss": 0.8169 }, { "contrastive_loss": 0.662, "epoch": 1.9142212189616252, "grad_norm": 17.496871948242188, "learning_rate": 7.006028932231847e-06, "lm_loss": 5.4837, "loss": 1.6971, "step": 848, "text_contrastive_loss": 0.9735 }, { "contrastive_loss": 0.6087, "epoch": 1.9164785553047405, "grad_norm": 15.356800079345703, "learning_rate": 6.999380954214438e-06, "lm_loss": 5.6388, "loss": 1.6348, "step": 849, "text_contrastive_loss": 0.9244 }, { "contrastive_loss": 0.5352, "epoch": 1.9187358916478554, "grad_norm": 16.45505142211914, "learning_rate": 6.992728766222982e-06, "lm_loss": 5.6371, "loss": 1.4918, "step": 850, "text_contrastive_loss": 0.7856 }, { "contrastive_loss": 0.6315, "epoch": 1.9209932279909707, "grad_norm": 16.982715606689453, "learning_rate": 6.9860723822645825e-06, "lm_loss": 5.5917, "loss": 1.6422, "step": 851, "text_contrastive_loss": 0.9031 }, { "contrastive_loss": 0.5311, "epoch": 1.9232505643340858, "grad_norm": 15.755159378051758, "learning_rate": 6.979411816355183e-06, "lm_loss": 5.5169, "loss": 1.4205, "step": 852, "text_contrastive_loss": 0.6753 }, { "contrastive_loss": 0.6723, "epoch": 1.925507900677201, "grad_norm": 17.17357063293457, "learning_rate": 6.972747082519526e-06, "lm_loss": 5.5413, "loss": 1.731, "step": 853, "text_contrastive_loss": 1.0091 }, { "contrastive_loss": 0.5499, "epoch": 1.927765237020316, "grad_norm": 15.229076385498047, "learning_rate": 6.966078194791133e-06, "lm_loss": 5.5788, "loss": 1.4664, "step": 854, "text_contrastive_loss": 0.7172 }, { "contrastive_loss": 0.7271, "epoch": 1.930022573363431, "grad_norm": 17.912700653076172, "learning_rate": 6.959405167212278e-06, "lm_loss": 5.6208, "loss": 1.7654, "step": 855, "text_contrastive_loss": 0.9525 }, { "contrastive_loss": 0.6074, "epoch": 1.9322799097065464, "grad_norm": 15.696358680725098, "learning_rate": 6.952728013833941e-06, "lm_loss": 5.6749, "loss": 1.6688, "step": 856, "text_contrastive_loss": 0.9878 }, { "contrastive_loss": 0.6983, "epoch": 1.9345372460496613, "grad_norm": 17.30682373046875, "learning_rate": 6.946046748715796e-06, "lm_loss": 5.6378, "loss": 1.7168, "step": 857, "text_contrastive_loss": 0.9095 }, { "contrastive_loss": 0.5927, "epoch": 1.9367945823927766, "grad_norm": 17.79252052307129, "learning_rate": 6.9393613859261755e-06, "lm_loss": 5.6205, "loss": 1.5651, "step": 858, "text_contrastive_loss": 0.8207 }, { "contrastive_loss": 0.6771, "epoch": 1.9390519187358917, "grad_norm": 18.796653747558594, "learning_rate": 6.932671939542037e-06, "lm_loss": 5.5481, "loss": 1.7077, "step": 859, "text_contrastive_loss": 0.9515 }, { "contrastive_loss": 0.7152, "epoch": 1.9413092550790068, "grad_norm": 17.163236618041992, "learning_rate": 6.925978423648941e-06, "lm_loss": 5.659, "loss": 1.7527, "step": 860, "text_contrastive_loss": 0.9432 }, { "contrastive_loss": 0.6069, "epoch": 1.9435665914221218, "grad_norm": 14.760443687438965, "learning_rate": 6.919280852341011e-06, "lm_loss": 5.5914, "loss": 1.6069, "step": 861, "text_contrastive_loss": 0.8818 }, { "contrastive_loss": 0.4893, "epoch": 1.945823927765237, "grad_norm": 14.319083213806152, "learning_rate": 6.912579239720913e-06, "lm_loss": 5.6018, "loss": 1.3888, "step": 862, "text_contrastive_loss": 0.6787 }, { "contrastive_loss": 0.6111, "epoch": 1.9480812641083523, "grad_norm": 15.737593650817871, "learning_rate": 6.9058735998998224e-06, "lm_loss": 5.5189, "loss": 1.5841, "step": 863, "text_contrastive_loss": 0.8422 }, { "contrastive_loss": 0.7067, "epoch": 1.9503386004514671, "grad_norm": 16.638887405395508, "learning_rate": 6.899163946997396e-06, "lm_loss": 5.5953, "loss": 1.6981, "step": 864, "text_contrastive_loss": 0.8638 }, { "contrastive_loss": 0.7025, "epoch": 1.9525959367945824, "grad_norm": 16.231441497802734, "learning_rate": 6.892450295141737e-06, "lm_loss": 5.6633, "loss": 1.6773, "step": 865, "text_contrastive_loss": 0.8169 }, { "contrastive_loss": 0.6093, "epoch": 1.9548532731376975, "grad_norm": 15.671695709228516, "learning_rate": 6.885732658469374e-06, "lm_loss": 5.6814, "loss": 1.6418, "step": 866, "text_contrastive_loss": 0.9287 }, { "contrastive_loss": 0.6353, "epoch": 1.9571106094808126, "grad_norm": 17.087011337280273, "learning_rate": 6.8790110511252195e-06, "lm_loss": 5.6027, "loss": 1.5883, "step": 867, "text_contrastive_loss": 0.7855 }, { "contrastive_loss": 0.6424, "epoch": 1.959367945823928, "grad_norm": 14.936962127685547, "learning_rate": 6.872285487262555e-06, "lm_loss": 5.5884, "loss": 1.6137, "step": 868, "text_contrastive_loss": 0.8249 }, { "contrastive_loss": 0.6417, "epoch": 1.9616252821670428, "grad_norm": 16.661287307739258, "learning_rate": 6.865555981042983e-06, "lm_loss": 5.6825, "loss": 1.668, "step": 869, "text_contrastive_loss": 0.9159 }, { "contrastive_loss": 0.5622, "epoch": 1.963882618510158, "grad_norm": 15.579157829284668, "learning_rate": 6.858822546636417e-06, "lm_loss": 5.4974, "loss": 1.508, "step": 870, "text_contrastive_loss": 0.7922 }, { "contrastive_loss": 0.6186, "epoch": 1.966139954853273, "grad_norm": 15.09514331817627, "learning_rate": 6.852085198221035e-06, "lm_loss": 5.5999, "loss": 1.6754, "step": 871, "text_contrastive_loss": 0.9935 }, { "contrastive_loss": 0.668, "epoch": 1.9683972911963883, "grad_norm": 18.11931037902832, "learning_rate": 6.845343949983258e-06, "lm_loss": 5.6957, "loss": 1.6854, "step": 872, "text_contrastive_loss": 0.8957 }, { "contrastive_loss": 0.6113, "epoch": 1.9706546275395034, "grad_norm": 15.733857154846191, "learning_rate": 6.838598816117725e-06, "lm_loss": 5.6184, "loss": 1.6452, "step": 873, "text_contrastive_loss": 0.944 }, { "contrastive_loss": 0.6003, "epoch": 1.9729119638826185, "grad_norm": 15.424590110778809, "learning_rate": 6.831849810827247e-06, "lm_loss": 5.4789, "loss": 1.5784, "step": 874, "text_contrastive_loss": 0.8603 }, { "contrastive_loss": 0.6459, "epoch": 1.9751693002257338, "grad_norm": 17.472450256347656, "learning_rate": 6.825096948322791e-06, "lm_loss": 5.5224, "loss": 1.6739, "step": 875, "text_contrastive_loss": 0.9516 }, { "contrastive_loss": 0.6078, "epoch": 1.9774266365688487, "grad_norm": 17.250064849853516, "learning_rate": 6.818340242823449e-06, "lm_loss": 5.5071, "loss": 1.545, "step": 876, "text_contrastive_loss": 0.7729 }, { "contrastive_loss": 0.5681, "epoch": 1.979683972911964, "grad_norm": 15.3363618850708, "learning_rate": 6.8115797085564e-06, "lm_loss": 5.5644, "loss": 1.5219, "step": 877, "text_contrastive_loss": 0.7947 }, { "contrastive_loss": 0.6233, "epoch": 1.981941309255079, "grad_norm": 16.2225341796875, "learning_rate": 6.804815359756887e-06, "lm_loss": 5.6397, "loss": 1.5594, "step": 878, "text_contrastive_loss": 0.7442 }, { "contrastive_loss": 0.5095, "epoch": 1.9841986455981941, "grad_norm": 14.354838371276855, "learning_rate": 6.798047210668185e-06, "lm_loss": 5.5914, "loss": 1.4432, "step": 879, "text_contrastive_loss": 0.7491 }, { "contrastive_loss": 0.5826, "epoch": 1.9864559819413092, "grad_norm": 18.07027244567871, "learning_rate": 6.7912752755415716e-06, "lm_loss": 5.6192, "loss": 1.5915, "step": 880, "text_contrastive_loss": 0.8941 }, { "contrastive_loss": 0.6141, "epoch": 1.9887133182844243, "grad_norm": 16.308622360229492, "learning_rate": 6.7844995686362955e-06, "lm_loss": 5.6988, "loss": 1.5604, "step": 881, "text_contrastive_loss": 0.7529 }, { "contrastive_loss": 0.5646, "epoch": 1.9909706546275396, "grad_norm": 14.187543869018555, "learning_rate": 6.777720104219548e-06, "lm_loss": 5.6544, "loss": 1.5361, "step": 882, "text_contrastive_loss": 0.812 }, { "contrastive_loss": 0.6631, "epoch": 1.9932279909706545, "grad_norm": 16.99749755859375, "learning_rate": 6.770936896566434e-06, "lm_loss": 5.6296, "loss": 1.6973, "step": 883, "text_contrastive_loss": 0.9424 }, { "contrastive_loss": 0.5883, "epoch": 1.9954853273137698, "grad_norm": 16.888206481933594, "learning_rate": 6.7641499599599355e-06, "lm_loss": 5.6055, "loss": 1.5547, "step": 884, "text_contrastive_loss": 0.8117 }, { "contrastive_loss": 0.5973, "epoch": 1.997742663656885, "grad_norm": 16.451189041137695, "learning_rate": 6.757359308690889e-06, "lm_loss": 5.5911, "loss": 1.6725, "step": 885, "text_contrastive_loss": 1.0321 }, { "contrastive_loss": 0.3834, "epoch": 2.0, "grad_norm": 14.721372604370117, "learning_rate": 6.750564957057958e-06, "lm_loss": 5.4662, "loss": 1.2558, "step": 886, "text_contrastive_loss": 0.6516 }, { "contrastive_loss": 0.5205, "epoch": 2.0022573363431153, "grad_norm": 14.922968864440918, "learning_rate": 6.743766919367588e-06, "lm_loss": 5.5173, "loss": 1.5126, "step": 887, "text_contrastive_loss": 0.8808 }, { "contrastive_loss": 0.5919, "epoch": 2.00451467268623, "grad_norm": 13.492685317993164, "learning_rate": 6.736965209933992e-06, "lm_loss": 5.4521, "loss": 1.5412, "step": 888, "text_contrastive_loss": 0.8082 }, { "contrastive_loss": 0.5223, "epoch": 2.0067720090293455, "grad_norm": 14.532832145690918, "learning_rate": 6.730159843079113e-06, "lm_loss": 5.4331, "loss": 1.5689, "step": 889, "text_contrastive_loss": 1.0067 }, { "contrastive_loss": 0.5552, "epoch": 2.0090293453724604, "grad_norm": 15.606881141662598, "learning_rate": 6.723350833132596e-06, "lm_loss": 5.4307, "loss": 1.546, "step": 890, "text_contrastive_loss": 0.8954 }, { "contrastive_loss": 0.6329, "epoch": 2.0112866817155757, "grad_norm": 17.337417602539062, "learning_rate": 6.716538194431754e-06, "lm_loss": 5.6051, "loss": 1.6961, "step": 891, "text_contrastive_loss": 1.0053 }, { "contrastive_loss": 0.5831, "epoch": 2.0135440180586905, "grad_norm": 15.493812561035156, "learning_rate": 6.7097219413215474e-06, "lm_loss": 5.6355, "loss": 1.5764, "step": 892, "text_contrastive_loss": 0.8596 }, { "contrastive_loss": 0.5551, "epoch": 2.015801354401806, "grad_norm": 15.401676177978516, "learning_rate": 6.702902088154539e-06, "lm_loss": 5.6025, "loss": 1.5234, "step": 893, "text_contrastive_loss": 0.8163 }, { "contrastive_loss": 0.4718, "epoch": 2.018058690744921, "grad_norm": 15.74850082397461, "learning_rate": 6.696078649290878e-06, "lm_loss": 5.5779, "loss": 1.4007, "step": 894, "text_contrastive_loss": 0.7422 }, { "contrastive_loss": 0.4403, "epoch": 2.020316027088036, "grad_norm": 14.414772033691406, "learning_rate": 6.689251639098261e-06, "lm_loss": 5.6971, "loss": 1.323, "step": 895, "text_contrastive_loss": 0.626 }, { "contrastive_loss": 0.6067, "epoch": 2.0225733634311513, "grad_norm": 15.891339302062988, "learning_rate": 6.682421071951907e-06, "lm_loss": 5.665, "loss": 1.6, "step": 896, "text_contrastive_loss": 0.8535 }, { "contrastive_loss": 0.5972, "epoch": 2.024830699774266, "grad_norm": 15.956475257873535, "learning_rate": 6.67558696223452e-06, "lm_loss": 5.6644, "loss": 1.609, "step": 897, "text_contrastive_loss": 0.8907 }, { "contrastive_loss": 0.5028, "epoch": 2.0270880361173815, "grad_norm": 14.930764198303223, "learning_rate": 6.668749324336268e-06, "lm_loss": 5.6442, "loss": 1.4141, "step": 898, "text_contrastive_loss": 0.6937 }, { "contrastive_loss": 0.5613, "epoch": 2.0293453724604964, "grad_norm": 15.869874000549316, "learning_rate": 6.661908172654746e-06, "lm_loss": 5.5546, "loss": 1.4967, "step": 899, "text_contrastive_loss": 0.7599 }, { "contrastive_loss": 0.5959, "epoch": 2.0316027088036117, "grad_norm": 15.18961238861084, "learning_rate": 6.65506352159495e-06, "lm_loss": 5.5258, "loss": 1.5579, "step": 900, "text_contrastive_loss": 0.8189 }, { "contrastive_loss": 0.4927, "epoch": 2.033860045146727, "grad_norm": 14.179841041564941, "learning_rate": 6.6482153855692395e-06, "lm_loss": 5.6035, "loss": 1.4198, "step": 901, "text_contrastive_loss": 0.7335 }, { "contrastive_loss": 0.6173, "epoch": 2.036117381489842, "grad_norm": 16.063549041748047, "learning_rate": 6.64136377899732e-06, "lm_loss": 5.6174, "loss": 1.5094, "step": 902, "text_contrastive_loss": 0.6607 }, { "contrastive_loss": 0.6371, "epoch": 2.038374717832957, "grad_norm": 17.114227294921875, "learning_rate": 6.6345087163061935e-06, "lm_loss": 5.5659, "loss": 1.7515, "step": 903, "text_contrastive_loss": 1.1155 }, { "contrastive_loss": 0.5376, "epoch": 2.040632054176072, "grad_norm": 15.359448432922363, "learning_rate": 6.627650211930152e-06, "lm_loss": 5.535, "loss": 1.6288, "step": 904, "text_contrastive_loss": 1.0754 }, { "contrastive_loss": 0.5696, "epoch": 2.0428893905191874, "grad_norm": 14.724507331848145, "learning_rate": 6.620788280310722e-06, "lm_loss": 5.4937, "loss": 1.5034, "step": 905, "text_contrastive_loss": 0.7688 }, { "contrastive_loss": 0.5079, "epoch": 2.0451467268623027, "grad_norm": 14.867406845092773, "learning_rate": 6.613922935896659e-06, "lm_loss": 5.5898, "loss": 1.401, "step": 906, "text_contrastive_loss": 0.6683 }, { "contrastive_loss": 0.6476, "epoch": 2.0474040632054176, "grad_norm": 17.842845916748047, "learning_rate": 6.607054193143894e-06, "lm_loss": 5.5677, "loss": 1.6823, "step": 907, "text_contrastive_loss": 0.9559 }, { "contrastive_loss": 0.5156, "epoch": 2.049661399548533, "grad_norm": 14.921574592590332, "learning_rate": 6.600182066515519e-06, "lm_loss": 5.6815, "loss": 1.409, "step": 908, "text_contrastive_loss": 0.6505 }, { "contrastive_loss": 0.5357, "epoch": 2.0519187358916477, "grad_norm": 15.891834259033203, "learning_rate": 6.593306570481751e-06, "lm_loss": 5.5604, "loss": 1.4846, "step": 909, "text_contrastive_loss": 0.7856 }, { "contrastive_loss": 0.5784, "epoch": 2.054176072234763, "grad_norm": 17.075801849365234, "learning_rate": 6.586427719519901e-06, "lm_loss": 5.4482, "loss": 1.5577, "step": 910, "text_contrastive_loss": 0.8688 }, { "contrastive_loss": 0.5847, "epoch": 2.056433408577878, "grad_norm": 15.731554985046387, "learning_rate": 6.579545528114344e-06, "lm_loss": 5.6414, "loss": 1.5577, "step": 911, "text_contrastive_loss": 0.8177 }, { "contrastive_loss": 0.5448, "epoch": 2.0586907449209932, "grad_norm": 16.339303970336914, "learning_rate": 6.572660010756489e-06, "lm_loss": 5.4309, "loss": 1.4814, "step": 912, "text_contrastive_loss": 0.7871 }, { "contrastive_loss": 0.4855, "epoch": 2.0609480812641086, "grad_norm": 13.657450675964355, "learning_rate": 6.565771181944747e-06, "lm_loss": 5.5683, "loss": 1.3892, "step": 913, "text_contrastive_loss": 0.6938 }, { "contrastive_loss": 0.5451, "epoch": 2.0632054176072234, "grad_norm": 15.438440322875977, "learning_rate": 6.558879056184505e-06, "lm_loss": 5.657, "loss": 1.5392, "step": 914, "text_contrastive_loss": 0.8569 }, { "contrastive_loss": 0.5521, "epoch": 2.0654627539503387, "grad_norm": 14.946206092834473, "learning_rate": 6.551983647988089e-06, "lm_loss": 5.5941, "loss": 1.5225, "step": 915, "text_contrastive_loss": 0.822 }, { "contrastive_loss": 0.6857, "epoch": 2.0677200902934536, "grad_norm": 16.517148971557617, "learning_rate": 6.545084971874738e-06, "lm_loss": 5.5931, "loss": 1.7039, "step": 916, "text_contrastive_loss": 0.9178 }, { "contrastive_loss": 0.4806, "epoch": 2.069977426636569, "grad_norm": 16.00227165222168, "learning_rate": 6.5381830423705714e-06, "lm_loss": 5.5547, "loss": 1.4019, "step": 917, "text_contrastive_loss": 0.7317 }, { "contrastive_loss": 0.5729, "epoch": 2.072234762979684, "grad_norm": 16.76964569091797, "learning_rate": 6.531277874008562e-06, "lm_loss": 5.5874, "loss": 1.6875, "step": 918, "text_contrastive_loss": 1.1116 }, { "contrastive_loss": 0.4385, "epoch": 2.074492099322799, "grad_norm": 18.411714553833008, "learning_rate": 6.5243694813284975e-06, "lm_loss": 5.5367, "loss": 1.431, "step": 919, "text_contrastive_loss": 0.8777 }, { "contrastive_loss": 0.6399, "epoch": 2.0767494356659144, "grad_norm": 16.799848556518555, "learning_rate": 6.517457878876958e-06, "lm_loss": 5.5023, "loss": 1.6103, "step": 920, "text_contrastive_loss": 0.8404 }, { "contrastive_loss": 0.5781, "epoch": 2.0790067720090293, "grad_norm": 15.620450019836426, "learning_rate": 6.510543081207281e-06, "lm_loss": 5.6476, "loss": 1.6084, "step": 921, "text_contrastive_loss": 0.931 }, { "contrastive_loss": 0.5393, "epoch": 2.0812641083521446, "grad_norm": 14.422009468078613, "learning_rate": 6.503625102879534e-06, "lm_loss": 5.6779, "loss": 1.4782, "step": 922, "text_contrastive_loss": 0.7423 }, { "contrastive_loss": 0.5087, "epoch": 2.0835214446952595, "grad_norm": 15.963953018188477, "learning_rate": 6.496703958460479e-06, "lm_loss": 5.5043, "loss": 1.4632, "step": 923, "text_contrastive_loss": 0.8081 }, { "contrastive_loss": 0.6506, "epoch": 2.0857787810383748, "grad_norm": 16.180904388427734, "learning_rate": 6.489779662523545e-06, "lm_loss": 5.3862, "loss": 1.6508, "step": 924, "text_contrastive_loss": 0.9232 }, { "contrastive_loss": 0.5739, "epoch": 2.0880361173814896, "grad_norm": 16.602985382080078, "learning_rate": 6.4828522296488014e-06, "lm_loss": 5.666, "loss": 1.5834, "step": 925, "text_contrastive_loss": 0.8859 }, { "contrastive_loss": 0.6208, "epoch": 2.090293453724605, "grad_norm": 16.005596160888672, "learning_rate": 6.475921674422917e-06, "lm_loss": 5.5651, "loss": 1.6567, "step": 926, "text_contrastive_loss": 0.9587 }, { "contrastive_loss": 0.5289, "epoch": 2.0925507900677203, "grad_norm": 14.40626335144043, "learning_rate": 6.4689880114391375e-06, "lm_loss": 5.687, "loss": 1.4922, "step": 927, "text_contrastive_loss": 0.7891 }, { "contrastive_loss": 0.593, "epoch": 2.094808126410835, "grad_norm": 15.942220687866211, "learning_rate": 6.462051255297255e-06, "lm_loss": 5.6157, "loss": 1.5557, "step": 928, "text_contrastive_loss": 0.8023 }, { "contrastive_loss": 0.564, "epoch": 2.0970654627539504, "grad_norm": 15.784947395324707, "learning_rate": 6.455111420603568e-06, "lm_loss": 5.6122, "loss": 1.5411, "step": 929, "text_contrastive_loss": 0.8316 }, { "contrastive_loss": 0.5249, "epoch": 2.0993227990970653, "grad_norm": 17.09646224975586, "learning_rate": 6.448168521970865e-06, "lm_loss": 5.5094, "loss": 1.4558, "step": 930, "text_contrastive_loss": 0.7599 }, { "contrastive_loss": 0.6272, "epoch": 2.1015801354401806, "grad_norm": 17.021900177001953, "learning_rate": 6.441222574018378e-06, "lm_loss": 5.6399, "loss": 1.6545, "step": 931, "text_contrastive_loss": 0.9266 }, { "contrastive_loss": 0.5656, "epoch": 2.1038374717832955, "grad_norm": 17.24850082397461, "learning_rate": 6.434273591371771e-06, "lm_loss": 5.5486, "loss": 1.6227, "step": 932, "text_contrastive_loss": 1.0044 }, { "contrastive_loss": 0.4486, "epoch": 2.106094808126411, "grad_norm": 14.898802757263184, "learning_rate": 6.427321588663085e-06, "lm_loss": 5.568, "loss": 1.3912, "step": 933, "text_contrastive_loss": 0.7716 }, { "contrastive_loss": 0.5385, "epoch": 2.108352144469526, "grad_norm": 14.44471549987793, "learning_rate": 6.42036658053073e-06, "lm_loss": 5.5793, "loss": 1.5247, "step": 934, "text_contrastive_loss": 0.8567 }, { "contrastive_loss": 0.6541, "epoch": 2.110609480812641, "grad_norm": 17.068880081176758, "learning_rate": 6.41340858161944e-06, "lm_loss": 5.5587, "loss": 1.707, "step": 935, "text_contrastive_loss": 0.9941 }, { "contrastive_loss": 0.5043, "epoch": 2.1128668171557563, "grad_norm": 13.335604667663574, "learning_rate": 6.406447606580248e-06, "lm_loss": 5.5333, "loss": 1.4185, "step": 936, "text_contrastive_loss": 0.7216 }, { "contrastive_loss": 0.4877, "epoch": 2.115124153498871, "grad_norm": 13.42530632019043, "learning_rate": 6.399483670070451e-06, "lm_loss": 5.5543, "loss": 1.4184, "step": 937, "text_contrastive_loss": 0.7505 }, { "contrastive_loss": 0.5192, "epoch": 2.1173814898419865, "grad_norm": 14.670174598693848, "learning_rate": 6.392516786753586e-06, "lm_loss": 5.5975, "loss": 1.4684, "step": 938, "text_contrastive_loss": 0.7788 }, { "contrastive_loss": 0.4451, "epoch": 2.119638826185102, "grad_norm": 14.285451889038086, "learning_rate": 6.385546971299389e-06, "lm_loss": 5.606, "loss": 1.3854, "step": 939, "text_contrastive_loss": 0.7594 }, { "contrastive_loss": 0.4997, "epoch": 2.1218961625282167, "grad_norm": 15.921854019165039, "learning_rate": 6.378574238383776e-06, "lm_loss": 5.4192, "loss": 1.447, "step": 940, "text_contrastive_loss": 0.8108 }, { "contrastive_loss": 0.47, "epoch": 2.124153498871332, "grad_norm": 13.7954740524292, "learning_rate": 6.3715986026888046e-06, "lm_loss": 5.5366, "loss": 1.3794, "step": 941, "text_contrastive_loss": 0.7116 }, { "contrastive_loss": 0.5892, "epoch": 2.126410835214447, "grad_norm": 16.45612335205078, "learning_rate": 6.3646200789026426e-06, "lm_loss": 5.445, "loss": 1.6059, "step": 942, "text_contrastive_loss": 0.9444 }, { "contrastive_loss": 0.5043, "epoch": 2.128668171557562, "grad_norm": 17.27448844909668, "learning_rate": 6.35763868171954e-06, "lm_loss": 5.6406, "loss": 1.5254, "step": 943, "text_contrastive_loss": 0.9139 }, { "contrastive_loss": 0.5443, "epoch": 2.130925507900677, "grad_norm": 16.419078826904297, "learning_rate": 6.350654425839799e-06, "lm_loss": 5.5499, "loss": 1.5785, "step": 944, "text_contrastive_loss": 0.9585 }, { "contrastive_loss": 0.6089, "epoch": 2.1331828442437923, "grad_norm": 16.696115493774414, "learning_rate": 6.343667325969736e-06, "lm_loss": 5.509, "loss": 1.564, "step": 945, "text_contrastive_loss": 0.8083 }, { "contrastive_loss": 0.6238, "epoch": 2.1354401805869077, "grad_norm": 17.05726432800293, "learning_rate": 6.336677396821663e-06, "lm_loss": 5.505, "loss": 1.6649, "step": 946, "text_contrastive_loss": 0.9813 }, { "contrastive_loss": 0.4916, "epoch": 2.1376975169300225, "grad_norm": 13.684940338134766, "learning_rate": 6.3296846531138445e-06, "lm_loss": 5.6108, "loss": 1.4635, "step": 947, "text_contrastive_loss": 0.8216 }, { "contrastive_loss": 0.5568, "epoch": 2.139954853273138, "grad_norm": 14.160411834716797, "learning_rate": 6.322689109570472e-06, "lm_loss": 5.6072, "loss": 1.5434, "step": 948, "text_contrastive_loss": 0.8518 }, { "contrastive_loss": 0.6086, "epoch": 2.1422121896162527, "grad_norm": 17.340606689453125, "learning_rate": 6.315690780921634e-06, "lm_loss": 5.5506, "loss": 1.58, "step": 949, "text_contrastive_loss": 0.8325 }, { "contrastive_loss": 0.541, "epoch": 2.144469525959368, "grad_norm": 14.390788078308105, "learning_rate": 6.3086896819032814e-06, "lm_loss": 5.6194, "loss": 1.4609, "step": 950, "text_contrastive_loss": 0.716 }, { "contrastive_loss": 0.5307, "epoch": 2.146726862302483, "grad_norm": 16.99388885498047, "learning_rate": 6.301685827257202e-06, "lm_loss": 5.6178, "loss": 1.5383, "step": 951, "text_contrastive_loss": 0.8917 }, { "contrastive_loss": 0.5455, "epoch": 2.148984198645598, "grad_norm": 14.5534029006958, "learning_rate": 6.294679231730983e-06, "lm_loss": 5.4725, "loss": 1.5116, "step": 952, "text_contrastive_loss": 0.8377 }, { "contrastive_loss": 0.593, "epoch": 2.1512415349887135, "grad_norm": 16.284915924072266, "learning_rate": 6.2876699100779815e-06, "lm_loss": 5.5311, "loss": 1.5691, "step": 953, "text_contrastive_loss": 0.8461 }, { "contrastive_loss": 0.5442, "epoch": 2.1534988713318284, "grad_norm": 14.362798690795898, "learning_rate": 6.2806578770573e-06, "lm_loss": 5.568, "loss": 1.4761, "step": 954, "text_contrastive_loss": 0.7503 }, { "contrastive_loss": 0.4771, "epoch": 2.1557562076749437, "grad_norm": 14.400273323059082, "learning_rate": 6.273643147433743e-06, "lm_loss": 5.6257, "loss": 1.4969, "step": 955, "text_contrastive_loss": 0.9144 }, { "contrastive_loss": 0.4491, "epoch": 2.1580135440180586, "grad_norm": 14.197193145751953, "learning_rate": 6.266625735977802e-06, "lm_loss": 5.5312, "loss": 1.3221, "step": 956, "text_contrastive_loss": 0.6398 }, { "contrastive_loss": 0.5357, "epoch": 2.160270880361174, "grad_norm": 14.38227367401123, "learning_rate": 6.259605657465607e-06, "lm_loss": 5.6018, "loss": 1.5223, "step": 957, "text_contrastive_loss": 0.8529 }, { "contrastive_loss": 0.5237, "epoch": 2.1625282167042887, "grad_norm": 13.620275497436523, "learning_rate": 6.252582926678908e-06, "lm_loss": 5.611, "loss": 1.4757, "step": 958, "text_contrastive_loss": 0.7819 }, { "contrastive_loss": 0.5603, "epoch": 2.164785553047404, "grad_norm": 16.51104164123535, "learning_rate": 6.24555755840504e-06, "lm_loss": 5.759, "loss": 1.5847, "step": 959, "text_contrastive_loss": 0.897 }, { "contrastive_loss": 0.5889, "epoch": 2.1670428893905194, "grad_norm": 15.760680198669434, "learning_rate": 6.238529567436892e-06, "lm_loss": 5.528, "loss": 1.5646, "step": 960, "text_contrastive_loss": 0.8456 }, { "contrastive_loss": 0.5447, "epoch": 2.1693002257336342, "grad_norm": 15.119182586669922, "learning_rate": 6.231498968572872e-06, "lm_loss": 5.5281, "loss": 1.5874, "step": 961, "text_contrastive_loss": 0.9797 }, { "contrastive_loss": 0.5813, "epoch": 2.1715575620767495, "grad_norm": 15.867561340332031, "learning_rate": 6.224465776616883e-06, "lm_loss": 5.5412, "loss": 1.5815, "step": 962, "text_contrastive_loss": 0.8922 }, { "contrastive_loss": 0.6029, "epoch": 2.1738148984198644, "grad_norm": 16.155677795410156, "learning_rate": 6.217430006378285e-06, "lm_loss": 5.6498, "loss": 1.6473, "step": 963, "text_contrastive_loss": 0.9588 }, { "contrastive_loss": 0.4913, "epoch": 2.1760722347629797, "grad_norm": 14.698906898498535, "learning_rate": 6.210391672671869e-06, "lm_loss": 5.4799, "loss": 1.3923, "step": 964, "text_contrastive_loss": 0.7059 }, { "contrastive_loss": 0.5443, "epoch": 2.1783295711060946, "grad_norm": 15.556617736816406, "learning_rate": 6.203350790317825e-06, "lm_loss": 5.6059, "loss": 1.5384, "step": 965, "text_contrastive_loss": 0.8669 }, { "contrastive_loss": 0.6068, "epoch": 2.18058690744921, "grad_norm": 15.900672912597656, "learning_rate": 6.196307374141707e-06, "lm_loss": 5.475, "loss": 1.6357, "step": 966, "text_contrastive_loss": 0.9627 }, { "contrastive_loss": 0.625, "epoch": 2.1828442437923252, "grad_norm": 17.61025047302246, "learning_rate": 6.189261438974403e-06, "lm_loss": 5.4916, "loss": 1.5829, "step": 967, "text_contrastive_loss": 0.8175 }, { "contrastive_loss": 0.5096, "epoch": 2.18510158013544, "grad_norm": 15.51711654663086, "learning_rate": 6.1822129996521105e-06, "lm_loss": 5.6518, "loss": 1.449, "step": 968, "text_contrastive_loss": 0.7483 }, { "contrastive_loss": 0.6001, "epoch": 2.1873589164785554, "grad_norm": 16.458173751831055, "learning_rate": 6.175162071016295e-06, "lm_loss": 5.6193, "loss": 1.6752, "step": 969, "text_contrastive_loss": 1.0262 }, { "contrastive_loss": 0.619, "epoch": 2.1896162528216703, "grad_norm": 16.29840087890625, "learning_rate": 6.168108667913666e-06, "lm_loss": 5.5032, "loss": 1.594, "step": 970, "text_contrastive_loss": 0.8493 }, { "contrastive_loss": 0.5649, "epoch": 2.1918735891647856, "grad_norm": 14.947978019714355, "learning_rate": 6.161052805196141e-06, "lm_loss": 5.5425, "loss": 1.5648, "step": 971, "text_contrastive_loss": 0.8913 }, { "contrastive_loss": 0.4871, "epoch": 2.194130925507901, "grad_norm": 13.491371154785156, "learning_rate": 6.15399449772082e-06, "lm_loss": 5.5075, "loss": 1.4412, "step": 972, "text_contrastive_loss": 0.8069 }, { "contrastive_loss": 0.4715, "epoch": 2.1963882618510158, "grad_norm": 15.550296783447266, "learning_rate": 6.146933760349947e-06, "lm_loss": 5.59, "loss": 1.346, "step": 973, "text_contrastive_loss": 0.6311 }, { "contrastive_loss": 0.5665, "epoch": 2.198645598194131, "grad_norm": 15.592700004577637, "learning_rate": 6.139870607950885e-06, "lm_loss": 5.5149, "loss": 1.576, "step": 974, "text_contrastive_loss": 0.9159 }, { "contrastive_loss": 0.5757, "epoch": 2.200902934537246, "grad_norm": 15.696521759033203, "learning_rate": 6.1328050553960804e-06, "lm_loss": 5.6159, "loss": 1.5487, "step": 975, "text_contrastive_loss": 0.8227 }, { "contrastive_loss": 0.5798, "epoch": 2.2031602708803613, "grad_norm": 17.08321189880371, "learning_rate": 6.1257371175630375e-06, "lm_loss": 5.6681, "loss": 1.5939, "step": 976, "text_contrastive_loss": 0.8946 }, { "contrastive_loss": 0.6711, "epoch": 2.205417607223476, "grad_norm": 16.547780990600586, "learning_rate": 6.118666809334277e-06, "lm_loss": 5.5481, "loss": 1.7015, "step": 977, "text_contrastive_loss": 0.9512 }, { "contrastive_loss": 0.5117, "epoch": 2.2076749435665914, "grad_norm": 14.462315559387207, "learning_rate": 6.111594145597319e-06, "lm_loss": 5.4319, "loss": 1.4326, "step": 978, "text_contrastive_loss": 0.7555 }, { "contrastive_loss": 0.5518, "epoch": 2.2099322799097068, "grad_norm": 13.699687957763672, "learning_rate": 6.104519141244631e-06, "lm_loss": 5.414, "loss": 1.5038, "step": 979, "text_contrastive_loss": 0.8212 }, { "contrastive_loss": 0.5758, "epoch": 2.2121896162528216, "grad_norm": 15.209568977355957, "learning_rate": 6.0974418111736235e-06, "lm_loss": 5.5458, "loss": 1.5145, "step": 980, "text_contrastive_loss": 0.7682 }, { "contrastive_loss": 0.5101, "epoch": 2.214446952595937, "grad_norm": 15.351068496704102, "learning_rate": 6.090362170286591e-06, "lm_loss": 5.5988, "loss": 1.4326, "step": 981, "text_contrastive_loss": 0.7253 }, { "contrastive_loss": 0.6009, "epoch": 2.216704288939052, "grad_norm": 16.76323699951172, "learning_rate": 6.0832802334907044e-06, "lm_loss": 5.5892, "loss": 1.6298, "step": 982, "text_contrastive_loss": 0.94 }, { "contrastive_loss": 0.4723, "epoch": 2.218961625282167, "grad_norm": 15.08696174621582, "learning_rate": 6.076196015697963e-06, "lm_loss": 5.5802, "loss": 1.4103, "step": 983, "text_contrastive_loss": 0.7599 }, { "contrastive_loss": 0.4897, "epoch": 2.221218961625282, "grad_norm": 15.753348350524902, "learning_rate": 6.069109531825169e-06, "lm_loss": 5.7111, "loss": 1.4806, "step": 984, "text_contrastive_loss": 0.8395 }, { "contrastive_loss": 0.4869, "epoch": 2.2234762979683973, "grad_norm": 15.647372245788574, "learning_rate": 6.0620207967939e-06, "lm_loss": 5.5092, "loss": 1.4976, "step": 985, "text_contrastive_loss": 0.9195 }, { "contrastive_loss": 0.5674, "epoch": 2.2257336343115126, "grad_norm": 16.478822708129883, "learning_rate": 6.054929825530469e-06, "lm_loss": 5.5507, "loss": 1.5417, "step": 986, "text_contrastive_loss": 0.8385 }, { "contrastive_loss": 0.4659, "epoch": 2.2279909706546275, "grad_norm": 14.923596382141113, "learning_rate": 6.047836632965901e-06, "lm_loss": 5.6204, "loss": 1.4151, "step": 987, "text_contrastive_loss": 0.7743 }, { "contrastive_loss": 0.6934, "epoch": 2.230248306997743, "grad_norm": 17.369243621826172, "learning_rate": 6.040741234035898e-06, "lm_loss": 5.6445, "loss": 1.7237, "step": 988, "text_contrastive_loss": 0.9319 }, { "contrastive_loss": 0.5848, "epoch": 2.2325056433408577, "grad_norm": 18.03710174560547, "learning_rate": 6.0336436436808054e-06, "lm_loss": 5.642, "loss": 1.5677, "step": 989, "text_contrastive_loss": 0.8374 }, { "contrastive_loss": 0.5226, "epoch": 2.234762979683973, "grad_norm": 14.763822555541992, "learning_rate": 6.026543876845586e-06, "lm_loss": 5.6278, "loss": 1.5478, "step": 990, "text_contrastive_loss": 0.9248 }, { "contrastive_loss": 0.5392, "epoch": 2.237020316027088, "grad_norm": 15.49229621887207, "learning_rate": 6.019441948479784e-06, "lm_loss": 5.5376, "loss": 1.5442, "step": 991, "text_contrastive_loss": 0.9026 }, { "contrastive_loss": 0.5638, "epoch": 2.239277652370203, "grad_norm": 15.575782775878906, "learning_rate": 6.012337873537494e-06, "lm_loss": 5.5279, "loss": 1.4383, "step": 992, "text_contrastive_loss": 0.6434 }, { "contrastive_loss": 0.5159, "epoch": 2.2415349887133185, "grad_norm": 16.054767608642578, "learning_rate": 6.005231666977331e-06, "lm_loss": 5.5313, "loss": 1.4993, "step": 993, "text_contrastive_loss": 0.8606 }, { "contrastive_loss": 0.6831, "epoch": 2.2437923250564333, "grad_norm": 18.615949630737305, "learning_rate": 5.998123343762403e-06, "lm_loss": 5.6966, "loss": 1.6825, "step": 994, "text_contrastive_loss": 0.8595 }, { "contrastive_loss": 0.4579, "epoch": 2.2460496613995486, "grad_norm": 14.217131614685059, "learning_rate": 5.9910129188602665e-06, "lm_loss": 5.6927, "loss": 1.4118, "step": 995, "text_contrastive_loss": 0.7694 }, { "contrastive_loss": 0.4881, "epoch": 2.2483069977426635, "grad_norm": 13.442610740661621, "learning_rate": 5.983900407242911e-06, "lm_loss": 5.5797, "loss": 1.4403, "step": 996, "text_contrastive_loss": 0.7884 }, { "contrastive_loss": 0.5378, "epoch": 2.250564334085779, "grad_norm": 16.705442428588867, "learning_rate": 5.976785823886713e-06, "lm_loss": 5.4976, "loss": 1.5061, "step": 997, "text_contrastive_loss": 0.8372 }, { "contrastive_loss": 0.5148, "epoch": 2.2528216704288937, "grad_norm": 14.701661109924316, "learning_rate": 5.96966918377242e-06, "lm_loss": 5.3676, "loss": 1.4534, "step": 998, "text_contrastive_loss": 0.8037 }, { "contrastive_loss": 0.5229, "epoch": 2.255079006772009, "grad_norm": 16.033830642700195, "learning_rate": 5.9625505018851e-06, "lm_loss": 5.4788, "loss": 1.4674, "step": 999, "text_contrastive_loss": 0.7932 }, { "contrastive_loss": 0.5061, "epoch": 2.2573363431151243, "grad_norm": 15.547621726989746, "learning_rate": 5.955429793214129e-06, "lm_loss": 5.5908, "loss": 1.454, "step": 1000, "text_contrastive_loss": 0.7777 }, { "contrastive_loss": 0.6188, "epoch": 2.259593679458239, "grad_norm": 18.8467960357666, "learning_rate": 5.948307072753146e-06, "lm_loss": 5.5699, "loss": 1.5722, "step": 1001, "text_contrastive_loss": 0.7927 }, { "contrastive_loss": 0.5788, "epoch": 2.2618510158013545, "grad_norm": 14.916223526000977, "learning_rate": 5.941182355500028e-06, "lm_loss": 5.6379, "loss": 1.5869, "step": 1002, "text_contrastive_loss": 0.8886 }, { "contrastive_loss": 0.4433, "epoch": 2.2641083521444694, "grad_norm": 14.932116508483887, "learning_rate": 5.934055656456855e-06, "lm_loss": 5.5532, "loss": 1.4275, "step": 1003, "text_contrastive_loss": 0.8576 }, { "contrastive_loss": 0.4633, "epoch": 2.2663656884875847, "grad_norm": 13.377215385437012, "learning_rate": 5.926926990629883e-06, "lm_loss": 5.4663, "loss": 1.3174, "step": 1004, "text_contrastive_loss": 0.6149 }, { "contrastive_loss": 0.5241, "epoch": 2.2686230248307, "grad_norm": 14.37695598602295, "learning_rate": 5.919796373029504e-06, "lm_loss": 5.5585, "loss": 1.5059, "step": 1005, "text_contrastive_loss": 0.8521 }, { "contrastive_loss": 0.4805, "epoch": 2.270880361173815, "grad_norm": 15.076510429382324, "learning_rate": 5.912663818670224e-06, "lm_loss": 5.5408, "loss": 1.4197, "step": 1006, "text_contrastive_loss": 0.7702 }, { "contrastive_loss": 0.4996, "epoch": 2.27313769751693, "grad_norm": 14.953193664550781, "learning_rate": 5.905529342570627e-06, "lm_loss": 5.5402, "loss": 1.429, "step": 1007, "text_contrastive_loss": 0.7507 }, { "contrastive_loss": 0.4851, "epoch": 2.275395033860045, "grad_norm": 15.045732498168945, "learning_rate": 5.898392959753343e-06, "lm_loss": 5.569, "loss": 1.4581, "step": 1008, "text_contrastive_loss": 0.8322 }, { "contrastive_loss": 0.6655, "epoch": 2.2776523702031604, "grad_norm": 17.517780303955078, "learning_rate": 5.8912546852450116e-06, "lm_loss": 5.5835, "loss": 1.655, "step": 1009, "text_contrastive_loss": 0.8622 }, { "contrastive_loss": 0.4815, "epoch": 2.2799097065462752, "grad_norm": 15.604001998901367, "learning_rate": 5.8841145340762665e-06, "lm_loss": 5.5648, "loss": 1.4179, "step": 1010, "text_contrastive_loss": 0.7598 }, { "contrastive_loss": 0.447, "epoch": 2.2821670428893905, "grad_norm": 14.039140701293945, "learning_rate": 5.876972521281683e-06, "lm_loss": 5.4931, "loss": 1.3804, "step": 1011, "text_contrastive_loss": 0.7682 }, { "contrastive_loss": 0.485, "epoch": 2.2844243792325054, "grad_norm": 14.94192886352539, "learning_rate": 5.869828661899761e-06, "lm_loss": 5.6206, "loss": 1.4788, "step": 1012, "text_contrastive_loss": 0.8635 }, { "contrastive_loss": 0.4348, "epoch": 2.2866817155756207, "grad_norm": 13.55521297454834, "learning_rate": 5.862682970972888e-06, "lm_loss": 5.5266, "loss": 1.35, "step": 1013, "text_contrastive_loss": 0.725 }, { "contrastive_loss": 0.572, "epoch": 2.288939051918736, "grad_norm": 16.7547664642334, "learning_rate": 5.855535463547309e-06, "lm_loss": 5.4203, "loss": 1.5671, "step": 1014, "text_contrastive_loss": 0.9061 }, { "contrastive_loss": 0.579, "epoch": 2.291196388261851, "grad_norm": 16.843368530273438, "learning_rate": 5.8483861546730915e-06, "lm_loss": 5.5052, "loss": 1.5681, "step": 1015, "text_contrastive_loss": 0.8772 }, { "contrastive_loss": 0.5231, "epoch": 2.293453724604966, "grad_norm": 16.329423904418945, "learning_rate": 5.841235059404097e-06, "lm_loss": 5.4309, "loss": 1.473, "step": 1016, "text_contrastive_loss": 0.8135 }, { "contrastive_loss": 0.4935, "epoch": 2.295711060948081, "grad_norm": 16.164878845214844, "learning_rate": 5.834082192797948e-06, "lm_loss": 5.5583, "loss": 1.4542, "step": 1017, "text_contrastive_loss": 0.8098 }, { "contrastive_loss": 0.6854, "epoch": 2.2979683972911964, "grad_norm": 16.56847381591797, "learning_rate": 5.826927569915999e-06, "lm_loss": 5.5319, "loss": 1.7242, "step": 1018, "text_contrastive_loss": 0.9713 }, { "contrastive_loss": 0.58, "epoch": 2.3002257336343117, "grad_norm": 16.325702667236328, "learning_rate": 5.819771205823303e-06, "lm_loss": 5.5061, "loss": 1.6016, "step": 1019, "text_contrastive_loss": 0.9418 }, { "contrastive_loss": 0.5142, "epoch": 2.3024830699774266, "grad_norm": 17.441755294799805, "learning_rate": 5.812613115588575e-06, "lm_loss": 5.5065, "loss": 1.4703, "step": 1020, "text_contrastive_loss": 0.8108 }, { "contrastive_loss": 0.447, "epoch": 2.304740406320542, "grad_norm": 15.045120239257812, "learning_rate": 5.805453314284168e-06, "lm_loss": 5.6466, "loss": 1.3683, "step": 1021, "text_contrastive_loss": 0.7133 }, { "contrastive_loss": 0.5044, "epoch": 2.3069977426636568, "grad_norm": 15.242061614990234, "learning_rate": 5.7982918169860395e-06, "lm_loss": 5.4769, "loss": 1.4511, "step": 1022, "text_contrastive_loss": 0.798 }, { "contrastive_loss": 0.4894, "epoch": 2.309255079006772, "grad_norm": 15.019240379333496, "learning_rate": 5.791128638773711e-06, "lm_loss": 5.4992, "loss": 1.4205, "step": 1023, "text_contrastive_loss": 0.7624 }, { "contrastive_loss": 0.4916, "epoch": 2.311512415349887, "grad_norm": 14.840372085571289, "learning_rate": 5.783963794730254e-06, "lm_loss": 5.5569, "loss": 1.3969, "step": 1024, "text_contrastive_loss": 0.6992 }, { "contrastive_loss": 0.56, "epoch": 2.3137697516930023, "grad_norm": 15.438685417175293, "learning_rate": 5.776797299942236e-06, "lm_loss": 5.6762, "loss": 1.5331, "step": 1025, "text_contrastive_loss": 0.8109 }, { "contrastive_loss": 0.5842, "epoch": 2.3160270880361176, "grad_norm": 16.042137145996094, "learning_rate": 5.7696291694997105e-06, "lm_loss": 5.5272, "loss": 1.5411, "step": 1026, "text_contrastive_loss": 0.8084 }, { "contrastive_loss": 0.6225, "epoch": 2.3182844243792324, "grad_norm": 17.81473731994629, "learning_rate": 5.762459418496169e-06, "lm_loss": 5.535, "loss": 1.6085, "step": 1027, "text_contrastive_loss": 0.8649 }, { "contrastive_loss": 0.5751, "epoch": 2.3205417607223477, "grad_norm": 16.053951263427734, "learning_rate": 5.755288062028519e-06, "lm_loss": 5.5689, "loss": 1.5817, "step": 1028, "text_contrastive_loss": 0.8994 }, { "contrastive_loss": 0.442, "epoch": 2.3227990970654626, "grad_norm": 12.914002418518066, "learning_rate": 5.748115115197045e-06, "lm_loss": 5.5298, "loss": 1.3506, "step": 1029, "text_contrastive_loss": 0.7112 }, { "contrastive_loss": 0.5847, "epoch": 2.325056433408578, "grad_norm": 15.449382781982422, "learning_rate": 5.740940593105383e-06, "lm_loss": 5.4938, "loss": 1.4823, "step": 1030, "text_contrastive_loss": 0.6964 }, { "contrastive_loss": 0.5351, "epoch": 2.327313769751693, "grad_norm": 15.321664810180664, "learning_rate": 5.733764510860482e-06, "lm_loss": 5.5159, "loss": 1.4961, "step": 1031, "text_contrastive_loss": 0.8188 }, { "contrastive_loss": 0.4763, "epoch": 2.329571106094808, "grad_norm": 15.322822570800781, "learning_rate": 5.726586883572584e-06, "lm_loss": 5.5235, "loss": 1.4653, "step": 1032, "text_contrastive_loss": 0.8732 }, { "contrastive_loss": 0.5442, "epoch": 2.3318284424379234, "grad_norm": 15.321481704711914, "learning_rate": 5.719407726355174e-06, "lm_loss": 5.4556, "loss": 1.4708, "step": 1033, "text_contrastive_loss": 0.7621 }, { "contrastive_loss": 0.5535, "epoch": 2.3340857787810383, "grad_norm": 17.706205368041992, "learning_rate": 5.712227054324968e-06, "lm_loss": 5.475, "loss": 1.5449, "step": 1034, "text_contrastive_loss": 0.8878 }, { "contrastive_loss": 0.5824, "epoch": 2.3363431151241536, "grad_norm": 15.88497543334961, "learning_rate": 5.705044882601862e-06, "lm_loss": 5.5598, "loss": 1.5435, "step": 1035, "text_contrastive_loss": 0.8102 }, { "contrastive_loss": 0.5771, "epoch": 2.3386004514672685, "grad_norm": 16.463577270507812, "learning_rate": 5.697861226308923e-06, "lm_loss": 5.6166, "loss": 1.6027, "step": 1036, "text_contrastive_loss": 0.9279 }, { "contrastive_loss": 0.6645, "epoch": 2.340857787810384, "grad_norm": 15.305360794067383, "learning_rate": 5.69067610057233e-06, "lm_loss": 5.4175, "loss": 1.6381, "step": 1037, "text_contrastive_loss": 0.8637 }, { "contrastive_loss": 0.5334, "epoch": 2.343115124153499, "grad_norm": 14.267290115356445, "learning_rate": 5.683489520521365e-06, "lm_loss": 5.6242, "loss": 1.509, "step": 1038, "text_contrastive_loss": 0.8262 }, { "contrastive_loss": 0.6476, "epoch": 2.345372460496614, "grad_norm": 16.81133460998535, "learning_rate": 5.6763015012883686e-06, "lm_loss": 5.4711, "loss": 1.6709, "step": 1039, "text_contrastive_loss": 0.9524 }, { "contrastive_loss": 0.5176, "epoch": 2.3476297968397293, "grad_norm": 13.270197868347168, "learning_rate": 5.6691120580087126e-06, "lm_loss": 5.5653, "loss": 1.3955, "step": 1040, "text_contrastive_loss": 0.6428 }, { "contrastive_loss": 0.5129, "epoch": 2.349887133182844, "grad_norm": 12.754667282104492, "learning_rate": 5.661921205820767e-06, "lm_loss": 5.6232, "loss": 1.4187, "step": 1041, "text_contrastive_loss": 0.6868 }, { "contrastive_loss": 0.5021, "epoch": 2.3521444695259595, "grad_norm": 14.27110767364502, "learning_rate": 5.654728959865872e-06, "lm_loss": 5.4533, "loss": 1.4623, "step": 1042, "text_contrastive_loss": 0.8298 }, { "contrastive_loss": 0.5154, "epoch": 2.3544018058690743, "grad_norm": 15.107547760009766, "learning_rate": 5.647535335288296e-06, "lm_loss": 5.6959, "loss": 1.5089, "step": 1043, "text_contrastive_loss": 0.8478 }, { "contrastive_loss": 0.4731, "epoch": 2.3566591422121896, "grad_norm": 13.888667106628418, "learning_rate": 5.640340347235215e-06, "lm_loss": 5.703, "loss": 1.4613, "step": 1044, "text_contrastive_loss": 0.8357 }, { "contrastive_loss": 0.6407, "epoch": 2.3589164785553045, "grad_norm": 16.244823455810547, "learning_rate": 5.6331440108566735e-06, "lm_loss": 5.7367, "loss": 1.6674, "step": 1045, "text_contrastive_loss": 0.906 }, { "contrastive_loss": 0.5418, "epoch": 2.36117381489842, "grad_norm": 16.122529983520508, "learning_rate": 5.6259463413055604e-06, "lm_loss": 5.4931, "loss": 1.5393, "step": 1046, "text_contrastive_loss": 0.8965 }, { "contrastive_loss": 0.4846, "epoch": 2.363431151241535, "grad_norm": 13.905860900878906, "learning_rate": 5.6187473537375635e-06, "lm_loss": 5.6189, "loss": 1.4072, "step": 1047, "text_contrastive_loss": 0.7214 }, { "contrastive_loss": 0.4365, "epoch": 2.36568848758465, "grad_norm": 15.10843563079834, "learning_rate": 5.611547063311152e-06, "lm_loss": 5.5181, "loss": 1.3854, "step": 1048, "text_contrastive_loss": 0.7942 }, { "contrastive_loss": 0.6531, "epoch": 2.3679458239277653, "grad_norm": 15.9534912109375, "learning_rate": 5.604345485187535e-06, "lm_loss": 5.5287, "loss": 1.6077, "step": 1049, "text_contrastive_loss": 0.8036 }, { "contrastive_loss": 0.5596, "epoch": 2.37020316027088, "grad_norm": 15.557417869567871, "learning_rate": 5.597142634530639e-06, "lm_loss": 5.4709, "loss": 1.5331, "step": 1050, "text_contrastive_loss": 0.8528 }, { "contrastive_loss": 0.4985, "epoch": 2.3724604966139955, "grad_norm": 14.158095359802246, "learning_rate": 5.589938526507059e-06, "lm_loss": 5.6086, "loss": 1.4683, "step": 1051, "text_contrastive_loss": 0.818 }, { "contrastive_loss": 0.5393, "epoch": 2.374717832957111, "grad_norm": 16.066301345825195, "learning_rate": 5.582733176286048e-06, "lm_loss": 5.5186, "loss": 1.498, "step": 1052, "text_contrastive_loss": 0.8138 }, { "contrastive_loss": 0.4833, "epoch": 2.3769751693002257, "grad_norm": 14.408507347106934, "learning_rate": 5.575526599039472e-06, "lm_loss": 5.4066, "loss": 1.4347, "step": 1053, "text_contrastive_loss": 0.8216 }, { "contrastive_loss": 0.5148, "epoch": 2.379232505643341, "grad_norm": 14.628676414489746, "learning_rate": 5.568318809941777e-06, "lm_loss": 5.578, "loss": 1.4936, "step": 1054, "text_contrastive_loss": 0.8419 }, { "contrastive_loss": 0.4618, "epoch": 2.381489841986456, "grad_norm": 13.120189666748047, "learning_rate": 5.561109824169962e-06, "lm_loss": 5.4411, "loss": 1.3932, "step": 1055, "text_contrastive_loss": 0.7746 }, { "contrastive_loss": 0.4568, "epoch": 2.383747178329571, "grad_norm": 14.947108268737793, "learning_rate": 5.553899656903552e-06, "lm_loss": 5.5084, "loss": 1.4191, "step": 1056, "text_contrastive_loss": 0.8229 }, { "contrastive_loss": 0.5233, "epoch": 2.386004514672686, "grad_norm": 14.998693466186523, "learning_rate": 5.546688323324548e-06, "lm_loss": 5.4731, "loss": 1.5457, "step": 1057, "text_contrastive_loss": 0.9501 }, { "contrastive_loss": 0.506, "epoch": 2.3882618510158014, "grad_norm": 14.649811744689941, "learning_rate": 5.53947583861742e-06, "lm_loss": 5.4863, "loss": 1.4666, "step": 1058, "text_contrastive_loss": 0.824 }, { "contrastive_loss": 0.589, "epoch": 2.3905191873589167, "grad_norm": 14.989198684692383, "learning_rate": 5.5322622179690514e-06, "lm_loss": 5.5289, "loss": 1.4968, "step": 1059, "text_contrastive_loss": 0.7098 }, { "contrastive_loss": 0.4902, "epoch": 2.3927765237020315, "grad_norm": 14.715935707092285, "learning_rate": 5.525047476568722e-06, "lm_loss": 5.471, "loss": 1.4134, "step": 1060, "text_contrastive_loss": 0.7523 }, { "contrastive_loss": 0.4415, "epoch": 2.395033860045147, "grad_norm": 12.693151473999023, "learning_rate": 5.51783162960807e-06, "lm_loss": 5.4789, "loss": 1.2823, "step": 1061, "text_contrastive_loss": 0.586 }, { "contrastive_loss": 0.531, "epoch": 2.3972911963882617, "grad_norm": 16.371997833251953, "learning_rate": 5.5106146922810664e-06, "lm_loss": 5.5396, "loss": 1.5192, "step": 1062, "text_contrastive_loss": 0.8684 }, { "contrastive_loss": 0.583, "epoch": 2.399548532731377, "grad_norm": 17.31743049621582, "learning_rate": 5.50339667978397e-06, "lm_loss": 5.5777, "loss": 1.6913, "step": 1063, "text_contrastive_loss": 1.1012 }, { "contrastive_loss": 0.695, "epoch": 2.401805869074492, "grad_norm": 18.16950798034668, "learning_rate": 5.496177607315312e-06, "lm_loss": 5.6135, "loss": 1.6956, "step": 1064, "text_contrastive_loss": 0.8784 }, { "contrastive_loss": 0.5599, "epoch": 2.404063205417607, "grad_norm": 15.148139953613281, "learning_rate": 5.488957490075846e-06, "lm_loss": 5.5422, "loss": 1.4928, "step": 1065, "text_contrastive_loss": 0.7573 }, { "contrastive_loss": 0.5054, "epoch": 2.4063205417607225, "grad_norm": 14.579808235168457, "learning_rate": 5.4817363432685355e-06, "lm_loss": 5.5686, "loss": 1.5219, "step": 1066, "text_contrastive_loss": 0.9192 }, { "contrastive_loss": 0.5734, "epoch": 2.4085778781038374, "grad_norm": 15.381420135498047, "learning_rate": 5.474514182098504e-06, "lm_loss": 5.5125, "loss": 1.537, "step": 1067, "text_contrastive_loss": 0.8247 }, { "contrastive_loss": 0.636, "epoch": 2.4108352144469527, "grad_norm": 15.787850379943848, "learning_rate": 5.4672910217730155e-06, "lm_loss": 5.5951, "loss": 1.6146, "step": 1068, "text_contrastive_loss": 0.8382 }, { "contrastive_loss": 0.4333, "epoch": 2.4130925507900676, "grad_norm": 14.761796951293945, "learning_rate": 5.4600668775014355e-06, "lm_loss": 5.6594, "loss": 1.3938, "step": 1069, "text_contrastive_loss": 0.7891 }, { "contrastive_loss": 0.4984, "epoch": 2.415349887133183, "grad_norm": 13.197561264038086, "learning_rate": 5.452841764495203e-06, "lm_loss": 5.4475, "loss": 1.4568, "step": 1070, "text_contrastive_loss": 0.8273 }, { "contrastive_loss": 0.4797, "epoch": 2.417607223476298, "grad_norm": 15.631025314331055, "learning_rate": 5.445615697967797e-06, "lm_loss": 5.3548, "loss": 1.3307, "step": 1071, "text_contrastive_loss": 0.631 }, { "contrastive_loss": 0.791, "epoch": 2.419864559819413, "grad_norm": 17.64402961730957, "learning_rate": 5.438388693134702e-06, "lm_loss": 5.5563, "loss": 1.8377, "step": 1072, "text_contrastive_loss": 0.9822 }, { "contrastive_loss": 0.5658, "epoch": 2.4221218961625284, "grad_norm": 13.911042213439941, "learning_rate": 5.431160765213379e-06, "lm_loss": 5.4367, "loss": 1.5188, "step": 1073, "text_contrastive_loss": 0.8188 }, { "contrastive_loss": 0.5071, "epoch": 2.4243792325056432, "grad_norm": 14.111321449279785, "learning_rate": 5.423931929423235e-06, "lm_loss": 5.4104, "loss": 1.4533, "step": 1074, "text_contrastive_loss": 0.8102 }, { "contrastive_loss": 0.5081, "epoch": 2.4266365688487586, "grad_norm": 13.774117469787598, "learning_rate": 5.416702200985585e-06, "lm_loss": 5.5223, "loss": 1.4333, "step": 1075, "text_contrastive_loss": 0.746 }, { "contrastive_loss": 0.6259, "epoch": 2.4288939051918734, "grad_norm": 15.925483703613281, "learning_rate": 5.409471595123628e-06, "lm_loss": 5.4057, "loss": 1.5579, "step": 1076, "text_contrastive_loss": 0.7829 }, { "contrastive_loss": 0.5815, "epoch": 2.4311512415349887, "grad_norm": 15.957097053527832, "learning_rate": 5.4022401270624036e-06, "lm_loss": 5.5097, "loss": 1.521, "step": 1077, "text_contrastive_loss": 0.7769 }, { "contrastive_loss": 0.5654, "epoch": 2.4334085778781036, "grad_norm": 14.975671768188477, "learning_rate": 5.395007812028775e-06, "lm_loss": 5.5032, "loss": 1.4999, "step": 1078, "text_contrastive_loss": 0.7684 }, { "contrastive_loss": 0.6256, "epoch": 2.435665914221219, "grad_norm": 16.601072311401367, "learning_rate": 5.387774665251385e-06, "lm_loss": 5.5002, "loss": 1.6017, "step": 1079, "text_contrastive_loss": 0.8521 }, { "contrastive_loss": 0.4895, "epoch": 2.4379232505643342, "grad_norm": 15.283729553222656, "learning_rate": 5.380540701960627e-06, "lm_loss": 5.4928, "loss": 1.4356, "step": 1080, "text_contrastive_loss": 0.7936 }, { "contrastive_loss": 0.6107, "epoch": 2.440180586907449, "grad_norm": 15.71643352508545, "learning_rate": 5.373305937388613e-06, "lm_loss": 5.492, "loss": 1.5936, "step": 1081, "text_contrastive_loss": 0.8675 }, { "contrastive_loss": 0.5536, "epoch": 2.4424379232505644, "grad_norm": 14.633138656616211, "learning_rate": 5.3660703867691475e-06, "lm_loss": 5.5287, "loss": 1.4925, "step": 1082, "text_contrastive_loss": 0.7722 }, { "contrastive_loss": 0.5391, "epoch": 2.4446952595936793, "grad_norm": 15.10629940032959, "learning_rate": 5.358834065337684e-06, "lm_loss": 5.5012, "loss": 1.5241, "step": 1083, "text_contrastive_loss": 0.8696 }, { "contrastive_loss": 0.5987, "epoch": 2.4469525959367946, "grad_norm": 16.985130310058594, "learning_rate": 5.3515969883313e-06, "lm_loss": 5.5252, "loss": 1.5473, "step": 1084, "text_contrastive_loss": 0.7921 }, { "contrastive_loss": 0.4536, "epoch": 2.44920993227991, "grad_norm": 13.985669136047363, "learning_rate": 5.344359170988668e-06, "lm_loss": 5.4269, "loss": 1.349, "step": 1085, "text_contrastive_loss": 0.7056 }, { "contrastive_loss": 0.5222, "epoch": 2.4514672686230248, "grad_norm": 14.75936222076416, "learning_rate": 5.337120628550016e-06, "lm_loss": 5.5617, "loss": 1.4309, "step": 1086, "text_contrastive_loss": 0.705 }, { "contrastive_loss": 0.4161, "epoch": 2.45372460496614, "grad_norm": 12.124979972839355, "learning_rate": 5.329881376257098e-06, "lm_loss": 5.4788, "loss": 1.2727, "step": 1087, "text_contrastive_loss": 0.6174 }, { "contrastive_loss": 0.5465, "epoch": 2.455981941309255, "grad_norm": 14.528797149658203, "learning_rate": 5.322641429353167e-06, "lm_loss": 5.6025, "loss": 1.5016, "step": 1088, "text_contrastive_loss": 0.7897 }, { "contrastive_loss": 0.5103, "epoch": 2.4582392776523703, "grad_norm": 14.936470031738281, "learning_rate": 5.315400803082934e-06, "lm_loss": 5.5148, "loss": 1.4588, "step": 1089, "text_contrastive_loss": 0.7941 }, { "contrastive_loss": 0.5379, "epoch": 2.460496613995485, "grad_norm": 15.522775650024414, "learning_rate": 5.308159512692544e-06, "lm_loss": 5.3921, "loss": 1.472, "step": 1090, "text_contrastive_loss": 0.7897 }, { "contrastive_loss": 0.6376, "epoch": 2.4627539503386005, "grad_norm": 15.824444770812988, "learning_rate": 5.300917573429536e-06, "lm_loss": 5.5943, "loss": 1.5699, "step": 1091, "text_contrastive_loss": 0.7457 }, { "contrastive_loss": 0.5642, "epoch": 2.4650112866817158, "grad_norm": 17.166841506958008, "learning_rate": 5.293675000542822e-06, "lm_loss": 5.6214, "loss": 1.5572, "step": 1092, "text_contrastive_loss": 0.8616 }, { "contrastive_loss": 0.5636, "epoch": 2.4672686230248306, "grad_norm": 16.25465202331543, "learning_rate": 5.286431809282639e-06, "lm_loss": 5.5358, "loss": 1.6832, "step": 1093, "text_contrastive_loss": 1.1321 }, { "contrastive_loss": 0.5424, "epoch": 2.469525959367946, "grad_norm": 17.31192970275879, "learning_rate": 5.279188014900537e-06, "lm_loss": 5.5631, "loss": 1.4952, "step": 1094, "text_contrastive_loss": 0.793 }, { "contrastive_loss": 0.5037, "epoch": 2.471783295711061, "grad_norm": 15.22265911102295, "learning_rate": 5.2719436326493255e-06, "lm_loss": 5.5699, "loss": 1.5099, "step": 1095, "text_contrastive_loss": 0.8984 }, { "contrastive_loss": 0.5294, "epoch": 2.474040632054176, "grad_norm": 14.717487335205078, "learning_rate": 5.26469867778306e-06, "lm_loss": 5.5627, "loss": 1.478, "step": 1096, "text_contrastive_loss": 0.7848 }, { "contrastive_loss": 0.5232, "epoch": 2.476297968397291, "grad_norm": 14.459714889526367, "learning_rate": 5.257453165556996e-06, "lm_loss": 5.4178, "loss": 1.4919, "step": 1097, "text_contrastive_loss": 0.8539 }, { "contrastive_loss": 0.6076, "epoch": 2.4785553047404063, "grad_norm": 16.94345474243164, "learning_rate": 5.2502071112275675e-06, "lm_loss": 5.4922, "loss": 1.5671, "step": 1098, "text_contrastive_loss": 0.8205 }, { "contrastive_loss": 0.5167, "epoch": 2.4808126410835216, "grad_norm": 16.165454864501953, "learning_rate": 5.242960530052344e-06, "lm_loss": 5.5237, "loss": 1.5062, "step": 1099, "text_contrastive_loss": 0.8741 }, { "contrastive_loss": 0.5703, "epoch": 2.4830699774266365, "grad_norm": 15.800847053527832, "learning_rate": 5.235713437290012e-06, "lm_loss": 5.5983, "loss": 1.6058, "step": 1100, "text_contrastive_loss": 0.9515 }, { "contrastive_loss": 0.5203, "epoch": 2.485327313769752, "grad_norm": 15.280631065368652, "learning_rate": 5.228465848200327e-06, "lm_loss": 5.5601, "loss": 1.4564, "step": 1101, "text_contrastive_loss": 0.7602 }, { "contrastive_loss": 0.5569, "epoch": 2.4875846501128667, "grad_norm": 16.5999755859375, "learning_rate": 5.221217778044096e-06, "lm_loss": 5.5037, "loss": 1.5132, "step": 1102, "text_contrastive_loss": 0.8119 }, { "contrastive_loss": 0.5037, "epoch": 2.489841986455982, "grad_norm": 16.614097595214844, "learning_rate": 5.2139692420831325e-06, "lm_loss": 5.5185, "loss": 1.423, "step": 1103, "text_contrastive_loss": 0.7348 }, { "contrastive_loss": 0.4788, "epoch": 2.4920993227990973, "grad_norm": 14.173137664794922, "learning_rate": 5.206720255580241e-06, "lm_loss": 5.5314, "loss": 1.3925, "step": 1104, "text_contrastive_loss": 0.7211 }, { "contrastive_loss": 0.5616, "epoch": 2.494356659142212, "grad_norm": 15.582025527954102, "learning_rate": 5.199470833799164e-06, "lm_loss": 5.5397, "loss": 1.5229, "step": 1105, "text_contrastive_loss": 0.8147 }, { "contrastive_loss": 0.4588, "epoch": 2.4966139954853275, "grad_norm": 14.748689651489258, "learning_rate": 5.192220992004569e-06, "lm_loss": 5.4949, "loss": 1.3965, "step": 1106, "text_contrastive_loss": 0.7763 }, { "contrastive_loss": 0.6206, "epoch": 2.4988713318284423, "grad_norm": 17.029296875, "learning_rate": 5.184970745461998e-06, "lm_loss": 5.5767, "loss": 1.6424, "step": 1107, "text_contrastive_loss": 0.9282 }, { "contrastive_loss": 0.6042, "epoch": 2.5011286681715577, "grad_norm": 15.634419441223145, "learning_rate": 5.177720109437857e-06, "lm_loss": 5.5833, "loss": 1.6048, "step": 1108, "text_contrastive_loss": 0.8846 }, { "contrastive_loss": 0.4785, "epoch": 2.5033860045146725, "grad_norm": 14.658867835998535, "learning_rate": 5.170469099199363e-06, "lm_loss": 5.5818, "loss": 1.4511, "step": 1109, "text_contrastive_loss": 0.8288 }, { "contrastive_loss": 0.5242, "epoch": 2.505643340857788, "grad_norm": 14.744585037231445, "learning_rate": 5.1632177300145255e-06, "lm_loss": 5.4845, "loss": 1.503, "step": 1110, "text_contrastive_loss": 0.8606 }, { "contrastive_loss": 0.6345, "epoch": 2.5079006772009027, "grad_norm": 16.3204288482666, "learning_rate": 5.155966017152108e-06, "lm_loss": 5.5486, "loss": 1.673, "step": 1111, "text_contrastive_loss": 0.9673 }, { "contrastive_loss": 0.6088, "epoch": 2.510158013544018, "grad_norm": 14.689356803894043, "learning_rate": 5.148713975881598e-06, "lm_loss": 5.4947, "loss": 1.6335, "step": 1112, "text_contrastive_loss": 0.9503 }, { "contrastive_loss": 0.4956, "epoch": 2.5124153498871333, "grad_norm": 13.767909049987793, "learning_rate": 5.141461621473175e-06, "lm_loss": 5.6347, "loss": 1.4861, "step": 1113, "text_contrastive_loss": 0.854 }, { "contrastive_loss": 0.6261, "epoch": 2.514672686230248, "grad_norm": 16.780237197875977, "learning_rate": 5.1342089691976794e-06, "lm_loss": 5.4666, "loss": 1.6463, "step": 1114, "text_contrastive_loss": 0.9472 }, { "contrastive_loss": 0.5675, "epoch": 2.5169300225733635, "grad_norm": 16.320783615112305, "learning_rate": 5.126956034326573e-06, "lm_loss": 5.4596, "loss": 1.5824, "step": 1115, "text_contrastive_loss": 0.9377 }, { "contrastive_loss": 0.4942, "epoch": 2.5191873589164784, "grad_norm": 15.041061401367188, "learning_rate": 5.119702832131922e-06, "lm_loss": 5.6353, "loss": 1.4093, "step": 1116, "text_contrastive_loss": 0.703 }, { "contrastive_loss": 0.5249, "epoch": 2.5214446952595937, "grad_norm": 13.427505493164062, "learning_rate": 5.112449377886345e-06, "lm_loss": 5.5718, "loss": 1.5324, "step": 1117, "text_contrastive_loss": 0.9006 }, { "contrastive_loss": 0.5309, "epoch": 2.523702031602709, "grad_norm": 17.36693572998047, "learning_rate": 5.105195686863e-06, "lm_loss": 5.5065, "loss": 1.5013, "step": 1118, "text_contrastive_loss": 0.8395 }, { "contrastive_loss": 0.5017, "epoch": 2.525959367945824, "grad_norm": 15.0901517868042, "learning_rate": 5.097941774335538e-06, "lm_loss": 5.5845, "loss": 1.5407, "step": 1119, "text_contrastive_loss": 0.961 }, { "contrastive_loss": 0.5806, "epoch": 2.528216704288939, "grad_norm": 15.08397388458252, "learning_rate": 5.090687655578078e-06, "lm_loss": 5.4375, "loss": 1.6074, "step": 1120, "text_contrastive_loss": 0.9661 }, { "contrastive_loss": 0.6207, "epoch": 2.530474040632054, "grad_norm": 15.705390930175781, "learning_rate": 5.083433345865175e-06, "lm_loss": 5.4951, "loss": 1.5651, "step": 1121, "text_contrastive_loss": 0.7897 }, { "contrastive_loss": 0.6248, "epoch": 2.5327313769751694, "grad_norm": 18.175708770751953, "learning_rate": 5.076178860471787e-06, "lm_loss": 5.5092, "loss": 1.6338, "step": 1122, "text_contrastive_loss": 0.9161 }, { "contrastive_loss": 0.5244, "epoch": 2.5349887133182847, "grad_norm": 15.123210906982422, "learning_rate": 5.068924214673234e-06, "lm_loss": 5.4212, "loss": 1.4828, "step": 1123, "text_contrastive_loss": 0.8326 }, { "contrastive_loss": 0.4389, "epoch": 2.5372460496613995, "grad_norm": 14.30945873260498, "learning_rate": 5.061669423745185e-06, "lm_loss": 5.5389, "loss": 1.3666, "step": 1124, "text_contrastive_loss": 0.7477 }, { "contrastive_loss": 0.4769, "epoch": 2.5395033860045144, "grad_norm": 16.882328033447266, "learning_rate": 5.054414502963605e-06, "lm_loss": 5.6542, "loss": 1.4617, "step": 1125, "text_contrastive_loss": 0.8388 }, { "contrastive_loss": 0.4262, "epoch": 2.5417607223476297, "grad_norm": 12.748918533325195, "learning_rate": 5.0471594676047385e-06, "lm_loss": 5.588, "loss": 1.3344, "step": 1126, "text_contrastive_loss": 0.6987 }, { "contrastive_loss": 0.4708, "epoch": 2.544018058690745, "grad_norm": 14.473797798156738, "learning_rate": 5.039904332945069e-06, "lm_loss": 5.5085, "loss": 1.3946, "step": 1127, "text_contrastive_loss": 0.7459 }, { "contrastive_loss": 0.4823, "epoch": 2.54627539503386, "grad_norm": 15.05002498626709, "learning_rate": 5.03264911426129e-06, "lm_loss": 5.4364, "loss": 1.4151, "step": 1128, "text_contrastive_loss": 0.7782 }, { "contrastive_loss": 0.5926, "epoch": 2.5485327313769752, "grad_norm": 15.238771438598633, "learning_rate": 5.025393826830267e-06, "lm_loss": 5.5455, "loss": 1.6105, "step": 1129, "text_contrastive_loss": 0.9267 }, { "contrastive_loss": 0.5255, "epoch": 2.55079006772009, "grad_norm": 15.349923133850098, "learning_rate": 5.0181384859290215e-06, "lm_loss": 5.5044, "loss": 1.4948, "step": 1130, "text_contrastive_loss": 0.8377 }, { "contrastive_loss": 0.5483, "epoch": 2.5530474040632054, "grad_norm": 15.675585746765137, "learning_rate": 5.010883106834676e-06, "lm_loss": 5.5256, "loss": 1.5209, "step": 1131, "text_contrastive_loss": 0.84 }, { "contrastive_loss": 0.5889, "epoch": 2.5553047404063207, "grad_norm": 16.66605567932129, "learning_rate": 5.003627704824438e-06, "lm_loss": 5.4589, "loss": 1.5148, "step": 1132, "text_contrastive_loss": 0.7601 }, { "contrastive_loss": 0.4575, "epoch": 2.5575620767494356, "grad_norm": 13.640859603881836, "learning_rate": 4.996372295175563e-06, "lm_loss": 5.536, "loss": 1.3606, "step": 1133, "text_contrastive_loss": 0.6991 }, { "contrastive_loss": 0.645, "epoch": 2.559819413092551, "grad_norm": 16.621692657470703, "learning_rate": 4.989116893165325e-06, "lm_loss": 5.5277, "loss": 1.6136, "step": 1134, "text_contrastive_loss": 0.8318 }, { "contrastive_loss": 0.6095, "epoch": 2.5620767494356658, "grad_norm": 15.903656005859375, "learning_rate": 4.981861514070979e-06, "lm_loss": 5.4128, "loss": 1.6205, "step": 1135, "text_contrastive_loss": 0.9394 }, { "contrastive_loss": 0.5439, "epoch": 2.564334085778781, "grad_norm": 15.493552207946777, "learning_rate": 4.974606173169733e-06, "lm_loss": 5.6034, "loss": 1.5336, "step": 1136, "text_contrastive_loss": 0.8587 }, { "contrastive_loss": 0.491, "epoch": 2.5665914221218964, "grad_norm": 13.51240348815918, "learning_rate": 4.9673508857387115e-06, "lm_loss": 5.3974, "loss": 1.3996, "step": 1137, "text_contrastive_loss": 0.7378 }, { "contrastive_loss": 0.535, "epoch": 2.5688487584650113, "grad_norm": 14.415314674377441, "learning_rate": 4.9600956670549324e-06, "lm_loss": 5.5457, "loss": 1.5186, "step": 1138, "text_contrastive_loss": 0.858 }, { "contrastive_loss": 0.6606, "epoch": 2.5711060948081266, "grad_norm": 15.831226348876953, "learning_rate": 4.952840532395262e-06, "lm_loss": 5.5284, "loss": 1.689, "step": 1139, "text_contrastive_loss": 0.9512 }, { "contrastive_loss": 0.5523, "epoch": 2.5733634311512414, "grad_norm": 15.637651443481445, "learning_rate": 4.945585497036396e-06, "lm_loss": 5.5387, "loss": 1.489, "step": 1140, "text_contrastive_loss": 0.7656 }, { "contrastive_loss": 0.6667, "epoch": 2.5756207674943568, "grad_norm": 18.41811180114746, "learning_rate": 4.938330576254817e-06, "lm_loss": 5.4911, "loss": 1.677, "step": 1141, "text_contrastive_loss": 0.9223 }, { "contrastive_loss": 0.5854, "epoch": 2.5778781038374716, "grad_norm": 16.636863708496094, "learning_rate": 4.931075785326767e-06, "lm_loss": 5.4529, "loss": 1.5712, "step": 1142, "text_contrastive_loss": 0.8811 }, { "contrastive_loss": 0.587, "epoch": 2.580135440180587, "grad_norm": 16.620792388916016, "learning_rate": 4.9238211395282156e-06, "lm_loss": 5.4365, "loss": 1.5025, "step": 1143, "text_contrastive_loss": 0.7438 }, { "contrastive_loss": 0.4897, "epoch": 2.582392776523702, "grad_norm": 15.157502174377441, "learning_rate": 4.9165666541348265e-06, "lm_loss": 5.422, "loss": 1.4327, "step": 1144, "text_contrastive_loss": 0.8017 }, { "contrastive_loss": 0.4966, "epoch": 2.584650112866817, "grad_norm": 14.812685012817383, "learning_rate": 4.909312344421923e-06, "lm_loss": 5.4986, "loss": 1.4148, "step": 1145, "text_contrastive_loss": 0.7367 }, { "contrastive_loss": 0.4962, "epoch": 2.5869074492099324, "grad_norm": 14.94445514678955, "learning_rate": 4.902058225664465e-06, "lm_loss": 5.4792, "loss": 1.4704, "step": 1146, "text_contrastive_loss": 0.8526 }, { "contrastive_loss": 0.4477, "epoch": 2.5891647855530473, "grad_norm": 14.922908782958984, "learning_rate": 4.8948043131370025e-06, "lm_loss": 5.4961, "loss": 1.3859, "step": 1147, "text_contrastive_loss": 0.777 }, { "contrastive_loss": 0.5108, "epoch": 2.5914221218961626, "grad_norm": 15.819107055664062, "learning_rate": 4.887550622113657e-06, "lm_loss": 5.4651, "loss": 1.4422, "step": 1148, "text_contrastive_loss": 0.7698 }, { "contrastive_loss": 0.4764, "epoch": 2.5936794582392775, "grad_norm": 16.320865631103516, "learning_rate": 4.88029716786808e-06, "lm_loss": 5.5991, "loss": 1.448, "step": 1149, "text_contrastive_loss": 0.8234 }, { "contrastive_loss": 0.5614, "epoch": 2.595936794582393, "grad_norm": 17.54962921142578, "learning_rate": 4.873043965673427e-06, "lm_loss": 5.5239, "loss": 1.5026, "step": 1150, "text_contrastive_loss": 0.7778 }, { "contrastive_loss": 0.5654, "epoch": 2.598194130925508, "grad_norm": 19.690631866455078, "learning_rate": 4.8657910308023205e-06, "lm_loss": 5.5625, "loss": 1.5904, "step": 1151, "text_contrastive_loss": 0.9375 }, { "contrastive_loss": 0.6657, "epoch": 2.600451467268623, "grad_norm": 18.43953514099121, "learning_rate": 4.858538378526825e-06, "lm_loss": 5.6181, "loss": 1.6538, "step": 1152, "text_contrastive_loss": 0.8525 }, { "contrastive_loss": 0.6193, "epoch": 2.6027088036117383, "grad_norm": 15.578585624694824, "learning_rate": 4.851286024118402e-06, "lm_loss": 5.3932, "loss": 1.5595, "step": 1153, "text_contrastive_loss": 0.8018 }, { "contrastive_loss": 0.5564, "epoch": 2.604966139954853, "grad_norm": 17.017620086669922, "learning_rate": 4.844033982847893e-06, "lm_loss": 5.4934, "loss": 1.5226, "step": 1154, "text_contrastive_loss": 0.8338 }, { "contrastive_loss": 0.5105, "epoch": 2.6072234762979685, "grad_norm": 16.31838035583496, "learning_rate": 4.836782269985475e-06, "lm_loss": 5.4835, "loss": 1.4426, "step": 1155, "text_contrastive_loss": 0.7675 }, { "contrastive_loss": 0.4958, "epoch": 2.609480812641084, "grad_norm": 14.419219017028809, "learning_rate": 4.829530900800638e-06, "lm_loss": 5.4756, "loss": 1.4355, "step": 1156, "text_contrastive_loss": 0.7841 }, { "contrastive_loss": 0.6406, "epoch": 2.6117381489841986, "grad_norm": 15.977461814880371, "learning_rate": 4.8222798905621445e-06, "lm_loss": 5.5373, "loss": 1.6012, "step": 1157, "text_contrastive_loss": 0.8137 }, { "contrastive_loss": 0.6028, "epoch": 2.6139954853273135, "grad_norm": 18.114654541015625, "learning_rate": 4.815029254538003e-06, "lm_loss": 5.5876, "loss": 1.5115, "step": 1158, "text_contrastive_loss": 0.7 }, { "contrastive_loss": 0.5203, "epoch": 2.616252821670429, "grad_norm": 13.368484497070312, "learning_rate": 4.807779007995434e-06, "lm_loss": 5.4564, "loss": 1.4583, "step": 1159, "text_contrastive_loss": 0.7849 }, { "contrastive_loss": 0.4534, "epoch": 2.618510158013544, "grad_norm": 14.692235946655273, "learning_rate": 4.800529166200837e-06, "lm_loss": 5.6025, "loss": 1.3771, "step": 1160, "text_contrastive_loss": 0.7269 }, { "contrastive_loss": 0.5249, "epoch": 2.620767494356659, "grad_norm": 15.356192588806152, "learning_rate": 4.7932797444197604e-06, "lm_loss": 5.602, "loss": 1.4717, "step": 1161, "text_contrastive_loss": 0.7733 }, { "contrastive_loss": 0.517, "epoch": 2.6230248306997743, "grad_norm": 13.836146354675293, "learning_rate": 4.786030757916868e-06, "lm_loss": 5.5499, "loss": 1.4582, "step": 1162, "text_contrastive_loss": 0.7724 }, { "contrastive_loss": 0.6164, "epoch": 2.625282167042889, "grad_norm": 18.457595825195312, "learning_rate": 4.778782221955907e-06, "lm_loss": 5.5664, "loss": 1.5773, "step": 1163, "text_contrastive_loss": 0.8086 }, { "contrastive_loss": 0.4743, "epoch": 2.6275395033860045, "grad_norm": 14.833595275878906, "learning_rate": 4.771534151799676e-06, "lm_loss": 5.5332, "loss": 1.4507, "step": 1164, "text_contrastive_loss": 0.8461 }, { "contrastive_loss": 0.4967, "epoch": 2.62979683972912, "grad_norm": 14.186467170715332, "learning_rate": 4.76428656270999e-06, "lm_loss": 5.6641, "loss": 1.4269, "step": 1165, "text_contrastive_loss": 0.7277 }, { "contrastive_loss": 0.5967, "epoch": 2.6320541760722347, "grad_norm": 16.209814071655273, "learning_rate": 4.757039469947658e-06, "lm_loss": 5.4752, "loss": 1.5592, "step": 1166, "text_contrastive_loss": 0.8299 }, { "contrastive_loss": 0.4835, "epoch": 2.63431151241535, "grad_norm": 15.594867706298828, "learning_rate": 4.7497928887724325e-06, "lm_loss": 5.55, "loss": 1.4447, "step": 1167, "text_contrastive_loss": 0.8123 }, { "contrastive_loss": 0.5326, "epoch": 2.636568848758465, "grad_norm": 15.795209884643555, "learning_rate": 4.7425468344430035e-06, "lm_loss": 5.5219, "loss": 1.4649, "step": 1168, "text_contrastive_loss": 0.7603 }, { "contrastive_loss": 0.4957, "epoch": 2.63882618510158, "grad_norm": 16.403053283691406, "learning_rate": 4.73530132221694e-06, "lm_loss": 5.4785, "loss": 1.4122, "step": 1169, "text_contrastive_loss": 0.7372 }, { "contrastive_loss": 0.518, "epoch": 2.6410835214446955, "grad_norm": 16.638166427612305, "learning_rate": 4.7280563673506745e-06, "lm_loss": 5.6407, "loss": 1.5187, "step": 1170, "text_contrastive_loss": 0.8733 }, { "contrastive_loss": 0.5433, "epoch": 2.6433408577878104, "grad_norm": 16.512386322021484, "learning_rate": 4.720811985099464e-06, "lm_loss": 5.4574, "loss": 1.543, "step": 1171, "text_contrastive_loss": 0.908 }, { "contrastive_loss": 0.465, "epoch": 2.6455981941309257, "grad_norm": 14.082274436950684, "learning_rate": 4.713568190717362e-06, "lm_loss": 5.4258, "loss": 1.4103, "step": 1172, "text_contrastive_loss": 0.8054 }, { "contrastive_loss": 0.5123, "epoch": 2.6478555304740405, "grad_norm": 14.941728591918945, "learning_rate": 4.70632499945718e-06, "lm_loss": 5.5014, "loss": 1.4248, "step": 1173, "text_contrastive_loss": 0.7248 }, { "contrastive_loss": 0.6703, "epoch": 2.650112866817156, "grad_norm": 17.750526428222656, "learning_rate": 4.699082426570465e-06, "lm_loss": 5.5859, "loss": 1.6718, "step": 1174, "text_contrastive_loss": 0.8858 }, { "contrastive_loss": 0.6189, "epoch": 2.6523702031602707, "grad_norm": 18.926570892333984, "learning_rate": 4.6918404873074574e-06, "lm_loss": 5.4535, "loss": 1.6068, "step": 1175, "text_contrastive_loss": 0.8851 }, { "contrastive_loss": 0.4899, "epoch": 2.654627539503386, "grad_norm": 14.999008178710938, "learning_rate": 4.684599196917067e-06, "lm_loss": 5.7192, "loss": 1.4694, "step": 1176, "text_contrastive_loss": 0.8153 }, { "contrastive_loss": 0.5594, "epoch": 2.656884875846501, "grad_norm": 14.709131240844727, "learning_rate": 4.677358570646834e-06, "lm_loss": 5.4593, "loss": 1.5028, "step": 1177, "text_contrastive_loss": 0.795 }, { "contrastive_loss": 0.4572, "epoch": 2.659142212189616, "grad_norm": 13.443650245666504, "learning_rate": 4.670118623742904e-06, "lm_loss": 5.6383, "loss": 1.3786, "step": 1178, "text_contrastive_loss": 0.7151 }, { "contrastive_loss": 0.4666, "epoch": 2.6613995485327315, "grad_norm": 15.100601196289062, "learning_rate": 4.662879371449987e-06, "lm_loss": 5.4287, "loss": 1.4094, "step": 1179, "text_contrastive_loss": 0.8 }, { "contrastive_loss": 0.546, "epoch": 2.6636568848758464, "grad_norm": 15.13619327545166, "learning_rate": 4.655640829011335e-06, "lm_loss": 5.5045, "loss": 1.463, "step": 1180, "text_contrastive_loss": 0.7331 }, { "contrastive_loss": 0.509, "epoch": 2.6659142212189617, "grad_norm": 14.496602058410645, "learning_rate": 4.6484030116687014e-06, "lm_loss": 5.4848, "loss": 1.4208, "step": 1181, "text_contrastive_loss": 0.7267 }, { "contrastive_loss": 0.518, "epoch": 2.6681715575620766, "grad_norm": 15.54823112487793, "learning_rate": 4.64116593466232e-06, "lm_loss": 5.3756, "loss": 1.5412, "step": 1182, "text_contrastive_loss": 0.9713 }, { "contrastive_loss": 0.4753, "epoch": 2.670428893905192, "grad_norm": 15.684138298034668, "learning_rate": 4.633929613230855e-06, "lm_loss": 5.4609, "loss": 1.4673, "step": 1183, "text_contrastive_loss": 0.8917 }, { "contrastive_loss": 0.5154, "epoch": 2.672686230248307, "grad_norm": 14.663494110107422, "learning_rate": 4.626694062611387e-06, "lm_loss": 5.447, "loss": 1.4817, "step": 1184, "text_contrastive_loss": 0.8431 }, { "contrastive_loss": 0.5792, "epoch": 2.674943566591422, "grad_norm": 17.431798934936523, "learning_rate": 4.619459298039373e-06, "lm_loss": 5.4315, "loss": 1.5457, "step": 1185, "text_contrastive_loss": 0.8467 }, { "contrastive_loss": 0.4407, "epoch": 2.6772009029345374, "grad_norm": 13.369098663330078, "learning_rate": 4.612225334748616e-06, "lm_loss": 5.6408, "loss": 1.3813, "step": 1186, "text_contrastive_loss": 0.753 }, { "contrastive_loss": 0.4911, "epoch": 2.6794582392776523, "grad_norm": 15.295650482177734, "learning_rate": 4.6049921879712254e-06, "lm_loss": 5.4293, "loss": 1.3968, "step": 1187, "text_contrastive_loss": 0.7257 }, { "contrastive_loss": 0.5897, "epoch": 2.6817155756207676, "grad_norm": 14.655820846557617, "learning_rate": 4.597759872937597e-06, "lm_loss": 5.5653, "loss": 1.4946, "step": 1188, "text_contrastive_loss": 0.6966 }, { "contrastive_loss": 0.6091, "epoch": 2.683972911963883, "grad_norm": 15.665904998779297, "learning_rate": 4.590528404876374e-06, "lm_loss": 5.5102, "loss": 1.605, "step": 1189, "text_contrastive_loss": 0.8899 }, { "contrastive_loss": 0.573, "epoch": 2.6862302483069977, "grad_norm": 15.560206413269043, "learning_rate": 4.5832977990144165e-06, "lm_loss": 5.5248, "loss": 1.573, "step": 1190, "text_contrastive_loss": 0.895 }, { "contrastive_loss": 0.5217, "epoch": 2.6884875846501126, "grad_norm": 14.409103393554688, "learning_rate": 4.5760680705767665e-06, "lm_loss": 5.4393, "loss": 1.4393, "step": 1191, "text_contrastive_loss": 0.7472 }, { "contrastive_loss": 0.5037, "epoch": 2.690744920993228, "grad_norm": 15.002391815185547, "learning_rate": 4.5688392347866226e-06, "lm_loss": 5.5225, "loss": 1.4061, "step": 1192, "text_contrastive_loss": 0.7002 }, { "contrastive_loss": 0.6246, "epoch": 2.6930022573363432, "grad_norm": 15.702973365783691, "learning_rate": 4.561611306865299e-06, "lm_loss": 5.4947, "loss": 1.5696, "step": 1193, "text_contrastive_loss": 0.7912 }, { "contrastive_loss": 0.4687, "epoch": 2.695259593679458, "grad_norm": 14.511507987976074, "learning_rate": 4.554384302032204e-06, "lm_loss": 5.4935, "loss": 1.3736, "step": 1194, "text_contrastive_loss": 0.7112 }, { "contrastive_loss": 0.5338, "epoch": 2.6975169300225734, "grad_norm": 14.777057647705078, "learning_rate": 4.547158235504797e-06, "lm_loss": 5.6297, "loss": 1.5445, "step": 1195, "text_contrastive_loss": 0.8953 }, { "contrastive_loss": 0.567, "epoch": 2.6997742663656883, "grad_norm": 14.681438446044922, "learning_rate": 4.539933122498566e-06, "lm_loss": 5.4765, "loss": 1.5354, "step": 1196, "text_contrastive_loss": 0.8415 }, { "contrastive_loss": 0.4323, "epoch": 2.7020316027088036, "grad_norm": 14.590365409851074, "learning_rate": 4.532708978226987e-06, "lm_loss": 5.4267, "loss": 1.323, "step": 1197, "text_contrastive_loss": 0.696 }, { "contrastive_loss": 0.4428, "epoch": 2.704288939051919, "grad_norm": 14.539432525634766, "learning_rate": 4.525485817901499e-06, "lm_loss": 5.4905, "loss": 1.3467, "step": 1198, "text_contrastive_loss": 0.7098 }, { "contrastive_loss": 0.5636, "epoch": 2.706546275395034, "grad_norm": 16.865188598632812, "learning_rate": 4.518263656731468e-06, "lm_loss": 5.4869, "loss": 1.5635, "step": 1199, "text_contrastive_loss": 0.9024 }, { "contrastive_loss": 0.6087, "epoch": 2.708803611738149, "grad_norm": 17.277889251708984, "learning_rate": 4.511042509924157e-06, "lm_loss": 5.4855, "loss": 1.5486, "step": 1200, "text_contrastive_loss": 0.7826 }, { "contrastive_loss": 0.5515, "epoch": 2.711060948081264, "grad_norm": 14.8447847366333, "learning_rate": 4.5038223926846905e-06, "lm_loss": 5.4666, "loss": 1.5006, "step": 1201, "text_contrastive_loss": 0.8048 }, { "contrastive_loss": 0.578, "epoch": 2.7133182844243793, "grad_norm": 16.393775939941406, "learning_rate": 4.49660332021603e-06, "lm_loss": 5.3864, "loss": 1.5414, "step": 1202, "text_contrastive_loss": 0.8497 }, { "contrastive_loss": 0.4884, "epoch": 2.7155756207674946, "grad_norm": 16.465248107910156, "learning_rate": 4.489385307718934e-06, "lm_loss": 5.523, "loss": 1.4537, "step": 1203, "text_contrastive_loss": 0.826 }, { "contrastive_loss": 0.494, "epoch": 2.7178329571106095, "grad_norm": 15.032328605651855, "learning_rate": 4.482168370391931e-06, "lm_loss": 5.4548, "loss": 1.4819, "step": 1204, "text_contrastive_loss": 0.8847 }, { "contrastive_loss": 0.5458, "epoch": 2.7200902934537243, "grad_norm": 15.656888008117676, "learning_rate": 4.47495252343128e-06, "lm_loss": 5.4434, "loss": 1.4992, "step": 1205, "text_contrastive_loss": 0.8182 }, { "contrastive_loss": 0.4925, "epoch": 2.7223476297968396, "grad_norm": 16.2528076171875, "learning_rate": 4.467737782030951e-06, "lm_loss": 5.5406, "loss": 1.4561, "step": 1206, "text_contrastive_loss": 0.819 }, { "contrastive_loss": 0.4839, "epoch": 2.724604966139955, "grad_norm": 14.774943351745605, "learning_rate": 4.460524161382582e-06, "lm_loss": 5.5624, "loss": 1.4267, "step": 1207, "text_contrastive_loss": 0.7732 }, { "contrastive_loss": 0.5676, "epoch": 2.72686230248307, "grad_norm": 17.021915435791016, "learning_rate": 4.453311676675453e-06, "lm_loss": 5.4838, "loss": 1.5747, "step": 1208, "text_contrastive_loss": 0.9174 }, { "contrastive_loss": 0.5606, "epoch": 2.729119638826185, "grad_norm": 15.182758331298828, "learning_rate": 4.44610034309645e-06, "lm_loss": 5.4755, "loss": 1.523, "step": 1209, "text_contrastive_loss": 0.8298 }, { "contrastive_loss": 0.4408, "epoch": 2.7313769751693, "grad_norm": 13.728645324707031, "learning_rate": 4.438890175830039e-06, "lm_loss": 5.5334, "loss": 1.3396, "step": 1210, "text_contrastive_loss": 0.6909 }, { "contrastive_loss": 0.545, "epoch": 2.7336343115124153, "grad_norm": 15.889242172241211, "learning_rate": 4.431681190058224e-06, "lm_loss": 5.4992, "loss": 1.5851, "step": 1211, "text_contrastive_loss": 0.9804 }, { "contrastive_loss": 0.4922, "epoch": 2.7358916478555306, "grad_norm": 14.040260314941406, "learning_rate": 4.42447340096053e-06, "lm_loss": 5.6347, "loss": 1.3842, "step": 1212, "text_contrastive_loss": 0.6572 }, { "contrastive_loss": 0.5602, "epoch": 2.7381489841986455, "grad_norm": 14.200431823730469, "learning_rate": 4.417266823713953e-06, "lm_loss": 5.3882, "loss": 1.4308, "step": 1213, "text_contrastive_loss": 0.6635 }, { "contrastive_loss": 0.464, "epoch": 2.740406320541761, "grad_norm": 13.672407150268555, "learning_rate": 4.410061473492943e-06, "lm_loss": 5.4964, "loss": 1.3614, "step": 1214, "text_contrastive_loss": 0.6955 }, { "contrastive_loss": 0.5088, "epoch": 2.7426636568848757, "grad_norm": 16.66963005065918, "learning_rate": 4.402857365469364e-06, "lm_loss": 5.5433, "loss": 1.4142, "step": 1215, "text_contrastive_loss": 0.7022 }, { "contrastive_loss": 0.4742, "epoch": 2.744920993227991, "grad_norm": 14.881044387817383, "learning_rate": 4.3956545148124665e-06, "lm_loss": 5.4994, "loss": 1.4533, "step": 1216, "text_contrastive_loss": 0.8582 }, { "contrastive_loss": 0.4697, "epoch": 2.7471783295711063, "grad_norm": 13.808037757873535, "learning_rate": 4.38845293668885e-06, "lm_loss": 5.7113, "loss": 1.5018, "step": 1217, "text_contrastive_loss": 0.9219 }, { "contrastive_loss": 0.5519, "epoch": 2.749435665914221, "grad_norm": 16.27696990966797, "learning_rate": 4.381252646262437e-06, "lm_loss": 5.6259, "loss": 1.4901, "step": 1218, "text_contrastive_loss": 0.7512 }, { "contrastive_loss": 0.4959, "epoch": 2.7516930022573365, "grad_norm": 14.359052658081055, "learning_rate": 4.37405365869444e-06, "lm_loss": 5.5, "loss": 1.4236, "step": 1219, "text_contrastive_loss": 0.7554 }, { "contrastive_loss": 0.4808, "epoch": 2.7539503386004514, "grad_norm": 13.013676643371582, "learning_rate": 4.366855989143326e-06, "lm_loss": 5.4649, "loss": 1.3498, "step": 1220, "text_contrastive_loss": 0.6449 }, { "contrastive_loss": 0.4465, "epoch": 2.7562076749435667, "grad_norm": 15.298121452331543, "learning_rate": 4.359659652764786e-06, "lm_loss": 5.408, "loss": 1.4054, "step": 1221, "text_contrastive_loss": 0.8361 }, { "contrastive_loss": 0.5785, "epoch": 2.758465011286682, "grad_norm": 14.701443672180176, "learning_rate": 4.352464664711706e-06, "lm_loss": 5.4825, "loss": 1.5586, "step": 1222, "text_contrastive_loss": 0.8637 }, { "contrastive_loss": 0.5882, "epoch": 2.760722347629797, "grad_norm": 16.48676872253418, "learning_rate": 4.345271040134129e-06, "lm_loss": 5.4937, "loss": 1.6228, "step": 1223, "text_contrastive_loss": 0.9705 }, { "contrastive_loss": 0.5671, "epoch": 2.7629796839729117, "grad_norm": 15.255523681640625, "learning_rate": 4.338078794179234e-06, "lm_loss": 5.505, "loss": 1.5007, "step": 1224, "text_contrastive_loss": 0.7661 }, { "contrastive_loss": 0.4796, "epoch": 2.765237020316027, "grad_norm": 15.49962043762207, "learning_rate": 4.330887941991288e-06, "lm_loss": 5.5146, "loss": 1.4869, "step": 1225, "text_contrastive_loss": 0.9117 }, { "contrastive_loss": 0.4473, "epoch": 2.7674943566591423, "grad_norm": 12.847675323486328, "learning_rate": 4.323698498711634e-06, "lm_loss": 5.3823, "loss": 1.3547, "step": 1226, "text_contrastive_loss": 0.7384 }, { "contrastive_loss": 0.4975, "epoch": 2.769751693002257, "grad_norm": 14.924741744995117, "learning_rate": 4.316510479478636e-06, "lm_loss": 5.4048, "loss": 1.4483, "step": 1227, "text_contrastive_loss": 0.8206 }, { "contrastive_loss": 0.582, "epoch": 2.7720090293453725, "grad_norm": 17.28886604309082, "learning_rate": 4.309323899427671e-06, "lm_loss": 5.5183, "loss": 1.5399, "step": 1228, "text_contrastive_loss": 0.8122 }, { "contrastive_loss": 0.5158, "epoch": 2.7742663656884874, "grad_norm": 16.77458381652832, "learning_rate": 4.302138773691079e-06, "lm_loss": 5.4194, "loss": 1.4964, "step": 1229, "text_contrastive_loss": 0.8773 }, { "contrastive_loss": 0.4536, "epoch": 2.7765237020316027, "grad_norm": 15.139562606811523, "learning_rate": 4.294955117398139e-06, "lm_loss": 5.492, "loss": 1.3421, "step": 1230, "text_contrastive_loss": 0.6786 }, { "contrastive_loss": 0.4736, "epoch": 2.778781038374718, "grad_norm": 14.490601539611816, "learning_rate": 4.287772945675035e-06, "lm_loss": 5.4395, "loss": 1.4308, "step": 1231, "text_contrastive_loss": 0.8265 }, { "contrastive_loss": 0.4915, "epoch": 2.781038374717833, "grad_norm": 14.887679100036621, "learning_rate": 4.280592273644829e-06, "lm_loss": 5.4762, "loss": 1.397, "step": 1232, "text_contrastive_loss": 0.7159 }, { "contrastive_loss": 0.477, "epoch": 2.783295711060948, "grad_norm": 15.118358612060547, "learning_rate": 4.273413116427419e-06, "lm_loss": 5.517, "loss": 1.3906, "step": 1233, "text_contrastive_loss": 0.7237 }, { "contrastive_loss": 0.5144, "epoch": 2.785553047404063, "grad_norm": 16.686277389526367, "learning_rate": 4.26623548913952e-06, "lm_loss": 5.4588, "loss": 1.4282, "step": 1234, "text_contrastive_loss": 0.7359 }, { "contrastive_loss": 0.5352, "epoch": 2.7878103837471784, "grad_norm": 14.889708518981934, "learning_rate": 4.259059406894619e-06, "lm_loss": 5.438, "loss": 1.431, "step": 1235, "text_contrastive_loss": 0.704 }, { "contrastive_loss": 0.5859, "epoch": 2.7900677200902937, "grad_norm": 16.397249221801758, "learning_rate": 4.251884884802956e-06, "lm_loss": 5.4049, "loss": 1.5383, "step": 1236, "text_contrastive_loss": 0.8239 }, { "contrastive_loss": 0.5136, "epoch": 2.7923250564334086, "grad_norm": 16.485980987548828, "learning_rate": 4.2447119379714805e-06, "lm_loss": 5.538, "loss": 1.4517, "step": 1237, "text_contrastive_loss": 0.7685 }, { "contrastive_loss": 0.4696, "epoch": 2.7945823927765234, "grad_norm": 13.96359634399414, "learning_rate": 4.237540581503831e-06, "lm_loss": 5.4367, "loss": 1.3865, "step": 1238, "text_contrastive_loss": 0.7464 }, { "contrastive_loss": 0.4392, "epoch": 2.7968397291196387, "grad_norm": 14.440736770629883, "learning_rate": 4.23037083050029e-06, "lm_loss": 5.5737, "loss": 1.4151, "step": 1239, "text_contrastive_loss": 0.8371 }, { "contrastive_loss": 0.5125, "epoch": 2.799097065462754, "grad_norm": 17.516603469848633, "learning_rate": 4.223202700057765e-06, "lm_loss": 5.5025, "loss": 1.4648, "step": 1240, "text_contrastive_loss": 0.8039 }, { "contrastive_loss": 0.4722, "epoch": 2.801354401805869, "grad_norm": 14.528120040893555, "learning_rate": 4.216036205269748e-06, "lm_loss": 5.3938, "loss": 1.3782, "step": 1241, "text_contrastive_loss": 0.7332 }, { "contrastive_loss": 0.4854, "epoch": 2.8036117381489842, "grad_norm": 13.95931625366211, "learning_rate": 4.20887136122629e-06, "lm_loss": 5.5154, "loss": 1.4556, "step": 1242, "text_contrastive_loss": 0.8372 }, { "contrastive_loss": 0.6281, "epoch": 2.805869074492099, "grad_norm": 17.318078994750977, "learning_rate": 4.201708183013963e-06, "lm_loss": 5.4792, "loss": 1.6898, "step": 1243, "text_contrastive_loss": 1.0275 }, { "contrastive_loss": 0.5044, "epoch": 2.8081264108352144, "grad_norm": 16.64516258239746, "learning_rate": 4.1945466857158336e-06, "lm_loss": 5.4547, "loss": 1.4273, "step": 1244, "text_contrastive_loss": 0.7547 }, { "contrastive_loss": 0.4817, "epoch": 2.8103837471783297, "grad_norm": 17.341358184814453, "learning_rate": 4.187386884411426e-06, "lm_loss": 5.5751, "loss": 1.4236, "step": 1245, "text_contrastive_loss": 0.7687 }, { "contrastive_loss": 0.6545, "epoch": 2.8126410835214446, "grad_norm": 18.0773983001709, "learning_rate": 4.1802287941767e-06, "lm_loss": 5.5201, "loss": 1.6484, "step": 1246, "text_contrastive_loss": 0.8837 }, { "contrastive_loss": 0.5701, "epoch": 2.81489841986456, "grad_norm": 15.543575286865234, "learning_rate": 4.173072430084002e-06, "lm_loss": 5.5253, "loss": 1.588, "step": 1247, "text_contrastive_loss": 0.9308 }, { "contrastive_loss": 0.4257, "epoch": 2.8171557562076748, "grad_norm": 14.513049125671387, "learning_rate": 4.165917807202055e-06, "lm_loss": 5.54, "loss": 1.3121, "step": 1248, "text_contrastive_loss": 0.6648 }, { "contrastive_loss": 0.4788, "epoch": 2.81941309255079, "grad_norm": 14.176787376403809, "learning_rate": 4.1587649405959065e-06, "lm_loss": 5.465, "loss": 1.4523, "step": 1249, "text_contrastive_loss": 0.854 }, { "contrastive_loss": 0.5616, "epoch": 2.8216704288939054, "grad_norm": 15.48112964630127, "learning_rate": 4.151613845326912e-06, "lm_loss": 5.4052, "loss": 1.514, "step": 1250, "text_contrastive_loss": 0.8237 }, { "contrastive_loss": 0.5131, "epoch": 2.8239277652370203, "grad_norm": 16.84682273864746, "learning_rate": 4.144464536452693e-06, "lm_loss": 5.4927, "loss": 1.3859, "step": 1251, "text_contrastive_loss": 0.6471 }, { "contrastive_loss": 0.4262, "epoch": 2.8261851015801356, "grad_norm": 12.922840118408203, "learning_rate": 4.137317029027111e-06, "lm_loss": 5.5065, "loss": 1.3289, "step": 1252, "text_contrastive_loss": 0.704 }, { "contrastive_loss": 0.4614, "epoch": 2.8284424379232505, "grad_norm": 14.069493293762207, "learning_rate": 4.1301713381002394e-06, "lm_loss": 5.515, "loss": 1.4152, "step": 1253, "text_contrastive_loss": 0.8045 }, { "contrastive_loss": 0.5484, "epoch": 2.8306997742663658, "grad_norm": 14.741721153259277, "learning_rate": 4.123027478718318e-06, "lm_loss": 5.5015, "loss": 1.457, "step": 1254, "text_contrastive_loss": 0.7169 }, { "contrastive_loss": 0.5105, "epoch": 2.832957110609481, "grad_norm": 15.50547981262207, "learning_rate": 4.115885465923734e-06, "lm_loss": 5.5024, "loss": 1.4671, "step": 1255, "text_contrastive_loss": 0.8127 }, { "contrastive_loss": 0.5651, "epoch": 2.835214446952596, "grad_norm": 15.123098373413086, "learning_rate": 4.108745314754989e-06, "lm_loss": 5.4771, "loss": 1.4908, "step": 1256, "text_contrastive_loss": 0.7559 }, { "contrastive_loss": 0.5935, "epoch": 2.837471783295711, "grad_norm": 16.420448303222656, "learning_rate": 4.101607040246659e-06, "lm_loss": 5.4353, "loss": 1.5466, "step": 1257, "text_contrastive_loss": 0.8192 }, { "contrastive_loss": 0.5077, "epoch": 2.839729119638826, "grad_norm": 16.60091781616211, "learning_rate": 4.094470657429374e-06, "lm_loss": 5.444, "loss": 1.4865, "step": 1258, "text_contrastive_loss": 0.8688 }, { "contrastive_loss": 0.4556, "epoch": 2.8419864559819414, "grad_norm": 14.131499290466309, "learning_rate": 4.087336181329777e-06, "lm_loss": 5.5058, "loss": 1.4254, "step": 1259, "text_contrastive_loss": 0.8385 }, { "contrastive_loss": 0.5435, "epoch": 2.8442437923250563, "grad_norm": 16.15544319152832, "learning_rate": 4.080203626970498e-06, "lm_loss": 5.527, "loss": 1.5237, "step": 1260, "text_contrastive_loss": 0.8549 }, { "contrastive_loss": 0.5164, "epoch": 2.8465011286681716, "grad_norm": 14.349729537963867, "learning_rate": 4.0730730093701185e-06, "lm_loss": 5.425, "loss": 1.4032, "step": 1261, "text_contrastive_loss": 0.6887 }, { "contrastive_loss": 0.5041, "epoch": 2.8487584650112865, "grad_norm": 15.340802192687988, "learning_rate": 4.065944343543146e-06, "lm_loss": 5.4794, "loss": 1.4466, "step": 1262, "text_contrastive_loss": 0.7892 }, { "contrastive_loss": 0.5959, "epoch": 2.851015801354402, "grad_norm": 16.059484481811523, "learning_rate": 4.058817644499973e-06, "lm_loss": 5.5768, "loss": 1.5973, "step": 1263, "text_contrastive_loss": 0.8875 }, { "contrastive_loss": 0.6366, "epoch": 2.853273137697517, "grad_norm": 15.674849510192871, "learning_rate": 4.051692927246857e-06, "lm_loss": 5.4997, "loss": 1.5755, "step": 1264, "text_contrastive_loss": 0.7779 }, { "contrastive_loss": 0.4139, "epoch": 2.855530474040632, "grad_norm": 16.712678909301758, "learning_rate": 4.044570206785874e-06, "lm_loss": 5.4635, "loss": 1.327, "step": 1265, "text_contrastive_loss": 0.7334 }, { "contrastive_loss": 0.4657, "epoch": 2.8577878103837473, "grad_norm": 14.141221046447754, "learning_rate": 4.037449498114903e-06, "lm_loss": 5.4901, "loss": 1.4713, "step": 1266, "text_contrastive_loss": 0.9131 }, { "contrastive_loss": 0.5255, "epoch": 2.860045146726862, "grad_norm": 15.25301742553711, "learning_rate": 4.0303308162275835e-06, "lm_loss": 5.5251, "loss": 1.468, "step": 1267, "text_contrastive_loss": 0.7801 }, { "contrastive_loss": 0.4374, "epoch": 2.8623024830699775, "grad_norm": 13.475282669067383, "learning_rate": 4.0232141761132894e-06, "lm_loss": 5.4753, "loss": 1.3752, "step": 1268, "text_contrastive_loss": 0.7805 }, { "contrastive_loss": 0.5645, "epoch": 2.864559819413093, "grad_norm": 16.055419921875, "learning_rate": 4.016099592757091e-06, "lm_loss": 5.4632, "loss": 1.5468, "step": 1269, "text_contrastive_loss": 0.872 }, { "contrastive_loss": 0.5056, "epoch": 2.8668171557562077, "grad_norm": 15.780978202819824, "learning_rate": 4.008987081139734e-06, "lm_loss": 5.5773, "loss": 1.4674, "step": 1270, "text_contrastive_loss": 0.8082 }, { "contrastive_loss": 0.5171, "epoch": 2.8690744920993225, "grad_norm": 14.984731674194336, "learning_rate": 4.0018766562375984e-06, "lm_loss": 5.389, "loss": 1.499, "step": 1271, "text_contrastive_loss": 0.8861 }, { "contrastive_loss": 0.4955, "epoch": 2.871331828442438, "grad_norm": 14.706584930419922, "learning_rate": 3.994768333022669e-06, "lm_loss": 5.4877, "loss": 1.4122, "step": 1272, "text_contrastive_loss": 0.7357 }, { "contrastive_loss": 0.6436, "epoch": 2.873589164785553, "grad_norm": 15.665273666381836, "learning_rate": 3.987662126462507e-06, "lm_loss": 5.483, "loss": 1.6234, "step": 1273, "text_contrastive_loss": 0.863 }, { "contrastive_loss": 0.5189, "epoch": 2.875846501128668, "grad_norm": 16.631532669067383, "learning_rate": 3.980558051520218e-06, "lm_loss": 5.5155, "loss": 1.4515, "step": 1274, "text_contrastive_loss": 0.7621 }, { "contrastive_loss": 0.5533, "epoch": 2.8781038374717833, "grad_norm": 14.17483901977539, "learning_rate": 3.973456123154415e-06, "lm_loss": 5.5582, "loss": 1.5751, "step": 1275, "text_contrastive_loss": 0.932 }, { "contrastive_loss": 0.4405, "epoch": 2.880361173814898, "grad_norm": 14.921122550964355, "learning_rate": 3.966356356319196e-06, "lm_loss": 5.4335, "loss": 1.3677, "step": 1276, "text_contrastive_loss": 0.7678 }, { "contrastive_loss": 0.4333, "epoch": 2.8826185101580135, "grad_norm": 14.177828788757324, "learning_rate": 3.959258765964104e-06, "lm_loss": 5.4802, "loss": 1.3804, "step": 1277, "text_contrastive_loss": 0.7981 }, { "contrastive_loss": 0.5632, "epoch": 2.884875846501129, "grad_norm": 16.90080451965332, "learning_rate": 3.9521633670341005e-06, "lm_loss": 5.524, "loss": 1.5014, "step": 1278, "text_contrastive_loss": 0.7715 }, { "contrastive_loss": 0.538, "epoch": 2.8871331828442437, "grad_norm": 16.282695770263672, "learning_rate": 3.9450701744695325e-06, "lm_loss": 5.5656, "loss": 1.5157, "step": 1279, "text_contrastive_loss": 0.8424 }, { "contrastive_loss": 0.4526, "epoch": 2.889390519187359, "grad_norm": 15.23658275604248, "learning_rate": 3.937979203206103e-06, "lm_loss": 5.5136, "loss": 1.37, "step": 1280, "text_contrastive_loss": 0.732 }, { "contrastive_loss": 0.4487, "epoch": 2.891647855530474, "grad_norm": 12.806868553161621, "learning_rate": 3.930890468174833e-06, "lm_loss": 5.4335, "loss": 1.4022, "step": 1281, "text_contrastive_loss": 0.8204 }, { "contrastive_loss": 0.551, "epoch": 2.893905191873589, "grad_norm": 13.964062690734863, "learning_rate": 3.92380398430204e-06, "lm_loss": 5.4116, "loss": 1.5054, "step": 1282, "text_contrastive_loss": 0.8266 }, { "contrastive_loss": 0.5569, "epoch": 2.8961625282167045, "grad_norm": 14.256570816040039, "learning_rate": 3.916719766509297e-06, "lm_loss": 5.4027, "loss": 1.4482, "step": 1283, "text_contrastive_loss": 0.7021 }, { "contrastive_loss": 0.5465, "epoch": 2.8984198645598194, "grad_norm": 16.259355545043945, "learning_rate": 3.9096378297134115e-06, "lm_loss": 5.4163, "loss": 1.5062, "step": 1284, "text_contrastive_loss": 0.8361 }, { "contrastive_loss": 0.5112, "epoch": 2.9006772009029347, "grad_norm": 15.341547966003418, "learning_rate": 3.90255818882638e-06, "lm_loss": 5.512, "loss": 1.4347, "step": 1285, "text_contrastive_loss": 0.7445 }, { "contrastive_loss": 0.5085, "epoch": 2.9029345372460496, "grad_norm": 16.757909774780273, "learning_rate": 3.89548085875537e-06, "lm_loss": 5.3939, "loss": 1.4695, "step": 1286, "text_contrastive_loss": 0.8432 }, { "contrastive_loss": 0.5826, "epoch": 2.905191873589165, "grad_norm": 14.489827156066895, "learning_rate": 3.888405854402684e-06, "lm_loss": 5.5054, "loss": 1.6021, "step": 1287, "text_contrastive_loss": 0.9379 }, { "contrastive_loss": 0.3929, "epoch": 2.90744920993228, "grad_norm": 12.609889030456543, "learning_rate": 3.881333190665723e-06, "lm_loss": 5.4827, "loss": 1.2927, "step": 1288, "text_contrastive_loss": 0.703 }, { "contrastive_loss": 0.5341, "epoch": 2.909706546275395, "grad_norm": 15.732161521911621, "learning_rate": 3.8742628824369624e-06, "lm_loss": 5.4473, "loss": 1.4551, "step": 1289, "text_contrastive_loss": 0.7525 }, { "contrastive_loss": 0.442, "epoch": 2.91196388261851, "grad_norm": 14.732170104980469, "learning_rate": 3.86719494460392e-06, "lm_loss": 5.5936, "loss": 1.4205, "step": 1290, "text_contrastive_loss": 0.8383 }, { "contrastive_loss": 0.5042, "epoch": 2.9142212189616252, "grad_norm": 15.174171447753906, "learning_rate": 3.8601293920491165e-06, "lm_loss": 5.6159, "loss": 1.4833, "step": 1291, "text_contrastive_loss": 0.835 }, { "contrastive_loss": 0.6062, "epoch": 2.9164785553047405, "grad_norm": 16.383495330810547, "learning_rate": 3.853066239650055e-06, "lm_loss": 5.5655, "loss": 1.6561, "step": 1292, "text_contrastive_loss": 0.9867 }, { "contrastive_loss": 0.5467, "epoch": 2.9187358916478554, "grad_norm": 17.01769256591797, "learning_rate": 3.846005502279182e-06, "lm_loss": 5.3907, "loss": 1.4632, "step": 1293, "text_contrastive_loss": 0.7548 }, { "contrastive_loss": 0.5771, "epoch": 2.9209932279909707, "grad_norm": 16.033763885498047, "learning_rate": 3.83894719480386e-06, "lm_loss": 5.4395, "loss": 1.5986, "step": 1294, "text_contrastive_loss": 0.9552 }, { "contrastive_loss": 0.4185, "epoch": 2.9232505643340856, "grad_norm": 13.943612098693848, "learning_rate": 3.8318913320863355e-06, "lm_loss": 5.4561, "loss": 1.3335, "step": 1295, "text_contrastive_loss": 0.7388 }, { "contrastive_loss": 0.4371, "epoch": 2.925507900677201, "grad_norm": 13.424967765808105, "learning_rate": 3.8248379289837065e-06, "lm_loss": 5.3721, "loss": 1.3949, "step": 1296, "text_contrastive_loss": 0.841 }, { "contrastive_loss": 0.5854, "epoch": 2.927765237020316, "grad_norm": 16.367528915405273, "learning_rate": 3.81778700034789e-06, "lm_loss": 5.4614, "loss": 1.542, "step": 1297, "text_contrastive_loss": 0.821 }, { "contrastive_loss": 0.4734, "epoch": 2.930022573363431, "grad_norm": 14.303869247436523, "learning_rate": 3.810738561025599e-06, "lm_loss": 5.4918, "loss": 1.3774, "step": 1298, "text_contrastive_loss": 0.7096 }, { "contrastive_loss": 0.557, "epoch": 2.9322799097065464, "grad_norm": 17.35906982421875, "learning_rate": 3.803692625858295e-06, "lm_loss": 5.4766, "loss": 1.5341, "step": 1299, "text_contrastive_loss": 0.8588 }, { "contrastive_loss": 0.5556, "epoch": 2.9345372460496613, "grad_norm": 17.419885635375977, "learning_rate": 3.7966492096821773e-06, "lm_loss": 5.558, "loss": 1.5786, "step": 1300, "text_contrastive_loss": 0.9343 }, { "contrastive_loss": 0.4632, "epoch": 2.9367945823927766, "grad_norm": 14.226881980895996, "learning_rate": 3.7896083273281324e-06, "lm_loss": 5.4989, "loss": 1.3574, "step": 1301, "text_contrastive_loss": 0.6886 }, { "contrastive_loss": 0.5653, "epoch": 2.939051918735892, "grad_norm": 16.705991744995117, "learning_rate": 3.7825699936217183e-06, "lm_loss": 5.434, "loss": 1.5657, "step": 1302, "text_contrastive_loss": 0.9139 }, { "contrastive_loss": 0.5484, "epoch": 2.9413092550790068, "grad_norm": 16.482421875, "learning_rate": 3.7755342233831188e-06, "lm_loss": 5.4384, "loss": 1.5302, "step": 1303, "text_contrastive_loss": 0.876 }, { "contrastive_loss": 0.4876, "epoch": 2.9435665914221216, "grad_norm": 14.822606086730957, "learning_rate": 3.7685010314271287e-06, "lm_loss": 5.5107, "loss": 1.5152, "step": 1304, "text_contrastive_loss": 0.953 }, { "contrastive_loss": 0.5419, "epoch": 2.945823927765237, "grad_norm": 14.894735336303711, "learning_rate": 3.761470432563109e-06, "lm_loss": 5.6487, "loss": 1.4286, "step": 1305, "text_contrastive_loss": 0.6437 }, { "contrastive_loss": 0.5134, "epoch": 2.9480812641083523, "grad_norm": 15.99804401397705, "learning_rate": 3.75444244159496e-06, "lm_loss": 5.461, "loss": 1.4507, "step": 1306, "text_contrastive_loss": 0.7824 }, { "contrastive_loss": 0.6006, "epoch": 2.950338600451467, "grad_norm": 15.931785583496094, "learning_rate": 3.747417073321092e-06, "lm_loss": 5.6079, "loss": 1.5664, "step": 1307, "text_contrastive_loss": 0.8101 }, { "contrastive_loss": 0.5514, "epoch": 2.9525959367945824, "grad_norm": 14.610923767089844, "learning_rate": 3.740394342534394e-06, "lm_loss": 5.4427, "loss": 1.524, "step": 1308, "text_contrastive_loss": 0.8566 }, { "contrastive_loss": 0.5241, "epoch": 2.9548532731376973, "grad_norm": 14.238372802734375, "learning_rate": 3.7333742640221994e-06, "lm_loss": 5.6008, "loss": 1.5176, "step": 1309, "text_contrastive_loss": 0.8669 }, { "contrastive_loss": 0.6472, "epoch": 2.9571106094808126, "grad_norm": 16.7310791015625, "learning_rate": 3.7263568525662574e-06, "lm_loss": 5.5729, "loss": 1.6776, "step": 1310, "text_contrastive_loss": 0.9462 }, { "contrastive_loss": 0.5512, "epoch": 2.959367945823928, "grad_norm": 15.256760597229004, "learning_rate": 3.7193421229427017e-06, "lm_loss": 5.418, "loss": 1.4937, "step": 1311, "text_contrastive_loss": 0.8014 }, { "contrastive_loss": 0.5147, "epoch": 2.961625282167043, "grad_norm": 15.781255722045898, "learning_rate": 3.7123300899220193e-06, "lm_loss": 5.4881, "loss": 1.3988, "step": 1312, "text_contrastive_loss": 0.6707 }, { "contrastive_loss": 0.4753, "epoch": 2.963882618510158, "grad_norm": 14.34984016418457, "learning_rate": 3.7053207682690184e-06, "lm_loss": 5.5296, "loss": 1.3822, "step": 1313, "text_contrastive_loss": 0.7078 }, { "contrastive_loss": 0.5374, "epoch": 2.966139954853273, "grad_norm": 15.308257102966309, "learning_rate": 3.698314172742799e-06, "lm_loss": 5.3984, "loss": 1.5562, "step": 1314, "text_contrastive_loss": 0.958 }, { "contrastive_loss": 0.6667, "epoch": 2.9683972911963883, "grad_norm": 18.594757080078125, "learning_rate": 3.691310318096719e-06, "lm_loss": 5.5117, "loss": 1.669, "step": 1315, "text_contrastive_loss": 0.9024 }, { "contrastive_loss": 0.4934, "epoch": 2.9706546275395036, "grad_norm": 14.322915077209473, "learning_rate": 3.684309219078368e-06, "lm_loss": 5.5156, "loss": 1.3692, "step": 1316, "text_contrastive_loss": 0.6484 }, { "contrastive_loss": 0.4379, "epoch": 2.9729119638826185, "grad_norm": 14.292099952697754, "learning_rate": 3.6773108904295294e-06, "lm_loss": 5.4977, "loss": 1.3835, "step": 1317, "text_contrastive_loss": 0.7918 }, { "contrastive_loss": 0.6038, "epoch": 2.975169300225734, "grad_norm": 15.286895751953125, "learning_rate": 3.6703153468861585e-06, "lm_loss": 5.4807, "loss": 1.5857, "step": 1318, "text_contrastive_loss": 0.8677 }, { "contrastive_loss": 0.5631, "epoch": 2.9774266365688487, "grad_norm": 15.211936950683594, "learning_rate": 3.663322603178339e-06, "lm_loss": 5.5773, "loss": 1.593, "step": 1319, "text_contrastive_loss": 0.9444 }, { "contrastive_loss": 0.5717, "epoch": 2.979683972911964, "grad_norm": 16.08132553100586, "learning_rate": 3.6563326740302664e-06, "lm_loss": 5.4526, "loss": 1.5835, "step": 1320, "text_contrastive_loss": 0.9329 }, { "contrastive_loss": 0.5526, "epoch": 2.9819413092550793, "grad_norm": 14.952582359313965, "learning_rate": 3.6493455741602035e-06, "lm_loss": 5.5379, "loss": 1.4936, "step": 1321, "text_contrastive_loss": 0.7743 }, { "contrastive_loss": 0.4175, "epoch": 2.984198645598194, "grad_norm": 15.59471321105957, "learning_rate": 3.642361318280461e-06, "lm_loss": 5.4184, "loss": 1.2694, "step": 1322, "text_contrastive_loss": 0.6201 }, { "contrastive_loss": 0.4906, "epoch": 2.986455981941309, "grad_norm": 14.077731132507324, "learning_rate": 3.635379921097359e-06, "lm_loss": 5.5053, "loss": 1.4442, "step": 1323, "text_contrastive_loss": 0.806 }, { "contrastive_loss": 0.536, "epoch": 2.9887133182844243, "grad_norm": 15.875018119812012, "learning_rate": 3.6284013973111962e-06, "lm_loss": 5.5958, "loss": 1.4832, "step": 1324, "text_contrastive_loss": 0.7754 }, { "contrastive_loss": 0.5619, "epoch": 2.9909706546275396, "grad_norm": 16.182844161987305, "learning_rate": 3.621425761616224e-06, "lm_loss": 5.4129, "loss": 1.579, "step": 1325, "text_contrastive_loss": 0.9516 }, { "contrastive_loss": 0.5516, "epoch": 2.9932279909706545, "grad_norm": 16.243816375732422, "learning_rate": 3.614453028700613e-06, "lm_loss": 5.4592, "loss": 1.4853, "step": 1326, "text_contrastive_loss": 0.7756 }, { "contrastive_loss": 0.6289, "epoch": 2.99548532731377, "grad_norm": 17.56565284729004, "learning_rate": 3.6074832132464165e-06, "lm_loss": 5.5505, "loss": 1.5472, "step": 1327, "text_contrastive_loss": 0.7264 }, { "contrastive_loss": 0.5773, "epoch": 2.9977426636568847, "grad_norm": 18.263599395751953, "learning_rate": 3.600516329929551e-06, "lm_loss": 5.678, "loss": 1.5534, "step": 1328, "text_contrastive_loss": 0.8166 }, { "contrastive_loss": 0.3188, "epoch": 3.0, "grad_norm": 14.68419361114502, "learning_rate": 3.5935523934197537e-06, "lm_loss": 5.4675, "loss": 1.0959, "step": 1329, "text_contrastive_loss": 0.4607 }, { "contrastive_loss": 0.5175, "epoch": 3.0022573363431153, "grad_norm": 14.676491737365723, "learning_rate": 3.5865914183805606e-06, "lm_loss": 5.485, "loss": 1.5562, "step": 1330, "text_contrastive_loss": 0.9802 }, { "contrastive_loss": 0.4993, "epoch": 3.00451467268623, "grad_norm": 13.769871711730957, "learning_rate": 3.5796334194692704e-06, "lm_loss": 5.5265, "loss": 1.4622, "step": 1331, "text_contrastive_loss": 0.8205 }, { "contrastive_loss": 0.4971, "epoch": 3.0067720090293455, "grad_norm": 13.985860824584961, "learning_rate": 3.572678411336916e-06, "lm_loss": 5.4778, "loss": 1.3986, "step": 1332, "text_contrastive_loss": 0.7073 }, { "contrastive_loss": 0.5364, "epoch": 3.0090293453724604, "grad_norm": 14.060493469238281, "learning_rate": 3.5657264086282317e-06, "lm_loss": 5.4879, "loss": 1.4643, "step": 1333, "text_contrastive_loss": 0.7582 }, { "contrastive_loss": 0.4358, "epoch": 3.0112866817155757, "grad_norm": 14.356574058532715, "learning_rate": 3.5587774259816234e-06, "lm_loss": 5.5635, "loss": 1.3742, "step": 1334, "text_contrastive_loss": 0.7641 }, { "contrastive_loss": 0.5886, "epoch": 3.0135440180586905, "grad_norm": 13.934671401977539, "learning_rate": 3.5518314780291384e-06, "lm_loss": 5.5427, "loss": 1.5663, "step": 1335, "text_contrastive_loss": 0.8469 }, { "contrastive_loss": 0.4827, "epoch": 3.015801354401806, "grad_norm": 15.489354133605957, "learning_rate": 3.544888579396435e-06, "lm_loss": 5.4781, "loss": 1.5782, "step": 1336, "text_contrastive_loss": 1.0954 }, { "contrastive_loss": 0.4895, "epoch": 3.018058690744921, "grad_norm": 14.8477201461792, "learning_rate": 3.5379487447027483e-06, "lm_loss": 5.4953, "loss": 1.3889, "step": 1337, "text_contrastive_loss": 0.6998 }, { "contrastive_loss": 0.39, "epoch": 3.020316027088036, "grad_norm": 12.811365127563477, "learning_rate": 3.5310119885608625e-06, "lm_loss": 5.5246, "loss": 1.25, "step": 1338, "text_contrastive_loss": 0.615 }, { "contrastive_loss": 0.4704, "epoch": 3.0225733634311513, "grad_norm": 15.107719421386719, "learning_rate": 3.524078325577084e-06, "lm_loss": 5.5308, "loss": 1.3804, "step": 1339, "text_contrastive_loss": 0.7139 }, { "contrastive_loss": 0.5516, "epoch": 3.024830699774266, "grad_norm": 15.045534133911133, "learning_rate": 3.517147770351199e-06, "lm_loss": 5.4304, "loss": 1.495, "step": 1340, "text_contrastive_loss": 0.8007 }, { "contrastive_loss": 0.5238, "epoch": 3.0270880361173815, "grad_norm": 14.877632141113281, "learning_rate": 3.5102203374764555e-06, "lm_loss": 5.4377, "loss": 1.4971, "step": 1341, "text_contrastive_loss": 0.859 }, { "contrastive_loss": 0.5307, "epoch": 3.0293453724604964, "grad_norm": 14.242051124572754, "learning_rate": 3.503296041539522e-06, "lm_loss": 5.6175, "loss": 1.471, "step": 1342, "text_contrastive_loss": 0.7571 }, { "contrastive_loss": 0.4857, "epoch": 3.0316027088036117, "grad_norm": 14.8358736038208, "learning_rate": 3.496374897120467e-06, "lm_loss": 5.4324, "loss": 1.4134, "step": 1343, "text_contrastive_loss": 0.769 }, { "contrastive_loss": 0.4495, "epoch": 3.033860045146727, "grad_norm": 12.73051929473877, "learning_rate": 3.4894569187927204e-06, "lm_loss": 5.4313, "loss": 1.2726, "step": 1344, "text_contrastive_loss": 0.56 }, { "contrastive_loss": 0.4155, "epoch": 3.036117381489842, "grad_norm": 13.076519012451172, "learning_rate": 3.4825421211230437e-06, "lm_loss": 5.4435, "loss": 1.3203, "step": 1345, "text_contrastive_loss": 0.7209 }, { "contrastive_loss": 0.4479, "epoch": 3.038374717832957, "grad_norm": 13.648487091064453, "learning_rate": 3.4756305186715046e-06, "lm_loss": 5.4931, "loss": 1.361, "step": 1346, "text_contrastive_loss": 0.7275 }, { "contrastive_loss": 0.5394, "epoch": 3.040632054176072, "grad_norm": 14.72055721282959, "learning_rate": 3.4687221259914394e-06, "lm_loss": 5.3795, "loss": 1.4644, "step": 1347, "text_contrastive_loss": 0.7739 }, { "contrastive_loss": 0.4843, "epoch": 3.0428893905191874, "grad_norm": 15.072990417480469, "learning_rate": 3.461816957629429e-06, "lm_loss": 5.4639, "loss": 1.4175, "step": 1348, "text_contrastive_loss": 0.7737 }, { "contrastive_loss": 0.4437, "epoch": 3.0451467268623027, "grad_norm": 14.582855224609375, "learning_rate": 3.4549150281252635e-06, "lm_loss": 5.4765, "loss": 1.3774, "step": 1349, "text_contrastive_loss": 0.7721 }, { "contrastive_loss": 0.5707, "epoch": 3.0474040632054176, "grad_norm": 16.179119110107422, "learning_rate": 3.448016352011914e-06, "lm_loss": 5.5304, "loss": 1.5403, "step": 1350, "text_contrastive_loss": 0.833 }, { "contrastive_loss": 0.475, "epoch": 3.049661399548533, "grad_norm": 15.216938018798828, "learning_rate": 3.441120943815497e-06, "lm_loss": 5.4244, "loss": 1.4228, "step": 1351, "text_contrastive_loss": 0.8107 }, { "contrastive_loss": 0.5152, "epoch": 3.0519187358916477, "grad_norm": 16.394363403320312, "learning_rate": 3.4342288180552556e-06, "lm_loss": 5.4509, "loss": 1.4574, "step": 1352, "text_contrastive_loss": 0.7942 }, { "contrastive_loss": 0.4183, "epoch": 3.054176072234763, "grad_norm": 14.682599067687988, "learning_rate": 3.427339989243514e-06, "lm_loss": 5.4245, "loss": 1.3967, "step": 1353, "text_contrastive_loss": 0.8719 }, { "contrastive_loss": 0.3864, "epoch": 3.056433408577878, "grad_norm": 12.30835247039795, "learning_rate": 3.420454471885659e-06, "lm_loss": 5.4758, "loss": 1.3097, "step": 1354, "text_contrastive_loss": 0.7515 }, { "contrastive_loss": 0.5665, "epoch": 3.0586907449209932, "grad_norm": 16.540470123291016, "learning_rate": 3.4135722804801004e-06, "lm_loss": 5.4518, "loss": 1.5741, "step": 1355, "text_contrastive_loss": 0.9249 }, { "contrastive_loss": 0.5511, "epoch": 3.0609480812641086, "grad_norm": 14.620187759399414, "learning_rate": 3.4066934295182496e-06, "lm_loss": 5.4517, "loss": 1.4554, "step": 1356, "text_contrastive_loss": 0.7182 }, { "contrastive_loss": 0.3712, "epoch": 3.0632054176072234, "grad_norm": 12.636406898498535, "learning_rate": 3.3998179334844823e-06, "lm_loss": 5.4696, "loss": 1.2953, "step": 1357, "text_contrastive_loss": 0.7543 }, { "contrastive_loss": 0.5408, "epoch": 3.0654627539503387, "grad_norm": 18.53982925415039, "learning_rate": 3.3929458068561073e-06, "lm_loss": 5.4789, "loss": 1.5216, "step": 1358, "text_contrastive_loss": 0.8658 }, { "contrastive_loss": 0.4477, "epoch": 3.0677200902934536, "grad_norm": 15.201266288757324, "learning_rate": 3.3860770641033417e-06, "lm_loss": 5.4602, "loss": 1.3675, "step": 1359, "text_contrastive_loss": 0.7477 }, { "contrastive_loss": 0.5169, "epoch": 3.069977426636569, "grad_norm": 15.906881332397461, "learning_rate": 3.379211719689278e-06, "lm_loss": 5.4948, "loss": 1.492, "step": 1360, "text_contrastive_loss": 0.8512 }, { "contrastive_loss": 0.5008, "epoch": 3.072234762979684, "grad_norm": 14.120617866516113, "learning_rate": 3.37234978806985e-06, "lm_loss": 5.4651, "loss": 1.4897, "step": 1361, "text_contrastive_loss": 0.8846 }, { "contrastive_loss": 0.4614, "epoch": 3.074492099322799, "grad_norm": 14.68795394897461, "learning_rate": 3.365491283693807e-06, "lm_loss": 5.4428, "loss": 1.3755, "step": 1362, "text_contrastive_loss": 0.7397 }, { "contrastive_loss": 0.5015, "epoch": 3.0767494356659144, "grad_norm": 14.28650188446045, "learning_rate": 3.358636221002682e-06, "lm_loss": 5.5232, "loss": 1.3865, "step": 1363, "text_contrastive_loss": 0.6653 }, { "contrastive_loss": 0.3761, "epoch": 3.0790067720090293, "grad_norm": 13.642817497253418, "learning_rate": 3.351784614430761e-06, "lm_loss": 5.6015, "loss": 1.2892, "step": 1364, "text_contrastive_loss": 0.706 }, { "contrastive_loss": 0.4728, "epoch": 3.0812641083521446, "grad_norm": 13.771718978881836, "learning_rate": 3.3449364784050515e-06, "lm_loss": 5.393, "loss": 1.438, "step": 1365, "text_contrastive_loss": 0.8518 }, { "contrastive_loss": 0.3946, "epoch": 3.0835214446952595, "grad_norm": 13.04595947265625, "learning_rate": 3.3380918273452557e-06, "lm_loss": 5.5928, "loss": 1.3011, "step": 1366, "text_contrastive_loss": 0.6945 }, { "contrastive_loss": 0.5262, "epoch": 3.0857787810383748, "grad_norm": 16.817808151245117, "learning_rate": 3.3312506756637343e-06, "lm_loss": 5.5432, "loss": 1.5065, "step": 1367, "text_contrastive_loss": 0.8521 }, { "contrastive_loss": 0.4632, "epoch": 3.0880361173814896, "grad_norm": 13.763033866882324, "learning_rate": 3.324413037765483e-06, "lm_loss": 5.4797, "loss": 1.4042, "step": 1368, "text_contrastive_loss": 0.786 }, { "contrastive_loss": 0.5539, "epoch": 3.090293453724605, "grad_norm": 18.377771377563477, "learning_rate": 3.317578928048096e-06, "lm_loss": 5.4347, "loss": 1.4958, "step": 1369, "text_contrastive_loss": 0.7969 }, { "contrastive_loss": 0.4844, "epoch": 3.0925507900677203, "grad_norm": 15.015218734741211, "learning_rate": 3.310748360901741e-06, "lm_loss": 5.4362, "loss": 1.4578, "step": 1370, "text_contrastive_loss": 0.8594 }, { "contrastive_loss": 0.3774, "epoch": 3.094808126410835, "grad_norm": 12.887053489685059, "learning_rate": 3.303921350709124e-06, "lm_loss": 5.4698, "loss": 1.2701, "step": 1371, "text_contrastive_loss": 0.6914 }, { "contrastive_loss": 0.4761, "epoch": 3.0970654627539504, "grad_norm": 15.221031188964844, "learning_rate": 3.2970979118454616e-06, "lm_loss": 5.3706, "loss": 1.3838, "step": 1372, "text_contrastive_loss": 0.7412 }, { "contrastive_loss": 0.4335, "epoch": 3.0993227990970653, "grad_norm": 15.106535911560059, "learning_rate": 3.2902780586784542e-06, "lm_loss": 5.6387, "loss": 1.4091, "step": 1373, "text_contrastive_loss": 0.8233 }, { "contrastive_loss": 0.6104, "epoch": 3.1015801354401806, "grad_norm": 15.874143600463867, "learning_rate": 3.283461805568246e-06, "lm_loss": 5.6024, "loss": 1.6476, "step": 1374, "text_contrastive_loss": 0.954 }, { "contrastive_loss": 0.5049, "epoch": 3.1038374717832955, "grad_norm": 13.519198417663574, "learning_rate": 3.276649166867406e-06, "lm_loss": 5.4215, "loss": 1.4626, "step": 1375, "text_contrastive_loss": 0.831 }, { "contrastive_loss": 0.4709, "epoch": 3.106094808126411, "grad_norm": 13.302258491516113, "learning_rate": 3.2698401569208883e-06, "lm_loss": 5.3711, "loss": 1.3215, "step": 1376, "text_contrastive_loss": 0.6268 }, { "contrastive_loss": 0.4418, "epoch": 3.108352144469526, "grad_norm": 14.467670440673828, "learning_rate": 3.2630347900660094e-06, "lm_loss": 5.6234, "loss": 1.3432, "step": 1377, "text_contrastive_loss": 0.678 }, { "contrastive_loss": 0.4821, "epoch": 3.110609480812641, "grad_norm": 15.450533866882324, "learning_rate": 3.256233080632414e-06, "lm_loss": 5.5193, "loss": 1.3991, "step": 1378, "text_contrastive_loss": 0.7302 }, { "contrastive_loss": 0.4553, "epoch": 3.1128668171557563, "grad_norm": 13.368943214416504, "learning_rate": 3.249435042942043e-06, "lm_loss": 5.4157, "loss": 1.3467, "step": 1379, "text_contrastive_loss": 0.6997 }, { "contrastive_loss": 0.516, "epoch": 3.115124153498871, "grad_norm": 14.798224449157715, "learning_rate": 3.242640691309111e-06, "lm_loss": 5.5178, "loss": 1.4446, "step": 1380, "text_contrastive_loss": 0.7537 }, { "contrastive_loss": 0.5371, "epoch": 3.1173814898419865, "grad_norm": 15.642748832702637, "learning_rate": 3.235850040040066e-06, "lm_loss": 5.5772, "loss": 1.4784, "step": 1381, "text_contrastive_loss": 0.7672 }, { "contrastive_loss": 0.505, "epoch": 3.119638826185102, "grad_norm": 15.201095581054688, "learning_rate": 3.2290631034335684e-06, "lm_loss": 5.4741, "loss": 1.5187, "step": 1382, "text_contrastive_loss": 0.9325 }, { "contrastive_loss": 0.5862, "epoch": 3.1218961625282167, "grad_norm": 17.422672271728516, "learning_rate": 3.2222798957804524e-06, "lm_loss": 5.4733, "loss": 1.5078, "step": 1383, "text_contrastive_loss": 0.7486 }, { "contrastive_loss": 0.4441, "epoch": 3.124153498871332, "grad_norm": 13.599923133850098, "learning_rate": 3.215500431363706e-06, "lm_loss": 5.4271, "loss": 1.3811, "step": 1384, "text_contrastive_loss": 0.7885 }, { "contrastive_loss": 0.5033, "epoch": 3.126410835214447, "grad_norm": 14.964048385620117, "learning_rate": 3.20872472445843e-06, "lm_loss": 5.4844, "loss": 1.4989, "step": 1385, "text_contrastive_loss": 0.8943 }, { "contrastive_loss": 0.5057, "epoch": 3.128668171557562, "grad_norm": 15.891070365905762, "learning_rate": 3.2019527893318177e-06, "lm_loss": 5.3869, "loss": 1.4726, "step": 1386, "text_contrastive_loss": 0.8565 }, { "contrastive_loss": 0.4319, "epoch": 3.130925507900677, "grad_norm": 13.052963256835938, "learning_rate": 3.195184640243115e-06, "lm_loss": 5.4589, "loss": 1.3472, "step": 1387, "text_contrastive_loss": 0.7387 }, { "contrastive_loss": 0.544, "epoch": 3.1331828442437923, "grad_norm": 16.660829544067383, "learning_rate": 3.1884202914436024e-06, "lm_loss": 5.4135, "loss": 1.455, "step": 1388, "text_contrastive_loss": 0.7393 }, { "contrastive_loss": 0.4587, "epoch": 3.1354401805869077, "grad_norm": 14.902560234069824, "learning_rate": 3.1816597571765517e-06, "lm_loss": 5.4346, "loss": 1.4785, "step": 1389, "text_contrastive_loss": 0.9526 }, { "contrastive_loss": 0.4568, "epoch": 3.1376975169300225, "grad_norm": 13.48047924041748, "learning_rate": 3.1749030516772084e-06, "lm_loss": 5.4984, "loss": 1.4147, "step": 1390, "text_contrastive_loss": 0.8161 }, { "contrastive_loss": 0.4536, "epoch": 3.139954853273138, "grad_norm": 14.445048332214355, "learning_rate": 3.168150189172754e-06, "lm_loss": 5.5247, "loss": 1.393, "step": 1391, "text_contrastive_loss": 0.7738 }, { "contrastive_loss": 0.4446, "epoch": 3.1422121896162527, "grad_norm": 15.271405220031738, "learning_rate": 3.1614011838822755e-06, "lm_loss": 5.592, "loss": 1.4035, "step": 1392, "text_contrastive_loss": 0.7994 }, { "contrastive_loss": 0.4147, "epoch": 3.144469525959368, "grad_norm": 13.864374160766602, "learning_rate": 3.154656050016742e-06, "lm_loss": 5.4321, "loss": 1.4075, "step": 1393, "text_contrastive_loss": 0.8991 }, { "contrastive_loss": 0.4614, "epoch": 3.146726862302483, "grad_norm": 15.20340347290039, "learning_rate": 3.1479148017789673e-06, "lm_loss": 5.4993, "loss": 1.3472, "step": 1394, "text_contrastive_loss": 0.6717 }, { "contrastive_loss": 0.3928, "epoch": 3.148984198645598, "grad_norm": 13.848822593688965, "learning_rate": 3.1411774533635854e-06, "lm_loss": 5.5124, "loss": 1.3486, "step": 1395, "text_contrastive_loss": 0.8091 }, { "contrastive_loss": 0.3748, "epoch": 3.1512415349887135, "grad_norm": 12.218823432922363, "learning_rate": 3.134444018957019e-06, "lm_loss": 5.5252, "loss": 1.3103, "step": 1396, "text_contrastive_loss": 0.7659 }, { "contrastive_loss": 0.4481, "epoch": 3.1534988713318284, "grad_norm": 14.697312355041504, "learning_rate": 3.1277145127374475e-06, "lm_loss": 5.5859, "loss": 1.4681, "step": 1397, "text_contrastive_loss": 0.9228 }, { "contrastive_loss": 0.4735, "epoch": 3.1557562076749437, "grad_norm": 15.564329147338867, "learning_rate": 3.1209889488747813e-06, "lm_loss": 5.5729, "loss": 1.4461, "step": 1398, "text_contrastive_loss": 0.8308 }, { "contrastive_loss": 0.4814, "epoch": 3.1580135440180586, "grad_norm": 14.367502212524414, "learning_rate": 3.114267341530627e-06, "lm_loss": 5.4576, "loss": 1.4155, "step": 1399, "text_contrastive_loss": 0.7766 }, { "contrastive_loss": 0.4437, "epoch": 3.160270880361174, "grad_norm": 13.590703010559082, "learning_rate": 3.1075497048582635e-06, "lm_loss": 5.3726, "loss": 1.3244, "step": 1400, "text_contrastive_loss": 0.687 }, { "contrastive_loss": 0.4385, "epoch": 3.1625282167042887, "grad_norm": 13.9979887008667, "learning_rate": 3.1008360530026053e-06, "lm_loss": 5.4082, "loss": 1.4658, "step": 1401, "text_contrastive_loss": 0.9728 }, { "contrastive_loss": 0.437, "epoch": 3.164785553047404, "grad_norm": 12.361220359802246, "learning_rate": 3.0941264001001796e-06, "lm_loss": 5.4415, "loss": 1.3352, "step": 1402, "text_contrastive_loss": 0.7081 }, { "contrastive_loss": 0.4989, "epoch": 3.1670428893905194, "grad_norm": 14.86351203918457, "learning_rate": 3.0874207602790895e-06, "lm_loss": 5.435, "loss": 1.467, "step": 1403, "text_contrastive_loss": 0.8492 }, { "contrastive_loss": 0.5322, "epoch": 3.1693002257336342, "grad_norm": 14.946853637695312, "learning_rate": 3.0807191476589926e-06, "lm_loss": 5.4772, "loss": 1.5568, "step": 1404, "text_contrastive_loss": 0.9538 }, { "contrastive_loss": 0.518, "epoch": 3.1715575620767495, "grad_norm": 15.552018165588379, "learning_rate": 3.0740215763510617e-06, "lm_loss": 5.5036, "loss": 1.455, "step": 1405, "text_contrastive_loss": 0.7732 }, { "contrastive_loss": 0.418, "epoch": 3.1738148984198644, "grad_norm": 13.696687698364258, "learning_rate": 3.0673280604579623e-06, "lm_loss": 5.3499, "loss": 1.36, "step": 1406, "text_contrastive_loss": 0.814 }, { "contrastive_loss": 0.5179, "epoch": 3.1760722347629797, "grad_norm": 16.097196578979492, "learning_rate": 3.0606386140738253e-06, "lm_loss": 5.5417, "loss": 1.4344, "step": 1407, "text_contrastive_loss": 0.7247 }, { "contrastive_loss": 0.3777, "epoch": 3.1783295711060946, "grad_norm": 15.0361328125, "learning_rate": 3.053953251284205e-06, "lm_loss": 5.4739, "loss": 1.2988, "step": 1408, "text_contrastive_loss": 0.7475 }, { "contrastive_loss": 0.4927, "epoch": 3.18058690744921, "grad_norm": 13.873917579650879, "learning_rate": 3.047271986166061e-06, "lm_loss": 5.546, "loss": 1.4229, "step": 1409, "text_contrastive_loss": 0.7513 }, { "contrastive_loss": 0.4725, "epoch": 3.1828442437923252, "grad_norm": 15.101540565490723, "learning_rate": 3.0405948327877233e-06, "lm_loss": 5.4231, "loss": 1.477, "step": 1410, "text_contrastive_loss": 0.9243 }, { "contrastive_loss": 0.4674, "epoch": 3.18510158013544, "grad_norm": 13.89570140838623, "learning_rate": 3.033921805208867e-06, "lm_loss": 5.4773, "loss": 1.4004, "step": 1411, "text_contrastive_loss": 0.7706 }, { "contrastive_loss": 0.4747, "epoch": 3.1873589164785554, "grad_norm": 14.832307815551758, "learning_rate": 3.027252917480476e-06, "lm_loss": 5.5482, "loss": 1.384, "step": 1412, "text_contrastive_loss": 0.7089 }, { "contrastive_loss": 0.5005, "epoch": 3.1896162528216703, "grad_norm": 14.91125202178955, "learning_rate": 3.0205881836448186e-06, "lm_loss": 5.6158, "loss": 1.413, "step": 1413, "text_contrastive_loss": 0.7018 }, { "contrastive_loss": 0.4625, "epoch": 3.1918735891647856, "grad_norm": 13.466875076293945, "learning_rate": 3.0139276177354188e-06, "lm_loss": 5.4973, "loss": 1.4075, "step": 1414, "text_contrastive_loss": 0.7905 }, { "contrastive_loss": 0.4334, "epoch": 3.194130925507901, "grad_norm": 12.072528839111328, "learning_rate": 3.00727123377702e-06, "lm_loss": 5.5764, "loss": 1.3598, "step": 1415, "text_contrastive_loss": 0.7375 }, { "contrastive_loss": 0.4601, "epoch": 3.1963882618510158, "grad_norm": 14.04871654510498, "learning_rate": 3.0006190457855643e-06, "lm_loss": 5.5244, "loss": 1.4103, "step": 1416, "text_contrastive_loss": 0.7956 }, { "contrastive_loss": 0.4577, "epoch": 3.198645598194131, "grad_norm": 16.075223922729492, "learning_rate": 2.9939710677681545e-06, "lm_loss": 5.4323, "loss": 1.3909, "step": 1417, "text_contrastive_loss": 0.7801 }, { "contrastive_loss": 0.433, "epoch": 3.200902934537246, "grad_norm": 14.013437271118164, "learning_rate": 2.987327313723033e-06, "lm_loss": 5.4048, "loss": 1.3656, "step": 1418, "text_contrastive_loss": 0.7842 }, { "contrastive_loss": 0.4274, "epoch": 3.2031602708803613, "grad_norm": 12.726801872253418, "learning_rate": 2.980687797639543e-06, "lm_loss": 5.4843, "loss": 1.3715, "step": 1419, "text_contrastive_loss": 0.7914 }, { "contrastive_loss": 0.4272, "epoch": 3.205417607223476, "grad_norm": 15.397507667541504, "learning_rate": 2.9740525334981105e-06, "lm_loss": 5.5576, "loss": 1.3759, "step": 1420, "text_contrastive_loss": 0.7858 }, { "contrastive_loss": 0.4926, "epoch": 3.2076749435665914, "grad_norm": 14.691521644592285, "learning_rate": 2.967421535270203e-06, "lm_loss": 5.556, "loss": 1.4491, "step": 1421, "text_contrastive_loss": 0.8018 }, { "contrastive_loss": 0.5022, "epoch": 3.2099322799097068, "grad_norm": 16.048423767089844, "learning_rate": 2.9607948169183077e-06, "lm_loss": 5.4641, "loss": 1.4238, "step": 1422, "text_contrastive_loss": 0.7504 }, { "contrastive_loss": 0.4289, "epoch": 3.2121896162528216, "grad_norm": 13.062213897705078, "learning_rate": 2.9541723923958975e-06, "lm_loss": 5.4684, "loss": 1.354, "step": 1423, "text_contrastive_loss": 0.7564 }, { "contrastive_loss": 0.5075, "epoch": 3.214446952595937, "grad_norm": 15.797381401062012, "learning_rate": 2.94755427564741e-06, "lm_loss": 5.6064, "loss": 1.515, "step": 1424, "text_contrastive_loss": 0.8937 }, { "contrastive_loss": 0.491, "epoch": 3.216704288939052, "grad_norm": 16.2073917388916, "learning_rate": 2.9409404806082077e-06, "lm_loss": 5.3949, "loss": 1.4089, "step": 1425, "text_contrastive_loss": 0.7569 }, { "contrastive_loss": 0.4289, "epoch": 3.218961625282167, "grad_norm": 14.816788673400879, "learning_rate": 2.934331021204551e-06, "lm_loss": 5.4471, "loss": 1.3935, "step": 1426, "text_contrastive_loss": 0.8399 }, { "contrastive_loss": 0.4099, "epoch": 3.221218961625282, "grad_norm": 13.100132942199707, "learning_rate": 2.9277259113535774e-06, "lm_loss": 5.5277, "loss": 1.2779, "step": 1427, "text_contrastive_loss": 0.6304 }, { "contrastive_loss": 0.4655, "epoch": 3.2234762979683973, "grad_norm": 13.787766456604004, "learning_rate": 2.9211251649632587e-06, "lm_loss": 5.4817, "loss": 1.4278, "step": 1428, "text_contrastive_loss": 0.8283 }, { "contrastive_loss": 0.4856, "epoch": 3.2257336343115126, "grad_norm": 14.164327621459961, "learning_rate": 2.9145287959323852e-06, "lm_loss": 5.4339, "loss": 1.3871, "step": 1429, "text_contrastive_loss": 0.7163 }, { "contrastive_loss": 0.4384, "epoch": 3.2279909706546275, "grad_norm": 14.701132774353027, "learning_rate": 2.9079368181505263e-06, "lm_loss": 5.4552, "loss": 1.3807, "step": 1430, "text_contrastive_loss": 0.7937 }, { "contrastive_loss": 0.4371, "epoch": 3.230248306997743, "grad_norm": 13.563252449035645, "learning_rate": 2.9013492454980074e-06, "lm_loss": 5.4159, "loss": 1.3341, "step": 1431, "text_contrastive_loss": 0.7107 }, { "contrastive_loss": 0.501, "epoch": 3.2325056433408577, "grad_norm": 14.329377174377441, "learning_rate": 2.894766091845873e-06, "lm_loss": 5.51, "loss": 1.4502, "step": 1432, "text_contrastive_loss": 0.7963 }, { "contrastive_loss": 0.5936, "epoch": 3.234762979683973, "grad_norm": 17.089048385620117, "learning_rate": 2.88818737105587e-06, "lm_loss": 5.5394, "loss": 1.6195, "step": 1433, "text_contrastive_loss": 0.9439 }, { "contrastive_loss": 0.4673, "epoch": 3.237020316027088, "grad_norm": 13.321817398071289, "learning_rate": 2.881613096980407e-06, "lm_loss": 5.4206, "loss": 1.4222, "step": 1434, "text_contrastive_loss": 0.8256 }, { "contrastive_loss": 0.6084, "epoch": 3.239277652370203, "grad_norm": 16.38990020751953, "learning_rate": 2.8750432834625312e-06, "lm_loss": 5.4195, "loss": 1.6313, "step": 1435, "text_contrastive_loss": 0.962 }, { "contrastive_loss": 0.4847, "epoch": 3.2415349887133185, "grad_norm": 13.881186485290527, "learning_rate": 2.8684779443358945e-06, "lm_loss": 5.5713, "loss": 1.4127, "step": 1436, "text_contrastive_loss": 0.7417 }, { "contrastive_loss": 0.48, "epoch": 3.2437923250564333, "grad_norm": 14.018843650817871, "learning_rate": 2.861917093424731e-06, "lm_loss": 5.46, "loss": 1.5133, "step": 1437, "text_contrastive_loss": 0.9746 }, { "contrastive_loss": 0.4335, "epoch": 3.2460496613995486, "grad_norm": 16.42011260986328, "learning_rate": 2.855360744543822e-06, "lm_loss": 5.4131, "loss": 1.3443, "step": 1438, "text_contrastive_loss": 0.7389 }, { "contrastive_loss": 0.5224, "epoch": 3.2483069977426635, "grad_norm": 14.962552070617676, "learning_rate": 2.8488089114984725e-06, "lm_loss": 5.4925, "loss": 1.4171, "step": 1439, "text_contrastive_loss": 0.691 }, { "contrastive_loss": 0.4703, "epoch": 3.250564334085779, "grad_norm": 14.868867874145508, "learning_rate": 2.84226160808447e-06, "lm_loss": 5.5117, "loss": 1.4661, "step": 1440, "text_contrastive_loss": 0.8893 }, { "contrastive_loss": 0.4934, "epoch": 3.2528216704288937, "grad_norm": 14.345926284790039, "learning_rate": 2.835718848088076e-06, "lm_loss": 5.4378, "loss": 1.3861, "step": 1441, "text_contrastive_loss": 0.6978 }, { "contrastive_loss": 0.434, "epoch": 3.255079006772009, "grad_norm": 14.553913116455078, "learning_rate": 2.8291806452859803e-06, "lm_loss": 5.5351, "loss": 1.387, "step": 1442, "text_contrastive_loss": 0.799 }, { "contrastive_loss": 0.4189, "epoch": 3.2573363431151243, "grad_norm": 12.91666030883789, "learning_rate": 2.822647013445272e-06, "lm_loss": 5.4574, "loss": 1.3539, "step": 1443, "text_contrastive_loss": 0.7785 }, { "contrastive_loss": 0.4741, "epoch": 3.259593679458239, "grad_norm": 13.636680603027344, "learning_rate": 2.8161179663234215e-06, "lm_loss": 5.3573, "loss": 1.4103, "step": 1444, "text_contrastive_loss": 0.8009 }, { "contrastive_loss": 0.479, "epoch": 3.2618510158013545, "grad_norm": 14.111836433410645, "learning_rate": 2.809593517668243e-06, "lm_loss": 5.4982, "loss": 1.4668, "step": 1445, "text_contrastive_loss": 0.8761 }, { "contrastive_loss": 0.544, "epoch": 3.2641083521444694, "grad_norm": 15.635238647460938, "learning_rate": 2.8030736812178717e-06, "lm_loss": 5.4932, "loss": 1.5298, "step": 1446, "text_contrastive_loss": 0.873 }, { "contrastive_loss": 0.4661, "epoch": 3.2663656884875847, "grad_norm": 15.782267570495605, "learning_rate": 2.796558470700723e-06, "lm_loss": 5.4626, "loss": 1.4277, "step": 1447, "text_contrastive_loss": 0.8306 }, { "contrastive_loss": 0.4545, "epoch": 3.2686230248307, "grad_norm": 12.718178749084473, "learning_rate": 2.790047899835479e-06, "lm_loss": 5.3625, "loss": 1.4277, "step": 1448, "text_contrastive_loss": 0.8739 }, { "contrastive_loss": 0.5246, "epoch": 3.270880361173815, "grad_norm": 15.982274055480957, "learning_rate": 2.7835419823310507e-06, "lm_loss": 5.5273, "loss": 1.5151, "step": 1449, "text_contrastive_loss": 0.8756 }, { "contrastive_loss": 0.454, "epoch": 3.27313769751693, "grad_norm": 16.382341384887695, "learning_rate": 2.777040731886549e-06, "lm_loss": 5.4664, "loss": 1.377, "step": 1450, "text_contrastive_loss": 0.7526 }, { "contrastive_loss": 0.4552, "epoch": 3.275395033860045, "grad_norm": 15.161858558654785, "learning_rate": 2.770544162191261e-06, "lm_loss": 5.4801, "loss": 1.3577, "step": 1451, "text_contrastive_loss": 0.7091 }, { "contrastive_loss": 0.4925, "epoch": 3.2776523702031604, "grad_norm": 14.238808631896973, "learning_rate": 2.7640522869246134e-06, "lm_loss": 5.4234, "loss": 1.4124, "step": 1452, "text_contrastive_loss": 0.7551 }, { "contrastive_loss": 0.4706, "epoch": 3.2799097065462752, "grad_norm": 15.547906875610352, "learning_rate": 2.7575651197561504e-06, "lm_loss": 5.5024, "loss": 1.4493, "step": 1453, "text_contrastive_loss": 0.8567 }, { "contrastive_loss": 0.4395, "epoch": 3.2821670428893905, "grad_norm": 14.469873428344727, "learning_rate": 2.7510826743455037e-06, "lm_loss": 5.4523, "loss": 1.3504, "step": 1454, "text_contrastive_loss": 0.7314 }, { "contrastive_loss": 0.5703, "epoch": 3.2844243792325054, "grad_norm": 15.518226623535156, "learning_rate": 2.744604964342364e-06, "lm_loss": 5.4507, "loss": 1.5544, "step": 1455, "text_contrastive_loss": 0.8782 }, { "contrastive_loss": 0.495, "epoch": 3.2866817155756207, "grad_norm": 13.762090682983398, "learning_rate": 2.7381320033864434e-06, "lm_loss": 5.5043, "loss": 1.4291, "step": 1456, "text_contrastive_loss": 0.7674 }, { "contrastive_loss": 0.4654, "epoch": 3.288939051918736, "grad_norm": 13.592652320861816, "learning_rate": 2.7316638051074605e-06, "lm_loss": 5.4472, "loss": 1.3406, "step": 1457, "text_contrastive_loss": 0.661 }, { "contrastive_loss": 0.4706, "epoch": 3.291196388261851, "grad_norm": 14.618680953979492, "learning_rate": 2.72520038312511e-06, "lm_loss": 5.4162, "loss": 1.4038, "step": 1458, "text_contrastive_loss": 0.7831 }, { "contrastive_loss": 0.4144, "epoch": 3.293453724604966, "grad_norm": 13.54752254486084, "learning_rate": 2.7187417510490176e-06, "lm_loss": 5.5561, "loss": 1.3636, "step": 1459, "text_contrastive_loss": 0.7872 }, { "contrastive_loss": 0.4795, "epoch": 3.295711060948081, "grad_norm": 15.078756332397461, "learning_rate": 2.7122879224787315e-06, "lm_loss": 5.5425, "loss": 1.4632, "step": 1460, "text_contrastive_loss": 0.8588 }, { "contrastive_loss": 0.4911, "epoch": 3.2979683972911964, "grad_norm": 16.779354095458984, "learning_rate": 2.7058389110036835e-06, "lm_loss": 5.4832, "loss": 1.4307, "step": 1461, "text_contrastive_loss": 0.7826 }, { "contrastive_loss": 0.5161, "epoch": 3.3002257336343117, "grad_norm": 14.62807846069336, "learning_rate": 2.6993947302031643e-06, "lm_loss": 5.483, "loss": 1.523, "step": 1462, "text_contrastive_loss": 0.9172 }, { "contrastive_loss": 0.4469, "epoch": 3.3024830699774266, "grad_norm": 14.627190589904785, "learning_rate": 2.692955393646286e-06, "lm_loss": 5.4985, "loss": 1.343, "step": 1463, "text_contrastive_loss": 0.6924 }, { "contrastive_loss": 0.4104, "epoch": 3.304740406320542, "grad_norm": 14.061870574951172, "learning_rate": 2.686520914891968e-06, "lm_loss": 5.4354, "loss": 1.2653, "step": 1464, "text_contrastive_loss": 0.6227 }, { "contrastive_loss": 0.4294, "epoch": 3.3069977426636568, "grad_norm": 13.842485427856445, "learning_rate": 2.6800913074888984e-06, "lm_loss": 5.4847, "loss": 1.3673, "step": 1465, "text_contrastive_loss": 0.7789 }, { "contrastive_loss": 0.3909, "epoch": 3.309255079006772, "grad_norm": 13.694429397583008, "learning_rate": 2.6736665849755073e-06, "lm_loss": 5.4691, "loss": 1.3706, "step": 1466, "text_contrastive_loss": 0.8657 }, { "contrastive_loss": 0.4237, "epoch": 3.311512415349887, "grad_norm": 13.254097938537598, "learning_rate": 2.6672467608799413e-06, "lm_loss": 5.4571, "loss": 1.3693, "step": 1467, "text_contrastive_loss": 0.7997 }, { "contrastive_loss": 0.5489, "epoch": 3.3137697516930023, "grad_norm": 14.583216667175293, "learning_rate": 2.660831848720028e-06, "lm_loss": 5.4687, "loss": 1.5559, "step": 1468, "text_contrastive_loss": 0.9203 }, { "contrastive_loss": 0.4414, "epoch": 3.3160270880361176, "grad_norm": 19.19298553466797, "learning_rate": 2.654421862003256e-06, "lm_loss": 5.425, "loss": 1.3671, "step": 1469, "text_contrastive_loss": 0.7665 }, { "contrastive_loss": 0.4941, "epoch": 3.3182844243792324, "grad_norm": 15.22370433807373, "learning_rate": 2.648016814226742e-06, "lm_loss": 5.5326, "loss": 1.4579, "step": 1470, "text_contrastive_loss": 0.8211 }, { "contrastive_loss": 0.5334, "epoch": 3.3205417607223477, "grad_norm": 15.47024917602539, "learning_rate": 2.6416167188772052e-06, "lm_loss": 5.4499, "loss": 1.48, "step": 1471, "text_contrastive_loss": 0.8033 }, { "contrastive_loss": 0.4386, "epoch": 3.3227990970654626, "grad_norm": 13.895892143249512, "learning_rate": 2.6352215894309306e-06, "lm_loss": 5.4307, "loss": 1.3779, "step": 1472, "text_contrastive_loss": 0.7924 }, { "contrastive_loss": 0.5044, "epoch": 3.325056433408578, "grad_norm": 13.95846176147461, "learning_rate": 2.6288314393537522e-06, "lm_loss": 5.4381, "loss": 1.4103, "step": 1473, "text_contrastive_loss": 0.7243 }, { "contrastive_loss": 0.4356, "epoch": 3.327313769751693, "grad_norm": 12.028575897216797, "learning_rate": 2.6224462821010185e-06, "lm_loss": 5.4458, "loss": 1.4117, "step": 1474, "text_contrastive_loss": 0.8631 }, { "contrastive_loss": 0.4626, "epoch": 3.329571106094808, "grad_norm": 13.620333671569824, "learning_rate": 2.616066131117563e-06, "lm_loss": 5.4997, "loss": 1.4582, "step": 1475, "text_contrastive_loss": 0.8913 }, { "contrastive_loss": 0.5076, "epoch": 3.3318284424379234, "grad_norm": 15.211213111877441, "learning_rate": 2.6096909998376794e-06, "lm_loss": 5.4001, "loss": 1.4919, "step": 1476, "text_contrastive_loss": 0.8886 }, { "contrastive_loss": 0.5028, "epoch": 3.3340857787810383, "grad_norm": 15.72059154510498, "learning_rate": 2.6033209016850926e-06, "lm_loss": 5.4267, "loss": 1.4654, "step": 1477, "text_contrastive_loss": 0.8399 }, { "contrastive_loss": 0.4581, "epoch": 3.3363431151241536, "grad_norm": 15.193544387817383, "learning_rate": 2.596955850072928e-06, "lm_loss": 5.4303, "loss": 1.4232, "step": 1478, "text_contrastive_loss": 0.8442 }, { "contrastive_loss": 0.59, "epoch": 3.3386004514672685, "grad_norm": 15.448929786682129, "learning_rate": 2.5905958584036826e-06, "lm_loss": 5.4836, "loss": 1.5784, "step": 1479, "text_contrastive_loss": 0.8801 }, { "contrastive_loss": 0.4063, "epoch": 3.340857787810384, "grad_norm": 12.013140678405762, "learning_rate": 2.5842409400692026e-06, "lm_loss": 5.4711, "loss": 1.2616, "step": 1480, "text_contrastive_loss": 0.6164 }, { "contrastive_loss": 0.472, "epoch": 3.343115124153499, "grad_norm": 14.958868026733398, "learning_rate": 2.577891108450651e-06, "lm_loss": 5.4646, "loss": 1.4138, "step": 1481, "text_contrastive_loss": 0.7907 }, { "contrastive_loss": 0.4594, "epoch": 3.345372460496614, "grad_norm": 14.461827278137207, "learning_rate": 2.571546376918479e-06, "lm_loss": 5.4562, "loss": 1.4113, "step": 1482, "text_contrastive_loss": 0.8125 }, { "contrastive_loss": 0.4867, "epoch": 3.3476297968397293, "grad_norm": 13.656875610351562, "learning_rate": 2.5652067588324015e-06, "lm_loss": 5.6341, "loss": 1.4406, "step": 1483, "text_contrastive_loss": 0.7811 }, { "contrastive_loss": 0.3927, "epoch": 3.349887133182844, "grad_norm": 13.251200675964355, "learning_rate": 2.55887226754136e-06, "lm_loss": 5.5251, "loss": 1.3182, "step": 1484, "text_contrastive_loss": 0.7459 }, { "contrastive_loss": 0.4508, "epoch": 3.3521444695259595, "grad_norm": 14.159133911132812, "learning_rate": 2.552542916383507e-06, "lm_loss": 5.3832, "loss": 1.4053, "step": 1485, "text_contrastive_loss": 0.8324 }, { "contrastive_loss": 0.5345, "epoch": 3.3544018058690743, "grad_norm": 13.861654281616211, "learning_rate": 2.5462187186861697e-06, "lm_loss": 5.6045, "loss": 1.5416, "step": 1486, "text_contrastive_loss": 0.8932 }, { "contrastive_loss": 0.4247, "epoch": 3.3566591422121896, "grad_norm": 13.136606216430664, "learning_rate": 2.5398996877658256e-06, "lm_loss": 5.4987, "loss": 1.3594, "step": 1487, "text_contrastive_loss": 0.7698 }, { "contrastive_loss": 0.5246, "epoch": 3.3589164785553045, "grad_norm": 15.686463356018066, "learning_rate": 2.5335858369280674e-06, "lm_loss": 5.4922, "loss": 1.5206, "step": 1488, "text_contrastive_loss": 0.8934 }, { "contrastive_loss": 0.4245, "epoch": 3.36117381489842, "grad_norm": 14.053102493286133, "learning_rate": 2.5272771794675866e-06, "lm_loss": 5.473, "loss": 1.3263, "step": 1489, "text_contrastive_loss": 0.7091 }, { "contrastive_loss": 0.551, "epoch": 3.363431151241535, "grad_norm": 14.97412109375, "learning_rate": 2.5209737286681367e-06, "lm_loss": 5.3077, "loss": 1.4425, "step": 1490, "text_contrastive_loss": 0.7216 }, { "contrastive_loss": 0.5847, "epoch": 3.36568848758465, "grad_norm": 16.91216278076172, "learning_rate": 2.514675497802508e-06, "lm_loss": 5.4616, "loss": 1.6223, "step": 1491, "text_contrastive_loss": 0.983 }, { "contrastive_loss": 0.5068, "epoch": 3.3679458239277653, "grad_norm": 16.04525375366211, "learning_rate": 2.508382500132499e-06, "lm_loss": 5.5927, "loss": 1.4422, "step": 1492, "text_contrastive_loss": 0.7522 }, { "contrastive_loss": 0.5001, "epoch": 3.37020316027088, "grad_norm": 14.205605506896973, "learning_rate": 2.50209474890889e-06, "lm_loss": 5.3962, "loss": 1.454, "step": 1493, "text_contrastive_loss": 0.8286 }, { "contrastive_loss": 0.3953, "epoch": 3.3724604966139955, "grad_norm": 13.9257173538208, "learning_rate": 2.495812257371416e-06, "lm_loss": 5.4938, "loss": 1.3433, "step": 1494, "text_contrastive_loss": 0.7973 }, { "contrastive_loss": 0.47, "epoch": 3.374717832957111, "grad_norm": 14.477286338806152, "learning_rate": 2.4895350387487304e-06, "lm_loss": 5.5802, "loss": 1.4061, "step": 1495, "text_contrastive_loss": 0.7561 }, { "contrastive_loss": 0.5315, "epoch": 3.3769751693002257, "grad_norm": 15.752272605895996, "learning_rate": 2.4832631062583906e-06, "lm_loss": 5.4693, "loss": 1.5519, "step": 1496, "text_contrastive_loss": 0.9469 }, { "contrastive_loss": 0.586, "epoch": 3.379232505643341, "grad_norm": 16.18108558654785, "learning_rate": 2.47699647310682e-06, "lm_loss": 5.4499, "loss": 1.5836, "step": 1497, "text_contrastive_loss": 0.9054 }, { "contrastive_loss": 0.4066, "epoch": 3.381489841986456, "grad_norm": 12.718653678894043, "learning_rate": 2.470735152489287e-06, "lm_loss": 5.4281, "loss": 1.3057, "step": 1498, "text_contrastive_loss": 0.7126 }, { "contrastive_loss": 0.4779, "epoch": 3.383747178329571, "grad_norm": 15.179163932800293, "learning_rate": 2.4644791575898665e-06, "lm_loss": 5.4779, "loss": 1.453, "step": 1499, "text_contrastive_loss": 0.8545 }, { "contrastive_loss": 0.4282, "epoch": 3.386004514672686, "grad_norm": 14.62775707244873, "learning_rate": 2.4582285015814263e-06, "lm_loss": 5.4897, "loss": 1.3416, "step": 1500, "text_contrastive_loss": 0.7288 }, { "contrastive_loss": 0.4135, "epoch": 3.3882618510158014, "grad_norm": 13.477630615234375, "learning_rate": 2.4519831976255892e-06, "lm_loss": 5.4605, "loss": 1.2748, "step": 1501, "text_contrastive_loss": 0.6304 }, { "contrastive_loss": 0.4456, "epoch": 3.3905191873589167, "grad_norm": 15.13442325592041, "learning_rate": 2.445743258872711e-06, "lm_loss": 5.4527, "loss": 1.3953, "step": 1502, "text_contrastive_loss": 0.809 }, { "contrastive_loss": 0.443, "epoch": 3.3927765237020315, "grad_norm": 16.368581771850586, "learning_rate": 2.4395086984618486e-06, "lm_loss": 5.5224, "loss": 1.4082, "step": 1503, "text_contrastive_loss": 0.8261 }, { "contrastive_loss": 0.4459, "epoch": 3.395033860045147, "grad_norm": 14.504566192626953, "learning_rate": 2.433279529520732e-06, "lm_loss": 5.4485, "loss": 1.393, "step": 1504, "text_contrastive_loss": 0.8045 }, { "contrastive_loss": 0.5416, "epoch": 3.3972911963882617, "grad_norm": 16.19035530090332, "learning_rate": 2.427055765165741e-06, "lm_loss": 5.4202, "loss": 1.4919, "step": 1505, "text_contrastive_loss": 0.8166 }, { "contrastive_loss": 0.5178, "epoch": 3.399548532731377, "grad_norm": 15.244782447814941, "learning_rate": 2.420837418501876e-06, "lm_loss": 5.4967, "loss": 1.5043, "step": 1506, "text_contrastive_loss": 0.8738 }, { "contrastive_loss": 0.4833, "epoch": 3.401805869074492, "grad_norm": 15.166763305664062, "learning_rate": 2.414624502622731e-06, "lm_loss": 5.416, "loss": 1.4109, "step": 1507, "text_contrastive_loss": 0.7721 }, { "contrastive_loss": 0.4482, "epoch": 3.404063205417607, "grad_norm": 15.023187637329102, "learning_rate": 2.408417030610457e-06, "lm_loss": 5.5333, "loss": 1.3832, "step": 1508, "text_contrastive_loss": 0.7634 }, { "contrastive_loss": 0.4076, "epoch": 3.4063205417607225, "grad_norm": 16.45237159729004, "learning_rate": 2.4022150155357526e-06, "lm_loss": 5.5212, "loss": 1.3757, "step": 1509, "text_contrastive_loss": 0.8319 }, { "contrastive_loss": 0.4563, "epoch": 3.4085778781038374, "grad_norm": 15.986237525939941, "learning_rate": 2.396018470457821e-06, "lm_loss": 5.5014, "loss": 1.4568, "step": 1510, "text_contrastive_loss": 0.9007 }, { "contrastive_loss": 0.442, "epoch": 3.4108352144469527, "grad_norm": 14.491031646728516, "learning_rate": 2.389827408424345e-06, "lm_loss": 5.4717, "loss": 1.3683, "step": 1511, "text_contrastive_loss": 0.7582 }, { "contrastive_loss": 0.5759, "epoch": 3.4130925507900676, "grad_norm": 17.080780029296875, "learning_rate": 2.3836418424714665e-06, "lm_loss": 5.427, "loss": 1.5746, "step": 1512, "text_contrastive_loss": 0.9121 }, { "contrastive_loss": 0.5488, "epoch": 3.415349887133183, "grad_norm": 15.472517967224121, "learning_rate": 2.377461785623752e-06, "lm_loss": 5.465, "loss": 1.4835, "step": 1513, "text_contrastive_loss": 0.7765 }, { "contrastive_loss": 0.4572, "epoch": 3.417607223476298, "grad_norm": 14.8615140914917, "learning_rate": 2.3712872508941714e-06, "lm_loss": 5.4163, "loss": 1.3947, "step": 1514, "text_contrastive_loss": 0.7918 }, { "contrastive_loss": 0.4622, "epoch": 3.419864559819413, "grad_norm": 16.12051010131836, "learning_rate": 2.3651182512840604e-06, "lm_loss": 5.4557, "loss": 1.3376, "step": 1515, "text_contrastive_loss": 0.6597 }, { "contrastive_loss": 0.5526, "epoch": 3.4221218961625284, "grad_norm": 16.446517944335938, "learning_rate": 2.358954799783106e-06, "lm_loss": 5.5447, "loss": 1.6239, "step": 1516, "text_contrastive_loss": 1.0335 }, { "contrastive_loss": 0.5071, "epoch": 3.4243792325056432, "grad_norm": 14.582303047180176, "learning_rate": 2.3527969093693105e-06, "lm_loss": 5.3842, "loss": 1.377, "step": 1517, "text_contrastive_loss": 0.6629 }, { "contrastive_loss": 0.4184, "epoch": 3.4266365688487586, "grad_norm": 12.374114036560059, "learning_rate": 2.346644593008966e-06, "lm_loss": 5.3269, "loss": 1.3535, "step": 1518, "text_contrastive_loss": 0.8049 }, { "contrastive_loss": 0.5105, "epoch": 3.4288939051918734, "grad_norm": 15.860591888427734, "learning_rate": 2.3404978636566312e-06, "lm_loss": 5.5244, "loss": 1.4897, "step": 1519, "text_contrastive_loss": 0.8536 }, { "contrastive_loss": 0.4791, "epoch": 3.4311512415349887, "grad_norm": 15.427270889282227, "learning_rate": 2.3343567342550933e-06, "lm_loss": 5.5524, "loss": 1.4218, "step": 1520, "text_contrastive_loss": 0.775 }, { "contrastive_loss": 0.5032, "epoch": 3.4334085778781036, "grad_norm": 16.81085205078125, "learning_rate": 2.328221217735355e-06, "lm_loss": 5.4353, "loss": 1.4378, "step": 1521, "text_contrastive_loss": 0.7823 }, { "contrastive_loss": 0.5521, "epoch": 3.435665914221219, "grad_norm": 14.574199676513672, "learning_rate": 2.322091327016597e-06, "lm_loss": 5.3922, "loss": 1.4772, "step": 1522, "text_contrastive_loss": 0.7718 }, { "contrastive_loss": 0.4961, "epoch": 3.4379232505643342, "grad_norm": 14.520478248596191, "learning_rate": 2.3159670750061563e-06, "lm_loss": 5.5431, "loss": 1.4631, "step": 1523, "text_contrastive_loss": 0.8255 }, { "contrastive_loss": 0.5214, "epoch": 3.440180586907449, "grad_norm": 17.026172637939453, "learning_rate": 2.3098484745994933e-06, "lm_loss": 5.3637, "loss": 1.489, "step": 1524, "text_contrastive_loss": 0.8624 }, { "contrastive_loss": 0.4649, "epoch": 3.4424379232505644, "grad_norm": 14.461862564086914, "learning_rate": 2.3037355386801683e-06, "lm_loss": 5.5148, "loss": 1.4059, "step": 1525, "text_contrastive_loss": 0.7791 }, { "contrastive_loss": 0.4097, "epoch": 3.4446952595936793, "grad_norm": 14.151453018188477, "learning_rate": 2.2976282801198237e-06, "lm_loss": 5.5114, "loss": 1.3445, "step": 1526, "text_contrastive_loss": 0.7673 }, { "contrastive_loss": 0.5351, "epoch": 3.4469525959367946, "grad_norm": 15.692597389221191, "learning_rate": 2.2915267117781328e-06, "lm_loss": 5.4158, "loss": 1.5236, "step": 1527, "text_contrastive_loss": 0.8938 }, { "contrastive_loss": 0.4498, "epoch": 3.44920993227991, "grad_norm": 13.88866138458252, "learning_rate": 2.2854308465027963e-06, "lm_loss": 5.4396, "loss": 1.3871, "step": 1528, "text_contrastive_loss": 0.7867 }, { "contrastive_loss": 0.463, "epoch": 3.4514672686230248, "grad_norm": 14.166723251342773, "learning_rate": 2.279340697129505e-06, "lm_loss": 5.4298, "loss": 1.3945, "step": 1529, "text_contrastive_loss": 0.7769 }, { "contrastive_loss": 0.5447, "epoch": 3.45372460496614, "grad_norm": 15.951334953308105, "learning_rate": 2.2732562764819157e-06, "lm_loss": 5.4896, "loss": 1.5022, "step": 1530, "text_contrastive_loss": 0.817 }, { "contrastive_loss": 0.4087, "epoch": 3.455981941309255, "grad_norm": 15.156458854675293, "learning_rate": 2.267177597371616e-06, "lm_loss": 5.4176, "loss": 1.2951, "step": 1531, "text_contrastive_loss": 0.6893 }, { "contrastive_loss": 0.5003, "epoch": 3.4582392776523703, "grad_norm": 15.428759574890137, "learning_rate": 2.26110467259811e-06, "lm_loss": 5.3475, "loss": 1.4773, "step": 1532, "text_contrastive_loss": 0.8846 }, { "contrastive_loss": 0.4233, "epoch": 3.460496613995485, "grad_norm": 15.227729797363281, "learning_rate": 2.255037514948785e-06, "lm_loss": 5.3626, "loss": 1.3801, "step": 1533, "text_contrastive_loss": 0.841 }, { "contrastive_loss": 0.5161, "epoch": 3.4627539503386005, "grad_norm": 15.018134117126465, "learning_rate": 2.2489761371988826e-06, "lm_loss": 5.4664, "loss": 1.4959, "step": 1534, "text_contrastive_loss": 0.8663 }, { "contrastive_loss": 0.4129, "epoch": 3.4650112866817158, "grad_norm": 14.654855728149414, "learning_rate": 2.242920552111473e-06, "lm_loss": 5.4464, "loss": 1.3298, "step": 1535, "text_contrastive_loss": 0.7445 }, { "contrastive_loss": 0.4956, "epoch": 3.4672686230248306, "grad_norm": 15.13311767578125, "learning_rate": 2.236870772437433e-06, "lm_loss": 5.5042, "loss": 1.4198, "step": 1536, "text_contrastive_loss": 0.7476 }, { "contrastive_loss": 0.378, "epoch": 3.469525959367946, "grad_norm": 13.35870361328125, "learning_rate": 2.2308268109154126e-06, "lm_loss": 5.4243, "loss": 1.3046, "step": 1537, "text_contrastive_loss": 0.7683 }, { "contrastive_loss": 0.4847, "epoch": 3.471783295711061, "grad_norm": 15.677458763122559, "learning_rate": 2.224788680271811e-06, "lm_loss": 5.4965, "loss": 1.3943, "step": 1538, "text_contrastive_loss": 0.7197 }, { "contrastive_loss": 0.4336, "epoch": 3.474040632054176, "grad_norm": 13.910303115844727, "learning_rate": 2.218756393220753e-06, "lm_loss": 5.4606, "loss": 1.3446, "step": 1539, "text_contrastive_loss": 0.7298 }, { "contrastive_loss": 0.4165, "epoch": 3.476297968397291, "grad_norm": 14.130895614624023, "learning_rate": 2.212729962464051e-06, "lm_loss": 5.5941, "loss": 1.282, "step": 1540, "text_contrastive_loss": 0.6122 }, { "contrastive_loss": 0.5003, "epoch": 3.4785553047404063, "grad_norm": 14.862709999084473, "learning_rate": 2.2067094006911943e-06, "lm_loss": 5.4334, "loss": 1.4298, "step": 1541, "text_contrastive_loss": 0.7724 }, { "contrastive_loss": 0.5442, "epoch": 3.4808126410835216, "grad_norm": 15.639609336853027, "learning_rate": 2.2006947205793107e-06, "lm_loss": 5.4148, "loss": 1.4872, "step": 1542, "text_contrastive_loss": 0.803 }, { "contrastive_loss": 0.4212, "epoch": 3.4830699774266365, "grad_norm": 14.766778945922852, "learning_rate": 2.1946859347931442e-06, "lm_loss": 5.3569, "loss": 1.2715, "step": 1543, "text_contrastive_loss": 0.6293 }, { "contrastive_loss": 0.5174, "epoch": 3.485327313769752, "grad_norm": 15.845047950744629, "learning_rate": 2.1886830559850264e-06, "lm_loss": 5.4649, "loss": 1.5043, "step": 1544, "text_contrastive_loss": 0.8808 }, { "contrastive_loss": 0.4867, "epoch": 3.4875846501128667, "grad_norm": 14.702009201049805, "learning_rate": 2.182686096794852e-06, "lm_loss": 5.4745, "loss": 1.4068, "step": 1545, "text_contrastive_loss": 0.7454 }, { "contrastive_loss": 0.5007, "epoch": 3.489841986455982, "grad_norm": 16.13895034790039, "learning_rate": 2.176695069850053e-06, "lm_loss": 5.3923, "loss": 1.513, "step": 1546, "text_contrastive_loss": 0.9463 }, { "contrastive_loss": 0.5704, "epoch": 3.4920993227990973, "grad_norm": 15.552277565002441, "learning_rate": 2.1707099877655634e-06, "lm_loss": 5.406, "loss": 1.5892, "step": 1547, "text_contrastive_loss": 0.9565 }, { "contrastive_loss": 0.4615, "epoch": 3.494356659142212, "grad_norm": 14.597433090209961, "learning_rate": 2.1647308631438068e-06, "lm_loss": 5.4116, "loss": 1.3862, "step": 1548, "text_contrastive_loss": 0.7671 }, { "contrastive_loss": 0.3948, "epoch": 3.4966139954853275, "grad_norm": 12.96075439453125, "learning_rate": 2.1587577085746596e-06, "lm_loss": 5.4675, "loss": 1.3205, "step": 1549, "text_contrastive_loss": 0.7579 }, { "contrastive_loss": 0.5114, "epoch": 3.4988713318284423, "grad_norm": 15.659757614135742, "learning_rate": 2.1527905366354292e-06, "lm_loss": 5.4302, "loss": 1.3715, "step": 1550, "text_contrastive_loss": 0.6342 }, { "contrastive_loss": 0.6357, "epoch": 3.5011286681715577, "grad_norm": 16.875946044921875, "learning_rate": 2.14682935989082e-06, "lm_loss": 5.4462, "loss": 1.6049, "step": 1551, "text_contrastive_loss": 0.8491 }, { "contrastive_loss": 0.5353, "epoch": 3.5033860045146725, "grad_norm": 16.93952751159668, "learning_rate": 2.14087419089292e-06, "lm_loss": 5.5169, "loss": 1.5205, "step": 1552, "text_contrastive_loss": 0.8669 }, { "contrastive_loss": 0.5005, "epoch": 3.505643340857788, "grad_norm": 15.430221557617188, "learning_rate": 2.1349250421811622e-06, "lm_loss": 5.5929, "loss": 1.4884, "step": 1553, "text_contrastive_loss": 0.8571 }, { "contrastive_loss": 0.4847, "epoch": 3.5079006772009027, "grad_norm": 15.394756317138672, "learning_rate": 2.1289819262823065e-06, "lm_loss": 5.4703, "loss": 1.386, "step": 1554, "text_contrastive_loss": 0.7087 }, { "contrastive_loss": 0.4992, "epoch": 3.510158013544018, "grad_norm": 15.481775283813477, "learning_rate": 2.1230448557104087e-06, "lm_loss": 5.3952, "loss": 1.4936, "step": 1555, "text_contrastive_loss": 0.9097 }, { "contrastive_loss": 0.4316, "epoch": 3.5124153498871333, "grad_norm": 15.534514427185059, "learning_rate": 2.117113842966792e-06, "lm_loss": 5.4969, "loss": 1.3996, "step": 1556, "text_contrastive_loss": 0.8367 }, { "contrastive_loss": 0.4614, "epoch": 3.514672686230248, "grad_norm": 15.321314811706543, "learning_rate": 2.111188900540028e-06, "lm_loss": 5.3665, "loss": 1.3687, "step": 1557, "text_contrastive_loss": 0.7413 }, { "contrastive_loss": 0.4875, "epoch": 3.5169300225733635, "grad_norm": 13.656997680664062, "learning_rate": 2.1052700409059057e-06, "lm_loss": 5.4849, "loss": 1.3841, "step": 1558, "text_contrastive_loss": 0.6963 }, { "contrastive_loss": 0.423, "epoch": 3.5191873589164784, "grad_norm": 15.110286712646484, "learning_rate": 2.0993572765274044e-06, "lm_loss": 5.426, "loss": 1.3611, "step": 1559, "text_contrastive_loss": 0.7909 }, { "contrastive_loss": 0.4749, "epoch": 3.5214446952595937, "grad_norm": 13.659746170043945, "learning_rate": 2.093450619854671e-06, "lm_loss": 5.4491, "loss": 1.514, "step": 1560, "text_contrastive_loss": 0.9883 }, { "contrastive_loss": 0.3576, "epoch": 3.523702031602709, "grad_norm": 12.51785945892334, "learning_rate": 2.08755008332499e-06, "lm_loss": 5.4431, "loss": 1.2728, "step": 1561, "text_contrastive_loss": 0.7418 }, { "contrastive_loss": 0.4406, "epoch": 3.525959367945824, "grad_norm": 14.191641807556152, "learning_rate": 2.0816556793627624e-06, "lm_loss": 5.5272, "loss": 1.3843, "step": 1562, "text_contrastive_loss": 0.7819 }, { "contrastive_loss": 0.5504, "epoch": 3.528216704288939, "grad_norm": 14.523859024047852, "learning_rate": 2.0757674203794696e-06, "lm_loss": 5.5149, "loss": 1.5177, "step": 1563, "text_contrastive_loss": 0.8316 }, { "contrastive_loss": 0.4686, "epoch": 3.530474040632054, "grad_norm": 14.75478744506836, "learning_rate": 2.06988531877366e-06, "lm_loss": 5.511, "loss": 1.3882, "step": 1564, "text_contrastive_loss": 0.7371 }, { "contrastive_loss": 0.5425, "epoch": 3.5327313769751694, "grad_norm": 15.918438911437988, "learning_rate": 2.064009386930915e-06, "lm_loss": 5.4933, "loss": 1.4855, "step": 1565, "text_contrastive_loss": 0.7873 }, { "contrastive_loss": 0.5333, "epoch": 3.5349887133182847, "grad_norm": 16.604145050048828, "learning_rate": 2.0581396372238254e-06, "lm_loss": 5.4734, "loss": 1.5831, "step": 1566, "text_contrastive_loss": 1.0049 }, { "contrastive_loss": 0.4561, "epoch": 3.5372460496613995, "grad_norm": 13.487076759338379, "learning_rate": 2.0522760820119615e-06, "lm_loss": 5.3955, "loss": 1.3157, "step": 1567, "text_contrastive_loss": 0.64 }, { "contrastive_loss": 0.4488, "epoch": 3.5395033860045144, "grad_norm": 14.998278617858887, "learning_rate": 2.046418733641853e-06, "lm_loss": 5.4563, "loss": 1.3863, "step": 1568, "text_contrastive_loss": 0.7837 }, { "contrastive_loss": 0.4252, "epoch": 3.5417607223476297, "grad_norm": 14.402261734008789, "learning_rate": 2.04056760444696e-06, "lm_loss": 5.5006, "loss": 1.3672, "step": 1569, "text_contrastive_loss": 0.7838 }, { "contrastive_loss": 0.5558, "epoch": 3.544018058690745, "grad_norm": 16.445056915283203, "learning_rate": 2.0347227067476478e-06, "lm_loss": 5.5556, "loss": 1.5293, "step": 1570, "text_contrastive_loss": 0.8359 }, { "contrastive_loss": 0.3906, "epoch": 3.54627539503386, "grad_norm": 13.13399600982666, "learning_rate": 2.02888405285116e-06, "lm_loss": 5.5058, "loss": 1.3473, "step": 1571, "text_contrastive_loss": 0.8122 }, { "contrastive_loss": 0.4879, "epoch": 3.5485327313769752, "grad_norm": 15.91758918762207, "learning_rate": 2.02305165505159e-06, "lm_loss": 5.4396, "loss": 1.514, "step": 1572, "text_contrastive_loss": 0.9643 }, { "contrastive_loss": 0.4369, "epoch": 3.55079006772009, "grad_norm": 13.412368774414062, "learning_rate": 2.0172255256298623e-06, "lm_loss": 5.3833, "loss": 1.4127, "step": 1573, "text_contrastive_loss": 0.8749 }, { "contrastive_loss": 0.4322, "epoch": 3.5530474040632054, "grad_norm": 13.076878547668457, "learning_rate": 2.0114056768537005e-06, "lm_loss": 5.4023, "loss": 1.2969, "step": 1574, "text_contrastive_loss": 0.6489 }, { "contrastive_loss": 0.4718, "epoch": 3.5553047404063207, "grad_norm": 15.465753555297852, "learning_rate": 2.005592120977606e-06, "lm_loss": 5.4437, "loss": 1.4883, "step": 1575, "text_contrastive_loss": 0.9444 }, { "contrastive_loss": 0.5009, "epoch": 3.5575620767494356, "grad_norm": 15.188689231872559, "learning_rate": 1.9997848702428226e-06, "lm_loss": 5.4143, "loss": 1.4225, "step": 1576, "text_contrastive_loss": 0.7604 }, { "contrastive_loss": 0.5332, "epoch": 3.559819413092551, "grad_norm": 16.304014205932617, "learning_rate": 1.9939839368773267e-06, "lm_loss": 5.6139, "loss": 1.5483, "step": 1577, "text_contrastive_loss": 0.9075 }, { "contrastive_loss": 0.5034, "epoch": 3.5620767494356658, "grad_norm": 15.200770378112793, "learning_rate": 1.9881893330957893e-06, "lm_loss": 5.5008, "loss": 1.5508, "step": 1578, "text_contrastive_loss": 0.9947 }, { "contrastive_loss": 0.4698, "epoch": 3.564334085778781, "grad_norm": 13.521749496459961, "learning_rate": 1.982401071099549e-06, "lm_loss": 5.4551, "loss": 1.3238, "step": 1579, "text_contrastive_loss": 0.6172 }, { "contrastive_loss": 0.5775, "epoch": 3.5665914221218964, "grad_norm": 15.051464080810547, "learning_rate": 1.9766191630765964e-06, "lm_loss": 5.5055, "loss": 1.584, "step": 1580, "text_contrastive_loss": 0.9119 }, { "contrastive_loss": 0.4802, "epoch": 3.5688487584650113, "grad_norm": 14.562134742736816, "learning_rate": 1.970843621201541e-06, "lm_loss": 5.5413, "loss": 1.4693, "step": 1581, "text_contrastive_loss": 0.8698 }, { "contrastive_loss": 0.5069, "epoch": 3.5711060948081266, "grad_norm": 14.539229393005371, "learning_rate": 1.9650744576355894e-06, "lm_loss": 5.3543, "loss": 1.4229, "step": 1582, "text_contrastive_loss": 0.7612 }, { "contrastive_loss": 0.4969, "epoch": 3.5733634311512414, "grad_norm": 14.167746543884277, "learning_rate": 1.959311684526513e-06, "lm_loss": 5.3595, "loss": 1.4696, "step": 1583, "text_contrastive_loss": 0.8736 }, { "contrastive_loss": 0.4733, "epoch": 3.5756207674943568, "grad_norm": 13.857242584228516, "learning_rate": 1.9535553140086322e-06, "lm_loss": 5.3496, "loss": 1.3703, "step": 1584, "text_contrastive_loss": 0.724 }, { "contrastive_loss": 0.5158, "epoch": 3.5778781038374716, "grad_norm": 16.7324275970459, "learning_rate": 1.9478053582027826e-06, "lm_loss": 5.3888, "loss": 1.486, "step": 1585, "text_contrastive_loss": 0.8625 }, { "contrastive_loss": 0.4899, "epoch": 3.580135440180587, "grad_norm": 14.35672378540039, "learning_rate": 1.9420618292162974e-06, "lm_loss": 5.4006, "loss": 1.4499, "step": 1586, "text_contrastive_loss": 0.84 }, { "contrastive_loss": 0.449, "epoch": 3.582392776523702, "grad_norm": 15.682668685913086, "learning_rate": 1.9363247391429695e-06, "lm_loss": 5.36, "loss": 1.3723, "step": 1587, "text_contrastive_loss": 0.7747 }, { "contrastive_loss": 0.3653, "epoch": 3.584650112866817, "grad_norm": 11.676576614379883, "learning_rate": 1.93059410006304e-06, "lm_loss": 5.4248, "loss": 1.2342, "step": 1588, "text_contrastive_loss": 0.6528 }, { "contrastive_loss": 0.4217, "epoch": 3.5869074492099324, "grad_norm": 14.936031341552734, "learning_rate": 1.924869924043165e-06, "lm_loss": 5.3893, "loss": 1.3702, "step": 1589, "text_contrastive_loss": 0.8192 }, { "contrastive_loss": 0.4403, "epoch": 3.5891647855530473, "grad_norm": 15.025103569030762, "learning_rate": 1.919152223136391e-06, "lm_loss": 5.5393, "loss": 1.3577, "step": 1590, "text_contrastive_loss": 0.7269 }, { "contrastive_loss": 0.4997, "epoch": 3.5914221218961626, "grad_norm": 15.543709754943848, "learning_rate": 1.913441009382133e-06, "lm_loss": 5.4648, "loss": 1.4077, "step": 1591, "text_contrastive_loss": 0.7231 }, { "contrastive_loss": 0.542, "epoch": 3.5936794582392775, "grad_norm": 13.94593620300293, "learning_rate": 1.9077362948061404e-06, "lm_loss": 5.4439, "loss": 1.4549, "step": 1592, "text_contrastive_loss": 0.7371 }, { "contrastive_loss": 0.4923, "epoch": 3.595936794582393, "grad_norm": 13.830196380615234, "learning_rate": 1.902038091420481e-06, "lm_loss": 5.6238, "loss": 1.4659, "step": 1593, "text_contrastive_loss": 0.8224 }, { "contrastive_loss": 0.5044, "epoch": 3.598194130925508, "grad_norm": 14.951301574707031, "learning_rate": 1.8963464112235185e-06, "lm_loss": 5.4754, "loss": 1.4658, "step": 1594, "text_contrastive_loss": 0.8278 }, { "contrastive_loss": 0.4535, "epoch": 3.600451467268623, "grad_norm": 15.162139892578125, "learning_rate": 1.8906612661998698e-06, "lm_loss": 5.4616, "loss": 1.3023, "step": 1595, "text_contrastive_loss": 0.6052 }, { "contrastive_loss": 0.4793, "epoch": 3.6027088036117383, "grad_norm": 14.030896186828613, "learning_rate": 1.884982668320398e-06, "lm_loss": 5.4521, "loss": 1.3707, "step": 1596, "text_contrastive_loss": 0.6923 }, { "contrastive_loss": 0.4435, "epoch": 3.604966139954853, "grad_norm": 15.813409805297852, "learning_rate": 1.8793106295421797e-06, "lm_loss": 5.4842, "loss": 1.3921, "step": 1597, "text_contrastive_loss": 0.8005 }, { "contrastive_loss": 0.5051, "epoch": 3.6072234762979685, "grad_norm": 15.220564842224121, "learning_rate": 1.873645161808481e-06, "lm_loss": 5.4737, "loss": 1.4929, "step": 1598, "text_contrastive_loss": 0.8807 }, { "contrastive_loss": 0.4523, "epoch": 3.609480812641084, "grad_norm": 14.675507545471191, "learning_rate": 1.8679862770487273e-06, "lm_loss": 5.5892, "loss": 1.4142, "step": 1599, "text_contrastive_loss": 0.806 }, { "contrastive_loss": 0.5317, "epoch": 3.6117381489841986, "grad_norm": 15.481648445129395, "learning_rate": 1.8623339871784869e-06, "lm_loss": 5.375, "loss": 1.5374, "step": 1600, "text_contrastive_loss": 0.9363 }, { "contrastive_loss": 0.6098, "epoch": 3.6139954853273135, "grad_norm": 18.212594985961914, "learning_rate": 1.8566883040994411e-06, "lm_loss": 5.43, "loss": 1.6688, "step": 1601, "text_contrastive_loss": 1.032 }, { "contrastive_loss": 0.4442, "epoch": 3.616252821670429, "grad_norm": 14.503066062927246, "learning_rate": 1.8510492396993595e-06, "lm_loss": 5.5556, "loss": 1.3495, "step": 1602, "text_contrastive_loss": 0.6995 }, { "contrastive_loss": 0.5378, "epoch": 3.618510158013544, "grad_norm": 14.18012523651123, "learning_rate": 1.8454168058520732e-06, "lm_loss": 5.4036, "loss": 1.4484, "step": 1603, "text_contrastive_loss": 0.7403 }, { "contrastive_loss": 0.4311, "epoch": 3.620767494356659, "grad_norm": 13.57884407043457, "learning_rate": 1.8397910144174536e-06, "lm_loss": 5.3306, "loss": 1.3593, "step": 1604, "text_contrastive_loss": 0.7902 }, { "contrastive_loss": 0.4251, "epoch": 3.6230248306997743, "grad_norm": 14.99509048461914, "learning_rate": 1.8341718772413852e-06, "lm_loss": 5.2957, "loss": 1.3066, "step": 1605, "text_contrastive_loss": 0.7039 }, { "contrastive_loss": 0.4451, "epoch": 3.625282167042889, "grad_norm": 13.73076343536377, "learning_rate": 1.8285594061557421e-06, "lm_loss": 5.4181, "loss": 1.4453, "step": 1606, "text_contrastive_loss": 0.9168 }, { "contrastive_loss": 0.5404, "epoch": 3.6275395033860045, "grad_norm": 15.812504768371582, "learning_rate": 1.822953612978362e-06, "lm_loss": 5.3931, "loss": 1.4733, "step": 1607, "text_contrastive_loss": 0.7871 }, { "contrastive_loss": 0.4427, "epoch": 3.62979683972912, "grad_norm": 13.716792106628418, "learning_rate": 1.817354509513017e-06, "lm_loss": 5.4027, "loss": 1.3851, "step": 1608, "text_contrastive_loss": 0.8042 }, { "contrastive_loss": 0.5463, "epoch": 3.6320541760722347, "grad_norm": 17.64068031311035, "learning_rate": 1.8117621075493979e-06, "lm_loss": 5.4712, "loss": 1.4617, "step": 1609, "text_contrastive_loss": 0.7365 }, { "contrastive_loss": 0.4548, "epoch": 3.63431151241535, "grad_norm": 15.060933113098145, "learning_rate": 1.8061764188630831e-06, "lm_loss": 5.4258, "loss": 1.3966, "step": 1610, "text_contrastive_loss": 0.7985 }, { "contrastive_loss": 0.4503, "epoch": 3.636568848758465, "grad_norm": 13.883279800415039, "learning_rate": 1.8005974552155158e-06, "lm_loss": 5.5137, "loss": 1.371, "step": 1611, "text_contrastive_loss": 0.7387 }, { "contrastive_loss": 0.5283, "epoch": 3.63882618510158, "grad_norm": 14.469231605529785, "learning_rate": 1.7950252283539776e-06, "lm_loss": 5.4306, "loss": 1.5237, "step": 1612, "text_contrastive_loss": 0.9048 }, { "contrastive_loss": 0.4296, "epoch": 3.6410835214446955, "grad_norm": 14.495342254638672, "learning_rate": 1.7894597500115657e-06, "lm_loss": 5.431, "loss": 1.3127, "step": 1613, "text_contrastive_loss": 0.68 }, { "contrastive_loss": 0.4475, "epoch": 3.6433408577878104, "grad_norm": 14.319993019104004, "learning_rate": 1.7839010319071687e-06, "lm_loss": 5.4516, "loss": 1.4174, "step": 1614, "text_contrastive_loss": 0.8496 }, { "contrastive_loss": 0.4959, "epoch": 3.6455981941309257, "grad_norm": 15.90335464477539, "learning_rate": 1.7783490857454354e-06, "lm_loss": 5.4921, "loss": 1.4638, "step": 1615, "text_contrastive_loss": 0.8374 }, { "contrastive_loss": 0.5409, "epoch": 3.6478555304740405, "grad_norm": 15.516691207885742, "learning_rate": 1.7728039232167603e-06, "lm_loss": 5.4983, "loss": 1.4933, "step": 1616, "text_contrastive_loss": 0.8051 }, { "contrastive_loss": 0.449, "epoch": 3.650112866817156, "grad_norm": 13.99092960357666, "learning_rate": 1.7672655559972535e-06, "lm_loss": 5.3946, "loss": 1.3597, "step": 1617, "text_contrastive_loss": 0.7424 }, { "contrastive_loss": 0.3678, "epoch": 3.6523702031602707, "grad_norm": 13.596165657043457, "learning_rate": 1.7617339957487167e-06, "lm_loss": 5.6572, "loss": 1.3028, "step": 1618, "text_contrastive_loss": 0.7386 }, { "contrastive_loss": 0.496, "epoch": 3.654627539503386, "grad_norm": 15.550728797912598, "learning_rate": 1.7562092541186144e-06, "lm_loss": 5.512, "loss": 1.4366, "step": 1619, "text_contrastive_loss": 0.7788 }, { "contrastive_loss": 0.3968, "epoch": 3.656884875846501, "grad_norm": 12.663150787353516, "learning_rate": 1.750691342740058e-06, "lm_loss": 5.3956, "loss": 1.2821, "step": 1620, "text_contrastive_loss": 0.6915 }, { "contrastive_loss": 0.4398, "epoch": 3.659142212189616, "grad_norm": 13.776789665222168, "learning_rate": 1.7451802732317763e-06, "lm_loss": 5.4691, "loss": 1.3938, "step": 1621, "text_contrastive_loss": 0.8142 }, { "contrastive_loss": 0.3829, "epoch": 3.6613995485327315, "grad_norm": 13.244256973266602, "learning_rate": 1.7396760571980902e-06, "lm_loss": 5.5434, "loss": 1.3086, "step": 1622, "text_contrastive_loss": 0.7428 }, { "contrastive_loss": 0.4854, "epoch": 3.6636568848758464, "grad_norm": 14.853069305419922, "learning_rate": 1.7341787062288928e-06, "lm_loss": 5.4619, "loss": 1.3947, "step": 1623, "text_contrastive_loss": 0.7262 }, { "contrastive_loss": 0.6319, "epoch": 3.6659142212189617, "grad_norm": 16.079355239868164, "learning_rate": 1.7286882318996162e-06, "lm_loss": 5.5133, "loss": 1.5815, "step": 1624, "text_contrastive_loss": 0.7966 }, { "contrastive_loss": 0.437, "epoch": 3.6681715575620766, "grad_norm": 13.949938774108887, "learning_rate": 1.7232046457712164e-06, "lm_loss": 5.4726, "loss": 1.3366, "step": 1625, "text_contrastive_loss": 0.7048 }, { "contrastive_loss": 0.5804, "epoch": 3.670428893905192, "grad_norm": 19.09020233154297, "learning_rate": 1.7177279593901463e-06, "lm_loss": 5.358, "loss": 1.5091, "step": 1626, "text_contrastive_loss": 0.7858 }, { "contrastive_loss": 0.4591, "epoch": 3.672686230248307, "grad_norm": 13.983572959899902, "learning_rate": 1.712258184288328e-06, "lm_loss": 5.3888, "loss": 1.3874, "step": 1627, "text_contrastive_loss": 0.7789 }, { "contrastive_loss": 0.486, "epoch": 3.674943566591422, "grad_norm": 14.134383201599121, "learning_rate": 1.7067953319831327e-06, "lm_loss": 5.371, "loss": 1.423, "step": 1628, "text_contrastive_loss": 0.7999 }, { "contrastive_loss": 0.5198, "epoch": 3.6772009029345374, "grad_norm": 15.602533340454102, "learning_rate": 1.7013394139773537e-06, "lm_loss": 5.478, "loss": 1.458, "step": 1629, "text_contrastive_loss": 0.7809 }, { "contrastive_loss": 0.4249, "epoch": 3.6794582392776523, "grad_norm": 14.587101936340332, "learning_rate": 1.6958904417591853e-06, "lm_loss": 5.4295, "loss": 1.3875, "step": 1630, "text_contrastive_loss": 0.8394 }, { "contrastive_loss": 0.6462, "epoch": 3.6817155756207676, "grad_norm": 17.65336036682129, "learning_rate": 1.6904484268021915e-06, "lm_loss": 5.4553, "loss": 1.6541, "step": 1631, "text_contrastive_loss": 0.9248 }, { "contrastive_loss": 0.5325, "epoch": 3.683972911963883, "grad_norm": 16.566082000732422, "learning_rate": 1.6850133805652907e-06, "lm_loss": 5.3911, "loss": 1.5751, "step": 1632, "text_contrastive_loss": 1.0071 }, { "contrastive_loss": 0.38, "epoch": 3.6862302483069977, "grad_norm": 13.386906623840332, "learning_rate": 1.6795853144927282e-06, "lm_loss": 5.45, "loss": 1.2994, "step": 1633, "text_contrastive_loss": 0.7489 }, { "contrastive_loss": 0.3964, "epoch": 3.6884875846501126, "grad_norm": 12.794499397277832, "learning_rate": 1.6741642400140513e-06, "lm_loss": 5.4881, "loss": 1.3298, "step": 1634, "text_contrastive_loss": 0.7691 }, { "contrastive_loss": 0.4154, "epoch": 3.690744920993228, "grad_norm": 13.860358238220215, "learning_rate": 1.668750168544081e-06, "lm_loss": 5.5105, "loss": 1.3183, "step": 1635, "text_contrastive_loss": 0.7037 }, { "contrastive_loss": 0.5132, "epoch": 3.6930022573363432, "grad_norm": 16.342418670654297, "learning_rate": 1.663343111482898e-06, "lm_loss": 5.342, "loss": 1.4531, "step": 1636, "text_contrastive_loss": 0.8115 }, { "contrastive_loss": 0.4659, "epoch": 3.695259593679458, "grad_norm": 14.466636657714844, "learning_rate": 1.657943080215812e-06, "lm_loss": 5.5518, "loss": 1.3731, "step": 1637, "text_contrastive_loss": 0.704 }, { "contrastive_loss": 0.3998, "epoch": 3.6975169300225734, "grad_norm": 13.353544235229492, "learning_rate": 1.6525500861133386e-06, "lm_loss": 5.4991, "loss": 1.2143, "step": 1638, "text_contrastive_loss": 0.5293 }, { "contrastive_loss": 0.5195, "epoch": 3.6997742663656883, "grad_norm": 15.653892517089844, "learning_rate": 1.6471641405311727e-06, "lm_loss": 5.3331, "loss": 1.4522, "step": 1639, "text_contrastive_loss": 0.7987 }, { "contrastive_loss": 0.3807, "epoch": 3.7020316027088036, "grad_norm": 13.191279411315918, "learning_rate": 1.641785254810172e-06, "lm_loss": 5.3584, "loss": 1.2324, "step": 1640, "text_contrastive_loss": 0.6317 }, { "contrastive_loss": 0.4739, "epoch": 3.704288939051919, "grad_norm": 14.209907531738281, "learning_rate": 1.636413440276326e-06, "lm_loss": 5.4345, "loss": 1.4423, "step": 1641, "text_contrastive_loss": 0.8498 }, { "contrastive_loss": 0.527, "epoch": 3.706546275395034, "grad_norm": 15.565753936767578, "learning_rate": 1.631048708240736e-06, "lm_loss": 5.4348, "loss": 1.5435, "step": 1642, "text_contrastive_loss": 0.9461 }, { "contrastive_loss": 0.5093, "epoch": 3.708803611738149, "grad_norm": 16.196430206298828, "learning_rate": 1.6256910699995921e-06, "lm_loss": 5.4427, "loss": 1.4312, "step": 1643, "text_contrastive_loss": 0.7552 }, { "contrastive_loss": 0.5041, "epoch": 3.711060948081264, "grad_norm": 16.06947898864746, "learning_rate": 1.620340536834139e-06, "lm_loss": 5.4575, "loss": 1.4531, "step": 1644, "text_contrastive_loss": 0.8063 }, { "contrastive_loss": 0.4474, "epoch": 3.7133182844243793, "grad_norm": 15.422207832336426, "learning_rate": 1.6149971200106723e-06, "lm_loss": 5.4493, "loss": 1.3903, "step": 1645, "text_contrastive_loss": 0.7959 }, { "contrastive_loss": 0.4229, "epoch": 3.7155756207674946, "grad_norm": 13.07422924041748, "learning_rate": 1.6096608307804973e-06, "lm_loss": 5.3305, "loss": 1.3167, "step": 1646, "text_contrastive_loss": 0.7215 }, { "contrastive_loss": 0.4428, "epoch": 3.7178329571106095, "grad_norm": 13.395735740661621, "learning_rate": 1.604331680379908e-06, "lm_loss": 5.5048, "loss": 1.3651, "step": 1647, "text_contrastive_loss": 0.7435 }, { "contrastive_loss": 0.3978, "epoch": 3.7200902934537243, "grad_norm": 12.62199878692627, "learning_rate": 1.599009680030173e-06, "lm_loss": 5.4844, "loss": 1.2788, "step": 1648, "text_contrastive_loss": 0.665 }, { "contrastive_loss": 0.4683, "epoch": 3.7223476297968396, "grad_norm": 13.023398399353027, "learning_rate": 1.5936948409375007e-06, "lm_loss": 5.3911, "loss": 1.4088, "step": 1649, "text_contrastive_loss": 0.8027 }, { "contrastive_loss": 0.5661, "epoch": 3.724604966139955, "grad_norm": 14.816571235656738, "learning_rate": 1.5883871742930257e-06, "lm_loss": 5.4983, "loss": 1.6129, "step": 1650, "text_contrastive_loss": 0.9938 }, { "contrastive_loss": 0.5046, "epoch": 3.72686230248307, "grad_norm": 16.056814193725586, "learning_rate": 1.5830866912727722e-06, "lm_loss": 5.343, "loss": 1.4311, "step": 1651, "text_contrastive_loss": 0.7844 }, { "contrastive_loss": 0.4904, "epoch": 3.729119638826185, "grad_norm": 15.371965408325195, "learning_rate": 1.5777934030376445e-06, "lm_loss": 5.3956, "loss": 1.5193, "step": 1652, "text_contrastive_loss": 0.9785 }, { "contrastive_loss": 0.4133, "epoch": 3.7313769751693, "grad_norm": 14.072219848632812, "learning_rate": 1.5725073207333963e-06, "lm_loss": 5.4368, "loss": 1.2556, "step": 1653, "text_contrastive_loss": 0.5972 }, { "contrastive_loss": 0.4406, "epoch": 3.7336343115124153, "grad_norm": 14.953420639038086, "learning_rate": 1.5672284554906087e-06, "lm_loss": 5.4728, "loss": 1.3302, "step": 1654, "text_contrastive_loss": 0.6846 }, { "contrastive_loss": 0.4477, "epoch": 3.7358916478555306, "grad_norm": 13.6334228515625, "learning_rate": 1.561956818424661e-06, "lm_loss": 5.4214, "loss": 1.4501, "step": 1655, "text_contrastive_loss": 0.9206 }, { "contrastive_loss": 0.386, "epoch": 3.7381489841986455, "grad_norm": 13.051822662353516, "learning_rate": 1.5566924206357187e-06, "lm_loss": 5.4257, "loss": 1.3043, "step": 1656, "text_contrastive_loss": 0.7516 }, { "contrastive_loss": 0.4568, "epoch": 3.740406320541761, "grad_norm": 14.413710594177246, "learning_rate": 1.5514352732087024e-06, "lm_loss": 5.5269, "loss": 1.3322, "step": 1657, "text_contrastive_loss": 0.6453 }, { "contrastive_loss": 0.3845, "epoch": 3.7426636568848757, "grad_norm": 13.425925254821777, "learning_rate": 1.5461853872132648e-06, "lm_loss": 5.3478, "loss": 1.2498, "step": 1658, "text_contrastive_loss": 0.6612 }, { "contrastive_loss": 0.4097, "epoch": 3.744920993227991, "grad_norm": 12.664298057556152, "learning_rate": 1.5409427737037713e-06, "lm_loss": 5.41, "loss": 1.326, "step": 1659, "text_contrastive_loss": 0.7505 }, { "contrastive_loss": 0.5343, "epoch": 3.7471783295711063, "grad_norm": 16.94868278503418, "learning_rate": 1.5357074437192688e-06, "lm_loss": 5.5243, "loss": 1.5483, "step": 1660, "text_contrastive_loss": 0.9232 }, { "contrastive_loss": 0.4782, "epoch": 3.749435665914221, "grad_norm": 13.815977096557617, "learning_rate": 1.5304794082834713e-06, "lm_loss": 5.4108, "loss": 1.4028, "step": 1661, "text_contrastive_loss": 0.7669 }, { "contrastive_loss": 0.4463, "epoch": 3.7516930022573365, "grad_norm": 13.913975715637207, "learning_rate": 1.5252586784047374e-06, "lm_loss": 5.4865, "loss": 1.383, "step": 1662, "text_contrastive_loss": 0.7761 }, { "contrastive_loss": 0.4646, "epoch": 3.7539503386004514, "grad_norm": 13.805709838867188, "learning_rate": 1.520045265076034e-06, "lm_loss": 5.4509, "loss": 1.3843, "step": 1663, "text_contrastive_loss": 0.7493 }, { "contrastive_loss": 0.4252, "epoch": 3.7562076749435667, "grad_norm": 15.102557182312012, "learning_rate": 1.5148391792749272e-06, "lm_loss": 5.3517, "loss": 1.3715, "step": 1664, "text_contrastive_loss": 0.8222 }, { "contrastive_loss": 0.5332, "epoch": 3.758465011286682, "grad_norm": 16.021533966064453, "learning_rate": 1.5096404319635533e-06, "lm_loss": 5.3574, "loss": 1.4838, "step": 1665, "text_contrastive_loss": 0.8297 }, { "contrastive_loss": 0.3996, "epoch": 3.760722347629797, "grad_norm": 13.671859741210938, "learning_rate": 1.5044490340885987e-06, "lm_loss": 5.5058, "loss": 1.2973, "step": 1666, "text_contrastive_loss": 0.6942 }, { "contrastive_loss": 0.5671, "epoch": 3.7629796839729117, "grad_norm": 15.433830261230469, "learning_rate": 1.4992649965812673e-06, "lm_loss": 5.3907, "loss": 1.5554, "step": 1667, "text_contrastive_loss": 0.8985 }, { "contrastive_loss": 0.4496, "epoch": 3.765237020316027, "grad_norm": 12.787394523620605, "learning_rate": 1.4940883303572724e-06, "lm_loss": 5.2853, "loss": 1.3244, "step": 1668, "text_contrastive_loss": 0.6927 }, { "contrastive_loss": 0.5049, "epoch": 3.7674943566591423, "grad_norm": 15.6350736618042, "learning_rate": 1.4889190463168019e-06, "lm_loss": 5.3913, "loss": 1.4614, "step": 1669, "text_contrastive_loss": 0.8348 }, { "contrastive_loss": 0.463, "epoch": 3.769751693002257, "grad_norm": 14.160258293151855, "learning_rate": 1.483757155344503e-06, "lm_loss": 5.4456, "loss": 1.4444, "step": 1670, "text_contrastive_loss": 0.8737 }, { "contrastive_loss": 0.464, "epoch": 3.7720090293453725, "grad_norm": 14.008391380310059, "learning_rate": 1.47860266830945e-06, "lm_loss": 5.3581, "loss": 1.3778, "step": 1671, "text_contrastive_loss": 0.756 }, { "contrastive_loss": 0.3969, "epoch": 3.7742663656884874, "grad_norm": 12.669832229614258, "learning_rate": 1.473455596065133e-06, "lm_loss": 5.3824, "loss": 1.2799, "step": 1672, "text_contrastive_loss": 0.6896 }, { "contrastive_loss": 0.4465, "epoch": 3.7765237020316027, "grad_norm": 14.399201393127441, "learning_rate": 1.4683159494494259e-06, "lm_loss": 5.4547, "loss": 1.3726, "step": 1673, "text_contrastive_loss": 0.7614 }, { "contrastive_loss": 0.5326, "epoch": 3.778781038374718, "grad_norm": 15.133414268493652, "learning_rate": 1.4631837392845694e-06, "lm_loss": 5.474, "loss": 1.4792, "step": 1674, "text_contrastive_loss": 0.7982 }, { "contrastive_loss": 0.4698, "epoch": 3.781038374717833, "grad_norm": 14.398970603942871, "learning_rate": 1.4580589763771413e-06, "lm_loss": 5.4299, "loss": 1.4172, "step": 1675, "text_contrastive_loss": 0.8089 }, { "contrastive_loss": 0.4365, "epoch": 3.783295711060948, "grad_norm": 12.78225040435791, "learning_rate": 1.4529416715180434e-06, "lm_loss": 5.3764, "loss": 1.3824, "step": 1676, "text_contrastive_loss": 0.8166 }, { "contrastive_loss": 0.4425, "epoch": 3.785553047404063, "grad_norm": 15.15576457977295, "learning_rate": 1.44783183548247e-06, "lm_loss": 5.4167, "loss": 1.3419, "step": 1677, "text_contrastive_loss": 0.7154 }, { "contrastive_loss": 0.4026, "epoch": 3.7878103837471784, "grad_norm": 14.194446563720703, "learning_rate": 1.4427294790298902e-06, "lm_loss": 5.3794, "loss": 1.259, "step": 1678, "text_contrastive_loss": 0.6371 }, { "contrastive_loss": 0.4905, "epoch": 3.7900677200902937, "grad_norm": 15.428753852844238, "learning_rate": 1.4376346129040243e-06, "lm_loss": 5.5568, "loss": 1.4511, "step": 1679, "text_contrastive_loss": 0.8099 }, { "contrastive_loss": 0.4435, "epoch": 3.7923250564334086, "grad_norm": 14.056903839111328, "learning_rate": 1.432547247832819e-06, "lm_loss": 5.4258, "loss": 1.3487, "step": 1680, "text_contrastive_loss": 0.7252 }, { "contrastive_loss": 0.4828, "epoch": 3.7945823927765234, "grad_norm": 15.9234037399292, "learning_rate": 1.4274673945284278e-06, "lm_loss": 5.404, "loss": 1.3773, "step": 1681, "text_contrastive_loss": 0.7082 }, { "contrastive_loss": 0.5236, "epoch": 3.7968397291196387, "grad_norm": 16.38911247253418, "learning_rate": 1.422395063687188e-06, "lm_loss": 5.4339, "loss": 1.4378, "step": 1682, "text_contrastive_loss": 0.7415 }, { "contrastive_loss": 0.4982, "epoch": 3.799097065462754, "grad_norm": 15.284442901611328, "learning_rate": 1.4173302659895938e-06, "lm_loss": 5.3332, "loss": 1.4302, "step": 1683, "text_contrastive_loss": 0.7973 }, { "contrastive_loss": 0.5244, "epoch": 3.801354401805869, "grad_norm": 16.61286735534668, "learning_rate": 1.4122730121002808e-06, "lm_loss": 5.4125, "loss": 1.4997, "step": 1684, "text_contrastive_loss": 0.8681 }, { "contrastive_loss": 0.355, "epoch": 3.8036117381489842, "grad_norm": 13.047539710998535, "learning_rate": 1.4072233126679985e-06, "lm_loss": 5.4868, "loss": 1.1981, "step": 1685, "text_contrastive_loss": 0.5888 }, { "contrastive_loss": 0.4249, "epoch": 3.805869074492099, "grad_norm": 13.029471397399902, "learning_rate": 1.4021811783255912e-06, "lm_loss": 5.5234, "loss": 1.3422, "step": 1686, "text_contrastive_loss": 0.73 }, { "contrastive_loss": 0.3907, "epoch": 3.8081264108352144, "grad_norm": 14.358359336853027, "learning_rate": 1.3971466196899697e-06, "lm_loss": 5.4404, "loss": 1.3179, "step": 1687, "text_contrastive_loss": 0.7664 }, { "contrastive_loss": 0.4448, "epoch": 3.8103837471783297, "grad_norm": 13.757017135620117, "learning_rate": 1.3921196473620975e-06, "lm_loss": 5.4708, "loss": 1.3456, "step": 1688, "text_contrastive_loss": 0.7073 }, { "contrastive_loss": 0.4341, "epoch": 3.8126410835214446, "grad_norm": 14.16545295715332, "learning_rate": 1.3871002719269616e-06, "lm_loss": 5.5009, "loss": 1.3487, "step": 1689, "text_contrastive_loss": 0.7291 }, { "contrastive_loss": 0.413, "epoch": 3.81489841986456, "grad_norm": 13.017946243286133, "learning_rate": 1.3820885039535564e-06, "lm_loss": 5.4397, "loss": 1.3781, "step": 1690, "text_contrastive_loss": 0.8422 }, { "contrastive_loss": 0.4212, "epoch": 3.8171557562076748, "grad_norm": 14.79958438873291, "learning_rate": 1.3770843539948508e-06, "lm_loss": 5.4827, "loss": 1.3174, "step": 1691, "text_contrastive_loss": 0.6958 }, { "contrastive_loss": 0.4541, "epoch": 3.81941309255079, "grad_norm": 15.317020416259766, "learning_rate": 1.3720878325877785e-06, "lm_loss": 5.2869, "loss": 1.3768, "step": 1692, "text_contrastive_loss": 0.788 }, { "contrastive_loss": 0.4699, "epoch": 3.8216704288939054, "grad_norm": 15.10213565826416, "learning_rate": 1.3670989502532089e-06, "lm_loss": 5.4177, "loss": 1.3706, "step": 1693, "text_contrastive_loss": 0.718 }, { "contrastive_loss": 0.5646, "epoch": 3.8239277652370203, "grad_norm": 15.355016708374023, "learning_rate": 1.362117717495926e-06, "lm_loss": 5.5147, "loss": 1.4691, "step": 1694, "text_contrastive_loss": 0.7061 }, { "contrastive_loss": 0.4912, "epoch": 3.8261851015801356, "grad_norm": 16.03980255126953, "learning_rate": 1.3571441448046086e-06, "lm_loss": 5.3597, "loss": 1.3763, "step": 1695, "text_contrastive_loss": 0.6984 }, { "contrastive_loss": 0.5484, "epoch": 3.8284424379232505, "grad_norm": 15.086762428283691, "learning_rate": 1.3521782426517988e-06, "lm_loss": 5.4318, "loss": 1.5481, "step": 1696, "text_contrastive_loss": 0.913 }, { "contrastive_loss": 0.4006, "epoch": 3.8306997742663658, "grad_norm": 13.688698768615723, "learning_rate": 1.3472200214938974e-06, "lm_loss": 5.3465, "loss": 1.2766, "step": 1697, "text_contrastive_loss": 0.6826 }, { "contrastive_loss": 0.4939, "epoch": 3.832957110609481, "grad_norm": 14.000205993652344, "learning_rate": 1.3422694917711276e-06, "lm_loss": 5.3961, "loss": 1.4655, "step": 1698, "text_contrastive_loss": 0.8639 }, { "contrastive_loss": 0.4755, "epoch": 3.835214446952596, "grad_norm": 14.355860710144043, "learning_rate": 1.3373266639075134e-06, "lm_loss": 5.4054, "loss": 1.445, "step": 1699, "text_contrastive_loss": 0.8579 }, { "contrastive_loss": 0.5761, "epoch": 3.837471783295711, "grad_norm": 16.03438377380371, "learning_rate": 1.3323915483108662e-06, "lm_loss": 5.4396, "loss": 1.4326, "step": 1700, "text_contrastive_loss": 0.6251 }, { "contrastive_loss": 0.4676, "epoch": 3.839729119638826, "grad_norm": 13.848464012145996, "learning_rate": 1.3274641553727568e-06, "lm_loss": 5.5739, "loss": 1.4439, "step": 1701, "text_contrastive_loss": 0.8377 }, { "contrastive_loss": 0.5247, "epoch": 3.8419864559819414, "grad_norm": 15.230380058288574, "learning_rate": 1.3225444954684962e-06, "lm_loss": 5.4977, "loss": 1.4957, "step": 1702, "text_contrastive_loss": 0.8424 }, { "contrastive_loss": 0.5274, "epoch": 3.8442437923250563, "grad_norm": 15.571111679077148, "learning_rate": 1.3176325789571075e-06, "lm_loss": 5.5268, "loss": 1.5071, "step": 1703, "text_contrastive_loss": 0.854 }, { "contrastive_loss": 0.4379, "epoch": 3.8465011286681716, "grad_norm": 13.863898277282715, "learning_rate": 1.3127284161813153e-06, "lm_loss": 5.4114, "loss": 1.294, "step": 1704, "text_contrastive_loss": 0.6299 }, { "contrastive_loss": 0.4852, "epoch": 3.8487584650112865, "grad_norm": 15.147299766540527, "learning_rate": 1.3078320174675141e-06, "lm_loss": 5.4927, "loss": 1.4492, "step": 1705, "text_contrastive_loss": 0.8295 }, { "contrastive_loss": 0.3782, "epoch": 3.851015801354402, "grad_norm": 12.823390007019043, "learning_rate": 1.3029433931257524e-06, "lm_loss": 5.4188, "loss": 1.2186, "step": 1706, "text_contrastive_loss": 0.5971 }, { "contrastive_loss": 0.4107, "epoch": 3.853273137697517, "grad_norm": 12.695087432861328, "learning_rate": 1.2980625534497037e-06, "lm_loss": 5.4325, "loss": 1.321, "step": 1707, "text_contrastive_loss": 0.7341 }, { "contrastive_loss": 0.3792, "epoch": 3.855530474040632, "grad_norm": 12.667981147766113, "learning_rate": 1.2931895087166551e-06, "lm_loss": 5.4338, "loss": 1.346, "step": 1708, "text_contrastive_loss": 0.8469 }, { "contrastive_loss": 0.442, "epoch": 3.8577878103837473, "grad_norm": 13.558056831359863, "learning_rate": 1.2883242691874792e-06, "lm_loss": 5.4484, "loss": 1.354, "step": 1709, "text_contrastive_loss": 0.7343 }, { "contrastive_loss": 0.511, "epoch": 3.860045146726862, "grad_norm": 14.185285568237305, "learning_rate": 1.2834668451066118e-06, "lm_loss": 5.4079, "loss": 1.4592, "step": 1710, "text_contrastive_loss": 0.8148 }, { "contrastive_loss": 0.4396, "epoch": 3.8623024830699775, "grad_norm": 13.763110160827637, "learning_rate": 1.2786172467020357e-06, "lm_loss": 5.4503, "loss": 1.323, "step": 1711, "text_contrastive_loss": 0.6768 }, { "contrastive_loss": 0.597, "epoch": 3.864559819413093, "grad_norm": 14.601582527160645, "learning_rate": 1.2737754841852501e-06, "lm_loss": 5.3727, "loss": 1.5593, "step": 1712, "text_contrastive_loss": 0.8499 }, { "contrastive_loss": 0.4817, "epoch": 3.8668171557562077, "grad_norm": 13.016423225402832, "learning_rate": 1.2689415677512574e-06, "lm_loss": 5.4965, "loss": 1.4231, "step": 1713, "text_contrastive_loss": 0.7835 }, { "contrastive_loss": 0.4452, "epoch": 3.8690744920993225, "grad_norm": 14.020245552062988, "learning_rate": 1.2641155075785444e-06, "lm_loss": 5.5019, "loss": 1.3958, "step": 1714, "text_contrastive_loss": 0.8008 }, { "contrastive_loss": 0.5186, "epoch": 3.871331828442438, "grad_norm": 14.728551864624023, "learning_rate": 1.259297313829046e-06, "lm_loss": 5.3166, "loss": 1.4605, "step": 1715, "text_contrastive_loss": 0.8205 }, { "contrastive_loss": 0.5546, "epoch": 3.873589164785553, "grad_norm": 15.889209747314453, "learning_rate": 1.2544869966481389e-06, "lm_loss": 5.3853, "loss": 1.4501, "step": 1716, "text_contrastive_loss": 0.714 }, { "contrastive_loss": 0.375, "epoch": 3.875846501128668, "grad_norm": 12.50887393951416, "learning_rate": 1.249684566164614e-06, "lm_loss": 5.2945, "loss": 1.2586, "step": 1717, "text_contrastive_loss": 0.7083 }, { "contrastive_loss": 0.5103, "epoch": 3.8781038374717833, "grad_norm": 14.486296653747559, "learning_rate": 1.2448900324906559e-06, "lm_loss": 5.3843, "loss": 1.5037, "step": 1718, "text_contrastive_loss": 0.91 }, { "contrastive_loss": 0.4905, "epoch": 3.880361173814898, "grad_norm": 14.827803611755371, "learning_rate": 1.2401034057218181e-06, "lm_loss": 5.3555, "loss": 1.4405, "step": 1719, "text_contrastive_loss": 0.8288 }, { "contrastive_loss": 0.524, "epoch": 3.8826185101580135, "grad_norm": 16.471458435058594, "learning_rate": 1.2353246959370086e-06, "lm_loss": 5.4732, "loss": 1.5214, "step": 1720, "text_contrastive_loss": 0.9001 }, { "contrastive_loss": 0.5092, "epoch": 3.884875846501129, "grad_norm": 16.233083724975586, "learning_rate": 1.2305539131984646e-06, "lm_loss": 5.4302, "loss": 1.4203, "step": 1721, "text_contrastive_loss": 0.7361 }, { "contrastive_loss": 0.4494, "epoch": 3.8871331828442437, "grad_norm": 16.18627166748047, "learning_rate": 1.2257910675517315e-06, "lm_loss": 5.3978, "loss": 1.3565, "step": 1722, "text_contrastive_loss": 0.7346 }, { "contrastive_loss": 0.4763, "epoch": 3.889390519187359, "grad_norm": 14.904261589050293, "learning_rate": 1.22103616902564e-06, "lm_loss": 5.4004, "loss": 1.4777, "step": 1723, "text_contrastive_loss": 0.9226 }, { "contrastive_loss": 0.5008, "epoch": 3.891647855530474, "grad_norm": 15.24119758605957, "learning_rate": 1.21628922763229e-06, "lm_loss": 5.5289, "loss": 1.4381, "step": 1724, "text_contrastive_loss": 0.7689 }, { "contrastive_loss": 0.4041, "epoch": 3.893905191873589, "grad_norm": 13.18508529663086, "learning_rate": 1.2115502533670253e-06, "lm_loss": 5.4764, "loss": 1.2925, "step": 1725, "text_contrastive_loss": 0.6815 }, { "contrastive_loss": 0.5578, "epoch": 3.8961625282167045, "grad_norm": 15.574223518371582, "learning_rate": 1.2068192562084146e-06, "lm_loss": 5.3439, "loss": 1.5831, "step": 1726, "text_contrastive_loss": 0.9817 }, { "contrastive_loss": 0.481, "epoch": 3.8984198645598194, "grad_norm": 13.740864753723145, "learning_rate": 1.2020962461182268e-06, "lm_loss": 5.3276, "loss": 1.3377, "step": 1727, "text_contrastive_loss": 0.6479 }, { "contrastive_loss": 0.3554, "epoch": 3.9006772009029347, "grad_norm": 13.950639724731445, "learning_rate": 1.1973812330414159e-06, "lm_loss": 5.3181, "loss": 1.1667, "step": 1728, "text_contrastive_loss": 0.559 }, { "contrastive_loss": 0.5262, "epoch": 3.9029345372460496, "grad_norm": 14.745194435119629, "learning_rate": 1.1926742269060965e-06, "lm_loss": 5.391, "loss": 1.4259, "step": 1729, "text_contrastive_loss": 0.7212 }, { "contrastive_loss": 0.4077, "epoch": 3.905191873589165, "grad_norm": 13.899352073669434, "learning_rate": 1.1879752376235231e-06, "lm_loss": 5.5298, "loss": 1.3059, "step": 1730, "text_contrastive_loss": 0.6904 }, { "contrastive_loss": 0.4328, "epoch": 3.90744920993228, "grad_norm": 13.672224044799805, "learning_rate": 1.1832842750880702e-06, "lm_loss": 5.425, "loss": 1.3616, "step": 1731, "text_contrastive_loss": 0.7726 }, { "contrastive_loss": 0.4109, "epoch": 3.909706546275395, "grad_norm": 13.264911651611328, "learning_rate": 1.1786013491772103e-06, "lm_loss": 5.3754, "loss": 1.3222, "step": 1732, "text_contrastive_loss": 0.7474 }, { "contrastive_loss": 0.4658, "epoch": 3.91196388261851, "grad_norm": 14.345077514648438, "learning_rate": 1.173926469751493e-06, "lm_loss": 5.3543, "loss": 1.426, "step": 1733, "text_contrastive_loss": 0.8496 }, { "contrastive_loss": 0.5206, "epoch": 3.9142212189616252, "grad_norm": 14.702255249023438, "learning_rate": 1.1692596466545275e-06, "lm_loss": 5.4292, "loss": 1.5241, "step": 1734, "text_contrastive_loss": 0.9211 }, { "contrastive_loss": 0.4251, "epoch": 3.9164785553047405, "grad_norm": 15.130386352539062, "learning_rate": 1.1646008897129546e-06, "lm_loss": 5.4999, "loss": 1.4109, "step": 1735, "text_contrastive_loss": 0.8716 }, { "contrastive_loss": 0.4766, "epoch": 3.9187358916478554, "grad_norm": 14.442639350891113, "learning_rate": 1.1599502087364345e-06, "lm_loss": 5.4516, "loss": 1.3841, "step": 1736, "text_contrastive_loss": 0.7246 }, { "contrastive_loss": 0.5367, "epoch": 3.9209932279909707, "grad_norm": 14.633401870727539, "learning_rate": 1.1553076135176222e-06, "lm_loss": 5.4943, "loss": 1.5827, "step": 1737, "text_contrastive_loss": 0.9931 }, { "contrastive_loss": 0.4812, "epoch": 3.9232505643340856, "grad_norm": 15.330426216125488, "learning_rate": 1.1506731138321474e-06, "lm_loss": 5.3996, "loss": 1.4129, "step": 1738, "text_contrastive_loss": 0.7835 }, { "contrastive_loss": 0.4586, "epoch": 3.925507900677201, "grad_norm": 14.180137634277344, "learning_rate": 1.1460467194385889e-06, "lm_loss": 5.5286, "loss": 1.4647, "step": 1739, "text_contrastive_loss": 0.9064 }, { "contrastive_loss": 0.5139, "epoch": 3.927765237020316, "grad_norm": 15.674978256225586, "learning_rate": 1.1414284400784643e-06, "lm_loss": 5.4489, "loss": 1.5258, "step": 1740, "text_contrastive_loss": 0.934 }, { "contrastive_loss": 0.4641, "epoch": 3.930022573363431, "grad_norm": 15.997124671936035, "learning_rate": 1.1368182854762005e-06, "lm_loss": 5.4158, "loss": 1.3675, "step": 1741, "text_contrastive_loss": 0.7236 }, { "contrastive_loss": 0.3942, "epoch": 3.9322799097065464, "grad_norm": 14.213823318481445, "learning_rate": 1.13221626533912e-06, "lm_loss": 5.4638, "loss": 1.2717, "step": 1742, "text_contrastive_loss": 0.6622 }, { "contrastive_loss": 0.4433, "epoch": 3.9345372460496613, "grad_norm": 15.289708137512207, "learning_rate": 1.1276223893574123e-06, "lm_loss": 5.4448, "loss": 1.3599, "step": 1743, "text_contrastive_loss": 0.7442 }, { "contrastive_loss": 0.3326, "epoch": 3.9367945823927766, "grad_norm": 11.817400932312012, "learning_rate": 1.1230366672041216e-06, "lm_loss": 5.3922, "loss": 1.2229, "step": 1744, "text_contrastive_loss": 0.7022 }, { "contrastive_loss": 0.4143, "epoch": 3.939051918735892, "grad_norm": 14.098214149475098, "learning_rate": 1.118459108535122e-06, "lm_loss": 5.3779, "loss": 1.324, "step": 1745, "text_contrastive_loss": 0.7438 }, { "contrastive_loss": 0.4169, "epoch": 3.9413092550790068, "grad_norm": 12.802600860595703, "learning_rate": 1.1138897229890995e-06, "lm_loss": 5.5226, "loss": 1.2836, "step": 1746, "text_contrastive_loss": 0.6289 }, { "contrastive_loss": 0.5109, "epoch": 3.9435665914221216, "grad_norm": 15.707575798034668, "learning_rate": 1.109328520187528e-06, "lm_loss": 5.4749, "loss": 1.5525, "step": 1747, "text_contrastive_loss": 0.9882 }, { "contrastive_loss": 0.4281, "epoch": 3.945823927765237, "grad_norm": 13.964192390441895, "learning_rate": 1.1047755097346541e-06, "lm_loss": 5.4598, "loss": 1.3548, "step": 1748, "text_contrastive_loss": 0.7615 }, { "contrastive_loss": 0.5166, "epoch": 3.9480812641083523, "grad_norm": 15.360565185546875, "learning_rate": 1.100230701217473e-06, "lm_loss": 5.3465, "loss": 1.5531, "step": 1749, "text_contrastive_loss": 1.0037 }, { "contrastive_loss": 0.4775, "epoch": 3.950338600451467, "grad_norm": 13.847414016723633, "learning_rate": 1.0956941042057106e-06, "lm_loss": 5.4561, "loss": 1.4327, "step": 1750, "text_contrastive_loss": 0.819 }, { "contrastive_loss": 0.505, "epoch": 3.9525959367945824, "grad_norm": 14.46722412109375, "learning_rate": 1.091165728251799e-06, "lm_loss": 5.526, "loss": 1.4573, "step": 1751, "text_contrastive_loss": 0.7993 }, { "contrastive_loss": 0.4729, "epoch": 3.9548532731376973, "grad_norm": 13.864908218383789, "learning_rate": 1.0866455828908634e-06, "lm_loss": 5.5209, "loss": 1.3877, "step": 1752, "text_contrastive_loss": 0.7255 }, { "contrastive_loss": 0.4516, "epoch": 3.9571106094808126, "grad_norm": 13.434297561645508, "learning_rate": 1.082133677640697e-06, "lm_loss": 5.6237, "loss": 1.3943, "step": 1753, "text_contrastive_loss": 0.7606 }, { "contrastive_loss": 0.5079, "epoch": 3.959367945823928, "grad_norm": 14.11734676361084, "learning_rate": 1.0776300220017437e-06, "lm_loss": 5.4648, "loss": 1.5113, "step": 1754, "text_contrastive_loss": 0.914 }, { "contrastive_loss": 0.5111, "epoch": 3.961625282167043, "grad_norm": 15.088600158691406, "learning_rate": 1.0731346254570735e-06, "lm_loss": 5.4019, "loss": 1.5318, "step": 1755, "text_contrastive_loss": 0.961 }, { "contrastive_loss": 0.4083, "epoch": 3.963882618510158, "grad_norm": 12.624308586120605, "learning_rate": 1.068647497472368e-06, "lm_loss": 5.3553, "loss": 1.3268, "step": 1756, "text_contrastive_loss": 0.7659 }, { "contrastive_loss": 0.5247, "epoch": 3.966139954853273, "grad_norm": 15.862508773803711, "learning_rate": 1.064168647495899e-06, "lm_loss": 5.4139, "loss": 1.4569, "step": 1757, "text_contrastive_loss": 0.7817 }, { "contrastive_loss": 0.5116, "epoch": 3.9683972911963883, "grad_norm": 16.503145217895508, "learning_rate": 1.0596980849585065e-06, "lm_loss": 5.4638, "loss": 1.4759, "step": 1758, "text_contrastive_loss": 0.8359 }, { "contrastive_loss": 0.4919, "epoch": 3.9706546275395036, "grad_norm": 16.222732543945312, "learning_rate": 1.0552358192735784e-06, "lm_loss": 5.3278, "loss": 1.4059, "step": 1759, "text_contrastive_loss": 0.7624 }, { "contrastive_loss": 0.5374, "epoch": 3.9729119638826185, "grad_norm": 15.1146240234375, "learning_rate": 1.0507818598370355e-06, "lm_loss": 5.5162, "loss": 1.4554, "step": 1760, "text_contrastive_loss": 0.7328 }, { "contrastive_loss": 0.4234, "epoch": 3.975169300225734, "grad_norm": 13.736013412475586, "learning_rate": 1.0463362160273076e-06, "lm_loss": 5.3969, "loss": 1.3239, "step": 1761, "text_contrastive_loss": 0.7215 }, { "contrastive_loss": 0.504, "epoch": 3.9774266365688487, "grad_norm": 21.926189422607422, "learning_rate": 1.0418988972053162e-06, "lm_loss": 5.4649, "loss": 1.4763, "step": 1762, "text_contrastive_loss": 0.8517 }, { "contrastive_loss": 0.4926, "epoch": 3.979683972911964, "grad_norm": 16.662925720214844, "learning_rate": 1.037469912714449e-06, "lm_loss": 5.4606, "loss": 1.4339, "step": 1763, "text_contrastive_loss": 0.7906 }, { "contrastive_loss": 0.4355, "epoch": 3.9819413092550793, "grad_norm": 14.614496231079102, "learning_rate": 1.0330492718805469e-06, "lm_loss": 5.4014, "loss": 1.3031, "step": 1764, "text_contrastive_loss": 0.6549 }, { "contrastive_loss": 0.4375, "epoch": 3.984198645598194, "grad_norm": 14.14395809173584, "learning_rate": 1.0286369840118859e-06, "lm_loss": 5.3914, "loss": 1.374, "step": 1765, "text_contrastive_loss": 0.7948 }, { "contrastive_loss": 0.5569, "epoch": 3.986455981941309, "grad_norm": 15.8093843460083, "learning_rate": 1.0242330583991507e-06, "lm_loss": 5.2657, "loss": 1.5593, "step": 1766, "text_contrastive_loss": 0.9518 }, { "contrastive_loss": 0.4489, "epoch": 3.9887133182844243, "grad_norm": 15.425673484802246, "learning_rate": 1.0198375043154142e-06, "lm_loss": 5.4266, "loss": 1.376, "step": 1767, "text_contrastive_loss": 0.7689 }, { "contrastive_loss": 0.4843, "epoch": 3.9909706546275396, "grad_norm": 15.343976974487305, "learning_rate": 1.0154503310161269e-06, "lm_loss": 5.4899, "loss": 1.4064, "step": 1768, "text_contrastive_loss": 0.7462 }, { "contrastive_loss": 0.4129, "epoch": 3.9932279909706545, "grad_norm": 13.66511344909668, "learning_rate": 1.0110715477390915e-06, "lm_loss": 5.4581, "loss": 1.349, "step": 1769, "text_contrastive_loss": 0.7806 }, { "contrastive_loss": 0.4695, "epoch": 3.99548532731377, "grad_norm": 15.749361038208008, "learning_rate": 1.006701163704445e-06, "lm_loss": 5.4947, "loss": 1.4914, "step": 1770, "text_contrastive_loss": 0.9449 }, { "contrastive_loss": 0.3889, "epoch": 3.9977426636568847, "grad_norm": 13.284844398498535, "learning_rate": 1.0023391881146349e-06, "lm_loss": 5.504, "loss": 1.3642, "step": 1771, "text_contrastive_loss": 0.8498 }, { "contrastive_loss": 0.3234, "epoch": 4.0, "grad_norm": 17.181509017944336, "learning_rate": 9.97985630154407e-07, "lm_loss": 5.4794, "loss": 1.1494, "step": 1772, "text_contrastive_loss": 0.5561 }, { "contrastive_loss": 0.4377, "epoch": 4.002257336343115, "grad_norm": 13.104743003845215, "learning_rate": 9.936404989907828e-07, "lm_loss": 5.4367, "loss": 1.3376, "step": 1773, "text_contrastive_loss": 0.7125 }, { "contrastive_loss": 0.5083, "epoch": 4.004514672686231, "grad_norm": 15.707433700561523, "learning_rate": 9.89303803773039e-07, "lm_loss": 5.4245, "loss": 1.4998, "step": 1774, "text_contrastive_loss": 0.8981 }, { "contrastive_loss": 0.4177, "epoch": 4.006772009029345, "grad_norm": 14.663115501403809, "learning_rate": 9.849755536326866e-07, "lm_loss": 5.3359, "loss": 1.3495, "step": 1775, "text_contrastive_loss": 0.7963 }, { "contrastive_loss": 0.4854, "epoch": 4.00902934537246, "grad_norm": 15.18082046508789, "learning_rate": 9.806557576834591e-07, "lm_loss": 5.4073, "loss": 1.4111, "step": 1776, "text_contrastive_loss": 0.77 }, { "contrastive_loss": 0.471, "epoch": 4.011286681715576, "grad_norm": 14.365391731262207, "learning_rate": 9.763444250212855e-07, "lm_loss": 5.4356, "loss": 1.4296, "step": 1777, "text_contrastive_loss": 0.8301 }, { "contrastive_loss": 0.4621, "epoch": 4.013544018058691, "grad_norm": 13.901593208312988, "learning_rate": 9.72041564724277e-07, "lm_loss": 5.5117, "loss": 1.4068, "step": 1778, "text_contrastive_loss": 0.7871 }, { "contrastive_loss": 0.4065, "epoch": 4.015801354401806, "grad_norm": 11.42273235321045, "learning_rate": 9.677471858526998e-07, "lm_loss": 5.5015, "loss": 1.3673, "step": 1779, "text_contrastive_loss": 0.8212 }, { "contrastive_loss": 0.4544, "epoch": 4.018058690744921, "grad_norm": 13.887290000915527, "learning_rate": 9.63461297448966e-07, "lm_loss": 5.3918, "loss": 1.3662, "step": 1780, "text_contrastive_loss": 0.7452 }, { "contrastive_loss": 0.5371, "epoch": 4.020316027088036, "grad_norm": 15.941572189331055, "learning_rate": 9.59183908537607e-07, "lm_loss": 5.4437, "loss": 1.5493, "step": 1781, "text_contrastive_loss": 0.9356 }, { "contrastive_loss": 0.4149, "epoch": 4.022573363431151, "grad_norm": 15.179197311401367, "learning_rate": 9.549150281252633e-07, "lm_loss": 5.5243, "loss": 1.3193, "step": 1782, "text_contrastive_loss": 0.7039 }, { "contrastive_loss": 0.4376, "epoch": 4.024830699774267, "grad_norm": 14.96658706665039, "learning_rate": 9.506546652006504e-07, "lm_loss": 5.4864, "loss": 1.3762, "step": 1783, "text_contrastive_loss": 0.78 }, { "contrastive_loss": 0.4023, "epoch": 4.027088036117381, "grad_norm": 14.274824142456055, "learning_rate": 9.464028287345551e-07, "lm_loss": 5.4383, "loss": 1.32, "step": 1784, "text_contrastive_loss": 0.7477 }, { "contrastive_loss": 0.4299, "epoch": 4.029345372460496, "grad_norm": 13.89789867401123, "learning_rate": 9.421595276798084e-07, "lm_loss": 5.5811, "loss": 1.4355, "step": 1785, "text_contrastive_loss": 0.895 }, { "contrastive_loss": 0.4944, "epoch": 4.031602708803612, "grad_norm": 15.849641799926758, "learning_rate": 9.379247709712725e-07, "lm_loss": 5.4862, "loss": 1.5002, "step": 1786, "text_contrastive_loss": 0.9144 }, { "contrastive_loss": 0.5184, "epoch": 4.033860045146727, "grad_norm": 13.822864532470703, "learning_rate": 9.336985675258109e-07, "lm_loss": 5.3553, "loss": 1.5108, "step": 1787, "text_contrastive_loss": 0.9138 }, { "contrastive_loss": 0.4489, "epoch": 4.036117381489842, "grad_norm": 13.175774574279785, "learning_rate": 9.294809262422838e-07, "lm_loss": 5.5385, "loss": 1.392, "step": 1788, "text_contrastive_loss": 0.7784 }, { "contrastive_loss": 0.4705, "epoch": 4.038374717832957, "grad_norm": 15.42026138305664, "learning_rate": 9.2527185600152e-07, "lm_loss": 5.3664, "loss": 1.3918, "step": 1789, "text_contrastive_loss": 0.7694 }, { "contrastive_loss": 0.376, "epoch": 4.040632054176072, "grad_norm": 12.850754737854004, "learning_rate": 9.210713656663023e-07, "lm_loss": 5.3777, "loss": 1.3102, "step": 1790, "text_contrastive_loss": 0.7928 }, { "contrastive_loss": 0.417, "epoch": 4.042889390519187, "grad_norm": 14.971044540405273, "learning_rate": 9.168794640813428e-07, "lm_loss": 5.3417, "loss": 1.2681, "step": 1791, "text_contrastive_loss": 0.6338 }, { "contrastive_loss": 0.4723, "epoch": 4.045146726862303, "grad_norm": 14.143229484558105, "learning_rate": 9.126961600732742e-07, "lm_loss": 5.4334, "loss": 1.3816, "step": 1792, "text_contrastive_loss": 0.7319 }, { "contrastive_loss": 0.4157, "epoch": 4.047404063205418, "grad_norm": 12.646199226379395, "learning_rate": 9.085214624506228e-07, "lm_loss": 5.5195, "loss": 1.3676, "step": 1793, "text_contrastive_loss": 0.8 }, { "contrastive_loss": 0.4584, "epoch": 4.049661399548532, "grad_norm": 13.476517677307129, "learning_rate": 9.043553800037952e-07, "lm_loss": 5.4054, "loss": 1.3187, "step": 1794, "text_contrastive_loss": 0.6395 }, { "contrastive_loss": 0.489, "epoch": 4.051918735891648, "grad_norm": 15.695509910583496, "learning_rate": 9.001979215050544e-07, "lm_loss": 5.417, "loss": 1.4629, "step": 1795, "text_contrastive_loss": 0.8644 }, { "contrastive_loss": 0.4725, "epoch": 4.054176072234763, "grad_norm": 14.158413887023926, "learning_rate": 8.960490957085061e-07, "lm_loss": 5.2968, "loss": 1.3294, "step": 1796, "text_contrastive_loss": 0.6545 }, { "contrastive_loss": 0.5789, "epoch": 4.056433408577878, "grad_norm": 16.090099334716797, "learning_rate": 8.919089113500795e-07, "lm_loss": 5.4087, "loss": 1.5167, "step": 1797, "text_contrastive_loss": 0.794 }, { "contrastive_loss": 0.5213, "epoch": 4.058690744920993, "grad_norm": 14.36949348449707, "learning_rate": 8.877773771475074e-07, "lm_loss": 5.5536, "loss": 1.453, "step": 1798, "text_contrastive_loss": 0.7525 }, { "contrastive_loss": 0.4, "epoch": 4.060948081264108, "grad_norm": 12.922706604003906, "learning_rate": 8.836545018003084e-07, "lm_loss": 5.5052, "loss": 1.3011, "step": 1799, "text_contrastive_loss": 0.7011 }, { "contrastive_loss": 0.4973, "epoch": 4.063205417607223, "grad_norm": 14.795793533325195, "learning_rate": 8.795402939897679e-07, "lm_loss": 5.4265, "loss": 1.4791, "step": 1800, "text_contrastive_loss": 0.8782 }, { "contrastive_loss": 0.4619, "epoch": 4.065462753950339, "grad_norm": 14.072027206420898, "learning_rate": 8.754347623789222e-07, "lm_loss": 5.4428, "loss": 1.4786, "step": 1801, "text_contrastive_loss": 0.9448 }, { "contrastive_loss": 0.3627, "epoch": 4.067720090293454, "grad_norm": 13.702142715454102, "learning_rate": 8.713379156125385e-07, "lm_loss": 5.537, "loss": 1.2641, "step": 1802, "text_contrastive_loss": 0.6955 }, { "contrastive_loss": 0.5504, "epoch": 4.0699774266365685, "grad_norm": 15.301467895507812, "learning_rate": 8.672497623170944e-07, "lm_loss": 5.341, "loss": 1.4963, "step": 1803, "text_contrastive_loss": 0.8237 }, { "contrastive_loss": 0.3903, "epoch": 4.072234762979684, "grad_norm": 13.139961242675781, "learning_rate": 8.631703111007645e-07, "lm_loss": 5.4437, "loss": 1.2991, "step": 1804, "text_contrastive_loss": 0.7289 }, { "contrastive_loss": 0.4969, "epoch": 4.074492099322799, "grad_norm": 13.641329765319824, "learning_rate": 8.590995705533994e-07, "lm_loss": 5.458, "loss": 1.4525, "step": 1805, "text_contrastive_loss": 0.8197 }, { "contrastive_loss": 0.4982, "epoch": 4.076749435665914, "grad_norm": 13.534671783447266, "learning_rate": 8.550375492465102e-07, "lm_loss": 5.5236, "loss": 1.384, "step": 1806, "text_contrastive_loss": 0.6671 }, { "contrastive_loss": 0.4624, "epoch": 4.07900677200903, "grad_norm": 13.669743537902832, "learning_rate": 8.509842557332437e-07, "lm_loss": 5.5115, "loss": 1.4749, "step": 1807, "text_contrastive_loss": 0.9227 }, { "contrastive_loss": 0.4852, "epoch": 4.081264108352144, "grad_norm": 16.874771118164062, "learning_rate": 8.469396985483724e-07, "lm_loss": 5.3365, "loss": 1.4552, "step": 1808, "text_contrastive_loss": 0.8728 }, { "contrastive_loss": 0.5253, "epoch": 4.0835214446952595, "grad_norm": 15.528621673583984, "learning_rate": 8.429038862082734e-07, "lm_loss": 5.52, "loss": 1.5023, "step": 1809, "text_contrastive_loss": 0.8501 }, { "contrastive_loss": 0.409, "epoch": 4.085778781038375, "grad_norm": 13.915301322937012, "learning_rate": 8.388768272109105e-07, "lm_loss": 5.4367, "loss": 1.3735, "step": 1810, "text_contrastive_loss": 0.8417 }, { "contrastive_loss": 0.4184, "epoch": 4.08803611738149, "grad_norm": 14.407670974731445, "learning_rate": 8.34858530035813e-07, "lm_loss": 5.4788, "loss": 1.2872, "step": 1811, "text_contrastive_loss": 0.6419 }, { "contrastive_loss": 0.6038, "epoch": 4.090293453724605, "grad_norm": 16.431411743164062, "learning_rate": 8.308490031440641e-07, "lm_loss": 5.4824, "loss": 1.6084, "step": 1812, "text_contrastive_loss": 0.9129 }, { "contrastive_loss": 0.3561, "epoch": 4.09255079006772, "grad_norm": 13.252738952636719, "learning_rate": 8.268482549782797e-07, "lm_loss": 5.4143, "loss": 1.3345, "step": 1813, "text_contrastive_loss": 0.874 }, { "contrastive_loss": 0.4037, "epoch": 4.094808126410835, "grad_norm": 14.096956253051758, "learning_rate": 8.228562939625906e-07, "lm_loss": 5.3922, "loss": 1.2912, "step": 1814, "text_contrastive_loss": 0.6966 }, { "contrastive_loss": 0.4045, "epoch": 4.0970654627539504, "grad_norm": 13.017566680908203, "learning_rate": 8.188731285026219e-07, "lm_loss": 5.3951, "loss": 1.3021, "step": 1815, "text_contrastive_loss": 0.7161 }, { "contrastive_loss": 0.5328, "epoch": 4.099322799097066, "grad_norm": 14.632208824157715, "learning_rate": 8.148987669854846e-07, "lm_loss": 5.4135, "loss": 1.4236, "step": 1816, "text_contrastive_loss": 0.699 }, { "contrastive_loss": 0.4509, "epoch": 4.10158013544018, "grad_norm": 13.238189697265625, "learning_rate": 8.109332177797469e-07, "lm_loss": 5.553, "loss": 1.4275, "step": 1817, "text_contrastive_loss": 0.8426 }, { "contrastive_loss": 0.4242, "epoch": 4.1038374717832955, "grad_norm": 14.228952407836914, "learning_rate": 8.069764892354237e-07, "lm_loss": 5.3318, "loss": 1.4155, "step": 1818, "text_contrastive_loss": 0.9163 }, { "contrastive_loss": 0.4922, "epoch": 4.106094808126411, "grad_norm": 14.51209831237793, "learning_rate": 8.030285896839546e-07, "lm_loss": 5.4793, "loss": 1.4443, "step": 1819, "text_contrastive_loss": 0.8084 }, { "contrastive_loss": 0.5215, "epoch": 4.108352144469526, "grad_norm": 16.435653686523438, "learning_rate": 7.99089527438191e-07, "lm_loss": 5.3626, "loss": 1.461, "step": 1820, "text_contrastive_loss": 0.8065 }, { "contrastive_loss": 0.4164, "epoch": 4.110609480812641, "grad_norm": 14.234169960021973, "learning_rate": 7.951593107923744e-07, "lm_loss": 5.4942, "loss": 1.3652, "step": 1821, "text_contrastive_loss": 0.7987 }, { "contrastive_loss": 0.4092, "epoch": 4.112866817155756, "grad_norm": 13.296266555786133, "learning_rate": 7.912379480221228e-07, "lm_loss": 5.3685, "loss": 1.3136, "step": 1822, "text_contrastive_loss": 0.7351 }, { "contrastive_loss": 0.4893, "epoch": 4.115124153498871, "grad_norm": 15.535466194152832, "learning_rate": 7.873254473844077e-07, "lm_loss": 5.4085, "loss": 1.5374, "step": 1823, "text_contrastive_loss": 1.0144 }, { "contrastive_loss": 0.4283, "epoch": 4.1173814898419865, "grad_norm": 12.822464942932129, "learning_rate": 7.834218171175428e-07, "lm_loss": 5.3586, "loss": 1.2899, "step": 1824, "text_contrastive_loss": 0.6514 }, { "contrastive_loss": 0.5152, "epoch": 4.119638826185102, "grad_norm": 14.885557174682617, "learning_rate": 7.795270654411635e-07, "lm_loss": 5.4649, "loss": 1.4202, "step": 1825, "text_contrastive_loss": 0.7169 }, { "contrastive_loss": 0.501, "epoch": 4.121896162528217, "grad_norm": 14.53622817993164, "learning_rate": 7.756412005562114e-07, "lm_loss": 5.4078, "loss": 1.4516, "step": 1826, "text_contrastive_loss": 0.8195 }, { "contrastive_loss": 0.4134, "epoch": 4.1241534988713315, "grad_norm": 13.508371353149414, "learning_rate": 7.717642306449113e-07, "lm_loss": 5.4248, "loss": 1.3355, "step": 1827, "text_contrastive_loss": 0.7593 }, { "contrastive_loss": 0.521, "epoch": 4.126410835214447, "grad_norm": 15.592608451843262, "learning_rate": 7.678961638707633e-07, "lm_loss": 5.4752, "loss": 1.4903, "step": 1828, "text_contrastive_loss": 0.8436 }, { "contrastive_loss": 0.4885, "epoch": 4.128668171557562, "grad_norm": 14.605595588684082, "learning_rate": 7.640370083785175e-07, "lm_loss": 5.3554, "loss": 1.4552, "step": 1829, "text_contrastive_loss": 0.8622 }, { "contrastive_loss": 0.454, "epoch": 4.1309255079006775, "grad_norm": 15.124018669128418, "learning_rate": 7.601867722941642e-07, "lm_loss": 5.5599, "loss": 1.4615, "step": 1830, "text_contrastive_loss": 0.903 }, { "contrastive_loss": 0.3847, "epoch": 4.133182844243792, "grad_norm": 12.90140438079834, "learning_rate": 7.563454637249056e-07, "lm_loss": 5.6135, "loss": 1.352, "step": 1831, "text_contrastive_loss": 0.8119 }, { "contrastive_loss": 0.4153, "epoch": 4.135440180586907, "grad_norm": 13.231061935424805, "learning_rate": 7.52513090759151e-07, "lm_loss": 5.419, "loss": 1.3291, "step": 1832, "text_contrastive_loss": 0.7439 }, { "contrastive_loss": 0.4059, "epoch": 4.1376975169300225, "grad_norm": 12.05694580078125, "learning_rate": 7.486896614664962e-07, "lm_loss": 5.5334, "loss": 1.3346, "step": 1833, "text_contrastive_loss": 0.7508 }, { "contrastive_loss": 0.3801, "epoch": 4.139954853273138, "grad_norm": 11.814329147338867, "learning_rate": 7.448751838977014e-07, "lm_loss": 5.4661, "loss": 1.2422, "step": 1834, "text_contrastive_loss": 0.6309 }, { "contrastive_loss": 0.4647, "epoch": 4.142212189616253, "grad_norm": 14.641359329223633, "learning_rate": 7.410696660846761e-07, "lm_loss": 5.5061, "loss": 1.4345, "step": 1835, "text_contrastive_loss": 0.8383 }, { "contrastive_loss": 0.4706, "epoch": 4.144469525959368, "grad_norm": 14.823846817016602, "learning_rate": 7.372731160404672e-07, "lm_loss": 5.3886, "loss": 1.3952, "step": 1836, "text_contrastive_loss": 0.7714 }, { "contrastive_loss": 0.4976, "epoch": 4.146726862302483, "grad_norm": 14.605664253234863, "learning_rate": 7.334855417592385e-07, "lm_loss": 5.41, "loss": 1.4627, "step": 1837, "text_contrastive_loss": 0.8481 }, { "contrastive_loss": 0.4269, "epoch": 4.148984198645598, "grad_norm": 13.060043334960938, "learning_rate": 7.297069512162535e-07, "lm_loss": 5.5036, "loss": 1.3205, "step": 1838, "text_contrastive_loss": 0.6865 }, { "contrastive_loss": 0.485, "epoch": 4.1512415349887135, "grad_norm": 15.681782722473145, "learning_rate": 7.25937352367857e-07, "lm_loss": 5.4305, "loss": 1.4929, "step": 1839, "text_contrastive_loss": 0.9297 }, { "contrastive_loss": 0.4293, "epoch": 4.153498871331829, "grad_norm": 13.646442413330078, "learning_rate": 7.22176753151464e-07, "lm_loss": 5.515, "loss": 1.3904, "step": 1840, "text_contrastive_loss": 0.8192 }, { "contrastive_loss": 0.3938, "epoch": 4.155756207674943, "grad_norm": 13.667914390563965, "learning_rate": 7.184251614855369e-07, "lm_loss": 5.5125, "loss": 1.319, "step": 1841, "text_contrastive_loss": 0.7477 }, { "contrastive_loss": 0.4512, "epoch": 4.158013544018059, "grad_norm": 15.908001899719238, "learning_rate": 7.146825852695749e-07, "lm_loss": 5.4195, "loss": 1.4085, "step": 1842, "text_contrastive_loss": 0.8306 }, { "contrastive_loss": 0.4323, "epoch": 4.160270880361174, "grad_norm": 13.84862232208252, "learning_rate": 7.109490323840884e-07, "lm_loss": 5.4441, "loss": 1.35, "step": 1843, "text_contrastive_loss": 0.7466 }, { "contrastive_loss": 0.3973, "epoch": 4.162528216704289, "grad_norm": 13.460807800292969, "learning_rate": 7.072245106905928e-07, "lm_loss": 5.5048, "loss": 1.3119, "step": 1844, "text_contrastive_loss": 0.7281 }, { "contrastive_loss": 0.3929, "epoch": 4.164785553047404, "grad_norm": 12.910388946533203, "learning_rate": 7.035090280315854e-07, "lm_loss": 5.4203, "loss": 1.3607, "step": 1845, "text_contrastive_loss": 0.8514 }, { "contrastive_loss": 0.4592, "epoch": 4.167042889390519, "grad_norm": 13.615018844604492, "learning_rate": 6.998025922305313e-07, "lm_loss": 5.4446, "loss": 1.3625, "step": 1846, "text_contrastive_loss": 0.7177 }, { "contrastive_loss": 0.4183, "epoch": 4.169300225733634, "grad_norm": 12.962539672851562, "learning_rate": 6.961052110918432e-07, "lm_loss": 5.385, "loss": 1.3024, "step": 1847, "text_contrastive_loss": 0.6911 }, { "contrastive_loss": 0.3318, "epoch": 4.1715575620767495, "grad_norm": 13.18665885925293, "learning_rate": 6.924168924008712e-07, "lm_loss": 5.3562, "loss": 1.2534, "step": 1848, "text_contrastive_loss": 0.772 }, { "contrastive_loss": 0.4751, "epoch": 4.173814898419865, "grad_norm": 16.380186080932617, "learning_rate": 6.887376439238813e-07, "lm_loss": 5.4731, "loss": 1.4056, "step": 1849, "text_contrastive_loss": 0.7664 }, { "contrastive_loss": 0.3609, "epoch": 4.176072234762979, "grad_norm": 13.097086906433105, "learning_rate": 6.850674734080454e-07, "lm_loss": 5.2816, "loss": 1.2457, "step": 1850, "text_contrastive_loss": 0.7133 }, { "contrastive_loss": 0.4522, "epoch": 4.178329571106095, "grad_norm": 14.39322280883789, "learning_rate": 6.814063885814127e-07, "lm_loss": 5.4431, "loss": 1.3514, "step": 1851, "text_contrastive_loss": 0.7098 }, { "contrastive_loss": 0.4811, "epoch": 4.18058690744921, "grad_norm": 14.79831600189209, "learning_rate": 6.77754397152906e-07, "lm_loss": 5.3433, "loss": 1.387, "step": 1852, "text_contrastive_loss": 0.7431 }, { "contrastive_loss": 0.5088, "epoch": 4.182844243792325, "grad_norm": 14.93602466583252, "learning_rate": 6.741115068123017e-07, "lm_loss": 5.4505, "loss": 1.499, "step": 1853, "text_contrastive_loss": 0.8904 }, { "contrastive_loss": 0.4484, "epoch": 4.1851015801354405, "grad_norm": 16.18085289001465, "learning_rate": 6.704777252302108e-07, "lm_loss": 5.3618, "loss": 1.3419, "step": 1854, "text_contrastive_loss": 0.7147 }, { "contrastive_loss": 0.3328, "epoch": 4.187358916478555, "grad_norm": 12.081189155578613, "learning_rate": 6.66853060058063e-07, "lm_loss": 5.4285, "loss": 1.189, "step": 1855, "text_contrastive_loss": 0.6266 }, { "contrastive_loss": 0.371, "epoch": 4.18961625282167, "grad_norm": 12.349919319152832, "learning_rate": 6.632375189280948e-07, "lm_loss": 5.4435, "loss": 1.2288, "step": 1856, "text_contrastive_loss": 0.6268 }, { "contrastive_loss": 0.4886, "epoch": 4.191873589164786, "grad_norm": 15.420004844665527, "learning_rate": 6.596311094533292e-07, "lm_loss": 5.4754, "loss": 1.5055, "step": 1857, "text_contrastive_loss": 0.9387 }, { "contrastive_loss": 0.4205, "epoch": 4.194130925507901, "grad_norm": 13.639395713806152, "learning_rate": 6.56033839227564e-07, "lm_loss": 5.4007, "loss": 1.2925, "step": 1858, "text_contrastive_loss": 0.6639 }, { "contrastive_loss": 0.4923, "epoch": 4.196388261851016, "grad_norm": 14.98202133178711, "learning_rate": 6.524457158253472e-07, "lm_loss": 5.397, "loss": 1.4891, "step": 1859, "text_contrastive_loss": 0.9143 }, { "contrastive_loss": 0.5228, "epoch": 4.198645598194131, "grad_norm": 15.258172988891602, "learning_rate": 6.488667468019727e-07, "lm_loss": 5.4108, "loss": 1.4718, "step": 1860, "text_contrastive_loss": 0.8158 }, { "contrastive_loss": 0.3381, "epoch": 4.200902934537246, "grad_norm": 11.965526580810547, "learning_rate": 6.452969396934567e-07, "lm_loss": 5.6014, "loss": 1.2549, "step": 1861, "text_contrastive_loss": 0.7134 }, { "contrastive_loss": 0.33, "epoch": 4.203160270880361, "grad_norm": 12.12585735321045, "learning_rate": 6.417363020165235e-07, "lm_loss": 5.3321, "loss": 1.2319, "step": 1862, "text_contrastive_loss": 0.7373 }, { "contrastive_loss": 0.361, "epoch": 4.205417607223477, "grad_norm": 11.505969047546387, "learning_rate": 6.381848412685882e-07, "lm_loss": 5.4543, "loss": 1.3033, "step": 1863, "text_contrastive_loss": 0.7938 }, { "contrastive_loss": 0.4826, "epoch": 4.207674943566591, "grad_norm": 15.115910530090332, "learning_rate": 6.346425649277454e-07, "lm_loss": 5.3651, "loss": 1.4377, "step": 1864, "text_contrastive_loss": 0.8371 }, { "contrastive_loss": 0.5046, "epoch": 4.209932279909706, "grad_norm": 17.10780143737793, "learning_rate": 6.31109480452749e-07, "lm_loss": 5.4801, "loss": 1.4573, "step": 1865, "text_contrastive_loss": 0.8095 }, { "contrastive_loss": 0.4326, "epoch": 4.212189616252822, "grad_norm": 13.92415714263916, "learning_rate": 6.275855952829995e-07, "lm_loss": 5.4767, "loss": 1.3341, "step": 1866, "text_contrastive_loss": 0.7076 }, { "contrastive_loss": 0.4305, "epoch": 4.214446952595937, "grad_norm": 13.554123878479004, "learning_rate": 6.240709168385251e-07, "lm_loss": 5.3376, "loss": 1.3276, "step": 1867, "text_contrastive_loss": 0.7266 }, { "contrastive_loss": 0.4668, "epoch": 4.216704288939052, "grad_norm": 14.565821647644043, "learning_rate": 6.2056545251997e-07, "lm_loss": 5.4353, "loss": 1.3705, "step": 1868, "text_contrastive_loss": 0.7203 }, { "contrastive_loss": 0.5562, "epoch": 4.218961625282167, "grad_norm": 17.39376449584961, "learning_rate": 6.170692097085751e-07, "lm_loss": 5.4903, "loss": 1.5995, "step": 1869, "text_contrastive_loss": 0.9887 }, { "contrastive_loss": 0.3762, "epoch": 4.221218961625282, "grad_norm": 13.919958114624023, "learning_rate": 6.135821957661658e-07, "lm_loss": 5.5311, "loss": 1.2931, "step": 1870, "text_contrastive_loss": 0.7276 }, { "contrastive_loss": 0.4136, "epoch": 4.223476297968397, "grad_norm": 14.79681396484375, "learning_rate": 6.101044180351318e-07, "lm_loss": 5.4694, "loss": 1.3332, "step": 1871, "text_contrastive_loss": 0.7453 }, { "contrastive_loss": 0.4591, "epoch": 4.225733634311513, "grad_norm": 14.416868209838867, "learning_rate": 6.066358838384184e-07, "lm_loss": 5.4351, "loss": 1.3984, "step": 1872, "text_contrastive_loss": 0.7916 }, { "contrastive_loss": 0.4307, "epoch": 4.227990970654628, "grad_norm": 14.388350486755371, "learning_rate": 6.031766004795047e-07, "lm_loss": 5.4554, "loss": 1.3488, "step": 1873, "text_contrastive_loss": 0.7452 }, { "contrastive_loss": 0.4374, "epoch": 4.230248306997742, "grad_norm": 14.363672256469727, "learning_rate": 5.997265752423936e-07, "lm_loss": 5.463, "loss": 1.3909, "step": 1874, "text_contrastive_loss": 0.8144 }, { "contrastive_loss": 0.4589, "epoch": 4.232505643340858, "grad_norm": 13.792438507080078, "learning_rate": 5.962858153915896e-07, "lm_loss": 5.5655, "loss": 1.345, "step": 1875, "text_contrastive_loss": 0.659 }, { "contrastive_loss": 0.5754, "epoch": 4.234762979683973, "grad_norm": 16.32940673828125, "learning_rate": 5.928543281720917e-07, "lm_loss": 5.4759, "loss": 1.5221, "step": 1876, "text_contrastive_loss": 0.7982 }, { "contrastive_loss": 0.4951, "epoch": 4.237020316027088, "grad_norm": 15.022712707519531, "learning_rate": 5.894321208093712e-07, "lm_loss": 5.4656, "loss": 1.4397, "step": 1877, "text_contrastive_loss": 0.796 }, { "contrastive_loss": 0.4523, "epoch": 4.239277652370204, "grad_norm": 13.635482788085938, "learning_rate": 5.860192005093624e-07, "lm_loss": 5.4288, "loss": 1.3612, "step": 1878, "text_contrastive_loss": 0.7321 }, { "contrastive_loss": 0.4383, "epoch": 4.241534988713318, "grad_norm": 15.492339134216309, "learning_rate": 5.826155744584405e-07, "lm_loss": 5.3379, "loss": 1.3359, "step": 1879, "text_contrastive_loss": 0.7277 }, { "contrastive_loss": 0.3843, "epoch": 4.243792325056433, "grad_norm": 12.946342468261719, "learning_rate": 5.792212498234134e-07, "lm_loss": 5.39, "loss": 1.3303, "step": 1880, "text_contrastive_loss": 0.8139 }, { "contrastive_loss": 0.3976, "epoch": 4.246049661399549, "grad_norm": 13.186134338378906, "learning_rate": 5.758362337515028e-07, "lm_loss": 5.3961, "loss": 1.2939, "step": 1881, "text_contrastive_loss": 0.7133 }, { "contrastive_loss": 0.4201, "epoch": 4.248306997742664, "grad_norm": 13.884965896606445, "learning_rate": 5.724605333703303e-07, "lm_loss": 5.5192, "loss": 1.4111, "step": 1882, "text_contrastive_loss": 0.878 }, { "contrastive_loss": 0.395, "epoch": 4.250564334085778, "grad_norm": 13.937795639038086, "learning_rate": 5.690941557878988e-07, "lm_loss": 5.4367, "loss": 1.3737, "step": 1883, "text_contrastive_loss": 0.87 }, { "contrastive_loss": 0.3783, "epoch": 4.252821670428894, "grad_norm": 12.562926292419434, "learning_rate": 5.657371080925866e-07, "lm_loss": 5.4335, "loss": 1.2505, "step": 1884, "text_contrastive_loss": 0.6577 }, { "contrastive_loss": 0.4003, "epoch": 4.255079006772009, "grad_norm": 13.565262794494629, "learning_rate": 5.623893973531225e-07, "lm_loss": 5.4687, "loss": 1.2524, "step": 1885, "text_contrastive_loss": 0.6104 }, { "contrastive_loss": 0.4286, "epoch": 4.257336343115124, "grad_norm": 13.630789756774902, "learning_rate": 5.590510306185765e-07, "lm_loss": 5.5025, "loss": 1.3563, "step": 1886, "text_contrastive_loss": 0.755 }, { "contrastive_loss": 0.444, "epoch": 4.25959367945824, "grad_norm": 14.704750061035156, "learning_rate": 5.557220149183412e-07, "lm_loss": 5.4745, "loss": 1.4715, "step": 1887, "text_contrastive_loss": 0.9602 }, { "contrastive_loss": 0.4543, "epoch": 4.261851015801354, "grad_norm": 15.774658203125, "learning_rate": 5.524023572621229e-07, "lm_loss": 5.5554, "loss": 1.4817, "step": 1888, "text_contrastive_loss": 0.9437 }, { "contrastive_loss": 0.4249, "epoch": 4.264108352144469, "grad_norm": 14.661408424377441, "learning_rate": 5.4909206463992e-07, "lm_loss": 5.5236, "loss": 1.4554, "step": 1889, "text_contrastive_loss": 0.9563 }, { "contrastive_loss": 0.4631, "epoch": 4.266365688487585, "grad_norm": 14.910243034362793, "learning_rate": 5.457911440220154e-07, "lm_loss": 5.3929, "loss": 1.392, "step": 1890, "text_contrastive_loss": 0.7791 }, { "contrastive_loss": 0.4282, "epoch": 4.2686230248307, "grad_norm": 15.006203651428223, "learning_rate": 5.424996023589524e-07, "lm_loss": 5.4791, "loss": 1.3596, "step": 1891, "text_contrastive_loss": 0.7669 }, { "contrastive_loss": 0.447, "epoch": 4.270880361173815, "grad_norm": 14.607666969299316, "learning_rate": 5.392174465815308e-07, "lm_loss": 5.4387, "loss": 1.3757, "step": 1892, "text_contrastive_loss": 0.7698 }, { "contrastive_loss": 0.2817, "epoch": 4.27313769751693, "grad_norm": 10.532715797424316, "learning_rate": 5.359446836007842e-07, "lm_loss": 5.496, "loss": 1.1333, "step": 1893, "text_contrastive_loss": 0.6041 }, { "contrastive_loss": 0.421, "epoch": 4.275395033860045, "grad_norm": 15.028196334838867, "learning_rate": 5.326813203079706e-07, "lm_loss": 5.5297, "loss": 1.3248, "step": 1894, "text_contrastive_loss": 0.7017 }, { "contrastive_loss": 0.4315, "epoch": 4.27765237020316, "grad_norm": 12.929876327514648, "learning_rate": 5.294273635745517e-07, "lm_loss": 5.3855, "loss": 1.3244, "step": 1895, "text_contrastive_loss": 0.7088 }, { "contrastive_loss": 0.4986, "epoch": 4.279909706546276, "grad_norm": 13.516950607299805, "learning_rate": 5.261828202521868e-07, "lm_loss": 5.3402, "loss": 1.4781, "step": 1896, "text_contrastive_loss": 0.891 }, { "contrastive_loss": 0.3615, "epoch": 4.282167042889391, "grad_norm": 12.600197792053223, "learning_rate": 5.229476971727115e-07, "lm_loss": 5.392, "loss": 1.289, "step": 1897, "text_contrastive_loss": 0.7765 }, { "contrastive_loss": 0.4494, "epoch": 4.284424379232505, "grad_norm": 14.624226570129395, "learning_rate": 5.197220011481274e-07, "lm_loss": 5.3513, "loss": 1.3867, "step": 1898, "text_contrastive_loss": 0.8043 }, { "contrastive_loss": 0.4321, "epoch": 4.286681715575621, "grad_norm": 12.813630104064941, "learning_rate": 5.165057389705835e-07, "lm_loss": 5.4004, "loss": 1.42, "step": 1899, "text_contrastive_loss": 0.8957 }, { "contrastive_loss": 0.433, "epoch": 4.288939051918736, "grad_norm": 14.062405586242676, "learning_rate": 5.132989174123659e-07, "lm_loss": 5.4482, "loss": 1.3866, "step": 1900, "text_contrastive_loss": 0.8175 }, { "contrastive_loss": 0.4673, "epoch": 4.291196388261851, "grad_norm": 14.387012481689453, "learning_rate": 5.101015432258843e-07, "lm_loss": 5.3963, "loss": 1.3872, "step": 1901, "text_contrastive_loss": 0.7604 }, { "contrastive_loss": 0.4905, "epoch": 4.293453724604966, "grad_norm": 14.758625030517578, "learning_rate": 5.069136231436539e-07, "lm_loss": 5.4778, "loss": 1.4374, "step": 1902, "text_contrastive_loss": 0.7982 }, { "contrastive_loss": 0.4708, "epoch": 4.295711060948081, "grad_norm": 12.979969024658203, "learning_rate": 5.037351638782812e-07, "lm_loss": 5.4252, "loss": 1.4259, "step": 1903, "text_contrastive_loss": 0.8251 }, { "contrastive_loss": 0.4759, "epoch": 4.297968397291196, "grad_norm": 15.118889808654785, "learning_rate": 5.00566172122453e-07, "lm_loss": 5.4265, "loss": 1.4689, "step": 1904, "text_contrastive_loss": 0.9008 }, { "contrastive_loss": 0.4953, "epoch": 4.300225733634312, "grad_norm": 14.978907585144043, "learning_rate": 4.97406654548922e-07, "lm_loss": 5.4912, "loss": 1.4967, "step": 1905, "text_contrastive_loss": 0.9045 }, { "contrastive_loss": 0.5469, "epoch": 4.302483069977427, "grad_norm": 15.992609024047852, "learning_rate": 4.942566178104924e-07, "lm_loss": 5.3889, "loss": 1.5341, "step": 1906, "text_contrastive_loss": 0.8967 }, { "contrastive_loss": 0.431, "epoch": 4.3047404063205414, "grad_norm": 13.045210838317871, "learning_rate": 4.911160685400008e-07, "lm_loss": 5.3889, "loss": 1.3757, "step": 1907, "text_contrastive_loss": 0.8116 }, { "contrastive_loss": 0.4624, "epoch": 4.306997742663657, "grad_norm": 15.070562362670898, "learning_rate": 4.879850133503106e-07, "lm_loss": 5.4262, "loss": 1.3649, "step": 1908, "text_contrastive_loss": 0.7199 }, { "contrastive_loss": 0.4193, "epoch": 4.309255079006772, "grad_norm": 14.346759796142578, "learning_rate": 4.848634588342932e-07, "lm_loss": 5.6188, "loss": 1.3668, "step": 1909, "text_contrastive_loss": 0.7712 }, { "contrastive_loss": 0.4336, "epoch": 4.311512415349887, "grad_norm": 13.917705535888672, "learning_rate": 4.817514115648164e-07, "lm_loss": 5.3995, "loss": 1.3475, "step": 1910, "text_contrastive_loss": 0.748 }, { "contrastive_loss": 0.4357, "epoch": 4.313769751693002, "grad_norm": 14.126520156860352, "learning_rate": 4.786488780947246e-07, "lm_loss": 5.4209, "loss": 1.337, "step": 1911, "text_contrastive_loss": 0.7184 }, { "contrastive_loss": 0.4058, "epoch": 4.316027088036117, "grad_norm": 13.53493595123291, "learning_rate": 4.755558649568337e-07, "lm_loss": 5.5103, "loss": 1.2778, "step": 1912, "text_contrastive_loss": 0.642 }, { "contrastive_loss": 0.4, "epoch": 4.318284424379232, "grad_norm": 13.509313583374023, "learning_rate": 4.7247237866391236e-07, "lm_loss": 5.3604, "loss": 1.2979, "step": 1913, "text_contrastive_loss": 0.7236 }, { "contrastive_loss": 0.3733, "epoch": 4.320541760722348, "grad_norm": 12.044815063476562, "learning_rate": 4.6939842570867034e-07, "lm_loss": 5.4313, "loss": 1.2297, "step": 1914, "text_contrastive_loss": 0.6266 }, { "contrastive_loss": 0.4015, "epoch": 4.322799097065463, "grad_norm": 13.225977897644043, "learning_rate": 4.663340125637389e-07, "lm_loss": 5.458, "loss": 1.3096, "step": 1915, "text_contrastive_loss": 0.7246 }, { "contrastive_loss": 0.504, "epoch": 4.3250564334085775, "grad_norm": 15.951153755187988, "learning_rate": 4.6327914568166763e-07, "lm_loss": 5.4381, "loss": 1.4387, "step": 1916, "text_contrastive_loss": 0.7817 }, { "contrastive_loss": 0.4944, "epoch": 4.327313769751693, "grad_norm": 15.046382904052734, "learning_rate": 4.6023383149490066e-07, "lm_loss": 5.4797, "loss": 1.5171, "step": 1917, "text_contrastive_loss": 0.9493 }, { "contrastive_loss": 0.373, "epoch": 4.329571106094808, "grad_norm": 14.629324913024902, "learning_rate": 4.571980764157724e-07, "lm_loss": 5.5032, "loss": 1.2773, "step": 1918, "text_contrastive_loss": 0.7081 }, { "contrastive_loss": 0.3948, "epoch": 4.331828442437923, "grad_norm": 13.322248458862305, "learning_rate": 4.5417188683648417e-07, "lm_loss": 5.4454, "loss": 1.3763, "step": 1919, "text_contrastive_loss": 0.8739 }, { "contrastive_loss": 0.3949, "epoch": 4.334085778781039, "grad_norm": 12.811241149902344, "learning_rate": 4.511552691290988e-07, "lm_loss": 5.425, "loss": 1.298, "step": 1920, "text_contrastive_loss": 0.7213 }, { "contrastive_loss": 0.4061, "epoch": 4.336343115124153, "grad_norm": 13.924341201782227, "learning_rate": 4.4814822964552363e-07, "lm_loss": 5.4122, "loss": 1.3042, "step": 1921, "text_contrastive_loss": 0.7136 }, { "contrastive_loss": 0.4002, "epoch": 4.3386004514672685, "grad_norm": 14.01033878326416, "learning_rate": 4.4515077471749767e-07, "lm_loss": 5.472, "loss": 1.3447, "step": 1922, "text_contrastive_loss": 0.7946 }, { "contrastive_loss": 0.6312, "epoch": 4.340857787810384, "grad_norm": 15.406018257141113, "learning_rate": 4.421629106565778e-07, "lm_loss": 5.4587, "loss": 1.6234, "step": 1923, "text_contrastive_loss": 0.8926 }, { "contrastive_loss": 0.4491, "epoch": 4.343115124153499, "grad_norm": 13.877806663513184, "learning_rate": 4.391846437541258e-07, "lm_loss": 5.552, "loss": 1.4634, "step": 1924, "text_contrastive_loss": 0.9181 }, { "contrastive_loss": 0.5328, "epoch": 4.345372460496614, "grad_norm": 13.972939491271973, "learning_rate": 4.362159802812971e-07, "lm_loss": 5.3688, "loss": 1.4885, "step": 1925, "text_contrastive_loss": 0.8374 }, { "contrastive_loss": 0.485, "epoch": 4.347629796839729, "grad_norm": 14.351852416992188, "learning_rate": 4.332569264890252e-07, "lm_loss": 5.3837, "loss": 1.4136, "step": 1926, "text_contrastive_loss": 0.7806 }, { "contrastive_loss": 0.4459, "epoch": 4.349887133182844, "grad_norm": 13.421630859375, "learning_rate": 4.3030748860800606e-07, "lm_loss": 5.4281, "loss": 1.3481, "step": 1927, "text_contrastive_loss": 0.7187 }, { "contrastive_loss": 0.437, "epoch": 4.3521444695259595, "grad_norm": 14.19968032836914, "learning_rate": 4.273676728486925e-07, "lm_loss": 5.4662, "loss": 1.3448, "step": 1928, "text_contrastive_loss": 0.7223 }, { "contrastive_loss": 0.3847, "epoch": 4.354401805869075, "grad_norm": 14.895120620727539, "learning_rate": 4.244374854012734e-07, "lm_loss": 5.5523, "loss": 1.2974, "step": 1929, "text_contrastive_loss": 0.7148 }, { "contrastive_loss": 0.4926, "epoch": 4.356659142212189, "grad_norm": 14.713385581970215, "learning_rate": 4.215169324356666e-07, "lm_loss": 5.5129, "loss": 1.4233, "step": 1930, "text_contrastive_loss": 0.7587 }, { "contrastive_loss": 0.4918, "epoch": 4.3589164785553045, "grad_norm": 16.201128005981445, "learning_rate": 4.186060201014991e-07, "lm_loss": 5.5518, "loss": 1.4261, "step": 1931, "text_contrastive_loss": 0.7584 }, { "contrastive_loss": 0.3521, "epoch": 4.36117381489842, "grad_norm": 11.698371887207031, "learning_rate": 4.157047545281029e-07, "lm_loss": 5.4341, "loss": 1.2211, "step": 1932, "text_contrastive_loss": 0.6511 }, { "contrastive_loss": 0.5779, "epoch": 4.363431151241535, "grad_norm": 15.930716514587402, "learning_rate": 4.1281314182449405e-07, "lm_loss": 5.4676, "loss": 1.5558, "step": 1933, "text_contrastive_loss": 0.8623 }, { "contrastive_loss": 0.5238, "epoch": 4.3656884875846504, "grad_norm": 17.162410736083984, "learning_rate": 4.099311880793655e-07, "lm_loss": 5.4837, "loss": 1.5025, "step": 1934, "text_contrastive_loss": 0.8608 }, { "contrastive_loss": 0.3549, "epoch": 4.367945823927765, "grad_norm": 11.071837425231934, "learning_rate": 4.070588993610697e-07, "lm_loss": 5.4856, "loss": 1.2363, "step": 1935, "text_contrastive_loss": 0.6657 }, { "contrastive_loss": 0.4307, "epoch": 4.37020316027088, "grad_norm": 14.291321754455566, "learning_rate": 4.0419628171760927e-07, "lm_loss": 5.3843, "loss": 1.3906, "step": 1936, "text_contrastive_loss": 0.843 }, { "contrastive_loss": 0.4539, "epoch": 4.3724604966139955, "grad_norm": 13.059618949890137, "learning_rate": 4.0134334117662375e-07, "lm_loss": 5.3896, "loss": 1.3542, "step": 1937, "text_contrastive_loss": 0.7226 }, { "contrastive_loss": 0.4515, "epoch": 4.374717832957111, "grad_norm": 14.538610458374023, "learning_rate": 3.985000837453756e-07, "lm_loss": 5.3757, "loss": 1.3529, "step": 1938, "text_contrastive_loss": 0.7277 }, { "contrastive_loss": 0.4556, "epoch": 4.376975169300226, "grad_norm": 13.206327438354492, "learning_rate": 3.9566651541073586e-07, "lm_loss": 5.3729, "loss": 1.3516, "step": 1939, "text_contrastive_loss": 0.7175 }, { "contrastive_loss": 0.4274, "epoch": 4.3792325056433405, "grad_norm": 12.887798309326172, "learning_rate": 3.928426421391773e-07, "lm_loss": 5.4968, "loss": 1.3883, "step": 1940, "text_contrastive_loss": 0.8225 }, { "contrastive_loss": 0.4148, "epoch": 4.381489841986456, "grad_norm": 13.589632987976074, "learning_rate": 3.9002846987675704e-07, "lm_loss": 5.3314, "loss": 1.2816, "step": 1941, "text_contrastive_loss": 0.6673 }, { "contrastive_loss": 0.4093, "epoch": 4.383747178329571, "grad_norm": 14.307594299316406, "learning_rate": 3.872240045491055e-07, "lm_loss": 5.3974, "loss": 1.3727, "step": 1942, "text_contrastive_loss": 0.8474 }, { "contrastive_loss": 0.4296, "epoch": 4.3860045146726865, "grad_norm": 15.387022972106934, "learning_rate": 3.8442925206141237e-07, "lm_loss": 5.4738, "loss": 1.3277, "step": 1943, "text_contrastive_loss": 0.7015 }, { "contrastive_loss": 0.4235, "epoch": 4.388261851015802, "grad_norm": 13.895306587219238, "learning_rate": 3.8164421829841756e-07, "lm_loss": 5.4586, "loss": 1.3625, "step": 1944, "text_contrastive_loss": 0.7863 }, { "contrastive_loss": 0.5274, "epoch": 4.390519187358916, "grad_norm": 16.344566345214844, "learning_rate": 3.7886890912439633e-07, "lm_loss": 5.3447, "loss": 1.4867, "step": 1945, "text_contrastive_loss": 0.8496 }, { "contrastive_loss": 0.3899, "epoch": 4.3927765237020315, "grad_norm": 12.419146537780762, "learning_rate": 3.761033303831474e-07, "lm_loss": 5.3228, "loss": 1.2334, "step": 1946, "text_contrastive_loss": 0.6224 }, { "contrastive_loss": 0.4346, "epoch": 4.395033860045147, "grad_norm": 14.739822387695312, "learning_rate": 3.733474878979798e-07, "lm_loss": 5.4051, "loss": 1.403, "step": 1947, "text_contrastive_loss": 0.8557 }, { "contrastive_loss": 0.4122, "epoch": 4.397291196388262, "grad_norm": 13.91802978515625, "learning_rate": 3.706013874717024e-07, "lm_loss": 5.4228, "loss": 1.2934, "step": 1948, "text_contrastive_loss": 0.6778 }, { "contrastive_loss": 0.4537, "epoch": 4.399548532731377, "grad_norm": 14.394438743591309, "learning_rate": 3.678650348866114e-07, "lm_loss": 5.4463, "loss": 1.374, "step": 1949, "text_contrastive_loss": 0.7513 }, { "contrastive_loss": 0.4394, "epoch": 4.401805869074492, "grad_norm": 13.035497665405273, "learning_rate": 3.651384359044774e-07, "lm_loss": 5.4935, "loss": 1.3769, "step": 1950, "text_contrastive_loss": 0.7764 }, { "contrastive_loss": 0.3555, "epoch": 4.404063205417607, "grad_norm": 13.068267822265625, "learning_rate": 3.6242159626653004e-07, "lm_loss": 5.4236, "loss": 1.2316, "step": 1951, "text_contrastive_loss": 0.6675 }, { "contrastive_loss": 0.4441, "epoch": 4.4063205417607225, "grad_norm": 13.434308052062988, "learning_rate": 3.597145216934556e-07, "lm_loss": 5.4971, "loss": 1.3334, "step": 1952, "text_contrastive_loss": 0.6792 }, { "contrastive_loss": 0.4748, "epoch": 4.408577878103838, "grad_norm": 15.370001792907715, "learning_rate": 3.570172178853731e-07, "lm_loss": 5.4621, "loss": 1.4576, "step": 1953, "text_contrastive_loss": 0.8732 }, { "contrastive_loss": 0.4463, "epoch": 4.410835214446952, "grad_norm": 13.672945022583008, "learning_rate": 3.5432969052183186e-07, "lm_loss": 5.3621, "loss": 1.394, "step": 1954, "text_contrastive_loss": 0.823 }, { "contrastive_loss": 0.3904, "epoch": 4.413092550790068, "grad_norm": 12.61204719543457, "learning_rate": 3.516519452617922e-07, "lm_loss": 5.492, "loss": 1.3419, "step": 1955, "text_contrastive_loss": 0.8045 }, { "contrastive_loss": 0.5187, "epoch": 4.415349887133183, "grad_norm": 14.892425537109375, "learning_rate": 3.4898398774361854e-07, "lm_loss": 5.3201, "loss": 1.491, "step": 1956, "text_contrastive_loss": 0.8804 }, { "contrastive_loss": 0.3911, "epoch": 4.417607223476298, "grad_norm": 13.570329666137695, "learning_rate": 3.463258235850653e-07, "lm_loss": 5.4478, "loss": 1.3248, "step": 1957, "text_contrastive_loss": 0.7779 }, { "contrastive_loss": 0.474, "epoch": 4.4198645598194135, "grad_norm": 14.859395980834961, "learning_rate": 3.4367745838326807e-07, "lm_loss": 5.4708, "loss": 1.3871, "step": 1958, "text_contrastive_loss": 0.7321 }, { "contrastive_loss": 0.4756, "epoch": 4.422121896162528, "grad_norm": 14.346101760864258, "learning_rate": 3.410388977147244e-07, "lm_loss": 5.3331, "loss": 1.4394, "step": 1959, "text_contrastive_loss": 0.8611 }, { "contrastive_loss": 0.4861, "epoch": 4.424379232505643, "grad_norm": 15.08126163482666, "learning_rate": 3.3841014713529184e-07, "lm_loss": 5.4299, "loss": 1.4246, "step": 1960, "text_contrastive_loss": 0.7911 }, { "contrastive_loss": 0.5389, "epoch": 4.426636568848759, "grad_norm": 14.744633674621582, "learning_rate": 3.357912121801682e-07, "lm_loss": 5.49, "loss": 1.526, "step": 1961, "text_contrastive_loss": 0.8762 }, { "contrastive_loss": 0.4777, "epoch": 4.428893905191874, "grad_norm": 14.078417778015137, "learning_rate": 3.331820983638867e-07, "lm_loss": 5.5039, "loss": 1.4177, "step": 1962, "text_contrastive_loss": 0.7793 }, { "contrastive_loss": 0.5299, "epoch": 4.431151241534989, "grad_norm": 16.73001480102539, "learning_rate": 3.3058281118029553e-07, "lm_loss": 5.5428, "loss": 1.4909, "step": 1963, "text_contrastive_loss": 0.8135 }, { "contrastive_loss": 0.4935, "epoch": 4.433408577878104, "grad_norm": 13.877079010009766, "learning_rate": 3.279933561025567e-07, "lm_loss": 5.465, "loss": 1.4524, "step": 1964, "text_contrastive_loss": 0.8249 }, { "contrastive_loss": 0.43, "epoch": 4.435665914221219, "grad_norm": 12.670683860778809, "learning_rate": 3.254137385831263e-07, "lm_loss": 5.4102, "loss": 1.3859, "step": 1965, "text_contrastive_loss": 0.8298 }, { "contrastive_loss": 0.5592, "epoch": 4.437923250564334, "grad_norm": 15.805489540100098, "learning_rate": 3.2284396405374787e-07, "lm_loss": 5.4256, "loss": 1.532, "step": 1966, "text_contrastive_loss": 0.8603 }, { "contrastive_loss": 0.3286, "epoch": 4.4401805869074495, "grad_norm": 11.388947486877441, "learning_rate": 3.202840379254374e-07, "lm_loss": 5.4651, "loss": 1.2775, "step": 1967, "text_contrastive_loss": 0.8049 }, { "contrastive_loss": 0.4544, "epoch": 4.442437923250564, "grad_norm": 13.956562995910645, "learning_rate": 3.177339655884737e-07, "lm_loss": 5.5186, "loss": 1.3878, "step": 1968, "text_contrastive_loss": 0.763 }, { "contrastive_loss": 0.4719, "epoch": 4.444695259593679, "grad_norm": 13.878486633300781, "learning_rate": 3.151937524123905e-07, "lm_loss": 5.4145, "loss": 1.4145, "step": 1969, "text_contrastive_loss": 0.8022 }, { "contrastive_loss": 0.3935, "epoch": 4.446952595936795, "grad_norm": 13.50877571105957, "learning_rate": 3.1266340374595693e-07, "lm_loss": 5.4862, "loss": 1.3173, "step": 1970, "text_contrastive_loss": 0.7503 }, { "contrastive_loss": 0.3619, "epoch": 4.44920993227991, "grad_norm": 13.413561820983887, "learning_rate": 3.1014292491717444e-07, "lm_loss": 5.5391, "loss": 1.2586, "step": 1971, "text_contrastive_loss": 0.6856 }, { "contrastive_loss": 0.5004, "epoch": 4.451467268623025, "grad_norm": 14.72677993774414, "learning_rate": 3.076323212332605e-07, "lm_loss": 5.5368, "loss": 1.4929, "step": 1972, "text_contrastive_loss": 0.8777 }, { "contrastive_loss": 0.3491, "epoch": 4.45372460496614, "grad_norm": 12.470561027526855, "learning_rate": 3.0513159798063906e-07, "lm_loss": 5.4153, "loss": 1.3191, "step": 1973, "text_contrastive_loss": 0.857 }, { "contrastive_loss": 0.4606, "epoch": 4.455981941309255, "grad_norm": 14.206003189086914, "learning_rate": 3.026407604249315e-07, "lm_loss": 5.4837, "loss": 1.3799, "step": 1974, "text_contrastive_loss": 0.7418 }, { "contrastive_loss": 0.4276, "epoch": 4.45823927765237, "grad_norm": 13.35498046875, "learning_rate": 3.0015981381094073e-07, "lm_loss": 5.3522, "loss": 1.388, "step": 1975, "text_contrastive_loss": 0.8504 }, { "contrastive_loss": 0.4814, "epoch": 4.460496613995486, "grad_norm": 14.195591926574707, "learning_rate": 2.976887633626435e-07, "lm_loss": 5.5419, "loss": 1.4163, "step": 1976, "text_contrastive_loss": 0.7613 }, { "contrastive_loss": 0.361, "epoch": 4.4627539503386, "grad_norm": 13.182605743408203, "learning_rate": 2.952276142831806e-07, "lm_loss": 5.4325, "loss": 1.2021, "step": 1977, "text_contrastive_loss": 0.5957 }, { "contrastive_loss": 0.3725, "epoch": 4.465011286681715, "grad_norm": 14.167166709899902, "learning_rate": 2.9277637175484376e-07, "lm_loss": 5.4348, "loss": 1.2838, "step": 1978, "text_contrastive_loss": 0.7356 }, { "contrastive_loss": 0.424, "epoch": 4.467268623024831, "grad_norm": 13.755749702453613, "learning_rate": 2.9033504093906207e-07, "lm_loss": 5.4003, "loss": 1.3916, "step": 1979, "text_contrastive_loss": 0.8553 }, { "contrastive_loss": 0.5471, "epoch": 4.469525959367946, "grad_norm": 17.518085479736328, "learning_rate": 2.8790362697639685e-07, "lm_loss": 5.4183, "loss": 1.559, "step": 1980, "text_contrastive_loss": 0.9402 }, { "contrastive_loss": 0.4565, "epoch": 4.471783295711061, "grad_norm": 15.208088874816895, "learning_rate": 2.854821349865289e-07, "lm_loss": 5.492, "loss": 1.4434, "step": 1981, "text_contrastive_loss": 0.8754 }, { "contrastive_loss": 0.3749, "epoch": 4.474040632054176, "grad_norm": 13.097951889038086, "learning_rate": 2.8307057006824514e-07, "lm_loss": 5.4126, "loss": 1.3196, "step": 1982, "text_contrastive_loss": 0.8068 }, { "contrastive_loss": 0.4867, "epoch": 4.476297968397291, "grad_norm": 15.0460205078125, "learning_rate": 2.806689372994292e-07, "lm_loss": 5.3848, "loss": 1.4281, "step": 1983, "text_contrastive_loss": 0.8059 }, { "contrastive_loss": 0.4521, "epoch": 4.478555304740406, "grad_norm": 14.740790367126465, "learning_rate": 2.7827724173705273e-07, "lm_loss": 5.418, "loss": 1.3498, "step": 1984, "text_contrastive_loss": 0.7118 }, { "contrastive_loss": 0.4936, "epoch": 4.480812641083522, "grad_norm": 15.868513107299805, "learning_rate": 2.7589548841716274e-07, "lm_loss": 5.3949, "loss": 1.432, "step": 1985, "text_contrastive_loss": 0.7978 }, { "contrastive_loss": 0.4035, "epoch": 4.483069977426637, "grad_norm": 14.197154998779297, "learning_rate": 2.735236823548715e-07, "lm_loss": 5.4923, "loss": 1.3214, "step": 1986, "text_contrastive_loss": 0.7372 }, { "contrastive_loss": 0.4937, "epoch": 4.485327313769751, "grad_norm": 15.719579696655273, "learning_rate": 2.711618285443457e-07, "lm_loss": 5.4, "loss": 1.4754, "step": 1987, "text_contrastive_loss": 0.8834 }, { "contrastive_loss": 0.5175, "epoch": 4.487584650112867, "grad_norm": 15.776280403137207, "learning_rate": 2.6880993195879614e-07, "lm_loss": 5.5162, "loss": 1.539, "step": 1988, "text_contrastive_loss": 0.9399 }, { "contrastive_loss": 0.3959, "epoch": 4.489841986455982, "grad_norm": 14.887728691101074, "learning_rate": 2.6646799755046746e-07, "lm_loss": 5.4649, "loss": 1.3176, "step": 1989, "text_contrastive_loss": 0.7504 }, { "contrastive_loss": 0.3833, "epoch": 4.492099322799097, "grad_norm": 12.696367263793945, "learning_rate": 2.64136030250628e-07, "lm_loss": 5.3242, "loss": 1.2353, "step": 1990, "text_contrastive_loss": 0.6392 }, { "contrastive_loss": 0.4416, "epoch": 4.494356659142213, "grad_norm": 13.526455879211426, "learning_rate": 2.618140349695575e-07, "lm_loss": 5.4823, "loss": 1.3949, "step": 1991, "text_contrastive_loss": 0.8103 }, { "contrastive_loss": 0.3788, "epoch": 4.496613995485327, "grad_norm": 13.053594589233398, "learning_rate": 2.595020165965401e-07, "lm_loss": 5.4288, "loss": 1.3021, "step": 1992, "text_contrastive_loss": 0.7609 }, { "contrastive_loss": 0.4455, "epoch": 4.498871331828442, "grad_norm": 13.829413414001465, "learning_rate": 2.571999799998509e-07, "lm_loss": 5.4231, "loss": 1.4108, "step": 1993, "text_contrastive_loss": 0.846 }, { "contrastive_loss": 0.437, "epoch": 4.501128668171558, "grad_norm": 13.574397087097168, "learning_rate": 2.549079300267482e-07, "lm_loss": 5.4456, "loss": 1.3316, "step": 1994, "text_contrastive_loss": 0.7002 }, { "contrastive_loss": 0.5266, "epoch": 4.503386004514673, "grad_norm": 15.497942924499512, "learning_rate": 2.526258715034602e-07, "lm_loss": 5.3979, "loss": 1.5456, "step": 1995, "text_contrastive_loss": 0.9583 }, { "contrastive_loss": 0.5077, "epoch": 4.505643340857787, "grad_norm": 15.3302583694458, "learning_rate": 2.503538092351782e-07, "lm_loss": 5.466, "loss": 1.4918, "step": 1996, "text_contrastive_loss": 0.8749 }, { "contrastive_loss": 0.4095, "epoch": 4.507900677200903, "grad_norm": 13.3062162399292, "learning_rate": 2.480917480060441e-07, "lm_loss": 5.4016, "loss": 1.3543, "step": 1997, "text_contrastive_loss": 0.8092 }, { "contrastive_loss": 0.4771, "epoch": 4.510158013544018, "grad_norm": 14.244848251342773, "learning_rate": 2.458396925791434e-07, "lm_loss": 5.5374, "loss": 1.445, "step": 1998, "text_contrastive_loss": 0.8283 }, { "contrastive_loss": 0.4325, "epoch": 4.512415349887133, "grad_norm": 13.032058715820312, "learning_rate": 2.4359764769648907e-07, "lm_loss": 5.4126, "loss": 1.3252, "step": 1999, "text_contrastive_loss": 0.703 }, { "contrastive_loss": 0.4971, "epoch": 4.514672686230249, "grad_norm": 15.64871597290039, "learning_rate": 2.4136561807901916e-07, "lm_loss": 5.42, "loss": 1.5228, "step": 2000, "text_contrastive_loss": 0.9674 }, { "contrastive_loss": 0.4192, "epoch": 4.516930022573363, "grad_norm": 14.64123249053955, "learning_rate": 2.391436084265814e-07, "lm_loss": 5.3506, "loss": 1.3261, "step": 2001, "text_contrastive_loss": 0.7437 }, { "contrastive_loss": 0.4378, "epoch": 4.519187358916478, "grad_norm": 13.714446067810059, "learning_rate": 2.3693162341792532e-07, "lm_loss": 5.3923, "loss": 1.364, "step": 2002, "text_contrastive_loss": 0.774 }, { "contrastive_loss": 0.4487, "epoch": 4.521444695259594, "grad_norm": 13.786931991577148, "learning_rate": 2.347296677106925e-07, "lm_loss": 5.4813, "loss": 1.3724, "step": 2003, "text_contrastive_loss": 0.7511 }, { "contrastive_loss": 0.4898, "epoch": 4.523702031602709, "grad_norm": 13.873068809509277, "learning_rate": 2.3253774594140633e-07, "lm_loss": 5.4082, "loss": 1.4093, "step": 2004, "text_contrastive_loss": 0.7574 }, { "contrastive_loss": 0.4589, "epoch": 4.525959367945823, "grad_norm": 14.35563850402832, "learning_rate": 2.3035586272546207e-07, "lm_loss": 5.3493, "loss": 1.3454, "step": 2005, "text_contrastive_loss": 0.703 }, { "contrastive_loss": 0.3581, "epoch": 4.528216704288939, "grad_norm": 12.581876754760742, "learning_rate": 2.2818402265711858e-07, "lm_loss": 5.4672, "loss": 1.2163, "step": 2006, "text_contrastive_loss": 0.6229 }, { "contrastive_loss": 0.4342, "epoch": 4.530474040632054, "grad_norm": 14.916787147521973, "learning_rate": 2.2602223030948445e-07, "lm_loss": 5.3392, "loss": 1.4251, "step": 2007, "text_contrastive_loss": 0.914 }, { "contrastive_loss": 0.4943, "epoch": 4.532731376975169, "grad_norm": 16.06678009033203, "learning_rate": 2.2387049023451458e-07, "lm_loss": 5.4612, "loss": 1.4213, "step": 2008, "text_contrastive_loss": 0.7617 }, { "contrastive_loss": 0.447, "epoch": 4.534988713318285, "grad_norm": 14.13612174987793, "learning_rate": 2.2172880696299692e-07, "lm_loss": 5.375, "loss": 1.3563, "step": 2009, "text_contrastive_loss": 0.7437 }, { "contrastive_loss": 0.4334, "epoch": 4.5372460496614, "grad_norm": 14.04653263092041, "learning_rate": 2.1959718500454196e-07, "lm_loss": 5.4325, "loss": 1.3352, "step": 2010, "text_contrastive_loss": 0.7172 }, { "contrastive_loss": 0.3929, "epoch": 4.539503386004514, "grad_norm": 13.984199523925781, "learning_rate": 2.17475628847576e-07, "lm_loss": 5.4223, "loss": 1.3207, "step": 2011, "text_contrastive_loss": 0.7711 }, { "contrastive_loss": 0.3626, "epoch": 4.54176072234763, "grad_norm": 11.853575706481934, "learning_rate": 2.1536414295932896e-07, "lm_loss": 5.3687, "loss": 1.2356, "step": 2012, "text_contrastive_loss": 0.6723 }, { "contrastive_loss": 0.4397, "epoch": 4.544018058690745, "grad_norm": 13.724562644958496, "learning_rate": 2.1326273178582822e-07, "lm_loss": 5.4594, "loss": 1.4015, "step": 2013, "text_contrastive_loss": 0.8316 }, { "contrastive_loss": 0.5173, "epoch": 4.54627539503386, "grad_norm": 14.897256851196289, "learning_rate": 2.1117139975188716e-07, "lm_loss": 5.4468, "loss": 1.4539, "step": 2014, "text_contrastive_loss": 0.7838 }, { "contrastive_loss": 0.4113, "epoch": 4.548532731376975, "grad_norm": 15.196002960205078, "learning_rate": 2.0909015126109488e-07, "lm_loss": 5.411, "loss": 1.3154, "step": 2015, "text_contrastive_loss": 0.726 }, { "contrastive_loss": 0.4638, "epoch": 4.55079006772009, "grad_norm": 14.481385231018066, "learning_rate": 2.070189906958081e-07, "lm_loss": 5.4344, "loss": 1.3786, "step": 2016, "text_contrastive_loss": 0.7428 }, { "contrastive_loss": 0.4465, "epoch": 4.553047404063205, "grad_norm": 14.491253852844238, "learning_rate": 2.0495792241714386e-07, "lm_loss": 5.315, "loss": 1.3858, "step": 2017, "text_contrastive_loss": 0.8157 }, { "contrastive_loss": 0.5093, "epoch": 4.555304740406321, "grad_norm": 16.6595401763916, "learning_rate": 2.029069507649678e-07, "lm_loss": 5.4377, "loss": 1.4429, "step": 2018, "text_contrastive_loss": 0.7795 }, { "contrastive_loss": 0.4251, "epoch": 4.557562076749436, "grad_norm": 14.268233299255371, "learning_rate": 2.0086608005788376e-07, "lm_loss": 5.4478, "loss": 1.3497, "step": 2019, "text_contrastive_loss": 0.7597 }, { "contrastive_loss": 0.3755, "epoch": 4.5598194130925505, "grad_norm": 13.330026626586914, "learning_rate": 1.988353145932298e-07, "lm_loss": 5.4278, "loss": 1.1776, "step": 2020, "text_contrastive_loss": 0.5188 }, { "contrastive_loss": 0.3888, "epoch": 4.562076749435666, "grad_norm": 12.81385612487793, "learning_rate": 1.9681465864706372e-07, "lm_loss": 5.5862, "loss": 1.3231, "step": 2021, "text_contrastive_loss": 0.7513 }, { "contrastive_loss": 0.4296, "epoch": 4.564334085778781, "grad_norm": 13.457114219665527, "learning_rate": 1.9480411647415708e-07, "lm_loss": 5.4892, "loss": 1.3491, "step": 2022, "text_contrastive_loss": 0.7413 }, { "contrastive_loss": 0.4774, "epoch": 4.566591422121896, "grad_norm": 13.965457916259766, "learning_rate": 1.9280369230798568e-07, "lm_loss": 5.4485, "loss": 1.4685, "step": 2023, "text_contrastive_loss": 0.8926 }, { "contrastive_loss": 0.4722, "epoch": 4.568848758465011, "grad_norm": 13.905646324157715, "learning_rate": 1.9081339036071956e-07, "lm_loss": 5.4528, "loss": 1.4211, "step": 2024, "text_contrastive_loss": 0.8073 }, { "contrastive_loss": 0.4237, "epoch": 4.571106094808126, "grad_norm": 12.96377944946289, "learning_rate": 1.8883321482321583e-07, "lm_loss": 5.4854, "loss": 1.4255, "step": 2025, "text_contrastive_loss": 0.9064 }, { "contrastive_loss": 0.4817, "epoch": 4.573363431151241, "grad_norm": 15.496378898620605, "learning_rate": 1.8686316986500974e-07, "lm_loss": 5.4471, "loss": 1.4965, "step": 2026, "text_contrastive_loss": 0.9402 }, { "contrastive_loss": 0.4595, "epoch": 4.575620767494357, "grad_norm": 13.194942474365234, "learning_rate": 1.8490325963430368e-07, "lm_loss": 5.5628, "loss": 1.4637, "step": 2027, "text_contrastive_loss": 0.8958 }, { "contrastive_loss": 0.4527, "epoch": 4.577878103837472, "grad_norm": 14.945164680480957, "learning_rate": 1.829534882579598e-07, "lm_loss": 5.4007, "loss": 1.4171, "step": 2028, "text_contrastive_loss": 0.8487 }, { "contrastive_loss": 0.4101, "epoch": 4.580135440180587, "grad_norm": 12.962059020996094, "learning_rate": 1.8101385984149343e-07, "lm_loss": 5.5119, "loss": 1.3662, "step": 2029, "text_contrastive_loss": 0.8098 }, { "contrastive_loss": 0.472, "epoch": 4.582392776523702, "grad_norm": 14.130294799804688, "learning_rate": 1.7908437846906158e-07, "lm_loss": 5.5662, "loss": 1.5369, "step": 2030, "text_contrastive_loss": 1.0166 }, { "contrastive_loss": 0.5411, "epoch": 4.584650112866817, "grad_norm": 14.8324613571167, "learning_rate": 1.7716504820345427e-07, "lm_loss": 5.5394, "loss": 1.4772, "step": 2031, "text_contrastive_loss": 0.7645 }, { "contrastive_loss": 0.36, "epoch": 4.586907449209932, "grad_norm": 14.10505485534668, "learning_rate": 1.752558730860876e-07, "lm_loss": 5.454, "loss": 1.2786, "step": 2032, "text_contrastive_loss": 0.7464 }, { "contrastive_loss": 0.4411, "epoch": 4.589164785553048, "grad_norm": 14.178133964538574, "learning_rate": 1.733568571369948e-07, "lm_loss": 5.4993, "loss": 1.4287, "step": 2033, "text_contrastive_loss": 0.8754 }, { "contrastive_loss": 0.4109, "epoch": 4.591422121896162, "grad_norm": 12.664596557617188, "learning_rate": 1.7146800435481837e-07, "lm_loss": 5.4682, "loss": 1.4096, "step": 2034, "text_contrastive_loss": 0.9037 }, { "contrastive_loss": 0.477, "epoch": 4.5936794582392775, "grad_norm": 15.237536430358887, "learning_rate": 1.6958931871679908e-07, "lm_loss": 5.4776, "loss": 1.4628, "step": 2035, "text_contrastive_loss": 0.8761 }, { "contrastive_loss": 0.4118, "epoch": 4.595936794582393, "grad_norm": 13.368701934814453, "learning_rate": 1.677208041787698e-07, "lm_loss": 5.3856, "loss": 1.3221, "step": 2036, "text_contrastive_loss": 0.7435 }, { "contrastive_loss": 0.3885, "epoch": 4.598194130925508, "grad_norm": 14.1590576171875, "learning_rate": 1.6586246467514833e-07, "lm_loss": 5.3744, "loss": 1.3235, "step": 2037, "text_contrastive_loss": 0.795 }, { "contrastive_loss": 0.4713, "epoch": 4.600451467268623, "grad_norm": 15.29771614074707, "learning_rate": 1.6401430411892572e-07, "lm_loss": 5.3639, "loss": 1.4432, "step": 2038, "text_contrastive_loss": 0.871 }, { "contrastive_loss": 0.5518, "epoch": 4.602708803611738, "grad_norm": 14.823325157165527, "learning_rate": 1.621763264016607e-07, "lm_loss": 5.2741, "loss": 1.506, "step": 2039, "text_contrastive_loss": 0.8535 }, { "contrastive_loss": 0.4574, "epoch": 4.604966139954853, "grad_norm": 15.320375442504883, "learning_rate": 1.603485353934703e-07, "lm_loss": 5.319, "loss": 1.4113, "step": 2040, "text_contrastive_loss": 0.8439 }, { "contrastive_loss": 0.3564, "epoch": 4.6072234762979685, "grad_norm": 12.8389253616333, "learning_rate": 1.5853093494302195e-07, "lm_loss": 5.6329, "loss": 1.2724, "step": 2041, "text_contrastive_loss": 0.7054 }, { "contrastive_loss": 0.4384, "epoch": 4.609480812641084, "grad_norm": 14.57319450378418, "learning_rate": 1.567235288775265e-07, "lm_loss": 5.4662, "loss": 1.3948, "step": 2042, "text_contrastive_loss": 0.8196 }, { "contrastive_loss": 0.4363, "epoch": 4.611738148984198, "grad_norm": 13.848459243774414, "learning_rate": 1.5492632100272686e-07, "lm_loss": 5.4411, "loss": 1.3514, "step": 2043, "text_contrastive_loss": 0.7419 }, { "contrastive_loss": 0.477, "epoch": 4.6139954853273135, "grad_norm": 14.179244041442871, "learning_rate": 1.5313931510289482e-07, "lm_loss": 5.3852, "loss": 1.4202, "step": 2044, "text_contrastive_loss": 0.8094 }, { "contrastive_loss": 0.4274, "epoch": 4.616252821670429, "grad_norm": 13.810185432434082, "learning_rate": 1.5136251494081822e-07, "lm_loss": 5.3967, "loss": 1.3777, "step": 2045, "text_contrastive_loss": 0.8212 }, { "contrastive_loss": 0.4711, "epoch": 4.618510158013544, "grad_norm": 14.85326862335205, "learning_rate": 1.4959592425779768e-07, "lm_loss": 5.5282, "loss": 1.3928, "step": 2046, "text_contrastive_loss": 0.7378 }, { "contrastive_loss": 0.472, "epoch": 4.6207674943566595, "grad_norm": 13.674016952514648, "learning_rate": 1.4783954677363376e-07, "lm_loss": 5.5314, "loss": 1.3513, "step": 2047, "text_contrastive_loss": 0.6524 }, { "contrastive_loss": 0.4051, "epoch": 4.623024830699774, "grad_norm": 12.20019245147705, "learning_rate": 1.4609338618662318e-07, "lm_loss": 5.4559, "loss": 1.3385, "step": 2048, "text_contrastive_loss": 0.7756 }, { "contrastive_loss": 0.532, "epoch": 4.625282167042889, "grad_norm": 15.878229141235352, "learning_rate": 1.4435744617354975e-07, "lm_loss": 5.4619, "loss": 1.5532, "step": 2049, "text_contrastive_loss": 0.9501 }, { "contrastive_loss": 0.399, "epoch": 4.6275395033860045, "grad_norm": 12.863652229309082, "learning_rate": 1.4263173038967627e-07, "lm_loss": 5.3891, "loss": 1.3012, "step": 2050, "text_contrastive_loss": 0.7265 }, { "contrastive_loss": 0.4699, "epoch": 4.62979683972912, "grad_norm": 16.129262924194336, "learning_rate": 1.409162424687366e-07, "lm_loss": 5.465, "loss": 1.4483, "step": 2051, "text_contrastive_loss": 0.8638 }, { "contrastive_loss": 0.4412, "epoch": 4.632054176072235, "grad_norm": 15.443099021911621, "learning_rate": 1.3921098602292793e-07, "lm_loss": 5.4736, "loss": 1.443, "step": 2052, "text_contrastive_loss": 0.9088 }, { "contrastive_loss": 0.4351, "epoch": 4.6343115124153496, "grad_norm": 14.124246597290039, "learning_rate": 1.3751596464290529e-07, "lm_loss": 5.4517, "loss": 1.3379, "step": 2053, "text_contrastive_loss": 0.7153 }, { "contrastive_loss": 0.419, "epoch": 4.636568848758465, "grad_norm": 14.496289253234863, "learning_rate": 1.358311818977709e-07, "lm_loss": 5.4731, "loss": 1.3683, "step": 2054, "text_contrastive_loss": 0.804 }, { "contrastive_loss": 0.5128, "epoch": 4.63882618510158, "grad_norm": 14.910599708557129, "learning_rate": 1.3415664133506812e-07, "lm_loss": 5.4918, "loss": 1.4829, "step": 2055, "text_contrastive_loss": 0.8417 }, { "contrastive_loss": 0.3839, "epoch": 4.6410835214446955, "grad_norm": 13.312881469726562, "learning_rate": 1.324923464807759e-07, "lm_loss": 5.3817, "loss": 1.3029, "step": 2056, "text_contrastive_loss": 0.7616 }, { "contrastive_loss": 0.529, "epoch": 4.643340857787811, "grad_norm": 15.78528118133545, "learning_rate": 1.308383008392977e-07, "lm_loss": 5.4242, "loss": 1.5217, "step": 2057, "text_contrastive_loss": 0.9006 }, { "contrastive_loss": 0.4262, "epoch": 4.645598194130925, "grad_norm": 13.879262924194336, "learning_rate": 1.2919450789345477e-07, "lm_loss": 5.4463, "loss": 1.3103, "step": 2058, "text_contrastive_loss": 0.6789 }, { "contrastive_loss": 0.3579, "epoch": 4.6478555304740405, "grad_norm": 13.028387069702148, "learning_rate": 1.275609711044823e-07, "lm_loss": 5.4102, "loss": 1.2543, "step": 2059, "text_contrastive_loss": 0.7108 }, { "contrastive_loss": 0.4899, "epoch": 4.650112866817156, "grad_norm": 13.89792251586914, "learning_rate": 1.2593769391201827e-07, "lm_loss": 5.5376, "loss": 1.4622, "step": 2060, "text_contrastive_loss": 0.8369 }, { "contrastive_loss": 0.4765, "epoch": 4.652370203160271, "grad_norm": 13.671670913696289, "learning_rate": 1.2432467973409857e-07, "lm_loss": 5.3557, "loss": 1.4552, "step": 2061, "text_contrastive_loss": 0.8864 }, { "contrastive_loss": 0.4409, "epoch": 4.654627539503386, "grad_norm": 16.313514709472656, "learning_rate": 1.2272193196714854e-07, "lm_loss": 5.5179, "loss": 1.3112, "step": 2062, "text_contrastive_loss": 0.6372 }, { "contrastive_loss": 0.4862, "epoch": 4.656884875846501, "grad_norm": 14.998929977416992, "learning_rate": 1.211294539859753e-07, "lm_loss": 5.4963, "loss": 1.4207, "step": 2063, "text_contrastive_loss": 0.7698 }, { "contrastive_loss": 0.346, "epoch": 4.659142212189616, "grad_norm": 12.416764259338379, "learning_rate": 1.1954724914376215e-07, "lm_loss": 5.558, "loss": 1.2345, "step": 2064, "text_contrastive_loss": 0.6653 }, { "contrastive_loss": 0.3529, "epoch": 4.6613995485327315, "grad_norm": 11.774885177612305, "learning_rate": 1.1797532077206187e-07, "lm_loss": 5.4785, "loss": 1.2877, "step": 2065, "text_contrastive_loss": 0.774 }, { "contrastive_loss": 0.4346, "epoch": 4.663656884875847, "grad_norm": 13.302604675292969, "learning_rate": 1.1641367218078736e-07, "lm_loss": 5.3816, "loss": 1.4156, "step": 2066, "text_contrastive_loss": 0.8857 }, { "contrastive_loss": 0.4733, "epoch": 4.665914221218961, "grad_norm": 13.181002616882324, "learning_rate": 1.1486230665820552e-07, "lm_loss": 5.371, "loss": 1.4199, "step": 2067, "text_contrastive_loss": 0.8189 }, { "contrastive_loss": 0.5697, "epoch": 4.668171557562077, "grad_norm": 15.390506744384766, "learning_rate": 1.1332122747093277e-07, "lm_loss": 5.3907, "loss": 1.6271, "step": 2068, "text_contrastive_loss": 1.0366 }, { "contrastive_loss": 0.4268, "epoch": 4.670428893905192, "grad_norm": 14.155954360961914, "learning_rate": 1.1179043786392507e-07, "lm_loss": 5.3104, "loss": 1.337, "step": 2069, "text_contrastive_loss": 0.7584 }, { "contrastive_loss": 0.5129, "epoch": 4.672686230248307, "grad_norm": 14.72008228302002, "learning_rate": 1.1026994106047296e-07, "lm_loss": 5.3694, "loss": 1.5167, "step": 2070, "text_contrastive_loss": 0.9338 }, { "contrastive_loss": 0.477, "epoch": 4.674943566591422, "grad_norm": 13.70339298248291, "learning_rate": 1.0875974026219149e-07, "lm_loss": 5.3814, "loss": 1.41, "step": 2071, "text_contrastive_loss": 0.7897 }, { "contrastive_loss": 0.4021, "epoch": 4.677200902934537, "grad_norm": 13.836319923400879, "learning_rate": 1.0725983864901978e-07, "lm_loss": 5.404, "loss": 1.3078, "step": 2072, "text_contrastive_loss": 0.7307 }, { "contrastive_loss": 0.4337, "epoch": 4.679458239277652, "grad_norm": 12.894781112670898, "learning_rate": 1.0577023937920816e-07, "lm_loss": 5.4553, "loss": 1.3727, "step": 2073, "text_contrastive_loss": 0.787 }, { "contrastive_loss": 0.4722, "epoch": 4.681715575620768, "grad_norm": 14.383455276489258, "learning_rate": 1.0429094558931485e-07, "lm_loss": 5.4431, "loss": 1.4711, "step": 2074, "text_contrastive_loss": 0.9092 }, { "contrastive_loss": 0.4462, "epoch": 4.683972911963883, "grad_norm": 14.542683601379395, "learning_rate": 1.0282196039419823e-07, "lm_loss": 5.4639, "loss": 1.3895, "step": 2075, "text_contrastive_loss": 0.7937 }, { "contrastive_loss": 0.4731, "epoch": 4.686230248306998, "grad_norm": 14.17236614227295, "learning_rate": 1.0136328688700958e-07, "lm_loss": 5.4445, "loss": 1.3846, "step": 2076, "text_contrastive_loss": 0.7341 }, { "contrastive_loss": 0.4429, "epoch": 4.688487584650113, "grad_norm": 14.122315406799316, "learning_rate": 9.99149281391898e-08, "lm_loss": 5.3135, "loss": 1.343, "step": 2077, "text_contrastive_loss": 0.7374 }, { "contrastive_loss": 0.5026, "epoch": 4.690744920993228, "grad_norm": 15.895790100097656, "learning_rate": 9.847688720045878e-08, "lm_loss": 5.5192, "loss": 1.4753, "step": 2078, "text_contrastive_loss": 0.8416 }, { "contrastive_loss": 0.3838, "epoch": 4.693002257336343, "grad_norm": 13.768829345703125, "learning_rate": 9.704916709881052e-08, "lm_loss": 5.501, "loss": 1.2695, "step": 2079, "text_contrastive_loss": 0.6712 }, { "contrastive_loss": 0.4364, "epoch": 4.6952595936794586, "grad_norm": 15.006620407104492, "learning_rate": 9.5631770840508e-08, "lm_loss": 5.341, "loss": 1.3842, "step": 2080, "text_contrastive_loss": 0.8275 }, { "contrastive_loss": 0.4271, "epoch": 4.697516930022573, "grad_norm": 11.774499893188477, "learning_rate": 9.422470141007667e-08, "lm_loss": 5.3438, "loss": 1.2992, "step": 2081, "text_contrastive_loss": 0.6754 }, { "contrastive_loss": 0.4275, "epoch": 4.699774266365688, "grad_norm": 12.590245246887207, "learning_rate": 9.282796177029596e-08, "lm_loss": 5.4998, "loss": 1.4736, "step": 2082, "text_contrastive_loss": 0.9923 }, { "contrastive_loss": 0.4735, "epoch": 4.702031602708804, "grad_norm": 16.8325252532959, "learning_rate": 9.144155486219442e-08, "lm_loss": 5.4661, "loss": 1.4121, "step": 2083, "text_contrastive_loss": 0.7839 }, { "contrastive_loss": 0.4601, "epoch": 4.704288939051919, "grad_norm": 14.064495086669922, "learning_rate": 9.006548360504463e-08, "lm_loss": 5.478, "loss": 1.3478, "step": 2084, "text_contrastive_loss": 0.6799 }, { "contrastive_loss": 0.3832, "epoch": 4.706546275395034, "grad_norm": 13.605438232421875, "learning_rate": 8.869975089635552e-08, "lm_loss": 5.4232, "loss": 1.3083, "step": 2085, "text_contrastive_loss": 0.7655 }, { "contrastive_loss": 0.4496, "epoch": 4.708803611738149, "grad_norm": 13.64784049987793, "learning_rate": 8.734435961186782e-08, "lm_loss": 5.3262, "loss": 1.4139, "step": 2086, "text_contrastive_loss": 0.8633 }, { "contrastive_loss": 0.4386, "epoch": 4.711060948081264, "grad_norm": 14.360736846923828, "learning_rate": 8.599931260554417e-08, "lm_loss": 5.4841, "loss": 1.3418, "step": 2087, "text_contrastive_loss": 0.7097 }, { "contrastive_loss": 0.4388, "epoch": 4.713318284424379, "grad_norm": 14.426239013671875, "learning_rate": 8.466461270956794e-08, "lm_loss": 5.5207, "loss": 1.4497, "step": 2088, "text_contrastive_loss": 0.9176 }, { "contrastive_loss": 0.4233, "epoch": 4.715575620767495, "grad_norm": 14.370715141296387, "learning_rate": 8.334026273433659e-08, "lm_loss": 5.413, "loss": 1.4366, "step": 2089, "text_contrastive_loss": 0.9438 }, { "contrastive_loss": 0.4106, "epoch": 4.717832957110609, "grad_norm": 14.648252487182617, "learning_rate": 8.202626546845172e-08, "lm_loss": 5.4194, "loss": 1.2998, "step": 2090, "text_contrastive_loss": 0.6945 }, { "contrastive_loss": 0.3891, "epoch": 4.720090293453724, "grad_norm": 12.321579933166504, "learning_rate": 8.072262367871675e-08, "lm_loss": 5.5332, "loss": 1.3766, "step": 2091, "text_contrastive_loss": 0.8684 }, { "contrastive_loss": 0.3416, "epoch": 4.72234762979684, "grad_norm": 14.964371681213379, "learning_rate": 7.942934011013037e-08, "lm_loss": 5.4944, "loss": 1.2942, "step": 2092, "text_contrastive_loss": 0.8062 }, { "contrastive_loss": 0.4328, "epoch": 4.724604966139955, "grad_norm": 13.795408248901367, "learning_rate": 7.814641748588148e-08, "lm_loss": 5.4023, "loss": 1.3785, "step": 2093, "text_contrastive_loss": 0.811 }, { "contrastive_loss": 0.3735, "epoch": 4.72686230248307, "grad_norm": 13.68954086303711, "learning_rate": 7.687385850734086e-08, "lm_loss": 5.4513, "loss": 1.3088, "step": 2094, "text_contrastive_loss": 0.7805 }, { "contrastive_loss": 0.3887, "epoch": 4.729119638826186, "grad_norm": 12.677016258239746, "learning_rate": 7.561166585405789e-08, "lm_loss": 5.3759, "loss": 1.2922, "step": 2095, "text_contrastive_loss": 0.7319 }, { "contrastive_loss": 0.4128, "epoch": 4.7313769751693, "grad_norm": 15.801119804382324, "learning_rate": 7.435984218375436e-08, "lm_loss": 5.4852, "loss": 1.3634, "step": 2096, "text_contrastive_loss": 0.8041 }, { "contrastive_loss": 0.3626, "epoch": 4.733634311512415, "grad_norm": 12.709342002868652, "learning_rate": 7.311839013231959e-08, "lm_loss": 5.4356, "loss": 1.2718, "step": 2097, "text_contrastive_loss": 0.7314 }, { "contrastive_loss": 0.4318, "epoch": 4.735891647855531, "grad_norm": 14.638545036315918, "learning_rate": 7.188731231380253e-08, "lm_loss": 5.4465, "loss": 1.4102, "step": 2098, "text_contrastive_loss": 0.8676 }, { "contrastive_loss": 0.4499, "epoch": 4.738148984198646, "grad_norm": 15.033775329589844, "learning_rate": 7.066661132040853e-08, "lm_loss": 5.4048, "loss": 1.3958, "step": 2099, "text_contrastive_loss": 0.8108 }, { "contrastive_loss": 0.4161, "epoch": 4.74040632054176, "grad_norm": 15.725586891174316, "learning_rate": 6.945628972249208e-08, "lm_loss": 5.442, "loss": 1.3051, "step": 2100, "text_contrastive_loss": 0.6896 }, { "contrastive_loss": 0.3725, "epoch": 4.742663656884876, "grad_norm": 12.941192626953125, "learning_rate": 6.825635006855458e-08, "lm_loss": 5.398, "loss": 1.2867, "step": 2101, "text_contrastive_loss": 0.7488 }, { "contrastive_loss": 0.4289, "epoch": 4.744920993227991, "grad_norm": 14.20491886138916, "learning_rate": 6.706679488523494e-08, "lm_loss": 5.4437, "loss": 1.2981, "step": 2102, "text_contrastive_loss": 0.6496 }, { "contrastive_loss": 0.447, "epoch": 4.747178329571106, "grad_norm": 14.286320686340332, "learning_rate": 6.58876266773062e-08, "lm_loss": 5.3617, "loss": 1.3436, "step": 2103, "text_contrastive_loss": 0.7208 }, { "contrastive_loss": 0.492, "epoch": 4.749435665914222, "grad_norm": 15.338074684143066, "learning_rate": 6.471884792767169e-08, "lm_loss": 5.5514, "loss": 1.4135, "step": 2104, "text_contrastive_loss": 0.7327 }, { "contrastive_loss": 0.4855, "epoch": 4.751693002257336, "grad_norm": 14.050557136535645, "learning_rate": 6.356046109735614e-08, "lm_loss": 5.4199, "loss": 1.4453, "step": 2105, "text_contrastive_loss": 0.8356 }, { "contrastive_loss": 0.4435, "epoch": 4.753950338600451, "grad_norm": 14.893415451049805, "learning_rate": 6.241246862550398e-08, "lm_loss": 5.5084, "loss": 1.4007, "step": 2106, "text_contrastive_loss": 0.8127 }, { "contrastive_loss": 0.453, "epoch": 4.756207674943567, "grad_norm": 14.830306053161621, "learning_rate": 6.127487292937328e-08, "lm_loss": 5.4497, "loss": 1.4174, "step": 2107, "text_contrastive_loss": 0.8388 }, { "contrastive_loss": 0.4327, "epoch": 4.758465011286682, "grad_norm": 14.190763473510742, "learning_rate": 6.014767640432905e-08, "lm_loss": 5.4272, "loss": 1.2878, "step": 2108, "text_contrastive_loss": 0.6248 }, { "contrastive_loss": 0.4893, "epoch": 4.760722347629796, "grad_norm": 13.900871276855469, "learning_rate": 5.903088142384106e-08, "lm_loss": 5.4392, "loss": 1.5198, "step": 2109, "text_contrastive_loss": 0.9731 }, { "contrastive_loss": 0.4402, "epoch": 4.762979683972912, "grad_norm": 13.574312210083008, "learning_rate": 5.7924490339474335e-08, "lm_loss": 5.6821, "loss": 1.4436, "step": 2110, "text_contrastive_loss": 0.8703 }, { "contrastive_loss": 0.4133, "epoch": 4.765237020316027, "grad_norm": 12.468347549438477, "learning_rate": 5.682850548089036e-08, "lm_loss": 5.4131, "loss": 1.2932, "step": 2111, "text_contrastive_loss": 0.6772 }, { "contrastive_loss": 0.443, "epoch": 4.767494356659142, "grad_norm": 13.637884140014648, "learning_rate": 5.574292915583646e-08, "lm_loss": 5.4979, "loss": 1.413, "step": 2112, "text_contrastive_loss": 0.8404 }, { "contrastive_loss": 0.4399, "epoch": 4.769751693002258, "grad_norm": 14.228924751281738, "learning_rate": 5.46677636501447e-08, "lm_loss": 5.5151, "loss": 1.3432, "step": 2113, "text_contrastive_loss": 0.7036 }, { "contrastive_loss": 0.4076, "epoch": 4.772009029345372, "grad_norm": 13.731237411499023, "learning_rate": 5.3603011227725265e-08, "lm_loss": 5.333, "loss": 1.3463, "step": 2114, "text_contrastive_loss": 0.8109 }, { "contrastive_loss": 0.4588, "epoch": 4.774266365688487, "grad_norm": 15.329798698425293, "learning_rate": 5.2548674130561974e-08, "lm_loss": 5.4235, "loss": 1.4041, "step": 2115, "text_contrastive_loss": 0.8059 }, { "contrastive_loss": 0.4198, "epoch": 4.776523702031603, "grad_norm": 14.6963472366333, "learning_rate": 5.1504754578707294e-08, "lm_loss": 5.3975, "loss": 1.2944, "step": 2116, "text_contrastive_loss": 0.6697 }, { "contrastive_loss": 0.466, "epoch": 4.778781038374718, "grad_norm": 13.653057098388672, "learning_rate": 5.047125477027959e-08, "lm_loss": 5.3148, "loss": 1.3894, "step": 2117, "text_contrastive_loss": 0.7838 }, { "contrastive_loss": 0.4982, "epoch": 4.781038374717833, "grad_norm": 14.952482223510742, "learning_rate": 4.944817688145642e-08, "lm_loss": 5.4016, "loss": 1.4058, "step": 2118, "text_contrastive_loss": 0.735 }, { "contrastive_loss": 0.5136, "epoch": 4.783295711060948, "grad_norm": 16.02672576904297, "learning_rate": 4.843552306646904e-08, "lm_loss": 5.5164, "loss": 1.54, "step": 2119, "text_contrastive_loss": 0.9495 }, { "contrastive_loss": 0.4505, "epoch": 4.785553047404063, "grad_norm": 15.640702247619629, "learning_rate": 4.743329545760122e-08, "lm_loss": 5.5695, "loss": 1.4046, "step": 2120, "text_contrastive_loss": 0.7943 }, { "contrastive_loss": 0.4595, "epoch": 4.787810383747178, "grad_norm": 13.84422779083252, "learning_rate": 4.644149616518212e-08, "lm_loss": 5.4796, "loss": 1.3819, "step": 2121, "text_contrastive_loss": 0.7487 }, { "contrastive_loss": 0.4441, "epoch": 4.790067720090294, "grad_norm": 14.773959159851074, "learning_rate": 4.5460127277582863e-08, "lm_loss": 5.3661, "loss": 1.3248, "step": 2122, "text_contrastive_loss": 0.6881 }, { "contrastive_loss": 0.3099, "epoch": 4.792325056433409, "grad_norm": 12.641318321228027, "learning_rate": 4.448919086121217e-08, "lm_loss": 5.4332, "loss": 1.2295, "step": 2123, "text_contrastive_loss": 0.7525 }, { "contrastive_loss": 0.4525, "epoch": 4.794582392776523, "grad_norm": 14.078239440917969, "learning_rate": 4.352868896051077e-08, "lm_loss": 5.3568, "loss": 1.4022, "step": 2124, "text_contrastive_loss": 0.828 }, { "contrastive_loss": 0.5084, "epoch": 4.796839729119639, "grad_norm": 14.259356498718262, "learning_rate": 4.2578623597949174e-08, "lm_loss": 5.4509, "loss": 1.4819, "step": 2125, "text_contrastive_loss": 0.8568 }, { "contrastive_loss": 0.4212, "epoch": 4.799097065462754, "grad_norm": 13.38056468963623, "learning_rate": 4.163899677402161e-08, "lm_loss": 5.3855, "loss": 1.2987, "step": 2126, "text_contrastive_loss": 0.678 }, { "contrastive_loss": 0.3706, "epoch": 4.801354401805869, "grad_norm": 13.210601806640625, "learning_rate": 4.0709810467243204e-08, "lm_loss": 5.4815, "loss": 1.2833, "step": 2127, "text_contrastive_loss": 0.7292 }, { "contrastive_loss": 0.4523, "epoch": 4.803611738148984, "grad_norm": 15.537960052490234, "learning_rate": 3.979106663414389e-08, "lm_loss": 5.5434, "loss": 1.4061, "step": 2128, "text_contrastive_loss": 0.799 }, { "contrastive_loss": 0.53, "epoch": 4.805869074492099, "grad_norm": 14.932734489440918, "learning_rate": 3.8882767209266756e-08, "lm_loss": 5.4925, "loss": 1.5161, "step": 2129, "text_contrastive_loss": 0.8738 }, { "contrastive_loss": 0.3426, "epoch": 4.808126410835214, "grad_norm": 12.81087589263916, "learning_rate": 3.7984914105162474e-08, "lm_loss": 5.421, "loss": 1.2241, "step": 2130, "text_contrastive_loss": 0.6787 }, { "contrastive_loss": 0.5529, "epoch": 4.81038374717833, "grad_norm": 15.956156730651855, "learning_rate": 3.709750921238486e-08, "lm_loss": 5.3988, "loss": 1.4407, "step": 2131, "text_contrastive_loss": 0.6957 }, { "contrastive_loss": 0.3956, "epoch": 4.812641083521445, "grad_norm": 12.624953269958496, "learning_rate": 3.622055439948813e-08, "lm_loss": 5.4586, "loss": 1.3469, "step": 2132, "text_contrastive_loss": 0.8108 }, { "contrastive_loss": 0.4179, "epoch": 4.8148984198645595, "grad_norm": 14.218852996826172, "learning_rate": 3.5354051513022405e-08, "lm_loss": 5.5242, "loss": 1.3791, "step": 2133, "text_contrastive_loss": 0.8176 }, { "contrastive_loss": 0.4733, "epoch": 4.817155756207675, "grad_norm": 13.357524871826172, "learning_rate": 3.449800237753043e-08, "lm_loss": 5.4931, "loss": 1.4689, "step": 2134, "text_contrastive_loss": 0.8926 }, { "contrastive_loss": 0.3953, "epoch": 4.81941309255079, "grad_norm": 13.375908851623535, "learning_rate": 3.365240879554144e-08, "lm_loss": 5.4471, "loss": 1.316, "step": 2135, "text_contrastive_loss": 0.752 }, { "contrastive_loss": 0.4906, "epoch": 4.821670428893905, "grad_norm": 15.428407669067383, "learning_rate": 3.281727254757061e-08, "lm_loss": 5.458, "loss": 1.4649, "step": 2136, "text_contrastive_loss": 0.8569 }, { "contrastive_loss": 0.4583, "epoch": 4.82392776523702, "grad_norm": 13.634210586547852, "learning_rate": 3.1992595392112966e-08, "lm_loss": 5.4425, "loss": 1.3316, "step": 2137, "text_contrastive_loss": 0.6581 }, { "contrastive_loss": 0.4418, "epoch": 4.826185101580135, "grad_norm": 13.692935943603516, "learning_rate": 3.117837906564114e-08, "lm_loss": 5.4512, "loss": 1.3735, "step": 2138, "text_contrastive_loss": 0.7731 }, { "contrastive_loss": 0.3172, "epoch": 4.8284424379232505, "grad_norm": 12.065055847167969, "learning_rate": 3.0374625282599826e-08, "lm_loss": 5.4717, "loss": 1.2044, "step": 2139, "text_contrastive_loss": 0.6802 }, { "contrastive_loss": 0.5013, "epoch": 4.830699774266366, "grad_norm": 15.22729206085205, "learning_rate": 2.9581335735404672e-08, "lm_loss": 5.4379, "loss": 1.4651, "step": 2140, "text_contrastive_loss": 0.84 }, { "contrastive_loss": 0.4933, "epoch": 4.832957110609481, "grad_norm": 14.925759315490723, "learning_rate": 2.8798512094436738e-08, "lm_loss": 5.4871, "loss": 1.4504, "step": 2141, "text_contrastive_loss": 0.8167 }, { "contrastive_loss": 0.4151, "epoch": 4.835214446952596, "grad_norm": 13.641921997070312, "learning_rate": 2.802615600804026e-08, "lm_loss": 5.4401, "loss": 1.3243, "step": 2142, "text_contrastive_loss": 0.7305 }, { "contrastive_loss": 0.4343, "epoch": 4.837471783295711, "grad_norm": 13.87121868133545, "learning_rate": 2.7264269102517117e-08, "lm_loss": 5.4836, "loss": 1.3676, "step": 2143, "text_contrastive_loss": 0.77 }, { "contrastive_loss": 0.3371, "epoch": 4.839729119638826, "grad_norm": 11.819186210632324, "learning_rate": 2.6512852982127357e-08, "lm_loss": 5.342, "loss": 1.2904, "step": 2144, "text_contrastive_loss": 0.8383 }, { "contrastive_loss": 0.4695, "epoch": 4.841986455981941, "grad_norm": 16.108856201171875, "learning_rate": 2.577190922908035e-08, "lm_loss": 5.4491, "loss": 1.4813, "step": 2145, "text_contrastive_loss": 0.9338 }, { "contrastive_loss": 0.539, "epoch": 4.844243792325057, "grad_norm": 15.733048439025879, "learning_rate": 2.5041439403537537e-08, "lm_loss": 5.3733, "loss": 1.4817, "step": 2146, "text_contrastive_loss": 0.8109 }, { "contrastive_loss": 0.4406, "epoch": 4.846501128668171, "grad_norm": 13.685016632080078, "learning_rate": 2.4321445043603565e-08, "lm_loss": 5.4232, "loss": 1.3548, "step": 2147, "text_contrastive_loss": 0.7437 }, { "contrastive_loss": 0.3976, "epoch": 4.8487584650112865, "grad_norm": 13.296514511108398, "learning_rate": 2.3611927665326275e-08, "lm_loss": 5.4631, "loss": 1.3468, "step": 2148, "text_contrastive_loss": 0.8058 }, { "contrastive_loss": 0.4797, "epoch": 4.851015801354402, "grad_norm": 13.962272644042969, "learning_rate": 2.291288876269393e-08, "lm_loss": 5.4876, "loss": 1.455, "step": 2149, "text_contrastive_loss": 0.8533 }, { "contrastive_loss": 0.495, "epoch": 4.853273137697517, "grad_norm": 14.14433479309082, "learning_rate": 2.222432980762912e-08, "lm_loss": 5.3203, "loss": 1.5115, "step": 2150, "text_contrastive_loss": 0.969 }, { "contrastive_loss": 0.4221, "epoch": 4.855530474040632, "grad_norm": 14.23910903930664, "learning_rate": 2.1546252249988186e-08, "lm_loss": 5.4121, "loss": 1.3148, "step": 2151, "text_contrastive_loss": 0.7029 }, { "contrastive_loss": 0.4442, "epoch": 4.857787810383747, "grad_norm": 14.589170455932617, "learning_rate": 2.087865751755791e-08, "lm_loss": 5.452, "loss": 1.361, "step": 2152, "text_contrastive_loss": 0.7431 }, { "contrastive_loss": 0.4601, "epoch": 4.860045146726862, "grad_norm": 14.748631477355957, "learning_rate": 2.0221547016051614e-08, "lm_loss": 5.4208, "loss": 1.4126, "step": 2153, "text_contrastive_loss": 0.8209 }, { "contrastive_loss": 0.4349, "epoch": 4.8623024830699775, "grad_norm": 14.336165428161621, "learning_rate": 1.957492212910639e-08, "lm_loss": 5.4313, "loss": 1.4636, "step": 2154, "text_contrastive_loss": 0.9711 }, { "contrastive_loss": 0.3987, "epoch": 4.864559819413093, "grad_norm": 15.184710502624512, "learning_rate": 1.8938784218281435e-08, "lm_loss": 5.5225, "loss": 1.3297, "step": 2155, "text_contrastive_loss": 0.7576 }, { "contrastive_loss": 0.4392, "epoch": 4.866817155756207, "grad_norm": 14.413093566894531, "learning_rate": 1.8313134623051955e-08, "lm_loss": 5.4948, "loss": 1.3908, "step": 2156, "text_contrastive_loss": 0.8041 }, { "contrastive_loss": 0.4065, "epoch": 4.8690744920993225, "grad_norm": 12.513361930847168, "learning_rate": 1.7697974660811357e-08, "lm_loss": 5.225, "loss": 1.2455, "step": 2157, "text_contrastive_loss": 0.6329 }, { "contrastive_loss": 0.4739, "epoch": 4.871331828442438, "grad_norm": 14.576942443847656, "learning_rate": 1.7093305626864065e-08, "lm_loss": 5.3395, "loss": 1.4442, "step": 2158, "text_contrastive_loss": 0.8727 }, { "contrastive_loss": 0.5077, "epoch": 4.873589164785553, "grad_norm": 15.830459594726562, "learning_rate": 1.6499128794423836e-08, "lm_loss": 5.4957, "loss": 1.5044, "step": 2159, "text_contrastive_loss": 0.8943 }, { "contrastive_loss": 0.5232, "epoch": 4.8758465011286685, "grad_norm": 16.970827102661133, "learning_rate": 1.5915445414613208e-08, "lm_loss": 5.4019, "loss": 1.5436, "step": 2160, "text_contrastive_loss": 0.9604 }, { "contrastive_loss": 0.4526, "epoch": 4.878103837471784, "grad_norm": 14.501419067382812, "learning_rate": 1.5342256716459058e-08, "lm_loss": 5.4434, "loss": 1.4074, "step": 2161, "text_contrastive_loss": 0.821 }, { "contrastive_loss": 0.4276, "epoch": 4.880361173814898, "grad_norm": 13.567630767822266, "learning_rate": 1.4779563906888172e-08, "lm_loss": 5.4065, "loss": 1.3312, "step": 2162, "text_contrastive_loss": 0.726 }, { "contrastive_loss": 0.4361, "epoch": 4.8826185101580135, "grad_norm": 13.970993041992188, "learning_rate": 1.4227368170728894e-08, "lm_loss": 5.3345, "loss": 1.3425, "step": 2163, "text_contrastive_loss": 0.7458 }, { "contrastive_loss": 0.4098, "epoch": 4.884875846501129, "grad_norm": 13.189488410949707, "learning_rate": 1.3685670670706697e-08, "lm_loss": 5.5338, "loss": 1.3498, "step": 2164, "text_contrastive_loss": 0.7734 }, { "contrastive_loss": 0.4967, "epoch": 4.887133182844244, "grad_norm": 15.030165672302246, "learning_rate": 1.3154472547440289e-08, "lm_loss": 5.5263, "loss": 1.4195, "step": 2165, "text_contrastive_loss": 0.7402 }, { "contrastive_loss": 0.4859, "epoch": 4.889390519187359, "grad_norm": 13.789488792419434, "learning_rate": 1.2633774919441622e-08, "lm_loss": 5.4352, "loss": 1.46, "step": 2166, "text_contrastive_loss": 0.8613 }, { "contrastive_loss": 0.3811, "epoch": 4.891647855530474, "grad_norm": 12.546195983886719, "learning_rate": 1.2123578883110887e-08, "lm_loss": 5.5252, "loss": 1.324, "step": 2167, "text_contrastive_loss": 0.7807 }, { "contrastive_loss": 0.4371, "epoch": 4.893905191873589, "grad_norm": 13.049041748046875, "learning_rate": 1.1623885512737076e-08, "lm_loss": 5.4636, "loss": 1.3888, "step": 2168, "text_contrastive_loss": 0.8107 }, { "contrastive_loss": 0.3575, "epoch": 4.8961625282167045, "grad_norm": 13.92898178100586, "learning_rate": 1.1134695860493539e-08, "lm_loss": 5.356, "loss": 1.2361, "step": 2169, "text_contrastive_loss": 0.686 }, { "contrastive_loss": 0.5755, "epoch": 4.89841986455982, "grad_norm": 15.284348487854004, "learning_rate": 1.0656010956437979e-08, "lm_loss": 5.3665, "loss": 1.4893, "step": 2170, "text_contrastive_loss": 0.7544 }, { "contrastive_loss": 0.4703, "epoch": 4.900677200902934, "grad_norm": 14.312403678894043, "learning_rate": 1.018783180850691e-08, "lm_loss": 5.4431, "loss": 1.3639, "step": 2171, "text_contrastive_loss": 0.6985 }, { "contrastive_loss": 0.5322, "epoch": 4.9029345372460496, "grad_norm": 14.89530086517334, "learning_rate": 9.73015940251676e-09, "lm_loss": 5.364, "loss": 1.5712, "step": 2172, "text_contrastive_loss": 1.0051 }, { "contrastive_loss": 0.5007, "epoch": 4.905191873589165, "grad_norm": 14.06528091430664, "learning_rate": 9.282994702159986e-09, "lm_loss": 5.4174, "loss": 1.4469, "step": 2173, "text_contrastive_loss": 0.8088 }, { "contrastive_loss": 0.5287, "epoch": 4.90744920993228, "grad_norm": 14.88776969909668, "learning_rate": 8.846338649005082e-09, "lm_loss": 5.4686, "loss": 1.5223, "step": 2174, "text_contrastive_loss": 0.8934 }, { "contrastive_loss": 0.4793, "epoch": 4.909706546275395, "grad_norm": 17.28993797302246, "learning_rate": 8.42019216249046e-09, "lm_loss": 5.5607, "loss": 1.4531, "step": 2175, "text_contrastive_loss": 0.8355 }, { "contrastive_loss": 0.4204, "epoch": 4.91196388261851, "grad_norm": 13.668623924255371, "learning_rate": 8.004556139927788e-09, "lm_loss": 5.4568, "loss": 1.3353, "step": 2176, "text_contrastive_loss": 0.7385 }, { "contrastive_loss": 0.4881, "epoch": 4.914221218961625, "grad_norm": 15.048455238342285, "learning_rate": 7.599431456495888e-09, "lm_loss": 5.4685, "loss": 1.3765, "step": 2177, "text_contrastive_loss": 0.683 }, { "contrastive_loss": 0.3824, "epoch": 4.9164785553047405, "grad_norm": 13.137067794799805, "learning_rate": 7.2048189652412784e-09, "lm_loss": 5.5002, "loss": 1.3224, "step": 2178, "text_contrastive_loss": 0.7798 }, { "contrastive_loss": 0.4013, "epoch": 4.918735891647856, "grad_norm": 12.891266822814941, "learning_rate": 6.820719497074857e-09, "lm_loss": 5.4584, "loss": 1.3154, "step": 2179, "text_contrastive_loss": 0.7366 }, { "contrastive_loss": 0.4835, "epoch": 4.92099322799097, "grad_norm": 14.461901664733887, "learning_rate": 6.447133860771893e-09, "lm_loss": 5.393, "loss": 1.4567, "step": 2180, "text_contrastive_loss": 0.8678 }, { "contrastive_loss": 0.44, "epoch": 4.923250564334086, "grad_norm": 13.45708179473877, "learning_rate": 6.084062842968696e-09, "lm_loss": 5.339, "loss": 1.3715, "step": 2181, "text_contrastive_loss": 0.7952 }, { "contrastive_loss": 0.4562, "epoch": 4.925507900677201, "grad_norm": 14.697256088256836, "learning_rate": 5.731507208160958e-09, "lm_loss": 5.4975, "loss": 1.4098, "step": 2182, "text_contrastive_loss": 0.8077 }, { "contrastive_loss": 0.5158, "epoch": 4.927765237020316, "grad_norm": 15.932068824768066, "learning_rate": 5.389467698704298e-09, "lm_loss": 5.4076, "loss": 1.4842, "step": 2183, "text_contrastive_loss": 0.8553 }, { "contrastive_loss": 0.481, "epoch": 4.9300225733634315, "grad_norm": 13.238265991210938, "learning_rate": 5.057945034810385e-09, "lm_loss": 5.3901, "loss": 1.3689, "step": 2184, "text_contrastive_loss": 0.6978 }, { "contrastive_loss": 0.3412, "epoch": 4.932279909706546, "grad_norm": 11.94501781463623, "learning_rate": 4.736939914545824e-09, "lm_loss": 5.3867, "loss": 1.29, "step": 2185, "text_contrastive_loss": 0.8203 }, { "contrastive_loss": 0.4008, "epoch": 4.934537246049661, "grad_norm": 12.516329765319824, "learning_rate": 4.4264530138310445e-09, "lm_loss": 5.3825, "loss": 1.3353, "step": 2186, "text_contrastive_loss": 0.7925 }, { "contrastive_loss": 0.4752, "epoch": 4.936794582392777, "grad_norm": 14.929271697998047, "learning_rate": 4.1264849864403044e-09, "lm_loss": 5.4827, "loss": 1.4275, "step": 2187, "text_contrastive_loss": 0.8081 }, { "contrastive_loss": 0.5297, "epoch": 4.939051918735892, "grad_norm": 15.888978958129883, "learning_rate": 3.837036463997246e-09, "lm_loss": 5.4956, "loss": 1.5813, "step": 2188, "text_contrastive_loss": 1.004 }, { "contrastive_loss": 0.5268, "epoch": 4.941309255079007, "grad_norm": 16.01238250732422, "learning_rate": 3.558108055976006e-09, "lm_loss": 5.4604, "loss": 1.5329, "step": 2189, "text_contrastive_loss": 0.9199 }, { "contrastive_loss": 0.474, "epoch": 4.943566591422122, "grad_norm": 14.927141189575195, "learning_rate": 3.289700349698999e-09, "lm_loss": 5.4481, "loss": 1.3322, "step": 2190, "text_contrastive_loss": 0.6269 }, { "contrastive_loss": 0.3963, "epoch": 4.945823927765237, "grad_norm": 13.972898483276367, "learning_rate": 3.0318139103363564e-09, "lm_loss": 5.4095, "loss": 1.3252, "step": 2191, "text_contrastive_loss": 0.7757 }, { "contrastive_loss": 0.4645, "epoch": 4.948081264108352, "grad_norm": 15.38901424407959, "learning_rate": 2.7844492809031567e-09, "lm_loss": 5.5284, "loss": 1.4136, "step": 2192, "text_contrastive_loss": 0.7926 }, { "contrastive_loss": 0.4929, "epoch": 4.950338600451468, "grad_norm": 14.944534301757812, "learning_rate": 2.547606982260531e-09, "lm_loss": 5.4075, "loss": 1.5026, "step": 2193, "text_contrastive_loss": 0.9379 }, { "contrastive_loss": 0.4853, "epoch": 4.952595936794582, "grad_norm": 16.36857032775879, "learning_rate": 2.3212875131117805e-09, "lm_loss": 5.3899, "loss": 1.5077, "step": 2194, "text_contrastive_loss": 0.9668 }, { "contrastive_loss": 0.4602, "epoch": 4.954853273137697, "grad_norm": 16.05774688720703, "learning_rate": 2.1054913500051512e-09, "lm_loss": 5.3544, "loss": 1.3854, "step": 2195, "text_contrastive_loss": 0.7795 }, { "contrastive_loss": 0.4751, "epoch": 4.957110609480813, "grad_norm": 13.837918281555176, "learning_rate": 1.9002189473288356e-09, "lm_loss": 5.4565, "loss": 1.4566, "step": 2196, "text_contrastive_loss": 0.8716 }, { "contrastive_loss": 0.4103, "epoch": 4.959367945823928, "grad_norm": 13.76887035369873, "learning_rate": 1.7054707373126423e-09, "lm_loss": 5.4632, "loss": 1.3389, "step": 2197, "text_contrastive_loss": 0.7646 }, { "contrastive_loss": 0.3864, "epoch": 4.961625282167043, "grad_norm": 12.906379699707031, "learning_rate": 1.5212471300252163e-09, "lm_loss": 5.385, "loss": 1.2397, "step": 2198, "text_contrastive_loss": 0.6297 }, { "contrastive_loss": 0.3989, "epoch": 4.963882618510158, "grad_norm": 13.055458068847656, "learning_rate": 1.347548513375707e-09, "lm_loss": 5.4405, "loss": 1.2693, "step": 2199, "text_contrastive_loss": 0.6528 }, { "contrastive_loss": 0.4608, "epoch": 4.966139954853273, "grad_norm": 14.787433624267578, "learning_rate": 1.1843752531104368e-09, "lm_loss": 5.4163, "loss": 1.4305, "step": 2200, "text_contrastive_loss": 0.8561 }, { "contrastive_loss": 0.4724, "epoch": 4.968397291196388, "grad_norm": 14.985630989074707, "learning_rate": 1.0317276928134557e-09, "lm_loss": 5.4577, "loss": 1.3722, "step": 2201, "text_contrastive_loss": 0.7079 }, { "contrastive_loss": 0.5348, "epoch": 4.970654627539504, "grad_norm": 14.82340145111084, "learning_rate": 8.896061539048762e-10, "lm_loss": 5.4347, "loss": 1.4779, "step": 2202, "text_contrastive_loss": 0.7993 }, { "contrastive_loss": 0.4554, "epoch": 4.972911963882618, "grad_norm": 14.87088680267334, "learning_rate": 7.580109356419841e-10, "lm_loss": 5.4693, "loss": 1.4443, "step": 2203, "text_contrastive_loss": 0.8839 }, { "contrastive_loss": 0.4641, "epoch": 4.975169300225733, "grad_norm": 14.46483039855957, "learning_rate": 6.369423151164622e-10, "lm_loss": 5.4776, "loss": 1.3967, "step": 2204, "text_contrastive_loss": 0.7697 }, { "contrastive_loss": 0.4639, "epoch": 4.977426636568849, "grad_norm": 15.03017807006836, "learning_rate": 5.264005472549461e-10, "lm_loss": 5.4595, "loss": 1.3375, "step": 2205, "text_contrastive_loss": 0.6554 }, { "contrastive_loss": 0.3595, "epoch": 4.979683972911964, "grad_norm": 12.984131813049316, "learning_rate": 4.2638586481846823e-10, "lm_loss": 5.4768, "loss": 1.2663, "step": 2206, "text_contrastive_loss": 0.7183 }, { "contrastive_loss": 0.406, "epoch": 4.981941309255079, "grad_norm": 12.96347713470459, "learning_rate": 3.368984784024587e-10, "lm_loss": 5.5238, "loss": 1.3241, "step": 2207, "text_contrastive_loss": 0.7313 }, { "contrastive_loss": 0.4922, "epoch": 4.984198645598195, "grad_norm": 14.127623558044434, "learning_rate": 2.5793857643396924e-10, "lm_loss": 5.4, "loss": 1.4226, "step": 2208, "text_contrastive_loss": 0.7808 }, { "contrastive_loss": 0.4765, "epoch": 4.986455981941309, "grad_norm": 14.21364974975586, "learning_rate": 1.89506325175004e-10, "lm_loss": 5.4665, "loss": 1.4513, "step": 2209, "text_contrastive_loss": 0.8562 }, { "contrastive_loss": 0.4283, "epoch": 4.988713318284424, "grad_norm": 14.55248737335205, "learning_rate": 1.316018687191889e-10, "lm_loss": 5.4608, "loss": 1.4691, "step": 2210, "text_contrastive_loss": 0.9894 }, { "contrastive_loss": 0.4747, "epoch": 4.99097065462754, "grad_norm": 14.030537605285645, "learning_rate": 8.422532899121649e-11, "lm_loss": 5.3691, "loss": 1.3971, "step": 2211, "text_contrastive_loss": 0.7709 }, { "contrastive_loss": 0.4853, "epoch": 4.993227990970655, "grad_norm": 16.008319854736328, "learning_rate": 4.737680575017667e-11, "lm_loss": 5.347, "loss": 1.5166, "step": 2212, "text_contrastive_loss": 0.9931 }, { "contrastive_loss": 0.4301, "epoch": 4.995485327313769, "grad_norm": 15.602864265441895, "learning_rate": 2.1056376585115723e-11, "lm_loss": 5.4221, "loss": 1.3487, "step": 2213, "text_contrastive_loss": 0.7528 }, { "contrastive_loss": 0.4854, "epoch": 4.997742663656885, "grad_norm": 14.93419361114502, "learning_rate": 5.2640969172568225e-12, "lm_loss": 5.5717, "loss": 1.4927, "step": 2214, "text_contrastive_loss": 0.9002 }, { "contrastive_loss": 0.3065, "epoch": 5.0, "grad_norm": 16.354124069213867, "learning_rate": 0.0, "lm_loss": 5.4002, "loss": 1.0928, "step": 2215, "text_contrastive_loss": 0.4926 }, { "epoch": 5.0, "step": 2215, "total_flos": 5.949279369691136e+17, "train_loss": 1.6049966390730297, "train_runtime": 4868.8105, "train_samples_per_second": 116.34, "train_steps_per_second": 0.455 } ], "logging_steps": 1, "max_steps": 2215, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.949279369691136e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }