{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3002, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006662780044973765, "learning_rate": 0.0, "loss": 6.0407, "step": 1 }, { "epoch": 0.001332556008994753, "learning_rate": 5.494505494505495e-07, "loss": 6.0155, "step": 2 }, { "epoch": 0.0019988340134921294, "learning_rate": 1.098901098901099e-06, "loss": 6.0367, "step": 3 }, { "epoch": 0.002665112017989506, "learning_rate": 1.6483516483516484e-06, "loss": 6.0308, "step": 4 }, { "epoch": 0.0033313900224868826, "learning_rate": 2.197802197802198e-06, "loss": 6.0202, "step": 5 }, { "epoch": 0.003997668026984259, "learning_rate": 2.747252747252747e-06, "loss": 6.0369, "step": 6 }, { "epoch": 0.004663946031481635, "learning_rate": 3.2967032967032968e-06, "loss": 6.0004, "step": 7 }, { "epoch": 0.005330224035979012, "learning_rate": 3.846153846153847e-06, "loss": 6.0081, "step": 8 }, { "epoch": 0.005996502040476389, "learning_rate": 4.395604395604396e-06, "loss": 6.0258, "step": 9 }, { "epoch": 0.006662780044973765, "learning_rate": 4.945054945054945e-06, "loss": 5.965, "step": 10 }, { "epoch": 0.007329058049471142, "learning_rate": 5.494505494505494e-06, "loss": 5.9869, "step": 11 }, { "epoch": 0.007995336053968518, "learning_rate": 6.043956043956044e-06, "loss": 5.9934, "step": 12 }, { "epoch": 0.008661614058465895, "learning_rate": 6.5934065934065935e-06, "loss": 5.9449, "step": 13 }, { "epoch": 0.00932789206296327, "learning_rate": 7.142857142857143e-06, "loss": 5.9949, "step": 14 }, { "epoch": 0.009994170067460648, "learning_rate": 7.692307692307694e-06, "loss": 5.921, "step": 15 }, { "epoch": 0.010660448071958024, "learning_rate": 8.241758241758243e-06, "loss": 5.9454, "step": 16 }, { "epoch": 0.011326726076455402, "learning_rate": 8.791208791208792e-06, "loss": 5.9267, "step": 17 }, { "epoch": 0.011993004080952777, "learning_rate": 9.340659340659341e-06, "loss": 5.9296, "step": 18 }, { "epoch": 0.012659282085450155, "learning_rate": 9.89010989010989e-06, "loss": 5.8777, "step": 19 }, { "epoch": 0.01332556008994753, "learning_rate": 1.0439560439560441e-05, "loss": 5.8725, "step": 20 }, { "epoch": 0.013991838094444908, "learning_rate": 1.0989010989010989e-05, "loss": 5.8727, "step": 21 }, { "epoch": 0.014658116098942284, "learning_rate": 1.153846153846154e-05, "loss": 5.7883, "step": 22 }, { "epoch": 0.01532439410343966, "learning_rate": 1.2087912087912089e-05, "loss": 5.8406, "step": 23 }, { "epoch": 0.015990672107937035, "learning_rate": 1.2637362637362638e-05, "loss": 5.8149, "step": 24 }, { "epoch": 0.016656950112434413, "learning_rate": 1.3186813186813187e-05, "loss": 5.7868, "step": 25 }, { "epoch": 0.01732322811693179, "learning_rate": 1.3736263736263738e-05, "loss": 5.7693, "step": 26 }, { "epoch": 0.017989506121429168, "learning_rate": 1.4285714285714285e-05, "loss": 5.7749, "step": 27 }, { "epoch": 0.01865578412592654, "learning_rate": 1.4835164835164836e-05, "loss": 5.7304, "step": 28 }, { "epoch": 0.01932206213042392, "learning_rate": 1.5384615384615387e-05, "loss": 5.7311, "step": 29 }, { "epoch": 0.019988340134921297, "learning_rate": 1.5934065934065933e-05, "loss": 5.6832, "step": 30 }, { "epoch": 0.020654618139418674, "learning_rate": 1.6483516483516486e-05, "loss": 5.691, "step": 31 }, { "epoch": 0.021320896143916048, "learning_rate": 1.7032967032967035e-05, "loss": 5.65, "step": 32 }, { "epoch": 0.021987174148413426, "learning_rate": 1.7582417582417584e-05, "loss": 5.6071, "step": 33 }, { "epoch": 0.022653452152910803, "learning_rate": 1.8131868131868133e-05, "loss": 5.6248, "step": 34 }, { "epoch": 0.023319730157408177, "learning_rate": 1.8681318681318682e-05, "loss": 5.6343, "step": 35 }, { "epoch": 0.023986008161905555, "learning_rate": 1.923076923076923e-05, "loss": 5.5838, "step": 36 }, { "epoch": 0.024652286166402932, "learning_rate": 1.978021978021978e-05, "loss": 5.5894, "step": 37 }, { "epoch": 0.02531856417090031, "learning_rate": 2.032967032967033e-05, "loss": 5.5606, "step": 38 }, { "epoch": 0.025984842175397684, "learning_rate": 2.0879120879120882e-05, "loss": 5.5264, "step": 39 }, { "epoch": 0.02665112017989506, "learning_rate": 2.1428571428571428e-05, "loss": 5.4988, "step": 40 }, { "epoch": 0.02731739818439244, "learning_rate": 2.1978021978021977e-05, "loss": 5.4701, "step": 41 }, { "epoch": 0.027983676188889816, "learning_rate": 2.252747252747253e-05, "loss": 5.4785, "step": 42 }, { "epoch": 0.02864995419338719, "learning_rate": 2.307692307692308e-05, "loss": 5.4427, "step": 43 }, { "epoch": 0.029316232197884567, "learning_rate": 2.3626373626373628e-05, "loss": 5.4109, "step": 44 }, { "epoch": 0.029982510202381945, "learning_rate": 2.4175824175824177e-05, "loss": 5.4286, "step": 45 }, { "epoch": 0.03064878820687932, "learning_rate": 2.4725274725274727e-05, "loss": 5.4021, "step": 46 }, { "epoch": 0.031315066211376696, "learning_rate": 2.5274725274725276e-05, "loss": 5.38, "step": 47 }, { "epoch": 0.03198134421587407, "learning_rate": 2.582417582417583e-05, "loss": 5.3758, "step": 48 }, { "epoch": 0.03264762222037145, "learning_rate": 2.6373626373626374e-05, "loss": 5.3265, "step": 49 }, { "epoch": 0.033313900224868825, "learning_rate": 2.6923076923076923e-05, "loss": 5.3135, "step": 50 }, { "epoch": 0.033980178229366206, "learning_rate": 2.7472527472527476e-05, "loss": 5.3547, "step": 51 }, { "epoch": 0.03464645623386358, "learning_rate": 2.8021978021978025e-05, "loss": 5.311, "step": 52 }, { "epoch": 0.035312734238360954, "learning_rate": 2.857142857142857e-05, "loss": 5.3107, "step": 53 }, { "epoch": 0.035979012242858335, "learning_rate": 2.9120879120879123e-05, "loss": 5.2785, "step": 54 }, { "epoch": 0.03664529024735571, "learning_rate": 2.9670329670329673e-05, "loss": 5.1847, "step": 55 }, { "epoch": 0.03731156825185308, "learning_rate": 3.021978021978022e-05, "loss": 5.2295, "step": 56 }, { "epoch": 0.037977846256350464, "learning_rate": 3.0769230769230774e-05, "loss": 5.2033, "step": 57 }, { "epoch": 0.03864412426084784, "learning_rate": 3.131868131868132e-05, "loss": 5.1961, "step": 58 }, { "epoch": 0.03931040226534521, "learning_rate": 3.1868131868131866e-05, "loss": 5.1697, "step": 59 }, { "epoch": 0.03997668026984259, "learning_rate": 3.241758241758242e-05, "loss": 5.1378, "step": 60 }, { "epoch": 0.04064295827433997, "learning_rate": 3.296703296703297e-05, "loss": 5.1154, "step": 61 }, { "epoch": 0.04130923627883735, "learning_rate": 3.3516483516483513e-05, "loss": 5.119, "step": 62 }, { "epoch": 0.04197551428333472, "learning_rate": 3.406593406593407e-05, "loss": 5.1348, "step": 63 }, { "epoch": 0.042641792287832096, "learning_rate": 3.461538461538462e-05, "loss": 5.0983, "step": 64 }, { "epoch": 0.04330807029232948, "learning_rate": 3.516483516483517e-05, "loss": 5.0798, "step": 65 }, { "epoch": 0.04397434829682685, "learning_rate": 3.571428571428572e-05, "loss": 5.0724, "step": 66 }, { "epoch": 0.044640626301324225, "learning_rate": 3.6263736263736266e-05, "loss": 5.068, "step": 67 }, { "epoch": 0.045306904305821606, "learning_rate": 3.6813186813186815e-05, "loss": 5.0335, "step": 68 }, { "epoch": 0.04597318231031898, "learning_rate": 3.7362637362637365e-05, "loss": 5.0003, "step": 69 }, { "epoch": 0.046639460314816354, "learning_rate": 3.7912087912087914e-05, "loss": 5.0059, "step": 70 }, { "epoch": 0.047305738319313735, "learning_rate": 3.846153846153846e-05, "loss": 5.0186, "step": 71 }, { "epoch": 0.04797201632381111, "learning_rate": 3.901098901098901e-05, "loss": 4.9817, "step": 72 }, { "epoch": 0.04863829432830849, "learning_rate": 3.956043956043956e-05, "loss": 4.9817, "step": 73 }, { "epoch": 0.049304572332805864, "learning_rate": 4.010989010989011e-05, "loss": 4.9954, "step": 74 }, { "epoch": 0.04997085033730324, "learning_rate": 4.065934065934066e-05, "loss": 4.9764, "step": 75 }, { "epoch": 0.05063712834180062, "learning_rate": 4.120879120879121e-05, "loss": 4.9578, "step": 76 }, { "epoch": 0.05130340634629799, "learning_rate": 4.1758241758241765e-05, "loss": 4.9679, "step": 77 }, { "epoch": 0.05196968435079537, "learning_rate": 4.230769230769231e-05, "loss": 4.9494, "step": 78 }, { "epoch": 0.05263596235529275, "learning_rate": 4.2857142857142856e-05, "loss": 4.9266, "step": 79 }, { "epoch": 0.05330224035979012, "learning_rate": 4.340659340659341e-05, "loss": 4.9264, "step": 80 }, { "epoch": 0.053968518364287496, "learning_rate": 4.3956043956043955e-05, "loss": 4.9325, "step": 81 }, { "epoch": 0.05463479636878488, "learning_rate": 4.4505494505494504e-05, "loss": 4.9036, "step": 82 }, { "epoch": 0.05530107437328225, "learning_rate": 4.505494505494506e-05, "loss": 4.933, "step": 83 }, { "epoch": 0.05596735237777963, "learning_rate": 4.56043956043956e-05, "loss": 4.9094, "step": 84 }, { "epoch": 0.056633630382277006, "learning_rate": 4.615384615384616e-05, "loss": 4.8983, "step": 85 }, { "epoch": 0.05729990838677438, "learning_rate": 4.670329670329671e-05, "loss": 4.9235, "step": 86 }, { "epoch": 0.05796618639127176, "learning_rate": 4.7252747252747257e-05, "loss": 4.8999, "step": 87 }, { "epoch": 0.058632464395769135, "learning_rate": 4.7802197802197806e-05, "loss": 4.8887, "step": 88 }, { "epoch": 0.05929874240026651, "learning_rate": 4.8351648351648355e-05, "loss": 4.8888, "step": 89 }, { "epoch": 0.05996502040476389, "learning_rate": 4.8901098901098904e-05, "loss": 4.896, "step": 90 }, { "epoch": 0.060631298409261264, "learning_rate": 4.945054945054945e-05, "loss": 4.8584, "step": 91 }, { "epoch": 0.06129757641375864, "learning_rate": 5e-05, "loss": 4.8635, "step": 92 }, { "epoch": 0.06196385441825602, "learning_rate": 4.9999985441209204e-05, "loss": 4.8335, "step": 93 }, { "epoch": 0.06263013242275339, "learning_rate": 4.9999941764853785e-05, "loss": 4.8425, "step": 94 }, { "epoch": 0.06329641042725077, "learning_rate": 4.99998689709846e-05, "loss": 4.8485, "step": 95 }, { "epoch": 0.06396268843174814, "learning_rate": 4.999976705968644e-05, "loss": 4.7787, "step": 96 }, { "epoch": 0.06462896643624552, "learning_rate": 4.9999636031078e-05, "loss": 4.8111, "step": 97 }, { "epoch": 0.0652952444407429, "learning_rate": 4.9999475885311884e-05, "loss": 4.8388, "step": 98 }, { "epoch": 0.06596152244524027, "learning_rate": 4.9999286622574626e-05, "loss": 4.8166, "step": 99 }, { "epoch": 0.06662780044973765, "learning_rate": 4.9999068243086644e-05, "loss": 4.7999, "step": 100 }, { "epoch": 0.06729407845423503, "learning_rate": 4.9998820747102305e-05, "loss": 4.8313, "step": 101 }, { "epoch": 0.06796035645873241, "learning_rate": 4.999854413490985e-05, "loss": 4.8158, "step": 102 }, { "epoch": 0.06862663446322978, "learning_rate": 4.999823840683147e-05, "loss": 4.7823, "step": 103 }, { "epoch": 0.06929291246772716, "learning_rate": 4.999790356322323e-05, "loss": 4.8122, "step": 104 }, { "epoch": 0.06995919047222454, "learning_rate": 4.999753960447513e-05, "loss": 4.7459, "step": 105 }, { "epoch": 0.07062546847672191, "learning_rate": 4.9997146531011076e-05, "loss": 4.7865, "step": 106 }, { "epoch": 0.07129174648121929, "learning_rate": 4.9996724343288875e-05, "loss": 4.7432, "step": 107 }, { "epoch": 0.07195802448571667, "learning_rate": 4.9996273041800257e-05, "loss": 4.7635, "step": 108 }, { "epoch": 0.07262430249021404, "learning_rate": 4.9995792627070856e-05, "loss": 4.7816, "step": 109 }, { "epoch": 0.07329058049471142, "learning_rate": 4.999528309966021e-05, "loss": 4.7464, "step": 110 }, { "epoch": 0.0739568584992088, "learning_rate": 4.999474446016176e-05, "loss": 4.7717, "step": 111 }, { "epoch": 0.07462313650370617, "learning_rate": 4.999417670920287e-05, "loss": 4.7658, "step": 112 }, { "epoch": 0.07528941450820355, "learning_rate": 4.999357984744479e-05, "loss": 4.7398, "step": 113 }, { "epoch": 0.07595569251270093, "learning_rate": 4.999295387558271e-05, "loss": 4.7242, "step": 114 }, { "epoch": 0.0766219705171983, "learning_rate": 4.999229879434568e-05, "loss": 4.6888, "step": 115 }, { "epoch": 0.07728824852169568, "learning_rate": 4.999161460449669e-05, "loss": 4.7404, "step": 116 }, { "epoch": 0.07795452652619306, "learning_rate": 4.99909013068326e-05, "loss": 4.7114, "step": 117 }, { "epoch": 0.07862080453069042, "learning_rate": 4.999015890218421e-05, "loss": 4.7017, "step": 118 }, { "epoch": 0.0792870825351878, "learning_rate": 4.9989387391416185e-05, "loss": 4.7107, "step": 119 }, { "epoch": 0.07995336053968519, "learning_rate": 4.998858677542711e-05, "loss": 4.7256, "step": 120 }, { "epoch": 0.08061963854418255, "learning_rate": 4.998775705514947e-05, "loss": 4.7365, "step": 121 }, { "epoch": 0.08128591654867993, "learning_rate": 4.998689823154965e-05, "loss": 4.723, "step": 122 }, { "epoch": 0.08195219455317732, "learning_rate": 4.998601030562791e-05, "loss": 4.6716, "step": 123 }, { "epoch": 0.0826184725576747, "learning_rate": 4.9985093278418426e-05, "loss": 4.7119, "step": 124 }, { "epoch": 0.08328475056217206, "learning_rate": 4.998414715098926e-05, "loss": 4.6685, "step": 125 }, { "epoch": 0.08395102856666944, "learning_rate": 4.9983171924442374e-05, "loss": 4.6733, "step": 126 }, { "epoch": 0.08461730657116683, "learning_rate": 4.998216759991361e-05, "loss": 4.7082, "step": 127 }, { "epoch": 0.08528358457566419, "learning_rate": 4.998113417857272e-05, "loss": 4.6918, "step": 128 }, { "epoch": 0.08594986258016157, "learning_rate": 4.998007166162333e-05, "loss": 4.6874, "step": 129 }, { "epoch": 0.08661614058465895, "learning_rate": 4.997898005030295e-05, "loss": 4.683, "step": 130 }, { "epoch": 0.08728241858915632, "learning_rate": 4.997785934588298e-05, "loss": 4.6609, "step": 131 }, { "epoch": 0.0879486965936537, "learning_rate": 4.997670954966872e-05, "loss": 4.647, "step": 132 }, { "epoch": 0.08861497459815108, "learning_rate": 4.9975530662999344e-05, "loss": 4.6885, "step": 133 }, { "epoch": 0.08928125260264845, "learning_rate": 4.997432268724789e-05, "loss": 4.6627, "step": 134 }, { "epoch": 0.08994753060714583, "learning_rate": 4.9973085623821304e-05, "loss": 4.6224, "step": 135 }, { "epoch": 0.09061380861164321, "learning_rate": 4.9971819474160384e-05, "loss": 4.6601, "step": 136 }, { "epoch": 0.09128008661614058, "learning_rate": 4.997052423973983e-05, "loss": 4.6226, "step": 137 }, { "epoch": 0.09194636462063796, "learning_rate": 4.996919992206821e-05, "loss": 4.666, "step": 138 }, { "epoch": 0.09261264262513534, "learning_rate": 4.996784652268795e-05, "loss": 4.6424, "step": 139 }, { "epoch": 0.09327892062963271, "learning_rate": 4.996646404317537e-05, "loss": 4.6593, "step": 140 }, { "epoch": 0.09394519863413009, "learning_rate": 4.996505248514063e-05, "loss": 4.6517, "step": 141 }, { "epoch": 0.09461147663862747, "learning_rate": 4.996361185022779e-05, "loss": 4.6581, "step": 142 }, { "epoch": 0.09527775464312484, "learning_rate": 4.996214214011476e-05, "loss": 4.6286, "step": 143 }, { "epoch": 0.09594403264762222, "learning_rate": 4.996064335651332e-05, "loss": 4.649, "step": 144 }, { "epoch": 0.0966103106521196, "learning_rate": 4.995911550116911e-05, "loss": 4.6143, "step": 145 }, { "epoch": 0.09727658865661698, "learning_rate": 4.9957558575861606e-05, "loss": 4.6136, "step": 146 }, { "epoch": 0.09794286666111435, "learning_rate": 4.9955972582404185e-05, "loss": 4.6261, "step": 147 }, { "epoch": 0.09860914466561173, "learning_rate": 4.995435752264406e-05, "loss": 4.6352, "step": 148 }, { "epoch": 0.09927542267010911, "learning_rate": 4.995271339846229e-05, "loss": 4.6492, "step": 149 }, { "epoch": 0.09994170067460648, "learning_rate": 4.9951040211773795e-05, "loss": 4.5977, "step": 150 }, { "epoch": 0.10060797867910386, "learning_rate": 4.9949337964527334e-05, "loss": 4.6205, "step": 151 }, { "epoch": 0.10127425668360124, "learning_rate": 4.994760665870552e-05, "loss": 4.6554, "step": 152 }, { "epoch": 0.1019405346880986, "learning_rate": 4.994584629632482e-05, "loss": 4.6201, "step": 153 }, { "epoch": 0.10260681269259599, "learning_rate": 4.994405687943552e-05, "loss": 4.5537, "step": 154 }, { "epoch": 0.10327309069709337, "learning_rate": 4.994223841012178e-05, "loss": 4.5944, "step": 155 }, { "epoch": 0.10393936870159073, "learning_rate": 4.994039089050156e-05, "loss": 4.5989, "step": 156 }, { "epoch": 0.10460564670608812, "learning_rate": 4.9938514322726676e-05, "loss": 4.5668, "step": 157 }, { "epoch": 0.1052719247105855, "learning_rate": 4.993660870898278e-05, "loss": 4.5856, "step": 158 }, { "epoch": 0.10593820271508286, "learning_rate": 4.9934674051489334e-05, "loss": 4.5817, "step": 159 }, { "epoch": 0.10660448071958024, "learning_rate": 4.9932710352499644e-05, "loss": 4.6066, "step": 160 }, { "epoch": 0.10727075872407763, "learning_rate": 4.9930717614300846e-05, "loss": 4.6158, "step": 161 }, { "epoch": 0.10793703672857499, "learning_rate": 4.992869583921388e-05, "loss": 4.5853, "step": 162 }, { "epoch": 0.10860331473307237, "learning_rate": 4.992664502959351e-05, "loss": 4.594, "step": 163 }, { "epoch": 0.10926959273756975, "learning_rate": 4.9924565187828334e-05, "loss": 4.5727, "step": 164 }, { "epoch": 0.10993587074206712, "learning_rate": 4.9922456316340746e-05, "loss": 4.6132, "step": 165 }, { "epoch": 0.1106021487465645, "learning_rate": 4.9920318417586944e-05, "loss": 4.5477, "step": 166 }, { "epoch": 0.11126842675106188, "learning_rate": 4.9918151494056956e-05, "loss": 4.572, "step": 167 }, { "epoch": 0.11193470475555926, "learning_rate": 4.9915955548274606e-05, "loss": 4.5472, "step": 168 }, { "epoch": 0.11260098276005663, "learning_rate": 4.9913730582797514e-05, "loss": 4.5751, "step": 169 }, { "epoch": 0.11326726076455401, "learning_rate": 4.991147660021711e-05, "loss": 4.5776, "step": 170 }, { "epoch": 0.11393353876905139, "learning_rate": 4.99091936031586e-05, "loss": 4.5879, "step": 171 }, { "epoch": 0.11459981677354876, "learning_rate": 4.9906881594281016e-05, "loss": 4.5459, "step": 172 }, { "epoch": 0.11526609477804614, "learning_rate": 4.9904540576277164e-05, "loss": 4.6133, "step": 173 }, { "epoch": 0.11593237278254352, "learning_rate": 4.990217055187362e-05, "loss": 4.5549, "step": 174 }, { "epoch": 0.11659865078704089, "learning_rate": 4.9899771523830776e-05, "loss": 4.5956, "step": 175 }, { "epoch": 0.11726492879153827, "learning_rate": 4.989734349494277e-05, "loss": 4.553, "step": 176 }, { "epoch": 0.11793120679603565, "learning_rate": 4.989488646803754e-05, "loss": 4.5065, "step": 177 }, { "epoch": 0.11859748480053302, "learning_rate": 4.98924004459768e-05, "loss": 4.6218, "step": 178 }, { "epoch": 0.1192637628050304, "learning_rate": 4.9889885431656024e-05, "loss": 4.5372, "step": 179 }, { "epoch": 0.11993004080952778, "learning_rate": 4.9887341428004466e-05, "loss": 4.4715, "step": 180 }, { "epoch": 0.12059631881402515, "learning_rate": 4.988476843798512e-05, "loss": 4.5993, "step": 181 }, { "epoch": 0.12126259681852253, "learning_rate": 4.988216646459477e-05, "loss": 4.5548, "step": 182 }, { "epoch": 0.12192887482301991, "learning_rate": 4.9879535510863926e-05, "loss": 4.4919, "step": 183 }, { "epoch": 0.12259515282751728, "learning_rate": 4.9876875579856884e-05, "loss": 4.5132, "step": 184 }, { "epoch": 0.12326143083201466, "learning_rate": 4.987418667467167e-05, "loss": 4.5502, "step": 185 }, { "epoch": 0.12392770883651204, "learning_rate": 4.987146879844006e-05, "loss": 4.5136, "step": 186 }, { "epoch": 0.1245939868410094, "learning_rate": 4.986872195432757e-05, "loss": 4.5594, "step": 187 }, { "epoch": 0.12526026484550679, "learning_rate": 4.986594614553346e-05, "loss": 4.5294, "step": 188 }, { "epoch": 0.12592654285000415, "learning_rate": 4.9863141375290726e-05, "loss": 4.5162, "step": 189 }, { "epoch": 0.12659282085450155, "learning_rate": 4.986030764686609e-05, "loss": 4.5408, "step": 190 }, { "epoch": 0.12725909885899891, "learning_rate": 4.985744496356002e-05, "loss": 4.5451, "step": 191 }, { "epoch": 0.12792537686349628, "learning_rate": 4.9854553328706667e-05, "loss": 4.5246, "step": 192 }, { "epoch": 0.12859165486799368, "learning_rate": 4.985163274567394e-05, "loss": 4.549, "step": 193 }, { "epoch": 0.12925793287249104, "learning_rate": 4.984868321786345e-05, "loss": 4.5119, "step": 194 }, { "epoch": 0.1299242108769884, "learning_rate": 4.984570474871053e-05, "loss": 4.5146, "step": 195 }, { "epoch": 0.1305904888814858, "learning_rate": 4.9842697341684195e-05, "loss": 4.4813, "step": 196 }, { "epoch": 0.13125676688598317, "learning_rate": 4.983966100028721e-05, "loss": 4.4736, "step": 197 }, { "epoch": 0.13192304489048054, "learning_rate": 4.983659572805598e-05, "loss": 4.5111, "step": 198 }, { "epoch": 0.13258932289497793, "learning_rate": 4.9833501528560654e-05, "loss": 4.5274, "step": 199 }, { "epoch": 0.1332556008994753, "learning_rate": 4.9830378405405056e-05, "loss": 4.5251, "step": 200 }, { "epoch": 0.1339218789039727, "learning_rate": 4.9827226362226695e-05, "loss": 4.4726, "step": 201 }, { "epoch": 0.13458815690847006, "learning_rate": 4.982404540269677e-05, "loss": 4.5376, "step": 202 }, { "epoch": 0.13525443491296743, "learning_rate": 4.982083553052015e-05, "loss": 4.5167, "step": 203 }, { "epoch": 0.13592071291746483, "learning_rate": 4.981759674943538e-05, "loss": 4.5353, "step": 204 }, { "epoch": 0.1365869909219622, "learning_rate": 4.981432906321469e-05, "loss": 4.524, "step": 205 }, { "epoch": 0.13725326892645956, "learning_rate": 4.981103247566396e-05, "loss": 4.5275, "step": 206 }, { "epoch": 0.13791954693095695, "learning_rate": 4.980770699062273e-05, "loss": 4.5068, "step": 207 }, { "epoch": 0.13858582493545432, "learning_rate": 4.980435261196421e-05, "loss": 4.4791, "step": 208 }, { "epoch": 0.1392521029399517, "learning_rate": 4.980096934359526e-05, "loss": 4.5073, "step": 209 }, { "epoch": 0.13991838094444908, "learning_rate": 4.9797557189456376e-05, "loss": 4.5383, "step": 210 }, { "epoch": 0.14058465894894645, "learning_rate": 4.9794116153521695e-05, "loss": 4.5615, "step": 211 }, { "epoch": 0.14125093695344382, "learning_rate": 4.9790646239799035e-05, "loss": 4.4864, "step": 212 }, { "epoch": 0.1419172149579412, "learning_rate": 4.978714745232979e-05, "loss": 4.4746, "step": 213 }, { "epoch": 0.14258349296243858, "learning_rate": 4.978361979518901e-05, "loss": 4.5139, "step": 214 }, { "epoch": 0.14324977096693595, "learning_rate": 4.978006327248537e-05, "loss": 4.4864, "step": 215 }, { "epoch": 0.14391604897143334, "learning_rate": 4.977647788836117e-05, "loss": 4.4936, "step": 216 }, { "epoch": 0.1445823269759307, "learning_rate": 4.977286364699232e-05, "loss": 4.497, "step": 217 }, { "epoch": 0.14524860498042808, "learning_rate": 4.976922055258833e-05, "loss": 4.4785, "step": 218 }, { "epoch": 0.14591488298492547, "learning_rate": 4.976554860939233e-05, "loss": 4.4677, "step": 219 }, { "epoch": 0.14658116098942284, "learning_rate": 4.9761847821681045e-05, "loss": 4.4901, "step": 220 }, { "epoch": 0.1472474389939202, "learning_rate": 4.9758118193764794e-05, "loss": 4.5107, "step": 221 }, { "epoch": 0.1479137169984176, "learning_rate": 4.9754359729987475e-05, "loss": 4.5, "step": 222 }, { "epoch": 0.14857999500291497, "learning_rate": 4.9750572434726603e-05, "loss": 4.4358, "step": 223 }, { "epoch": 0.14924627300741233, "learning_rate": 4.974675631239324e-05, "loss": 4.4847, "step": 224 }, { "epoch": 0.14991255101190973, "learning_rate": 4.974291136743204e-05, "loss": 4.5072, "step": 225 }, { "epoch": 0.1505788290164071, "learning_rate": 4.973903760432123e-05, "loss": 4.4339, "step": 226 }, { "epoch": 0.15124510702090446, "learning_rate": 4.9735135027572576e-05, "loss": 4.4287, "step": 227 }, { "epoch": 0.15191138502540186, "learning_rate": 4.973120364173144e-05, "loss": 4.4386, "step": 228 }, { "epoch": 0.15257766302989922, "learning_rate": 4.972724345137671e-05, "loss": 4.5081, "step": 229 }, { "epoch": 0.1532439410343966, "learning_rate": 4.9723254461120826e-05, "loss": 4.4975, "step": 230 }, { "epoch": 0.153910219038894, "learning_rate": 4.97192366756098e-05, "loss": 4.4634, "step": 231 }, { "epoch": 0.15457649704339135, "learning_rate": 4.9715190099523146e-05, "loss": 4.4864, "step": 232 }, { "epoch": 0.15524277504788872, "learning_rate": 4.971111473757392e-05, "loss": 4.5022, "step": 233 }, { "epoch": 0.15590905305238612, "learning_rate": 4.970701059450872e-05, "loss": 4.4954, "step": 234 }, { "epoch": 0.15657533105688348, "learning_rate": 4.970287767510764e-05, "loss": 4.4835, "step": 235 }, { "epoch": 0.15724160906138085, "learning_rate": 4.9698715984184326e-05, "loss": 4.4899, "step": 236 }, { "epoch": 0.15790788706587824, "learning_rate": 4.96945255265859e-05, "loss": 4.5067, "step": 237 }, { "epoch": 0.1585741650703756, "learning_rate": 4.9690306307192996e-05, "loss": 4.4808, "step": 238 }, { "epoch": 0.15924044307487298, "learning_rate": 4.9686058330919764e-05, "loss": 4.4594, "step": 239 }, { "epoch": 0.15990672107937037, "learning_rate": 4.9681781602713826e-05, "loss": 4.4624, "step": 240 }, { "epoch": 0.16057299908386774, "learning_rate": 4.967747612755632e-05, "loss": 4.4525, "step": 241 }, { "epoch": 0.1612392770883651, "learning_rate": 4.9673141910461826e-05, "loss": 4.441, "step": 242 }, { "epoch": 0.1619055550928625, "learning_rate": 4.966877895647843e-05, "loss": 4.426, "step": 243 }, { "epoch": 0.16257183309735987, "learning_rate": 4.966438727068767e-05, "loss": 4.4253, "step": 244 }, { "epoch": 0.16323811110185724, "learning_rate": 4.9659966858204576e-05, "loss": 4.4875, "step": 245 }, { "epoch": 0.16390438910635463, "learning_rate": 4.96555177241776e-05, "loss": 4.4981, "step": 246 }, { "epoch": 0.164570667110852, "learning_rate": 4.965103987378866e-05, "loss": 4.4453, "step": 247 }, { "epoch": 0.1652369451153494, "learning_rate": 4.964653331225314e-05, "loss": 4.453, "step": 248 }, { "epoch": 0.16590322311984676, "learning_rate": 4.964199804481984e-05, "loss": 4.4701, "step": 249 }, { "epoch": 0.16656950112434413, "learning_rate": 4.9637434076770994e-05, "loss": 4.4298, "step": 250 }, { "epoch": 0.16723577912884152, "learning_rate": 4.9632841413422276e-05, "loss": 4.4781, "step": 251 }, { "epoch": 0.1679020571333389, "learning_rate": 4.962822006012278e-05, "loss": 4.4735, "step": 252 }, { "epoch": 0.16856833513783626, "learning_rate": 4.962357002225499e-05, "loss": 4.4732, "step": 253 }, { "epoch": 0.16923461314233365, "learning_rate": 4.961889130523485e-05, "loss": 4.4595, "step": 254 }, { "epoch": 0.16990089114683102, "learning_rate": 4.961418391451166e-05, "loss": 4.4666, "step": 255 }, { "epoch": 0.17056716915132838, "learning_rate": 4.960944785556814e-05, "loss": 4.4482, "step": 256 }, { "epoch": 0.17123344715582578, "learning_rate": 4.960468313392039e-05, "loss": 4.4532, "step": 257 }, { "epoch": 0.17189972516032315, "learning_rate": 4.9599889755117895e-05, "loss": 4.4472, "step": 258 }, { "epoch": 0.17256600316482051, "learning_rate": 4.959506772474352e-05, "loss": 4.4018, "step": 259 }, { "epoch": 0.1732322811693179, "learning_rate": 4.95902170484135e-05, "loss": 4.4424, "step": 260 }, { "epoch": 0.17389855917381528, "learning_rate": 4.9585337731777434e-05, "loss": 4.4756, "step": 261 }, { "epoch": 0.17456483717831264, "learning_rate": 4.958042978051829e-05, "loss": 4.3812, "step": 262 }, { "epoch": 0.17523111518281004, "learning_rate": 4.957549320035235e-05, "loss": 4.4625, "step": 263 }, { "epoch": 0.1758973931873074, "learning_rate": 4.957052799702928e-05, "loss": 4.4497, "step": 264 }, { "epoch": 0.17656367119180477, "learning_rate": 4.956553417633207e-05, "loss": 4.4164, "step": 265 }, { "epoch": 0.17722994919630217, "learning_rate": 4.956051174407703e-05, "loss": 4.373, "step": 266 }, { "epoch": 0.17789622720079953, "learning_rate": 4.955546070611381e-05, "loss": 4.3998, "step": 267 }, { "epoch": 0.1785625052052969, "learning_rate": 4.955038106832537e-05, "loss": 4.4354, "step": 268 }, { "epoch": 0.1792287832097943, "learning_rate": 4.9545272836627986e-05, "loss": 4.4223, "step": 269 }, { "epoch": 0.17989506121429166, "learning_rate": 4.9540136016971215e-05, "loss": 4.4381, "step": 270 }, { "epoch": 0.18056133921878903, "learning_rate": 4.953497061533795e-05, "loss": 4.4095, "step": 271 }, { "epoch": 0.18122761722328642, "learning_rate": 4.952977663774434e-05, "loss": 4.4331, "step": 272 }, { "epoch": 0.1818938952277838, "learning_rate": 4.952455409023982e-05, "loss": 4.489, "step": 273 }, { "epoch": 0.18256017323228116, "learning_rate": 4.9519302978907125e-05, "loss": 4.4181, "step": 274 }, { "epoch": 0.18322645123677855, "learning_rate": 4.951402330986222e-05, "loss": 4.4823, "step": 275 }, { "epoch": 0.18389272924127592, "learning_rate": 4.950871508925437e-05, "loss": 4.4344, "step": 276 }, { "epoch": 0.1845590072457733, "learning_rate": 4.9503378323266076e-05, "loss": 4.4195, "step": 277 }, { "epoch": 0.18522528525027068, "learning_rate": 4.9498013018113076e-05, "loss": 4.44, "step": 278 }, { "epoch": 0.18589156325476805, "learning_rate": 4.949261918004437e-05, "loss": 4.4229, "step": 279 }, { "epoch": 0.18655784125926542, "learning_rate": 4.948719681534218e-05, "loss": 4.422, "step": 280 }, { "epoch": 0.1872241192637628, "learning_rate": 4.9481745930321935e-05, "loss": 4.4572, "step": 281 }, { "epoch": 0.18789039726826018, "learning_rate": 4.94762665313323e-05, "loss": 4.445, "step": 282 }, { "epoch": 0.18855667527275755, "learning_rate": 4.947075862475518e-05, "loss": 4.4102, "step": 283 }, { "epoch": 0.18922295327725494, "learning_rate": 4.946522221700562e-05, "loss": 4.39, "step": 284 }, { "epoch": 0.1898892312817523, "learning_rate": 4.94596573145319e-05, "loss": 4.3899, "step": 285 }, { "epoch": 0.19055550928624967, "learning_rate": 4.9454063923815485e-05, "loss": 4.4474, "step": 286 }, { "epoch": 0.19122178729074707, "learning_rate": 4.944844205137101e-05, "loss": 4.4055, "step": 287 }, { "epoch": 0.19188806529524444, "learning_rate": 4.94427917037463e-05, "loss": 4.4216, "step": 288 }, { "epoch": 0.1925543432997418, "learning_rate": 4.9437112887522316e-05, "loss": 4.4892, "step": 289 }, { "epoch": 0.1932206213042392, "learning_rate": 4.943140560931321e-05, "loss": 4.4498, "step": 290 }, { "epoch": 0.19388689930873657, "learning_rate": 4.942566987576625e-05, "loss": 4.3976, "step": 291 }, { "epoch": 0.19455317731323396, "learning_rate": 4.941990569356187e-05, "loss": 4.3917, "step": 292 }, { "epoch": 0.19521945531773133, "learning_rate": 4.9414113069413646e-05, "loss": 4.4213, "step": 293 }, { "epoch": 0.1958857333222287, "learning_rate": 4.9408292010068244e-05, "loss": 4.4582, "step": 294 }, { "epoch": 0.1965520113267261, "learning_rate": 4.9402442522305494e-05, "loss": 4.3994, "step": 295 }, { "epoch": 0.19721828933122346, "learning_rate": 4.939656461293829e-05, "loss": 4.4307, "step": 296 }, { "epoch": 0.19788456733572082, "learning_rate": 4.9390658288812675e-05, "loss": 4.3798, "step": 297 }, { "epoch": 0.19855084534021822, "learning_rate": 4.9384723556807744e-05, "loss": 4.4153, "step": 298 }, { "epoch": 0.19921712334471559, "learning_rate": 4.937876042383571e-05, "loss": 4.3923, "step": 299 }, { "epoch": 0.19988340134921295, "learning_rate": 4.937276889684185e-05, "loss": 4.3945, "step": 300 }, { "epoch": 0.20054967935371035, "learning_rate": 4.9366748982804516e-05, "loss": 4.3906, "step": 301 }, { "epoch": 0.20121595735820771, "learning_rate": 4.9360700688735124e-05, "loss": 4.4157, "step": 302 }, { "epoch": 0.20188223536270508, "learning_rate": 4.935462402167814e-05, "loss": 4.4023, "step": 303 }, { "epoch": 0.20254851336720248, "learning_rate": 4.9348518988711066e-05, "loss": 4.3835, "step": 304 }, { "epoch": 0.20321479137169984, "learning_rate": 4.934238559694448e-05, "loss": 4.4232, "step": 305 }, { "epoch": 0.2038810693761972, "learning_rate": 4.933622385352194e-05, "loss": 4.394, "step": 306 }, { "epoch": 0.2045473473806946, "learning_rate": 4.933003376562006e-05, "loss": 4.4142, "step": 307 }, { "epoch": 0.20521362538519197, "learning_rate": 4.932381534044845e-05, "loss": 4.414, "step": 308 }, { "epoch": 0.20587990338968934, "learning_rate": 4.931756858524973e-05, "loss": 4.3999, "step": 309 }, { "epoch": 0.20654618139418673, "learning_rate": 4.931129350729953e-05, "loss": 4.469, "step": 310 }, { "epoch": 0.2072124593986841, "learning_rate": 4.930499011390644e-05, "loss": 4.3687, "step": 311 }, { "epoch": 0.20787873740318147, "learning_rate": 4.9298658412412036e-05, "loss": 4.3746, "step": 312 }, { "epoch": 0.20854501540767886, "learning_rate": 4.929229841019089e-05, "loss": 4.3859, "step": 313 }, { "epoch": 0.20921129341217623, "learning_rate": 4.92859101146505e-05, "loss": 4.407, "step": 314 }, { "epoch": 0.2098775714166736, "learning_rate": 4.9279493533231346e-05, "loss": 4.356, "step": 315 }, { "epoch": 0.210543849421171, "learning_rate": 4.927304867340684e-05, "loss": 4.3561, "step": 316 }, { "epoch": 0.21121012742566836, "learning_rate": 4.926657554268333e-05, "loss": 4.3962, "step": 317 }, { "epoch": 0.21187640543016573, "learning_rate": 4.926007414860009e-05, "loss": 4.3516, "step": 318 }, { "epoch": 0.21254268343466312, "learning_rate": 4.925354449872932e-05, "loss": 4.389, "step": 319 }, { "epoch": 0.2132089614391605, "learning_rate": 4.924698660067612e-05, "loss": 4.3703, "step": 320 }, { "epoch": 0.21387523944365786, "learning_rate": 4.924040046207849e-05, "loss": 4.3894, "step": 321 }, { "epoch": 0.21454151744815525, "learning_rate": 4.923378609060734e-05, "loss": 4.3517, "step": 322 }, { "epoch": 0.21520779545265262, "learning_rate": 4.9227143493966446e-05, "loss": 4.4595, "step": 323 }, { "epoch": 0.21587407345714998, "learning_rate": 4.922047267989246e-05, "loss": 4.3718, "step": 324 }, { "epoch": 0.21654035146164738, "learning_rate": 4.921377365615489e-05, "loss": 4.3841, "step": 325 }, { "epoch": 0.21720662946614475, "learning_rate": 4.9207046430556135e-05, "loss": 4.3538, "step": 326 }, { "epoch": 0.2178729074706421, "learning_rate": 4.92002910109314e-05, "loss": 4.3913, "step": 327 }, { "epoch": 0.2185391854751395, "learning_rate": 4.919350740514874e-05, "loss": 4.3916, "step": 328 }, { "epoch": 0.21920546347963688, "learning_rate": 4.918669562110906e-05, "loss": 4.4403, "step": 329 }, { "epoch": 0.21987174148413424, "learning_rate": 4.9179855666746054e-05, "loss": 4.366, "step": 330 }, { "epoch": 0.22053801948863164, "learning_rate": 4.917298755002624e-05, "loss": 4.3779, "step": 331 }, { "epoch": 0.221204297493129, "learning_rate": 4.916609127894895e-05, "loss": 4.4494, "step": 332 }, { "epoch": 0.22187057549762637, "learning_rate": 4.915916686154626e-05, "loss": 4.3516, "step": 333 }, { "epoch": 0.22253685350212377, "learning_rate": 4.91522143058831e-05, "loss": 4.4395, "step": 334 }, { "epoch": 0.22320313150662113, "learning_rate": 4.914523362005711e-05, "loss": 4.4005, "step": 335 }, { "epoch": 0.22386940951111853, "learning_rate": 4.913822481219873e-05, "loss": 4.4068, "step": 336 }, { "epoch": 0.2245356875156159, "learning_rate": 4.9131187890471134e-05, "loss": 4.4002, "step": 337 }, { "epoch": 0.22520196552011326, "learning_rate": 4.9124122863070255e-05, "loss": 4.388, "step": 338 }, { "epoch": 0.22586824352461066, "learning_rate": 4.911702973822474e-05, "loss": 4.3542, "step": 339 }, { "epoch": 0.22653452152910802, "learning_rate": 4.910990852419599e-05, "loss": 4.3643, "step": 340 }, { "epoch": 0.2272007995336054, "learning_rate": 4.910275922927809e-05, "loss": 4.395, "step": 341 }, { "epoch": 0.22786707753810279, "learning_rate": 4.9095581861797876e-05, "loss": 4.3709, "step": 342 }, { "epoch": 0.22853335554260015, "learning_rate": 4.9088376430114816e-05, "loss": 4.3973, "step": 343 }, { "epoch": 0.22919963354709752, "learning_rate": 4.9081142942621125e-05, "loss": 4.4222, "step": 344 }, { "epoch": 0.22986591155159491, "learning_rate": 4.907388140774165e-05, "loss": 4.3416, "step": 345 }, { "epoch": 0.23053218955609228, "learning_rate": 4.9066591833933946e-05, "loss": 4.4016, "step": 346 }, { "epoch": 0.23119846756058965, "learning_rate": 4.905927422968819e-05, "loss": 4.3485, "step": 347 }, { "epoch": 0.23186474556508704, "learning_rate": 4.905192860352722e-05, "loss": 4.3736, "step": 348 }, { "epoch": 0.2325310235695844, "learning_rate": 4.9044554964006505e-05, "loss": 4.3722, "step": 349 }, { "epoch": 0.23319730157408178, "learning_rate": 4.903715331971417e-05, "loss": 4.311, "step": 350 }, { "epoch": 0.23386357957857917, "learning_rate": 4.902972367927091e-05, "loss": 4.3522, "step": 351 }, { "epoch": 0.23452985758307654, "learning_rate": 4.9022266051330055e-05, "loss": 4.3447, "step": 352 }, { "epoch": 0.2351961355875739, "learning_rate": 4.9014780444577544e-05, "loss": 4.4169, "step": 353 }, { "epoch": 0.2358624135920713, "learning_rate": 4.900726686773187e-05, "loss": 4.3726, "step": 354 }, { "epoch": 0.23652869159656867, "learning_rate": 4.899972532954413e-05, "loss": 4.4393, "step": 355 }, { "epoch": 0.23719496960106604, "learning_rate": 4.8992155838797976e-05, "loss": 4.3376, "step": 356 }, { "epoch": 0.23786124760556343, "learning_rate": 4.898455840430962e-05, "loss": 4.325, "step": 357 }, { "epoch": 0.2385275256100608, "learning_rate": 4.8976933034927816e-05, "loss": 4.3556, "step": 358 }, { "epoch": 0.23919380361455816, "learning_rate": 4.8969279739533856e-05, "loss": 4.3251, "step": 359 }, { "epoch": 0.23986008161905556, "learning_rate": 4.896159852704156e-05, "loss": 4.3675, "step": 360 }, { "epoch": 0.24052635962355293, "learning_rate": 4.895388940639727e-05, "loss": 4.396, "step": 361 }, { "epoch": 0.2411926376280503, "learning_rate": 4.894615238657981e-05, "loss": 4.404, "step": 362 }, { "epoch": 0.2418589156325477, "learning_rate": 4.893838747660052e-05, "loss": 4.4052, "step": 363 }, { "epoch": 0.24252519363704506, "learning_rate": 4.893059468550321e-05, "loss": 4.3857, "step": 364 }, { "epoch": 0.24319147164154242, "learning_rate": 4.892277402236417e-05, "loss": 4.3674, "step": 365 }, { "epoch": 0.24385774964603982, "learning_rate": 4.8914925496292165e-05, "loss": 4.3845, "step": 366 }, { "epoch": 0.24452402765053718, "learning_rate": 4.890704911642838e-05, "loss": 4.4042, "step": 367 }, { "epoch": 0.24519030565503455, "learning_rate": 4.889914489194647e-05, "loss": 4.3262, "step": 368 }, { "epoch": 0.24585658365953195, "learning_rate": 4.8891212832052516e-05, "loss": 4.3431, "step": 369 }, { "epoch": 0.2465228616640293, "learning_rate": 4.8883252945985e-05, "loss": 4.3381, "step": 370 }, { "epoch": 0.24718913966852668, "learning_rate": 4.887526524301484e-05, "loss": 4.3895, "step": 371 }, { "epoch": 0.24785541767302408, "learning_rate": 4.886724973244533e-05, "loss": 4.3479, "step": 372 }, { "epoch": 0.24852169567752144, "learning_rate": 4.885920642361217e-05, "loss": 4.3252, "step": 373 }, { "epoch": 0.2491879736820188, "learning_rate": 4.8851135325883424e-05, "loss": 4.3856, "step": 374 }, { "epoch": 0.2498542516865162, "learning_rate": 4.884303644865953e-05, "loss": 4.3647, "step": 375 }, { "epoch": 0.25052052969101357, "learning_rate": 4.8834909801373264e-05, "loss": 4.4015, "step": 376 }, { "epoch": 0.25118680769551094, "learning_rate": 4.8826755393489774e-05, "loss": 4.3679, "step": 377 }, { "epoch": 0.2518530857000083, "learning_rate": 4.881857323450652e-05, "loss": 4.3645, "step": 378 }, { "epoch": 0.25251936370450573, "learning_rate": 4.881036333395329e-05, "loss": 4.3342, "step": 379 }, { "epoch": 0.2531856417090031, "learning_rate": 4.880212570139218e-05, "loss": 4.4006, "step": 380 }, { "epoch": 0.25385191971350046, "learning_rate": 4.8793860346417574e-05, "loss": 4.3553, "step": 381 }, { "epoch": 0.25451819771799783, "learning_rate": 4.8785567278656186e-05, "loss": 4.3129, "step": 382 }, { "epoch": 0.2551844757224952, "learning_rate": 4.877724650776696e-05, "loss": 4.3494, "step": 383 }, { "epoch": 0.25585075372699256, "learning_rate": 4.8768898043441136e-05, "loss": 4.4031, "step": 384 }, { "epoch": 0.25651703173149, "learning_rate": 4.876052189540219e-05, "loss": 4.3717, "step": 385 }, { "epoch": 0.25718330973598735, "learning_rate": 4.875211807340584e-05, "loss": 4.3652, "step": 386 }, { "epoch": 0.2578495877404847, "learning_rate": 4.874368658724007e-05, "loss": 4.3544, "step": 387 }, { "epoch": 0.2585158657449821, "learning_rate": 4.873522744672503e-05, "loss": 4.3551, "step": 388 }, { "epoch": 0.25918214374947945, "learning_rate": 4.8726740661713135e-05, "loss": 4.3392, "step": 389 }, { "epoch": 0.2598484217539768, "learning_rate": 4.871822624208895e-05, "loss": 4.3595, "step": 390 }, { "epoch": 0.26051469975847424, "learning_rate": 4.8709684197769266e-05, "loss": 4.3586, "step": 391 }, { "epoch": 0.2611809777629716, "learning_rate": 4.870111453870302e-05, "loss": 4.3792, "step": 392 }, { "epoch": 0.261847255767469, "learning_rate": 4.869251727487132e-05, "loss": 4.3683, "step": 393 }, { "epoch": 0.26251353377196635, "learning_rate": 4.868389241628742e-05, "loss": 4.3693, "step": 394 }, { "epoch": 0.2631798117764637, "learning_rate": 4.8675239972996746e-05, "loss": 4.3389, "step": 395 }, { "epoch": 0.2638460897809611, "learning_rate": 4.8666559955076804e-05, "loss": 4.3907, "step": 396 }, { "epoch": 0.2645123677854585, "learning_rate": 4.8657852372637253e-05, "loss": 4.3473, "step": 397 }, { "epoch": 0.26517864578995587, "learning_rate": 4.8649117235819835e-05, "loss": 4.3294, "step": 398 }, { "epoch": 0.26584492379445324, "learning_rate": 4.86403545547984e-05, "loss": 4.3471, "step": 399 }, { "epoch": 0.2665112017989506, "learning_rate": 4.863156433977884e-05, "loss": 4.3125, "step": 400 }, { "epoch": 0.26717747980344797, "learning_rate": 4.86227466009992e-05, "loss": 4.395, "step": 401 }, { "epoch": 0.2678437578079454, "learning_rate": 4.861390134872949e-05, "loss": 4.336, "step": 402 }, { "epoch": 0.26851003581244276, "learning_rate": 4.860502859327182e-05, "loss": 4.3663, "step": 403 }, { "epoch": 0.2691763138169401, "learning_rate": 4.8596128344960304e-05, "loss": 4.3622, "step": 404 }, { "epoch": 0.2698425918214375, "learning_rate": 4.858720061416111e-05, "loss": 4.3397, "step": 405 }, { "epoch": 0.27050886982593486, "learning_rate": 4.857824541127238e-05, "loss": 4.3679, "step": 406 }, { "epoch": 0.27117514783043223, "learning_rate": 4.856926274672427e-05, "loss": 4.3421, "step": 407 }, { "epoch": 0.27184142583492965, "learning_rate": 4.856025263097892e-05, "loss": 4.3228, "step": 408 }, { "epoch": 0.272507703839427, "learning_rate": 4.855121507453045e-05, "loss": 4.2952, "step": 409 }, { "epoch": 0.2731739818439244, "learning_rate": 4.854215008790492e-05, "loss": 4.3737, "step": 410 }, { "epoch": 0.27384025984842175, "learning_rate": 4.8533057681660356e-05, "loss": 4.3474, "step": 411 }, { "epoch": 0.2745065378529191, "learning_rate": 4.85239378663867e-05, "loss": 4.3744, "step": 412 }, { "epoch": 0.2751728158574165, "learning_rate": 4.8514790652705847e-05, "loss": 4.337, "step": 413 }, { "epoch": 0.2758390938619139, "learning_rate": 4.8505616051271584e-05, "loss": 4.3603, "step": 414 }, { "epoch": 0.2765053718664113, "learning_rate": 4.8496414072769594e-05, "loss": 4.3149, "step": 415 }, { "epoch": 0.27717164987090864, "learning_rate": 4.848718472791746e-05, "loss": 4.2901, "step": 416 }, { "epoch": 0.277837927875406, "learning_rate": 4.847792802746461e-05, "loss": 4.3632, "step": 417 }, { "epoch": 0.2785042058799034, "learning_rate": 4.846864398219237e-05, "loss": 4.3358, "step": 418 }, { "epoch": 0.27917048388440074, "learning_rate": 4.8459332602913904e-05, "loss": 4.352, "step": 419 }, { "epoch": 0.27983676188889817, "learning_rate": 4.8449993900474187e-05, "loss": 4.3812, "step": 420 }, { "epoch": 0.28050303989339553, "learning_rate": 4.844062788575005e-05, "loss": 4.3054, "step": 421 }, { "epoch": 0.2811693178978929, "learning_rate": 4.843123456965012e-05, "loss": 4.3336, "step": 422 }, { "epoch": 0.28183559590239027, "learning_rate": 4.8421813963114815e-05, "loss": 4.3006, "step": 423 }, { "epoch": 0.28250187390688764, "learning_rate": 4.8412366077116344e-05, "loss": 4.3658, "step": 424 }, { "epoch": 0.283168151911385, "learning_rate": 4.840289092265871e-05, "loss": 4.2924, "step": 425 }, { "epoch": 0.2838344299158824, "learning_rate": 4.8393388510777635e-05, "loss": 4.3534, "step": 426 }, { "epoch": 0.2845007079203798, "learning_rate": 4.838385885254062e-05, "loss": 4.3209, "step": 427 }, { "epoch": 0.28516698592487716, "learning_rate": 4.8374301959046886e-05, "loss": 4.2978, "step": 428 }, { "epoch": 0.2858332639293745, "learning_rate": 4.836471784142738e-05, "loss": 4.3677, "step": 429 }, { "epoch": 0.2864995419338719, "learning_rate": 4.835510651084475e-05, "loss": 4.3094, "step": 430 }, { "epoch": 0.28716581993836926, "learning_rate": 4.834546797849335e-05, "loss": 4.2839, "step": 431 }, { "epoch": 0.2878320979428667, "learning_rate": 4.8335802255599217e-05, "loss": 4.3324, "step": 432 }, { "epoch": 0.28849837594736405, "learning_rate": 4.832610935342003e-05, "loss": 4.3461, "step": 433 }, { "epoch": 0.2891646539518614, "learning_rate": 4.8316389283245155e-05, "loss": 4.2987, "step": 434 }, { "epoch": 0.2898309319563588, "learning_rate": 4.830664205639559e-05, "loss": 4.2962, "step": 435 }, { "epoch": 0.29049720996085615, "learning_rate": 4.829686768422397e-05, "loss": 4.3337, "step": 436 }, { "epoch": 0.2911634879653535, "learning_rate": 4.828706617811452e-05, "loss": 4.3322, "step": 437 }, { "epoch": 0.29182976596985094, "learning_rate": 4.82772375494831e-05, "loss": 4.293, "step": 438 }, { "epoch": 0.2924960439743483, "learning_rate": 4.826738180977714e-05, "loss": 4.2618, "step": 439 }, { "epoch": 0.2931623219788457, "learning_rate": 4.825749897047565e-05, "loss": 4.2737, "step": 440 }, { "epoch": 0.29382859998334304, "learning_rate": 4.8247589043089215e-05, "loss": 4.3273, "step": 441 }, { "epoch": 0.2944948779878404, "learning_rate": 4.823765203915995e-05, "loss": 4.273, "step": 442 }, { "epoch": 0.2951611559923378, "learning_rate": 4.822768797026151e-05, "loss": 4.3042, "step": 443 }, { "epoch": 0.2958274339968352, "learning_rate": 4.821769684799911e-05, "loss": 4.367, "step": 444 }, { "epoch": 0.29649371200133257, "learning_rate": 4.8207678684009404e-05, "loss": 4.3187, "step": 445 }, { "epoch": 0.29715999000582993, "learning_rate": 4.81976334899606e-05, "loss": 4.3225, "step": 446 }, { "epoch": 0.2978262680103273, "learning_rate": 4.8187561277552374e-05, "loss": 4.3644, "step": 447 }, { "epoch": 0.29849254601482467, "learning_rate": 4.817746205851584e-05, "loss": 4.3395, "step": 448 }, { "epoch": 0.2991588240193221, "learning_rate": 4.816733584461362e-05, "loss": 4.3006, "step": 449 }, { "epoch": 0.29982510202381946, "learning_rate": 4.815718264763973e-05, "loss": 4.3122, "step": 450 }, { "epoch": 0.3004913800283168, "learning_rate": 4.814700247941964e-05, "loss": 4.3761, "step": 451 }, { "epoch": 0.3011576580328142, "learning_rate": 4.813679535181022e-05, "loss": 4.3505, "step": 452 }, { "epoch": 0.30182393603731156, "learning_rate": 4.812656127669975e-05, "loss": 4.3503, "step": 453 }, { "epoch": 0.3024902140418089, "learning_rate": 4.8116300266007884e-05, "loss": 4.294, "step": 454 }, { "epoch": 0.30315649204630635, "learning_rate": 4.810601233168567e-05, "loss": 4.2903, "step": 455 }, { "epoch": 0.3038227700508037, "learning_rate": 4.809569748571547e-05, "loss": 4.3025, "step": 456 }, { "epoch": 0.3044890480553011, "learning_rate": 4.8085355740111046e-05, "loss": 4.3524, "step": 457 }, { "epoch": 0.30515532605979845, "learning_rate": 4.807498710691746e-05, "loss": 4.3089, "step": 458 }, { "epoch": 0.3058216040642958, "learning_rate": 4.806459159821107e-05, "loss": 4.3293, "step": 459 }, { "epoch": 0.3064878820687932, "learning_rate": 4.805416922609959e-05, "loss": 4.32, "step": 460 }, { "epoch": 0.3071541600732906, "learning_rate": 4.804372000272196e-05, "loss": 4.3316, "step": 461 }, { "epoch": 0.307820438077788, "learning_rate": 4.803324394024845e-05, "loss": 4.3145, "step": 462 }, { "epoch": 0.30848671608228534, "learning_rate": 4.8022741050880546e-05, "loss": 4.3416, "step": 463 }, { "epoch": 0.3091529940867827, "learning_rate": 4.8012211346851024e-05, "loss": 4.2449, "step": 464 }, { "epoch": 0.3098192720912801, "learning_rate": 4.8001654840423826e-05, "loss": 4.3183, "step": 465 }, { "epoch": 0.31048555009577744, "learning_rate": 4.799107154389418e-05, "loss": 4.3192, "step": 466 }, { "epoch": 0.31115182810027486, "learning_rate": 4.798046146958848e-05, "loss": 4.3324, "step": 467 }, { "epoch": 0.31181810610477223, "learning_rate": 4.79698246298643e-05, "loss": 4.3416, "step": 468 }, { "epoch": 0.3124843841092696, "learning_rate": 4.795916103711042e-05, "loss": 4.3037, "step": 469 }, { "epoch": 0.31315066211376696, "learning_rate": 4.794847070374675e-05, "loss": 4.3129, "step": 470 }, { "epoch": 0.31381694011826433, "learning_rate": 4.793775364222436e-05, "loss": 4.3191, "step": 471 }, { "epoch": 0.3144832181227617, "learning_rate": 4.792700986502544e-05, "loss": 4.3342, "step": 472 }, { "epoch": 0.3151494961272591, "learning_rate": 4.791623938466332e-05, "loss": 4.3435, "step": 473 }, { "epoch": 0.3158157741317565, "learning_rate": 4.7905442213682396e-05, "loss": 4.2463, "step": 474 }, { "epoch": 0.31648205213625386, "learning_rate": 4.789461836465817e-05, "loss": 4.2915, "step": 475 }, { "epoch": 0.3171483301407512, "learning_rate": 4.7883767850197225e-05, "loss": 4.3703, "step": 476 }, { "epoch": 0.3178146081452486, "learning_rate": 4.787289068293718e-05, "loss": 4.3181, "step": 477 }, { "epoch": 0.31848088614974596, "learning_rate": 4.786198687554672e-05, "loss": 4.24, "step": 478 }, { "epoch": 0.3191471641542434, "learning_rate": 4.7851056440725526e-05, "loss": 4.2733, "step": 479 }, { "epoch": 0.31981344215874075, "learning_rate": 4.784009939120433e-05, "loss": 4.3235, "step": 480 }, { "epoch": 0.3204797201632381, "learning_rate": 4.782911573974482e-05, "loss": 4.2917, "step": 481 }, { "epoch": 0.3211459981677355, "learning_rate": 4.781810549913972e-05, "loss": 4.3617, "step": 482 }, { "epoch": 0.32181227617223285, "learning_rate": 4.780706868221267e-05, "loss": 4.3159, "step": 483 }, { "epoch": 0.3224785541767302, "learning_rate": 4.7796005301818305e-05, "loss": 4.3571, "step": 484 }, { "epoch": 0.32314483218122764, "learning_rate": 4.778491537084216e-05, "loss": 4.3384, "step": 485 }, { "epoch": 0.323811110185725, "learning_rate": 4.777379890220074e-05, "loss": 4.3371, "step": 486 }, { "epoch": 0.32447738819022237, "learning_rate": 4.77626559088414e-05, "loss": 4.318, "step": 487 }, { "epoch": 0.32514366619471974, "learning_rate": 4.775148640374245e-05, "loss": 4.2472, "step": 488 }, { "epoch": 0.3258099441992171, "learning_rate": 4.7740290399913033e-05, "loss": 4.301, "step": 489 }, { "epoch": 0.3264762222037145, "learning_rate": 4.772906791039317e-05, "loss": 4.3924, "step": 490 }, { "epoch": 0.3271425002082119, "learning_rate": 4.771781894825374e-05, "loss": 4.3328, "step": 491 }, { "epoch": 0.32780877821270926, "learning_rate": 4.7706543526596445e-05, "loss": 4.3356, "step": 492 }, { "epoch": 0.32847505621720663, "learning_rate": 4.7695241658553794e-05, "loss": 4.2828, "step": 493 }, { "epoch": 0.329141334221704, "learning_rate": 4.7683913357289124e-05, "loss": 4.3, "step": 494 }, { "epoch": 0.32980761222620136, "learning_rate": 4.767255863599653e-05, "loss": 4.3164, "step": 495 }, { "epoch": 0.3304738902306988, "learning_rate": 4.766117750790091e-05, "loss": 4.3029, "step": 496 }, { "epoch": 0.33114016823519615, "learning_rate": 4.764976998625789e-05, "loss": 4.3285, "step": 497 }, { "epoch": 0.3318064462396935, "learning_rate": 4.763833608435385e-05, "loss": 4.2908, "step": 498 }, { "epoch": 0.3324727242441909, "learning_rate": 4.7626875815505894e-05, "loss": 4.2819, "step": 499 }, { "epoch": 0.33313900224868825, "learning_rate": 4.761538919306183e-05, "loss": 4.3024, "step": 500 }, { "epoch": 0.3338052802531856, "learning_rate": 4.7603876230400166e-05, "loss": 4.2631, "step": 501 }, { "epoch": 0.33447155825768304, "learning_rate": 4.75923369409301e-05, "loss": 4.2957, "step": 502 }, { "epoch": 0.3351378362621804, "learning_rate": 4.7580771338091464e-05, "loss": 4.3331, "step": 503 }, { "epoch": 0.3358041142666778, "learning_rate": 4.7569179435354756e-05, "loss": 4.338, "step": 504 }, { "epoch": 0.33647039227117514, "learning_rate": 4.755756124622111e-05, "loss": 4.3519, "step": 505 }, { "epoch": 0.3371366702756725, "learning_rate": 4.7545916784222254e-05, "loss": 4.2666, "step": 506 }, { "epoch": 0.3378029482801699, "learning_rate": 4.753424606292055e-05, "loss": 4.3137, "step": 507 }, { "epoch": 0.3384692262846673, "learning_rate": 4.752254909590892e-05, "loss": 4.265, "step": 508 }, { "epoch": 0.33913550428916467, "learning_rate": 4.7510825896810845e-05, "loss": 4.3073, "step": 509 }, { "epoch": 0.33980178229366204, "learning_rate": 4.749907647928039e-05, "loss": 4.2592, "step": 510 }, { "epoch": 0.3404680602981594, "learning_rate": 4.7487300857002134e-05, "loss": 4.2814, "step": 511 }, { "epoch": 0.34113433830265677, "learning_rate": 4.7475499043691186e-05, "loss": 4.2944, "step": 512 }, { "epoch": 0.34180061630715414, "learning_rate": 4.746367105309315e-05, "loss": 4.2777, "step": 513 }, { "epoch": 0.34246689431165156, "learning_rate": 4.7451816898984137e-05, "loss": 4.3257, "step": 514 }, { "epoch": 0.3431331723161489, "learning_rate": 4.74399365951707e-05, "loss": 4.3083, "step": 515 }, { "epoch": 0.3437994503206463, "learning_rate": 4.7428030155489875e-05, "loss": 4.2947, "step": 516 }, { "epoch": 0.34446572832514366, "learning_rate": 4.741609759380915e-05, "loss": 4.2583, "step": 517 }, { "epoch": 0.34513200632964103, "learning_rate": 4.740413892402639e-05, "loss": 4.3088, "step": 518 }, { "epoch": 0.3457982843341384, "learning_rate": 4.739215416006992e-05, "loss": 4.2661, "step": 519 }, { "epoch": 0.3464645623386358, "learning_rate": 4.738014331589842e-05, "loss": 4.3365, "step": 520 }, { "epoch": 0.3471308403431332, "learning_rate": 4.736810640550096e-05, "loss": 4.2673, "step": 521 }, { "epoch": 0.34779711834763055, "learning_rate": 4.735604344289697e-05, "loss": 4.2513, "step": 522 }, { "epoch": 0.3484633963521279, "learning_rate": 4.734395444213622e-05, "loss": 4.299, "step": 523 }, { "epoch": 0.3491296743566253, "learning_rate": 4.7331839417298825e-05, "loss": 4.3144, "step": 524 }, { "epoch": 0.34979595236112265, "learning_rate": 4.731969838249517e-05, "loss": 4.2797, "step": 525 }, { "epoch": 0.3504622303656201, "learning_rate": 4.7307531351865976e-05, "loss": 4.3216, "step": 526 }, { "epoch": 0.35112850837011744, "learning_rate": 4.7295338339582215e-05, "loss": 4.323, "step": 527 }, { "epoch": 0.3517947863746148, "learning_rate": 4.728311935984513e-05, "loss": 4.2798, "step": 528 }, { "epoch": 0.3524610643791122, "learning_rate": 4.7270874426886205e-05, "loss": 4.3008, "step": 529 }, { "epoch": 0.35312734238360954, "learning_rate": 4.7258603554967154e-05, "loss": 4.3164, "step": 530 }, { "epoch": 0.3537936203881069, "learning_rate": 4.72463067583799e-05, "loss": 4.3061, "step": 531 }, { "epoch": 0.35445989839260433, "learning_rate": 4.7233984051446564e-05, "loss": 4.2878, "step": 532 }, { "epoch": 0.3551261763971017, "learning_rate": 4.722163544851945e-05, "loss": 4.2692, "step": 533 }, { "epoch": 0.35579245440159907, "learning_rate": 4.7209260963981003e-05, "loss": 4.2621, "step": 534 }, { "epoch": 0.35645873240609643, "learning_rate": 4.719686061224383e-05, "loss": 4.3184, "step": 535 }, { "epoch": 0.3571250104105938, "learning_rate": 4.7184434407750664e-05, "loss": 4.2968, "step": 536 }, { "epoch": 0.3577912884150912, "learning_rate": 4.7171982364974346e-05, "loss": 4.2835, "step": 537 }, { "epoch": 0.3584575664195886, "learning_rate": 4.7159504498417814e-05, "loss": 4.2714, "step": 538 }, { "epoch": 0.35912384442408596, "learning_rate": 4.714700082261407e-05, "loss": 4.2761, "step": 539 }, { "epoch": 0.3597901224285833, "learning_rate": 4.71344713521262e-05, "loss": 4.2837, "step": 540 }, { "epoch": 0.3604564004330807, "learning_rate": 4.71219161015473e-05, "loss": 4.2803, "step": 541 }, { "epoch": 0.36112267843757806, "learning_rate": 4.710933508550053e-05, "loss": 4.3266, "step": 542 }, { "epoch": 0.3617889564420755, "learning_rate": 4.7096728318639025e-05, "loss": 4.3041, "step": 543 }, { "epoch": 0.36245523444657285, "learning_rate": 4.708409581564594e-05, "loss": 4.2871, "step": 544 }, { "epoch": 0.3631215124510702, "learning_rate": 4.707143759123439e-05, "loss": 4.2918, "step": 545 }, { "epoch": 0.3637877904555676, "learning_rate": 4.705875366014745e-05, "loss": 4.3206, "step": 546 }, { "epoch": 0.36445406846006495, "learning_rate": 4.7046044037158126e-05, "loss": 4.286, "step": 547 }, { "epoch": 0.3651203464645623, "learning_rate": 4.703330873706937e-05, "loss": 4.2914, "step": 548 }, { "epoch": 0.36578662446905974, "learning_rate": 4.7020547774714016e-05, "loss": 4.2762, "step": 549 }, { "epoch": 0.3664529024735571, "learning_rate": 4.700776116495481e-05, "loss": 4.2879, "step": 550 }, { "epoch": 0.3671191804780545, "learning_rate": 4.699494892268436e-05, "loss": 4.3085, "step": 551 }, { "epoch": 0.36778545848255184, "learning_rate": 4.698211106282511e-05, "loss": 4.3023, "step": 552 }, { "epoch": 0.3684517364870492, "learning_rate": 4.696924760032937e-05, "loss": 4.2972, "step": 553 }, { "epoch": 0.3691180144915466, "learning_rate": 4.6956358550179255e-05, "loss": 4.3259, "step": 554 }, { "epoch": 0.369784292496044, "learning_rate": 4.694344392738668e-05, "loss": 4.2715, "step": 555 }, { "epoch": 0.37045057050054137, "learning_rate": 4.693050374699335e-05, "loss": 4.27, "step": 556 }, { "epoch": 0.37111684850503873, "learning_rate": 4.691753802407074e-05, "loss": 4.3548, "step": 557 }, { "epoch": 0.3717831265095361, "learning_rate": 4.690454677372007e-05, "loss": 4.3184, "step": 558 }, { "epoch": 0.37244940451403347, "learning_rate": 4.689153001107228e-05, "loss": 4.326, "step": 559 }, { "epoch": 0.37311568251853083, "learning_rate": 4.687848775128804e-05, "loss": 4.2631, "step": 560 }, { "epoch": 0.37378196052302826, "learning_rate": 4.686542000955772e-05, "loss": 4.3253, "step": 561 }, { "epoch": 0.3744482385275256, "learning_rate": 4.685232680110136e-05, "loss": 4.2675, "step": 562 }, { "epoch": 0.375114516532023, "learning_rate": 4.683920814116866e-05, "loss": 4.26, "step": 563 }, { "epoch": 0.37578079453652036, "learning_rate": 4.682606404503896e-05, "loss": 4.3134, "step": 564 }, { "epoch": 0.3764470725410177, "learning_rate": 4.681289452802125e-05, "loss": 4.2914, "step": 565 }, { "epoch": 0.3771133505455151, "learning_rate": 4.679969960545409e-05, "loss": 4.2463, "step": 566 }, { "epoch": 0.3777796285500125, "learning_rate": 4.678647929270565e-05, "loss": 4.2999, "step": 567 }, { "epoch": 0.3784459065545099, "learning_rate": 4.677323360517369e-05, "loss": 4.2751, "step": 568 }, { "epoch": 0.37911218455900725, "learning_rate": 4.675996255828549e-05, "loss": 4.2648, "step": 569 }, { "epoch": 0.3797784625635046, "learning_rate": 4.674666616749789e-05, "loss": 4.2676, "step": 570 }, { "epoch": 0.380444740568002, "learning_rate": 4.6733344448297224e-05, "loss": 4.3179, "step": 571 }, { "epoch": 0.38111101857249935, "learning_rate": 4.6719997416199354e-05, "loss": 4.2805, "step": 572 }, { "epoch": 0.38177729657699677, "learning_rate": 4.670662508674962e-05, "loss": 4.3175, "step": 573 }, { "epoch": 0.38244357458149414, "learning_rate": 4.66932274755228e-05, "loss": 4.2639, "step": 574 }, { "epoch": 0.3831098525859915, "learning_rate": 4.6679804598123145e-05, "loss": 4.2832, "step": 575 }, { "epoch": 0.3837761305904889, "learning_rate": 4.666635647018433e-05, "loss": 4.2793, "step": 576 }, { "epoch": 0.38444240859498624, "learning_rate": 4.665288310736942e-05, "loss": 4.2557, "step": 577 }, { "epoch": 0.3851086865994836, "learning_rate": 4.663938452537089e-05, "loss": 4.2854, "step": 578 }, { "epoch": 0.38577496460398103, "learning_rate": 4.662586073991059e-05, "loss": 4.3111, "step": 579 }, { "epoch": 0.3864412426084784, "learning_rate": 4.6612311766739703e-05, "loss": 4.28, "step": 580 }, { "epoch": 0.38710752061297576, "learning_rate": 4.6598737621638774e-05, "loss": 4.2775, "step": 581 }, { "epoch": 0.38777379861747313, "learning_rate": 4.658513832041765e-05, "loss": 4.2968, "step": 582 }, { "epoch": 0.3884400766219705, "learning_rate": 4.657151387891548e-05, "loss": 4.2953, "step": 583 }, { "epoch": 0.3891063546264679, "learning_rate": 4.6557864313000695e-05, "loss": 4.263, "step": 584 }, { "epoch": 0.3897726326309653, "learning_rate": 4.6544189638570994e-05, "loss": 4.2777, "step": 585 }, { "epoch": 0.39043891063546265, "learning_rate": 4.653048987155332e-05, "loss": 4.2276, "step": 586 }, { "epoch": 0.39110518863996, "learning_rate": 4.651676502790381e-05, "loss": 4.3269, "step": 587 }, { "epoch": 0.3917714666444574, "learning_rate": 4.650301512360787e-05, "loss": 4.2756, "step": 588 }, { "epoch": 0.39243774464895476, "learning_rate": 4.648924017468003e-05, "loss": 4.3051, "step": 589 }, { "epoch": 0.3931040226534522, "learning_rate": 4.647544019716403e-05, "loss": 4.2598, "step": 590 }, { "epoch": 0.39377030065794955, "learning_rate": 4.6461615207132756e-05, "loss": 4.2566, "step": 591 }, { "epoch": 0.3944365786624469, "learning_rate": 4.6447765220688204e-05, "loss": 4.2586, "step": 592 }, { "epoch": 0.3951028566669443, "learning_rate": 4.6433890253961506e-05, "loss": 4.2448, "step": 593 }, { "epoch": 0.39576913467144165, "learning_rate": 4.641999032311288e-05, "loss": 4.2516, "step": 594 }, { "epoch": 0.396435412675939, "learning_rate": 4.6406065444331624e-05, "loss": 4.2631, "step": 595 }, { "epoch": 0.39710169068043644, "learning_rate": 4.639211563383609e-05, "loss": 4.2723, "step": 596 }, { "epoch": 0.3977679686849338, "learning_rate": 4.637814090787365e-05, "loss": 4.2688, "step": 597 }, { "epoch": 0.39843424668943117, "learning_rate": 4.6364141282720744e-05, "loss": 4.2832, "step": 598 }, { "epoch": 0.39910052469392854, "learning_rate": 4.635011677468275e-05, "loss": 4.2598, "step": 599 }, { "epoch": 0.3997668026984259, "learning_rate": 4.633606740009408e-05, "loss": 4.3138, "step": 600 }, { "epoch": 0.40043308070292327, "learning_rate": 4.632199317531808e-05, "loss": 4.2201, "step": 601 }, { "epoch": 0.4010993587074207, "learning_rate": 4.630789411674704e-05, "loss": 4.3189, "step": 602 }, { "epoch": 0.40176563671191806, "learning_rate": 4.6293770240802195e-05, "loss": 4.2446, "step": 603 }, { "epoch": 0.40243191471641543, "learning_rate": 4.627962156393365e-05, "loss": 4.2733, "step": 604 }, { "epoch": 0.4030981927209128, "learning_rate": 4.6265448102620424e-05, "loss": 4.3175, "step": 605 }, { "epoch": 0.40376447072541016, "learning_rate": 4.6251249873370396e-05, "loss": 4.2339, "step": 606 }, { "epoch": 0.40443074872990753, "learning_rate": 4.623702689272029e-05, "loss": 4.3064, "step": 607 }, { "epoch": 0.40509702673440495, "learning_rate": 4.622277917723565e-05, "loss": 4.2997, "step": 608 }, { "epoch": 0.4057633047389023, "learning_rate": 4.620850674351084e-05, "loss": 4.2887, "step": 609 }, { "epoch": 0.4064295827433997, "learning_rate": 4.619420960816902e-05, "loss": 4.2593, "step": 610 }, { "epoch": 0.40709586074789705, "learning_rate": 4.61798877878621e-05, "loss": 4.2811, "step": 611 }, { "epoch": 0.4077621387523944, "learning_rate": 4.6165541299270756e-05, "loss": 4.2913, "step": 612 }, { "epoch": 0.4084284167568918, "learning_rate": 4.615117015910438e-05, "loss": 4.3161, "step": 613 }, { "epoch": 0.4090946947613892, "learning_rate": 4.61367743841011e-05, "loss": 4.2581, "step": 614 }, { "epoch": 0.4097609727658866, "learning_rate": 4.612235399102771e-05, "loss": 4.243, "step": 615 }, { "epoch": 0.41042725077038394, "learning_rate": 4.61079089966797e-05, "loss": 4.3219, "step": 616 }, { "epoch": 0.4110935287748813, "learning_rate": 4.609343941788119e-05, "loss": 4.3108, "step": 617 }, { "epoch": 0.4117598067793787, "learning_rate": 4.6078945271484956e-05, "loss": 4.2747, "step": 618 }, { "epoch": 0.41242608478387605, "learning_rate": 4.6064426574372374e-05, "loss": 4.3115, "step": 619 }, { "epoch": 0.41309236278837347, "learning_rate": 4.6049883343453416e-05, "loss": 4.2493, "step": 620 }, { "epoch": 0.41375864079287084, "learning_rate": 4.603531559566664e-05, "loss": 4.2897, "step": 621 }, { "epoch": 0.4144249187973682, "learning_rate": 4.602072334797913e-05, "loss": 4.2619, "step": 622 }, { "epoch": 0.41509119680186557, "learning_rate": 4.600610661738654e-05, "loss": 4.2506, "step": 623 }, { "epoch": 0.41575747480636294, "learning_rate": 4.599146542091302e-05, "loss": 4.2382, "step": 624 }, { "epoch": 0.41642375281086036, "learning_rate": 4.597679977561122e-05, "loss": 4.3003, "step": 625 }, { "epoch": 0.4170900308153577, "learning_rate": 4.5962109698562264e-05, "loss": 4.2225, "step": 626 }, { "epoch": 0.4177563088198551, "learning_rate": 4.5947395206875735e-05, "loss": 4.3036, "step": 627 }, { "epoch": 0.41842258682435246, "learning_rate": 4.5932656317689635e-05, "loss": 4.2312, "step": 628 }, { "epoch": 0.4190888648288498, "learning_rate": 4.591789304817042e-05, "loss": 4.254, "step": 629 }, { "epoch": 0.4197551428333472, "learning_rate": 4.5903105415512905e-05, "loss": 4.2782, "step": 630 }, { "epoch": 0.4204214208378446, "learning_rate": 4.588829343694029e-05, "loss": 4.2447, "step": 631 }, { "epoch": 0.421087698842342, "learning_rate": 4.587345712970415e-05, "loss": 4.2315, "step": 632 }, { "epoch": 0.42175397684683935, "learning_rate": 4.585859651108437e-05, "loss": 4.2479, "step": 633 }, { "epoch": 0.4224202548513367, "learning_rate": 4.5843711598389156e-05, "loss": 4.2562, "step": 634 }, { "epoch": 0.4230865328558341, "learning_rate": 4.582880240895502e-05, "loss": 4.2834, "step": 635 }, { "epoch": 0.42375281086033145, "learning_rate": 4.581386896014674e-05, "loss": 4.3125, "step": 636 }, { "epoch": 0.4244190888648289, "learning_rate": 4.579891126935737e-05, "loss": 4.2427, "step": 637 }, { "epoch": 0.42508536686932624, "learning_rate": 4.5783929354008164e-05, "loss": 4.2338, "step": 638 }, { "epoch": 0.4257516448738236, "learning_rate": 4.5768923231548604e-05, "loss": 4.2505, "step": 639 }, { "epoch": 0.426417922878321, "learning_rate": 4.5753892919456386e-05, "loss": 4.2493, "step": 640 }, { "epoch": 0.42708420088281834, "learning_rate": 4.573883843523735e-05, "loss": 4.2572, "step": 641 }, { "epoch": 0.4277504788873157, "learning_rate": 4.5723759796425506e-05, "loss": 4.266, "step": 642 }, { "epoch": 0.42841675689181313, "learning_rate": 4.5708657020583e-05, "loss": 4.3052, "step": 643 }, { "epoch": 0.4290830348963105, "learning_rate": 4.5693530125300074e-05, "loss": 4.2828, "step": 644 }, { "epoch": 0.42974931290080787, "learning_rate": 4.5678379128195085e-05, "loss": 4.3018, "step": 645 }, { "epoch": 0.43041559090530523, "learning_rate": 4.5663204046914435e-05, "loss": 4.2437, "step": 646 }, { "epoch": 0.4310818689098026, "learning_rate": 4.56480048991326e-05, "loss": 4.3304, "step": 647 }, { "epoch": 0.43174814691429997, "learning_rate": 4.563278170255208e-05, "loss": 4.2885, "step": 648 }, { "epoch": 0.4324144249187974, "learning_rate": 4.5617534474903375e-05, "loss": 4.2569, "step": 649 }, { "epoch": 0.43308070292329476, "learning_rate": 4.560226323394498e-05, "loss": 4.2464, "step": 650 }, { "epoch": 0.4337469809277921, "learning_rate": 4.558696799746336e-05, "loss": 4.2618, "step": 651 }, { "epoch": 0.4344132589322895, "learning_rate": 4.557164878327292e-05, "loss": 4.275, "step": 652 }, { "epoch": 0.43507953693678686, "learning_rate": 4.555630560921602e-05, "loss": 4.2522, "step": 653 }, { "epoch": 0.4357458149412842, "learning_rate": 4.554093849316289e-05, "loss": 4.3075, "step": 654 }, { "epoch": 0.43641209294578165, "learning_rate": 4.5525547453011663e-05, "loss": 4.2954, "step": 655 }, { "epoch": 0.437078370950279, "learning_rate": 4.551013250668833e-05, "loss": 4.2741, "step": 656 }, { "epoch": 0.4377446489547764, "learning_rate": 4.549469367214673e-05, "loss": 4.2786, "step": 657 }, { "epoch": 0.43841092695927375, "learning_rate": 4.5479230967368525e-05, "loss": 4.2383, "step": 658 }, { "epoch": 0.4390772049637711, "learning_rate": 4.546374441036319e-05, "loss": 4.2614, "step": 659 }, { "epoch": 0.4397434829682685, "learning_rate": 4.5448234019167945e-05, "loss": 4.2433, "step": 660 }, { "epoch": 0.4404097609727659, "learning_rate": 4.543269981184781e-05, "loss": 4.2641, "step": 661 }, { "epoch": 0.4410760389772633, "learning_rate": 4.5417141806495524e-05, "loss": 4.2505, "step": 662 }, { "epoch": 0.44174231698176064, "learning_rate": 4.540156002123154e-05, "loss": 4.3148, "step": 663 }, { "epoch": 0.442408594986258, "learning_rate": 4.5385954474204026e-05, "loss": 4.2294, "step": 664 }, { "epoch": 0.4430748729907554, "learning_rate": 4.5370325183588804e-05, "loss": 4.2602, "step": 665 }, { "epoch": 0.44374115099525274, "learning_rate": 4.535467216758936e-05, "loss": 4.2285, "step": 666 }, { "epoch": 0.44440742899975016, "learning_rate": 4.533899544443682e-05, "loss": 4.2565, "step": 667 }, { "epoch": 0.44507370700424753, "learning_rate": 4.53232950323899e-05, "loss": 4.2251, "step": 668 }, { "epoch": 0.4457399850087449, "learning_rate": 4.530757094973494e-05, "loss": 4.2846, "step": 669 }, { "epoch": 0.44640626301324227, "learning_rate": 4.529182321478582e-05, "loss": 4.2559, "step": 670 }, { "epoch": 0.44707254101773963, "learning_rate": 4.527605184588398e-05, "loss": 4.2151, "step": 671 }, { "epoch": 0.44773881902223706, "learning_rate": 4.5260256861398386e-05, "loss": 4.2438, "step": 672 }, { "epoch": 0.4484050970267344, "learning_rate": 4.52444382797255e-05, "loss": 4.2494, "step": 673 }, { "epoch": 0.4490713750312318, "learning_rate": 4.522859611928929e-05, "loss": 4.2726, "step": 674 }, { "epoch": 0.44973765303572916, "learning_rate": 4.521273039854116e-05, "loss": 4.2323, "step": 675 }, { "epoch": 0.4504039310402265, "learning_rate": 4.5196841135959975e-05, "loss": 4.2525, "step": 676 }, { "epoch": 0.4510702090447239, "learning_rate": 4.5180928350052e-05, "loss": 4.2437, "step": 677 }, { "epoch": 0.4517364870492213, "learning_rate": 4.516499205935092e-05, "loss": 4.281, "step": 678 }, { "epoch": 0.4524027650537187, "learning_rate": 4.514903228241778e-05, "loss": 4.2821, "step": 679 }, { "epoch": 0.45306904305821605, "learning_rate": 4.513304903784099e-05, "loss": 4.2997, "step": 680 }, { "epoch": 0.4537353210627134, "learning_rate": 4.5117042344236274e-05, "loss": 4.2286, "step": 681 }, { "epoch": 0.4544015990672108, "learning_rate": 4.510101222024669e-05, "loss": 4.2808, "step": 682 }, { "epoch": 0.45506787707170815, "learning_rate": 4.5084958684542576e-05, "loss": 4.2706, "step": 683 }, { "epoch": 0.45573415507620557, "learning_rate": 4.506888175582153e-05, "loss": 4.2944, "step": 684 }, { "epoch": 0.45640043308070294, "learning_rate": 4.5052781452808416e-05, "loss": 4.255, "step": 685 }, { "epoch": 0.4570667110852003, "learning_rate": 4.50366577942553e-05, "loss": 4.2678, "step": 686 }, { "epoch": 0.4577329890896977, "learning_rate": 4.502051079894146e-05, "loss": 4.2141, "step": 687 }, { "epoch": 0.45839926709419504, "learning_rate": 4.500434048567336e-05, "loss": 4.2604, "step": 688 }, { "epoch": 0.4590655450986924, "learning_rate": 4.498814687328461e-05, "loss": 4.2354, "step": 689 }, { "epoch": 0.45973182310318983, "learning_rate": 4.4971929980635964e-05, "loss": 4.2456, "step": 690 }, { "epoch": 0.4603981011076872, "learning_rate": 4.4955689826615296e-05, "loss": 4.2048, "step": 691 }, { "epoch": 0.46106437911218456, "learning_rate": 4.493942643013756e-05, "loss": 4.2111, "step": 692 }, { "epoch": 0.46173065711668193, "learning_rate": 4.492313981014479e-05, "loss": 4.2683, "step": 693 }, { "epoch": 0.4623969351211793, "learning_rate": 4.490682998560606e-05, "loss": 4.2924, "step": 694 }, { "epoch": 0.46306321312567666, "learning_rate": 4.489049697551747e-05, "loss": 4.2802, "step": 695 }, { "epoch": 0.4637294911301741, "learning_rate": 4.4874140798902164e-05, "loss": 4.229, "step": 696 }, { "epoch": 0.46439576913467145, "learning_rate": 4.48577614748102e-05, "loss": 4.2068, "step": 697 }, { "epoch": 0.4650620471391688, "learning_rate": 4.4841359022318636e-05, "loss": 4.2129, "step": 698 }, { "epoch": 0.4657283251436662, "learning_rate": 4.482493346053147e-05, "loss": 4.2323, "step": 699 }, { "epoch": 0.46639460314816356, "learning_rate": 4.480848480857961e-05, "loss": 4.2851, "step": 700 }, { "epoch": 0.4670608811526609, "learning_rate": 4.4792013085620844e-05, "loss": 4.2009, "step": 701 }, { "epoch": 0.46772715915715835, "learning_rate": 4.477551831083985e-05, "loss": 4.2571, "step": 702 }, { "epoch": 0.4683934371616557, "learning_rate": 4.4759000503448137e-05, "loss": 4.2715, "step": 703 }, { "epoch": 0.4690597151661531, "learning_rate": 4.4742459682684054e-05, "loss": 4.2592, "step": 704 }, { "epoch": 0.46972599317065045, "learning_rate": 4.472589586781275e-05, "loss": 4.2726, "step": 705 }, { "epoch": 0.4703922711751478, "learning_rate": 4.470930907812616e-05, "loss": 4.2587, "step": 706 }, { "epoch": 0.4710585491796452, "learning_rate": 4.469269933294296e-05, "loss": 4.2211, "step": 707 }, { "epoch": 0.4717248271841426, "learning_rate": 4.4676066651608587e-05, "loss": 4.2239, "step": 708 }, { "epoch": 0.47239110518863997, "learning_rate": 4.465941105349516e-05, "loss": 4.2517, "step": 709 }, { "epoch": 0.47305738319313734, "learning_rate": 4.4642732558001534e-05, "loss": 4.2599, "step": 710 }, { "epoch": 0.4737236611976347, "learning_rate": 4.4626031184553186e-05, "loss": 4.2144, "step": 711 }, { "epoch": 0.47438993920213207, "learning_rate": 4.460930695260227e-05, "loss": 4.2287, "step": 712 }, { "epoch": 0.47505621720662944, "learning_rate": 4.459255988162755e-05, "loss": 4.2708, "step": 713 }, { "epoch": 0.47572249521112686, "learning_rate": 4.45757899911344e-05, "loss": 4.2292, "step": 714 }, { "epoch": 0.47638877321562423, "learning_rate": 4.455899730065476e-05, "loss": 4.2043, "step": 715 }, { "epoch": 0.4770550512201216, "learning_rate": 4.454218182974713e-05, "loss": 4.237, "step": 716 }, { "epoch": 0.47772132922461896, "learning_rate": 4.4525343597996556e-05, "loss": 4.2446, "step": 717 }, { "epoch": 0.47838760722911633, "learning_rate": 4.4508482625014566e-05, "loss": 4.2293, "step": 718 }, { "epoch": 0.47905388523361375, "learning_rate": 4.4491598930439205e-05, "loss": 4.2516, "step": 719 }, { "epoch": 0.4797201632381111, "learning_rate": 4.4474692533934944e-05, "loss": 4.2391, "step": 720 }, { "epoch": 0.4803864412426085, "learning_rate": 4.4457763455192746e-05, "loss": 4.2249, "step": 721 }, { "epoch": 0.48105271924710585, "learning_rate": 4.444081171392995e-05, "loss": 4.2725, "step": 722 }, { "epoch": 0.4817189972516032, "learning_rate": 4.4423837329890304e-05, "loss": 4.2293, "step": 723 }, { "epoch": 0.4823852752561006, "learning_rate": 4.440684032284394e-05, "loss": 4.3053, "step": 724 }, { "epoch": 0.483051553260598, "learning_rate": 4.4389820712587305e-05, "loss": 4.2321, "step": 725 }, { "epoch": 0.4837178312650954, "learning_rate": 4.4372778518943215e-05, "loss": 4.2247, "step": 726 }, { "epoch": 0.48438410926959274, "learning_rate": 4.435571376176076e-05, "loss": 4.205, "step": 727 }, { "epoch": 0.4850503872740901, "learning_rate": 4.4338626460915317e-05, "loss": 4.2773, "step": 728 }, { "epoch": 0.4857166652785875, "learning_rate": 4.432151663630853e-05, "loss": 4.2562, "step": 729 }, { "epoch": 0.48638294328308485, "learning_rate": 4.430438430786825e-05, "loss": 4.2592, "step": 730 }, { "epoch": 0.48704922128758227, "learning_rate": 4.428722949554857e-05, "loss": 4.2315, "step": 731 }, { "epoch": 0.48771549929207964, "learning_rate": 4.427005221932976e-05, "loss": 4.2966, "step": 732 }, { "epoch": 0.488381777296577, "learning_rate": 4.425285249921825e-05, "loss": 4.247, "step": 733 }, { "epoch": 0.48904805530107437, "learning_rate": 4.423563035524658e-05, "loss": 4.2325, "step": 734 }, { "epoch": 0.48971433330557174, "learning_rate": 4.4218385807473475e-05, "loss": 4.2208, "step": 735 }, { "epoch": 0.4903806113100691, "learning_rate": 4.42011188759837e-05, "loss": 4.3079, "step": 736 }, { "epoch": 0.4910468893145665, "learning_rate": 4.418382958088811e-05, "loss": 4.2137, "step": 737 }, { "epoch": 0.4917131673190639, "learning_rate": 4.4166517942323596e-05, "loss": 4.2672, "step": 738 }, { "epoch": 0.49237944532356126, "learning_rate": 4.414918398045309e-05, "loss": 4.2671, "step": 739 }, { "epoch": 0.4930457233280586, "learning_rate": 4.41318277154655e-05, "loss": 4.2664, "step": 740 }, { "epoch": 0.493712001332556, "learning_rate": 4.4114449167575744e-05, "loss": 4.2004, "step": 741 }, { "epoch": 0.49437827933705336, "learning_rate": 4.4097048357024665e-05, "loss": 4.2822, "step": 742 }, { "epoch": 0.4950445573415508, "learning_rate": 4.407962530407903e-05, "loss": 4.2319, "step": 743 }, { "epoch": 0.49571083534604815, "learning_rate": 4.4062180029031544e-05, "loss": 4.2761, "step": 744 }, { "epoch": 0.4963771133505455, "learning_rate": 4.404471255220076e-05, "loss": 4.2015, "step": 745 }, { "epoch": 0.4970433913550429, "learning_rate": 4.402722289393113e-05, "loss": 4.2396, "step": 746 }, { "epoch": 0.49770966935954025, "learning_rate": 4.400971107459288e-05, "loss": 4.2327, "step": 747 }, { "epoch": 0.4983759473640376, "learning_rate": 4.3992177114582124e-05, "loss": 4.2155, "step": 748 }, { "epoch": 0.49904222536853504, "learning_rate": 4.397462103432069e-05, "loss": 4.2446, "step": 749 }, { "epoch": 0.4997085033730324, "learning_rate": 4.395704285425623e-05, "loss": 4.2276, "step": 750 }, { "epoch": 0.5003747813775298, "learning_rate": 4.393944259486208e-05, "loss": 4.2468, "step": 751 }, { "epoch": 0.5010410593820271, "learning_rate": 4.392182027663733e-05, "loss": 4.2422, "step": 752 }, { "epoch": 0.5017073373865245, "learning_rate": 4.390417592010675e-05, "loss": 4.251, "step": 753 }, { "epoch": 0.5023736153910219, "learning_rate": 4.3886509545820786e-05, "loss": 4.24, "step": 754 }, { "epoch": 0.5030398933955192, "learning_rate": 4.3868821174355525e-05, "loss": 4.2764, "step": 755 }, { "epoch": 0.5037061714000166, "learning_rate": 4.3851110826312656e-05, "loss": 4.2686, "step": 756 }, { "epoch": 0.504372449404514, "learning_rate": 4.383337852231949e-05, "loss": 4.244, "step": 757 }, { "epoch": 0.5050387274090115, "learning_rate": 4.38156242830289e-05, "loss": 4.1726, "step": 758 }, { "epoch": 0.5057050054135088, "learning_rate": 4.37978481291193e-05, "loss": 4.2363, "step": 759 }, { "epoch": 0.5063712834180062, "learning_rate": 4.378005008129463e-05, "loss": 4.251, "step": 760 }, { "epoch": 0.5070375614225036, "learning_rate": 4.376223016028435e-05, "loss": 4.231, "step": 761 }, { "epoch": 0.5077038394270009, "learning_rate": 4.374438838684337e-05, "loss": 4.2909, "step": 762 }, { "epoch": 0.5083701174314983, "learning_rate": 4.3726524781752065e-05, "loss": 4.2289, "step": 763 }, { "epoch": 0.5090363954359957, "learning_rate": 4.370863936581624e-05, "loss": 4.2608, "step": 764 }, { "epoch": 0.509702673440493, "learning_rate": 4.369073215986708e-05, "loss": 4.1911, "step": 765 }, { "epoch": 0.5103689514449904, "learning_rate": 4.367280318476118e-05, "loss": 4.2843, "step": 766 }, { "epoch": 0.5110352294494878, "learning_rate": 4.365485246138048e-05, "loss": 4.2515, "step": 767 }, { "epoch": 0.5117015074539851, "learning_rate": 4.363688001063222e-05, "loss": 4.2588, "step": 768 }, { "epoch": 0.5123677854584826, "learning_rate": 4.361888585344901e-05, "loss": 4.2155, "step": 769 }, { "epoch": 0.51303406346298, "learning_rate": 4.3600870010788675e-05, "loss": 4.2719, "step": 770 }, { "epoch": 0.5137003414674773, "learning_rate": 4.358283250363434e-05, "loss": 4.2391, "step": 771 }, { "epoch": 0.5143666194719747, "learning_rate": 4.356477335299434e-05, "loss": 4.247, "step": 772 }, { "epoch": 0.5150328974764721, "learning_rate": 4.354669257990223e-05, "loss": 4.1949, "step": 773 }, { "epoch": 0.5156991754809694, "learning_rate": 4.352859020541674e-05, "loss": 4.2437, "step": 774 }, { "epoch": 0.5163654534854668, "learning_rate": 4.351046625062177e-05, "loss": 4.2356, "step": 775 }, { "epoch": 0.5170317314899642, "learning_rate": 4.3492320736626355e-05, "loss": 4.1832, "step": 776 }, { "epoch": 0.5176980094944615, "learning_rate": 4.347415368456463e-05, "loss": 4.1989, "step": 777 }, { "epoch": 0.5183642874989589, "learning_rate": 4.345596511559582e-05, "loss": 4.2583, "step": 778 }, { "epoch": 0.5190305655034563, "learning_rate": 4.34377550509042e-05, "loss": 4.2389, "step": 779 }, { "epoch": 0.5196968435079536, "learning_rate": 4.341952351169911e-05, "loss": 4.2399, "step": 780 }, { "epoch": 0.5203631215124511, "learning_rate": 4.340127051921488e-05, "loss": 4.2071, "step": 781 }, { "epoch": 0.5210293995169485, "learning_rate": 4.338299609471081e-05, "loss": 4.2212, "step": 782 }, { "epoch": 0.5216956775214459, "learning_rate": 4.3364700259471205e-05, "loss": 4.2261, "step": 783 }, { "epoch": 0.5223619555259432, "learning_rate": 4.3346383034805264e-05, "loss": 4.245, "step": 784 }, { "epoch": 0.5230282335304406, "learning_rate": 4.332804444204714e-05, "loss": 4.2142, "step": 785 }, { "epoch": 0.523694511534938, "learning_rate": 4.3309684502555834e-05, "loss": 4.2483, "step": 786 }, { "epoch": 0.5243607895394353, "learning_rate": 4.329130323771524e-05, "loss": 4.2738, "step": 787 }, { "epoch": 0.5250270675439327, "learning_rate": 4.327290066893407e-05, "loss": 4.2719, "step": 788 }, { "epoch": 0.5256933455484301, "learning_rate": 4.325447681764586e-05, "loss": 4.2152, "step": 789 }, { "epoch": 0.5263596235529274, "learning_rate": 4.323603170530892e-05, "loss": 4.2065, "step": 790 }, { "epoch": 0.5270259015574248, "learning_rate": 4.3217565353406346e-05, "loss": 4.258, "step": 791 }, { "epoch": 0.5276921795619222, "learning_rate": 4.319907778344595e-05, "loss": 4.1857, "step": 792 }, { "epoch": 0.5283584575664196, "learning_rate": 4.318056901696027e-05, "loss": 4.2331, "step": 793 }, { "epoch": 0.529024735570917, "learning_rate": 4.316203907550652e-05, "loss": 4.2316, "step": 794 }, { "epoch": 0.5296910135754144, "learning_rate": 4.31434879806666e-05, "loss": 4.221, "step": 795 }, { "epoch": 0.5303572915799117, "learning_rate": 4.3124915754047004e-05, "loss": 4.231, "step": 796 }, { "epoch": 0.5310235695844091, "learning_rate": 4.310632241727888e-05, "loss": 4.2364, "step": 797 }, { "epoch": 0.5316898475889065, "learning_rate": 4.3087707992017954e-05, "loss": 4.2342, "step": 798 }, { "epoch": 0.5323561255934038, "learning_rate": 4.3069072499944494e-05, "loss": 4.2463, "step": 799 }, { "epoch": 0.5330224035979012, "learning_rate": 4.305041596276333e-05, "loss": 4.2112, "step": 800 }, { "epoch": 0.5336886816023986, "learning_rate": 4.3031738402203784e-05, "loss": 4.2026, "step": 801 }, { "epoch": 0.5343549596068959, "learning_rate": 4.301303984001967e-05, "loss": 4.2276, "step": 802 }, { "epoch": 0.5350212376113933, "learning_rate": 4.2994320297989285e-05, "loss": 4.2252, "step": 803 }, { "epoch": 0.5356875156158908, "learning_rate": 4.2975579797915314e-05, "loss": 4.2762, "step": 804 }, { "epoch": 0.5363537936203882, "learning_rate": 4.295681836162489e-05, "loss": 4.1782, "step": 805 }, { "epoch": 0.5370200716248855, "learning_rate": 4.293803601096952e-05, "loss": 4.2288, "step": 806 }, { "epoch": 0.5376863496293829, "learning_rate": 4.291923276782507e-05, "loss": 4.2366, "step": 807 }, { "epoch": 0.5383526276338803, "learning_rate": 4.2900408654091726e-05, "loss": 4.2069, "step": 808 }, { "epoch": 0.5390189056383776, "learning_rate": 4.2881563691694015e-05, "loss": 4.2132, "step": 809 }, { "epoch": 0.539685183642875, "learning_rate": 4.2862697902580715e-05, "loss": 4.2309, "step": 810 }, { "epoch": 0.5403514616473724, "learning_rate": 4.284381130872487e-05, "loss": 4.2408, "step": 811 }, { "epoch": 0.5410177396518697, "learning_rate": 4.282490393212376e-05, "loss": 4.226, "step": 812 }, { "epoch": 0.5416840176563671, "learning_rate": 4.2805975794798866e-05, "loss": 4.1678, "step": 813 }, { "epoch": 0.5423502956608645, "learning_rate": 4.278702691879587e-05, "loss": 4.2434, "step": 814 }, { "epoch": 0.5430165736653618, "learning_rate": 4.2768057326184565e-05, "loss": 4.2417, "step": 815 }, { "epoch": 0.5436828516698593, "learning_rate": 4.27490670390589e-05, "loss": 4.2283, "step": 816 }, { "epoch": 0.5443491296743567, "learning_rate": 4.273005607953694e-05, "loss": 4.2347, "step": 817 }, { "epoch": 0.545015407678854, "learning_rate": 4.27110244697608e-05, "loss": 4.2459, "step": 818 }, { "epoch": 0.5456816856833514, "learning_rate": 4.2691972231896656e-05, "loss": 4.2998, "step": 819 }, { "epoch": 0.5463479636878488, "learning_rate": 4.2672899388134715e-05, "loss": 4.1929, "step": 820 }, { "epoch": 0.5470142416923461, "learning_rate": 4.265380596068919e-05, "loss": 4.2207, "step": 821 }, { "epoch": 0.5476805196968435, "learning_rate": 4.2634691971798246e-05, "loss": 4.2642, "step": 822 }, { "epoch": 0.5483467977013409, "learning_rate": 4.2615557443724005e-05, "loss": 4.2372, "step": 823 }, { "epoch": 0.5490130757058382, "learning_rate": 4.2596402398752533e-05, "loss": 4.1944, "step": 824 }, { "epoch": 0.5496793537103356, "learning_rate": 4.257722685919375e-05, "loss": 4.2493, "step": 825 }, { "epoch": 0.550345631714833, "learning_rate": 4.2558030847381495e-05, "loss": 4.2457, "step": 826 }, { "epoch": 0.5510119097193303, "learning_rate": 4.2538814385673405e-05, "loss": 4.2236, "step": 827 }, { "epoch": 0.5516781877238278, "learning_rate": 4.251957749645096e-05, "loss": 4.193, "step": 828 }, { "epoch": 0.5523444657283252, "learning_rate": 4.2500320202119434e-05, "loss": 4.2588, "step": 829 }, { "epoch": 0.5530107437328226, "learning_rate": 4.2481042525107854e-05, "loss": 4.2357, "step": 830 }, { "epoch": 0.5536770217373199, "learning_rate": 4.2461744487868996e-05, "loss": 4.2646, "step": 831 }, { "epoch": 0.5543432997418173, "learning_rate": 4.2442426112879354e-05, "loss": 4.1719, "step": 832 }, { "epoch": 0.5550095777463147, "learning_rate": 4.2423087422639085e-05, "loss": 4.1706, "step": 833 }, { "epoch": 0.555675855750812, "learning_rate": 4.240372843967203e-05, "loss": 4.1958, "step": 834 }, { "epoch": 0.5563421337553094, "learning_rate": 4.238434918652568e-05, "loss": 4.2535, "step": 835 }, { "epoch": 0.5570084117598068, "learning_rate": 4.2364949685771094e-05, "loss": 4.2599, "step": 836 }, { "epoch": 0.5576746897643041, "learning_rate": 4.234552996000294e-05, "loss": 4.2108, "step": 837 }, { "epoch": 0.5583409677688015, "learning_rate": 4.232609003183943e-05, "loss": 4.2315, "step": 838 }, { "epoch": 0.5590072457732989, "learning_rate": 4.230662992392232e-05, "loss": 4.2531, "step": 839 }, { "epoch": 0.5596735237777963, "learning_rate": 4.228714965891686e-05, "loss": 4.2042, "step": 840 }, { "epoch": 0.5603398017822937, "learning_rate": 4.226764925951177e-05, "loss": 4.2279, "step": 841 }, { "epoch": 0.5610060797867911, "learning_rate": 4.224812874841923e-05, "loss": 4.248, "step": 842 }, { "epoch": 0.5616723577912884, "learning_rate": 4.222858814837487e-05, "loss": 4.2185, "step": 843 }, { "epoch": 0.5623386357957858, "learning_rate": 4.220902748213765e-05, "loss": 4.2127, "step": 844 }, { "epoch": 0.5630049138002832, "learning_rate": 4.218944677248996e-05, "loss": 4.2062, "step": 845 }, { "epoch": 0.5636711918047805, "learning_rate": 4.2169846042237525e-05, "loss": 4.2546, "step": 846 }, { "epoch": 0.5643374698092779, "learning_rate": 4.215022531420937e-05, "loss": 4.2368, "step": 847 }, { "epoch": 0.5650037478137753, "learning_rate": 4.213058461125781e-05, "loss": 4.1808, "step": 848 }, { "epoch": 0.5656700258182726, "learning_rate": 4.211092395625846e-05, "loss": 4.2232, "step": 849 }, { "epoch": 0.56633630382277, "learning_rate": 4.209124337211013e-05, "loss": 4.2647, "step": 850 }, { "epoch": 0.5670025818272675, "learning_rate": 4.207154288173488e-05, "loss": 4.1633, "step": 851 }, { "epoch": 0.5676688598317648, "learning_rate": 4.205182250807791e-05, "loss": 4.2835, "step": 852 }, { "epoch": 0.5683351378362622, "learning_rate": 4.203208227410762e-05, "loss": 4.2192, "step": 853 }, { "epoch": 0.5690014158407596, "learning_rate": 4.2012322202815525e-05, "loss": 4.2358, "step": 854 }, { "epoch": 0.569667693845257, "learning_rate": 4.199254231721624e-05, "loss": 4.2915, "step": 855 }, { "epoch": 0.5703339718497543, "learning_rate": 4.197274264034746e-05, "loss": 4.2737, "step": 856 }, { "epoch": 0.5710002498542517, "learning_rate": 4.195292319526995e-05, "loss": 4.2371, "step": 857 }, { "epoch": 0.571666527858749, "learning_rate": 4.193308400506745e-05, "loss": 4.2163, "step": 858 }, { "epoch": 0.5723328058632464, "learning_rate": 4.191322509284675e-05, "loss": 4.2461, "step": 859 }, { "epoch": 0.5729990838677438, "learning_rate": 4.189334648173761e-05, "loss": 4.1969, "step": 860 }, { "epoch": 0.5736653618722412, "learning_rate": 4.1873448194892675e-05, "loss": 4.2311, "step": 861 }, { "epoch": 0.5743316398767385, "learning_rate": 4.1853530255487557e-05, "loss": 4.2437, "step": 862 }, { "epoch": 0.574997917881236, "learning_rate": 4.183359268672076e-05, "loss": 4.2013, "step": 863 }, { "epoch": 0.5756641958857334, "learning_rate": 4.181363551181361e-05, "loss": 4.1796, "step": 864 }, { "epoch": 0.5763304738902307, "learning_rate": 4.179365875401032e-05, "loss": 4.2242, "step": 865 }, { "epoch": 0.5769967518947281, "learning_rate": 4.1773662436577876e-05, "loss": 4.308, "step": 866 }, { "epoch": 0.5776630298992255, "learning_rate": 4.1753646582806046e-05, "loss": 4.206, "step": 867 }, { "epoch": 0.5783293079037228, "learning_rate": 4.173361121600737e-05, "loss": 4.1659, "step": 868 }, { "epoch": 0.5789955859082202, "learning_rate": 4.171355635951709e-05, "loss": 4.282, "step": 869 }, { "epoch": 0.5796618639127176, "learning_rate": 4.169348203669319e-05, "loss": 4.2339, "step": 870 }, { "epoch": 0.5803281419172149, "learning_rate": 4.167338827091627e-05, "loss": 4.2394, "step": 871 }, { "epoch": 0.5809944199217123, "learning_rate": 4.1653275085589625e-05, "loss": 4.2392, "step": 872 }, { "epoch": 0.5816606979262097, "learning_rate": 4.163314250413913e-05, "loss": 4.2317, "step": 873 }, { "epoch": 0.582326975930707, "learning_rate": 4.16129905500133e-05, "loss": 4.2525, "step": 874 }, { "epoch": 0.5829932539352045, "learning_rate": 4.159281924668314e-05, "loss": 4.2303, "step": 875 }, { "epoch": 0.5836595319397019, "learning_rate": 4.1572628617642255e-05, "loss": 4.2183, "step": 876 }, { "epoch": 0.5843258099441992, "learning_rate": 4.155241868640674e-05, "loss": 4.2334, "step": 877 }, { "epoch": 0.5849920879486966, "learning_rate": 4.153218947651516e-05, "loss": 4.2457, "step": 878 }, { "epoch": 0.585658365953194, "learning_rate": 4.151194101152855e-05, "loss": 4.2006, "step": 879 }, { "epoch": 0.5863246439576913, "learning_rate": 4.149167331503036e-05, "loss": 4.2277, "step": 880 }, { "epoch": 0.5869909219621887, "learning_rate": 4.147138641062642e-05, "loss": 4.2306, "step": 881 }, { "epoch": 0.5876571999666861, "learning_rate": 4.1451080321945e-05, "loss": 4.2417, "step": 882 }, { "epoch": 0.5883234779711835, "learning_rate": 4.1430755072636626e-05, "loss": 4.1818, "step": 883 }, { "epoch": 0.5889897559756808, "learning_rate": 4.1410410686374195e-05, "loss": 4.1559, "step": 884 }, { "epoch": 0.5896560339801782, "learning_rate": 4.139004718685289e-05, "loss": 4.1916, "step": 885 }, { "epoch": 0.5903223119846756, "learning_rate": 4.1369664597790134e-05, "loss": 4.2387, "step": 886 }, { "epoch": 0.590988589989173, "learning_rate": 4.13492629429256e-05, "loss": 4.2322, "step": 887 }, { "epoch": 0.5916548679936704, "learning_rate": 4.132884224602116e-05, "loss": 4.2554, "step": 888 }, { "epoch": 0.5923211459981678, "learning_rate": 4.130840253086087e-05, "loss": 4.2454, "step": 889 }, { "epoch": 0.5929874240026651, "learning_rate": 4.128794382125093e-05, "loss": 4.1987, "step": 890 }, { "epoch": 0.5936537020071625, "learning_rate": 4.126746614101966e-05, "loss": 4.2116, "step": 891 }, { "epoch": 0.5943199800116599, "learning_rate": 4.1246969514017494e-05, "loss": 4.2571, "step": 892 }, { "epoch": 0.5949862580161572, "learning_rate": 4.122645396411691e-05, "loss": 4.2463, "step": 893 }, { "epoch": 0.5956525360206546, "learning_rate": 4.120591951521244e-05, "loss": 4.2308, "step": 894 }, { "epoch": 0.596318814025152, "learning_rate": 4.118536619122062e-05, "loss": 4.1972, "step": 895 }, { "epoch": 0.5969850920296493, "learning_rate": 4.1164794016079985e-05, "loss": 4.2093, "step": 896 }, { "epoch": 0.5976513700341467, "learning_rate": 4.114420301375099e-05, "loss": 4.2425, "step": 897 }, { "epoch": 0.5983176480386442, "learning_rate": 4.112359320821607e-05, "loss": 4.1783, "step": 898 }, { "epoch": 0.5989839260431415, "learning_rate": 4.1102964623479523e-05, "loss": 4.2436, "step": 899 }, { "epoch": 0.5996502040476389, "learning_rate": 4.1082317283567526e-05, "loss": 4.1911, "step": 900 }, { "epoch": 0.6003164820521363, "learning_rate": 4.106165121252811e-05, "loss": 4.2198, "step": 901 }, { "epoch": 0.6009827600566336, "learning_rate": 4.10409664344311e-05, "loss": 4.21, "step": 902 }, { "epoch": 0.601649038061131, "learning_rate": 4.102026297336814e-05, "loss": 4.2593, "step": 903 }, { "epoch": 0.6023153160656284, "learning_rate": 4.0999540853452605e-05, "loss": 4.2448, "step": 904 }, { "epoch": 0.6029815940701257, "learning_rate": 4.0978800098819636e-05, "loss": 4.2628, "step": 905 }, { "epoch": 0.6036478720746231, "learning_rate": 4.0958040733626036e-05, "loss": 4.2293, "step": 906 }, { "epoch": 0.6043141500791205, "learning_rate": 4.093726278205031e-05, "loss": 4.2572, "step": 907 }, { "epoch": 0.6049804280836178, "learning_rate": 4.091646626829263e-05, "loss": 4.1672, "step": 908 }, { "epoch": 0.6056467060881152, "learning_rate": 4.0895651216574725e-05, "loss": 4.2201, "step": 909 }, { "epoch": 0.6063129840926127, "learning_rate": 4.087481765113999e-05, "loss": 4.1759, "step": 910 }, { "epoch": 0.6069792620971101, "learning_rate": 4.0853965596253315e-05, "loss": 4.2364, "step": 911 }, { "epoch": 0.6076455401016074, "learning_rate": 4.083309507620118e-05, "loss": 4.1883, "step": 912 }, { "epoch": 0.6083118181061048, "learning_rate": 4.081220611529153e-05, "loss": 4.2004, "step": 913 }, { "epoch": 0.6089780961106022, "learning_rate": 4.079129873785382e-05, "loss": 4.2148, "step": 914 }, { "epoch": 0.6096443741150995, "learning_rate": 4.077037296823893e-05, "loss": 4.2, "step": 915 }, { "epoch": 0.6103106521195969, "learning_rate": 4.0749428830819195e-05, "loss": 4.1947, "step": 916 }, { "epoch": 0.6109769301240943, "learning_rate": 4.072846634998829e-05, "loss": 4.226, "step": 917 }, { "epoch": 0.6116432081285916, "learning_rate": 4.07074855501613e-05, "loss": 4.2578, "step": 918 }, { "epoch": 0.612309486133089, "learning_rate": 4.068648645577462e-05, "loss": 4.2288, "step": 919 }, { "epoch": 0.6129757641375864, "learning_rate": 4.066546909128598e-05, "loss": 4.2197, "step": 920 }, { "epoch": 0.6136420421420837, "learning_rate": 4.064443348117436e-05, "loss": 4.23, "step": 921 }, { "epoch": 0.6143083201465812, "learning_rate": 4.0623379649940026e-05, "loss": 4.2508, "step": 922 }, { "epoch": 0.6149745981510786, "learning_rate": 4.060230762210441e-05, "loss": 4.2537, "step": 923 }, { "epoch": 0.615640876155576, "learning_rate": 4.0581217422210197e-05, "loss": 4.1968, "step": 924 }, { "epoch": 0.6163071541600733, "learning_rate": 4.0560109074821195e-05, "loss": 4.1874, "step": 925 }, { "epoch": 0.6169734321645707, "learning_rate": 4.0538982604522376e-05, "loss": 4.1946, "step": 926 }, { "epoch": 0.617639710169068, "learning_rate": 4.051783803591982e-05, "loss": 4.2286, "step": 927 }, { "epoch": 0.6183059881735654, "learning_rate": 4.0496675393640645e-05, "loss": 4.1978, "step": 928 }, { "epoch": 0.6189722661780628, "learning_rate": 4.0475494702333075e-05, "loss": 4.1638, "step": 929 }, { "epoch": 0.6196385441825601, "learning_rate": 4.045429598666632e-05, "loss": 4.2151, "step": 930 }, { "epoch": 0.6203048221870575, "learning_rate": 4.0433079271330586e-05, "loss": 4.1608, "step": 931 }, { "epoch": 0.6209711001915549, "learning_rate": 4.0411844581037075e-05, "loss": 4.219, "step": 932 }, { "epoch": 0.6216373781960522, "learning_rate": 4.0390591940517874e-05, "loss": 4.1692, "step": 933 }, { "epoch": 0.6223036562005497, "learning_rate": 4.0369321374526016e-05, "loss": 4.1979, "step": 934 }, { "epoch": 0.6229699342050471, "learning_rate": 4.0348032907835386e-05, "loss": 4.2169, "step": 935 }, { "epoch": 0.6236362122095445, "learning_rate": 4.032672656524075e-05, "loss": 4.2121, "step": 936 }, { "epoch": 0.6243024902140418, "learning_rate": 4.0305402371557664e-05, "loss": 4.2254, "step": 937 }, { "epoch": 0.6249687682185392, "learning_rate": 4.0284060351622485e-05, "loss": 4.2227, "step": 938 }, { "epoch": 0.6256350462230366, "learning_rate": 4.026270053029233e-05, "loss": 4.2157, "step": 939 }, { "epoch": 0.6263013242275339, "learning_rate": 4.024132293244507e-05, "loss": 4.2488, "step": 940 }, { "epoch": 0.6269676022320313, "learning_rate": 4.021992758297925e-05, "loss": 4.2065, "step": 941 }, { "epoch": 0.6276338802365287, "learning_rate": 4.0198514506814097e-05, "loss": 4.2889, "step": 942 }, { "epoch": 0.628300158241026, "learning_rate": 4.0177083728889495e-05, "loss": 4.2318, "step": 943 }, { "epoch": 0.6289664362455234, "learning_rate": 4.015563527416595e-05, "loss": 4.1941, "step": 944 }, { "epoch": 0.6296327142500209, "learning_rate": 4.013416916762455e-05, "loss": 4.2394, "step": 945 }, { "epoch": 0.6302989922545182, "learning_rate": 4.011268543426692e-05, "loss": 4.1886, "step": 946 }, { "epoch": 0.6309652702590156, "learning_rate": 4.0091184099115245e-05, "loss": 4.2066, "step": 947 }, { "epoch": 0.631631548263513, "learning_rate": 4.006966518721219e-05, "loss": 4.1906, "step": 948 }, { "epoch": 0.6322978262680103, "learning_rate": 4.004812872362093e-05, "loss": 4.2071, "step": 949 }, { "epoch": 0.6329641042725077, "learning_rate": 4.002657473342503e-05, "loss": 4.1946, "step": 950 }, { "epoch": 0.6336303822770051, "learning_rate": 4.000500324172849e-05, "loss": 4.2157, "step": 951 }, { "epoch": 0.6342966602815024, "learning_rate": 3.998341427365572e-05, "loss": 4.2002, "step": 952 }, { "epoch": 0.6349629382859998, "learning_rate": 3.996180785435144e-05, "loss": 4.2172, "step": 953 }, { "epoch": 0.6356292162904972, "learning_rate": 3.994018400898072e-05, "loss": 4.2399, "step": 954 }, { "epoch": 0.6362954942949945, "learning_rate": 3.991854276272894e-05, "loss": 4.2244, "step": 955 }, { "epoch": 0.6369617722994919, "learning_rate": 3.989688414080171e-05, "loss": 4.2035, "step": 956 }, { "epoch": 0.6376280503039894, "learning_rate": 3.987520816842491e-05, "loss": 4.2239, "step": 957 }, { "epoch": 0.6382943283084868, "learning_rate": 3.98535148708446e-05, "loss": 4.2266, "step": 958 }, { "epoch": 0.6389606063129841, "learning_rate": 3.9831804273327054e-05, "loss": 4.1804, "step": 959 }, { "epoch": 0.6396268843174815, "learning_rate": 3.981007640115867e-05, "loss": 4.2055, "step": 960 }, { "epoch": 0.6402931623219789, "learning_rate": 3.978833127964596e-05, "loss": 4.2253, "step": 961 }, { "epoch": 0.6409594403264762, "learning_rate": 3.9766568934115556e-05, "loss": 4.2573, "step": 962 }, { "epoch": 0.6416257183309736, "learning_rate": 3.9744789389914124e-05, "loss": 4.1668, "step": 963 }, { "epoch": 0.642291996335471, "learning_rate": 3.9722992672408374e-05, "loss": 4.2275, "step": 964 }, { "epoch": 0.6429582743399683, "learning_rate": 3.9701178806985004e-05, "loss": 4.2245, "step": 965 }, { "epoch": 0.6436245523444657, "learning_rate": 3.96793478190507e-05, "loss": 4.2108, "step": 966 }, { "epoch": 0.6442908303489631, "learning_rate": 3.9657499734032086e-05, "loss": 4.1881, "step": 967 }, { "epoch": 0.6449571083534604, "learning_rate": 3.963563457737569e-05, "loss": 4.2355, "step": 968 }, { "epoch": 0.6456233863579579, "learning_rate": 3.961375237454795e-05, "loss": 4.2139, "step": 969 }, { "epoch": 0.6462896643624553, "learning_rate": 3.9591853151035123e-05, "loss": 4.2079, "step": 970 }, { "epoch": 0.6469559423669526, "learning_rate": 3.956993693234331e-05, "loss": 4.2299, "step": 971 }, { "epoch": 0.64762222037145, "learning_rate": 3.9548003743998406e-05, "loss": 4.1861, "step": 972 }, { "epoch": 0.6482884983759474, "learning_rate": 3.9526053611546064e-05, "loss": 4.1676, "step": 973 }, { "epoch": 0.6489547763804447, "learning_rate": 3.950408656055168e-05, "loss": 4.2492, "step": 974 }, { "epoch": 0.6496210543849421, "learning_rate": 3.9482102616600336e-05, "loss": 4.2625, "step": 975 }, { "epoch": 0.6502873323894395, "learning_rate": 3.946010180529681e-05, "loss": 4.2408, "step": 976 }, { "epoch": 0.6509536103939368, "learning_rate": 3.943808415226553e-05, "loss": 4.2515, "step": 977 }, { "epoch": 0.6516198883984342, "learning_rate": 3.941604968315052e-05, "loss": 4.2261, "step": 978 }, { "epoch": 0.6522861664029316, "learning_rate": 3.93939984236154e-05, "loss": 4.1979, "step": 979 }, { "epoch": 0.652952444407429, "learning_rate": 3.937193039934333e-05, "loss": 4.2479, "step": 980 }, { "epoch": 0.6536187224119264, "learning_rate": 3.934984563603703e-05, "loss": 4.1933, "step": 981 }, { "epoch": 0.6542850004164238, "learning_rate": 3.9327744159418675e-05, "loss": 4.1862, "step": 982 }, { "epoch": 0.6549512784209212, "learning_rate": 3.9305625995229954e-05, "loss": 4.1988, "step": 983 }, { "epoch": 0.6556175564254185, "learning_rate": 3.9283491169231944e-05, "loss": 4.2475, "step": 984 }, { "epoch": 0.6562838344299159, "learning_rate": 3.926133970720516e-05, "loss": 4.1495, "step": 985 }, { "epoch": 0.6569501124344133, "learning_rate": 3.923917163494947e-05, "loss": 4.2448, "step": 986 }, { "epoch": 0.6576163904389106, "learning_rate": 3.921698697828411e-05, "loss": 4.2459, "step": 987 }, { "epoch": 0.658282668443408, "learning_rate": 3.919478576304762e-05, "loss": 4.2188, "step": 988 }, { "epoch": 0.6589489464479054, "learning_rate": 3.917256801509783e-05, "loss": 4.1891, "step": 989 }, { "epoch": 0.6596152244524027, "learning_rate": 3.915033376031182e-05, "loss": 4.2108, "step": 990 }, { "epoch": 0.6602815024569001, "learning_rate": 3.91280830245859e-05, "loss": 4.122, "step": 991 }, { "epoch": 0.6609477804613976, "learning_rate": 3.910581583383557e-05, "loss": 4.2183, "step": 992 }, { "epoch": 0.6616140584658949, "learning_rate": 3.908353221399551e-05, "loss": 4.174, "step": 993 }, { "epoch": 0.6622803364703923, "learning_rate": 3.906123219101952e-05, "loss": 4.1827, "step": 994 }, { "epoch": 0.6629466144748897, "learning_rate": 3.90389157908805e-05, "loss": 4.1739, "step": 995 }, { "epoch": 0.663612892479387, "learning_rate": 3.9016583039570454e-05, "loss": 4.2417, "step": 996 }, { "epoch": 0.6642791704838844, "learning_rate": 3.899423396310039e-05, "loss": 4.2193, "step": 997 }, { "epoch": 0.6649454484883818, "learning_rate": 3.897186858750036e-05, "loss": 4.2034, "step": 998 }, { "epoch": 0.6656117264928791, "learning_rate": 3.8949486938819394e-05, "loss": 4.2127, "step": 999 }, { "epoch": 0.6662780044973765, "learning_rate": 3.892708904312546e-05, "loss": 4.154, "step": 1000 }, { "epoch": 0.6669442825018739, "learning_rate": 3.890467492650548e-05, "loss": 4.2419, "step": 1001 }, { "epoch": 0.6676105605063712, "learning_rate": 3.888224461506522e-05, "loss": 4.2043, "step": 1002 }, { "epoch": 0.6682768385108686, "learning_rate": 3.8859798134929365e-05, "loss": 4.1697, "step": 1003 }, { "epoch": 0.6689431165153661, "learning_rate": 3.8837335512241376e-05, "loss": 4.1658, "step": 1004 }, { "epoch": 0.6696093945198635, "learning_rate": 3.8814856773163576e-05, "loss": 4.1931, "step": 1005 }, { "epoch": 0.6702756725243608, "learning_rate": 3.8792361943877e-05, "loss": 4.2376, "step": 1006 }, { "epoch": 0.6709419505288582, "learning_rate": 3.876985105058145e-05, "loss": 4.1789, "step": 1007 }, { "epoch": 0.6716082285333556, "learning_rate": 3.874732411949545e-05, "loss": 4.1591, "step": 1008 }, { "epoch": 0.6722745065378529, "learning_rate": 3.8724781176856184e-05, "loss": 4.2283, "step": 1009 }, { "epoch": 0.6729407845423503, "learning_rate": 3.8702222248919494e-05, "loss": 4.2157, "step": 1010 }, { "epoch": 0.6736070625468477, "learning_rate": 3.867964736195983e-05, "loss": 4.2075, "step": 1011 }, { "epoch": 0.674273340551345, "learning_rate": 3.865705654227024e-05, "loss": 4.2088, "step": 1012 }, { "epoch": 0.6749396185558424, "learning_rate": 3.863444981616232e-05, "loss": 4.2248, "step": 1013 }, { "epoch": 0.6756058965603398, "learning_rate": 3.861182720996621e-05, "loss": 4.2482, "step": 1014 }, { "epoch": 0.6762721745648371, "learning_rate": 3.858918875003053e-05, "loss": 4.1895, "step": 1015 }, { "epoch": 0.6769384525693346, "learning_rate": 3.856653446272236e-05, "loss": 4.2083, "step": 1016 }, { "epoch": 0.677604730573832, "learning_rate": 3.854386437442723e-05, "loss": 4.2116, "step": 1017 }, { "epoch": 0.6782710085783293, "learning_rate": 3.852117851154906e-05, "loss": 4.2645, "step": 1018 }, { "epoch": 0.6789372865828267, "learning_rate": 3.8498476900510154e-05, "loss": 4.2239, "step": 1019 }, { "epoch": 0.6796035645873241, "learning_rate": 3.8475759567751144e-05, "loss": 4.2048, "step": 1020 }, { "epoch": 0.6802698425918214, "learning_rate": 3.8453026539731e-05, "loss": 4.1515, "step": 1021 }, { "epoch": 0.6809361205963188, "learning_rate": 3.843027784292693e-05, "loss": 4.1805, "step": 1022 }, { "epoch": 0.6816023986008162, "learning_rate": 3.840751350383443e-05, "loss": 4.2233, "step": 1023 }, { "epoch": 0.6822686766053135, "learning_rate": 3.83847335489672e-05, "loss": 4.1952, "step": 1024 }, { "epoch": 0.6829349546098109, "learning_rate": 3.8361938004857124e-05, "loss": 4.2039, "step": 1025 }, { "epoch": 0.6836012326143083, "learning_rate": 3.8339126898054246e-05, "loss": 4.1632, "step": 1026 }, { "epoch": 0.6842675106188058, "learning_rate": 3.8316300255126735e-05, "loss": 4.2085, "step": 1027 }, { "epoch": 0.6849337886233031, "learning_rate": 3.829345810266086e-05, "loss": 4.1645, "step": 1028 }, { "epoch": 0.6856000666278005, "learning_rate": 3.8270600467260954e-05, "loss": 4.2249, "step": 1029 }, { "epoch": 0.6862663446322979, "learning_rate": 3.824772737554937e-05, "loss": 4.164, "step": 1030 }, { "epoch": 0.6869326226367952, "learning_rate": 3.822483885416649e-05, "loss": 4.2246, "step": 1031 }, { "epoch": 0.6875989006412926, "learning_rate": 3.820193492977062e-05, "loss": 4.1365, "step": 1032 }, { "epoch": 0.68826517864579, "learning_rate": 3.817901562903807e-05, "loss": 4.2022, "step": 1033 }, { "epoch": 0.6889314566502873, "learning_rate": 3.8156080978663e-05, "loss": 4.1927, "step": 1034 }, { "epoch": 0.6895977346547847, "learning_rate": 3.813313100535747e-05, "loss": 4.1731, "step": 1035 }, { "epoch": 0.6902640126592821, "learning_rate": 3.8110165735851413e-05, "loss": 4.2305, "step": 1036 }, { "epoch": 0.6909302906637794, "learning_rate": 3.8087185196892526e-05, "loss": 4.2104, "step": 1037 }, { "epoch": 0.6915965686682768, "learning_rate": 3.8064189415246346e-05, "loss": 4.1984, "step": 1038 }, { "epoch": 0.6922628466727743, "learning_rate": 3.804117841769611e-05, "loss": 4.1534, "step": 1039 }, { "epoch": 0.6929291246772716, "learning_rate": 3.801815223104281e-05, "loss": 4.224, "step": 1040 }, { "epoch": 0.693595402681769, "learning_rate": 3.7995110882105125e-05, "loss": 4.1879, "step": 1041 }, { "epoch": 0.6942616806862664, "learning_rate": 3.797205439771938e-05, "loss": 4.257, "step": 1042 }, { "epoch": 0.6949279586907637, "learning_rate": 3.794898280473955e-05, "loss": 4.1959, "step": 1043 }, { "epoch": 0.6955942366952611, "learning_rate": 3.792589613003719e-05, "loss": 4.2129, "step": 1044 }, { "epoch": 0.6962605146997585, "learning_rate": 3.790279440050143e-05, "loss": 4.1745, "step": 1045 }, { "epoch": 0.6969267927042558, "learning_rate": 3.7879677643038905e-05, "loss": 4.2004, "step": 1046 }, { "epoch": 0.6975930707087532, "learning_rate": 3.7856545884573816e-05, "loss": 4.1924, "step": 1047 }, { "epoch": 0.6982593487132506, "learning_rate": 3.783339915204777e-05, "loss": 4.2131, "step": 1048 }, { "epoch": 0.6989256267177479, "learning_rate": 3.781023747241985e-05, "loss": 4.2264, "step": 1049 }, { "epoch": 0.6995919047222453, "learning_rate": 3.7787060872666536e-05, "loss": 4.184, "step": 1050 }, { "epoch": 0.7002581827267428, "learning_rate": 3.776386937978169e-05, "loss": 4.1944, "step": 1051 }, { "epoch": 0.7009244607312402, "learning_rate": 3.7740663020776534e-05, "loss": 4.2569, "step": 1052 }, { "epoch": 0.7015907387357375, "learning_rate": 3.7717441822679576e-05, "loss": 4.1988, "step": 1053 }, { "epoch": 0.7022570167402349, "learning_rate": 3.769420581253662e-05, "loss": 4.1987, "step": 1054 }, { "epoch": 0.7029232947447323, "learning_rate": 3.767095501741073e-05, "loss": 4.1454, "step": 1055 }, { "epoch": 0.7035895727492296, "learning_rate": 3.764768946438219e-05, "loss": 4.2162, "step": 1056 }, { "epoch": 0.704255850753727, "learning_rate": 3.762440918054844e-05, "loss": 4.2183, "step": 1057 }, { "epoch": 0.7049221287582244, "learning_rate": 3.760111419302412e-05, "loss": 4.1844, "step": 1058 }, { "epoch": 0.7055884067627217, "learning_rate": 3.757780452894098e-05, "loss": 4.2091, "step": 1059 }, { "epoch": 0.7062546847672191, "learning_rate": 3.755448021544785e-05, "loss": 4.1752, "step": 1060 }, { "epoch": 0.7069209627717165, "learning_rate": 3.753114127971065e-05, "loss": 4.1877, "step": 1061 }, { "epoch": 0.7075872407762138, "learning_rate": 3.75077877489123e-05, "loss": 4.2283, "step": 1062 }, { "epoch": 0.7082535187807113, "learning_rate": 3.748441965025275e-05, "loss": 4.1877, "step": 1063 }, { "epoch": 0.7089197967852087, "learning_rate": 3.7461037010948884e-05, "loss": 4.2186, "step": 1064 }, { "epoch": 0.709586074789706, "learning_rate": 3.743763985823454e-05, "loss": 4.2735, "step": 1065 }, { "epoch": 0.7102523527942034, "learning_rate": 3.7414228219360474e-05, "loss": 4.1688, "step": 1066 }, { "epoch": 0.7109186307987008, "learning_rate": 3.739080212159429e-05, "loss": 4.241, "step": 1067 }, { "epoch": 0.7115849088031981, "learning_rate": 3.736736159222042e-05, "loss": 4.1769, "step": 1068 }, { "epoch": 0.7122511868076955, "learning_rate": 3.7343906658540154e-05, "loss": 4.2333, "step": 1069 }, { "epoch": 0.7129174648121929, "learning_rate": 3.732043734787152e-05, "loss": 4.2128, "step": 1070 }, { "epoch": 0.7135837428166902, "learning_rate": 3.72969536875493e-05, "loss": 4.181, "step": 1071 }, { "epoch": 0.7142500208211876, "learning_rate": 3.727345570492499e-05, "loss": 4.1877, "step": 1072 }, { "epoch": 0.714916298825685, "learning_rate": 3.724994342736676e-05, "loss": 4.1797, "step": 1073 }, { "epoch": 0.7155825768301824, "learning_rate": 3.722641688225944e-05, "loss": 4.2154, "step": 1074 }, { "epoch": 0.7162488548346798, "learning_rate": 3.7202876097004494e-05, "loss": 4.1815, "step": 1075 }, { "epoch": 0.7169151328391772, "learning_rate": 3.7179321099019916e-05, "loss": 4.1886, "step": 1076 }, { "epoch": 0.7175814108436745, "learning_rate": 3.715575191574031e-05, "loss": 4.1979, "step": 1077 }, { "epoch": 0.7182476888481719, "learning_rate": 3.7132168574616786e-05, "loss": 4.1575, "step": 1078 }, { "epoch": 0.7189139668526693, "learning_rate": 3.710857110311692e-05, "loss": 4.187, "step": 1079 }, { "epoch": 0.7195802448571667, "learning_rate": 3.7084959528724785e-05, "loss": 4.1499, "step": 1080 }, { "epoch": 0.720246522861664, "learning_rate": 3.706133387894084e-05, "loss": 4.1859, "step": 1081 }, { "epoch": 0.7209128008661614, "learning_rate": 3.703769418128197e-05, "loss": 4.2187, "step": 1082 }, { "epoch": 0.7215790788706588, "learning_rate": 3.7014040463281395e-05, "loss": 4.2081, "step": 1083 }, { "epoch": 0.7222453568751561, "learning_rate": 3.699037275248869e-05, "loss": 4.1752, "step": 1084 }, { "epoch": 0.7229116348796535, "learning_rate": 3.696669107646971e-05, "loss": 4.1821, "step": 1085 }, { "epoch": 0.723577912884151, "learning_rate": 3.694299546280657e-05, "loss": 4.2302, "step": 1086 }, { "epoch": 0.7242441908886483, "learning_rate": 3.691928593909766e-05, "loss": 4.135, "step": 1087 }, { "epoch": 0.7249104688931457, "learning_rate": 3.68955625329575e-05, "loss": 4.1695, "step": 1088 }, { "epoch": 0.7255767468976431, "learning_rate": 3.687182527201684e-05, "loss": 4.1922, "step": 1089 }, { "epoch": 0.7262430249021404, "learning_rate": 3.684807418392255e-05, "loss": 4.1641, "step": 1090 }, { "epoch": 0.7269093029066378, "learning_rate": 3.6824309296337584e-05, "loss": 4.1859, "step": 1091 }, { "epoch": 0.7275755809111352, "learning_rate": 3.6800530636941e-05, "loss": 4.2159, "step": 1092 }, { "epoch": 0.7282418589156325, "learning_rate": 3.677673823342786e-05, "loss": 4.215, "step": 1093 }, { "epoch": 0.7289081369201299, "learning_rate": 3.675293211350928e-05, "loss": 4.1805, "step": 1094 }, { "epoch": 0.7295744149246273, "learning_rate": 3.6729112304912305e-05, "loss": 4.2059, "step": 1095 }, { "epoch": 0.7302406929291246, "learning_rate": 3.6705278835379945e-05, "loss": 4.2213, "step": 1096 }, { "epoch": 0.730906970933622, "learning_rate": 3.6681431732671135e-05, "loss": 4.2346, "step": 1097 }, { "epoch": 0.7315732489381195, "learning_rate": 3.665757102456067e-05, "loss": 4.1891, "step": 1098 }, { "epoch": 0.7322395269426168, "learning_rate": 3.6633696738839176e-05, "loss": 4.2064, "step": 1099 }, { "epoch": 0.7329058049471142, "learning_rate": 3.660980890331313e-05, "loss": 4.2071, "step": 1100 }, { "epoch": 0.7335720829516116, "learning_rate": 3.658590754580476e-05, "loss": 4.2776, "step": 1101 }, { "epoch": 0.734238360956109, "learning_rate": 3.656199269415206e-05, "loss": 4.1662, "step": 1102 }, { "epoch": 0.7349046389606063, "learning_rate": 3.6538064376208745e-05, "loss": 4.1613, "step": 1103 }, { "epoch": 0.7355709169651037, "learning_rate": 3.651412261984419e-05, "loss": 4.1852, "step": 1104 }, { "epoch": 0.736237194969601, "learning_rate": 3.649016745294345e-05, "loss": 4.1927, "step": 1105 }, { "epoch": 0.7369034729740984, "learning_rate": 3.646619890340718e-05, "loss": 4.1849, "step": 1106 }, { "epoch": 0.7375697509785958, "learning_rate": 3.644221699915162e-05, "loss": 4.1726, "step": 1107 }, { "epoch": 0.7382360289830932, "learning_rate": 3.6418221768108586e-05, "loss": 4.1943, "step": 1108 }, { "epoch": 0.7389023069875905, "learning_rate": 3.639421323822539e-05, "loss": 4.2299, "step": 1109 }, { "epoch": 0.739568584992088, "learning_rate": 3.637019143746485e-05, "loss": 4.1848, "step": 1110 }, { "epoch": 0.7402348629965854, "learning_rate": 3.634615639380524e-05, "loss": 4.2036, "step": 1111 }, { "epoch": 0.7409011410010827, "learning_rate": 3.632210813524025e-05, "loss": 4.2011, "step": 1112 }, { "epoch": 0.7415674190055801, "learning_rate": 3.629804668977897e-05, "loss": 4.1585, "step": 1113 }, { "epoch": 0.7422336970100775, "learning_rate": 3.627397208544583e-05, "loss": 4.2146, "step": 1114 }, { "epoch": 0.7428999750145748, "learning_rate": 3.624988435028062e-05, "loss": 4.1856, "step": 1115 }, { "epoch": 0.7435662530190722, "learning_rate": 3.622578351233838e-05, "loss": 4.1601, "step": 1116 }, { "epoch": 0.7442325310235696, "learning_rate": 3.6201669599689465e-05, "loss": 4.2063, "step": 1117 }, { "epoch": 0.7448988090280669, "learning_rate": 3.6177542640419404e-05, "loss": 4.1957, "step": 1118 }, { "epoch": 0.7455650870325643, "learning_rate": 3.615340266262895e-05, "loss": 4.1461, "step": 1119 }, { "epoch": 0.7462313650370617, "learning_rate": 3.612924969443401e-05, "loss": 4.1819, "step": 1120 }, { "epoch": 0.7468976430415591, "learning_rate": 3.610508376396564e-05, "loss": 4.2322, "step": 1121 }, { "epoch": 0.7475639210460565, "learning_rate": 3.608090489936997e-05, "loss": 4.2183, "step": 1122 }, { "epoch": 0.7482301990505539, "learning_rate": 3.60567131288082e-05, "loss": 4.1577, "step": 1123 }, { "epoch": 0.7488964770550512, "learning_rate": 3.6032508480456555e-05, "loss": 4.2218, "step": 1124 }, { "epoch": 0.7495627550595486, "learning_rate": 3.600829098250629e-05, "loss": 4.2198, "step": 1125 }, { "epoch": 0.750229033064046, "learning_rate": 3.5984060663163586e-05, "loss": 4.1907, "step": 1126 }, { "epoch": 0.7508953110685433, "learning_rate": 3.595981755064959e-05, "loss": 4.2181, "step": 1127 }, { "epoch": 0.7515615890730407, "learning_rate": 3.5935561673200314e-05, "loss": 4.208, "step": 1128 }, { "epoch": 0.7522278670775381, "learning_rate": 3.591129305906668e-05, "loss": 4.1721, "step": 1129 }, { "epoch": 0.7528941450820354, "learning_rate": 3.5887011736514406e-05, "loss": 4.215, "step": 1130 }, { "epoch": 0.7535604230865328, "learning_rate": 3.586271773382403e-05, "loss": 4.1384, "step": 1131 }, { "epoch": 0.7542267010910302, "learning_rate": 3.5838411079290865e-05, "loss": 4.2322, "step": 1132 }, { "epoch": 0.7548929790955277, "learning_rate": 3.581409180122494e-05, "loss": 4.1533, "step": 1133 }, { "epoch": 0.755559257100025, "learning_rate": 3.5789759927951e-05, "loss": 4.2363, "step": 1134 }, { "epoch": 0.7562255351045224, "learning_rate": 3.576541548780847e-05, "loss": 4.1495, "step": 1135 }, { "epoch": 0.7568918131090198, "learning_rate": 3.5741058509151383e-05, "loss": 4.2378, "step": 1136 }, { "epoch": 0.7575580911135171, "learning_rate": 3.57166890203484e-05, "loss": 4.1475, "step": 1137 }, { "epoch": 0.7582243691180145, "learning_rate": 3.569230704978274e-05, "loss": 4.1475, "step": 1138 }, { "epoch": 0.7588906471225119, "learning_rate": 3.5667912625852164e-05, "loss": 4.2308, "step": 1139 }, { "epoch": 0.7595569251270092, "learning_rate": 3.5643505776968935e-05, "loss": 4.1621, "step": 1140 }, { "epoch": 0.7602232031315066, "learning_rate": 3.5619086531559796e-05, "loss": 4.1705, "step": 1141 }, { "epoch": 0.760889481136004, "learning_rate": 3.559465491806592e-05, "loss": 4.2135, "step": 1142 }, { "epoch": 0.7615557591405013, "learning_rate": 3.557021096494288e-05, "loss": 4.2271, "step": 1143 }, { "epoch": 0.7622220371449987, "learning_rate": 3.554575470066064e-05, "loss": 4.1981, "step": 1144 }, { "epoch": 0.7628883151494962, "learning_rate": 3.552128615370348e-05, "loss": 4.1845, "step": 1145 }, { "epoch": 0.7635545931539935, "learning_rate": 3.5496805352570006e-05, "loss": 4.2063, "step": 1146 }, { "epoch": 0.7642208711584909, "learning_rate": 3.5472312325773075e-05, "loss": 4.191, "step": 1147 }, { "epoch": 0.7648871491629883, "learning_rate": 3.544780710183981e-05, "loss": 4.2667, "step": 1148 }, { "epoch": 0.7655534271674856, "learning_rate": 3.5423289709311516e-05, "loss": 4.2344, "step": 1149 }, { "epoch": 0.766219705171983, "learning_rate": 3.539876017674367e-05, "loss": 4.1621, "step": 1150 }, { "epoch": 0.7668859831764804, "learning_rate": 3.537421853270592e-05, "loss": 4.1964, "step": 1151 }, { "epoch": 0.7675522611809777, "learning_rate": 3.534966480578198e-05, "loss": 4.1933, "step": 1152 }, { "epoch": 0.7682185391854751, "learning_rate": 3.532509902456968e-05, "loss": 4.1885, "step": 1153 }, { "epoch": 0.7688848171899725, "learning_rate": 3.530052121768084e-05, "loss": 4.218, "step": 1154 }, { "epoch": 0.7695510951944698, "learning_rate": 3.5275931413741324e-05, "loss": 4.2018, "step": 1155 }, { "epoch": 0.7702173731989672, "learning_rate": 3.525132964139096e-05, "loss": 4.2346, "step": 1156 }, { "epoch": 0.7708836512034647, "learning_rate": 3.5226715929283506e-05, "loss": 4.1779, "step": 1157 }, { "epoch": 0.7715499292079621, "learning_rate": 3.520209030608662e-05, "loss": 4.1478, "step": 1158 }, { "epoch": 0.7722162072124594, "learning_rate": 3.517745280048188e-05, "loss": 4.2018, "step": 1159 }, { "epoch": 0.7728824852169568, "learning_rate": 3.515280344116464e-05, "loss": 4.1713, "step": 1160 }, { "epoch": 0.7735487632214542, "learning_rate": 3.51281422568441e-05, "loss": 4.2154, "step": 1161 }, { "epoch": 0.7742150412259515, "learning_rate": 3.5103469276243216e-05, "loss": 4.2739, "step": 1162 }, { "epoch": 0.7748813192304489, "learning_rate": 3.50787845280987e-05, "loss": 4.2029, "step": 1163 }, { "epoch": 0.7755475972349463, "learning_rate": 3.505408804116095e-05, "loss": 4.2501, "step": 1164 }, { "epoch": 0.7762138752394436, "learning_rate": 3.502937984419405e-05, "loss": 4.1968, "step": 1165 }, { "epoch": 0.776880153243941, "learning_rate": 3.500465996597571e-05, "loss": 4.1449, "step": 1166 }, { "epoch": 0.7775464312484384, "learning_rate": 3.497992843529726e-05, "loss": 4.1505, "step": 1167 }, { "epoch": 0.7782127092529358, "learning_rate": 3.495518528096359e-05, "loss": 4.2196, "step": 1168 }, { "epoch": 0.7788789872574332, "learning_rate": 3.493043053179314e-05, "loss": 4.192, "step": 1169 }, { "epoch": 0.7795452652619306, "learning_rate": 3.4905664216617836e-05, "loss": 4.2375, "step": 1170 }, { "epoch": 0.7802115432664279, "learning_rate": 3.4880886364283095e-05, "loss": 4.1485, "step": 1171 }, { "epoch": 0.7808778212709253, "learning_rate": 3.4856097003647756e-05, "loss": 4.1517, "step": 1172 }, { "epoch": 0.7815440992754227, "learning_rate": 3.4831296163584074e-05, "loss": 4.2255, "step": 1173 }, { "epoch": 0.78221037727992, "learning_rate": 3.480648387297767e-05, "loss": 4.2144, "step": 1174 }, { "epoch": 0.7828766552844174, "learning_rate": 3.4781660160727493e-05, "loss": 4.2423, "step": 1175 }, { "epoch": 0.7835429332889148, "learning_rate": 3.47568250557458e-05, "loss": 4.2127, "step": 1176 }, { "epoch": 0.7842092112934121, "learning_rate": 3.4731978586958134e-05, "loss": 4.1583, "step": 1177 }, { "epoch": 0.7848754892979095, "learning_rate": 3.470712078330324e-05, "loss": 4.1945, "step": 1178 }, { "epoch": 0.7855417673024069, "learning_rate": 3.4682251673733094e-05, "loss": 4.2334, "step": 1179 }, { "epoch": 0.7862080453069044, "learning_rate": 3.465737128721281e-05, "loss": 4.1714, "step": 1180 }, { "epoch": 0.7868743233114017, "learning_rate": 3.463247965272069e-05, "loss": 4.1948, "step": 1181 }, { "epoch": 0.7875406013158991, "learning_rate": 3.460757679924808e-05, "loss": 4.1697, "step": 1182 }, { "epoch": 0.7882068793203965, "learning_rate": 3.4582662755799414e-05, "loss": 4.1957, "step": 1183 }, { "epoch": 0.7888731573248938, "learning_rate": 3.4557737551392174e-05, "loss": 4.1802, "step": 1184 }, { "epoch": 0.7895394353293912, "learning_rate": 3.45328012150568e-05, "loss": 4.1995, "step": 1185 }, { "epoch": 0.7902057133338886, "learning_rate": 3.4507853775836745e-05, "loss": 4.2059, "step": 1186 }, { "epoch": 0.7908719913383859, "learning_rate": 3.4482895262788375e-05, "loss": 4.2229, "step": 1187 }, { "epoch": 0.7915382693428833, "learning_rate": 3.4457925704980944e-05, "loss": 4.1879, "step": 1188 }, { "epoch": 0.7922045473473807, "learning_rate": 3.4432945131496576e-05, "loss": 4.1924, "step": 1189 }, { "epoch": 0.792870825351878, "learning_rate": 3.440795357143023e-05, "loss": 4.2252, "step": 1190 }, { "epoch": 0.7935371033563754, "learning_rate": 3.438295105388966e-05, "loss": 4.2348, "step": 1191 }, { "epoch": 0.7942033813608729, "learning_rate": 3.4357937607995364e-05, "loss": 4.2074, "step": 1192 }, { "epoch": 0.7948696593653702, "learning_rate": 3.4332913262880606e-05, "loss": 4.2365, "step": 1193 }, { "epoch": 0.7955359373698676, "learning_rate": 3.430787804769131e-05, "loss": 4.2631, "step": 1194 }, { "epoch": 0.796202215374365, "learning_rate": 3.428283199158609e-05, "loss": 4.1769, "step": 1195 }, { "epoch": 0.7968684933788623, "learning_rate": 3.425777512373613e-05, "loss": 4.1932, "step": 1196 }, { "epoch": 0.7975347713833597, "learning_rate": 3.4232707473325285e-05, "loss": 4.2005, "step": 1197 }, { "epoch": 0.7982010493878571, "learning_rate": 3.420762906954992e-05, "loss": 4.1937, "step": 1198 }, { "epoch": 0.7988673273923544, "learning_rate": 3.418253994161892e-05, "loss": 4.198, "step": 1199 }, { "epoch": 0.7995336053968518, "learning_rate": 3.415744011875369e-05, "loss": 4.2279, "step": 1200 }, { "epoch": 0.8001998834013492, "learning_rate": 3.4132329630188065e-05, "loss": 4.2062, "step": 1201 }, { "epoch": 0.8008661614058465, "learning_rate": 3.4107208505168315e-05, "loss": 4.1865, "step": 1202 }, { "epoch": 0.8015324394103439, "learning_rate": 3.40820767729531e-05, "loss": 4.2269, "step": 1203 }, { "epoch": 0.8021987174148414, "learning_rate": 3.405693446281343e-05, "loss": 4.1911, "step": 1204 }, { "epoch": 0.8028649954193388, "learning_rate": 3.403178160403263e-05, "loss": 4.1989, "step": 1205 }, { "epoch": 0.8035312734238361, "learning_rate": 3.400661822590632e-05, "loss": 4.1575, "step": 1206 }, { "epoch": 0.8041975514283335, "learning_rate": 3.398144435774237e-05, "loss": 4.1985, "step": 1207 }, { "epoch": 0.8048638294328309, "learning_rate": 3.395626002886087e-05, "loss": 4.1742, "step": 1208 }, { "epoch": 0.8055301074373282, "learning_rate": 3.393106526859408e-05, "loss": 4.1822, "step": 1209 }, { "epoch": 0.8061963854418256, "learning_rate": 3.390586010628643e-05, "loss": 4.2005, "step": 1210 }, { "epoch": 0.806862663446323, "learning_rate": 3.3880644571294445e-05, "loss": 4.2298, "step": 1211 }, { "epoch": 0.8075289414508203, "learning_rate": 3.3855418692986755e-05, "loss": 4.1902, "step": 1212 }, { "epoch": 0.8081952194553177, "learning_rate": 3.383018250074401e-05, "loss": 4.2057, "step": 1213 }, { "epoch": 0.8088614974598151, "learning_rate": 3.380493602395888e-05, "loss": 4.1824, "step": 1214 }, { "epoch": 0.8095277754643125, "learning_rate": 3.3779679292036036e-05, "loss": 4.1724, "step": 1215 }, { "epoch": 0.8101940534688099, "learning_rate": 3.375441233439207e-05, "loss": 4.178, "step": 1216 }, { "epoch": 0.8108603314733073, "learning_rate": 3.372913518045548e-05, "loss": 4.1541, "step": 1217 }, { "epoch": 0.8115266094778046, "learning_rate": 3.370384785966667e-05, "loss": 4.2326, "step": 1218 }, { "epoch": 0.812192887482302, "learning_rate": 3.367855040147785e-05, "loss": 4.2239, "step": 1219 }, { "epoch": 0.8128591654867994, "learning_rate": 3.365324283535305e-05, "loss": 4.1752, "step": 1220 }, { "epoch": 0.8135254434912967, "learning_rate": 3.362792519076808e-05, "loss": 4.146, "step": 1221 }, { "epoch": 0.8141917214957941, "learning_rate": 3.3602597497210496e-05, "loss": 4.1812, "step": 1222 }, { "epoch": 0.8148579995002915, "learning_rate": 3.3577259784179514e-05, "loss": 4.161, "step": 1223 }, { "epoch": 0.8155242775047888, "learning_rate": 3.355191208118608e-05, "loss": 4.1857, "step": 1224 }, { "epoch": 0.8161905555092862, "learning_rate": 3.352655441775273e-05, "loss": 4.18, "step": 1225 }, { "epoch": 0.8168568335137836, "learning_rate": 3.3501186823413636e-05, "loss": 4.174, "step": 1226 }, { "epoch": 0.817523111518281, "learning_rate": 3.34758093277145e-05, "loss": 4.236, "step": 1227 }, { "epoch": 0.8181893895227784, "learning_rate": 3.3450421960212566e-05, "loss": 4.1865, "step": 1228 }, { "epoch": 0.8188556675272758, "learning_rate": 3.342502475047661e-05, "loss": 4.1717, "step": 1229 }, { "epoch": 0.8195219455317732, "learning_rate": 3.339961772808683e-05, "loss": 4.1692, "step": 1230 }, { "epoch": 0.8201882235362705, "learning_rate": 3.337420092263487e-05, "loss": 4.1551, "step": 1231 }, { "epoch": 0.8208545015407679, "learning_rate": 3.3348774363723764e-05, "loss": 4.2164, "step": 1232 }, { "epoch": 0.8215207795452653, "learning_rate": 3.332333808096792e-05, "loss": 4.1438, "step": 1233 }, { "epoch": 0.8221870575497626, "learning_rate": 3.329789210399304e-05, "loss": 4.1848, "step": 1234 }, { "epoch": 0.82285333555426, "learning_rate": 3.327243646243615e-05, "loss": 4.1501, "step": 1235 }, { "epoch": 0.8235196135587574, "learning_rate": 3.324697118594552e-05, "loss": 4.207, "step": 1236 }, { "epoch": 0.8241858915632547, "learning_rate": 3.322149630418062e-05, "loss": 4.1693, "step": 1237 }, { "epoch": 0.8248521695677521, "learning_rate": 3.319601184681216e-05, "loss": 4.1725, "step": 1238 }, { "epoch": 0.8255184475722496, "learning_rate": 3.3170517843521945e-05, "loss": 4.2209, "step": 1239 }, { "epoch": 0.8261847255767469, "learning_rate": 3.3145014324002944e-05, "loss": 4.1924, "step": 1240 }, { "epoch": 0.8268510035812443, "learning_rate": 3.311950131795917e-05, "loss": 4.1881, "step": 1241 }, { "epoch": 0.8275172815857417, "learning_rate": 3.309397885510571e-05, "loss": 4.1776, "step": 1242 }, { "epoch": 0.828183559590239, "learning_rate": 3.306844696516867e-05, "loss": 4.1847, "step": 1243 }, { "epoch": 0.8288498375947364, "learning_rate": 3.304290567788512e-05, "loss": 4.2368, "step": 1244 }, { "epoch": 0.8295161155992338, "learning_rate": 3.3017355023003074e-05, "loss": 4.1638, "step": 1245 }, { "epoch": 0.8301823936037311, "learning_rate": 3.2991795030281466e-05, "loss": 4.1773, "step": 1246 }, { "epoch": 0.8308486716082285, "learning_rate": 3.2966225729490115e-05, "loss": 4.2235, "step": 1247 }, { "epoch": 0.8315149496127259, "learning_rate": 3.294064715040965e-05, "loss": 4.1574, "step": 1248 }, { "epoch": 0.8321812276172232, "learning_rate": 3.291505932283154e-05, "loss": 4.1527, "step": 1249 }, { "epoch": 0.8328475056217207, "learning_rate": 3.2889462276558006e-05, "loss": 4.1635, "step": 1250 }, { "epoch": 0.8335137836262181, "learning_rate": 3.286385604140201e-05, "loss": 4.2183, "step": 1251 }, { "epoch": 0.8341800616307155, "learning_rate": 3.2838240647187215e-05, "loss": 4.1622, "step": 1252 }, { "epoch": 0.8348463396352128, "learning_rate": 3.281261612374796e-05, "loss": 4.2247, "step": 1253 }, { "epoch": 0.8355126176397102, "learning_rate": 3.278698250092922e-05, "loss": 4.1852, "step": 1254 }, { "epoch": 0.8361788956442076, "learning_rate": 3.2761339808586536e-05, "loss": 4.2069, "step": 1255 }, { "epoch": 0.8368451736487049, "learning_rate": 3.273568807658605e-05, "loss": 4.1801, "step": 1256 }, { "epoch": 0.8375114516532023, "learning_rate": 3.271002733480441e-05, "loss": 4.1809, "step": 1257 }, { "epoch": 0.8381777296576997, "learning_rate": 3.268435761312879e-05, "loss": 4.1678, "step": 1258 }, { "epoch": 0.838844007662197, "learning_rate": 3.2658678941456764e-05, "loss": 4.1984, "step": 1259 }, { "epoch": 0.8395102856666944, "learning_rate": 3.2632991349696386e-05, "loss": 4.171, "step": 1260 }, { "epoch": 0.8401765636711918, "learning_rate": 3.260729486776608e-05, "loss": 4.1957, "step": 1261 }, { "epoch": 0.8408428416756892, "learning_rate": 3.25815895255946e-05, "loss": 4.172, "step": 1262 }, { "epoch": 0.8415091196801866, "learning_rate": 3.2555875353121066e-05, "loss": 4.2088, "step": 1263 }, { "epoch": 0.842175397684684, "learning_rate": 3.253015238029485e-05, "loss": 4.1847, "step": 1264 }, { "epoch": 0.8428416756891813, "learning_rate": 3.2504420637075585e-05, "loss": 4.1772, "step": 1265 }, { "epoch": 0.8435079536936787, "learning_rate": 3.247868015343311e-05, "loss": 4.1804, "step": 1266 }, { "epoch": 0.8441742316981761, "learning_rate": 3.245293095934745e-05, "loss": 4.2242, "step": 1267 }, { "epoch": 0.8448405097026734, "learning_rate": 3.2427173084808794e-05, "loss": 4.1317, "step": 1268 }, { "epoch": 0.8455067877071708, "learning_rate": 3.240140655981739e-05, "loss": 4.1682, "step": 1269 }, { "epoch": 0.8461730657116682, "learning_rate": 3.2375631414383616e-05, "loss": 4.1477, "step": 1270 }, { "epoch": 0.8468393437161655, "learning_rate": 3.2349847678527874e-05, "loss": 4.2516, "step": 1271 }, { "epoch": 0.8475056217206629, "learning_rate": 3.2324055382280546e-05, "loss": 4.2314, "step": 1272 }, { "epoch": 0.8481718997251603, "learning_rate": 3.229825455568201e-05, "loss": 4.2002, "step": 1273 }, { "epoch": 0.8488381777296578, "learning_rate": 3.227244522878258e-05, "loss": 4.1379, "step": 1274 }, { "epoch": 0.8495044557341551, "learning_rate": 3.224662743164246e-05, "loss": 4.1655, "step": 1275 }, { "epoch": 0.8501707337386525, "learning_rate": 3.222080119433171e-05, "loss": 4.1165, "step": 1276 }, { "epoch": 0.8508370117431499, "learning_rate": 3.219496654693026e-05, "loss": 4.2215, "step": 1277 }, { "epoch": 0.8515032897476472, "learning_rate": 3.216912351952778e-05, "loss": 4.1923, "step": 1278 }, { "epoch": 0.8521695677521446, "learning_rate": 3.214327214222375e-05, "loss": 4.1695, "step": 1279 }, { "epoch": 0.852835845756642, "learning_rate": 3.211741244512733e-05, "loss": 4.2303, "step": 1280 }, { "epoch": 0.8535021237611393, "learning_rate": 3.209154445835742e-05, "loss": 4.1826, "step": 1281 }, { "epoch": 0.8541684017656367, "learning_rate": 3.206566821204254e-05, "loss": 4.1582, "step": 1282 }, { "epoch": 0.854834679770134, "learning_rate": 3.203978373632082e-05, "loss": 4.1805, "step": 1283 }, { "epoch": 0.8555009577746314, "learning_rate": 3.201389106134001e-05, "loss": 4.1363, "step": 1284 }, { "epoch": 0.8561672357791288, "learning_rate": 3.198799021725741e-05, "loss": 4.2343, "step": 1285 }, { "epoch": 0.8568335137836263, "learning_rate": 3.196208123423978e-05, "loss": 4.1874, "step": 1286 }, { "epoch": 0.8574997917881236, "learning_rate": 3.1936164142463416e-05, "loss": 4.1847, "step": 1287 }, { "epoch": 0.858166069792621, "learning_rate": 3.191023897211405e-05, "loss": 4.2139, "step": 1288 }, { "epoch": 0.8588323477971184, "learning_rate": 3.1884305753386797e-05, "loss": 4.2081, "step": 1289 }, { "epoch": 0.8594986258016157, "learning_rate": 3.185836451648616e-05, "loss": 4.1769, "step": 1290 }, { "epoch": 0.8601649038061131, "learning_rate": 3.1832415291625995e-05, "loss": 4.2019, "step": 1291 }, { "epoch": 0.8608311818106105, "learning_rate": 3.1806458109029444e-05, "loss": 4.1711, "step": 1292 }, { "epoch": 0.8614974598151078, "learning_rate": 3.1780492998928916e-05, "loss": 4.1914, "step": 1293 }, { "epoch": 0.8621637378196052, "learning_rate": 3.175451999156607e-05, "loss": 4.1314, "step": 1294 }, { "epoch": 0.8628300158241026, "learning_rate": 3.1728539117191744e-05, "loss": 4.1966, "step": 1295 }, { "epoch": 0.8634962938285999, "learning_rate": 3.170255040606595e-05, "loss": 4.1655, "step": 1296 }, { "epoch": 0.8641625718330974, "learning_rate": 3.1676553888457824e-05, "loss": 4.2147, "step": 1297 }, { "epoch": 0.8648288498375948, "learning_rate": 3.165054959464558e-05, "loss": 4.1501, "step": 1298 }, { "epoch": 0.8654951278420921, "learning_rate": 3.162453755491655e-05, "loss": 4.2317, "step": 1299 }, { "epoch": 0.8661614058465895, "learning_rate": 3.159851779956699e-05, "loss": 4.2176, "step": 1300 }, { "epoch": 0.8668276838510869, "learning_rate": 3.157249035890222e-05, "loss": 4.182, "step": 1301 }, { "epoch": 0.8674939618555843, "learning_rate": 3.154645526323647e-05, "loss": 4.1717, "step": 1302 }, { "epoch": 0.8681602398600816, "learning_rate": 3.152041254289293e-05, "loss": 4.2025, "step": 1303 }, { "epoch": 0.868826517864579, "learning_rate": 3.14943622282036e-05, "loss": 4.1741, "step": 1304 }, { "epoch": 0.8694927958690764, "learning_rate": 3.146830434950941e-05, "loss": 4.1703, "step": 1305 }, { "epoch": 0.8701590738735737, "learning_rate": 3.144223893716003e-05, "loss": 4.2098, "step": 1306 }, { "epoch": 0.8708253518780711, "learning_rate": 3.1416166021513925e-05, "loss": 4.2207, "step": 1307 }, { "epoch": 0.8714916298825685, "learning_rate": 3.139008563293832e-05, "loss": 4.1605, "step": 1308 }, { "epoch": 0.8721579078870659, "learning_rate": 3.136399780180913e-05, "loss": 4.2133, "step": 1309 }, { "epoch": 0.8728241858915633, "learning_rate": 3.133790255851093e-05, "loss": 4.1608, "step": 1310 }, { "epoch": 0.8734904638960607, "learning_rate": 3.131179993343693e-05, "loss": 4.141, "step": 1311 }, { "epoch": 0.874156741900558, "learning_rate": 3.128568995698895e-05, "loss": 4.1674, "step": 1312 }, { "epoch": 0.8748230199050554, "learning_rate": 3.125957265957737e-05, "loss": 4.195, "step": 1313 }, { "epoch": 0.8754892979095528, "learning_rate": 3.1233448071621084e-05, "loss": 4.2007, "step": 1314 }, { "epoch": 0.8761555759140501, "learning_rate": 3.1207316223547484e-05, "loss": 4.1968, "step": 1315 }, { "epoch": 0.8768218539185475, "learning_rate": 3.1181177145792425e-05, "loss": 4.1533, "step": 1316 }, { "epoch": 0.8774881319230449, "learning_rate": 3.115503086880017e-05, "loss": 4.1973, "step": 1317 }, { "epoch": 0.8781544099275422, "learning_rate": 3.112887742302337e-05, "loss": 4.1821, "step": 1318 }, { "epoch": 0.8788206879320396, "learning_rate": 3.110271683892304e-05, "loss": 4.175, "step": 1319 }, { "epoch": 0.879486965936537, "learning_rate": 3.107654914696849e-05, "loss": 4.159, "step": 1320 }, { "epoch": 0.8801532439410344, "learning_rate": 3.105037437763732e-05, "loss": 4.1871, "step": 1321 }, { "epoch": 0.8808195219455318, "learning_rate": 3.102419256141536e-05, "loss": 4.1653, "step": 1322 }, { "epoch": 0.8814857999500292, "learning_rate": 3.0998003728796674e-05, "loss": 4.1505, "step": 1323 }, { "epoch": 0.8821520779545265, "learning_rate": 3.0971807910283465e-05, "loss": 4.2243, "step": 1324 }, { "epoch": 0.8828183559590239, "learning_rate": 3.094560513638609e-05, "loss": 4.1488, "step": 1325 }, { "epoch": 0.8834846339635213, "learning_rate": 3.091939543762301e-05, "loss": 4.1625, "step": 1326 }, { "epoch": 0.8841509119680186, "learning_rate": 3.089317884452076e-05, "loss": 4.1729, "step": 1327 }, { "epoch": 0.884817189972516, "learning_rate": 3.086695538761386e-05, "loss": 4.1655, "step": 1328 }, { "epoch": 0.8854834679770134, "learning_rate": 3.084072509744488e-05, "loss": 4.2072, "step": 1329 }, { "epoch": 0.8861497459815108, "learning_rate": 3.0814488004564323e-05, "loss": 4.1859, "step": 1330 }, { "epoch": 0.8868160239860081, "learning_rate": 3.078824413953061e-05, "loss": 4.1646, "step": 1331 }, { "epoch": 0.8874823019905055, "learning_rate": 3.076199353291005e-05, "loss": 4.136, "step": 1332 }, { "epoch": 0.888148579995003, "learning_rate": 3.073573621527682e-05, "loss": 4.2069, "step": 1333 }, { "epoch": 0.8888148579995003, "learning_rate": 3.070947221721291e-05, "loss": 4.1074, "step": 1334 }, { "epoch": 0.8894811360039977, "learning_rate": 3.0683201569308077e-05, "loss": 4.2027, "step": 1335 }, { "epoch": 0.8901474140084951, "learning_rate": 3.065692430215982e-05, "loss": 4.1333, "step": 1336 }, { "epoch": 0.8908136920129924, "learning_rate": 3.063064044637337e-05, "loss": 4.176, "step": 1337 }, { "epoch": 0.8914799700174898, "learning_rate": 3.060435003256161e-05, "loss": 4.1859, "step": 1338 }, { "epoch": 0.8921462480219872, "learning_rate": 3.0578053091345086e-05, "loss": 4.202, "step": 1339 }, { "epoch": 0.8928125260264845, "learning_rate": 3.055174965335192e-05, "loss": 4.199, "step": 1340 }, { "epoch": 0.8934788040309819, "learning_rate": 3.0525439749217824e-05, "loss": 4.1932, "step": 1341 }, { "epoch": 0.8941450820354793, "learning_rate": 3.0499123409586004e-05, "loss": 4.1309, "step": 1342 }, { "epoch": 0.8948113600399766, "learning_rate": 3.0472800665107205e-05, "loss": 4.2043, "step": 1343 }, { "epoch": 0.8954776380444741, "learning_rate": 3.044647154643962e-05, "loss": 4.1564, "step": 1344 }, { "epoch": 0.8961439160489715, "learning_rate": 3.0420136084248847e-05, "loss": 4.2096, "step": 1345 }, { "epoch": 0.8968101940534688, "learning_rate": 3.0393794309207884e-05, "loss": 4.1411, "step": 1346 }, { "epoch": 0.8974764720579662, "learning_rate": 3.036744625199709e-05, "loss": 4.1767, "step": 1347 }, { "epoch": 0.8981427500624636, "learning_rate": 3.0341091943304136e-05, "loss": 4.1549, "step": 1348 }, { "epoch": 0.898809028066961, "learning_rate": 3.031473141382396e-05, "loss": 4.1656, "step": 1349 }, { "epoch": 0.8994753060714583, "learning_rate": 3.0288364694258752e-05, "loss": 4.2039, "step": 1350 }, { "epoch": 0.9001415840759557, "learning_rate": 3.026199181531794e-05, "loss": 4.1848, "step": 1351 }, { "epoch": 0.900807862080453, "learning_rate": 3.0235612807718084e-05, "loss": 4.1699, "step": 1352 }, { "epoch": 0.9014741400849504, "learning_rate": 3.0209227702182903e-05, "loss": 4.2408, "step": 1353 }, { "epoch": 0.9021404180894478, "learning_rate": 3.0182836529443216e-05, "loss": 4.1646, "step": 1354 }, { "epoch": 0.9028066960939451, "learning_rate": 3.015643932023691e-05, "loss": 4.1557, "step": 1355 }, { "epoch": 0.9034729740984426, "learning_rate": 3.0130036105308902e-05, "loss": 4.2057, "step": 1356 }, { "epoch": 0.90413925210294, "learning_rate": 3.0103626915411093e-05, "loss": 4.1323, "step": 1357 }, { "epoch": 0.9048055301074374, "learning_rate": 3.007721178130237e-05, "loss": 4.143, "step": 1358 }, { "epoch": 0.9054718081119347, "learning_rate": 3.0050790733748514e-05, "loss": 4.1681, "step": 1359 }, { "epoch": 0.9061380861164321, "learning_rate": 3.00243638035222e-05, "loss": 4.2, "step": 1360 }, { "epoch": 0.9068043641209295, "learning_rate": 2.9997931021402975e-05, "loss": 4.152, "step": 1361 }, { "epoch": 0.9074706421254268, "learning_rate": 2.997149241817718e-05, "loss": 4.1748, "step": 1362 }, { "epoch": 0.9081369201299242, "learning_rate": 2.9945048024637935e-05, "loss": 4.1845, "step": 1363 }, { "epoch": 0.9088031981344216, "learning_rate": 2.991859787158512e-05, "loss": 4.197, "step": 1364 }, { "epoch": 0.9094694761389189, "learning_rate": 2.989214198982531e-05, "loss": 4.1846, "step": 1365 }, { "epoch": 0.9101357541434163, "learning_rate": 2.9865680410171764e-05, "loss": 4.1566, "step": 1366 }, { "epoch": 0.9108020321479137, "learning_rate": 2.9839213163444358e-05, "loss": 4.1297, "step": 1367 }, { "epoch": 0.9114683101524111, "learning_rate": 2.9812740280469596e-05, "loss": 4.1658, "step": 1368 }, { "epoch": 0.9121345881569085, "learning_rate": 2.9786261792080522e-05, "loss": 4.1426, "step": 1369 }, { "epoch": 0.9128008661614059, "learning_rate": 2.975977772911671e-05, "loss": 4.1999, "step": 1370 }, { "epoch": 0.9134671441659032, "learning_rate": 2.9733288122424246e-05, "loss": 4.155, "step": 1371 }, { "epoch": 0.9141334221704006, "learning_rate": 2.970679300285567e-05, "loss": 4.1734, "step": 1372 }, { "epoch": 0.914799700174898, "learning_rate": 2.9680292401269915e-05, "loss": 4.2032, "step": 1373 }, { "epoch": 0.9154659781793953, "learning_rate": 2.965378634853232e-05, "loss": 4.1747, "step": 1374 }, { "epoch": 0.9161322561838927, "learning_rate": 2.9627274875514588e-05, "loss": 4.1845, "step": 1375 }, { "epoch": 0.9167985341883901, "learning_rate": 2.9600758013094704e-05, "loss": 4.1636, "step": 1376 }, { "epoch": 0.9174648121928874, "learning_rate": 2.957423579215695e-05, "loss": 4.2149, "step": 1377 }, { "epoch": 0.9181310901973848, "learning_rate": 2.9547708243591836e-05, "loss": 4.2194, "step": 1378 }, { "epoch": 0.9187973682018822, "learning_rate": 2.952117539829609e-05, "loss": 4.1763, "step": 1379 }, { "epoch": 0.9194636462063797, "learning_rate": 2.949463728717261e-05, "loss": 4.245, "step": 1380 }, { "epoch": 0.920129924210877, "learning_rate": 2.9468093941130404e-05, "loss": 4.1469, "step": 1381 }, { "epoch": 0.9207962022153744, "learning_rate": 2.9441545391084608e-05, "loss": 4.174, "step": 1382 }, { "epoch": 0.9214624802198718, "learning_rate": 2.9414991667956403e-05, "loss": 4.1846, "step": 1383 }, { "epoch": 0.9221287582243691, "learning_rate": 2.9388432802672984e-05, "loss": 4.2096, "step": 1384 }, { "epoch": 0.9227950362288665, "learning_rate": 2.936186882616756e-05, "loss": 4.2139, "step": 1385 }, { "epoch": 0.9234613142333639, "learning_rate": 2.933529976937929e-05, "loss": 4.1711, "step": 1386 }, { "epoch": 0.9241275922378612, "learning_rate": 2.9308725663253223e-05, "loss": 4.2087, "step": 1387 }, { "epoch": 0.9247938702423586, "learning_rate": 2.928214653874031e-05, "loss": 4.2039, "step": 1388 }, { "epoch": 0.925460148246856, "learning_rate": 2.9255562426797362e-05, "loss": 4.1971, "step": 1389 }, { "epoch": 0.9261264262513533, "learning_rate": 2.922897335838696e-05, "loss": 4.1804, "step": 1390 }, { "epoch": 0.9267927042558508, "learning_rate": 2.920237936447749e-05, "loss": 4.1864, "step": 1391 }, { "epoch": 0.9274589822603482, "learning_rate": 2.917578047604305e-05, "loss": 4.1159, "step": 1392 }, { "epoch": 0.9281252602648455, "learning_rate": 2.9149176724063472e-05, "loss": 4.1594, "step": 1393 }, { "epoch": 0.9287915382693429, "learning_rate": 2.912256813952422e-05, "loss": 4.2039, "step": 1394 }, { "epoch": 0.9294578162738403, "learning_rate": 2.9095954753416397e-05, "loss": 4.1591, "step": 1395 }, { "epoch": 0.9301240942783376, "learning_rate": 2.9069336596736712e-05, "loss": 4.1827, "step": 1396 }, { "epoch": 0.930790372282835, "learning_rate": 2.9042713700487412e-05, "loss": 4.1792, "step": 1397 }, { "epoch": 0.9314566502873324, "learning_rate": 2.9016086095676264e-05, "loss": 4.0958, "step": 1398 }, { "epoch": 0.9321229282918297, "learning_rate": 2.8989453813316535e-05, "loss": 4.1769, "step": 1399 }, { "epoch": 0.9327892062963271, "learning_rate": 2.8962816884426945e-05, "loss": 4.1237, "step": 1400 }, { "epoch": 0.9334554843008245, "learning_rate": 2.8936175340031586e-05, "loss": 4.1924, "step": 1401 }, { "epoch": 0.9341217623053218, "learning_rate": 2.8909529211159958e-05, "loss": 4.1995, "step": 1402 }, { "epoch": 0.9347880403098193, "learning_rate": 2.888287852884691e-05, "loss": 4.2064, "step": 1403 }, { "epoch": 0.9354543183143167, "learning_rate": 2.885622332413256e-05, "loss": 4.1767, "step": 1404 }, { "epoch": 0.9361205963188141, "learning_rate": 2.882956362806232e-05, "loss": 4.19, "step": 1405 }, { "epoch": 0.9367868743233114, "learning_rate": 2.880289947168683e-05, "loss": 4.2011, "step": 1406 }, { "epoch": 0.9374531523278088, "learning_rate": 2.877623088606191e-05, "loss": 4.2116, "step": 1407 }, { "epoch": 0.9381194303323062, "learning_rate": 2.8749557902248558e-05, "loss": 4.1801, "step": 1408 }, { "epoch": 0.9387857083368035, "learning_rate": 2.8722880551312876e-05, "loss": 4.1376, "step": 1409 }, { "epoch": 0.9394519863413009, "learning_rate": 2.869619886432607e-05, "loss": 4.1801, "step": 1410 }, { "epoch": 0.9401182643457983, "learning_rate": 2.8669512872364386e-05, "loss": 4.2048, "step": 1411 }, { "epoch": 0.9407845423502956, "learning_rate": 2.8642822606509075e-05, "loss": 4.1797, "step": 1412 }, { "epoch": 0.941450820354793, "learning_rate": 2.861612809784639e-05, "loss": 4.1792, "step": 1413 }, { "epoch": 0.9421170983592904, "learning_rate": 2.8589429377467514e-05, "loss": 4.1836, "step": 1414 }, { "epoch": 0.9427833763637878, "learning_rate": 2.856272647646852e-05, "loss": 4.1556, "step": 1415 }, { "epoch": 0.9434496543682852, "learning_rate": 2.8536019425950373e-05, "loss": 4.1608, "step": 1416 }, { "epoch": 0.9441159323727826, "learning_rate": 2.8509308257018863e-05, "loss": 4.1406, "step": 1417 }, { "epoch": 0.9447822103772799, "learning_rate": 2.848259300078458e-05, "loss": 4.1548, "step": 1418 }, { "epoch": 0.9454484883817773, "learning_rate": 2.8455873688362862e-05, "loss": 4.2307, "step": 1419 }, { "epoch": 0.9461147663862747, "learning_rate": 2.8429150350873768e-05, "loss": 4.213, "step": 1420 }, { "epoch": 0.946781044390772, "learning_rate": 2.840242301944208e-05, "loss": 4.1926, "step": 1421 }, { "epoch": 0.9474473223952694, "learning_rate": 2.83756917251972e-05, "loss": 4.1769, "step": 1422 }, { "epoch": 0.9481136003997668, "learning_rate": 2.8348956499273144e-05, "loss": 4.1715, "step": 1423 }, { "epoch": 0.9487798784042641, "learning_rate": 2.832221737280853e-05, "loss": 4.1705, "step": 1424 }, { "epoch": 0.9494461564087615, "learning_rate": 2.8295474376946496e-05, "loss": 4.1361, "step": 1425 }, { "epoch": 0.9501124344132589, "learning_rate": 2.82687275428347e-05, "loss": 4.1744, "step": 1426 }, { "epoch": 0.9507787124177564, "learning_rate": 2.824197690162526e-05, "loss": 4.1061, "step": 1427 }, { "epoch": 0.9514449904222537, "learning_rate": 2.8215222484474762e-05, "loss": 4.2072, "step": 1428 }, { "epoch": 0.9521112684267511, "learning_rate": 2.8188464322544127e-05, "loss": 4.1508, "step": 1429 }, { "epoch": 0.9527775464312485, "learning_rate": 2.8161702446998694e-05, "loss": 4.153, "step": 1430 }, { "epoch": 0.9534438244357458, "learning_rate": 2.813493688900811e-05, "loss": 4.1635, "step": 1431 }, { "epoch": 0.9541101024402432, "learning_rate": 2.8108167679746294e-05, "loss": 4.1986, "step": 1432 }, { "epoch": 0.9547763804447406, "learning_rate": 2.8081394850391442e-05, "loss": 4.1771, "step": 1433 }, { "epoch": 0.9554426584492379, "learning_rate": 2.8054618432125947e-05, "loss": 4.208, "step": 1434 }, { "epoch": 0.9561089364537353, "learning_rate": 2.8027838456136397e-05, "loss": 4.135, "step": 1435 }, { "epoch": 0.9567752144582327, "learning_rate": 2.8001054953613513e-05, "loss": 4.2017, "step": 1436 }, { "epoch": 0.95744149246273, "learning_rate": 2.797426795575213e-05, "loss": 4.1272, "step": 1437 }, { "epoch": 0.9581077704672275, "learning_rate": 2.794747749375116e-05, "loss": 4.1857, "step": 1438 }, { "epoch": 0.9587740484717249, "learning_rate": 2.7920683598813536e-05, "loss": 4.1879, "step": 1439 }, { "epoch": 0.9594403264762222, "learning_rate": 2.7893886302146182e-05, "loss": 4.2102, "step": 1440 }, { "epoch": 0.9601066044807196, "learning_rate": 2.7867085634960016e-05, "loss": 4.1789, "step": 1441 }, { "epoch": 0.960772882485217, "learning_rate": 2.784028162846985e-05, "loss": 4.1838, "step": 1442 }, { "epoch": 0.9614391604897143, "learning_rate": 2.78134743138944e-05, "loss": 4.1577, "step": 1443 }, { "epoch": 0.9621054384942117, "learning_rate": 2.7786663722456235e-05, "loss": 4.19, "step": 1444 }, { "epoch": 0.9627717164987091, "learning_rate": 2.775984988538175e-05, "loss": 4.1857, "step": 1445 }, { "epoch": 0.9634379945032064, "learning_rate": 2.7733032833901085e-05, "loss": 4.1385, "step": 1446 }, { "epoch": 0.9641042725077038, "learning_rate": 2.7706212599248165e-05, "loss": 4.1667, "step": 1447 }, { "epoch": 0.9647705505122012, "learning_rate": 2.767938921266059e-05, "loss": 4.1302, "step": 1448 }, { "epoch": 0.9654368285166985, "learning_rate": 2.7652562705379663e-05, "loss": 4.1594, "step": 1449 }, { "epoch": 0.966103106521196, "learning_rate": 2.7625733108650298e-05, "loss": 4.1545, "step": 1450 }, { "epoch": 0.9667693845256934, "learning_rate": 2.7598900453721005e-05, "loss": 4.1727, "step": 1451 }, { "epoch": 0.9674356625301908, "learning_rate": 2.757206477184388e-05, "loss": 4.1644, "step": 1452 }, { "epoch": 0.9681019405346881, "learning_rate": 2.754522609427452e-05, "loss": 4.1677, "step": 1453 }, { "epoch": 0.9687682185391855, "learning_rate": 2.7518384452272013e-05, "loss": 4.1916, "step": 1454 }, { "epoch": 0.9694344965436829, "learning_rate": 2.7491539877098927e-05, "loss": 4.1815, "step": 1455 }, { "epoch": 0.9701007745481802, "learning_rate": 2.746469240002121e-05, "loss": 4.2179, "step": 1456 }, { "epoch": 0.9707670525526776, "learning_rate": 2.7437842052308206e-05, "loss": 4.1488, "step": 1457 }, { "epoch": 0.971433330557175, "learning_rate": 2.7410988865232612e-05, "loss": 4.1159, "step": 1458 }, { "epoch": 0.9720996085616723, "learning_rate": 2.7384132870070422e-05, "loss": 4.1715, "step": 1459 }, { "epoch": 0.9727658865661697, "learning_rate": 2.7357274098100895e-05, "loss": 4.1435, "step": 1460 }, { "epoch": 0.9734321645706671, "learning_rate": 2.7330412580606534e-05, "loss": 4.1653, "step": 1461 }, { "epoch": 0.9740984425751645, "learning_rate": 2.7303548348873032e-05, "loss": 4.1381, "step": 1462 }, { "epoch": 0.9747647205796619, "learning_rate": 2.7276681434189255e-05, "loss": 4.1792, "step": 1463 }, { "epoch": 0.9754309985841593, "learning_rate": 2.724981186784718e-05, "loss": 4.102, "step": 1464 }, { "epoch": 0.9760972765886566, "learning_rate": 2.722293968114188e-05, "loss": 4.1529, "step": 1465 }, { "epoch": 0.976763554593154, "learning_rate": 2.7196064905371478e-05, "loss": 4.198, "step": 1466 }, { "epoch": 0.9774298325976514, "learning_rate": 2.7169187571837118e-05, "loss": 4.1859, "step": 1467 }, { "epoch": 0.9780961106021487, "learning_rate": 2.7142307711842906e-05, "loss": 4.1567, "step": 1468 }, { "epoch": 0.9787623886066461, "learning_rate": 2.7115425356695918e-05, "loss": 4.1669, "step": 1469 }, { "epoch": 0.9794286666111435, "learning_rate": 2.708854053770611e-05, "loss": 4.1741, "step": 1470 }, { "epoch": 0.9800949446156408, "learning_rate": 2.7061653286186317e-05, "loss": 4.1972, "step": 1471 }, { "epoch": 0.9807612226201382, "learning_rate": 2.7034763633452227e-05, "loss": 4.1864, "step": 1472 }, { "epoch": 0.9814275006246357, "learning_rate": 2.7007871610822282e-05, "loss": 4.1316, "step": 1473 }, { "epoch": 0.982093778629133, "learning_rate": 2.6980977249617718e-05, "loss": 4.2117, "step": 1474 }, { "epoch": 0.9827600566336304, "learning_rate": 2.6954080581162494e-05, "loss": 4.151, "step": 1475 }, { "epoch": 0.9834263346381278, "learning_rate": 2.6927181636783232e-05, "loss": 4.183, "step": 1476 }, { "epoch": 0.9840926126426252, "learning_rate": 2.6900280447809235e-05, "loss": 4.1645, "step": 1477 }, { "epoch": 0.9847588906471225, "learning_rate": 2.6873377045572395e-05, "loss": 4.142, "step": 1478 }, { "epoch": 0.9854251686516199, "learning_rate": 2.6846471461407197e-05, "loss": 4.1316, "step": 1479 }, { "epoch": 0.9860914466561173, "learning_rate": 2.6819563726650665e-05, "loss": 4.2257, "step": 1480 }, { "epoch": 0.9867577246606146, "learning_rate": 2.679265387264232e-05, "loss": 4.1344, "step": 1481 }, { "epoch": 0.987424002665112, "learning_rate": 2.676574193072416e-05, "loss": 4.2045, "step": 1482 }, { "epoch": 0.9880902806696094, "learning_rate": 2.6738827932240612e-05, "loss": 4.2043, "step": 1483 }, { "epoch": 0.9887565586741067, "learning_rate": 2.6711911908538494e-05, "loss": 4.1548, "step": 1484 }, { "epoch": 0.9894228366786042, "learning_rate": 2.6684993890966985e-05, "loss": 4.1902, "step": 1485 }, { "epoch": 0.9900891146831016, "learning_rate": 2.6658073910877603e-05, "loss": 4.1492, "step": 1486 }, { "epoch": 0.9907553926875989, "learning_rate": 2.663115199962412e-05, "loss": 4.1645, "step": 1487 }, { "epoch": 0.9914216706920963, "learning_rate": 2.660422818856258e-05, "loss": 4.1998, "step": 1488 }, { "epoch": 0.9920879486965937, "learning_rate": 2.6577302509051232e-05, "loss": 4.2077, "step": 1489 }, { "epoch": 0.992754226701091, "learning_rate": 2.6550374992450504e-05, "loss": 4.1839, "step": 1490 }, { "epoch": 0.9934205047055884, "learning_rate": 2.6523445670122965e-05, "loss": 4.185, "step": 1491 }, { "epoch": 0.9940867827100858, "learning_rate": 2.6496514573433283e-05, "loss": 4.1437, "step": 1492 }, { "epoch": 0.9947530607145831, "learning_rate": 2.6469581733748196e-05, "loss": 4.1607, "step": 1493 }, { "epoch": 0.9954193387190805, "learning_rate": 2.6442647182436465e-05, "loss": 4.1835, "step": 1494 }, { "epoch": 0.9960856167235779, "learning_rate": 2.641571095086885e-05, "loss": 4.1136, "step": 1495 }, { "epoch": 0.9967518947280752, "learning_rate": 2.638877307041807e-05, "loss": 4.1866, "step": 1496 }, { "epoch": 0.9974181727325727, "learning_rate": 2.6361833572458767e-05, "loss": 4.1814, "step": 1497 }, { "epoch": 0.9980844507370701, "learning_rate": 2.6334892488367456e-05, "loss": 4.1532, "step": 1498 }, { "epoch": 0.9987507287415675, "learning_rate": 2.6307949849522506e-05, "loss": 4.1098, "step": 1499 }, { "epoch": 0.9994170067460648, "learning_rate": 2.62810056873041e-05, "loss": 4.2455, "step": 1500 }, { "epoch": 1.0, "learning_rate": 2.625406003309419e-05, "loss": 4.1771, "step": 1501 }, { "epoch": 1.0006662780044975, "learning_rate": 2.622711291827646e-05, "loss": 4.1266, "step": 1502 }, { "epoch": 1.0013325560089947, "learning_rate": 2.620016437423632e-05, "loss": 4.147, "step": 1503 }, { "epoch": 1.0019988340134922, "learning_rate": 2.6173214432360806e-05, "loss": 4.1251, "step": 1504 }, { "epoch": 1.0026651120179895, "learning_rate": 2.6146263124038617e-05, "loss": 4.2098, "step": 1505 }, { "epoch": 1.003331390022487, "learning_rate": 2.611931048066003e-05, "loss": 4.1448, "step": 1506 }, { "epoch": 1.0039976680269842, "learning_rate": 2.609235653361687e-05, "loss": 4.1709, "step": 1507 }, { "epoch": 1.0046639460314817, "learning_rate": 2.6065401314302494e-05, "loss": 4.129, "step": 1508 }, { "epoch": 1.005330224035979, "learning_rate": 2.6038444854111732e-05, "loss": 4.1714, "step": 1509 }, { "epoch": 1.0059965020404764, "learning_rate": 2.6011487184440853e-05, "loss": 4.1849, "step": 1510 }, { "epoch": 1.0066627800449737, "learning_rate": 2.598452833668755e-05, "loss": 4.1568, "step": 1511 }, { "epoch": 1.0073290580494711, "learning_rate": 2.595756834225089e-05, "loss": 4.2149, "step": 1512 }, { "epoch": 1.0079953360539686, "learning_rate": 2.5930607232531247e-05, "loss": 4.1465, "step": 1513 }, { "epoch": 1.0086616140584659, "learning_rate": 2.5903645038930326e-05, "loss": 4.1686, "step": 1514 }, { "epoch": 1.0093278920629634, "learning_rate": 2.5876681792851083e-05, "loss": 4.2141, "step": 1515 }, { "epoch": 1.0099941700674606, "learning_rate": 2.5849717525697687e-05, "loss": 4.1441, "step": 1516 }, { "epoch": 1.010660448071958, "learning_rate": 2.5822752268875517e-05, "loss": 4.1267, "step": 1517 }, { "epoch": 1.0113267260764554, "learning_rate": 2.5795786053791098e-05, "loss": 4.2115, "step": 1518 }, { "epoch": 1.0119930040809528, "learning_rate": 2.576881891185206e-05, "loss": 4.1316, "step": 1519 }, { "epoch": 1.01265928208545, "learning_rate": 2.574185087446714e-05, "loss": 4.1417, "step": 1520 }, { "epoch": 1.0133255600899476, "learning_rate": 2.5714881973046067e-05, "loss": 4.1786, "step": 1521 }, { "epoch": 1.0139918380944448, "learning_rate": 2.568791223899964e-05, "loss": 4.2103, "step": 1522 }, { "epoch": 1.0146581160989423, "learning_rate": 2.5660941703739576e-05, "loss": 4.1982, "step": 1523 }, { "epoch": 1.0153243941034396, "learning_rate": 2.5633970398678557e-05, "loss": 4.115, "step": 1524 }, { "epoch": 1.015990672107937, "learning_rate": 2.5606998355230143e-05, "loss": 4.1682, "step": 1525 }, { "epoch": 1.0166569501124345, "learning_rate": 2.5580025604808772e-05, "loss": 4.1832, "step": 1526 }, { "epoch": 1.0173232281169318, "learning_rate": 2.555305217882967e-05, "loss": 4.1563, "step": 1527 }, { "epoch": 1.0179895061214292, "learning_rate": 2.5526078108708906e-05, "loss": 4.1292, "step": 1528 }, { "epoch": 1.0186557841259265, "learning_rate": 2.5499103425863246e-05, "loss": 4.1805, "step": 1529 }, { "epoch": 1.019322062130424, "learning_rate": 2.5472128161710197e-05, "loss": 4.176, "step": 1530 }, { "epoch": 1.0199883401349212, "learning_rate": 2.544515234766794e-05, "loss": 4.1919, "step": 1531 }, { "epoch": 1.0206546181394187, "learning_rate": 2.5418176015155287e-05, "loss": 4.1638, "step": 1532 }, { "epoch": 1.021320896143916, "learning_rate": 2.5391199195591663e-05, "loss": 4.1805, "step": 1533 }, { "epoch": 1.0219871741484134, "learning_rate": 2.5364221920397064e-05, "loss": 4.1425, "step": 1534 }, { "epoch": 1.0226534521529107, "learning_rate": 2.5337244220991996e-05, "loss": 4.2028, "step": 1535 }, { "epoch": 1.0233197301574082, "learning_rate": 2.5310266128797493e-05, "loss": 4.1684, "step": 1536 }, { "epoch": 1.0239860081619057, "learning_rate": 2.5283287675235018e-05, "loss": 4.141, "step": 1537 }, { "epoch": 1.024652286166403, "learning_rate": 2.525630889172646e-05, "loss": 4.2051, "step": 1538 }, { "epoch": 1.0253185641709004, "learning_rate": 2.5229329809694097e-05, "loss": 4.202, "step": 1539 }, { "epoch": 1.0259848421753976, "learning_rate": 2.5202350460560563e-05, "loss": 4.1491, "step": 1540 }, { "epoch": 1.0266511201798951, "learning_rate": 2.5175370875748778e-05, "loss": 4.1997, "step": 1541 }, { "epoch": 1.0273173981843924, "learning_rate": 2.514839108668197e-05, "loss": 4.2229, "step": 1542 }, { "epoch": 1.0279836761888899, "learning_rate": 2.512141112478358e-05, "loss": 4.174, "step": 1543 }, { "epoch": 1.0286499541933871, "learning_rate": 2.5094431021477245e-05, "loss": 4.1598, "step": 1544 }, { "epoch": 1.0293162321978846, "learning_rate": 2.50674508081868e-05, "loss": 4.2215, "step": 1545 }, { "epoch": 1.0299825102023819, "learning_rate": 2.504047051633618e-05, "loss": 4.1654, "step": 1546 }, { "epoch": 1.0306487882068793, "learning_rate": 2.5013490177349414e-05, "loss": 4.2187, "step": 1547 }, { "epoch": 1.0313150662113766, "learning_rate": 2.498650982265059e-05, "loss": 4.1835, "step": 1548 }, { "epoch": 1.031981344215874, "learning_rate": 2.4959529483663826e-05, "loss": 4.2273, "step": 1549 }, { "epoch": 1.0326476222203715, "learning_rate": 2.493254919181321e-05, "loss": 4.1518, "step": 1550 }, { "epoch": 1.0333139002248688, "learning_rate": 2.4905568978522754e-05, "loss": 4.2227, "step": 1551 }, { "epoch": 1.0339801782293663, "learning_rate": 2.487858887521643e-05, "loss": 4.1928, "step": 1552 }, { "epoch": 1.0346464562338635, "learning_rate": 2.4851608913318035e-05, "loss": 4.168, "step": 1553 }, { "epoch": 1.035312734238361, "learning_rate": 2.482462912425122e-05, "loss": 4.1058, "step": 1554 }, { "epoch": 1.0359790122428583, "learning_rate": 2.4797649539439443e-05, "loss": 4.2087, "step": 1555 }, { "epoch": 1.0366452902473557, "learning_rate": 2.4770670190305905e-05, "loss": 4.1917, "step": 1556 }, { "epoch": 1.037311568251853, "learning_rate": 2.4743691108273542e-05, "loss": 4.1941, "step": 1557 }, { "epoch": 1.0379778462563505, "learning_rate": 2.4716712324764988e-05, "loss": 4.1881, "step": 1558 }, { "epoch": 1.0386441242608477, "learning_rate": 2.4689733871202513e-05, "loss": 4.1521, "step": 1559 }, { "epoch": 1.0393104022653452, "learning_rate": 2.4662755779008e-05, "loss": 4.1559, "step": 1560 }, { "epoch": 1.0399766802698427, "learning_rate": 2.4635778079602942e-05, "loss": 4.1822, "step": 1561 }, { "epoch": 1.04064295827434, "learning_rate": 2.4608800804408342e-05, "loss": 4.1995, "step": 1562 }, { "epoch": 1.0413092362788374, "learning_rate": 2.4581823984844722e-05, "loss": 4.2087, "step": 1563 }, { "epoch": 1.0419755142833347, "learning_rate": 2.455484765233207e-05, "loss": 4.0842, "step": 1564 }, { "epoch": 1.0426417922878322, "learning_rate": 2.4527871838289812e-05, "loss": 4.1475, "step": 1565 }, { "epoch": 1.0433080702923294, "learning_rate": 2.4500896574136764e-05, "loss": 4.1403, "step": 1566 }, { "epoch": 1.043974348296827, "learning_rate": 2.44739218912911e-05, "loss": 4.149, "step": 1567 }, { "epoch": 1.0446406263013241, "learning_rate": 2.444694782117033e-05, "loss": 4.1652, "step": 1568 }, { "epoch": 1.0453069043058216, "learning_rate": 2.4419974395191243e-05, "loss": 4.2032, "step": 1569 }, { "epoch": 1.0459731823103189, "learning_rate": 2.4393001644769863e-05, "loss": 4.1557, "step": 1570 }, { "epoch": 1.0466394603148164, "learning_rate": 2.436602960132145e-05, "loss": 4.1891, "step": 1571 }, { "epoch": 1.0473057383193138, "learning_rate": 2.433905829626043e-05, "loss": 4.17, "step": 1572 }, { "epoch": 1.047972016323811, "learning_rate": 2.431208776100036e-05, "loss": 4.1507, "step": 1573 }, { "epoch": 1.0486382943283086, "learning_rate": 2.428511802695394e-05, "loss": 4.196, "step": 1574 }, { "epoch": 1.0493045723328058, "learning_rate": 2.4258149125532876e-05, "loss": 4.1504, "step": 1575 }, { "epoch": 1.0499708503373033, "learning_rate": 2.4231181088147935e-05, "loss": 4.123, "step": 1576 }, { "epoch": 1.0506371283418006, "learning_rate": 2.4204213946208904e-05, "loss": 4.1012, "step": 1577 }, { "epoch": 1.051303406346298, "learning_rate": 2.4177247731124493e-05, "loss": 4.1361, "step": 1578 }, { "epoch": 1.0519696843507953, "learning_rate": 2.4150282474302315e-05, "loss": 4.1387, "step": 1579 }, { "epoch": 1.0526359623552928, "learning_rate": 2.4123318207148927e-05, "loss": 4.196, "step": 1580 }, { "epoch": 1.05330224035979, "learning_rate": 2.4096354961069687e-05, "loss": 4.1385, "step": 1581 }, { "epoch": 1.0539685183642875, "learning_rate": 2.4069392767468756e-05, "loss": 4.1504, "step": 1582 }, { "epoch": 1.0546347963687848, "learning_rate": 2.4042431657749117e-05, "loss": 4.1817, "step": 1583 }, { "epoch": 1.0553010743732822, "learning_rate": 2.401547166331245e-05, "loss": 4.1837, "step": 1584 }, { "epoch": 1.0559673523777797, "learning_rate": 2.3988512815559146e-05, "loss": 4.1516, "step": 1585 }, { "epoch": 1.056633630382277, "learning_rate": 2.3961555145888274e-05, "loss": 4.1185, "step": 1586 }, { "epoch": 1.0572999083867745, "learning_rate": 2.393459868569751e-05, "loss": 4.1925, "step": 1587 }, { "epoch": 1.0579661863912717, "learning_rate": 2.390764346638313e-05, "loss": 4.1807, "step": 1588 }, { "epoch": 1.0586324643957692, "learning_rate": 2.3880689519339972e-05, "loss": 4.1997, "step": 1589 }, { "epoch": 1.0592987424002664, "learning_rate": 2.3853736875961386e-05, "loss": 4.1518, "step": 1590 }, { "epoch": 1.059965020404764, "learning_rate": 2.38267855676392e-05, "loss": 4.2105, "step": 1591 }, { "epoch": 1.0606312984092612, "learning_rate": 2.379983562576369e-05, "loss": 4.158, "step": 1592 }, { "epoch": 1.0612975764137587, "learning_rate": 2.3772887081723546e-05, "loss": 4.1829, "step": 1593 }, { "epoch": 1.061963854418256, "learning_rate": 2.374593996690582e-05, "loss": 4.2276, "step": 1594 }, { "epoch": 1.0626301324227534, "learning_rate": 2.371899431269591e-05, "loss": 4.1852, "step": 1595 }, { "epoch": 1.0632964104272509, "learning_rate": 2.3692050150477503e-05, "loss": 4.1746, "step": 1596 }, { "epoch": 1.0639626884317481, "learning_rate": 2.3665107511632556e-05, "loss": 4.1614, "step": 1597 }, { "epoch": 1.0646289664362456, "learning_rate": 2.363816642754124e-05, "loss": 4.1863, "step": 1598 }, { "epoch": 1.0652952444407429, "learning_rate": 2.3611226929581935e-05, "loss": 4.1471, "step": 1599 }, { "epoch": 1.0659615224452403, "learning_rate": 2.358428904913116e-05, "loss": 4.1799, "step": 1600 }, { "epoch": 1.0666278004497376, "learning_rate": 2.3557352817563544e-05, "loss": 4.114, "step": 1601 }, { "epoch": 1.067294078454235, "learning_rate": 2.3530418266251814e-05, "loss": 4.1329, "step": 1602 }, { "epoch": 1.0679603564587323, "learning_rate": 2.3503485426566722e-05, "loss": 4.182, "step": 1603 }, { "epoch": 1.0686266344632298, "learning_rate": 2.347655432987703e-05, "loss": 4.1453, "step": 1604 }, { "epoch": 1.069292912467727, "learning_rate": 2.34496250075495e-05, "loss": 4.1532, "step": 1605 }, { "epoch": 1.0699591904722245, "learning_rate": 2.3422697490948777e-05, "loss": 4.1546, "step": 1606 }, { "epoch": 1.070625468476722, "learning_rate": 2.3395771811437422e-05, "loss": 4.1732, "step": 1607 }, { "epoch": 1.0712917464812193, "learning_rate": 2.3368848000375883e-05, "loss": 4.1819, "step": 1608 }, { "epoch": 1.0719580244857168, "learning_rate": 2.334192608912241e-05, "loss": 4.1984, "step": 1609 }, { "epoch": 1.072624302490214, "learning_rate": 2.331500610903301e-05, "loss": 4.1557, "step": 1610 }, { "epoch": 1.0732905804947115, "learning_rate": 2.3288088091461512e-05, "loss": 4.1639, "step": 1611 }, { "epoch": 1.0739568584992087, "learning_rate": 2.3261172067759397e-05, "loss": 4.1588, "step": 1612 }, { "epoch": 1.0746231365037062, "learning_rate": 2.323425806927584e-05, "loss": 4.1852, "step": 1613 }, { "epoch": 1.0752894145082035, "learning_rate": 2.320734612735768e-05, "loss": 4.1902, "step": 1614 }, { "epoch": 1.075955692512701, "learning_rate": 2.318043627334934e-05, "loss": 4.1311, "step": 1615 }, { "epoch": 1.0766219705171982, "learning_rate": 2.3153528538592802e-05, "loss": 4.1346, "step": 1616 }, { "epoch": 1.0772882485216957, "learning_rate": 2.3126622954427607e-05, "loss": 4.1346, "step": 1617 }, { "epoch": 1.077954526526193, "learning_rate": 2.309971955219077e-05, "loss": 4.168, "step": 1618 }, { "epoch": 1.0786208045306904, "learning_rate": 2.3072818363216774e-05, "loss": 4.1658, "step": 1619 }, { "epoch": 1.079287082535188, "learning_rate": 2.3045919418837515e-05, "loss": 4.1444, "step": 1620 }, { "epoch": 1.0799533605396852, "learning_rate": 2.3019022750382285e-05, "loss": 4.1644, "step": 1621 }, { "epoch": 1.0806196385441826, "learning_rate": 2.2992128389177727e-05, "loss": 4.183, "step": 1622 }, { "epoch": 1.08128591654868, "learning_rate": 2.2965236366547782e-05, "loss": 4.1573, "step": 1623 }, { "epoch": 1.0819521945531774, "learning_rate": 2.2938346713813685e-05, "loss": 4.1677, "step": 1624 }, { "epoch": 1.0826184725576746, "learning_rate": 2.29114594622939e-05, "loss": 4.0771, "step": 1625 }, { "epoch": 1.083284750562172, "learning_rate": 2.288457464330409e-05, "loss": 4.1597, "step": 1626 }, { "epoch": 1.0839510285666694, "learning_rate": 2.28576922881571e-05, "loss": 4.1221, "step": 1627 }, { "epoch": 1.0846173065711668, "learning_rate": 2.2830812428162894e-05, "loss": 4.1429, "step": 1628 }, { "epoch": 1.085283584575664, "learning_rate": 2.2803935094628528e-05, "loss": 4.1708, "step": 1629 }, { "epoch": 1.0859498625801616, "learning_rate": 2.277706031885813e-05, "loss": 4.2211, "step": 1630 }, { "epoch": 1.086616140584659, "learning_rate": 2.2750188132152832e-05, "loss": 4.1532, "step": 1631 }, { "epoch": 1.0872824185891563, "learning_rate": 2.2723318565810744e-05, "loss": 4.15, "step": 1632 }, { "epoch": 1.0879486965936538, "learning_rate": 2.2696451651126974e-05, "loss": 4.214, "step": 1633 }, { "epoch": 1.088614974598151, "learning_rate": 2.2669587419393475e-05, "loss": 4.234, "step": 1634 }, { "epoch": 1.0892812526026485, "learning_rate": 2.2642725901899108e-05, "loss": 4.1828, "step": 1635 }, { "epoch": 1.0899475306071458, "learning_rate": 2.261586712992958e-05, "loss": 4.1571, "step": 1636 }, { "epoch": 1.0906138086116433, "learning_rate": 2.2589011134767397e-05, "loss": 4.1719, "step": 1637 }, { "epoch": 1.0912800866161405, "learning_rate": 2.2562157947691793e-05, "loss": 4.1932, "step": 1638 }, { "epoch": 1.091946364620638, "learning_rate": 2.2535307599978797e-05, "loss": 4.1734, "step": 1639 }, { "epoch": 1.0926126426251352, "learning_rate": 2.2508460122901083e-05, "loss": 4.1427, "step": 1640 }, { "epoch": 1.0932789206296327, "learning_rate": 2.2481615547727986e-05, "loss": 4.1217, "step": 1641 }, { "epoch": 1.0939451986341302, "learning_rate": 2.2454773905725487e-05, "loss": 4.1671, "step": 1642 }, { "epoch": 1.0946114766386275, "learning_rate": 2.242793522815613e-05, "loss": 4.1558, "step": 1643 }, { "epoch": 1.095277754643125, "learning_rate": 2.2401099546279007e-05, "loss": 4.1795, "step": 1644 }, { "epoch": 1.0959440326476222, "learning_rate": 2.2374266891349708e-05, "loss": 4.1487, "step": 1645 }, { "epoch": 1.0966103106521197, "learning_rate": 2.234743729462034e-05, "loss": 4.1751, "step": 1646 }, { "epoch": 1.097276588656617, "learning_rate": 2.2320610787339413e-05, "loss": 4.1375, "step": 1647 }, { "epoch": 1.0979428666611144, "learning_rate": 2.229378740075184e-05, "loss": 4.1725, "step": 1648 }, { "epoch": 1.0986091446656117, "learning_rate": 2.226696716609892e-05, "loss": 4.1392, "step": 1649 }, { "epoch": 1.0992754226701091, "learning_rate": 2.224015011461826e-05, "loss": 4.1669, "step": 1650 }, { "epoch": 1.0999417006746064, "learning_rate": 2.2213336277543764e-05, "loss": 4.1703, "step": 1651 }, { "epoch": 1.1006079786791039, "learning_rate": 2.2186525686105605e-05, "loss": 4.1482, "step": 1652 }, { "epoch": 1.1012742566836011, "learning_rate": 2.215971837153016e-05, "loss": 4.167, "step": 1653 }, { "epoch": 1.1019405346880986, "learning_rate": 2.2132914365039993e-05, "loss": 4.1659, "step": 1654 }, { "epoch": 1.102606812692596, "learning_rate": 2.2106113697853824e-05, "loss": 4.196, "step": 1655 }, { "epoch": 1.1032730906970933, "learning_rate": 2.2079316401186477e-05, "loss": 4.1652, "step": 1656 }, { "epoch": 1.1039393687015908, "learning_rate": 2.2052522506248844e-05, "loss": 4.1353, "step": 1657 }, { "epoch": 1.104605646706088, "learning_rate": 2.2025732044247872e-05, "loss": 4.1299, "step": 1658 }, { "epoch": 1.1052719247105856, "learning_rate": 2.1998945046386492e-05, "loss": 4.1411, "step": 1659 }, { "epoch": 1.1059382027150828, "learning_rate": 2.1972161543863602e-05, "loss": 4.2049, "step": 1660 }, { "epoch": 1.1066044807195803, "learning_rate": 2.1945381567874062e-05, "loss": 4.1947, "step": 1661 }, { "epoch": 1.1072707587240775, "learning_rate": 2.191860514960857e-05, "loss": 4.1529, "step": 1662 }, { "epoch": 1.107937036728575, "learning_rate": 2.1891832320253708e-05, "loss": 4.1358, "step": 1663 }, { "epoch": 1.1086033147330723, "learning_rate": 2.1865063110991895e-05, "loss": 4.1636, "step": 1664 }, { "epoch": 1.1092695927375698, "learning_rate": 2.1838297553001312e-05, "loss": 4.1548, "step": 1665 }, { "epoch": 1.109935870742067, "learning_rate": 2.1811535677455872e-05, "loss": 4.1734, "step": 1666 }, { "epoch": 1.1106021487465645, "learning_rate": 2.1784777515525247e-05, "loss": 4.1616, "step": 1667 }, { "epoch": 1.111268426751062, "learning_rate": 2.1758023098374742e-05, "loss": 4.1578, "step": 1668 }, { "epoch": 1.1119347047555592, "learning_rate": 2.17312724571653e-05, "loss": 4.2165, "step": 1669 }, { "epoch": 1.1126009827600567, "learning_rate": 2.170452562305351e-05, "loss": 4.2199, "step": 1670 }, { "epoch": 1.113267260764554, "learning_rate": 2.167778262719148e-05, "loss": 4.1263, "step": 1671 }, { "epoch": 1.1139335387690514, "learning_rate": 2.165104350072687e-05, "loss": 4.1388, "step": 1672 }, { "epoch": 1.1145998167735487, "learning_rate": 2.162430827480281e-05, "loss": 4.1135, "step": 1673 }, { "epoch": 1.1152660947780462, "learning_rate": 2.1597576980557922e-05, "loss": 4.1185, "step": 1674 }, { "epoch": 1.1159323727825434, "learning_rate": 2.1570849649126234e-05, "loss": 4.1131, "step": 1675 }, { "epoch": 1.116598650787041, "learning_rate": 2.1544126311637147e-05, "loss": 4.1537, "step": 1676 }, { "epoch": 1.1172649287915384, "learning_rate": 2.1517406999215428e-05, "loss": 4.151, "step": 1677 }, { "epoch": 1.1179312067960356, "learning_rate": 2.149069174298114e-05, "loss": 4.2136, "step": 1678 }, { "epoch": 1.1185974848005331, "learning_rate": 2.1463980574049626e-05, "loss": 4.1395, "step": 1679 }, { "epoch": 1.1192637628050304, "learning_rate": 2.1437273523531483e-05, "loss": 4.1511, "step": 1680 }, { "epoch": 1.1199300408095278, "learning_rate": 2.1410570622532498e-05, "loss": 4.1743, "step": 1681 }, { "epoch": 1.120596318814025, "learning_rate": 2.1383871902153614e-05, "loss": 4.1053, "step": 1682 }, { "epoch": 1.1212625968185226, "learning_rate": 2.1357177393490927e-05, "loss": 4.1701, "step": 1683 }, { "epoch": 1.1219288748230198, "learning_rate": 2.133048712763563e-05, "loss": 4.1251, "step": 1684 }, { "epoch": 1.1225951528275173, "learning_rate": 2.1303801135673936e-05, "loss": 4.1877, "step": 1685 }, { "epoch": 1.1232614308320146, "learning_rate": 2.127711944868713e-05, "loss": 4.1441, "step": 1686 }, { "epoch": 1.123927708836512, "learning_rate": 2.125044209775145e-05, "loss": 4.1814, "step": 1687 }, { "epoch": 1.1245939868410093, "learning_rate": 2.1223769113938095e-05, "loss": 4.1895, "step": 1688 }, { "epoch": 1.1252602648455068, "learning_rate": 2.1197100528313176e-05, "loss": 4.1351, "step": 1689 }, { "epoch": 1.1259265428500043, "learning_rate": 2.1170436371937685e-05, "loss": 4.1791, "step": 1690 }, { "epoch": 1.1265928208545015, "learning_rate": 2.114377667586744e-05, "loss": 4.1446, "step": 1691 }, { "epoch": 1.127259098858999, "learning_rate": 2.11171214711531e-05, "loss": 4.1671, "step": 1692 }, { "epoch": 1.1279253768634963, "learning_rate": 2.109047078884005e-05, "loss": 4.1565, "step": 1693 }, { "epoch": 1.1285916548679937, "learning_rate": 2.1063824659968417e-05, "loss": 4.1843, "step": 1694 }, { "epoch": 1.129257932872491, "learning_rate": 2.103718311557306e-05, "loss": 4.2088, "step": 1695 }, { "epoch": 1.1299242108769885, "learning_rate": 2.101054618668347e-05, "loss": 4.154, "step": 1696 }, { "epoch": 1.1305904888814857, "learning_rate": 2.0983913904323735e-05, "loss": 4.1592, "step": 1697 }, { "epoch": 1.1312567668859832, "learning_rate": 2.0957286299512597e-05, "loss": 4.1455, "step": 1698 }, { "epoch": 1.1319230448904805, "learning_rate": 2.0930663403263294e-05, "loss": 4.1477, "step": 1699 }, { "epoch": 1.132589322894978, "learning_rate": 2.0904045246583615e-05, "loss": 4.1645, "step": 1700 }, { "epoch": 1.1332556008994752, "learning_rate": 2.087743186047579e-05, "loss": 4.143, "step": 1701 }, { "epoch": 1.1339218789039727, "learning_rate": 2.0850823275936534e-05, "loss": 4.2064, "step": 1702 }, { "epoch": 1.1345881569084701, "learning_rate": 2.0824219523956954e-05, "loss": 4.1352, "step": 1703 }, { "epoch": 1.1352544349129674, "learning_rate": 2.079762063552252e-05, "loss": 4.153, "step": 1704 }, { "epoch": 1.1359207129174649, "learning_rate": 2.0771026641613044e-05, "loss": 4.1924, "step": 1705 }, { "epoch": 1.1365869909219621, "learning_rate": 2.0744437573202647e-05, "loss": 4.134, "step": 1706 }, { "epoch": 1.1372532689264596, "learning_rate": 2.0717853461259688e-05, "loss": 4.171, "step": 1707 }, { "epoch": 1.1379195469309569, "learning_rate": 2.0691274336746783e-05, "loss": 4.1819, "step": 1708 }, { "epoch": 1.1385858249354543, "learning_rate": 2.0664700230620717e-05, "loss": 4.1761, "step": 1709 }, { "epoch": 1.1392521029399516, "learning_rate": 2.0638131173832438e-05, "loss": 4.209, "step": 1710 }, { "epoch": 1.139918380944449, "learning_rate": 2.061156719732702e-05, "loss": 4.1478, "step": 1711 }, { "epoch": 1.1405846589489466, "learning_rate": 2.058500833204361e-05, "loss": 4.1382, "step": 1712 }, { "epoch": 1.1412509369534438, "learning_rate": 2.0558454608915398e-05, "loss": 4.1551, "step": 1713 }, { "epoch": 1.1419172149579413, "learning_rate": 2.0531906058869602e-05, "loss": 4.141, "step": 1714 }, { "epoch": 1.1425834929624386, "learning_rate": 2.0505362712827402e-05, "loss": 4.1631, "step": 1715 }, { "epoch": 1.143249770966936, "learning_rate": 2.0478824601703915e-05, "loss": 4.23, "step": 1716 }, { "epoch": 1.1439160489714333, "learning_rate": 2.045229175640817e-05, "loss": 4.1911, "step": 1717 }, { "epoch": 1.1445823269759308, "learning_rate": 2.042576420784306e-05, "loss": 4.1691, "step": 1718 }, { "epoch": 1.145248604980428, "learning_rate": 2.0399241986905292e-05, "loss": 4.1545, "step": 1719 }, { "epoch": 1.1459148829849255, "learning_rate": 2.0372725124485418e-05, "loss": 4.1688, "step": 1720 }, { "epoch": 1.1465811609894228, "learning_rate": 2.0346213651467684e-05, "loss": 4.133, "step": 1721 }, { "epoch": 1.1472474389939202, "learning_rate": 2.0319707598730088e-05, "loss": 4.2236, "step": 1722 }, { "epoch": 1.1479137169984175, "learning_rate": 2.0293206997144333e-05, "loss": 4.1939, "step": 1723 }, { "epoch": 1.148579995002915, "learning_rate": 2.026671187757576e-05, "loss": 4.1326, "step": 1724 }, { "epoch": 1.1492462730074124, "learning_rate": 2.0240222270883288e-05, "loss": 4.1486, "step": 1725 }, { "epoch": 1.1499125510119097, "learning_rate": 2.0213738207919487e-05, "loss": 4.1815, "step": 1726 }, { "epoch": 1.1505788290164072, "learning_rate": 2.018725971953041e-05, "loss": 4.1563, "step": 1727 }, { "epoch": 1.1512451070209044, "learning_rate": 2.016078683655565e-05, "loss": 4.1746, "step": 1728 }, { "epoch": 1.151911385025402, "learning_rate": 2.0134319589828242e-05, "loss": 4.179, "step": 1729 }, { "epoch": 1.1525776630298992, "learning_rate": 2.0107858010174695e-05, "loss": 4.1043, "step": 1730 }, { "epoch": 1.1532439410343966, "learning_rate": 2.0081402128414893e-05, "loss": 4.1819, "step": 1731 }, { "epoch": 1.153910219038894, "learning_rate": 2.0054951975362067e-05, "loss": 4.1545, "step": 1732 }, { "epoch": 1.1545764970433914, "learning_rate": 2.0028507581822827e-05, "loss": 4.2212, "step": 1733 }, { "epoch": 1.1552427750478886, "learning_rate": 2.000206897859703e-05, "loss": 4.1246, "step": 1734 }, { "epoch": 1.1559090530523861, "learning_rate": 1.9975636196477797e-05, "loss": 4.1796, "step": 1735 }, { "epoch": 1.1565753310568834, "learning_rate": 1.994920926625149e-05, "loss": 4.1265, "step": 1736 }, { "epoch": 1.1572416090613808, "learning_rate": 1.9922788218697634e-05, "loss": 4.1905, "step": 1737 }, { "epoch": 1.1579078870658783, "learning_rate": 1.9896373084588906e-05, "loss": 4.1937, "step": 1738 }, { "epoch": 1.1585741650703756, "learning_rate": 1.9869963894691104e-05, "loss": 4.1811, "step": 1739 }, { "epoch": 1.159240443074873, "learning_rate": 1.9843560679763095e-05, "loss": 4.1519, "step": 1740 }, { "epoch": 1.1599067210793703, "learning_rate": 1.9817163470556787e-05, "loss": 4.1812, "step": 1741 }, { "epoch": 1.1605729990838678, "learning_rate": 1.9790772297817107e-05, "loss": 4.175, "step": 1742 }, { "epoch": 1.161239277088365, "learning_rate": 1.9764387192281928e-05, "loss": 4.1833, "step": 1743 }, { "epoch": 1.1619055550928625, "learning_rate": 1.9738008184682068e-05, "loss": 4.2118, "step": 1744 }, { "epoch": 1.1625718330973598, "learning_rate": 1.971163530574125e-05, "loss": 4.1194, "step": 1745 }, { "epoch": 1.1632381111018573, "learning_rate": 1.9685268586176055e-05, "loss": 4.1701, "step": 1746 }, { "epoch": 1.1639043891063547, "learning_rate": 1.9658908056695867e-05, "loss": 4.1723, "step": 1747 }, { "epoch": 1.164570667110852, "learning_rate": 1.9632553748002914e-05, "loss": 4.1291, "step": 1748 }, { "epoch": 1.1652369451153495, "learning_rate": 1.9606205690792122e-05, "loss": 4.158, "step": 1749 }, { "epoch": 1.1659032231198467, "learning_rate": 1.9579863915751152e-05, "loss": 4.1467, "step": 1750 }, { "epoch": 1.1665695011243442, "learning_rate": 1.955352845356039e-05, "loss": 4.1347, "step": 1751 }, { "epoch": 1.1672357791288415, "learning_rate": 1.9527199334892798e-05, "loss": 4.2235, "step": 1752 }, { "epoch": 1.167902057133339, "learning_rate": 1.9500876590413998e-05, "loss": 4.1635, "step": 1753 }, { "epoch": 1.1685683351378362, "learning_rate": 1.9474560250782185e-05, "loss": 4.1472, "step": 1754 }, { "epoch": 1.1692346131423337, "learning_rate": 1.944825034664809e-05, "loss": 4.1558, "step": 1755 }, { "epoch": 1.169900891146831, "learning_rate": 1.9421946908654927e-05, "loss": 4.1941, "step": 1756 }, { "epoch": 1.1705671691513284, "learning_rate": 1.939564996743839e-05, "loss": 4.201, "step": 1757 }, { "epoch": 1.1712334471558257, "learning_rate": 1.9369359553626636e-05, "loss": 4.1414, "step": 1758 }, { "epoch": 1.1718997251603231, "learning_rate": 1.9343075697840195e-05, "loss": 4.1876, "step": 1759 }, { "epoch": 1.1725660031648206, "learning_rate": 1.9316798430691933e-05, "loss": 4.1502, "step": 1760 }, { "epoch": 1.1732322811693179, "learning_rate": 1.9290527782787092e-05, "loss": 4.1574, "step": 1761 }, { "epoch": 1.1738985591738154, "learning_rate": 1.926426378472318e-05, "loss": 4.1474, "step": 1762 }, { "epoch": 1.1745648371783126, "learning_rate": 1.9238006467089953e-05, "loss": 4.0937, "step": 1763 }, { "epoch": 1.17523111518281, "learning_rate": 1.92117558604694e-05, "loss": 4.1249, "step": 1764 }, { "epoch": 1.1758973931873073, "learning_rate": 1.9185511995435686e-05, "loss": 4.1398, "step": 1765 }, { "epoch": 1.1765636711918048, "learning_rate": 1.915927490255512e-05, "loss": 4.1524, "step": 1766 }, { "epoch": 1.177229949196302, "learning_rate": 1.913304461238614e-05, "loss": 4.1758, "step": 1767 }, { "epoch": 1.1778962272007996, "learning_rate": 1.9106821155479252e-05, "loss": 4.1911, "step": 1768 }, { "epoch": 1.1785625052052968, "learning_rate": 1.9080604562376987e-05, "loss": 4.1528, "step": 1769 }, { "epoch": 1.1792287832097943, "learning_rate": 1.9054394863613914e-05, "loss": 4.2171, "step": 1770 }, { "epoch": 1.1798950612142916, "learning_rate": 1.9028192089716544e-05, "loss": 4.1627, "step": 1771 }, { "epoch": 1.180561339218789, "learning_rate": 1.9001996271203332e-05, "loss": 4.175, "step": 1772 }, { "epoch": 1.1812276172232865, "learning_rate": 1.8975807438584642e-05, "loss": 4.1223, "step": 1773 }, { "epoch": 1.1818938952277838, "learning_rate": 1.894962562236269e-05, "loss": 4.1225, "step": 1774 }, { "epoch": 1.1825601732322812, "learning_rate": 1.8923450853031515e-05, "loss": 4.1663, "step": 1775 }, { "epoch": 1.1832264512367785, "learning_rate": 1.889728316107697e-05, "loss": 4.1619, "step": 1776 }, { "epoch": 1.183892729241276, "learning_rate": 1.8871122576976634e-05, "loss": 4.1513, "step": 1777 }, { "epoch": 1.1845590072457732, "learning_rate": 1.8844969131199832e-05, "loss": 4.0926, "step": 1778 }, { "epoch": 1.1852252852502707, "learning_rate": 1.881882285420758e-05, "loss": 4.1892, "step": 1779 }, { "epoch": 1.185891563254768, "learning_rate": 1.8792683776452525e-05, "loss": 4.1195, "step": 1780 }, { "epoch": 1.1865578412592654, "learning_rate": 1.8766551928378925e-05, "loss": 4.1026, "step": 1781 }, { "epoch": 1.187224119263763, "learning_rate": 1.8740427340422628e-05, "loss": 4.1652, "step": 1782 }, { "epoch": 1.1878903972682602, "learning_rate": 1.8714310043011054e-05, "loss": 4.1768, "step": 1783 }, { "epoch": 1.1885566752727574, "learning_rate": 1.8688200066563076e-05, "loss": 4.1658, "step": 1784 }, { "epoch": 1.189222953277255, "learning_rate": 1.8662097441489073e-05, "loss": 4.1761, "step": 1785 }, { "epoch": 1.1898892312817524, "learning_rate": 1.8636002198190874e-05, "loss": 4.184, "step": 1786 }, { "epoch": 1.1905555092862496, "learning_rate": 1.8609914367061688e-05, "loss": 4.1576, "step": 1787 }, { "epoch": 1.1912217872907471, "learning_rate": 1.8583833978486078e-05, "loss": 4.1695, "step": 1788 }, { "epoch": 1.1918880652952444, "learning_rate": 1.855776106283998e-05, "loss": 4.1016, "step": 1789 }, { "epoch": 1.1925543432997419, "learning_rate": 1.8531695650490597e-05, "loss": 4.1164, "step": 1790 }, { "epoch": 1.1932206213042391, "learning_rate": 1.850563777179639e-05, "loss": 4.1272, "step": 1791 }, { "epoch": 1.1938868993087366, "learning_rate": 1.847958745710708e-05, "loss": 4.1689, "step": 1792 }, { "epoch": 1.1945531773132338, "learning_rate": 1.8453544736763527e-05, "loss": 4.1729, "step": 1793 }, { "epoch": 1.1952194553177313, "learning_rate": 1.8427509641097785e-05, "loss": 4.1716, "step": 1794 }, { "epoch": 1.1958857333222288, "learning_rate": 1.8401482200433014e-05, "loss": 4.1945, "step": 1795 }, { "epoch": 1.196552011326726, "learning_rate": 1.8375462445083464e-05, "loss": 4.1452, "step": 1796 }, { "epoch": 1.1972182893312235, "learning_rate": 1.834945040535441e-05, "loss": 4.1802, "step": 1797 }, { "epoch": 1.1978845673357208, "learning_rate": 1.8323446111542185e-05, "loss": 4.178, "step": 1798 }, { "epoch": 1.1985508453402183, "learning_rate": 1.829744959393406e-05, "loss": 4.2035, "step": 1799 }, { "epoch": 1.1992171233447155, "learning_rate": 1.827146088280826e-05, "loss": 4.2089, "step": 1800 }, { "epoch": 1.199883401349213, "learning_rate": 1.8245480008433936e-05, "loss": 4.0668, "step": 1801 }, { "epoch": 1.2005496793537103, "learning_rate": 1.821950700107109e-05, "loss": 4.1548, "step": 1802 }, { "epoch": 1.2012159573582077, "learning_rate": 1.8193541890970562e-05, "loss": 4.1734, "step": 1803 }, { "epoch": 1.201882235362705, "learning_rate": 1.8167584708374008e-05, "loss": 4.1707, "step": 1804 }, { "epoch": 1.2025485133672025, "learning_rate": 1.8141635483513845e-05, "loss": 4.1539, "step": 1805 }, { "epoch": 1.2032147913716997, "learning_rate": 1.8115694246613206e-05, "loss": 4.1811, "step": 1806 }, { "epoch": 1.2038810693761972, "learning_rate": 1.808976102788596e-05, "loss": 4.1624, "step": 1807 }, { "epoch": 1.2045473473806947, "learning_rate": 1.8063835857536587e-05, "loss": 4.2106, "step": 1808 }, { "epoch": 1.205213625385192, "learning_rate": 1.8037918765760232e-05, "loss": 4.1894, "step": 1809 }, { "epoch": 1.2058799033896894, "learning_rate": 1.8012009782742602e-05, "loss": 4.1519, "step": 1810 }, { "epoch": 1.2065461813941867, "learning_rate": 1.7986108938659993e-05, "loss": 4.1733, "step": 1811 }, { "epoch": 1.2072124593986842, "learning_rate": 1.7960216263679193e-05, "loss": 4.1611, "step": 1812 }, { "epoch": 1.2078787374031814, "learning_rate": 1.793433178795747e-05, "loss": 4.111, "step": 1813 }, { "epoch": 1.208545015407679, "learning_rate": 1.7908455541642584e-05, "loss": 4.1546, "step": 1814 }, { "epoch": 1.2092112934121761, "learning_rate": 1.7882587554872678e-05, "loss": 4.1464, "step": 1815 }, { "epoch": 1.2098775714166736, "learning_rate": 1.7856727857776258e-05, "loss": 4.1535, "step": 1816 }, { "epoch": 1.210543849421171, "learning_rate": 1.7830876480472226e-05, "loss": 4.1518, "step": 1817 }, { "epoch": 1.2112101274256684, "learning_rate": 1.7805033453069755e-05, "loss": 4.1373, "step": 1818 }, { "epoch": 1.2118764054301656, "learning_rate": 1.777919880566829e-05, "loss": 4.1723, "step": 1819 }, { "epoch": 1.212542683434663, "learning_rate": 1.7753372568357548e-05, "loss": 4.1465, "step": 1820 }, { "epoch": 1.2132089614391606, "learning_rate": 1.7727554771217427e-05, "loss": 4.1712, "step": 1821 }, { "epoch": 1.2138752394436578, "learning_rate": 1.770174544431799e-05, "loss": 4.1678, "step": 1822 }, { "epoch": 1.2145415174481553, "learning_rate": 1.7675944617719463e-05, "loss": 4.1397, "step": 1823 }, { "epoch": 1.2152077954526526, "learning_rate": 1.7650152321472135e-05, "loss": 4.1393, "step": 1824 }, { "epoch": 1.21587407345715, "learning_rate": 1.7624368585616383e-05, "loss": 4.1325, "step": 1825 }, { "epoch": 1.2165403514616473, "learning_rate": 1.7598593440182615e-05, "loss": 4.1601, "step": 1826 }, { "epoch": 1.2172066294661448, "learning_rate": 1.757282691519122e-05, "loss": 4.1669, "step": 1827 }, { "epoch": 1.217872907470642, "learning_rate": 1.754706904065255e-05, "loss": 4.125, "step": 1828 }, { "epoch": 1.2185391854751395, "learning_rate": 1.7521319846566896e-05, "loss": 4.143, "step": 1829 }, { "epoch": 1.219205463479637, "learning_rate": 1.749557936292442e-05, "loss": 4.1471, "step": 1830 }, { "epoch": 1.2198717414841342, "learning_rate": 1.746984761970515e-05, "loss": 4.1661, "step": 1831 }, { "epoch": 1.2205380194886317, "learning_rate": 1.7444124646878933e-05, "loss": 4.1453, "step": 1832 }, { "epoch": 1.221204297493129, "learning_rate": 1.7418410474405403e-05, "loss": 4.1692, "step": 1833 }, { "epoch": 1.2218705754976265, "learning_rate": 1.7392705132233922e-05, "loss": 4.1535, "step": 1834 }, { "epoch": 1.2225368535021237, "learning_rate": 1.7367008650303616e-05, "loss": 4.153, "step": 1835 }, { "epoch": 1.2232031315066212, "learning_rate": 1.734132105854324e-05, "loss": 4.1132, "step": 1836 }, { "epoch": 1.2238694095111184, "learning_rate": 1.7315642386871222e-05, "loss": 4.1089, "step": 1837 }, { "epoch": 1.224535687515616, "learning_rate": 1.728997266519559e-05, "loss": 4.1416, "step": 1838 }, { "epoch": 1.2252019655201132, "learning_rate": 1.726431192341396e-05, "loss": 4.1537, "step": 1839 }, { "epoch": 1.2258682435246107, "learning_rate": 1.7238660191413477e-05, "loss": 4.1193, "step": 1840 }, { "epoch": 1.226534521529108, "learning_rate": 1.721301749907079e-05, "loss": 4.1576, "step": 1841 }, { "epoch": 1.2272007995336054, "learning_rate": 1.7187383876252044e-05, "loss": 4.1252, "step": 1842 }, { "epoch": 1.2278670775381029, "learning_rate": 1.7161759352812794e-05, "loss": 4.1074, "step": 1843 }, { "epoch": 1.2285333555426001, "learning_rate": 1.7136143958597996e-05, "loss": 4.1315, "step": 1844 }, { "epoch": 1.2291996335470976, "learning_rate": 1.7110537723442e-05, "loss": 4.1312, "step": 1845 }, { "epoch": 1.2298659115515949, "learning_rate": 1.708494067716847e-05, "loss": 4.1469, "step": 1846 }, { "epoch": 1.2305321895560923, "learning_rate": 1.705935284959035e-05, "loss": 4.1112, "step": 1847 }, { "epoch": 1.2311984675605896, "learning_rate": 1.703377427050989e-05, "loss": 4.1532, "step": 1848 }, { "epoch": 1.231864745565087, "learning_rate": 1.7008204969718536e-05, "loss": 4.1505, "step": 1849 }, { "epoch": 1.2325310235695843, "learning_rate": 1.698264497699693e-05, "loss": 4.2101, "step": 1850 }, { "epoch": 1.2331973015740818, "learning_rate": 1.6957094322114888e-05, "loss": 4.1324, "step": 1851 }, { "epoch": 1.2338635795785793, "learning_rate": 1.6931553034831334e-05, "loss": 4.1746, "step": 1852 }, { "epoch": 1.2345298575830765, "learning_rate": 1.690602114489429e-05, "loss": 4.1612, "step": 1853 }, { "epoch": 1.2351961355875738, "learning_rate": 1.6880498682040836e-05, "loss": 4.188, "step": 1854 }, { "epoch": 1.2358624135920713, "learning_rate": 1.6854985675997066e-05, "loss": 4.1738, "step": 1855 }, { "epoch": 1.2365286915965688, "learning_rate": 1.6829482156478054e-05, "loss": 4.2075, "step": 1856 }, { "epoch": 1.237194969601066, "learning_rate": 1.6803988153187844e-05, "loss": 4.1236, "step": 1857 }, { "epoch": 1.2378612476055635, "learning_rate": 1.6778503695819382e-05, "loss": 4.1468, "step": 1858 }, { "epoch": 1.2385275256100607, "learning_rate": 1.675302881405449e-05, "loss": 4.1502, "step": 1859 }, { "epoch": 1.2391938036145582, "learning_rate": 1.6727563537563857e-05, "loss": 4.1842, "step": 1860 }, { "epoch": 1.2398600816190555, "learning_rate": 1.6702107896006966e-05, "loss": 4.1674, "step": 1861 }, { "epoch": 1.240526359623553, "learning_rate": 1.6676661919032085e-05, "loss": 4.128, "step": 1862 }, { "epoch": 1.2411926376280502, "learning_rate": 1.6651225636276235e-05, "loss": 4.1365, "step": 1863 }, { "epoch": 1.2418589156325477, "learning_rate": 1.6625799077365133e-05, "loss": 4.2016, "step": 1864 }, { "epoch": 1.2425251936370452, "learning_rate": 1.6600382271913177e-05, "loss": 4.1926, "step": 1865 }, { "epoch": 1.2431914716415424, "learning_rate": 1.6574975249523395e-05, "loss": 4.1349, "step": 1866 }, { "epoch": 1.24385774964604, "learning_rate": 1.6549578039787436e-05, "loss": 4.172, "step": 1867 }, { "epoch": 1.2445240276505372, "learning_rate": 1.6524190672285513e-05, "loss": 4.2072, "step": 1868 }, { "epoch": 1.2451903056550346, "learning_rate": 1.6498813176586367e-05, "loss": 4.1512, "step": 1869 }, { "epoch": 1.245856583659532, "learning_rate": 1.647344558224727e-05, "loss": 4.1881, "step": 1870 }, { "epoch": 1.2465228616640294, "learning_rate": 1.6448087918813925e-05, "loss": 4.1498, "step": 1871 }, { "epoch": 1.2471891396685266, "learning_rate": 1.6422740215820482e-05, "loss": 4.1508, "step": 1872 }, { "epoch": 1.247855417673024, "learning_rate": 1.639740250278951e-05, "loss": 4.1761, "step": 1873 }, { "epoch": 1.2485216956775214, "learning_rate": 1.6372074809231924e-05, "loss": 4.1611, "step": 1874 }, { "epoch": 1.2491879736820188, "learning_rate": 1.634675716464695e-05, "loss": 4.1932, "step": 1875 }, { "epoch": 1.249854251686516, "learning_rate": 1.6321449598522154e-05, "loss": 4.1401, "step": 1876 }, { "epoch": 1.2505205296910136, "learning_rate": 1.6296152140333332e-05, "loss": 4.122, "step": 1877 }, { "epoch": 1.251186807695511, "learning_rate": 1.627086481954451e-05, "loss": 4.1579, "step": 1878 }, { "epoch": 1.2518530857000083, "learning_rate": 1.624558766560793e-05, "loss": 4.1168, "step": 1879 }, { "epoch": 1.2525193637045058, "learning_rate": 1.6220320707963964e-05, "loss": 4.1438, "step": 1880 }, { "epoch": 1.253185641709003, "learning_rate": 1.619506397604112e-05, "loss": 4.1494, "step": 1881 }, { "epoch": 1.2538519197135005, "learning_rate": 1.6169817499255997e-05, "loss": 4.1433, "step": 1882 }, { "epoch": 1.2545181977179978, "learning_rate": 1.614458130701325e-05, "loss": 4.1545, "step": 1883 }, { "epoch": 1.2551844757224953, "learning_rate": 1.6119355428705554e-05, "loss": 4.1769, "step": 1884 }, { "epoch": 1.2558507537269925, "learning_rate": 1.6094139893713576e-05, "loss": 4.132, "step": 1885 }, { "epoch": 1.25651703173149, "learning_rate": 1.6068934731405927e-05, "loss": 4.1344, "step": 1886 }, { "epoch": 1.2571833097359875, "learning_rate": 1.6043739971139134e-05, "loss": 4.1573, "step": 1887 }, { "epoch": 1.2578495877404847, "learning_rate": 1.6018555642257633e-05, "loss": 4.1301, "step": 1888 }, { "epoch": 1.258515865744982, "learning_rate": 1.599338177409369e-05, "loss": 4.142, "step": 1889 }, { "epoch": 1.2591821437494795, "learning_rate": 1.5968218395967377e-05, "loss": 4.1634, "step": 1890 }, { "epoch": 1.259848421753977, "learning_rate": 1.594306553718658e-05, "loss": 4.198, "step": 1891 }, { "epoch": 1.2605146997584742, "learning_rate": 1.591792322704691e-05, "loss": 4.1597, "step": 1892 }, { "epoch": 1.2611809777629717, "learning_rate": 1.5892791494831694e-05, "loss": 4.1542, "step": 1893 }, { "epoch": 1.261847255767469, "learning_rate": 1.5867670369811944e-05, "loss": 4.1838, "step": 1894 }, { "epoch": 1.2625135337719664, "learning_rate": 1.584255988124632e-05, "loss": 4.1769, "step": 1895 }, { "epoch": 1.2631798117764637, "learning_rate": 1.5817460058381088e-05, "loss": 4.1369, "step": 1896 }, { "epoch": 1.2638460897809611, "learning_rate": 1.5792370930450083e-05, "loss": 4.1831, "step": 1897 }, { "epoch": 1.2645123677854584, "learning_rate": 1.5767292526674718e-05, "loss": 4.1425, "step": 1898 }, { "epoch": 1.2651786457899559, "learning_rate": 1.5742224876263873e-05, "loss": 4.1213, "step": 1899 }, { "epoch": 1.2658449237944533, "learning_rate": 1.571716800841392e-05, "loss": 4.1252, "step": 1900 }, { "epoch": 1.2665112017989506, "learning_rate": 1.5692121952308693e-05, "loss": 4.1249, "step": 1901 }, { "epoch": 1.2671774798034479, "learning_rate": 1.56670867371194e-05, "loss": 4.1743, "step": 1902 }, { "epoch": 1.2678437578079453, "learning_rate": 1.5642062392004635e-05, "loss": 4.1841, "step": 1903 }, { "epoch": 1.2685100358124428, "learning_rate": 1.5617048946110348e-05, "loss": 4.1036, "step": 1904 }, { "epoch": 1.26917631381694, "learning_rate": 1.5592046428569786e-05, "loss": 4.2117, "step": 1905 }, { "epoch": 1.2698425918214375, "learning_rate": 1.556705486850343e-05, "loss": 4.1409, "step": 1906 }, { "epoch": 1.2705088698259348, "learning_rate": 1.5542074295019062e-05, "loss": 4.1425, "step": 1907 }, { "epoch": 1.2711751478304323, "learning_rate": 1.551710473721163e-05, "loss": 4.1179, "step": 1908 }, { "epoch": 1.2718414258349298, "learning_rate": 1.5492146224163257e-05, "loss": 4.1361, "step": 1909 }, { "epoch": 1.272507703839427, "learning_rate": 1.5467198784943204e-05, "loss": 4.1454, "step": 1910 }, { "epoch": 1.2731739818439243, "learning_rate": 1.544226244860784e-05, "loss": 4.1509, "step": 1911 }, { "epoch": 1.2738402598484218, "learning_rate": 1.5417337244200588e-05, "loss": 4.1488, "step": 1912 }, { "epoch": 1.2745065378529192, "learning_rate": 1.5392423200751926e-05, "loss": 4.1024, "step": 1913 }, { "epoch": 1.2751728158574165, "learning_rate": 1.5367520347279317e-05, "loss": 4.1259, "step": 1914 }, { "epoch": 1.275839093861914, "learning_rate": 1.5342628712787188e-05, "loss": 4.1098, "step": 1915 }, { "epoch": 1.2765053718664112, "learning_rate": 1.531774832626692e-05, "loss": 4.1637, "step": 1916 }, { "epoch": 1.2771716498709087, "learning_rate": 1.529287921669677e-05, "loss": 4.1379, "step": 1917 }, { "epoch": 1.277837927875406, "learning_rate": 1.5268021413041875e-05, "loss": 4.1332, "step": 1918 }, { "epoch": 1.2785042058799034, "learning_rate": 1.5243174944254204e-05, "loss": 4.2549, "step": 1919 }, { "epoch": 1.2791704838844007, "learning_rate": 1.5218339839272516e-05, "loss": 4.1097, "step": 1920 }, { "epoch": 1.2798367618888982, "learning_rate": 1.519351612702234e-05, "loss": 4.1371, "step": 1921 }, { "epoch": 1.2805030398933956, "learning_rate": 1.5168703836415932e-05, "loss": 4.1223, "step": 1922 }, { "epoch": 1.281169317897893, "learning_rate": 1.514390299635225e-05, "loss": 4.1553, "step": 1923 }, { "epoch": 1.2818355959023902, "learning_rate": 1.5119113635716914e-05, "loss": 4.18, "step": 1924 }, { "epoch": 1.2825018739068876, "learning_rate": 1.5094335783382168e-05, "loss": 4.1875, "step": 1925 }, { "epoch": 1.2831681519113851, "learning_rate": 1.5069569468206871e-05, "loss": 4.1741, "step": 1926 }, { "epoch": 1.2838344299158824, "learning_rate": 1.5044814719036422e-05, "loss": 4.175, "step": 1927 }, { "epoch": 1.2845007079203798, "learning_rate": 1.5020071564702742e-05, "loss": 4.1654, "step": 1928 }, { "epoch": 1.285166985924877, "learning_rate": 1.4995340034024297e-05, "loss": 4.2006, "step": 1929 }, { "epoch": 1.2858332639293746, "learning_rate": 1.4970620155805964e-05, "loss": 4.1615, "step": 1930 }, { "epoch": 1.2864995419338718, "learning_rate": 1.494591195883905e-05, "loss": 4.1482, "step": 1931 }, { "epoch": 1.2871658199383693, "learning_rate": 1.4921215471901304e-05, "loss": 4.1657, "step": 1932 }, { "epoch": 1.2878320979428666, "learning_rate": 1.4896530723756791e-05, "loss": 4.1787, "step": 1933 }, { "epoch": 1.288498375947364, "learning_rate": 1.4871857743155904e-05, "loss": 4.1849, "step": 1934 }, { "epoch": 1.2891646539518615, "learning_rate": 1.4847196558835364e-05, "loss": 4.1942, "step": 1935 }, { "epoch": 1.2898309319563588, "learning_rate": 1.4822547199518127e-05, "loss": 4.1473, "step": 1936 }, { "epoch": 1.290497209960856, "learning_rate": 1.4797909693913376e-05, "loss": 4.1503, "step": 1937 }, { "epoch": 1.2911634879653535, "learning_rate": 1.4773284070716503e-05, "loss": 4.1239, "step": 1938 }, { "epoch": 1.291829765969851, "learning_rate": 1.4748670358609048e-05, "loss": 4.1975, "step": 1939 }, { "epoch": 1.2924960439743483, "learning_rate": 1.4724068586258677e-05, "loss": 4.1379, "step": 1940 }, { "epoch": 1.2931623219788457, "learning_rate": 1.4699478782319164e-05, "loss": 4.1714, "step": 1941 }, { "epoch": 1.293828599983343, "learning_rate": 1.4674900975430327e-05, "loss": 4.1684, "step": 1942 }, { "epoch": 1.2944948779878405, "learning_rate": 1.4650335194218016e-05, "loss": 4.1197, "step": 1943 }, { "epoch": 1.2951611559923377, "learning_rate": 1.4625781467294083e-05, "loss": 4.181, "step": 1944 }, { "epoch": 1.2958274339968352, "learning_rate": 1.4601239823256334e-05, "loss": 4.1212, "step": 1945 }, { "epoch": 1.2964937120013325, "learning_rate": 1.4576710290688497e-05, "loss": 4.1922, "step": 1946 }, { "epoch": 1.29715999000583, "learning_rate": 1.455219289816019e-05, "loss": 4.1477, "step": 1947 }, { "epoch": 1.2978262680103274, "learning_rate": 1.4527687674226926e-05, "loss": 4.1951, "step": 1948 }, { "epoch": 1.2984925460148247, "learning_rate": 1.4503194647430007e-05, "loss": 4.1689, "step": 1949 }, { "epoch": 1.2991588240193221, "learning_rate": 1.4478713846296521e-05, "loss": 4.1337, "step": 1950 }, { "epoch": 1.2998251020238194, "learning_rate": 1.4454245299339364e-05, "loss": 4.1907, "step": 1951 }, { "epoch": 1.3004913800283169, "learning_rate": 1.4429789035057124e-05, "loss": 4.1447, "step": 1952 }, { "epoch": 1.3011576580328141, "learning_rate": 1.440534508193408e-05, "loss": 4.1843, "step": 1953 }, { "epoch": 1.3018239360373116, "learning_rate": 1.4380913468440205e-05, "loss": 4.2032, "step": 1954 }, { "epoch": 1.3024902140418089, "learning_rate": 1.4356494223031067e-05, "loss": 4.1684, "step": 1955 }, { "epoch": 1.3031564920463063, "learning_rate": 1.4332087374147843e-05, "loss": 4.1313, "step": 1956 }, { "epoch": 1.3038227700508038, "learning_rate": 1.430769295021727e-05, "loss": 4.162, "step": 1957 }, { "epoch": 1.304489048055301, "learning_rate": 1.428331097965161e-05, "loss": 4.1958, "step": 1958 }, { "epoch": 1.3051553260597983, "learning_rate": 1.4258941490848616e-05, "loss": 4.1261, "step": 1959 }, { "epoch": 1.3058216040642958, "learning_rate": 1.4234584512191532e-05, "loss": 4.1613, "step": 1960 }, { "epoch": 1.3064878820687933, "learning_rate": 1.4210240072048996e-05, "loss": 4.1623, "step": 1961 }, { "epoch": 1.3071541600732905, "learning_rate": 1.4185908198775066e-05, "loss": 4.1174, "step": 1962 }, { "epoch": 1.307820438077788, "learning_rate": 1.4161588920709146e-05, "loss": 4.1569, "step": 1963 }, { "epoch": 1.3084867160822853, "learning_rate": 1.4137282266175977e-05, "loss": 4.1277, "step": 1964 }, { "epoch": 1.3091529940867828, "learning_rate": 1.41129882634856e-05, "loss": 4.1431, "step": 1965 }, { "epoch": 1.30981927209128, "learning_rate": 1.4088706940933327e-05, "loss": 4.2031, "step": 1966 }, { "epoch": 1.3104855500957775, "learning_rate": 1.406443832679969e-05, "loss": 4.1856, "step": 1967 }, { "epoch": 1.3111518281002748, "learning_rate": 1.4040182449350408e-05, "loss": 4.1447, "step": 1968 }, { "epoch": 1.3118181061047722, "learning_rate": 1.401593933683642e-05, "loss": 4.1525, "step": 1969 }, { "epoch": 1.3124843841092697, "learning_rate": 1.399170901749372e-05, "loss": 4.1609, "step": 1970 }, { "epoch": 1.313150662113767, "learning_rate": 1.3967491519543444e-05, "loss": 4.1564, "step": 1971 }, { "epoch": 1.3138169401182642, "learning_rate": 1.3943286871191807e-05, "loss": 4.1747, "step": 1972 }, { "epoch": 1.3144832181227617, "learning_rate": 1.3919095100630037e-05, "loss": 4.1397, "step": 1973 }, { "epoch": 1.3151494961272592, "learning_rate": 1.3894916236034367e-05, "loss": 4.1275, "step": 1974 }, { "epoch": 1.3158157741317564, "learning_rate": 1.3870750305565985e-05, "loss": 4.1765, "step": 1975 }, { "epoch": 1.316482052136254, "learning_rate": 1.3846597337371064e-05, "loss": 4.1429, "step": 1976 }, { "epoch": 1.3171483301407512, "learning_rate": 1.382245735958061e-05, "loss": 4.1918, "step": 1977 }, { "epoch": 1.3178146081452486, "learning_rate": 1.3798330400310539e-05, "loss": 4.1976, "step": 1978 }, { "epoch": 1.318480886149746, "learning_rate": 1.3774216487661618e-05, "loss": 4.1291, "step": 1979 }, { "epoch": 1.3191471641542434, "learning_rate": 1.3750115649719389e-05, "loss": 4.1367, "step": 1980 }, { "epoch": 1.3198134421587406, "learning_rate": 1.3726027914554166e-05, "loss": 4.1315, "step": 1981 }, { "epoch": 1.3204797201632381, "learning_rate": 1.3701953310221033e-05, "loss": 4.1615, "step": 1982 }, { "epoch": 1.3211459981677356, "learning_rate": 1.3677891864759751e-05, "loss": 4.1542, "step": 1983 }, { "epoch": 1.3218122761722328, "learning_rate": 1.365384360619476e-05, "loss": 4.135, "step": 1984 }, { "epoch": 1.32247855417673, "learning_rate": 1.3629808562535154e-05, "loss": 4.1915, "step": 1985 }, { "epoch": 1.3231448321812276, "learning_rate": 1.360578676177462e-05, "loss": 4.1682, "step": 1986 }, { "epoch": 1.323811110185725, "learning_rate": 1.3581778231891418e-05, "loss": 4.1778, "step": 1987 }, { "epoch": 1.3244773881902223, "learning_rate": 1.3557783000848384e-05, "loss": 4.1569, "step": 1988 }, { "epoch": 1.3251436661947198, "learning_rate": 1.353380109659283e-05, "loss": 4.1514, "step": 1989 }, { "epoch": 1.325809944199217, "learning_rate": 1.3509832547056556e-05, "loss": 4.1935, "step": 1990 }, { "epoch": 1.3264762222037145, "learning_rate": 1.3485877380155815e-05, "loss": 4.1495, "step": 1991 }, { "epoch": 1.327142500208212, "learning_rate": 1.3461935623791266e-05, "loss": 4.1464, "step": 1992 }, { "epoch": 1.3278087782127093, "learning_rate": 1.3438007305847939e-05, "loss": 4.1768, "step": 1993 }, { "epoch": 1.3284750562172065, "learning_rate": 1.3414092454195245e-05, "loss": 4.0995, "step": 1994 }, { "epoch": 1.329141334221704, "learning_rate": 1.3390191096686883e-05, "loss": 4.1451, "step": 1995 }, { "epoch": 1.3298076122262015, "learning_rate": 1.3366303261160823e-05, "loss": 4.1967, "step": 1996 }, { "epoch": 1.3304738902306987, "learning_rate": 1.3342428975439347e-05, "loss": 4.1422, "step": 1997 }, { "epoch": 1.3311401682351962, "learning_rate": 1.3318568267328874e-05, "loss": 4.199, "step": 1998 }, { "epoch": 1.3318064462396935, "learning_rate": 1.3294721164620053e-05, "loss": 4.1926, "step": 1999 }, { "epoch": 1.332472724244191, "learning_rate": 1.3270887695087703e-05, "loss": 4.1509, "step": 2000 }, { "epoch": 1.3331390022486882, "learning_rate": 1.3247067886490727e-05, "loss": 4.1693, "step": 2001 }, { "epoch": 1.3338052802531857, "learning_rate": 1.3223261766572143e-05, "loss": 4.1255, "step": 2002 }, { "epoch": 1.334471558257683, "learning_rate": 1.3199469363059002e-05, "loss": 4.1436, "step": 2003 }, { "epoch": 1.3351378362621804, "learning_rate": 1.3175690703662427e-05, "loss": 4.1677, "step": 2004 }, { "epoch": 1.3358041142666779, "learning_rate": 1.3151925816077464e-05, "loss": 4.1928, "step": 2005 }, { "epoch": 1.3364703922711751, "learning_rate": 1.312817472798316e-05, "loss": 4.1515, "step": 2006 }, { "epoch": 1.3371366702756724, "learning_rate": 1.3104437467042507e-05, "loss": 4.1502, "step": 2007 }, { "epoch": 1.3378029482801699, "learning_rate": 1.3080714060902355e-05, "loss": 4.1407, "step": 2008 }, { "epoch": 1.3384692262846674, "learning_rate": 1.3057004537193423e-05, "loss": 4.1516, "step": 2009 }, { "epoch": 1.3391355042891646, "learning_rate": 1.3033308923530296e-05, "loss": 4.1333, "step": 2010 }, { "epoch": 1.339801782293662, "learning_rate": 1.3009627247511313e-05, "loss": 4.1679, "step": 2011 }, { "epoch": 1.3404680602981593, "learning_rate": 1.2985959536718612e-05, "loss": 4.1339, "step": 2012 }, { "epoch": 1.3411343383026568, "learning_rate": 1.296230581871804e-05, "loss": 4.1259, "step": 2013 }, { "epoch": 1.341800616307154, "learning_rate": 1.2938666121059167e-05, "loss": 4.1639, "step": 2014 }, { "epoch": 1.3424668943116516, "learning_rate": 1.2915040471275219e-05, "loss": 4.1583, "step": 2015 }, { "epoch": 1.3431331723161488, "learning_rate": 1.2891428896883079e-05, "loss": 4.1174, "step": 2016 }, { "epoch": 1.3437994503206463, "learning_rate": 1.286783142538322e-05, "loss": 4.1204, "step": 2017 }, { "epoch": 1.3444657283251438, "learning_rate": 1.2844248084259691e-05, "loss": 4.1802, "step": 2018 }, { "epoch": 1.345132006329641, "learning_rate": 1.2820678900980093e-05, "loss": 4.1857, "step": 2019 }, { "epoch": 1.3457982843341383, "learning_rate": 1.2797123902995522e-05, "loss": 4.1878, "step": 2020 }, { "epoch": 1.3464645623386358, "learning_rate": 1.2773583117740555e-05, "loss": 4.145, "step": 2021 }, { "epoch": 1.3471308403431332, "learning_rate": 1.2750056572633246e-05, "loss": 4.1622, "step": 2022 }, { "epoch": 1.3477971183476305, "learning_rate": 1.2726544295075018e-05, "loss": 4.1661, "step": 2023 }, { "epoch": 1.348463396352128, "learning_rate": 1.2703046312450706e-05, "loss": 4.1599, "step": 2024 }, { "epoch": 1.3491296743566252, "learning_rate": 1.2679562652128485e-05, "loss": 4.2027, "step": 2025 }, { "epoch": 1.3497959523611227, "learning_rate": 1.2656093341459852e-05, "loss": 4.0843, "step": 2026 }, { "epoch": 1.3504622303656202, "learning_rate": 1.263263840777958e-05, "loss": 4.1995, "step": 2027 }, { "epoch": 1.3511285083701174, "learning_rate": 1.260919787840572e-05, "loss": 4.157, "step": 2028 }, { "epoch": 1.3517947863746147, "learning_rate": 1.258577178063953e-05, "loss": 4.1624, "step": 2029 }, { "epoch": 1.3524610643791122, "learning_rate": 1.256236014176546e-05, "loss": 4.125, "step": 2030 }, { "epoch": 1.3531273423836097, "learning_rate": 1.2538962989051115e-05, "loss": 4.1289, "step": 2031 }, { "epoch": 1.353793620388107, "learning_rate": 1.251558034974726e-05, "loss": 4.1138, "step": 2032 }, { "epoch": 1.3544598983926044, "learning_rate": 1.2492212251087706e-05, "loss": 4.1221, "step": 2033 }, { "epoch": 1.3551261763971016, "learning_rate": 1.2468858720289353e-05, "loss": 4.1894, "step": 2034 }, { "epoch": 1.3557924544015991, "learning_rate": 1.2445519784552153e-05, "loss": 4.1257, "step": 2035 }, { "epoch": 1.3564587324060964, "learning_rate": 1.2422195471059031e-05, "loss": 4.1946, "step": 2036 }, { "epoch": 1.3571250104105939, "learning_rate": 1.2398885806975883e-05, "loss": 4.1905, "step": 2037 }, { "epoch": 1.3577912884150911, "learning_rate": 1.2375590819451566e-05, "loss": 4.1312, "step": 2038 }, { "epoch": 1.3584575664195886, "learning_rate": 1.2352310535617823e-05, "loss": 4.1403, "step": 2039 }, { "epoch": 1.359123844424086, "learning_rate": 1.2329044982589275e-05, "loss": 4.1388, "step": 2040 }, { "epoch": 1.3597901224285833, "learning_rate": 1.2305794187463384e-05, "loss": 4.1412, "step": 2041 }, { "epoch": 1.3604564004330806, "learning_rate": 1.2282558177320434e-05, "loss": 4.2034, "step": 2042 }, { "epoch": 1.361122678437578, "learning_rate": 1.2259336979223465e-05, "loss": 4.1321, "step": 2043 }, { "epoch": 1.3617889564420755, "learning_rate": 1.2236130620218305e-05, "loss": 4.0973, "step": 2044 }, { "epoch": 1.3624552344465728, "learning_rate": 1.221293912733347e-05, "loss": 4.1514, "step": 2045 }, { "epoch": 1.3631215124510703, "learning_rate": 1.2189762527580159e-05, "loss": 4.2265, "step": 2046 }, { "epoch": 1.3637877904555675, "learning_rate": 1.2166600847952242e-05, "loss": 4.1987, "step": 2047 }, { "epoch": 1.364454068460065, "learning_rate": 1.2143454115426197e-05, "loss": 4.1546, "step": 2048 }, { "epoch": 1.3651203464645623, "learning_rate": 1.2120322356961092e-05, "loss": 4.1229, "step": 2049 }, { "epoch": 1.3657866244690597, "learning_rate": 1.2097205599498578e-05, "loss": 4.198, "step": 2050 }, { "epoch": 1.366452902473557, "learning_rate": 1.2074103869962814e-05, "loss": 4.1491, "step": 2051 }, { "epoch": 1.3671191804780545, "learning_rate": 1.2051017195260453e-05, "loss": 4.1647, "step": 2052 }, { "epoch": 1.367785458482552, "learning_rate": 1.2027945602280624e-05, "loss": 4.2034, "step": 2053 }, { "epoch": 1.3684517364870492, "learning_rate": 1.2004889117894885e-05, "loss": 4.1265, "step": 2054 }, { "epoch": 1.3691180144915465, "learning_rate": 1.1981847768957192e-05, "loss": 4.176, "step": 2055 }, { "epoch": 1.369784292496044, "learning_rate": 1.1958821582303898e-05, "loss": 4.1558, "step": 2056 }, { "epoch": 1.3704505705005414, "learning_rate": 1.1935810584753662e-05, "loss": 4.1591, "step": 2057 }, { "epoch": 1.3711168485050387, "learning_rate": 1.1912814803107474e-05, "loss": 4.1143, "step": 2058 }, { "epoch": 1.3717831265095362, "learning_rate": 1.1889834264148589e-05, "loss": 4.1486, "step": 2059 }, { "epoch": 1.3724494045140334, "learning_rate": 1.1866868994642535e-05, "loss": 4.1338, "step": 2060 }, { "epoch": 1.3731156825185309, "learning_rate": 1.1843919021337016e-05, "loss": 4.12, "step": 2061 }, { "epoch": 1.3737819605230284, "learning_rate": 1.1820984370961937e-05, "loss": 4.1511, "step": 2062 }, { "epoch": 1.3744482385275256, "learning_rate": 1.1798065070229383e-05, "loss": 4.136, "step": 2063 }, { "epoch": 1.3751145165320229, "learning_rate": 1.1775161145833524e-05, "loss": 4.1585, "step": 2064 }, { "epoch": 1.3757807945365204, "learning_rate": 1.1752272624450628e-05, "loss": 4.2002, "step": 2065 }, { "epoch": 1.3764470725410178, "learning_rate": 1.1729399532739047e-05, "loss": 4.1449, "step": 2066 }, { "epoch": 1.377113350545515, "learning_rate": 1.1706541897339151e-05, "loss": 4.152, "step": 2067 }, { "epoch": 1.3777796285500126, "learning_rate": 1.168369974487327e-05, "loss": 4.1659, "step": 2068 }, { "epoch": 1.3784459065545098, "learning_rate": 1.1660873101945763e-05, "loss": 4.1042, "step": 2069 }, { "epoch": 1.3791121845590073, "learning_rate": 1.1638061995142888e-05, "loss": 4.1899, "step": 2070 }, { "epoch": 1.3797784625635046, "learning_rate": 1.16152664510328e-05, "loss": 4.1686, "step": 2071 }, { "epoch": 1.380444740568002, "learning_rate": 1.159248649616557e-05, "loss": 4.1565, "step": 2072 }, { "epoch": 1.3811110185724993, "learning_rate": 1.1569722157073074e-05, "loss": 4.1974, "step": 2073 }, { "epoch": 1.3817772965769968, "learning_rate": 1.1546973460269009e-05, "loss": 4.1308, "step": 2074 }, { "epoch": 1.3824435745814942, "learning_rate": 1.1524240432248858e-05, "loss": 4.1347, "step": 2075 }, { "epoch": 1.3831098525859915, "learning_rate": 1.1501523099489855e-05, "loss": 4.1657, "step": 2076 }, { "epoch": 1.3837761305904888, "learning_rate": 1.147882148845094e-05, "loss": 4.1685, "step": 2077 }, { "epoch": 1.3844424085949862, "learning_rate": 1.1456135625572772e-05, "loss": 4.1662, "step": 2078 }, { "epoch": 1.3851086865994837, "learning_rate": 1.1433465537277641e-05, "loss": 4.1231, "step": 2079 }, { "epoch": 1.385774964603981, "learning_rate": 1.1410811249969475e-05, "loss": 4.1173, "step": 2080 }, { "epoch": 1.3864412426084785, "learning_rate": 1.138817279003379e-05, "loss": 4.1305, "step": 2081 }, { "epoch": 1.3871075206129757, "learning_rate": 1.1365550183837685e-05, "loss": 4.1332, "step": 2082 }, { "epoch": 1.3877737986174732, "learning_rate": 1.1342943457729763e-05, "loss": 4.1684, "step": 2083 }, { "epoch": 1.3884400766219704, "learning_rate": 1.1320352638040174e-05, "loss": 4.1294, "step": 2084 }, { "epoch": 1.389106354626468, "learning_rate": 1.1297777751080512e-05, "loss": 4.145, "step": 2085 }, { "epoch": 1.3897726326309652, "learning_rate": 1.1275218823143819e-05, "loss": 4.1549, "step": 2086 }, { "epoch": 1.3904389106354627, "learning_rate": 1.1252675880504553e-05, "loss": 4.1602, "step": 2087 }, { "epoch": 1.3911051886399601, "learning_rate": 1.1230148949418557e-05, "loss": 4.1491, "step": 2088 }, { "epoch": 1.3917714666444574, "learning_rate": 1.1207638056123012e-05, "loss": 4.1736, "step": 2089 }, { "epoch": 1.3924377446489546, "learning_rate": 1.1185143226836428e-05, "loss": 4.1685, "step": 2090 }, { "epoch": 1.3931040226534521, "learning_rate": 1.1162664487758623e-05, "loss": 4.1321, "step": 2091 }, { "epoch": 1.3937703006579496, "learning_rate": 1.1140201865070643e-05, "loss": 4.1302, "step": 2092 }, { "epoch": 1.3944365786624469, "learning_rate": 1.1117755384934774e-05, "loss": 4.1064, "step": 2093 }, { "epoch": 1.3951028566669443, "learning_rate": 1.1095325073494522e-05, "loss": 4.1933, "step": 2094 }, { "epoch": 1.3957691346714416, "learning_rate": 1.1072910956874544e-05, "loss": 4.1408, "step": 2095 }, { "epoch": 1.396435412675939, "learning_rate": 1.1050513061180606e-05, "loss": 4.1576, "step": 2096 }, { "epoch": 1.3971016906804365, "learning_rate": 1.102813141249964e-05, "loss": 4.1404, "step": 2097 }, { "epoch": 1.3977679686849338, "learning_rate": 1.1005766036899614e-05, "loss": 4.1406, "step": 2098 }, { "epoch": 1.398434246689431, "learning_rate": 1.0983416960429547e-05, "loss": 4.2539, "step": 2099 }, { "epoch": 1.3991005246939285, "learning_rate": 1.0961084209119496e-05, "loss": 4.1896, "step": 2100 }, { "epoch": 1.399766802698426, "learning_rate": 1.0938767808980486e-05, "loss": 4.1376, "step": 2101 }, { "epoch": 1.4004330807029233, "learning_rate": 1.0916467786004492e-05, "loss": 4.1533, "step": 2102 }, { "epoch": 1.4010993587074208, "learning_rate": 1.0894184166164435e-05, "loss": 4.1597, "step": 2103 }, { "epoch": 1.401765636711918, "learning_rate": 1.087191697541411e-05, "loss": 4.1626, "step": 2104 }, { "epoch": 1.4024319147164155, "learning_rate": 1.084966623968818e-05, "loss": 4.1129, "step": 2105 }, { "epoch": 1.4030981927209127, "learning_rate": 1.082743198490217e-05, "loss": 4.1908, "step": 2106 }, { "epoch": 1.4037644707254102, "learning_rate": 1.080521423695238e-05, "loss": 4.1267, "step": 2107 }, { "epoch": 1.4044307487299075, "learning_rate": 1.0783013021715892e-05, "loss": 4.1645, "step": 2108 }, { "epoch": 1.405097026734405, "learning_rate": 1.0760828365050535e-05, "loss": 4.1551, "step": 2109 }, { "epoch": 1.4057633047389024, "learning_rate": 1.073866029279485e-05, "loss": 4.1136, "step": 2110 }, { "epoch": 1.4064295827433997, "learning_rate": 1.0716508830768065e-05, "loss": 4.1962, "step": 2111 }, { "epoch": 1.407095860747897, "learning_rate": 1.0694374004770047e-05, "loss": 4.1463, "step": 2112 }, { "epoch": 1.4077621387523944, "learning_rate": 1.0672255840581324e-05, "loss": 4.1154, "step": 2113 }, { "epoch": 1.408428416756892, "learning_rate": 1.065015436396298e-05, "loss": 4.137, "step": 2114 }, { "epoch": 1.4090946947613892, "learning_rate": 1.0628069600656678e-05, "loss": 4.1301, "step": 2115 }, { "epoch": 1.4097609727658866, "learning_rate": 1.0606001576384617e-05, "loss": 4.1813, "step": 2116 }, { "epoch": 1.410427250770384, "learning_rate": 1.0583950316849491e-05, "loss": 4.1407, "step": 2117 }, { "epoch": 1.4110935287748814, "learning_rate": 1.056191584773447e-05, "loss": 4.1954, "step": 2118 }, { "epoch": 1.4117598067793786, "learning_rate": 1.0539898194703188e-05, "loss": 4.1612, "step": 2119 }, { "epoch": 1.412426084783876, "learning_rate": 1.0517897383399672e-05, "loss": 4.1458, "step": 2120 }, { "epoch": 1.4130923627883734, "learning_rate": 1.0495913439448324e-05, "loss": 4.1742, "step": 2121 }, { "epoch": 1.4137586407928708, "learning_rate": 1.0473946388453933e-05, "loss": 4.1383, "step": 2122 }, { "epoch": 1.4144249187973683, "learning_rate": 1.0451996256001603e-05, "loss": 4.175, "step": 2123 }, { "epoch": 1.4150911968018656, "learning_rate": 1.043006306765669e-05, "loss": 4.1684, "step": 2124 }, { "epoch": 1.4157574748063628, "learning_rate": 1.040814684896488e-05, "loss": 4.1351, "step": 2125 }, { "epoch": 1.4164237528108603, "learning_rate": 1.0386247625452056e-05, "loss": 4.1072, "step": 2126 }, { "epoch": 1.4170900308153578, "learning_rate": 1.0364365422624305e-05, "loss": 4.1495, "step": 2127 }, { "epoch": 1.417756308819855, "learning_rate": 1.034250026596792e-05, "loss": 4.1337, "step": 2128 }, { "epoch": 1.4184225868243525, "learning_rate": 1.0320652180949305e-05, "loss": 4.2267, "step": 2129 }, { "epoch": 1.4190888648288498, "learning_rate": 1.0298821193015005e-05, "loss": 4.1527, "step": 2130 }, { "epoch": 1.4197551428333473, "learning_rate": 1.0277007327591636e-05, "loss": 4.1185, "step": 2131 }, { "epoch": 1.4204214208378447, "learning_rate": 1.0255210610085882e-05, "loss": 4.1308, "step": 2132 }, { "epoch": 1.421087698842342, "learning_rate": 1.0233431065884441e-05, "loss": 4.147, "step": 2133 }, { "epoch": 1.4217539768468392, "learning_rate": 1.0211668720354037e-05, "loss": 4.1293, "step": 2134 }, { "epoch": 1.4224202548513367, "learning_rate": 1.0189923598841333e-05, "loss": 4.1794, "step": 2135 }, { "epoch": 1.4230865328558342, "learning_rate": 1.016819572667295e-05, "loss": 4.1518, "step": 2136 }, { "epoch": 1.4237528108603315, "learning_rate": 1.0146485129155405e-05, "loss": 4.1413, "step": 2137 }, { "epoch": 1.424419088864829, "learning_rate": 1.0124791831575103e-05, "loss": 4.1157, "step": 2138 }, { "epoch": 1.4250853668693262, "learning_rate": 1.0103115859198303e-05, "loss": 4.1394, "step": 2139 }, { "epoch": 1.4257516448738237, "learning_rate": 1.0081457237271066e-05, "loss": 4.1324, "step": 2140 }, { "epoch": 1.426417922878321, "learning_rate": 1.0059815991019281e-05, "loss": 4.1256, "step": 2141 }, { "epoch": 1.4270842008828184, "learning_rate": 1.0038192145648567e-05, "loss": 4.1035, "step": 2142 }, { "epoch": 1.4277504788873157, "learning_rate": 1.001658572634429e-05, "loss": 4.1484, "step": 2143 }, { "epoch": 1.4284167568918131, "learning_rate": 9.994996758271517e-06, "loss": 4.1673, "step": 2144 }, { "epoch": 1.4290830348963106, "learning_rate": 9.973425266574984e-06, "loss": 4.1534, "step": 2145 }, { "epoch": 1.4297493129008079, "learning_rate": 9.951871276379076e-06, "loss": 4.1291, "step": 2146 }, { "epoch": 1.4304155909053051, "learning_rate": 9.930334812787812e-06, "loss": 4.1362, "step": 2147 }, { "epoch": 1.4310818689098026, "learning_rate": 9.908815900884766e-06, "loss": 4.1451, "step": 2148 }, { "epoch": 1.4317481469143, "learning_rate": 9.887314565733086e-06, "loss": 4.1402, "step": 2149 }, { "epoch": 1.4324144249187973, "learning_rate": 9.865830832375467e-06, "loss": 4.143, "step": 2150 }, { "epoch": 1.4330807029232948, "learning_rate": 9.844364725834057e-06, "loss": 4.098, "step": 2151 }, { "epoch": 1.433746980927792, "learning_rate": 9.822916271110505e-06, "loss": 4.1457, "step": 2152 }, { "epoch": 1.4344132589322895, "learning_rate": 9.801485493185908e-06, "loss": 4.1718, "step": 2153 }, { "epoch": 1.4350795369367868, "learning_rate": 9.78007241702076e-06, "loss": 4.12, "step": 2154 }, { "epoch": 1.4357458149412843, "learning_rate": 9.758677067554927e-06, "loss": 4.1273, "step": 2155 }, { "epoch": 1.4364120929457815, "learning_rate": 9.737299469707663e-06, "loss": 4.1247, "step": 2156 }, { "epoch": 1.437078370950279, "learning_rate": 9.715939648377517e-06, "loss": 4.1979, "step": 2157 }, { "epoch": 1.4377446489547765, "learning_rate": 9.69459762844234e-06, "loss": 4.0758, "step": 2158 }, { "epoch": 1.4384109269592738, "learning_rate": 9.673273434759256e-06, "loss": 4.1577, "step": 2159 }, { "epoch": 1.439077204963771, "learning_rate": 9.651967092164618e-06, "loss": 4.0743, "step": 2160 }, { "epoch": 1.4397434829682685, "learning_rate": 9.630678625473988e-06, "loss": 4.1485, "step": 2161 }, { "epoch": 1.440409760972766, "learning_rate": 9.60940805948213e-06, "loss": 4.1655, "step": 2162 }, { "epoch": 1.4410760389772632, "learning_rate": 9.588155418962932e-06, "loss": 4.1785, "step": 2163 }, { "epoch": 1.4417423169817607, "learning_rate": 9.566920728669415e-06, "loss": 4.124, "step": 2164 }, { "epoch": 1.442408594986258, "learning_rate": 9.54570401333369e-06, "loss": 4.1186, "step": 2165 }, { "epoch": 1.4430748729907554, "learning_rate": 9.524505297666933e-06, "loss": 4.1733, "step": 2166 }, { "epoch": 1.4437411509952527, "learning_rate": 9.503324606359362e-06, "loss": 4.1, "step": 2167 }, { "epoch": 1.4444074289997502, "learning_rate": 9.482161964080185e-06, "loss": 4.1322, "step": 2168 }, { "epoch": 1.4450737070042474, "learning_rate": 9.46101739547762e-06, "loss": 4.1682, "step": 2169 }, { "epoch": 1.445739985008745, "learning_rate": 9.439890925178808e-06, "loss": 4.1553, "step": 2170 }, { "epoch": 1.4464062630132424, "learning_rate": 9.418782577789811e-06, "loss": 4.2035, "step": 2171 }, { "epoch": 1.4470725410177396, "learning_rate": 9.397692377895597e-06, "loss": 4.1604, "step": 2172 }, { "epoch": 1.4477388190222371, "learning_rate": 9.37662035005999e-06, "loss": 4.1905, "step": 2173 }, { "epoch": 1.4484050970267344, "learning_rate": 9.355566518825635e-06, "loss": 4.1725, "step": 2174 }, { "epoch": 1.4490713750312318, "learning_rate": 9.334530908714023e-06, "loss": 4.1524, "step": 2175 }, { "epoch": 1.449737653035729, "learning_rate": 9.313513544225383e-06, "loss": 4.1319, "step": 2176 }, { "epoch": 1.4504039310402266, "learning_rate": 9.292514449838705e-06, "loss": 4.1615, "step": 2177 }, { "epoch": 1.4510702090447238, "learning_rate": 9.271533650011721e-06, "loss": 4.1528, "step": 2178 }, { "epoch": 1.4517364870492213, "learning_rate": 9.25057116918082e-06, "loss": 4.1682, "step": 2179 }, { "epoch": 1.4524027650537188, "learning_rate": 9.229627031761065e-06, "loss": 4.1663, "step": 2180 }, { "epoch": 1.453069043058216, "learning_rate": 9.208701262146182e-06, "loss": 4.1267, "step": 2181 }, { "epoch": 1.4537353210627133, "learning_rate": 9.187793884708473e-06, "loss": 4.1966, "step": 2182 }, { "epoch": 1.4544015990672108, "learning_rate": 9.166904923798821e-06, "loss": 4.1945, "step": 2183 }, { "epoch": 1.4550678770717083, "learning_rate": 9.146034403746687e-06, "loss": 4.1776, "step": 2184 }, { "epoch": 1.4557341550762055, "learning_rate": 9.125182348860017e-06, "loss": 4.1475, "step": 2185 }, { "epoch": 1.456400433080703, "learning_rate": 9.104348783425276e-06, "loss": 4.1838, "step": 2186 }, { "epoch": 1.4570667110852003, "learning_rate": 9.083533731707381e-06, "loss": 4.1835, "step": 2187 }, { "epoch": 1.4577329890896977, "learning_rate": 9.06273721794969e-06, "loss": 4.19, "step": 2188 }, { "epoch": 1.458399267094195, "learning_rate": 9.041959266373964e-06, "loss": 4.1828, "step": 2189 }, { "epoch": 1.4590655450986925, "learning_rate": 9.021199901180369e-06, "loss": 4.1207, "step": 2190 }, { "epoch": 1.4597318231031897, "learning_rate": 9.000459146547397e-06, "loss": 4.1708, "step": 2191 }, { "epoch": 1.4603981011076872, "learning_rate": 8.979737026631869e-06, "loss": 4.1672, "step": 2192 }, { "epoch": 1.4610643791121847, "learning_rate": 8.959033565568909e-06, "loss": 4.1615, "step": 2193 }, { "epoch": 1.461730657116682, "learning_rate": 8.938348787471903e-06, "loss": 4.1389, "step": 2194 }, { "epoch": 1.4623969351211792, "learning_rate": 8.917682716432483e-06, "loss": 4.1457, "step": 2195 }, { "epoch": 1.4630632131256767, "learning_rate": 8.897035376520477e-06, "loss": 4.1819, "step": 2196 }, { "epoch": 1.4637294911301741, "learning_rate": 8.876406791783929e-06, "loss": 4.1521, "step": 2197 }, { "epoch": 1.4643957691346714, "learning_rate": 8.85579698624901e-06, "loss": 4.1173, "step": 2198 }, { "epoch": 1.4650620471391689, "learning_rate": 8.835205983920026e-06, "loss": 4.1563, "step": 2199 }, { "epoch": 1.4657283251436661, "learning_rate": 8.814633808779388e-06, "loss": 4.1305, "step": 2200 }, { "epoch": 1.4663946031481636, "learning_rate": 8.79408048478757e-06, "loss": 4.1131, "step": 2201 }, { "epoch": 1.4670608811526609, "learning_rate": 8.773546035883093e-06, "loss": 4.1643, "step": 2202 }, { "epoch": 1.4677271591571583, "learning_rate": 8.753030485982514e-06, "loss": 4.1386, "step": 2203 }, { "epoch": 1.4683934371616556, "learning_rate": 8.732533858980347e-06, "loss": 4.1364, "step": 2204 }, { "epoch": 1.469059715166153, "learning_rate": 8.712056178749074e-06, "loss": 4.1571, "step": 2205 }, { "epoch": 1.4697259931706506, "learning_rate": 8.69159746913914e-06, "loss": 4.1739, "step": 2206 }, { "epoch": 1.4703922711751478, "learning_rate": 8.671157753978851e-06, "loss": 4.1901, "step": 2207 }, { "epoch": 1.471058549179645, "learning_rate": 8.650737057074404e-06, "loss": 4.1681, "step": 2208 }, { "epoch": 1.4717248271841425, "learning_rate": 8.630335402209872e-06, "loss": 4.1922, "step": 2209 }, { "epoch": 1.47239110518864, "learning_rate": 8.609952813147117e-06, "loss": 4.1674, "step": 2210 }, { "epoch": 1.4730573831931373, "learning_rate": 8.589589313625804e-06, "loss": 4.1848, "step": 2211 }, { "epoch": 1.4737236611976348, "learning_rate": 8.56924492736338e-06, "loss": 4.1224, "step": 2212 }, { "epoch": 1.474389939202132, "learning_rate": 8.548919678055015e-06, "loss": 4.1696, "step": 2213 }, { "epoch": 1.4750562172066295, "learning_rate": 8.528613589373577e-06, "loss": 4.1516, "step": 2214 }, { "epoch": 1.475722495211127, "learning_rate": 8.50832668496965e-06, "loss": 4.1925, "step": 2215 }, { "epoch": 1.4763887732156242, "learning_rate": 8.488058988471457e-06, "loss": 4.1135, "step": 2216 }, { "epoch": 1.4770550512201215, "learning_rate": 8.467810523484835e-06, "loss": 4.1902, "step": 2217 }, { "epoch": 1.477721329224619, "learning_rate": 8.447581313593259e-06, "loss": 4.1559, "step": 2218 }, { "epoch": 1.4783876072291164, "learning_rate": 8.427371382357744e-06, "loss": 4.1414, "step": 2219 }, { "epoch": 1.4790538852336137, "learning_rate": 8.407180753316865e-06, "loss": 4.185, "step": 2220 }, { "epoch": 1.4797201632381112, "learning_rate": 8.387009449986713e-06, "loss": 4.1735, "step": 2221 }, { "epoch": 1.4803864412426084, "learning_rate": 8.36685749586087e-06, "loss": 4.1466, "step": 2222 }, { "epoch": 1.481052719247106, "learning_rate": 8.346724914410385e-06, "loss": 4.1443, "step": 2223 }, { "epoch": 1.4817189972516032, "learning_rate": 8.32661172908373e-06, "loss": 4.0921, "step": 2224 }, { "epoch": 1.4823852752561006, "learning_rate": 8.306517963306817e-06, "loss": 4.1475, "step": 2225 }, { "epoch": 1.483051553260598, "learning_rate": 8.286443640482911e-06, "loss": 4.1613, "step": 2226 }, { "epoch": 1.4837178312650954, "learning_rate": 8.26638878399264e-06, "loss": 4.1867, "step": 2227 }, { "epoch": 1.4843841092695929, "learning_rate": 8.246353417193961e-06, "loss": 4.1495, "step": 2228 }, { "epoch": 1.4850503872740901, "learning_rate": 8.226337563422134e-06, "loss": 4.1919, "step": 2229 }, { "epoch": 1.4857166652785874, "learning_rate": 8.20634124598968e-06, "loss": 4.2219, "step": 2230 }, { "epoch": 1.4863829432830848, "learning_rate": 8.18636448818639e-06, "loss": 4.1121, "step": 2231 }, { "epoch": 1.4870492212875823, "learning_rate": 8.16640731327925e-06, "loss": 4.1118, "step": 2232 }, { "epoch": 1.4877154992920796, "learning_rate": 8.14646974451245e-06, "loss": 4.1217, "step": 2233 }, { "epoch": 1.488381777296577, "learning_rate": 8.126551805107341e-06, "loss": 4.1626, "step": 2234 }, { "epoch": 1.4890480553010743, "learning_rate": 8.106653518262407e-06, "loss": 4.209, "step": 2235 }, { "epoch": 1.4897143333055718, "learning_rate": 8.086774907153246e-06, "loss": 4.2018, "step": 2236 }, { "epoch": 1.490380611310069, "learning_rate": 8.066915994932554e-06, "loss": 4.17, "step": 2237 }, { "epoch": 1.4910468893145665, "learning_rate": 8.047076804730064e-06, "loss": 4.1392, "step": 2238 }, { "epoch": 1.4917131673190638, "learning_rate": 8.027257359652535e-06, "loss": 4.1085, "step": 2239 }, { "epoch": 1.4923794453235613, "learning_rate": 8.007457682783758e-06, "loss": 4.1508, "step": 2240 }, { "epoch": 1.4930457233280587, "learning_rate": 7.987677797184484e-06, "loss": 4.1115, "step": 2241 }, { "epoch": 1.493712001332556, "learning_rate": 7.967917725892379e-06, "loss": 4.1269, "step": 2242 }, { "epoch": 1.4943782793370533, "learning_rate": 7.948177491922094e-06, "loss": 4.1366, "step": 2243 }, { "epoch": 1.4950445573415507, "learning_rate": 7.928457118265128e-06, "loss": 4.1918, "step": 2244 }, { "epoch": 1.4957108353460482, "learning_rate": 7.908756627889863e-06, "loss": 4.1268, "step": 2245 }, { "epoch": 1.4963771133505455, "learning_rate": 7.889076043741538e-06, "loss": 4.1517, "step": 2246 }, { "epoch": 1.497043391355043, "learning_rate": 7.869415388742187e-06, "loss": 4.1581, "step": 2247 }, { "epoch": 1.4977096693595402, "learning_rate": 7.84977468579064e-06, "loss": 4.1811, "step": 2248 }, { "epoch": 1.4983759473640377, "learning_rate": 7.830153957762481e-06, "loss": 4.1389, "step": 2249 }, { "epoch": 1.4990422253685352, "learning_rate": 7.810553227510045e-06, "loss": 4.1065, "step": 2250 }, { "epoch": 1.4997085033730324, "learning_rate": 7.79097251786236e-06, "loss": 4.1547, "step": 2251 }, { "epoch": 1.5003747813775297, "learning_rate": 7.771411851625138e-06, "loss": 4.176, "step": 2252 }, { "epoch": 1.5010410593820271, "learning_rate": 7.751871251580764e-06, "loss": 4.1274, "step": 2253 }, { "epoch": 1.5017073373865246, "learning_rate": 7.732350740488234e-06, "loss": 4.1943, "step": 2254 }, { "epoch": 1.5023736153910219, "learning_rate": 7.71285034108315e-06, "loss": 4.1544, "step": 2255 }, { "epoch": 1.5030398933955191, "learning_rate": 7.693370076077688e-06, "loss": 4.1835, "step": 2256 }, { "epoch": 1.5037061714000166, "learning_rate": 7.673909968160579e-06, "loss": 4.1955, "step": 2257 }, { "epoch": 1.504372449404514, "learning_rate": 7.654470039997064e-06, "loss": 4.1487, "step": 2258 }, { "epoch": 1.5050387274090116, "learning_rate": 7.635050314228909e-06, "loss": 4.1666, "step": 2259 }, { "epoch": 1.5057050054135088, "learning_rate": 7.615650813474323e-06, "loss": 4.1405, "step": 2260 }, { "epoch": 1.506371283418006, "learning_rate": 7.596271560327967e-06, "loss": 4.1522, "step": 2261 }, { "epoch": 1.5070375614225036, "learning_rate": 7.576912577360923e-06, "loss": 4.0985, "step": 2262 }, { "epoch": 1.507703839427001, "learning_rate": 7.557573887120662e-06, "loss": 4.0924, "step": 2263 }, { "epoch": 1.5083701174314983, "learning_rate": 7.538255512131007e-06, "loss": 4.1306, "step": 2264 }, { "epoch": 1.5090363954359955, "learning_rate": 7.518957474892149e-06, "loss": 4.1809, "step": 2265 }, { "epoch": 1.509702673440493, "learning_rate": 7.499679797880571e-06, "loss": 4.1218, "step": 2266 }, { "epoch": 1.5103689514449905, "learning_rate": 7.480422503549037e-06, "loss": 4.1858, "step": 2267 }, { "epoch": 1.5110352294494878, "learning_rate": 7.461185614326596e-06, "loss": 4.1495, "step": 2268 }, { "epoch": 1.511701507453985, "learning_rate": 7.441969152618516e-06, "loss": 4.1415, "step": 2269 }, { "epoch": 1.5123677854584825, "learning_rate": 7.4227731408062465e-06, "loss": 4.1505, "step": 2270 }, { "epoch": 1.51303406346298, "learning_rate": 7.403597601247472e-06, "loss": 4.1147, "step": 2271 }, { "epoch": 1.5137003414674775, "learning_rate": 7.384442556275997e-06, "loss": 4.1378, "step": 2272 }, { "epoch": 1.5143666194719747, "learning_rate": 7.365308028201756e-06, "loss": 4.1571, "step": 2273 }, { "epoch": 1.515032897476472, "learning_rate": 7.346194039310814e-06, "loss": 4.0884, "step": 2274 }, { "epoch": 1.5156991754809694, "learning_rate": 7.327100611865284e-06, "loss": 4.1996, "step": 2275 }, { "epoch": 1.516365453485467, "learning_rate": 7.308027768103357e-06, "loss": 4.1597, "step": 2276 }, { "epoch": 1.5170317314899642, "learning_rate": 7.288975530239211e-06, "loss": 4.1587, "step": 2277 }, { "epoch": 1.5176980094944614, "learning_rate": 7.269943920463071e-06, "loss": 4.128, "step": 2278 }, { "epoch": 1.518364287498959, "learning_rate": 7.250932960941109e-06, "loss": 4.1729, "step": 2279 }, { "epoch": 1.5190305655034564, "learning_rate": 7.231942673815442e-06, "loss": 4.0899, "step": 2280 }, { "epoch": 1.5196968435079536, "learning_rate": 7.212973081204136e-06, "loss": 4.126, "step": 2281 }, { "epoch": 1.5203631215124511, "learning_rate": 7.194024205201133e-06, "loss": 4.1381, "step": 2282 }, { "epoch": 1.5210293995169484, "learning_rate": 7.175096067876244e-06, "loss": 4.1703, "step": 2283 }, { "epoch": 1.5216956775214459, "learning_rate": 7.15618869127514e-06, "loss": 4.1583, "step": 2284 }, { "epoch": 1.5223619555259433, "learning_rate": 7.137302097419296e-06, "loss": 4.1311, "step": 2285 }, { "epoch": 1.5230282335304406, "learning_rate": 7.118436308305987e-06, "loss": 4.1378, "step": 2286 }, { "epoch": 1.5236945115349378, "learning_rate": 7.099591345908274e-06, "loss": 4.1566, "step": 2287 }, { "epoch": 1.5243607895394353, "learning_rate": 7.08076723217494e-06, "loss": 4.1793, "step": 2288 }, { "epoch": 1.5250270675439328, "learning_rate": 7.061963989030487e-06, "loss": 4.1565, "step": 2289 }, { "epoch": 1.52569334554843, "learning_rate": 7.043181638375118e-06, "loss": 4.1525, "step": 2290 }, { "epoch": 1.5263596235529273, "learning_rate": 7.024420202084694e-06, "loss": 4.1882, "step": 2291 }, { "epoch": 1.5270259015574248, "learning_rate": 7.00567970201072e-06, "loss": 4.2149, "step": 2292 }, { "epoch": 1.5276921795619223, "learning_rate": 6.986960159980327e-06, "loss": 4.1334, "step": 2293 }, { "epoch": 1.5283584575664197, "learning_rate": 6.968261597796219e-06, "loss": 4.1355, "step": 2294 }, { "epoch": 1.529024735570917, "learning_rate": 6.949584037236667e-06, "loss": 4.1586, "step": 2295 }, { "epoch": 1.5296910135754143, "learning_rate": 6.930927500055504e-06, "loss": 4.1751, "step": 2296 }, { "epoch": 1.5303572915799117, "learning_rate": 6.9122920079820544e-06, "loss": 4.1157, "step": 2297 }, { "epoch": 1.5310235695844092, "learning_rate": 6.89367758272112e-06, "loss": 4.182, "step": 2298 }, { "epoch": 1.5316898475889065, "learning_rate": 6.875084245953001e-06, "loss": 4.1482, "step": 2299 }, { "epoch": 1.5323561255934037, "learning_rate": 6.856512019333411e-06, "loss": 4.1714, "step": 2300 }, { "epoch": 1.5330224035979012, "learning_rate": 6.837960924493473e-06, "loss": 4.158, "step": 2301 }, { "epoch": 1.5336886816023987, "learning_rate": 6.819430983039726e-06, "loss": 4.163, "step": 2302 }, { "epoch": 1.534354959606896, "learning_rate": 6.800922216554048e-06, "loss": 4.1953, "step": 2303 }, { "epoch": 1.5350212376113932, "learning_rate": 6.78243464659366e-06, "loss": 4.1075, "step": 2304 }, { "epoch": 1.5356875156158907, "learning_rate": 6.763968294691081e-06, "loss": 4.0955, "step": 2305 }, { "epoch": 1.5363537936203882, "learning_rate": 6.745523182354147e-06, "loss": 4.2111, "step": 2306 }, { "epoch": 1.5370200716248856, "learning_rate": 6.727099331065936e-06, "loss": 4.1404, "step": 2307 }, { "epoch": 1.5376863496293829, "learning_rate": 6.70869676228476e-06, "loss": 4.1254, "step": 2308 }, { "epoch": 1.5383526276338801, "learning_rate": 6.690315497444166e-06, "loss": 4.1657, "step": 2309 }, { "epoch": 1.5390189056383776, "learning_rate": 6.671955557952867e-06, "loss": 4.1255, "step": 2310 }, { "epoch": 1.539685183642875, "learning_rate": 6.653616965194739e-06, "loss": 4.1442, "step": 2311 }, { "epoch": 1.5403514616473724, "learning_rate": 6.635299740528808e-06, "loss": 4.1948, "step": 2312 }, { "epoch": 1.5410177396518696, "learning_rate": 6.617003905289199e-06, "loss": 4.2138, "step": 2313 }, { "epoch": 1.541684017656367, "learning_rate": 6.5987294807851295e-06, "loss": 4.1869, "step": 2314 }, { "epoch": 1.5423502956608646, "learning_rate": 6.580476488300891e-06, "loss": 4.1885, "step": 2315 }, { "epoch": 1.5430165736653618, "learning_rate": 6.5622449490958e-06, "loss": 4.1158, "step": 2316 }, { "epoch": 1.5436828516698593, "learning_rate": 6.5440348844041875e-06, "loss": 4.1476, "step": 2317 }, { "epoch": 1.5443491296743566, "learning_rate": 6.525846315435375e-06, "loss": 4.1366, "step": 2318 }, { "epoch": 1.545015407678854, "learning_rate": 6.507679263373648e-06, "loss": 4.1416, "step": 2319 }, { "epoch": 1.5456816856833515, "learning_rate": 6.489533749378226e-06, "loss": 4.1678, "step": 2320 }, { "epoch": 1.5463479636878488, "learning_rate": 6.471409794583264e-06, "loss": 4.0998, "step": 2321 }, { "epoch": 1.547014241692346, "learning_rate": 6.453307420097779e-06, "loss": 4.1555, "step": 2322 }, { "epoch": 1.5476805196968435, "learning_rate": 6.435226647005663e-06, "loss": 4.1038, "step": 2323 }, { "epoch": 1.548346797701341, "learning_rate": 6.417167496365673e-06, "loss": 4.1324, "step": 2324 }, { "epoch": 1.5490130757058382, "learning_rate": 6.3991299892113336e-06, "loss": 4.1668, "step": 2325 }, { "epoch": 1.5496793537103355, "learning_rate": 6.3811141465509924e-06, "loss": 4.1257, "step": 2326 }, { "epoch": 1.550345631714833, "learning_rate": 6.363119989367777e-06, "loss": 4.2066, "step": 2327 }, { "epoch": 1.5510119097193305, "learning_rate": 6.345147538619531e-06, "loss": 4.1389, "step": 2328 }, { "epoch": 1.551678187723828, "learning_rate": 6.327196815238817e-06, "loss": 4.1262, "step": 2329 }, { "epoch": 1.5523444657283252, "learning_rate": 6.309267840132918e-06, "loss": 4.1342, "step": 2330 }, { "epoch": 1.5530107437328224, "learning_rate": 6.291360634183765e-06, "loss": 4.1126, "step": 2331 }, { "epoch": 1.55367702173732, "learning_rate": 6.2734752182479425e-06, "loss": 4.1405, "step": 2332 }, { "epoch": 1.5543432997418174, "learning_rate": 6.255611613156631e-06, "loss": 4.113, "step": 2333 }, { "epoch": 1.5550095777463147, "learning_rate": 6.237769839715654e-06, "loss": 4.1639, "step": 2334 }, { "epoch": 1.555675855750812, "learning_rate": 6.2199499187053755e-06, "loss": 4.1519, "step": 2335 }, { "epoch": 1.5563421337553094, "learning_rate": 6.2021518708807065e-06, "loss": 4.2014, "step": 2336 }, { "epoch": 1.5570084117598069, "learning_rate": 6.184375716971108e-06, "loss": 4.1747, "step": 2337 }, { "epoch": 1.5576746897643041, "learning_rate": 6.166621477680515e-06, "loss": 4.163, "step": 2338 }, { "epoch": 1.5583409677688014, "learning_rate": 6.1488891736873496e-06, "loss": 4.1644, "step": 2339 }, { "epoch": 1.5590072457732989, "learning_rate": 6.131178825644485e-06, "loss": 4.1816, "step": 2340 }, { "epoch": 1.5596735237777963, "learning_rate": 6.113490454179219e-06, "loss": 4.1047, "step": 2341 }, { "epoch": 1.5603398017822938, "learning_rate": 6.09582407989325e-06, "loss": 4.1784, "step": 2342 }, { "epoch": 1.561006079786791, "learning_rate": 6.078179723362676e-06, "loss": 4.1998, "step": 2343 }, { "epoch": 1.5616723577912883, "learning_rate": 6.060557405137929e-06, "loss": 4.1336, "step": 2344 }, { "epoch": 1.5623386357957858, "learning_rate": 6.04295714574378e-06, "loss": 4.0874, "step": 2345 }, { "epoch": 1.5630049138002833, "learning_rate": 6.02537896567931e-06, "loss": 4.1127, "step": 2346 }, { "epoch": 1.5636711918047805, "learning_rate": 6.007822885417882e-06, "loss": 4.1295, "step": 2347 }, { "epoch": 1.5643374698092778, "learning_rate": 5.9902889254071116e-06, "loss": 4.1511, "step": 2348 }, { "epoch": 1.5650037478137753, "learning_rate": 5.972777106068874e-06, "loss": 4.1331, "step": 2349 }, { "epoch": 1.5656700258182727, "learning_rate": 5.95528744779924e-06, "loss": 4.1651, "step": 2350 }, { "epoch": 1.56633630382277, "learning_rate": 5.937819970968458e-06, "loss": 4.1689, "step": 2351 }, { "epoch": 1.5670025818272675, "learning_rate": 5.9203746959209775e-06, "loss": 4.1783, "step": 2352 }, { "epoch": 1.5676688598317647, "learning_rate": 5.902951642975349e-06, "loss": 4.173, "step": 2353 }, { "epoch": 1.5683351378362622, "learning_rate": 5.885550832424258e-06, "loss": 4.121, "step": 2354 }, { "epoch": 1.5690014158407597, "learning_rate": 5.868172284534498e-06, "loss": 4.2004, "step": 2355 }, { "epoch": 1.569667693845257, "learning_rate": 5.850816019546918e-06, "loss": 4.1281, "step": 2356 }, { "epoch": 1.5703339718497542, "learning_rate": 5.833482057676401e-06, "loss": 4.1862, "step": 2357 }, { "epoch": 1.5710002498542517, "learning_rate": 5.816170419111891e-06, "loss": 4.1197, "step": 2358 }, { "epoch": 1.5716665278587492, "learning_rate": 5.7988811240163005e-06, "loss": 4.1184, "step": 2359 }, { "epoch": 1.5723328058632464, "learning_rate": 5.781614192526532e-06, "loss": 4.1686, "step": 2360 }, { "epoch": 1.5729990838677437, "learning_rate": 5.76436964475342e-06, "loss": 4.1688, "step": 2361 }, { "epoch": 1.5736653618722412, "learning_rate": 5.7471475007817635e-06, "loss": 4.0964, "step": 2362 }, { "epoch": 1.5743316398767386, "learning_rate": 5.7299477806702445e-06, "loss": 4.1645, "step": 2363 }, { "epoch": 1.574997917881236, "learning_rate": 5.712770504451426e-06, "loss": 4.1302, "step": 2364 }, { "epoch": 1.5756641958857334, "learning_rate": 5.695615692131751e-06, "loss": 4.1565, "step": 2365 }, { "epoch": 1.5763304738902306, "learning_rate": 5.678483363691478e-06, "loss": 4.0973, "step": 2366 }, { "epoch": 1.576996751894728, "learning_rate": 5.6613735390846884e-06, "loss": 4.135, "step": 2367 }, { "epoch": 1.5776630298992256, "learning_rate": 5.644286238239249e-06, "loss": 4.1455, "step": 2368 }, { "epoch": 1.5783293079037228, "learning_rate": 5.627221481056794e-06, "loss": 4.1298, "step": 2369 }, { "epoch": 1.57899558590822, "learning_rate": 5.610179287412695e-06, "loss": 4.1317, "step": 2370 }, { "epoch": 1.5796618639127176, "learning_rate": 5.593159677156068e-06, "loss": 4.1367, "step": 2371 }, { "epoch": 1.580328141917215, "learning_rate": 5.576162670109697e-06, "loss": 4.2087, "step": 2372 }, { "epoch": 1.5809944199217123, "learning_rate": 5.559188286070052e-06, "loss": 4.1468, "step": 2373 }, { "epoch": 1.5816606979262096, "learning_rate": 5.542236544807256e-06, "loss": 4.2008, "step": 2374 }, { "epoch": 1.582326975930707, "learning_rate": 5.525307466065058e-06, "loss": 4.1386, "step": 2375 }, { "epoch": 1.5829932539352045, "learning_rate": 5.508401069560801e-06, "loss": 4.1347, "step": 2376 }, { "epoch": 1.583659531939702, "learning_rate": 5.4915173749854335e-06, "loss": 4.1597, "step": 2377 }, { "epoch": 1.5843258099441992, "learning_rate": 5.474656402003448e-06, "loss": 4.1668, "step": 2378 }, { "epoch": 1.5849920879486965, "learning_rate": 5.457818170252862e-06, "loss": 4.1236, "step": 2379 }, { "epoch": 1.585658365953194, "learning_rate": 5.441002699345246e-06, "loss": 4.1765, "step": 2380 }, { "epoch": 1.5863246439576915, "learning_rate": 5.424210008865607e-06, "loss": 4.1439, "step": 2381 }, { "epoch": 1.5869909219621887, "learning_rate": 5.407440118372451e-06, "loss": 4.1581, "step": 2382 }, { "epoch": 1.587657199966686, "learning_rate": 5.390693047397735e-06, "loss": 4.1311, "step": 2383 }, { "epoch": 1.5883234779711835, "learning_rate": 5.37396881544682e-06, "loss": 4.184, "step": 2384 }, { "epoch": 1.588989755975681, "learning_rate": 5.3572674419984675e-06, "loss": 4.1052, "step": 2385 }, { "epoch": 1.5896560339801782, "learning_rate": 5.340588946504837e-06, "loss": 4.1744, "step": 2386 }, { "epoch": 1.5903223119846754, "learning_rate": 5.323933348391427e-06, "loss": 4.1654, "step": 2387 }, { "epoch": 1.590988589989173, "learning_rate": 5.307300667057049e-06, "loss": 4.1573, "step": 2388 }, { "epoch": 1.5916548679936704, "learning_rate": 5.2906909218738445e-06, "loss": 4.1522, "step": 2389 }, { "epoch": 1.5923211459981679, "learning_rate": 5.274104132187252e-06, "loss": 4.123, "step": 2390 }, { "epoch": 1.5929874240026651, "learning_rate": 5.257540317315951e-06, "loss": 4.1682, "step": 2391 }, { "epoch": 1.5936537020071624, "learning_rate": 5.240999496551866e-06, "loss": 4.1348, "step": 2392 }, { "epoch": 1.5943199800116599, "learning_rate": 5.2244816891601575e-06, "loss": 4.1541, "step": 2393 }, { "epoch": 1.5949862580161573, "learning_rate": 5.207986914379162e-06, "loss": 4.1922, "step": 2394 }, { "epoch": 1.5956525360206546, "learning_rate": 5.191515191420396e-06, "loss": 4.1435, "step": 2395 }, { "epoch": 1.5963188140251519, "learning_rate": 5.175066539468534e-06, "loss": 4.2017, "step": 2396 }, { "epoch": 1.5969850920296493, "learning_rate": 5.15864097768137e-06, "loss": 4.1342, "step": 2397 }, { "epoch": 1.5976513700341468, "learning_rate": 5.142238525189804e-06, "loss": 4.1694, "step": 2398 }, { "epoch": 1.5983176480386443, "learning_rate": 5.125859201097841e-06, "loss": 4.0999, "step": 2399 }, { "epoch": 1.5989839260431415, "learning_rate": 5.109503024482526e-06, "loss": 4.1193, "step": 2400 }, { "epoch": 1.5996502040476388, "learning_rate": 5.09317001439395e-06, "loss": 4.133, "step": 2401 }, { "epoch": 1.6003164820521363, "learning_rate": 5.076860189855223e-06, "loss": 4.1481, "step": 2402 }, { "epoch": 1.6009827600566338, "learning_rate": 5.060573569862451e-06, "loss": 4.1395, "step": 2403 }, { "epoch": 1.601649038061131, "learning_rate": 5.0443101733847085e-06, "loss": 4.1176, "step": 2404 }, { "epoch": 1.6023153160656283, "learning_rate": 5.0280700193640395e-06, "loss": 4.1493, "step": 2405 }, { "epoch": 1.6029815940701257, "learning_rate": 5.011853126715396e-06, "loss": 4.1845, "step": 2406 }, { "epoch": 1.6036478720746232, "learning_rate": 4.995659514326645e-06, "loss": 4.1006, "step": 2407 }, { "epoch": 1.6043141500791205, "learning_rate": 4.979489201058543e-06, "loss": 4.1333, "step": 2408 }, { "epoch": 1.6049804280836177, "learning_rate": 4.963342205744706e-06, "loss": 4.1038, "step": 2409 }, { "epoch": 1.6056467060881152, "learning_rate": 4.947218547191585e-06, "loss": 4.1208, "step": 2410 }, { "epoch": 1.6063129840926127, "learning_rate": 4.931118244178468e-06, "loss": 4.1528, "step": 2411 }, { "epoch": 1.6069792620971102, "learning_rate": 4.915041315457428e-06, "loss": 4.1384, "step": 2412 }, { "epoch": 1.6076455401016074, "learning_rate": 4.898987779753314e-06, "loss": 4.2233, "step": 2413 }, { "epoch": 1.6083118181061047, "learning_rate": 4.8829576557637255e-06, "loss": 4.122, "step": 2414 }, { "epoch": 1.6089780961106022, "learning_rate": 4.86695096215902e-06, "loss": 4.1806, "step": 2415 }, { "epoch": 1.6096443741150996, "learning_rate": 4.850967717582228e-06, "loss": 4.1683, "step": 2416 }, { "epoch": 1.610310652119597, "learning_rate": 4.83500794064908e-06, "loss": 4.1762, "step": 2417 }, { "epoch": 1.6109769301240942, "learning_rate": 4.819071649948004e-06, "loss": 4.1435, "step": 2418 }, { "epoch": 1.6116432081285916, "learning_rate": 4.803158864040033e-06, "loss": 4.0909, "step": 2419 }, { "epoch": 1.612309486133089, "learning_rate": 4.787269601458841e-06, "loss": 4.0991, "step": 2420 }, { "epoch": 1.6129757641375864, "learning_rate": 4.771403880710712e-06, "loss": 4.0856, "step": 2421 }, { "epoch": 1.6136420421420836, "learning_rate": 4.755561720274501e-06, "loss": 4.1568, "step": 2422 }, { "epoch": 1.614308320146581, "learning_rate": 4.739743138601621e-06, "loss": 4.1142, "step": 2423 }, { "epoch": 1.6149745981510786, "learning_rate": 4.7239481541160255e-06, "loss": 4.1173, "step": 2424 }, { "epoch": 1.615640876155576, "learning_rate": 4.708176785214188e-06, "loss": 4.1896, "step": 2425 }, { "epoch": 1.6163071541600733, "learning_rate": 4.692429050265062e-06, "loss": 4.1323, "step": 2426 }, { "epoch": 1.6169734321645706, "learning_rate": 4.676704967610101e-06, "loss": 4.1396, "step": 2427 }, { "epoch": 1.617639710169068, "learning_rate": 4.661004555563189e-06, "loss": 4.1878, "step": 2428 }, { "epoch": 1.6183059881735655, "learning_rate": 4.645327832410648e-06, "loss": 4.1964, "step": 2429 }, { "epoch": 1.6189722661780628, "learning_rate": 4.629674816411206e-06, "loss": 4.1971, "step": 2430 }, { "epoch": 1.61963854418256, "learning_rate": 4.614045525795985e-06, "loss": 4.1523, "step": 2431 }, { "epoch": 1.6203048221870575, "learning_rate": 4.598439978768462e-06, "loss": 4.1397, "step": 2432 }, { "epoch": 1.620971100191555, "learning_rate": 4.582858193504483e-06, "loss": 4.1179, "step": 2433 }, { "epoch": 1.6216373781960522, "learning_rate": 4.567300188152196e-06, "loss": 4.1402, "step": 2434 }, { "epoch": 1.6223036562005497, "learning_rate": 4.551765980832059e-06, "loss": 4.1469, "step": 2435 }, { "epoch": 1.622969934205047, "learning_rate": 4.53625558963682e-06, "loss": 4.1374, "step": 2436 }, { "epoch": 1.6236362122095445, "learning_rate": 4.520769032631478e-06, "loss": 4.1348, "step": 2437 }, { "epoch": 1.624302490214042, "learning_rate": 4.5053063278532735e-06, "loss": 4.1733, "step": 2438 }, { "epoch": 1.6249687682185392, "learning_rate": 4.489867493311676e-06, "loss": 4.1227, "step": 2439 }, { "epoch": 1.6256350462230365, "learning_rate": 4.474452546988342e-06, "loss": 4.1042, "step": 2440 }, { "epoch": 1.626301324227534, "learning_rate": 4.459061506837114e-06, "loss": 4.1039, "step": 2441 }, { "epoch": 1.6269676022320314, "learning_rate": 4.443694390783979e-06, "loss": 4.1586, "step": 2442 }, { "epoch": 1.6276338802365287, "learning_rate": 4.428351216727081e-06, "loss": 4.1643, "step": 2443 }, { "epoch": 1.628300158241026, "learning_rate": 4.413032002536652e-06, "loss": 4.1724, "step": 2444 }, { "epoch": 1.6289664362455234, "learning_rate": 4.3977367660550275e-06, "loss": 4.1834, "step": 2445 }, { "epoch": 1.6296327142500209, "learning_rate": 4.382465525096632e-06, "loss": 4.1741, "step": 2446 }, { "epoch": 1.6302989922545184, "learning_rate": 4.3672182974479255e-06, "loss": 4.1434, "step": 2447 }, { "epoch": 1.6309652702590156, "learning_rate": 4.351995100867398e-06, "loss": 4.1799, "step": 2448 }, { "epoch": 1.6316315482635129, "learning_rate": 4.336795953085565e-06, "loss": 4.1687, "step": 2449 }, { "epoch": 1.6322978262680103, "learning_rate": 4.321620871804926e-06, "loss": 4.157, "step": 2450 }, { "epoch": 1.6329641042725078, "learning_rate": 4.306469874699928e-06, "loss": 4.1593, "step": 2451 }, { "epoch": 1.633630382277005, "learning_rate": 4.291342979417007e-06, "loss": 4.1422, "step": 2452 }, { "epoch": 1.6342966602815023, "learning_rate": 4.276240203574499e-06, "loss": 4.1436, "step": 2453 }, { "epoch": 1.6349629382859998, "learning_rate": 4.261161564762653e-06, "loss": 4.1697, "step": 2454 }, { "epoch": 1.6356292162904973, "learning_rate": 4.246107080543618e-06, "loss": 4.1403, "step": 2455 }, { "epoch": 1.6362954942949945, "learning_rate": 4.231076768451397e-06, "loss": 4.1645, "step": 2456 }, { "epoch": 1.6369617722994918, "learning_rate": 4.216070645991843e-06, "loss": 4.1398, "step": 2457 }, { "epoch": 1.6376280503039893, "learning_rate": 4.201088730642633e-06, "loss": 4.1561, "step": 2458 }, { "epoch": 1.6382943283084868, "learning_rate": 4.186131039853258e-06, "loss": 4.1205, "step": 2459 }, { "epoch": 1.6389606063129842, "learning_rate": 4.1711975910449785e-06, "loss": 4.1107, "step": 2460 }, { "epoch": 1.6396268843174815, "learning_rate": 4.156288401610847e-06, "loss": 4.145, "step": 2461 }, { "epoch": 1.6402931623219787, "learning_rate": 4.141403488915638e-06, "loss": 4.1401, "step": 2462 }, { "epoch": 1.6409594403264762, "learning_rate": 4.126542870295855e-06, "loss": 4.1618, "step": 2463 }, { "epoch": 1.6416257183309737, "learning_rate": 4.111706563059711e-06, "loss": 4.179, "step": 2464 }, { "epoch": 1.642291996335471, "learning_rate": 4.096894584487102e-06, "loss": 4.1313, "step": 2465 }, { "epoch": 1.6429582743399682, "learning_rate": 4.082106951829581e-06, "loss": 4.1469, "step": 2466 }, { "epoch": 1.6436245523444657, "learning_rate": 4.067343682310365e-06, "loss": 4.1381, "step": 2467 }, { "epoch": 1.6442908303489632, "learning_rate": 4.052604793124273e-06, "loss": 4.1245, "step": 2468 }, { "epoch": 1.6449571083534604, "learning_rate": 4.037890301437744e-06, "loss": 4.1308, "step": 2469 }, { "epoch": 1.645623386357958, "learning_rate": 4.023200224388787e-06, "loss": 4.1458, "step": 2470 }, { "epoch": 1.6462896643624552, "learning_rate": 4.008534579086987e-06, "loss": 4.1475, "step": 2471 }, { "epoch": 1.6469559423669526, "learning_rate": 3.993893382613467e-06, "loss": 4.1356, "step": 2472 }, { "epoch": 1.6476222203714501, "learning_rate": 3.979276652020875e-06, "loss": 4.1828, "step": 2473 }, { "epoch": 1.6482884983759474, "learning_rate": 3.9646844043333685e-06, "loss": 4.152, "step": 2474 }, { "epoch": 1.6489547763804446, "learning_rate": 3.950116656546588e-06, "loss": 4.1029, "step": 2475 }, { "epoch": 1.649621054384942, "learning_rate": 3.935573425627626e-06, "loss": 4.1255, "step": 2476 }, { "epoch": 1.6502873323894396, "learning_rate": 3.9210547285150415e-06, "loss": 4.1409, "step": 2477 }, { "epoch": 1.6509536103939368, "learning_rate": 3.906560582118815e-06, "loss": 4.1464, "step": 2478 }, { "epoch": 1.651619888398434, "learning_rate": 3.8920910033203056e-06, "loss": 4.1287, "step": 2479 }, { "epoch": 1.6522861664029316, "learning_rate": 3.877646008972294e-06, "loss": 4.2269, "step": 2480 }, { "epoch": 1.652952444407429, "learning_rate": 3.863225615898908e-06, "loss": 4.1186, "step": 2481 }, { "epoch": 1.6536187224119265, "learning_rate": 3.848829840895621e-06, "loss": 4.104, "step": 2482 }, { "epoch": 1.6542850004164238, "learning_rate": 3.834458700729249e-06, "loss": 4.1207, "step": 2483 }, { "epoch": 1.654951278420921, "learning_rate": 3.820112212137903e-06, "loss": 4.101, "step": 2484 }, { "epoch": 1.6556175564254185, "learning_rate": 3.805790391830982e-06, "loss": 4.1469, "step": 2485 }, { "epoch": 1.656283834429916, "learning_rate": 3.791493256489162e-06, "loss": 4.1599, "step": 2486 }, { "epoch": 1.6569501124344133, "learning_rate": 3.7772208227643574e-06, "loss": 4.1768, "step": 2487 }, { "epoch": 1.6576163904389105, "learning_rate": 3.762973107279716e-06, "loss": 4.143, "step": 2488 }, { "epoch": 1.658282668443408, "learning_rate": 3.7487501266296078e-06, "loss": 4.1633, "step": 2489 }, { "epoch": 1.6589489464479055, "learning_rate": 3.734551897379579e-06, "loss": 4.1719, "step": 2490 }, { "epoch": 1.6596152244524027, "learning_rate": 3.7203784360663575e-06, "loss": 4.1182, "step": 2491 }, { "epoch": 1.6602815024569, "learning_rate": 3.7062297591978155e-06, "loss": 4.1725, "step": 2492 }, { "epoch": 1.6609477804613975, "learning_rate": 3.692105883252964e-06, "loss": 4.2103, "step": 2493 }, { "epoch": 1.661614058465895, "learning_rate": 3.6780068246819233e-06, "loss": 4.1382, "step": 2494 }, { "epoch": 1.6622803364703924, "learning_rate": 3.6639325999059225e-06, "loss": 4.1651, "step": 2495 }, { "epoch": 1.6629466144748897, "learning_rate": 3.649883225317255e-06, "loss": 4.1335, "step": 2496 }, { "epoch": 1.663612892479387, "learning_rate": 3.635858717279267e-06, "loss": 4.1836, "step": 2497 }, { "epoch": 1.6642791704838844, "learning_rate": 3.6218590921263534e-06, "loss": 4.2023, "step": 2498 }, { "epoch": 1.6649454484883819, "learning_rate": 3.6078843661639246e-06, "loss": 4.0935, "step": 2499 }, { "epoch": 1.6656117264928791, "learning_rate": 3.593934555668385e-06, "loss": 4.1237, "step": 2500 }, { "epoch": 1.6662780044973764, "learning_rate": 3.580009676887122e-06, "loss": 4.1255, "step": 2501 }, { "epoch": 1.6669442825018739, "learning_rate": 3.5661097460384985e-06, "loss": 4.0888, "step": 2502 }, { "epoch": 1.6676105605063714, "learning_rate": 3.5522347793118027e-06, "loss": 4.1469, "step": 2503 }, { "epoch": 1.6682768385108686, "learning_rate": 3.538384792867247e-06, "loss": 4.1726, "step": 2504 }, { "epoch": 1.668943116515366, "learning_rate": 3.5245598028359666e-06, "loss": 4.1061, "step": 2505 }, { "epoch": 1.6696093945198633, "learning_rate": 3.5107598253199758e-06, "loss": 4.1712, "step": 2506 }, { "epoch": 1.6702756725243608, "learning_rate": 3.496984876392137e-06, "loss": 4.1276, "step": 2507 }, { "epoch": 1.6709419505288583, "learning_rate": 3.483234972096189e-06, "loss": 4.1601, "step": 2508 }, { "epoch": 1.6716082285333556, "learning_rate": 3.4695101284466903e-06, "loss": 4.1901, "step": 2509 }, { "epoch": 1.6722745065378528, "learning_rate": 3.455810361429004e-06, "loss": 4.1276, "step": 2510 }, { "epoch": 1.6729407845423503, "learning_rate": 3.4421356869993037e-06, "loss": 4.1992, "step": 2511 }, { "epoch": 1.6736070625468478, "learning_rate": 3.428486121084523e-06, "loss": 4.1446, "step": 2512 }, { "epoch": 1.674273340551345, "learning_rate": 3.414861679582357e-06, "loss": 4.1624, "step": 2513 }, { "epoch": 1.6749396185558423, "learning_rate": 3.4012623783612333e-06, "loss": 4.1902, "step": 2514 }, { "epoch": 1.6756058965603398, "learning_rate": 3.3876882332603027e-06, "loss": 4.1994, "step": 2515 }, { "epoch": 1.6762721745648372, "learning_rate": 3.374139260089415e-06, "loss": 4.0897, "step": 2516 }, { "epoch": 1.6769384525693347, "learning_rate": 3.3606154746291114e-06, "loss": 4.135, "step": 2517 }, { "epoch": 1.677604730573832, "learning_rate": 3.3471168926305864e-06, "loss": 4.1372, "step": 2518 }, { "epoch": 1.6782710085783292, "learning_rate": 3.3336435298156775e-06, "loss": 4.1175, "step": 2519 }, { "epoch": 1.6789372865828267, "learning_rate": 3.3201954018768575e-06, "loss": 4.1257, "step": 2520 }, { "epoch": 1.6796035645873242, "learning_rate": 3.3067725244772058e-06, "loss": 4.1555, "step": 2521 }, { "epoch": 1.6802698425918214, "learning_rate": 3.2933749132503826e-06, "loss": 4.178, "step": 2522 }, { "epoch": 1.6809361205963187, "learning_rate": 3.2800025838006453e-06, "loss": 4.0662, "step": 2523 }, { "epoch": 1.6816023986008162, "learning_rate": 3.2666555517027796e-06, "loss": 4.1526, "step": 2524 }, { "epoch": 1.6822686766053137, "learning_rate": 3.25333383250212e-06, "loss": 4.0991, "step": 2525 }, { "epoch": 1.682934954609811, "learning_rate": 3.2400374417145135e-06, "loss": 4.1516, "step": 2526 }, { "epoch": 1.6836012326143082, "learning_rate": 3.2267663948263137e-06, "loss": 4.1127, "step": 2527 }, { "epoch": 1.6842675106188056, "learning_rate": 3.2135207072943512e-06, "loss": 4.1672, "step": 2528 }, { "epoch": 1.6849337886233031, "learning_rate": 3.200300394545913e-06, "loss": 4.1527, "step": 2529 }, { "epoch": 1.6856000666278006, "learning_rate": 3.1871054719787546e-06, "loss": 4.1435, "step": 2530 }, { "epoch": 1.6862663446322979, "learning_rate": 3.173935954961038e-06, "loss": 4.1503, "step": 2531 }, { "epoch": 1.686932622636795, "learning_rate": 3.1607918588313385e-06, "loss": 4.1579, "step": 2532 }, { "epoch": 1.6875989006412926, "learning_rate": 3.1476731988986456e-06, "loss": 4.1658, "step": 2533 }, { "epoch": 1.68826517864579, "learning_rate": 3.134579990442285e-06, "loss": 4.1869, "step": 2534 }, { "epoch": 1.6889314566502873, "learning_rate": 3.121512248711961e-06, "loss": 4.1413, "step": 2535 }, { "epoch": 1.6895977346547846, "learning_rate": 3.1084699889277284e-06, "loss": 4.1717, "step": 2536 }, { "epoch": 1.690264012659282, "learning_rate": 3.0954532262799404e-06, "loss": 4.1521, "step": 2537 }, { "epoch": 1.6909302906637795, "learning_rate": 3.08246197592926e-06, "loss": 4.1462, "step": 2538 }, { "epoch": 1.6915965686682768, "learning_rate": 3.069496253006651e-06, "loss": 4.1451, "step": 2539 }, { "epoch": 1.6922628466727743, "learning_rate": 3.056556072613323e-06, "loss": 4.1559, "step": 2540 }, { "epoch": 1.6929291246772715, "learning_rate": 3.04364144982075e-06, "loss": 4.1708, "step": 2541 }, { "epoch": 1.693595402681769, "learning_rate": 3.030752399670636e-06, "loss": 4.0608, "step": 2542 }, { "epoch": 1.6942616806862665, "learning_rate": 3.0178889371748953e-06, "loss": 4.1698, "step": 2543 }, { "epoch": 1.6949279586907637, "learning_rate": 3.005051077315643e-06, "loss": 4.1663, "step": 2544 }, { "epoch": 1.695594236695261, "learning_rate": 2.9922388350451886e-06, "loss": 4.1494, "step": 2545 }, { "epoch": 1.6962605146997585, "learning_rate": 2.9794522252859836e-06, "loss": 4.1646, "step": 2546 }, { "epoch": 1.696926792704256, "learning_rate": 2.966691262930635e-06, "loss": 4.1354, "step": 2547 }, { "epoch": 1.6975930707087532, "learning_rate": 2.9539559628418785e-06, "loss": 4.1843, "step": 2548 }, { "epoch": 1.6982593487132505, "learning_rate": 2.9412463398525577e-06, "loss": 4.1718, "step": 2549 }, { "epoch": 1.698925626717748, "learning_rate": 2.9285624087656142e-06, "loss": 4.1476, "step": 2550 }, { "epoch": 1.6995919047222454, "learning_rate": 2.915904184354057e-06, "loss": 4.1648, "step": 2551 }, { "epoch": 1.700258182726743, "learning_rate": 2.9032716813609723e-06, "loss": 4.1797, "step": 2552 }, { "epoch": 1.7009244607312402, "learning_rate": 2.8906649144994747e-06, "loss": 4.1656, "step": 2553 }, { "epoch": 1.7015907387357374, "learning_rate": 2.878083898452702e-06, "loss": 4.1565, "step": 2554 }, { "epoch": 1.7022570167402349, "learning_rate": 2.865528647873808e-06, "loss": 4.1559, "step": 2555 }, { "epoch": 1.7029232947447324, "learning_rate": 2.8529991773859314e-06, "loss": 4.1851, "step": 2556 }, { "epoch": 1.7035895727492296, "learning_rate": 2.8404955015821884e-06, "loss": 4.1604, "step": 2557 }, { "epoch": 1.7042558507537269, "learning_rate": 2.8280176350256536e-06, "loss": 4.1478, "step": 2558 }, { "epoch": 1.7049221287582244, "learning_rate": 2.8155655922493363e-06, "loss": 4.2111, "step": 2559 }, { "epoch": 1.7055884067627218, "learning_rate": 2.8031393877561706e-06, "loss": 4.1227, "step": 2560 }, { "epoch": 1.706254684767219, "learning_rate": 2.790739036019005e-06, "loss": 4.161, "step": 2561 }, { "epoch": 1.7069209627717163, "learning_rate": 2.7783645514805614e-06, "loss": 4.2107, "step": 2562 }, { "epoch": 1.7075872407762138, "learning_rate": 2.7660159485534383e-06, "loss": 4.1659, "step": 2563 }, { "epoch": 1.7082535187807113, "learning_rate": 2.753693241620106e-06, "loss": 4.1097, "step": 2564 }, { "epoch": 1.7089197967852088, "learning_rate": 2.7413964450328537e-06, "loss": 4.0755, "step": 2565 }, { "epoch": 1.709586074789706, "learning_rate": 2.7291255731138e-06, "loss": 4.0998, "step": 2566 }, { "epoch": 1.7102523527942033, "learning_rate": 2.7168806401548756e-06, "loss": 4.1506, "step": 2567 }, { "epoch": 1.7109186307987008, "learning_rate": 2.70466166041779e-06, "loss": 4.174, "step": 2568 }, { "epoch": 1.7115849088031982, "learning_rate": 2.692468648134028e-06, "loss": 4.1685, "step": 2569 }, { "epoch": 1.7122511868076955, "learning_rate": 2.6803016175048323e-06, "loss": 4.2036, "step": 2570 }, { "epoch": 1.7129174648121928, "learning_rate": 2.668160582701182e-06, "loss": 4.1263, "step": 2571 }, { "epoch": 1.7135837428166902, "learning_rate": 2.656045557863776e-06, "loss": 4.1998, "step": 2572 }, { "epoch": 1.7142500208211877, "learning_rate": 2.6439565571030334e-06, "loss": 4.1586, "step": 2573 }, { "epoch": 1.714916298825685, "learning_rate": 2.6318935944990464e-06, "loss": 4.1617, "step": 2574 }, { "epoch": 1.7155825768301824, "learning_rate": 2.6198566841015877e-06, "loss": 4.204, "step": 2575 }, { "epoch": 1.7162488548346797, "learning_rate": 2.607845839930087e-06, "loss": 4.1798, "step": 2576 }, { "epoch": 1.7169151328391772, "learning_rate": 2.595861075973613e-06, "loss": 4.164, "step": 2577 }, { "epoch": 1.7175814108436747, "learning_rate": 2.5839024061908577e-06, "loss": 4.1194, "step": 2578 }, { "epoch": 1.718247688848172, "learning_rate": 2.571969844510122e-06, "loss": 4.1183, "step": 2579 }, { "epoch": 1.7189139668526692, "learning_rate": 2.560063404829305e-06, "loss": 4.1914, "step": 2580 }, { "epoch": 1.7195802448571667, "learning_rate": 2.5481831010158717e-06, "loss": 4.0869, "step": 2581 }, { "epoch": 1.7202465228616641, "learning_rate": 2.536328946906852e-06, "loss": 4.1397, "step": 2582 }, { "epoch": 1.7209128008661614, "learning_rate": 2.5245009563088174e-06, "loss": 4.1423, "step": 2583 }, { "epoch": 1.7215790788706586, "learning_rate": 2.512699142997868e-06, "loss": 4.1848, "step": 2584 }, { "epoch": 1.7222453568751561, "learning_rate": 2.5009235207196115e-06, "loss": 4.1496, "step": 2585 }, { "epoch": 1.7229116348796536, "learning_rate": 2.489174103189157e-06, "loss": 4.1579, "step": 2586 }, { "epoch": 1.723577912884151, "learning_rate": 2.477450904091089e-06, "loss": 4.1521, "step": 2587 }, { "epoch": 1.7242441908886483, "learning_rate": 2.4657539370794486e-06, "loss": 4.1377, "step": 2588 }, { "epoch": 1.7249104688931456, "learning_rate": 2.45408321577775e-06, "loss": 4.1347, "step": 2589 }, { "epoch": 1.725576746897643, "learning_rate": 2.4424387537789e-06, "loss": 4.1486, "step": 2590 }, { "epoch": 1.7262430249021405, "learning_rate": 2.4308205646452474e-06, "loss": 4.1733, "step": 2591 }, { "epoch": 1.7269093029066378, "learning_rate": 2.419228661908543e-06, "loss": 4.1548, "step": 2592 }, { "epoch": 1.727575580911135, "learning_rate": 2.4076630590699062e-06, "loss": 4.1483, "step": 2593 }, { "epoch": 1.7282418589156325, "learning_rate": 2.3961237695998285e-06, "loss": 4.1432, "step": 2594 }, { "epoch": 1.72890813692013, "learning_rate": 2.38461080693817e-06, "loss": 4.1739, "step": 2595 }, { "epoch": 1.7295744149246273, "learning_rate": 2.3731241844941076e-06, "loss": 4.1157, "step": 2596 }, { "epoch": 1.7302406929291245, "learning_rate": 2.3616639156461505e-06, "loss": 4.1344, "step": 2597 }, { "epoch": 1.730906970933622, "learning_rate": 2.3502300137421134e-06, "loss": 4.1509, "step": 2598 }, { "epoch": 1.7315732489381195, "learning_rate": 2.3388224920990938e-06, "loss": 4.1598, "step": 2599 }, { "epoch": 1.732239526942617, "learning_rate": 2.3274413640034657e-06, "loss": 4.0994, "step": 2600 }, { "epoch": 1.7329058049471142, "learning_rate": 2.31608664271088e-06, "loss": 4.1663, "step": 2601 }, { "epoch": 1.7335720829516115, "learning_rate": 2.304758341446209e-06, "loss": 4.1659, "step": 2602 }, { "epoch": 1.734238360956109, "learning_rate": 2.293456473403563e-06, "loss": 4.1737, "step": 2603 }, { "epoch": 1.7349046389606064, "learning_rate": 2.2821810517462656e-06, "loss": 4.1779, "step": 2604 }, { "epoch": 1.7355709169651037, "learning_rate": 2.270932089606834e-06, "loss": 4.1117, "step": 2605 }, { "epoch": 1.736237194969601, "learning_rate": 2.259709600086976e-06, "loss": 4.109, "step": 2606 }, { "epoch": 1.7369034729740984, "learning_rate": 2.248513596257554e-06, "loss": 4.1293, "step": 2607 }, { "epoch": 1.737569750978596, "learning_rate": 2.2373440911586e-06, "loss": 4.1422, "step": 2608 }, { "epoch": 1.7382360289830932, "learning_rate": 2.226201097799266e-06, "loss": 4.1602, "step": 2609 }, { "epoch": 1.7389023069875904, "learning_rate": 2.2150846291578376e-06, "loss": 4.131, "step": 2610 }, { "epoch": 1.7395685849920879, "learning_rate": 2.2039946981816996e-06, "loss": 4.1167, "step": 2611 }, { "epoch": 1.7402348629965854, "learning_rate": 2.1929313177873305e-06, "loss": 4.1483, "step": 2612 }, { "epoch": 1.7409011410010828, "learning_rate": 2.181894500860282e-06, "loss": 4.0999, "step": 2613 }, { "epoch": 1.74156741900558, "learning_rate": 2.170884260255179e-06, "loss": 4.1824, "step": 2614 }, { "epoch": 1.7422336970100774, "learning_rate": 2.1599006087956786e-06, "loss": 4.2052, "step": 2615 }, { "epoch": 1.7428999750145748, "learning_rate": 2.1489435592744743e-06, "loss": 4.1807, "step": 2616 }, { "epoch": 1.7435662530190723, "learning_rate": 2.138013124453289e-06, "loss": 4.1477, "step": 2617 }, { "epoch": 1.7442325310235696, "learning_rate": 2.1271093170628254e-06, "loss": 4.1538, "step": 2618 }, { "epoch": 1.7448988090280668, "learning_rate": 2.116232149802777e-06, "loss": 4.1106, "step": 2619 }, { "epoch": 1.7455650870325643, "learning_rate": 2.1053816353418326e-06, "loss": 4.1804, "step": 2620 }, { "epoch": 1.7462313650370618, "learning_rate": 2.094557786317611e-06, "loss": 4.1575, "step": 2621 }, { "epoch": 1.7468976430415593, "learning_rate": 2.0837606153366827e-06, "loss": 4.1377, "step": 2622 }, { "epoch": 1.7475639210460565, "learning_rate": 2.072990134974559e-06, "loss": 4.1385, "step": 2623 }, { "epoch": 1.7482301990505538, "learning_rate": 2.062246357775649e-06, "loss": 4.1718, "step": 2624 }, { "epoch": 1.7488964770550512, "learning_rate": 2.0515292962532545e-06, "loss": 4.142, "step": 2625 }, { "epoch": 1.7495627550595487, "learning_rate": 2.040838962889585e-06, "loss": 4.1355, "step": 2626 }, { "epoch": 1.750229033064046, "learning_rate": 2.0301753701357034e-06, "loss": 4.1449, "step": 2627 }, { "epoch": 1.7508953110685432, "learning_rate": 2.0195385304115243e-06, "loss": 4.1414, "step": 2628 }, { "epoch": 1.7515615890730407, "learning_rate": 2.0089284561058213e-06, "loss": 4.2214, "step": 2629 }, { "epoch": 1.7522278670775382, "learning_rate": 1.998345159576173e-06, "loss": 4.1859, "step": 2630 }, { "epoch": 1.7528941450820354, "learning_rate": 1.987788653148984e-06, "loss": 4.1578, "step": 2631 }, { "epoch": 1.7535604230865327, "learning_rate": 1.977258949119451e-06, "loss": 4.143, "step": 2632 }, { "epoch": 1.7542267010910302, "learning_rate": 1.966756059751554e-06, "loss": 4.1228, "step": 2633 }, { "epoch": 1.7548929790955277, "learning_rate": 1.956279997278043e-06, "loss": 4.1847, "step": 2634 }, { "epoch": 1.7555592571000251, "learning_rate": 1.9458307739004174e-06, "loss": 4.148, "step": 2635 }, { "epoch": 1.7562255351045224, "learning_rate": 1.9354084017889324e-06, "loss": 4.1016, "step": 2636 }, { "epoch": 1.7568918131090197, "learning_rate": 1.9250128930825504e-06, "loss": 4.1501, "step": 2637 }, { "epoch": 1.7575580911135171, "learning_rate": 1.9146442598889564e-06, "loss": 4.168, "step": 2638 }, { "epoch": 1.7582243691180146, "learning_rate": 1.904302514284531e-06, "loss": 4.1663, "step": 2639 }, { "epoch": 1.7588906471225119, "learning_rate": 1.8939876683143398e-06, "loss": 4.159, "step": 2640 }, { "epoch": 1.7595569251270091, "learning_rate": 1.8836997339921143e-06, "loss": 4.1096, "step": 2641 }, { "epoch": 1.7602232031315066, "learning_rate": 1.8734387233002525e-06, "loss": 4.1918, "step": 2642 }, { "epoch": 1.760889481136004, "learning_rate": 1.8632046481897813e-06, "loss": 4.1083, "step": 2643 }, { "epoch": 1.7615557591405013, "learning_rate": 1.8529975205803628e-06, "loss": 4.1539, "step": 2644 }, { "epoch": 1.7622220371449986, "learning_rate": 1.8428173523602738e-06, "loss": 4.1304, "step": 2645 }, { "epoch": 1.762888315149496, "learning_rate": 1.832664155386385e-06, "loss": 4.134, "step": 2646 }, { "epoch": 1.7635545931539935, "learning_rate": 1.8225379414841592e-06, "loss": 4.1175, "step": 2647 }, { "epoch": 1.764220871158491, "learning_rate": 1.8124387224476347e-06, "loss": 4.1384, "step": 2648 }, { "epoch": 1.7648871491629883, "learning_rate": 1.8023665100394022e-06, "loss": 4.1378, "step": 2649 }, { "epoch": 1.7655534271674855, "learning_rate": 1.792321315990597e-06, "loss": 4.1247, "step": 2650 }, { "epoch": 1.766219705171983, "learning_rate": 1.7823031520008943e-06, "loss": 4.1653, "step": 2651 }, { "epoch": 1.7668859831764805, "learning_rate": 1.7723120297384877e-06, "loss": 4.1376, "step": 2652 }, { "epoch": 1.7675522611809777, "learning_rate": 1.762347960840055e-06, "loss": 4.1746, "step": 2653 }, { "epoch": 1.768218539185475, "learning_rate": 1.7524109569107911e-06, "loss": 4.1294, "step": 2654 }, { "epoch": 1.7688848171899725, "learning_rate": 1.7425010295243543e-06, "loss": 4.0916, "step": 2655 }, { "epoch": 1.76955109519447, "learning_rate": 1.7326181902228623e-06, "loss": 4.156, "step": 2656 }, { "epoch": 1.7702173731989672, "learning_rate": 1.7227624505169044e-06, "loss": 4.1364, "step": 2657 }, { "epoch": 1.7708836512034647, "learning_rate": 1.7129338218854818e-06, "loss": 4.1405, "step": 2658 }, { "epoch": 1.771549929207962, "learning_rate": 1.703132315776035e-06, "loss": 4.2146, "step": 2659 }, { "epoch": 1.7722162072124594, "learning_rate": 1.6933579436044094e-06, "loss": 4.1722, "step": 2660 }, { "epoch": 1.772882485216957, "learning_rate": 1.6836107167548493e-06, "loss": 4.1037, "step": 2661 }, { "epoch": 1.7735487632214542, "learning_rate": 1.6738906465799759e-06, "loss": 4.1115, "step": 2662 }, { "epoch": 1.7742150412259514, "learning_rate": 1.6641977444007888e-06, "loss": 4.2328, "step": 2663 }, { "epoch": 1.774881319230449, "learning_rate": 1.6545320215066496e-06, "loss": 4.16, "step": 2664 }, { "epoch": 1.7755475972349464, "learning_rate": 1.6448934891552526e-06, "loss": 4.1812, "step": 2665 }, { "epoch": 1.7762138752394436, "learning_rate": 1.6352821585726264e-06, "loss": 4.1562, "step": 2666 }, { "epoch": 1.7768801532439409, "learning_rate": 1.6256980409531192e-06, "loss": 4.1683, "step": 2667 }, { "epoch": 1.7775464312484384, "learning_rate": 1.6161411474593878e-06, "loss": 4.1129, "step": 2668 }, { "epoch": 1.7782127092529358, "learning_rate": 1.6066114892223676e-06, "loss": 4.1318, "step": 2669 }, { "epoch": 1.7788789872574333, "learning_rate": 1.5971090773412966e-06, "loss": 4.1885, "step": 2670 }, { "epoch": 1.7795452652619306, "learning_rate": 1.5876339228836579e-06, "loss": 4.1311, "step": 2671 }, { "epoch": 1.7802115432664278, "learning_rate": 1.578186036885193e-06, "loss": 4.1314, "step": 2672 }, { "epoch": 1.7808778212709253, "learning_rate": 1.5687654303498889e-06, "loss": 4.1178, "step": 2673 }, { "epoch": 1.7815440992754228, "learning_rate": 1.5593721142499545e-06, "loss": 4.1938, "step": 2674 }, { "epoch": 1.78221037727992, "learning_rate": 1.5500060995258137e-06, "loss": 4.1825, "step": 2675 }, { "epoch": 1.7828766552844173, "learning_rate": 1.5406673970861012e-06, "loss": 4.1473, "step": 2676 }, { "epoch": 1.7835429332889148, "learning_rate": 1.5313560178076307e-06, "loss": 4.1953, "step": 2677 }, { "epoch": 1.7842092112934123, "learning_rate": 1.522071972535391e-06, "loss": 4.129, "step": 2678 }, { "epoch": 1.7848754892979095, "learning_rate": 1.5128152720825462e-06, "loss": 4.1813, "step": 2679 }, { "epoch": 1.7855417673024068, "learning_rate": 1.503585927230411e-06, "loss": 4.1631, "step": 2680 }, { "epoch": 1.7862080453069042, "learning_rate": 1.4943839487284173e-06, "loss": 4.109, "step": 2681 }, { "epoch": 1.7868743233114017, "learning_rate": 1.485209347294153e-06, "loss": 4.149, "step": 2682 }, { "epoch": 1.7875406013158992, "learning_rate": 1.4760621336133013e-06, "loss": 4.1707, "step": 2683 }, { "epoch": 1.7882068793203965, "learning_rate": 1.4669423183396508e-06, "loss": 4.1598, "step": 2684 }, { "epoch": 1.7888731573248937, "learning_rate": 1.4578499120950829e-06, "loss": 4.1886, "step": 2685 }, { "epoch": 1.7895394353293912, "learning_rate": 1.448784925469554e-06, "loss": 4.1403, "step": 2686 }, { "epoch": 1.7902057133338887, "learning_rate": 1.43974736902108e-06, "loss": 4.1691, "step": 2687 }, { "epoch": 1.790871991338386, "learning_rate": 1.4307372532757324e-06, "loss": 4.1826, "step": 2688 }, { "epoch": 1.7915382693428832, "learning_rate": 1.4217545887276251e-06, "loss": 4.2406, "step": 2689 }, { "epoch": 1.7922045473473807, "learning_rate": 1.412799385838895e-06, "loss": 4.1845, "step": 2690 }, { "epoch": 1.7928708253518781, "learning_rate": 1.403871655039693e-06, "loss": 4.1568, "step": 2691 }, { "epoch": 1.7935371033563754, "learning_rate": 1.394971406728185e-06, "loss": 4.1694, "step": 2692 }, { "epoch": 1.7942033813608729, "learning_rate": 1.386098651270512e-06, "loss": 4.1339, "step": 2693 }, { "epoch": 1.7948696593653701, "learning_rate": 1.3772533990008053e-06, "loss": 4.1451, "step": 2694 }, { "epoch": 1.7955359373698676, "learning_rate": 1.368435660221154e-06, "loss": 4.1425, "step": 2695 }, { "epoch": 1.796202215374365, "learning_rate": 1.3596454452016128e-06, "loss": 4.1463, "step": 2696 }, { "epoch": 1.7968684933788623, "learning_rate": 1.3508827641801669e-06, "loss": 4.1108, "step": 2697 }, { "epoch": 1.7975347713833596, "learning_rate": 1.3421476273627498e-06, "loss": 4.1339, "step": 2698 }, { "epoch": 1.798201049387857, "learning_rate": 1.3334400449231954e-06, "loss": 4.1574, "step": 2699 }, { "epoch": 1.7988673273923546, "learning_rate": 1.324760027003255e-06, "loss": 4.1617, "step": 2700 }, { "epoch": 1.7995336053968518, "learning_rate": 1.316107583712578e-06, "loss": 4.2025, "step": 2701 }, { "epoch": 1.800199883401349, "learning_rate": 1.3074827251286892e-06, "loss": 4.1051, "step": 2702 }, { "epoch": 1.8008661614058465, "learning_rate": 1.2988854612969863e-06, "loss": 4.1306, "step": 2703 }, { "epoch": 1.801532439410344, "learning_rate": 1.2903158022307376e-06, "loss": 4.1393, "step": 2704 }, { "epoch": 1.8021987174148415, "learning_rate": 1.2817737579110506e-06, "loss": 4.1759, "step": 2705 }, { "epoch": 1.8028649954193388, "learning_rate": 1.2732593382868668e-06, "loss": 4.1367, "step": 2706 }, { "epoch": 1.803531273423836, "learning_rate": 1.2647725532749732e-06, "loss": 4.1571, "step": 2707 }, { "epoch": 1.8041975514283335, "learning_rate": 1.2563134127599407e-06, "loss": 4.1177, "step": 2708 }, { "epoch": 1.804863829432831, "learning_rate": 1.2478819265941604e-06, "loss": 4.1713, "step": 2709 }, { "epoch": 1.8055301074373282, "learning_rate": 1.2394781045978188e-06, "loss": 4.1605, "step": 2710 }, { "epoch": 1.8061963854418255, "learning_rate": 1.2311019565588694e-06, "loss": 4.1477, "step": 2711 }, { "epoch": 1.806862663446323, "learning_rate": 1.2227534922330391e-06, "loss": 4.1815, "step": 2712 }, { "epoch": 1.8075289414508204, "learning_rate": 1.2144327213438138e-06, "loss": 4.1544, "step": 2713 }, { "epoch": 1.8081952194553177, "learning_rate": 1.2061396535824249e-06, "loss": 4.1721, "step": 2714 }, { "epoch": 1.808861497459815, "learning_rate": 1.1978742986078316e-06, "loss": 4.1233, "step": 2715 }, { "epoch": 1.8095277754643124, "learning_rate": 1.1896366660467173e-06, "loss": 4.1123, "step": 2716 }, { "epoch": 1.81019405346881, "learning_rate": 1.1814267654934846e-06, "loss": 4.1669, "step": 2717 }, { "epoch": 1.8108603314733074, "learning_rate": 1.1732446065102292e-06, "loss": 4.1485, "step": 2718 }, { "epoch": 1.8115266094778046, "learning_rate": 1.1650901986267365e-06, "loss": 4.1477, "step": 2719 }, { "epoch": 1.812192887482302, "learning_rate": 1.1569635513404758e-06, "loss": 4.1144, "step": 2720 }, { "epoch": 1.8128591654867994, "learning_rate": 1.1488646741165787e-06, "loss": 4.1732, "step": 2721 }, { "epoch": 1.8135254434912969, "learning_rate": 1.140793576387833e-06, "loss": 4.1297, "step": 2722 }, { "epoch": 1.814191721495794, "learning_rate": 1.1327502675546748e-06, "loss": 4.1103, "step": 2723 }, { "epoch": 1.8148579995002914, "learning_rate": 1.1247347569851684e-06, "loss": 4.1335, "step": 2724 }, { "epoch": 1.8155242775047888, "learning_rate": 1.1167470540150048e-06, "loss": 4.1703, "step": 2725 }, { "epoch": 1.8161905555092863, "learning_rate": 1.1087871679474921e-06, "loss": 4.1251, "step": 2726 }, { "epoch": 1.8168568335137836, "learning_rate": 1.1008551080535334e-06, "loss": 4.1214, "step": 2727 }, { "epoch": 1.817523111518281, "learning_rate": 1.092950883571625e-06, "loss": 4.1474, "step": 2728 }, { "epoch": 1.8181893895227783, "learning_rate": 1.0850745037078419e-06, "loss": 4.1792, "step": 2729 }, { "epoch": 1.8188556675272758, "learning_rate": 1.0772259776358318e-06, "loss": 4.1364, "step": 2730 }, { "epoch": 1.8195219455317733, "learning_rate": 1.0694053144967936e-06, "loss": 4.1854, "step": 2731 }, { "epoch": 1.8201882235362705, "learning_rate": 1.0616125233994857e-06, "loss": 4.1493, "step": 2732 }, { "epoch": 1.8208545015407678, "learning_rate": 1.0538476134201919e-06, "loss": 4.1946, "step": 2733 }, { "epoch": 1.8215207795452653, "learning_rate": 1.046110593602731e-06, "loss": 4.1756, "step": 2734 }, { "epoch": 1.8221870575497627, "learning_rate": 1.038401472958439e-06, "loss": 4.1122, "step": 2735 }, { "epoch": 1.82285333555426, "learning_rate": 1.0307202604661448e-06, "loss": 4.1285, "step": 2736 }, { "epoch": 1.8235196135587572, "learning_rate": 1.0230669650721864e-06, "loss": 4.1297, "step": 2737 }, { "epoch": 1.8241858915632547, "learning_rate": 1.0154415956903834e-06, "loss": 4.1373, "step": 2738 }, { "epoch": 1.8248521695677522, "learning_rate": 1.0078441612020262e-06, "loss": 4.1579, "step": 2739 }, { "epoch": 1.8255184475722497, "learning_rate": 1.0002746704558725e-06, "loss": 4.1326, "step": 2740 }, { "epoch": 1.826184725576747, "learning_rate": 9.927331322681337e-07, "loss": 4.1147, "step": 2741 }, { "epoch": 1.8268510035812442, "learning_rate": 9.85219555422462e-07, "loss": 4.1696, "step": 2742 }, { "epoch": 1.8275172815857417, "learning_rate": 9.77733948669951e-07, "loss": 4.1549, "step": 2743 }, { "epoch": 1.8281835595902391, "learning_rate": 9.702763207290994e-07, "loss": 4.1306, "step": 2744 }, { "epoch": 1.8288498375947364, "learning_rate": 9.628466802858394e-07, "loss": 4.1374, "step": 2745 }, { "epoch": 1.8295161155992337, "learning_rate": 9.554450359934964e-07, "loss": 4.2407, "step": 2746 }, { "epoch": 1.8301823936037311, "learning_rate": 9.480713964727855e-07, "loss": 4.1069, "step": 2747 }, { "epoch": 1.8308486716082286, "learning_rate": 9.40725770311815e-07, "loss": 4.2067, "step": 2748 }, { "epoch": 1.8315149496127259, "learning_rate": 9.334081660660577e-07, "loss": 4.1821, "step": 2749 }, { "epoch": 1.8321812276172231, "learning_rate": 9.261185922583488e-07, "loss": 4.1194, "step": 2750 }, { "epoch": 1.8328475056217206, "learning_rate": 9.188570573788801e-07, "loss": 4.1765, "step": 2751 }, { "epoch": 1.833513783626218, "learning_rate": 9.116235698851866e-07, "loss": 4.1414, "step": 2752 }, { "epoch": 1.8341800616307156, "learning_rate": 9.044181382021289e-07, "loss": 4.1696, "step": 2753 }, { "epoch": 1.8348463396352128, "learning_rate": 8.972407707219049e-07, "loss": 4.1508, "step": 2754 }, { "epoch": 1.83551261763971, "learning_rate": 8.900914758040141e-07, "loss": 4.1665, "step": 2755 }, { "epoch": 1.8361788956442076, "learning_rate": 8.829702617752622e-07, "loss": 4.1618, "step": 2756 }, { "epoch": 1.836845173648705, "learning_rate": 8.758771369297536e-07, "loss": 4.1606, "step": 2757 }, { "epoch": 1.8375114516532023, "learning_rate": 8.688121095288715e-07, "loss": 4.0852, "step": 2758 }, { "epoch": 1.8381777296576995, "learning_rate": 8.617751878012726e-07, "loss": 4.124, "step": 2759 }, { "epoch": 1.838844007662197, "learning_rate": 8.547663799428924e-07, "loss": 4.1193, "step": 2760 }, { "epoch": 1.8395102856666945, "learning_rate": 8.477856941169066e-07, "loss": 4.1405, "step": 2761 }, { "epoch": 1.8401765636711918, "learning_rate": 8.408331384537393e-07, "loss": 4.1744, "step": 2762 }, { "epoch": 1.8408428416756892, "learning_rate": 8.339087210510632e-07, "loss": 4.1227, "step": 2763 }, { "epoch": 1.8415091196801865, "learning_rate": 8.270124499737631e-07, "loss": 4.1077, "step": 2764 }, { "epoch": 1.842175397684684, "learning_rate": 8.201443332539499e-07, "loss": 4.148, "step": 2765 }, { "epoch": 1.8428416756891814, "learning_rate": 8.133043788909417e-07, "loss": 4.1869, "step": 2766 }, { "epoch": 1.8435079536936787, "learning_rate": 8.064925948512575e-07, "loss": 4.1648, "step": 2767 }, { "epoch": 1.844174231698176, "learning_rate": 7.99708989068601e-07, "loss": 4.1334, "step": 2768 }, { "epoch": 1.8448405097026734, "learning_rate": 7.929535694438661e-07, "loss": 4.148, "step": 2769 }, { "epoch": 1.845506787707171, "learning_rate": 7.86226343845109e-07, "loss": 4.1497, "step": 2770 }, { "epoch": 1.8461730657116682, "learning_rate": 7.795273201075454e-07, "loss": 4.0929, "step": 2771 }, { "epoch": 1.8468393437161654, "learning_rate": 7.728565060335563e-07, "loss": 4.1003, "step": 2772 }, { "epoch": 1.847505621720663, "learning_rate": 7.662139093926601e-07, "loss": 4.1927, "step": 2773 }, { "epoch": 1.8481718997251604, "learning_rate": 7.595995379215098e-07, "loss": 4.0996, "step": 2774 }, { "epoch": 1.8488381777296579, "learning_rate": 7.530133993238847e-07, "loss": 4.1406, "step": 2775 }, { "epoch": 1.8495044557341551, "learning_rate": 7.464555012706847e-07, "loss": 4.1598, "step": 2776 }, { "epoch": 1.8501707337386524, "learning_rate": 7.399258513999113e-07, "loss": 4.1274, "step": 2777 }, { "epoch": 1.8508370117431499, "learning_rate": 7.334244573166726e-07, "loss": 4.1558, "step": 2778 }, { "epoch": 1.8515032897476473, "learning_rate": 7.269513265931644e-07, "loss": 4.1426, "step": 2779 }, { "epoch": 1.8521695677521446, "learning_rate": 7.205064667686584e-07, "loss": 4.1362, "step": 2780 }, { "epoch": 1.8528358457566418, "learning_rate": 7.140898853495032e-07, "loss": 4.163, "step": 2781 }, { "epoch": 1.8535021237611393, "learning_rate": 7.077015898091177e-07, "loss": 4.1926, "step": 2782 }, { "epoch": 1.8541684017656368, "learning_rate": 7.01341587587967e-07, "loss": 4.1262, "step": 2783 }, { "epoch": 1.854834679770134, "learning_rate": 6.9500988609357e-07, "loss": 4.1547, "step": 2784 }, { "epoch": 1.8555009577746313, "learning_rate": 6.88706492700475e-07, "loss": 4.1259, "step": 2785 }, { "epoch": 1.8561672357791288, "learning_rate": 6.824314147502703e-07, "loss": 4.1335, "step": 2786 }, { "epoch": 1.8568335137836263, "learning_rate": 6.761846595515515e-07, "loss": 4.1376, "step": 2787 }, { "epoch": 1.8574997917881237, "learning_rate": 6.699662343799428e-07, "loss": 4.1658, "step": 2788 }, { "epoch": 1.858166069792621, "learning_rate": 6.637761464780623e-07, "loss": 4.1744, "step": 2789 }, { "epoch": 1.8588323477971183, "learning_rate": 6.576144030555259e-07, "loss": 4.1345, "step": 2790 }, { "epoch": 1.8594986258016157, "learning_rate": 6.514810112889319e-07, "loss": 4.183, "step": 2791 }, { "epoch": 1.8601649038061132, "learning_rate": 6.453759783218688e-07, "loss": 4.1296, "step": 2792 }, { "epoch": 1.8608311818106105, "learning_rate": 6.392993112648793e-07, "loss": 4.136, "step": 2793 }, { "epoch": 1.8614974598151077, "learning_rate": 6.332510171954853e-07, "loss": 4.1041, "step": 2794 }, { "epoch": 1.8621637378196052, "learning_rate": 6.272311031581518e-07, "loss": 4.1936, "step": 2795 }, { "epoch": 1.8628300158241027, "learning_rate": 6.212395761642897e-07, "loss": 4.1364, "step": 2796 }, { "epoch": 1.8634962938286, "learning_rate": 6.152764431922586e-07, "loss": 4.1739, "step": 2797 }, { "epoch": 1.8641625718330974, "learning_rate": 6.093417111873306e-07, "loss": 4.1264, "step": 2798 }, { "epoch": 1.8648288498375947, "learning_rate": 6.034353870617127e-07, "loss": 4.1592, "step": 2799 }, { "epoch": 1.8654951278420921, "learning_rate": 5.975574776945103e-07, "loss": 4.1894, "step": 2800 }, { "epoch": 1.8661614058465896, "learning_rate": 5.917079899317557e-07, "loss": 4.1448, "step": 2801 }, { "epoch": 1.8668276838510869, "learning_rate": 5.858869305863601e-07, "loss": 4.1653, "step": 2802 }, { "epoch": 1.8674939618555841, "learning_rate": 5.800943064381282e-07, "loss": 4.213, "step": 2803 }, { "epoch": 1.8681602398600816, "learning_rate": 5.743301242337546e-07, "loss": 4.1559, "step": 2804 }, { "epoch": 1.868826517864579, "learning_rate": 5.685943906867996e-07, "loss": 4.163, "step": 2805 }, { "epoch": 1.8694927958690764, "learning_rate": 5.628871124776863e-07, "loss": 4.1862, "step": 2806 }, { "epoch": 1.8701590738735736, "learning_rate": 5.572082962537056e-07, "loss": 4.1827, "step": 2807 }, { "epoch": 1.870825351878071, "learning_rate": 5.515579486289891e-07, "loss": 4.147, "step": 2808 }, { "epoch": 1.8714916298825686, "learning_rate": 5.459360761845139e-07, "loss": 4.1508, "step": 2809 }, { "epoch": 1.872157907887066, "learning_rate": 5.403426854680982e-07, "loss": 4.1067, "step": 2810 }, { "epoch": 1.8728241858915633, "learning_rate": 5.347777829943835e-07, "loss": 4.146, "step": 2811 }, { "epoch": 1.8734904638960606, "learning_rate": 5.292413752448239e-07, "loss": 4.1318, "step": 2812 }, { "epoch": 1.874156741900558, "learning_rate": 5.237334686676948e-07, "loss": 4.1479, "step": 2813 }, { "epoch": 1.8748230199050555, "learning_rate": 5.18254069678073e-07, "loss": 4.1064, "step": 2814 }, { "epoch": 1.8754892979095528, "learning_rate": 5.128031846578285e-07, "loss": 4.1443, "step": 2815 }, { "epoch": 1.87615557591405, "learning_rate": 5.073808199556329e-07, "loss": 4.1644, "step": 2816 }, { "epoch": 1.8768218539185475, "learning_rate": 5.019869818869261e-07, "loss": 4.0891, "step": 2817 }, { "epoch": 1.877488131923045, "learning_rate": 4.966216767339299e-07, "loss": 4.1562, "step": 2818 }, { "epoch": 1.8781544099275422, "learning_rate": 4.912849107456318e-07, "loss": 4.1529, "step": 2819 }, { "epoch": 1.8788206879320395, "learning_rate": 4.859766901377849e-07, "loss": 4.1639, "step": 2820 }, { "epoch": 1.879486965936537, "learning_rate": 4.806970210928824e-07, "loss": 4.1728, "step": 2821 }, { "epoch": 1.8801532439410344, "learning_rate": 4.7544590976018324e-07, "loss": 4.1312, "step": 2822 }, { "epoch": 1.880819521945532, "learning_rate": 4.702233622556673e-07, "loss": 4.166, "step": 2823 }, { "epoch": 1.8814857999500292, "learning_rate": 4.650293846620496e-07, "loss": 4.1841, "step": 2824 }, { "epoch": 1.8821520779545264, "learning_rate": 4.598639830287799e-07, "loss": 4.1688, "step": 2825 }, { "epoch": 1.882818355959024, "learning_rate": 4.547271633720179e-07, "loss": 4.1762, "step": 2826 }, { "epoch": 1.8834846339635214, "learning_rate": 4.496189316746308e-07, "loss": 4.2074, "step": 2827 }, { "epoch": 1.8841509119680186, "learning_rate": 4.4453929388618976e-07, "loss": 4.1532, "step": 2828 }, { "epoch": 1.884817189972516, "learning_rate": 4.3948825592297347e-07, "loss": 4.1062, "step": 2829 }, { "epoch": 1.8854834679770134, "learning_rate": 4.344658236679372e-07, "loss": 4.1571, "step": 2830 }, { "epoch": 1.8861497459815109, "learning_rate": 4.294720029707211e-07, "loss": 4.1198, "step": 2831 }, { "epoch": 1.8868160239860081, "learning_rate": 4.2450679964765316e-07, "loss": 4.1972, "step": 2832 }, { "epoch": 1.8874823019905054, "learning_rate": 4.195702194817186e-07, "loss": 4.218, "step": 2833 }, { "epoch": 1.8881485799950029, "learning_rate": 4.146622682225626e-07, "loss": 4.2091, "step": 2834 }, { "epoch": 1.8888148579995003, "learning_rate": 4.097829515864987e-07, "loss": 4.134, "step": 2835 }, { "epoch": 1.8894811360039978, "learning_rate": 4.0493227525648105e-07, "loss": 4.1305, "step": 2836 }, { "epoch": 1.890147414008495, "learning_rate": 4.0011024488210703e-07, "loss": 4.1516, "step": 2837 }, { "epoch": 1.8908136920129923, "learning_rate": 3.953168660796119e-07, "loss": 4.1092, "step": 2838 }, { "epoch": 1.8914799700174898, "learning_rate": 3.905521444318605e-07, "loss": 4.1737, "step": 2839 }, { "epoch": 1.8921462480219873, "learning_rate": 3.8581608548833856e-07, "loss": 4.1772, "step": 2840 }, { "epoch": 1.8928125260264845, "learning_rate": 3.811086947651504e-07, "loss": 4.1143, "step": 2841 }, { "epoch": 1.8934788040309818, "learning_rate": 3.764299777450075e-07, "loss": 4.1709, "step": 2842 }, { "epoch": 1.8941450820354793, "learning_rate": 3.717799398772259e-07, "loss": 4.1753, "step": 2843 }, { "epoch": 1.8948113600399767, "learning_rate": 3.6715858657772604e-07, "loss": 4.1413, "step": 2844 }, { "epoch": 1.8954776380444742, "learning_rate": 3.6256592322900793e-07, "loss": 4.1726, "step": 2845 }, { "epoch": 1.8961439160489715, "learning_rate": 3.580019551801622e-07, "loss": 4.1293, "step": 2846 }, { "epoch": 1.8968101940534687, "learning_rate": 3.5346668774685897e-07, "loss": 4.1837, "step": 2847 }, { "epoch": 1.8974764720579662, "learning_rate": 3.489601262113368e-07, "loss": 4.1904, "step": 2848 }, { "epoch": 1.8981427500624637, "learning_rate": 3.4448227582240257e-07, "loss": 4.1837, "step": 2849 }, { "epoch": 1.898809028066961, "learning_rate": 3.400331417954289e-07, "loss": 4.1112, "step": 2850 }, { "epoch": 1.8994753060714582, "learning_rate": 3.35612729312329e-07, "loss": 4.1321, "step": 2851 }, { "epoch": 1.9001415840759557, "learning_rate": 3.3122104352157626e-07, "loss": 4.1549, "step": 2852 }, { "epoch": 1.9008078620804532, "learning_rate": 3.268580895381762e-07, "loss": 4.1446, "step": 2853 }, { "epoch": 1.9014741400849504, "learning_rate": 3.225238724436863e-07, "loss": 4.1812, "step": 2854 }, { "epoch": 1.9021404180894477, "learning_rate": 3.182183972861713e-07, "loss": 4.1392, "step": 2855 }, { "epoch": 1.9028066960939451, "learning_rate": 3.1394166908023936e-07, "loss": 4.1523, "step": 2856 }, { "epoch": 1.9034729740984426, "learning_rate": 3.09693692807006e-07, "loss": 4.1383, "step": 2857 }, { "epoch": 1.90413925210294, "learning_rate": 3.0547447341410797e-07, "loss": 4.1178, "step": 2858 }, { "epoch": 1.9048055301074374, "learning_rate": 3.0128401581567824e-07, "loss": 4.1357, "step": 2859 }, { "epoch": 1.9054718081119346, "learning_rate": 2.971223248923599e-07, "loss": 4.1673, "step": 2860 }, { "epoch": 1.906138086116432, "learning_rate": 2.9298940549128964e-07, "loss": 4.1446, "step": 2861 }, { "epoch": 1.9068043641209296, "learning_rate": 2.8888526242608347e-07, "loss": 4.181, "step": 2862 }, { "epoch": 1.9074706421254268, "learning_rate": 2.8480990047686227e-07, "loss": 4.1636, "step": 2863 }, { "epoch": 1.908136920129924, "learning_rate": 2.807633243902041e-07, "loss": 4.1482, "step": 2864 }, { "epoch": 1.9088031981344216, "learning_rate": 2.7674553887917234e-07, "loss": 4.1226, "step": 2865 }, { "epoch": 1.909469476138919, "learning_rate": 2.727565486232986e-07, "loss": 4.1825, "step": 2866 }, { "epoch": 1.9101357541434163, "learning_rate": 2.687963582685665e-07, "loss": 4.1094, "step": 2867 }, { "epoch": 1.9108020321479136, "learning_rate": 2.6486497242742827e-07, "loss": 4.1565, "step": 2868 }, { "epoch": 1.911468310152411, "learning_rate": 2.6096239567877656e-07, "loss": 4.1671, "step": 2869 }, { "epoch": 1.9121345881569085, "learning_rate": 2.570886325679617e-07, "loss": 4.1107, "step": 2870 }, { "epoch": 1.912800866161406, "learning_rate": 2.5324368760676066e-07, "loss": 4.1423, "step": 2871 }, { "epoch": 1.9134671441659032, "learning_rate": 2.494275652733968e-07, "loss": 4.1561, "step": 2872 }, { "epoch": 1.9141334221704005, "learning_rate": 2.456402700125232e-07, "loss": 4.143, "step": 2873 }, { "epoch": 1.914799700174898, "learning_rate": 2.418818062352113e-07, "loss": 4.1643, "step": 2874 }, { "epoch": 1.9154659781793955, "learning_rate": 2.3815217831895943e-07, "loss": 4.2009, "step": 2875 }, { "epoch": 1.9161322561838927, "learning_rate": 2.344513906076734e-07, "loss": 4.1067, "step": 2876 }, { "epoch": 1.91679853418839, "learning_rate": 2.30779447411672e-07, "loss": 4.1757, "step": 2877 }, { "epoch": 1.9174648121928874, "learning_rate": 2.2713635300768422e-07, "loss": 4.1784, "step": 2878 }, { "epoch": 1.918131090197385, "learning_rate": 2.2352211163883253e-07, "loss": 4.1617, "step": 2879 }, { "epoch": 1.9187973682018822, "learning_rate": 2.1993672751463579e-07, "loss": 4.0886, "step": 2880 }, { "epoch": 1.9194636462063797, "learning_rate": 2.1638020481100086e-07, "loss": 4.1535, "step": 2881 }, { "epoch": 1.920129924210877, "learning_rate": 2.1285254767022255e-07, "loss": 4.1984, "step": 2882 }, { "epoch": 1.9207962022153744, "learning_rate": 2.0935376020097263e-07, "loss": 4.1113, "step": 2883 }, { "epoch": 1.9214624802198719, "learning_rate": 2.058838464783025e-07, "loss": 4.1652, "step": 2884 }, { "epoch": 1.9221287582243691, "learning_rate": 2.0244281054363213e-07, "loss": 4.15, "step": 2885 }, { "epoch": 1.9227950362288664, "learning_rate": 1.990306564047445e-07, "loss": 4.135, "step": 2886 }, { "epoch": 1.9234613142333639, "learning_rate": 1.956473880357912e-07, "loss": 4.2053, "step": 2887 }, { "epoch": 1.9241275922378613, "learning_rate": 1.9229300937727291e-07, "loss": 4.1906, "step": 2888 }, { "epoch": 1.9247938702423586, "learning_rate": 1.889675243360478e-07, "loss": 4.1761, "step": 2889 }, { "epoch": 1.9254601482468559, "learning_rate": 1.8567093678531212e-07, "loss": 4.1709, "step": 2890 }, { "epoch": 1.9261264262513533, "learning_rate": 1.8240325056462227e-07, "loss": 4.1371, "step": 2891 }, { "epoch": 1.9267927042558508, "learning_rate": 1.791644694798561e-07, "loss": 4.1329, "step": 2892 }, { "epoch": 1.9274589822603483, "learning_rate": 1.7595459730323505e-07, "loss": 4.1657, "step": 2893 }, { "epoch": 1.9281252602648455, "learning_rate": 1.7277363777330745e-07, "loss": 4.1333, "step": 2894 }, { "epoch": 1.9287915382693428, "learning_rate": 1.6962159459494588e-07, "loss": 4.1404, "step": 2895 }, { "epoch": 1.9294578162738403, "learning_rate": 1.6649847143934972e-07, "loss": 4.1422, "step": 2896 }, { "epoch": 1.9301240942783378, "learning_rate": 1.634042719440232e-07, "loss": 4.1798, "step": 2897 }, { "epoch": 1.930790372282835, "learning_rate": 1.6033899971279743e-07, "loss": 4.1616, "step": 2898 }, { "epoch": 1.9314566502873323, "learning_rate": 1.573026583158027e-07, "loss": 4.1858, "step": 2899 }, { "epoch": 1.9321229282918297, "learning_rate": 1.542952512894741e-07, "loss": 4.1286, "step": 2900 }, { "epoch": 1.9327892062963272, "learning_rate": 1.5131678213655133e-07, "loss": 4.1216, "step": 2901 }, { "epoch": 1.9334554843008245, "learning_rate": 1.4836725432606503e-07, "loss": 4.1881, "step": 2902 }, { "epoch": 1.9341217623053217, "learning_rate": 1.4544667129333944e-07, "loss": 4.1873, "step": 2903 }, { "epoch": 1.9347880403098192, "learning_rate": 1.425550364399897e-07, "loss": 4.0843, "step": 2904 }, { "epoch": 1.9354543183143167, "learning_rate": 1.3969235313390782e-07, "loss": 4.1932, "step": 2905 }, { "epoch": 1.9361205963188142, "learning_rate": 1.3685862470927403e-07, "loss": 4.1585, "step": 2906 }, { "epoch": 1.9367868743233114, "learning_rate": 1.3405385446654261e-07, "loss": 4.1353, "step": 2907 }, { "epoch": 1.9374531523278087, "learning_rate": 1.312780456724366e-07, "loss": 4.099, "step": 2908 }, { "epoch": 1.9381194303323062, "learning_rate": 1.285312015599477e-07, "loss": 4.149, "step": 2909 }, { "epoch": 1.9387857083368036, "learning_rate": 1.2581332532833613e-07, "loss": 4.1043, "step": 2910 }, { "epoch": 1.939451986341301, "learning_rate": 1.2312442014311977e-07, "loss": 4.2044, "step": 2911 }, { "epoch": 1.9401182643457981, "learning_rate": 1.2046448913607678e-07, "loss": 4.1839, "step": 2912 }, { "epoch": 1.9407845423502956, "learning_rate": 1.1783353540523733e-07, "loss": 4.1679, "step": 2913 }, { "epoch": 1.941450820354793, "learning_rate": 1.1523156201488361e-07, "loss": 4.1182, "step": 2914 }, { "epoch": 1.9421170983592904, "learning_rate": 1.1265857199553864e-07, "loss": 4.1569, "step": 2915 }, { "epoch": 1.9427833763637878, "learning_rate": 1.101145683439747e-07, "loss": 4.1428, "step": 2916 }, { "epoch": 1.943449654368285, "learning_rate": 1.0759955402320221e-07, "loss": 4.1166, "step": 2917 }, { "epoch": 1.9441159323727826, "learning_rate": 1.0511353196246132e-07, "loss": 4.1682, "step": 2918 }, { "epoch": 1.94478221037728, "learning_rate": 1.0265650505723589e-07, "loss": 4.1749, "step": 2919 }, { "epoch": 1.9454484883817773, "learning_rate": 1.0022847616923126e-07, "loss": 4.1354, "step": 2920 }, { "epoch": 1.9461147663862746, "learning_rate": 9.782944812637973e-08, "loss": 4.1626, "step": 2921 }, { "epoch": 1.946781044390772, "learning_rate": 9.545942372283789e-08, "loss": 4.1018, "step": 2922 }, { "epoch": 1.9474473223952695, "learning_rate": 9.311840571898101e-08, "loss": 4.1339, "step": 2923 }, { "epoch": 1.9481136003997668, "learning_rate": 9.080639684139747e-08, "loss": 4.1663, "step": 2924 }, { "epoch": 1.948779878404264, "learning_rate": 8.852339978289714e-08, "loss": 4.1958, "step": 2925 }, { "epoch": 1.9494461564087615, "learning_rate": 8.626941720249193e-08, "loss": 4.1762, "step": 2926 }, { "epoch": 1.950112434413259, "learning_rate": 8.404445172539854e-08, "loss": 4.1507, "step": 2927 }, { "epoch": 1.9507787124177565, "learning_rate": 8.184850594304683e-08, "loss": 4.1392, "step": 2928 }, { "epoch": 1.9514449904222537, "learning_rate": 7.968158241306035e-08, "loss": 4.1445, "step": 2929 }, { "epoch": 1.952111268426751, "learning_rate": 7.75436836592619e-08, "loss": 4.1743, "step": 2930 }, { "epoch": 1.9527775464312485, "learning_rate": 7.543481217166803e-08, "loss": 4.1426, "step": 2931 }, { "epoch": 1.953443824435746, "learning_rate": 7.335497040648898e-08, "loss": 4.1183, "step": 2932 }, { "epoch": 1.9541101024402432, "learning_rate": 7.130416078612312e-08, "loss": 4.071, "step": 2933 }, { "epoch": 1.9547763804447404, "learning_rate": 6.928238569915701e-08, "loss": 4.1564, "step": 2934 }, { "epoch": 1.955442658449238, "learning_rate": 6.728964750035705e-08, "loss": 4.1987, "step": 2935 }, { "epoch": 1.9561089364537354, "learning_rate": 6.53259485106722e-08, "loss": 4.1178, "step": 2936 }, { "epoch": 1.9567752144582327, "learning_rate": 6.339129101722574e-08, "loss": 4.1754, "step": 2937 }, { "epoch": 1.95744149246273, "learning_rate": 6.148567727332633e-08, "loss": 4.167, "step": 2938 }, { "epoch": 1.9581077704672274, "learning_rate": 5.960910949844301e-08, "loss": 4.179, "step": 2939 }, { "epoch": 1.9587740484717249, "learning_rate": 5.776158987822467e-08, "loss": 4.1288, "step": 2940 }, { "epoch": 1.9594403264762223, "learning_rate": 5.5943120564477816e-08, "loss": 4.1622, "step": 2941 }, { "epoch": 1.9601066044807196, "learning_rate": 5.415370367518602e-08, "loss": 4.1316, "step": 2942 }, { "epoch": 1.9607728824852169, "learning_rate": 5.2393341294482145e-08, "loss": 4.1531, "step": 2943 }, { "epoch": 1.9614391604897143, "learning_rate": 5.0662035472673344e-08, "loss": 4.1871, "step": 2944 }, { "epoch": 1.9621054384942118, "learning_rate": 4.89597882262105e-08, "loss": 4.1169, "step": 2945 }, { "epoch": 1.962771716498709, "learning_rate": 4.728660153771047e-08, "loss": 4.1663, "step": 2946 }, { "epoch": 1.9634379945032063, "learning_rate": 4.564247735593941e-08, "loss": 4.2037, "step": 2947 }, { "epoch": 1.9641042725077038, "learning_rate": 4.402741759581275e-08, "loss": 4.1727, "step": 2948 }, { "epoch": 1.9647705505122013, "learning_rate": 4.244142413839525e-08, "loss": 4.144, "step": 2949 }, { "epoch": 1.9654368285166985, "learning_rate": 4.088449883089818e-08, "loss": 4.1395, "step": 2950 }, { "epoch": 1.966103106521196, "learning_rate": 3.935664348668211e-08, "loss": 4.1512, "step": 2951 }, { "epoch": 1.9667693845256933, "learning_rate": 3.7857859885240266e-08, "loss": 4.0827, "step": 2952 }, { "epoch": 1.9674356625301908, "learning_rate": 3.638814977221239e-08, "loss": 4.1329, "step": 2953 }, { "epoch": 1.9681019405346882, "learning_rate": 3.494751485937364e-08, "loss": 4.1581, "step": 2954 }, { "epoch": 1.9687682185391855, "learning_rate": 3.353595682463739e-08, "loss": 4.1706, "step": 2955 }, { "epoch": 1.9694344965436827, "learning_rate": 3.2153477312052424e-08, "loss": 4.164, "step": 2956 }, { "epoch": 1.9701007745481802, "learning_rate": 3.080007793179185e-08, "loss": 4.1254, "step": 2957 }, { "epoch": 1.9707670525526777, "learning_rate": 2.9475760260166962e-08, "loss": 4.1213, "step": 2958 }, { "epoch": 1.971433330557175, "learning_rate": 2.8180525839616168e-08, "loss": 4.1404, "step": 2959 }, { "epoch": 1.9720996085616722, "learning_rate": 2.6914376178702183e-08, "loss": 4.1738, "step": 2960 }, { "epoch": 1.9727658865661697, "learning_rate": 2.567731275211205e-08, "loss": 4.1826, "step": 2961 }, { "epoch": 1.9734321645706672, "learning_rate": 2.44693370006599e-08, "loss": 4.1243, "step": 2962 }, { "epoch": 1.9740984425751646, "learning_rate": 2.3290450331278635e-08, "loss": 4.1297, "step": 2963 }, { "epoch": 1.974764720579662, "learning_rate": 2.2140654117019933e-08, "loss": 4.1226, "step": 2964 }, { "epoch": 1.9754309985841592, "learning_rate": 2.1019949697054232e-08, "loss": 4.1435, "step": 2965 }, { "epoch": 1.9760972765886566, "learning_rate": 1.9928338376673517e-08, "loss": 4.134, "step": 2966 }, { "epoch": 1.9767635545931541, "learning_rate": 1.8865821427280216e-08, "loss": 4.128, "step": 2967 }, { "epoch": 1.9774298325976514, "learning_rate": 1.7832400086387202e-08, "loss": 4.1379, "step": 2968 }, { "epoch": 1.9780961106021486, "learning_rate": 1.6828075557628885e-08, "loss": 4.1769, "step": 2969 }, { "epoch": 1.978762388606646, "learning_rate": 1.58528490107418e-08, "loss": 4.1665, "step": 2970 }, { "epoch": 1.9794286666111436, "learning_rate": 1.490672158157569e-08, "loss": 4.1496, "step": 2971 }, { "epoch": 1.9800949446156408, "learning_rate": 1.398969437209352e-08, "loss": 4.1585, "step": 2972 }, { "epoch": 1.980761222620138, "learning_rate": 1.3101768450352048e-08, "loss": 4.1311, "step": 2973 }, { "epoch": 1.9814275006246356, "learning_rate": 1.2242944850524018e-08, "loss": 4.117, "step": 2974 }, { "epoch": 1.982093778629133, "learning_rate": 1.141322457288707e-08, "loss": 4.1387, "step": 2975 }, { "epoch": 1.9827600566336305, "learning_rate": 1.0612608583818185e-08, "loss": 4.1512, "step": 2976 }, { "epoch": 1.9834263346381278, "learning_rate": 9.841097815793675e-09, "loss": 4.1339, "step": 2977 }, { "epoch": 1.984092612642625, "learning_rate": 9.098693167400307e-09, "loss": 4.1253, "step": 2978 }, { "epoch": 1.9847588906471225, "learning_rate": 8.385395503315852e-09, "loss": 4.1259, "step": 2979 }, { "epoch": 1.98542516865162, "learning_rate": 7.701205654317422e-09, "loss": 4.172, "step": 2980 }, { "epoch": 1.9860914466561173, "learning_rate": 7.046124417289801e-09, "loss": 4.1273, "step": 2981 }, { "epoch": 1.9867577246606145, "learning_rate": 6.420152555206005e-09, "loss": 4.1533, "step": 2982 }, { "epoch": 1.987424002665112, "learning_rate": 5.823290797132841e-09, "loss": 4.1364, "step": 2983 }, { "epoch": 1.9880902806696095, "learning_rate": 5.255539838244783e-09, "loss": 4.1655, "step": 2984 }, { "epoch": 1.9887565586741067, "learning_rate": 4.716900339796215e-09, "loss": 4.1528, "step": 2985 }, { "epoch": 1.9894228366786042, "learning_rate": 4.2073729291464134e-09, "loss": 4.2005, "step": 2986 }, { "epoch": 1.9900891146831015, "learning_rate": 3.7269581997428916e-09, "loss": 4.1511, "step": 2987 }, { "epoch": 1.990755392687599, "learning_rate": 3.275656711126951e-09, "loss": 4.1594, "step": 2988 }, { "epoch": 1.9914216706920964, "learning_rate": 2.853468988928132e-09, "loss": 4.1658, "step": 2989 }, { "epoch": 1.9920879486965937, "learning_rate": 2.4603955248725386e-09, "loss": 4.145, "step": 2990 }, { "epoch": 1.992754226701091, "learning_rate": 2.0964367767717374e-09, "loss": 4.1078, "step": 2991 }, { "epoch": 1.9934205047055884, "learning_rate": 1.7615931685310837e-09, "loss": 4.1668, "step": 2992 }, { "epoch": 1.9940867827100859, "learning_rate": 1.4558650901469463e-09, "loss": 4.169, "step": 2993 }, { "epoch": 1.9947530607145831, "learning_rate": 1.1792528976983796e-09, "loss": 4.1474, "step": 2994 }, { "epoch": 1.9954193387190804, "learning_rate": 9.317569133554528e-10, "loss": 4.1263, "step": 2995 }, { "epoch": 1.9960856167235779, "learning_rate": 7.133774253792469e-10, "loss": 4.1267, "step": 2996 }, { "epoch": 1.9967518947280753, "learning_rate": 5.241146881163061e-10, "loss": 4.147, "step": 2997 }, { "epoch": 1.9974181727325728, "learning_rate": 3.639689220041875e-10, "loss": 4.1386, "step": 2998 }, { "epoch": 1.99808445073707, "learning_rate": 2.3294031356313473e-10, "loss": 4.1631, "step": 2999 }, { "epoch": 1.9987507287415673, "learning_rate": 1.3102901540162916e-10, "loss": 4.1565, "step": 3000 }, { "epoch": 1.9994170067460648, "learning_rate": 5.823514621638992e-11, "loss": 4.102, "step": 3001 }, { "epoch": 2.0, "learning_rate": 1.455879079237388e-11, "loss": 4.1819, "step": 3002 } ], "logging_steps": 1, "max_steps": 3002, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.554274880625731e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }