| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.17380271653645946, | |
| "eval_steps": 500, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0017380271653645947, | |
| "grad_norm": 1.1632381677627563, | |
| "learning_rate": 4.9978491913828615e-05, | |
| "loss": 3.6439, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0034760543307291894, | |
| "grad_norm": 0.6136592626571655, | |
| "learning_rate": 4.995676657426156e-05, | |
| "loss": 2.3619, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.005214081496093784, | |
| "grad_norm": 0.6270021796226501, | |
| "learning_rate": 4.99350412346945e-05, | |
| "loss": 2.0395, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.006952108661458379, | |
| "grad_norm": 0.9146378636360168, | |
| "learning_rate": 4.991331589512744e-05, | |
| "loss": 1.8894, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.008690135826822973, | |
| "grad_norm": 0.7162560224533081, | |
| "learning_rate": 4.989159055556039e-05, | |
| "loss": 1.8242, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.010428162992187568, | |
| "grad_norm": 0.31322506070137024, | |
| "learning_rate": 4.9869865215993326e-05, | |
| "loss": 1.8681, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.012166190157552163, | |
| "grad_norm": 0.5570130348205566, | |
| "learning_rate": 4.984813987642627e-05, | |
| "loss": 1.8099, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.013904217322916758, | |
| "grad_norm": 0.6080171465873718, | |
| "learning_rate": 4.982641453685921e-05, | |
| "loss": 1.7641, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.015642244488281352, | |
| "grad_norm": 0.553460955619812, | |
| "learning_rate": 4.980468919729215e-05, | |
| "loss": 1.7712, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.017380271653645946, | |
| "grad_norm": 0.625199019908905, | |
| "learning_rate": 4.97829638577251e-05, | |
| "loss": 1.7565, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.019118298819010542, | |
| "grad_norm": 0.579010546207428, | |
| "learning_rate": 4.9761238518158044e-05, | |
| "loss": 1.7322, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.020856325984375135, | |
| "grad_norm": 0.7429983615875244, | |
| "learning_rate": 4.9739513178590984e-05, | |
| "loss": 1.7407, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.022594353149739732, | |
| "grad_norm": 0.5801926255226135, | |
| "learning_rate": 4.971778783902393e-05, | |
| "loss": 1.6487, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.024332380315104325, | |
| "grad_norm": 0.7074835300445557, | |
| "learning_rate": 4.969606249945687e-05, | |
| "loss": 1.6959, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02607040748046892, | |
| "grad_norm": 0.6824275255203247, | |
| "learning_rate": 4.967433715988981e-05, | |
| "loss": 1.6958, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.027808434645833515, | |
| "grad_norm": 0.43216443061828613, | |
| "learning_rate": 4.9652611820322756e-05, | |
| "loss": 1.6824, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.029546461811198108, | |
| "grad_norm": 0.7867545485496521, | |
| "learning_rate": 4.9630886480755695e-05, | |
| "loss": 1.6671, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.031284488976562705, | |
| "grad_norm": 0.77516108751297, | |
| "learning_rate": 4.9609161141188635e-05, | |
| "loss": 1.6389, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.0330225161419273, | |
| "grad_norm": 0.5014050602912903, | |
| "learning_rate": 4.958743580162158e-05, | |
| "loss": 1.629, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.03476054330729189, | |
| "grad_norm": 0.6006432771682739, | |
| "learning_rate": 4.956571046205453e-05, | |
| "loss": 1.5977, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.036498570472656484, | |
| "grad_norm": 0.6153438091278076, | |
| "learning_rate": 4.954398512248747e-05, | |
| "loss": 1.5879, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.038236597638021085, | |
| "grad_norm": 0.8877372145652771, | |
| "learning_rate": 4.952225978292041e-05, | |
| "loss": 1.599, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.03997462480338568, | |
| "grad_norm": 0.7173994183540344, | |
| "learning_rate": 4.950053444335335e-05, | |
| "loss": 1.6205, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04171265196875027, | |
| "grad_norm": 0.8379663228988647, | |
| "learning_rate": 4.947880910378629e-05, | |
| "loss": 1.5794, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.043450679134114864, | |
| "grad_norm": 0.6160171031951904, | |
| "learning_rate": 4.945708376421924e-05, | |
| "loss": 1.5656, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.045188706299479464, | |
| "grad_norm": 0.8642494082450867, | |
| "learning_rate": 4.943535842465218e-05, | |
| "loss": 1.5665, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04692673346484406, | |
| "grad_norm": 0.6872414350509644, | |
| "learning_rate": 4.941363308508512e-05, | |
| "loss": 1.5552, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.04866476063020865, | |
| "grad_norm": 0.9998211860656738, | |
| "learning_rate": 4.9391907745518064e-05, | |
| "loss": 1.5458, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.050402787795573244, | |
| "grad_norm": 1.2175588607788086, | |
| "learning_rate": 4.937018240595101e-05, | |
| "loss": 1.5295, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05214081496093784, | |
| "grad_norm": 1.0134257078170776, | |
| "learning_rate": 4.934845706638395e-05, | |
| "loss": 1.516, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05387884212630244, | |
| "grad_norm": 0.8104642033576965, | |
| "learning_rate": 4.9326731726816896e-05, | |
| "loss": 1.5285, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05561686929166703, | |
| "grad_norm": 0.9005429148674011, | |
| "learning_rate": 4.9305006387249836e-05, | |
| "loss": 1.5069, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05735489645703162, | |
| "grad_norm": 0.8855582475662231, | |
| "learning_rate": 4.9283281047682775e-05, | |
| "loss": 1.5046, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.059092923622396216, | |
| "grad_norm": 0.7807704210281372, | |
| "learning_rate": 4.926155570811572e-05, | |
| "loss": 1.4663, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.06083095078776081, | |
| "grad_norm": 1.2552438974380493, | |
| "learning_rate": 4.923983036854866e-05, | |
| "loss": 1.486, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06256897795312541, | |
| "grad_norm": 1.0079654455184937, | |
| "learning_rate": 4.92181050289816e-05, | |
| "loss": 1.4569, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06430700511849, | |
| "grad_norm": 1.0267302989959717, | |
| "learning_rate": 4.919637968941455e-05, | |
| "loss": 1.4746, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.0660450322838546, | |
| "grad_norm": 1.1427829265594482, | |
| "learning_rate": 4.9174654349847494e-05, | |
| "loss": 1.4867, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.0677830594492192, | |
| "grad_norm": 0.9080005884170532, | |
| "learning_rate": 4.915292901028043e-05, | |
| "loss": 1.4789, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.06952108661458378, | |
| "grad_norm": 0.78159499168396, | |
| "learning_rate": 4.913120367071338e-05, | |
| "loss": 1.4435, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07125911377994838, | |
| "grad_norm": 0.9199485778808594, | |
| "learning_rate": 4.910947833114632e-05, | |
| "loss": 1.4698, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.07299714094531297, | |
| "grad_norm": 1.1556053161621094, | |
| "learning_rate": 4.908775299157926e-05, | |
| "loss": 1.4233, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07473516811067757, | |
| "grad_norm": 0.6093395948410034, | |
| "learning_rate": 4.9066027652012205e-05, | |
| "loss": 1.4607, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07647319527604217, | |
| "grad_norm": 0.7765551209449768, | |
| "learning_rate": 4.9044302312445144e-05, | |
| "loss": 1.4067, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07821122244140676, | |
| "grad_norm": 0.9261316061019897, | |
| "learning_rate": 4.9022576972878084e-05, | |
| "loss": 1.4437, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.07994924960677136, | |
| "grad_norm": 0.737016499042511, | |
| "learning_rate": 4.900085163331103e-05, | |
| "loss": 1.4394, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.08168727677213594, | |
| "grad_norm": 1.0518062114715576, | |
| "learning_rate": 4.897912629374397e-05, | |
| "loss": 1.442, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.08342530393750054, | |
| "grad_norm": 0.9163209795951843, | |
| "learning_rate": 4.8957400954176916e-05, | |
| "loss": 1.4126, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.08516333110286514, | |
| "grad_norm": 1.1651362180709839, | |
| "learning_rate": 4.893567561460986e-05, | |
| "loss": 1.4397, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.08690135826822973, | |
| "grad_norm": 1.2389508485794067, | |
| "learning_rate": 4.89139502750428e-05, | |
| "loss": 1.4226, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08863938543359433, | |
| "grad_norm": 1.009730339050293, | |
| "learning_rate": 4.889222493547574e-05, | |
| "loss": 1.4643, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.09037741259895893, | |
| "grad_norm": 1.3371009826660156, | |
| "learning_rate": 4.887049959590869e-05, | |
| "loss": 1.4221, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.09211543976432351, | |
| "grad_norm": 1.0338963270187378, | |
| "learning_rate": 4.884877425634163e-05, | |
| "loss": 1.4122, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.09385346692968811, | |
| "grad_norm": 1.0023767948150635, | |
| "learning_rate": 4.8827048916774574e-05, | |
| "loss": 1.4034, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.0955914940950527, | |
| "grad_norm": 1.4514521360397339, | |
| "learning_rate": 4.880532357720751e-05, | |
| "loss": 1.4356, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.0973295212604173, | |
| "grad_norm": 1.0462247133255005, | |
| "learning_rate": 4.878359823764045e-05, | |
| "loss": 1.4038, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.0990675484257819, | |
| "grad_norm": 1.0881024599075317, | |
| "learning_rate": 4.87618728980734e-05, | |
| "loss": 1.3521, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.10080557559114649, | |
| "grad_norm": 1.1503826379776, | |
| "learning_rate": 4.8740147558506345e-05, | |
| "loss": 1.3455, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.10254360275651109, | |
| "grad_norm": 1.1788356304168701, | |
| "learning_rate": 4.8718422218939285e-05, | |
| "loss": 1.4246, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.10428162992187567, | |
| "grad_norm": 0.9009695649147034, | |
| "learning_rate": 4.8696696879372225e-05, | |
| "loss": 1.3701, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.10601965708724027, | |
| "grad_norm": 0.7886667251586914, | |
| "learning_rate": 4.867497153980517e-05, | |
| "loss": 1.3843, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.10775768425260487, | |
| "grad_norm": 1.0017770528793335, | |
| "learning_rate": 4.865335482693595e-05, | |
| "loss": 1.3785, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.10949571141796946, | |
| "grad_norm": 0.901871383190155, | |
| "learning_rate": 4.863162948736889e-05, | |
| "loss": 1.3627, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.11123373858333406, | |
| "grad_norm": 0.9240642189979553, | |
| "learning_rate": 4.860990414780183e-05, | |
| "loss": 1.3397, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.11297176574869865, | |
| "grad_norm": 1.2550582885742188, | |
| "learning_rate": 4.8588178808234776e-05, | |
| "loss": 1.368, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.11470979291406325, | |
| "grad_norm": 0.9313985705375671, | |
| "learning_rate": 4.8566453468667715e-05, | |
| "loss": 1.344, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.11644782007942785, | |
| "grad_norm": 0.8634843826293945, | |
| "learning_rate": 4.854472812910066e-05, | |
| "loss": 1.3308, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.11818584724479243, | |
| "grad_norm": 1.2060052156448364, | |
| "learning_rate": 4.85230027895336e-05, | |
| "loss": 1.355, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.11992387441015703, | |
| "grad_norm": 1.0419443845748901, | |
| "learning_rate": 4.850127744996655e-05, | |
| "loss": 1.3469, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.12166190157552162, | |
| "grad_norm": 1.2425956726074219, | |
| "learning_rate": 4.847955211039949e-05, | |
| "loss": 1.3368, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.12339992874088622, | |
| "grad_norm": 1.0397825241088867, | |
| "learning_rate": 4.8457826770832433e-05, | |
| "loss": 1.3211, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.12513795590625082, | |
| "grad_norm": 0.8406294584274292, | |
| "learning_rate": 4.843621005796321e-05, | |
| "loss": 1.3375, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.1268759830716154, | |
| "grad_norm": 0.816184401512146, | |
| "learning_rate": 4.841448471839615e-05, | |
| "loss": 1.3351, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.12861401023698, | |
| "grad_norm": 1.1904360055923462, | |
| "learning_rate": 4.839275937882909e-05, | |
| "loss": 1.3174, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.1303520374023446, | |
| "grad_norm": 1.2890825271606445, | |
| "learning_rate": 4.837103403926204e-05, | |
| "loss": 1.3294, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.1320900645677092, | |
| "grad_norm": 0.9586935639381409, | |
| "learning_rate": 4.834930869969498e-05, | |
| "loss": 1.2934, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.13382809173307378, | |
| "grad_norm": 0.9654845595359802, | |
| "learning_rate": 4.832758336012792e-05, | |
| "loss": 1.3386, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.1355661188984384, | |
| "grad_norm": 1.1789395809173584, | |
| "learning_rate": 4.8305858020560864e-05, | |
| "loss": 1.3499, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.13730414606380298, | |
| "grad_norm": 1.2728456258773804, | |
| "learning_rate": 4.82841326809938e-05, | |
| "loss": 1.3396, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.13904217322916756, | |
| "grad_norm": 1.0807838439941406, | |
| "learning_rate": 4.826240734142675e-05, | |
| "loss": 1.3369, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.14078020039453218, | |
| "grad_norm": 1.11849045753479, | |
| "learning_rate": 4.8240682001859696e-05, | |
| "loss": 1.3664, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.14251822755989677, | |
| "grad_norm": 1.5169202089309692, | |
| "learning_rate": 4.821906528899047e-05, | |
| "loss": 1.3352, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.14425625472526135, | |
| "grad_norm": 0.8817140460014343, | |
| "learning_rate": 4.819733994942341e-05, | |
| "loss": 1.2924, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.14599428189062594, | |
| "grad_norm": 1.1285990476608276, | |
| "learning_rate": 4.8175614609856355e-05, | |
| "loss": 1.3497, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.14773230905599055, | |
| "grad_norm": 1.1072745323181152, | |
| "learning_rate": 4.81538892702893e-05, | |
| "loss": 1.3129, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.14947033622135514, | |
| "grad_norm": 1.1911921501159668, | |
| "learning_rate": 4.813216393072224e-05, | |
| "loss": 1.312, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.15120836338671972, | |
| "grad_norm": 0.7891075611114502, | |
| "learning_rate": 4.811043859115518e-05, | |
| "loss": 1.281, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.15294639055208434, | |
| "grad_norm": 0.9016463756561279, | |
| "learning_rate": 4.8088713251588126e-05, | |
| "loss": 1.3118, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.15468441771744892, | |
| "grad_norm": 1.1260063648223877, | |
| "learning_rate": 4.8066987912021066e-05, | |
| "loss": 1.2743, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.1564224448828135, | |
| "grad_norm": 1.0370497703552246, | |
| "learning_rate": 4.8045262572454005e-05, | |
| "loss": 1.3013, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.15816047204817812, | |
| "grad_norm": 1.4182652235031128, | |
| "learning_rate": 4.802353723288695e-05, | |
| "loss": 1.2994, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.1598984992135427, | |
| "grad_norm": 1.1322426795959473, | |
| "learning_rate": 4.800192052001773e-05, | |
| "loss": 1.3339, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.1616365263789073, | |
| "grad_norm": 1.4774497747421265, | |
| "learning_rate": 4.798019518045067e-05, | |
| "loss": 1.3381, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.16337455354427188, | |
| "grad_norm": 1.3371450901031494, | |
| "learning_rate": 4.795846984088361e-05, | |
| "loss": 1.304, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.1651125807096365, | |
| "grad_norm": 0.8607128858566284, | |
| "learning_rate": 4.793674450131656e-05, | |
| "loss": 1.2686, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.16685060787500108, | |
| "grad_norm": 1.1792031526565552, | |
| "learning_rate": 4.79150191617495e-05, | |
| "loss": 1.3099, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.16858863504036567, | |
| "grad_norm": 1.274556040763855, | |
| "learning_rate": 4.789329382218244e-05, | |
| "loss": 1.2745, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.17032666220573028, | |
| "grad_norm": 0.7774292230606079, | |
| "learning_rate": 4.787156848261539e-05, | |
| "loss": 1.2905, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.17206468937109487, | |
| "grad_norm": 1.204541802406311, | |
| "learning_rate": 4.784984314304833e-05, | |
| "loss": 1.3014, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.17380271653645946, | |
| "grad_norm": 0.9959656000137329, | |
| "learning_rate": 4.782811780348127e-05, | |
| "loss": 1.2798, | |
| "step": 20000 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 460292, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.178779779072e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |