README / checkpoint-7656 /trainer_state.json
tiu2mo's picture
Upload 12 files
edcf63d verified
{
"best_metric": 0.96282727,
"best_model_checkpoint": "/data1/wjx/model/swift/output/v1_prompt/output/internvl2-26b/v0-20240806-203157/checkpoint-7656",
"epoch": 6.0,
"eval_steps": 1,
"global_step": 7656,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.51014894,
"epoch": 0.0,
"learning_rate": 1.3054830287206266e-07,
"loss": 1.93593538,
"memory(GiB)": 59.04,
"step": 1,
"train_speed(iter/s)": 0.032837
},
{
"acc": 0.53850144,
"epoch": 0.0,
"learning_rate": 6.527415143603134e-07,
"loss": 1.74545956,
"memory(GiB)": 64.54,
"step": 5,
"train_speed(iter/s)": 0.035227
},
{
"acc": 0.5365521,
"epoch": 0.01,
"learning_rate": 1.3054830287206267e-06,
"loss": 1.77564487,
"memory(GiB)": 75.45,
"step": 10,
"train_speed(iter/s)": 0.034818
},
{
"acc": 0.53638253,
"epoch": 0.01,
"learning_rate": 1.9582245430809403e-06,
"loss": 1.75824242,
"memory(GiB)": 75.45,
"step": 15,
"train_speed(iter/s)": 0.035152
},
{
"acc": 0.5359807,
"epoch": 0.02,
"learning_rate": 2.6109660574412534e-06,
"loss": 1.74080467,
"memory(GiB)": 78.2,
"step": 20,
"train_speed(iter/s)": 0.034968
},
{
"acc": 0.52935181,
"epoch": 0.02,
"learning_rate": 3.263707571801567e-06,
"loss": 1.78961926,
"memory(GiB)": 71.11,
"step": 25,
"train_speed(iter/s)": 0.035135
},
{
"acc": 0.52923927,
"epoch": 0.02,
"learning_rate": 3.9164490861618806e-06,
"loss": 1.77055035,
"memory(GiB)": 71.11,
"step": 30,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.51313214,
"epoch": 0.03,
"learning_rate": 4.569190600522193e-06,
"loss": 1.85138378,
"memory(GiB)": 71.11,
"step": 35,
"train_speed(iter/s)": 0.035382
},
{
"acc": 0.53307762,
"epoch": 0.03,
"learning_rate": 5.221932114882507e-06,
"loss": 1.75060291,
"memory(GiB)": 71.11,
"step": 40,
"train_speed(iter/s)": 0.035438
},
{
"acc": 0.54460502,
"epoch": 0.04,
"learning_rate": 5.87467362924282e-06,
"loss": 1.69541492,
"memory(GiB)": 71.11,
"step": 45,
"train_speed(iter/s)": 0.035472
},
{
"acc": 0.53159156,
"epoch": 0.04,
"learning_rate": 6.527415143603134e-06,
"loss": 1.75854664,
"memory(GiB)": 71.11,
"step": 50,
"train_speed(iter/s)": 0.035355
},
{
"acc": 0.54251604,
"epoch": 0.04,
"learning_rate": 7.180156657963447e-06,
"loss": 1.72001286,
"memory(GiB)": 71.11,
"step": 55,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.54704895,
"epoch": 0.05,
"learning_rate": 7.832898172323761e-06,
"loss": 1.69055214,
"memory(GiB)": 71.11,
"step": 60,
"train_speed(iter/s)": 0.035313
},
{
"acc": 0.54154701,
"epoch": 0.05,
"learning_rate": 8.485639686684073e-06,
"loss": 1.73966904,
"memory(GiB)": 71.11,
"step": 65,
"train_speed(iter/s)": 0.035332
},
{
"acc": 0.54317932,
"epoch": 0.05,
"learning_rate": 9.138381201044387e-06,
"loss": 1.72243404,
"memory(GiB)": 71.11,
"step": 70,
"train_speed(iter/s)": 0.035356
},
{
"acc": 0.54935493,
"epoch": 0.06,
"learning_rate": 9.7911227154047e-06,
"loss": 1.67216759,
"memory(GiB)": 71.11,
"step": 75,
"train_speed(iter/s)": 0.03538
},
{
"acc": 0.5512825,
"epoch": 0.06,
"learning_rate": 1.0443864229765014e-05,
"loss": 1.63593922,
"memory(GiB)": 71.11,
"step": 80,
"train_speed(iter/s)": 0.035391
},
{
"acc": 0.55266647,
"epoch": 0.07,
"learning_rate": 1.1096605744125327e-05,
"loss": 1.64257793,
"memory(GiB)": 71.11,
"step": 85,
"train_speed(iter/s)": 0.035413
},
{
"acc": 0.5626039,
"epoch": 0.07,
"learning_rate": 1.174934725848564e-05,
"loss": 1.62862835,
"memory(GiB)": 71.11,
"step": 90,
"train_speed(iter/s)": 0.035346
},
{
"acc": 0.55852866,
"epoch": 0.07,
"learning_rate": 1.2402088772845953e-05,
"loss": 1.66630707,
"memory(GiB)": 71.11,
"step": 95,
"train_speed(iter/s)": 0.035372
},
{
"acc": 0.56864738,
"epoch": 0.08,
"learning_rate": 1.3054830287206268e-05,
"loss": 1.5820467,
"memory(GiB)": 71.11,
"step": 100,
"train_speed(iter/s)": 0.035404
},
{
"acc": 0.57975974,
"epoch": 0.08,
"learning_rate": 1.370757180156658e-05,
"loss": 1.50294018,
"memory(GiB)": 71.11,
"step": 105,
"train_speed(iter/s)": 0.035429
},
{
"acc": 0.56583891,
"epoch": 0.09,
"learning_rate": 1.4360313315926893e-05,
"loss": 1.5526598,
"memory(GiB)": 71.11,
"step": 110,
"train_speed(iter/s)": 0.035444
},
{
"acc": 0.57876883,
"epoch": 0.09,
"learning_rate": 1.5013054830287207e-05,
"loss": 1.52297754,
"memory(GiB)": 71.11,
"step": 115,
"train_speed(iter/s)": 0.03546
},
{
"acc": 0.56726651,
"epoch": 0.09,
"learning_rate": 1.5665796344647522e-05,
"loss": 1.5246254,
"memory(GiB)": 71.11,
"step": 120,
"train_speed(iter/s)": 0.035468
},
{
"acc": 0.57492142,
"epoch": 0.1,
"learning_rate": 1.6318537859007836e-05,
"loss": 1.51940918,
"memory(GiB)": 71.11,
"step": 125,
"train_speed(iter/s)": 0.035473
},
{
"acc": 0.57668262,
"epoch": 0.1,
"learning_rate": 1.6971279373368146e-05,
"loss": 1.49961472,
"memory(GiB)": 71.11,
"step": 130,
"train_speed(iter/s)": 0.03548
},
{
"acc": 0.58218517,
"epoch": 0.11,
"learning_rate": 1.762402088772846e-05,
"loss": 1.46871567,
"memory(GiB)": 71.11,
"step": 135,
"train_speed(iter/s)": 0.035434
},
{
"acc": 0.5942565,
"epoch": 0.11,
"learning_rate": 1.8276762402088773e-05,
"loss": 1.41751032,
"memory(GiB)": 71.11,
"step": 140,
"train_speed(iter/s)": 0.035445
},
{
"acc": 0.58242044,
"epoch": 0.11,
"learning_rate": 1.8929503916449087e-05,
"loss": 1.454846,
"memory(GiB)": 71.11,
"step": 145,
"train_speed(iter/s)": 0.035456
},
{
"acc": 0.59025207,
"epoch": 0.12,
"learning_rate": 1.95822454308094e-05,
"loss": 1.44744148,
"memory(GiB)": 71.11,
"step": 150,
"train_speed(iter/s)": 0.035462
},
{
"acc": 0.60451708,
"epoch": 0.12,
"learning_rate": 2.0234986945169714e-05,
"loss": 1.38559418,
"memory(GiB)": 71.11,
"step": 155,
"train_speed(iter/s)": 0.035424
},
{
"acc": 0.5968575,
"epoch": 0.13,
"learning_rate": 2.0887728459530027e-05,
"loss": 1.38553591,
"memory(GiB)": 71.11,
"step": 160,
"train_speed(iter/s)": 0.035433
},
{
"acc": 0.60566149,
"epoch": 0.13,
"learning_rate": 2.154046997389034e-05,
"loss": 1.3881608,
"memory(GiB)": 71.11,
"step": 165,
"train_speed(iter/s)": 0.035438
},
{
"acc": 0.59593248,
"epoch": 0.13,
"learning_rate": 2.2193211488250655e-05,
"loss": 1.41851988,
"memory(GiB)": 71.11,
"step": 170,
"train_speed(iter/s)": 0.035445
},
{
"acc": 0.60208702,
"epoch": 0.14,
"learning_rate": 2.2845953002610968e-05,
"loss": 1.37305822,
"memory(GiB)": 71.11,
"step": 175,
"train_speed(iter/s)": 0.035454
},
{
"acc": 0.60938406,
"epoch": 0.14,
"learning_rate": 2.349869451697128e-05,
"loss": 1.37933292,
"memory(GiB)": 71.11,
"step": 180,
"train_speed(iter/s)": 0.035385
},
{
"acc": 0.61727815,
"epoch": 0.14,
"learning_rate": 2.4151436031331595e-05,
"loss": 1.3533494,
"memory(GiB)": 71.11,
"step": 185,
"train_speed(iter/s)": 0.035394
},
{
"acc": 0.60119219,
"epoch": 0.15,
"learning_rate": 2.4804177545691905e-05,
"loss": 1.36805573,
"memory(GiB)": 71.11,
"step": 190,
"train_speed(iter/s)": 0.035403
},
{
"acc": 0.59402022,
"epoch": 0.15,
"learning_rate": 2.545691906005222e-05,
"loss": 1.43324385,
"memory(GiB)": 71.11,
"step": 195,
"train_speed(iter/s)": 0.03533
},
{
"acc": 0.59447851,
"epoch": 0.16,
"learning_rate": 2.6109660574412536e-05,
"loss": 1.40030098,
"memory(GiB)": 71.11,
"step": 200,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.60737519,
"epoch": 0.16,
"learning_rate": 2.6762402088772846e-05,
"loss": 1.3658123,
"memory(GiB)": 71.11,
"step": 205,
"train_speed(iter/s)": 0.035312
},
{
"acc": 0.60835319,
"epoch": 0.16,
"learning_rate": 2.741514360313316e-05,
"loss": 1.36259966,
"memory(GiB)": 71.11,
"step": 210,
"train_speed(iter/s)": 0.03532
},
{
"acc": 0.60979681,
"epoch": 0.17,
"learning_rate": 2.8067885117493477e-05,
"loss": 1.33051977,
"memory(GiB)": 71.11,
"step": 215,
"train_speed(iter/s)": 0.035329
},
{
"acc": 0.62091312,
"epoch": 0.17,
"learning_rate": 2.8720626631853787e-05,
"loss": 1.3052928,
"memory(GiB)": 71.11,
"step": 220,
"train_speed(iter/s)": 0.035339
},
{
"acc": 0.62655573,
"epoch": 0.18,
"learning_rate": 2.9373368146214104e-05,
"loss": 1.29806547,
"memory(GiB)": 71.11,
"step": 225,
"train_speed(iter/s)": 0.035353
},
{
"acc": 0.60786376,
"epoch": 0.18,
"learning_rate": 3.0026109660574414e-05,
"loss": 1.3772687,
"memory(GiB)": 71.11,
"step": 230,
"train_speed(iter/s)": 0.03536
},
{
"acc": 0.60273538,
"epoch": 0.18,
"learning_rate": 3.067885117493473e-05,
"loss": 1.36951752,
"memory(GiB)": 71.11,
"step": 235,
"train_speed(iter/s)": 0.035368
},
{
"acc": 0.61111588,
"epoch": 0.19,
"learning_rate": 3.1331592689295045e-05,
"loss": 1.35591669,
"memory(GiB)": 71.11,
"step": 240,
"train_speed(iter/s)": 0.035378
},
{
"acc": 0.613943,
"epoch": 0.19,
"learning_rate": 3.1984334203655355e-05,
"loss": 1.36014338,
"memory(GiB)": 71.11,
"step": 245,
"train_speed(iter/s)": 0.035382
},
{
"acc": 0.61394243,
"epoch": 0.2,
"learning_rate": 3.263707571801567e-05,
"loss": 1.36864185,
"memory(GiB)": 71.11,
"step": 250,
"train_speed(iter/s)": 0.03539
},
{
"acc": 0.61200385,
"epoch": 0.2,
"learning_rate": 3.328981723237598e-05,
"loss": 1.3423625,
"memory(GiB)": 71.11,
"step": 255,
"train_speed(iter/s)": 0.035401
},
{
"acc": 0.61476417,
"epoch": 0.2,
"learning_rate": 3.394255874673629e-05,
"loss": 1.36127625,
"memory(GiB)": 71.11,
"step": 260,
"train_speed(iter/s)": 0.035413
},
{
"acc": 0.61477809,
"epoch": 0.21,
"learning_rate": 3.459530026109661e-05,
"loss": 1.35783281,
"memory(GiB)": 71.11,
"step": 265,
"train_speed(iter/s)": 0.035422
},
{
"acc": 0.62444715,
"epoch": 0.21,
"learning_rate": 3.524804177545692e-05,
"loss": 1.31687889,
"memory(GiB)": 71.11,
"step": 270,
"train_speed(iter/s)": 0.035432
},
{
"acc": 0.61132975,
"epoch": 0.22,
"learning_rate": 3.5900783289817236e-05,
"loss": 1.3453701,
"memory(GiB)": 71.11,
"step": 275,
"train_speed(iter/s)": 0.035387
},
{
"acc": 0.61926041,
"epoch": 0.22,
"learning_rate": 3.6553524804177546e-05,
"loss": 1.30053482,
"memory(GiB)": 71.11,
"step": 280,
"train_speed(iter/s)": 0.035397
},
{
"acc": 0.61626806,
"epoch": 0.22,
"learning_rate": 3.720626631853786e-05,
"loss": 1.33418636,
"memory(GiB)": 71.11,
"step": 285,
"train_speed(iter/s)": 0.035406
},
{
"acc": 0.61756167,
"epoch": 0.23,
"learning_rate": 3.7859007832898173e-05,
"loss": 1.32053947,
"memory(GiB)": 79.4,
"step": 290,
"train_speed(iter/s)": 0.035387
},
{
"acc": 0.61689606,
"epoch": 0.23,
"learning_rate": 3.8511749347258484e-05,
"loss": 1.30887852,
"memory(GiB)": 82.21,
"step": 295,
"train_speed(iter/s)": 0.035371
},
{
"acc": 0.62750869,
"epoch": 0.24,
"learning_rate": 3.91644908616188e-05,
"loss": 1.34463081,
"memory(GiB)": 76.69,
"step": 300,
"train_speed(iter/s)": 0.035378
},
{
"acc": 0.60788245,
"epoch": 0.24,
"learning_rate": 3.981723237597911e-05,
"loss": 1.38475618,
"memory(GiB)": 76.69,
"step": 305,
"train_speed(iter/s)": 0.035362
},
{
"acc": 0.62858248,
"epoch": 0.24,
"learning_rate": 4.046997389033943e-05,
"loss": 1.28304482,
"memory(GiB)": 76.69,
"step": 310,
"train_speed(iter/s)": 0.03537
},
{
"acc": 0.63606496,
"epoch": 0.25,
"learning_rate": 4.112271540469974e-05,
"loss": 1.28804827,
"memory(GiB)": 76.69,
"step": 315,
"train_speed(iter/s)": 0.035378
},
{
"acc": 0.62939467,
"epoch": 0.25,
"learning_rate": 4.1775456919060055e-05,
"loss": 1.29029045,
"memory(GiB)": 76.69,
"step": 320,
"train_speed(iter/s)": 0.035385
},
{
"acc": 0.618082,
"epoch": 0.25,
"learning_rate": 4.242819843342037e-05,
"loss": 1.29355927,
"memory(GiB)": 76.69,
"step": 325,
"train_speed(iter/s)": 0.035395
},
{
"acc": 0.61671734,
"epoch": 0.26,
"learning_rate": 4.308093994778068e-05,
"loss": 1.32377872,
"memory(GiB)": 76.69,
"step": 330,
"train_speed(iter/s)": 0.035381
},
{
"acc": 0.62930007,
"epoch": 0.26,
"learning_rate": 4.3733681462141e-05,
"loss": 1.26064873,
"memory(GiB)": 76.69,
"step": 335,
"train_speed(iter/s)": 0.035391
},
{
"acc": 0.62707295,
"epoch": 0.27,
"learning_rate": 4.438642297650131e-05,
"loss": 1.25469856,
"memory(GiB)": 76.69,
"step": 340,
"train_speed(iter/s)": 0.035377
},
{
"acc": 0.63075371,
"epoch": 0.27,
"learning_rate": 4.503916449086162e-05,
"loss": 1.25198593,
"memory(GiB)": 76.69,
"step": 345,
"train_speed(iter/s)": 0.035383
},
{
"acc": 0.63224497,
"epoch": 0.27,
"learning_rate": 4.5691906005221936e-05,
"loss": 1.27103462,
"memory(GiB)": 76.69,
"step": 350,
"train_speed(iter/s)": 0.03539
},
{
"acc": 0.62364006,
"epoch": 0.28,
"learning_rate": 4.6344647519582246e-05,
"loss": 1.30759945,
"memory(GiB)": 76.69,
"step": 355,
"train_speed(iter/s)": 0.035395
},
{
"acc": 0.63504333,
"epoch": 0.28,
"learning_rate": 4.699738903394256e-05,
"loss": 1.27207603,
"memory(GiB)": 76.69,
"step": 360,
"train_speed(iter/s)": 0.0354
},
{
"acc": 0.61879263,
"epoch": 0.29,
"learning_rate": 4.7650130548302874e-05,
"loss": 1.32293415,
"memory(GiB)": 76.69,
"step": 365,
"train_speed(iter/s)": 0.035405
},
{
"acc": 0.62347574,
"epoch": 0.29,
"learning_rate": 4.830287206266319e-05,
"loss": 1.30256233,
"memory(GiB)": 76.69,
"step": 370,
"train_speed(iter/s)": 0.035409
},
{
"acc": 0.64439292,
"epoch": 0.29,
"learning_rate": 4.89556135770235e-05,
"loss": 1.2355484,
"memory(GiB)": 85.02,
"step": 375,
"train_speed(iter/s)": 0.035394
},
{
"acc": 0.62272491,
"epoch": 0.3,
"learning_rate": 4.960835509138381e-05,
"loss": 1.30343094,
"memory(GiB)": 79.49,
"step": 380,
"train_speed(iter/s)": 0.035398
},
{
"acc": 0.62577806,
"epoch": 0.3,
"learning_rate": 5.026109660574413e-05,
"loss": 1.26718674,
"memory(GiB)": 79.49,
"step": 385,
"train_speed(iter/s)": 0.035387
},
{
"acc": 0.62948937,
"epoch": 0.31,
"learning_rate": 5.091383812010444e-05,
"loss": 1.2764926,
"memory(GiB)": 79.49,
"step": 390,
"train_speed(iter/s)": 0.035394
},
{
"acc": 0.62468576,
"epoch": 0.31,
"learning_rate": 5.156657963446475e-05,
"loss": 1.27845716,
"memory(GiB)": 79.49,
"step": 395,
"train_speed(iter/s)": 0.035401
},
{
"acc": 0.63630462,
"epoch": 0.31,
"learning_rate": 5.221932114882507e-05,
"loss": 1.23259668,
"memory(GiB)": 79.49,
"step": 400,
"train_speed(iter/s)": 0.035391
},
{
"acc": 0.62778835,
"epoch": 0.32,
"learning_rate": 5.287206266318538e-05,
"loss": 1.2565299,
"memory(GiB)": 79.49,
"step": 405,
"train_speed(iter/s)": 0.035396
},
{
"acc": 0.62839155,
"epoch": 0.32,
"learning_rate": 5.352480417754569e-05,
"loss": 1.29196539,
"memory(GiB)": 79.49,
"step": 410,
"train_speed(iter/s)": 0.035403
},
{
"acc": 0.62438302,
"epoch": 0.33,
"learning_rate": 5.417754569190601e-05,
"loss": 1.31461248,
"memory(GiB)": 79.49,
"step": 415,
"train_speed(iter/s)": 0.035408
},
{
"acc": 0.63546472,
"epoch": 0.33,
"learning_rate": 5.483028720626632e-05,
"loss": 1.25184908,
"memory(GiB)": 79.49,
"step": 420,
"train_speed(iter/s)": 0.035379
},
{
"acc": 0.6268621,
"epoch": 0.33,
"learning_rate": 5.5483028720626636e-05,
"loss": 1.28653288,
"memory(GiB)": 79.49,
"step": 425,
"train_speed(iter/s)": 0.035384
},
{
"acc": 0.62927337,
"epoch": 0.34,
"learning_rate": 5.613577023498695e-05,
"loss": 1.3057992,
"memory(GiB)": 79.49,
"step": 430,
"train_speed(iter/s)": 0.035372
},
{
"acc": 0.63009191,
"epoch": 0.34,
"learning_rate": 5.6788511749347264e-05,
"loss": 1.27884521,
"memory(GiB)": 79.49,
"step": 435,
"train_speed(iter/s)": 0.035361
},
{
"acc": 0.62101679,
"epoch": 0.34,
"learning_rate": 5.7441253263707574e-05,
"loss": 1.32610731,
"memory(GiB)": 79.49,
"step": 440,
"train_speed(iter/s)": 0.035365
},
{
"acc": 0.62638683,
"epoch": 0.35,
"learning_rate": 5.8093994778067884e-05,
"loss": 1.26304455,
"memory(GiB)": 79.49,
"step": 445,
"train_speed(iter/s)": 0.03537
},
{
"acc": 0.62493896,
"epoch": 0.35,
"learning_rate": 5.874673629242821e-05,
"loss": 1.29808645,
"memory(GiB)": 79.49,
"step": 450,
"train_speed(iter/s)": 0.035357
},
{
"acc": 0.63067155,
"epoch": 0.36,
"learning_rate": 5.939947780678852e-05,
"loss": 1.25492554,
"memory(GiB)": 79.49,
"step": 455,
"train_speed(iter/s)": 0.035345
},
{
"acc": 0.62847533,
"epoch": 0.36,
"learning_rate": 6.005221932114883e-05,
"loss": 1.29804668,
"memory(GiB)": 79.49,
"step": 460,
"train_speed(iter/s)": 0.035335
},
{
"acc": 0.61040778,
"epoch": 0.36,
"learning_rate": 6.070496083550914e-05,
"loss": 1.3139143,
"memory(GiB)": 79.49,
"step": 465,
"train_speed(iter/s)": 0.035312
},
{
"acc": 0.62770967,
"epoch": 0.37,
"learning_rate": 6.135770234986946e-05,
"loss": 1.26855469,
"memory(GiB)": 79.49,
"step": 470,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.62000332,
"epoch": 0.37,
"learning_rate": 6.201044386422978e-05,
"loss": 1.30712109,
"memory(GiB)": 79.49,
"step": 475,
"train_speed(iter/s)": 0.035309
},
{
"acc": 0.62734308,
"epoch": 0.38,
"learning_rate": 6.266318537859009e-05,
"loss": 1.24606352,
"memory(GiB)": 79.49,
"step": 480,
"train_speed(iter/s)": 0.035316
},
{
"acc": 0.63649292,
"epoch": 0.38,
"learning_rate": 6.33159268929504e-05,
"loss": 1.22983913,
"memory(GiB)": 79.49,
"step": 485,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.64232011,
"epoch": 0.38,
"learning_rate": 6.396866840731071e-05,
"loss": 1.23846798,
"memory(GiB)": 79.49,
"step": 490,
"train_speed(iter/s)": 0.035309
},
{
"acc": 0.63173985,
"epoch": 0.39,
"learning_rate": 6.462140992167102e-05,
"loss": 1.26958008,
"memory(GiB)": 79.49,
"step": 495,
"train_speed(iter/s)": 0.035315
},
{
"acc": 0.6342205,
"epoch": 0.39,
"learning_rate": 6.527415143603134e-05,
"loss": 1.23609667,
"memory(GiB)": 79.49,
"step": 500,
"train_speed(iter/s)": 0.035321
},
{
"acc": 0.64169135,
"epoch": 0.4,
"learning_rate": 6.592689295039165e-05,
"loss": 1.2388341,
"memory(GiB)": 79.49,
"step": 505,
"train_speed(iter/s)": 0.035324
},
{
"acc": 0.6370995,
"epoch": 0.4,
"learning_rate": 6.657963446475196e-05,
"loss": 1.24629793,
"memory(GiB)": 79.49,
"step": 510,
"train_speed(iter/s)": 0.03533
},
{
"acc": 0.64395504,
"epoch": 0.4,
"learning_rate": 6.723237597911227e-05,
"loss": 1.25374393,
"memory(GiB)": 79.49,
"step": 515,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.62348013,
"epoch": 0.41,
"learning_rate": 6.788511749347258e-05,
"loss": 1.30115499,
"memory(GiB)": 79.49,
"step": 520,
"train_speed(iter/s)": 0.035324
},
{
"acc": 0.6296721,
"epoch": 0.41,
"learning_rate": 6.853785900783291e-05,
"loss": 1.24414005,
"memory(GiB)": 79.49,
"step": 525,
"train_speed(iter/s)": 0.035316
},
{
"acc": 0.64563322,
"epoch": 0.42,
"learning_rate": 6.919060052219322e-05,
"loss": 1.20363417,
"memory(GiB)": 87.82,
"step": 530,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.63340597,
"epoch": 0.42,
"learning_rate": 6.984334203655353e-05,
"loss": 1.26357803,
"memory(GiB)": 87.82,
"step": 535,
"train_speed(iter/s)": 0.035298
},
{
"acc": 0.63216286,
"epoch": 0.42,
"learning_rate": 7.049608355091384e-05,
"loss": 1.22720327,
"memory(GiB)": 87.82,
"step": 540,
"train_speed(iter/s)": 0.035302
},
{
"acc": 0.63558154,
"epoch": 0.43,
"learning_rate": 7.114882506527415e-05,
"loss": 1.23628178,
"memory(GiB)": 87.82,
"step": 545,
"train_speed(iter/s)": 0.035307
},
{
"acc": 0.63481274,
"epoch": 0.43,
"learning_rate": 7.180156657963447e-05,
"loss": 1.27170897,
"memory(GiB)": 87.82,
"step": 550,
"train_speed(iter/s)": 0.035311
},
{
"acc": 0.62329593,
"epoch": 0.43,
"learning_rate": 7.245430809399478e-05,
"loss": 1.29271698,
"memory(GiB)": 87.82,
"step": 555,
"train_speed(iter/s)": 0.035315
},
{
"acc": 0.62661791,
"epoch": 0.44,
"learning_rate": 7.310704960835509e-05,
"loss": 1.25905991,
"memory(GiB)": 87.82,
"step": 560,
"train_speed(iter/s)": 0.035318
},
{
"acc": 0.63888206,
"epoch": 0.44,
"learning_rate": 7.37597911227154e-05,
"loss": 1.23162041,
"memory(GiB)": 87.82,
"step": 565,
"train_speed(iter/s)": 0.035322
},
{
"acc": 0.6273633,
"epoch": 0.45,
"learning_rate": 7.441253263707573e-05,
"loss": 1.27724876,
"memory(GiB)": 87.82,
"step": 570,
"train_speed(iter/s)": 0.035314
},
{
"acc": 0.65672364,
"epoch": 0.45,
"learning_rate": 7.506527415143604e-05,
"loss": 1.18381844,
"memory(GiB)": 87.82,
"step": 575,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.63787546,
"epoch": 0.45,
"learning_rate": 7.571801566579635e-05,
"loss": 1.24759865,
"memory(GiB)": 87.82,
"step": 580,
"train_speed(iter/s)": 0.035322
},
{
"acc": 0.62682576,
"epoch": 0.46,
"learning_rate": 7.637075718015666e-05,
"loss": 1.26602411,
"memory(GiB)": 87.82,
"step": 585,
"train_speed(iter/s)": 0.035314
},
{
"acc": 0.64525466,
"epoch": 0.46,
"learning_rate": 7.702349869451697e-05,
"loss": 1.20333176,
"memory(GiB)": 87.82,
"step": 590,
"train_speed(iter/s)": 0.035318
},
{
"acc": 0.63623095,
"epoch": 0.47,
"learning_rate": 7.767624020887729e-05,
"loss": 1.23436947,
"memory(GiB)": 87.82,
"step": 595,
"train_speed(iter/s)": 0.035323
},
{
"acc": 0.63377829,
"epoch": 0.47,
"learning_rate": 7.83289817232376e-05,
"loss": 1.25122204,
"memory(GiB)": 87.82,
"step": 600,
"train_speed(iter/s)": 0.035326
},
{
"acc": 0.64111838,
"epoch": 0.47,
"learning_rate": 7.898172323759791e-05,
"loss": 1.23531017,
"memory(GiB)": 87.82,
"step": 605,
"train_speed(iter/s)": 0.03533
},
{
"acc": 0.631183,
"epoch": 0.48,
"learning_rate": 7.963446475195822e-05,
"loss": 1.25364704,
"memory(GiB)": 87.82,
"step": 610,
"train_speed(iter/s)": 0.035335
},
{
"acc": 0.63098602,
"epoch": 0.48,
"learning_rate": 8.028720626631853e-05,
"loss": 1.25691967,
"memory(GiB)": 87.82,
"step": 615,
"train_speed(iter/s)": 0.035328
},
{
"acc": 0.63813372,
"epoch": 0.49,
"learning_rate": 8.093994778067886e-05,
"loss": 1.22148628,
"memory(GiB)": 87.82,
"step": 620,
"train_speed(iter/s)": 0.035321
},
{
"acc": 0.63645906,
"epoch": 0.49,
"learning_rate": 8.159268929503917e-05,
"loss": 1.2717802,
"memory(GiB)": 87.82,
"step": 625,
"train_speed(iter/s)": 0.035324
},
{
"acc": 0.63675809,
"epoch": 0.49,
"learning_rate": 8.224543080939948e-05,
"loss": 1.22741051,
"memory(GiB)": 87.82,
"step": 630,
"train_speed(iter/s)": 0.035328
},
{
"acc": 0.63550248,
"epoch": 0.5,
"learning_rate": 8.28981723237598e-05,
"loss": 1.25706348,
"memory(GiB)": 87.82,
"step": 635,
"train_speed(iter/s)": 0.035332
},
{
"acc": 0.63328991,
"epoch": 0.5,
"learning_rate": 8.355091383812011e-05,
"loss": 1.24116373,
"memory(GiB)": 87.82,
"step": 640,
"train_speed(iter/s)": 0.035337
},
{
"acc": 0.64723411,
"epoch": 0.51,
"learning_rate": 8.420365535248042e-05,
"loss": 1.21720247,
"memory(GiB)": 87.82,
"step": 645,
"train_speed(iter/s)": 0.03534
},
{
"acc": 0.63475337,
"epoch": 0.51,
"learning_rate": 8.485639686684074e-05,
"loss": 1.25098181,
"memory(GiB)": 87.82,
"step": 650,
"train_speed(iter/s)": 0.035343
},
{
"acc": 0.65048108,
"epoch": 0.51,
"learning_rate": 8.550913838120105e-05,
"loss": 1.19720526,
"memory(GiB)": 87.82,
"step": 655,
"train_speed(iter/s)": 0.035348
},
{
"acc": 0.63787851,
"epoch": 0.52,
"learning_rate": 8.616187989556136e-05,
"loss": 1.24119263,
"memory(GiB)": 87.82,
"step": 660,
"train_speed(iter/s)": 0.035342
},
{
"acc": 0.63413329,
"epoch": 0.52,
"learning_rate": 8.681462140992167e-05,
"loss": 1.27156887,
"memory(GiB)": 87.82,
"step": 665,
"train_speed(iter/s)": 0.035346
},
{
"acc": 0.62740493,
"epoch": 0.53,
"learning_rate": 8.7467362924282e-05,
"loss": 1.27027969,
"memory(GiB)": 87.82,
"step": 670,
"train_speed(iter/s)": 0.035337
},
{
"acc": 0.64307756,
"epoch": 0.53,
"learning_rate": 8.812010443864231e-05,
"loss": 1.2422493,
"memory(GiB)": 87.82,
"step": 675,
"train_speed(iter/s)": 0.035341
},
{
"acc": 0.63943014,
"epoch": 0.53,
"learning_rate": 8.877284595300262e-05,
"loss": 1.2298399,
"memory(GiB)": 87.82,
"step": 680,
"train_speed(iter/s)": 0.035333
},
{
"acc": 0.6344151,
"epoch": 0.54,
"learning_rate": 8.942558746736293e-05,
"loss": 1.24074373,
"memory(GiB)": 87.82,
"step": 685,
"train_speed(iter/s)": 0.035317
},
{
"acc": 0.62509422,
"epoch": 0.54,
"learning_rate": 9.007832898172324e-05,
"loss": 1.29264908,
"memory(GiB)": 90.61,
"step": 690,
"train_speed(iter/s)": 0.035299
},
{
"acc": 0.64127073,
"epoch": 0.54,
"learning_rate": 9.073107049608356e-05,
"loss": 1.21613617,
"memory(GiB)": 85.12,
"step": 695,
"train_speed(iter/s)": 0.035303
},
{
"acc": 0.63301926,
"epoch": 0.55,
"learning_rate": 9.138381201044387e-05,
"loss": 1.25347605,
"memory(GiB)": 85.12,
"step": 700,
"train_speed(iter/s)": 0.035307
},
{
"acc": 0.63968863,
"epoch": 0.55,
"learning_rate": 9.203655352480418e-05,
"loss": 1.20407486,
"memory(GiB)": 85.12,
"step": 705,
"train_speed(iter/s)": 0.035311
},
{
"acc": 0.64357448,
"epoch": 0.56,
"learning_rate": 9.268929503916449e-05,
"loss": 1.19348354,
"memory(GiB)": 85.12,
"step": 710,
"train_speed(iter/s)": 0.035315
},
{
"acc": 0.6440393,
"epoch": 0.56,
"learning_rate": 9.33420365535248e-05,
"loss": 1.21024275,
"memory(GiB)": 85.12,
"step": 715,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.64507108,
"epoch": 0.56,
"learning_rate": 9.399477806788513e-05,
"loss": 1.23527927,
"memory(GiB)": 85.12,
"step": 720,
"train_speed(iter/s)": 0.035313
},
{
"acc": 0.64306412,
"epoch": 0.57,
"learning_rate": 9.464751958224544e-05,
"loss": 1.24440823,
"memory(GiB)": 85.12,
"step": 725,
"train_speed(iter/s)": 0.035318
},
{
"acc": 0.64716368,
"epoch": 0.57,
"learning_rate": 9.530026109660575e-05,
"loss": 1.23672924,
"memory(GiB)": 85.12,
"step": 730,
"train_speed(iter/s)": 0.035321
},
{
"acc": 0.6508863,
"epoch": 0.58,
"learning_rate": 9.595300261096606e-05,
"loss": 1.16483746,
"memory(GiB)": 85.12,
"step": 735,
"train_speed(iter/s)": 0.035324
},
{
"acc": 0.64414196,
"epoch": 0.58,
"learning_rate": 9.660574412532638e-05,
"loss": 1.24241323,
"memory(GiB)": 85.12,
"step": 740,
"train_speed(iter/s)": 0.035326
},
{
"acc": 0.64069014,
"epoch": 0.58,
"learning_rate": 9.725848563968669e-05,
"loss": 1.21243725,
"memory(GiB)": 85.12,
"step": 745,
"train_speed(iter/s)": 0.035329
},
{
"acc": 0.6319931,
"epoch": 0.59,
"learning_rate": 9.7911227154047e-05,
"loss": 1.23099251,
"memory(GiB)": 85.12,
"step": 750,
"train_speed(iter/s)": 0.035332
},
{
"acc": 0.63715777,
"epoch": 0.59,
"learning_rate": 9.856396866840731e-05,
"loss": 1.23596754,
"memory(GiB)": 85.12,
"step": 755,
"train_speed(iter/s)": 0.035336
},
{
"acc": 0.63574848,
"epoch": 0.6,
"learning_rate": 9.921671018276762e-05,
"loss": 1.23194542,
"memory(GiB)": 85.12,
"step": 760,
"train_speed(iter/s)": 0.035339
},
{
"acc": 0.63391104,
"epoch": 0.6,
"learning_rate": 9.986945169712795e-05,
"loss": 1.25005703,
"memory(GiB)": 85.12,
"step": 765,
"train_speed(iter/s)": 0.035343
},
{
"acc": 0.64336019,
"epoch": 0.6,
"learning_rate": 9.999998134167974e-05,
"loss": 1.19814224,
"memory(GiB)": 85.12,
"step": 770,
"train_speed(iter/s)": 0.035346
},
{
"acc": 0.64235554,
"epoch": 0.61,
"learning_rate": 9.999990554227756e-05,
"loss": 1.20386868,
"memory(GiB)": 85.12,
"step": 775,
"train_speed(iter/s)": 0.035348
},
{
"acc": 0.63095374,
"epoch": 0.61,
"learning_rate": 9.999977143573674e-05,
"loss": 1.24922619,
"memory(GiB)": 85.12,
"step": 780,
"train_speed(iter/s)": 0.03535
},
{
"acc": 0.63008108,
"epoch": 0.62,
"learning_rate": 9.99995790222137e-05,
"loss": 1.26964073,
"memory(GiB)": 85.12,
"step": 785,
"train_speed(iter/s)": 0.035353
},
{
"acc": 0.63752484,
"epoch": 0.62,
"learning_rate": 9.999932830193279e-05,
"loss": 1.22619057,
"memory(GiB)": 85.12,
"step": 790,
"train_speed(iter/s)": 0.035348
},
{
"acc": 0.63318844,
"epoch": 0.62,
"learning_rate": 9.999901927518642e-05,
"loss": 1.26003723,
"memory(GiB)": 85.12,
"step": 795,
"train_speed(iter/s)": 0.035351
},
{
"acc": 0.63203177,
"epoch": 0.63,
"learning_rate": 9.999865194233496e-05,
"loss": 1.24707184,
"memory(GiB)": 85.12,
"step": 800,
"train_speed(iter/s)": 0.035344
},
{
"acc": 0.62407198,
"epoch": 0.63,
"learning_rate": 9.999822630380674e-05,
"loss": 1.28778019,
"memory(GiB)": 85.12,
"step": 805,
"train_speed(iter/s)": 0.035345
},
{
"acc": 0.62447062,
"epoch": 0.63,
"learning_rate": 9.999774236009813e-05,
"loss": 1.30319834,
"memory(GiB)": 85.12,
"step": 810,
"train_speed(iter/s)": 0.035348
},
{
"acc": 0.63752298,
"epoch": 0.64,
"learning_rate": 9.999720011177348e-05,
"loss": 1.24396782,
"memory(GiB)": 85.12,
"step": 815,
"train_speed(iter/s)": 0.035342
},
{
"acc": 0.6344676,
"epoch": 0.64,
"learning_rate": 9.999659955946514e-05,
"loss": 1.24418392,
"memory(GiB)": 85.12,
"step": 820,
"train_speed(iter/s)": 0.035327
},
{
"acc": 0.63493099,
"epoch": 0.65,
"learning_rate": 9.999594070387343e-05,
"loss": 1.25098581,
"memory(GiB)": 85.12,
"step": 825,
"train_speed(iter/s)": 0.035329
},
{
"acc": 0.64141641,
"epoch": 0.65,
"learning_rate": 9.999522354576669e-05,
"loss": 1.21629629,
"memory(GiB)": 85.12,
"step": 830,
"train_speed(iter/s)": 0.035323
},
{
"acc": 0.65251746,
"epoch": 0.65,
"learning_rate": 9.99944480859812e-05,
"loss": 1.16716757,
"memory(GiB)": 85.12,
"step": 835,
"train_speed(iter/s)": 0.035309
},
{
"acc": 0.63880744,
"epoch": 0.66,
"learning_rate": 9.999361432542128e-05,
"loss": 1.21747561,
"memory(GiB)": 85.12,
"step": 840,
"train_speed(iter/s)": 0.035304
},
{
"acc": 0.6420105,
"epoch": 0.66,
"learning_rate": 9.99927222650592e-05,
"loss": 1.2190115,
"memory(GiB)": 85.12,
"step": 845,
"train_speed(iter/s)": 0.035308
},
{
"acc": 0.63001771,
"epoch": 0.67,
"learning_rate": 9.999177190593525e-05,
"loss": 1.25841103,
"memory(GiB)": 85.12,
"step": 850,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.62701368,
"epoch": 0.67,
"learning_rate": 9.999076324915768e-05,
"loss": 1.28360729,
"memory(GiB)": 85.12,
"step": 855,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.6483603,
"epoch": 0.67,
"learning_rate": 9.998969629590274e-05,
"loss": 1.19540863,
"memory(GiB)": 85.12,
"step": 860,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.64731088,
"epoch": 0.68,
"learning_rate": 9.998857104741461e-05,
"loss": 1.19496069,
"memory(GiB)": 85.12,
"step": 865,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.64258432,
"epoch": 0.68,
"learning_rate": 9.998738750500553e-05,
"loss": 1.2038908,
"memory(GiB)": 85.12,
"step": 870,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.63443327,
"epoch": 0.69,
"learning_rate": 9.998614567005569e-05,
"loss": 1.24369755,
"memory(GiB)": 85.12,
"step": 875,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.63675046,
"epoch": 0.69,
"learning_rate": 9.998484554401323e-05,
"loss": 1.25242014,
"memory(GiB)": 85.12,
"step": 880,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.63459558,
"epoch": 0.69,
"learning_rate": 9.99834871283943e-05,
"loss": 1.20982609,
"memory(GiB)": 85.12,
"step": 885,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.62059612,
"epoch": 0.7,
"learning_rate": 9.998207042478298e-05,
"loss": 1.29975319,
"memory(GiB)": 85.12,
"step": 890,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.64450974,
"epoch": 0.7,
"learning_rate": 9.998059543483138e-05,
"loss": 1.21123343,
"memory(GiB)": 85.12,
"step": 895,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.64039574,
"epoch": 0.71,
"learning_rate": 9.997906216025954e-05,
"loss": 1.22988548,
"memory(GiB)": 85.12,
"step": 900,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.64195466,
"epoch": 0.71,
"learning_rate": 9.997747060285548e-05,
"loss": 1.23618727,
"memory(GiB)": 85.12,
"step": 905,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.64903636,
"epoch": 0.71,
"learning_rate": 9.99758207644752e-05,
"loss": 1.1861618,
"memory(GiB)": 85.12,
"step": 910,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.64557695,
"epoch": 0.72,
"learning_rate": 9.997411264704264e-05,
"loss": 1.16902866,
"memory(GiB)": 85.12,
"step": 915,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.6429708,
"epoch": 0.72,
"learning_rate": 9.99723462525497e-05,
"loss": 1.20082893,
"memory(GiB)": 85.12,
"step": 920,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.64234171,
"epoch": 0.72,
"learning_rate": 9.99705215830563e-05,
"loss": 1.2100441,
"memory(GiB)": 85.12,
"step": 925,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.64274926,
"epoch": 0.73,
"learning_rate": 9.996863864069019e-05,
"loss": 1.15481606,
"memory(GiB)": 85.12,
"step": 930,
"train_speed(iter/s)": 0.035285
},
{
"acc": 0.63187203,
"epoch": 0.73,
"learning_rate": 9.996669742764722e-05,
"loss": 1.26747904,
"memory(GiB)": 85.12,
"step": 935,
"train_speed(iter/s)": 0.035288
},
{
"acc": 0.65685897,
"epoch": 0.74,
"learning_rate": 9.996469794619111e-05,
"loss": 1.1622716,
"memory(GiB)": 85.12,
"step": 940,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.64430394,
"epoch": 0.74,
"learning_rate": 9.996264019865353e-05,
"loss": 1.21984491,
"memory(GiB)": 85.12,
"step": 945,
"train_speed(iter/s)": 0.035287
},
{
"acc": 0.64375916,
"epoch": 0.74,
"learning_rate": 9.996052418743414e-05,
"loss": 1.23850603,
"memory(GiB)": 85.12,
"step": 950,
"train_speed(iter/s)": 0.035289
},
{
"acc": 0.63599143,
"epoch": 0.75,
"learning_rate": 9.995834991500048e-05,
"loss": 1.2474575,
"memory(GiB)": 85.12,
"step": 955,
"train_speed(iter/s)": 0.035292
},
{
"acc": 0.63893137,
"epoch": 0.75,
"learning_rate": 9.99561173838881e-05,
"loss": 1.23318357,
"memory(GiB)": 85.12,
"step": 960,
"train_speed(iter/s)": 0.035294
},
{
"acc": 0.64323502,
"epoch": 0.76,
"learning_rate": 9.995382659670043e-05,
"loss": 1.20357561,
"memory(GiB)": 85.12,
"step": 965,
"train_speed(iter/s)": 0.035295
},
{
"acc": 0.64130268,
"epoch": 0.76,
"learning_rate": 9.995147755610885e-05,
"loss": 1.21935377,
"memory(GiB)": 85.12,
"step": 970,
"train_speed(iter/s)": 0.035299
},
{
"acc": 0.64334393,
"epoch": 0.76,
"learning_rate": 9.994907026485273e-05,
"loss": 1.18746109,
"memory(GiB)": 85.12,
"step": 975,
"train_speed(iter/s)": 0.035301
},
{
"acc": 0.64836397,
"epoch": 0.77,
"learning_rate": 9.994660472573929e-05,
"loss": 1.18723745,
"memory(GiB)": 85.12,
"step": 980,
"train_speed(iter/s)": 0.035305
},
{
"acc": 0.64067945,
"epoch": 0.77,
"learning_rate": 9.994408094164369e-05,
"loss": 1.21092262,
"memory(GiB)": 85.12,
"step": 985,
"train_speed(iter/s)": 0.035308
},
{
"acc": 0.65696807,
"epoch": 0.78,
"learning_rate": 9.994149891550906e-05,
"loss": 1.15461454,
"memory(GiB)": 85.12,
"step": 990,
"train_speed(iter/s)": 0.035311
},
{
"acc": 0.64268813,
"epoch": 0.78,
"learning_rate": 9.99388586503464e-05,
"loss": 1.2276782,
"memory(GiB)": 85.12,
"step": 995,
"train_speed(iter/s)": 0.0353
},
{
"acc": 0.64682531,
"epoch": 0.78,
"learning_rate": 9.993616014923464e-05,
"loss": 1.19210787,
"memory(GiB)": 85.12,
"step": 1000,
"train_speed(iter/s)": 0.035295
},
{
"acc": 0.63967905,
"epoch": 0.79,
"learning_rate": 9.993340341532063e-05,
"loss": 1.21900482,
"memory(GiB)": 85.12,
"step": 1005,
"train_speed(iter/s)": 0.035298
},
{
"acc": 0.64601598,
"epoch": 0.79,
"learning_rate": 9.993058845181913e-05,
"loss": 1.18572149,
"memory(GiB)": 85.12,
"step": 1010,
"train_speed(iter/s)": 0.0353
},
{
"acc": 0.6581532,
"epoch": 0.8,
"learning_rate": 9.992771526201278e-05,
"loss": 1.17555571,
"memory(GiB)": 85.12,
"step": 1015,
"train_speed(iter/s)": 0.035303
},
{
"acc": 0.63905029,
"epoch": 0.8,
"learning_rate": 9.992478384925215e-05,
"loss": 1.22506828,
"memory(GiB)": 85.12,
"step": 1020,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.63737507,
"epoch": 0.8,
"learning_rate": 9.992179421695566e-05,
"loss": 1.24342728,
"memory(GiB)": 85.12,
"step": 1025,
"train_speed(iter/s)": 0.035309
},
{
"acc": 0.63883562,
"epoch": 0.81,
"learning_rate": 9.99187463686097e-05,
"loss": 1.22715645,
"memory(GiB)": 85.12,
"step": 1030,
"train_speed(iter/s)": 0.035304
},
{
"acc": 0.64026632,
"epoch": 0.81,
"learning_rate": 9.991564030776847e-05,
"loss": 1.2529954,
"memory(GiB)": 85.12,
"step": 1035,
"train_speed(iter/s)": 0.035307
},
{
"acc": 0.65661197,
"epoch": 0.82,
"learning_rate": 9.99124760380541e-05,
"loss": 1.17418413,
"memory(GiB)": 85.12,
"step": 1040,
"train_speed(iter/s)": 0.035295
},
{
"acc": 0.64462228,
"epoch": 0.82,
"learning_rate": 9.990925356315659e-05,
"loss": 1.21107912,
"memory(GiB)": 85.12,
"step": 1045,
"train_speed(iter/s)": 0.035297
},
{
"acc": 0.65130072,
"epoch": 0.82,
"learning_rate": 9.99059728868338e-05,
"loss": 1.18552179,
"memory(GiB)": 85.12,
"step": 1050,
"train_speed(iter/s)": 0.0353
},
{
"acc": 0.64942465,
"epoch": 0.83,
"learning_rate": 9.990263401291149e-05,
"loss": 1.19978065,
"memory(GiB)": 85.12,
"step": 1055,
"train_speed(iter/s)": 0.035302
},
{
"acc": 0.64416938,
"epoch": 0.83,
"learning_rate": 9.989923694528327e-05,
"loss": 1.21087933,
"memory(GiB)": 85.12,
"step": 1060,
"train_speed(iter/s)": 0.035298
},
{
"acc": 0.64448967,
"epoch": 0.83,
"learning_rate": 9.989578168791059e-05,
"loss": 1.23659315,
"memory(GiB)": 85.12,
"step": 1065,
"train_speed(iter/s)": 0.0353
},
{
"acc": 0.6494998,
"epoch": 0.84,
"learning_rate": 9.989226824482281e-05,
"loss": 1.1761158,
"memory(GiB)": 85.12,
"step": 1070,
"train_speed(iter/s)": 0.035302
},
{
"acc": 0.64944773,
"epoch": 0.84,
"learning_rate": 9.98886966201171e-05,
"loss": 1.17971296,
"memory(GiB)": 85.12,
"step": 1075,
"train_speed(iter/s)": 0.035305
},
{
"acc": 0.64949713,
"epoch": 0.85,
"learning_rate": 9.98850668179585e-05,
"loss": 1.19467411,
"memory(GiB)": 85.12,
"step": 1080,
"train_speed(iter/s)": 0.035301
},
{
"acc": 0.6409584,
"epoch": 0.85,
"learning_rate": 9.988137884257987e-05,
"loss": 1.22220039,
"memory(GiB)": 85.12,
"step": 1085,
"train_speed(iter/s)": 0.035289
},
{
"acc": 0.65775137,
"epoch": 0.85,
"learning_rate": 9.987763269828194e-05,
"loss": 1.15307426,
"memory(GiB)": 85.12,
"step": 1090,
"train_speed(iter/s)": 0.035292
},
{
"acc": 0.65808535,
"epoch": 0.86,
"learning_rate": 9.987382838943325e-05,
"loss": 1.16586542,
"memory(GiB)": 85.12,
"step": 1095,
"train_speed(iter/s)": 0.035294
},
{
"acc": 0.63382239,
"epoch": 0.86,
"learning_rate": 9.986996592047017e-05,
"loss": 1.22561712,
"memory(GiB)": 85.12,
"step": 1100,
"train_speed(iter/s)": 0.035297
},
{
"acc": 0.64164915,
"epoch": 0.87,
"learning_rate": 9.986604529589691e-05,
"loss": 1.2104146,
"memory(GiB)": 85.12,
"step": 1105,
"train_speed(iter/s)": 0.035299
},
{
"acc": 0.65060873,
"epoch": 0.87,
"learning_rate": 9.98620665202855e-05,
"loss": 1.18811502,
"memory(GiB)": 85.12,
"step": 1110,
"train_speed(iter/s)": 0.035301
},
{
"acc": 0.64695697,
"epoch": 0.87,
"learning_rate": 9.985802959827573e-05,
"loss": 1.20489264,
"memory(GiB)": 85.12,
"step": 1115,
"train_speed(iter/s)": 0.035303
},
{
"acc": 0.63937025,
"epoch": 0.88,
"learning_rate": 9.985393453457526e-05,
"loss": 1.20603237,
"memory(GiB)": 85.12,
"step": 1120,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.65824099,
"epoch": 0.88,
"learning_rate": 9.984978133395954e-05,
"loss": 1.16597528,
"memory(GiB)": 85.12,
"step": 1125,
"train_speed(iter/s)": 0.035309
},
{
"acc": 0.63783302,
"epoch": 0.89,
"learning_rate": 9.984557000127177e-05,
"loss": 1.2381628,
"memory(GiB)": 85.12,
"step": 1130,
"train_speed(iter/s)": 0.035311
},
{
"acc": 0.64608231,
"epoch": 0.89,
"learning_rate": 9.984130054142302e-05,
"loss": 1.20785971,
"memory(GiB)": 85.12,
"step": 1135,
"train_speed(iter/s)": 0.035313
},
{
"acc": 0.64283195,
"epoch": 0.89,
"learning_rate": 9.983697295939205e-05,
"loss": 1.19586048,
"memory(GiB)": 85.12,
"step": 1140,
"train_speed(iter/s)": 0.035315
},
{
"acc": 0.64510169,
"epoch": 0.9,
"learning_rate": 9.983258726022549e-05,
"loss": 1.17720518,
"memory(GiB)": 85.12,
"step": 1145,
"train_speed(iter/s)": 0.035316
},
{
"acc": 0.64794512,
"epoch": 0.9,
"learning_rate": 9.982814344903766e-05,
"loss": 1.20341921,
"memory(GiB)": 85.12,
"step": 1150,
"train_speed(iter/s)": 0.035319
},
{
"acc": 0.64761252,
"epoch": 0.91,
"learning_rate": 9.982364153101072e-05,
"loss": 1.20330772,
"memory(GiB)": 85.12,
"step": 1155,
"train_speed(iter/s)": 0.035321
},
{
"acc": 0.64469285,
"epoch": 0.91,
"learning_rate": 9.981908151139456e-05,
"loss": 1.22739487,
"memory(GiB)": 85.12,
"step": 1160,
"train_speed(iter/s)": 0.035323
},
{
"acc": 0.65169878,
"epoch": 0.91,
"learning_rate": 9.98144633955068e-05,
"loss": 1.17159405,
"memory(GiB)": 85.12,
"step": 1165,
"train_speed(iter/s)": 0.035325
},
{
"acc": 0.64189563,
"epoch": 0.92,
"learning_rate": 9.980978718873286e-05,
"loss": 1.21570683,
"memory(GiB)": 85.12,
"step": 1170,
"train_speed(iter/s)": 0.035327
},
{
"acc": 0.65856137,
"epoch": 0.92,
"learning_rate": 9.980505289652585e-05,
"loss": 1.14105463,
"memory(GiB)": 85.12,
"step": 1175,
"train_speed(iter/s)": 0.035328
},
{
"acc": 0.63436284,
"epoch": 0.92,
"learning_rate": 9.980026052440665e-05,
"loss": 1.2412138,
"memory(GiB)": 85.12,
"step": 1180,
"train_speed(iter/s)": 0.035325
},
{
"acc": 0.65267048,
"epoch": 0.93,
"learning_rate": 9.979541007796388e-05,
"loss": 1.17890749,
"memory(GiB)": 85.12,
"step": 1185,
"train_speed(iter/s)": 0.035327
},
{
"acc": 0.64592175,
"epoch": 0.93,
"learning_rate": 9.979050156285384e-05,
"loss": 1.19027033,
"memory(GiB)": 85.12,
"step": 1190,
"train_speed(iter/s)": 0.03533
},
{
"acc": 0.66446619,
"epoch": 0.94,
"learning_rate": 9.978553498480057e-05,
"loss": 1.15592375,
"memory(GiB)": 85.12,
"step": 1195,
"train_speed(iter/s)": 0.035325
},
{
"acc": 0.64879594,
"epoch": 0.94,
"learning_rate": 9.978051034959583e-05,
"loss": 1.2092351,
"memory(GiB)": 85.12,
"step": 1200,
"train_speed(iter/s)": 0.035327
},
{
"acc": 0.64852567,
"epoch": 0.94,
"learning_rate": 9.977542766309907e-05,
"loss": 1.19442778,
"memory(GiB)": 85.12,
"step": 1205,
"train_speed(iter/s)": 0.035323
},
{
"acc": 0.640028,
"epoch": 0.95,
"learning_rate": 9.977028693123744e-05,
"loss": 1.21321182,
"memory(GiB)": 85.12,
"step": 1210,
"train_speed(iter/s)": 0.035325
},
{
"acc": 0.64608712,
"epoch": 0.95,
"learning_rate": 9.976508816000578e-05,
"loss": 1.21685104,
"memory(GiB)": 85.12,
"step": 1215,
"train_speed(iter/s)": 0.035317
},
{
"acc": 0.65058255,
"epoch": 0.96,
"learning_rate": 9.975983135546661e-05,
"loss": 1.20579329,
"memory(GiB)": 85.12,
"step": 1220,
"train_speed(iter/s)": 0.035308
},
{
"acc": 0.64077854,
"epoch": 0.96,
"learning_rate": 9.975451652375012e-05,
"loss": 1.22381687,
"memory(GiB)": 85.12,
"step": 1225,
"train_speed(iter/s)": 0.035304
},
{
"acc": 0.64167862,
"epoch": 0.96,
"learning_rate": 9.974914367105419e-05,
"loss": 1.20327978,
"memory(GiB)": 85.12,
"step": 1230,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.64583015,
"epoch": 0.97,
"learning_rate": 9.974371280364431e-05,
"loss": 1.19592552,
"memory(GiB)": 85.12,
"step": 1235,
"train_speed(iter/s)": 0.035308
},
{
"acc": 0.6488265,
"epoch": 0.97,
"learning_rate": 9.973822392785373e-05,
"loss": 1.17611341,
"memory(GiB)": 85.12,
"step": 1240,
"train_speed(iter/s)": 0.035304
},
{
"acc": 0.65858684,
"epoch": 0.98,
"learning_rate": 9.973267705008318e-05,
"loss": 1.11910753,
"memory(GiB)": 85.12,
"step": 1245,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.63276486,
"epoch": 0.98,
"learning_rate": 9.97270721768012e-05,
"loss": 1.24935932,
"memory(GiB)": 85.12,
"step": 1250,
"train_speed(iter/s)": 0.035298
},
{
"acc": 0.63920984,
"epoch": 0.98,
"learning_rate": 9.972140931454385e-05,
"loss": 1.24413643,
"memory(GiB)": 85.12,
"step": 1255,
"train_speed(iter/s)": 0.035299
},
{
"acc": 0.64515247,
"epoch": 0.99,
"learning_rate": 9.971568846991486e-05,
"loss": 1.18529148,
"memory(GiB)": 85.12,
"step": 1260,
"train_speed(iter/s)": 0.035301
},
{
"acc": 0.64360232,
"epoch": 0.99,
"learning_rate": 9.970990964958556e-05,
"loss": 1.21833725,
"memory(GiB)": 85.12,
"step": 1265,
"train_speed(iter/s)": 0.035303
},
{
"acc": 0.64962535,
"epoch": 1.0,
"learning_rate": 9.970407286029487e-05,
"loss": 1.16980963,
"memory(GiB)": 85.12,
"step": 1270,
"train_speed(iter/s)": 0.035306
},
{
"acc": 0.64517279,
"epoch": 1.0,
"learning_rate": 9.969817810884937e-05,
"loss": 1.19798498,
"memory(GiB)": 85.12,
"step": 1275,
"train_speed(iter/s)": 0.035307
},
{
"epoch": 1.0,
"eval_acc": 0.66190833959429,
"eval_loss": 1.1170213222503662,
"eval_runtime": 85.7668,
"eval_samples_per_second": 1.084,
"eval_steps_per_second": 1.084,
"step": 1276
},
{
"acc": 0.66531973,
"epoch": 1.0,
"learning_rate": 9.969222540212319e-05,
"loss": 1.12897282,
"memory(GiB)": 85.12,
"step": 1280,
"train_speed(iter/s)": 0.035228
},
{
"acc": 0.64908504,
"epoch": 1.01,
"learning_rate": 9.968621474705802e-05,
"loss": 1.19679098,
"memory(GiB)": 85.12,
"step": 1285,
"train_speed(iter/s)": 0.035225
},
{
"acc": 0.65595608,
"epoch": 1.01,
"learning_rate": 9.96801461506632e-05,
"loss": 1.12893848,
"memory(GiB)": 85.12,
"step": 1290,
"train_speed(iter/s)": 0.035228
},
{
"acc": 0.66325932,
"epoch": 1.01,
"learning_rate": 9.967401962001553e-05,
"loss": 1.12414293,
"memory(GiB)": 85.12,
"step": 1295,
"train_speed(iter/s)": 0.035225
},
{
"acc": 0.63181615,
"epoch": 1.02,
"learning_rate": 9.966783516225948e-05,
"loss": 1.23086386,
"memory(GiB)": 85.12,
"step": 1300,
"train_speed(iter/s)": 0.035227
},
{
"acc": 0.64660926,
"epoch": 1.02,
"learning_rate": 9.966159278460703e-05,
"loss": 1.1694212,
"memory(GiB)": 85.12,
"step": 1305,
"train_speed(iter/s)": 0.035223
},
{
"acc": 0.65136437,
"epoch": 1.03,
"learning_rate": 9.965529249433768e-05,
"loss": 1.14605751,
"memory(GiB)": 85.12,
"step": 1310,
"train_speed(iter/s)": 0.035226
},
{
"acc": 0.64723616,
"epoch": 1.03,
"learning_rate": 9.964893429879846e-05,
"loss": 1.17278271,
"memory(GiB)": 85.12,
"step": 1315,
"train_speed(iter/s)": 0.035228
},
{
"acc": 0.63427768,
"epoch": 1.03,
"learning_rate": 9.9642518205404e-05,
"loss": 1.19455042,
"memory(GiB)": 85.12,
"step": 1320,
"train_speed(iter/s)": 0.03523
},
{
"acc": 0.65323257,
"epoch": 1.04,
"learning_rate": 9.963604422163636e-05,
"loss": 1.15521383,
"memory(GiB)": 85.12,
"step": 1325,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.65213785,
"epoch": 1.04,
"learning_rate": 9.962951235504511e-05,
"loss": 1.17218103,
"memory(GiB)": 85.12,
"step": 1330,
"train_speed(iter/s)": 0.035225
},
{
"acc": 0.63362112,
"epoch": 1.05,
"learning_rate": 9.962292261324744e-05,
"loss": 1.21349621,
"memory(GiB)": 85.12,
"step": 1335,
"train_speed(iter/s)": 0.035222
},
{
"acc": 0.64905601,
"epoch": 1.05,
"learning_rate": 9.961627500392788e-05,
"loss": 1.19248028,
"memory(GiB)": 85.12,
"step": 1340,
"train_speed(iter/s)": 0.035224
},
{
"acc": 0.63903928,
"epoch": 1.05,
"learning_rate": 9.960956953483854e-05,
"loss": 1.21704388,
"memory(GiB)": 85.12,
"step": 1345,
"train_speed(iter/s)": 0.035226
},
{
"acc": 0.64893613,
"epoch": 1.06,
"learning_rate": 9.960280621379891e-05,
"loss": 1.18590031,
"memory(GiB)": 85.12,
"step": 1350,
"train_speed(iter/s)": 0.035228
},
{
"acc": 0.66188636,
"epoch": 1.06,
"learning_rate": 9.959598504869608e-05,
"loss": 1.1234787,
"memory(GiB)": 85.12,
"step": 1355,
"train_speed(iter/s)": 0.035225
},
{
"acc": 0.65642624,
"epoch": 1.07,
"learning_rate": 9.958910604748449e-05,
"loss": 1.17125835,
"memory(GiB)": 85.12,
"step": 1360,
"train_speed(iter/s)": 0.035227
},
{
"acc": 0.64671488,
"epoch": 1.07,
"learning_rate": 9.958216921818602e-05,
"loss": 1.17184534,
"memory(GiB)": 85.12,
"step": 1365,
"train_speed(iter/s)": 0.035224
},
{
"acc": 0.65104051,
"epoch": 1.07,
"learning_rate": 9.957517456889005e-05,
"loss": 1.15897675,
"memory(GiB)": 85.12,
"step": 1370,
"train_speed(iter/s)": 0.035227
},
{
"acc": 0.6516345,
"epoch": 1.08,
"learning_rate": 9.956812210775336e-05,
"loss": 1.16180744,
"memory(GiB)": 85.12,
"step": 1375,
"train_speed(iter/s)": 0.035229
},
{
"acc": 0.64146729,
"epoch": 1.08,
"learning_rate": 9.956101184300012e-05,
"loss": 1.1801156,
"memory(GiB)": 85.12,
"step": 1380,
"train_speed(iter/s)": 0.035231
},
{
"acc": 0.64465218,
"epoch": 1.09,
"learning_rate": 9.955384378292195e-05,
"loss": 1.16287785,
"memory(GiB)": 85.12,
"step": 1385,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.65234551,
"epoch": 1.09,
"learning_rate": 9.954661793587783e-05,
"loss": 1.16832972,
"memory(GiB)": 85.12,
"step": 1390,
"train_speed(iter/s)": 0.03523
},
{
"acc": 0.64926839,
"epoch": 1.09,
"learning_rate": 9.953933431029417e-05,
"loss": 1.16717663,
"memory(GiB)": 85.12,
"step": 1395,
"train_speed(iter/s)": 0.035232
},
{
"acc": 0.6558826,
"epoch": 1.1,
"learning_rate": 9.953199291466469e-05,
"loss": 1.14773283,
"memory(GiB)": 85.12,
"step": 1400,
"train_speed(iter/s)": 0.035234
},
{
"acc": 0.64793453,
"epoch": 1.1,
"learning_rate": 9.952459375755056e-05,
"loss": 1.19053068,
"memory(GiB)": 85.12,
"step": 1405,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.64704976,
"epoch": 1.11,
"learning_rate": 9.951713684758027e-05,
"loss": 1.18572483,
"memory(GiB)": 85.12,
"step": 1410,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.64501376,
"epoch": 1.11,
"learning_rate": 9.950962219344963e-05,
"loss": 1.17802401,
"memory(GiB)": 85.12,
"step": 1415,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.64344835,
"epoch": 1.11,
"learning_rate": 9.950204980392185e-05,
"loss": 1.21547565,
"memory(GiB)": 85.12,
"step": 1420,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.65633788,
"epoch": 1.12,
"learning_rate": 9.94944196878274e-05,
"loss": 1.15950899,
"memory(GiB)": 85.12,
"step": 1425,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.64611464,
"epoch": 1.12,
"learning_rate": 9.948673185406412e-05,
"loss": 1.21565819,
"memory(GiB)": 85.12,
"step": 1430,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.64696469,
"epoch": 1.12,
"learning_rate": 9.947898631159716e-05,
"loss": 1.18466921,
"memory(GiB)": 85.12,
"step": 1435,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65379381,
"epoch": 1.13,
"learning_rate": 9.947118306945888e-05,
"loss": 1.16033335,
"memory(GiB)": 85.12,
"step": 1440,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.64872618,
"epoch": 1.13,
"learning_rate": 9.946332213674907e-05,
"loss": 1.17809114,
"memory(GiB)": 85.12,
"step": 1445,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.64592113,
"epoch": 1.14,
"learning_rate": 9.945540352263467e-05,
"loss": 1.19367371,
"memory(GiB)": 85.12,
"step": 1450,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65893774,
"epoch": 1.14,
"learning_rate": 9.944742723634995e-05,
"loss": 1.15910034,
"memory(GiB)": 85.12,
"step": 1455,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.63832693,
"epoch": 1.14,
"learning_rate": 9.943939328719638e-05,
"loss": 1.21491098,
"memory(GiB)": 85.12,
"step": 1460,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65693431,
"epoch": 1.15,
"learning_rate": 9.943130168454276e-05,
"loss": 1.17500277,
"memory(GiB)": 85.12,
"step": 1465,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66517062,
"epoch": 1.15,
"learning_rate": 9.942315243782504e-05,
"loss": 1.13439531,
"memory(GiB)": 85.12,
"step": 1470,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65051446,
"epoch": 1.16,
"learning_rate": 9.941494555654645e-05,
"loss": 1.19706593,
"memory(GiB)": 85.12,
"step": 1475,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.64909263,
"epoch": 1.16,
"learning_rate": 9.940668105027739e-05,
"loss": 1.19326334,
"memory(GiB)": 85.12,
"step": 1480,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.65501137,
"epoch": 1.16,
"learning_rate": 9.939835892865546e-05,
"loss": 1.17640152,
"memory(GiB)": 85.12,
"step": 1485,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.64871545,
"epoch": 1.17,
"learning_rate": 9.938997920138547e-05,
"loss": 1.18505135,
"memory(GiB)": 85.12,
"step": 1490,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.65266371,
"epoch": 1.17,
"learning_rate": 9.938154187823939e-05,
"loss": 1.15669746,
"memory(GiB)": 85.12,
"step": 1495,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66334338,
"epoch": 1.18,
"learning_rate": 9.937304696905636e-05,
"loss": 1.12421255,
"memory(GiB)": 85.12,
"step": 1500,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65040469,
"epoch": 1.18,
"learning_rate": 9.93644944837427e-05,
"loss": 1.15235605,
"memory(GiB)": 85.12,
"step": 1505,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.64324026,
"epoch": 1.18,
"learning_rate": 9.935588443227184e-05,
"loss": 1.18840065,
"memory(GiB)": 85.12,
"step": 1510,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.64157495,
"epoch": 1.19,
"learning_rate": 9.934721682468433e-05,
"loss": 1.19425764,
"memory(GiB)": 85.12,
"step": 1515,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.65195494,
"epoch": 1.19,
"learning_rate": 9.933849167108787e-05,
"loss": 1.17684612,
"memory(GiB)": 85.12,
"step": 1520,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.64985819,
"epoch": 1.2,
"learning_rate": 9.932970898165723e-05,
"loss": 1.18239994,
"memory(GiB)": 85.12,
"step": 1525,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65231233,
"epoch": 1.2,
"learning_rate": 9.932086876663435e-05,
"loss": 1.16985979,
"memory(GiB)": 85.12,
"step": 1530,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.65894971,
"epoch": 1.2,
"learning_rate": 9.931197103632817e-05,
"loss": 1.11519146,
"memory(GiB)": 85.12,
"step": 1535,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.64390192,
"epoch": 1.21,
"learning_rate": 9.930301580111472e-05,
"loss": 1.23124371,
"memory(GiB)": 85.12,
"step": 1540,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66011586,
"epoch": 1.21,
"learning_rate": 9.929400307143712e-05,
"loss": 1.13707018,
"memory(GiB)": 85.12,
"step": 1545,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65588398,
"epoch": 1.21,
"learning_rate": 9.928493285780552e-05,
"loss": 1.15754347,
"memory(GiB)": 85.12,
"step": 1550,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.64823947,
"epoch": 1.22,
"learning_rate": 9.927580517079712e-05,
"loss": 1.2073925,
"memory(GiB)": 85.12,
"step": 1555,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.65572858,
"epoch": 1.22,
"learning_rate": 9.926662002105608e-05,
"loss": 1.16998863,
"memory(GiB)": 85.12,
"step": 1560,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66158614,
"epoch": 1.23,
"learning_rate": 9.925737741929367e-05,
"loss": 1.1435194,
"memory(GiB)": 85.12,
"step": 1565,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.64395499,
"epoch": 1.23,
"learning_rate": 9.924807737628807e-05,
"loss": 1.21585579,
"memory(GiB)": 85.12,
"step": 1570,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.64430709,
"epoch": 1.23,
"learning_rate": 9.923871990288448e-05,
"loss": 1.17890778,
"memory(GiB)": 85.12,
"step": 1575,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.64855452,
"epoch": 1.24,
"learning_rate": 9.922930500999508e-05,
"loss": 1.1541831,
"memory(GiB)": 85.12,
"step": 1580,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65893106,
"epoch": 1.24,
"learning_rate": 9.9219832708599e-05,
"loss": 1.13851299,
"memory(GiB)": 85.12,
"step": 1585,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.65967436,
"epoch": 1.25,
"learning_rate": 9.921030300974232e-05,
"loss": 1.12484913,
"memory(GiB)": 85.12,
"step": 1590,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.64779596,
"epoch": 1.25,
"learning_rate": 9.920071592453804e-05,
"loss": 1.19642706,
"memory(GiB)": 85.12,
"step": 1595,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.64805603,
"epoch": 1.25,
"learning_rate": 9.919107146416608e-05,
"loss": 1.18528366,
"memory(GiB)": 85.12,
"step": 1600,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.6561811,
"epoch": 1.26,
"learning_rate": 9.918136963987333e-05,
"loss": 1.16669703,
"memory(GiB)": 85.12,
"step": 1605,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66756039,
"epoch": 1.26,
"learning_rate": 9.917161046297346e-05,
"loss": 1.11620274,
"memory(GiB)": 85.12,
"step": 1610,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.65261698,
"epoch": 1.27,
"learning_rate": 9.916179394484713e-05,
"loss": 1.15845966,
"memory(GiB)": 85.12,
"step": 1615,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66402683,
"epoch": 1.27,
"learning_rate": 9.915192009694179e-05,
"loss": 1.11538677,
"memory(GiB)": 85.12,
"step": 1620,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65500402,
"epoch": 1.27,
"learning_rate": 9.91419889307718e-05,
"loss": 1.17441206,
"memory(GiB)": 85.12,
"step": 1625,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65412102,
"epoch": 1.28,
"learning_rate": 9.913200045791834e-05,
"loss": 1.16534052,
"memory(GiB)": 85.12,
"step": 1630,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.64114447,
"epoch": 1.28,
"learning_rate": 9.912195469002941e-05,
"loss": 1.18363466,
"memory(GiB)": 85.12,
"step": 1635,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65664382,
"epoch": 1.29,
"learning_rate": 9.911185163881984e-05,
"loss": 1.17111397,
"memory(GiB)": 85.12,
"step": 1640,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.65938292,
"epoch": 1.29,
"learning_rate": 9.910169131607123e-05,
"loss": 1.12585945,
"memory(GiB)": 85.12,
"step": 1645,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.64360905,
"epoch": 1.29,
"learning_rate": 9.909147373363202e-05,
"loss": 1.1944355,
"memory(GiB)": 85.12,
"step": 1650,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.6680274,
"epoch": 1.3,
"learning_rate": 9.908119890341737e-05,
"loss": 1.10261869,
"memory(GiB)": 85.12,
"step": 1655,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.6460361,
"epoch": 1.3,
"learning_rate": 9.907086683740924e-05,
"loss": 1.17447681,
"memory(GiB)": 85.12,
"step": 1660,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.65601449,
"epoch": 1.3,
"learning_rate": 9.906047754765629e-05,
"loss": 1.14073763,
"memory(GiB)": 85.12,
"step": 1665,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.65036592,
"epoch": 1.31,
"learning_rate": 9.905003104627397e-05,
"loss": 1.17392483,
"memory(GiB)": 85.12,
"step": 1670,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.65944376,
"epoch": 1.31,
"learning_rate": 9.90395273454444e-05,
"loss": 1.13640366,
"memory(GiB)": 85.12,
"step": 1675,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.65229316,
"epoch": 1.32,
"learning_rate": 9.902896645741639e-05,
"loss": 1.17808762,
"memory(GiB)": 85.12,
"step": 1680,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.6533186,
"epoch": 1.32,
"learning_rate": 9.901834839450553e-05,
"loss": 1.1593545,
"memory(GiB)": 85.12,
"step": 1685,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.64995356,
"epoch": 1.32,
"learning_rate": 9.900767316909396e-05,
"loss": 1.18070507,
"memory(GiB)": 85.12,
"step": 1690,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.65651155,
"epoch": 1.33,
"learning_rate": 9.899694079363058e-05,
"loss": 1.12338991,
"memory(GiB)": 85.12,
"step": 1695,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.65021605,
"epoch": 1.33,
"learning_rate": 9.898615128063086e-05,
"loss": 1.19300032,
"memory(GiB)": 85.12,
"step": 1700,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.63957796,
"epoch": 1.34,
"learning_rate": 9.897530464267699e-05,
"loss": 1.21851835,
"memory(GiB)": 85.12,
"step": 1705,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.66472945,
"epoch": 1.34,
"learning_rate": 9.896440089241767e-05,
"loss": 1.13589916,
"memory(GiB)": 85.12,
"step": 1710,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.65348268,
"epoch": 1.34,
"learning_rate": 9.895344004256827e-05,
"loss": 1.15424995,
"memory(GiB)": 85.12,
"step": 1715,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.6559926,
"epoch": 1.35,
"learning_rate": 9.894242210591073e-05,
"loss": 1.15576687,
"memory(GiB)": 85.12,
"step": 1720,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65292115,
"epoch": 1.35,
"learning_rate": 9.893134709529359e-05,
"loss": 1.18022537,
"memory(GiB)": 85.12,
"step": 1725,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66411386,
"epoch": 1.36,
"learning_rate": 9.892021502363187e-05,
"loss": 1.11516771,
"memory(GiB)": 85.12,
"step": 1730,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.63985095,
"epoch": 1.36,
"learning_rate": 9.89090259039072e-05,
"loss": 1.22506847,
"memory(GiB)": 85.12,
"step": 1735,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.64176164,
"epoch": 1.36,
"learning_rate": 9.889777974916774e-05,
"loss": 1.20334921,
"memory(GiB)": 85.12,
"step": 1740,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.63934112,
"epoch": 1.37,
"learning_rate": 9.888647657252809e-05,
"loss": 1.23192434,
"memory(GiB)": 85.12,
"step": 1745,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65798678,
"epoch": 1.37,
"learning_rate": 9.887511638716942e-05,
"loss": 1.15708418,
"memory(GiB)": 85.12,
"step": 1750,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.6396348,
"epoch": 1.38,
"learning_rate": 9.886369920633937e-05,
"loss": 1.22064037,
"memory(GiB)": 85.12,
"step": 1755,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.64947276,
"epoch": 1.38,
"learning_rate": 9.885222504335199e-05,
"loss": 1.18840179,
"memory(GiB)": 85.12,
"step": 1760,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.65698085,
"epoch": 1.38,
"learning_rate": 9.884069391158784e-05,
"loss": 1.16323624,
"memory(GiB)": 85.12,
"step": 1765,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.65098138,
"epoch": 1.39,
"learning_rate": 9.88291058244939e-05,
"loss": 1.16516037,
"memory(GiB)": 85.12,
"step": 1770,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65393229,
"epoch": 1.39,
"learning_rate": 9.881746079558353e-05,
"loss": 1.16837893,
"memory(GiB)": 85.12,
"step": 1775,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.64121981,
"epoch": 1.39,
"learning_rate": 9.880575883843655e-05,
"loss": 1.21210432,
"memory(GiB)": 85.12,
"step": 1780,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.66884899,
"epoch": 1.4,
"learning_rate": 9.879399996669911e-05,
"loss": 1.12667084,
"memory(GiB)": 85.12,
"step": 1785,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65376649,
"epoch": 1.4,
"learning_rate": 9.878218419408379e-05,
"loss": 1.1607131,
"memory(GiB)": 85.12,
"step": 1790,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.64392834,
"epoch": 1.41,
"learning_rate": 9.877031153436949e-05,
"loss": 1.20786915,
"memory(GiB)": 85.12,
"step": 1795,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65759964,
"epoch": 1.41,
"learning_rate": 9.875838200140142e-05,
"loss": 1.15941496,
"memory(GiB)": 85.12,
"step": 1800,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.65850377,
"epoch": 1.41,
"learning_rate": 9.874639560909117e-05,
"loss": 1.15575294,
"memory(GiB)": 85.12,
"step": 1805,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.65355692,
"epoch": 1.42,
"learning_rate": 9.873435237141664e-05,
"loss": 1.14666672,
"memory(GiB)": 85.12,
"step": 1810,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.64999933,
"epoch": 1.42,
"learning_rate": 9.872225230242194e-05,
"loss": 1.16884727,
"memory(GiB)": 85.12,
"step": 1815,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.65628324,
"epoch": 1.43,
"learning_rate": 9.871009541621752e-05,
"loss": 1.12916546,
"memory(GiB)": 85.12,
"step": 1820,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65538239,
"epoch": 1.43,
"learning_rate": 9.869788172698006e-05,
"loss": 1.1587122,
"memory(GiB)": 85.12,
"step": 1825,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.65519543,
"epoch": 1.43,
"learning_rate": 9.868561124895254e-05,
"loss": 1.1943471,
"memory(GiB)": 85.12,
"step": 1830,
"train_speed(iter/s)": 0.035235
},
{
"acc": 0.65793271,
"epoch": 1.44,
"learning_rate": 9.867328399644407e-05,
"loss": 1.15140657,
"memory(GiB)": 85.12,
"step": 1835,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.64654632,
"epoch": 1.44,
"learning_rate": 9.866089998383004e-05,
"loss": 1.19984751,
"memory(GiB)": 85.12,
"step": 1840,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.6539813,
"epoch": 1.45,
"learning_rate": 9.864845922555198e-05,
"loss": 1.16101418,
"memory(GiB)": 85.12,
"step": 1845,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.65071869,
"epoch": 1.45,
"learning_rate": 9.863596173611764e-05,
"loss": 1.1871336,
"memory(GiB)": 85.12,
"step": 1850,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.64802847,
"epoch": 1.45,
"learning_rate": 9.862340753010089e-05,
"loss": 1.17077522,
"memory(GiB)": 85.12,
"step": 1855,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.6609251,
"epoch": 1.46,
"learning_rate": 9.861079662214177e-05,
"loss": 1.12675228,
"memory(GiB)": 85.12,
"step": 1860,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.652352,
"epoch": 1.46,
"learning_rate": 9.85981290269464e-05,
"loss": 1.17576647,
"memory(GiB)": 85.12,
"step": 1865,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.65460477,
"epoch": 1.47,
"learning_rate": 9.858540475928706e-05,
"loss": 1.17708349,
"memory(GiB)": 85.12,
"step": 1870,
"train_speed(iter/s)": 0.035235
},
{
"acc": 0.6584599,
"epoch": 1.47,
"learning_rate": 9.857262383400207e-05,
"loss": 1.13749962,
"memory(GiB)": 85.12,
"step": 1875,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.6569325,
"epoch": 1.47,
"learning_rate": 9.855978626599585e-05,
"loss": 1.14678946,
"memory(GiB)": 85.12,
"step": 1880,
"train_speed(iter/s)": 0.035235
},
{
"acc": 0.64635262,
"epoch": 1.48,
"learning_rate": 9.854689207023887e-05,
"loss": 1.17655993,
"memory(GiB)": 85.12,
"step": 1885,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.65549922,
"epoch": 1.48,
"learning_rate": 9.853394126176763e-05,
"loss": 1.14425611,
"memory(GiB)": 85.12,
"step": 1890,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.66941037,
"epoch": 1.49,
"learning_rate": 9.852093385568466e-05,
"loss": 1.08940992,
"memory(GiB)": 85.12,
"step": 1895,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.6526649,
"epoch": 1.49,
"learning_rate": 9.850786986715846e-05,
"loss": 1.15163832,
"memory(GiB)": 85.12,
"step": 1900,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.65831594,
"epoch": 1.49,
"learning_rate": 9.849474931142353e-05,
"loss": 1.12980242,
"memory(GiB)": 85.12,
"step": 1905,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.66181054,
"epoch": 1.5,
"learning_rate": 9.848157220378038e-05,
"loss": 1.14682779,
"memory(GiB)": 85.12,
"step": 1910,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.64066448,
"epoch": 1.5,
"learning_rate": 9.846833855959539e-05,
"loss": 1.22032328,
"memory(GiB)": 85.12,
"step": 1915,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.6627368,
"epoch": 1.5,
"learning_rate": 9.845504839430091e-05,
"loss": 1.11947041,
"memory(GiB)": 85.12,
"step": 1920,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.65865917,
"epoch": 1.51,
"learning_rate": 9.844170172339521e-05,
"loss": 1.16217585,
"memory(GiB)": 85.12,
"step": 1925,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65347538,
"epoch": 1.51,
"learning_rate": 9.842829856244247e-05,
"loss": 1.16731787,
"memory(GiB)": 85.12,
"step": 1930,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.64839239,
"epoch": 1.52,
"learning_rate": 9.841483892707268e-05,
"loss": 1.18971329,
"memory(GiB)": 85.12,
"step": 1935,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66025143,
"epoch": 1.52,
"learning_rate": 9.840132283298172e-05,
"loss": 1.12929058,
"memory(GiB)": 85.12,
"step": 1940,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65671482,
"epoch": 1.52,
"learning_rate": 9.838775029593135e-05,
"loss": 1.12444181,
"memory(GiB)": 85.12,
"step": 1945,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.65074253,
"epoch": 1.53,
"learning_rate": 9.837412133174911e-05,
"loss": 1.14755917,
"memory(GiB)": 85.12,
"step": 1950,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.64375038,
"epoch": 1.53,
"learning_rate": 9.836043595632832e-05,
"loss": 1.17292156,
"memory(GiB)": 85.12,
"step": 1955,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.65182729,
"epoch": 1.54,
"learning_rate": 9.834669418562811e-05,
"loss": 1.17811108,
"memory(GiB)": 85.12,
"step": 1960,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.65077796,
"epoch": 1.54,
"learning_rate": 9.833289603567341e-05,
"loss": 1.1634614,
"memory(GiB)": 85.12,
"step": 1965,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.64500337,
"epoch": 1.54,
"learning_rate": 9.831904152255486e-05,
"loss": 1.18109503,
"memory(GiB)": 85.12,
"step": 1970,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.65929651,
"epoch": 1.55,
"learning_rate": 9.830513066242882e-05,
"loss": 1.14037209,
"memory(GiB)": 85.12,
"step": 1975,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.66190724,
"epoch": 1.55,
"learning_rate": 9.829116347151737e-05,
"loss": 1.16235342,
"memory(GiB)": 85.12,
"step": 1980,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.65755038,
"epoch": 1.56,
"learning_rate": 9.827713996610826e-05,
"loss": 1.16937008,
"memory(GiB)": 85.12,
"step": 1985,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.64079676,
"epoch": 1.56,
"learning_rate": 9.826306016255498e-05,
"loss": 1.19097614,
"memory(GiB)": 85.12,
"step": 1990,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.66023755,
"epoch": 1.56,
"learning_rate": 9.824892407727656e-05,
"loss": 1.12927694,
"memory(GiB)": 85.12,
"step": 1995,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.65292602,
"epoch": 1.57,
"learning_rate": 9.823473172675777e-05,
"loss": 1.16442251,
"memory(GiB)": 85.12,
"step": 2000,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65421362,
"epoch": 1.57,
"learning_rate": 9.822048312754893e-05,
"loss": 1.16522408,
"memory(GiB)": 85.12,
"step": 2005,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.65284295,
"epoch": 1.58,
"learning_rate": 9.820617829626598e-05,
"loss": 1.17013979,
"memory(GiB)": 85.12,
"step": 2010,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.64222498,
"epoch": 1.58,
"learning_rate": 9.819181724959044e-05,
"loss": 1.23573723,
"memory(GiB)": 85.12,
"step": 2015,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66108804,
"epoch": 1.58,
"learning_rate": 9.817740000426932e-05,
"loss": 1.13777189,
"memory(GiB)": 85.12,
"step": 2020,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66587753,
"epoch": 1.59,
"learning_rate": 9.816292657711527e-05,
"loss": 1.11172771,
"memory(GiB)": 85.12,
"step": 2025,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.65847149,
"epoch": 1.59,
"learning_rate": 9.814839698500641e-05,
"loss": 1.14090157,
"memory(GiB)": 85.12,
"step": 2030,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.65127797,
"epoch": 1.59,
"learning_rate": 9.813381124488631e-05,
"loss": 1.16807508,
"memory(GiB)": 85.12,
"step": 2035,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66447649,
"epoch": 1.6,
"learning_rate": 9.811916937376409e-05,
"loss": 1.16490545,
"memory(GiB)": 85.12,
"step": 2040,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65319815,
"epoch": 1.6,
"learning_rate": 9.810447138871426e-05,
"loss": 1.15913305,
"memory(GiB)": 85.12,
"step": 2045,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.65084143,
"epoch": 1.61,
"learning_rate": 9.808971730687684e-05,
"loss": 1.15471087,
"memory(GiB)": 85.12,
"step": 2050,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65464187,
"epoch": 1.61,
"learning_rate": 9.80749071454572e-05,
"loss": 1.14399872,
"memory(GiB)": 85.12,
"step": 2055,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65305209,
"epoch": 1.61,
"learning_rate": 9.806004092172616e-05,
"loss": 1.15933371,
"memory(GiB)": 85.12,
"step": 2060,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.64206853,
"epoch": 1.62,
"learning_rate": 9.804511865301989e-05,
"loss": 1.18681612,
"memory(GiB)": 85.12,
"step": 2065,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.65472612,
"epoch": 1.62,
"learning_rate": 9.803014035673987e-05,
"loss": 1.17128534,
"memory(GiB)": 85.12,
"step": 2070,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66416421,
"epoch": 1.63,
"learning_rate": 9.801510605035303e-05,
"loss": 1.12266273,
"memory(GiB)": 85.12,
"step": 2075,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65653076,
"epoch": 1.63,
"learning_rate": 9.800001575139152e-05,
"loss": 1.11308479,
"memory(GiB)": 85.12,
"step": 2080,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.65959449,
"epoch": 1.63,
"learning_rate": 9.798486947745282e-05,
"loss": 1.12792482,
"memory(GiB)": 85.12,
"step": 2085,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65187888,
"epoch": 1.64,
"learning_rate": 9.796966724619967e-05,
"loss": 1.16060781,
"memory(GiB)": 85.12,
"step": 2090,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65552635,
"epoch": 1.64,
"learning_rate": 9.79544090753601e-05,
"loss": 1.13989534,
"memory(GiB)": 85.12,
"step": 2095,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.65974436,
"epoch": 1.65,
"learning_rate": 9.793909498272733e-05,
"loss": 1.1274127,
"memory(GiB)": 85.12,
"step": 2100,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66153345,
"epoch": 1.65,
"learning_rate": 9.792372498615981e-05,
"loss": 1.13705215,
"memory(GiB)": 85.12,
"step": 2105,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.64935384,
"epoch": 1.65,
"learning_rate": 9.790829910358122e-05,
"loss": 1.17313042,
"memory(GiB)": 85.12,
"step": 2110,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.66233768,
"epoch": 1.66,
"learning_rate": 9.789281735298032e-05,
"loss": 1.09848804,
"memory(GiB)": 85.12,
"step": 2115,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67054353,
"epoch": 1.66,
"learning_rate": 9.787727975241111e-05,
"loss": 1.11139088,
"memory(GiB)": 85.12,
"step": 2120,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66447287,
"epoch": 1.67,
"learning_rate": 9.786168631999269e-05,
"loss": 1.13588085,
"memory(GiB)": 85.12,
"step": 2125,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.65222578,
"epoch": 1.67,
"learning_rate": 9.784603707390922e-05,
"loss": 1.17907152,
"memory(GiB)": 85.12,
"step": 2130,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.65970678,
"epoch": 1.67,
"learning_rate": 9.783033203241006e-05,
"loss": 1.14419537,
"memory(GiB)": 85.12,
"step": 2135,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.65925775,
"epoch": 1.68,
"learning_rate": 9.78145712138095e-05,
"loss": 1.1633584,
"memory(GiB)": 85.12,
"step": 2140,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66307669,
"epoch": 1.68,
"learning_rate": 9.779875463648698e-05,
"loss": 1.12448187,
"memory(GiB)": 85.12,
"step": 2145,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65571504,
"epoch": 1.68,
"learning_rate": 9.77828823188869e-05,
"loss": 1.20079041,
"memory(GiB)": 85.12,
"step": 2150,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66194067,
"epoch": 1.69,
"learning_rate": 9.77669542795187e-05,
"loss": 1.10752869,
"memory(GiB)": 85.12,
"step": 2155,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.6523849,
"epoch": 1.69,
"learning_rate": 9.775097053695677e-05,
"loss": 1.15065937,
"memory(GiB)": 85.12,
"step": 2160,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.64300294,
"epoch": 1.7,
"learning_rate": 9.773493110984047e-05,
"loss": 1.17376556,
"memory(GiB)": 85.12,
"step": 2165,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.65471711,
"epoch": 1.7,
"learning_rate": 9.77188360168741e-05,
"loss": 1.16260157,
"memory(GiB)": 85.12,
"step": 2170,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.66860032,
"epoch": 1.7,
"learning_rate": 9.770268527682687e-05,
"loss": 1.09885559,
"memory(GiB)": 85.12,
"step": 2175,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.6501049,
"epoch": 1.71,
"learning_rate": 9.76864789085329e-05,
"loss": 1.17552853,
"memory(GiB)": 85.12,
"step": 2180,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.64576588,
"epoch": 1.71,
"learning_rate": 9.767021693089116e-05,
"loss": 1.19620943,
"memory(GiB)": 85.12,
"step": 2185,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66087532,
"epoch": 1.72,
"learning_rate": 9.765389936286545e-05,
"loss": 1.1152669,
"memory(GiB)": 85.12,
"step": 2190,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.66009383,
"epoch": 1.72,
"learning_rate": 9.763752622348445e-05,
"loss": 1.13289509,
"memory(GiB)": 85.12,
"step": 2195,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.66180921,
"epoch": 1.72,
"learning_rate": 9.762109753184159e-05,
"loss": 1.1201334,
"memory(GiB)": 85.12,
"step": 2200,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.65290222,
"epoch": 1.73,
"learning_rate": 9.760461330709513e-05,
"loss": 1.1867281,
"memory(GiB)": 85.12,
"step": 2205,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.65245934,
"epoch": 1.73,
"learning_rate": 9.758807356846804e-05,
"loss": 1.1851923,
"memory(GiB)": 85.12,
"step": 2210,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.6554101,
"epoch": 1.74,
"learning_rate": 9.757147833524808e-05,
"loss": 1.12435026,
"memory(GiB)": 85.12,
"step": 2215,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.66130071,
"epoch": 1.74,
"learning_rate": 9.755482762678768e-05,
"loss": 1.11828518,
"memory(GiB)": 85.12,
"step": 2220,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.65428829,
"epoch": 1.74,
"learning_rate": 9.753812146250398e-05,
"loss": 1.17555447,
"memory(GiB)": 85.12,
"step": 2225,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.65815506,
"epoch": 1.75,
"learning_rate": 9.75213598618788e-05,
"loss": 1.14015465,
"memory(GiB)": 85.12,
"step": 2230,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.6582922,
"epoch": 1.75,
"learning_rate": 9.750454284445859e-05,
"loss": 1.13366365,
"memory(GiB)": 85.12,
"step": 2235,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.64991212,
"epoch": 1.76,
"learning_rate": 9.748767042985442e-05,
"loss": 1.17165375,
"memory(GiB)": 85.12,
"step": 2240,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.65585694,
"epoch": 1.76,
"learning_rate": 9.7470742637742e-05,
"loss": 1.15650015,
"memory(GiB)": 85.12,
"step": 2245,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.65717545,
"epoch": 1.76,
"learning_rate": 9.745375948786158e-05,
"loss": 1.15424328,
"memory(GiB)": 85.12,
"step": 2250,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.65874028,
"epoch": 1.77,
"learning_rate": 9.743672100001793e-05,
"loss": 1.14350729,
"memory(GiB)": 85.12,
"step": 2255,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.64995842,
"epoch": 1.77,
"learning_rate": 9.741962719408047e-05,
"loss": 1.16558609,
"memory(GiB)": 85.12,
"step": 2260,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.6412828,
"epoch": 1.78,
"learning_rate": 9.7402478089983e-05,
"loss": 1.24160509,
"memory(GiB)": 85.12,
"step": 2265,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.66066208,
"epoch": 1.78,
"learning_rate": 9.738527370772387e-05,
"loss": 1.13935509,
"memory(GiB)": 85.12,
"step": 2270,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.65170732,
"epoch": 1.78,
"learning_rate": 9.73680140673659e-05,
"loss": 1.15450306,
"memory(GiB)": 85.12,
"step": 2275,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.65960178,
"epoch": 1.79,
"learning_rate": 9.735069918903635e-05,
"loss": 1.13573933,
"memory(GiB)": 85.12,
"step": 2280,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.66337166,
"epoch": 1.79,
"learning_rate": 9.733332909292684e-05,
"loss": 1.15319395,
"memory(GiB)": 85.12,
"step": 2285,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.66128883,
"epoch": 1.79,
"learning_rate": 9.731590379929345e-05,
"loss": 1.158424,
"memory(GiB)": 85.12,
"step": 2290,
"train_speed(iter/s)": 0.035285
},
{
"acc": 0.65605984,
"epoch": 1.8,
"learning_rate": 9.729842332845657e-05,
"loss": 1.15069437,
"memory(GiB)": 85.12,
"step": 2295,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.63679175,
"epoch": 1.8,
"learning_rate": 9.7280887700801e-05,
"loss": 1.2136096,
"memory(GiB)": 85.12,
"step": 2300,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.64580112,
"epoch": 1.81,
"learning_rate": 9.726329693677578e-05,
"loss": 1.19345636,
"memory(GiB)": 85.12,
"step": 2305,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.65686107,
"epoch": 1.81,
"learning_rate": 9.724565105689432e-05,
"loss": 1.13980618,
"memory(GiB)": 85.12,
"step": 2310,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.66553755,
"epoch": 1.81,
"learning_rate": 9.722795008173427e-05,
"loss": 1.1280262,
"memory(GiB)": 85.12,
"step": 2315,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65331993,
"epoch": 1.82,
"learning_rate": 9.721019403193753e-05,
"loss": 1.16992741,
"memory(GiB)": 85.12,
"step": 2320,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65903072,
"epoch": 1.82,
"learning_rate": 9.719238292821022e-05,
"loss": 1.15253115,
"memory(GiB)": 85.12,
"step": 2325,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.65842004,
"epoch": 1.83,
"learning_rate": 9.71745167913227e-05,
"loss": 1.15765343,
"memory(GiB)": 85.12,
"step": 2330,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.64939175,
"epoch": 1.83,
"learning_rate": 9.715659564210944e-05,
"loss": 1.1643466,
"memory(GiB)": 85.12,
"step": 2335,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.66922626,
"epoch": 1.83,
"learning_rate": 9.713861950146912e-05,
"loss": 1.1116375,
"memory(GiB)": 85.12,
"step": 2340,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.64719138,
"epoch": 1.84,
"learning_rate": 9.712058839036451e-05,
"loss": 1.20366507,
"memory(GiB)": 85.12,
"step": 2345,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.65068803,
"epoch": 1.84,
"learning_rate": 9.71025023298225e-05,
"loss": 1.14096384,
"memory(GiB)": 85.12,
"step": 2350,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.6618053,
"epoch": 1.85,
"learning_rate": 9.708436134093408e-05,
"loss": 1.13345575,
"memory(GiB)": 85.12,
"step": 2355,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.65803671,
"epoch": 1.85,
"learning_rate": 9.706616544485428e-05,
"loss": 1.14154787,
"memory(GiB)": 85.12,
"step": 2360,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.65964813,
"epoch": 1.85,
"learning_rate": 9.70479146628021e-05,
"loss": 1.11802235,
"memory(GiB)": 85.12,
"step": 2365,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.6604876,
"epoch": 1.86,
"learning_rate": 9.702960901606064e-05,
"loss": 1.1343956,
"memory(GiB)": 85.12,
"step": 2370,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.66209579,
"epoch": 1.86,
"learning_rate": 9.701124852597692e-05,
"loss": 1.11040306,
"memory(GiB)": 85.12,
"step": 2375,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.66260943,
"epoch": 1.87,
"learning_rate": 9.699283321396195e-05,
"loss": 1.12860765,
"memory(GiB)": 85.12,
"step": 2380,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.66986876,
"epoch": 1.87,
"learning_rate": 9.697436310149066e-05,
"loss": 1.09946795,
"memory(GiB)": 85.12,
"step": 2385,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67240357,
"epoch": 1.87,
"learning_rate": 9.695583821010184e-05,
"loss": 1.12718344,
"memory(GiB)": 85.12,
"step": 2390,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.65421391,
"epoch": 1.88,
"learning_rate": 9.693725856139824e-05,
"loss": 1.14639235,
"memory(GiB)": 85.12,
"step": 2395,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.64448304,
"epoch": 1.88,
"learning_rate": 9.69186241770464e-05,
"loss": 1.21207218,
"memory(GiB)": 85.12,
"step": 2400,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.66120443,
"epoch": 1.88,
"learning_rate": 9.689993507877673e-05,
"loss": 1.12814407,
"memory(GiB)": 85.12,
"step": 2405,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.65368838,
"epoch": 1.89,
"learning_rate": 9.68811912883834e-05,
"loss": 1.15181837,
"memory(GiB)": 85.12,
"step": 2410,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.65113912,
"epoch": 1.89,
"learning_rate": 9.686239282772442e-05,
"loss": 1.16465178,
"memory(GiB)": 85.12,
"step": 2415,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.65074034,
"epoch": 1.9,
"learning_rate": 9.68435397187215e-05,
"loss": 1.16231499,
"memory(GiB)": 85.12,
"step": 2420,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.65878544,
"epoch": 1.9,
"learning_rate": 9.68246319833601e-05,
"loss": 1.14520617,
"memory(GiB)": 85.12,
"step": 2425,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.64476914,
"epoch": 1.9,
"learning_rate": 9.68056696436894e-05,
"loss": 1.20143547,
"memory(GiB)": 85.12,
"step": 2430,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.64212346,
"epoch": 1.91,
"learning_rate": 9.678665272182221e-05,
"loss": 1.22368813,
"memory(GiB)": 85.12,
"step": 2435,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.65965805,
"epoch": 1.91,
"learning_rate": 9.676758123993504e-05,
"loss": 1.12663536,
"memory(GiB)": 85.12,
"step": 2440,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.66423464,
"epoch": 1.92,
"learning_rate": 9.674845522026799e-05,
"loss": 1.12610149,
"memory(GiB)": 85.12,
"step": 2445,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.66499667,
"epoch": 1.92,
"learning_rate": 9.672927468512476e-05,
"loss": 1.1370595,
"memory(GiB)": 85.12,
"step": 2450,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65996475,
"epoch": 1.92,
"learning_rate": 9.671003965687266e-05,
"loss": 1.14243317,
"memory(GiB)": 85.12,
"step": 2455,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.66339087,
"epoch": 1.93,
"learning_rate": 9.669075015794252e-05,
"loss": 1.11444197,
"memory(GiB)": 85.12,
"step": 2460,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.65870614,
"epoch": 1.93,
"learning_rate": 9.667140621082867e-05,
"loss": 1.1119628,
"memory(GiB)": 85.12,
"step": 2465,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.66413293,
"epoch": 1.94,
"learning_rate": 9.665200783808897e-05,
"loss": 1.12356319,
"memory(GiB)": 85.12,
"step": 2470,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.65245867,
"epoch": 1.94,
"learning_rate": 9.663255506234474e-05,
"loss": 1.15396376,
"memory(GiB)": 85.12,
"step": 2475,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65678835,
"epoch": 1.94,
"learning_rate": 9.661304790628073e-05,
"loss": 1.16068563,
"memory(GiB)": 85.12,
"step": 2480,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.65423017,
"epoch": 1.95,
"learning_rate": 9.659348639264512e-05,
"loss": 1.16609097,
"memory(GiB)": 85.12,
"step": 2485,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.67275624,
"epoch": 1.95,
"learning_rate": 9.657387054424945e-05,
"loss": 1.09347582,
"memory(GiB)": 85.12,
"step": 2490,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.67117119,
"epoch": 1.96,
"learning_rate": 9.655420038396868e-05,
"loss": 1.11645508,
"memory(GiB)": 85.12,
"step": 2495,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.66794109,
"epoch": 1.96,
"learning_rate": 9.653447593474102e-05,
"loss": 1.10899277,
"memory(GiB)": 85.12,
"step": 2500,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.64761634,
"epoch": 1.96,
"learning_rate": 9.651469721956807e-05,
"loss": 1.18057451,
"memory(GiB)": 85.12,
"step": 2505,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.65636969,
"epoch": 1.97,
"learning_rate": 9.649486426151468e-05,
"loss": 1.15329361,
"memory(GiB)": 85.12,
"step": 2510,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.66316671,
"epoch": 1.97,
"learning_rate": 9.647497708370894e-05,
"loss": 1.13145294,
"memory(GiB)": 85.12,
"step": 2515,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.66438212,
"epoch": 1.97,
"learning_rate": 9.64550357093422e-05,
"loss": 1.10747566,
"memory(GiB)": 85.12,
"step": 2520,
"train_speed(iter/s)": 0.035285
},
{
"acc": 0.66867604,
"epoch": 1.98,
"learning_rate": 9.643504016166897e-05,
"loss": 1.10191135,
"memory(GiB)": 85.12,
"step": 2525,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.65627751,
"epoch": 1.98,
"learning_rate": 9.6414990464007e-05,
"loss": 1.14236097,
"memory(GiB)": 85.12,
"step": 2530,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.6637876,
"epoch": 1.99,
"learning_rate": 9.639488663973708e-05,
"loss": 1.13195019,
"memory(GiB)": 85.12,
"step": 2535,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.66524153,
"epoch": 1.99,
"learning_rate": 9.637472871230322e-05,
"loss": 1.11181889,
"memory(GiB)": 85.12,
"step": 2540,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.66480532,
"epoch": 1.99,
"learning_rate": 9.635451670521249e-05,
"loss": 1.11331224,
"memory(GiB)": 85.12,
"step": 2545,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.66729088,
"epoch": 2.0,
"learning_rate": 9.633425064203503e-05,
"loss": 1.09206867,
"memory(GiB)": 85.12,
"step": 2550,
"train_speed(iter/s)": 0.035276
},
{
"epoch": 2.0,
"eval_acc": 0.6775607312797396,
"eval_loss": 1.057321310043335,
"eval_runtime": 85.2273,
"eval_samples_per_second": 1.091,
"eval_steps_per_second": 1.091,
"step": 2552
},
{
"acc": 0.67183886,
"epoch": 2.0,
"learning_rate": 9.631393054640398e-05,
"loss": 1.07645693,
"memory(GiB)": 85.12,
"step": 2555,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.66320515,
"epoch": 2.01,
"learning_rate": 9.629355644201553e-05,
"loss": 1.10909252,
"memory(GiB)": 85.12,
"step": 2560,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.66873536,
"epoch": 2.01,
"learning_rate": 9.627312835262885e-05,
"loss": 1.09901686,
"memory(GiB)": 85.12,
"step": 2565,
"train_speed(iter/s)": 0.035231
},
{
"acc": 0.66333971,
"epoch": 2.01,
"learning_rate": 9.625264630206602e-05,
"loss": 1.11735725,
"memory(GiB)": 85.12,
"step": 2570,
"train_speed(iter/s)": 0.035232
},
{
"acc": 0.65929585,
"epoch": 2.02,
"learning_rate": 9.623211031421212e-05,
"loss": 1.12093697,
"memory(GiB)": 85.12,
"step": 2575,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.66388683,
"epoch": 2.02,
"learning_rate": 9.621152041301507e-05,
"loss": 1.11663198,
"memory(GiB)": 85.12,
"step": 2580,
"train_speed(iter/s)": 0.035231
},
{
"acc": 0.65557284,
"epoch": 2.03,
"learning_rate": 9.619087662248569e-05,
"loss": 1.13853168,
"memory(GiB)": 85.12,
"step": 2585,
"train_speed(iter/s)": 0.035232
},
{
"acc": 0.6557622,
"epoch": 2.03,
"learning_rate": 9.61701789666976e-05,
"loss": 1.14238157,
"memory(GiB)": 85.12,
"step": 2590,
"train_speed(iter/s)": 0.035234
},
{
"acc": 0.67138915,
"epoch": 2.03,
"learning_rate": 9.614942746978733e-05,
"loss": 1.0764698,
"memory(GiB)": 85.12,
"step": 2595,
"train_speed(iter/s)": 0.035232
},
{
"acc": 0.65261388,
"epoch": 2.04,
"learning_rate": 9.612862215595406e-05,
"loss": 1.1417222,
"memory(GiB)": 85.12,
"step": 2600,
"train_speed(iter/s)": 0.035233
},
{
"acc": 0.66247792,
"epoch": 2.04,
"learning_rate": 9.610776304945986e-05,
"loss": 1.13462439,
"memory(GiB)": 85.12,
"step": 2605,
"train_speed(iter/s)": 0.035235
},
{
"acc": 0.67750583,
"epoch": 2.05,
"learning_rate": 9.608685017462944e-05,
"loss": 1.0703146,
"memory(GiB)": 85.12,
"step": 2610,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.66992674,
"epoch": 2.05,
"learning_rate": 9.606588355585025e-05,
"loss": 1.10587053,
"memory(GiB)": 85.12,
"step": 2615,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.66086774,
"epoch": 2.05,
"learning_rate": 9.604486321757242e-05,
"loss": 1.13685856,
"memory(GiB)": 85.12,
"step": 2620,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.67727141,
"epoch": 2.06,
"learning_rate": 9.60237891843087e-05,
"loss": 1.06712856,
"memory(GiB)": 85.12,
"step": 2625,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.66415348,
"epoch": 2.06,
"learning_rate": 9.600266148063448e-05,
"loss": 1.16380205,
"memory(GiB)": 85.12,
"step": 2630,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.65921197,
"epoch": 2.07,
"learning_rate": 9.598148013118771e-05,
"loss": 1.11800652,
"memory(GiB)": 85.12,
"step": 2635,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.6564095,
"epoch": 2.07,
"learning_rate": 9.596024516066893e-05,
"loss": 1.14403868,
"memory(GiB)": 85.12,
"step": 2640,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.66070809,
"epoch": 2.07,
"learning_rate": 9.593895659384117e-05,
"loss": 1.1139576,
"memory(GiB)": 85.12,
"step": 2645,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.6555994,
"epoch": 2.08,
"learning_rate": 9.591761445553e-05,
"loss": 1.12918062,
"memory(GiB)": 85.12,
"step": 2650,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.66035252,
"epoch": 2.08,
"learning_rate": 9.589621877062346e-05,
"loss": 1.11460495,
"memory(GiB)": 85.12,
"step": 2655,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.66998568,
"epoch": 2.08,
"learning_rate": 9.5874769564072e-05,
"loss": 1.07866659,
"memory(GiB)": 85.12,
"step": 2660,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.65154204,
"epoch": 2.09,
"learning_rate": 9.585326686088851e-05,
"loss": 1.14091015,
"memory(GiB)": 85.12,
"step": 2665,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.6603961,
"epoch": 2.09,
"learning_rate": 9.583171068614827e-05,
"loss": 1.12223263,
"memory(GiB)": 85.12,
"step": 2670,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.66768351,
"epoch": 2.1,
"learning_rate": 9.58101010649889e-05,
"loss": 1.11046982,
"memory(GiB)": 85.12,
"step": 2675,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.66048141,
"epoch": 2.1,
"learning_rate": 9.578843802261036e-05,
"loss": 1.1147171,
"memory(GiB)": 85.12,
"step": 2680,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.66307225,
"epoch": 2.1,
"learning_rate": 9.576672158427485e-05,
"loss": 1.14018593,
"memory(GiB)": 85.12,
"step": 2685,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.66853991,
"epoch": 2.11,
"learning_rate": 9.574495177530693e-05,
"loss": 1.12644806,
"memory(GiB)": 85.12,
"step": 2690,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.67141595,
"epoch": 2.11,
"learning_rate": 9.572312862109335e-05,
"loss": 1.10125408,
"memory(GiB)": 85.12,
"step": 2695,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66562076,
"epoch": 2.12,
"learning_rate": 9.570125214708309e-05,
"loss": 1.11102467,
"memory(GiB)": 85.12,
"step": 2700,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.6675756,
"epoch": 2.12,
"learning_rate": 9.567932237878726e-05,
"loss": 1.10656528,
"memory(GiB)": 85.12,
"step": 2705,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.67014441,
"epoch": 2.12,
"learning_rate": 9.565733934177915e-05,
"loss": 1.07535477,
"memory(GiB)": 85.12,
"step": 2710,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.67194867,
"epoch": 2.13,
"learning_rate": 9.563530306169415e-05,
"loss": 1.06938372,
"memory(GiB)": 85.12,
"step": 2715,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.65955586,
"epoch": 2.13,
"learning_rate": 9.56132135642298e-05,
"loss": 1.14082947,
"memory(GiB)": 85.12,
"step": 2720,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65607781,
"epoch": 2.14,
"learning_rate": 9.559107087514562e-05,
"loss": 1.12005463,
"memory(GiB)": 85.12,
"step": 2725,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66828775,
"epoch": 2.14,
"learning_rate": 9.556887502026324e-05,
"loss": 1.10912933,
"memory(GiB)": 85.12,
"step": 2730,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65105276,
"epoch": 2.14,
"learning_rate": 9.554662602546622e-05,
"loss": 1.1439889,
"memory(GiB)": 85.12,
"step": 2735,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.66353316,
"epoch": 2.15,
"learning_rate": 9.552432391670009e-05,
"loss": 1.11523571,
"memory(GiB)": 85.12,
"step": 2740,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65910487,
"epoch": 2.15,
"learning_rate": 9.550196871997237e-05,
"loss": 1.10913839,
"memory(GiB)": 85.12,
"step": 2745,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66159306,
"epoch": 2.16,
"learning_rate": 9.547956046135247e-05,
"loss": 1.15435734,
"memory(GiB)": 85.12,
"step": 2750,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.65181875,
"epoch": 2.16,
"learning_rate": 9.545709916697164e-05,
"loss": 1.16566019,
"memory(GiB)": 85.12,
"step": 2755,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66318617,
"epoch": 2.16,
"learning_rate": 9.543458486302301e-05,
"loss": 1.10774937,
"memory(GiB)": 85.12,
"step": 2760,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.6740345,
"epoch": 2.17,
"learning_rate": 9.541201757576154e-05,
"loss": 1.1062582,
"memory(GiB)": 85.12,
"step": 2765,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.64558797,
"epoch": 2.17,
"learning_rate": 9.538939733150394e-05,
"loss": 1.18151665,
"memory(GiB)": 85.12,
"step": 2770,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.64788785,
"epoch": 2.17,
"learning_rate": 9.53667241566287e-05,
"loss": 1.15782328,
"memory(GiB)": 85.12,
"step": 2775,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65736341,
"epoch": 2.18,
"learning_rate": 9.534399807757606e-05,
"loss": 1.14570007,
"memory(GiB)": 85.12,
"step": 2780,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66911092,
"epoch": 2.18,
"learning_rate": 9.532121912084787e-05,
"loss": 1.09435015,
"memory(GiB)": 85.12,
"step": 2785,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6581212,
"epoch": 2.19,
"learning_rate": 9.529838731300774e-05,
"loss": 1.15303545,
"memory(GiB)": 85.12,
"step": 2790,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67710958,
"epoch": 2.19,
"learning_rate": 9.527550268068081e-05,
"loss": 1.0725668,
"memory(GiB)": 85.12,
"step": 2795,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65953379,
"epoch": 2.19,
"learning_rate": 9.525256525055395e-05,
"loss": 1.10951128,
"memory(GiB)": 85.12,
"step": 2800,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65992632,
"epoch": 2.2,
"learning_rate": 9.522957504937549e-05,
"loss": 1.12102213,
"memory(GiB)": 85.12,
"step": 2805,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66131835,
"epoch": 2.2,
"learning_rate": 9.520653210395534e-05,
"loss": 1.08475084,
"memory(GiB)": 85.12,
"step": 2810,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66965961,
"epoch": 2.21,
"learning_rate": 9.518343644116493e-05,
"loss": 1.08618603,
"memory(GiB)": 85.12,
"step": 2815,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.65925741,
"epoch": 2.21,
"learning_rate": 9.516028808793714e-05,
"loss": 1.1315218,
"memory(GiB)": 85.12,
"step": 2820,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66214266,
"epoch": 2.21,
"learning_rate": 9.51370870712663e-05,
"loss": 1.1035429,
"memory(GiB)": 85.12,
"step": 2825,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66194062,
"epoch": 2.22,
"learning_rate": 9.511383341820815e-05,
"loss": 1.1275753,
"memory(GiB)": 85.12,
"step": 2830,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.66372776,
"epoch": 2.22,
"learning_rate": 9.509052715587985e-05,
"loss": 1.10870571,
"memory(GiB)": 85.12,
"step": 2835,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66669927,
"epoch": 2.23,
"learning_rate": 9.506716831145988e-05,
"loss": 1.14299908,
"memory(GiB)": 85.12,
"step": 2840,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.66881795,
"epoch": 2.23,
"learning_rate": 9.504375691218802e-05,
"loss": 1.07950726,
"memory(GiB)": 85.12,
"step": 2845,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.67085314,
"epoch": 2.23,
"learning_rate": 9.502029298536535e-05,
"loss": 1.07588711,
"memory(GiB)": 85.12,
"step": 2850,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.65394874,
"epoch": 2.24,
"learning_rate": 9.499677655835421e-05,
"loss": 1.15423851,
"memory(GiB)": 85.12,
"step": 2855,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.64597754,
"epoch": 2.24,
"learning_rate": 9.49732076585782e-05,
"loss": 1.15842009,
"memory(GiB)": 85.12,
"step": 2860,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.67262759,
"epoch": 2.25,
"learning_rate": 9.494958631352204e-05,
"loss": 1.11735392,
"memory(GiB)": 85.12,
"step": 2865,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66508193,
"epoch": 2.25,
"learning_rate": 9.492591255073164e-05,
"loss": 1.09670143,
"memory(GiB)": 85.12,
"step": 2870,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67350874,
"epoch": 2.25,
"learning_rate": 9.490218639781407e-05,
"loss": 1.0795311,
"memory(GiB)": 85.12,
"step": 2875,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66628647,
"epoch": 2.26,
"learning_rate": 9.487840788243744e-05,
"loss": 1.12455454,
"memory(GiB)": 85.12,
"step": 2880,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.6715724,
"epoch": 2.26,
"learning_rate": 9.485457703233094e-05,
"loss": 1.11749763,
"memory(GiB)": 85.12,
"step": 2885,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.6443604,
"epoch": 2.26,
"learning_rate": 9.483069387528482e-05,
"loss": 1.18035517,
"memory(GiB)": 85.12,
"step": 2890,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.6723536,
"epoch": 2.27,
"learning_rate": 9.480675843915028e-05,
"loss": 1.08084173,
"memory(GiB)": 85.12,
"step": 2895,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66256552,
"epoch": 2.27,
"learning_rate": 9.478277075183955e-05,
"loss": 1.08778219,
"memory(GiB)": 85.12,
"step": 2900,
"train_speed(iter/s)": 0.035241
},
{
"acc": 0.65755138,
"epoch": 2.28,
"learning_rate": 9.47587308413257e-05,
"loss": 1.14270163,
"memory(GiB)": 85.12,
"step": 2905,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.64939547,
"epoch": 2.28,
"learning_rate": 9.473463873564275e-05,
"loss": 1.1697154,
"memory(GiB)": 85.12,
"step": 2910,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.67200222,
"epoch": 2.28,
"learning_rate": 9.471049446288564e-05,
"loss": 1.12756548,
"memory(GiB)": 85.12,
"step": 2915,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.66369948,
"epoch": 2.29,
"learning_rate": 9.468629805121005e-05,
"loss": 1.134799,
"memory(GiB)": 85.12,
"step": 2920,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.65741105,
"epoch": 2.29,
"learning_rate": 9.466204952883252e-05,
"loss": 1.12181864,
"memory(GiB)": 85.12,
"step": 2925,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.67293525,
"epoch": 2.3,
"learning_rate": 9.463774892403033e-05,
"loss": 1.08120308,
"memory(GiB)": 85.12,
"step": 2930,
"train_speed(iter/s)": 0.035235
},
{
"acc": 0.65945473,
"epoch": 2.3,
"learning_rate": 9.461339626514153e-05,
"loss": 1.12144451,
"memory(GiB)": 85.12,
"step": 2935,
"train_speed(iter/s)": 0.035236
},
{
"acc": 0.65807548,
"epoch": 2.3,
"learning_rate": 9.458899158056482e-05,
"loss": 1.1355731,
"memory(GiB)": 85.12,
"step": 2940,
"train_speed(iter/s)": 0.035237
},
{
"acc": 0.66247325,
"epoch": 2.31,
"learning_rate": 9.456453489875963e-05,
"loss": 1.11652193,
"memory(GiB)": 85.12,
"step": 2945,
"train_speed(iter/s)": 0.035238
},
{
"acc": 0.67103438,
"epoch": 2.31,
"learning_rate": 9.454002624824598e-05,
"loss": 1.10103321,
"memory(GiB)": 85.12,
"step": 2950,
"train_speed(iter/s)": 0.035239
},
{
"acc": 0.6699152,
"epoch": 2.32,
"learning_rate": 9.451546565760452e-05,
"loss": 1.07716627,
"memory(GiB)": 85.12,
"step": 2955,
"train_speed(iter/s)": 0.03524
},
{
"acc": 0.66332569,
"epoch": 2.32,
"learning_rate": 9.449085315547645e-05,
"loss": 1.1355279,
"memory(GiB)": 85.12,
"step": 2960,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.6688993,
"epoch": 2.32,
"learning_rate": 9.446618877056353e-05,
"loss": 1.08996553,
"memory(GiB)": 85.12,
"step": 2965,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.64563627,
"epoch": 2.33,
"learning_rate": 9.444147253162799e-05,
"loss": 1.18632555,
"memory(GiB)": 85.12,
"step": 2970,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66807208,
"epoch": 2.33,
"learning_rate": 9.441670446749253e-05,
"loss": 1.09000006,
"memory(GiB)": 85.12,
"step": 2975,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67623868,
"epoch": 2.34,
"learning_rate": 9.439188460704035e-05,
"loss": 1.06963615,
"memory(GiB)": 85.12,
"step": 2980,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67707462,
"epoch": 2.34,
"learning_rate": 9.436701297921499e-05,
"loss": 1.06638432,
"memory(GiB)": 85.12,
"step": 2985,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.64844503,
"epoch": 2.34,
"learning_rate": 9.434208961302037e-05,
"loss": 1.15902214,
"memory(GiB)": 85.12,
"step": 2990,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.66300759,
"epoch": 2.35,
"learning_rate": 9.431711453752074e-05,
"loss": 1.11802444,
"memory(GiB)": 85.12,
"step": 2995,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66209593,
"epoch": 2.35,
"learning_rate": 9.429208778184066e-05,
"loss": 1.12048893,
"memory(GiB)": 85.12,
"step": 3000,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65673513,
"epoch": 2.36,
"learning_rate": 9.426700937516498e-05,
"loss": 1.12821989,
"memory(GiB)": 85.12,
"step": 3005,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67200632,
"epoch": 2.36,
"learning_rate": 9.424187934673872e-05,
"loss": 1.08947983,
"memory(GiB)": 85.12,
"step": 3010,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66114106,
"epoch": 2.36,
"learning_rate": 9.421669772586716e-05,
"loss": 1.1081459,
"memory(GiB)": 85.12,
"step": 3015,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.6648818,
"epoch": 2.37,
"learning_rate": 9.419146454191572e-05,
"loss": 1.09442472,
"memory(GiB)": 85.12,
"step": 3020,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.65600429,
"epoch": 2.37,
"learning_rate": 9.416617982430994e-05,
"loss": 1.11577091,
"memory(GiB)": 85.12,
"step": 3025,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66453032,
"epoch": 2.37,
"learning_rate": 9.414084360253547e-05,
"loss": 1.11425781,
"memory(GiB)": 85.12,
"step": 3030,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.65060616,
"epoch": 2.38,
"learning_rate": 9.411545590613803e-05,
"loss": 1.13694382,
"memory(GiB)": 85.12,
"step": 3035,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.65530295,
"epoch": 2.38,
"learning_rate": 9.409001676472335e-05,
"loss": 1.16810818,
"memory(GiB)": 85.12,
"step": 3040,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66546168,
"epoch": 2.39,
"learning_rate": 9.406452620795714e-05,
"loss": 1.11694679,
"memory(GiB)": 85.12,
"step": 3045,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.673035,
"epoch": 2.39,
"learning_rate": 9.40389842655651e-05,
"loss": 1.067171,
"memory(GiB)": 85.12,
"step": 3050,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.6619174,
"epoch": 2.39,
"learning_rate": 9.401339096733283e-05,
"loss": 1.14351206,
"memory(GiB)": 85.12,
"step": 3055,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66858106,
"epoch": 2.4,
"learning_rate": 9.398774634310583e-05,
"loss": 1.10405941,
"memory(GiB)": 85.12,
"step": 3060,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66761208,
"epoch": 2.4,
"learning_rate": 9.396205042278946e-05,
"loss": 1.09991446,
"memory(GiB)": 85.12,
"step": 3065,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66788816,
"epoch": 2.41,
"learning_rate": 9.393630323634888e-05,
"loss": 1.10259113,
"memory(GiB)": 85.12,
"step": 3070,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66071186,
"epoch": 2.41,
"learning_rate": 9.391050481380903e-05,
"loss": 1.13652515,
"memory(GiB)": 85.12,
"step": 3075,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66306629,
"epoch": 2.41,
"learning_rate": 9.388465518525464e-05,
"loss": 1.1402585,
"memory(GiB)": 85.12,
"step": 3080,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66560416,
"epoch": 2.42,
"learning_rate": 9.385875438083008e-05,
"loss": 1.12412586,
"memory(GiB)": 85.12,
"step": 3085,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65981436,
"epoch": 2.42,
"learning_rate": 9.383280243073948e-05,
"loss": 1.12645855,
"memory(GiB)": 85.12,
"step": 3090,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66078367,
"epoch": 2.43,
"learning_rate": 9.380679936524656e-05,
"loss": 1.13504505,
"memory(GiB)": 85.12,
"step": 3095,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66190977,
"epoch": 2.43,
"learning_rate": 9.378074521467469e-05,
"loss": 1.1244626,
"memory(GiB)": 85.12,
"step": 3100,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.65778189,
"epoch": 2.43,
"learning_rate": 9.375464000940676e-05,
"loss": 1.16129856,
"memory(GiB)": 85.12,
"step": 3105,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66200366,
"epoch": 2.44,
"learning_rate": 9.37284837798852e-05,
"loss": 1.12543259,
"memory(GiB)": 85.12,
"step": 3110,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66628103,
"epoch": 2.44,
"learning_rate": 9.370227655661203e-05,
"loss": 1.11405783,
"memory(GiB)": 85.12,
"step": 3115,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66346598,
"epoch": 2.45,
"learning_rate": 9.367601837014864e-05,
"loss": 1.11039734,
"memory(GiB)": 85.12,
"step": 3120,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66284456,
"epoch": 2.45,
"learning_rate": 9.364970925111587e-05,
"loss": 1.14555759,
"memory(GiB)": 85.12,
"step": 3125,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65998135,
"epoch": 2.45,
"learning_rate": 9.362334923019397e-05,
"loss": 1.13087349,
"memory(GiB)": 85.12,
"step": 3130,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66171045,
"epoch": 2.46,
"learning_rate": 9.359693833812255e-05,
"loss": 1.13765631,
"memory(GiB)": 85.12,
"step": 3135,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66532288,
"epoch": 2.46,
"learning_rate": 9.357047660570056e-05,
"loss": 1.11209698,
"memory(GiB)": 85.12,
"step": 3140,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67781734,
"epoch": 2.46,
"learning_rate": 9.354396406378618e-05,
"loss": 1.05338039,
"memory(GiB)": 85.12,
"step": 3145,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.66491776,
"epoch": 2.47,
"learning_rate": 9.35174007432969e-05,
"loss": 1.1117816,
"memory(GiB)": 85.12,
"step": 3150,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.65880842,
"epoch": 2.47,
"learning_rate": 9.34907866752094e-05,
"loss": 1.11286173,
"memory(GiB)": 85.12,
"step": 3155,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.66765223,
"epoch": 2.48,
"learning_rate": 9.346412189055955e-05,
"loss": 1.10158033,
"memory(GiB)": 85.12,
"step": 3160,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.65851316,
"epoch": 2.48,
"learning_rate": 9.343740642044232e-05,
"loss": 1.1240366,
"memory(GiB)": 85.12,
"step": 3165,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66850886,
"epoch": 2.48,
"learning_rate": 9.341064029601188e-05,
"loss": 1.09285021,
"memory(GiB)": 85.12,
"step": 3170,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67183051,
"epoch": 2.49,
"learning_rate": 9.338382354848135e-05,
"loss": 1.08079424,
"memory(GiB)": 85.12,
"step": 3175,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67254267,
"epoch": 2.49,
"learning_rate": 9.335695620912298e-05,
"loss": 1.10049,
"memory(GiB)": 85.12,
"step": 3180,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67040033,
"epoch": 2.5,
"learning_rate": 9.333003830926799e-05,
"loss": 1.09397783,
"memory(GiB)": 85.12,
"step": 3185,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67311201,
"epoch": 2.5,
"learning_rate": 9.330306988030651e-05,
"loss": 1.07896299,
"memory(GiB)": 85.12,
"step": 3190,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.67690806,
"epoch": 2.5,
"learning_rate": 9.327605095368769e-05,
"loss": 1.10143909,
"memory(GiB)": 85.12,
"step": 3195,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.66498342,
"epoch": 2.51,
"learning_rate": 9.324898156091948e-05,
"loss": 1.11493244,
"memory(GiB)": 85.12,
"step": 3200,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.67564311,
"epoch": 2.51,
"learning_rate": 9.322186173356873e-05,
"loss": 1.08120804,
"memory(GiB)": 85.12,
"step": 3205,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.66903639,
"epoch": 2.52,
"learning_rate": 9.31946915032611e-05,
"loss": 1.08918238,
"memory(GiB)": 85.12,
"step": 3210,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67794003,
"epoch": 2.52,
"learning_rate": 9.316747090168101e-05,
"loss": 1.07291193,
"memory(GiB)": 85.12,
"step": 3215,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67113781,
"epoch": 2.52,
"learning_rate": 9.314019996057161e-05,
"loss": 1.08330698,
"memory(GiB)": 85.12,
"step": 3220,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.66346536,
"epoch": 2.53,
"learning_rate": 9.31128787117348e-05,
"loss": 1.13062449,
"memory(GiB)": 85.12,
"step": 3225,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66941795,
"epoch": 2.53,
"learning_rate": 9.308550718703111e-05,
"loss": 1.11006641,
"memory(GiB)": 85.12,
"step": 3230,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.65443735,
"epoch": 2.54,
"learning_rate": 9.305808541837969e-05,
"loss": 1.12953062,
"memory(GiB)": 85.12,
"step": 3235,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.66207342,
"epoch": 2.54,
"learning_rate": 9.30306134377583e-05,
"loss": 1.10449133,
"memory(GiB)": 85.12,
"step": 3240,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67379289,
"epoch": 2.54,
"learning_rate": 9.300309127720326e-05,
"loss": 1.07863102,
"memory(GiB)": 85.12,
"step": 3245,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66185699,
"epoch": 2.55,
"learning_rate": 9.297551896880938e-05,
"loss": 1.10122509,
"memory(GiB)": 85.12,
"step": 3250,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66234708,
"epoch": 2.55,
"learning_rate": 9.294789654473002e-05,
"loss": 1.14223385,
"memory(GiB)": 85.12,
"step": 3255,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.65402446,
"epoch": 2.55,
"learning_rate": 9.292022403717688e-05,
"loss": 1.13783741,
"memory(GiB)": 85.12,
"step": 3260,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.65600824,
"epoch": 2.56,
"learning_rate": 9.289250147842014e-05,
"loss": 1.15849085,
"memory(GiB)": 85.12,
"step": 3265,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66205072,
"epoch": 2.56,
"learning_rate": 9.286472890078832e-05,
"loss": 1.1514534,
"memory(GiB)": 85.12,
"step": 3270,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.6654716,
"epoch": 2.57,
"learning_rate": 9.283690633666826e-05,
"loss": 1.11500664,
"memory(GiB)": 85.12,
"step": 3275,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.65892515,
"epoch": 2.57,
"learning_rate": 9.280903381850511e-05,
"loss": 1.13781528,
"memory(GiB)": 85.12,
"step": 3280,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66229644,
"epoch": 2.57,
"learning_rate": 9.278111137880228e-05,
"loss": 1.11094999,
"memory(GiB)": 85.12,
"step": 3285,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67452283,
"epoch": 2.58,
"learning_rate": 9.275313905012135e-05,
"loss": 1.0776885,
"memory(GiB)": 85.12,
"step": 3290,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66590748,
"epoch": 2.58,
"learning_rate": 9.272511686508215e-05,
"loss": 1.08773432,
"memory(GiB)": 85.12,
"step": 3295,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67609005,
"epoch": 2.59,
"learning_rate": 9.269704485636259e-05,
"loss": 1.06893425,
"memory(GiB)": 85.12,
"step": 3300,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.66203904,
"epoch": 2.59,
"learning_rate": 9.26689230566987e-05,
"loss": 1.13459358,
"memory(GiB)": 85.12,
"step": 3305,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.64897304,
"epoch": 2.59,
"learning_rate": 9.264075149888459e-05,
"loss": 1.15744686,
"memory(GiB)": 85.12,
"step": 3310,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66094275,
"epoch": 2.6,
"learning_rate": 9.261253021577236e-05,
"loss": 1.15568581,
"memory(GiB)": 85.12,
"step": 3315,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66136718,
"epoch": 2.6,
"learning_rate": 9.258425924027212e-05,
"loss": 1.10611639,
"memory(GiB)": 85.12,
"step": 3320,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66478972,
"epoch": 2.61,
"learning_rate": 9.255593860535194e-05,
"loss": 1.10232067,
"memory(GiB)": 85.12,
"step": 3325,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65793505,
"epoch": 2.61,
"learning_rate": 9.252756834403778e-05,
"loss": 1.14366322,
"memory(GiB)": 85.12,
"step": 3330,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.66237917,
"epoch": 2.61,
"learning_rate": 9.249914848941348e-05,
"loss": 1.12491646,
"memory(GiB)": 85.12,
"step": 3335,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66611323,
"epoch": 2.62,
"learning_rate": 9.24706790746207e-05,
"loss": 1.09040012,
"memory(GiB)": 85.12,
"step": 3340,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66260924,
"epoch": 2.62,
"learning_rate": 9.244216013285894e-05,
"loss": 1.10959892,
"memory(GiB)": 85.12,
"step": 3345,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65978012,
"epoch": 2.63,
"learning_rate": 9.241359169738537e-05,
"loss": 1.11563673,
"memory(GiB)": 85.12,
"step": 3350,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66533618,
"epoch": 2.63,
"learning_rate": 9.238497380151495e-05,
"loss": 1.10536203,
"memory(GiB)": 85.12,
"step": 3355,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65439553,
"epoch": 2.63,
"learning_rate": 9.235630647862031e-05,
"loss": 1.1373312,
"memory(GiB)": 85.12,
"step": 3360,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66412206,
"epoch": 2.64,
"learning_rate": 9.232758976213167e-05,
"loss": 1.09817734,
"memory(GiB)": 85.12,
"step": 3365,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66531549,
"epoch": 2.64,
"learning_rate": 9.229882368553692e-05,
"loss": 1.10946426,
"memory(GiB)": 85.12,
"step": 3370,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.66333899,
"epoch": 2.64,
"learning_rate": 9.227000828238146e-05,
"loss": 1.0850071,
"memory(GiB)": 85.12,
"step": 3375,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66884055,
"epoch": 2.65,
"learning_rate": 9.224114358626823e-05,
"loss": 1.09241247,
"memory(GiB)": 85.12,
"step": 3380,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.6862587,
"epoch": 2.65,
"learning_rate": 9.221222963085765e-05,
"loss": 1.06248035,
"memory(GiB)": 85.12,
"step": 3385,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.66075082,
"epoch": 2.66,
"learning_rate": 9.218326644986758e-05,
"loss": 1.1324152,
"memory(GiB)": 85.12,
"step": 3390,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67370081,
"epoch": 2.66,
"learning_rate": 9.215425407707329e-05,
"loss": 1.08881779,
"memory(GiB)": 85.12,
"step": 3395,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65856786,
"epoch": 2.66,
"learning_rate": 9.212519254630742e-05,
"loss": 1.12874718,
"memory(GiB)": 85.12,
"step": 3400,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.65734329,
"epoch": 2.67,
"learning_rate": 9.20960818914599e-05,
"loss": 1.15164032,
"memory(GiB)": 85.12,
"step": 3405,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67054696,
"epoch": 2.67,
"learning_rate": 9.206692214647803e-05,
"loss": 1.10470772,
"memory(GiB)": 85.12,
"step": 3410,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67578359,
"epoch": 2.68,
"learning_rate": 9.203771334536626e-05,
"loss": 1.08748617,
"memory(GiB)": 85.12,
"step": 3415,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66613045,
"epoch": 2.68,
"learning_rate": 9.200845552218626e-05,
"loss": 1.12501793,
"memory(GiB)": 85.12,
"step": 3420,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66724539,
"epoch": 2.68,
"learning_rate": 9.197914871105696e-05,
"loss": 1.11535177,
"memory(GiB)": 85.12,
"step": 3425,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66614714,
"epoch": 2.69,
"learning_rate": 9.194979294615432e-05,
"loss": 1.11667767,
"memory(GiB)": 85.12,
"step": 3430,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67240911,
"epoch": 2.69,
"learning_rate": 9.192038826171138e-05,
"loss": 1.10492306,
"memory(GiB)": 85.12,
"step": 3435,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67085233,
"epoch": 2.7,
"learning_rate": 9.189093469201833e-05,
"loss": 1.11214399,
"memory(GiB)": 85.12,
"step": 3440,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66316462,
"epoch": 2.7,
"learning_rate": 9.186143227142225e-05,
"loss": 1.11524057,
"memory(GiB)": 85.12,
"step": 3445,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.67732773,
"epoch": 2.7,
"learning_rate": 9.183188103432729e-05,
"loss": 1.06655407,
"memory(GiB)": 85.12,
"step": 3450,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.65240765,
"epoch": 2.71,
"learning_rate": 9.180228101519443e-05,
"loss": 1.15858974,
"memory(GiB)": 85.12,
"step": 3455,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66337948,
"epoch": 2.71,
"learning_rate": 9.17726322485416e-05,
"loss": 1.12186775,
"memory(GiB)": 85.12,
"step": 3460,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67122927,
"epoch": 2.72,
"learning_rate": 9.174293476894356e-05,
"loss": 1.08822432,
"memory(GiB)": 85.12,
"step": 3465,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.65668936,
"epoch": 2.72,
"learning_rate": 9.171318861103188e-05,
"loss": 1.14420862,
"memory(GiB)": 85.12,
"step": 3470,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.65638585,
"epoch": 2.72,
"learning_rate": 9.16833938094949e-05,
"loss": 1.13455896,
"memory(GiB)": 85.12,
"step": 3475,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.68326602,
"epoch": 2.73,
"learning_rate": 9.165355039907766e-05,
"loss": 1.05959072,
"memory(GiB)": 85.12,
"step": 3480,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.64972272,
"epoch": 2.73,
"learning_rate": 9.162365841458192e-05,
"loss": 1.13724003,
"memory(GiB)": 85.12,
"step": 3485,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.66329112,
"epoch": 2.74,
"learning_rate": 9.159371789086606e-05,
"loss": 1.10761976,
"memory(GiB)": 85.12,
"step": 3490,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.65560665,
"epoch": 2.74,
"learning_rate": 9.156372886284507e-05,
"loss": 1.14408922,
"memory(GiB)": 85.12,
"step": 3495,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.66591907,
"epoch": 2.74,
"learning_rate": 9.15336913654905e-05,
"loss": 1.1079567,
"memory(GiB)": 85.12,
"step": 3500,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.65924816,
"epoch": 2.75,
"learning_rate": 9.150360543383042e-05,
"loss": 1.12748995,
"memory(GiB)": 85.12,
"step": 3505,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.66098423,
"epoch": 2.75,
"learning_rate": 9.147347110294941e-05,
"loss": 1.13659554,
"memory(GiB)": 85.12,
"step": 3510,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.66335301,
"epoch": 2.75,
"learning_rate": 9.144328840798848e-05,
"loss": 1.09939146,
"memory(GiB)": 85.12,
"step": 3515,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.65486746,
"epoch": 2.76,
"learning_rate": 9.141305738414499e-05,
"loss": 1.14847898,
"memory(GiB)": 85.12,
"step": 3520,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.65917635,
"epoch": 2.76,
"learning_rate": 9.138277806667271e-05,
"loss": 1.14824829,
"memory(GiB)": 85.12,
"step": 3525,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67288303,
"epoch": 2.77,
"learning_rate": 9.135245049088173e-05,
"loss": 1.05631142,
"memory(GiB)": 85.12,
"step": 3530,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67359824,
"epoch": 2.77,
"learning_rate": 9.132207469213836e-05,
"loss": 1.06311998,
"memory(GiB)": 85.12,
"step": 3535,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.6569242,
"epoch": 2.77,
"learning_rate": 9.129165070586523e-05,
"loss": 1.12648764,
"memory(GiB)": 85.12,
"step": 3540,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67112989,
"epoch": 2.78,
"learning_rate": 9.12611785675411e-05,
"loss": 1.10996456,
"memory(GiB)": 85.12,
"step": 3545,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.66401305,
"epoch": 2.78,
"learning_rate": 9.123065831270091e-05,
"loss": 1.11186533,
"memory(GiB)": 85.12,
"step": 3550,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.6686008,
"epoch": 2.79,
"learning_rate": 9.120008997693569e-05,
"loss": 1.08463211,
"memory(GiB)": 85.12,
"step": 3555,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.66068683,
"epoch": 2.79,
"learning_rate": 9.116947359589255e-05,
"loss": 1.12286921,
"memory(GiB)": 85.12,
"step": 3560,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.67136216,
"epoch": 2.79,
"learning_rate": 9.113880920527463e-05,
"loss": 1.10150156,
"memory(GiB)": 85.12,
"step": 3565,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.6669219,
"epoch": 2.8,
"learning_rate": 9.110809684084107e-05,
"loss": 1.11419725,
"memory(GiB)": 85.12,
"step": 3570,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.65082202,
"epoch": 2.8,
"learning_rate": 9.107733653840692e-05,
"loss": 1.17740231,
"memory(GiB)": 85.12,
"step": 3575,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.66411786,
"epoch": 2.81,
"learning_rate": 9.104652833384317e-05,
"loss": 1.10019693,
"memory(GiB)": 85.12,
"step": 3580,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67525086,
"epoch": 2.81,
"learning_rate": 9.101567226307664e-05,
"loss": 1.06568289,
"memory(GiB)": 85.12,
"step": 3585,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.66854186,
"epoch": 2.81,
"learning_rate": 9.098476836208997e-05,
"loss": 1.10631943,
"memory(GiB)": 85.12,
"step": 3590,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.66509161,
"epoch": 2.82,
"learning_rate": 9.095381666692164e-05,
"loss": 1.10477247,
"memory(GiB)": 85.12,
"step": 3595,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.66377773,
"epoch": 2.82,
"learning_rate": 9.092281721366575e-05,
"loss": 1.09822521,
"memory(GiB)": 85.12,
"step": 3600,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.66034365,
"epoch": 2.83,
"learning_rate": 9.089177003847218e-05,
"loss": 1.15647116,
"memory(GiB)": 85.12,
"step": 3605,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67322073,
"epoch": 2.83,
"learning_rate": 9.086067517754646e-05,
"loss": 1.09445295,
"memory(GiB)": 85.12,
"step": 3610,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.6580318,
"epoch": 2.83,
"learning_rate": 9.082953266714968e-05,
"loss": 1.14048214,
"memory(GiB)": 85.12,
"step": 3615,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.66556892,
"epoch": 2.84,
"learning_rate": 9.079834254359854e-05,
"loss": 1.10402184,
"memory(GiB)": 85.12,
"step": 3620,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67437925,
"epoch": 2.84,
"learning_rate": 9.076710484326522e-05,
"loss": 1.09448032,
"memory(GiB)": 85.12,
"step": 3625,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.66442852,
"epoch": 2.84,
"learning_rate": 9.073581960257742e-05,
"loss": 1.14604816,
"memory(GiB)": 85.12,
"step": 3630,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.66115713,
"epoch": 2.85,
"learning_rate": 9.070448685801829e-05,
"loss": 1.07533693,
"memory(GiB)": 85.12,
"step": 3635,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67220783,
"epoch": 2.85,
"learning_rate": 9.067310664612631e-05,
"loss": 1.11974783,
"memory(GiB)": 85.12,
"step": 3640,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.67338705,
"epoch": 2.86,
"learning_rate": 9.06416790034954e-05,
"loss": 1.11144781,
"memory(GiB)": 85.12,
"step": 3645,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.66914973,
"epoch": 2.86,
"learning_rate": 9.06102039667747e-05,
"loss": 1.08288326,
"memory(GiB)": 85.12,
"step": 3650,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.66724291,
"epoch": 2.86,
"learning_rate": 9.057868157266873e-05,
"loss": 1.09231892,
"memory(GiB)": 85.12,
"step": 3655,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.66573162,
"epoch": 2.87,
"learning_rate": 9.054711185793712e-05,
"loss": 1.13242044,
"memory(GiB)": 85.12,
"step": 3660,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.66399899,
"epoch": 2.87,
"learning_rate": 9.051549485939472e-05,
"loss": 1.08897943,
"memory(GiB)": 85.12,
"step": 3665,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.65977526,
"epoch": 2.88,
"learning_rate": 9.048383061391159e-05,
"loss": 1.12547369,
"memory(GiB)": 85.12,
"step": 3670,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65447931,
"epoch": 2.88,
"learning_rate": 9.045211915841279e-05,
"loss": 1.16677542,
"memory(GiB)": 85.12,
"step": 3675,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.67030063,
"epoch": 2.88,
"learning_rate": 9.04203605298785e-05,
"loss": 1.12552681,
"memory(GiB)": 85.12,
"step": 3680,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.6612977,
"epoch": 2.89,
"learning_rate": 9.038855476534385e-05,
"loss": 1.1319479,
"memory(GiB)": 85.12,
"step": 3685,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.6624711,
"epoch": 2.89,
"learning_rate": 9.035670190189902e-05,
"loss": 1.09792414,
"memory(GiB)": 85.12,
"step": 3690,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.67158294,
"epoch": 2.9,
"learning_rate": 9.032480197668902e-05,
"loss": 1.10716963,
"memory(GiB)": 85.12,
"step": 3695,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.66656246,
"epoch": 2.9,
"learning_rate": 9.029285502691382e-05,
"loss": 1.1186574,
"memory(GiB)": 85.12,
"step": 3700,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.66599183,
"epoch": 2.9,
"learning_rate": 9.026086108982819e-05,
"loss": 1.09721441,
"memory(GiB)": 85.12,
"step": 3705,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.67279172,
"epoch": 2.91,
"learning_rate": 9.022882020274167e-05,
"loss": 1.09024601,
"memory(GiB)": 85.12,
"step": 3710,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.65694847,
"epoch": 2.91,
"learning_rate": 9.019673240301862e-05,
"loss": 1.12552786,
"memory(GiB)": 85.12,
"step": 3715,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.67931943,
"epoch": 2.92,
"learning_rate": 9.016459772807804e-05,
"loss": 1.08509073,
"memory(GiB)": 85.12,
"step": 3720,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.67091622,
"epoch": 2.92,
"learning_rate": 9.013241621539364e-05,
"loss": 1.08755741,
"memory(GiB)": 85.12,
"step": 3725,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.66849947,
"epoch": 2.92,
"learning_rate": 9.01001879024937e-05,
"loss": 1.11163702,
"memory(GiB)": 85.12,
"step": 3730,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.66142335,
"epoch": 2.93,
"learning_rate": 9.006791282696113e-05,
"loss": 1.11982279,
"memory(GiB)": 85.12,
"step": 3735,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.66708808,
"epoch": 2.93,
"learning_rate": 9.003559102643335e-05,
"loss": 1.12939367,
"memory(GiB)": 85.12,
"step": 3740,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.65388312,
"epoch": 2.93,
"learning_rate": 9.000322253860225e-05,
"loss": 1.13458776,
"memory(GiB)": 85.12,
"step": 3745,
"train_speed(iter/s)": 0.035282
},
{
"acc": 0.66235805,
"epoch": 2.94,
"learning_rate": 8.997080740121417e-05,
"loss": 1.12270813,
"memory(GiB)": 85.12,
"step": 3750,
"train_speed(iter/s)": 0.035283
},
{
"acc": 0.67565556,
"epoch": 2.94,
"learning_rate": 8.993834565206989e-05,
"loss": 1.08064222,
"memory(GiB)": 85.12,
"step": 3755,
"train_speed(iter/s)": 0.035284
},
{
"acc": 0.67491579,
"epoch": 2.95,
"learning_rate": 8.99058373290245e-05,
"loss": 1.07194624,
"memory(GiB)": 85.12,
"step": 3760,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.66467142,
"epoch": 2.95,
"learning_rate": 8.987328246998742e-05,
"loss": 1.1253994,
"memory(GiB)": 85.12,
"step": 3765,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.65193849,
"epoch": 2.95,
"learning_rate": 8.984068111292232e-05,
"loss": 1.16464453,
"memory(GiB)": 85.12,
"step": 3770,
"train_speed(iter/s)": 0.035281
},
{
"acc": 0.66975975,
"epoch": 2.96,
"learning_rate": 8.980803329584712e-05,
"loss": 1.07693071,
"memory(GiB)": 85.12,
"step": 3775,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.65947042,
"epoch": 2.96,
"learning_rate": 8.977533905683393e-05,
"loss": 1.12480698,
"memory(GiB)": 85.12,
"step": 3780,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.65800185,
"epoch": 2.97,
"learning_rate": 8.974259843400894e-05,
"loss": 1.1366251,
"memory(GiB)": 85.12,
"step": 3785,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67165961,
"epoch": 2.97,
"learning_rate": 8.970981146555247e-05,
"loss": 1.10998592,
"memory(GiB)": 85.12,
"step": 3790,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67054834,
"epoch": 2.97,
"learning_rate": 8.967697818969889e-05,
"loss": 1.08357964,
"memory(GiB)": 85.12,
"step": 3795,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.65625038,
"epoch": 2.98,
"learning_rate": 8.964409864473656e-05,
"loss": 1.13358316,
"memory(GiB)": 85.12,
"step": 3800,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.66279435,
"epoch": 2.98,
"learning_rate": 8.961117286900777e-05,
"loss": 1.10367622,
"memory(GiB)": 85.12,
"step": 3805,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.65385418,
"epoch": 2.99,
"learning_rate": 8.957820090090877e-05,
"loss": 1.14131985,
"memory(GiB)": 85.12,
"step": 3810,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.6783905,
"epoch": 2.99,
"learning_rate": 8.954518277888966e-05,
"loss": 1.06005888,
"memory(GiB)": 85.12,
"step": 3815,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.66349859,
"epoch": 2.99,
"learning_rate": 8.951211854145434e-05,
"loss": 1.11471567,
"memory(GiB)": 85.12,
"step": 3820,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.65545945,
"epoch": 3.0,
"learning_rate": 8.947900822716053e-05,
"loss": 1.18849239,
"memory(GiB)": 85.12,
"step": 3825,
"train_speed(iter/s)": 0.035274
},
{
"epoch": 3.0,
"eval_acc": 0.6859504132231405,
"eval_loss": 1.0273702144622803,
"eval_runtime": 85.0825,
"eval_samples_per_second": 1.093,
"eval_steps_per_second": 1.093,
"step": 3828
},
{
"acc": 0.66745429,
"epoch": 3.0,
"learning_rate": 8.944585187461963e-05,
"loss": 1.09518471,
"memory(GiB)": 85.12,
"step": 3830,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.6809526,
"epoch": 3.01,
"learning_rate": 8.941264952249674e-05,
"loss": 1.05991888,
"memory(GiB)": 85.12,
"step": 3835,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67283616,
"epoch": 3.01,
"learning_rate": 8.937940120951068e-05,
"loss": 1.10407562,
"memory(GiB)": 85.12,
"step": 3840,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66963844,
"epoch": 3.01,
"learning_rate": 8.934610697443375e-05,
"loss": 1.08318148,
"memory(GiB)": 85.12,
"step": 3845,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67286458,
"epoch": 3.02,
"learning_rate": 8.931276685609187e-05,
"loss": 1.09335747,
"memory(GiB)": 85.12,
"step": 3850,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68292265,
"epoch": 3.02,
"learning_rate": 8.927938089336444e-05,
"loss": 1.03646545,
"memory(GiB)": 85.12,
"step": 3855,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67601447,
"epoch": 3.03,
"learning_rate": 8.924594912518435e-05,
"loss": 1.03905144,
"memory(GiB)": 85.12,
"step": 3860,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68262358,
"epoch": 3.03,
"learning_rate": 8.921247159053787e-05,
"loss": 1.04506426,
"memory(GiB)": 85.12,
"step": 3865,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67279882,
"epoch": 3.03,
"learning_rate": 8.917894832846467e-05,
"loss": 1.07862301,
"memory(GiB)": 85.12,
"step": 3870,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65542626,
"epoch": 3.04,
"learning_rate": 8.914537937805776e-05,
"loss": 1.15798473,
"memory(GiB)": 85.12,
"step": 3875,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.68080411,
"epoch": 3.04,
"learning_rate": 8.911176477846334e-05,
"loss": 1.03082008,
"memory(GiB)": 85.12,
"step": 3880,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66806016,
"epoch": 3.04,
"learning_rate": 8.907810456888097e-05,
"loss": 1.07673035,
"memory(GiB)": 85.12,
"step": 3885,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67421083,
"epoch": 3.05,
"learning_rate": 8.90443987885633e-05,
"loss": 1.11088667,
"memory(GiB)": 85.12,
"step": 3890,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.65671282,
"epoch": 3.05,
"learning_rate": 8.901064747681616e-05,
"loss": 1.14146271,
"memory(GiB)": 85.12,
"step": 3895,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67231884,
"epoch": 3.06,
"learning_rate": 8.897685067299846e-05,
"loss": 1.08252373,
"memory(GiB)": 85.12,
"step": 3900,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.677108,
"epoch": 3.06,
"learning_rate": 8.89430084165222e-05,
"loss": 1.05940466,
"memory(GiB)": 85.12,
"step": 3905,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68133845,
"epoch": 3.06,
"learning_rate": 8.890912074685236e-05,
"loss": 1.02218704,
"memory(GiB)": 85.12,
"step": 3910,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67558608,
"epoch": 3.07,
"learning_rate": 8.887518770350685e-05,
"loss": 1.06314049,
"memory(GiB)": 85.12,
"step": 3915,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66001134,
"epoch": 3.07,
"learning_rate": 8.884120932605653e-05,
"loss": 1.12467155,
"memory(GiB)": 85.12,
"step": 3920,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67021894,
"epoch": 3.08,
"learning_rate": 8.880718565412511e-05,
"loss": 1.07008371,
"memory(GiB)": 85.12,
"step": 3925,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.6668273,
"epoch": 3.08,
"learning_rate": 8.877311672738913e-05,
"loss": 1.09233027,
"memory(GiB)": 85.12,
"step": 3930,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66963663,
"epoch": 3.08,
"learning_rate": 8.87390025855779e-05,
"loss": 1.07303791,
"memory(GiB)": 85.12,
"step": 3935,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67969995,
"epoch": 3.09,
"learning_rate": 8.870484326847345e-05,
"loss": 1.06793242,
"memory(GiB)": 85.12,
"step": 3940,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66840816,
"epoch": 3.09,
"learning_rate": 8.867063881591049e-05,
"loss": 1.08742456,
"memory(GiB)": 85.12,
"step": 3945,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66912541,
"epoch": 3.1,
"learning_rate": 8.863638926777639e-05,
"loss": 1.09435921,
"memory(GiB)": 85.12,
"step": 3950,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66795225,
"epoch": 3.1,
"learning_rate": 8.860209466401106e-05,
"loss": 1.1207943,
"memory(GiB)": 85.12,
"step": 3955,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66590056,
"epoch": 3.1,
"learning_rate": 8.856775504460702e-05,
"loss": 1.07821617,
"memory(GiB)": 85.12,
"step": 3960,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67165837,
"epoch": 3.11,
"learning_rate": 8.853337044960918e-05,
"loss": 1.08614855,
"memory(GiB)": 85.12,
"step": 3965,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66955404,
"epoch": 3.11,
"learning_rate": 8.849894091911503e-05,
"loss": 1.09462404,
"memory(GiB)": 85.12,
"step": 3970,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.68378253,
"epoch": 3.12,
"learning_rate": 8.846446649327437e-05,
"loss": 1.03468351,
"memory(GiB)": 85.12,
"step": 3975,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66864328,
"epoch": 3.12,
"learning_rate": 8.842994721228936e-05,
"loss": 1.07628345,
"memory(GiB)": 85.12,
"step": 3980,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65653524,
"epoch": 3.12,
"learning_rate": 8.839538311641451e-05,
"loss": 1.13923292,
"memory(GiB)": 85.12,
"step": 3985,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67333913,
"epoch": 3.13,
"learning_rate": 8.836077424595657e-05,
"loss": 1.07015495,
"memory(GiB)": 85.12,
"step": 3990,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67381926,
"epoch": 3.13,
"learning_rate": 8.832612064127448e-05,
"loss": 1.06587915,
"memory(GiB)": 85.12,
"step": 3995,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66329141,
"epoch": 3.13,
"learning_rate": 8.829142234277936e-05,
"loss": 1.10319233,
"memory(GiB)": 85.12,
"step": 4000,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.68063631,
"epoch": 3.14,
"learning_rate": 8.825667939093449e-05,
"loss": 1.07182493,
"memory(GiB)": 85.12,
"step": 4005,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66322513,
"epoch": 3.14,
"learning_rate": 8.822189182625516e-05,
"loss": 1.12689009,
"memory(GiB)": 85.12,
"step": 4010,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67340002,
"epoch": 3.15,
"learning_rate": 8.818705968930874e-05,
"loss": 1.07936611,
"memory(GiB)": 85.12,
"step": 4015,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66903429,
"epoch": 3.15,
"learning_rate": 8.815218302071452e-05,
"loss": 1.10216112,
"memory(GiB)": 85.12,
"step": 4020,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66168714,
"epoch": 3.15,
"learning_rate": 8.811726186114377e-05,
"loss": 1.117346,
"memory(GiB)": 85.12,
"step": 4025,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66286302,
"epoch": 3.16,
"learning_rate": 8.808229625131963e-05,
"loss": 1.10122309,
"memory(GiB)": 85.12,
"step": 4030,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67067194,
"epoch": 3.16,
"learning_rate": 8.804728623201706e-05,
"loss": 1.09093103,
"memory(GiB)": 85.12,
"step": 4035,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66602073,
"epoch": 3.17,
"learning_rate": 8.801223184406283e-05,
"loss": 1.0800581,
"memory(GiB)": 85.12,
"step": 4040,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67198768,
"epoch": 3.17,
"learning_rate": 8.797713312833541e-05,
"loss": 1.05843801,
"memory(GiB)": 85.12,
"step": 4045,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67939711,
"epoch": 3.17,
"learning_rate": 8.794199012576502e-05,
"loss": 1.04836597,
"memory(GiB)": 85.12,
"step": 4050,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65837584,
"epoch": 3.18,
"learning_rate": 8.790680287733349e-05,
"loss": 1.11400471,
"memory(GiB)": 85.12,
"step": 4055,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68302851,
"epoch": 3.18,
"learning_rate": 8.787157142407422e-05,
"loss": 1.04046106,
"memory(GiB)": 85.12,
"step": 4060,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66324239,
"epoch": 3.19,
"learning_rate": 8.783629580707225e-05,
"loss": 1.13401537,
"memory(GiB)": 85.12,
"step": 4065,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66952362,
"epoch": 3.19,
"learning_rate": 8.780097606746404e-05,
"loss": 1.10449305,
"memory(GiB)": 85.12,
"step": 4070,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.6671958,
"epoch": 3.19,
"learning_rate": 8.77656122464375e-05,
"loss": 1.09806108,
"memory(GiB)": 85.12,
"step": 4075,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66547656,
"epoch": 3.2,
"learning_rate": 8.773020438523202e-05,
"loss": 1.10695295,
"memory(GiB)": 85.12,
"step": 4080,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66510615,
"epoch": 3.2,
"learning_rate": 8.769475252513826e-05,
"loss": 1.13047438,
"memory(GiB)": 85.12,
"step": 4085,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66734986,
"epoch": 3.21,
"learning_rate": 8.765925670749824e-05,
"loss": 1.09393444,
"memory(GiB)": 85.12,
"step": 4090,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67996044,
"epoch": 3.21,
"learning_rate": 8.762371697370523e-05,
"loss": 1.06481838,
"memory(GiB)": 85.12,
"step": 4095,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.6714335,
"epoch": 3.21,
"learning_rate": 8.75881333652037e-05,
"loss": 1.10486279,
"memory(GiB)": 85.12,
"step": 4100,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67522779,
"epoch": 3.22,
"learning_rate": 8.755250592348933e-05,
"loss": 1.07262745,
"memory(GiB)": 85.12,
"step": 4105,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67835989,
"epoch": 3.22,
"learning_rate": 8.751683469010887e-05,
"loss": 1.05402975,
"memory(GiB)": 85.12,
"step": 4110,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.6806859,
"epoch": 3.22,
"learning_rate": 8.748111970666011e-05,
"loss": 1.01472445,
"memory(GiB)": 85.12,
"step": 4115,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67549682,
"epoch": 3.23,
"learning_rate": 8.744536101479195e-05,
"loss": 1.10141077,
"memory(GiB)": 85.12,
"step": 4120,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67967439,
"epoch": 3.23,
"learning_rate": 8.74095586562042e-05,
"loss": 1.07214756,
"memory(GiB)": 85.12,
"step": 4125,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66929421,
"epoch": 3.24,
"learning_rate": 8.737371267264757e-05,
"loss": 1.09711647,
"memory(GiB)": 85.12,
"step": 4130,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66597781,
"epoch": 3.24,
"learning_rate": 8.733782310592369e-05,
"loss": 1.09240694,
"memory(GiB)": 85.12,
"step": 4135,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.68246903,
"epoch": 3.24,
"learning_rate": 8.7301889997885e-05,
"loss": 1.04920025,
"memory(GiB)": 85.12,
"step": 4140,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66437941,
"epoch": 3.25,
"learning_rate": 8.726591339043472e-05,
"loss": 1.12496767,
"memory(GiB)": 85.12,
"step": 4145,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66526365,
"epoch": 3.25,
"learning_rate": 8.722989332552681e-05,
"loss": 1.12980747,
"memory(GiB)": 85.12,
"step": 4150,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65992541,
"epoch": 3.26,
"learning_rate": 8.719382984516581e-05,
"loss": 1.1206811,
"memory(GiB)": 85.12,
"step": 4155,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66868792,
"epoch": 3.26,
"learning_rate": 8.715772299140705e-05,
"loss": 1.09060631,
"memory(GiB)": 85.12,
"step": 4160,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66484022,
"epoch": 3.26,
"learning_rate": 8.712157280635634e-05,
"loss": 1.09277716,
"memory(GiB)": 85.12,
"step": 4165,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67286205,
"epoch": 3.27,
"learning_rate": 8.708537933216999e-05,
"loss": 1.093993,
"memory(GiB)": 85.12,
"step": 4170,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66262193,
"epoch": 3.27,
"learning_rate": 8.704914261105488e-05,
"loss": 1.10075159,
"memory(GiB)": 85.12,
"step": 4175,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.6764492,
"epoch": 3.28,
"learning_rate": 8.701286268526827e-05,
"loss": 1.05436106,
"memory(GiB)": 85.12,
"step": 4180,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66421719,
"epoch": 3.28,
"learning_rate": 8.69765395971178e-05,
"loss": 1.12668571,
"memory(GiB)": 85.12,
"step": 4185,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66706834,
"epoch": 3.28,
"learning_rate": 8.694017338896149e-05,
"loss": 1.0926302,
"memory(GiB)": 85.12,
"step": 4190,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67663679,
"epoch": 3.29,
"learning_rate": 8.690376410320758e-05,
"loss": 1.04948025,
"memory(GiB)": 85.12,
"step": 4195,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66628132,
"epoch": 3.29,
"learning_rate": 8.686731178231459e-05,
"loss": 1.08100224,
"memory(GiB)": 85.12,
"step": 4200,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67406206,
"epoch": 3.3,
"learning_rate": 8.68308164687912e-05,
"loss": 1.07763453,
"memory(GiB)": 85.12,
"step": 4205,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66126747,
"epoch": 3.3,
"learning_rate": 8.679427820519625e-05,
"loss": 1.11055937,
"memory(GiB)": 85.12,
"step": 4210,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.65678339,
"epoch": 3.3,
"learning_rate": 8.675769703413863e-05,
"loss": 1.12440147,
"memory(GiB)": 85.12,
"step": 4215,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66874876,
"epoch": 3.31,
"learning_rate": 8.672107299827732e-05,
"loss": 1.10668583,
"memory(GiB)": 85.12,
"step": 4220,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.6592135,
"epoch": 3.31,
"learning_rate": 8.668440614032124e-05,
"loss": 1.11936607,
"memory(GiB)": 85.12,
"step": 4225,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67665238,
"epoch": 3.32,
"learning_rate": 8.664769650302926e-05,
"loss": 1.07830534,
"memory(GiB)": 85.12,
"step": 4230,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66774406,
"epoch": 3.32,
"learning_rate": 8.661094412921014e-05,
"loss": 1.10304251,
"memory(GiB)": 85.12,
"step": 4235,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68347654,
"epoch": 3.32,
"learning_rate": 8.657414906172247e-05,
"loss": 1.0130826,
"memory(GiB)": 85.12,
"step": 4240,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65933442,
"epoch": 3.33,
"learning_rate": 8.653731134347464e-05,
"loss": 1.09179993,
"memory(GiB)": 85.12,
"step": 4245,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66771193,
"epoch": 3.33,
"learning_rate": 8.650043101742478e-05,
"loss": 1.09305897,
"memory(GiB)": 85.12,
"step": 4250,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.65708385,
"epoch": 3.33,
"learning_rate": 8.646350812658069e-05,
"loss": 1.12786036,
"memory(GiB)": 85.12,
"step": 4255,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67746892,
"epoch": 3.34,
"learning_rate": 8.642654271399979e-05,
"loss": 1.06408577,
"memory(GiB)": 85.12,
"step": 4260,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66043816,
"epoch": 3.34,
"learning_rate": 8.638953482278915e-05,
"loss": 1.12865877,
"memory(GiB)": 85.12,
"step": 4265,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67736588,
"epoch": 3.35,
"learning_rate": 8.63524844961053e-05,
"loss": 1.05247889,
"memory(GiB)": 85.12,
"step": 4270,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67974887,
"epoch": 3.35,
"learning_rate": 8.631539177715433e-05,
"loss": 1.06267481,
"memory(GiB)": 85.12,
"step": 4275,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67399712,
"epoch": 3.35,
"learning_rate": 8.62782567091917e-05,
"loss": 1.07866135,
"memory(GiB)": 85.12,
"step": 4280,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68216677,
"epoch": 3.36,
"learning_rate": 8.624107933552229e-05,
"loss": 1.04396935,
"memory(GiB)": 85.12,
"step": 4285,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.65968661,
"epoch": 3.36,
"learning_rate": 8.620385969950031e-05,
"loss": 1.09188328,
"memory(GiB)": 85.12,
"step": 4290,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67161303,
"epoch": 3.37,
"learning_rate": 8.616659784452925e-05,
"loss": 1.08730984,
"memory(GiB)": 85.12,
"step": 4295,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67344618,
"epoch": 3.37,
"learning_rate": 8.612929381406183e-05,
"loss": 1.06549606,
"memory(GiB)": 85.12,
"step": 4300,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67476172,
"epoch": 3.37,
"learning_rate": 8.609194765159996e-05,
"loss": 1.08071413,
"memory(GiB)": 85.12,
"step": 4305,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67202482,
"epoch": 3.38,
"learning_rate": 8.605455940069468e-05,
"loss": 1.09838877,
"memory(GiB)": 85.12,
"step": 4310,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67025285,
"epoch": 3.38,
"learning_rate": 8.601712910494612e-05,
"loss": 1.07920656,
"memory(GiB)": 85.12,
"step": 4315,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67839556,
"epoch": 3.39,
"learning_rate": 8.597965680800342e-05,
"loss": 1.06685658,
"memory(GiB)": 85.12,
"step": 4320,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67582765,
"epoch": 3.39,
"learning_rate": 8.594214255356472e-05,
"loss": 1.07480011,
"memory(GiB)": 85.12,
"step": 4325,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.6812016,
"epoch": 3.39,
"learning_rate": 8.590458638537706e-05,
"loss": 1.0496768,
"memory(GiB)": 85.12,
"step": 4330,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68081627,
"epoch": 3.4,
"learning_rate": 8.58669883472364e-05,
"loss": 1.03980932,
"memory(GiB)": 85.12,
"step": 4335,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66322517,
"epoch": 3.4,
"learning_rate": 8.58293484829875e-05,
"loss": 1.10416336,
"memory(GiB)": 85.12,
"step": 4340,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67386465,
"epoch": 3.41,
"learning_rate": 8.579166683652388e-05,
"loss": 1.06861038,
"memory(GiB)": 85.12,
"step": 4345,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67121987,
"epoch": 3.41,
"learning_rate": 8.575394345178782e-05,
"loss": 1.09512386,
"memory(GiB)": 85.12,
"step": 4350,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66761322,
"epoch": 3.41,
"learning_rate": 8.571617837277027e-05,
"loss": 1.085077,
"memory(GiB)": 85.12,
"step": 4355,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.68047543,
"epoch": 3.42,
"learning_rate": 8.567837164351075e-05,
"loss": 1.06838808,
"memory(GiB)": 85.12,
"step": 4360,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.68031921,
"epoch": 3.42,
"learning_rate": 8.56405233080974e-05,
"loss": 1.06294222,
"memory(GiB)": 85.12,
"step": 4365,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66957574,
"epoch": 3.42,
"learning_rate": 8.560263341066689e-05,
"loss": 1.09513245,
"memory(GiB)": 85.12,
"step": 4370,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66336145,
"epoch": 3.43,
"learning_rate": 8.55647019954043e-05,
"loss": 1.09110317,
"memory(GiB)": 85.12,
"step": 4375,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67718058,
"epoch": 3.43,
"learning_rate": 8.552672910654317e-05,
"loss": 1.07627859,
"memory(GiB)": 85.12,
"step": 4380,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66453261,
"epoch": 3.44,
"learning_rate": 8.548871478836542e-05,
"loss": 1.11319456,
"memory(GiB)": 85.12,
"step": 4385,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.68078775,
"epoch": 3.44,
"learning_rate": 8.545065908520123e-05,
"loss": 1.04445333,
"memory(GiB)": 85.12,
"step": 4390,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67710371,
"epoch": 3.44,
"learning_rate": 8.541256204142905e-05,
"loss": 1.04607086,
"memory(GiB)": 85.12,
"step": 4395,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67265029,
"epoch": 3.45,
"learning_rate": 8.53744237014756e-05,
"loss": 1.07834921,
"memory(GiB)": 85.12,
"step": 4400,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67727437,
"epoch": 3.45,
"learning_rate": 8.533624410981567e-05,
"loss": 1.06278925,
"memory(GiB)": 85.12,
"step": 4405,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66954894,
"epoch": 3.46,
"learning_rate": 8.529802331097223e-05,
"loss": 1.08836241,
"memory(GiB)": 85.12,
"step": 4410,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67296772,
"epoch": 3.46,
"learning_rate": 8.525976134951626e-05,
"loss": 1.06974878,
"memory(GiB)": 85.12,
"step": 4415,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66709204,
"epoch": 3.46,
"learning_rate": 8.522145827006675e-05,
"loss": 1.06892281,
"memory(GiB)": 85.12,
"step": 4420,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.68249197,
"epoch": 3.47,
"learning_rate": 8.518311411729068e-05,
"loss": 1.05143995,
"memory(GiB)": 85.12,
"step": 4425,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.65883646,
"epoch": 3.47,
"learning_rate": 8.514472893590285e-05,
"loss": 1.10914154,
"memory(GiB)": 85.12,
"step": 4430,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67034783,
"epoch": 3.48,
"learning_rate": 8.510630277066594e-05,
"loss": 1.06353884,
"memory(GiB)": 85.12,
"step": 4435,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67994227,
"epoch": 3.48,
"learning_rate": 8.506783566639045e-05,
"loss": 1.07216578,
"memory(GiB)": 85.12,
"step": 4440,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67223606,
"epoch": 3.48,
"learning_rate": 8.502932766793462e-05,
"loss": 1.06527033,
"memory(GiB)": 85.12,
"step": 4445,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67538528,
"epoch": 3.49,
"learning_rate": 8.49907788202043e-05,
"loss": 1.04301472,
"memory(GiB)": 85.12,
"step": 4450,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.68265018,
"epoch": 3.49,
"learning_rate": 8.49521891681531e-05,
"loss": 1.05498791,
"memory(GiB)": 85.12,
"step": 4455,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67961888,
"epoch": 3.5,
"learning_rate": 8.491355875678211e-05,
"loss": 1.06846848,
"memory(GiB)": 85.12,
"step": 4460,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66131968,
"epoch": 3.5,
"learning_rate": 8.487488763114e-05,
"loss": 1.09861307,
"memory(GiB)": 85.12,
"step": 4465,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66888275,
"epoch": 3.5,
"learning_rate": 8.483617583632292e-05,
"loss": 1.08226824,
"memory(GiB)": 85.12,
"step": 4470,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67894645,
"epoch": 3.51,
"learning_rate": 8.47974234174744e-05,
"loss": 1.07799463,
"memory(GiB)": 85.12,
"step": 4475,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66169238,
"epoch": 3.51,
"learning_rate": 8.475863041978543e-05,
"loss": 1.10147877,
"memory(GiB)": 85.12,
"step": 4480,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66542506,
"epoch": 3.51,
"learning_rate": 8.471979688849424e-05,
"loss": 1.09339066,
"memory(GiB)": 85.12,
"step": 4485,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66197405,
"epoch": 3.52,
"learning_rate": 8.468092286888634e-05,
"loss": 1.11503725,
"memory(GiB)": 85.12,
"step": 4490,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66685591,
"epoch": 3.52,
"learning_rate": 8.464200840629451e-05,
"loss": 1.07581072,
"memory(GiB)": 85.12,
"step": 4495,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67047796,
"epoch": 3.53,
"learning_rate": 8.460305354609863e-05,
"loss": 1.07585983,
"memory(GiB)": 85.12,
"step": 4500,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66793823,
"epoch": 3.53,
"learning_rate": 8.456405833372572e-05,
"loss": 1.10174761,
"memory(GiB)": 85.12,
"step": 4505,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67698441,
"epoch": 3.53,
"learning_rate": 8.452502281464986e-05,
"loss": 1.0740612,
"memory(GiB)": 85.12,
"step": 4510,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68095536,
"epoch": 3.54,
"learning_rate": 8.448594703439213e-05,
"loss": 1.05643806,
"memory(GiB)": 85.12,
"step": 4515,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67278633,
"epoch": 3.54,
"learning_rate": 8.444683103852051e-05,
"loss": 1.06161213,
"memory(GiB)": 85.12,
"step": 4520,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.65890441,
"epoch": 3.55,
"learning_rate": 8.440767487264997e-05,
"loss": 1.12471752,
"memory(GiB)": 85.12,
"step": 4525,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65354443,
"epoch": 3.55,
"learning_rate": 8.436847858244226e-05,
"loss": 1.13845463,
"memory(GiB)": 85.12,
"step": 4530,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.67117491,
"epoch": 3.55,
"learning_rate": 8.432924221360594e-05,
"loss": 1.06492109,
"memory(GiB)": 85.12,
"step": 4535,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67677555,
"epoch": 3.56,
"learning_rate": 8.42899658118963e-05,
"loss": 1.06711979,
"memory(GiB)": 85.12,
"step": 4540,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67237253,
"epoch": 3.56,
"learning_rate": 8.425064942311532e-05,
"loss": 1.064182,
"memory(GiB)": 85.12,
"step": 4545,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.67695851,
"epoch": 3.57,
"learning_rate": 8.42112930931116e-05,
"loss": 1.04225302,
"memory(GiB)": 85.12,
"step": 4550,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67655463,
"epoch": 3.57,
"learning_rate": 8.417189686778034e-05,
"loss": 1.09111786,
"memory(GiB)": 85.12,
"step": 4555,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67512894,
"epoch": 3.57,
"learning_rate": 8.413246079306325e-05,
"loss": 1.06245384,
"memory(GiB)": 85.12,
"step": 4560,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.6641572,
"epoch": 3.58,
"learning_rate": 8.40929849149485e-05,
"loss": 1.089466,
"memory(GiB)": 85.12,
"step": 4565,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67081413,
"epoch": 3.58,
"learning_rate": 8.405346927947071e-05,
"loss": 1.06199369,
"memory(GiB)": 85.12,
"step": 4570,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67029018,
"epoch": 3.59,
"learning_rate": 8.401391393271081e-05,
"loss": 1.10045147,
"memory(GiB)": 85.12,
"step": 4575,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.66921582,
"epoch": 3.59,
"learning_rate": 8.397431892079608e-05,
"loss": 1.09112959,
"memory(GiB)": 85.12,
"step": 4580,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66224871,
"epoch": 3.59,
"learning_rate": 8.393468428990005e-05,
"loss": 1.09612188,
"memory(GiB)": 85.12,
"step": 4585,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68403311,
"epoch": 3.6,
"learning_rate": 8.389501008624244e-05,
"loss": 1.06564083,
"memory(GiB)": 85.12,
"step": 4590,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67375054,
"epoch": 3.6,
"learning_rate": 8.385529635608913e-05,
"loss": 1.08890104,
"memory(GiB)": 85.12,
"step": 4595,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.66678824,
"epoch": 3.61,
"learning_rate": 8.381554314575206e-05,
"loss": 1.07385893,
"memory(GiB)": 85.12,
"step": 4600,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.67612591,
"epoch": 3.61,
"learning_rate": 8.377575050158929e-05,
"loss": 1.07160501,
"memory(GiB)": 85.12,
"step": 4605,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66233315,
"epoch": 3.61,
"learning_rate": 8.373591847000476e-05,
"loss": 1.12124157,
"memory(GiB)": 85.12,
"step": 4610,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.66368275,
"epoch": 3.62,
"learning_rate": 8.369604709744842e-05,
"loss": 1.12185488,
"memory(GiB)": 85.12,
"step": 4615,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67620149,
"epoch": 3.62,
"learning_rate": 8.365613643041606e-05,
"loss": 1.0558217,
"memory(GiB)": 85.12,
"step": 4620,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67760687,
"epoch": 3.62,
"learning_rate": 8.361618651544932e-05,
"loss": 1.06278458,
"memory(GiB)": 85.12,
"step": 4625,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67772574,
"epoch": 3.63,
"learning_rate": 8.357619739913557e-05,
"loss": 1.05741844,
"memory(GiB)": 85.12,
"step": 4630,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68779845,
"epoch": 3.63,
"learning_rate": 8.353616912810793e-05,
"loss": 1.03406744,
"memory(GiB)": 85.12,
"step": 4635,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67806859,
"epoch": 3.64,
"learning_rate": 8.349610174904517e-05,
"loss": 1.07612123,
"memory(GiB)": 85.12,
"step": 4640,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65736609,
"epoch": 3.64,
"learning_rate": 8.345599530867166e-05,
"loss": 1.11663847,
"memory(GiB)": 85.12,
"step": 4645,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66991482,
"epoch": 3.64,
"learning_rate": 8.341584985375733e-05,
"loss": 1.08915453,
"memory(GiB)": 85.12,
"step": 4650,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.6729054,
"epoch": 3.65,
"learning_rate": 8.337566543111756e-05,
"loss": 1.06626625,
"memory(GiB)": 85.12,
"step": 4655,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.6768261,
"epoch": 3.65,
"learning_rate": 8.333544208761326e-05,
"loss": 1.04373264,
"memory(GiB)": 85.12,
"step": 4660,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66950965,
"epoch": 3.66,
"learning_rate": 8.329517987015067e-05,
"loss": 1.11504803,
"memory(GiB)": 85.12,
"step": 4665,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67875061,
"epoch": 3.66,
"learning_rate": 8.325487882568138e-05,
"loss": 1.05688515,
"memory(GiB)": 85.12,
"step": 4670,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66837807,
"epoch": 3.66,
"learning_rate": 8.321453900120223e-05,
"loss": 1.07563877,
"memory(GiB)": 85.12,
"step": 4675,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.68004751,
"epoch": 3.67,
"learning_rate": 8.31741604437553e-05,
"loss": 1.06593189,
"memory(GiB)": 85.12,
"step": 4680,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.68714409,
"epoch": 3.67,
"learning_rate": 8.313374320042785e-05,
"loss": 1.03218222,
"memory(GiB)": 85.12,
"step": 4685,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67838907,
"epoch": 3.68,
"learning_rate": 8.309328731835228e-05,
"loss": 1.05175476,
"memory(GiB)": 85.12,
"step": 4690,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66823692,
"epoch": 3.68,
"learning_rate": 8.305279284470595e-05,
"loss": 1.08211584,
"memory(GiB)": 85.12,
"step": 4695,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.65161371,
"epoch": 3.68,
"learning_rate": 8.301225982671133e-05,
"loss": 1.13491449,
"memory(GiB)": 85.12,
"step": 4700,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67203436,
"epoch": 3.69,
"learning_rate": 8.29716883116358e-05,
"loss": 1.08132124,
"memory(GiB)": 85.12,
"step": 4705,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66406636,
"epoch": 3.69,
"learning_rate": 8.293107834679159e-05,
"loss": 1.11655197,
"memory(GiB)": 85.12,
"step": 4710,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67401237,
"epoch": 3.7,
"learning_rate": 8.289042997953585e-05,
"loss": 1.09698009,
"memory(GiB)": 85.12,
"step": 4715,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66169372,
"epoch": 3.7,
"learning_rate": 8.284974325727043e-05,
"loss": 1.13821402,
"memory(GiB)": 85.12,
"step": 4720,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66252351,
"epoch": 3.7,
"learning_rate": 8.280901822744198e-05,
"loss": 1.12066956,
"memory(GiB)": 85.12,
"step": 4725,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66670227,
"epoch": 3.71,
"learning_rate": 8.276825493754176e-05,
"loss": 1.07495327,
"memory(GiB)": 85.12,
"step": 4730,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.67759528,
"epoch": 3.71,
"learning_rate": 8.27274534351057e-05,
"loss": 1.06112547,
"memory(GiB)": 85.12,
"step": 4735,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66779099,
"epoch": 3.71,
"learning_rate": 8.268661376771425e-05,
"loss": 1.07864561,
"memory(GiB)": 85.12,
"step": 4740,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.69414735,
"epoch": 3.72,
"learning_rate": 8.264573598299238e-05,
"loss": 1.01207485,
"memory(GiB)": 85.12,
"step": 4745,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67531772,
"epoch": 3.72,
"learning_rate": 8.260482012860957e-05,
"loss": 1.05929461,
"memory(GiB)": 85.12,
"step": 4750,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67257948,
"epoch": 3.73,
"learning_rate": 8.256386625227955e-05,
"loss": 1.09518213,
"memory(GiB)": 85.12,
"step": 4755,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67445426,
"epoch": 3.73,
"learning_rate": 8.252287440176053e-05,
"loss": 1.09971008,
"memory(GiB)": 85.12,
"step": 4760,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66840658,
"epoch": 3.73,
"learning_rate": 8.248184462485493e-05,
"loss": 1.10356541,
"memory(GiB)": 85.12,
"step": 4765,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66937456,
"epoch": 3.74,
"learning_rate": 8.244077696940944e-05,
"loss": 1.09562979,
"memory(GiB)": 85.12,
"step": 4770,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.66603575,
"epoch": 3.74,
"learning_rate": 8.239967148331488e-05,
"loss": 1.08866987,
"memory(GiB)": 85.12,
"step": 4775,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66549649,
"epoch": 3.75,
"learning_rate": 8.235852821450622e-05,
"loss": 1.11980772,
"memory(GiB)": 85.12,
"step": 4780,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66984949,
"epoch": 3.75,
"learning_rate": 8.231734721096246e-05,
"loss": 1.10109053,
"memory(GiB)": 85.12,
"step": 4785,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65589933,
"epoch": 3.75,
"learning_rate": 8.227612852070665e-05,
"loss": 1.13999405,
"memory(GiB)": 85.12,
"step": 4790,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.65890169,
"epoch": 3.76,
"learning_rate": 8.223487219180573e-05,
"loss": 1.12389488,
"memory(GiB)": 85.12,
"step": 4795,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.68056979,
"epoch": 3.76,
"learning_rate": 8.219357827237056e-05,
"loss": 1.06554289,
"memory(GiB)": 85.12,
"step": 4800,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67042527,
"epoch": 3.77,
"learning_rate": 8.215224681055585e-05,
"loss": 1.11027889,
"memory(GiB)": 85.12,
"step": 4805,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67441998,
"epoch": 3.77,
"learning_rate": 8.211087785456005e-05,
"loss": 1.06829157,
"memory(GiB)": 85.12,
"step": 4810,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67253809,
"epoch": 3.77,
"learning_rate": 8.206947145262541e-05,
"loss": 1.08531771,
"memory(GiB)": 85.12,
"step": 4815,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66685085,
"epoch": 3.78,
"learning_rate": 8.202802765303776e-05,
"loss": 1.10216599,
"memory(GiB)": 85.12,
"step": 4820,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66690621,
"epoch": 3.78,
"learning_rate": 8.198654650412659e-05,
"loss": 1.10417929,
"memory(GiB)": 85.12,
"step": 4825,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67201991,
"epoch": 3.79,
"learning_rate": 8.194502805426494e-05,
"loss": 1.0637537,
"memory(GiB)": 85.12,
"step": 4830,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66858087,
"epoch": 3.79,
"learning_rate": 8.190347235186932e-05,
"loss": 1.07713585,
"memory(GiB)": 85.12,
"step": 4835,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.69056597,
"epoch": 3.79,
"learning_rate": 8.186187944539973e-05,
"loss": 1.01855545,
"memory(GiB)": 85.12,
"step": 4840,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67178264,
"epoch": 3.8,
"learning_rate": 8.18202493833595e-05,
"loss": 1.08217745,
"memory(GiB)": 85.12,
"step": 4845,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67628303,
"epoch": 3.8,
"learning_rate": 8.177858221429536e-05,
"loss": 1.05854607,
"memory(GiB)": 85.12,
"step": 4850,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67847629,
"epoch": 3.8,
"learning_rate": 8.173687798679723e-05,
"loss": 1.06340704,
"memory(GiB)": 85.12,
"step": 4855,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65820971,
"epoch": 3.81,
"learning_rate": 8.169513674949829e-05,
"loss": 1.13936968,
"memory(GiB)": 85.12,
"step": 4860,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.68040419,
"epoch": 3.81,
"learning_rate": 8.16533585510749e-05,
"loss": 1.04977999,
"memory(GiB)": 85.12,
"step": 4865,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66686277,
"epoch": 3.82,
"learning_rate": 8.161154344024646e-05,
"loss": 1.10527754,
"memory(GiB)": 85.12,
"step": 4870,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.6596796,
"epoch": 3.82,
"learning_rate": 8.156969146577548e-05,
"loss": 1.11985264,
"memory(GiB)": 85.12,
"step": 4875,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66763144,
"epoch": 3.82,
"learning_rate": 8.152780267646743e-05,
"loss": 1.08364487,
"memory(GiB)": 85.12,
"step": 4880,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67398977,
"epoch": 3.83,
"learning_rate": 8.148587712117068e-05,
"loss": 1.07214155,
"memory(GiB)": 85.12,
"step": 4885,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.68034143,
"epoch": 3.83,
"learning_rate": 8.144391484877655e-05,
"loss": 1.0558445,
"memory(GiB)": 85.12,
"step": 4890,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67083588,
"epoch": 3.84,
"learning_rate": 8.14019159082191e-05,
"loss": 1.0750145,
"memory(GiB)": 85.12,
"step": 4895,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66514192,
"epoch": 3.84,
"learning_rate": 8.135988034847521e-05,
"loss": 1.10588713,
"memory(GiB)": 85.12,
"step": 4900,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.67397079,
"epoch": 3.84,
"learning_rate": 8.13178082185644e-05,
"loss": 1.05777845,
"memory(GiB)": 85.12,
"step": 4905,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66549215,
"epoch": 3.85,
"learning_rate": 8.12756995675489e-05,
"loss": 1.09327412,
"memory(GiB)": 85.12,
"step": 4910,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.6661788,
"epoch": 3.85,
"learning_rate": 8.12335544445335e-05,
"loss": 1.09228296,
"memory(GiB)": 85.12,
"step": 4915,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.68010106,
"epoch": 3.86,
"learning_rate": 8.119137289866551e-05,
"loss": 1.06913862,
"memory(GiB)": 85.12,
"step": 4920,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.66819158,
"epoch": 3.86,
"learning_rate": 8.114915497913473e-05,
"loss": 1.09536047,
"memory(GiB)": 85.12,
"step": 4925,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67470613,
"epoch": 3.86,
"learning_rate": 8.11069007351734e-05,
"loss": 1.06130228,
"memory(GiB)": 85.12,
"step": 4930,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.67194877,
"epoch": 3.87,
"learning_rate": 8.106461021605607e-05,
"loss": 1.06740732,
"memory(GiB)": 85.12,
"step": 4935,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67518115,
"epoch": 3.87,
"learning_rate": 8.102228347109962e-05,
"loss": 1.09367285,
"memory(GiB)": 85.12,
"step": 4940,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68004875,
"epoch": 3.88,
"learning_rate": 8.097992054966317e-05,
"loss": 1.05609674,
"memory(GiB)": 85.12,
"step": 4945,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.65617743,
"epoch": 3.88,
"learning_rate": 8.093752150114804e-05,
"loss": 1.11393881,
"memory(GiB)": 85.12,
"step": 4950,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.66117711,
"epoch": 3.88,
"learning_rate": 8.089508637499765e-05,
"loss": 1.12456036,
"memory(GiB)": 85.12,
"step": 4955,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.66858802,
"epoch": 3.89,
"learning_rate": 8.085261522069752e-05,
"loss": 1.1040514,
"memory(GiB)": 85.12,
"step": 4960,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68399754,
"epoch": 3.89,
"learning_rate": 8.081010808777517e-05,
"loss": 1.01230736,
"memory(GiB)": 85.12,
"step": 4965,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67364149,
"epoch": 3.89,
"learning_rate": 8.07675650258001e-05,
"loss": 1.05729342,
"memory(GiB)": 85.12,
"step": 4970,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.66905775,
"epoch": 3.9,
"learning_rate": 8.072498608438363e-05,
"loss": 1.10162392,
"memory(GiB)": 85.12,
"step": 4975,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67838883,
"epoch": 3.9,
"learning_rate": 8.068237131317904e-05,
"loss": 1.03004761,
"memory(GiB)": 85.12,
"step": 4980,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.6654243,
"epoch": 3.91,
"learning_rate": 8.063972076188132e-05,
"loss": 1.09254379,
"memory(GiB)": 85.12,
"step": 4985,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.6707293,
"epoch": 3.91,
"learning_rate": 8.059703448022715e-05,
"loss": 1.06519508,
"memory(GiB)": 85.12,
"step": 4990,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67212982,
"epoch": 3.91,
"learning_rate": 8.055431251799499e-05,
"loss": 1.09433947,
"memory(GiB)": 85.12,
"step": 4995,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.65150967,
"epoch": 3.92,
"learning_rate": 8.051155492500478e-05,
"loss": 1.13897228,
"memory(GiB)": 85.12,
"step": 5000,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67477775,
"epoch": 3.92,
"learning_rate": 8.046876175111813e-05,
"loss": 1.07944336,
"memory(GiB)": 85.12,
"step": 5005,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.68616705,
"epoch": 3.93,
"learning_rate": 8.042593304623803e-05,
"loss": 1.05852995,
"memory(GiB)": 85.12,
"step": 5010,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.68202815,
"epoch": 3.93,
"learning_rate": 8.0383068860309e-05,
"loss": 1.05745516,
"memory(GiB)": 85.12,
"step": 5015,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.65037746,
"epoch": 3.93,
"learning_rate": 8.034016924331686e-05,
"loss": 1.1531786,
"memory(GiB)": 85.12,
"step": 5020,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67385054,
"epoch": 3.94,
"learning_rate": 8.029723424528884e-05,
"loss": 1.07016153,
"memory(GiB)": 85.12,
"step": 5025,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67603893,
"epoch": 3.94,
"learning_rate": 8.025426391629329e-05,
"loss": 1.06786518,
"memory(GiB)": 85.12,
"step": 5030,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66948352,
"epoch": 3.95,
"learning_rate": 8.021125830643991e-05,
"loss": 1.09177036,
"memory(GiB)": 85.12,
"step": 5035,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67932153,
"epoch": 3.95,
"learning_rate": 8.016821746587947e-05,
"loss": 1.08133307,
"memory(GiB)": 85.12,
"step": 5040,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66458163,
"epoch": 3.95,
"learning_rate": 8.01251414448038e-05,
"loss": 1.0914546,
"memory(GiB)": 85.12,
"step": 5045,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.65853357,
"epoch": 3.96,
"learning_rate": 8.00820302934458e-05,
"loss": 1.12704811,
"memory(GiB)": 85.12,
"step": 5050,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66499863,
"epoch": 3.96,
"learning_rate": 8.003888406207932e-05,
"loss": 1.1266118,
"memory(GiB)": 85.12,
"step": 5055,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66503258,
"epoch": 3.97,
"learning_rate": 7.999570280101912e-05,
"loss": 1.10257483,
"memory(GiB)": 85.12,
"step": 5060,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67188749,
"epoch": 3.97,
"learning_rate": 7.995248656062081e-05,
"loss": 1.0711792,
"memory(GiB)": 85.12,
"step": 5065,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.66948404,
"epoch": 3.97,
"learning_rate": 7.99092353912808e-05,
"loss": 1.09926367,
"memory(GiB)": 85.12,
"step": 5070,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67658277,
"epoch": 3.98,
"learning_rate": 7.986594934343621e-05,
"loss": 1.06956873,
"memory(GiB)": 85.12,
"step": 5075,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67322578,
"epoch": 3.98,
"learning_rate": 7.982262846756488e-05,
"loss": 1.07685366,
"memory(GiB)": 85.12,
"step": 5080,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.66474819,
"epoch": 3.99,
"learning_rate": 7.977927281418518e-05,
"loss": 1.12432756,
"memory(GiB)": 85.12,
"step": 5085,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67171674,
"epoch": 3.99,
"learning_rate": 7.973588243385612e-05,
"loss": 1.09030991,
"memory(GiB)": 85.12,
"step": 5090,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68104601,
"epoch": 3.99,
"learning_rate": 7.969245737717718e-05,
"loss": 1.03982849,
"memory(GiB)": 85.12,
"step": 5095,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67410316,
"epoch": 4.0,
"learning_rate": 7.964899769478827e-05,
"loss": 1.07849312,
"memory(GiB)": 85.12,
"step": 5100,
"train_speed(iter/s)": 0.035265
},
{
"epoch": 4.0,
"eval_acc": 0.6908339594290007,
"eval_loss": 0.9954748749732971,
"eval_runtime": 84.9758,
"eval_samples_per_second": 1.094,
"eval_steps_per_second": 1.094,
"step": 5104
},
{
"acc": 0.68671699,
"epoch": 4.0,
"learning_rate": 7.960550343736965e-05,
"loss": 1.04726982,
"memory(GiB)": 85.12,
"step": 5105,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68219004,
"epoch": 4.0,
"learning_rate": 7.956197465564197e-05,
"loss": 1.02617922,
"memory(GiB)": 85.12,
"step": 5110,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.68375196,
"epoch": 4.01,
"learning_rate": 7.951841140036606e-05,
"loss": 1.01291771,
"memory(GiB)": 85.12,
"step": 5115,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66873488,
"epoch": 4.01,
"learning_rate": 7.947481372234303e-05,
"loss": 1.0916275,
"memory(GiB)": 85.12,
"step": 5120,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.67837057,
"epoch": 4.02,
"learning_rate": 7.943118167241406e-05,
"loss": 1.07693701,
"memory(GiB)": 85.12,
"step": 5125,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.66617122,
"epoch": 4.02,
"learning_rate": 7.938751530146047e-05,
"loss": 1.08172598,
"memory(GiB)": 85.12,
"step": 5130,
"train_speed(iter/s)": 0.035242
},
{
"acc": 0.68194261,
"epoch": 4.02,
"learning_rate": 7.934381466040356e-05,
"loss": 1.04084024,
"memory(GiB)": 85.12,
"step": 5135,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.68469057,
"epoch": 4.03,
"learning_rate": 7.930007980020464e-05,
"loss": 1.02090225,
"memory(GiB)": 85.12,
"step": 5140,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.68128481,
"epoch": 4.03,
"learning_rate": 7.925631077186487e-05,
"loss": 1.04276752,
"memory(GiB)": 85.12,
"step": 5145,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67515278,
"epoch": 4.04,
"learning_rate": 7.92125076264253e-05,
"loss": 1.05011406,
"memory(GiB)": 85.12,
"step": 5150,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68157773,
"epoch": 4.04,
"learning_rate": 7.916867041496674e-05,
"loss": 1.08598862,
"memory(GiB)": 85.12,
"step": 5155,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67785926,
"epoch": 4.04,
"learning_rate": 7.912479918860974e-05,
"loss": 1.05256701,
"memory(GiB)": 85.12,
"step": 5160,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68705544,
"epoch": 4.05,
"learning_rate": 7.908089399851448e-05,
"loss": 1.04519091,
"memory(GiB)": 85.12,
"step": 5165,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67090273,
"epoch": 4.05,
"learning_rate": 7.903695489588083e-05,
"loss": 1.08642006,
"memory(GiB)": 85.12,
"step": 5170,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67382889,
"epoch": 4.06,
"learning_rate": 7.899298193194811e-05,
"loss": 1.07724438,
"memory(GiB)": 85.12,
"step": 5175,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.69452157,
"epoch": 4.06,
"learning_rate": 7.894897515799518e-05,
"loss": 0.98748884,
"memory(GiB)": 85.12,
"step": 5180,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.66965647,
"epoch": 4.06,
"learning_rate": 7.890493462534034e-05,
"loss": 1.06002617,
"memory(GiB)": 85.12,
"step": 5185,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.6691617,
"epoch": 4.07,
"learning_rate": 7.886086038534122e-05,
"loss": 1.05882254,
"memory(GiB)": 85.12,
"step": 5190,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67491331,
"epoch": 4.07,
"learning_rate": 7.881675248939476e-05,
"loss": 1.06821861,
"memory(GiB)": 85.12,
"step": 5195,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68093872,
"epoch": 4.08,
"learning_rate": 7.877261098893719e-05,
"loss": 1.04844933,
"memory(GiB)": 85.12,
"step": 5200,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67266617,
"epoch": 4.08,
"learning_rate": 7.872843593544388e-05,
"loss": 1.07000122,
"memory(GiB)": 85.12,
"step": 5205,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67317371,
"epoch": 4.08,
"learning_rate": 7.868422738042935e-05,
"loss": 1.09329424,
"memory(GiB)": 85.12,
"step": 5210,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66858578,
"epoch": 4.09,
"learning_rate": 7.863998537544719e-05,
"loss": 1.08054285,
"memory(GiB)": 85.12,
"step": 5215,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66879354,
"epoch": 4.09,
"learning_rate": 7.859570997208998e-05,
"loss": 1.08357344,
"memory(GiB)": 85.12,
"step": 5220,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66112986,
"epoch": 4.09,
"learning_rate": 7.855140122198927e-05,
"loss": 1.1116806,
"memory(GiB)": 85.12,
"step": 5225,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67013917,
"epoch": 4.1,
"learning_rate": 7.850705917681549e-05,
"loss": 1.0776103,
"memory(GiB)": 85.12,
"step": 5230,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67974381,
"epoch": 4.1,
"learning_rate": 7.846268388827789e-05,
"loss": 1.06719589,
"memory(GiB)": 85.12,
"step": 5235,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6868772,
"epoch": 4.11,
"learning_rate": 7.841827540812447e-05,
"loss": 1.02366819,
"memory(GiB)": 85.12,
"step": 5240,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67168031,
"epoch": 4.11,
"learning_rate": 7.837383378814197e-05,
"loss": 1.08055744,
"memory(GiB)": 85.12,
"step": 5245,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.67559457,
"epoch": 4.11,
"learning_rate": 7.832935908015578e-05,
"loss": 1.04622164,
"memory(GiB)": 85.12,
"step": 5250,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.66986046,
"epoch": 4.12,
"learning_rate": 7.828485133602981e-05,
"loss": 1.088204,
"memory(GiB)": 85.12,
"step": 5255,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.68791199,
"epoch": 4.12,
"learning_rate": 7.824031060766662e-05,
"loss": 1.02517748,
"memory(GiB)": 85.12,
"step": 5260,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67395439,
"epoch": 4.13,
"learning_rate": 7.819573694700707e-05,
"loss": 1.04129019,
"memory(GiB)": 85.12,
"step": 5265,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67076187,
"epoch": 4.13,
"learning_rate": 7.815113040603057e-05,
"loss": 1.09466763,
"memory(GiB)": 85.12,
"step": 5270,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68308926,
"epoch": 4.13,
"learning_rate": 7.810649103675478e-05,
"loss": 1.01522207,
"memory(GiB)": 85.12,
"step": 5275,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66764603,
"epoch": 4.14,
"learning_rate": 7.80618188912357e-05,
"loss": 1.10394659,
"memory(GiB)": 85.12,
"step": 5280,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.6767786,
"epoch": 4.14,
"learning_rate": 7.801711402156752e-05,
"loss": 1.06015568,
"memory(GiB)": 85.12,
"step": 5285,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.69466491,
"epoch": 4.15,
"learning_rate": 7.797237647988259e-05,
"loss": 1.00177612,
"memory(GiB)": 85.12,
"step": 5290,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.6722373,
"epoch": 4.15,
"learning_rate": 7.792760631835138e-05,
"loss": 1.09138288,
"memory(GiB)": 85.12,
"step": 5295,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68212829,
"epoch": 4.15,
"learning_rate": 7.788280358918239e-05,
"loss": 1.0298542,
"memory(GiB)": 85.12,
"step": 5300,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68358073,
"epoch": 4.16,
"learning_rate": 7.783796834462208e-05,
"loss": 1.04165964,
"memory(GiB)": 85.12,
"step": 5305,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67844944,
"epoch": 4.16,
"learning_rate": 7.779310063695486e-05,
"loss": 1.06625547,
"memory(GiB)": 85.12,
"step": 5310,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67359776,
"epoch": 4.17,
"learning_rate": 7.7748200518503e-05,
"loss": 1.08562546,
"memory(GiB)": 85.12,
"step": 5315,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68680096,
"epoch": 4.17,
"learning_rate": 7.77032680416265e-05,
"loss": 1.01254492,
"memory(GiB)": 85.12,
"step": 5320,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67717962,
"epoch": 4.17,
"learning_rate": 7.765830325872318e-05,
"loss": 1.03918819,
"memory(GiB)": 85.12,
"step": 5325,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68738456,
"epoch": 4.18,
"learning_rate": 7.761330622222849e-05,
"loss": 1.01561775,
"memory(GiB)": 85.12,
"step": 5330,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66343398,
"epoch": 4.18,
"learning_rate": 7.75682769846155e-05,
"loss": 1.08996143,
"memory(GiB)": 85.12,
"step": 5335,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68137712,
"epoch": 4.18,
"learning_rate": 7.75232155983948e-05,
"loss": 1.06495123,
"memory(GiB)": 85.12,
"step": 5340,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67577925,
"epoch": 4.19,
"learning_rate": 7.747812211611454e-05,
"loss": 1.0828968,
"memory(GiB)": 85.12,
"step": 5345,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6752552,
"epoch": 4.19,
"learning_rate": 7.743299659036023e-05,
"loss": 1.05694551,
"memory(GiB)": 85.12,
"step": 5350,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67123895,
"epoch": 4.2,
"learning_rate": 7.73878390737548e-05,
"loss": 1.07847023,
"memory(GiB)": 85.12,
"step": 5355,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68291116,
"epoch": 4.2,
"learning_rate": 7.734264961895843e-05,
"loss": 1.05738621,
"memory(GiB)": 85.12,
"step": 5360,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68122358,
"epoch": 4.2,
"learning_rate": 7.72974282786686e-05,
"loss": 1.03916407,
"memory(GiB)": 85.12,
"step": 5365,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67760181,
"epoch": 4.21,
"learning_rate": 7.725217510561993e-05,
"loss": 1.05562716,
"memory(GiB)": 85.12,
"step": 5370,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.6754374,
"epoch": 4.21,
"learning_rate": 7.72068901525842e-05,
"loss": 1.04640961,
"memory(GiB)": 85.12,
"step": 5375,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.6791121,
"epoch": 4.22,
"learning_rate": 7.716157347237022e-05,
"loss": 1.04016552,
"memory(GiB)": 85.12,
"step": 5380,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67782917,
"epoch": 4.22,
"learning_rate": 7.71162251178238e-05,
"loss": 1.05207224,
"memory(GiB)": 85.12,
"step": 5385,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.66641583,
"epoch": 4.22,
"learning_rate": 7.707084514182772e-05,
"loss": 1.09165554,
"memory(GiB)": 85.12,
"step": 5390,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66752553,
"epoch": 4.23,
"learning_rate": 7.702543359730158e-05,
"loss": 1.08811426,
"memory(GiB)": 85.12,
"step": 5395,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67671351,
"epoch": 4.23,
"learning_rate": 7.697999053720185e-05,
"loss": 1.03720827,
"memory(GiB)": 85.12,
"step": 5400,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67424908,
"epoch": 4.24,
"learning_rate": 7.693451601452173e-05,
"loss": 1.05379906,
"memory(GiB)": 85.12,
"step": 5405,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67244482,
"epoch": 4.24,
"learning_rate": 7.688901008229107e-05,
"loss": 1.09903154,
"memory(GiB)": 85.12,
"step": 5410,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67872109,
"epoch": 4.24,
"learning_rate": 7.684347279357642e-05,
"loss": 1.06607389,
"memory(GiB)": 85.12,
"step": 5415,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67639685,
"epoch": 4.25,
"learning_rate": 7.679790420148084e-05,
"loss": 1.06388321,
"memory(GiB)": 85.12,
"step": 5420,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66824012,
"epoch": 4.25,
"learning_rate": 7.675230435914394e-05,
"loss": 1.08031435,
"memory(GiB)": 85.12,
"step": 5425,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67619176,
"epoch": 4.26,
"learning_rate": 7.670667331974171e-05,
"loss": 1.05007677,
"memory(GiB)": 85.12,
"step": 5430,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.66911144,
"epoch": 4.26,
"learning_rate": 7.666101113648658e-05,
"loss": 1.06066961,
"memory(GiB)": 85.12,
"step": 5435,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.69307575,
"epoch": 4.26,
"learning_rate": 7.661531786262728e-05,
"loss": 1.00854187,
"memory(GiB)": 85.12,
"step": 5440,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67891207,
"epoch": 4.27,
"learning_rate": 7.656959355144879e-05,
"loss": 1.05966101,
"memory(GiB)": 85.12,
"step": 5445,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67878551,
"epoch": 4.27,
"learning_rate": 7.652383825627226e-05,
"loss": 1.0579504,
"memory(GiB)": 85.12,
"step": 5450,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66832333,
"epoch": 4.28,
"learning_rate": 7.647805203045504e-05,
"loss": 1.08938274,
"memory(GiB)": 85.12,
"step": 5455,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67352867,
"epoch": 4.28,
"learning_rate": 7.643223492739048e-05,
"loss": 1.07296839,
"memory(GiB)": 85.12,
"step": 5460,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68797994,
"epoch": 4.28,
"learning_rate": 7.638638700050796e-05,
"loss": 1.03214712,
"memory(GiB)": 85.12,
"step": 5465,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67085338,
"epoch": 4.29,
"learning_rate": 7.634050830327282e-05,
"loss": 1.07635412,
"memory(GiB)": 85.12,
"step": 5470,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67759714,
"epoch": 4.29,
"learning_rate": 7.629459888918627e-05,
"loss": 1.06018639,
"memory(GiB)": 85.12,
"step": 5475,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67132416,
"epoch": 4.29,
"learning_rate": 7.624865881178535e-05,
"loss": 1.08437328,
"memory(GiB)": 85.12,
"step": 5480,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.6840992,
"epoch": 4.3,
"learning_rate": 7.620268812464284e-05,
"loss": 1.03912249,
"memory(GiB)": 85.12,
"step": 5485,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68261967,
"epoch": 4.3,
"learning_rate": 7.615668688136724e-05,
"loss": 1.07205544,
"memory(GiB)": 85.12,
"step": 5490,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68234777,
"epoch": 4.31,
"learning_rate": 7.611065513560264e-05,
"loss": 1.03525667,
"memory(GiB)": 85.12,
"step": 5495,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6711832,
"epoch": 4.31,
"learning_rate": 7.606459294102876e-05,
"loss": 1.08924789,
"memory(GiB)": 85.12,
"step": 5500,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.68011131,
"epoch": 4.31,
"learning_rate": 7.601850035136078e-05,
"loss": 1.03413877,
"memory(GiB)": 85.12,
"step": 5505,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67553554,
"epoch": 4.32,
"learning_rate": 7.597237742034938e-05,
"loss": 1.05302973,
"memory(GiB)": 85.12,
"step": 5510,
"train_speed(iter/s)": 0.035243
},
{
"acc": 0.68478012,
"epoch": 4.32,
"learning_rate": 7.59262242017805e-05,
"loss": 1.01622705,
"memory(GiB)": 85.12,
"step": 5515,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67683172,
"epoch": 4.33,
"learning_rate": 7.588004074947556e-05,
"loss": 1.07921562,
"memory(GiB)": 85.12,
"step": 5520,
"train_speed(iter/s)": 0.035244
},
{
"acc": 0.67525272,
"epoch": 4.33,
"learning_rate": 7.583382711729114e-05,
"loss": 1.06174011,
"memory(GiB)": 85.12,
"step": 5525,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.69840279,
"epoch": 4.33,
"learning_rate": 7.578758335911901e-05,
"loss": 0.98411026,
"memory(GiB)": 85.12,
"step": 5530,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68036718,
"epoch": 4.34,
"learning_rate": 7.574130952888614e-05,
"loss": 1.07546577,
"memory(GiB)": 85.12,
"step": 5535,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67380986,
"epoch": 4.34,
"learning_rate": 7.569500568055448e-05,
"loss": 1.06016655,
"memory(GiB)": 85.12,
"step": 5540,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67196646,
"epoch": 4.35,
"learning_rate": 7.564867186812105e-05,
"loss": 1.07078009,
"memory(GiB)": 85.12,
"step": 5545,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68534956,
"epoch": 4.35,
"learning_rate": 7.560230814561781e-05,
"loss": 1.0424099,
"memory(GiB)": 85.12,
"step": 5550,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67195516,
"epoch": 4.35,
"learning_rate": 7.555591456711157e-05,
"loss": 1.06724882,
"memory(GiB)": 85.12,
"step": 5555,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68624582,
"epoch": 4.36,
"learning_rate": 7.550949118670395e-05,
"loss": 1.03637371,
"memory(GiB)": 85.12,
"step": 5560,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67780466,
"epoch": 4.36,
"learning_rate": 7.546303805853136e-05,
"loss": 1.0577466,
"memory(GiB)": 85.12,
"step": 5565,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.66886325,
"epoch": 4.37,
"learning_rate": 7.541655523676489e-05,
"loss": 1.08753948,
"memory(GiB)": 85.12,
"step": 5570,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68775806,
"epoch": 4.37,
"learning_rate": 7.537004277561022e-05,
"loss": 1.02740803,
"memory(GiB)": 85.12,
"step": 5575,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67876277,
"epoch": 4.37,
"learning_rate": 7.532350072930764e-05,
"loss": 1.02870722,
"memory(GiB)": 85.12,
"step": 5580,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67490358,
"epoch": 4.38,
"learning_rate": 7.527692915213193e-05,
"loss": 1.07739782,
"memory(GiB)": 85.12,
"step": 5585,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67852893,
"epoch": 4.38,
"learning_rate": 7.52303280983923e-05,
"loss": 1.04966307,
"memory(GiB)": 85.12,
"step": 5590,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68157201,
"epoch": 4.38,
"learning_rate": 7.518369762243232e-05,
"loss": 1.0376194,
"memory(GiB)": 85.12,
"step": 5595,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67769365,
"epoch": 4.39,
"learning_rate": 7.51370377786299e-05,
"loss": 1.05899029,
"memory(GiB)": 85.12,
"step": 5600,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68378305,
"epoch": 4.39,
"learning_rate": 7.509034862139717e-05,
"loss": 1.06501207,
"memory(GiB)": 85.12,
"step": 5605,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68093605,
"epoch": 4.4,
"learning_rate": 7.504363020518046e-05,
"loss": 1.05337543,
"memory(GiB)": 85.12,
"step": 5610,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.6819272,
"epoch": 4.4,
"learning_rate": 7.499688258446024e-05,
"loss": 1.03589249,
"memory(GiB)": 85.12,
"step": 5615,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66478381,
"epoch": 4.4,
"learning_rate": 7.495010581375097e-05,
"loss": 1.11290102,
"memory(GiB)": 85.12,
"step": 5620,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.69163618,
"epoch": 4.41,
"learning_rate": 7.490329994760118e-05,
"loss": 0.99750299,
"memory(GiB)": 85.12,
"step": 5625,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68824978,
"epoch": 4.41,
"learning_rate": 7.485646504059328e-05,
"loss": 1.0366888,
"memory(GiB)": 85.12,
"step": 5630,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6706286,
"epoch": 4.42,
"learning_rate": 7.480960114734357e-05,
"loss": 1.09517231,
"memory(GiB)": 85.12,
"step": 5635,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.66850762,
"epoch": 4.42,
"learning_rate": 7.476270832250213e-05,
"loss": 1.09369583,
"memory(GiB)": 85.12,
"step": 5640,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68807869,
"epoch": 4.42,
"learning_rate": 7.471578662075281e-05,
"loss": 1.04609451,
"memory(GiB)": 85.12,
"step": 5645,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67306752,
"epoch": 4.43,
"learning_rate": 7.46688360968131e-05,
"loss": 1.05134382,
"memory(GiB)": 85.12,
"step": 5650,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68500929,
"epoch": 4.43,
"learning_rate": 7.462185680543413e-05,
"loss": 1.03070574,
"memory(GiB)": 85.12,
"step": 5655,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67167764,
"epoch": 4.44,
"learning_rate": 7.457484880140057e-05,
"loss": 1.08116226,
"memory(GiB)": 85.12,
"step": 5660,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67725725,
"epoch": 4.44,
"learning_rate": 7.452781213953054e-05,
"loss": 1.05323343,
"memory(GiB)": 85.12,
"step": 5665,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68552766,
"epoch": 4.44,
"learning_rate": 7.448074687467564e-05,
"loss": 1.03019152,
"memory(GiB)": 85.12,
"step": 5670,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67342439,
"epoch": 4.45,
"learning_rate": 7.443365306172076e-05,
"loss": 1.07240591,
"memory(GiB)": 85.12,
"step": 5675,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.676577,
"epoch": 4.45,
"learning_rate": 7.438653075558412e-05,
"loss": 1.0539855,
"memory(GiB)": 85.12,
"step": 5680,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67203903,
"epoch": 4.46,
"learning_rate": 7.433938001121719e-05,
"loss": 1.06724186,
"memory(GiB)": 85.12,
"step": 5685,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67773714,
"epoch": 4.46,
"learning_rate": 7.429220088360456e-05,
"loss": 1.08628159,
"memory(GiB)": 85.12,
"step": 5690,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.6735105,
"epoch": 4.46,
"learning_rate": 7.424499342776392e-05,
"loss": 1.08572884,
"memory(GiB)": 85.12,
"step": 5695,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.68572245,
"epoch": 4.47,
"learning_rate": 7.419775769874601e-05,
"loss": 1.02478113,
"memory(GiB)": 85.12,
"step": 5700,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67862587,
"epoch": 4.47,
"learning_rate": 7.415049375163455e-05,
"loss": 1.04869251,
"memory(GiB)": 85.12,
"step": 5705,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.67932053,
"epoch": 4.47,
"learning_rate": 7.410320164154614e-05,
"loss": 1.04899778,
"memory(GiB)": 85.12,
"step": 5710,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6694593,
"epoch": 4.48,
"learning_rate": 7.405588142363026e-05,
"loss": 1.09497614,
"memory(GiB)": 85.12,
"step": 5715,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.69002652,
"epoch": 4.48,
"learning_rate": 7.40085331530691e-05,
"loss": 1.02802773,
"memory(GiB)": 85.12,
"step": 5720,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67512789,
"epoch": 4.49,
"learning_rate": 7.396115688507766e-05,
"loss": 1.0342535,
"memory(GiB)": 85.12,
"step": 5725,
"train_speed(iter/s)": 0.035245
},
{
"acc": 0.68088655,
"epoch": 4.49,
"learning_rate": 7.39137526749035e-05,
"loss": 1.05951233,
"memory(GiB)": 85.12,
"step": 5730,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6625989,
"epoch": 4.49,
"learning_rate": 7.386632057782683e-05,
"loss": 1.0969574,
"memory(GiB)": 85.12,
"step": 5735,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.67329836,
"epoch": 4.5,
"learning_rate": 7.381886064916031e-05,
"loss": 1.09573812,
"memory(GiB)": 85.12,
"step": 5740,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66656322,
"epoch": 4.5,
"learning_rate": 7.377137294424914e-05,
"loss": 1.07542658,
"memory(GiB)": 85.12,
"step": 5745,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68572235,
"epoch": 4.51,
"learning_rate": 7.372385751847084e-05,
"loss": 1.01502714,
"memory(GiB)": 85.12,
"step": 5750,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67641573,
"epoch": 4.51,
"learning_rate": 7.367631442723531e-05,
"loss": 1.08796015,
"memory(GiB)": 85.12,
"step": 5755,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66859589,
"epoch": 4.51,
"learning_rate": 7.362874372598465e-05,
"loss": 1.09382992,
"memory(GiB)": 85.12,
"step": 5760,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67875175,
"epoch": 4.52,
"learning_rate": 7.358114547019325e-05,
"loss": 1.03851318,
"memory(GiB)": 85.12,
"step": 5765,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66146779,
"epoch": 4.52,
"learning_rate": 7.353351971536753e-05,
"loss": 1.13547573,
"memory(GiB)": 85.12,
"step": 5770,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.66437259,
"epoch": 4.53,
"learning_rate": 7.348586651704603e-05,
"loss": 1.09721699,
"memory(GiB)": 85.12,
"step": 5775,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68509789,
"epoch": 4.53,
"learning_rate": 7.343818593079929e-05,
"loss": 1.04163866,
"memory(GiB)": 85.12,
"step": 5780,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6822547,
"epoch": 4.53,
"learning_rate": 7.339047801222982e-05,
"loss": 1.0457943,
"memory(GiB)": 85.12,
"step": 5785,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67156363,
"epoch": 4.54,
"learning_rate": 7.334274281697193e-05,
"loss": 1.05314388,
"memory(GiB)": 85.12,
"step": 5790,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.68432736,
"epoch": 4.54,
"learning_rate": 7.329498040069179e-05,
"loss": 1.03443298,
"memory(GiB)": 85.12,
"step": 5795,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.69290547,
"epoch": 4.55,
"learning_rate": 7.324719081908731e-05,
"loss": 0.98860283,
"memory(GiB)": 85.12,
"step": 5800,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67324476,
"epoch": 4.55,
"learning_rate": 7.319937412788804e-05,
"loss": 1.0707695,
"memory(GiB)": 85.12,
"step": 5805,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67157531,
"epoch": 4.55,
"learning_rate": 7.315153038285522e-05,
"loss": 1.08696251,
"memory(GiB)": 85.12,
"step": 5810,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.69197111,
"epoch": 4.56,
"learning_rate": 7.310365963978157e-05,
"loss": 1.00116425,
"memory(GiB)": 85.12,
"step": 5815,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67103248,
"epoch": 4.56,
"learning_rate": 7.305576195449131e-05,
"loss": 1.07649136,
"memory(GiB)": 85.12,
"step": 5820,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68711181,
"epoch": 4.57,
"learning_rate": 7.30078373828401e-05,
"loss": 1.0179204,
"memory(GiB)": 85.12,
"step": 5825,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.6692019,
"epoch": 4.57,
"learning_rate": 7.29598859807149e-05,
"loss": 1.09381266,
"memory(GiB)": 85.12,
"step": 5830,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67294335,
"epoch": 4.57,
"learning_rate": 7.291190780403406e-05,
"loss": 1.07299709,
"memory(GiB)": 85.12,
"step": 5835,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67547169,
"epoch": 4.58,
"learning_rate": 7.286390290874703e-05,
"loss": 1.057125,
"memory(GiB)": 85.12,
"step": 5840,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67364817,
"epoch": 4.58,
"learning_rate": 7.281587135083452e-05,
"loss": 1.06245632,
"memory(GiB)": 85.12,
"step": 5845,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67187681,
"epoch": 4.58,
"learning_rate": 7.276781318630826e-05,
"loss": 1.05665264,
"memory(GiB)": 85.12,
"step": 5850,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.66608486,
"epoch": 4.59,
"learning_rate": 7.271972847121108e-05,
"loss": 1.06528816,
"memory(GiB)": 85.12,
"step": 5855,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.66735435,
"epoch": 4.59,
"learning_rate": 7.267161726161668e-05,
"loss": 1.08798895,
"memory(GiB)": 85.12,
"step": 5860,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.68197312,
"epoch": 4.6,
"learning_rate": 7.262347961362972e-05,
"loss": 1.05005894,
"memory(GiB)": 85.12,
"step": 5865,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.69323926,
"epoch": 4.6,
"learning_rate": 7.257531558338569e-05,
"loss": 1.01712046,
"memory(GiB)": 85.12,
"step": 5870,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67442312,
"epoch": 4.6,
"learning_rate": 7.252712522705082e-05,
"loss": 1.0723731,
"memory(GiB)": 85.12,
"step": 5875,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67650232,
"epoch": 4.61,
"learning_rate": 7.247890860082206e-05,
"loss": 1.06735249,
"memory(GiB)": 85.12,
"step": 5880,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.6892065,
"epoch": 4.61,
"learning_rate": 7.243066576092696e-05,
"loss": 1.02709103,
"memory(GiB)": 85.12,
"step": 5885,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67309418,
"epoch": 4.62,
"learning_rate": 7.238239676362372e-05,
"loss": 1.07167187,
"memory(GiB)": 85.12,
"step": 5890,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67252998,
"epoch": 4.62,
"learning_rate": 7.233410166520093e-05,
"loss": 1.0607296,
"memory(GiB)": 85.12,
"step": 5895,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68240318,
"epoch": 4.62,
"learning_rate": 7.228578052197771e-05,
"loss": 1.04523249,
"memory(GiB)": 85.12,
"step": 5900,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68594875,
"epoch": 4.63,
"learning_rate": 7.223743339030352e-05,
"loss": 1.04490318,
"memory(GiB)": 85.12,
"step": 5905,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.68417301,
"epoch": 4.63,
"learning_rate": 7.21890603265581e-05,
"loss": 1.02140636,
"memory(GiB)": 85.12,
"step": 5910,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67098885,
"epoch": 4.64,
"learning_rate": 7.214066138715148e-05,
"loss": 1.07261381,
"memory(GiB)": 85.12,
"step": 5915,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.68442526,
"epoch": 4.64,
"learning_rate": 7.209223662852382e-05,
"loss": 1.02172146,
"memory(GiB)": 85.12,
"step": 5920,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.68503671,
"epoch": 4.64,
"learning_rate": 7.204378610714544e-05,
"loss": 1.03891659,
"memory(GiB)": 85.12,
"step": 5925,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66351156,
"epoch": 4.65,
"learning_rate": 7.199530987951662e-05,
"loss": 1.09004173,
"memory(GiB)": 85.12,
"step": 5930,
"train_speed(iter/s)": 0.035246
},
{
"acc": 0.6784656,
"epoch": 4.65,
"learning_rate": 7.194680800216773e-05,
"loss": 1.04306164,
"memory(GiB)": 85.12,
"step": 5935,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.66837859,
"epoch": 4.66,
"learning_rate": 7.189828053165895e-05,
"loss": 1.09347191,
"memory(GiB)": 85.12,
"step": 5940,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.68225694,
"epoch": 4.66,
"learning_rate": 7.184972752458034e-05,
"loss": 1.01862364,
"memory(GiB)": 85.12,
"step": 5945,
"train_speed(iter/s)": 0.035247
},
{
"acc": 0.67672114,
"epoch": 4.66,
"learning_rate": 7.180114903755178e-05,
"loss": 1.05343723,
"memory(GiB)": 85.12,
"step": 5950,
"train_speed(iter/s)": 0.035248
},
{
"acc": 0.67013016,
"epoch": 4.67,
"learning_rate": 7.175254512722281e-05,
"loss": 1.09360905,
"memory(GiB)": 85.12,
"step": 5955,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.6726356,
"epoch": 4.67,
"learning_rate": 7.170391585027263e-05,
"loss": 1.06504726,
"memory(GiB)": 85.12,
"step": 5960,
"train_speed(iter/s)": 0.035249
},
{
"acc": 0.67619967,
"epoch": 4.67,
"learning_rate": 7.165526126341004e-05,
"loss": 1.07366638,
"memory(GiB)": 85.12,
"step": 5965,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.68048444,
"epoch": 4.68,
"learning_rate": 7.160658142337333e-05,
"loss": 1.0334444,
"memory(GiB)": 85.12,
"step": 5970,
"train_speed(iter/s)": 0.03525
},
{
"acc": 0.67715273,
"epoch": 4.68,
"learning_rate": 7.155787638693026e-05,
"loss": 1.05196743,
"memory(GiB)": 85.12,
"step": 5975,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67561464,
"epoch": 4.69,
"learning_rate": 7.150914621087797e-05,
"loss": 1.04411011,
"memory(GiB)": 85.12,
"step": 5980,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67420325,
"epoch": 4.69,
"learning_rate": 7.146039095204288e-05,
"loss": 1.05389662,
"memory(GiB)": 85.12,
"step": 5985,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.67851129,
"epoch": 4.69,
"learning_rate": 7.141161066728072e-05,
"loss": 1.06385975,
"memory(GiB)": 85.12,
"step": 5990,
"train_speed(iter/s)": 0.035251
},
{
"acc": 0.67380395,
"epoch": 4.7,
"learning_rate": 7.136280541347638e-05,
"loss": 1.0779253,
"memory(GiB)": 85.12,
"step": 5995,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.66957092,
"epoch": 4.7,
"learning_rate": 7.131397524754381e-05,
"loss": 1.09954481,
"memory(GiB)": 85.12,
"step": 6000,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.67146378,
"epoch": 4.71,
"learning_rate": 7.126512022642613e-05,
"loss": 1.06633472,
"memory(GiB)": 85.12,
"step": 6005,
"train_speed(iter/s)": 0.035252
},
{
"acc": 0.68832026,
"epoch": 4.71,
"learning_rate": 7.121624040709533e-05,
"loss": 1.00683203,
"memory(GiB)": 85.12,
"step": 6010,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.66239319,
"epoch": 4.71,
"learning_rate": 7.116733584655237e-05,
"loss": 1.10340586,
"memory(GiB)": 85.12,
"step": 6015,
"train_speed(iter/s)": 0.035253
},
{
"acc": 0.68091083,
"epoch": 4.72,
"learning_rate": 7.11184066018271e-05,
"loss": 1.04083786,
"memory(GiB)": 85.12,
"step": 6020,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.68418207,
"epoch": 4.72,
"learning_rate": 7.106945272997807e-05,
"loss": 1.03925686,
"memory(GiB)": 85.12,
"step": 6025,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.68394904,
"epoch": 4.73,
"learning_rate": 7.102047428809259e-05,
"loss": 1.03902893,
"memory(GiB)": 85.12,
"step": 6030,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.6748229,
"epoch": 4.73,
"learning_rate": 7.097147133328666e-05,
"loss": 1.05747595,
"memory(GiB)": 85.12,
"step": 6035,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.65920019,
"epoch": 4.73,
"learning_rate": 7.092244392270477e-05,
"loss": 1.11649303,
"memory(GiB)": 85.12,
"step": 6040,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67655163,
"epoch": 4.74,
"learning_rate": 7.087339211352005e-05,
"loss": 1.05948544,
"memory(GiB)": 85.12,
"step": 6045,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68559122,
"epoch": 4.74,
"learning_rate": 7.0824315962934e-05,
"loss": 1.01261806,
"memory(GiB)": 85.12,
"step": 6050,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.67522321,
"epoch": 4.75,
"learning_rate": 7.077521552817651e-05,
"loss": 1.06603298,
"memory(GiB)": 85.12,
"step": 6055,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.68662744,
"epoch": 4.75,
"learning_rate": 7.072609086650582e-05,
"loss": 1.03524733,
"memory(GiB)": 85.12,
"step": 6060,
"train_speed(iter/s)": 0.035254
},
{
"acc": 0.67083483,
"epoch": 4.75,
"learning_rate": 7.067694203520841e-05,
"loss": 1.07945566,
"memory(GiB)": 85.12,
"step": 6065,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68091211,
"epoch": 4.76,
"learning_rate": 7.062776909159893e-05,
"loss": 1.07089872,
"memory(GiB)": 85.12,
"step": 6070,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68336306,
"epoch": 4.76,
"learning_rate": 7.057857209302017e-05,
"loss": 1.03715401,
"memory(GiB)": 85.12,
"step": 6075,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66016645,
"epoch": 4.76,
"learning_rate": 7.0529351096843e-05,
"loss": 1.12012835,
"memory(GiB)": 85.12,
"step": 6080,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.66435328,
"epoch": 4.77,
"learning_rate": 7.048010616046614e-05,
"loss": 1.10283918,
"memory(GiB)": 85.12,
"step": 6085,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.68103104,
"epoch": 4.77,
"learning_rate": 7.043083734131643e-05,
"loss": 1.06891689,
"memory(GiB)": 85.12,
"step": 6090,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.66451955,
"epoch": 4.78,
"learning_rate": 7.038154469684838e-05,
"loss": 1.10513678,
"memory(GiB)": 85.12,
"step": 6095,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.6662466,
"epoch": 4.78,
"learning_rate": 7.033222828454442e-05,
"loss": 1.07707005,
"memory(GiB)": 85.12,
"step": 6100,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.6689785,
"epoch": 4.78,
"learning_rate": 7.028288816191457e-05,
"loss": 1.07475443,
"memory(GiB)": 85.12,
"step": 6105,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.6744,
"epoch": 4.79,
"learning_rate": 7.023352438649662e-05,
"loss": 1.07835417,
"memory(GiB)": 85.12,
"step": 6110,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.6828012,
"epoch": 4.79,
"learning_rate": 7.018413701585587e-05,
"loss": 1.0427166,
"memory(GiB)": 85.12,
"step": 6115,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67784457,
"epoch": 4.8,
"learning_rate": 7.013472610758515e-05,
"loss": 1.03452606,
"memory(GiB)": 85.12,
"step": 6120,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68209443,
"epoch": 4.8,
"learning_rate": 7.008529171930476e-05,
"loss": 1.05084362,
"memory(GiB)": 85.12,
"step": 6125,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.6858211,
"epoch": 4.8,
"learning_rate": 7.003583390866234e-05,
"loss": 1.00880671,
"memory(GiB)": 85.12,
"step": 6130,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67488031,
"epoch": 4.81,
"learning_rate": 6.998635273333289e-05,
"loss": 1.06708899,
"memory(GiB)": 85.12,
"step": 6135,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68384433,
"epoch": 4.81,
"learning_rate": 6.99368482510186e-05,
"loss": 1.01454229,
"memory(GiB)": 85.12,
"step": 6140,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68791485,
"epoch": 4.82,
"learning_rate": 6.98873205194489e-05,
"loss": 1.03954144,
"memory(GiB)": 85.12,
"step": 6145,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.67316933,
"epoch": 4.82,
"learning_rate": 6.983776959638032e-05,
"loss": 1.05018234,
"memory(GiB)": 85.12,
"step": 6150,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.6828548,
"epoch": 4.82,
"learning_rate": 6.97881955395964e-05,
"loss": 1.02742395,
"memory(GiB)": 85.12,
"step": 6155,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67206783,
"epoch": 4.83,
"learning_rate": 6.973859840690766e-05,
"loss": 1.08918076,
"memory(GiB)": 85.12,
"step": 6160,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67323599,
"epoch": 4.83,
"learning_rate": 6.968897825615158e-05,
"loss": 1.07985773,
"memory(GiB)": 85.12,
"step": 6165,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67930789,
"epoch": 4.84,
"learning_rate": 6.963933514519243e-05,
"loss": 1.04254446,
"memory(GiB)": 85.12,
"step": 6170,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.69380798,
"epoch": 4.84,
"learning_rate": 6.958966913192127e-05,
"loss": 1.00592508,
"memory(GiB)": 85.12,
"step": 6175,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67768106,
"epoch": 4.84,
"learning_rate": 6.95399802742559e-05,
"loss": 1.04340172,
"memory(GiB)": 85.12,
"step": 6180,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.66789145,
"epoch": 4.85,
"learning_rate": 6.94902686301407e-05,
"loss": 1.09427404,
"memory(GiB)": 85.12,
"step": 6185,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.65930495,
"epoch": 4.85,
"learning_rate": 6.944053425754668e-05,
"loss": 1.10919495,
"memory(GiB)": 85.12,
"step": 6190,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.68507056,
"epoch": 4.86,
"learning_rate": 6.939077721447129e-05,
"loss": 1.0415493,
"memory(GiB)": 85.12,
"step": 6195,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67345014,
"epoch": 4.86,
"learning_rate": 6.93409975589385e-05,
"loss": 1.07625484,
"memory(GiB)": 85.12,
"step": 6200,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.66527452,
"epoch": 4.86,
"learning_rate": 6.92911953489986e-05,
"loss": 1.08373318,
"memory(GiB)": 85.12,
"step": 6205,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.68124013,
"epoch": 4.87,
"learning_rate": 6.924137064272815e-05,
"loss": 1.05780458,
"memory(GiB)": 85.12,
"step": 6210,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67599878,
"epoch": 4.87,
"learning_rate": 6.919152349822999e-05,
"loss": 1.06544428,
"memory(GiB)": 85.12,
"step": 6215,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67822771,
"epoch": 4.87,
"learning_rate": 6.914165397363318e-05,
"loss": 1.08813448,
"memory(GiB)": 85.12,
"step": 6220,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67499804,
"epoch": 4.88,
"learning_rate": 6.909176212709272e-05,
"loss": 1.04310112,
"memory(GiB)": 85.12,
"step": 6225,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.68530326,
"epoch": 4.88,
"learning_rate": 6.90418480167898e-05,
"loss": 1.02356024,
"memory(GiB)": 85.12,
"step": 6230,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67177353,
"epoch": 4.89,
"learning_rate": 6.899191170093148e-05,
"loss": 1.09755363,
"memory(GiB)": 85.12,
"step": 6235,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68792162,
"epoch": 4.89,
"learning_rate": 6.894195323775078e-05,
"loss": 1.01962185,
"memory(GiB)": 85.12,
"step": 6240,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68935556,
"epoch": 4.89,
"learning_rate": 6.889197268550648e-05,
"loss": 1.00933504,
"memory(GiB)": 85.12,
"step": 6245,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67911396,
"epoch": 4.9,
"learning_rate": 6.884197010248314e-05,
"loss": 1.0349247,
"memory(GiB)": 85.12,
"step": 6250,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67767315,
"epoch": 4.9,
"learning_rate": 6.879194554699106e-05,
"loss": 1.04797144,
"memory(GiB)": 85.12,
"step": 6255,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67949104,
"epoch": 4.91,
"learning_rate": 6.874189907736608e-05,
"loss": 1.0555562,
"memory(GiB)": 85.12,
"step": 6260,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.66246786,
"epoch": 4.91,
"learning_rate": 6.869183075196968e-05,
"loss": 1.08616772,
"memory(GiB)": 85.12,
"step": 6265,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67599144,
"epoch": 4.91,
"learning_rate": 6.864174062918875e-05,
"loss": 1.05308266,
"memory(GiB)": 85.12,
"step": 6270,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68216491,
"epoch": 4.92,
"learning_rate": 6.859162876743565e-05,
"loss": 1.05794802,
"memory(GiB)": 85.12,
"step": 6275,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68431473,
"epoch": 4.92,
"learning_rate": 6.85414952251481e-05,
"loss": 1.03531227,
"memory(GiB)": 85.12,
"step": 6280,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66319323,
"epoch": 4.93,
"learning_rate": 6.849134006078904e-05,
"loss": 1.10098467,
"memory(GiB)": 85.12,
"step": 6285,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66633544,
"epoch": 4.93,
"learning_rate": 6.84411633328467e-05,
"loss": 1.0953721,
"memory(GiB)": 85.12,
"step": 6290,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.69023266,
"epoch": 4.93,
"learning_rate": 6.839096509983436e-05,
"loss": 1.01781502,
"memory(GiB)": 85.12,
"step": 6295,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68030787,
"epoch": 4.94,
"learning_rate": 6.83407454202905e-05,
"loss": 1.07228956,
"memory(GiB)": 85.12,
"step": 6300,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.66776667,
"epoch": 4.94,
"learning_rate": 6.82905043527785e-05,
"loss": 1.0892725,
"memory(GiB)": 85.12,
"step": 6305,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68062611,
"epoch": 4.95,
"learning_rate": 6.824024195588677e-05,
"loss": 1.04044657,
"memory(GiB)": 85.12,
"step": 6310,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.6697052,
"epoch": 4.95,
"learning_rate": 6.818995828822852e-05,
"loss": 1.07682076,
"memory(GiB)": 85.12,
"step": 6315,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.67304358,
"epoch": 4.95,
"learning_rate": 6.813965340844183e-05,
"loss": 1.09571772,
"memory(GiB)": 85.12,
"step": 6320,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68612056,
"epoch": 4.96,
"learning_rate": 6.808932737518944e-05,
"loss": 1.02444048,
"memory(GiB)": 85.12,
"step": 6325,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67951617,
"epoch": 4.96,
"learning_rate": 6.803898024715884e-05,
"loss": 1.07699089,
"memory(GiB)": 85.12,
"step": 6330,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.66642966,
"epoch": 4.96,
"learning_rate": 6.798861208306204e-05,
"loss": 1.09788532,
"memory(GiB)": 85.12,
"step": 6335,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.68518648,
"epoch": 4.97,
"learning_rate": 6.793822294163565e-05,
"loss": 1.03690128,
"memory(GiB)": 85.12,
"step": 6340,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.68445034,
"epoch": 4.97,
"learning_rate": 6.788781288164072e-05,
"loss": 1.04183044,
"memory(GiB)": 85.12,
"step": 6345,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67292199,
"epoch": 4.98,
"learning_rate": 6.783738196186267e-05,
"loss": 1.06181889,
"memory(GiB)": 85.12,
"step": 6350,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.66322756,
"epoch": 4.98,
"learning_rate": 6.778693024111128e-05,
"loss": 1.11033144,
"memory(GiB)": 85.12,
"step": 6355,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.6683476,
"epoch": 4.98,
"learning_rate": 6.773645777822054e-05,
"loss": 1.09244795,
"memory(GiB)": 85.12,
"step": 6360,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67698259,
"epoch": 4.99,
"learning_rate": 6.76859646320487e-05,
"loss": 1.05961075,
"memory(GiB)": 85.12,
"step": 6365,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.69028482,
"epoch": 4.99,
"learning_rate": 6.763545086147806e-05,
"loss": 1.02525082,
"memory(GiB)": 85.12,
"step": 6370,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67674527,
"epoch": 5.0,
"learning_rate": 6.758491652541499e-05,
"loss": 1.06389141,
"memory(GiB)": 85.12,
"step": 6375,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.66953359,
"epoch": 5.0,
"learning_rate": 6.75343616827899e-05,
"loss": 1.06004562,
"memory(GiB)": 85.12,
"step": 6380,
"train_speed(iter/s)": 0.035272
},
{
"epoch": 5.0,
"eval_acc": 0.6952166291009266,
"eval_loss": 0.9776778817176819,
"eval_runtime": 84.814,
"eval_samples_per_second": 1.097,
"eval_steps_per_second": 1.097,
"step": 6380
},
{
"acc": 0.66123757,
"epoch": 5.0,
"learning_rate": 6.748378639255701e-05,
"loss": 1.09309893,
"memory(GiB)": 85.12,
"step": 6385,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.6866303,
"epoch": 5.01,
"learning_rate": 6.74331907136945e-05,
"loss": 1.01490898,
"memory(GiB)": 85.12,
"step": 6390,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.68321543,
"epoch": 5.01,
"learning_rate": 6.73825747052042e-05,
"loss": 1.04613428,
"memory(GiB)": 85.12,
"step": 6395,
"train_speed(iter/s)": 0.035255
},
{
"acc": 0.69075665,
"epoch": 5.02,
"learning_rate": 6.733193842611176e-05,
"loss": 1.02780151,
"memory(GiB)": 85.12,
"step": 6400,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.69020853,
"epoch": 5.02,
"learning_rate": 6.72812819354664e-05,
"loss": 1.0208807,
"memory(GiB)": 85.12,
"step": 6405,
"train_speed(iter/s)": 0.035256
},
{
"acc": 0.67573576,
"epoch": 5.02,
"learning_rate": 6.723060529234095e-05,
"loss": 1.03973875,
"memory(GiB)": 85.12,
"step": 6410,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67599573,
"epoch": 5.03,
"learning_rate": 6.717990855583171e-05,
"loss": 1.048526,
"memory(GiB)": 85.12,
"step": 6415,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.68013263,
"epoch": 5.03,
"learning_rate": 6.712919178505846e-05,
"loss": 1.04929171,
"memory(GiB)": 85.12,
"step": 6420,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67604647,
"epoch": 5.04,
"learning_rate": 6.707845503916424e-05,
"loss": 1.02617359,
"memory(GiB)": 85.12,
"step": 6425,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.67967114,
"epoch": 5.04,
"learning_rate": 6.70276983773155e-05,
"loss": 1.05347996,
"memory(GiB)": 85.12,
"step": 6430,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.69580359,
"epoch": 5.04,
"learning_rate": 6.697692185870185e-05,
"loss": 1.0092021,
"memory(GiB)": 85.12,
"step": 6435,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68660007,
"epoch": 5.05,
"learning_rate": 6.692612554253607e-05,
"loss": 1.00932379,
"memory(GiB)": 85.12,
"step": 6440,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67771325,
"epoch": 5.05,
"learning_rate": 6.687530948805404e-05,
"loss": 1.06307144,
"memory(GiB)": 85.12,
"step": 6445,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68935843,
"epoch": 5.05,
"learning_rate": 6.682447375451463e-05,
"loss": 0.98286209,
"memory(GiB)": 85.12,
"step": 6450,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.68584781,
"epoch": 5.06,
"learning_rate": 6.67736184011997e-05,
"loss": 1.02812233,
"memory(GiB)": 85.12,
"step": 6455,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68117771,
"epoch": 5.06,
"learning_rate": 6.672274348741396e-05,
"loss": 1.04238253,
"memory(GiB)": 85.12,
"step": 6460,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67923732,
"epoch": 5.07,
"learning_rate": 6.667184907248493e-05,
"loss": 1.03050461,
"memory(GiB)": 85.12,
"step": 6465,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.687995,
"epoch": 5.07,
"learning_rate": 6.662093521576285e-05,
"loss": 1.00678883,
"memory(GiB)": 85.12,
"step": 6470,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67301135,
"epoch": 5.07,
"learning_rate": 6.657000197662068e-05,
"loss": 1.06638184,
"memory(GiB)": 85.12,
"step": 6475,
"train_speed(iter/s)": 0.035257
},
{
"acc": 0.6931222,
"epoch": 5.08,
"learning_rate": 6.651904941445398e-05,
"loss": 1.01922817,
"memory(GiB)": 85.12,
"step": 6480,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68098392,
"epoch": 5.08,
"learning_rate": 6.64680775886808e-05,
"loss": 1.04567719,
"memory(GiB)": 85.12,
"step": 6485,
"train_speed(iter/s)": 0.035258
},
{
"acc": 0.68202472,
"epoch": 5.09,
"learning_rate": 6.641708655874169e-05,
"loss": 1.04754171,
"memory(GiB)": 85.12,
"step": 6490,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67911615,
"epoch": 5.09,
"learning_rate": 6.636607638409956e-05,
"loss": 1.06853676,
"memory(GiB)": 85.12,
"step": 6495,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68118539,
"epoch": 5.09,
"learning_rate": 6.63150471242397e-05,
"loss": 1.04338474,
"memory(GiB)": 85.12,
"step": 6500,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67814703,
"epoch": 5.1,
"learning_rate": 6.62639988386696e-05,
"loss": 1.02098122,
"memory(GiB)": 85.12,
"step": 6505,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68116179,
"epoch": 5.1,
"learning_rate": 6.6212931586919e-05,
"loss": 1.04279861,
"memory(GiB)": 85.12,
"step": 6510,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68639112,
"epoch": 5.11,
"learning_rate": 6.616184542853965e-05,
"loss": 1.00777941,
"memory(GiB)": 85.12,
"step": 6515,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.67839465,
"epoch": 5.11,
"learning_rate": 6.611074042310549e-05,
"loss": 1.03657656,
"memory(GiB)": 85.12,
"step": 6520,
"train_speed(iter/s)": 0.035259
},
{
"acc": 0.68946548,
"epoch": 5.11,
"learning_rate": 6.605961663021233e-05,
"loss": 1.01957436,
"memory(GiB)": 85.12,
"step": 6525,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.69059048,
"epoch": 5.12,
"learning_rate": 6.600847410947794e-05,
"loss": 1.01516457,
"memory(GiB)": 85.12,
"step": 6530,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.66432734,
"epoch": 5.12,
"learning_rate": 6.595731292054187e-05,
"loss": 1.08365002,
"memory(GiB)": 85.12,
"step": 6535,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.67369056,
"epoch": 5.13,
"learning_rate": 6.590613312306555e-05,
"loss": 1.0672245,
"memory(GiB)": 85.12,
"step": 6540,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.68721709,
"epoch": 5.13,
"learning_rate": 6.5854934776732e-05,
"loss": 1.00315237,
"memory(GiB)": 85.12,
"step": 6545,
"train_speed(iter/s)": 0.03526
},
{
"acc": 0.67401967,
"epoch": 5.13,
"learning_rate": 6.580371794124592e-05,
"loss": 1.04892883,
"memory(GiB)": 85.12,
"step": 6550,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.69203424,
"epoch": 5.14,
"learning_rate": 6.575248267633357e-05,
"loss": 1.01215448,
"memory(GiB)": 85.12,
"step": 6555,
"train_speed(iter/s)": 0.035261
},
{
"acc": 0.68317304,
"epoch": 5.14,
"learning_rate": 6.57012290417427e-05,
"loss": 1.0137826,
"memory(GiB)": 85.12,
"step": 6560,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67414193,
"epoch": 5.14,
"learning_rate": 6.564995709724246e-05,
"loss": 1.05723057,
"memory(GiB)": 85.12,
"step": 6565,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68139715,
"epoch": 5.15,
"learning_rate": 6.55986669026234e-05,
"loss": 1.02423239,
"memory(GiB)": 85.12,
"step": 6570,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67808838,
"epoch": 5.15,
"learning_rate": 6.554735851769729e-05,
"loss": 1.04498472,
"memory(GiB)": 85.12,
"step": 6575,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67794299,
"epoch": 5.16,
"learning_rate": 6.549603200229717e-05,
"loss": 1.06934061,
"memory(GiB)": 85.12,
"step": 6580,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67949061,
"epoch": 5.16,
"learning_rate": 6.54446874162772e-05,
"loss": 1.03536024,
"memory(GiB)": 85.12,
"step": 6585,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68172569,
"epoch": 5.16,
"learning_rate": 6.539332481951261e-05,
"loss": 1.03228369,
"memory(GiB)": 85.12,
"step": 6590,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67113338,
"epoch": 5.17,
"learning_rate": 6.534194427189961e-05,
"loss": 1.07480278,
"memory(GiB)": 85.12,
"step": 6595,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68143373,
"epoch": 5.17,
"learning_rate": 6.529054583335538e-05,
"loss": 1.04739819,
"memory(GiB)": 85.12,
"step": 6600,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68343229,
"epoch": 5.18,
"learning_rate": 6.523912956381797e-05,
"loss": 1.04386349,
"memory(GiB)": 85.12,
"step": 6605,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68208308,
"epoch": 5.18,
"learning_rate": 6.518769552324619e-05,
"loss": 1.03945503,
"memory(GiB)": 85.12,
"step": 6610,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68102374,
"epoch": 5.18,
"learning_rate": 6.513624377161957e-05,
"loss": 1.06787729,
"memory(GiB)": 85.12,
"step": 6615,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68312049,
"epoch": 5.19,
"learning_rate": 6.508477436893835e-05,
"loss": 1.01988068,
"memory(GiB)": 85.12,
"step": 6620,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68279829,
"epoch": 5.19,
"learning_rate": 6.503328737522327e-05,
"loss": 1.03501797,
"memory(GiB)": 85.12,
"step": 6625,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68076296,
"epoch": 5.2,
"learning_rate": 6.498178285051567e-05,
"loss": 1.02725744,
"memory(GiB)": 85.12,
"step": 6630,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.70253773,
"epoch": 5.2,
"learning_rate": 6.493026085487725e-05,
"loss": 0.96801195,
"memory(GiB)": 85.12,
"step": 6635,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.69069552,
"epoch": 5.2,
"learning_rate": 6.487872144839018e-05,
"loss": 1.02977905,
"memory(GiB)": 85.12,
"step": 6640,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68076792,
"epoch": 5.21,
"learning_rate": 6.482716469115685e-05,
"loss": 1.05776606,
"memory(GiB)": 85.12,
"step": 6645,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68598046,
"epoch": 5.21,
"learning_rate": 6.47755906432999e-05,
"loss": 1.00461483,
"memory(GiB)": 85.12,
"step": 6650,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.67754989,
"epoch": 5.22,
"learning_rate": 6.472399936496219e-05,
"loss": 1.06693382,
"memory(GiB)": 85.12,
"step": 6655,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.703161,
"epoch": 5.22,
"learning_rate": 6.467239091630657e-05,
"loss": 0.96703682,
"memory(GiB)": 85.12,
"step": 6660,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.6778832,
"epoch": 5.22,
"learning_rate": 6.462076535751603e-05,
"loss": 1.05017014,
"memory(GiB)": 85.12,
"step": 6665,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67984619,
"epoch": 5.23,
"learning_rate": 6.456912274879339e-05,
"loss": 1.02873812,
"memory(GiB)": 85.12,
"step": 6670,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.67118473,
"epoch": 5.23,
"learning_rate": 6.451746315036149e-05,
"loss": 1.05280457,
"memory(GiB)": 85.12,
"step": 6675,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67446933,
"epoch": 5.24,
"learning_rate": 6.446578662246287e-05,
"loss": 1.04249983,
"memory(GiB)": 85.12,
"step": 6680,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67916312,
"epoch": 5.24,
"learning_rate": 6.44140932253599e-05,
"loss": 1.04621458,
"memory(GiB)": 85.12,
"step": 6685,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68936515,
"epoch": 5.24,
"learning_rate": 6.43623830193345e-05,
"loss": 0.99246674,
"memory(GiB)": 85.12,
"step": 6690,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67838211,
"epoch": 5.25,
"learning_rate": 6.431065606468832e-05,
"loss": 1.06191158,
"memory(GiB)": 85.12,
"step": 6695,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67258978,
"epoch": 5.25,
"learning_rate": 6.425891242174247e-05,
"loss": 1.08182993,
"memory(GiB)": 85.12,
"step": 6700,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67531633,
"epoch": 5.25,
"learning_rate": 6.420715215083755e-05,
"loss": 1.0620513,
"memory(GiB)": 85.12,
"step": 6705,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68797979,
"epoch": 5.26,
"learning_rate": 6.415537531233354e-05,
"loss": 1.03437891,
"memory(GiB)": 85.12,
"step": 6710,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68858671,
"epoch": 5.26,
"learning_rate": 6.410358196660972e-05,
"loss": 1.0127037,
"memory(GiB)": 85.12,
"step": 6715,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68950157,
"epoch": 5.27,
"learning_rate": 6.405177217406467e-05,
"loss": 1.00921545,
"memory(GiB)": 85.12,
"step": 6720,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.69257259,
"epoch": 5.27,
"learning_rate": 6.399994599511607e-05,
"loss": 1.01077251,
"memory(GiB)": 85.12,
"step": 6725,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68077116,
"epoch": 5.27,
"learning_rate": 6.394810349020083e-05,
"loss": 1.0404789,
"memory(GiB)": 85.12,
"step": 6730,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68140783,
"epoch": 5.28,
"learning_rate": 6.389624471977476e-05,
"loss": 1.0449604,
"memory(GiB)": 85.12,
"step": 6735,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.69492321,
"epoch": 5.28,
"learning_rate": 6.384436974431274e-05,
"loss": 1.00443935,
"memory(GiB)": 85.12,
"step": 6740,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67667093,
"epoch": 5.29,
"learning_rate": 6.379247862430851e-05,
"loss": 1.05137119,
"memory(GiB)": 85.12,
"step": 6745,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68313727,
"epoch": 5.29,
"learning_rate": 6.374057142027463e-05,
"loss": 1.03881302,
"memory(GiB)": 85.12,
"step": 6750,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.68495326,
"epoch": 5.29,
"learning_rate": 6.368864819274243e-05,
"loss": 1.03522501,
"memory(GiB)": 85.12,
"step": 6755,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68464856,
"epoch": 5.3,
"learning_rate": 6.363670900226191e-05,
"loss": 1.03497791,
"memory(GiB)": 85.12,
"step": 6760,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68600035,
"epoch": 5.3,
"learning_rate": 6.358475390940172e-05,
"loss": 1.02388697,
"memory(GiB)": 85.12,
"step": 6765,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.69201202,
"epoch": 5.31,
"learning_rate": 6.3532782974749e-05,
"loss": 1.02907257,
"memory(GiB)": 85.12,
"step": 6770,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67293754,
"epoch": 5.31,
"learning_rate": 6.348079625890943e-05,
"loss": 1.0643261,
"memory(GiB)": 85.12,
"step": 6775,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67401209,
"epoch": 5.31,
"learning_rate": 6.342879382250701e-05,
"loss": 1.0822813,
"memory(GiB)": 85.12,
"step": 6780,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.68836966,
"epoch": 5.32,
"learning_rate": 6.337677572618417e-05,
"loss": 1.03913021,
"memory(GiB)": 85.12,
"step": 6785,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68064456,
"epoch": 5.32,
"learning_rate": 6.332474203060155e-05,
"loss": 1.02623987,
"memory(GiB)": 85.12,
"step": 6790,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67679968,
"epoch": 5.33,
"learning_rate": 6.327269279643792e-05,
"loss": 1.06826124,
"memory(GiB)": 85.12,
"step": 6795,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68307304,
"epoch": 5.33,
"learning_rate": 6.322062808439029e-05,
"loss": 1.02487335,
"memory(GiB)": 85.12,
"step": 6800,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.67592635,
"epoch": 5.33,
"learning_rate": 6.316854795517364e-05,
"loss": 1.07259035,
"memory(GiB)": 85.12,
"step": 6805,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.68332787,
"epoch": 5.34,
"learning_rate": 6.311645246952097e-05,
"loss": 1.00476055,
"memory(GiB)": 85.12,
"step": 6810,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67505136,
"epoch": 5.34,
"learning_rate": 6.306434168818315e-05,
"loss": 1.07261286,
"memory(GiB)": 85.12,
"step": 6815,
"train_speed(iter/s)": 0.035262
},
{
"acc": 0.68755107,
"epoch": 5.34,
"learning_rate": 6.301221567192892e-05,
"loss": 1.01972666,
"memory(GiB)": 85.12,
"step": 6820,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.688377,
"epoch": 5.35,
"learning_rate": 6.296007448154475e-05,
"loss": 1.04110975,
"memory(GiB)": 85.12,
"step": 6825,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.67587614,
"epoch": 5.35,
"learning_rate": 6.290791817783486e-05,
"loss": 1.05248299,
"memory(GiB)": 85.12,
"step": 6830,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.69134288,
"epoch": 5.36,
"learning_rate": 6.285574682162103e-05,
"loss": 1.00044346,
"memory(GiB)": 85.12,
"step": 6835,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67581382,
"epoch": 5.36,
"learning_rate": 6.280356047374264e-05,
"loss": 1.05379066,
"memory(GiB)": 85.12,
"step": 6840,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.68169432,
"epoch": 5.36,
"learning_rate": 6.275135919505655e-05,
"loss": 1.02964487,
"memory(GiB)": 85.12,
"step": 6845,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68080144,
"epoch": 5.37,
"learning_rate": 6.269914304643698e-05,
"loss": 1.02860794,
"memory(GiB)": 85.12,
"step": 6850,
"train_speed(iter/s)": 0.035263
},
{
"acc": 0.68284755,
"epoch": 5.37,
"learning_rate": 6.264691208877558e-05,
"loss": 1.02458563,
"memory(GiB)": 85.12,
"step": 6855,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67072563,
"epoch": 5.38,
"learning_rate": 6.259466638298118e-05,
"loss": 1.07109718,
"memory(GiB)": 85.12,
"step": 6860,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.69171076,
"epoch": 5.38,
"learning_rate": 6.254240598997985e-05,
"loss": 1.0028264,
"memory(GiB)": 85.12,
"step": 6865,
"train_speed(iter/s)": 0.035264
},
{
"acc": 0.67135153,
"epoch": 5.38,
"learning_rate": 6.24901309707148e-05,
"loss": 1.08797283,
"memory(GiB)": 85.12,
"step": 6870,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.6757844,
"epoch": 5.39,
"learning_rate": 6.243784138614627e-05,
"loss": 1.04371614,
"memory(GiB)": 85.12,
"step": 6875,
"train_speed(iter/s)": 0.035265
},
{
"acc": 0.67799335,
"epoch": 5.39,
"learning_rate": 6.238553729725151e-05,
"loss": 1.02313776,
"memory(GiB)": 85.12,
"step": 6880,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.67846012,
"epoch": 5.4,
"learning_rate": 6.233321876502468e-05,
"loss": 1.04524956,
"memory(GiB)": 85.12,
"step": 6885,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68262143,
"epoch": 5.4,
"learning_rate": 6.228088585047673e-05,
"loss": 1.02941513,
"memory(GiB)": 85.12,
"step": 6890,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.67653875,
"epoch": 5.4,
"learning_rate": 6.222853861463546e-05,
"loss": 1.07044706,
"memory(GiB)": 85.12,
"step": 6895,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68358874,
"epoch": 5.41,
"learning_rate": 6.217617711854534e-05,
"loss": 1.0135643,
"memory(GiB)": 85.12,
"step": 6900,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.68347077,
"epoch": 5.41,
"learning_rate": 6.212380142326743e-05,
"loss": 1.06620531,
"memory(GiB)": 85.12,
"step": 6905,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68109732,
"epoch": 5.42,
"learning_rate": 6.207141158987943e-05,
"loss": 1.0621707,
"memory(GiB)": 85.12,
"step": 6910,
"train_speed(iter/s)": 0.035266
},
{
"acc": 0.66551132,
"epoch": 5.42,
"learning_rate": 6.201900767947544e-05,
"loss": 1.09948444,
"memory(GiB)": 85.12,
"step": 6915,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.6836462,
"epoch": 5.42,
"learning_rate": 6.196658975316604e-05,
"loss": 1.04037647,
"memory(GiB)": 85.12,
"step": 6920,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68927107,
"epoch": 5.43,
"learning_rate": 6.191415787207813e-05,
"loss": 1.01255808,
"memory(GiB)": 85.12,
"step": 6925,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.67622333,
"epoch": 5.43,
"learning_rate": 6.186171209735489e-05,
"loss": 1.06019592,
"memory(GiB)": 85.12,
"step": 6930,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68418527,
"epoch": 5.43,
"learning_rate": 6.180925249015566e-05,
"loss": 1.03336248,
"memory(GiB)": 85.12,
"step": 6935,
"train_speed(iter/s)": 0.035267
},
{
"acc": 0.68626051,
"epoch": 5.44,
"learning_rate": 6.175677911165599e-05,
"loss": 1.03925867,
"memory(GiB)": 85.12,
"step": 6940,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.68999286,
"epoch": 5.44,
"learning_rate": 6.170429202304744e-05,
"loss": 1.00964413,
"memory(GiB)": 85.12,
"step": 6945,
"train_speed(iter/s)": 0.035268
},
{
"acc": 0.68267813,
"epoch": 5.45,
"learning_rate": 6.165179128553754e-05,
"loss": 1.02514906,
"memory(GiB)": 85.12,
"step": 6950,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.67859344,
"epoch": 5.45,
"learning_rate": 6.15992769603498e-05,
"loss": 1.08015528,
"memory(GiB)": 85.12,
"step": 6955,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.67712188,
"epoch": 5.45,
"learning_rate": 6.15467491087235e-05,
"loss": 1.05515985,
"memory(GiB)": 85.12,
"step": 6960,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.68022337,
"epoch": 5.46,
"learning_rate": 6.149420779191373e-05,
"loss": 1.05591021,
"memory(GiB)": 85.12,
"step": 6965,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.6749332,
"epoch": 5.46,
"learning_rate": 6.144165307119129e-05,
"loss": 1.07103643,
"memory(GiB)": 85.12,
"step": 6970,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67788014,
"epoch": 5.47,
"learning_rate": 6.138908500784265e-05,
"loss": 1.05876656,
"memory(GiB)": 85.12,
"step": 6975,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67351999,
"epoch": 5.47,
"learning_rate": 6.133650366316972e-05,
"loss": 1.08339548,
"memory(GiB)": 85.12,
"step": 6980,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67984676,
"epoch": 5.47,
"learning_rate": 6.128390909849004e-05,
"loss": 1.04141293,
"memory(GiB)": 85.12,
"step": 6985,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.68225818,
"epoch": 5.48,
"learning_rate": 6.123130137513642e-05,
"loss": 1.01653795,
"memory(GiB)": 85.12,
"step": 6990,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67328138,
"epoch": 5.48,
"learning_rate": 6.117868055445715e-05,
"loss": 1.04721413,
"memory(GiB)": 85.12,
"step": 6995,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67902741,
"epoch": 5.49,
"learning_rate": 6.112604669781572e-05,
"loss": 1.03500223,
"memory(GiB)": 85.12,
"step": 7000,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68537979,
"epoch": 5.49,
"learning_rate": 6.107339986659084e-05,
"loss": 1.05380325,
"memory(GiB)": 85.12,
"step": 7005,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.6760211,
"epoch": 5.49,
"learning_rate": 6.1020740122176343e-05,
"loss": 1.06433525,
"memory(GiB)": 85.12,
"step": 7010,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67585163,
"epoch": 5.5,
"learning_rate": 6.096806752598112e-05,
"loss": 1.08780317,
"memory(GiB)": 85.12,
"step": 7015,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67884717,
"epoch": 5.5,
"learning_rate": 6.091538213942908e-05,
"loss": 1.03859797,
"memory(GiB)": 85.12,
"step": 7020,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.6934917,
"epoch": 5.51,
"learning_rate": 6.086268402395898e-05,
"loss": 1.00850601,
"memory(GiB)": 85.12,
"step": 7025,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.70368981,
"epoch": 5.51,
"learning_rate": 6.080997324102449e-05,
"loss": 0.97637157,
"memory(GiB)": 85.12,
"step": 7030,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.69735985,
"epoch": 5.51,
"learning_rate": 6.0757249852094026e-05,
"loss": 0.98540497,
"memory(GiB)": 85.12,
"step": 7035,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67899513,
"epoch": 5.52,
"learning_rate": 6.07045139186507e-05,
"loss": 1.01754208,
"memory(GiB)": 85.12,
"step": 7040,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.68062968,
"epoch": 5.52,
"learning_rate": 6.065176550219226e-05,
"loss": 1.04695129,
"memory(GiB)": 85.12,
"step": 7045,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67595897,
"epoch": 5.53,
"learning_rate": 6.0599004664230984e-05,
"loss": 1.05315809,
"memory(GiB)": 85.12,
"step": 7050,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67158957,
"epoch": 5.53,
"learning_rate": 6.054623146629368e-05,
"loss": 1.07019091,
"memory(GiB)": 85.12,
"step": 7055,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.68817263,
"epoch": 5.53,
"learning_rate": 6.049344596992153e-05,
"loss": 1.00896044,
"memory(GiB)": 85.12,
"step": 7060,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67556105,
"epoch": 5.54,
"learning_rate": 6.04406482366701e-05,
"loss": 1.09273539,
"memory(GiB)": 85.12,
"step": 7065,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.69138508,
"epoch": 5.54,
"learning_rate": 6.038783832810918e-05,
"loss": 0.99918337,
"memory(GiB)": 85.12,
"step": 7070,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67930202,
"epoch": 5.54,
"learning_rate": 6.03350163058228e-05,
"loss": 1.04020901,
"memory(GiB)": 85.12,
"step": 7075,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.67458458,
"epoch": 5.55,
"learning_rate": 6.028218223140908e-05,
"loss": 1.06615458,
"memory(GiB)": 85.12,
"step": 7080,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67609491,
"epoch": 5.55,
"learning_rate": 6.022933616648021e-05,
"loss": 1.06678152,
"memory(GiB)": 85.12,
"step": 7085,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67319765,
"epoch": 5.56,
"learning_rate": 6.017647817266236e-05,
"loss": 1.06721945,
"memory(GiB)": 85.12,
"step": 7090,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67800503,
"epoch": 5.56,
"learning_rate": 6.012360831159565e-05,
"loss": 1.04036112,
"memory(GiB)": 85.12,
"step": 7095,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.6719317,
"epoch": 5.56,
"learning_rate": 6.007072664493395e-05,
"loss": 1.07102108,
"memory(GiB)": 85.12,
"step": 7100,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67592273,
"epoch": 5.57,
"learning_rate": 6.0017833234344963e-05,
"loss": 1.0262372,
"memory(GiB)": 85.12,
"step": 7105,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68355751,
"epoch": 5.57,
"learning_rate": 5.996492814151011e-05,
"loss": 1.02106323,
"memory(GiB)": 85.12,
"step": 7110,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68380432,
"epoch": 5.58,
"learning_rate": 5.991201142812436e-05,
"loss": 1.03930197,
"memory(GiB)": 85.12,
"step": 7115,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67916827,
"epoch": 5.58,
"learning_rate": 5.98590831558963e-05,
"loss": 1.03912115,
"memory(GiB)": 85.12,
"step": 7120,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68918715,
"epoch": 5.58,
"learning_rate": 5.980614338654794e-05,
"loss": 1.01146679,
"memory(GiB)": 85.12,
"step": 7125,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.69327388,
"epoch": 5.59,
"learning_rate": 5.975319218181474e-05,
"loss": 1.01032009,
"memory(GiB)": 85.12,
"step": 7130,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.66974945,
"epoch": 5.59,
"learning_rate": 5.970022960344549e-05,
"loss": 1.06768141,
"memory(GiB)": 85.12,
"step": 7135,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67780514,
"epoch": 5.6,
"learning_rate": 5.9647255713202234e-05,
"loss": 1.064604,
"memory(GiB)": 85.12,
"step": 7140,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.67822738,
"epoch": 5.6,
"learning_rate": 5.959427057286019e-05,
"loss": 1.04480143,
"memory(GiB)": 85.12,
"step": 7145,
"train_speed(iter/s)": 0.035269
},
{
"acc": 0.68090706,
"epoch": 5.6,
"learning_rate": 5.954127424420773e-05,
"loss": 1.03140087,
"memory(GiB)": 85.12,
"step": 7150,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.68836522,
"epoch": 5.61,
"learning_rate": 5.9488266789046255e-05,
"loss": 1.02142658,
"memory(GiB)": 85.12,
"step": 7155,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67817101,
"epoch": 5.61,
"learning_rate": 5.943524826919013e-05,
"loss": 1.04541025,
"memory(GiB)": 85.12,
"step": 7160,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.67876797,
"epoch": 5.62,
"learning_rate": 5.9382218746466634e-05,
"loss": 1.07195463,
"memory(GiB)": 85.12,
"step": 7165,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67407222,
"epoch": 5.62,
"learning_rate": 5.93291782827159e-05,
"loss": 1.05578661,
"memory(GiB)": 85.12,
"step": 7170,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67716932,
"epoch": 5.62,
"learning_rate": 5.927612693979079e-05,
"loss": 1.0597784,
"memory(GiB)": 85.12,
"step": 7175,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.68023987,
"epoch": 5.63,
"learning_rate": 5.9223064779556846e-05,
"loss": 1.0568635,
"memory(GiB)": 85.12,
"step": 7180,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.67762904,
"epoch": 5.63,
"learning_rate": 5.916999186389227e-05,
"loss": 1.05888271,
"memory(GiB)": 85.12,
"step": 7185,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.68073959,
"epoch": 5.63,
"learning_rate": 5.911690825468774e-05,
"loss": 1.01983681,
"memory(GiB)": 85.12,
"step": 7190,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68627586,
"epoch": 5.64,
"learning_rate": 5.9063814013846475e-05,
"loss": 1.00290499,
"memory(GiB)": 85.12,
"step": 7195,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.68013487,
"epoch": 5.64,
"learning_rate": 5.901070920328402e-05,
"loss": 1.06031981,
"memory(GiB)": 85.12,
"step": 7200,
"train_speed(iter/s)": 0.03527
},
{
"acc": 0.68101711,
"epoch": 5.65,
"learning_rate": 5.89575938849283e-05,
"loss": 1.05255985,
"memory(GiB)": 85.12,
"step": 7205,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68656335,
"epoch": 5.65,
"learning_rate": 5.8904468120719506e-05,
"loss": 1.04291906,
"memory(GiB)": 85.12,
"step": 7210,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.6808826,
"epoch": 5.65,
"learning_rate": 5.885133197260993e-05,
"loss": 1.05651436,
"memory(GiB)": 85.12,
"step": 7215,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68361244,
"epoch": 5.66,
"learning_rate": 5.879818550256405e-05,
"loss": 1.05124416,
"memory(GiB)": 85.12,
"step": 7220,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.68785329,
"epoch": 5.66,
"learning_rate": 5.874502877255835e-05,
"loss": 1.00955372,
"memory(GiB)": 85.12,
"step": 7225,
"train_speed(iter/s)": 0.035271
},
{
"acc": 0.67910919,
"epoch": 5.67,
"learning_rate": 5.8691861844581295e-05,
"loss": 1.06434069,
"memory(GiB)": 85.12,
"step": 7230,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.68168969,
"epoch": 5.67,
"learning_rate": 5.8638684780633216e-05,
"loss": 1.03852062,
"memory(GiB)": 85.12,
"step": 7235,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.69188733,
"epoch": 5.67,
"learning_rate": 5.858549764272629e-05,
"loss": 1.01275368,
"memory(GiB)": 85.12,
"step": 7240,
"train_speed(iter/s)": 0.035272
},
{
"acc": 0.68451958,
"epoch": 5.68,
"learning_rate": 5.853230049288443e-05,
"loss": 1.02619667,
"memory(GiB)": 85.12,
"step": 7245,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.67939177,
"epoch": 5.68,
"learning_rate": 5.847909339314322e-05,
"loss": 1.04042921,
"memory(GiB)": 85.12,
"step": 7250,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.66912894,
"epoch": 5.69,
"learning_rate": 5.842587640554986e-05,
"loss": 1.08952456,
"memory(GiB)": 85.12,
"step": 7255,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68664956,
"epoch": 5.69,
"learning_rate": 5.8372649592163056e-05,
"loss": 1.02034445,
"memory(GiB)": 85.12,
"step": 7260,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.68716116,
"epoch": 5.69,
"learning_rate": 5.8319413015052993e-05,
"loss": 1.017033,
"memory(GiB)": 85.12,
"step": 7265,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.66364636,
"epoch": 5.7,
"learning_rate": 5.826616673630125e-05,
"loss": 1.09431334,
"memory(GiB)": 85.12,
"step": 7270,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67621994,
"epoch": 5.7,
"learning_rate": 5.821291081800071e-05,
"loss": 1.07470884,
"memory(GiB)": 85.12,
"step": 7275,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.6812994,
"epoch": 5.71,
"learning_rate": 5.8159645322255475e-05,
"loss": 1.07116871,
"memory(GiB)": 85.12,
"step": 7280,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.684375,
"epoch": 5.71,
"learning_rate": 5.810637031118086e-05,
"loss": 1.02568693,
"memory(GiB)": 85.12,
"step": 7285,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68658953,
"epoch": 5.71,
"learning_rate": 5.805308584690321e-05,
"loss": 1.04678984,
"memory(GiB)": 85.12,
"step": 7290,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.69045858,
"epoch": 5.72,
"learning_rate": 5.799979199155998e-05,
"loss": 1.01404943,
"memory(GiB)": 85.12,
"step": 7295,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68868189,
"epoch": 5.72,
"learning_rate": 5.794648880729952e-05,
"loss": 1.01892052,
"memory(GiB)": 85.12,
"step": 7300,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68861918,
"epoch": 5.72,
"learning_rate": 5.7893176356281056e-05,
"loss": 1.0252737,
"memory(GiB)": 85.12,
"step": 7305,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68878713,
"epoch": 5.73,
"learning_rate": 5.7839854700674655e-05,
"loss": 1.01528711,
"memory(GiB)": 85.12,
"step": 7310,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67168713,
"epoch": 5.73,
"learning_rate": 5.778652390266107e-05,
"loss": 1.0582777,
"memory(GiB)": 85.12,
"step": 7315,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68114805,
"epoch": 5.74,
"learning_rate": 5.773318402443177e-05,
"loss": 1.05835133,
"memory(GiB)": 85.12,
"step": 7320,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67773662,
"epoch": 5.74,
"learning_rate": 5.767983512818877e-05,
"loss": 1.04364738,
"memory(GiB)": 85.12,
"step": 7325,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.68714108,
"epoch": 5.74,
"learning_rate": 5.762647727614462e-05,
"loss": 1.0286314,
"memory(GiB)": 85.12,
"step": 7330,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.69304905,
"epoch": 5.75,
"learning_rate": 5.757311053052232e-05,
"loss": 1.0080122,
"memory(GiB)": 85.12,
"step": 7335,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68668628,
"epoch": 5.75,
"learning_rate": 5.7519734953555225e-05,
"loss": 1.04978542,
"memory(GiB)": 85.12,
"step": 7340,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68079734,
"epoch": 5.76,
"learning_rate": 5.7466350607486994e-05,
"loss": 1.03145504,
"memory(GiB)": 85.12,
"step": 7345,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.6759232,
"epoch": 5.76,
"learning_rate": 5.7412957554571535e-05,
"loss": 1.04577227,
"memory(GiB)": 85.12,
"step": 7350,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67788,
"epoch": 5.76,
"learning_rate": 5.7359555857072865e-05,
"loss": 1.01738691,
"memory(GiB)": 85.12,
"step": 7355,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67940884,
"epoch": 5.77,
"learning_rate": 5.730614557726509e-05,
"loss": 1.0438838,
"memory(GiB)": 85.12,
"step": 7360,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67440863,
"epoch": 5.77,
"learning_rate": 5.725272677743238e-05,
"loss": 1.04039993,
"memory(GiB)": 85.12,
"step": 7365,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.66977754,
"epoch": 5.78,
"learning_rate": 5.719929951986875e-05,
"loss": 1.0763092,
"memory(GiB)": 85.12,
"step": 7370,
"train_speed(iter/s)": 0.035278
},
{
"acc": 0.68106508,
"epoch": 5.78,
"learning_rate": 5.71458638668782e-05,
"loss": 1.05014811,
"memory(GiB)": 85.12,
"step": 7375,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.68356209,
"epoch": 5.78,
"learning_rate": 5.7092419880774384e-05,
"loss": 1.04596844,
"memory(GiB)": 85.12,
"step": 7380,
"train_speed(iter/s)": 0.035279
},
{
"acc": 0.67028294,
"epoch": 5.79,
"learning_rate": 5.7038967623880766e-05,
"loss": 1.07842445,
"memory(GiB)": 85.12,
"step": 7385,
"train_speed(iter/s)": 0.03528
},
{
"acc": 0.69012942,
"epoch": 5.79,
"learning_rate": 5.698550715853041e-05,
"loss": 1.01788101,
"memory(GiB)": 85.12,
"step": 7390,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68352227,
"epoch": 5.8,
"learning_rate": 5.6932038547065994e-05,
"loss": 1.02470961,
"memory(GiB)": 85.12,
"step": 7395,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68133144,
"epoch": 5.8,
"learning_rate": 5.687856185183964e-05,
"loss": 1.00813093,
"memory(GiB)": 85.12,
"step": 7400,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67967892,
"epoch": 5.8,
"learning_rate": 5.682507713521297e-05,
"loss": 1.04251871,
"memory(GiB)": 85.12,
"step": 7405,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67965193,
"epoch": 5.81,
"learning_rate": 5.677158445955688e-05,
"loss": 1.04245062,
"memory(GiB)": 85.12,
"step": 7410,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68244319,
"epoch": 5.81,
"learning_rate": 5.6718083887251585e-05,
"loss": 1.04257526,
"memory(GiB)": 85.12,
"step": 7415,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67110071,
"epoch": 5.82,
"learning_rate": 5.666457548068653e-05,
"loss": 1.04886589,
"memory(GiB)": 85.12,
"step": 7420,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68169503,
"epoch": 5.82,
"learning_rate": 5.661105930226027e-05,
"loss": 1.03461304,
"memory(GiB)": 85.12,
"step": 7425,
"train_speed(iter/s)": 0.035273
},
{
"acc": 0.68312588,
"epoch": 5.82,
"learning_rate": 5.65575354143804e-05,
"loss": 1.03809738,
"memory(GiB)": 85.12,
"step": 7430,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67332888,
"epoch": 5.83,
"learning_rate": 5.650400387946358e-05,
"loss": 1.04974604,
"memory(GiB)": 85.12,
"step": 7435,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67180338,
"epoch": 5.83,
"learning_rate": 5.6450464759935306e-05,
"loss": 1.07695293,
"memory(GiB)": 85.12,
"step": 7440,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67879071,
"epoch": 5.83,
"learning_rate": 5.6396918118229954e-05,
"loss": 1.07525129,
"memory(GiB)": 85.12,
"step": 7445,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.69320011,
"epoch": 5.84,
"learning_rate": 5.63433640167907e-05,
"loss": 0.99239464,
"memory(GiB)": 85.12,
"step": 7450,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68558059,
"epoch": 5.84,
"learning_rate": 5.628980251806937e-05,
"loss": 1.02958326,
"memory(GiB)": 85.12,
"step": 7455,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68014541,
"epoch": 5.85,
"learning_rate": 5.6236233684526416e-05,
"loss": 1.01991968,
"memory(GiB)": 85.12,
"step": 7460,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68883257,
"epoch": 5.85,
"learning_rate": 5.6182657578630896e-05,
"loss": 1.00945797,
"memory(GiB)": 85.12,
"step": 7465,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68561568,
"epoch": 5.85,
"learning_rate": 5.6129074262860304e-05,
"loss": 1.03655548,
"memory(GiB)": 85.12,
"step": 7470,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68775282,
"epoch": 5.86,
"learning_rate": 5.607548379970056e-05,
"loss": 1.00257397,
"memory(GiB)": 85.12,
"step": 7475,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68747077,
"epoch": 5.86,
"learning_rate": 5.602188625164591e-05,
"loss": 1.02046089,
"memory(GiB)": 85.12,
"step": 7480,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67813239,
"epoch": 5.87,
"learning_rate": 5.5968281681198864e-05,
"loss": 1.02797394,
"memory(GiB)": 85.12,
"step": 7485,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68199043,
"epoch": 5.87,
"learning_rate": 5.591467015087012e-05,
"loss": 1.03519802,
"memory(GiB)": 85.12,
"step": 7490,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67812767,
"epoch": 5.87,
"learning_rate": 5.5861051723178494e-05,
"loss": 1.052808,
"memory(GiB)": 85.12,
"step": 7495,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68303499,
"epoch": 5.88,
"learning_rate": 5.580742646065085e-05,
"loss": 1.02687483,
"memory(GiB)": 85.12,
"step": 7500,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67364564,
"epoch": 5.88,
"learning_rate": 5.575379442582203e-05,
"loss": 1.05254545,
"memory(GiB)": 85.12,
"step": 7505,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67409277,
"epoch": 5.89,
"learning_rate": 5.570015568123475e-05,
"loss": 1.04328775,
"memory(GiB)": 85.12,
"step": 7510,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.684273,
"epoch": 5.89,
"learning_rate": 5.564651028943956e-05,
"loss": 1.04421234,
"memory(GiB)": 85.12,
"step": 7515,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68276038,
"epoch": 5.89,
"learning_rate": 5.559285831299477e-05,
"loss": 1.05293627,
"memory(GiB)": 85.12,
"step": 7520,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68299809,
"epoch": 5.9,
"learning_rate": 5.553919981446635e-05,
"loss": 1.0334218,
"memory(GiB)": 85.12,
"step": 7525,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68169241,
"epoch": 5.9,
"learning_rate": 5.548553485642789e-05,
"loss": 1.03117504,
"memory(GiB)": 85.12,
"step": 7530,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.69697251,
"epoch": 5.91,
"learning_rate": 5.543186350146053e-05,
"loss": 0.98402576,
"memory(GiB)": 85.12,
"step": 7535,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.68771691,
"epoch": 5.91,
"learning_rate": 5.537818581215285e-05,
"loss": 1.01845856,
"memory(GiB)": 85.12,
"step": 7540,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67708225,
"epoch": 5.91,
"learning_rate": 5.53245018511008e-05,
"loss": 1.03625803,
"memory(GiB)": 85.12,
"step": 7545,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68522434,
"epoch": 5.92,
"learning_rate": 5.527081168090767e-05,
"loss": 1.0206852,
"memory(GiB)": 85.12,
"step": 7550,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.69737639,
"epoch": 5.92,
"learning_rate": 5.521711536418398e-05,
"loss": 0.96795731,
"memory(GiB)": 85.12,
"step": 7555,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68795233,
"epoch": 5.92,
"learning_rate": 5.5163412963547425e-05,
"loss": 1.00420456,
"memory(GiB)": 85.12,
"step": 7560,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67716355,
"epoch": 5.93,
"learning_rate": 5.5109704541622787e-05,
"loss": 1.05512428,
"memory(GiB)": 85.12,
"step": 7565,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.69076524,
"epoch": 5.93,
"learning_rate": 5.505599016104187e-05,
"loss": 0.99612122,
"memory(GiB)": 85.12,
"step": 7570,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.66488171,
"epoch": 5.94,
"learning_rate": 5.5002269884443433e-05,
"loss": 1.08279037,
"memory(GiB)": 85.12,
"step": 7575,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68213077,
"epoch": 5.94,
"learning_rate": 5.4948543774473105e-05,
"loss": 1.0349185,
"memory(GiB)": 85.12,
"step": 7580,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.67493806,
"epoch": 5.94,
"learning_rate": 5.4894811893783316e-05,
"loss": 1.06746645,
"memory(GiB)": 85.12,
"step": 7585,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67195811,
"epoch": 5.95,
"learning_rate": 5.484107430503322e-05,
"loss": 1.07974176,
"memory(GiB)": 85.12,
"step": 7590,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.68496284,
"epoch": 5.95,
"learning_rate": 5.4787331070888656e-05,
"loss": 1.03015051,
"memory(GiB)": 85.12,
"step": 7595,
"train_speed(iter/s)": 0.035277
},
{
"acc": 0.67029195,
"epoch": 5.96,
"learning_rate": 5.473358225402202e-05,
"loss": 1.07754288,
"memory(GiB)": 85.12,
"step": 7600,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67144132,
"epoch": 5.96,
"learning_rate": 5.467982791711224e-05,
"loss": 1.04674873,
"memory(GiB)": 85.12,
"step": 7605,
"train_speed(iter/s)": 0.035276
},
{
"acc": 0.69102864,
"epoch": 5.96,
"learning_rate": 5.4626068122844634e-05,
"loss": 1.03087606,
"memory(GiB)": 85.12,
"step": 7610,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.68264012,
"epoch": 5.97,
"learning_rate": 5.4572302933910926e-05,
"loss": 1.03752575,
"memory(GiB)": 85.12,
"step": 7615,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68130126,
"epoch": 5.97,
"learning_rate": 5.451853241300913e-05,
"loss": 1.01153135,
"memory(GiB)": 85.12,
"step": 7620,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.66839213,
"epoch": 5.98,
"learning_rate": 5.446475662284346e-05,
"loss": 1.09095182,
"memory(GiB)": 85.12,
"step": 7625,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.67351193,
"epoch": 5.98,
"learning_rate": 5.4410975626124284e-05,
"loss": 1.07571201,
"memory(GiB)": 85.12,
"step": 7630,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.68798418,
"epoch": 5.98,
"learning_rate": 5.435718948556804e-05,
"loss": 1.00712776,
"memory(GiB)": 85.12,
"step": 7635,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.70076885,
"epoch": 5.99,
"learning_rate": 5.430339826389719e-05,
"loss": 0.9588829,
"memory(GiB)": 85.12,
"step": 7640,
"train_speed(iter/s)": 0.035274
},
{
"acc": 0.67537951,
"epoch": 5.99,
"learning_rate": 5.424960202384006e-05,
"loss": 1.05870562,
"memory(GiB)": 85.12,
"step": 7645,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.69180627,
"epoch": 6.0,
"learning_rate": 5.419580082813089e-05,
"loss": 1.00422449,
"memory(GiB)": 85.12,
"step": 7650,
"train_speed(iter/s)": 0.035275
},
{
"acc": 0.66602154,
"epoch": 6.0,
"learning_rate": 5.414199473950967e-05,
"loss": 1.09426003,
"memory(GiB)": 85.12,
"step": 7655,
"train_speed(iter/s)": 0.035276
},
{
"epoch": 6.0,
"eval_acc": 0.6978462309040822,
"eval_loss": 0.9628272652626038,
"eval_runtime": 84.9357,
"eval_samples_per_second": 1.095,
"eval_steps_per_second": 1.095,
"step": 7656
}
],
"logging_steps": 5,
"max_steps": 15312,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 1,
"total_flos": 3.3314641424613103e+22,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}