1025 lines
70 KiB
Plaintext

[#0] ReLU (32,) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#1] ReLU (32,) lr=5e-03 → top1=0.43, top3=0.66, top5=0.78
[#2] ReLU (32,) lr=1e-03 → top1=0.43, top3=0.67, top5=0.78
[#3] ReLU (32,) lr=5e-04 → top1=0.42, top3=0.66, top5=0.77
[#4] ReLU (32,) lr=1e-04 → top1=0.38, top3=0.62, top5=0.75
[#5] ReLU (64,) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#6] ReLU (64,) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#7] ReLU (64,) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#8] ReLU (64,) lr=5e-04 → top1=0.46, top3=0.69, top5=0.80
[#9] ReLU (64,) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#10] ReLU (128,) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#11] ReLU (128,) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#12] ReLU (128,) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#13] ReLU (128,) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#14] ReLU (128,) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#15] ReLU (256,) lr=1e-02 → top1=0.48, top3=0.72, top5=0.82
[#16] ReLU (256,) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#17] ReLU (256,) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#18] ReLU (256,) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#19] ReLU (256,) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#20] ReLU (32, 32) lr=1e-02 → top1=0.42, top3=0.66, top5=0.77
[#21] ReLU (32, 32) lr=5e-03 → top1=0.44, top3=0.68, top5=0.79
[#22] ReLU (32, 32) lr=1e-03 → top1=0.44, top3=0.68, top5=0.79
[#23] ReLU (32, 32) lr=5e-04 → top1=0.44, top3=0.68, top5=0.79
[#24] ReLU (32, 32) lr=1e-04 → top1=0.39, top3=0.63, top5=0.76
[#25] ReLU (32, 64) lr=1e-02 → top1=0.43, top3=0.67, top5=0.79
[#26] ReLU (32, 64) lr=5e-03 → top1=0.45, top3=0.69, top5=0.80
[#27] ReLU (32, 64) lr=1e-03 → top1=0.46, top3=0.69, top5=0.81
[#28] ReLU (32, 64) lr=5e-04 → top1=0.45, top3=0.69, top5=0.80
[#29] ReLU (32, 64) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#30] ReLU (32, 128) lr=1e-02 → top1=0.44, top3=0.68, top5=0.80
[#31] ReLU (32, 128) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#32] ReLU (32, 128) lr=1e-03 → top1=0.48, top3=0.71, top5=0.82
[#33] ReLU (32, 128) lr=5e-04 → top1=0.47, top3=0.71, top5=0.81
[#34] ReLU (32, 128) lr=1e-04 → top1=0.42, top3=0.67, top5=0.78
[#35] ReLU (32, 256) lr=1e-02 → top1=0.44, top3=0.69, top5=0.80
[#36] ReLU (32, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#37] ReLU (32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#38] ReLU (32, 256) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#39] ReLU (32, 256) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#40] ReLU (64, 32) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#41] ReLU (64, 32) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#42] ReLU (64, 32) lr=1e-03 → top1=0.48, top3=0.70, top5=0.81
[#43] ReLU (64, 32) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#44] ReLU (64, 32) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#45] ReLU (64, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#46] ReLU (64, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#47] ReLU (64, 64) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#48] ReLU (64, 64) lr=5e-04 → top1=0.48, top3=0.71, top5=0.82
[#49] ReLU (64, 64) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#50] ReLU (64, 128) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#51] ReLU (64, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#52] ReLU (64, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#53] ReLU (64, 128) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#54] ReLU (64, 128) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#55] ReLU (64, 256) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#56] ReLU (64, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#57] ReLU (64, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#58] ReLU (64, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#59] ReLU (64, 256) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#60] ReLU (128, 32) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#61] ReLU (128, 32) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#62] ReLU (128, 32) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#63] ReLU (128, 32) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#64] ReLU (128, 32) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#65] ReLU (128, 64) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#66] ReLU (128, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#67] ReLU (128, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#68] ReLU (128, 64) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#69] ReLU (128, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#70] ReLU (128, 128) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#71] ReLU (128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#72] ReLU (128, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#73] ReLU (128, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#74] ReLU (128, 128) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#75] ReLU (128, 256) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#76] ReLU (128, 256) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#77] ReLU (128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#78] ReLU (128, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#79] ReLU (128, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#80] ReLU (256, 32) lr=1e-02 → top1=0.48, top3=0.70, top5=0.80
[#81] ReLU (256, 32) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#82] ReLU (256, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#83] ReLU (256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#84] ReLU (256, 32) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#85] ReLU (256, 64) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#86] ReLU (256, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#87] ReLU (256, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#88] ReLU (256, 64) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#89] ReLU (256, 64) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#90] ReLU (256, 128) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#91] ReLU (256, 128) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#92] ReLU (256, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#93] ReLU (256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#94] ReLU (256, 128) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#95] ReLU (256, 256) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#96] ReLU (256, 256) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#97] ReLU (256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.84
[#98] ReLU (256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#99] ReLU (256, 256) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#100] ReLU (32, 32, 32) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#101] ReLU (32, 32, 32) lr=5e-03 → top1=0.44, top3=0.68, top5=0.79
[#102] ReLU (32, 32, 32) lr=1e-03 → top1=0.45, top3=0.69, top5=0.80
[#103] ReLU (32, 32, 32) lr=5e-04 → top1=0.44, top3=0.68, top5=0.80
[#104] ReLU (32, 32, 32) lr=1e-04 → top1=0.40, top3=0.64, top5=0.76
[#105] ReLU (32, 32, 64) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#106] ReLU (32, 32, 64) lr=5e-03 → top1=0.45, top3=0.69, top5=0.80
[#107] ReLU (32, 32, 64) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#108] ReLU (32, 32, 64) lr=5e-04 → top1=0.46, top3=0.69, top5=0.80
[#109] ReLU (32, 32, 64) lr=1e-04 → top1=0.40, top3=0.64, top5=0.76
[#110] ReLU (32, 32, 128) lr=1e-02 → top1=0.40, top3=0.65, top5=0.77
[#111] ReLU (32, 32, 128) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#112] ReLU (32, 32, 128) lr=1e-03 → top1=0.48, top3=0.72, top5=0.82
[#113] ReLU (32, 32, 128) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#114] ReLU (32, 32, 128) lr=1e-04 → top1=0.41, top3=0.65, top5=0.78
[#115] ReLU (32, 32, 256) lr=1e-02 → top1=0.39, top3=0.64, top5=0.76
[#116] ReLU (32, 32, 256) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#117] ReLU (32, 32, 256) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#118] ReLU (32, 32, 256) lr=5e-04 → top1=0.48, top3=0.71, top5=0.82
[#119] ReLU (32, 32, 256) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#120] ReLU (32, 64, 32) lr=1e-02 → top1=0.41, top3=0.66, top5=0.77
[#121] ReLU (32, 64, 32) lr=5e-03 → top1=0.45, top3=0.68, top5=0.79
[#122] ReLU (32, 64, 32) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#123] ReLU (32, 64, 32) lr=5e-04 → top1=0.46, top3=0.70, top5=0.80
[#124] ReLU (32, 64, 32) lr=1e-04 → top1=0.40, top3=0.65, top5=0.77
[#125] ReLU (32, 64, 64) lr=1e-02 → top1=0.41, top3=0.66, top5=0.78
[#126] ReLU (32, 64, 64) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#127] ReLU (32, 64, 64) lr=1e-03 → top1=0.48, top3=0.71, top5=0.82
[#128] ReLU (32, 64, 64) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#129] ReLU (32, 64, 64) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#130] ReLU (32, 64, 128) lr=1e-02 → top1=0.40, top3=0.64, top5=0.76
[#131] ReLU (32, 64, 128) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#132] ReLU (32, 64, 128) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#133] ReLU (32, 64, 128) lr=5e-04 → top1=0.48, top3=0.71, top5=0.82
[#134] ReLU (32, 64, 128) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#135] ReLU (32, 64, 256) lr=1e-02 → top1=0.38, top3=0.63, top5=0.76
[#136] ReLU (32, 64, 256) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#137] ReLU (32, 64, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#138] ReLU (32, 64, 256) lr=5e-04 → top1=0.49, top3=0.72, top5=0.83
[#139] ReLU (32, 64, 256) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#140] ReLU (32, 128, 32) lr=1e-02 → top1=0.43, top3=0.66, top5=0.77
[#141] ReLU (32, 128, 32) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#142] ReLU (32, 128, 32) lr=1e-03 → top1=0.48, top3=0.72, top5=0.82
[#143] ReLU (32, 128, 32) lr=5e-04 → top1=0.47, top3=0.71, top5=0.81
[#144] ReLU (32, 128, 32) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#145] ReLU (32, 128, 64) lr=1e-02 → top1=0.42, top3=0.67, top5=0.78
[#146] ReLU (32, 128, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#147] ReLU (32, 128, 64) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#148] ReLU (32, 128, 64) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#149] ReLU (32, 128, 64) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#150] ReLU (32, 128, 128) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#151] ReLU (32, 128, 128) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#152] ReLU (32, 128, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#153] ReLU (32, 128, 128) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#154] ReLU (32, 128, 128) lr=1e-04 → top1=0.43, top3=0.68, top5=0.79
[#155] ReLU (32, 128, 256) lr=1e-02 → top1=0.39, top3=0.64, top5=0.76
[#156] ReLU (32, 128, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#157] ReLU (32, 128, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#158] ReLU (32, 128, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#159] ReLU (32, 128, 256) lr=1e-04 → top1=0.45, top3=0.69, top5=0.80
[#160] ReLU (32, 256, 32) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#161] ReLU (32, 256, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#162] ReLU (32, 256, 32) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#163] ReLU (32, 256, 32) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#164] ReLU (32, 256, 32) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#165] ReLU (32, 256, 64) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#166] ReLU (32, 256, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#167] ReLU (32, 256, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#168] ReLU (32, 256, 64) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#169] ReLU (32, 256, 64) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#170] ReLU (32, 256, 128) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#171] ReLU (32, 256, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#172] ReLU (32, 256, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#173] ReLU (32, 256, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#174] ReLU (32, 256, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#175] ReLU (32, 256, 256) lr=1e-02 → top1=0.41, top3=0.64, top5=0.77
[#176] ReLU (32, 256, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#177] ReLU (32, 256, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#178] ReLU (32, 256, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#179] ReLU (32, 256, 256) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#180] ReLU (64, 32, 32) lr=1e-02 → top1=0.43, top3=0.66, top5=0.78
[#181] ReLU (64, 32, 32) lr=5e-03 → top1=0.47, top3=0.70, top5=0.80
[#182] ReLU (64, 32, 32) lr=1e-03 → top1=0.48, top3=0.71, top5=0.81
[#183] ReLU (64, 32, 32) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#184] ReLU (64, 32, 32) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#185] ReLU (64, 32, 64) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#186] ReLU (64, 32, 64) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#187] ReLU (64, 32, 64) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#188] ReLU (64, 32, 64) lr=5e-04 → top1=0.47, top3=0.71, top5=0.81
[#189] ReLU (64, 32, 64) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#190] ReLU (64, 32, 128) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#191] ReLU (64, 32, 128) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#192] ReLU (64, 32, 128) lr=1e-03 → top1=0.49, top3=0.73, top5=0.82
[#193] ReLU (64, 32, 128) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#194] ReLU (64, 32, 128) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#195] ReLU (64, 32, 256) lr=1e-02 → top1=0.40, top3=0.65, top5=0.77
[#196] ReLU (64, 32, 256) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#197] ReLU (64, 32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#198] ReLU (64, 32, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#199] ReLU (64, 32, 256) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#200] ReLU (64, 64, 32) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#201] ReLU (64, 64, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#202] ReLU (64, 64, 32) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#203] ReLU (64, 64, 32) lr=5e-04 → top1=0.49, top3=0.71, top5=0.81
[#204] ReLU (64, 64, 32) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#205] ReLU (64, 64, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#206] ReLU (64, 64, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#207] ReLU (64, 64, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#208] ReLU (64, 64, 64) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#209] ReLU (64, 64, 64) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#210] ReLU (64, 64, 128) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#211] ReLU (64, 64, 128) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#212] ReLU (64, 64, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#213] ReLU (64, 64, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#214] ReLU (64, 64, 128) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#215] ReLU (64, 64, 256) lr=1e-02 → top1=0.41, top3=0.65, top5=0.77
[#216] ReLU (64, 64, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#217] ReLU (64, 64, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#218] ReLU (64, 64, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#219] ReLU (64, 64, 256) lr=1e-04 → top1=0.45, top3=0.69, top5=0.80
[#220] ReLU (64, 128, 32) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#221] ReLU (64, 128, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#222] ReLU (64, 128, 32) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#223] ReLU (64, 128, 32) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#224] ReLU (64, 128, 32) lr=1e-04 → top1=0.44, top3=0.67, top5=0.79
[#225] ReLU (64, 128, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#226] ReLU (64, 128, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#227] ReLU (64, 128, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#228] ReLU (64, 128, 64) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#229] ReLU (64, 128, 64) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#230] ReLU (64, 128, 128) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#231] ReLU (64, 128, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#232] ReLU (64, 128, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#233] ReLU (64, 128, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#234] ReLU (64, 128, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#235] ReLU (64, 128, 256) lr=1e-02 → top1=0.38, top3=0.61, top5=0.74
[#236] ReLU (64, 128, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#237] ReLU (64, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#238] ReLU (64, 128, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#239] ReLU (64, 128, 256) lr=1e-04 → top1=0.46, top3=0.70, top5=0.81
[#240] ReLU (64, 256, 32) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#241] ReLU (64, 256, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#242] ReLU (64, 256, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#243] ReLU (64, 256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#244] ReLU (64, 256, 32) lr=1e-04 → top1=0.45, top3=0.68, top5=0.80
[#245] ReLU (64, 256, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#246] ReLU (64, 256, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#247] ReLU (64, 256, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#248] ReLU (64, 256, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#249] ReLU (64, 256, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#250] ReLU (64, 256, 128) lr=1e-02 → top1=0.43, top3=0.66, top5=0.78
[#251] ReLU (64, 256, 128) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#252] ReLU (64, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#253] ReLU (64, 256, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#254] ReLU (64, 256, 128) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#255] ReLU (64, 256, 256) lr=1e-02 → top1=0.41, top3=0.65, top5=0.76
[#256] ReLU (64, 256, 256) lr=5e-03 → top1=0.49, top3=0.73, top5=0.82
[#257] ReLU (64, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#258] ReLU (64, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#259] ReLU (64, 256, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#260] ReLU (128, 32, 32) lr=1e-02 → top1=0.45, top3=0.67, top5=0.78
[#261] ReLU (128, 32, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#262] ReLU (128, 32, 32) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#263] ReLU (128, 32, 32) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#264] ReLU (128, 32, 32) lr=1e-04 → top1=0.44, top3=0.67, top5=0.78
[#265] ReLU (128, 32, 64) lr=1e-02 → top1=0.44, top3=0.66, top5=0.78
[#266] ReLU (128, 32, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#267] ReLU (128, 32, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#268] ReLU (128, 32, 64) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#269] ReLU (128, 32, 64) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#270] ReLU (128, 32, 128) lr=1e-02 → top1=0.41, top3=0.64, top5=0.75
[#271] ReLU (128, 32, 128) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#272] ReLU (128, 32, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#273] ReLU (128, 32, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#274] ReLU (128, 32, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#275] ReLU (128, 32, 256) lr=1e-02 → top1=0.43, top3=0.66, top5=0.77
[#276] ReLU (128, 32, 256) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#277] ReLU (128, 32, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#278] ReLU (128, 32, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#279] ReLU (128, 32, 256) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#280] ReLU (128, 64, 32) lr=1e-02 → top1=0.45, top3=0.67, top5=0.79
[#281] ReLU (128, 64, 32) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#282] ReLU (128, 64, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.82
[#283] ReLU (128, 64, 32) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#284] ReLU (128, 64, 32) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#285] ReLU (128, 64, 64) lr=1e-02 → top1=0.45, top3=0.67, top5=0.78
[#286] ReLU (128, 64, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#287] ReLU (128, 64, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#288] ReLU (128, 64, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.82
[#289] ReLU (128, 64, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#290] ReLU (128, 64, 128) lr=1e-02 → top1=0.43, top3=0.66, top5=0.77
[#291] ReLU (128, 64, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#292] ReLU (128, 64, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#293] ReLU (128, 64, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#294] ReLU (128, 64, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#295] ReLU (128, 64, 256) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#296] ReLU (128, 64, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#297] ReLU (128, 64, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#298] ReLU (128, 64, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#299] ReLU (128, 64, 256) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#300] ReLU (128, 128, 32) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#301] ReLU (128, 128, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#302] ReLU (128, 128, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#303] ReLU (128, 128, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#304] ReLU (128, 128, 32) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#305] ReLU (128, 128, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#306] ReLU (128, 128, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#307] ReLU (128, 128, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#308] ReLU (128, 128, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#309] ReLU (128, 128, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#310] ReLU (128, 128, 128) lr=1e-02 → top1=0.44, top3=0.66, top5=0.77
[#311] ReLU (128, 128, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#312] ReLU (128, 128, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#313] ReLU (128, 128, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#314] ReLU (128, 128, 128) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#315] ReLU (128, 128, 256) lr=1e-02 → top1=0.42, top3=0.66, top5=0.77
[#316] ReLU (128, 128, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#317] ReLU (128, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#318] ReLU (128, 128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#319] ReLU (128, 128, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#320] ReLU (128, 256, 32) lr=1e-02 → top1=0.46, top3=0.68, top5=0.78
[#321] ReLU (128, 256, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#322] ReLU (128, 256, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#323] ReLU (128, 256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#324] ReLU (128, 256, 32) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#325] ReLU (128, 256, 64) lr=1e-02 → top1=0.45, top3=0.68, top5=0.78
[#326] ReLU (128, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#327] ReLU (128, 256, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#328] ReLU (128, 256, 64) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#329] ReLU (128, 256, 64) lr=1e-04 → top1=0.48, top3=0.70, top5=0.81
[#330] ReLU (128, 256, 128) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#331] ReLU (128, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#332] ReLU (128, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#333] ReLU (128, 256, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#334] ReLU (128, 256, 128) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#335] ReLU (128, 256, 256) lr=1e-02 → top1=0.38, top3=0.61, top5=0.73
[#336] ReLU (128, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#337] ReLU (128, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#338] ReLU (128, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#339] ReLU (128, 256, 256) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#340] ReLU (256, 32, 32) lr=1e-02 → top1=0.46, top3=0.68, top5=0.79
[#341] ReLU (256, 32, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#342] ReLU (256, 32, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#343] ReLU (256, 32, 32) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#344] ReLU (256, 32, 32) lr=1e-04 → top1=0.47, top3=0.69, top5=0.80
[#345] ReLU (256, 32, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#346] ReLU (256, 32, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#347] ReLU (256, 32, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#348] ReLU (256, 32, 64) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#349] ReLU (256, 32, 64) lr=1e-04 → top1=0.46, top3=0.70, top5=0.80
[#350] ReLU (256, 32, 128) lr=1e-02 → top1=0.44, top3=0.66, top5=0.77
[#351] ReLU (256, 32, 128) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#352] ReLU (256, 32, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#353] ReLU (256, 32, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#354] ReLU (256, 32, 128) lr=1e-04 → top1=0.48, top3=0.71, top5=0.80
[#355] ReLU (256, 32, 256) lr=1e-02 → top1=0.43, top3=0.66, top5=0.77
[#356] ReLU (256, 32, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#357] ReLU (256, 32, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#358] ReLU (256, 32, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#359] ReLU (256, 32, 256) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#360] ReLU (256, 64, 32) lr=1e-02 → top1=0.47, top3=0.68, top5=0.79
[#361] ReLU (256, 64, 32) lr=5e-03 → top1=0.51, top3=0.72, top5=0.82
[#362] ReLU (256, 64, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#363] ReLU (256, 64, 32) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#364] ReLU (256, 64, 32) lr=1e-04 → top1=0.48, top3=0.70, top5=0.81
[#365] ReLU (256, 64, 64) lr=1e-02 → top1=0.45, top3=0.67, top5=0.78
[#366] ReLU (256, 64, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#367] ReLU (256, 64, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#368] ReLU (256, 64, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#369] ReLU (256, 64, 64) lr=1e-04 → top1=0.48, top3=0.70, top5=0.81
[#370] ReLU (256, 64, 128) lr=1e-02 → top1=0.45, top3=0.68, top5=0.78
[#371] ReLU (256, 64, 128) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#372] ReLU (256, 64, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#373] ReLU (256, 64, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#374] ReLU (256, 64, 128) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#375] ReLU (256, 64, 256) lr=1e-02 → top1=0.42, top3=0.65, top5=0.76
[#376] ReLU (256, 64, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#377] ReLU (256, 64, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#378] ReLU (256, 64, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#379] ReLU (256, 64, 256) lr=1e-04 → top1=0.49, top3=0.71, top5=0.82
[#380] ReLU (256, 128, 32) lr=1e-02 → top1=0.47, top3=0.69, top5=0.79
[#381] ReLU (256, 128, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.82
[#382] ReLU (256, 128, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#383] ReLU (256, 128, 32) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#384] ReLU (256, 128, 32) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#385] ReLU (256, 128, 64) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#386] ReLU (256, 128, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.82
[#387] ReLU (256, 128, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#388] ReLU (256, 128, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#389] ReLU (256, 128, 64) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#390] ReLU (256, 128, 128) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#391] ReLU (256, 128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#392] ReLU (256, 128, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#393] ReLU (256, 128, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#394] ReLU (256, 128, 128) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#395] ReLU (256, 128, 256) lr=1e-02 → top1=0.42, top3=0.65, top5=0.77
[#396] ReLU (256, 128, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#397] ReLU (256, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#398] ReLU (256, 128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#399] ReLU (256, 128, 256) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#400] ReLU (256, 256, 32) lr=1e-02 → top1=0.47, top3=0.69, top5=0.79
[#401] ReLU (256, 256, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.82
[#402] ReLU (256, 256, 32) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#403] ReLU (256, 256, 32) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#404] ReLU (256, 256, 32) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#405] ReLU (256, 256, 64) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#406] ReLU (256, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#407] ReLU (256, 256, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#408] ReLU (256, 256, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#409] ReLU (256, 256, 64) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#410] ReLU (256, 256, 128) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#411] ReLU (256, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#412] ReLU (256, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#413] ReLU (256, 256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#414] ReLU (256, 256, 128) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#415] ReLU (256, 256, 256) lr=1e-02 → top1=0.44, top3=0.66, top5=0.77
[#416] ReLU (256, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#417] ReLU (256, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#418] ReLU (256, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#419] ReLU (256, 256, 256) lr=1e-04 → top1=0.50, top3=0.73, top5=0.83
[#420] GELU (32,) lr=1e-02 → top1=0.41, top3=0.66, top5=0.77
[#421] GELU (32,) lr=5e-03 → top1=0.42, top3=0.67, top5=0.79
[#422] GELU (32,) lr=1e-03 → top1=0.42, top3=0.67, top5=0.78
[#423] GELU (32,) lr=5e-04 → top1=0.43, top3=0.67, top5=0.78
[#424] GELU (32,) lr=1e-04 → top1=0.39, top3=0.63, top5=0.76
[#425] GELU (64,) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#426] GELU (64,) lr=5e-03 → top1=0.47, top3=0.70, top5=0.80
[#427] GELU (64,) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#428] GELU (64,) lr=5e-04 → top1=0.46, top3=0.70, top5=0.80
[#429] GELU (64,) lr=1e-04 → top1=0.41, top3=0.65, top5=0.78
[#430] GELU (128,) lr=1e-02 → top1=0.48, top3=0.71, top5=0.82
[#431] GELU (128,) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#432] GELU (128,) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#433] GELU (128,) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#434] GELU (128,) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#435] GELU (256,) lr=1e-02 → top1=0.50, top3=0.72, top5=0.82
[#436] GELU (256,) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#437] GELU (256,) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#438] GELU (256,) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#439] GELU (256,) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#440] GELU (32, 32) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#441] GELU (32, 32) lr=5e-03 → top1=0.45, top3=0.68, top5=0.79
[#442] GELU (32, 32) lr=1e-03 → top1=0.45, top3=0.68, top5=0.80
[#443] GELU (32, 32) lr=5e-04 → top1=0.44, top3=0.68, top5=0.79
[#444] GELU (32, 32) lr=1e-04 → top1=0.40, top3=0.64, top5=0.76
[#445] GELU (32, 64) lr=1e-02 → top1=0.43, top3=0.68, top5=0.79
[#446] GELU (32, 64) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#447] GELU (32, 64) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#448] GELU (32, 64) lr=5e-04 → top1=0.46, top3=0.69, top5=0.80
[#449] GELU (32, 64) lr=1e-04 → top1=0.40, top3=0.64, top5=0.77
[#450] GELU (32, 128) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#451] GELU (32, 128) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#452] GELU (32, 128) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#453] GELU (32, 128) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#454] GELU (32, 128) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#455] GELU (32, 256) lr=1e-02 → top1=0.45, top3=0.70, top5=0.81
[#456] GELU (32, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.81
[#457] GELU (32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#458] GELU (32, 256) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#459] GELU (32, 256) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#460] GELU (64, 32) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#461] GELU (64, 32) lr=5e-03 → top1=0.48, top3=0.70, top5=0.81
[#462] GELU (64, 32) lr=1e-03 → top1=0.48, top3=0.71, top5=0.81
[#463] GELU (64, 32) lr=5e-04 → top1=0.48, top3=0.70, top5=0.81
[#464] GELU (64, 32) lr=1e-04 → top1=0.43, top3=0.66, top5=0.78
[#465] GELU (64, 64) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#466] GELU (64, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#467] GELU (64, 64) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#468] GELU (64, 64) lr=5e-04 → top1=0.49, top3=0.72, top5=0.81
[#469] GELU (64, 64) lr=1e-04 → top1=0.43, top3=0.67, top5=0.78
[#470] GELU (64, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.81
[#471] GELU (64, 128) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#472] GELU (64, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#473] GELU (64, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#474] GELU (64, 128) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#475] GELU (64, 256) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#476] GELU (64, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#477] GELU (64, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#478] GELU (64, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#479] GELU (64, 256) lr=1e-04 → top1=0.46, top3=0.70, top5=0.80
[#480] GELU (128, 32) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#481] GELU (128, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#482] GELU (128, 32) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#483] GELU (128, 32) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#484] GELU (128, 32) lr=1e-04 → top1=0.45, top3=0.69, top5=0.79
[#485] GELU (128, 64) lr=1e-02 → top1=0.47, top3=0.70, top5=0.80
[#486] GELU (128, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#487] GELU (128, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#488] GELU (128, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#489] GELU (128, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#490] GELU (128, 128) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#491] GELU (128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#492] GELU (128, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#493] GELU (128, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#494] GELU (128, 128) lr=1e-04 → top1=0.48, top3=0.70, top5=0.81
[#495] GELU (128, 256) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#496] GELU (128, 256) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#497] GELU (128, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#498] GELU (128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#499] GELU (128, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.82
[#500] GELU (256, 32) lr=1e-02 → top1=0.49, top3=0.71, top5=0.81
[#501] GELU (256, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#502] GELU (256, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#503] GELU (256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#504] GELU (256, 32) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#505] GELU (256, 64) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#506] GELU (256, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#507] GELU (256, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#508] GELU (256, 64) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#509] GELU (256, 64) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#510] GELU (256, 128) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#511] GELU (256, 128) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#512] GELU (256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#513] GELU (256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#514] GELU (256, 128) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#515] GELU (256, 256) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#516] GELU (256, 256) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#517] GELU (256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#518] GELU (256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#519] GELU (256, 256) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#520] GELU (32, 32, 32) lr=1e-02 → top1=0.41, top3=0.66, top5=0.78
[#521] GELU (32, 32, 32) lr=5e-03 → top1=0.45, top3=0.68, top5=0.80
[#522] GELU (32, 32, 32) lr=1e-03 → top1=0.46, top3=0.69, top5=0.80
[#523] GELU (32, 32, 32) lr=5e-04 → top1=0.45, top3=0.69, top5=0.80
[#524] GELU (32, 32, 32) lr=1e-04 → top1=0.39, top3=0.64, top5=0.76
[#525] GELU (32, 32, 64) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#526] GELU (32, 32, 64) lr=5e-03 → top1=0.46, top3=0.69, top5=0.80
[#527] GELU (32, 32, 64) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#528] GELU (32, 32, 64) lr=5e-04 → top1=0.47, top3=0.70, top5=0.80
[#529] GELU (32, 32, 64) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#530] GELU (32, 32, 128) lr=1e-02 → top1=0.42, top3=0.67, top5=0.79
[#531] GELU (32, 32, 128) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#532] GELU (32, 32, 128) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#533] GELU (32, 32, 128) lr=5e-04 → top1=0.47, top3=0.71, top5=0.81
[#534] GELU (32, 32, 128) lr=1e-04 → top1=0.41, top3=0.66, top5=0.78
[#535] GELU (32, 32, 256) lr=1e-02 → top1=0.44, top3=0.69, top5=0.80
[#536] GELU (32, 32, 256) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#537] GELU (32, 32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#538] GELU (32, 32, 256) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#539] GELU (32, 32, 256) lr=1e-04 → top1=0.44, top3=0.67, top5=0.79
[#540] GELU (32, 64, 32) lr=1e-02 → top1=0.42, top3=0.65, top5=0.77
[#541] GELU (32, 64, 32) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#542] GELU (32, 64, 32) lr=1e-03 → top1=0.47, top3=0.71, top5=0.81
[#543] GELU (32, 64, 32) lr=5e-04 → top1=0.47, top3=0.70, top5=0.80
[#544] GELU (32, 64, 32) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#545] GELU (32, 64, 64) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#546] GELU (32, 64, 64) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#547] GELU (32, 64, 64) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#548] GELU (32, 64, 64) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#549] GELU (32, 64, 64) lr=1e-04 → top1=0.42, top3=0.65, top5=0.77
[#550] GELU (32, 64, 128) lr=1e-02 → top1=0.44, top3=0.68, top5=0.80
[#551] GELU (32, 64, 128) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#552] GELU (32, 64, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#553] GELU (32, 64, 128) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#554] GELU (32, 64, 128) lr=1e-04 → top1=0.42, top3=0.67, top5=0.79
[#555] GELU (32, 64, 256) lr=1e-02 → top1=0.44, top3=0.69, top5=0.80
[#556] GELU (32, 64, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#557] GELU (32, 64, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#558] GELU (32, 64, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#559] GELU (32, 64, 256) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#560] GELU (32, 128, 32) lr=1e-02 → top1=0.42, top3=0.67, top5=0.78
[#561] GELU (32, 128, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#562] GELU (32, 128, 32) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#563] GELU (32, 128, 32) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#564] GELU (32, 128, 32) lr=1e-04 → top1=0.41, top3=0.66, top5=0.77
[#565] GELU (32, 128, 64) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#566] GELU (32, 128, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#567] GELU (32, 128, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#568] GELU (32, 128, 64) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#569] GELU (32, 128, 64) lr=1e-04 → top1=0.44, top3=0.67, top5=0.79
[#570] GELU (32, 128, 128) lr=1e-02 → top1=0.44, top3=0.68, top5=0.79
[#571] GELU (32, 128, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#572] GELU (32, 128, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#573] GELU (32, 128, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#574] GELU (32, 128, 128) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#575] GELU (32, 128, 256) lr=1e-02 → top1=0.44, top3=0.69, top5=0.80
[#576] GELU (32, 128, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#577] GELU (32, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#578] GELU (32, 128, 256) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#579] GELU (32, 128, 256) lr=1e-04 → top1=0.46, top3=0.70, top5=0.80
[#580] GELU (32, 256, 32) lr=1e-02 → top1=0.44, top3=0.68, top5=0.78
[#581] GELU (32, 256, 32) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#582] GELU (32, 256, 32) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#583] GELU (32, 256, 32) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#584] GELU (32, 256, 32) lr=1e-04 → top1=0.44, top3=0.67, top5=0.79
[#585] GELU (32, 256, 64) lr=1e-02 → top1=0.44, top3=0.67, top5=0.79
[#586] GELU (32, 256, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#587] GELU (32, 256, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#588] GELU (32, 256, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#589] GELU (32, 256, 64) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#590] GELU (32, 256, 128) lr=1e-02 → top1=0.42, top3=0.67, top5=0.78
[#591] GELU (32, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#592] GELU (32, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#593] GELU (32, 256, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#594] GELU (32, 256, 128) lr=1e-04 → top1=0.45, top3=0.69, top5=0.80
[#595] GELU (32, 256, 256) lr=1e-02 → top1=0.40, top3=0.65, top5=0.77
[#596] GELU (32, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#597] GELU (32, 256, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#598] GELU (32, 256, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#599] GELU (32, 256, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#600] GELU (64, 32, 32) lr=1e-02 → top1=0.44, top3=0.67, top5=0.78
[#601] GELU (64, 32, 32) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#602] GELU (64, 32, 32) lr=1e-03 → top1=0.48, top3=0.71, top5=0.81
[#603] GELU (64, 32, 32) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#0] SiLU (32,) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#1] SiLU (32,) lr=5e-03 → top1=0.43, top3=0.67, top5=0.78
[#2] SiLU (32,) lr=1e-03 → top1=0.43, top3=0.67, top5=0.78
[#3] SiLU (32,) lr=5e-04 → top1=0.43, top3=0.66, top5=0.78
[#4] SiLU (32,) lr=1e-04 → top1=0.38, top3=0.63, top5=0.75
[#5] SiLU (64,) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#6] SiLU (64,) lr=5e-03 → top1=0.47, top3=0.70, top5=0.81
[#7] SiLU (64,) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#8] SiLU (64,) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#9] SiLU (64,) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#10] SiLU (128,) lr=1e-02 → top1=0.48, top3=0.72, top5=0.81
[#11] SiLU (128,) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#12] SiLU (128,) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#13] SiLU (128,) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#14] SiLU (128,) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#15] SiLU (256,) lr=1e-02 → top1=0.50, top3=0.73, top5=0.82
[#16] SiLU (256,) lr=5e-03 → top1=0.51, top3=0.74, top5=0.83
[#17] SiLU (256,) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#18] SiLU (256,) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#19] SiLU (256,) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#20] SiLU (32, 32) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#21] SiLU (32, 32) lr=5e-03 → top1=0.45, top3=0.69, top5=0.80
[#22] SiLU (32, 32) lr=1e-03 → top1=0.45, top3=0.69, top5=0.79
[#23] SiLU (32, 32) lr=5e-04 → top1=0.44, top3=0.68, top5=0.79
[#24] SiLU (32, 32) lr=1e-04 → top1=0.39, top3=0.63, top5=0.75
[#25] SiLU (32, 64) lr=1e-02 → top1=0.44, top3=0.69, top5=0.80
[#26] SiLU (32, 64) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#27] SiLU (32, 64) lr=1e-03 → top1=0.47, top3=0.71, top5=0.81
[#28] SiLU (32, 64) lr=5e-04 → top1=0.46, top3=0.70, top5=0.81
[#29] SiLU (32, 64) lr=1e-04 → top1=0.39, top3=0.64, top5=0.76
[#30] SiLU (32, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#31] SiLU (32, 128) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#32] SiLU (32, 128) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#33] SiLU (32, 128) lr=5e-04 → top1=0.48, top3=0.71, top5=0.82
[#34] SiLU (32, 128) lr=1e-04 → top1=0.40, top3=0.65, top5=0.78
[#35] SiLU (32, 256) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#36] SiLU (32, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#37] SiLU (32, 256) lr=1e-03 → top1=0.49, top3=0.73, top5=0.82
[#38] SiLU (32, 256) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#39] SiLU (32, 256) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#40] SiLU (64, 32) lr=1e-02 → top1=0.45, top3=0.68, top5=0.80
[#41] SiLU (64, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#42] SiLU (64, 32) lr=1e-03 → top1=0.48, top3=0.71, top5=0.81
[#43] SiLU (64, 32) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#44] SiLU (64, 32) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#45] SiLU (64, 64) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#46] SiLU (64, 64) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#47] SiLU (64, 64) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#48] SiLU (64, 64) lr=5e-04 → top1=0.49, top3=0.71, top5=0.82
[#49] SiLU (64, 64) lr=1e-04 → top1=0.41, top3=0.66, top5=0.78
[#50] SiLU (64, 128) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#51] SiLU (64, 128) lr=5e-03 → top1=0.49, top3=0.73, top5=0.82
[#52] SiLU (64, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#53] SiLU (64, 128) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#54] SiLU (64, 128) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#55] SiLU (64, 256) lr=1e-02 → top1=0.47, top3=0.71, top5=0.82
[#56] SiLU (64, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#57] SiLU (64, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#58] SiLU (64, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#59] SiLU (64, 256) lr=1e-04 → top1=0.44, top3=0.68, top5=0.80
[#60] SiLU (128, 32) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#61] SiLU (128, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#62] SiLU (128, 32) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#63] SiLU (128, 32) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#64] SiLU (128, 32) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#65] SiLU (128, 64) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#66] SiLU (128, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#67] SiLU (128, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#68] SiLU (128, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#69] SiLU (128, 64) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#70] SiLU (128, 128) lr=1e-02 → top1=0.48, top3=0.72, top5=0.82
[#71] SiLU (128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#72] SiLU (128, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#73] SiLU (128, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#74] SiLU (128, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#75] SiLU (128, 256) lr=1e-02 → top1=0.48, top3=0.71, top5=0.81
[#76] SiLU (128, 256) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#77] SiLU (128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#78] SiLU (128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#79] SiLU (128, 256) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#80] SiLU (256, 32) lr=1e-02 → top1=0.49, top3=0.71, top5=0.81
[#81] SiLU (256, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#82] SiLU (256, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#83] SiLU (256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#84] SiLU (256, 32) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#85] SiLU (256, 64) lr=1e-02 → top1=0.49, top3=0.72, top5=0.82
[#86] SiLU (256, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#87] SiLU (256, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#88] SiLU (256, 64) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#89] SiLU (256, 64) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#90] SiLU (256, 128) lr=1e-02 → top1=0.49, top3=0.72, top5=0.82
[#91] SiLU (256, 128) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#92] SiLU (256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#93] SiLU (256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#94] SiLU (256, 128) lr=1e-04 → top1=0.48, top3=0.71, top5=0.82
[#95] SiLU (256, 256) lr=1e-02 → top1=0.48, top3=0.71, top5=0.82
[#96] SiLU (256, 256) lr=5e-03 → top1=0.51, top3=0.74, top5=0.83
[#97] SiLU (256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#98] SiLU (256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.84
[#99] SiLU (256, 256) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#100] SiLU (32, 32, 32) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#101] SiLU (32, 32, 32) lr=5e-03 → top1=0.45, top3=0.69, top5=0.80
[#102] SiLU (32, 32, 32) lr=1e-03 → top1=0.46, top3=0.70, top5=0.80
[#103] SiLU (32, 32, 32) lr=5e-04 → top1=0.45, top3=0.68, top5=0.79
[#104] SiLU (32, 32, 32) lr=1e-04 → top1=0.38, top3=0.62, top5=0.75
[#105] SiLU (32, 32, 64) lr=1e-02 → top1=0.44, top3=0.68, top5=0.80
[#106] SiLU (32, 32, 64) lr=5e-03 → top1=0.46, top3=0.70, top5=0.80
[#107] SiLU (32, 32, 64) lr=1e-03 → top1=0.47, top3=0.70, top5=0.81
[#108] SiLU (32, 32, 64) lr=5e-04 → top1=0.46, top3=0.70, top5=0.80
[#109] SiLU (32, 32, 64) lr=1e-04 → top1=0.40, top3=0.65, top5=0.76
[#110] SiLU (32, 32, 128) lr=1e-02 → top1=0.43, top3=0.69, top5=0.80
[#111] SiLU (32, 32, 128) lr=5e-03 → top1=0.47, top3=0.71, top5=0.81
[#112] SiLU (32, 32, 128) lr=1e-03 → top1=0.48, top3=0.72, top5=0.82
[#113] SiLU (32, 32, 128) lr=5e-04 → top1=0.48, top3=0.71, top5=0.82
[#114] SiLU (32, 32, 128) lr=1e-04 → top1=0.40, top3=0.65, top5=0.77
[#115] SiLU (32, 32, 256) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#116] SiLU (32, 32, 256) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#117] SiLU (32, 32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#118] SiLU (32, 32, 256) lr=5e-04 → top1=0.48, top3=0.72, top5=0.82
[#119] SiLU (32, 32, 256) lr=1e-04 → top1=0.40, top3=0.66, top5=0.78
[#120] SiLU (32, 64, 32) lr=1e-02 → top1=0.43, top3=0.68, top5=0.79
[#121] SiLU (32, 64, 32) lr=5e-03 → top1=0.46, top3=0.70, top5=0.81
[#122] SiLU (32, 64, 32) lr=1e-03 → top1=0.47, top3=0.71, top5=0.81
[#123] SiLU (32, 64, 32) lr=5e-04 → top1=0.47, top3=0.70, top5=0.81
[#124] SiLU (32, 64, 32) lr=1e-04 → top1=0.39, top3=0.63, top5=0.76
[#125] SiLU (32, 64, 64) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#126] SiLU (32, 64, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#127] SiLU (32, 64, 64) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#128] SiLU (32, 64, 64) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#129] SiLU (32, 64, 64) lr=1e-04 → top1=0.41, top3=0.66, top5=0.77
[#130] SiLU (32, 64, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#131] SiLU (32, 64, 128) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#132] SiLU (32, 64, 128) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#133] SiLU (32, 64, 128) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#134] SiLU (32, 64, 128) lr=1e-04 → top1=0.41, top3=0.66, top5=0.78
[#135] SiLU (32, 64, 256) lr=1e-02 → top1=0.46, top3=0.70, top5=0.81
[#136] SiLU (32, 64, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#137] SiLU (32, 64, 256) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#138] SiLU (32, 64, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#139] SiLU (32, 64, 256) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#140] SiLU (32, 128, 32) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#141] SiLU (32, 128, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#142] SiLU (32, 128, 32) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#143] SiLU (32, 128, 32) lr=5e-04 → top1=0.49, top3=0.71, top5=0.81
[#144] SiLU (32, 128, 32) lr=1e-04 → top1=0.40, top3=0.65, top5=0.77
[#145] SiLU (32, 128, 64) lr=1e-02 → top1=0.43, top3=0.68, top5=0.80
[#146] SiLU (32, 128, 64) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#147] SiLU (32, 128, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#148] SiLU (32, 128, 64) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#149] SiLU (32, 128, 64) lr=1e-04 → top1=0.41, top3=0.66, top5=0.77
[#150] SiLU (32, 128, 128) lr=1e-02 → top1=0.44, top3=0.68, top5=0.80
[#151] SiLU (32, 128, 128) lr=5e-03 → top1=0.49, top3=0.73, top5=0.83
[#152] SiLU (32, 128, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#153] SiLU (32, 128, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#154] SiLU (32, 128, 128) lr=1e-04 → top1=0.42, top3=0.67, top5=0.79
[#155] SiLU (32, 128, 256) lr=1e-02 → top1=0.46, top3=0.69, top5=0.81
[#156] SiLU (32, 128, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#157] SiLU (32, 128, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#158] SiLU (32, 128, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#159] SiLU (32, 128, 256) lr=1e-04 → top1=0.44, top3=0.68, top5=0.80
[#160] SiLU (32, 256, 32) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#161] SiLU (32, 256, 32) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#162] SiLU (32, 256, 32) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#163] SiLU (32, 256, 32) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#164] SiLU (32, 256, 32) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#165] SiLU (32, 256, 64) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#166] SiLU (32, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#167] SiLU (32, 256, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#168] SiLU (32, 256, 64) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#169] SiLU (32, 256, 64) lr=1e-04 → top1=0.42, top3=0.67, top5=0.79
[#170] SiLU (32, 256, 128) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#171] SiLU (32, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#172] SiLU (32, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#173] SiLU (32, 256, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#174] SiLU (32, 256, 128) lr=1e-04 → top1=0.44, top3=0.68, top5=0.80
[#175] SiLU (32, 256, 256) lr=1e-02 → top1=0.42, top3=0.65, top5=0.77
[#176] SiLU (32, 256, 256) lr=5e-03 → top1=0.49, top3=0.73, top5=0.82
[#177] SiLU (32, 256, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#178] SiLU (32, 256, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#179] SiLU (32, 256, 256) lr=1e-04 → top1=0.46, top3=0.70, top5=0.80
[#180] SiLU (64, 32, 32) lr=1e-02 → top1=0.45, top3=0.68, top5=0.79
[#181] SiLU (64, 32, 32) lr=5e-03 → top1=0.48, top3=0.70, top5=0.81
[#182] SiLU (64, 32, 32) lr=1e-03 → top1=0.49, top3=0.72, top5=0.82
[#183] SiLU (64, 32, 32) lr=5e-04 → top1=0.48, top3=0.71, top5=0.81
[#184] SiLU (64, 32, 32) lr=1e-04 → top1=0.41, top3=0.65, top5=0.77
[#185] SiLU (64, 32, 64) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#186] SiLU (64, 32, 64) lr=5e-03 → top1=0.48, top3=0.71, top5=0.81
[#187] SiLU (64, 32, 64) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#188] SiLU (64, 32, 64) lr=5e-04 → top1=0.49, top3=0.71, top5=0.82
[#189] SiLU (64, 32, 64) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#190] SiLU (64, 32, 128) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#191] SiLU (64, 32, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#192] SiLU (64, 32, 128) lr=1e-03 → top1=0.50, top3=0.72, top5=0.83
[#193] SiLU (64, 32, 128) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#194] SiLU (64, 32, 128) lr=1e-04 → top1=0.42, top3=0.66, top5=0.78
[#195] SiLU (64, 32, 256) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#196] SiLU (64, 32, 256) lr=5e-03 → top1=0.49, top3=0.72, top5=0.81
[#197] SiLU (64, 32, 256) lr=1e-03 → top1=0.50, top3=0.73, top5=0.83
[#198] SiLU (64, 32, 256) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#199] SiLU (64, 32, 256) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#200] SiLU (64, 64, 32) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#201] SiLU (64, 64, 32) lr=5e-03 → top1=0.48, top3=0.71, top5=0.82
[#202] SiLU (64, 64, 32) lr=1e-03 → top1=0.50, top3=0.72, top5=0.82
[#203] SiLU (64, 64, 32) lr=5e-04 → top1=0.49, top3=0.72, top5=0.82
[#204] SiLU (64, 64, 32) lr=1e-04 → top1=0.42, top3=0.65, top5=0.77
[#205] SiLU (64, 64, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#206] SiLU (64, 64, 64) lr=5e-03 → top1=0.48, top3=0.72, top5=0.82
[#207] SiLU (64, 64, 64) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#208] SiLU (64, 64, 64) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#209] SiLU (64, 64, 64) lr=1e-04 → top1=0.41, top3=0.66, top5=0.78
[#210] SiLU (64, 64, 128) lr=1e-02 → top1=0.47, top3=0.70, top5=0.81
[#211] SiLU (64, 64, 128) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#212] SiLU (64, 64, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#213] SiLU (64, 64, 128) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#214] SiLU (64, 64, 128) lr=1e-04 → top1=0.43, top3=0.67, top5=0.79
[#215] SiLU (64, 64, 256) lr=1e-02 → top1=0.47, top3=0.71, top5=0.81
[#216] SiLU (64, 64, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#217] SiLU (64, 64, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#218] SiLU (64, 64, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#219] SiLU (64, 64, 256) lr=1e-04 → top1=0.44, top3=0.69, top5=0.80
[#220] SiLU (64, 128, 32) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#221] SiLU (64, 128, 32) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#222] SiLU (64, 128, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#223] SiLU (64, 128, 32) lr=5e-04 → top1=0.50, top3=0.73, top5=0.82
[#224] SiLU (64, 128, 32) lr=1e-04 → top1=0.42, top3=0.67, top5=0.78
[#225] SiLU (64, 128, 64) lr=1e-02 → top1=0.45, top3=0.70, top5=0.80
[#226] SiLU (64, 128, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#227] SiLU (64, 128, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#228] SiLU (64, 128, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#229] SiLU (64, 128, 64) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#230] SiLU (64, 128, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.81
[#231] SiLU (64, 128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#232] SiLU (64, 128, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#233] SiLU (64, 128, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#234] SiLU (64, 128, 128) lr=1e-04 → top1=0.45, top3=0.69, top5=0.80
[#235] SiLU (64, 128, 256) lr=1e-02 → top1=0.42, top3=0.66, top5=0.78
[#236] SiLU (64, 128, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#237] SiLU (64, 128, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#238] SiLU (64, 128, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#239] SiLU (64, 128, 256) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#240] SiLU (64, 256, 32) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#241] SiLU (64, 256, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#242] SiLU (64, 256, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#243] SiLU (64, 256, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#244] SiLU (64, 256, 32) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#245] SiLU (64, 256, 64) lr=1e-02 → top1=0.43, top3=0.67, top5=0.78
[#246] SiLU (64, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#247] SiLU (64, 256, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#248] SiLU (64, 256, 64) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#249] SiLU (64, 256, 64) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#250] SiLU (64, 256, 128) lr=1e-02 → top1=0.30, top3=0.55, top5=0.68
[#251] SiLU (64, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#252] SiLU (64, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#253] SiLU (64, 256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#254] SiLU (64, 256, 128) lr=1e-04 → top1=0.46, top3=0.70, top5=0.80
[#255] SiLU (64, 256, 256) lr=1e-02 → top1=0.40, top3=0.62, top5=0.75
[#256] SiLU (64, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#257] SiLU (64, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#258] SiLU (64, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#259] SiLU (64, 256, 256) lr=1e-04 → top1=0.47, top3=0.71, top5=0.81
[#260] SiLU (128, 32, 32) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#261] SiLU (128, 32, 32) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#262] SiLU (128, 32, 32) lr=1e-03 → top1=0.50, top3=0.73, top5=0.82
[#263] SiLU (128, 32, 32) lr=5e-04 → top1=0.50, top3=0.72, top5=0.82
[#264] SiLU (128, 32, 32) lr=1e-04 → top1=0.44, top3=0.67, top5=0.78
[#265] SiLU (128, 32, 64) lr=1e-02 → top1=0.47, top3=0.69, top5=0.80
[#266] SiLU (128, 32, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#267] SiLU (128, 32, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.82
[#268] SiLU (128, 32, 64) lr=5e-04 → top1=0.50, top3=0.73, top5=0.83
[#269] SiLU (128, 32, 64) lr=1e-04 → top1=0.44, top3=0.67, top5=0.79
[#270] SiLU (128, 32, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#271] SiLU (128, 32, 128) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#272] SiLU (128, 32, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#273] SiLU (128, 32, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#274] SiLU (128, 32, 128) lr=1e-04 → top1=0.45, top3=0.68, top5=0.79
[#275] SiLU (128, 32, 256) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#276] SiLU (128, 32, 256) lr=5e-03 → top1=0.49, top3=0.73, top5=0.82
[#277] SiLU (128, 32, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#278] SiLU (128, 32, 256) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#279] SiLU (128, 32, 256) lr=1e-04 → top1=0.45, top3=0.69, top5=0.80
[#280] SiLU (128, 64, 32) lr=1e-02 → top1=0.48, top3=0.70, top5=0.80
[#281] SiLU (128, 64, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#282] SiLU (128, 64, 32) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#283] SiLU (128, 64, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#284] SiLU (128, 64, 32) lr=1e-04 → top1=0.44, top3=0.68, top5=0.79
[#285] SiLU (128, 64, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#286] SiLU (128, 64, 64) lr=5e-03 → top1=0.49, top3=0.72, top5=0.82
[#287] SiLU (128, 64, 64) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#288] SiLU (128, 64, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#289] SiLU (128, 64, 64) lr=1e-04 → top1=0.45, top3=0.69, top5=0.79
[#290] SiLU (128, 64, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#291] SiLU (128, 64, 128) lr=5e-03 → top1=0.49, top3=0.73, top5=0.82
[#292] SiLU (128, 64, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#293] SiLU (128, 64, 128) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#294] SiLU (128, 64, 128) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#295] SiLU (128, 64, 256) lr=1e-02 → top1=0.45, top3=0.69, top5=0.80
[#296] SiLU (128, 64, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#297] SiLU (128, 64, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#298] SiLU (128, 64, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#299] SiLU (128, 64, 256) lr=1e-04 → top1=0.46, top3=0.70, top5=0.81
[#300] SiLU (128, 128, 32) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#301] SiLU (128, 128, 32) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#302] SiLU (128, 128, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#303] SiLU (128, 128, 32) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#304] SiLU (128, 128, 32) lr=1e-04 → top1=0.45, top3=0.69, top5=0.79
[#305] SiLU (128, 128, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#306] SiLU (128, 128, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#307] SiLU (128, 128, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#308] SiLU (128, 128, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#309] SiLU (128, 128, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#310] SiLU (128, 128, 128) lr=1e-02 → top1=0.33, top3=0.58, top5=0.71
[#311] SiLU (128, 128, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#312] SiLU (128, 128, 128) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#313] SiLU (128, 128, 128) lr=5e-04 → top1=0.51, top3=0.74, top5=0.83
[#314] SiLU (128, 128, 128) lr=1e-04 → top1=0.46, top3=0.70, top5=0.81
[#315] SiLU (128, 128, 256) lr=1e-02 → top1=0.38, top3=0.60, top5=0.73
[#316] SiLU (128, 128, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#317] SiLU (128, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#318] SiLU (128, 128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#319] SiLU (128, 128, 256) lr=1e-04 → top1=0.47, top3=0.71, top5=0.81
[#320] SiLU (128, 256, 32) lr=1e-02 → top1=0.47, top3=0.70, top5=0.80
[#321] SiLU (128, 256, 32) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#322] SiLU (128, 256, 32) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#323] SiLU (128, 256, 32) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#324] SiLU (128, 256, 32) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#325] SiLU (128, 256, 64) lr=1e-02 → top1=0.38, top3=0.61, top5=0.73
[#326] SiLU (128, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#327] SiLU (128, 256, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#328] SiLU (128, 256, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#329] SiLU (128, 256, 64) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#330] SiLU (128, 256, 128) lr=1e-02 → top1=0.40, top3=0.63, top5=0.75
[#331] SiLU (128, 256, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#332] SiLU (128, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#333] SiLU (128, 256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#334] SiLU (128, 256, 128) lr=1e-04 → top1=0.48, top3=0.71, top5=0.82
[#335] SiLU (128, 256, 256) lr=1e-02 → top1=0.29, top3=0.56, top5=0.70
[#336] SiLU (128, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#337] SiLU (128, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#338] SiLU (128, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.84
[#339] SiLU (128, 256, 256) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#340] SiLU (256, 32, 32) lr=1e-02 → top1=0.47, top3=0.69, top5=0.80
[#341] SiLU (256, 32, 32) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#342] SiLU (256, 32, 32) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#343] SiLU (256, 32, 32) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#344] SiLU (256, 32, 32) lr=1e-04 → top1=0.46, top3=0.69, top5=0.79
[#345] SiLU (256, 32, 64) lr=1e-02 → top1=0.47, top3=0.70, top5=0.80
[#346] SiLU (256, 32, 64) lr=5e-03 → top1=0.50, top3=0.72, top5=0.82
[#347] SiLU (256, 32, 64) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#348] SiLU (256, 32, 64) lr=5e-04 → top1=0.51, top3=0.73, top5=0.83
[#349] SiLU (256, 32, 64) lr=1e-04 → top1=0.46, top3=0.69, top5=0.80
[#350] SiLU (256, 32, 128) lr=1e-02 → top1=0.46, top3=0.70, top5=0.80
[#351] SiLU (256, 32, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#352] SiLU (256, 32, 128) lr=1e-03 → top1=0.51, top3=0.73, top5=0.83
[#353] SiLU (256, 32, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#354] SiLU (256, 32, 128) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#355] SiLU (256, 32, 256) lr=1e-02 → top1=0.47, top3=0.69, top5=0.80
[#356] SiLU (256, 32, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#357] SiLU (256, 32, 256) lr=1e-03 → top1=0.51, top3=0.74, top5=0.83
[#358] SiLU (256, 32, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#359] SiLU (256, 32, 256) lr=1e-04 → top1=0.47, top3=0.70, top5=0.81
[#360] SiLU (256, 64, 32) lr=1e-02 → top1=0.47, top3=0.69, top5=0.80
[#361] SiLU (256, 64, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.82
[#362] SiLU (256, 64, 32) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#363] SiLU (256, 64, 32) lr=5e-04 → top1=0.52, top3=0.73, top5=0.83
[#364] SiLU (256, 64, 32) lr=1e-04 → top1=0.47, top3=0.69, top5=0.80
[#365] SiLU (256, 64, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.79
[#366] SiLU (256, 64, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#367] SiLU (256, 64, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#368] SiLU (256, 64, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#369] SiLU (256, 64, 64) lr=1e-04 → top1=0.47, top3=0.70, top5=0.80
[#370] SiLU (256, 64, 128) lr=1e-02 → top1=0.46, top3=0.69, top5=0.79
[#371] SiLU (256, 64, 128) lr=5e-03 → top1=0.50, top3=0.73, top5=0.83
[#372] SiLU (256, 64, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#373] SiLU (256, 64, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#374] SiLU (256, 64, 128) lr=1e-04 → top1=0.47, top3=0.71, top5=0.81
[#375] SiLU (256, 64, 256) lr=1e-02 → top1=0.45, top3=0.67, top5=0.79
[#376] SiLU (256, 64, 256) lr=5e-03 → top1=0.49, top3=0.73, top5=0.83
[#377] SiLU (256, 64, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#378] SiLU (256, 64, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#379] SiLU (256, 64, 256) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#380] SiLU (256, 128, 32) lr=1e-02 → top1=0.46, top3=0.69, top5=0.80
[#381] SiLU (256, 128, 32) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#382] SiLU (256, 128, 32) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#383] SiLU (256, 128, 32) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#384] SiLU (256, 128, 32) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#385] SiLU (256, 128, 64) lr=1e-02 → top1=0.46, top3=0.69, top5=0.79
[#386] SiLU (256, 128, 64) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#387] SiLU (256, 128, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#388] SiLU (256, 128, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#389] SiLU (256, 128, 64) lr=1e-04 → top1=0.48, top3=0.71, top5=0.81
[#390] SiLU (256, 128, 128) lr=1e-02 → top1=0.38, top3=0.61, top5=0.73
[#391] SiLU (256, 128, 128) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#392] SiLU (256, 128, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#393] SiLU (256, 128, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.84
[#394] SiLU (256, 128, 128) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#395] SiLU (256, 128, 256) lr=1e-02 → top1=0.38, top3=0.63, top5=0.75
[#396] SiLU (256, 128, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#397] SiLU (256, 128, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#398] SiLU (256, 128, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#399] SiLU (256, 128, 256) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#400] SiLU (256, 256, 32) lr=1e-02 → top1=0.47, top3=0.69, top5=0.80
[#401] SiLU (256, 256, 32) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#402] SiLU (256, 256, 32) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#403] SiLU (256, 256, 32) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#404] SiLU (256, 256, 32) lr=1e-04 → top1=0.49, top3=0.71, top5=0.81
[#405] SiLU (256, 256, 64) lr=1e-02 → top1=0.37, top3=0.61, top5=0.74
[#406] SiLU (256, 256, 64) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#407] SiLU (256, 256, 64) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#408] SiLU (256, 256, 64) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#409] SiLU (256, 256, 64) lr=1e-04 → top1=0.49, top3=0.72, top5=0.82
[#410] SiLU (256, 256, 128) lr=1e-02 → top1=0.38, top3=0.61, top5=0.73
[#411] SiLU (256, 256, 128) lr=5e-03 → top1=0.51, top3=0.73, top5=0.83
[#412] SiLU (256, 256, 128) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#413] SiLU (256, 256, 128) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#414] SiLU (256, 256, 128) lr=1e-04 → top1=0.50, top3=0.72, top5=0.82
[#415] SiLU (256, 256, 256) lr=1e-02 → top1=0.12, top3=0.30, top5=0.45
[#416] SiLU (256, 256, 256) lr=5e-03 → top1=0.50, top3=0.73, top5=0.82
[#417] SiLU (256, 256, 256) lr=1e-03 → top1=0.52, top3=0.74, top5=0.83
[#418] SiLU (256, 256, 256) lr=5e-04 → top1=0.52, top3=0.74, top5=0.83
[#419] SiLU (256, 256, 256) lr=1e-04 → top1=0.50, top3=0.73, top5=0.82