| { |
| "metadata": { |
| "ParamSize": 327, |
| "ParamBytes": 150885632.0, |
| "BitsPerParam": 4.502399359852415 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 83886080, |
| "records": [ |
| { |
| "name": "language_model.model.embed_tokens.q_weight", |
| "shape": [ |
| 262144, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 83886080, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "759c294ed3a6f81c12617131781b40e7" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 33201408, |
| "records": [ |
| { |
| "name": "language_model.model.embed_tokens.q_scale", |
| "shape": [ |
| 262144, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10485760, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "language_model.model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "language_model.model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 10487040 |
| }, |
| { |
| "name": "language_model.model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 11142400 |
| }, |
| { |
| "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 11224320 |
| }, |
| { |
| "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 12535040 |
| }, |
| { |
| "name": "language_model.model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 12698880 |
| }, |
| { |
| "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 12700160 |
| }, |
| { |
| "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 12701440 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 12702720 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 12703232 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 12785152 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 12795392 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 13123072 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 13164032 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 13164544 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 13492224 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 13533184 |
| }, |
| { |
| "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13615104 |
| }, |
| { |
| "name": "language_model.model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 13625344 |
| }, |
| { |
| "name": "language_model.model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 13626624 |
| }, |
| { |
| "name": "language_model.model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 14281984 |
| }, |
| { |
| "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 14363904 |
| }, |
| { |
| "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 15674624 |
| }, |
| { |
| "name": "language_model.model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 15838464 |
| }, |
| { |
| "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 15839744 |
| }, |
| { |
| "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 15841024 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 15842304 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 15842816 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 15924736 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 15934976 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 16262656 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 16303616 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 16304128 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 16631808 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 16672768 |
| }, |
| { |
| "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 16754688 |
| }, |
| { |
| "name": "language_model.model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 16764928 |
| }, |
| { |
| "name": "language_model.model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 16766208 |
| }, |
| { |
| "name": "language_model.model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 17421568 |
| }, |
| { |
| "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 17503488 |
| }, |
| { |
| "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 18814208 |
| }, |
| { |
| "name": "language_model.model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 18978048 |
| }, |
| { |
| "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 18979328 |
| }, |
| { |
| "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 18980608 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 18981888 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 18982400 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 19064320 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 19074560 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 19402240 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 19443200 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 19443712 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 19771392 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 19812352 |
| }, |
| { |
| "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 19894272 |
| }, |
| { |
| "name": "language_model.model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 19904512 |
| }, |
| { |
| "name": "language_model.model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 19905792 |
| }, |
| { |
| "name": "language_model.model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 20561152 |
| }, |
| { |
| "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 20643072 |
| }, |
| { |
| "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 21953792 |
| }, |
| { |
| "name": "language_model.model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 22117632 |
| }, |
| { |
| "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 22118912 |
| }, |
| { |
| "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 22120192 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 22121472 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 22121984 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 22203904 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 22214144 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 22541824 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 22582784 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 22583296 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 22910976 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 22951936 |
| }, |
| { |
| "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 23033856 |
| }, |
| { |
| "name": "language_model.model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 23044096 |
| }, |
| { |
| "name": "language_model.model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 23045376 |
| }, |
| { |
| "name": "language_model.model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 23700736 |
| }, |
| { |
| "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 23782656 |
| }, |
| { |
| "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 25093376 |
| }, |
| { |
| "name": "language_model.model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 25257216 |
| }, |
| { |
| "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 25258496 |
| }, |
| { |
| "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 25259776 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 25261056 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 25261568 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 25343488 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 25353728 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 25681408 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 25722368 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 25722880 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 26050560 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 26091520 |
| }, |
| { |
| "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 26173440 |
| }, |
| { |
| "name": "language_model.model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 26183680 |
| }, |
| { |
| "name": "language_model.model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 26184960 |
| }, |
| { |
| "name": "language_model.model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 26840320 |
| }, |
| { |
| "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 26922240 |
| }, |
| { |
| "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 28232960 |
| }, |
| { |
| "name": "language_model.model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 28396800 |
| }, |
| { |
| "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 28398080 |
| }, |
| { |
| "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 28399360 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 28400640 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 28401152 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28483072 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 28493312 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 28820992 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 28861952 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 28862464 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 29190144 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 29231104 |
| }, |
| { |
| "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 29313024 |
| }, |
| { |
| "name": "language_model.model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 29323264 |
| }, |
| { |
| "name": "language_model.model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 29324544 |
| }, |
| { |
| "name": "language_model.model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 29979904 |
| }, |
| { |
| "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 30061824 |
| }, |
| { |
| "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 31372544 |
| }, |
| { |
| "name": "language_model.model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 31536384 |
| }, |
| { |
| "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 31537664 |
| }, |
| { |
| "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 31538944 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 31540224 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 31540736 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 31622656 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 31632896 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 31960576 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 32001536 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 32002048 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 32329728 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 32370688 |
| }, |
| { |
| "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32452608 |
| }, |
| { |
| "name": "language_model.model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 32462848 |
| }, |
| { |
| "name": "language_model.model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 32464128 |
| }, |
| { |
| "name": "language_model.model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 33119488 |
| } |
| ], |
| "md5sum": "632cf778d8386261684a978964e0297b" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 33336064, |
| "records": [ |
| { |
| "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 1310720 |
| }, |
| { |
| "name": "language_model.model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 1474560 |
| }, |
| { |
| "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 1475840 |
| }, |
| { |
| "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 1477120 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 1478400 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 1478912 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 1560832 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 1571072 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 1898752 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 1939712 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 1940224 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 2267904 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 2308864 |
| }, |
| { |
| "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 2390784 |
| }, |
| { |
| "name": "language_model.model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 2401024 |
| }, |
| { |
| "name": "language_model.model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 2402304 |
| }, |
| { |
| "name": "language_model.model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 3057664 |
| }, |
| { |
| "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 3139584 |
| }, |
| { |
| "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 4450304 |
| }, |
| { |
| "name": "language_model.model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 4614144 |
| }, |
| { |
| "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 4615424 |
| }, |
| { |
| "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 4616704 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 4617984 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 4618496 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 4700416 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 4710656 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 5038336 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 5079296 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 5079808 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 5407488 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 5448448 |
| }, |
| { |
| "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 5530368 |
| }, |
| { |
| "name": "language_model.model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 5540608 |
| }, |
| { |
| "name": "language_model.model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 5541888 |
| }, |
| { |
| "name": "language_model.model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 6197248 |
| }, |
| { |
| "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 6279168 |
| }, |
| { |
| "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 7589888 |
| }, |
| { |
| "name": "language_model.model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 7753728 |
| }, |
| { |
| "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 7755008 |
| }, |
| { |
| "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 7756288 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 7757568 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 7758080 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 7840000 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 7850240 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 8177920 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 8218880 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 8219392 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 8547072 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 8588032 |
| }, |
| { |
| "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8669952 |
| }, |
| { |
| "name": "language_model.model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 8680192 |
| }, |
| { |
| "name": "language_model.model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 8681472 |
| }, |
| { |
| "name": "language_model.model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 9336832 |
| }, |
| { |
| "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 9418752 |
| }, |
| { |
| "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 10729472 |
| }, |
| { |
| "name": "language_model.model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 10893312 |
| }, |
| { |
| "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 10894592 |
| }, |
| { |
| "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 10895872 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 10897152 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 10897664 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 10979584 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 10989824 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 11317504 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 11358464 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 11358976 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 11686656 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 11727616 |
| }, |
| { |
| "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 11809536 |
| }, |
| { |
| "name": "language_model.model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 11819776 |
| }, |
| { |
| "name": "language_model.model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 11821056 |
| }, |
| { |
| "name": "language_model.model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 12476416 |
| }, |
| { |
| "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 12558336 |
| }, |
| { |
| "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 13869056 |
| }, |
| { |
| "name": "language_model.model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 14032896 |
| }, |
| { |
| "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 14034176 |
| }, |
| { |
| "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 14035456 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 14036736 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 14037248 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14119168 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 14129408 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 14457088 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 14498048 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 14498560 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 14826240 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 14867200 |
| }, |
| { |
| "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14949120 |
| }, |
| { |
| "name": "language_model.model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 14959360 |
| }, |
| { |
| "name": "language_model.model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 14960640 |
| }, |
| { |
| "name": "language_model.model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 15616000 |
| }, |
| { |
| "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 15697920 |
| }, |
| { |
| "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 17008640 |
| }, |
| { |
| "name": "language_model.model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 17172480 |
| }, |
| { |
| "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 17173760 |
| }, |
| { |
| "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 17175040 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 17176320 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 17176832 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 17258752 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 17268992 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 17596672 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 17637632 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 17638144 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 17965824 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 18006784 |
| }, |
| { |
| "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 18088704 |
| }, |
| { |
| "name": "language_model.model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 18098944 |
| }, |
| { |
| "name": "language_model.model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 18100224 |
| }, |
| { |
| "name": "language_model.model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 18755584 |
| }, |
| { |
| "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 18837504 |
| }, |
| { |
| "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 20148224 |
| }, |
| { |
| "name": "language_model.model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 20312064 |
| }, |
| { |
| "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 20313344 |
| }, |
| { |
| "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 20314624 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 20315904 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 20316416 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 20398336 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 20408576 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 20736256 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 20777216 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 20777728 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 21105408 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 21146368 |
| }, |
| { |
| "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 21228288 |
| }, |
| { |
| "name": "language_model.model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 21238528 |
| }, |
| { |
| "name": "language_model.model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 21239808 |
| }, |
| { |
| "name": "language_model.model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 21895168 |
| }, |
| { |
| "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 21977088 |
| }, |
| { |
| "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 23287808 |
| }, |
| { |
| "name": "language_model.model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 23451648 |
| }, |
| { |
| "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 23452928 |
| }, |
| { |
| "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 23454208 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 23455488 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 23456000 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 23537920 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 23548160 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 23875840 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 23916800 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 23917312 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 24244992 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 24285952 |
| }, |
| { |
| "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 24367872 |
| }, |
| { |
| "name": "language_model.model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 24378112 |
| }, |
| { |
| "name": "language_model.model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 24379392 |
| }, |
| { |
| "name": "language_model.model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 25034752 |
| }, |
| { |
| "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 25116672 |
| }, |
| { |
| "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 26427392 |
| }, |
| { |
| "name": "language_model.model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 26591232 |
| }, |
| { |
| "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 26592512 |
| }, |
| { |
| "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 26593792 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 26595072 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 26595584 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 26677504 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 26687744 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 27015424 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 27056384 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 27056896 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 27384576 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 27425536 |
| }, |
| { |
| "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 27507456 |
| }, |
| { |
| "name": "language_model.model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 27517696 |
| }, |
| { |
| "name": "language_model.model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 27518976 |
| }, |
| { |
| "name": "language_model.model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 28174336 |
| }, |
| { |
| "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 28256256 |
| }, |
| { |
| "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 29566976 |
| }, |
| { |
| "name": "language_model.model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 29730816 |
| }, |
| { |
| "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 29732096 |
| }, |
| { |
| "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 29733376 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 29734656 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 29735168 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 29817088 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 29827328 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 30155008 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 30195968 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 30196480 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 30524160 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 30565120 |
| }, |
| { |
| "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 30647040 |
| }, |
| { |
| "name": "language_model.model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 30657280 |
| }, |
| { |
| "name": "language_model.model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 640, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 655360, |
| "byteOffset": 30658560 |
| }, |
| { |
| "name": "language_model.model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 640, |
| 64 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 31313920 |
| }, |
| { |
| "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 4096, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 1310720, |
| "byteOffset": 31395840 |
| }, |
| { |
| "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 4096, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 163840, |
| "byteOffset": 32706560 |
| }, |
| { |
| "name": "language_model.model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 32870400 |
| }, |
| { |
| "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 32871680 |
| }, |
| { |
| "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 32872960 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.k_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 32874240 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 32874752 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32956672 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 640, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 32966912 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 640, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 33294592 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.q_norm.weight", |
| "shape": [ |
| 256 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 512, |
| "byteOffset": 33335552 |
| } |
| ], |
| "md5sum": "771db05b4d379d652f02a88b4fbb428c" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 462080, |
| "records": [ |
| { |
| "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", |
| "shape": [ |
| 1024, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 327680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", |
| "shape": [ |
| 1024, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 40960, |
| "byteOffset": 327680 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", |
| "shape": [ |
| 256, |
| 80 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81920, |
| "byteOffset": 368640 |
| }, |
| { |
| "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", |
| "shape": [ |
| 256, |
| 20 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 450560 |
| }, |
| { |
| "name": "language_model.model.norm.weight", |
| "shape": [ |
| 640 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1280, |
| "byteOffset": 460800 |
| } |
| ], |
| "md5sum": "f8a02319ec8a543fee87e9f79ca6aeee" |
| } |
| ] |
| } |