{ "metadata": { "ParamSize": 709, "ParamBytes": 20481200128.0, "BitsPerParam": 4.3530794586599075 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "e34ee27d8486a6b141dcd58cef2daeed" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "77f31d7f38b10daf6526334fffff5966" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3da6818607eb828bff0bbb67094d9901" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2a3b5b18e565893373cd2e92e8163f0d" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "26268632d0732c3a6151cb8b7231335f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26572800, "records": [ { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 10240 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8857600 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26552320 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26562560 } ], "md5sum": "49cd4deab37aa71638f40bdf3446c8c0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0f6563da753157fe594c83c78afadfa8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "16ae4d2f54ed7bef07dd4348672f1f28" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "509bebb72f1919fe175d856c16fd9e32" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "11b944cbda53543c8aa74802d5e6a20e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6c181bdb486e41c130a4dae602b31f9e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9c82a5764d8a542ddc80bbe2ca1fb7b5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c5a0c0fd11caf92895e3b6008ecf915c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "20323d34c2b2560c477b5e5ad83ef5c0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4cc758dd880778553b0726ccbecc81d2" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f1ae18683c15635143436c24cd5831b8" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "89866cf3f51eaf5835e7b8d137c14dee" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8f34190965a1794f0caa43980d97d1c3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "0b69a19c79de3ba18f0f0eca49df1ee2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "049a3eb118459269527743324bc323ac" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5d115a2ffe992e32c2cab95a694f2eaa" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "95755ab55c74f9db0ec4e062814fb00c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7090b12aea7aeba77de4e8819c9d81e4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "fdf0b9c24ff2152c1170bc2ee03c2293" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b6fd0096deb396689951083a6540a1ce" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dc19607146a93ec504c0c2cd5e36c775" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7c6e5bcb00ab6ed06e44de3f8fcf96f2" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0f57668ed30262a87d07b2b520c5056d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "16d506ab4cfcb941b435f51461dcd011" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "2c9989b8477d59e8a22cff792fd493d1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "d8734e17849fd66ba8259fc9754c7d8c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "61a5a4e9e8eb94950cde6dd762aff130" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d2146f0002c749e405e295b45709b8c6" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2dabcf03b210dc1f9f0e0c4f16345677" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1f77912e6d7794a0064a02f294b088a0" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25931776, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14755840 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14766080 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23613440 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23623680 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23638016 } ], "md5sum": "450898689ad86c31e15beb1f175a2df8" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a23c6416047c2e0d6194543e7323c5e3" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7341626293f550a41d452f1a2a69cf9b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7c70f5d0ede73aebf7f16469919ba55e" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "72e1407dbf72169e0d2b10cad0f6b976" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6102b47f76f349ebc0909a356e23e8f4" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eb3321cfc77023e44eb6337323d33c4d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ae593f8e45a62c432447179ebc11414f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f01b4a68a61e1c9efc1ff58b25cc5f8e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e0be010091c41590f97fb04c8554a1f6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ad0743eca1793bf8d520f59a1ebf3794" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ea62911b43448b4780b28d69c35d40fe" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "58e6fbd418149445d350f3d9aa029876" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "abc9a2ff1411e9f377bbbbce641c204d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d104a7c7271cde5ab590070f597b7bad" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "4b452b13efb1501173f5635d8d042d62" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dd1e31724eca7766bb45cfe0d278cdc6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9acd21668349845f98520725162fd87c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "445eb21bf5073949ad60f5eb5c960742" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9a678cd5436cd008a68b9f3d20e621c0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5b4d7fea7b74ea661b79b68cabe570a8" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9cf9f59c049c6e4eead79190b96162d1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "01296928920d58656caa5cd8c8a49b06" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1e636c5a49d4f1d08ab5fe4743519648" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e6f9b9d69c7689e18091cbea7ce0fd45" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "53e0caefebe3d85d57f90eee18345d66" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f36c02fa87b4ce27e0bf2d54422316f7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b40491ae7533051ee419d2dcc7d6c30a" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d19e814907f7b6e03e1ea9d2c154c5d8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "824311cebf94b788ea645d2db4bf9a8d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "86233c51c155b031f679ba483e20b408" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "84f4b017ebd8159ef349cd35b06d39b2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1022390dd50540802442ebbe12dc8c43" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "970f35c85ff372df8456189c46f0659b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c5588e884ce556672de6460afb839e54" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a9535eac311eeb416e10d9351b414b7e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d911fe74a24f642358751dc25a3a7a07" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3a7b1bb69f497d7d86329c9987b18167" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "bc0a3717244b294c814aabc6d6c10a15" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "93c6e0c14193f6cdcfc5e5528b669070" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "61755f2dd0512e511923f8ca31842854" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4345cec03ece6abffaaace98d57b5856" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "405f64abdb7203f5b3a9522fe4553d50" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "6fd442aded27de8568b84004e8aa85c6" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ffc8d92e5427c20a8f9f6ebb5cabc619" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a73e283c98926321002664b3de1a935e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "392f8d7128abbe0410777df39609515c" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cdf225644bc1b3be121aace401138f0d" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "387441129eb59f1eb198e39bdbb79cd8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a28d80d6474a155d9df818bcf816423a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ee303a13e5a9cc585dc3a38717ea93ea" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "39132743b75f88f06e9c73d7925572f2" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a36d02ab7f79e911c9d0e1c05b76035c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1230037f7ece073de193c5758d736184" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5cc353f95b5888822609911549d297d8" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5c0f058c0ccc7b895dad15f97762fd7c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "794719637debf5848e7f7642d64c3742" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9252165853ea1826bd0b0766fdcc9425" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5ee5b0711f9a887514d91618108757c6" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57e3f4bbb3369052e44b54a5346b4dca" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c1ed983eb45ab6e8f3ed00721e17cee3" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "16dd4d8ace9600610312b1800422f129" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bdebf60bcdf5f1362f996788d8a08f05" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "3338183659e67c86059efc8bedf798cd" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0cbcdd011773cabbac2cec6602cdd458" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8c8cb2aaf02edd8b556e725df9495242" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "cc494d5dc09e088ce86764fe9802f6e6" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2625fcbe96e2912639a010d0a214efc5" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "56e2f38eb4dc549777d2ef4487b17735" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "98a2d5fda2b999c3c1811ece1bc53df2" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5fdef4c7e6c596f3d36b259da2d161ce" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "43f1d0c08c8d9fd0e7cbb7611c44ae96" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b728cc5ef27223307bfec3f3ccbd2d60" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "88459ffccc6bdb6659ba4b0e909bf53a" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cf9f0181cafd6b998775abb2740bd216" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "469c8c9ee6f847b7d948384f95584997" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5582f8a1c74541eaa793c1defa066781" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "153933e0026396c2296b94d9dcf2e17f" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c159b007fc8f00ad7ff686683d32611d" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "21db36fafdcb2a64020bb31b7e2cc901" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c9f2997f6154c8383b44b6a8101025cf" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "cc5ffbf96891364f5b1b260d194f3e9f" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4d262b0f692ab46892b5538f6c79656e" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "16008ad09825a78bd94979284b892cd0" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "78cd756eb539cde6fe4eed5977fd255d" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "73fdee9836c6a8061b8dbc17315ebe26" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "20f3ba785ca831dc26591fe2bf0cafb2" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "6c329bd6c0a7d9c312175495804de170" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "04847b40526cb68634f93ae94cb7a6f7" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1a1d1690e091d552403161d82d96d605" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1fd8a9155d75754afa0e49d9376024e8" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b52e756fdbfc79f95750d3a4abc4f5ee" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a79ab2d5adba6be0bdaeda5260e53a4c" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 32495616, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23613440 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23623680 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32471040 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32481280 } ], "md5sum": "5b7a3d6a9193a8a7dcba3780e425b17b" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "13e894189096b4e9ea55c43a084ef544" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "83d7827077709ec43137e111b97abfe4" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a894475f0da34a23d918d717572d545c" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fe634bdd4e04b896bc0996dd8c59a536" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "cc839d11e0bcda845c4efac797701d6e" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "041664aa66b65cec70ff7aec861476c5" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "849680969ea97f716404314b8605ad3f" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f567cae1883f989bbf51abff859def41" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f216ae387d2c6e71d01ef1502a7cf358" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d55932caf21b594d8d7c8ef73625db67" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "39af219178807eb8d29d52ff1705cf82" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4e89ae9a1b6f1cdf45014f6e84422fc6" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1818a93da67e2d6b3e4114d9a140dc26" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4262fa4747688090191d637c15a8b4f9" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f0c27f6d099ec778f4c07b5e6a2d9b6c" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "13226242710a082d56690d922b33e91a" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "192dc3fd442b804ccc14821684136d28" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "34352a22491ba30039147c54871cde94" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a123ac620ce6e739df00522e61cf5a95" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e33f0ea529407fef40d6ed6319a024f4" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "945602ba56c1d6630cc578dbbc34b97d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dfe2f75e8f897f6c02e35190b2dd0f22" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5e9692f13a21fd42384ef0484a987096" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7ff14a5e6bc256a258a6f7e20f5a66ae" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "7968e1da8c75c903c40334d2ea589ac4" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b2fb22769e0e5dfde85273bdbecb5c17" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d888eba9bd76868315f50250a338e692" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "71f6dc5369a1963a7172afacd8094369" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fc95291eba75e8a29043d217f30b0d96" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d3b1de2c546b32497706ceb75cf1bfed" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fea0678f1700c39fc9527bb043db59e9" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e71bb0321e289d2dfe9eccb8aa5c948e" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9ea63b9134aff7dcc6383a1b6b9f5fb1" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c081657f92e005752710c4a279722fab" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "55582a5e30dae2b17b30c11fc9678a88" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8f22e7d37fd695f294f182fcfe21f065" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8e37dd1cf1e4605576b06f557e2eaac8" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "084807c8cf3cf8f6f8e49cff6669650c" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "00fe021f0809690be4cac0806c439c2c" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "09c704d0a60b8ca417517929f0b27712" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "14f3c22fba2e38aed2f4bf1fe3b4f932" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b402ccb2634031640afb690dc2c9c1e2" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "16e172325afd91d0dfaf3265754a3a17" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08e148518eb34ed4a63a55c455eb3f6a" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7ac140ba37a3662b1018babd67c42dee" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "37c90f7c337742a7787d6a68fdcc3bc5" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2f58e68814b59e5a6c958af482e6baa0" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "80baf849f505fc705bed8c27d0825d13" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "c3a11fb3bcb17e0e0e776cae3f7d38fd" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aef4e215d3a3b5ea6361960558274934" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9410e18004ea62c8b201693e27bd52f3" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "abb22bc7c25f23374e3d18a21a4024da" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "be4e9a6a9733581f17d4d490aadbfb21" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "73e9fda8d5b5884cf7ea237f5be0802c" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c4dfb54249c505d34600453b4513ac0e" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "920770737331c7c14385cffcea3badf5" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dccfcff06bdfcab2af4d6fcc0c7f9963" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d50df911236f8c7ffb717db6391d35a5" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c6f84e75d775ab53b0797181104c5005" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6fe04c1d2f3bab97f531e1601bc1602c" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f98900ed23e2a37144fe0edab3c26de4" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "062e231cef669c04d311ced84b63e777" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d9abf96a4100a15c5d10e1875f82d789" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "a31c94633ee235fe2891697a01cf8623" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ecd66b2a861cd0a0fb496f14ab31c54d" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "14620574e40a0a631aa49cafe1645d76" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0680f1eefbedcf6f43467f4e9e1a3e12" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a29332baa1907cf96622dd394694037e" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "389992cf395650b01b27c07069d49d63" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57adc599990ae88965da51e0d003ff70" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1b31c4e58f3a60cc63f15d3fb711285f" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b53d6bd81fe3bf808e50929711ebae65" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a06e1336aa2e5356f2cf8fb4ca17782f" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "dcddb56d1f92afddb4a8daff00770ea3" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "40226e634e224d7b612a32c6158f7a9f" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7bd137ac5822218cbd904b831987a9c3" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "301b941759e05678939101788b2f8da7" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cf83d27b2f726ff0757d950a46c2f63f" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "401600e48a20c25f29f0e65ea5eda1cb" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a7c87331c1ab9f1da576e7c6803eb831" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "999451717d8429965de89e94b6607529" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6cc395a65e6e45f15b4b12ff02b97751" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cf00ff944988bad74c020a6e8f9bc30d" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e05702f765bccfe65044725be045122f" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "81282e664e36083f3ee69a4ddd524044" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0627c7f758b3fab48a471262c3af88a2" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f31bb3d946c3ca850316690830f889ec" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1bbe8a72d5402ed5ca334aa1c1843715" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "22f19ecca21d192c74ef534774a622df" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "952a14c9b856e3ad757b27e9b0809929" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "871b092c211d0fe99e63fa98d8c0fc5a" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "8838bf24d39774616b11270872846aae" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4976d8c166889156c8456a569cc31976" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d27e924a1b3da3d468e4cd19306e679a" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "6b3e119d0b0aad300c3e003e62787000" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2f62b2efaeb94b9e3099b57bc5f34e19" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e64b9e6e31f2fde147ee521107841fe6" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "5270d83c7df52157e1a65e25e5955ebc" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3dc3a50cac99b71b7af497675ae83766" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "352b64594cda8f773ab29c788850fff6" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f999bdae04d4337e5a4808e98d60e815" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "eb763087093e8953e07c80d15c9c4312" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "e50ba30f9ff062654affa12dd6e88c1a" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dd017e97097175be83dfde4f5ecf35b2" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f935eba81e7cf7e3ce6fbc48fa21ab3e" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "293395cbdf4ac5a97bd2a5efcd16dcdf" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "83d4627f0b4e655f29440f1b842f92f4" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5f463e0cf93dd273c80bf4d0efa90780" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0b26f1e062f530251d5e65da6b984807" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5395c916198b2f58ba597897f98abe43" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bbb0d36e181ba0ad20fef1b2951eee46" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b6c5e4ce1c5379103c2fa0ba5aa1071a" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c8001167e08963993517e64b56ccd66d" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c45f6d8df01090658fbee96e25d66547" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d3a374e67acae5a2328ce21f4c32af8b" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "5c5b2038a9f6287a7c622135dcb9129e" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5bc7a42f4446a8927763289a71e59bb2" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ca5d14ce5fdd7712c83997767503fab5" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "33e09e54d612d72968e926ca91244e73" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b901a27855965be656d73b6334b36deb" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "15a7a2596b20953cf57564c0d9df4b81" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "75f5cafc7309bfd044eccf964ad5e701" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "fba5eee5b0f1e0f26871e805f7e40fec" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7b7ebfd601a6d1598aae982bcdd427be" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9db33aff374b4c09323ab69b16f492fd" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "92b49ca02ca699dd0672d6d84e8b54d7" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4d5d443090526cb55fd384e0142fefe1" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bac9fa2376b71c05a8686e59462c05bf" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f1a20f37d3bc5b403275e0394f239154" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9b6e168416ab2ce64571512022d5c68b" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f08130304dbf3f5be1495e43932351cb" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dcf52db7557fb825869c67c33e0347a7" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bd7340e362e49b26e04cd47cb7fa658e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "219c153cb9fbaabf779c6e6cf811f279" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8a382ca903b29a0b1041f64b1f77c155" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7688f3e5c2641b73e6f40b05ca6c0b81" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "349eb007a6c3999c4cd1bde77944ac88" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "305dcccfb172c18ac6b23e840d787748" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a849581e2d400b9e19f3d6d640a5461d" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b51a31b72480ccfa4caa62c6daaa1444" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "bf4eb710a295adb46bccb4fe31a80866" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d8076061d162dda161588486a7f12318" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "44a3dcd1d79c60712173edf1c611feab" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "2c0cdb131ec88bcfdc8f61d1155d4571" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d64a8517c029052838a1fc57ed9c77cb" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "38049e575fec76c268f1121604e644e9" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "317a21cc3cf05afd4c9eab3b0d8312f4" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7860f822044f9ffbe1f59863c9dc8017" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "391596b684418b4d60e0790761435721" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "2b2a9d0364bce37706a20e6e640e48bd" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "44a89f3c0ab22ffdc8726b57919821dd" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "522a108155c16e56ab8ee7210942551d" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6c8b2c709d62b36ea638bd0d75720016" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f8a1ee7ff578d089061b5f0a1bf4d959" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "53b5036be296da340f0f5439b7ad8562" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "88d3fbca7455cbe129e3eb1471162937" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bb5c0a0b5814aa8207cb0d973476c499" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5a7d06cb9a50035c76d1bd5c99c56bdb" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e303155aae5178520f6a4313aeb28df6" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "21f86b7a9c8c07ddca343432af9f29f3" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7f29f3dc72c1884d7feb4b5f894f09b6" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "005eef45385f6951993e76f831fd2c3b" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e797855be2799941136a7c87dbcd4970" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "72934b3b96e45744ef25a4f42f34a016" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bae6066ae1bcfdd6630e983b1e1ff509" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "895a25dbcf855fe98e7dd2e04a1fcda0" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "17be5070dbc211000631ef96f0b8e83f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bb0b11cfc435311e85d4e374c265f40b" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b85c1f1b5ef759cc95ebcb67c8486c9f" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6ece4bbcd14ae5d17551f5ff7bbc8835" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1ae1abed78552a772e18be00dc5ef2d5" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1525e7532d03cd65467960ac4bf3ca3e" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "73a115ccb8b48c8e88a14e377d7c300c" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b7f2379645ec1c999b8fc4473a2db479" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "eed15ab9765453213ad837318a4933ff" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bc171f0db55215ff3be1b408db7b0b6c" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5323c5b037a3f2d78d362e341d4b2f6e" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cec5c20841fa2cfb586ca9471890bf4b" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fa6e21cc92ffe5afed305177e077a115" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d00282478b009af864589f60cb4debba" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b1e9c536adbd12df489d417af8972272" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c37d996b23e2d5fa3c4e4b19559f18df" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "49b4c10c584c78743b13787414a09e27" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "52331be693248cb833d35ed4a35bb891" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "fb9f3b7f43ea2b8ee8ffd4c554e3146c" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ed1c6a63e5a8d0c81853e929f757f7ac" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "105794c4161fb6f8013bc55b92110b69" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "6d19b7f1b285030d74e3a7b9cc9e33ce" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 17039360, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 } ], "md5sum": "4a6cd517241e11106608a17e5099f034" } ] }