diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,9567 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 16895535104.0, + "BitsPerParam": 3.1791405645822484 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 640, + 152064 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "87af932a11da9ddbf25a81b6f46de56a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b5a7abc95647a505cc9e7c0805fd6b53" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a20094e89f2bb7b118cc34140b791189" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "161185e7bb1e4c1992c14135b24753a9" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c391cd020afc25dcffa34846af955f02" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "09207a7499221e442bdf16731c75b62b" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 26044416, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 40, + 152064 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 0 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 12165120 + }, + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14376960 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18800640 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 18810880 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 21022720 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25446400 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25456640 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 25470976 + } + ], + "md5sum": "0e7f6c90f4d812bc59368d35f6c2153d" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "ba0d270c5131dac68af970d1dd2f7e21" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "747ddfebcfc1b4ff1e0ec3cce96155d1" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "43aa7612f8bb2c7a33020eb5dce7fdda" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "35e3dc94ab6bfe1e00635a3926c25f7b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 32935936, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 13527040 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25692160 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 25702400 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 27914240 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32337920 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32348160 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 32362496 + } + ], + "md5sum": "99d21746e193bb1352b20cdca681f65e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d1c1eae717e5d27aad1af203b1117028" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "149a859101ece83c9df2bfe6c4b0849b" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cf328b1ce4252e9ef8835b2d8f71326b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "bc5481f4ac83c3e0f5f809b4a01f101e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "4d50f0fe4fae0026d6f58b57fe513703" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8ff4de8a05f7caa565394be6349bad37" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6c002d3b9d17ca72596fe563a2391d44" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32065536 + } + ], + "md5sum": "1e0b397fb24a4adebde832fd219f6ac9" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 23371776, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4423680 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 4433920 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 4448256 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22798336 + } + ], + "md5sum": "f63b839ea61dddad2885fbf42b460855" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e7f860dad6b75dc7316c642c54e859cf" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "800992a652a39a6d7ab83aad17118c17" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "76b7ab6d49b062ab47639901383b68d8" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b3615018b67f97cb1159d7c1c6c9adf3" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "65d6d3dd894811ce9045954c31100ecb" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 27396096, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13516800 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15728640 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20152320 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 20162560 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 22374400 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26798080 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26808320 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 26822656 + } + ], + "md5sum": "26c0b89bc2473d15efd26b5f9d132b6c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0500ed10a2bf3cfa411a09f3f3a0e43e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "60f87be81d5a7f3f63f149ef11cbecb8" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "808d91a0c459c0f9dc20f0355c70480b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "38192a0b18f62ee9002658194f6d0803" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2c8461062bb7cd877b5b87ad80f2ac73" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b7a4309e10bf4a4ee432bd94972eeb6a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b87a1afbd5742826b5a0b734f1d5ddf8" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "83bcc306cb3d649376a8b6eb0bab9dab" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8a334d7cb4efbbadcdd555754bdcd8b4" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "09a05661591a4004a42ad5e14703c9ba" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6dd7ae67df45dc9766881b74adbb35f6" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bbc425da87a22042887fadbc41038b10" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22972416, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13516800 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15728640 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15738880 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17950720 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22374400 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22384640 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22398976 + } + ], + "md5sum": "7da1dc73bd4df0a8c762206a6a4905f6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b54a9ad114900385811911aabaf077ce" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f5354a27c180a22d1216934019ee8dc8" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2dfb7138d9c06ecb2cb721a83b0b30b7" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "599a54a57f0076a36052d2f4d9de0d41" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a4707e132dffb26a20113ea78dd4f282" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "53130834f9628b0c9f4e309fffc775c0" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c3513ccd015ad292feaa72bbed946622" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "f8caba6e27439853927a3f5537acab75" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "bf4e520f55fc2b3e8c037723947f391a" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2bc0d3b1bc74cea8f9e263ca8594abba" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3fa6f6d1d87bd00541b036b80c51b97f" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "dd8bb259d048b42a4032ddeb38f868fe" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fac8acf6943ceb2619bbd72d6be08c0b" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "43d1a0716180ea53e2b7bfd0f776fd83" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "6234fea28d3e7f6ab7bf33e130be7aff" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "99e1085cadda42215e69609d56b8c8fd" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "763e5a7894847a0b6470f1309c524a18" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1738af24aa1a551ae04f10e778316268" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "6d01ce4a702b322efe2e50351943db1e" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5360336b19195d92d55563aab5ff48aa" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e23cf3f12e6019ec61cdc763021dcdf5" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "30f660641c7d14467e2f4f233825b062" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "152c86d20e3d0f7fe6c7df41722a7ef9" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "68ee7fde787f77bfb400898b78c28447" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5ca10e2afe972970e8ef510679f8f72a" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6cb0c5b462d581fe57efd2e3846c16e0" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "12030ead7c34e45823d05107cbd09baf" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "777c6b47dae1aca1ce6f4b4b34a94709" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "94cb75efbc8a5940460f1c9696d3e595" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "103db507feb9c29b553ddb9cd59a0d6e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0089d729f5b0c268d93c548af3ab0c95" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a7ee0ee3ca69fc9e65ca1059312f11be" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2afbbe2c8c4862a47a5e1441ecc20d97" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "e1195d370644a063cbd03eb222c29764" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a5ac5ce4900b66338a76d04b644f352b" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "703db33e6ddc9811c67a051cdc1a5ed9" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4d6a88a73a15b6fcae511aa54cc4168a" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "f2a21e3cbe7330e0048e5e0554b8b9fa" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "55e3b2e3505f9cdb5a338b9d7bebbdb9" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7a713eb58320dde140e91d75911d7789" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "170a7e750651b0189ccd68670bcfd8c7" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "a7c4ed5f0022c32a04cd3a8dea6ab8b6" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c64c61d777f48aedc44a1052ae4f6356" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ad625e9bf1dfc084cb415aa334f27788" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "1b4809c0c69382bdb5281d44e9deb0c9" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cb9cbd08583b4d0bd9cf945f2b15993c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7e9788ecd3784fd77c6c610e59476260" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4a4d7af922c273c19cd5f0293d2f36f7" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "312942f81d8d49c68f26d8db37dd64fd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b3b47f005e4bcb5dd2c26cd4df3a5b8e" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ba383e998ed7638c775a8083eedc329d" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2e0612bf9bbca76079d5022140a89cd0" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "741499b55efa3a0ae79a1704ce296de6" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32475136, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13527040 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13537280 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13551616 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31901696 + } + ], + "md5sum": "89bdb5d4b4065a179ec297f7328149f0" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bbb8daaa0cfdd24bb2671cd41c070fdb" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "af61274179f53b3b811bf25fa9868966" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "42dd91a67dedbe99b910a5e009426d90" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "733d2788b9283250248b622f65c97a13" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 22972416, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13516800 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15728640 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15738880 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17950720 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22374400 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22384640 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22398976 + } + ], + "md5sum": "169731de59301f164f90fd0b4a0eb495" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "557fa1f7ebada57efb69551026449f6e" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cc042e27c30784219c3df0315422595d" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "94d56e9fc3baaa0a11c9c95e16de462f" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "6e758bcf7c1364ea7d0d461fe32c2efa" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0c79e9ff2d26e827fc94ee2e8b531166" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f387bb61356ea90c2522b58320969bb2" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "239c51f69d64b2f1f51e55ddd7908356" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "abae92e1c6cffc3ccdf3cef7311b6b72" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "df8d539a95fcbdd6ec7adaea9f0e3644" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6487a6cd3d8d1e6e325e91ce48940538" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e321d1da63eb09cd13d2bcad3d5c3601" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "6e3ebbeab1e783a91cd7e66881e3f600" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a4c7329b1995c07b4954308ac6d87ebb" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "76cb0fdf29cbbb9eae5ac2bc42221afe" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "a9c13d5e2615b02522f09105a67493b6" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2d359db9d935171acd76542e9aa57f81" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2f231199ac61c36932e914d050df4c73" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ee037600d475a788d127c184c299c3ef" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "b7fc9b5a977a01a55184a5a10ffd6676" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3c07c8a9b168c2dcb032a43400cb836e" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6bdf0ddb995d96c91a063624ea0d1640" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "844fda2927cb6f6d8a9007e50e71be1e" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "ba8120ecec6658ff3797648c98b9dcdf" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e55d752df11eb99a76c5fef5701682bb" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5b40f54076efe9b10b79c7c0eb28e92f" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ff5a385bf6d573dc03a0f2629d1f341e" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "ced9673f727d0b5915dfe03d49c45b47" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a5f90a3bf18ce239ec641c2c6e8f74e9" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d41966676c5b259b006c4a636f41454c" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "5f1732740fc0eec65b68240d208a1ed1" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5e7ad36e861be14e84be34776b1a1527" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0ba4e88fc0f7cc598599472a80961fa0" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f2545e8cf589b997fb906ce5d1335a0e" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "2f8f523216a8c96ea53d5fd62d690349" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b104d140e93cfe341989cb448c08bad1" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1c14f097c4284383236972fa6dc85df4" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5d46e284579e4469377a728515c6ee3c" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "8e2ae84f264bb77a2cc4dd2ef5e2db75" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "aca6c9ca55ff9ca06ff63815e77352da" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b1abb82ac1dbf72c4375ea3116f8f310" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d80c8b74484dc4cf2e7160c33cb869d3" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "18e97c4635a208434fe0b156811367bc" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "91c5ad357bbbc51fe5f6dce20f6b62a9" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "345991251a1762f1fa13aea8fc6793f6" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "363cf527d3081d94a28781e62a9d9ad6" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "28d5150611a5b754220243fa90aa9d99" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ebf5f5a18f390967a6977d4533255bce" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "35f36f322bc2b18bd8bc5ff76a2171e6" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "75df06daa9998cf8c81b4d975a4c8ea5" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b746effac1c5167e4acdca690c2ef90d" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "53da26c9ee60ce7b3bc437e12d55946b" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d32eadcaf6452a38bca7362d8b7e83d8" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "9fe52e82472dff5bb36e433a268a5936" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "caa06476edc387df7ebf18dce57ebb32" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "075da29c21317c6cc2b583b9b225973e" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "66b2ec224f6f3d2acff4e60395703d6a" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "314f2c10e46b4ca65168924bdfa5546f" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "369e988be930599077b3fcafb00bc631" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a3c2a2ab0bd9624a980a446dfb3de868" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "b65241919e32c5075896fcbe5c057e85" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5e418daf7f1f9ec244ff1dcbd6c47181" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0b5cd99baaa6ba89b8136691dbfb508f" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2c7213bdc70771c659b0db044960ee9b" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "c9fa19ef51c9ab4ec73931ebcb5d3319" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d5711d480bcf3b20cad89cecc8a29fad" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "97da612d0127417974c89021b516ffb0" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8d112414e30917bee2109b0b78abf082" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "7bc7f8c4189a226fe440ec65793cff1a" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "43354bf5d5b0115954da94b10cb69cec" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cd85380cf926148870c1df3059ba6962" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "50741e6c69e37fd79b9e06767bdd86e6" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "40a6e7489c146454225498eec6f6f54d" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c3842ddcd5a7e78768d4fa78a3f54005" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f748c4080cbf82386dad3224a0c8ba07" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "86ffb0dbbfa84dd978ca5db3ea17df96" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "406a193029f65d2f449b8458566a9702" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6220e11db2054f0de634c1b93cc69c38" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "27929bc9fac02883157e71431712d9c8" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "3747902449b049d51104b5295ebec664" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "605496941ad38cee49d67bb51e01bc49" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "51d975bafc4ed42c8d5814ac4dc089c3" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "64d2433384aaae2a76ca64ec5f3261ab" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "d12c3bcc496ff9560b01e74ce36f95d0" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a276567ccd3126eee73b579fbae6e886" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6274fb5d09e654896de3952db38f8aef" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "169099a81dc0910c26e81d963d82b83e" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "e56176f3bd4f1b6fe4b801162e7646fc" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "274cacb4823db0be4b04b26dc2ada05c" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "750e02e8a31614ac0761ecfba8df5318" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "57c848f18f25a254d080bd81d2567469" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23840cf732b139e597c4f33016ca075b" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "23e3a11c87615a021a4714ee9f1a7d44" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b833d61630283b839be3f0ae17d8f53b" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "cc9932e3737c5e9776a91590fd10feea" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c26371254c242d2595715657ae8a08d9" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "333ae717234567dc7ee31e69d792be08" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2e7c188907c1f5f5b2752f3a119422b8" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "19316c321491413c66a6254497ab9824" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ae206331d553848e6825fe460f757934" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "72dd8ef65f7f9ee962dad056938e1907" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cb99b6f8a48903b8596b3a534049275b" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "03a538a411341e37f540f12a3aba8cf3" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "db988375b9e258e1a8bef2f5993c3ab6" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "55cf023f7895bfd1fbb2505f95258ce3" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "8bd4cbc2e8892f9fea2dbfb9cdedb4e4" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e83881f4ad8e29f25b99631404fce0b7" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "661ab1c64146daaa956d9d72d21cd447" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2aa99c9704d9579411623c73c7a46a36" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "14bf82fd3d31a7e61b692dccb7158dcf" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2930f6efad451813162bbe957688eb8a" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5a2648c6c2717dd7cb2f85c88a9fb1df" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cb33dded21f9937ae71c8b71a9adf145" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "382ced539cde5f539aa48038f1b06f07" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2852e79194f86eb89bb97e10b499cce4" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fc2d20b1272276cc396fcfde5539f11a" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0d921ea4c4a5943a6655876d9b67a724" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "8ef31aeaaf4e4a01ad9226c7cdafe108" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e11647a288b51eb49691deb8b11552d0" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "42f92f87157d9650b7085d9b3890b2b7" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "43d678b0ad5e26a2002dc17b13c94b55" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4b78f0f6b02586f89e436d7806739a2e" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "deba369e2283f2604cf255b373f925ac" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b003a924a6770050e24fe92a751efef2" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "958efbdb06af08e3d5fb0762575b8bef" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3c4da9f1b5f506d8bf98f4ff1a92a1c7" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "19ba27a201555a9db0d7516bc59c4d55" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ae70d04d8a0bdd205c3e1cf8e7490bbd" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "c45397a298212aac95d342890fd869be" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "aee1d408325f4efd0606ffdb8479476f" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "595406e1e71fa4de87f8f8c2a908571e" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9d2b5e47268d20d6d28ce24cf0c6ae18" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 32065536, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13527040 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17950720 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17960960 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17975296 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18548736 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31655936 + } + ], + "md5sum": "dd18e6c389c0c348833840f7a84d3a71" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d94cf5a3f4a521919de44ee4e87a25f3" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b1aad3cec25b627707a0f28da0893e19" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 27805696, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2222080 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4433920 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8857600 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8867840 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8882176 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27232256 + } + ], + "md5sum": "d7265bb9056b2786dfb6011622794cff" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0e5b4c9cd1b06f243d192a37af948eec" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5bac96aa10f0b047ee38bba4cf949251" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f59c0f4c95f924ad999b292d2a3af5e0" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "59ce4b2b4756832e78ea60ad83db57ff" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "daccb59fb07dbdc05c71347c05f8db28" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f30e509978d3cbf94cf86a0130dc8efc" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "10f033206aabfab5fe9df5d0be832ae5" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "2c507f125342fbe959c3874a1acada30" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 32475136, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13527040 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13537280 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13551616 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31901696 + } + ], + "md5sum": "ab5c8a9ec882e544df30d7a32210245f" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 13516800, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + } + ], + "md5sum": "d0ab8c4a0cf6204d52a65542f887e59c" + } + ] +} \ No newline at end of file