{ "metadata": { "ParamSize": 563, "ParamBytes": 145412407296.0, "BitsPerParam": 14.781631589720977 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2491416576, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2491416576, "byteOffset": 0 } ], "md5sum": "6822b8621a6af7062c219ba13b241400" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.79.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9e23967d16a2978826cf5a1cf69eec9f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "8149157c63c1db21f997b5a04526e26a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 2491416576, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2491416576, "byteOffset": 0 } ], "md5sum": "e05db141ea5914763186bc15b5455830" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7cc2234aceba3bd45473edb7d2f316d7" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "013c411de70caa7ceb022810e3522b69" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2c4951bd8179f1d96c47bd54d5016f28" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b4617cb42e3a9dccc19ebc251e9dc3b7" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "5a46715a227b454e18dabfa9fae2f72c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "be71adbf7c3c907ad2d9a9c86c71bdab" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "6e086698a4139fa70be145ccddba7065" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0285ebe40ee1fd3ccbef36a9856ac6fc" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "13dcff2398f6088874c177725ca52a9e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "9fc86d8ef1ce3f63868b3c22c0eca6c7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "dbcbb7288507b49d0dd716d72f266922" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c5c752dec3029739c7b2b3217aa609d8" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7729b1ec8b830c2767cce14db2953ea3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "258f73c756e446bfa0d36ecb801c767b" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1e4af2b2bc94de53df12bbe8066879f5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a13565b6c93de30dda8df11f47a27265" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "a4de50155e8f7c42c070a408299fb19d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "73313f5f1aacfd56f539fa2798f80156" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "19863186f03a3cd30ec5c173d65b5129" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ddb284fdf672137478820cf4a3d7dbcb" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7c545dd4d7566f4e6dce371fe02a61bb" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b217983ec61d5e256f2ce75c3e419496" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "febc5f52ec3b9a5eb52661d63d3f52c0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cb4abc4743d75417094437a3365e08bc" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "71e383a31367874b7c4729f2a18a5750" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "48d52c93b8d862d91f72149205a6a053" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c841bc12a38f43f659d09a7c0e56604c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "abe6598aa4a493f24a379affa81894cd" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "82970cc6f1f73238463300090974abab" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5653ea5b39d98d4836c5f37f9793ed53" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9ab679b8f4570094d511c917651bc82e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "feb978983e842634b79873e66910120a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "8b931c14bd6687a520a975a0f2aced24" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "330ab1bb2a0bdaa0f035f07a6a3f70de" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a637951d208a41140c999164232cb41f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e0ec884a6ee000bb1ccd2513b758628f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "95458625256e5260464270bdd0790914" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1eac8e8af9bce791e2e7d56ace23feb9" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e209d0ceff53f072978bea265fae406e" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "6d3920122f80ddf9ee28982e5a207d89" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "99a5073864dd89b86a0ae8af0cfeb052" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "34c1acb6b07b66cc3beee63ae99beb67" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c0bf0387874e68129d35077079d78000" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0da171d3a167e36ccb527982259e4102" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a9184d050a86f87b155c0a468d5bb403" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e3ccd8f5a1690127097a9d02dc6389f9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8651d3393aeb1e725b56d8a60fa4e901" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c5e58db93ceb83a82ec132378e61ac99" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7cdb9b3facae230c91192ef4000d4cc9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "51951a5375e93ac9bf6c302ca129a2a5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "15d9156913aa0d9b2e68d0ccefa89ae6" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f4afec19dcb5512d7ccf52231ad36ebf" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "057766f81fbf2f9bc5b08fae2451b56e" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "23ae0b805fb3796e733f83ee90125233" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "9e2dac300b54989db7496fde321d23ef" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "86697abea67d22e6fdab76c2984505b7" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "fbe92686c0c0a512e94b3d05fc8f550b" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "5ab7aacfeff1dde1175b3681fb7e76c9" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "33e931042814cac513f62c11326fd964" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "567f000944d8042c16ad65fe5d5e9daa" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "de87021b059be16a4b5bd2fdd744c2a4" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "cb284db9037969f74347b7e885e84acd" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a7a21fca36277402c1fdefffa020aee0" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f628bf26b65f6cae4c3ec9e966bc824b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "92d2599af6ce34137aaa709e153791fd" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "273809e1079c317aff28cd06af944c8e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3d2f52996dfdd6ac3084e33117997b49" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7e3cc75812050735b9cc28db411e0fe4" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "12ca1d8dbea4c84dadd45324d149c5fa" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8e80154959ff956d89b53ed52795657f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "f8f11e09422c62e587f6928f615ade43" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5684a75efa3cd08438eba60849111951" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "98133ddfc44f675d0cca267662ce2961" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "26a0cc03c7f0af315a8eb0d62a33fa89" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "adf164db9b1d830a44536237ae7bd08c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "d81a4d68364d4b937dbdc70d32ecb0b6" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f716a1565ce7f8ec3e582ae59d5c61b1" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "04a09206ab2bfa53bfeb7ec9cc54bcf2" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "90c9b0429a55838d560724ce0a017599" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e66a476da99649f5af13178b816b5dc5" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2b6fe5e56a4b921d0f9f8c3e10fa5d5b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c2b1f31d34010ed8b3087e12f82931e7" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "90d933b72c73570de4da18f65b962155" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "5a8b917afcbd8efe97272dacb537f9e6" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "4c3b9c1fe7487474cb108db3b2c01b6d" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3b1650261d9968b8c1978c44660ca712" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1ef59b44472342b33ec970048e382f7a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2d805c1ed1f769d316ce8f8645cd869c" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "d5f0ac863e5d68edb555dea71710fed6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e2a8c7d2ab800f870c81f671ba4a25b5" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "44f42ebc2b511dc2ff1744051b70aafe" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1497f62d492461fb1941efaaa97cfe80" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "01aea945f93051a85dfb2c95e2317101" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3e21bfdc5970e907e680dcfd04d4ddf0" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "2eb0252fa430bf5a602247283f22c795" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9ff0d03f859e1873d709d9c9b2acea69" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bf0edb51400072eac3effee8b266a73c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "654b8352123af9ff601f07198139da8d" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7221865ba4098e230c9b88271be27821" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0a19460c6c0f4eef0bd243e9a752cafe" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c2709ccc1beb3d9d6d000950eaf2d7d3" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "53cd8bd00f71926674d579ef544fe68f" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b0dd818c9fdb2adc96eb02536163d798" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "24970b95e1a9129c96b41aa46da30a2c" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c03713f1afe310fb18a3b0830d36a867" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "3b9e837afc89624d38ab7dd69049bac4" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f11b5472bd02d20f9708e64c8bebf959" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7b98f0cfcb1f0d15f353a053275f57e4" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c5211904369cdd0e372f5b3fcee42e87" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "dff32756fd25d37ac1c3e3f7637e2aaf" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "60f8c6654a66f0b1b393803d00dbe499" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "735a39d57fed3e197934f2f69d9f6c1e" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "67b51c53759b3e1327e9ca9f6a2d11b3" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e64c498ce1ddf860831b3aff2e2a540b" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "b66fc4626d62eb803f07bb0a255988a6" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "0b835c506a0b6d9d30285eb6e0e77db6" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a398a05ca6e410c87e0ab2566b39160b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "17ce110ac422aae5a9e323ae56e38225" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f99303c397a7e058d421ca86ddfe9a6f" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "debe4442c68c7a1b122ebc37d0a1bfc9" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "039b38ee6d1155bd746236edaf647628" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ada6aa7c34d2eacae12f0510413e5f74" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8f7ae029ae8755add63e1eaf5aebd7ea" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "cddda8827f23864ccce705dcaf8a095d" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "cda02fd86a5288c066892c32abf6e57e" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0921cd28d76f090c76dc33f1c869c31d" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2ea88b4690a8aa6eca5393f0f8e2973e" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "aec505172a2f6d542e3463c329877ba2" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d05cda0987efa92bad77ac335e6a5a68" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "df37da6408541822df29856f4742107a" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e1af6cfca4d7862b2c121746626dd2d7" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9437d686cbce5bcf20204a297e52cd33" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e8873457a698f15b69b8546214cafbc4" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "403e010b95639b370f39f2015411a17d" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "fe0759b141c22fe036358328a90419a4" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9417b84eb5b732bbbeea225ba64d857f" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "83c60ab3514e7040869cad477b453865" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b1442db2dab4e4bd06bc2e86fc4ffbee" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f45a618a81a1d8dde6b40ea17995ba2c" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "888e1d256ce746904b8d7efcbc66f7d6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "df3c8c4f2f04d11db745d634b03d0e39" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "878eb12ada758b248f29031a8b21283e" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9dc71c8537a3f5739808c9040fea15c7" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "89e29896d0fb35d0875698dd533b343b" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "725536f67c308782855d78c4f65173fb" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9db2c76b072496b21d92da9436ddf948" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9e5bc2597232858a09df542de0e92318" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "57bde2061b302ff70cf7483239948b97" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e319fbded4f1257433e0f2c20d103952" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ebf743a9fb5f0a1c368b42068973c523" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "bf0944533907023bcb98b0e29569ba4a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d3eafc3a8606f6e10ad950c6d7c32cd2" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "58ad22d2d70ac84bc4635c8771c8f47b" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2cd848b9cb59b6d64f1456af046cd027" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "031b6af23c051ef20716724de8a716d5" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "6bacaf291916a48e0ba91c40f2259461" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b6905240ad12ca1cdb1302dec7f49745" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f24664fddfb78002286e93a1dcc89242" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b18b1202ec7e295676f8eefd9fd2919b" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8dc250f12e412e24282be53e023a4ae4" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "f8989771d471fbef436935d265f97946" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "daf392448dcc0e8e074eb54d066cc60d" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9b9a2695da16e824b873df0d7194b42f" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "08b603d10904659d21c9489c583d7f43" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "81fd1ea8a1e079453d29b7f95d4eca60" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f0ee7ee1aa7782bb2821ff08f80706d7" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "43b342599dd03a5049fe75d300246774" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "edf4dade27e8f3c5fd941c3e87611e9b" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "91f58b34bc35f9e68bce5c3c38719d05" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2c56a6edc968d6a9419ae0da16f17441" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c41b93c59f167c6b518ec0a6a1242b54" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "79072589ec066d748298d2a952e50f08" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "0c1bb8276dfbc536cc82f029176b6a23" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cdc5a7eada85cb3b7d55f28d47b4a983" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "6c89ac1b2a7f57a9f04bc18c861bfa84" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "6b2ec4d8ea9c287522ad3e4c07794ef2" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "fadbdae9688ce58b73a387c8aafec683" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.48.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "fffda01cd9ff2c5b509e740a099b2a9a" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c960ca13c2d788cbec90a2d7578c5708" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8f5510e6b5fe4da83d2e70ccdcde366e" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "3945759eff833f540be9cb7977197bfa" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.49.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b0eb02cdc72299259209add5ef7ebe96" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7d041147bf3392ce792748df52db43d8" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.50.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "3b7968fbf19812b60f5ff52062678015" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "6503b1cb784659a50112011efc20a521" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "466e4ac8f2d5e3848df120e640ab0781" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1b7802904da9a6ca741ba7c8362a1ceb" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "1996fc62a7a4277fe052f9c14a9f0b53" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "64ac421b9d84896d205e3b0bfa01c929" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "237f6b6efa43df411bc4f7eeb2c366f9" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "46ef8d1577c4175110b492bfcf7a5f69" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "970ccda9e521fe4bc4cf818dbadd00e9" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e09615369963f8f22580c852cee04766" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "de7349965e04813b03d36f3a144d503a" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "99639e936c083d2aa695c89e309b7aaa" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "23f5ccbfb23a53296b81d3264994e4d2" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "cbd92cd60d950d5f01696f661a4b0e6b" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "81abcf5d0a08baa58b7e6d1636f76a61" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.51.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "88cdfa01780d77b2b9a84c0bbbf59acf" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a13aed90bc39571cd0e6c1bfe5d999ea" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "8a091a8c8d8295691c45da55e457ec5a" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.52.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "223254b1345b0102bd836821c019f03f" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8707e471e819401ed66c7f0887aeaa20" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.52.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "972dcf76e6f7e22d9aa3da9113aa4655" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.53.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9f6704009f71fe0ed48009f129a68778" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a1131113345b399e55a92678d0da3dcc" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.53.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "991c623cc6b723c9c35222abdeb6e28b" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a5b11a1333e0069bb51a607c704aa915" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "310b90295c76dedbe5659a71d9fa84c0" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.54.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "47569ee780eed3dd3528747d9039ff97" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "5c7aef6d4ae40cb1082006eb4d8244d3" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.54.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "502f9b803a5cf8228129354476050f97" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.55.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f171ab2fc6296f06d22ea27c8b32079b" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d5c46e45d1953e986482ad1cb4a4436e" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.55.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "25aa273a816881bbe21c22ff522fa9c1" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f237fb431512551a634c08b7003bb767" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.56.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9380c99bbb8acc77a2f2202ad82265b7" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e5975dffdcd0d2b1ae756920d8ff0166" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.56.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1b4aca525607a3bee4d75f86bf959b48" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b58b7a5b0467d452f9f553f3165717f5" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.57.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "bec5fc6f3d343debc2e09dd3eb31539a" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "4a90b1b41dd0b1b8ed26e66a2de9dd43" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.57.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "061aada9a554f2d6a91dc346eaa89750" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b4441b68854e356390d221c90763afed" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.58.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8b19620441eed22865f48372b162e6b7" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "445e74e28fc9c0d98c654f14e9ae4b4a" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.58.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e97c73a815c69e830a9c4e5638c8f9aa" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f05fdd1ba6dd6c0c63166c7c98250983" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.59.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "6c1573202717126bb41a03766dc169c6" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "80128d5d8ee3d9cebbc22d40ab98d69d" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.59.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e70965186ded2d9a86bfd9505714e71a" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "0d2ee4f084a6c89824b81e9d45a33d27" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.60.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ca1e88e7618618bc67b513c3614582dd" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "ed9790ef82baf4994d4b236dc88459c4" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.60.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b48d7e07a78b1a97bd52e96ecb9ad44e" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "434c7c5d8bb8b1a23120fa72b84a157a" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "f36f799014ef8d00bb386b68629cbe12" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.61.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "676978de8498e16fbabde26824cdd9a9" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "242bcc4397c9b7c356e136a10fb3673c" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.61.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "1b62fec2544c1d644da8ced15fc0d8b8" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.62.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "a3335912323bc978d2dafb570b5d8a3e" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7261b56b8b04d53b05f22551ccfb5f49" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.62.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "7b8cb70444311fbf2504e8a05d67c266" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "da69e1cfaba0ce5c0382572496996a0d" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1d16a488b5692e9c074ffc829f7061bd" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.63.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1b378e440ef1f4f3b42ef46ce7486575" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c2d2ab8c3466cf38227fa97d9ef28315" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.63.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "79065b31ea8564f55f3dc6a1628302c6" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.64.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7d000576e74be6fcc0810cfe3eaf5cb0" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "ad9dc67db837a77286ca8bcb6a2e5527" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.64.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "041273e3d856bcc9002831d4b094d917" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.64.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "751e7ac139ab044d4349cb8b2be159ed" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.65.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "477a0ea6f7bac46853fe8253d54da4c4" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "510562dcc63a05dd4bce17aea94eb42a" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.65.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "98b8f6320c003a22eb2cb07786af3e31" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.65.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c1c1e8657914df71d451f858aed79514" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.66.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "51f5043bc124d531767640d81465dbc7" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c497d21d0233390de5fdb1a51c4815ba" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.66.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cbe6f914a57f5d42e786128c57f6117f" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.66.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a9fc80009468fb21f73181ed7765b634" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.67.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "0c55c1feb25b61abac2f02f80366a383" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "b21a120ab460f1a15bc9f022b1ba03fa" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.67.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ce7163e5f68c810e4df781150f1deca9" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.67.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2fec128260f80c68a009a75fa006e868" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.68.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "17d884791d7b1e5de86218ea4b413ad4" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.68.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c0728a96302992d7bf3305e3e202cbd6" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.68.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "bae0d755392fce75da9b95fa50a435ba" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "534f298a6d37bc5855f266b957d5782a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.69.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "52cc4b50b3527d7e64ab7603015369cf" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a602f39a4e4963224f44c5c34e9403af" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.69.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "77903b25d6a3cb5d6d0b60e76217beb7" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.69.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "21dd05f4e04a600d9d13216e048dc4d1" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d910116612d6cb57fe417d8f8c4c7fff" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.70.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c3c1820c4b711b74336b9c001b03357e" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.70.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b70f9dd58570cae8f39e331a07d64bda" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "86bf47765cf9848b6b56cd5b26db2a2a" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "0aa40406a685095150a9e2dd038cb030" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "5226dc6184f51b479451cbbdf13f29e3" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c20715c64e7115b340c45aeed357484b" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2b4546dfc3d25715fdfa991c6adde4a2" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "b4245de5fe24220d506c2b8e55272e40" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "302357cdfdd8cc366bbee02533813d8b" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2ac2919faee3e33717477afe3c541a02" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.70.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3f1569e896a997230f19a9368ac3f09f" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.71.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "6b29ca9476ab09767876f0eb36329630" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "6ff015951be03868f181e88c01fbfd76" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.71.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1a38c9255768414eb94f656dda0b85c3" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.71.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "fb56e0108cded410861f0f928fe52170" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "5a6f95d84f61810c479923f37ba7cfab" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.72.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c490315f66571bad9b6cea588e6706c1" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.72.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "22863a104130cf3e2777949cafb89ba9" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.72.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "98c68c758b89e41c7b3fcec63b021055" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.73.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8ebb873f7679e17ff8a69b5138ab5940" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "54441e2b621389dfe21c6e69fd495b6d" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.73.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "6b71cd3fb3c860d2e30ed85b75596809" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.73.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0751c4ac092732b2d21f3d8933c8a7b2" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.74.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "fdfcea70725f3059fff7185c9e0bbf82" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "f3f0ad7d6b11e747e050bd519182e9f1" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.74.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "3bd4f3d983d875d1c7fd81f15d2b0c92" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.74.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "da0b7ceda4fb1b5886061bbd70db1564" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.75.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8654ae84e06cf173d9e2bbcf91d82779" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "3a0dd471d26b838d53e4f5112f403ca1" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.75.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "95f3d8f881de6d4ed00173676fefbdea" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.75.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "21acb56416db2a26d1b7483be159db43" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.76.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3e24c8780126a2e3c35a37646bc1a1b5" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d90849a00879a7a59c9819a23537659d" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.76.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1ff42e6cb2ebecc147785a76053f2fa0" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.76.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "16e66969c817ba2b3eb791644c72dc80" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.77.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "45ef76ec00dcb4af246e286b13b1930b" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.77.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "79c69486c89f9b9749fc2b9a34e48df6" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.77.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "65cc14eef03f6b0353254380220c4b76" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "42a7dbe5a2a4ab5c70c00a88358ac9a5" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.78.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "fb27f83f815474a75c2c26e55eb9fc94" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "4889bd6645f7cf4a391040eb050be8bd" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.78.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5e4b535caf9fe532028ee790fce959ed" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.78.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a7368ac05706762c675913e57e5d85b9" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.79.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2c726001635acdf0da3a55c1a0b7b147" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.79.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9142f64b3b8c27b5166718c825444dbf" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 4276224, "records": [ { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32768 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 49152 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 69632 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 86016 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 102400 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 118784 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 135168 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 155648 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 172032 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 188416 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 208896 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 225280 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 241664 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 262144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 278528 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 294912 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 315392 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 331776 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 348160 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 364544 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 380928 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 401408 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 417792 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 434176 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 454656 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 475136 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 491520 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 507904 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 524288 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 540672 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 561152 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 581632 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 598016 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 614400 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 630784 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 647168 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 667648 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 688128 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 704512 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 720896 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 737280 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 753664 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 774144 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 790528 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 806912 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 827392 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 843776 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 860160 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 880640 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 897024 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 913408 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 933888 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 954368 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 970752 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 987136 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1003520 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1019904 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1040384 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1060864 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1077248 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1093632 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1110016 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1126400 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1146880 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1167360 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1183744 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1200128 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1216512 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1232896 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1253376 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1269760 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1286144 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1306624 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1323008 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1339392 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1359872 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1376256 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1392640 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1413120 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1433600 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1449984 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1466368 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1486848 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1503232 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1519616 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1540096 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1560576 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1576960 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1593344 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1609728 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1626112 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1646592 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1667072 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1683456 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1699840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1716224 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1732608 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1753088 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1773568 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1789952 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1806336 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1822720 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1839104 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1859584 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1875968 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1892352 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1912832 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1929216 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1945600 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1966080 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1982464 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1998848 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2019328 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2039808 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2056192 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2072576 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2088960 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2105344 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2125824 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2146304 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2162688 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2179072 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2195456 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2211840 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2232320 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2252800 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2269184 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2285568 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2301952 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2318336 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2338816 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2355200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2371584 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2408448 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2424832 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2445312 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2461696 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2478080 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2498560 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2519040 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2535424 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2551808 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2568192 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2584576 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2605056 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2625536 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2641920 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2658304 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2674688 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2691072 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2711552 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2732032 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2748416 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2764800 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2781184 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2797568 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2818048 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2838528 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2854912 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2871296 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2887680 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2904064 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2924544 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2940928 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2957312 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2977792 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2994176 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3010560 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3031040 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3047424 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3063808 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3084288 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3104768 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3121152 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3137536 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3153920 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3170304 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3190784 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3211264 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3227648 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3244032 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3260416 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3276800 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3297280 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3317760 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3334144 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3350528 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3366912 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3383296 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3403776 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3420160 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3436544 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3457024 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3473408 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3489792 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3510272 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3526656 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3543040 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3563520 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3584000 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3600384 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3616768 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3633152 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3649536 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3670016 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3690496 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3706880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3723264 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3739648 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3756032 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3776512 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3796992 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3813376 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3829760 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3846144 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3862528 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3883008 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3903488 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3919872 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3936256 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3952640 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3969024 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3989504 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4005888 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4022272 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4042752 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4059136 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4075520 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4096000 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4112384 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4128768 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4149248 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4169728 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4186112 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4202496 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4218880 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4235264 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4255744 } ], "md5sum": "de0187bc14b08046224bf0746aac9217" } ] }