diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5857 @@ +{ + "metadata": { + "ParamSize": 518, + "ParamBytes": 1563781120.0, + "BitsPerParam": 4.506794107285474 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 64552960, + "records": [ + { + "name": "gpt_neox.embed_in.q_weight", + "shape": [ + 50432, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64552960, + "byteOffset": 0 + } + ], + "md5sum": "329cc89a23408ba148b9493016bb4b9d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22855680, + "records": [ + { + "name": "gpt_neox.embed_in.q_scale", + "shape": [ + 50432, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8069120, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8069120 + }, + { + "name": "gpt_neox.layers.0.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8074240 + }, + { + "name": "gpt_neox.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8079360 + }, + { + "name": "gpt_neox.layers.0.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8084480 + }, + { + "name": "gpt_neox.layers.0.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 8089600 + }, + { + "name": "gpt_neox.layers.0.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 17920000 + }, + { + "name": "gpt_neox.layers.0.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 19148800 + }, + { + "name": "gpt_neox.layers.0.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19164160 + }, + { + "name": "gpt_neox.layers.0.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 22440960 + }, + { + "name": "gpt_neox.layers.0.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 22850560 + } + ], + "md5sum": "48ed52fc6702d2bddbf0351be1e9fd37" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.0.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.0.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.1.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.1.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "51640b8cddad4e113fa97bc9467cba01" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.1.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.1.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.1.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.1.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.1.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.1.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.1.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "e39c37c6f260e21bc77986697ce4d273" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.1.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.2.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.2.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.2.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.2.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.2.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.2.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.2.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.2.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "1ab113aab989c3be1b920c9297d0aa5d" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.2.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.2.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.3.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.3.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "2ced13f53ec663a0bf13b660535cc38b" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.3.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.3.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.3.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.3.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.3.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.3.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.3.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "4a46819e6cb25e45bc937e3b5c6e8853" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.3.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.4.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.4.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.4.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.4.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.4.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.4.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.4.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.4.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "b88f9e9b75847dd42c4b27977ed7c7b9" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.4.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.4.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.5.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.5.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "c39492b3a169961f87e3481166ab264e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.5.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.5.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.5.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.5.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.5.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.5.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.5.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "f6f77d4cdf77af30bab451a241f56362" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.5.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.6.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.6.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.6.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.6.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.6.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.6.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.6.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.6.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "5c0b76dc0bf2ddae4ab1975929998b39" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.6.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.6.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.7.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.7.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "0ba4cea162097c459ad3c32095627b16" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.7.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.7.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.7.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.7.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.7.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.7.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.7.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "37d7b823ac619e8aa3fb2f83e4225988" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.7.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.8.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.8.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.8.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.8.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.8.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.8.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.8.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.8.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "7b6e370887bcac533d6afff1c2bfb82b" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.8.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.8.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.9.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.9.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "1e86b4eb6d34d432dc68b163be0954b7" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.9.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.9.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.9.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.9.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.9.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.9.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.9.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "5a019a16f1473c82203dc13fb0d9ac88" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.9.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.10.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.10.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.10.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.10.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.10.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.10.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.10.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.10.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "30c971177f9e26f4f994761f9e81479f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.10.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.10.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.11.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.11.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "c7b0465466a9947f6bc5da7ae9706a2c" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.11.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.11.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.11.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.11.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.11.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.11.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.11.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "f2e10bc3c880960a797b3e9a1e7ee1ce" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.11.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.12.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.12.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.12.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.12.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.12.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.12.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.12.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.12.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "f5fc070653feee4fbc555dce27432686" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.12.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.12.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.13.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.13.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "83fcf50901e614a7e7432ab40383c22b" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.13.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.13.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.13.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.13.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.13.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.13.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.13.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "f199378dea8756153a2500e6de32304b" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.13.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.14.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.14.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.14.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.14.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.14.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.14.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.14.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.14.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "642b389ef2d6841a7dfee4e92e20d6ba" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.14.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.14.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.15.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.15.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "47240ea7ca273420b72ff3ee1a5b6334" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.15.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.15.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.15.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.15.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.15.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.15.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.15.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "8bc376dcdd840d052f5f6ce26da9e439" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.15.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.16.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.16.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.16.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.16.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.16.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.16.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.16.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.16.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "6541abffae10284f19193dd5ad916c0f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.16.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.16.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.17.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.17.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "3f7e118dec215d7e4924c533a7e64ec9" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.17.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.17.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.17.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.17.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.17.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.17.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.17.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "fde18fd691f51225b0c1678d66db7e95" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.17.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.18.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.18.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.18.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.18.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.18.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.18.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.18.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.18.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "ff306e8f9a97dd3fc0f3d8f4767cee9f" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.18.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.18.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.19.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.19.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "3db5d7a78929d950bbf51a61c4eaf7ff" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.19.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.19.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.19.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.19.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.19.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.19.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.19.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "b6815f02de21369d0e4c38d1fdc8397e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.19.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.20.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.20.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.20.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.20.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.20.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.20.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.20.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.20.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "16e8fc0e73c988d720b100001081949a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.20.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.20.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.21.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.21.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "77582d6ccd6c806e6274b86a1391f9b1" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.21.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.21.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.21.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.21.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.21.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.21.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.21.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "89cdc53db8b2225cdecb4e2d00b62464" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.21.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.22.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.22.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.22.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.22.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.22.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.22.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.22.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.22.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "1a7960ce26971793ec2d6834cccec0cf" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.22.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.22.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.23.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.23.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "305a38baeb10d1d8453ffb22930c5e45" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.23.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.23.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.23.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.23.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.23.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.23.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.23.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "bdeb8b3a4f42e99c54422e6683b0702a" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.23.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.24.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.24.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.24.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.24.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.24.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.24.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.24.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.24.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "5068cb0da236eb5826b345fbf612c41b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.24.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.24.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.25.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.25.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "2d1b713978b453eb0b454ce961763a05" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.25.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.25.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.25.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.25.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.25.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.25.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.25.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "c4c0b614fbe6e1c4d27501a8ffa727cd" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.25.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.26.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.26.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.26.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.26.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.26.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.26.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.26.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.26.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "d25a8dde2ee89d69d8caa91312bd9493" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.26.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.26.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.27.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.27.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "3e4f02d06e3443703e333d6d1f9daa0c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.27.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.27.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.27.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.27.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.27.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.27.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.27.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "322c723239483ca24b9963c50f3b5dbe" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.27.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.28.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.28.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.28.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.28.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.28.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.28.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.28.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.28.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "5e9772cbe8deb8211e71c7c9f43a92cf" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.28.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.28.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.29.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.29.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "c47dcc5464141072fdad99c4eaa5c28b" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.29.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.29.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.29.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.29.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.29.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.29.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.29.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "6d9d723a9ee7d64fe554249bfd0ad94d" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.29.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.layers.30.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "gpt_neox.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.30.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.30.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 14771200 + }, + { + "name": "gpt_neox.layers.30.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 24601600 + }, + { + "name": "gpt_neox.layers.30.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 25830400 + }, + { + "name": "gpt_neox.layers.30.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 25845760 + }, + { + "name": "gpt_neox.layers.30.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29122560 + }, + { + "name": "gpt_neox.layers.30.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "a0345a64824785f091323acd9410cb98" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29537280, + "records": [ + { + "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.30.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.30.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29511680 + }, + { + "name": "gpt_neox.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29516800 + }, + { + "name": "gpt_neox.layers.31.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29521920 + }, + { + "name": "gpt_neox.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29527040 + }, + { + "name": "gpt_neox.layers.31.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 29532160 + } + ], + "md5sum": "44a8dff70bf6b07046e3218d319a02bc" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29532160, + "records": [ + { + "name": "gpt_neox.layers.31.attention.query_key_value.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.31.attention.query_key_value.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "gpt_neox.layers.31.attention.query_key_value.bias", + "shape": [ + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15360, + "byteOffset": 11059200 + }, + { + "name": "gpt_neox.layers.31.attention.dense.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11074560 + }, + { + "name": "gpt_neox.layers.31.attention.dense.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14351360 + }, + { + "name": "gpt_neox.layers.31.attention.dense.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_weight", + "shape": [ + 10240, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.q_scale", + "shape": [ + 10240, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "gpt_neox.layers.31.mlp.dense_h_to_4h.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 29511680 + } + ], + "md5sum": "13afa83885b388099d528a3c4ec1cbfe" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 64552960, + "records": [ + { + "name": "embed_out.q_weight", + "shape": [ + 50432, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64552960, + "byteOffset": 0 + } + ], + "md5sum": "b6d71012d278c4e2003b65b2ceffb0aa" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 22830080, + "records": [ + { + "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "gpt_neox.layers.31.mlp.dense_4h_to_h.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "gpt_neox.final_layer_norm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "gpt_neox.final_layer_norm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "embed_out.q_scale", + "shape": [ + 50432, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8069120, + "byteOffset": 14760960 + } + ], + "md5sum": "add823c8648234bebd20671eeb3ae36a" + } + ] +} \ No newline at end of file