diff --git a/.gitattributes b/.gitattributes index c14a9cc992cbd4673ffa5ab5235d156b92692908..cbe63a53ee554d2a3fe8946e7d4aba243ff2e7d8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8240,3 +8240,16 @@ neuronxcc-2.17.194.0+d312836f/MODULE_a30822a1b2d1fbcee30c+bfe5714b/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_a849442b615562b13ba6+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_ccfcad18cf4ad9b6e4bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_ccfcad18cf4ad9b6e4bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json new file mode 100644 index 0000000000000000000000000000000000000000..948aa8e93467d08020438fca6c433d2fac8eecac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f1cd1b9a81ce4544f9.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json new file mode 100644 index 0000000000000000000000000000000000000000..d4416655bcbd318db1237accb344135b6c21db34 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/007ed0c0cab705897799.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json new file mode 100644 index 0000000000000000000000000000000000000000..d94960882dd600f2043a129f00aeeb710ddd76ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/892d8ca0f2425da9c03b.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json new file mode 100644 index 0000000000000000000000000000000000000000..64f1a44056cf48874b93f4e1e42748fee6f59bdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a1adaee75c9e8cc04831.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json new file mode 100644 index 0000000000000000000000000000000000000000..f3c6270414dee6923a324e759e8d4d946d5092ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a2e466575a68f3e72707.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb8ceb9e85efe7e43d855fc6645b9ea7774419da --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a902c97b9db908e03187b21a6c01701c50bfe0df9f6b71b809da7f4f16af05c9 +size 80831 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e233d34db3d4f7ef495c22cca0bef80fcb03705 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06f737474468cb436640+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0f5381043c9ef05027aa98ac3c13eb4414875c5b4fcdec7b813f04c3df912b +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..28ff114292ca168d8fa503855f3e6c6e8858171f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e23ddd2c1ceb0ca9b13f12cfe085496dda4a9903f08aa51dd4aaa763af0ab3e +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3ce41fe0bd997d6141083e9ca5468a17269632aa Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_228e41176dd02608ed1e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1220f7785177535a1f86909478fad748132349af --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32d5db6cdf19c397941133ad17626fbb60335e60d3115b19046ba4d38e111f0 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..04a42cb3094147c1dde0f10a8e14b2b1f7d9503b Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_331276a07386ee77d52e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fbd8ebe59f29e7326520b311ad039bb680188bc5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d938d24775001bb3a9c7fe290d4f0c9459eee3f32d275af31c99748fd81e81f +size 423227 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..40d8cd2d0432412d694eaf7cea0cfc52d2c0aa87 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f8c764353e2db9bb07fb9dc00b13fedf5cf29339faa18bf684bbbf5b19c272 +size 2806784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..89ea2dac78fcc5978cd88eb14a14fb9a4e87cd39 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4334b3c1a396e8ac4a6f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382ceeb24e11646e57fb436c0d64fbb0ce59caa919bd5f1a92d3d35308ed4a00 +size 2875313 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45da94458d038d09c818+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_45da94458d038d09c818+613edded/model.neff index 6b169e3ca46d536506eb53500bbd36344c3fe5bc..d64e7faeb59d3b0f724a15227ea7f8b865c60edd 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_45da94458d038d09c818+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45da94458d038d09c818+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abfc595163761b975d91957ba7e7e0a12dda49a12bd80321ca9643fbaa9caadc +oid sha256:05390870b066c993234ecd93a81cf6032e9775f84fefd55396ea230a4f453a0e size 18371584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..db2d6874c834062821515f07e31600a3e5ad161e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b5d0271ec9be39982fe3aeb4554340968a29392c77f6a65b122a47790efeb9 +size 80798 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6bc6a3aa84a982bea65703bdf53c32df1d5941f8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bf722c4149f80ae71df74bae7079026871b56527cedd7a875d34831ea33292 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1f428faedfedd6d3149f70867d58a2db61a83a69 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_55c3972a85a9fd509c79+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a377b14bff0e1489f2e43ba5d0d3123c57d434accfc9481612f9935106a758a8 +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d912d3b12e8358dee29d9a603e1de813f9fb431a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d3726f2d8c619c2a146cfb9d0b4b7589f1a0de7bd8a2438446f534cd3a0f18 +size 91018 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..35c394c850367f3bba33048d851bbd269fe3c265 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e3263be13428364a15b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5e24e8b14ca4aabf4f19a5c7e081e69613c617752ef631fbde8cba746b5e08 +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6ea8e02bd97e9ac771ea33a699a497ee2031262f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170784eb623597dcaedd6e649d1cf8a28a6ed509fa2a9789be34c9c4fa4e5828 +size 80988 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b8bf21310b4ca9a21cee6d57287c0eb3143d21f8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2730e31a5dc351c9439641fe4ea14db3e69d1113ca90cd381dc33e6891d67813 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0405737a5bcf8c9fb8cd9daf86633f977f9cee34 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d0c7e211138c3505935+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ac7c6a3343116727f7006d8c68267b9644dc27ead59c6c24b3de5c6ab1a703 +size 469663 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb45fde7e941e6558d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb45fde7e941e6558d+613edded/model.neff index 2ec978dad60f6a18130c055f55aa7fc37ca4f8d8..8f1ed4fa75809a5c669ac546403c1b1460a6f36a 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_81cb45fde7e941e6558d+613edded/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_81cb45fde7e941e6558d+613edded/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f76308b46268c61640b1d0f61fcfaaa5f1f30dcc7a4cf125c3cc70f1a94e4ac +oid sha256:75ba441f792a712d68d09aebc1d574b9a301bb19ef05fccac5daf7202cb44137 size 1414144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..24a71b39e1e9d860276339310674fdc8900153d3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abb3fdf792bb77cd45770a5d89fd5b633ebd23393f46ea7f65742c18e235d12 +size 493167 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1dfb695dd919fa0c44736b6780044b5445c72925 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9cc304c3ad910d31a27e+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50111b7b7febc7d24fc401a6e23e2596e480fa09e8393964bf01c6ac067afdc7 +size 5407744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8735d6a458d21d63a57081c8d431c69677ca8481 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e0e274b15f298d10619f21c5cba58d538fc37e1547bfd401d8c6064c09b709 +size 80831 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b491fad009c5d6e6e743a5dfd61404fbb3e18670 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11be33c81d54d95b9ff736e03c0987c8436a9b5f06872d2fe4ea6d0ef9b612f +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8f05e45e26d9fd311a6c9f0174d5a342246c1946 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d4291445f0fad2b3267c9fc11713f66b969c4b7def78292de1e0ef96a0c206 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ad7bd022be69ce77995503447fe624652573de4a Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_a669c4bb07e1a9435b7c+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..efb7dc71b2728323b7ac40362d650bbda53fe3a6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a827e792a1ef8a3c2f8dae21ba814fc50f7976f473a5e14e9760fe363a910f31 +size 67214 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ed67753750e8fd8b4f71891514505ee3c029a1b6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5fea695026734fd534713fbdb6bf344e6ba85a2813595389367a33ebb39ece +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94ff6bac71a5057ab2d44d4ff2db795bfd40ea0f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9f2f885ee8b6dede3cde5b4a0860650b9992834b03cfea7eb436886d31f2b7 +size 80798 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0ff39b354653817798ec37ff39691ff602fcbc37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300887730214e31a0072ce350180cb1ea3525ddce719ec372e71cf3cc98216aa +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5a591f228b543be6d6dc7852871185a08e2b2dc4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37cf14f309c07b0a2d38e74f66ee8f32ee3409d3a46fd1f00525745586acf994 +size 244319