diff --git a/.gitattributes b/.gitattributes index 116bbfda438a1c9f8f9308428a61e90fcb0c56c3..d182446a0518a92f0ed9a1e6b73ddc71b8f7e914 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8298,3 +8298,13 @@ neuronxcc-2.16.372.0+4a9b2326/MODULE_1ec970130638677f3187+613edded/model.neff fi neuronxcc-2.16.372.0+4a9b2326/MODULE_767aaffd027b9e3b85b0+613edded/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.16.372.0+4a9b2326/MODULE_8d85280a9a94b88c7245+613edded/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.16.372.0+4a9b2326/MODULE_b306ecc47e279296f3d4+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json new file mode 100644 index 0000000000000000000000000000000000000000..d14d15205f40c119c4d67db43d2d9ea98ff564f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/05f1635334b6beb06d01.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json new file mode 100644 index 0000000000000000000000000000000000000000..88d03e84808e7c675032a7b80e7efc6b9f5b59c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/0652c1c39ac08c855ad8.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json new file mode 100644 index 0000000000000000000000000000000000000000..3fbb2f6ee8fa749e9ca965bc33a1a454a0960736 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev6/llama/unsloth/Llama-3.2-1B-Instruct/d089f0da86f10a7685d3.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev6", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json new file mode 100644 index 0000000000000000000000000000000000000000..b0348e42c046419cb685e7709928ba82ccb0874d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1ddb68657f4e9b80fbca.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0892f86c180eff37da12be6c5f7d56a1e690ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7029722448f7c89cc06f.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json new file mode 100644 index 0000000000000000000000000000000000000000..dc88da36e7c94ede4c92abd93b1c1d5dd2ecc448 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/788f593b3a42ce567731.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json new file mode 100644 index 0000000000000000000000000000000000000000..dc044183dea94d359d50f6e661e0b174fbc1ffba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/a7608b4dc11bf5302b4b.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 131072, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json new file mode 100644 index 0000000000000000000000000000000000000000..c36073f359a74d77dfc73208395c8f9b919aa26b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/9fda7263f62daac0b858.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json new file mode 100644 index 0000000000000000000000000000000000000000..b680c8c95d835b796c22606170cf84065c1f1201 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.2-1B-Instruct/f2fa6a9d809db681c502.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json new file mode 100644 index 0000000000000000000000000000000000000000..05f06b6ee29d23322e2959ed1b3c8aec20e1b5ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/4e70c222bcc3d1952f3a.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 128, + "tp_degree": 24 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7e0ee2a3aa3c619b5f4a2c24653a4317a1a8fc5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbea4c84293549c875612510a0afc6d3eb7d1162da2dfa4eeb9aa9c203094d45 +size 214507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..360e0a70ff29ed5219adde1d749221f97efeb668 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0191a5f7284b00665cc0+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a0aa03c074f7bcd907008b6b18d4d14bbe364dd98938d7f7e114ef757780db +size 472064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..56334eaa1684b160de192bf49e20a50f15000174 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b229130c77371a2a593d35bdd7958c7ce8e17d0acecd32cf976b1e3853e1f6d +size 20771 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5e6b99925ea26feeae476a6137aa4bf862afba67 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_15ff576e3081c9564b48+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45149e7a3a20985048c681bf179c530714fcb9d5759cd136e20b1938a243e313 +size 400384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4abc5a906cf2a130afce0e73fabeba74609db934 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94fe1a947149b651a7c79c80adadadad51f361bcfe470c5c63c2e0ab0550ba6 +size 16718 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19e4082a8bcf35c1f026f775a8eedbb4cafc8dc6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed82a5211308e005fecaac2b616b3b285f234a482ccd5ac1a1be4e47c53f2fc +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fdff7d538dbc95c3ca3559436742fe386eb1b1f0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae80c5021d8426bbcb8d44cb4eb336af3357f7559620811a34c60913211a1326 +size 22194 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..00c32ec2418806471f0d821cabc803ebd25950b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6bf86580e24a1eb275a27ab3a061cc5eb70cbaee41a624f938f0c23833e3fd +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25004c872e032ad759a0e534e8728ec30c8027cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0df5d765518a8a37e232a30badf00c24949b6001eda6acdb60c55bbf974e1df +size 20644 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b036f58375100f336c71055e32b35dd32b80767 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67a4245ec970153c19f88ea1e6c239644af48f531cd128eaf43d20a49b0a8a44 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..09428a971c440f90cc95e69b84251b578eaebbcd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15bb71673995075d87fb7b33a60286a408029ef670f778ac4c3c2d1b0eddfaf3 +size 27487 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..29398fa068dbde6486deb84e3a5c4929823fe611 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c8fcd1797ad84745497+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b79da8315b67f24207ac38ffc8b93434b39693e9be0b16306e94cdc5ba71b2 +size 215675904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..15dcb7c0155a50c6952b470945364207a9dba6c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8f9e3c0a49ed92e6325ffd642090a9d65dcbd565c812acb8bcc98d12204e08 +size 20644 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ac8f92e69e46ce2c24354d0fb1c5a17f7e375b15 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c49c0c3715f68c22b32f+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03dda9ca3d3466e0d8b4b92f6ac8e41df9732a4783392dea51d095dcbe121407 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6dbb25dabbb87c729eeca1a4e31ff4baf52a09b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905a7378ccf2c97e45de51a848471829b62b6943fcd2bb223f09bc2664a772c0 +size 19321 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6866bfae6d1e54fd95823bcf923e0cae34dc8c51 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df48af4bf01af7f3857e+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0273eb7a77e8824a8b886166b9e9b154668894dee0f497492232975e640763 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4bbd654cc641cf441497b54fb48390fa1c70b003 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e8c33c575c8ed49e4da1d523a92d03ed72286df0ed1b6aa03b1862fb706914 +size 261354 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b4dd1f6b39fe2085d1f82c42023d416df8aff36 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea55ff0c585401f8acb5+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211a60b58905bce5d4e87fc59bcf2e1e32e7395db6d7a73780cc807e48c9e274 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9fa996af8937c49a5a5ceced87c69913aea681d5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc82c7660e5291c3e6ff8e2d543a91c98d439a82ddfd9d72c6b94f879783aebf +size 16718 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c16d74d6b331f433f8ca43d79e3a115cc39b5eb8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b304168e9f32c2eb385c1e58472edb7e004bb6633814c997d09fd123b0aafac7 +size 134144