dacorvo HF Staff committed on
Commit
c518a94
·
verified ·
1 Parent(s): 7fe5807

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +245 -0
  2. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json +77 -0
  3. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json +77 -0
  4. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json +72 -0
  5. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json +72 -0
  6. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json +51 -0
  7. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json +51 -0
  8. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json +51 -0
  9. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json +73 -0
  10. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json +73 -0
  11. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json +73 -0
  12. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json +73 -0
  13. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json +73 -0
  14. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json +73 -0
  15. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json +55 -0
  16. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json +55 -0
  17. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json +77 -0
  18. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json +77 -0
  19. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json +55 -0
  20. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json +77 -0
  21. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json +78 -0
  22. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json +78 -0
  23. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json +73 -0
  24. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json +73 -0
  25. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json +73 -0
  26. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json +55 -0
  27. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json +52 -0
  28. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json +52 -0
  29. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json +52 -0
  30. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json +71 -0
  31. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json +71 -0
  32. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json +71 -0
  33. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json +71 -0
  34. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json +75 -0
  35. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json +75 -0
  36. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json +75 -0
  37. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json +72 -0
  38. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json +73 -0
  39. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json +73 -0
  40. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json +73 -0
  41. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json +51 -0
  42. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json +77 -0
  43. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json +77 -0
  44. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json +77 -0
  45. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json +77 -0
  46. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json +78 -0
  47. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json +78 -0
  48. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json +78 -0
  49. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json +73 -0
  50. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json +73 -0
.gitattributes CHANGED
@@ -8717,3 +8717,248 @@ neuronxcc-2.17.194.0+d312836f/MODULE_733b16d0b3a2314f4e14+165e9558/wrapped_neff.
8717
  neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8718
  neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8719
  neuronxcc-2.17.194.0+d312836f/MODULE_431f4323665d4b4d39f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8717
  neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8718
  neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8719
  neuronxcc-2.17.194.0+d312836f/MODULE_431f4323665d4b4d39f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8720
+ neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8721
+ neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8722
+ neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8723
+ neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8724
+ neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8725
+ neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8726
+ neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8727
+ neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8728
+ neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8729
+ neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8730
+ neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8731
+ neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8732
+ neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8733
+ neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8734
+ neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8735
+ neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8736
+ neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8737
+ neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8738
+ neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8739
+ neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8740
+ neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8741
+ neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8742
+ neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8743
+ neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8744
+ neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8745
+ neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8746
+ neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8747
+ neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text
8748
+ neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8749
+ neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8750
+ neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8751
+ neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8752
+ neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8753
+ neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8754
+ neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8755
+ neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8756
+ neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8757
+ neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8758
+ neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8759
+ neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8760
+ neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8761
+ neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8762
+ neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8763
+ neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8764
+ neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8765
+ neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8766
+ neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8767
+ neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8768
+ neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8769
+ neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8770
+ neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8771
+ neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8772
+ neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8773
+ neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8774
+ neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8775
+ neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8776
+ neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8777
+ neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8778
+ neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8779
+ neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8780
+ neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8781
+ neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8782
+ neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8783
+ neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8784
+ neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8785
+ neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8786
+ neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8787
+ neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8788
+ neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8789
+ neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8790
+ neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8791
+ neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8792
+ neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8793
+ neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8794
+ neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8795
+ neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8796
+ neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8797
+ neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8798
+ neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8799
+ neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8800
+ neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8801
+ neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8802
+ neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8803
+ neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8804
+ neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8805
+ neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8806
+ neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8807
+ neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8808
+ neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8809
+ neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8810
+ neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8811
+ neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8812
+ neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8813
+ neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8814
+ neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8815
+ neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8816
+ neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8817
+ neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8818
+ neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8819
+ neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8820
+ neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8821
+ neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8822
+ neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8823
+ neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8824
+ neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8825
+ neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8826
+ neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8827
+ neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8828
+ neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8829
+ neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8830
+ neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8831
+ neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8832
+ neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8833
+ neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8834
+ neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8835
+ neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8836
+ neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8837
+ neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8838
+ neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8839
+ neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8840
+ neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8841
+ neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8842
+ neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8843
+ neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8844
+ neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8845
+ neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8846
+ neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8847
+ neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8848
+ neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8849
+ neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8850
+ neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8851
+ neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8852
+ neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8853
+ neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8854
+ neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8855
+ neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8856
+ neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8857
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8858
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8859
+ neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8860
+ neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8861
+ neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8862
+ neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8863
+ neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8864
+ neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8865
+ neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8866
+ neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8867
+ neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8868
+ neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8869
+ neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8870
+ neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8871
+ neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8872
+ neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8873
+ neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8874
+ neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8875
+ neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8876
+ neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8877
+ neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8878
+ neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8879
+ neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8880
+ neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8881
+ neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8882
+ neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8883
+ neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8884
+ neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8885
+ neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8886
+ neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8887
+ neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8888
+ neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8889
+ neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8890
+ neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8891
+ neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8892
+ neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8893
+ neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8894
+ neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8895
+ neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8896
+ neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8897
+ neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8898
+ neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8899
+ neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8900
+ neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8901
+ neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8902
+ neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8903
+ neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8904
+ neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8905
+ neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8906
+ neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8907
+ neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8908
+ neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8909
+ neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8910
+ neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8911
+ neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8912
+ neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8913
+ neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8914
+ neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8915
+ neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8916
+ neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8917
+ neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8918
+ neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8919
+ neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8920
+ neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8921
+ neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8922
+ neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8923
+ neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8924
+ neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8925
+ neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8926
+ neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8927
+ neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8928
+ neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8929
+ neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8930
+ neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8931
+ neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8932
+ neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8933
+ neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8934
+ neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8935
+ neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8936
+ neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8937
+ neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8938
+ neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8939
+ neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8940
+ neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8941
+ neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8942
+ neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8943
+ neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8944
+ neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8945
+ neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8946
+ neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8947
+ neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8948
+ neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8949
+ neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8950
+ neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text
8951
+ neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8952
+ neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8953
+ neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8954
+ neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8955
+ neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8956
+ neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8957
+ neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8958
+ neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8959
+ neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8960
+ neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
8961
+ neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8962
+ neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8963
+ neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8964
+ neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 4,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
29
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
30
+ "continuous_batching": true,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": true,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 4,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.17.194.0+d312836f",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.0.dev0",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qk_layernorm": false,
53
+ "qkv_kernel_enabled": false,
54
+ "rpl_reduce_dtype": "bfloat16",
55
+ "sequence_length": 4096,
56
+ "sequence_parallel_enabled": false,
57
+ "speculation_length": 0,
58
+ "start_rank_id": 0,
59
+ "target": null,
60
+ "torch_dtype": "bfloat16",
61
+ "tp_degree": 2,
62
+ "vocab_parallel": false
63
+ },
64
+ "num_attention_heads": 32,
65
+ "num_hidden_layers": 32,
66
+ "num_key_value_heads": 32,
67
+ "original_max_position_embeddings": 4096,
68
+ "partial_rotary_factor": 1.0,
69
+ "resid_pdrop": 0.0,
70
+ "rms_norm_eps": 1e-05,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000.0,
73
+ "sliding_window": 2047,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 32064
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-1.7B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 28,
17
+ "model_type": "qwen3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "Qwen/Qwen3-1.7B",
26
+ "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev0",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 28,
63
+ "num_key_value_heads": 8,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 1000000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": true,
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-1.7B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 28,
17
+ "model_type": "qwen3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "Qwen/Qwen3-1.7B",
26
+ "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev0",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 28,
63
+ "num_key_value_heads": 8,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 1000000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": true,
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "fp16",
27
+ "batch_size": 2,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": true,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.3.0.dev1",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "bf16",
27
+ "batch_size": 1,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": false,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.3.0.dev1",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "fp16",
27
+ "batch_size": 1,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": false,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.3.0.dev1",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.3-8b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 0.0078125,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 4096,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 12800,
16
+ "logits_scaling": 16.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct",
28
+ "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev1",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49159
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "bf16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 2,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": true,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 128,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 128,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 128,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "HloNeuronConfig",
23
+ "all_reduce_dtype": null,
24
+ "allow_flash_attention": false,
25
+ "attention_layout": "HSB",
26
+ "attn_output_transposed": false,
27
+ "auto_cast_type": "bf16",
28
+ "batch_size": 4,
29
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
30
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
31
+ "collectives_layout": "HSB",
32
+ "continuous_batching": true,
33
+ "fuse_qkv": true,
34
+ "group_query_attention": null,
35
+ "log_softmax_scores": false,
36
+ "neuronxcc_version": "2.17.194.0+d312836f",
37
+ "optimum_neuron_version": "0.3.0.dev1",
38
+ "output_all_logits": false,
39
+ "sequence_length": 4096,
40
+ "tp_degree": 2
41
+ },
42
+ "num_attention_heads": 32,
43
+ "num_hidden_layers": 32,
44
+ "num_key_value_heads": 32,
45
+ "original_max_position_embeddings": 4096,
46
+ "partial_rotary_factor": 1.0,
47
+ "resid_pdrop": 0.0,
48
+ "rms_norm_eps": 1e-05,
49
+ "rope_scaling": null,
50
+ "rope_theta": 10000.0,
51
+ "sliding_window": 2047,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 32064
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "bf16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 2,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": true,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.3.0.dev1",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 4,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 4,
35
+ "max_context_length": 4096,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 4096,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 4096,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 24,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 24,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 1,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 1,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev1",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-1.7B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 28,
17
+ "model_type": "qwen3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "Qwen/Qwen3-1.7B",
26
+ "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev1",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 28,
63
+ "num_key_value_heads": 8,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 1000000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": true,
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.17.194.0+d312836f",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "bf16",
27
+ "batch_size": 4,
28
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
29
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": true,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": "shard-over-heads",
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.3.0.dev2",
37
+ "output_all_logits": false,
38
+ "sequence_length": 4096,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 32,
42
+ "num_hidden_layers": 40,
43
+ "num_key_value_heads": 8,
44
+ "residual_multiplier": 0.22,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 5000000.0,
48
+ "tie_word_embeddings": true,
49
+ "use_cache": true,
50
+ "vocab_size": 49155
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 131072,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 131072,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 131072,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }