dacorvo HF Staff committed on
Commit
3b6312a
·
verified ·
1 Parent(s): 4103db7

Add batch size 4 configurations for Llama 1B and 3B models

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama.json +16 -0
inference-cache-config/llama.json CHANGED
@@ -74,6 +74,14 @@
74
  "num_cores": 2,
75
  "auto_cast_type": "bf16"
76
  }
 
 
 
 
 
 
 
 
77
  ],
78
  "meta-llama/Llama-3.2-3B": [
79
  {
@@ -82,6 +90,14 @@
82
  "num_cores": 2,
83
  "auto_cast_type": "bf16"
84
  }
 
 
 
 
 
 
 
 
85
  ],
86
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0": [
87
  {
 
74
  "num_cores": 2,
75
  "auto_cast_type": "bf16"
76
  }
77
+ ],
78
+ "meta-llama/Llama-3.2-1B": [
79
+ {
80
+ "batch_size": 4,
81
+ "sequence_length": 4096,
82
+ "num_cores": 2,
83
+ "auto_cast_type": "bf16"
84
+ }
85
  ],
86
  "meta-llama/Llama-3.2-3B": [
87
  {
 
90
  "num_cores": 2,
91
  "auto_cast_type": "bf16"
92
  }
93
+ ],
94
+ "meta-llama/Llama-3.2-3B": [
95
+ {
96
+ "batch_size": 4,
97
+ "sequence_length": 4096,
98
+ "num_cores": 2,
99
+ "auto_cast_type": "bf16"
100
+ }
101
  ],
102
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0": [
103
  {