Add batch size 4 configurations for Llama 1B and 3B models
Browse files
inference-cache-config/llama.json
CHANGED
@@ -74,6 +74,14 @@
|
|
74 |
"num_cores": 2,
|
75 |
"auto_cast_type": "bf16"
|
76 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
],
|
78 |
"meta-llama/Llama-3.2-3B": [
|
79 |
{
|
@@ -82,6 +90,14 @@
|
|
82 |
"num_cores": 2,
|
83 |
"auto_cast_type": "bf16"
|
84 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
],
|
86 |
"TinyLlama/TinyLlama-1.1B-Chat-v1.0": [
|
87 |
{
|
|
|
74 |
"num_cores": 2,
|
75 |
"auto_cast_type": "bf16"
|
76 |
}
|
77 |
+
],
|
78 |
+
"meta-llama/Llama-3.2-1B": [
|
79 |
+
{
|
80 |
+
"batch_size": 4,
|
81 |
+
"sequence_length": 4096,
|
82 |
+
"num_cores": 2,
|
83 |
+
"auto_cast_type": "bf16"
|
84 |
+
}
|
85 |
],
|
86 |
"meta-llama/Llama-3.2-3B": [
|
87 |
{
|
|
|
90 |
"num_cores": 2,
|
91 |
"auto_cast_type": "bf16"
|
92 |
}
|
93 |
+
],
|
94 |
+
"meta-llama/Llama-3.2-3B": [
|
95 |
+
{
|
96 |
+
"batch_size": 4,
|
97 |
+
"sequence_length": 4096,
|
98 |
+
"num_cores": 2,
|
99 |
+
"auto_cast_type": "bf16"
|
100 |
+
}
|
101 |
],
|
102 |
"TinyLlama/TinyLlama-1.1B-Chat-v1.0": [
|
103 |
{
|