alexmarques committed (verified)
Commit 04e53e0 · 1 Parent(s): 1373d3e

Update README.md

Files changed (1): README.md (+19 -19)
README.md CHANGED
@@ -56,7 +56,7 @@ This model can be deployed efficiently using the [vLLM](https://docs.vllm.ai/en/
  from vllm import LLM, SamplingParams
  from transformers import AutoTokenizer

- model_id = "neuralmagic-ent/phi-4-FP8-dynamic"
+ model_id = "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
  number_gpus = 1

  sampling_params = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=256)
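Note: the hunk above only renames `model_id` inside the README's vLLM deployment snippet (and fixes the leftover phi-4 name). For context, a minimal sketch of how that snippet is typically completed is shown below; the chat message and the generate/print calls are assumptions based on the standard vLLM API, not text taken from this diff.

```
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

model_id = "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
number_gpus = 1

sampling_params = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=256)

# Build a chat-formatted prompt with the model's tokenizer (illustrative message).
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages = [{"role": "user", "content": "Give me a short introduction to large language models."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Load the FP8-dynamic checkpoint across `number_gpus` GPUs and generate.
llm = LLM(model=model_id, tensor_parallel_size=number_gpus)
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)
```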
@@ -136,7 +136,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
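The MMLU command above is cut off at the hunk boundary. A hedged reconstruction of the full invocation under the new model name follows; the trailing `--num_fewshot 5` and `--batch_size auto` flags are assumptions (standard lm-evaluation-harness options), not visible in this diff.

```
# Hedged reconstruction of the full MMLU run; the last two flags are assumptions
# (standard lm-evaluation-harness options) and are not shown in the hunk above.
lm_eval \
  --model vllm \
  --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
  --num_fewshot 5 \
  --batch_size auto
```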
@@ -148,7 +148,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4064,max_gen_toks=1024,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4064,max_gen_toks=1024,tensor_parallel_size=1 \
  --tasks mmlu_cot_llama \
  --apply_chat_template \
  --num_fewshot 0 \
@@ -159,7 +159,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3940,max_gen_toks=100,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3940,max_gen_toks=100,tensor_parallel_size=1 \
  --tasks arc_challenge_llama \
  --apply_chat_template \
  --num_fewshot 0 \
@@ -170,7 +170,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4096,max_gen_toks=1024,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4096,max_gen_toks=1024,tensor_parallel_size=1 \
  --tasks gsm8k_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -182,7 +182,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
  --tasks hellaswag \
  --num_fewshot 10 \
  --batch_size auto
@@ -192,7 +192,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
  --tasks winogrande \
  --num_fewshot 5 \
  --batch_size auto
@@ -202,7 +202,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
  --tasks truthfulqa \
  --num_fewshot 0 \
  --batch_size auto
@@ -212,7 +212,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=1,enable_chunked_prefill=True \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=1,enable_chunked_prefill=True \
  --apply_chat_template \
  --fewshot_as_multiturn \
  --tasks leaderboard \
@@ -223,7 +223,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_pt_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -235,7 +235,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_es_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -247,7 +247,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_it_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -259,7 +259,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_de_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -271,7 +271,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_fr_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -283,7 +283,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_hi_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -295,7 +295,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  ```
  lm_eval \
  --model vllm \
- --model_args pretrained="neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
+ --model_args pretrained="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
  --tasks mmlu_th_llama \
  --fewshot_as_multiturn \
  --apply_chat_template \
@@ -307,7 +307,7 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  *Generation*
  ```
  python3 codegen/generate.py \
- --model neuralmagic-ent/Llama-3.3-70B-Instruct-FP8-dynamic \
+ --model RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic \
  --bs 16 \
  --temperature 0.2 \
  --n_samples 50 \
@@ -318,14 +318,14 @@ HumanEval and HumanEval+ evaluations were conducted using Neural Magic's fork of
  *Sanitization*
  ```
  python3 evalplus/sanitize.py \
- humaneval/neuralmagic-ent--Llama-3.3-70B-Instruct-FP8-dynamic_vllm_temp_0.2
+ humaneval/RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic_vllm_temp_0.2
  ```

  *Evaluation*
  ```
  evalplus.evaluate \
  --dataset humaneval \
- --samples humaneval/neuralmagic-ent--Llama-3.3-70B-Instruct-FP8-dynamic_vllm_temp_0.2-sanitized
+ --samples humaneval/RedHatAI--Llama-3.3-70B-Instruct-FP8-dynamic_vllm_temp_0.2-sanitized
  ```
  </details>
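The sanitization and evaluation paths change together with the model id because the samples directory name is derived from it. Below is a hedged sketch of that relationship, assuming the directory is simply the model id with `/` replaced by `--` plus a `_vllm_temp_<temperature>` suffix, as the paths in the hunk above suggest.

```
# Hedged sketch: derive the HumanEval samples directory from the model id.
# Assumption: "/" -> "--" plus "_vllm_temp_0.2", matching the paths in the diff.
MODEL="RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
SAMPLES="humaneval/${MODEL//\//--}_vllm_temp_0.2"

python3 evalplus/sanitize.py "${SAMPLES}"

evalplus.evaluate \
  --dataset humaneval \
  --samples "${SAMPLES}-sanitized"
```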
 