Update README.md
Browse files
README.md
CHANGED
@@ -334,7 +334,7 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
334 |
</thead>
|
335 |
<tbody>
|
336 |
<tr>
|
337 |
-
<
|
338 |
<td>Qwen/Qwen2-VL-72B-Instruct</td>
|
339 |
<td></td>
|
340 |
<td>0.3</td>
|
@@ -378,8 +378,8 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
378 |
<td>377</td>
|
379 |
</tr>
|
380 |
<tr>
|
381 |
-
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
382 |
<td>H100x2</td>
|
|
|
383 |
<td>1.70</td>
|
384 |
<td>0.8</td>
|
385 |
<td>236</td>
|
@@ -389,8 +389,8 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
389 |
<td>669</td>
|
390 |
</tr>
|
391 |
<tr>
|
392 |
-
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
393 |
<td>H100x1</td>
|
|
|
394 |
<td>2.35</td>
|
395 |
<td>1.3</td>
|
396 |
<td>350</td>
|
|
|
334 |
</thead>
|
335 |
<tbody>
|
336 |
<tr>
|
337 |
+
<td>A100x4</td>
|
338 |
<td>Qwen/Qwen2-VL-72B-Instruct</td>
|
339 |
<td></td>
|
340 |
<td>0.3</td>
|
|
|
378 |
<td>377</td>
|
379 |
</tr>
|
380 |
<tr>
|
|
|
381 |
<td>H100x2</td>
|
382 |
+
<td>neuralmagic/Qwen2-VL-72B-Instruct-FP8-Dynamic</td>
|
383 |
<td>1.70</td>
|
384 |
<td>0.8</td>
|
385 |
<td>236</td>
|
|
|
389 |
<td>669</td>
|
390 |
</tr>
|
391 |
<tr>
|
|
|
392 |
<td>H100x1</td>
|
393 |
+
<td>neuralmagic/Qwen2-VL-72B-Instruct-quantized.w4a16</td>
|
394 |
<td>2.35</td>
|
395 |
<td>1.3</td>
|
396 |
<td>350</td>
|