Update README.md
Browse files
README.md
CHANGED
|
@@ -129,9 +129,7 @@ TODO
|
|
| 129 |
|
| 130 |
# Memory Usage
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
| Memory | | |
|
| 135 |
|----------------------------------|----------------|-------------------------------|
|
| 136 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 137 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
@@ -198,8 +196,8 @@ print(f"Peak Memory Usage: {mem:.02f} GB")
|
|
| 198 |
|
| 199 |
# Model Performance
|
| 200 |
|
| 201 |
-
|
| 202 |
-
| Benchmark
|
| 203 |
|----------------------------------|----------------|-------------------------------|
|
| 204 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 205 |
| latency (batch_size=1) | 9.1s | 5.77s (-36.6%) |
|
|
|
|
| 129 |
|
| 130 |
# Memory Usage
|
| 131 |
|
| 132 |
+
| Memory (tested on H100) | | |
|
|
|
|
|
|
|
| 133 |
|----------------------------------|----------------|-------------------------------|
|
| 134 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 135 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
|
|
| 196 |
|
| 197 |
# Model Performance
|
| 198 |
|
| 199 |
+
|
| 200 |
+
| Benchmark (tested on H100) | | |
|
| 201 |
|----------------------------------|----------------|-------------------------------|
|
| 202 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 203 |
| latency (batch_size=1) | 9.1s | 5.77s (-36.6%) |
|