Update README.md
Browse files
README.md
CHANGED
|
@@ -127,9 +127,11 @@ tokenizer.push_to_hub(save_to)
|
|
| 127 |
# Model Quality
|
| 128 |
TODO
|
| 129 |
|
| 130 |
-
#
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
|----------------------------------|----------------|-------------------------------|
|
| 134 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 135 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
@@ -196,7 +198,7 @@ print(f"Peak Memory Usage: {mem:.02f} GB")
|
|
| 196 |
|
| 197 |
# Model Performance
|
| 198 |
|
| 199 |
-
|
| 200 |
| Benchmark | | |
|
| 201 |
|----------------------------------|----------------|-------------------------------|
|
| 202 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
|
|
|
| 127 |
# Model Quality
|
| 128 |
TODO
|
| 129 |
|
| 130 |
+
# Memory Usage
|
| 131 |
|
| 132 |
+
Tested on an NVIDIA H100 GPU.
|
| 133 |
+
|
| 134 |
+
| Memory | | |
|
| 135 |
|----------------------------------|----------------|-------------------------------|
|
| 136 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|
| 137 |
| Peak Memory | 65.72 GB | 34.54 GB (-47.44%) |
|
|
|
|
| 198 |
|
| 199 |
# Model Performance
|
| 200 |
|
| 201 |
+
Tested on an NVIDIA H100 GPU.
|
| 202 |
| Benchmark | | |
|
| 203 |
|----------------------------------|----------------|-------------------------------|
|
| 204 |
| | Qwen3-32B | Qwen3-32B-float8dq |
|