Update README.md
README.md CHANGED

@@ -96,21 +96,9 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
 
 quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
-
 quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# TODO: use AOPerModuleConfig once fix for tied weights is landed
-quantize_(
-    quantized_model,
-    embedding_config,
-    lambda m, fqn: isinstance(m, torch.nn.Embedding)
-)
-quantize_(
-    quantized_model,
-    linear_config,
-)
-
 # Push to hub
 # USER_ID = "YOUR_USER_ID"
 # save_to = f"{USER_ID}/phi4-mini-8dq4w"
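For context, here is what the simplified example looks like end to end after this change. This is a minimal sketch, not the README verbatim: the imports, the `IntxWeightOnlyConfig` embedding config, the granularity values, and the model ids are assumptions filled in for illustration, assuming a torchao build that ships `AOPerModuleConfig` and a transformers `TorchAoConfig` that accepts `include_embedding`/`untie_embedding_weights` (both appear in the diff above).

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
from torchao.quantization import (
    AOPerModuleConfig,
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,  # assumed embedding config; the diff only shows `embedding_config`
    PerAxis,
    PerGroup,
)

# Hypothetical model ids for illustration; the README defines these earlier.
model_id = "microsoft/Phi-4-mini-instruct"
untied_model_id = "unsloth/Phi-4-mini-instruct"  # checkpoint with untied embedding/lm_head weights

# Assumed configs: int8 weight-only for the embedding, 8-bit dynamic
# activation / 4-bit weight (8da4w) for the linear layers. The exact
# dtypes/granularities here are illustrative, not taken from this diff.
embedding_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int8,
    granularity=PerAxis(0),
)
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
)

# Map module fully-qualified names to configs: the embedding gets
# embedding_config, everything else falls back to linear_config.
quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
quantization_config = TorchAoConfig(
    quant_type=quant_config,
    include_embedding=True,
    untie_embedding_weights=True,
    modules_to_not_convert=[],
)

# Quantization now happens inside from_pretrained; no manual passes afterwards.
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```

The point of the change: with `AOPerModuleConfig` routed through `TorchAoConfig`, both the embedding and the linear layers are quantized during `from_pretrained`, so the two manual `quantize_()` calls, and the TODO about tied weights they were working around, are no longer needed.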