Update README.md

Browse files

Files changed (1) hide show

README.md +7 -11

README.md CHANGED Viewed

@@ -33,18 +33,11 @@ from transformers import (
   AutoModelForCausalLM,
   AutoProcessor,
   AutoTokenizer,
-  TorchAoConfig,
-)
-from torchao.quantization.quant_api import (
-    IntxWeightOnlyConfig,
-    Int8DynamicActivationIntxWeightConfig,
-    AOPerModuleConfig
 )
-from torchao.quantization.granularity import PerGroup, PerAxis
 import torch
 model_id = "microsoft/Phi-4-mini-instruct"
-untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 print(untied_model)
@@ -54,7 +47,7 @@ if getattr(untied_model.config.get_text_config(decoder=True), "tie_word_embeddin
     setattr(untied_model.config.get_text_config(decoder=True), "tie_word_embeddings", False)
 untied_model._tied_weights_keys = []
-untied_model.lm_head.weight = torch.nn.Parameter(quantized_model.lm_head.weight.clone())
 print("tied weights:", find_tied_parameters(untied_model))
@@ -91,7 +84,6 @@ USER_ID = "YOUR_USER_ID"
 MODEL_NAME = model_id.split("/")[-1]
 untied_model_id = f"{USER_ID}/{MODEL_NAME}-untied-weights"
 embedding_config = IntxWeightOnlyConfig(
     weight_dtype=torch.int8,
     granularity=PerAxis(0),
@@ -101,7 +93,11 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
     weight_granularity=PerGroup(32),
     weight_scale_dtype=torch.bfloat16,
 )
-quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # TODO: use AOPerModuleConfig once fix for tied weights is landed

   AutoModelForCausalLM,
   AutoProcessor,
   AutoTokenizer,
 )
 import torch
 model_id = "microsoft/Phi-4-mini-instruct"
+untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 print(untied_model)
     setattr(untied_model.config.get_text_config(decoder=True), "tie_word_embeddings", False)
 untied_model._tied_weights_keys = []
+untied_model.lm_head.weight = torch.nn.Parameter(untied_model.lm_head.weight.clone())
 print("tied weights:", find_tied_parameters(untied_model))
 MODEL_NAME = model_id.split("/")[-1]
 untied_model_id = f"{USER_ID}/{MODEL_NAME}-untied-weights"
 embedding_config = IntxWeightOnlyConfig(
     weight_dtype=torch.int8,
     granularity=PerAxis(0),
     weight_granularity=PerGroup(32),
     weight_scale_dtype=torch.bfloat16,
 )
+quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
+quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
+quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # TODO: use AOPerModuleConfig once fix for tied weights is landed