Update README.md (#4)
Update README.md (628b89a76d3199661bc5cf3d4ae19ed436d37c01)
Co-authored-by: Scott Roy <[email protected]>
    	
README.md CHANGED
@@ -30,24 +30,33 @@ We used following code to get the quantized model:

 ```
 from transformers import (
-
-
-
-
+  AutoModelForCausalLM,
+  AutoProcessor,
+  AutoTokenizer,
+  TorchAoConfig,
 )
 from torchao.quantization.quant_api import (
+    IntxWeightOnlyConfig,
     Int8DynamicActivationIntxWeightConfig,
+    AOPerModuleConfig
 )
-from torchao.quantization.granularity import PerGroup
+from torchao.quantization.granularity import PerGroup, PerAxis
 import torch

 model_id = "microsoft/Phi-4-mini-instruct"
+
+embedding_config = IntxWeightOnlyConfig(
+    weight_dtype=torch.int8,
+    granularity=PerAxis(0),
+)
 linear_config = Int8DynamicActivationIntxWeightConfig(
     weight_dtype=torch.int4,
     weight_granularity=PerGroup(32),
+    weight_scale_dtype=torch.bfloat16,
 )
-
-
+quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
+quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True)
+quantized_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)

 # Push to hub
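The hunk above ends at the `# Push to hub` comment; the push step itself is outside this diff (the next hunk's context shows `torch.save(state_dict, "phi4-mini-8dq4w.pt")`). For reference, a minimal sketch of what that step might look like with the standard `transformers` API, assuming the `quantized_model` and `tokenizer` objects created above; the repository id is a placeholder, not taken from the README:

```python
# Sketch only: publish the quantized model and tokenizer to the Hugging Face Hub.
# "your-username/phi4-mini-8dq4w" is a placeholder repo id, not taken from the diff.
hub_repo = "your-username/phi4-mini-8dq4w"

# safe_serialization=False is commonly needed for torchao-quantized weights,
# which may not round-trip through safetensors.
quantized_model.push_to_hub(hub_repo, safe_serialization=False)
tokenizer.push_to_hub(hub_repo)
```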
@@ -91,7 +100,7 @@ torch.save(state_dict, "phi4-mini-8dq4w.pt")
 The response from the manual testing is:

 ```
-Hello! As an AI, I don't have consciousness in the way humans do, but I
+Hello! As an AI, I don't have consciousness in the way humans do, but I am fully operational and here to assist you. How can I help you today?
 ```

 # Model Quality
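A response like the one above can be reproduced with a plain `generate` call on the quantized model. A minimal sketch, assuming the `quantized_model` and `tokenizer` from the quantization snippet; the prompt text is an assumed example, not the one used for the README:

```python
# Sketch: manual smoke test of the quantized model with a chat-style prompt.
messages = [{"role": "user", "content": "Are you conscious?"}]  # assumed prompt
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(quantized_model.device)
outputs = quantized_model.generate(inputs, max_new_tokens=128)
# Strip the prompt tokens and print only the generated continuation.
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```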
@@ -100,7 +109,7 @@ We rely on [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)

 ## baseline
 ```
-lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size
+lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
 ```

 ## 8dq4w
@@ -111,7 +120,7 @@ from lm_eval.utils import (
     make_table,
 )

-lm_eval_model = lm_eval.models.huggingface.HFLM(pretrained=quantized_model, batch_size=
+lm_eval_model = lm_eval.models.huggingface.HFLM(pretrained=quantized_model, batch_size=64)
 results = evaluator.simple_evaluate(
     lm_eval_model, tasks=["hellaswag"], device="cuda:0", batch_size="auto"
 )
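Pieced together with its imports, the evaluation snippet for the quantized model reads roughly as below. This is a sketch assembled from the fragments in this hunk and the `print(make_table(results))` context of the next one, assuming `quantized_model` is the in-memory model from the quantization code:

```python
# Sketch: evaluate the quantized model on HellaSwag with lm-evaluation-harness.
import lm_eval
from lm_eval import evaluator
from lm_eval.utils import make_table

lm_eval_model = lm_eval.models.huggingface.HFLM(pretrained=quantized_model, batch_size=64)
results = evaluator.simple_evaluate(
    lm_eval_model, tasks=["hellaswag"], device="cuda:0", batch_size="auto"
)
print(make_table(results))
```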
@@ -123,7 +132,7 @@ print(make_table(results))
 |                                  | Phi-4 mini-Ins | phi4-mini-8dq4w |
 | **Popular aggregated benchmark** |             |                   |
 | **Reasoning**                    |             |                   |
-| HellaSwag                        | 54.57        | 53.
+| HellaSwag                        | 54.57        | 53.24            |
 | **Multilingual**                 |             |                   |
 | **Math**                         |             |                   |
 | **Overall**                      | **TODO**    | **TODO**          |