yujiepan committed
Commit 58ea2cb · verified · Parent: 86d4ed7

Upload folder using huggingface_hub

Files changed (3):
  1. README.md +34 -7
  2. config.json +12 -3
  3. model.safetensors +2 -2
README.md CHANGED
@@ -12,7 +12,7 @@ base_model:
 
 This tiny model is for debugging. It is randomly initialized with the config adapted from [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b).
 
-Note: This model is in BF16; quantized MXFP4 FFN is not used.
+Note: This model uses a quantized MXFP4 FFN; it requires the Triton MXFP4 kernels: `pip install -U triton git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels`
 
 ### Example usage:
 
@@ -33,8 +33,8 @@ model_id = "yujiepan/gpt-oss-tiny-random"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=torch.bfloat16,
-    device_map="cuda"
+    torch_dtype='auto',
+    device_map="cuda",
 )
 
 messages = [
@@ -53,6 +53,7 @@ print(outputs[0]["generated_text"][-1])
 ```python
 import json
 
+import safetensors
 import torch
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -76,7 +77,7 @@ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='mo
     config_json = json.load(f)
 config_json.update({
     "head_dim": 32,
-    "hidden_size": 16,
+    "hidden_size": 32,  # required by the Mxfp4GptOssExperts code
     "intermediate_size": 64,
     "layer_types": ["sliding_attention", "full_attention"],
     "num_attention_heads": 2,
@@ -93,7 +94,7 @@ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
 config = AutoConfig.from_pretrained(save_folder)
 print(config)
 torch.set_default_dtype(torch.bfloat16)
-model = AutoModelForCausalLM.from_config(config)
+model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
 torch.set_default_dtype(torch.float32)
 model.generation_config = GenerationConfig.from_pretrained(
     source_model_id, trust_remote_code=True,
@@ -106,6 +107,32 @@ with torch.no_grad():
 model.save_pretrained(save_folder)
 
 # mxfp4
-# model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config)
-# model.save_pretrained(save_folder, safe_serialization=True)
+from unittest.mock import Mock
+
+from transformers.quantizers.auto import AutoHfQuantizer
+from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
+_get_device_capability = torch.cuda.get_device_capability
+torch.cuda.get_device_capability = Mock(return_value=(9, 0))
+set_seed(42)
+bf16_state_dict = model.cuda().state_dict()
+hf_quantizer: Mxfp4HfQuantizer = AutoHfQuantizer.from_config(quantization_config)
+hf_quantizer.pre_quantized = False
+ffn_keys = ['model.layers.0.mlp.experts.down_proj', 'model.layers.0.mlp.experts.gate_up_proj',
+            'model.layers.1.mlp.experts.down_proj', 'model.layers.1.mlp.experts.gate_up_proj']
+for key in ffn_keys:
+    hf_quantizer.create_quantized_param(model, bf16_state_dict[key], key, "cuda", bf16_state_dict)
+state_dict = model.state_dict()
+del state_dict['lm_head.weight']
+for key in ffn_keys:
+    del state_dict[key]
+for k, v in state_dict.items():
+    if str(v.device) == 'meta':
+        print(k, v.device, v.shape)
+
+safetensors.torch.save_file(state_dict, f"{save_folder}/model.safetensors")
+with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
+    config = json.load(f)
+config['quantization_config'] = quantization_config
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config, f, indent=2)
 ```
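The updated README builds the tiny model in BF16, quantizes the expert FFN weights to MXFP4 in place, and re-serializes the checkpoint. As a quick sanity check that is not part of this commit, a sketch like the one below could confirm the uploaded checkpoint loads through the MXFP4 path and still generates; it assumes a CUDA device and the Triton MXFP4 kernels from the note above.

```python
# Hypothetical verification sketch -- not part of the committed files.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yujiepan/gpt-oss-tiny-random"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",   # picks up the dtype / mxfp4 settings from config.json
    device_map="cuda",    # assumes a CUDA GPU; the mxfp4 kernels need one
)
print(model.config.quantization_config)  # should report quant_method "mxfp4"

tokenizer = AutoTokenizer.from_pretrained(model_id)
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```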
config.json CHANGED
@@ -8,7 +8,7 @@
   "experts_per_token": 4,
   "head_dim": 32,
   "hidden_act": "silu",
-  "hidden_size": 16,
+  "hidden_size": 32,
   "initial_context_length": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 64,
@@ -42,5 +42,14 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_cache": true,
-  "vocab_size": 201088
-}
+  "vocab_size": 201088,
+  "quantization_config": {
+    "modules_to_not_convert": [
+      "model.layers.*.self_attn",
+      "model.layers.*.mlp.router",
+      "model.embed_tokens",
+      "lm_head"
+    ],
+    "quant_method": "mxfp4"
+  }
+}
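For reference, the `quantization_config` block added here can be read back straight from the Hub. The snippet below is only an illustration, not part of the commit, and uses nothing beyond `huggingface_hub` and the standard library.

```python
# Illustrative only: inspect the quantization_config added in this commit.
import json
from huggingface_hub import hf_hub_download

path = hf_hub_download("yujiepan/gpt-oss-tiny-random", filename="config.json")
with open(path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

qc = cfg["quantization_config"]
print(qc["quant_method"])            # "mxfp4"
print(qc["modules_to_not_convert"])  # attention, router, embeddings and lm_head stay unquantized
```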
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:304de707ed9cbe5bc3a7e574bb7da85f7afcbb9733cb39ada46200a17b740308
-size 6865464
+oid sha256:49ed1200e8437107e3a989c5582f4309c0eca660565638a6c4d95600d116b0f8
+size 12923264
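The LFS pointer shows model.safetensors roughly doubling in size, consistent with `hidden_size` going from 16 to 32 and the expert FFN weights now being stored in their MXFP4 serialization. A simple way to see exactly what the new file contains (again, not part of the commit) is to enumerate its tensors:

```python
# Illustrative only: list the tensors in the re-serialized checkpoint.
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download("yujiepan/gpt-oss-tiny-random", filename="model.safetensors")
with safe_open(path, framework="pt") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(f"{name}: dtype={tensor.dtype}, shape={tuple(tensor.shape)}")
```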