Upload folder using huggingface_hub
- README.md +34 -7
- config.json +12 -3
- model.safetensors +2 -2
README.md CHANGED
@@ -12,7 +12,7 @@ base_model:
 
 This tiny model is for debugging. It is randomly initialized with the config adapted from [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b).
 
-Note: This model
+Note: This model uses a quantized MXFP4 FFN. `pip install -U triton git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels`
 
 ### Example usage:
 
@@ -33,8 +33,8 @@ model_id = "yujiepan/gpt-oss-tiny-random"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=
-    device_map="cuda"
+    torch_dtype='auto',
+    device_map="cuda",
 )
 
 messages = [
@@ -53,6 +53,7 @@ print(outputs[0]["generated_text"][-1])
 ```python
 import json
 
+import safetensors
 import torch
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -76,7 +77,7 @@ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='mo
     config_json = json.load(f)
 config_json.update({
     "head_dim": 32,
-    "hidden_size":
+    "hidden_size": 32,  # required by the Mxfp4GptOssExperts code
     "intermediate_size": 64,
     "layer_types": ["sliding_attention", "full_attention"],
     "num_attention_heads": 2,
@@ -93,7 +94,7 @@ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
 config = AutoConfig.from_pretrained(save_folder)
 print(config)
 torch.set_default_dtype(torch.bfloat16)
-model = AutoModelForCausalLM.from_config(config)
+model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
 torch.set_default_dtype(torch.float32)
 model.generation_config = GenerationConfig.from_pretrained(
     source_model_id, trust_remote_code=True,
@@ -106,6 +107,32 @@ with torch.no_grad():
 model.save_pretrained(save_folder)
 
 # mxfp4
-
-
+from unittest.mock import Mock
+
+from transformers.quantizers.auto import AutoHfQuantizer
+from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
+_get_device_capability = torch.cuda.get_device_capability
+torch.cuda.get_device_capability = Mock(return_value=(9, 0))
+set_seed(42)
+bf16_state_dict = model.cuda().state_dict()
+hf_quantizer: Mxfp4HfQuantizer = AutoHfQuantizer.from_config(quantization_config)
+hf_quantizer.pre_quantized = False
+ffn_keys = ['model.layers.0.mlp.experts.down_proj', 'model.layers.0.mlp.experts.gate_up_proj',
+            'model.layers.1.mlp.experts.down_proj', 'model.layers.1.mlp.experts.gate_up_proj']
+for key in ffn_keys:
+    hf_quantizer.create_quantized_param(model, bf16_state_dict[key], key, "cuda", bf16_state_dict)
+state_dict = model.state_dict()
+del state_dict['lm_head.weight']
+for key in ffn_keys:
+    del state_dict[key]
+for k, v in state_dict.items():
+    if str(v.device) == 'meta':
+        print(k, v.device, v.shape)
+
+safetensors.torch.save_file(state_dict, f"{save_folder}/model.safetensors")
+with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
+    config = json.load(f)
+config['quantization_config'] = quantization_config
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config, f, indent=2)
 ```
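Not part of the commit, but a minimal sanity-check sketch for the checkpoint this README describes: it assumes a CUDA device and the triton/triton_kernels install from the note above, and simply reloads the repo and runs one forward pass.

```python
# Hedged sketch (not from the README): reload the mxfp4 tiny model and check the output shape.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yujiepan/gpt-oss-tiny-random"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="cuda")

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.shape)  # expected (1, seq_len, 201088), matching the vocab_size in config.json
```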
config.json CHANGED
@@ -8,7 +8,7 @@
   "experts_per_token": 4,
   "head_dim": 32,
   "hidden_act": "silu",
-  "hidden_size":
+  "hidden_size": 32,
   "initial_context_length": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 64,
@@ -42,5 +42,14 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_cache": true,
-  "vocab_size": 201088
-}
+  "vocab_size": 201088,
+  "quantization_config": {
+    "modules_to_not_convert": [
+      "model.layers.*.self_attn",
+      "model.layers.*.mlp.router",
+      "model.embed_tokens",
+      "lm_head"
+    ],
+    "quant_method": "mxfp4"
+  }
+}
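As a hedged illustration (not in the commit), the `quantization_config` block added above is what transformers inspects to route loading through the mxfp4 quantizer; a quick check that the fields round-trip could look like:

```python
# Sketch: confirm the edited config fields are visible through transformers.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("yujiepan/gpt-oss-tiny-random")
print(config.hidden_size)          # 32, per the hunk above
print(config.quantization_config)  # dict with quant_method "mxfp4" and modules_to_not_convert
```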
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:49ed1200e8437107e3a989c5582f4309c0eca660565638a6c4d95600d116b0f8
+size 12923264
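The new size of 12,923,264 bytes is dominated by the bf16 embedding matrix: vocab_size 201088 × hidden_size 32 × 2 bytes ≈ 12.87 MB, leaving roughly 52 KB for the two tiny layers and the packed MXFP4 expert tensors. A hedged sketch (not part of the commit) for inspecting the shard locally:

```python
# Sketch: download the shard and list tensor names, dtypes, and shapes,
# e.g. to spot the uint8 blocks/scales written by the mxfp4 quantizer.
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download("yujiepan/gpt-oss-tiny-random", "model.safetensors")
with safe_open(path, framework="pt") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(name, tensor.dtype, tuple(tensor.shape))
```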