Upload folder using huggingface_hub

- README.md +6 -32
- config.json +2 -11
- model.safetensors +2 -2
README.md CHANGED
@@ -12,7 +12,7 @@ base_model:
 
 This tiny model is for debugging. It is randomly initialized with the config adapted from [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b).
 
-Note: This model
+Note: This model is in BF16; quantized MXFP4 FFN is not used.
 
 ### Example usage:
 
@@ -33,8 +33,8 @@ model_id = "yujiepan/gpt-oss-tiny-random"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype=
-    device_map="cuda"
+    torch_dtype=torch.bfloat16,
+    device_map="cuda"
 )
 
 messages = [
@@ -53,7 +53,6 @@ print(outputs[0]["generated_text"][-1])
 ```python
 import json
 
-import safetensors
 import torch
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -94,7 +93,7 @@ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
 config = AutoConfig.from_pretrained(save_folder)
 print(config)
 torch.set_default_dtype(torch.bfloat16)
-model = AutoModelForCausalLM.from_config(config
+model = AutoModelForCausalLM.from_config(config)
 torch.set_default_dtype(torch.float32)
 model.generation_config = GenerationConfig.from_pretrained(
     source_model_id, trust_remote_code=True,
@@ -107,32 +106,7 @@ with torch.no_grad():
 model.save_pretrained(save_folder)
 
 # mxfp4
-from unittest.mock import Mock
-
-from transformers.quantizers.auto import AutoHfQuantizer
 from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
-
-
-set_seed(42)
-bf16_state_dict = model.cuda().state_dict()
-hf_quantizer: Mxfp4HfQuantizer = AutoHfQuantizer.from_config(quantization_config)
-hf_quantizer.pre_quantized = False
-ffn_keys = ['model.layers.0.mlp.experts.down_proj', 'model.layers.0.mlp.experts.gate_up_proj',
-            'model.layers.1.mlp.experts.down_proj', 'model.layers.1.mlp.experts.gate_up_proj']
-for key in ffn_keys:
-    hf_quantizer.create_quantized_param(model, bf16_state_dict[key], key, "cuda", bf16_state_dict)
-state_dict = model.state_dict()
-del state_dict['lm_head.weight']
-for key in ffn_keys:
-    del state_dict[key]
-for k, v in state_dict.items():
-    if str(v.device) == 'meta':
-        print(k, v.device, v.shape)
-
-safetensors.torch.save_file(state_dict, f"{save_folder}/model.safetensors")
-with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
-    config = json.load(f)
-config['quantization_config'] = quantization_config
-with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
-    json.dump(config, f, indent=2)
+# model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config)
+# model.save_pretrained(save_folder, safe_serialization=True)
 ```
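For context, the updated README usage block amounts to the following self-contained sketch. It only uses the calls visible in the diff (`pipeline`, a `messages` list, `outputs[0]["generated_text"][-1]`); the prompt text and `max_new_tokens` value are placeholders, not taken from the README.

```python
import torch
from transformers import pipeline

model_id = "yujiepan/gpt-oss-tiny-random"

# Load the tiny debugging checkpoint in BF16 on GPU; per the updated
# README note, no MXFP4-quantized FFN weights are involved.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)

messages = [
    {"role": "user", "content": "Hello!"},  # placeholder prompt
]
outputs = pipe(messages, max_new_tokens=16)  # placeholder length
print(outputs[0]["generated_text"][-1])
```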
config.json CHANGED
@@ -42,14 +42,5 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.56.0.dev0",
   "use_cache": true,
-  "vocab_size": 201088,
-  "quantization_config": {
-    "modules_to_not_convert": [
-      "model.layers.*.self_attn",
-      "model.layers.*.mlp.router",
-      "model.embed_tokens",
-      "lm_head"
-    ],
-    "quant_method": "mxfp4"
-  }
-}
+  "vocab_size": 201088
+}
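The removed lines are the `quantization_config` object that the old conversion script appended via its `json` round-trip. A minimal sketch of stripping it the same way (the `save_folder` path is hypothetical) would be:

```python
import json

save_folder = "./gpt-oss-tiny-random"  # hypothetical local checkout

with open(f"{save_folder}/config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Drop the MXFP4 metadata so the checkpoint is treated as plain BF16,
# matching the new README note.
config.pop("quantization_config", None)

with open(f"{save_folder}/config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)
```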
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:aefe8b9c4b4969f6d13c5d778760f3dce4e25134324b33677934550d9df02a7c
+size 13710176
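The `model.safetensors` entry is a Git LFS pointer file, so the diff only records the new blob's SHA-256 and byte size rather than tensor contents. A pointer in this format can be reproduced for any local file with a short sketch like the following (the file path is hypothetical):

```python
import hashlib
import os

def lfs_pointer(path: str) -> str:
    """Build a Git LFS pointer (spec v1) for a local file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading the whole file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{h.hexdigest()}\n"
        f"size {os.path.getsize(path)}\n"
    )

print(lfs_pointer("model.safetensors"))  # hypothetical local file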