|  | --- | 
					
						
						|  | library_name: transformers | 
					
						
						|  | pipeline_tag: text-generation | 
					
						
						|  | inference: true | 
					
						
						|  | widget: | 
					
						
						|  | - text: Hello! | 
					
						
						|  | example_title: Hello world | 
					
						
						|  | group: Python | 
					
						
						|  | base_model: | 
					
						
						|  | - openai/gpt-oss-120b | 
					
						
						|  | --- | 
					
						
						|  |  | 
					
						
						|  | This tiny model is for debugging. It is randomly initialized with a config adapted from [openai/gpt-oss-120b](https://huggingface.co/openai/gpt-oss-120b). | 
					
						
						|  |  | 
					
						
						|  | Note: This model's weights are stored in BF16; the MXFP4-quantized FFN weights of the source model are not used. | 
					
						
						|  |  | 
					
						
						|  | ### Example usage: | 
					
						
						|  |  | 
					
						
						|  | - vLLM | 
					
						
						|  |  | 
					
						
						|  | ```bash | 
					
						
						|  | vllm serve yujiepan/gpt-oss-tiny-random | 
					
						
						|  | ``` | 
					
						
						|  |  | 
					
						
						|  | - Transformers | 
					
						
						|  |  | 
					
						
						|  | ```python | 
					
						
						|  | import torch | 
					
						
						|  | from transformers import pipeline | 
					
						
						|  |  | 
					
						
						|  | model_id = "yujiepan/gpt-oss-tiny-random" | 
					
						
						|  |  | 
					
						
						|  | pipe = pipeline( | 
					
						
						|  | "text-generation", | 
					
						
						|  | model=model_id, | 
					
						
						|  | torch_dtype=torch.bfloat16, | 
					
						
						|  | device_map="cuda" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | messages = [ | 
					
						
						|  | {"role": "user", "content": "Explain quantum mechanics clearly and concisely."}, | 
					
						
						|  | ] | 
					
						
						|  |  | 
					
						
						|  | outputs = pipe( | 
					
						
						|  | messages, | 
					
						
						|  | max_new_tokens=16, | 
					
						
						|  | ) | 
					
						
						|  | print(outputs[0]["generated_text"][-1]) | 
					
						
						|  | ``` | 
					
						
						|  |  | 
					
						
						|  | ### Code to create this repo: | 
					
						
						|  |  | 
					
						
						|  | ```python | 
					
						
						|  | import json | 
					
						
						|  |  | 
					
						
						|  | import torch | 
					
						
						|  | from huggingface_hub import hf_hub_download | 
					
						
						|  | from transformers import ( | 
					
						
						|  | AutoConfig, | 
					
						
						|  | AutoModelForCausalLM, | 
					
						
						|  | AutoProcessor, | 
					
						
						|  | AutoTokenizer, | 
					
						
						|  | GenerationConfig, | 
					
						
						|  | GptOssForCausalLM, | 
					
						
						|  | pipeline, | 
					
						
						|  | set_seed, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | source_model_id = "openai/gpt-oss-120b" | 
					
						
						|  | save_folder = "/tmp/yujiepan/gpt-oss-tiny-random" | 
					
						
						|  |  | 
					
						
						|  | processor = AutoProcessor.from_pretrained(source_model_id) | 
					
						
						|  | processor.save_pretrained(save_folder) | 
					
						
						|  |  | 
					
						
						|  | with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r') as f: | 
					
						
						|  | config_json = json.load(f) | 
					
						
						|  | config_json.update({ | 
					
						
						|  | "head_dim": 32, | 
					
						
						|  | "hidden_size": 32,  # required by Mxfp4GptOssExperts codes | 
					
						
						|  | "intermediate_size": 64, | 
					
						
						|  | "layer_types": ["sliding_attention", "full_attention"], | 
					
						
						|  | "num_attention_heads": 2, | 
					
						
						|  | "num_hidden_layers": 2, | 
					
						
						|  | "num_key_value_heads": 1, | 
					
						
						|  | "num_local_experts": 32, | 
					
						
						|  | "tie_word_embeddings": True, | 
					
						
						|  | }) | 
					
						
						|  | quantization_config = config_json['quantization_config'] | 
					
						
						|  | del config_json['quantization_config'] | 
					
						
						|  | with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f: | 
					
						
						|  | json.dump(config_json, f, indent=2) | 
					
						
						|  |  | 
					
						
						|  | config = AutoConfig.from_pretrained(save_folder) | 
					
						
						|  | print(config) | 
					
						
						|  | torch.set_default_dtype(torch.bfloat16) | 
					
						
						|  | model = AutoModelForCausalLM.from_config(config) | 
					
						
						|  | torch.set_default_dtype(torch.float32) | 
					
						
						|  | model.generation_config = GenerationConfig.from_pretrained( | 
					
						
						|  | source_model_id, trust_remote_code=True, | 
					
						
						|  | ) | 
					
						
						|  | set_seed(42) | 
					
						
						|  | with torch.no_grad(): | 
					
						
						|  | for name, p in sorted(model.named_parameters()): | 
					
						
						|  | torch.nn.init.normal_(p, 0, 0.1) | 
					
						
						|  | print(name, p.shape) | 
					
						
						|  | model.save_pretrained(save_folder) | 
					
						
						|  |  | 
					
						
						|  | # mxfp4 | 
					
						
						|  | from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer | 
					
						
						|  | # model = AutoModelForCausalLM.from_pretrained(save_folder, trust_remote_code=True, torch_dtype=torch.bfloat16, quantization_config=quantization_config) | 
					
						
						|  | # model.save_pretrained(save_folder, safe_serialization=True) | 
					
						
						|  | ``` |