Benyucong committed on
Commit
057cbcc
·
verified ·
1 Parent(s): a7ddfa8

Model save

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: Qwen/Qwen3-4B
3
  library_name: transformers
4
  model_name: sft_quantum_circuit_gen_4B
5
  tags:
@@ -11,7 +11,7 @@ licence: license
11
 
12
  # Model Card for sft_quantum_circuit_gen_4B
13
 
14
- This model is a fine-tuned version of [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
-
31
 
32
 
33
  This model was trained with SFT.
 
1
  ---
2
+ base_model: Qwen/Qwen3-4B-Instruct-2507
3
  library_name: transformers
4
  model_name: sft_quantum_circuit_gen_4B
5
  tags:
 
11
 
12
  # Model Card for sft_quantum_circuit_gen_4B
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/benyucong-aalto-university/sft-quantum/runs/z3id6q7q)
31
 
32
 
33
  This model was trained with SFT.
config.json CHANGED
@@ -49,7 +49,7 @@
49
  "full_attention",
50
  "full_attention"
51
  ],
52
- "max_position_embeddings": 40960,
53
  "max_window_layers": 36,
54
  "model_type": "qwen3",
55
  "num_attention_heads": 32,
@@ -57,7 +57,7 @@
57
  "num_key_value_heads": 8,
58
  "rms_norm_eps": 1e-06,
59
  "rope_scaling": null,
60
- "rope_theta": 1000000,
61
  "sliding_window": null,
62
  "tie_word_embeddings": true,
63
  "torch_dtype": "float32",
 
49
  "full_attention",
50
  "full_attention"
51
  ],
52
+ "max_position_embeddings": 262144,
53
  "max_window_layers": 36,
54
  "model_type": "qwen3",
55
  "num_attention_heads": 32,
 
57
  "num_key_value_heads": 8,
58
  "rms_norm_eps": 1e-06,
59
  "rope_scaling": null,
60
+ "rope_theta": 5000000,
61
  "sliding_window": null,
62
  "tie_word_embeddings": true,
63
  "torch_dtype": "float32",
generation_config.json CHANGED
@@ -6,8 +6,8 @@
6
  151643
7
  ],
8
  "pad_token_id": 151643,
9
- "temperature": 0.6,
10
  "top_k": 20,
11
- "top_p": 0.95,
12
  "transformers_version": "4.55.0"
13
  }
 
6
  151643
7
  ],
8
  "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
  "top_k": 20,
11
+ "top_p": 0.8,
12
  "transformers_version": "4.55.0"
13
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fb53cd2bba1544ae11d4304a5acc06f9ec46feb6cc50709b48fa76293c8901c
3
  size 4987371192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5465e77694fda2a7477886106c8a1328cbdb5c64591ef99385e69d7b0357de
3
  size 4987371192
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bb7e4643b16ca63861334851bd43f3fdb6df313f6b07f7f32f5257c88c2802c
3
  size 4944309096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af427811bc195d4ed506cb3b4c8ba47321b076085c6c8eadf155671ab538d0d1
3
  size 4944309096
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8f38637c1e8a064dee35661916cefe79a89c0f3a11a5671569c4e14bdd7fca8
3
  size 4996758848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81efa7d8508a367520ec0f437bf3f90150e2e4f2c456a78004db5458895834e8
3
  size 4996758848
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42393c69a9f37dfb7d31164a3c6ec5821fddc340e663136272a3b9aa227fb1dc
3
- size 1158754848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caadcd3105e8b3ca76d1af7c8cba186b91a5e0a3375fc8413f887f9a05fd6cc7
3
+ size 2711855800
model.safetensors.index.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 4021787136,
4
- "total_size": 16087148544
5
  },
6
  "weight_map": {
 
7
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
  "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 502723392,
4
+ "total_size": 17640249344
5
  },
6
  "weight_map": {
7
+ "lm_head.weight": "model-00004-of-00004.safetensors",
8
  "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
  "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",