Model save

Files changed (8) hide show

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: Qwen/Qwen3-4B
 library_name: transformers
 model_name: sft_quantum_circuit_gen_4B
 tags:
@@ -11,7 +11,7 @@ licence: license
 # Model Card for sft_quantum_circuit_gen_4B
-This model is a fine-tuned version of [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
 This model was trained with SFT.

 ---
+base_model: Qwen/Qwen3-4B-Instruct-2507
 library_name: transformers
 model_name: sft_quantum_circuit_gen_4B
 tags:
 # Model Card for sft_quantum_circuit_gen_4B
+This model is a fine-tuned version of [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/benyucong-aalto-university/sft-quantum/runs/z3id6q7q)
 This model was trained with SFT.

config.json CHANGED Viewed

@@ -49,7 +49,7 @@
     "full_attention",
     "full_attention"
   ],
-  "max_position_embeddings": 40960,
   "max_window_layers": 36,
   "model_type": "qwen3",
   "num_attention_heads": 32,
@@ -57,7 +57,7 @@
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
-  "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": true,
   "torch_dtype": "float32",

     "full_attention",
     "full_attention"
   ],
+  "max_position_embeddings": 262144,
   "max_window_layers": 36,
   "model_type": "qwen3",
   "num_attention_heads": 32,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
+  "rope_theta": 5000000,
   "sliding_window": null,
   "tie_word_embeddings": true,
   "torch_dtype": "float32",

generation_config.json CHANGED Viewed

@@ -6,8 +6,8 @@
     151643
   ],
   "pad_token_id": 151643,
-  "temperature": 0.6,
   "top_k": 20,
-  "top_p": 0.95,
   "transformers_version": "4.55.0"
 }

     151643
   ],
   "pad_token_id": 151643,
+  "temperature": 0.7,
   "top_k": 20,
+  "top_p": 0.8,
   "transformers_version": "4.55.0"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fb53cd2bba1544ae11d4304a5acc06f9ec46feb6cc50709b48fa76293c8901c
 size 4987371192

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b5465e77694fda2a7477886106c8a1328cbdb5c64591ef99385e69d7b0357de
 size 4987371192

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bb7e4643b16ca63861334851bd43f3fdb6df313f6b07f7f32f5257c88c2802c
 size 4944309096

 version https://git-lfs.github.com/spec/v1
+oid sha256:af427811bc195d4ed506cb3b4c8ba47321b076085c6c8eadf155671ab538d0d1
 size 4944309096

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8f38637c1e8a064dee35661916cefe79a89c0f3a11a5671569c4e14bdd7fca8
 size 4996758848

 version https://git-lfs.github.com/spec/v1
+oid sha256:81efa7d8508a367520ec0f437bf3f90150e2e4f2c456a78004db5458895834e8
 size 4996758848

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42393c69a9f37dfb7d31164a3c6ec5821fddc340e663136272a3b9aa227fb1dc
-size 1158754848

 version https://git-lfs.github.com/spec/v1
+oid sha256:caadcd3105e8b3ca76d1af7c8cba186b91a5e0a3375fc8413f887f9a05fd6cc7
+size 2711855800

model.safetensors.index.json CHANGED Viewed

@@ -1,9 +1,10 @@
 {
   "metadata": {
-    "total_parameters": 4021787136,
-    "total_size": 16087148544
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",

 {
   "metadata": {
+    "total_parameters": 502723392,
+    "total_size": 17640249344
   },
   "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",