Commit b497f05 (verified) by lmassaron · Parent: b54d875

Training in progress, epoch 1

README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 base_model: google/gemma-3-1b-it
 library_name: transformers
-model_name: gemma-3-1B-it-function_calling
+model_name: gemma-3-1b-it-function_calling
 tags:
 - generated_from_trainer
 - trl
@@ -9,7 +9,7 @@ tags:
 licence: license
 ---
 
-# Model Card for gemma-3-1B-it-function_calling
+# Model Card for gemma-3-1b-it-function_calling
 
 This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it).
 It has been trained using [TRL](https://github.com/huggingface/trl).
@@ -20,7 +20,7 @@ It has been trained using [TRL](https://github.com/huggingface/trl).
 from transformers import pipeline
 
 question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
-generator = pipeline("text-generation", model="lmassaron/gemma-3-1B-it-function_calling", device="cuda")
+generator = pipeline("text-generation", model="lmassaron/gemma-3-1b-it-function_calling", device="cuda")
 output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
 print(output["generated_text"])
 ```
@@ -34,11 +34,11 @@ This model was trained with SFT.
 
 ### Framework versions
 
-- TRL: 0.17.0
-- Transformers: 4.51.3
-- Pytorch: 2.6.0+cu124
-- Datasets: 3.6.0
-- Tokenizers: 0.21.1
+- TRL: 0.23.0
+- Transformers: 4.56.1
+- Pytorch: 2.8.0
+- Datasets: 4.0.0
+- Tokenizers: 0.22.0
 
 ## Citations
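Since this repository ships a LoRA adapter rather than merged weights (see adapter_config.json and adapter_model.safetensors below), the pipeline quick start in the README is not the only loading path. A minimal sketch of loading the adapter explicitly with PEFT, assuming the repo id from the README; the merge step is optional and shown only for illustration:

```python
# Sketch: load the LoRA adapter on top of the base model via PEFT.
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

model_id = "lmassaron/gemma-3-1b-it-function_calling"  # repo id from the README
model = AutoPeftModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Optional: fold the adapter into the base weights for faster inference.
model = model.merge_and_unload()
```
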
adapter_config.json CHANGED
@@ -20,22 +20,25 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "embed_tokens",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "v_proj",
     "gate_proj",
     "down_proj",
     "lm_head",
-    "k_proj",
-    "v_proj",
-    "up_proj",
-    "o_proj",
-    "q_proj"
+    "embed_tokens",
+    "k_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }
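For orientation, the fields visible in the new adapter_config.json map onto a PEFT LoraConfig roughly as below. This is a sketch of the changed hunk only; fields outside it (e.g. lora_alpha, lora_dropout) do not appear in the diff and are left at their defaults here:

```python
# Sketch: LoraConfig mirroring the fields visible in the hunk above.
from peft import LoraConfig

config = LoraConfig(
    r=16,
    target_modules=[
        "q_proj", "up_proj", "o_proj", "v_proj",
        "gate_proj", "down_proj", "lm_head",
        "embed_tokens", "k_proj",
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
    use_rslora=False,
)
```
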
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5566bbb2855ca6935130a63d1ec7fc6804307891084ae08050201fb19a7d2fb
+oid sha256:ed8b924052fe2267113a50edee018c577ac7885245869292906f954a4a8c00e3
 size 1293936232
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
+{{ bos_token }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<start_of_turn>' + message['role'] + '
+' + message['content'] | trim + '<end_of_turn><eos>
+' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
+'}}{% endif %}
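The new template emits Gemma-style turns, skips any system message, and closes each turn with <end_of_turn><eos>. One way to sanity-check the rendering, assuming the tokenizer in this repo picks the template up from chat_template.jinja:

```python
# Sketch: render the chat template without running generation.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("lmassaron/gemma-3-1b-it-function_calling")
messages = [{"role": "user", "content": "What is the weather in Paris?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Expected shape per the template above:
# <bos><start_of_turn>user
# What is the weather in Paris?<end_of_turn><eos>
# <start_of_turn>model
```
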
special_tokens_map.json CHANGED
@@ -20,7 +20,13 @@
     "single_word": false
   },
   "eoi_token": "<end_of_image>",
-  "eos_token": "<eos>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "image_token": "<image_soft_token>",
   "pad_token": {
     "content": "<pad>",
tokenizer_config.json CHANGED
@@ -51401,7 +51401,6 @@
   ],
   "boi_token": "<start_of_image>",
   "bos_token": "<bos>",
-  "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<start_of_turn>' + message['role'] + '\n' + message['content'] | trim + '<end_of_turn><eos>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
   "eos_token": "<eos>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08cfde619b67b4ed75a2fabf6a957f8c566fbaa6d42993c00ee1c1094e2d5c08
-size 5688
+oid sha256:fc68671f86c1acc9e3dd33dc16b7ed34f69682b526c2d89a0c0b92ec788e05f2
+size 6161