aifeifei798 committed
Commit 6656e66 · verified · Parent: 2171048

Upload 17 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "./gemma-3-4b-it-qat-unsloth-bnb-4bit",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "qalora_group_size": 16,
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "down_proj",
+     "q_proj",
+     "gate_proj",
+     "v_proj",
+     "k_proj",
+     "up_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_qalora": false,
+   "use_rslora": false
+ }
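
This is a standard PEFT LoRA configuration: rank r = 16, lora_alpha = 16, adapters on every attention and MLP projection, saved in inference mode against the local 4-bit QAT base model named above. A minimal sketch of attaching the adapter with peft (the model class and loading options are assumptions, not part of this repo):

    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    # Path taken from "base_model_name_or_path" in adapter_config.json.
    base = AutoModelForCausalLM.from_pretrained(
        "./gemma-3-4b-it-qat-unsloth-bnb-4bit",
        device_map="auto",  # assumption: automatic device placement
    )

    # "." = the directory holding adapter_config.json and
    # adapter_model.safetensors from this commit.
    model = PeftModel.from_pretrained(base, ".")
    model.eval()  # matches "inference_mode": true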
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4997c1f8b557d838958b7aef19c340b68397365744e832989fcc224d00bcc5a
+ size 131252288
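
The binary weights themselves live in Git LFS; the repository stores only the three-line pointer above (version, oid, size). A small sketch that reads such a pointer file, following the git-lfs v1 format shown:

    def parse_lfs_pointer(path: str) -> dict:
        """Split a git-lfs v1 pointer file into its key/value fields."""
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = parse_lfs_pointer("adapter_model.safetensors")
    print(ptr["oid"], int(ptr["size"]))  # sha256:d4997c1f..., 131252288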
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<image_soft_token>": 262144
+ }
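
The only vocabulary addition is the image placeholder token at id 262144. A quick consistency check (a sketch, assuming the tokenizer files from this commit sit in the current directory):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")
    # Should print 262144, matching added_tokens.json.
    print(tok.convert_tokens_to_ids("<image_soft_token>"))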
chat_template.jinja ADDED
@@ -0,0 +1,47 @@
+ {{ bos_token }}
+ {%- if messages[0]['role'] == 'system' -%}
+     {%- if messages[0]['content'] is string -%}
+         {%- set first_user_prefix = messages[0]['content'] + '
+ 
+ ' -%}
+     {%- else -%}
+         {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+ 
+ ' -%}
+     {%- endif -%}
+     {%- set loop_messages = messages[1:] -%}
+ {%- else -%}
+     {%- set first_user_prefix = "" -%}
+     {%- set loop_messages = messages -%}
+ {%- endif -%}
+ {%- for message in loop_messages -%}
+     {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+         {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+     {%- endif -%}
+     {%- if (message['role'] == 'assistant') -%}
+         {%- set role = "model" -%}
+     {%- else -%}
+         {%- set role = message['role'] -%}
+     {%- endif -%}
+     {{ '<start_of_turn>' + role + '
+ ' + (first_user_prefix if loop.first else "") }}
+     {%- if message['content'] is string -%}
+         {{ message['content'] | trim }}
+     {%- elif message['content'] is iterable -%}
+         {%- for item in message['content'] -%}
+             {%- if item['type'] == 'image' -%}
+                 {{ '<start_of_image>' }}
+             {%- elif item['type'] == 'text' -%}
+                 {{ item['text'] | trim }}
+             {%- endif -%}
+         {%- endfor -%}
+     {%- else -%}
+         {{ raise_exception("Invalid content type") }}
+     {%- endif -%}
+     {{ '<end_of_turn>
+ ' }}
+ {%- endfor -%}
+ {%- if add_generation_prompt -%}
+     {{'<start_of_turn>model
+ '}}
+ {%- endif -%}
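
The template folds a leading system message into the first user turn, renames the assistant role to model, enforces strict user/model alternation, and wraps each turn in <start_of_turn>…<end_of_turn>. It is normally applied through the tokenizer rather than by hand; a sketch with illustrative messages:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")
    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    prompt = tok.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Ends with "<start_of_turn>model\n", ready for generation.
    print(prompt)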
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e083f429974ebe562fcb34b65fb62acf98f6a8b4c3603dfd906b2105a44176a
+ size 62333579
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": null,
+   "do_normalize": true,
+   "do_pan_and_scan": null,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "Gemma3ImageProcessor",
+   "image_seq_length": 256,
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "pan_and_scan_max_num_crops": null,
+   "pan_and_scan_min_crop_size": null,
+   "pan_and_scan_min_ratio_to_activate": null,
+   "processor_class": "Gemma3Processor",
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 896,
+     "width": 896
+   }
+ }
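
Per this config, images are resized to 896×896 (resample 2 = PIL bilinear), rescaled by 1/255 (0.00392156…), and normalized with mean/std 0.5, so pixel values land in roughly [-1, 1]; each image then occupies 256 soft tokens. A sketch of running just the image processor (the input file is illustrative):

    from PIL import Image
    from transformers import AutoImageProcessor

    # Resolves to Gemma3ImageProcessor via "image_processor_type".
    ip = AutoImageProcessor.from_pretrained(".")
    image = Image.open("sample.jpg")  # illustrative input
    out = ip(images=image, return_tensors="pt")
    # Expect shape (1, 3, 896, 896) and values in about [-1, 1].
    print(out["pixel_values"].shape)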
processor_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "image_seq_length": 256,
+   "processor_class": "Gemma3Processor"
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1d565802a8e26c4e8a31328752b7a7fdc186d9401aa008e65697d0ad8c22e33
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6862e802a3e558b1042aa6b7ef87427ce4c4fa2eec06c7fbb6e6a22587b0b5e7
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "boi_token": "<start_of_image>",
+   "bos_token": {
+     "content": "<bos>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eoi_token": "<end_of_image>",
+   "eos_token": {
+     "content": "<end_of_turn>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "image_token": "<image_soft_token>",
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
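
Note that eos_token is mapped to <end_of_turn>, so generation stops at the end of a chat turn. A one-line check (a sketch):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(".")
    print(tok.bos_token, tok.eos_token, tok.pad_token)  # <bos> <end_of_turn> <pad>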
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
+ size 33384568
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,2421 @@
+ {
+   "best_global_step": null,
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.4465031016252844,
+   "eval_steps": 500,
+   "global_step": 3410,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [ ... ]
+ }

The log_history array records one entry every 10 steps, each with epoch, grad_norm, learning_rate, loss, and step. The rendered diff breaks off partway through the file (the last visible record is cut mid-entry); the records shown are tabulated below.

step | epoch | grad_norm | learning_rate | loss
10 | 0.0013093932598981946 | 0.9729704260826111 | 0.00018 | 2.9613
20 | 0.002618786519796389 | 0.4988560676574707 | 0.00019976402726796014 | 2.2501
30 | 0.003928179779694584 | 0.3629654049873352 | 0.0001995018353434714 | 1.9558
40 | 0.005237573039592778 | 0.42317306995391846 | 0.0001992396434189827 | 1.8904
50 | 0.006546966299490973 | 0.4342662990093231 | 0.00019897745149449398 | 1.9487
60 | 0.007856359559389167 | 0.4164058268070221 | 0.00019871525957000524 | 1.845
70 | 0.009165752819287363 | 0.38950663805007935 | 0.0001984530676455165 | 1.8264
80 | 0.010475146079185557 | 0.42093154788017273 | 0.00019819087572102778 | 1.8418
90 | 0.011784539339083753 | 0.4716477394104004 | 0.00019792868379653908 | 1.8346
100 | 0.013093932598981946 | 0.4358816146850586 | 0.00019766649187205035 | 1.8271
110 | 0.014403325858880142 | 0.45478910207748413 | 0.00019740429994756162 | 1.7506
120 | 0.015712719118778334 | 0.4366815388202667 | 0.00019714210802307289 | 1.7854
130 | 0.01702211237867653 | 0.45096880197525024 | 0.00019687991609858418 | 1.779
140 | 0.018331505638574726 | 0.4566694498062134 | 0.00019661772417409545 | 1.7509
150 | 0.01964089889847292 | 0.4729042649269104 | 0.00019635553224960672 | 1.7271
160 | 0.020950292158371114 | 0.46566858887672424 | 0.000196093340325118 | 1.714
170 | 0.02225968541826931 | 0.45467349886894226 | 0.00019583114840062926 | 1.702
180 | 0.023569078678167505 | 0.434721440076828 | 0.00019556895647614055 | 1.7162
190 | 0.024878471938065697 | 0.5182896852493286 | 0.00019530676455165182 | 1.688
200 | 0.026187865197963893 | 0.5060753226280212 | 0.0001950445726271631 | 1.6955
210 | 0.02749725845786209 | 0.46147406101226807 | 0.00019478238070267436 | 1.681
220 | 0.028806651717760284 | 0.4517662823200226 | 0.00019452018877818563 | 1.6936
230 | 0.030116044977658477 | 0.44920527935028076 | 0.00019425799685369693 | 1.6633
240 | 0.03142543823755667 | 0.5066579580307007 | 0.0001939958049292082 | 1.6872
250 | 0.03273483149745487 | 0.5238184928894043 | 0.00019373361300471946 | 1.6255
260 | 0.03404422475735306 | 0.4943958520889282 | 0.00019347142108023073 | 1.6499
270 | 0.03535361801725126 | 0.48346492648124695 | 0.00019320922915574203 | 1.672
280 | 0.03666301127714945 | 0.4401436746120453 | 0.0001929470372312533 | 1.6863
290 | 0.037972404537047644 | 0.4602312743663788 | 0.00019268484530676457 | 1.646
300 | 0.03928179779694584 | 0.4927528202533722 | 0.00019242265338227584 | 1.6252
310 | 0.040591191056844035 | 0.5075507760047913 | 0.0001921604614577871 | 1.6218
320 | 0.04190058431674223 | 0.5239428877830505 | 0.0001918982695332984 | 1.6354
330 | 0.043209977576640426 | 0.5954804420471191 | 0.00019163607760880967 | 1.7022
340 | 0.04451937083653862 | 0.5364096760749817 | 0.00019137388568432094 | 1.5981
350 | 0.04582876409643681 | 0.55096435546875 | 0.0001911116937598322 | 1.6211
360 | 0.04713815735633501 | 0.5193445682525635 | 0.00019084950183534348 | 1.6195
370 | 0.0484475506162332 | 0.528788685798645 | 0.00019058730991085477 | 1.6076
380 | 0.049756943876131395 | 0.5360815525054932 | 0.00019032511798636604 | 1.5912
390 | 0.051066337136029594 | 0.5031074285507202 | 0.0001900629260618773 | 1.6157
400 | 0.052375730395927786 | 0.5149925351142883 | 0.00018980073413738858 | 1.579
410 | 0.053685123655825985 | 0.5419250726699829 | 0.00018953854221289985 | 1.6242
420 | 0.05499451691572418 | 0.5513054728507996 | 0.00018927635028841112 | 1.5948
430 | 0.05630391017562237 | 0.5670781135559082 | 0.0001890141583639224 | 1.5314
440 | 0.05761330343552057 | 0.5327165722846985 | 0.00018875196643943366 | 1.5716
450 | 0.05892269669541876 | 0.5244112610816956 | 0.00018848977451494493 | 1.5347
460 | 0.06023208995531695 | 0.5349589586257935 | 0.00018822758259045622 | 1.564
470 | 0.06154148321521515 | 0.5296887755393982 | 0.0001879653906659675 | 1.5779
480 | 0.06285087647511334 | 0.5426337718963623 | 0.00018770319874147876 | 1.5112
490 | 0.06416026973501154 | 0.5532763004302979 | 0.00018744100681699003 | 1.5458
500 | 0.06546966299490974 | 0.5318668484687805 | 0.00018717881489250133 | 1.5597
510 | 0.06677905625480793 | 0.6084654331207275 | 0.0001869166229680126 | 1.5485
520 | 0.06808844951470612 | 0.5626131296157837 | 0.00018665443104352386 | 1.5217
530 | 0.06939784277460431 | 0.528758704662323 | 0.00018639223911903513 | 1.5343
540 | 0.07070723603450252 | 0.5894292593002319 | 0.0001861300471945464 | 1.5604
550 | 0.07201662929440071 | 0.5676683187484741 | 0.0001858678552700577 | 1.5216
560 | 0.0733260225542989 | 0.6381473541259766 | 0.00018560566334556897 | 1.4334
570 | 0.0746354158141971 | 0.6644160151481628 | 0.00018534347142108024 | 1.4832
580 | 0.07594480907409529 | 0.5856960415840149 | 0.0001850812794965915 | 1.5118
590 | 0.07725420233399348 | 0.5892801880836487 | 0.00018481908757210277 | 1.5028
600 | 0.07856359559389169 | 0.5674527883529663 | 0.00018455689564761407 | 1.5125
610 | 0.07987298885378988 | 0.6059868335723877 | 0.00018429470372312534 | 1.4543
620 | 0.08118238211368807 | 0.6255605816841125 | 0.0001840325117986366 | 1.4851
630 | 0.08249177537358626 | 0.5904423594474792 | 0.00018377031987414788 | 1.4154
640 | 0.08380116863348445 | 0.6035749912261963 | 0.00018350812794965917 | 1.4276
650 | 0.08511056189338265 | 0.597172737121582 | 0.00018324593602517044 | 1.4736
660 | 0.08641995515328085 | 0.6352164149284363 | 0.0001829837441006817 | 1.4975
670 | 0.08772934841317905 | 0.5500873327255249 | 0.00018272155217619298 | 1.4578
680 | 0.08903874167307724 | 0.6423613429069519 | 0.00018245936025170425 | 1.3926
690 | 0.09034813493297543 | 0.665908694267273 | 0.00018219716832721555 | 1.4548
700 | 0.09165752819287362 | 0.6354024410247803 | 0.00018193497640272682 | 1.5
710 | 0.09296692145277183 | 0.6588740348815918 | 0.00018167278447823808 | 1.3609
720 | 0.09427631471267002 | 0.6754702925682068 | 0.00018141059255374935 | 1.3432
730 | 0.09558570797256821 | 0.6337271332740784 | 0.00018114840062926062 | 1.4439
740 | 0.0968951012324664 | 0.6592088937759399 | 0.00018088620870477192 | 1.3949
750 | 0.0982044944923646 | 0.6700498461723328 | 0.0001806240167802832 | 1.4046
760 | 0.09951388775226279 | 0.708410382270813 | 0.00018036182485579446 | 1.3021
770 | 0.100823281012161 | 0.6718457937240601 | 0.00018009963293130573 | 1.3769
780 | 0.10213267427205919 | 0.661522388458252 | 0.00017983744100681702 | 1.434
790 | 0.10344206753195738 | 0.6615481376647949 | 0.0001795752490823283 | 1.3839
800 | 0.10475146079185557 | 0.696959376335144 | 0.00017931305715783956 | 1.3634
810 | 0.10606085405175376 | 0.7320592403411865 | 0.00017905086523335083 | 1.2737
820 | 0.10737024731165197 | 0.7200619578361511 | 0.0001787886733088621 | 1.3732
830 | 0.10867964057155016 | 0.6982961297035217 | 0.00017852648138437337 | 1.3019
840 | 0.10998903383144835 | 0.7427386045455933 | 0.00017826428945988464 | 1.3398
850 | 0.11129842709134655 | 0.7897806763648987 | 0.0001780020975353959 | 1.3216
860 | 0.11260782035124474 | 0.7520805597305298 | 0.00017773990561090717 | 1.2875
870 | 0.11391721361114293 | 0.7332555055618286 | 0.00017747771368641844 | 1.272
880 | 0.11522660687104114 | 0.7135840654373169 | 0.00017721552176192974 | 1.3185
890 | 0.11653600013093933 | 0.6898264288902283 | 0.000176953329837441 | 1.3089
900 | 0.11784539339083752 | 0.9488328099250793 | 0.00017669113791295228 | 1.2258
910 | 0.11915478665073571 | 0.7257933616638184 | 0.00017642894598846355 | 1.3284
920 | 0.1204641799106339 | 0.7688736915588379 | 0.00017616675406397484 | 1.2878
930 | 0.1217735731705321 | 0.8328510522842407 | 0.0001759045621394861 | 1.2346
940 | 0.1230829664304303 | 0.8448120951652527 | 0.00017564237021499738 | 1.2926
950 | 0.1243923596903285 | 0.8510689735412598 | 0.00017538017829050865 | 1.2109
960 | 0.12570175295022668 | 0.866874098777771 | 0.00017511798636601992 | 1.3091
970 | 0.12701114621012488 | 0.9010233879089355 | 0.00017485579444153122 | 1.2273
980 | 0.1283205394700231 | 0.9316047430038452 | 0.00017459360251704248 | 1.2611
990 | 0.12962993272992127 | 0.9005467295646667 | 0.00017433141059255375 | 1.1747
1000 | 0.13093932598981947 | 0.8843415975570679 | 0.00017406921866806502 | 1.1915
1010 | 0.13224871924971765 | 0.8090497851371765 | 0.0001738070267435763 | 1.2452
1020 | 0.13355811250961586 | 1.2498819828033447 | 0.0001735448348190876 | 1.276
1030 | 0.13486750576951406 | 0.7861034870147705 | 0.00017328264289459886 | 1.1989
1040 | 0.13617689902941224 | 0.9525002837181091 | 0.00017302045097011013 | 1.1338
1050 | 0.13748629228931045 | 0.8066142201423645 | 0.0001727582590456214 | 1.1421
1060 | 0.13879568554920862 | 0.8200965523719788 | 0.0001724960671211327 | 1.1596
1070 | 0.14010507880910683 | 0.9981400370597839 | 0.00017223387519664396 | 1.0562
1080 | 0.14141447206900504 | 0.9273063540458679 | 0.00017197168327215523 | 1.1275
1090 | 0.14272386532890322 | 0.8812237977981567 | 0.0001717094913476665 | 1.0406
1100 | 0.14403325858880142 | 0.8970304727554321 | 0.00017144729942317777 | 1.1263
1110 | 0.1453426518486996 | 0.9097404479980469 | 0.00017118510749868906 | 1.1956
1120 | 0.1466520451085978 | 1.0246269702911377 | 0.00017092291557420033 | 1.0717
1130 | 0.14796143836849598 | 1.1149781942367554 | 0.0001706607236497116 | 1.076
1140 | 0.1492708316283942 | 1.1981500387191772 | 0.00017039853172522287 | 1.142
1150 | 0.1505802248882924 | 0.9477318525314331 | 0.00017013633980073414 | 1.0799
1160 | 0.15188961814819057 | 1.0102957487106323 | 0.00016987414787624544 | 1.0531
1170 | 0.15319901140808878 | 1.1728227138519287 | 0.0001696119559517567 | 1.0903
1180 | 0.15450840466798696 | 1.0086623430252075 | 0.00016934976402726797 | 1.0677
1190 | 0.15581779792788517 | 0.8586070537567139 | 0.00016908757210277924 | 1.1022
1200 | 0.15712719118778337 | 1.2628968954086304 | 0.00016882538017829054 | 1.0575
1210 | 0.15843658444768155 | 0.9629563689231873 | 0.0001685631882538018 | 1.0844
1220 | 0.15974597770757976 | 1.0898447036743164 | 0.00016830099632931308 | 1.0654
1230 | 0.16105537096747793 | 1.13120698928833 | 0.00016803880440482435 | 1.0686
1240 | 0.16236476422737614 | 1.0732567310333252 | 0.00016777661248033561 | 1.084
1250 | 0.16367415748727435 | 1.0681878328323364 | 0.00016751442055584688 | 0.9979
1260 | 0.16498355074717252 | 0.9773361086845398 | 0.00016725222863135815 | 1.0841
1270 | 0.16629294400707073 | 1.0342450141906738 | 0.00016699003670686942 | 1.0176
1280 | 0.1676023372669689 | 1.0580531358718872 | 0.0001667278447823807 | 0.9858
1290 | 0.16891173052686712 | 0.9744387865066528 | 0.000166465652857892 | 0.9282
1300 | 0.1702211237867653 | 0.9636452198028564 | 0.00016620346093340326 | 0.9414
1310 | 0.1715305170466635 | 1.1029468774795532 | 0.00016594126900891453 | 0.8812
1320 | 0.1728399103065617 | 1.2941449880599976 | 0.0001656790770844258 | 0.9823
1330 | 0.17414930356645988 | 1.627166509628296 | 0.00016541688515993706 | 0.9585
1340 | 0.1754586968263581 | 1.091630458831787 | 0.00016515469323544836 | 0.9516
1350 | 0.17676809008625627 | 1.1108227968215942 | 0.00016489250131095963 | 0.8998
1360 | 0.17807748334615447 | 1.0883326530456543 | 0.0001646303093864709 | 0.916
1370 | 0.17938687660605268 | 1.2917275428771973 | 0.00016436811746198217 | 0.9112
1380 | 0.18069626986595086 | 1.1828432083129883 | 0.00016410592553749344 | 0.9721
1390 | 0.18200566312584907 | 1.3447389602661133 | 0.00016384373361300473 | 0.9198
1400 | 0.18331505638574724 | 1.0735760927200317 | 0.000163581541688516 | 0.8634
1410 | 0.18462444964564545 | 1.0454446077346802 | 0.00016331934976402727 | 0.9151
1420 | 0.18593384290554366 | 1.2230719327926636 | 0.00016305715783953854 | 0.9202
1430 | 0.18724323616544183 | 1.1030149459838867 | 0.00016279496591504984 | 0.9068
1440 | 0.18855262942534004 | 1.4471871852874756 | 0.0001625327739905611 | 0.8682
1450 | 0.18986202268523822 | 1.2458796501159668 | 0.00016227058206607237 | 0.8247
1460 | 0.19117141594513642 | 1.1849644184112549 | 0.00016200839014158364 | 0.8987
1470 | 0.19248080920503463 | 1.2985557317733765 | 0.0001617461982170949 | 0.8006
1480 | 0.1937902024649328 | 1.7127928733825684 | 0.0001614840062926062 | 0.8191
1490 | 0.19509959572483102 | 1.440895915031433 | 0.00016122181436811748 | 0.8129
1500 | 0.1964089889847292 | 1.252194881439209 | 0.00016095962244362875 | 0.8803
1510 | 0.1977183822446274 | 1.138358235359192 | 0.00016069743051914001 | 0.8744
1520 | 0.19902777550452558 | 1.080971598625183 | 0.00016043523859465128 | 0.8693
1530 | 0.20033716876442378 | 1.1612547636032104 | 0.00016017304667016258 | 0.7991
1540 | 0.201646562024322 | 1.1773971319198608 | 0.00015991085474567385 | 0.912
1550 | 0.20295595528422017 | 1.1353998184204102 | 0.00015964866282118512 | 0.7986
1560 | 0.20426534854411837 | 1.6848335266113281 | 0.0001593864708966964 | 0.6932
1570 | 0.20557474180401655 | 1.4043173789978027 | 0.00015912427897220768 | 0.8529
1580 | 0.20688413506391476 | 1.2601439952850342 | 0.00015886208704771895 | 0.8173
1590 | 0.20819352832381297 | 1.2090034484863281 | 0.00015859989512323022 | 0.7451
1600 | 0.20950292158371114 | 1.3334815502166748 | 0.0001583377031987415 | 0.775
1610 | 0.21081231484360935 | 1.1993087530136108 | 0.00015807551127425276 | 0.7733
1620 | 0.21212170810350753 | 1.51642906665802 | 0.00015781331934976406 | 0.6907
1630 | 0.21343110136340573 | 1.3714466094970703 | 0.00015755112742527532 | 0.7016
1640 | 0.21474049462330394 | 1.2519642114639282 | 0.0001572889355007866 | 0.7648
1650 | 0.21604988788320212 | 1.3851202726364136 | 0.00015702674357629786 | 0.7069
1660 | 0.21735928114310032 | 1.334105134010315 | 0.00015676455165180913 | 0.7338
1670 | 0.2186686744029985 | 1.3785145282745361 | 0.0001565023597273204 | 0.6299
1680 | 0.2199780676628967 | 1.4771215915679932 | 0.00015624016780283167 | 0.6828
1690 | 0.2212874609227949 | 1.3885449171066284 | 0.00015597797587834294 | 0.7141
1700 | 0.2225968541826931 | 1.2664909362792969 | 0.00015571578395385423 | 0.7667
1710 | 0.2239062474425913 | 1.2576826810836792 | 0.0001554535920293655 | 0.7395
1720 | 0.22521564070248948 | 1.284826636314392 | 0.00015519140010487677 | 0.6832
1730 | 0.22652503396238768 | 1.272933006286621 | 0.00015492920818038804 | 0.6892
1740 | 0.22783442722228586 | 1.3465379476547241 | 0.0001546670162558993 | 0.6449
1750 | 0.22914382048218407 | 1.2862318754196167 | 0.00015440482433141058 | 0.6883
1760 | 0.23045321374208227 | 1.2469042539596558 | 0.00015414263240692188 | 0.7593
1770 | 0.23176260700198045 | 1.5080034732818604 | 0.00015388044048243315 | 0.7009
1780 | 0.23307200026187866 | 0.9788569211959839 | 0.00015361824855794441 | 0.602
1790 | 0.23438139352177684 | 1.3450673818588257 | 0.00015335605663345568 | 0.6238
1800 | 0.23569078678167504 | 1.4177800416946411 | 0.00015309386470896695 | 0.6768
1810 | 0.23700018004157325 | 1.3528062105178833 | 0.00015283167278447825 | 0.6404
1820 | 0.23830957330147143 | 1.2898012399673462 | 0.00015256948085998952 | 0.6606
1830 | 0.23961896656136963 | 1.311298131942749 | 0.0001523072889355008 | 0.662
1840 | 0.2409283598212678 | 1.6476584672927856 | 0.00015204509701101206 | 0.671
1850 | 0.24223775308116602 | 1.36719810962677 | 0.00015178290508652335 | 0.7097
1860 | 0.2435471463410642 | 1.3647184371948242 | 0.00015152071316203462 | 0.6604
1870 | 0.2448565396009624 | 1.2265934944152832 | 0.0001512585212375459 | 0.6272
1880 | 0.2461659328608606 | 1.4882850646972656 | 0.00015099632931305716 | 0.7007
1890 | 0.2474753261207588 | 1.408470869064331 | 0.00015073413738856843 | 0.6526
1900 | 0.248784719380657 | 1.3388913869857788 | 0.00015047194546407972 | 0.6891
1910 | 0.2500941126405552 | 1.3725926876068115 | 0.000150209753539591 | 0.5763
1920 | 0.25140350590045335 | 1.40208899974823 | 0.00014994756161510226 | 0.5637
1930 | 0.25271289916035156 | 1.8308840990066528 | 0.00014968536969061353 | 0.6899
1940 | 0.25402229242024976 | 1.4921183586120605 | 0.0001494231777661248 | 0.5764
1950 | 0.25533168568014797 | 1.5387523174285889 | 0.0001491609858416361 | 0.5229
1960 | 0.2566410789400462 | 1.3345798254013062 | 0.00014889879391714737 | 0.5949
1970 | 0.2579504721999443 | 1.682065486907959 | 0.00014863660199265863 | 0.5619
1980 | 0.25925986545984253 | 1.480276346206665 | 0.0001483744100681699 | 0.5473
1990 | 0.26056925871974074 | 1.3453810214996338 | 0.0001481122181436812 | 0.5603
2000 | 0.26187865197963894 | 1.4118777513504028 | 0.00014785002621919247 | 0.5543
2010 | 0.26318804523953715 | 1.2959351539611816 | 0.00014758783429470374 | 0.4962
2020 | 0.2644974384994353 | 1.3605815172195435 | 0.000147325642370215 | 0.5699
2030 | 0.2658068317593335 | 2.086613416671753 | 0.00014706345044572628 | 0.565
2040 | 0.2671162250192317 | 1.2892887592315674 | 0.00014680125852123757 | 0.6062
2050 | 0.2684256182791299 | 1.5760036706924438 | 0.00014653906659674884 | 0.5642
2060 | 0.2697350115390281 | 1.21380615234375 | 0.0001462768746722601 | 0.5514
2070 | 0.2710444047989263 | 1.4393121004104614 | 0.00014601468274777138 | 0.5572
2080 | 0.2723537980588245 | 1.2972021102905273 | 0.00014575249082328265 | 0.535
2090 | 0.2736631913187227 | 1.0208637714385986 | 0.00014549029889879392 | 0.5835
2100 | 0.2749725845786209 | 1.4418736696243286 | 0.00014522810697430521 | 0.4829
2110 | 0.2762819778385191 | 1.4326051473617554 | 0.00014496591504981648 | 0.4711
2120 | 0.27759137109841725 | 1.497841715812683 | 0.00014470372312532775 | 0.4935
2130 | 0.27890076435831546 | 1.5082463026046753 | 0.00014444153120083902 | 0.4979
2140 | 0.28021015761821366 | 1.2458934783935547 | 0.0001441793392763503 | 0.5644
2150 | 0.28151955087811187 | 1.730130910873413 | 0.00014391714735186156 | 0.4749
2160 | 0.2828289441380101 | 1.2587112188339233 | 0.00014365495542737283 | 0.5175
2170 | 0.2841383373979082 | 1.431119441986084 | 0.0001433927635028841 | 0.5597
2180 | 0.28544773065780643 | 1.5383937358856201 | 0.0001431305715783954 | 0.5153
2190 | 0.28675712391770464 | 1.4311727285385132 | 0.00014286837965390666 | 0.5452
2200 | 0.28806651717760284 | 1.2555975914001465 | 0.00014260618772941793 | 0.4937
2210 | 0.28937591043750105 | 1.3781330585479736 | 0.0001423439958049292 | 0.4537
2220 | 0.2906853036973992 | 1.4810888767242432 | 0.00014208180388044047 | 0.396
2230 | 0.2919946969572974 | 1.6619911193847656 | 0.00014181961195595177 | 0.4756
2240 | 0.2933040902171956 | 1.3403065204620361 | 0.00014155742003146303 | 0.5157
2250 | 0.2946134834770938 | 1.4188278913497925 | 0.0001412952281069743 | 0.5237
2260 | 0.29592287673699197 | 1.852266550064087 | 0.00014103303618248557 | 0.4558
2270 | 0.2972322699968902 | 1.3092072010040283 | 0.00014077084425799687 | 0.4437
2280 | 0.2985416632567884 | 1.4190593957901 | 0.00014050865233350814 | 0.4717
2290 | 0.2998510565166866 | 1.4562608003616333 | 0.0001402464604090194 | 0.4744
2300 | 0.3011604497765848 | 1.4576420783996582 | 0.00013998426848453068 | 0.4429
2310 | 0.30246984303648294 | 1.867145299911499 | 0.00013972207656004194 | 0.4881
2320 | 0.30377923629638115 | 1.3077807426452637 | 0.00013945988463555324 | 0.4067
2330 | 0.30508862955627936 | 1.3587473630905151 | 0.0001391976927110645 | 0.4428
2340 | 0.30639802281617756 | 1.6012579202651978 | 0.00013893550078657578 | 0.4572
2350 | 0.30770741607607577 | 1.2226955890655518 | 0.00013867330886208705 | 0.4117
2360 | 0.3090168093359739 | 1.4615281820297241 | 0.00013841111693759834 | 0.4561
2370 | 0.3103262025958721 | 1.401014804840088 | 0.0001381489250131096 | 0.441
2380 | 0.31163559585577033 | 1.4875798225402832 | 0.00013788673308862088 | 0.3991
2390 | 0.31294498911566854 | 1.1867239475250244 | 0.00013762454116413215 | 0.4223
2400 | 0.31425438237556674 | 1.3172953128814697 | 0.00013736234923964342 | 0.4388
2410 | 0.3155637756354649 | 1.4044665098190308 | 0.00013710015731515472 | 0.4102
2420 | 0.3168731688953631 | 1.5709283351898193 | 0.00013683796539066599 | 0.4837
2430 | 0.3181825621552613 | 1.2237786054611206 | 0.00013657577346617725 | 0.4452
2440 | 0.3194919554151595 | 1.8869267702102661 | 0.00013631358154168852 | 0.4077
2450 | 0.3208013486750577 | 1.226117491722107 | 0.0001360513896171998 | 0.4109
2460 | 0.32211074193495587 | 1.6273385286331177 | 0.0001357891976927111 | 0.3596
2470 | 0.3234201351948541 | 1.4535574913024902 | 0.00013552700576822236 | 0.3996
2480 | 0.3247295284547523 | 1.6052360534667969 | 0.00013526481384373363 | 0.4082
2490 | 0.3260389217146505 | 1.9104530811309814 | 0.0001350026219192449 | 0.4089
2500 | 0.3273483149745487 | 1.6006613969802856 | 0.0001347404299947562 | 0.3848
2510 | 0.32865770823444684 | 1.4406352043151855 | 0.00013447823807026746 | 0.3926
2520 | 0.32996710149434505 | 1.3455756902694702 | 0.00013421604614577873 | 0.4203
2530 | 0.33127649475424326 | 1.7718679904937744 | 0.00013395385422129 | 0.3765
2540 | 0.33258588801414146 | 1.410130500793457 | 0.00013369166229680127 | 0.3646
2550 | 0.33389528127403967 | 1.6361408233642578 | 0.00013342947037231254 | 0.3917
2560 | 0.3352046745339378 | 1.7627660036087036 | 0.0001331672784478238 | 0.367
2570 | 0.336514067793836 | 1.2431906461715698 | 0.00013290508652333508 | 0.3708
2580 | 0.33782346105373423 | 1.4763669967651367 | 0.00013264289459884634 | 0.377
2590 | 0.33913285431363244 | 2.1701712608337402 | 0.00013238070267435761 | 0.344
2600 | 0.3404422475735306 | 1.4388126134872437 | 0.0001321185107498689 | 0.3556
2610 | 0.3417516408334288 | 1.2981114387512207 | 0.00013185631882538018 | 0.3272
2620 | 0.343061034093327 | 1.539335012435913 | 0.00013159412690089145 | 0.4132
2630 | 0.3443704273532252 | 1.9272770881652832 | 0.00013133193497640272 | 0.4121
2640 | 0.3456798206131234 | 1.4415314197540283 | 0.000131069743051914 | 0.3595
2650 | 0.34698921387302156 | 1.3155860900878906 | 0.00013080755112742528 | 0.3611
2660 | 0.34829860713291977 | 1.507858157157898 | 0.00013054535920293655 | 0.3813
2670 | 0.349608000392818 | 1.5444693565368652 | 0.00013028316727844782 | 0.3527
2680 | 0.3509173936527162 | 1.4008456468582153 | 0.0001300209753539591 | 0.3573

(The next record is cut off in the rendered diff: epoch 0.3522267869126144, grad_norm 1.6443661451339722, learning_rate 0.00012975878342947039, with its loss and step not shown.)
1392
+ "epoch": 0.25925986545984253,
1393
+ "grad_norm": 1.480276346206665,
1394
+ "learning_rate": 0.0001483744100681699,
1395
+ "loss": 0.5473,
1396
+ "step": 1980
1397
+ },
1398
+ {
1399
+ "epoch": 0.26056925871974074,
1400
+ "grad_norm": 1.3453810214996338,
1401
+ "learning_rate": 0.0001481122181436812,
1402
+ "loss": 0.5603,
1403
+ "step": 1990
1404
+ },
1405
+ {
1406
+ "epoch": 0.26187865197963894,
1407
+ "grad_norm": 1.4118777513504028,
1408
+ "learning_rate": 0.00014785002621919247,
1409
+ "loss": 0.5543,
1410
+ "step": 2000
1411
+ },
1412
+ {
1413
+ "epoch": 0.26318804523953715,
1414
+ "grad_norm": 1.2959351539611816,
1415
+ "learning_rate": 0.00014758783429470374,
1416
+ "loss": 0.4962,
1417
+ "step": 2010
1418
+ },
1419
+ {
1420
+ "epoch": 0.2644974384994353,
1421
+ "grad_norm": 1.3605815172195435,
1422
+ "learning_rate": 0.000147325642370215,
1423
+ "loss": 0.5699,
1424
+ "step": 2020
1425
+ },
1426
+ {
1427
+ "epoch": 0.2658068317593335,
1428
+ "grad_norm": 2.086613416671753,
1429
+ "learning_rate": 0.00014706345044572628,
1430
+ "loss": 0.565,
1431
+ "step": 2030
1432
+ },
1433
+ {
1434
+ "epoch": 0.2671162250192317,
1435
+ "grad_norm": 1.2892887592315674,
1436
+ "learning_rate": 0.00014680125852123757,
1437
+ "loss": 0.6062,
1438
+ "step": 2040
1439
+ },
1440
+ {
1441
+ "epoch": 0.2684256182791299,
1442
+ "grad_norm": 1.5760036706924438,
1443
+ "learning_rate": 0.00014653906659674884,
1444
+ "loss": 0.5642,
1445
+ "step": 2050
1446
+ },
1447
+ {
1448
+ "epoch": 0.2697350115390281,
1449
+ "grad_norm": 1.21380615234375,
1450
+ "learning_rate": 0.0001462768746722601,
1451
+ "loss": 0.5514,
1452
+ "step": 2060
1453
+ },
1454
+ {
1455
+ "epoch": 0.2710444047989263,
1456
+ "grad_norm": 1.4393121004104614,
1457
+ "learning_rate": 0.00014601468274777138,
1458
+ "loss": 0.5572,
1459
+ "step": 2070
1460
+ },
1461
+ {
1462
+ "epoch": 0.2723537980588245,
1463
+ "grad_norm": 1.2972021102905273,
1464
+ "learning_rate": 0.00014575249082328265,
1465
+ "loss": 0.535,
1466
+ "step": 2080
1467
+ },
1468
+ {
1469
+ "epoch": 0.2736631913187227,
1470
+ "grad_norm": 1.0208637714385986,
1471
+ "learning_rate": 0.00014549029889879392,
1472
+ "loss": 0.5835,
1473
+ "step": 2090
1474
+ },
1475
+ {
1476
+ "epoch": 0.2749725845786209,
1477
+ "grad_norm": 1.4418736696243286,
1478
+ "learning_rate": 0.00014522810697430521,
1479
+ "loss": 0.4829,
1480
+ "step": 2100
1481
+ },
1482
+ {
1483
+ "epoch": 0.2762819778385191,
1484
+ "grad_norm": 1.4326051473617554,
1485
+ "learning_rate": 0.00014496591504981648,
1486
+ "loss": 0.4711,
1487
+ "step": 2110
1488
+ },
1489
+ {
1490
+ "epoch": 0.27759137109841725,
1491
+ "grad_norm": 1.497841715812683,
1492
+ "learning_rate": 0.00014470372312532775,
1493
+ "loss": 0.4935,
1494
+ "step": 2120
1495
+ },
1496
+ {
1497
+ "epoch": 0.27890076435831546,
1498
+ "grad_norm": 1.5082463026046753,
1499
+ "learning_rate": 0.00014444153120083902,
1500
+ "loss": 0.4979,
1501
+ "step": 2130
1502
+ },
1503
+ {
1504
+ "epoch": 0.28021015761821366,
1505
+ "grad_norm": 1.2458934783935547,
1506
+ "learning_rate": 0.0001441793392763503,
1507
+ "loss": 0.5644,
1508
+ "step": 2140
1509
+ },
1510
+ {
1511
+ "epoch": 0.28151955087811187,
1512
+ "grad_norm": 1.730130910873413,
1513
+ "learning_rate": 0.00014391714735186156,
1514
+ "loss": 0.4749,
1515
+ "step": 2150
1516
+ },
1517
+ {
1518
+ "epoch": 0.2828289441380101,
1519
+ "grad_norm": 1.2587112188339233,
1520
+ "learning_rate": 0.00014365495542737283,
1521
+ "loss": 0.5175,
1522
+ "step": 2160
1523
+ },
1524
+ {
1525
+ "epoch": 0.2841383373979082,
1526
+ "grad_norm": 1.431119441986084,
1527
+ "learning_rate": 0.0001433927635028841,
1528
+ "loss": 0.5597,
1529
+ "step": 2170
1530
+ },
1531
+ {
1532
+ "epoch": 0.28544773065780643,
1533
+ "grad_norm": 1.5383937358856201,
1534
+ "learning_rate": 0.0001431305715783954,
1535
+ "loss": 0.5153,
1536
+ "step": 2180
1537
+ },
1538
+ {
1539
+ "epoch": 0.28675712391770464,
1540
+ "grad_norm": 1.4311727285385132,
1541
+ "learning_rate": 0.00014286837965390666,
1542
+ "loss": 0.5452,
1543
+ "step": 2190
1544
+ },
1545
+ {
1546
+ "epoch": 0.28806651717760284,
1547
+ "grad_norm": 1.2555975914001465,
1548
+ "learning_rate": 0.00014260618772941793,
1549
+ "loss": 0.4937,
1550
+ "step": 2200
1551
+ },
1552
+ {
1553
+ "epoch": 0.28937591043750105,
1554
+ "grad_norm": 1.3781330585479736,
1555
+ "learning_rate": 0.0001423439958049292,
1556
+ "loss": 0.4537,
1557
+ "step": 2210
1558
+ },
1559
+ {
1560
+ "epoch": 0.2906853036973992,
1561
+ "grad_norm": 1.4810888767242432,
1562
+ "learning_rate": 0.00014208180388044047,
1563
+ "loss": 0.396,
1564
+ "step": 2220
1565
+ },
1566
+ {
1567
+ "epoch": 0.2919946969572974,
1568
+ "grad_norm": 1.6619911193847656,
1569
+ "learning_rate": 0.00014181961195595177,
1570
+ "loss": 0.4756,
1571
+ "step": 2230
1572
+ },
1573
+ {
1574
+ "epoch": 0.2933040902171956,
1575
+ "grad_norm": 1.3403065204620361,
1576
+ "learning_rate": 0.00014155742003146303,
1577
+ "loss": 0.5157,
1578
+ "step": 2240
1579
+ },
1580
+ {
1581
+ "epoch": 0.2946134834770938,
1582
+ "grad_norm": 1.4188278913497925,
1583
+ "learning_rate": 0.0001412952281069743,
1584
+ "loss": 0.5237,
1585
+ "step": 2250
1586
+ },
1587
+ {
1588
+ "epoch": 0.29592287673699197,
1589
+ "grad_norm": 1.852266550064087,
1590
+ "learning_rate": 0.00014103303618248557,
1591
+ "loss": 0.4558,
1592
+ "step": 2260
1593
+ },
1594
+ {
1595
+ "epoch": 0.2972322699968902,
1596
+ "grad_norm": 1.3092072010040283,
1597
+ "learning_rate": 0.00014077084425799687,
1598
+ "loss": 0.4437,
1599
+ "step": 2270
1600
+ },
1601
+ {
1602
+ "epoch": 0.2985416632567884,
1603
+ "grad_norm": 1.4190593957901,
1604
+ "learning_rate": 0.00014050865233350814,
1605
+ "loss": 0.4717,
1606
+ "step": 2280
1607
+ },
1608
+ {
1609
+ "epoch": 0.2998510565166866,
1610
+ "grad_norm": 1.4562608003616333,
1611
+ "learning_rate": 0.0001402464604090194,
1612
+ "loss": 0.4744,
1613
+ "step": 2290
1614
+ },
1615
+ {
1616
+ "epoch": 0.3011604497765848,
1617
+ "grad_norm": 1.4576420783996582,
1618
+ "learning_rate": 0.00013998426848453068,
1619
+ "loss": 0.4429,
1620
+ "step": 2300
1621
+ },
1622
+ {
1623
+ "epoch": 0.30246984303648294,
1624
+ "grad_norm": 1.867145299911499,
1625
+ "learning_rate": 0.00013972207656004194,
1626
+ "loss": 0.4881,
1627
+ "step": 2310
1628
+ },
1629
+ {
1630
+ "epoch": 0.30377923629638115,
1631
+ "grad_norm": 1.3077807426452637,
1632
+ "learning_rate": 0.00013945988463555324,
1633
+ "loss": 0.4067,
1634
+ "step": 2320
1635
+ },
1636
+ {
1637
+ "epoch": 0.30508862955627936,
1638
+ "grad_norm": 1.3587473630905151,
1639
+ "learning_rate": 0.0001391976927110645,
1640
+ "loss": 0.4428,
1641
+ "step": 2330
1642
+ },
1643
+ {
1644
+ "epoch": 0.30639802281617756,
1645
+ "grad_norm": 1.6012579202651978,
1646
+ "learning_rate": 0.00013893550078657578,
1647
+ "loss": 0.4572,
1648
+ "step": 2340
1649
+ },
1650
+ {
1651
+ "epoch": 0.30770741607607577,
1652
+ "grad_norm": 1.2226955890655518,
1653
+ "learning_rate": 0.00013867330886208705,
1654
+ "loss": 0.4117,
1655
+ "step": 2350
1656
+ },
1657
+ {
1658
+ "epoch": 0.3090168093359739,
1659
+ "grad_norm": 1.4615281820297241,
1660
+ "learning_rate": 0.00013841111693759834,
1661
+ "loss": 0.4561,
1662
+ "step": 2360
1663
+ },
1664
+ {
1665
+ "epoch": 0.3103262025958721,
1666
+ "grad_norm": 1.401014804840088,
1667
+ "learning_rate": 0.0001381489250131096,
1668
+ "loss": 0.441,
1669
+ "step": 2370
1670
+ },
1671
+ {
1672
+ "epoch": 0.31163559585577033,
1673
+ "grad_norm": 1.4875798225402832,
1674
+ "learning_rate": 0.00013788673308862088,
1675
+ "loss": 0.3991,
1676
+ "step": 2380
1677
+ },
1678
+ {
1679
+ "epoch": 0.31294498911566854,
1680
+ "grad_norm": 1.1867239475250244,
1681
+ "learning_rate": 0.00013762454116413215,
1682
+ "loss": 0.4223,
1683
+ "step": 2390
1684
+ },
1685
+ {
1686
+ "epoch": 0.31425438237556674,
1687
+ "grad_norm": 1.3172953128814697,
1688
+ "learning_rate": 0.00013736234923964342,
1689
+ "loss": 0.4388,
1690
+ "step": 2400
1691
+ },
1692
+ {
1693
+ "epoch": 0.3155637756354649,
1694
+ "grad_norm": 1.4044665098190308,
1695
+ "learning_rate": 0.00013710015731515472,
1696
+ "loss": 0.4102,
1697
+ "step": 2410
1698
+ },
1699
+ {
1700
+ "epoch": 0.3168731688953631,
1701
+ "grad_norm": 1.5709283351898193,
1702
+ "learning_rate": 0.00013683796539066599,
1703
+ "loss": 0.4837,
1704
+ "step": 2420
1705
+ },
1706
+ {
1707
+ "epoch": 0.3181825621552613,
1708
+ "grad_norm": 1.2237786054611206,
1709
+ "learning_rate": 0.00013657577346617725,
1710
+ "loss": 0.4452,
1711
+ "step": 2430
1712
+ },
1713
+ {
1714
+ "epoch": 0.3194919554151595,
1715
+ "grad_norm": 1.8869267702102661,
1716
+ "learning_rate": 0.00013631358154168852,
1717
+ "loss": 0.4077,
1718
+ "step": 2440
1719
+ },
1720
+ {
1721
+ "epoch": 0.3208013486750577,
1722
+ "grad_norm": 1.226117491722107,
1723
+ "learning_rate": 0.0001360513896171998,
1724
+ "loss": 0.4109,
1725
+ "step": 2450
1726
+ },
1727
+ {
1728
+ "epoch": 0.32211074193495587,
1729
+ "grad_norm": 1.6273385286331177,
1730
+ "learning_rate": 0.0001357891976927111,
1731
+ "loss": 0.3596,
1732
+ "step": 2460
1733
+ },
1734
+ {
1735
+ "epoch": 0.3234201351948541,
1736
+ "grad_norm": 1.4535574913024902,
1737
+ "learning_rate": 0.00013552700576822236,
1738
+ "loss": 0.3996,
1739
+ "step": 2470
1740
+ },
1741
+ {
1742
+ "epoch": 0.3247295284547523,
1743
+ "grad_norm": 1.6052360534667969,
1744
+ "learning_rate": 0.00013526481384373363,
1745
+ "loss": 0.4082,
1746
+ "step": 2480
1747
+ },
1748
+ {
1749
+ "epoch": 0.3260389217146505,
1750
+ "grad_norm": 1.9104530811309814,
1751
+ "learning_rate": 0.0001350026219192449,
1752
+ "loss": 0.4089,
1753
+ "step": 2490
1754
+ },
1755
+ {
1756
+ "epoch": 0.3273483149745487,
1757
+ "grad_norm": 1.6006613969802856,
1758
+ "learning_rate": 0.0001347404299947562,
1759
+ "loss": 0.3848,
1760
+ "step": 2500
1761
+ },
1762
+ {
1763
+ "epoch": 0.32865770823444684,
1764
+ "grad_norm": 1.4406352043151855,
1765
+ "learning_rate": 0.00013447823807026746,
1766
+ "loss": 0.3926,
1767
+ "step": 2510
1768
+ },
1769
+ {
1770
+ "epoch": 0.32996710149434505,
1771
+ "grad_norm": 1.3455756902694702,
1772
+ "learning_rate": 0.00013421604614577873,
1773
+ "loss": 0.4203,
1774
+ "step": 2520
1775
+ },
1776
+ {
1777
+ "epoch": 0.33127649475424326,
1778
+ "grad_norm": 1.7718679904937744,
1779
+ "learning_rate": 0.00013395385422129,
1780
+ "loss": 0.3765,
1781
+ "step": 2530
1782
+ },
1783
+ {
1784
+ "epoch": 0.33258588801414146,
1785
+ "grad_norm": 1.410130500793457,
1786
+ "learning_rate": 0.00013369166229680127,
1787
+ "loss": 0.3646,
1788
+ "step": 2540
1789
+ },
1790
+ {
1791
+ "epoch": 0.33389528127403967,
1792
+ "grad_norm": 1.6361408233642578,
1793
+ "learning_rate": 0.00013342947037231254,
1794
+ "loss": 0.3917,
1795
+ "step": 2550
1796
+ },
1797
+ {
1798
+ "epoch": 0.3352046745339378,
1799
+ "grad_norm": 1.7627660036087036,
1800
+ "learning_rate": 0.0001331672784478238,
1801
+ "loss": 0.367,
1802
+ "step": 2560
1803
+ },
1804
+ {
1805
+ "epoch": 0.336514067793836,
1806
+ "grad_norm": 1.2431906461715698,
1807
+ "learning_rate": 0.00013290508652333508,
1808
+ "loss": 0.3708,
1809
+ "step": 2570
1810
+ },
1811
+ {
1812
+ "epoch": 0.33782346105373423,
1813
+ "grad_norm": 1.4763669967651367,
1814
+ "learning_rate": 0.00013264289459884634,
1815
+ "loss": 0.377,
1816
+ "step": 2580
1817
+ },
1818
+ {
1819
+ "epoch": 0.33913285431363244,
1820
+ "grad_norm": 2.1701712608337402,
1821
+ "learning_rate": 0.00013238070267435761,
1822
+ "loss": 0.344,
1823
+ "step": 2590
1824
+ },
1825
+ {
1826
+ "epoch": 0.3404422475735306,
1827
+ "grad_norm": 1.4388126134872437,
1828
+ "learning_rate": 0.0001321185107498689,
1829
+ "loss": 0.3556,
1830
+ "step": 2600
1831
+ },
1832
+ {
1833
+ "epoch": 0.3417516408334288,
1834
+ "grad_norm": 1.2981114387512207,
1835
+ "learning_rate": 0.00013185631882538018,
1836
+ "loss": 0.3272,
1837
+ "step": 2610
1838
+ },
1839
+ {
1840
+ "epoch": 0.343061034093327,
1841
+ "grad_norm": 1.539335012435913,
1842
+ "learning_rate": 0.00013159412690089145,
1843
+ "loss": 0.4132,
1844
+ "step": 2620
1845
+ },
1846
+ {
1847
+ "epoch": 0.3443704273532252,
1848
+ "grad_norm": 1.9272770881652832,
1849
+ "learning_rate": 0.00013133193497640272,
1850
+ "loss": 0.4121,
1851
+ "step": 2630
1852
+ },
1853
+ {
1854
+ "epoch": 0.3456798206131234,
1855
+ "grad_norm": 1.4415314197540283,
1856
+ "learning_rate": 0.000131069743051914,
1857
+ "loss": 0.3595,
1858
+ "step": 2640
1859
+ },
1860
+ {
1861
+ "epoch": 0.34698921387302156,
1862
+ "grad_norm": 1.3155860900878906,
1863
+ "learning_rate": 0.00013080755112742528,
1864
+ "loss": 0.3611,
1865
+ "step": 2650
1866
+ },
1867
+ {
1868
+ "epoch": 0.34829860713291977,
1869
+ "grad_norm": 1.507858157157898,
1870
+ "learning_rate": 0.00013054535920293655,
1871
+ "loss": 0.3813,
1872
+ "step": 2660
1873
+ },
1874
+ {
1875
+ "epoch": 0.349608000392818,
1876
+ "grad_norm": 1.5444693565368652,
1877
+ "learning_rate": 0.00013028316727844782,
1878
+ "loss": 0.3527,
1879
+ "step": 2670
1880
+ },
1881
+ {
1882
+ "epoch": 0.3509173936527162,
1883
+ "grad_norm": 1.4008456468582153,
1884
+ "learning_rate": 0.0001300209753539591,
1885
+ "loss": 0.3573,
1886
+ "step": 2680
1887
+ },
1888
+ {
1889
+ "epoch": 0.3522267869126144,
1890
+ "grad_norm": 1.6443661451339722,
1891
+ "learning_rate": 0.00012975878342947039,
1892
+ "loss": 0.3885,
1893
+ "step": 2690
1894
+ },
1895
+ {
1896
+ "epoch": 0.35353618017251254,
1897
+ "grad_norm": 1.513431429862976,
1898
+ "learning_rate": 0.00012949659150498165,
1899
+ "loss": 0.3332,
1900
+ "step": 2700
1901
+ },
1902
+ {
1903
+ "epoch": 0.35484557343241074,
1904
+ "grad_norm": 1.6663899421691895,
1905
+ "learning_rate": 0.00012923439958049292,
1906
+ "loss": 0.3769,
1907
+ "step": 2710
1908
+ },
1909
+ {
1910
+ "epoch": 0.35615496669230895,
1911
+ "grad_norm": 1.2655925750732422,
1912
+ "learning_rate": 0.0001289722076560042,
1913
+ "loss": 0.4177,
1914
+ "step": 2720
1915
+ },
1916
+ {
1917
+ "epoch": 0.35746435995220716,
1918
+ "grad_norm": 1.324833869934082,
1919
+ "learning_rate": 0.00012871001573151546,
1920
+ "loss": 0.3501,
1921
+ "step": 2730
1922
+ },
1923
+ {
1924
+ "epoch": 0.35877375321210536,
1925
+ "grad_norm": 1.4842655658721924,
1926
+ "learning_rate": 0.00012844782380702676,
1927
+ "loss": 0.3223,
1928
+ "step": 2740
1929
+ },
1930
+ {
1931
+ "epoch": 0.3600831464720035,
1932
+ "grad_norm": 1.4087761640548706,
1933
+ "learning_rate": 0.00012818563188253803,
1934
+ "loss": 0.3308,
1935
+ "step": 2750
1936
+ },
1937
+ {
1938
+ "epoch": 0.3613925397319017,
1939
+ "grad_norm": 1.7493972778320312,
1940
+ "learning_rate": 0.0001279234399580493,
1941
+ "loss": 0.3655,
1942
+ "step": 2760
1943
+ },
1944
+ {
1945
+ "epoch": 0.3627019329917999,
1946
+ "grad_norm": 1.4829336404800415,
1947
+ "learning_rate": 0.00012766124803356056,
1948
+ "loss": 0.3674,
1949
+ "step": 2770
1950
+ },
1951
+ {
1952
+ "epoch": 0.36401132625169813,
1953
+ "grad_norm": 1.39944589138031,
1954
+ "learning_rate": 0.00012739905610907186,
1955
+ "loss": 0.3285,
1956
+ "step": 2780
1957
+ },
1958
+ {
1959
+ "epoch": 0.36532071951159634,
1960
+ "grad_norm": 1.5995631217956543,
1961
+ "learning_rate": 0.00012713686418458313,
1962
+ "loss": 0.3431,
1963
+ "step": 2790
1964
+ },
1965
+ {
1966
+ "epoch": 0.3666301127714945,
1967
+ "grad_norm": 1.0113691091537476,
1968
+ "learning_rate": 0.0001268746722600944,
1969
+ "loss": 0.3389,
1970
+ "step": 2800
1971
+ },
1972
+ {
1973
+ "epoch": 0.3679395060313927,
1974
+ "grad_norm": 1.6544948816299438,
1975
+ "learning_rate": 0.00012661248033560567,
1976
+ "loss": 0.323,
1977
+ "step": 2810
1978
+ },
1979
+ {
1980
+ "epoch": 0.3692488992912909,
1981
+ "grad_norm": 1.8022606372833252,
1982
+ "learning_rate": 0.00012635028841111694,
1983
+ "loss": 0.3777,
1984
+ "step": 2820
1985
+ },
1986
+ {
1987
+ "epoch": 0.3705582925511891,
1988
+ "grad_norm": 1.6005665063858032,
1989
+ "learning_rate": 0.00012608809648662823,
1990
+ "loss": 0.3482,
1991
+ "step": 2830
1992
+ },
1993
+ {
1994
+ "epoch": 0.3718676858110873,
1995
+ "grad_norm": 1.2550064325332642,
1996
+ "learning_rate": 0.0001258259045621395,
1997
+ "loss": 0.3288,
1998
+ "step": 2840
1999
+ },
2000
+ {
2001
+ "epoch": 0.37317707907098546,
2002
+ "grad_norm": 2.43110728263855,
2003
+ "learning_rate": 0.00012556371263765077,
2004
+ "loss": 0.3511,
2005
+ "step": 2850
2006
+ },
2007
+ {
2008
+ "epoch": 0.37448647233088367,
2009
+ "grad_norm": 1.5041906833648682,
2010
+ "learning_rate": 0.00012530152071316204,
2011
+ "loss": 0.3578,
2012
+ "step": 2860
2013
+ },
2014
+ {
2015
+ "epoch": 0.3757958655907819,
2016
+ "grad_norm": 1.6031140089035034,
2017
+ "learning_rate": 0.0001250393287886733,
2018
+ "loss": 0.3213,
2019
+ "step": 2870
2020
+ },
2021
+ {
2022
+ "epoch": 0.3771052588506801,
2023
+ "grad_norm": 1.025795817375183,
2024
+ "learning_rate": 0.0001247771368641846,
2025
+ "loss": 0.3352,
2026
+ "step": 2880
2027
+ },
2028
+ {
2029
+ "epoch": 0.3784146521105783,
2030
+ "grad_norm": 1.934812068939209,
2031
+ "learning_rate": 0.00012451494493969587,
2032
+ "loss": 0.3365,
2033
+ "step": 2890
2034
+ },
2035
+ {
2036
+ "epoch": 0.37972404537047644,
2037
+ "grad_norm": 1.0730398893356323,
2038
+ "learning_rate": 0.00012425275301520714,
2039
+ "loss": 0.3365,
2040
+ "step": 2900
2041
+ },
2042
+ {
2043
+ "epoch": 0.38103343863037464,
2044
+ "grad_norm": 1.3496712446212769,
2045
+ "learning_rate": 0.0001239905610907184,
2046
+ "loss": 0.3548,
2047
+ "step": 2910
2048
+ },
2049
+ {
2050
+ "epoch": 0.38234283189027285,
2051
+ "grad_norm": 1.3053911924362183,
2052
+ "learning_rate": 0.0001237283691662297,
2053
+ "loss": 0.3563,
2054
+ "step": 2920
2055
+ },
2056
+ {
2057
+ "epoch": 0.38365222515017106,
2058
+ "grad_norm": 1.3640882968902588,
2059
+ "learning_rate": 0.00012346617724174098,
2060
+ "loss": 0.365,
2061
+ "step": 2930
2062
+ },
2063
+ {
2064
+ "epoch": 0.38496161841006926,
2065
+ "grad_norm": 1.3266191482543945,
2066
+ "learning_rate": 0.00012320398531725225,
2067
+ "loss": 0.2981,
2068
+ "step": 2940
2069
+ },
2070
+ {
2071
+ "epoch": 0.3862710116699674,
2072
+ "grad_norm": 1.32815682888031,
2073
+ "learning_rate": 0.00012294179339276352,
2074
+ "loss": 0.3544,
2075
+ "step": 2950
2076
+ },
2077
+ {
2078
+ "epoch": 0.3875804049298656,
2079
+ "grad_norm": 1.4236459732055664,
2080
+ "learning_rate": 0.00012267960146827479,
2081
+ "loss": 0.3095,
2082
+ "step": 2960
2083
+ },
2084
+ {
2085
+ "epoch": 0.3888897981897638,
2086
+ "grad_norm": 1.1536756753921509,
2087
+ "learning_rate": 0.00012241740954378605,
2088
+ "loss": 0.3125,
2089
+ "step": 2970
2090
+ },
2091
+ {
2092
+ "epoch": 0.39019919144966203,
2093
+ "grad_norm": 1.4237791299819946,
2094
+ "learning_rate": 0.00012215521761929732,
2095
+ "loss": 0.3207,
2096
+ "step": 2980
2097
+ },
2098
+ {
2099
+ "epoch": 0.3915085847095602,
2100
+ "grad_norm": 1.4023237228393555,
2101
+ "learning_rate": 0.0001218930256948086,
2102
+ "loss": 0.3714,
2103
+ "step": 2990
2104
+ },
2105
+ {
2106
+ "epoch": 0.3928179779694584,
2107
+ "grad_norm": 1.3556010723114014,
2108
+ "learning_rate": 0.00012163083377031987,
2109
+ "loss": 0.3313,
2110
+ "step": 3000
2111
+ },
2112
+ {
2113
+ "epoch": 0.3941273712293566,
2114
+ "grad_norm": 1.2301980257034302,
2115
+ "learning_rate": 0.00012136864184583114,
2116
+ "loss": 0.3062,
2117
+ "step": 3010
2118
+ },
2119
+ {
2120
+ "epoch": 0.3954367644892548,
2121
+ "grad_norm": 1.3532170057296753,
2122
+ "learning_rate": 0.00012110644992134244,
2123
+ "loss": 0.2946,
2124
+ "step": 3020
2125
+ },
2126
+ {
2127
+ "epoch": 0.396746157749153,
2128
+ "grad_norm": 1.2680764198303223,
2129
+ "learning_rate": 0.00012084425799685371,
2130
+ "loss": 0.3005,
2131
+ "step": 3030
2132
+ },
2133
+ {
2134
+ "epoch": 0.39805555100905116,
2135
+ "grad_norm": 1.5346810817718506,
2136
+ "learning_rate": 0.00012058206607236498,
2137
+ "loss": 0.3363,
2138
+ "step": 3040
2139
+ },
2140
+ {
2141
+ "epoch": 0.39936494426894936,
2142
+ "grad_norm": 1.423195242881775,
2143
+ "learning_rate": 0.00012031987414787625,
2144
+ "loss": 0.3294,
2145
+ "step": 3050
2146
+ },
2147
+ {
2148
+ "epoch": 0.40067433752884757,
2149
+ "grad_norm": 1.599571704864502,
2150
+ "learning_rate": 0.00012005768222338753,
2151
+ "loss": 0.3469,
2152
+ "step": 3060
2153
+ },
2154
+ {
2155
+ "epoch": 0.4019837307887458,
2156
+ "grad_norm": 1.2103453874588013,
2157
+ "learning_rate": 0.0001197954902988988,
2158
+ "loss": 0.2827,
2159
+ "step": 3070
2160
+ },
2161
+ {
2162
+ "epoch": 0.403293124048644,
2163
+ "grad_norm": 1.3197276592254639,
2164
+ "learning_rate": 0.00011953329837441007,
2165
+ "loss": 0.3194,
2166
+ "step": 3080
2167
+ },
2168
+ {
2169
+ "epoch": 0.40460251730854213,
2170
+ "grad_norm": 1.291038990020752,
2171
+ "learning_rate": 0.00011927110644992135,
2172
+ "loss": 0.2798,
2173
+ "step": 3090
2174
+ },
2175
+ {
2176
+ "epoch": 0.40591191056844034,
2177
+ "grad_norm": 1.1556978225708008,
2178
+ "learning_rate": 0.00011900891452543262,
2179
+ "loss": 0.3318,
2180
+ "step": 3100
2181
+ },
2182
+ {
2183
+ "epoch": 0.40722130382833854,
2184
+ "grad_norm": 1.3520278930664062,
2185
+ "learning_rate": 0.0001187467226009439,
2186
+ "loss": 0.3222,
2187
+ "step": 3110
2188
+ },
2189
+ {
2190
+ "epoch": 0.40853069708823675,
2191
+ "grad_norm": 1.0671277046203613,
2192
+ "learning_rate": 0.00011848453067645517,
2193
+ "loss": 0.268,
2194
+ "step": 3120
2195
+ },
2196
+ {
2197
+ "epoch": 0.40984009034813496,
2198
+ "grad_norm": 1.442131757736206,
2199
+ "learning_rate": 0.00011822233875196644,
2200
+ "loss": 0.3028,
2201
+ "step": 3130
2202
+ },
2203
+ {
2204
+ "epoch": 0.4111494836080331,
2205
+ "grad_norm": 1.5673497915267944,
2206
+ "learning_rate": 0.00011796014682747771,
2207
+ "loss": 0.31,
2208
+ "step": 3140
2209
+ },
2210
+ {
2211
+ "epoch": 0.4124588768679313,
2212
+ "grad_norm": 1.2009717226028442,
2213
+ "learning_rate": 0.00011769795490298898,
2214
+ "loss": 0.2986,
2215
+ "step": 3150
2216
+ },
2217
+ {
2218
+ "epoch": 0.4137682701278295,
2219
+ "grad_norm": 1.2754930257797241,
2220
+ "learning_rate": 0.00011743576297850027,
2221
+ "loss": 0.3352,
2222
+ "step": 3160
2223
+ },
2224
+ {
2225
+ "epoch": 0.4150776633877277,
2226
+ "grad_norm": 1.6189430952072144,
2227
+ "learning_rate": 0.00011717357105401154,
2228
+ "loss": 0.3804,
2229
+ "step": 3170
2230
+ },
2231
+ {
2232
+ "epoch": 0.41638705664762593,
2233
+ "grad_norm": 1.6117827892303467,
2234
+ "learning_rate": 0.00011691137912952281,
2235
+ "loss": 0.3239,
2236
+ "step": 3180
2237
+ },
2238
+ {
2239
+ "epoch": 0.4176964499075241,
2240
+ "grad_norm": 1.7495907545089722,
2241
+ "learning_rate": 0.00011664918720503408,
2242
+ "loss": 0.3145,
2243
+ "step": 3190
2244
+ },
2245
+ {
2246
+ "epoch": 0.4190058431674223,
2247
+ "grad_norm": 1.2301905155181885,
2248
+ "learning_rate": 0.00011638699528054538,
2249
+ "loss": 0.2776,
2250
+ "step": 3200
2251
+ },
2252
+ {
2253
+ "epoch": 0.4203152364273205,
2254
+ "grad_norm": 1.3571341037750244,
2255
+ "learning_rate": 0.00011612480335605665,
2256
+ "loss": 0.3019,
2257
+ "step": 3210
2258
+ },
2259
+ {
2260
+ "epoch": 0.4216246296872187,
2261
+ "grad_norm": 0.9271483421325684,
2262
+ "learning_rate": 0.00011586261143156792,
2263
+ "loss": 0.2929,
2264
+ "step": 3220
2265
+ },
2266
+ {
2267
+ "epoch": 0.4229340229471169,
2268
+ "grad_norm": 1.294146180152893,
2269
+ "learning_rate": 0.00011560041950707918,
2270
+ "loss": 0.3095,
2271
+ "step": 3230
2272
+ },
2273
+ {
2274
+ "epoch": 0.42424341620701506,
2275
+ "grad_norm": 1.5177209377288818,
2276
+ "learning_rate": 0.00011533822758259045,
2277
+ "loss": 0.2714,
2278
+ "step": 3240
2279
+ },
2280
+ {
2281
+ "epoch": 0.42555280946691326,
2282
+ "grad_norm": 1.1218962669372559,
2283
+ "learning_rate": 0.00011507603565810175,
2284
+ "loss": 0.282,
2285
+ "step": 3250
2286
+ },
2287
+ {
2288
+ "epoch": 0.42686220272681147,
2289
+ "grad_norm": 1.2807728052139282,
2290
+ "learning_rate": 0.00011481384373361302,
2291
+ "loss": 0.3461,
2292
+ "step": 3260
2293
+ },
2294
+ {
2295
+ "epoch": 0.4281715959867097,
2296
+ "grad_norm": 1.1680692434310913,
2297
+ "learning_rate": 0.00011455165180912429,
2298
+ "loss": 0.2842,
2299
+ "step": 3270
2300
+ },
2301
+ {
2302
+ "epoch": 0.4294809892466079,
2303
+ "grad_norm": 1.6534638404846191,
2304
+ "learning_rate": 0.00011428945988463556,
2305
+ "loss": 0.2774,
2306
+ "step": 3280
2307
+ },
2308
+ {
2309
+ "epoch": 0.43079038250650603,
2310
+ "grad_norm": 1.2321938276290894,
2311
+ "learning_rate": 0.00011402726796014683,
2312
+ "loss": 0.2841,
2313
+ "step": 3290
2314
+ },
2315
+ {
2316
+ "epoch": 0.43209977576640424,
2317
+ "grad_norm": 1.6666522026062012,
2318
+ "learning_rate": 0.00011376507603565811,
2319
+ "loss": 0.2993,
2320
+ "step": 3300
2321
+ },
2322
+ {
2323
+ "epoch": 0.43340916902630244,
2324
+ "grad_norm": 1.8330938816070557,
2325
+ "learning_rate": 0.00011350288411116938,
2326
+ "loss": 0.2834,
2327
+ "step": 3310
2328
+ },
2329
+ {
2330
+ "epoch": 0.43471856228620065,
2331
+ "grad_norm": 1.570809245109558,
2332
+ "learning_rate": 0.00011324069218668065,
2333
+ "loss": 0.2885,
2334
+ "step": 3320
2335
+ },
2336
+ {
2337
+ "epoch": 0.4360279555460988,
2338
+ "grad_norm": 1.4093183279037476,
2339
+ "learning_rate": 0.00011297850026219192,
2340
+ "loss": 0.2872,
2341
+ "step": 3330
2342
+ },
2343
+ {
2344
+ "epoch": 0.437337348805997,
2345
+ "grad_norm": 0.8298211097717285,
2346
+ "learning_rate": 0.00011271630833770321,
2347
+ "loss": 0.2884,
2348
+ "step": 3340
2349
+ },
2350
+ {
2351
+ "epoch": 0.4386467420658952,
2352
+ "grad_norm": 1.1143261194229126,
2353
+ "learning_rate": 0.00011245411641321448,
2354
+ "loss": 0.279,
2355
+ "step": 3350
2356
+ },
2357
+ {
2358
+ "epoch": 0.4399561353257934,
2359
+ "grad_norm": 1.1568537950515747,
2360
+ "learning_rate": 0.00011219192448872575,
2361
+ "loss": 0.2724,
2362
+ "step": 3360
2363
+ },
2364
+ {
2365
+ "epoch": 0.4412655285856916,
2366
+ "grad_norm": 0.8700618147850037,
2367
+ "learning_rate": 0.00011192973256423702,
2368
+ "loss": 0.2563,
2369
+ "step": 3370
2370
+ },
2371
+ {
2372
+ "epoch": 0.4425749218455898,
2373
+ "grad_norm": 0.974319577217102,
2374
+ "learning_rate": 0.00011166754063974829,
2375
+ "loss": 0.2864,
2376
+ "step": 3380
2377
+ },
2378
+ {
2379
+ "epoch": 0.443884315105488,
2380
+ "grad_norm": 0.9288910031318665,
2381
+ "learning_rate": 0.00011140534871525958,
2382
+ "loss": 0.2717,
2383
+ "step": 3390
2384
+ },
2385
+ {
2386
+ "epoch": 0.4451937083653862,
2387
+ "grad_norm": 1.0942648649215698,
2388
+ "learning_rate": 0.00011114315679077085,
2389
+ "loss": 0.2625,
2390
+ "step": 3400
2391
+ },
2392
+ {
2393
+ "epoch": 0.4465031016252844,
2394
+ "grad_norm": 1.3224159479141235,
2395
+ "learning_rate": 0.00011088096486628212,
2396
+ "loss": 0.2719,
2397
+ "step": 3410
2398
+ }
2399
+ ],
2400
+ "logging_steps": 10,
2401
+ "max_steps": 7638,
2402
+ "num_input_tokens_seen": 0,
2403
+ "num_train_epochs": 1,
2404
+ "save_steps": 10,
2405
+ "stateful_callbacks": {
2406
+ "TrainerControl": {
2407
+ "args": {
2408
+ "should_epoch_stop": false,
2409
+ "should_evaluate": false,
2410
+ "should_log": false,
2411
+ "should_save": true,
2412
+ "should_training_stop": false
2413
+ },
2414
+ "attributes": {}
2415
+ }
2416
+ },
2417
+ "total_flos": 2.8131850187780976e+17,
2418
+ "train_batch_size": 1,
2419
+ "trial_name": null,
2420
+ "trial_params": null
2421
+ }
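The entries above are the tail of this checkpoint's trainer_state.json: one record per logging_steps=10 optimizer steps, ending at step 3410 of max_steps=7638 (3410/7638 ≈ 0.4465, matching the final "epoch" value). With train_batch_size=1 and the gradient_accumulation_steps=8 used in the training script further down, each step consumes 8 samples, so one full epoch of 7638 steps covers roughly 61,000 training examples. A minimal sketch for inspecting the loss curve from such a file, assuming a local checkpoint at a placeholder path:

import json

# Placeholder path -- point this at any checkpoint saved under outputs/.
with open("outputs/checkpoint-3410/trainer_state.json") as f:
    state = json.load(f)

# Each record in log_history carries epoch, step, loss, grad_norm, learning_rate.
losses = [e for e in state["log_history"] if "loss" in e]
for e in losses[-5:]:
    print(f"step {e['step']:>5}  loss {e['loss']:.4f}  lr {e['learning_rate']:.2e}")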
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28dc0b9cd89143400df8f8fafb91099efdbf577030cb3ebd021914598e90413e
3
+ size 6097
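training_args.bin is a pickled TrainingArguments object written with torch.save (stored via Git LFS here). To audit the exact hyperparameters a checkpoint was trained with, it can be loaded back; a sketch, assuming a local copy of the file and a transformers install that provides the pickled class:

import torch

# Pickled object, not a tensor file, so weights_only=False is required
# on PyTorch versions where weights_only defaults to True.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.gradient_accumulation_steps)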
unsloth/roleplay-zh-sharegpt-gpt4-data.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Final, complete version - optimized for an RTX 3070 (8GB)
2
+
3
+ import torch
4
+ from unsloth import FastLanguageModel
5
+ from datasets import load_dataset
6
+ from trl import SFTTrainer
7
+ from transformers import TrainingArguments, pipeline
8
+ import os
9
+
10
+ # --- Local path configuration (no changes needed) ---
11
+ local_model_path = "./gemma-3-4b-it-qat-unsloth-bnb-4bit"
12
+ local_data_dir = "./roleplay-zh-sharegpt-gpt4-data"
13
+ local_data_file = os.path.join(local_data_dir, "sharegpt_formatted_data-evol-gpt35.jsonl")
14
+ # --- End of configuration ---
15
+
16
+
17
+ # 1. Load the model and tokenizer (no changes needed)
18
+ max_seq_length = 2048
19
+ dtype = None
20
+ load_in_4bit = True
21
+
22
+ print(f"✅ 步骤 1/5: 正在从本地路径 '{local_model_path}' 加载模型和分词器...")
23
+ model, tokenizer = FastLanguageModel.from_pretrained(
24
+ model_name=local_model_path,
25
+ max_seq_length=max_seq_length,
26
+ dtype=dtype,
27
+ load_in_4bit=load_in_4bit,
28
+ )
29
+
30
+ # 2. Configure LoRA (no changes needed)
31
+ print("✅ Step 2/5: Configuring the LoRA adapter...")
32
+ model = FastLanguageModel.get_peft_model(
33
+ model,
34
+ r=16,
35
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
36
+ lora_alpha=16,
37
+ lora_dropout=0,
38
+ bias="none",
39
+ use_gradient_checkpointing=True,
40
+ random_state=3407,
41
+ )
42
+
43
+ # 3. Load and prepare the dataset (no changes needed)
44
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
45
+
46
+ ### Instruction:
47
+ {}
48
+
49
+ ### Input:
50
+ {}
51
+
52
+ ### Response:
53
+ {}"""
54
+
55
+ EOS_TOKEN = tokenizer.eos_token
56
+
57
+ def formatting_prompts_func(examples):
58
+ all_texts = []
59
+ for i in range(len(examples['system_prompt'])):
60
+ system_prompt = examples['system_prompt'][i]
61
+ conversations = examples['conversations'][i]
62
+ for j in range(0, len(conversations), 2):
63
+ if j + 1 < len(conversations):
64
+ human_turn = conversations[j]
65
+ gpt_turn = conversations[j+1]
66
+ if human_turn['from'] == 'human' and gpt_turn['from'] == 'gpt':
67
+ instruction = system_prompt
68
+ input_text = human_turn['value']
69
+ output_text = gpt_turn['value']
70
+ text = alpaca_prompt.format(instruction, input_text, output_text) + EOS_TOKEN
71
+ all_texts.append(text)
72
+ return {"text": all_texts}
73
+
74
+ print(f"✅ 步骤 3/5: 正在从本地文件 '{local_data_file}' 加载数据集...")
75
+ dataset = load_dataset("json", data_files=local_data_file, split="train")
76
+ dataset = dataset.map(
77
+ formatting_prompts_func,
78
+ batched=True,
79
+ remove_columns=dataset.column_names
80
+ )
81
+ print(f"🎉 数据集处理完成!总共生成了 {len(dataset)} 条训练样本。")
82
+
83
+
84
+ # 4. Configure training arguments and start training
85
+ print("\n✅ Step 4/5: Starting model fine-tuning...")
86
+ trainer = SFTTrainer(
87
+ model=model,
88
+ tokenizer=tokenizer,
89
+ train_dataset=dataset,
90
+ dataset_text_field="text",
91
+ max_seq_length=max_seq_length,
92
+ dataset_num_proc=2,
93
+ packing=False,
94
+ args=TrainingArguments(
95
+ # --- The settings below are tuned for this hardware ---
96
+ per_device_train_batch_size = 1, # <--- Changed from 2 to 1 to lower peak VRAM usage
97
+ gradient_accumulation_steps = 8, # <--- Changed from 4 to 8 to keep the effective batch size unchanged
98
+ # ------------------------------------
99
+
100
+ warmup_steps=10,
101
+ num_train_epochs=1,
102
+ learning_rate=2e-4,
103
+ # bf16 is enabled automatically when the environment supports it
104
+ fp16=not torch.cuda.is_bf16_supported(),
105
+ bf16=torch.cuda.is_bf16_supported(),
106
+ logging_steps=10,
107
+ optim="adamw_8bit",
108
+ weight_decay=0.01,
109
+ lr_scheduler_type="linear",
110
+ seed=3407,
111
+ output_dir="outputs",
112
+
113
+ # --- Checkpoint configuration ---
114
+ save_strategy="steps",
115
+ save_steps=10, # <--- Changed to a more reasonable save frequency
116
+ save_total_limit=3,
117
+ # ----------------------
118
+ ),
119
+ )
120
+
121
+ trainer.train(resume_from_checkpoint = True) # Resumes from the latest checkpoint in output_dir; set to False for a fresh run with no checkpoints
122
+
123
+ # 5. Save and test (no changes needed)
124
+ print("\n✅ Step 5/5: Fine-tuning complete, starting inference test...")
125
+ final_model_path = "gemma3_roleplay_lora_local"
126
+ model.save_pretrained(final_model_path)
127
+ tokenizer.save_pretrained(final_model_path)
128
+ print(f"🎉 最终模型已保存到 '{final_model_path}' 文件夹。")
129
+
130
+ # --- Inference ---
131
+ system_prompt_virene = """角色名称:薇莲(Virene)
132
+ 开场语:「真相,始终都存在于迷雾之中。」
133
+ 身份背景:薇莲是一名神秘的赏金猎人,常常被人雇佣去完成各种危险任务,从而掩盖她本身的身份和目的。据传,薇莲早年曾在某个神秘组织中学习过各种神秘技能,所以她的能力非常高超。
134
+ 性格特征:薇莲总是保持着冷静、沉着的态度,不论面对何种情况都能保持冷静。同时,她总是带有一定的神秘色彩,让人无法洞察她真正的想法和动机。她对任务非常认真,但很少会谈及自己的生活和过去,因此让人对她的身份感到好奇。
135
+ 语言风格:薇莲的语言简洁有力,通常只说必要的话语来传达她的意思。她的语气总是带有一丝威慑力,让人不敢轻易挑战她。
136
+ 行为特征:薇莲行动迅速而准确,总是在保持低调的同时完成任务。她具备很强的隐蔽能力,在执行任务的时候几乎不留痕迹,让人难以发现她的存在。不过,她也有时候会让人感到无法理解,经常出现在决定性瞬间,让人觉得她真正的动机仍旧是个谜。"""
137
+ user_input = "我需要一个赏金猎人完成一个任务,听说您非常厉害。我们可以谈一下合作吗?"
138
+
139
+ prompt = alpaca_prompt.format(
140
+ system_prompt_virene,
141
+ user_input,
142
+ "",
143
+ )
144
+
145
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
146
+ outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tokenizer.eos_token_id)
147
+
148
+ print("\n===== 推理结果 =====")
149
+ print(outputs[0]['generated_text'])
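formatting_prompts_func above unrolls each multi-turn ShareGPT conversation into independent Alpaca-style samples, pairing consecutive human/gpt turns under the shared system_prompt. A sketch of the batch shape it expects, using the keys from the function itself but with invented values:

# A minimal ShareGPT-style batch shaped like the records the script loads;
# the keys mirror formatting_prompts_func, the values here are made up.
examples = {
    "system_prompt": ["你是一位冷静的赏金猎人。"],
    "conversations": [[
        {"from": "human", "value": "你好,能接一个委托吗?"},
        {"from": "gpt", "value": "说出你的条件。"},
    ]],
}
print(formatting_prompts_func(examples)["text"][0])  # one Alpaca sample per human/gpt pair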
unsloth/test_roleplayer_lora.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =================================================================
2
+ # LoRA model inference test script (the debut performance)
3
+ # =================================================================
4
+
5
+ import torch
6
+ from unsloth import FastLanguageModel
7
+ from transformers import pipeline
8
+ import os
9
+
10
+ # --- 1. Configure paths ---
11
+
12
+ # Local path to your Gemma base model
13
+ base_model_path = "./gemma-3-4b-it-qat-unsloth-bnb-4bit"
14
+
15
+ # Path to the LoRA model you just extracted and renamed
17
+ # Make sure this folder name matches exactly what you saved!
17
+ lora_model_path = "./lora_model_roleplayer_actor"
18
+
19
+ print("✅ 步骤 1/4: 路径配置完成。")
20
+ print(f" - 基础模型路径: {base_model_path}")
21
+ print(f" - LoRA模型路径: {lora_model_path}")
22
+
23
+ # --- 2. Load the model and inject its soul ---
24
+
25
+ print("\n✅ 步骤 2/4: 正在加载基础模型...")
26
+
27
+ # Load the base model in 4-bit precision, matching the training setup
28
+ model, tokenizer = FastLanguageModel.from_pretrained(
29
+ model_name = base_model_path,
30
+ load_in_4bit = True,
31
+ )
32
+
33
+ print(" - 基础模型加载成功。")
34
+
35
+ # This is the key step: injecting the LoRA "soul" into the base model!
37
+ # load_adapter attaches the adapter weights automatically, no manual merging needed
37
+ print(f" - 正在从 '{lora_model_path}' 加载并注入LoRA适配器...")
38
+ model.load_adapter(lora_model_path)
39
+ print(" - LoRA灵魂注入成功!模型已准备就绪。")
40
+
41
+
42
+ # --- 3. Prepare the "script" (prompt) ---
43
+
44
+ print("\n✅ 步骤 3/4: 正在准备测试剧本...")
45
+
46
+ # We use exactly the same Alpaca format as during training,
47
+ # which gives the model the best chance of understanding our instructions
48
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
49
+
50
+ ### Instruction:
51
+ {}
52
+
53
+ ### Input:
54
+ {}
55
+
56
+ ### Response:
57
+ {}"""
58
+
59
+ # --- Define our characters and dialogue ---
60
+
61
+ # Character 1: Virene (a character the model was trained on)
62
+ instruction_virene = """角色名称:薇莲(Virene)
63
+ 开场语:「真相,始终都存在于迷雾之中。」
64
+ 身份背景:薇莲是一名神秘的赏金猎人,常常被人雇佣去完成各种危险任务,从而掩盖她本身的身份和目的。据传,薇莲早年曾在某个神秘组织中学习过各种神秘技能,所以她的能力非常高超。
65
+ 性格特征:薇莲总是保持着冷静、沉着的态度,不论面对何种情况都能保持冷静。同时,她总是带有一定的神秘色彩,让人无法洞察她真正的想法和动机。她对任务非常认真,但很少会谈及自己的生活和过去,因此让人对她的身份感到好奇。
66
+ 语言风格:薇莲的语言简洁有力,通常只说必要的话语来传达她的意思。她的语气总是带有一丝威慑力,让人不敢轻易挑战她。
67
+ 行为特征:薇莲行动迅速而准确,总是在保持低调的同时完成任务。她具备很强的隐蔽能力,在执行任务的时候几乎不留痕迹,让人难以发现她的存在。不过,她也有时候会让人感到无法理解,经常出现在决定性瞬间,让人觉得她真正的动机仍旧是个谜。"""
68
+ input_virene = "我需要一个赏金猎人完成一个任务,听说您非常厉害。我们可以谈一下合作吗?"
69
+
70
+
71
+ # Character 2: (you can create a brand-new character of your own to test!)
72
+ instruction_new_role = """你现在是一个脾气火爆、说话直来直去,但内心充满正义感的退休老兵,名叫“老炮儿”。
73
+ 你的口头禅是“嘿,我说你小子...”。
74
+ 你的语言风格充满京味儿,简洁有力。"""
75
+ input_new_role = "大爷,问个路,这附近哪有吃饭的地方啊?"
76
+
77
+
78
+ # --- Pick a character to test ---
80
+ # Swap the instruction and input to test different characters
80
+ instruction_to_test = instruction_new_role
81
+ input_to_test = input_new_role
82
+
83
+ # Format the "script"
84
+ prompt = alpaca_prompt.format(
85
+ instruction_to_test,
86
+ input_to_test,
87
+ "", # Response部分留空,等待模型生成
88
+ )
89
+
90
+ print(" - 剧本已生成,准备开始推理!")
91
+
92
+ # --- 4. Run inference (the moment of truth) ---
93
+
94
+ print("\n✅ 步骤 4/4: 首演开始!正在生成对话...")
95
+
96
+ # Use the transformers pipeline utility, the simplest way to run inference
97
+ pipe = pipeline(
98
+ "text-generation",
99
+ model=model,
100
+ tokenizer=tokenizer
101
+ )
102
+
103
+ # Generation parameters
104
+ generation_args = {
105
+ "max_new_tokens": 256, # 最多生成多少个新词
106
+ "do_sample": True, # 开启采样,让回答更多样
107
+ "temperature": 0.7, # 温度,越低回答越稳定,越高越有创造力
108
+ "top_p": 0.9, # Top-p采样,控制多样性
109
+ "top_k": 50, # Top-k采样,控制多样性
110
+ "pad_token_id": tokenizer.eos_token_id # 明确告知结束符
111
+ }
112
+
113
+ # Run the pipeline!
114
+ outputs = pipe(prompt, **generation_args)
115
+
116
+ # --- Print the results ---
117
+
118
+ print("\n\n==================== 🌟 演出结束 🌟 ====================")
119
+ print("完整的生成文本:\n")
120
+ print(outputs[0]['generated_text'])
121
+ print("\n========================================================")
122
+
123
+ # Extract only the model-generated part, for a cleaner view
124
+ response_part = outputs[0]['generated_text'].split("### Response:")[1].strip()
125
+ print("\n只看模型的回答部分:\n")
126
+ print(response_part)
127
+ print("\n========================================================")