File size: 21,063 Bytes
f1f0f7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f841af43-faf7-4a7b-ad55-0da226f3220f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\user\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from datasets import load_dataset\n",
"ds = load_dataset('merve/vqav2-small')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b47b7e33-b5eb-46ec-9e43-ed118c09b290",
"metadata": {},
"outputs": [],
"source": [
"ds = ds['validation']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "877df06d-4384-4442-a8d7-7002706b7afe",
"metadata": {},
"outputs": [],
"source": [
"split_ds = ds.train_test_split(test_size=0.05) # we'll use a very small split for demo\n",
"train_ds = split_ds[\"test\"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "870b515b-d3f5-4638-adbf-70fa39ee2ac5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['multiple_choice_answer', 'question', 'image'],\n",
" num_rows: 1072\n",
"})"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_ds"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "50e42737-ff75-4c90-bdf4-012b45678292",
"metadata": {},
"outputs": [],
"source": [
"from transformers import PaliGemmaProcessor\n",
"model_id = r\"D:\\PaliGemma\\paligemma-3b-pt-224\"\n",
"processor = PaliGemmaProcessor.from_pretrained(model_id)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "83f6c8f7-1960-4ae9-93cc-0a2d25d0d5f4",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"device = \"cuda\"\n",
"\n",
"image_token = processor.tokenizer.convert_tokens_to_ids(\"<image>\")\n",
"def collate_fn(examples):\n",
" texts = [\"answer \" + example[\"question\"] for example in examples]\n",
" labels= [example['multiple_choice_answer'] for example in examples]\n",
" images = [example[\"image\"].convert(\"RGB\") for example in examples]\n",
" tokens = processor(text=texts, images=images, suffix=labels,\n",
" return_tensors=\"pt\", padding=\"longest\",\n",
" #tokenize_newline_separately=False\n",
" )\n",
"\n",
" tokens = tokens.to(torch.bfloat16).to(device)\n",
" return tokens\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3fb8260e-c333-4948-8051-c85964409660",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 3/3 [00:12<00:00, 4.05s/it]\n"
]
}
],
"source": [
"from transformers import PaliGemmaForConditionalGeneration\n",
"import torch\n",
"\n",
"model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)\n",
"\n",
"for param in model.vision_tower.parameters():\n",
" param.requires_grad = False\n",
"\n",
"for param in model.multi_modal_projector.parameters():\n",
" param.requires_grad = False"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7ae939ff-98e7-47f8-af29-8fd1ee8f237c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unused kwargs: ['bnb_4bit_compute_type']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.\n",
"Loading checkpoint shards: 100%|██████████| 3/3 [00:22<00:00, 7.45s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"trainable params: 11,298,816 || all params: 2,934,765,296 || trainable%: 0.3850\n"
]
}
],
"source": [
"from transformers import BitsAndBytesConfig\n",
"from peft import get_peft_model, LoraConfig\n",
"\n",
"bnb_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_compute_type=torch.bfloat16\n",
")\n",
"\n",
"lora_config = LoraConfig(\n",
" r=8,\n",
" target_modules=[\"q_proj\", \"o_proj\", \"k_proj\", \"v_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n",
" task_type=\"CAUSAL_LM\",\n",
")\n",
"model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={\"\":0})\n",
"model = get_peft_model(model, lora_config)\n",
"model.print_trainable_parameters()\n",
"#trainable params: 11,298,816 || all params: 2,934,634,224 || trainable%: 0.38501616002417344"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7fe77639-44ab-4747-8ced-343eb06e0efd",
"metadata": {},
"outputs": [],
"source": [
"import accelerate"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "98b996db-e9c5-42bf-b979-fd79a28f7e5e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.26.0\n"
]
}
],
"source": [
"print(accelerate.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "9a6546c4-90b3-4f4d-8de0-1e020883a702",
"metadata": {},
"outputs": [],
"source": [
"from transformers import TrainingArguments\n",
"args=TrainingArguments(\n",
" num_train_epochs=2,\n",
" remove_unused_columns=False,\n",
" per_device_train_batch_size=4,\n",
" gradient_accumulation_steps=4,\n",
" warmup_steps=2,\n",
" learning_rate=2e-5,\n",
" weight_decay=1e-6,\n",
" adam_beta2=0.999,\n",
" logging_steps=100,\n",
" optim=\"adamw_torch\",\n",
" save_strategy=\"steps\",\n",
" save_steps=1000,\n",
" # push_to_hub=True,\n",
" save_total_limit=1,\n",
" output_dir=\"paligemma_vqav2\",\n",
" bf16=True,\n",
" report_to=[\"tensorboard\"],\n",
" dataloader_pin_memory=False\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df25a8dc-8ab9-467a-b6ce-dee13addb776",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 20,
"id": "9a8de871-e869-4daf-a250-0aec6437f076",
"metadata": {},
"outputs": [],
"source": [
"from transformers import Trainer\n",
"\n",
"trainer = Trainer(\n",
" model=model,\n",
" train_dataset=train_ds ,\n",
" data_collator=collate_fn,\n",
" args=args\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "77d743e5-6b5b-40a0-a2f4-7591f2c8df50",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.\n",
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.\n",
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.\n",
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.\n",
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text and `<bos>` token after that. For this call, we will infer how many images each text has and add special tokens.\n",
"C:\\Users\\user\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\transformers\\models\\siglip\\modeling_siglip.py:574: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
" attn_output = torch.nn.functional.scaled_dot_product_attention(\n",
"C:\\Users\\user\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\bitsandbytes\\nn\\modules.py:452: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n",
" warnings.warn(\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[21], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\transformers\\trainer.py:2123\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m 2121\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[0;32m 2122\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2123\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2124\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2125\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2126\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2127\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2128\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\transformers\\trainer.py:2481\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m 2475\u001b[0m context \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 2476\u001b[0m functools\u001b[38;5;241m.\u001b[39mpartial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mno_sync, model\u001b[38;5;241m=\u001b[39mmodel)\n\u001b[0;32m 2477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(batch_samples) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 2478\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m contextlib\u001b[38;5;241m.\u001b[39mnullcontext\n\u001b[0;32m 2479\u001b[0m )\n\u001b[0;32m 2480\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m context():\n\u001b[1;32m-> 2481\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_items_in_batch\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2483\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 2484\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m 2485\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[0;32m 2486\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[0;32m 2487\u001b[0m ):\n\u001b[0;32m 2488\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m 2489\u001b[0m tr_loss \u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m+\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\transformers\\trainer.py:3612\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[1;34m(***failed resolving arguments***)\u001b[0m\n\u001b[0;32m 3610\u001b[0m scaled_loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m 3611\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 3612\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mbackward(loss, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 3613\u001b[0m \u001b[38;5;66;03m# Finally we need to normalize the loss for reporting\u001b[39;00m\n\u001b[0;32m 3614\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_items_in_batch \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\accelerate\\accelerator.py:1964\u001b[0m, in \u001b[0;36mAccelerator.backward\u001b[1;34m(self, loss, **kwargs)\u001b[0m\n\u001b[0;32m 1962\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscaler\u001b[38;5;241m.\u001b[39mscale(loss)\u001b[38;5;241m.\u001b[39mbackward(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 1963\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1964\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\torch\\_tensor.py:521\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m 511\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m 512\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m 513\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[0;32m 514\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 519\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[0;32m 520\u001b[0m )\n\u001b[1;32m--> 521\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 522\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[0;32m 523\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\torch\\autograd\\__init__.py:289\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m 284\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[0;32m 286\u001b[0m \u001b[38;5;66;03m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[0;32m 287\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m 288\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 289\u001b[0m \u001b[43m_engine_run_backward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 290\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 292\u001b[0m \u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 293\u001b[0m \u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 297\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\anaconda3\\envs\\Ultralytics\\lib\\site-packages\\torch\\autograd\\graph.py:768\u001b[0m, in \u001b[0;36m_engine_run_backward\u001b[1;34m(t_outputs, *args, **kwargs)\u001b[0m\n\u001b[0;32m 766\u001b[0m unregister_hooks \u001b[38;5;241m=\u001b[39m _register_logging_hooks_on_whole_graph(t_outputs)\n\u001b[0;32m 767\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 768\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Variable\u001b[38;5;241m.\u001b[39m_execution_engine\u001b[38;5;241m.\u001b[39mrun_backward( \u001b[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[0;32m 769\u001b[0m t_outputs, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 770\u001b[0m ) \u001b[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[0;32m 771\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 772\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attach_logging_hooks:\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "422d8f32-ecd9-4266-b5a4-bd26d45c4fc7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "365cf997-a80e-407e-9848-74e4d4b6a8a8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|