merve
/

smol-vision

Image-Text-to-Text

Transformers

notebook

Model card Files Files and versions Community

merve HF Staff commited on 10 days ago

Commit

8b9c168

verified ·

1 Parent(s): fe8c001

DINOv3

Browse files

Files changed (1) hide show

DINOv3_FT.ipynb +1774 -0

DINOv3_FT.ipynb ADDED Viewed

	@@ -0,0 +1,1774 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "machine_shape": "hm",
+      "gpuType": "L4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "32138245d41348928cc5b5834b07cb7e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_df6de04fdb204d348767dd0b2d0e88f7",
+              "IPY_MODEL_63a3800d62dd41d6b4a3f643a8930d95",
+              "IPY_MODEL_49d67bd205184874a5cee04d318d91fe"
+            ],
+            "layout": "IPY_MODEL_f00ace964f96471b9eb839cce48ce378"
+          }
+        },
+        "df6de04fdb204d348767dd0b2d0e88f7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_3ad0ac8def244930a3aff41d68a88a65",
+            "placeholder": "",
+            "style": "IPY_MODEL_7464841c193d492685bb929b1c0d230c",
+            "value": "preprocessor_config.json: 100%"
+          }
+        },
+        "63a3800d62dd41d6b4a3f643a8930d95": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_5c16553a2ff34a37a2cb62b4a4c42a6f",
+            "max": 585,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_34be83ddb4bf43e58cadbcbac5a606b7",
+            "value": 585
+          }
+        },
+        "49d67bd205184874a5cee04d318d91fe": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_0ce7bd7e52074f29b446ef2d4dd0921a",
+            "placeholder": "",
+            "style": "IPY_MODEL_7e2178d696c04d5787e736ace9ab57c0",
+            "value": " 585/585 [00:00&lt;00:00, 65.6kB/s]"
+          }
+        },
+        "f00ace964f96471b9eb839cce48ce378": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3ad0ac8def244930a3aff41d68a88a65": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7464841c193d492685bb929b1c0d230c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "5c16553a2ff34a37a2cb62b4a4c42a6f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "34be83ddb4bf43e58cadbcbac5a606b7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "0ce7bd7e52074f29b446ef2d4dd0921a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "7e2178d696c04d5787e736ace9ab57c0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3ff80bc2f64948408757caa8715d0603": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_12aa8675bca54f05a6deb7ec7a5def7a",
+              "IPY_MODEL_31a74feac76f4744a0f34fbc99433831",
+              "IPY_MODEL_bd51d97e739a4e78ad28083043f638d8"
+            ],
+            "layout": "IPY_MODEL_062d36b5d0c043a597eb9b3ebd35f313"
+          }
+        },
+        "12aa8675bca54f05a6deb7ec7a5def7a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
+            "placeholder": "",
+            "style": "IPY_MODEL_f2c7be27f90b49a3abe51b5e3003c17d",
+            "value": "config.json: 100%"
+          }
+        },
+        "31a74feac76f4744a0f34fbc99433831": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_76d1f15c857640c3b06d98aef478f234",
+            "max": 744,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_d43089f8240c44339c6881355ff0aee3",
+            "value": 744
+          }
+        },
+        "bd51d97e739a4e78ad28083043f638d8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a139b85557a942b9b5d32b9d7def3e50",
+            "placeholder": "",
+            "style": "IPY_MODEL_92043bfce97e4629bf9e4b268aa88c11",
+            "value": " 744/744 [00:00&lt;00:00, 93.7kB/s]"
+          }
+        },
+        "062d36b5d0c043a597eb9b3ebd35f313": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2c2223a6ae3e4ff6be96a5f4e2d2d9b6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f2c7be27f90b49a3abe51b5e3003c17d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "76d1f15c857640c3b06d98aef478f234": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d43089f8240c44339c6881355ff0aee3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "a139b85557a942b9b5d32b9d7def3e50": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "92043bfce97e4629bf9e4b268aa88c11": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "f20b3989658642528f4ed91666320097": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_3ee9921a635d44ec9b248e2155b5b243",
+              "IPY_MODEL_caf0790dbf2544378cb04aa8eb3098c3",
+              "IPY_MODEL_3ff0fc5ce62a44b9950dd8575d90bd21"
+            ],
+            "layout": "IPY_MODEL_77cdafc6dae44107a43a46ae19ed390a"
+          }
+        },
+        "3ee9921a635d44ec9b248e2155b5b243": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_65d8b73e3bdd46fca8a42b67739e27f9",
+            "placeholder": "",
+            "style": "IPY_MODEL_b566321171044b0eb02ea3bd8c0472df",
+            "value": "model.safetensors: 100%"
+          }
+        },
+        "caf0790dbf2544378cb04aa8eb3098c3": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_62535e046f794a28b4002c3f34fe7ff7",
+            "max": 3362432800,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_663aa65fdb4e4349b2815b6bafce4dcd",
+            "value": 3362432800
+          }
+        },
+        "3ff0fc5ce62a44b9950dd8575d90bd21": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_8410c9d15bca4c9f8b3aab2b7d327211",
+            "placeholder": "",
+            "style": "IPY_MODEL_fb359d0651a74fe790aaace9a5d0e329",
+            "value": " 3.36G/3.36G [00:18&lt;00:00, 296MB/s]"
+          }
+        },
+        "77cdafc6dae44107a43a46ae19ed390a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "65d8b73e3bdd46fca8a42b67739e27f9": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b566321171044b0eb02ea3bd8c0472df": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "62535e046f794a28b4002c3f34fe7ff7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "663aa65fdb4e4349b2815b6bafce4dcd": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "8410c9d15bca4c9f8b3aab2b7d327211": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fb359d0651a74fe790aaace9a5d0e329": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## DINOv3 Fine-tuning for Image Classification"
+      ],
+      "metadata": {
+        "id": "BCTUDjwiYn6T"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install -q trackio git+https://github.com/huggingface/transformers.git"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Aa1zPoxo_JBf",
+        "outputId": "958700f5-189c-42ec-dfc5-9852e5efe368"
+      },
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m838.5/838.5 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Dataset"
+      ],
+      "metadata": {
+        "id": "5AJ3YVCE8S9Y"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We will do a very small run on the Food-101 dataset, keeping only 10% of each split."
+      ],
+      "metadata": {
+        "id": "s_Aabbb6VBZt"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from datasets import load_dataset\n",
+        "\n",
+        "SEED = 42  # fixed seed so the same 10% subsets are drawn on every run\n",
+        "\n",
+        "ds = load_dataset(\"ethz/food101\")\n",
+        "\n",
+        "# `train_test_split(test_size=0.9)` puts 90% of the rows into its \"test\" part;\n",
+        "# only the remaining \"train\" part (10%) is kept for this small run.\n",
+        "train_ds = ds[\"train\"].shuffle(seed=SEED).train_test_split(test_size=0.9, seed=SEED)[\"train\"]\n",
+        "val_ds = ds[\"validation\"].shuffle(seed=SEED).train_test_split(test_size=0.9, seed=SEED)[\"train\"]"
+      ],
+      "metadata": {
+        "id": "Cxzbngbq4K31"
+      },
+      "execution_count": 7,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "train_ds"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "g1wl86sp8L6C",
+        "outputId": "1b42f43f-df62-4eba-f469-54cabd232cf9"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Dataset({\n",
+              "    features: ['image', 'label'],\n",
+              "    num_rows: 7575\n",
+              "})"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 8
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "val_ds"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Tq5OiKxvVj9k",
+        "outputId": "391489ba-d95f-498a-b4bb-f959e19686b0"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Dataset({\n",
+              "    features: ['image', 'label'],\n",
+              "    num_rows: 2525\n",
+              "})"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 9
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Build both lookup directions from the class names of the dataset's `label` feature.\n",
+        "num_classes = train_ds.features[\"label\"].num_classes\n",
+        "id2label = dict(enumerate(train_ds.features[\"label\"].names))\n",
+        "label2id = {name: idx for idx, name in id2label.items()}\n",
+        "print(f\"Classes: {num_classes}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1JcvDPFK8Scd",
+        "outputId": "5c920e23-e96b-4c62-bf3a-7db183c97f48"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Classes: 101\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Load Model\n",
+        "\n",
+        "The checkpoint is a bare backbone without a classification head, so we wrap it in a small module that adds a linear head."
+      ],
+      "metadata": {
+        "id": "_69A3AmO81c8"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch.nn as nn\n",
+        "import torch\n",
+        "from transformers import AutoImageProcessor, AutoModel, get_cosine_schedule_with_warmup\n",
+        "\n",
+        "MODEL_NAME = \"facebook/dinov3-vith16plus-pretrain-lvd1689m\"\n",
+        "\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "\n",
+        "\n",
+        "image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)\n",
+        "backbone = AutoModel.from_pretrained(MODEL_NAME)\n",
+        "\n",
+        "hidden_size = getattr(backbone.config, \"hidden_size\", None)\n",
+        "\n",
+        "class DinoV3Linear(nn.Module):\n",
+        "    \"\"\"Backbone followed by a single linear classification head.\n",
+        "\n",
+        "    Args:\n",
+        "        backbone: the pretrained model returned by `AutoModel.from_pretrained`.\n",
+        "        hidden_size: input size of the linear head.\n",
+        "        num_classes: number of output logits.\n",
+        "        freeze_backbone: if True, backbone parameters get `requires_grad = False`\n",
+        "            and the backbone stays in eval mode, even after `model.train()`.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: if `hidden_size` is None.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, backbone: AutoModel, hidden_size: int, num_classes: int, freeze_backbone: bool = True):\n",
+        "        super().__init__()\n",
+        "        if hidden_size is None:\n",
+        "            # The caller reads `hidden_size` from the config with a None default;\n",
+        "            # fail here with a clear message instead of inside nn.Linear.\n",
+        "            raise ValueError(\"hidden_size is required to build the classification head\")\n",
+        "        self.backbone = backbone\n",
+        "        self.freeze_backbone = freeze_backbone\n",
+        "        if freeze_backbone:\n",
+        "            for p in self.backbone.parameters():\n",
+        "                p.requires_grad = False\n",
+        "            self.backbone.eval()\n",
+        "\n",
+        "        self.head = nn.Linear(hidden_size, num_classes)\n",
+        "\n",
+        "    def train(self, mode: bool = True):\n",
+        "        \"\"\"Set train/eval mode, keeping a frozen backbone in eval mode.\"\"\"\n",
+        "        super().train(mode)\n",
+        "        if self.freeze_backbone:\n",
+        "            # nn.Module.train() switches every child; undo that for the frozen backbone.\n",
+        "            self.backbone.eval()\n",
+        "        return self\n",
+        "\n",
+        "    def forward(self, pixel_values):\n",
+        "        \"\"\"Return logits computed from the first token of `last_hidden_state`.\"\"\"\n",
+        "        outputs = self.backbone(pixel_values=pixel_values)\n",
+        "        last_hidden = outputs.last_hidden_state\n",
+        "        cls = last_hidden[:, 0]  # token at index 0 is used as the image representation\n",
+        "        logits = self.head(cls)\n",
+        "        return logits\n",
+        "\n",
+        "model = DinoV3Linear(backbone, hidden_size, num_classes, freeze_backbone=True).to(device) # we only train the head"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 113,
+          "referenced_widgets": [
+            "32138245d41348928cc5b5834b07cb7e",
+            "df6de04fdb204d348767dd0b2d0e88f7",
+            "63a3800d62dd41d6b4a3f643a8930d95",
+            "49d67bd205184874a5cee04d318d91fe",
+            "f00ace964f96471b9eb839cce48ce378",
+            "3ad0ac8def244930a3aff41d68a88a65",
+            "7464841c193d492685bb929b1c0d230c",
+            "5c16553a2ff34a37a2cb62b4a4c42a6f",
+            "34be83ddb4bf43e58cadbcbac5a606b7",
+            "0ce7bd7e52074f29b446ef2d4dd0921a",
+            "7e2178d696c04d5787e736ace9ab57c0",
+            "3ff80bc2f64948408757caa8715d0603",
+            "12aa8675bca54f05a6deb7ec7a5def7a",
+            "31a74feac76f4744a0f34fbc99433831",
+            "bd51d97e739a4e78ad28083043f638d8",
+            "062d36b5d0c043a597eb9b3ebd35f313",
+            "2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
+            "f2c7be27f90b49a3abe51b5e3003c17d",
+            "76d1f15c857640c3b06d98aef478f234",
+            "d43089f8240c44339c6881355ff0aee3",
+            "a139b85557a942b9b5d32b9d7def3e50",
+            "92043bfce97e4629bf9e4b268aa88c11",
+            "f20b3989658642528f4ed91666320097",
+            "3ee9921a635d44ec9b248e2155b5b243",
+            "caf0790dbf2544378cb04aa8eb3098c3",
+            "3ff0fc5ce62a44b9950dd8575d90bd21",
+            "77cdafc6dae44107a43a46ae19ed390a",
+            "65d8b73e3bdd46fca8a42b67739e27f9",
+            "b566321171044b0eb02ea3bd8c0472df",
+            "62535e046f794a28b4002c3f34fe7ff7",
+            "663aa65fdb4e4349b2815b6bafce4dcd",
+            "8410c9d15bca4c9f8b3aab2b7d327211",
+            "fb359d0651a74fe790aaace9a5d0e329"
+          ]
+        },
+        "id": "_oqXAu_y81H4",
+        "outputId": "7c4a4f6f-2301-4a43-eecb-50f1adb004b9"
+      },
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "preprocessor_config.json:   0%|          | 0.00/585 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "32138245d41348928cc5b5834b07cb7e"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "config.json:   0%|          | 0.00/744 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "3ff80bc2f64948408757caa8715d0603"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "model.safetensors:   0%|          | 0.00/3.36G [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "f20b3989658642528f4ed91666320097"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Next, we write a data collator that batches the inputs, and build the dataloaders for training and validation."
+      ],
+      "metadata": {
+        "id": "IfC3TFbw9SlZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from dataclasses import dataclass\n",
+        "from PIL import Image\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "from transformers import AutoImageProcessor\n",
+        "\n",
+        "@dataclass\n",
+        "class Collator:\n",
+        "    \"\"\"Turn a list of dataset examples into one batch for the model.\n",
+        "\n",
+        "    Each example is a dict with an `image` and an integer `label`. The result is a\n",
+        "    dict with `pixel_values` (from the image processor) and `labels` (long tensor).\n",
+        "    \"\"\"\n",
+        "\n",
+        "    processor: AutoImageProcessor  # image processor applied to the whole batch\n",
+        "\n",
+        "    def __call__(self, batch):\n",
+        "        labels = torch.tensor([x[\"label\"] for x in batch], dtype=torch.long)\n",
+        "\n",
+        "        # There are grayscale images in the dataset, so PIL images are converted to RGB.\n",
+        "        # Anything that is not a PIL image is passed through unchanged instead of being\n",
+        "        # dropped, so images and labels always stay aligned one-to-one.\n",
+        "        rgb_images = [\n",
+        "            x[\"image\"].convert(\"RGB\") if isinstance(x[\"image\"], Image.Image) else x[\"image\"]\n",
+        "            for x in batch\n",
+        "        ]\n",
+        "\n",
+        "        inputs = self.processor(images=rgb_images, return_tensors=\"pt\")\n",
+        "        return {\"pixel_values\": inputs[\"pixel_values\"], \"labels\": labels}\n",
+        "\n",
+        "collate_fn = Collator(image_processor)"
+      ],
+      "metadata": {
+        "id": "Wlo3_8qE9SVR"
+      },
+      "execution_count": 12,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch.utils.data import DataLoader\n",
+        "import os\n",
+        "\n",
+        "BATCH_SIZE = 8\n",
+        "NUM_WORKERS = min(8, os.cpu_count() or 2)\n",
+        "\n",
+        "\n",
+        "def _make_loader(dataset, shuffle):\n",
+        "    \"\"\"Build a DataLoader over `dataset` with the settings shared by both splits.\"\"\"\n",
+        "    return DataLoader(\n",
+        "        dataset,\n",
+        "        batch_size=BATCH_SIZE,\n",
+        "        shuffle=shuffle,\n",
+        "        num_workers=NUM_WORKERS,\n",
+        "        pin_memory=True,\n",
+        "        collate_fn=collate_fn,\n",
+        "    )\n",
+        "\n",
+        "\n",
+        "# Only the training split is shuffled.\n",
+        "train_loader = _make_loader(train_ds, shuffle=True)\n",
+        "val_loader = _make_loader(val_ds, shuffle=False)"
+      ],
+      "metadata": {
+        "id": "Nou-Ct_e9zV5"
+      },
+      "execution_count": 13,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Training"
+      ],
+      "metadata": {
+        "id": "RblgS11W-Wuo"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The training configuration (hyperparameters, optimizer, scheduler and loss) is defined below."
+      ],
+      "metadata": {
+        "id": "25sCxjwG_tPo"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import math\n",
+        "import random\n",
+        "from typing import List, Dict, Any\n",
+        "\n",
+        "\n",
+        "EPOCHS = 5\n",
+        "LR = 5e-4\n",
+        "WEIGHT_DECAY = 1e-4\n",
+        "WARMUP_RATIO = 0.05\n",
+        "CHECKPOINT_DIR = \"./checkpoints_dinov3_food101\"\n",
+        "EVAL_EVERY_STEPS = 100\n",
+        "\n",
+        "# Only parameters that still require gradients are handed to the optimizer.\n",
+        "optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR, weight_decay=WEIGHT_DECAY)\n",
+        "# len(train_loader) is already the integer number of batches per epoch.\n",
+        "total_steps = EPOCHS * len(train_loader)\n",
+        "warmup_steps = int(WARMUP_RATIO * total_steps)\n",
+        "scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)\n",
+        "criterion = nn.CrossEntropyLoss()\n",
+        "\n",
+        "# `torch.cuda.amp.GradScaler(...)` is deprecated in favour of `torch.amp.GradScaler(\"cuda\", ...)`.\n",
+        "scaler = torch.amp.GradScaler(\"cuda\", enabled=torch.cuda.is_available())"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "WWM8KLQD_sya",
+        "outputId": "1672c194-aad2-4af2-a9cf-e61aa0d558b9"
+      },
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/tmp/ipython-input-593493728.py:19: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
+            "  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "os.makedirs(CHECKPOINT_DIR, exist_ok=True)  # reuse the configured path; safe to re-run this cell"
+      ],
+      "metadata": {
+        "id": "OJPRRz09kxFT"
+      },
+      "execution_count": 15,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We need to evaluate during training."
+      ],
+      "metadata": {
+        "id": "FHS5DSu1_22g"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def evaluate() -> Dict[str, float]:\n",
+        "    \"\"\"Evaluate the notebook-level `model` on `val_loader`.\n",
+        "\n",
+        "    Returns:\n",
+        "        A dict with `val_loss` (per-sample mean of `criterion`) and `val_acc`\n",
+        "        (fraction of correct argmax predictions).\n",
+        "\n",
+        "    Each sub-module's train/eval flag is restored on exit, so calling this in the\n",
+        "    middle of an epoch does not leave the model in eval mode for later steps.\n",
+        "    \"\"\"\n",
+        "    previous_modes = [(module, module.training) for module in model.modules()]\n",
+        "    model.eval()\n",
+        "    correct, total, loss_sum = 0, 0, 0.0\n",
+        "    try:\n",
+        "        with torch.no_grad():\n",
+        "            for batch in val_loader:\n",
+        "                pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
+        "                labels = batch[\"labels\"].to(device, non_blocking=True)\n",
+        "                logits = model(pixel_values)\n",
+        "                loss = criterion(logits, labels)\n",
+        "                # Weight the batch-mean loss by batch size so the final value is a per-sample mean.\n",
+        "                loss_sum += loss.item() * labels.size(0)\n",
+        "                preds = logits.argmax(dim=-1)\n",
+        "                correct += (preds == labels).sum().item()\n",
+        "                total += labels.size(0)\n",
+        "    finally:\n",
+        "        # Restore per module (not via `model.train()`), so a backbone that was\n",
+        "        # deliberately kept in eval mode is not switched to train mode.\n",
+        "        for module, was_training in previous_modes:\n",
+        "            module.training = was_training\n",
+        "    return {\n",
+        "        \"val_loss\": loss_sum / max(total, 1),\n",
+        "        \"val_acc\": correct / max(total, 1),\n",
+        "    }"
+      ],
+      "metadata": {
+        "id": "TSD4tzZr_4i3"
+      },
+      "execution_count": 16,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Let's write the training loop. We'll also use trackio for experiment tracking."
+      ],
+      "metadata": {
+        "id": "yakvOUOkAVcR"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 17,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 723
+        },
+        "id": "r-WQGd7UyN1s",
+        "outputId": "83e5aa69-4e4d-4c1d-c045-fc8ebad975ff"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "* Running on public URL: https://3669a91d39321f7f86.gradio.live\n",
+            "* Trackio project initialized: dinov3\n",
+            "* Trackio metrics logged to: /root/.cache/huggingface/trackio\n",
+            "* View dashboard by running in your terminal:\n",
+            "\u001b[1m\u001b[93mtrackio show --project \"dinov3\"\u001b[0m\n",
+            "* or by running in Python: trackio.show(project=\"dinov3\")\n",
+            "[epoch 1 | step 100] train_loss=4.4878 val_loss=4.0990 val_acc=50.77%\n",
+            "[epoch 1 | step 200] train_loss=3.4722 val_loss=2.5605 val_acc=83.72%\n",
+            "[epoch 1 | step 300] train_loss=1.9046 val_loss=1.2049 val_acc=87.09%\n",
+            "[epoch 1 | step 400] train_loss=1.0664 val_loss=0.7385 val_acc=89.78%\n",
+            "[epoch 1 | step 500] train_loss=0.7269 val_loss=0.5500 val_acc=90.30%\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.11/dist-packages/PIL/TiffImagePlugin.py:950: UserWarning: Truncated File Read\n",
+            "  warnings.warn(str(msg))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[epoch 1 | step 600] train_loss=0.6400 val_loss=0.4473 val_acc=91.92%\n",
+            "[epoch 1 | step 700] train_loss=0.5444 val_loss=0.3916 val_acc=92.44%\n",
+            "[epoch 1 | step 800] train_loss=0.5084 val_loss=0.3506 val_acc=92.08%\n"
+          ]
+        },
+        {
+          "output_type": "error",
+          "ename": "KeyboardInterrupt",
+          "evalue": "",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+            "\u001b[0;32m/tmp/ipython-input-3838929309.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     27\u001b[0m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     28\u001b[0m         \u001b[0mscheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, optimizer, *args, **kwargs)\u001b[0m\n\u001b[1;32m    455\u001b[0m         ), \"No inf checks were recorded for this optimizer.\"\n\u001b[1;32m    456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m         \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_opt_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    459\u001b[0m         \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"stage\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOptState\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSTEPPED\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m_maybe_opt_step\u001b[0;34m(self, optimizer, optimizer_state, *args, **kwargs)\u001b[0m\n\u001b[1;32m    349\u001b[0m     ) -> Optional[float]:\n\u001b[1;32m    350\u001b[0m         \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    352\u001b[0m             \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    353\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    349\u001b[0m     ) -> Optional[float]:\n\u001b[1;32m    350\u001b[0m         \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    352\u001b[0m             \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    353\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+          ]
+        }
+      ],
+      "source": [
+        "import trackio\n",
+        "\n",
+        "# Fine-tuning loop: train on train_loader, evaluate every EVAL_EVERY_STEPS\n",
+        "# optimizer steps, log each evaluation to trackio, and save a checkpoint\n",
+        "# whenever validation accuracy improves.\n",
+        "best_acc = 0.0\n",
+        "global_step = 0\n",
+        "# Loss summed over the current logging window. It lives outside the epoch loop\n",
+        "# because global_step is not reset per epoch, so a window can span an epoch\n",
+        "# boundary and must still contain exactly EVAL_EVERY_STEPS batches.\n",
+        "running_loss = 0.0\n",
+        "\n",
+        "trackio.init(\n",
+        "    project=\"dinov3\",\n",
+        "    config={\n",
+        "        \"epochs\": EPOCHS,\n",
+        "        \"learning_rate\": LR,\n",
+        "        \"batch_size\": BATCH_SIZE,\n",
+        "    },\n",
+        ")\n",
+        "\n",
+        "for epoch in range(1, EPOCHS + 1):\n",
+        "    model.train()\n",
+        "    model.backbone.eval()  # comment out if you want to train the whole model\n",
+        "\n",
+        "    for batch in train_loader:\n",
+        "        pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
+        "        labels = batch[\"labels\"].to(device, non_blocking=True)\n",
+        "\n",
+        "        optimizer.zero_grad(set_to_none=True)\n",
+        "        logits = model(pixel_values)\n",
+        "        loss = criterion(logits, labels)\n",
+        "\n",
+        "        scaler.scale(loss).backward()\n",
+        "        scaler.step(optimizer)\n",
+        "        scaler.update()\n",
+        "        scheduler.step()\n",
+        "\n",
+        "        running_loss += loss.item()\n",
+        "        global_step += 1\n",
+        "\n",
+        "        if global_step % EVAL_EVERY_STEPS == 0:\n",
+        "            # NOTE(review): if evaluate() leaves the model in eval mode, train mode\n",
+        "            # should be restored after this call - confirm against its definition.\n",
+        "            metrics = evaluate()\n",
+        "            train_loss = running_loss / EVAL_EVERY_STEPS\n",
+        "            running_loss = 0.0\n",
+        "            print(\n",
+        "                f\"[epoch {epoch} | step {global_step}] \"\n",
+        "                f\"train_loss={train_loss:.4f} \"\n",
+        "                f\"val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}%\"\n",
+        "            )\n",
+        "\n",
+        "            # Log the metrics of this evaluation, not the running best.\n",
+        "            trackio.log(\n",
+        "                {\n",
+        "                    \"epoch\": epoch,\n",
+        "                    \"train_loss\": train_loss,\n",
+        "                    \"val_loss\": metrics[\"val_loss\"],\n",
+        "                    \"val_acc\": metrics[\"val_acc\"],\n",
+        "                }\n",
+        "            )\n",
+        "\n",
+        "            # Keep a checkpoint for every new best validation accuracy.\n",
+        "            if metrics[\"val_acc\"] > best_acc:\n",
+        "                best_acc = metrics[\"val_acc\"]\n",
+        "                ckpt_path = os.path.join(CHECKPOINT_DIR, f\"best_acc_{best_acc:.4f}.pt\")\n",
+        "                torch.save(\n",
+        "                    {\n",
+        "                        \"model_state_dict\": model.state_dict(),\n",
+        "                        \"optimizer_state_dict\": optimizer.state_dict(),\n",
+        "                        \"scheduler_state_dict\": scheduler.state_dict(),\n",
+        "                        \"config\": {\n",
+        "                            \"model_name\": MODEL_NAME,\n",
+        "                            \"num_classes\": num_classes,\n",
+        "                        },\n",
+        "                        \"step\": global_step,\n",
+        "                        \"epoch\": epoch,\n",
+        "                    },\n",
+        "                    ckpt_path,\n",
+        "                )\n",
+        "\n",
+        "    # End-of-epoch evaluation on the validation set.\n",
+        "    metrics = evaluate()\n",
+        "    print(\n",
+        "        f\"END EPOCH {epoch}: val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}% \"\n",
+        "        f\"(best_acc={best_acc*100:.2f}%)\"\n",
+        "    )\n",
+        "\n",
+        "# Close the trackio run once, after the last epoch.\n",
+        "trackio.finish()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!trackio show"
+      ],
+      "metadata": {
+        "id": "dX0kEHogATQ_"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Let's run inference with the model on a few in-the-wild images."
+      ],
+      "metadata": {
+        "id": "VKpGJ4L7bb2E"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from PIL import Image\n",
+        "from typing import List, Dict\n",
+        "\n",
+        "# Run the model on local image files and collect, for each file, the two\n",
+        "# highest-probability classes in `results`.\n",
+        "model.eval()\n",
+        "\n",
+        "images = [\"/content/pizza.jpg\", \"/content/spaghetti.JPG\"]\n",
+        "\n",
+        "# Open each file as RGB, then preprocess the whole list in one call.\n",
+        "pil_images = []\n",
+        "for p in images:\n",
+        "    pil_images.append(Image.open(p).convert(\"RGB\"))\n",
+        "inputs = image_processor(images=pil_images, return_tensors=\"pt\").to(device)\n",
+        "\n",
+        "with torch.no_grad():\n",
+        "    logits = model(inputs[\"pixel_values\"])\n",
+        "\n",
+        "# take top 2 classes\n",
+        "probs = torch.softmax(logits, dim=-1)\n",
+        "scores, indices = torch.topk(probs, 2, dim=-1)\n",
+        "\n",
+        "# One entry per input path; each entry lists its top-2 predictions.\n",
+        "results = []\n",
+        "for path, idxs, scs in zip(images, indices, scores):\n",
+        "    preds = []\n",
+        "    for i, s in zip(idxs, scs):\n",
+        "        label_id = int(i.item())\n",
+        "        preds.append(\n",
+        "            {\n",
+        "                \"label_id\": label_id,\n",
+        "                \"label\": id2label.get(label_id, f\"class_{int(i)}\"),\n",
+        "                \"score\": float(s.item()),\n",
+        "            }\n",
+        "        )\n",
+        "    results.append({\"image\": path, \"topk\": preds})\n"
+      ],
+      "metadata": {
+        "id": "RGZntYQEaVbA"
+      },
+      "execution_count": 19,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The model predicts correctly, as expected: with the strong backbone frozen, training only the head was enough, and it learned very fast. Feel free to try it on more challenging use cases."
+      ],
+      "metadata": {
+        "id": "bFoB-1Ebcab1"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "results"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "NrgtO2D1cXzj",
+        "outputId": "c972e7d0-ee78-45d3-e91f-7c68521d6a0b"
+      },
+      "execution_count": 20,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[{'image': '/content/pizza.jpg',\n",
+              "  'topk': [{'label_id': 76, 'label': 'pizza', 'score': 0.7595003843307495},\n",
+              "   {'label_id': 35, 'label': 'escargots', 'score': 0.013227012008428574}]},\n",
+              " {'image': '/content/spaghetti.JPG',\n",
+              "  'topk': [{'label_id': 91,\n",
+              "    'label': 'spaghetti_carbonara',\n",
+              "    'score': 0.6622196435928345},\n",
+              "   {'label_id': 90,\n",
+              "    'label': 'spaghetti_bolognese',\n",
+              "    'score': 0.18182380497455597}]}]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 20
+        }
+      ]
+    }
+  ]
+}