{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "L4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "32138245d41348928cc5b5834b07cb7e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_df6de04fdb204d348767dd0b2d0e88f7",
              "IPY_MODEL_63a3800d62dd41d6b4a3f643a8930d95",
              "IPY_MODEL_49d67bd205184874a5cee04d318d91fe"
            ],
            "layout": "IPY_MODEL_f00ace964f96471b9eb839cce48ce378"
          }
        },
        "df6de04fdb204d348767dd0b2d0e88f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3ad0ac8def244930a3aff41d68a88a65",
            "placeholder": "​",
            "style": "IPY_MODEL_7464841c193d492685bb929b1c0d230c",
            "value": "preprocessor_config.json: 100%"
          }
        },
        "63a3800d62dd41d6b4a3f643a8930d95": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5c16553a2ff34a37a2cb62b4a4c42a6f",
            "max": 585,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_34be83ddb4bf43e58cadbcbac5a606b7",
            "value": 585
          }
        },
        "49d67bd205184874a5cee04d318d91fe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0ce7bd7e52074f29b446ef2d4dd0921a",
            "placeholder": "​",
            "style": "IPY_MODEL_7e2178d696c04d5787e736ace9ab57c0",
            "value": " 585/585 [00:00&lt;00:00, 65.6kB/s]"
          }
        },
        "f00ace964f96471b9eb839cce48ce378": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3ad0ac8def244930a3aff41d68a88a65": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7464841c193d492685bb929b1c0d230c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5c16553a2ff34a37a2cb62b4a4c42a6f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "34be83ddb4bf43e58cadbcbac5a606b7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "0ce7bd7e52074f29b446ef2d4dd0921a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7e2178d696c04d5787e736ace9ab57c0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3ff80bc2f64948408757caa8715d0603": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_12aa8675bca54f05a6deb7ec7a5def7a",
              "IPY_MODEL_31a74feac76f4744a0f34fbc99433831",
              "IPY_MODEL_bd51d97e739a4e78ad28083043f638d8"
            ],
            "layout": "IPY_MODEL_062d36b5d0c043a597eb9b3ebd35f313"
          }
        },
        "12aa8675bca54f05a6deb7ec7a5def7a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
            "placeholder": "​",
            "style": "IPY_MODEL_f2c7be27f90b49a3abe51b5e3003c17d",
            "value": "config.json: 100%"
          }
        },
        "31a74feac76f4744a0f34fbc99433831": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_76d1f15c857640c3b06d98aef478f234",
            "max": 744,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d43089f8240c44339c6881355ff0aee3",
            "value": 744
          }
        },
        "bd51d97e739a4e78ad28083043f638d8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a139b85557a942b9b5d32b9d7def3e50",
            "placeholder": "​",
            "style": "IPY_MODEL_92043bfce97e4629bf9e4b268aa88c11",
            "value": " 744/744 [00:00&lt;00:00, 93.7kB/s]"
          }
        },
        "062d36b5d0c043a597eb9b3ebd35f313": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2c2223a6ae3e4ff6be96a5f4e2d2d9b6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f2c7be27f90b49a3abe51b5e3003c17d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "76d1f15c857640c3b06d98aef478f234": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d43089f8240c44339c6881355ff0aee3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a139b85557a942b9b5d32b9d7def3e50": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "92043bfce97e4629bf9e4b268aa88c11": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f20b3989658642528f4ed91666320097": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3ee9921a635d44ec9b248e2155b5b243",
              "IPY_MODEL_caf0790dbf2544378cb04aa8eb3098c3",
              "IPY_MODEL_3ff0fc5ce62a44b9950dd8575d90bd21"
            ],
            "layout": "IPY_MODEL_77cdafc6dae44107a43a46ae19ed390a"
          }
        },
        "3ee9921a635d44ec9b248e2155b5b243": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_65d8b73e3bdd46fca8a42b67739e27f9",
            "placeholder": "​",
            "style": "IPY_MODEL_b566321171044b0eb02ea3bd8c0472df",
            "value": "model.safetensors: 100%"
          }
        },
        "caf0790dbf2544378cb04aa8eb3098c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_62535e046f794a28b4002c3f34fe7ff7",
            "max": 3362432800,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_663aa65fdb4e4349b2815b6bafce4dcd",
            "value": 3362432800
          }
        },
        "3ff0fc5ce62a44b9950dd8575d90bd21": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8410c9d15bca4c9f8b3aab2b7d327211",
            "placeholder": "​",
            "style": "IPY_MODEL_fb359d0651a74fe790aaace9a5d0e329",
            "value": " 3.36G/3.36G [00:18&lt;00:00, 296MB/s]"
          }
        },
        "77cdafc6dae44107a43a46ae19ed390a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "65d8b73e3bdd46fca8a42b67739e27f9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b566321171044b0eb02ea3bd8c0472df": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "62535e046f794a28b4002c3f34fe7ff7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "663aa65fdb4e4349b2815b6bafce4dcd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8410c9d15bca4c9f8b3aab2b7d327211": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fb359d0651a74fe790aaace9a5d0e329": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## DINOv3 Fine-tuning for Image Classification"
      ],
      "metadata": {
        "id": "BCTUDjwiYn6T"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -q trackio git+https://github.com/huggingface/transformers.git"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Aa1zPoxo_JBf",
        "outputId": "958700f5-189c-42ec-dfc5-9852e5efe368"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m838.5/838.5 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Dataset"
      ],
      "metadata": {
        "id": "5AJ3YVCE8S9Y"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "We will do a very small run on food101 dataset."
      ],
      "metadata": {
        "id": "s_Aabbb6VBZt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "ds = load_dataset(\"ethz/food101\")\n",
        "\n",
        "train_ds = ds[\"train\"]\n",
        "train_ds = train_ds.shuffle().train_test_split(test_size=0.9)[\"train\"]\n",
        "val_ds = ds[\"validation\"].shuffle().train_test_split(test_size=0.9)[\"train\"]"
      ],
      "metadata": {
        "id": "Cxzbngbq4K31"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train_ds"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "g1wl86sp8L6C",
        "outputId": "1b42f43f-df62-4eba-f469-54cabd232cf9"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['image', 'label'],\n",
              "    num_rows: 7575\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "val_ds"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Tq5OiKxvVj9k",
        "outputId": "391489ba-d95f-498a-b4bb-f959e19686b0"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Dataset({\n",
              "    features: ['image', 'label'],\n",
              "    num_rows: 2525\n",
              "})"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "num_classes = train_ds.features[\"label\"].num_classes\n",
        "id2label = {i: name for i, name in enumerate(train_ds.features[\"label\"].names)}\n",
        "label2id = {v: k for k, v in id2label.items()}\n",
        "print(f\"Classes: {num_classes}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1JcvDPFK8Scd",
        "outputId": "5c920e23-e96b-4c62-bf3a-7db183c97f48"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Classes: 101\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Load Model\n",
        "\n",
        "This model doesn't come with a head, so we need to write the headed model class."
      ],
      "metadata": {
        "id": "_69A3AmO81c8"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch.nn as nn\n",
        "import torch\n",
        "from transformers import AutoImageProcessor, AutoModel, get_cosine_schedule_with_warmup\n",
        "\n",
        "MODEL_NAME = \"facebook/dinov3-vith16plus-pretrain-lvd1689m\"\n",
        "\n",
        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
        "\n",
        "\n",
        "image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)\n",
        "backbone = AutoModel.from_pretrained(MODEL_NAME)\n",
        "\n",
        "hidden_size = getattr(backbone.config, \"hidden_size\", None)\n",
        "\n",
        "class DinoV3Linear(nn.Module):\n",
        "    def __init__(self, backbone: AutoModel, hidden_size: int, num_classes: int, freeze_backbone: bool = True):\n",
        "        super().__init__()\n",
        "        self.backbone = backbone\n",
        "        if freeze_backbone:\n",
        "            for p in self.backbone.parameters():\n",
        "                p.requires_grad = False\n",
        "            self.backbone.eval()\n",
        "\n",
        "        self.head = nn.Linear(hidden_size, num_classes)\n",
        "\n",
        "    def forward(self, pixel_values):\n",
        "        outputs = self.backbone(pixel_values=pixel_values)\n",
        "        last_hidden = outputs.last_hidden_state\n",
        "        cls = last_hidden[:, 0]\n",
        "        logits = self.head(cls)\n",
        "        return logits\n",
        "\n",
        "model = DinoV3Linear(backbone, hidden_size, num_classes, freeze_backbone=True).to(device) # we only train the head"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 113,
          "referenced_widgets": [
            "32138245d41348928cc5b5834b07cb7e",
            "df6de04fdb204d348767dd0b2d0e88f7",
            "63a3800d62dd41d6b4a3f643a8930d95",
            "49d67bd205184874a5cee04d318d91fe",
            "f00ace964f96471b9eb839cce48ce378",
            "3ad0ac8def244930a3aff41d68a88a65",
            "7464841c193d492685bb929b1c0d230c",
            "5c16553a2ff34a37a2cb62b4a4c42a6f",
            "34be83ddb4bf43e58cadbcbac5a606b7",
            "0ce7bd7e52074f29b446ef2d4dd0921a",
            "7e2178d696c04d5787e736ace9ab57c0",
            "3ff80bc2f64948408757caa8715d0603",
            "12aa8675bca54f05a6deb7ec7a5def7a",
            "31a74feac76f4744a0f34fbc99433831",
            "bd51d97e739a4e78ad28083043f638d8",
            "062d36b5d0c043a597eb9b3ebd35f313",
            "2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
            "f2c7be27f90b49a3abe51b5e3003c17d",
            "76d1f15c857640c3b06d98aef478f234",
            "d43089f8240c44339c6881355ff0aee3",
            "a139b85557a942b9b5d32b9d7def3e50",
            "92043bfce97e4629bf9e4b268aa88c11",
            "f20b3989658642528f4ed91666320097",
            "3ee9921a635d44ec9b248e2155b5b243",
            "caf0790dbf2544378cb04aa8eb3098c3",
            "3ff0fc5ce62a44b9950dd8575d90bd21",
            "77cdafc6dae44107a43a46ae19ed390a",
            "65d8b73e3bdd46fca8a42b67739e27f9",
            "b566321171044b0eb02ea3bd8c0472df",
            "62535e046f794a28b4002c3f34fe7ff7",
            "663aa65fdb4e4349b2815b6bafce4dcd",
            "8410c9d15bca4c9f8b3aab2b7d327211",
            "fb359d0651a74fe790aaace9a5d0e329"
          ]
        },
        "id": "_oqXAu_y81H4",
        "outputId": "7c4a4f6f-2301-4a43-eecb-50f1adb004b9"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "preprocessor_config.json:   0%|          | 0.00/585 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "32138245d41348928cc5b5834b07cb7e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/744 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "3ff80bc2f64948408757caa8715d0603"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model.safetensors:   0%|          | 0.00/3.36G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f20b3989658642528f4ed91666320097"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Write the data collator to batch inputs and dataloaders for training."
      ],
      "metadata": {
        "id": "IfC3TFbw9SlZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from dataclasses import dataclass\n",
        "from PIL import Image\n",
        "import numpy as np\n",
        "import torch\n",
        "from transformers import AutoImageProcessor\n",
        "\n",
        "@dataclass\n",
        "class Collator:\n",
        "    processor: AutoImageProcessor\n",
        "\n",
        "    def __call__(self, batch):\n",
        "        raw_images = [x[\"image\"] for x in batch]\n",
        "        labels = torch.tensor([x[\"label\"] for x in batch], dtype=torch.long)\n",
        "\n",
        "        rgb_images = []\n",
        "        # there's grayscale images in the dataset\n",
        "        for im in raw_images:\n",
        "            if isinstance(im, Image.Image):\n",
        "                rgb_images.append(im.convert(\"RGB\"))\n",
        "                continue\n",
        "\n",
        "        inputs = self.processor(images=rgb_images, return_tensors=\"pt\")\n",
        "        return {\"pixel_values\": inputs[\"pixel_values\"], \"labels\": labels}\n",
        "\n",
        "collate_fn = Collator(image_processor)"
      ],
      "metadata": {
        "id": "Wlo3_8qE9SVR"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from torch.utils.data import DataLoader\n",
        "import os\n",
        "\n",
        "BATCH_SIZE = 8\n",
        "NUM_WORKERS = min(8, os.cpu_count() or 2)\n",
        "\n",
        "train_loader = DataLoader(\n",
        "    train_ds,\n",
        "    batch_size=BATCH_SIZE,\n",
        "    shuffle=True,\n",
        "    num_workers=NUM_WORKERS,\n",
        "    pin_memory=True,\n",
        "    collate_fn=collate_fn,\n",
        ")\n",
        "val_loader = DataLoader(\n",
        "    val_ds,\n",
        "    batch_size=BATCH_SIZE,\n",
        "    shuffle=False,\n",
        "    num_workers=NUM_WORKERS,\n",
        "    pin_memory=True,\n",
        "    collate_fn=collate_fn,\n",
        ")"
      ],
      "metadata": {
        "id": "Nou-Ct_e9zV5"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Training"
      ],
      "metadata": {
        "id": "RblgS11W-Wuo"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Find config below."
      ],
      "metadata": {
        "id": "25sCxjwG_tPo"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import math\n",
        "import random\n",
        "from typing import List, Dict, Any\n",
        "\n",
        "\n",
        "EPOCHS = 5\n",
        "LR = 5e-4\n",
        "WEIGHT_DECAY = 1e-4\n",
        "WARMUP_RATIO = 0.05\n",
        "CHECKPOINT_DIR = \"./checkpoints_dinov3_food101\"\n",
        "EVAL_EVERY_STEPS = 100\n",
        "\n",
        "optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR, weight_decay=WEIGHT_DECAY)\n",
        "total_steps = EPOCHS * math.ceil(len(train_loader))\n",
        "warmup_steps = int(WARMUP_RATIO * total_steps)\n",
        "scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)\n",
        "criterion = nn.CrossEntropyLoss()\n",
        "\n",
        "scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WWM8KLQD_sya",
        "outputId": "1672c194-aad2-4af2-a9cf-e61aa0d558b9"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/tmp/ipython-input-593493728.py:19: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
            "  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "os.makedirs(\"./checkpoints_dinov3_food101\")"
      ],
      "metadata": {
        "id": "OJPRRz09kxFT"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "We need to evaluate during training."
      ],
      "metadata": {
        "id": "FHS5DSu1_22g"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def evaluate() -> Dict[str, float]:\n",
        "    model.eval()\n",
        "    correct, total, loss_sum = 0, 0, 0.0\n",
        "    with torch.no_grad():\n",
        "        for batch in val_loader:\n",
        "            pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
        "            labels = batch[\"labels\"].to(device, non_blocking=True)\n",
        "            logits = model(pixel_values)\n",
        "            loss = criterion(logits, labels)\n",
        "            loss_sum += loss.item() * labels.size(0)\n",
        "            preds = logits.argmax(dim=-1)\n",
        "            correct += (preds == labels).sum().item()\n",
        "            total += labels.size(0)\n",
        "    return {\n",
        "        \"val_loss\": loss_sum / max(total, 1),\n",
        "        \"val_acc\": correct / max(total, 1),\n",
        "    }"
      ],
      "metadata": {
        "id": "TSD4tzZr_4i3"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's write the training loop. We'll also use trackio for experiment tracking."
      ],
      "metadata": {
        "id": "yakvOUOkAVcR"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 723
        },
        "id": "r-WQGd7UyN1s",
        "outputId": "83e5aa69-4e4d-4c1d-c045-fc8ebad975ff"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "* Running on public URL: https://3669a91d39321f7f86.gradio.live\n",
            "* Trackio project initialized: dinov3\n",
            "* Trackio metrics logged to: /root/.cache/huggingface/trackio\n",
            "* View dashboard by running in your terminal:\n",
            "\u001b[1m\u001b[93mtrackio show --project \"dinov3\"\u001b[0m\n",
            "* or by running in Python: trackio.show(project=\"dinov3\")\n",
            "[epoch 1 | step 100] train_loss=4.4878 val_loss=4.0990 val_acc=50.77%\n",
            "[epoch 1 | step 200] train_loss=3.4722 val_loss=2.5605 val_acc=83.72%\n",
            "[epoch 1 | step 300] train_loss=1.9046 val_loss=1.2049 val_acc=87.09%\n",
            "[epoch 1 | step 400] train_loss=1.0664 val_loss=0.7385 val_acc=89.78%\n",
            "[epoch 1 | step 500] train_loss=0.7269 val_loss=0.5500 val_acc=90.30%\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.11/dist-packages/PIL/TiffImagePlugin.py:950: UserWarning: Truncated File Read\n",
            "  warnings.warn(str(msg))\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[epoch 1 | step 600] train_loss=0.6400 val_loss=0.4473 val_acc=91.92%\n",
            "[epoch 1 | step 700] train_loss=0.5444 val_loss=0.3916 val_acc=92.44%\n",
            "[epoch 1 | step 800] train_loss=0.5084 val_loss=0.3506 val_acc=92.08%\n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m/tmp/ipython-input-3838929309.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     27\u001b[0m         \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     28\u001b[0m         \u001b[0mscheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, optimizer, *args, **kwargs)\u001b[0m\n\u001b[1;32m    455\u001b[0m         ), \"No inf checks were recorded for this optimizer.\"\n\u001b[1;32m    456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m         \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_opt_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    459\u001b[0m         \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"stage\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOptState\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSTEPPED\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m_maybe_opt_step\u001b[0;34m(self, optimizer, optimizer_state, *args, **kwargs)\u001b[0m\n\u001b[1;32m    349\u001b[0m     ) -> Optional[float]:\n\u001b[1;32m    350\u001b[0m         \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    352\u001b[0m             \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    353\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    349\u001b[0m     ) -> Optional[float]:\n\u001b[1;32m    350\u001b[0m         \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    352\u001b[0m             \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    353\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ],
      "source": [
        "import trackio\n",
        "\n",
        "best_acc = 0.0\n",
        "global_step = 0\n",
        "\n",
        "trackio.init(project=\"dinov3\", config={\n",
        "            \"epochs\": EPOCHS,\n",
        "            \"learning_rate\": LR,\n",
        "            \"batch_size\": BATCH_SIZE\n",
        "        })\n",
        "\n",
        "for epoch in range(1, EPOCHS + 1):\n",
        "    model.train()\n",
        "    model.backbone.eval()  # comment out if you want to train the whole model\n",
        "\n",
        "    running_loss = 0.0\n",
        "    for i, batch in enumerate(train_loader, start=1):\n",
        "        pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
        "        labels = batch[\"labels\"].to(device, non_blocking=True)\n",
        "\n",
        "        optimizer.zero_grad(set_to_none=True)\n",
        "        logits = model(pixel_values)\n",
        "        loss = criterion(logits, labels)\n",
        "\n",
        "        scaler.scale(loss).backward()\n",
        "        scaler.step(optimizer)\n",
        "        scaler.update()\n",
        "        scheduler.step()\n",
        "\n",
        "        running_loss += loss.item()\n",
        "        global_step += 1\n",
        "\n",
        "        if global_step % EVAL_EVERY_STEPS == 0:\n",
        "            metrics = evaluate()\n",
        "            print(\n",
        "                f\"[epoch {epoch} | step {global_step}] \"\n",
        "                f\"train_loss={running_loss / EVAL_EVERY_STEPS:.4f} \"\n",
        "                f\"val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}%\"\n",
        "            )\n",
        "            running_loss = 0.0\n",
        "\n",
        "            trackio.log(\n",
        "                    {\n",
        "                        \"epoch\": epoch,\n",
        "                        \"val_acc\": best_acc,\n",
        "                    }\n",
        "                )\n",
        "\n",
        "            if metrics[\"val_acc\"] > best_acc:\n",
        "                best_acc = metrics[\"val_acc\"]\n",
        "                ckpt_path = os.path.join(CHECKPOINT_DIR, f\"best_acc_{best_acc:.4f}.pt\")\n",
        "                torch.save(\n",
        "                    {\n",
        "                        \"model_state_dict\": model.state_dict(),\n",
        "                        \"optimizer_state_dict\": optimizer.state_dict(),\n",
        "                        \"scheduler_state_dict\": scheduler.state_dict(),\n",
        "                        \"config\": {\n",
        "                            \"model_name\": MODEL_NAME,\n",
        "                            \"num_classes\": num_classes,\n",
        "                        },\n",
        "                        \"step\": global_step,\n",
        "                        \"epoch\": epoch,\n",
        "                    },\n",
        "                    ckpt_path,\n",
        "                )\n",
        "\n",
        "\n",
        "    metrics = evaluate()\n",
        "    print(\n",
        "        f\"END EPOCH {epoch}: val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}% \"\n",
        "        f\"(best_acc={best_acc*100:.2f}%)\"\n",
        "    )\n",
        "    trackio.finish()"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!trackio show"
      ],
      "metadata": {
        "id": "dX0kEHogATQ_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's infer with the model, I have a few in the wild images."
      ],
      "metadata": {
        "id": "VKpGJ4L7bb2E"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from PIL import Image\n",
        "from typing import List, Dict\n",
        "\n",
        "\n",
        "model.eval()\n",
        "\n",
        "images = [\"/content/pizza.jpg\", \"/content/spaghetti.JPG\"]\n",
        "\n",
        "pil_images = [Image.open(p).convert(\"RGB\") for p in images]\n",
        "inputs = image_processor(images=pil_images, return_tensors=\"pt\").to(device)\n",
        "\n",
        "with torch.no_grad():\n",
        "  logits = model(inputs[\"pixel_values\"])\n",
        "\n",
        "# take top 2 classes\n",
        "probs = logits.softmax(dim=-1)\n",
        "scores, indices = probs.topk(2, dim=-1)\n",
        "\n",
        "results = []\n",
        "for path, idxs, scs in zip(images, indices, scores):\n",
        "    preds = [\n",
        "        {\"label_id\": int(i.item()),\n",
        "          \"label\": id2label.get(int(i.item()), f\"class_{int(i)}\"),\n",
        "          \"score\": float(s.item())}\n",
        "        for i, s in zip(idxs, scs)\n",
        "    ]\n",
        "    results.append({\"image\": path, \"topk\": preds})\n"
      ],
      "metadata": {
        "id": "RGZntYQEaVbA"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "The model predicts correctly, which is expected given we only trained head with the great backbone frozen, it learned very fast. Feel free to try with more challenging use cases."
      ],
      "metadata": {
        "id": "bFoB-1Ebcab1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "results"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NrgtO2D1cXzj",
        "outputId": "c972e7d0-ee78-45d3-e91f-7c68521d6a0b"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[{'image': '/content/pizza.jpg',\n",
              "  'topk': [{'label_id': 76, 'label': 'pizza', 'score': 0.7595003843307495},\n",
              "   {'label_id': 35, 'label': 'escargots', 'score': 0.013227012008428574}]},\n",
              " {'image': '/content/spaghetti.JPG',\n",
              "  'topk': [{'label_id': 91,\n",
              "    'label': 'spaghetti_carbonara',\n",
              "    'score': 0.6622196435928345},\n",
              "   {'label_id': 90,\n",
              "    'label': 'spaghetti_bolognese',\n",
              "    'score': 0.18182380497455597}]}]"
            ]
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    }
  ]
}