{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "64f33f31-f533-41e8-9821-940a5d2ea343",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inspecting file: model-00001-of-00004.safetensors\n",
      "Available keys (tensor names):\n",
      "\n",
      "Key: model.layers.0.input_layernorm.weight\n",
      "  Shape: torch.Size([3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 3584 elements\n",
      "  First few elements: [0.275390625, 0.3046875, 0.26171875, 0.291015625, 0.29296875]\n",
      "\n",
      "Key: model.layers.0.mlp.down_proj.weight\n",
      "  Shape: torch.Size([3584, 18944])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 67895296 elements\n",
      "  First few elements: [-0.005096435546875, 0.01385498046875, 0.0096435546875, -0.00848388671875, -0.002593994140625]\n",
      "\n",
      "Key: model.layers.0.mlp.gate_proj.weight\n",
      "  Shape: torch.Size([18944, 3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 67895296 elements\n",
      "  First few elements: [0.00286865234375, -0.0201416015625, -0.0216064453125, 0.006622314453125, -0.015625]\n",
      "\n",
      "Key: model.layers.0.mlp.up_proj.weight\n",
      "  Shape: torch.Size([18944, 3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 67895296 elements\n",
      "  First few elements: [0.007537841796875, -0.0111083984375, -0.0024261474609375, -0.006927490234375, -0.02587890625]\n",
      "\n",
      "Key: model.layers.0.post_attention_layernorm.weight\n",
      "  Shape: torch.Size([3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 3584 elements\n",
      "  First few elements: [0.28515625, 0.33203125, 0.259765625, 0.236328125, 0.296875]\n",
      "\n",
      "Key: model.layers.0.self_attn.kv_a_proj_with_mqa.bias\n",
      "  Shape: torch.Size([576])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 576 elements\n",
      "  First few elements: [3.953125, 0.0634765625, 1.2578125, -1.515625, 0.29296875]\n",
      "\n",
      "Key: model.layers.0.self_attn.kv_a_proj_with_mqa.weight\n",
      "  Shape: torch.Size([576, 3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 2064384 elements\n",
      "  First few elements: [0.0322265625, -0.005157470703125, -0.03173828125, 0.0184326171875, -0.015625]\n",
      "\n",
      "Key: model.layers.0.self_attn.kv_b_proj.weight\n",
      "  Shape: torch.Size([7168, 512])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 3670016 elements\n",
      "  First few elements: [-0.04931640625, -0.01904296875, 0.080078125, -0.01324462890625, 0.0179443359375]\n",
      "\n",
      "Key: model.layers.0.self_attn.o_proj.weight\n",
      "  Shape: torch.Size([3584, 3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 12845056 elements\n",
      "  First few elements: [0.00323486328125, -0.030029296875, -0.0069580078125, 0.0089111328125, 0.007568359375]\n",
      "\n",
      "Key: model.layers.0.self_attn.q_proj.bias\n",
      "  Shape: torch.Size([5376])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 5376 elements\n",
      "  First few elements: [0.5, 2.140625, -0.98046875, 1.3671875, 1.015625]\n",
      "\n",
      "Key: model.layers.0.self_attn.q_proj.weight\n",
      "  Shape: torch.Size([5376, 3584])\n",
      "  Dtype: torch.bfloat16\n",
      "  Size: 19267584 elements\n",
      "  First few elements: [-0.0003452301025390625, -0.005340576171875, 0.021484375, 0.003997802734375, -0.00274658203125]\n"
     ]
    }
   ],
   "source": [
    "from safetensors import safe_open\n",
    "\n",
    "def inspect_safetensors(file_path):\n",
    "    print(f\"Inspecting file: {file_path}\")\n",
    "    with safe_open(file_path, framework=\"pt\", device=\"cpu\") as f:\n",
    "        print(\"Available keys (tensor names):\")\n",
    "        for key in f.keys():\n",
    "            if 'model.layers.0' in key:\n",
    "                \n",
    "                tensor = f.get_tensor(key)\n",
    "                print(f\"\\nKey: {key}\")\n",
    "                print(f\"  Shape: {tensor.shape}\")\n",
    "                print(f\"  Dtype: {tensor.dtype}\")\n",
    "                print(f\"  Size: {tensor.numel()} elements\")\n",
    "                # 可选:显示前几个元素\n",
    "                print(f\"  First few elements: {tensor.flatten()[:5].tolist()}\")\n",
    "\n",
    "# 示例路径,请替换为你自己的 .safetensors 文件路径\n",
    "file_path = \"model-00001-of-00004.safetensors\"\n",
    "inspect_safetensors(file_path)"
   ]
  },
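  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7e1c2d3-0a4f-4e5b-9c6d-7f8a9b0c1d2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal follow-up sketch: total the parameters in a shard without\n",
    "# materializing any tensors, using get_slice(), which reads only shape\n",
    "# metadata from the safetensors header. Assumes the same shard filename\n",
    "# as above; loop over all four shards for the full model total.\n",
    "import math\n",
    "from safetensors import safe_open\n",
    "\n",
    "def count_shard_params(file_path):\n",
    "    total = 0\n",
    "    with safe_open(file_path, framework=\"pt\", device=\"cpu\") as f:\n",
    "        for key in f.keys():\n",
    "            total += math.prod(f.get_slice(key).get_shape())\n",
    "    return total\n",
    "\n",
    "print(f\"{count_shard_params('model-00001-of-00004.safetensors'):,} parameters in this shard\")"
   ]
  },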
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2a331ae6-f4ca-4d16-8bbf-36f39b9ac43e",
   "metadata": {},
   "outputs": [
    {
     "ename": "Exception",
     "evalue": "data did not match any variant of untagged enum ModelWrapper at line 757455 column 3",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mException\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;66;03m# model setting\u001b[39;00m\n\u001b[1;32m      6\u001b[0m model_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 8\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m      9\u001b[0m model \u001b[38;5;241m=\u001b[39m VideoChatFlashQwenForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_path)\u001b[38;5;241m.\u001b[39mto(torch\u001b[38;5;241m.\u001b[39mbfloat16)\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[1;32m     10\u001b[0m image_processor \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mget_vision_tower()\u001b[38;5;241m.\u001b[39mimage_processor\n",
      "File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m    833\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    834\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    835\u001b[0m             \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTokenizer class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtokenizer_class_candidate\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist or is not currently imported.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    836\u001b[0m         )\n\u001b[0;32m--> 837\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtokenizer_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    839\u001b[0m \u001b[38;5;66;03m# Otherwise we have to be creative.\u001b[39;00m\n\u001b[1;32m    840\u001b[0m \u001b[38;5;66;03m# if model is an encoder decoder, the encoder tokenizer class is used by default\u001b[39;00m\n\u001b[1;32m    841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config, EncoderDecoderConfig):\n",
      "File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2086\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m   2083\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   2084\u001b[0m         logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloading file \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m from cache at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresolved_vocab_files[file_id]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2086\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2087\u001b[0m \u001b[43m    \u001b[49m\u001b[43mresolved_vocab_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2088\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2089\u001b[0m \u001b[43m    \u001b[49m\u001b[43minit_configuration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2090\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2091\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2092\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2093\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2094\u001b[0m \u001b[43m    \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2095\u001b[0m \u001b[43m    \u001b[49m\u001b[43m_is_local\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_local\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2096\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrust_remote_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2097\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2325\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase._from_pretrained\u001b[0;34m(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m   2323\u001b[0m \u001b[38;5;66;03m# Instantiate the tokenizer.\u001b[39;00m\n\u001b[1;32m   2324\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2325\u001b[0m     tokenizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2326\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m:\n\u001b[1;32m   2327\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[1;32m   2328\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to load vocabulary from file. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2329\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease check that the provided vocabulary is accessible and not corrupted.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2330\u001b[0m     )\n",
      "File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/models/qwen2/tokenization_qwen2_fast.py:129\u001b[0m, in \u001b[0;36mQwen2TokenizerFast.__init__\u001b[0;34m(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, pad_token, **kwargs)\u001b[0m\n\u001b[1;32m    118\u001b[0m unk_token \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    119\u001b[0m     AddedToken(unk_token, lstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, rstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, special\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, normalized\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m    120\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(unk_token, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m    121\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m unk_token\n\u001b[1;32m    122\u001b[0m )\n\u001b[1;32m    123\u001b[0m pad_token \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    124\u001b[0m     AddedToken(pad_token, lstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, rstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, special\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, normalized\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m    125\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pad_token, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m    126\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m pad_token\n\u001b[1;32m    127\u001b[0m )\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m    130\u001b[0m \u001b[43m    \u001b[49m\u001b[43mvocab_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    131\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmerges_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    132\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtokenizer_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtokenizer_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    133\u001b[0m \u001b[43m    \u001b[49m\u001b[43munk_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43munk_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    134\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbos_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbos_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    135\u001b[0m \u001b[43m    \u001b[49m\u001b[43meos_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meos_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    136\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpad_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpad_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    137\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    138\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py:111\u001b[0m, in \u001b[0;36mPreTrainedTokenizerFast.__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    108\u001b[0m     fast_tokenizer \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(tokenizer_object)\n\u001b[1;32m    109\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m fast_tokenizer_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m from_slow:\n\u001b[1;32m    110\u001b[0m     \u001b[38;5;66;03m# We have a serialization from tokenizers which let us directly build the backend\u001b[39;00m\n\u001b[0;32m--> 111\u001b[0m     fast_tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mTokenizerFast\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfast_tokenizer_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m slow_tokenizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    113\u001b[0m     \u001b[38;5;66;03m# We need to convert a slow tokenizer to build the backend\u001b[39;00m\n\u001b[1;32m    114\u001b[0m     fast_tokenizer \u001b[38;5;241m=\u001b[39m convert_slow_tokenizer(slow_tokenizer)\n",
      "\u001b[0;31mException\u001b[0m: data did not match any variant of untagged enum ModelWrapper at line 757455 column 3"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModel, AutoTokenizer\n",
    "import torch\n",
    "from modeling_videochat_flash import VideoChatFlashQwenForCausalLM\n",
    "\n",
    "# model setting\n",
    "model_path = './'\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n",
    "model = VideoChatFlashQwenForCausalLM.from_pretrained(model_path).to(torch.bfloat16).cuda()\n",
    "image_processor = model.get_vision_tower().image_processor\n",
    "\n",
    "mm_llm_compress = False # use the global compress or not\n",
    "if mm_llm_compress:\n",
    "    model.config.mm_llm_compress = True\n",
    "    model.config.llm_compress_type = \"uniform0_attention\"\n",
    "    model.config.llm_compress_layer_list = [4, 18]\n",
    "    model.config.llm_image_token_ratio_list = [1, 0.75, 0.25]\n",
    "else:\n",
    "    model.config.mm_llm_compress = False\n",
    "\n",
    "# evaluation setting\n",
    "max_num_frames = 512\n",
    "generation_config = dict(\n",
    "    do_sample=False,\n",
    "    temperature=0.0,\n",
    "    max_new_tokens=1024,\n",
    "    top_p=0.1,\n",
    "    num_beams=1\n",
    ")\n",
    "\n",
    "video_path = \"test.mp4\"\n",
    "\n",
    "# single-turn conversation\n",
    "question1 = \"Describe this video in detail.\"\n",
    "output1, chat_history = model.chat(video_path=video_path, tokenizer=tokenizer, user_prompt=question1, return_history=True, max_num_frames=max_num_frames, generation_config=generation_config)\n",
    "\n",
    "print(output1)\n",
    "\n",
    "# # multi-turn conversation\n",
    "# question2 = \"How many people appear in the video?\"\n",
    "# output2, chat_history = model.chat(video_path=video_path, tokenizer=tokenizer, user_prompt=question2, chat_history=chat_history, return_history=True, max_num_frames=max_num_frames, generation_config=generation_config)\n",
    "\n",
    "# print(output2)\n"
   ]
  },
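  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e4f5a6b7-c8d9-4a0b-8c1d-2e3f4a5b6c7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The \"data did not match any variant of untagged enum ModelWrapper\" error above\n",
    "# is raised by the Rust tokenizers backend when tokenizer.json was serialized by\n",
    "# a newer `tokenizers` release than the one installed, so the file cannot be\n",
    "# parsed. A common remedy (an assumption about this environment, not verified\n",
    "# here) is to upgrade tokenizers/transformers and re-run the cell above.\n",
    "import tokenizers, transformers\n",
    "print(\"tokenizers:\", tokenizers.__version__)\n",
    "print(\"transformers:\", transformers.__version__)\n",
    "# %pip install -U tokenizers  # uncomment to upgrade in this kernel, then restart"
   ]
  },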
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bfd7429-5d77-42ad-8e3f-9cf37b25dfc7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "vcflash",
   "language": "python",
   "name": "vcflash"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}