File size: 19,647 Bytes
27ba5c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "64f33f31-f533-41e8-9821-940a5d2ea343",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Inspecting file: model-00001-of-00004.safetensors\n",
"Available keys (tensor names):\n",
"\n",
"Key: model.layers.0.input_layernorm.weight\n",
" Shape: torch.Size([3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 3584 elements\n",
" First few elements: [0.275390625, 0.3046875, 0.26171875, 0.291015625, 0.29296875]\n",
"\n",
"Key: model.layers.0.mlp.down_proj.weight\n",
" Shape: torch.Size([3584, 18944])\n",
" Dtype: torch.bfloat16\n",
" Size: 67895296 elements\n",
" First few elements: [-0.005096435546875, 0.01385498046875, 0.0096435546875, -0.00848388671875, -0.002593994140625]\n",
"\n",
"Key: model.layers.0.mlp.gate_proj.weight\n",
" Shape: torch.Size([18944, 3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 67895296 elements\n",
" First few elements: [0.00286865234375, -0.0201416015625, -0.0216064453125, 0.006622314453125, -0.015625]\n",
"\n",
"Key: model.layers.0.mlp.up_proj.weight\n",
" Shape: torch.Size([18944, 3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 67895296 elements\n",
" First few elements: [0.007537841796875, -0.0111083984375, -0.0024261474609375, -0.006927490234375, -0.02587890625]\n",
"\n",
"Key: model.layers.0.post_attention_layernorm.weight\n",
" Shape: torch.Size([3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 3584 elements\n",
" First few elements: [0.28515625, 0.33203125, 0.259765625, 0.236328125, 0.296875]\n",
"\n",
"Key: model.layers.0.self_attn.kv_a_proj_with_mqa.bias\n",
" Shape: torch.Size([576])\n",
" Dtype: torch.bfloat16\n",
" Size: 576 elements\n",
" First few elements: [3.953125, 0.0634765625, 1.2578125, -1.515625, 0.29296875]\n",
"\n",
"Key: model.layers.0.self_attn.kv_a_proj_with_mqa.weight\n",
" Shape: torch.Size([576, 3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 2064384 elements\n",
" First few elements: [0.0322265625, -0.005157470703125, -0.03173828125, 0.0184326171875, -0.015625]\n",
"\n",
"Key: model.layers.0.self_attn.kv_b_proj.weight\n",
" Shape: torch.Size([7168, 512])\n",
" Dtype: torch.bfloat16\n",
" Size: 3670016 elements\n",
" First few elements: [-0.04931640625, -0.01904296875, 0.080078125, -0.01324462890625, 0.0179443359375]\n",
"\n",
"Key: model.layers.0.self_attn.o_proj.weight\n",
" Shape: torch.Size([3584, 3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 12845056 elements\n",
" First few elements: [0.00323486328125, -0.030029296875, -0.0069580078125, 0.0089111328125, 0.007568359375]\n",
"\n",
"Key: model.layers.0.self_attn.q_proj.bias\n",
" Shape: torch.Size([5376])\n",
" Dtype: torch.bfloat16\n",
" Size: 5376 elements\n",
" First few elements: [0.5, 2.140625, -0.98046875, 1.3671875, 1.015625]\n",
"\n",
"Key: model.layers.0.self_attn.q_proj.weight\n",
" Shape: torch.Size([5376, 3584])\n",
" Dtype: torch.bfloat16\n",
" Size: 19267584 elements\n",
" First few elements: [-0.0003452301025390625, -0.005340576171875, 0.021484375, 0.003997802734375, -0.00274658203125]\n"
]
}
],
"source": [
"from safetensors import safe_open\n",
"\n",
"def inspect_safetensors(file_path, key_filter='model.layers.0', preview_count=5):\n",
"    \"\"\"Print name, shape, dtype, element count and a short value preview for\n",
"    every tensor in a .safetensors file whose name contains `key_filter`.\n",
"\n",
"    Args:\n",
"        file_path: path to the .safetensors checkpoint shard to inspect.\n",
"        key_filter: substring a tensor name must contain to be shown\n",
"            (default keeps the original behavior of showing layer 0 only).\n",
"        preview_count: how many leading elements to print per tensor.\n",
"    \"\"\"\n",
"    print(f\"Inspecting file: {file_path}\")\n",
"    # Lazy, memory-mapped open: tensors are only materialized on get_tensor().\n",
"    with safe_open(file_path, framework=\"pt\", device=\"cpu\") as f:\n",
"        print(\"Available keys (tensor names):\")\n",
"        for key in f.keys():\n",
"            if key_filter in key:\n",
"                tensor = f.get_tensor(key)\n",
"                print(f\"\\nKey: {key}\")\n",
"                print(f\" Shape: {tensor.shape}\")\n",
"                print(f\" Dtype: {tensor.dtype}\")\n",
"                print(f\" Size: {tensor.numel()} elements\")\n",
"                # Optional: show the first few elements as a quick sanity check\n",
"                print(f\" First few elements: {tensor.flatten()[:preview_count].tolist()}\")\n",
"\n",
"# Example path; replace with the path to your own .safetensors file\n",
"file_path = \"model-00001-of-00004.safetensors\"\n",
"inspect_safetensors(file_path)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2a331ae6-f4ca-4d16-8bbf-36f39b9ac43e",
"metadata": {},
"outputs": [
{
"ename": "Exception",
"evalue": "data did not match any variant of untagged enum ModelWrapper at line 757455 column 3",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# model setting\u001b[39;00m\n\u001b[1;32m 6\u001b[0m model_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 8\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m VideoChatFlashQwenForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_path)\u001b[38;5;241m.\u001b[39mto(torch\u001b[38;5;241m.\u001b[39mbfloat16)\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[1;32m 10\u001b[0m image_processor \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mget_vision_tower()\u001b[38;5;241m.\u001b[39mimage_processor\n",
"File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:837\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 833\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tokenizer_class \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 834\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 835\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTokenizer class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtokenizer_class_candidate\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not exist or is not currently imported.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 836\u001b[0m )\n\u001b[0;32m--> 837\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtokenizer_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[38;5;66;03m# Otherwise we have to be creative.\u001b[39;00m\n\u001b[1;32m 840\u001b[0m \u001b[38;5;66;03m# if model is an encoder decoder, the encoder tokenizer class is used by default\u001b[39;00m\n\u001b[1;32m 841\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config, EncoderDecoderConfig):\n",
"File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2086\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 2083\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2084\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloading file \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m from cache at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresolved_vocab_files[file_id]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2086\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2087\u001b[0m \u001b[43m \u001b[49m\u001b[43mresolved_vocab_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2088\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2089\u001b[0m \u001b[43m \u001b[49m\u001b[43minit_configuration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2090\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2091\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2093\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2094\u001b[0m 
\u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2095\u001b[0m \u001b[43m \u001b[49m\u001b[43m_is_local\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_local\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2096\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrust_remote_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2097\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2325\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase._from_pretrained\u001b[0;34m(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 2323\u001b[0m \u001b[38;5;66;03m# Instantiate the tokenizer.\u001b[39;00m\n\u001b[1;32m 2324\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2325\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minit_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2326\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m:\n\u001b[1;32m 2327\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[1;32m 2328\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to load vocabulary from file. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2329\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease check that the provided vocabulary is accessible and not corrupted.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2330\u001b[0m )\n",
"File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/models/qwen2/tokenization_qwen2_fast.py:129\u001b[0m, in \u001b[0;36mQwen2TokenizerFast.__init__\u001b[0;34m(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, pad_token, **kwargs)\u001b[0m\n\u001b[1;32m 118\u001b[0m unk_token \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 119\u001b[0m AddedToken(unk_token, lstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, rstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, special\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, normalized\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(unk_token, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m unk_token\n\u001b[1;32m 122\u001b[0m )\n\u001b[1;32m 123\u001b[0m pad_token \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 124\u001b[0m AddedToken(pad_token, lstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, rstrip\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, special\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, normalized\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pad_token, \u001b[38;5;28mstr\u001b[39m)\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m pad_token\n\u001b[1;32m 127\u001b[0m )\n\u001b[0;32m--> 129\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43mvocab_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmerges_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[43m \u001b[49m\u001b[43mtokenizer_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtokenizer_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[43m \u001b[49m\u001b[43munk_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43munk_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[43m \u001b[49m\u001b[43mbos_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbos_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 135\u001b[0m \u001b[43m \u001b[49m\u001b[43meos_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meos_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 136\u001b[0m \u001b[43m \u001b[49m\u001b[43mpad_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpad_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 137\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 138\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/conda/envs/vcflash/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py:111\u001b[0m, in \u001b[0;36mPreTrainedTokenizerFast.__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m fast_tokenizer \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(tokenizer_object)\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m fast_tokenizer_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m from_slow:\n\u001b[1;32m 110\u001b[0m \u001b[38;5;66;03m# We have a serialization from tokenizers which let us directly build the backend\u001b[39;00m\n\u001b[0;32m--> 111\u001b[0m fast_tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mTokenizerFast\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfast_tokenizer_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m slow_tokenizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# We need to convert a slow tokenizer to build the backend\u001b[39;00m\n\u001b[1;32m 114\u001b[0m fast_tokenizer \u001b[38;5;241m=\u001b[39m convert_slow_tokenizer(slow_tokenizer)\n",
"\u001b[0;31mException\u001b[0m: data did not match any variant of untagged enum ModelWrapper at line 757455 column 3"
]
}
],
"source": [
"from transformers import AutoModel, AutoTokenizer\n",
"import torch\n",
"from modeling_videochat_flash import VideoChatFlashQwenForCausalLM\n",
"\n",
"# Model setting: directory containing checkpoint shards, tokenizer and remote code\n",
"model_path = './'\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n",
"model = VideoChatFlashQwenForCausalLM.from_pretrained(model_path).to(torch.bfloat16).cuda()\n",
"image_processor = model.get_vision_tower().image_processor\n",
"\n",
"mm_llm_compress = False  # use the global compress or not\n",
"if mm_llm_compress:\n",
"    model.config.mm_llm_compress = True\n",
"    model.config.llm_compress_type = \"uniform0_attention\"\n",
"    model.config.llm_compress_layer_list = [4, 18]\n",
"    model.config.llm_image_token_ratio_list = [1, 0.75, 0.25]\n",
"else:\n",
"    model.config.mm_llm_compress = False\n",
"\n",
"# Evaluation setting. Greedy decoding: with do_sample=False the sampling\n",
"# parameters (temperature, top_p) are ignored by generate() and only raise\n",
"# warnings, so they are omitted here.\n",
"max_num_frames = 512\n",
"generation_config = dict(\n",
"    do_sample=False,\n",
"    max_new_tokens=1024,\n",
"    num_beams=1\n",
")\n",
"\n",
"video_path = \"test.mp4\"\n",
"\n",
"# single-turn conversation\n",
"question1 = \"Describe this video in detail.\"\n",
"output1, chat_history = model.chat(video_path=video_path, tokenizer=tokenizer, user_prompt=question1, return_history=True, max_num_frames=max_num_frames, generation_config=generation_config)\n",
"\n",
"print(output1)\n",
"\n",
"# # multi-turn conversation\n",
"# question2 = \"How many people appear in the video?\"\n",
"# output2, chat_history = model.chat(video_path=video_path, tokenizer=tokenizer, user_prompt=question2, chat_history=chat_history, return_history=True, max_num_frames=max_num_frames, generation_config=generation_config)\n",
"\n",
"# print(output2)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bfd7429-5d77-42ad-8e3f-9cf37b25dfc7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "vcflash",
"language": "python",
"name": "vcflash"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|