#!/usr/bin/env python3
"""
Test script to verify DotsVLMForCausalLM model loading
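
Usage (a suggested invocation; run from the model repository root, i.e. the
directory that contains config.json and the safetensors shards):
    python <this script>                          # full model test
    DEBUG_MINIMAL_LAYERS=1 python <this script>   # fast 3-layer debug test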
"""
import json
import os
import shutil
import traceback

import torch
from transformers import AutoConfig, AutoProcessor

# Absolute imports so the script can be executed directly from the model repo
# root (relative imports fail when the file is run as a top-level script).
from configuration_dots_vlm import DotsVLMConfig, DotsVLMProcessor
from modeling_dots_vlm import DotsVLMForCausalLM
def create_debug_model_path(original_path, debug_layers=3):
"""
创建一个debug版本的模型路径,只包含指定层数的配置和权重
Args:
original_path: 原始模型路径
debug_layers: debug模式下使用的层数
Returns:
debug模型的临时路径
"""
    debug_path = original_path + "_debug_temp"
    # If the debug directory already exists, it could be deleted first
    # (left disabled so previously copied files are reused between runs):
    # if os.path.exists(debug_path):
    #     shutil.rmtree(debug_path)
    # Create the debug directory
    os.makedirs(debug_path, exist_ok=True)
    # Copy the basic configuration / tokenizer files
files_to_copy = [
'tokenizer_config.json',
'tokenizer.json',
'special_tokens_map.json',
'preprocessor_config.json',
'modeling_deepseek.py',
'configuration_deepseek.py'
]
for file in files_to_copy:
src = os.path.join(original_path, file)
dst = os.path.join(debug_path, file)
if os.path.exists(src):
if not os.path.exists(dst):
print(f"📁 Copying basefile {file}... from {src} to {dst}")
shutil.copy2(src, dst)
    # # Copy the ve directory (vision encoder)
    # ve_src = os.path.join(original_path, 've')
    # ve_dst = os.path.join(debug_path, 've')
    # if os.path.exists(ve_src):
    #     shutil.copytree(ve_src, ve_dst)
    # Modify config.json: reduce the number of layers
config_src = os.path.join(original_path, 'config.json')
config_dst = os.path.join(debug_path, 'config.json')
with open(config_src, 'r') as f:
config = json.load(f)
    # Override with the debug layer count
original_layers = config['num_hidden_layers']
config['num_hidden_layers'] = debug_layers
print(f"🔧 DEBUG: Reducing num_hidden_layers from {original_layers} to {debug_layers}")
with open(config_dst, 'w') as f:
json.dump(config, f, indent=2)
    # Read the original safetensors index
index_src = os.path.join(original_path, 'model.safetensors.index.json')
with open(index_src, 'r') as f:
index_data = json.load(f)
original_weight_map = index_data['weight_map']
    # Determine which safetensors shards are needed
    # First 8 shards: embedding layer plus the first few transformer layers
    front_files = [f"model-{i:05d}-of-00316.safetensors" for i in range(1, 9)]
    # Layer 2's weights live in model-00056, so that shard must be included explicitly
    layer2_files = ["model-00056-of-00316.safetensors"]
    # Last 2 shards: final output layer and the vision_tower
    back_files = ["model-00315-of-00316.safetensors", "model-00316-of-00316.safetensors"]
    # Shard containing model.norm
    norm_files = ["model-00314-of-00316.safetensors"]
needed_files = set(front_files + layer2_files + back_files + norm_files)
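    # Note: these hard-coded shard numbers are specific to this checkpoint's
    # 316-file model.safetensors.index.json layout; a re-sharded checkpoint
    # would need different file names here.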
print(f"🔧 DEBUG: Will load {len(needed_files)} safetensor files instead of 316:")
for f in sorted(needed_files):
print(f" - {f}")
    # Filter the weight map, keeping only the needed layers and base components
    new_weight_map = {}
    # Keep the embedding and output layers
    for key, file in original_weight_map.items():
        # Keep base components
if any(key.startswith(prefix) for prefix in [
'model.embed_tokens',
'model.norm',
'lm_head'
]):
if file in needed_files:
new_weight_map[key] = file
        # Keep vision_tower weights (these live in model-00315 and model-00316)
elif key.startswith('vision_tower.'):
if file in needed_files:
new_weight_map[key] = file
        # Keep only the first debug_layers layers
        elif key.startswith('model.layers.'):
            # Extract the layer number
layer_parts = key.split('.')
if len(layer_parts) >= 3 and layer_parts[2].isdigit():
layer_num = int(layer_parts[2])
if layer_num < debug_layers and file in needed_files:
new_weight_map[key] = file
print(f"🔧 DEBUG: Filtered weight map from {len(original_weight_map)} to {len(new_weight_map)} entries")
    # Copy the needed safetensors shards
copied_files = set()
for file in new_weight_map.values():
if file not in copied_files:
src_file = os.path.join(original_path, file)
dst_file = os.path.join(debug_path, file)
if os.path.exists(src_file):
if not os.path.exists(dst_file):
print(f"📁 Copying Safetensor {file}... from {src_file} to {dst_file}")
shutil.copy2(src_file, dst_file)
copied_files.add(file)
else:
print(f"⚠️ File not found: {src_file}")
    # Write the new index file
new_index_data = {
"metadata": index_data.get("metadata", {}),
"weight_map": new_weight_map
}
index_dst = os.path.join(debug_path, 'model.safetensors.index.json')
with open(index_dst, 'w') as f:
json.dump(new_index_data, f, indent=2)
print(f"✅ DEBUG: Created debug model at {debug_path}")
return debug_path
def test_model_loading():
"""Test loading the model from pretrained weights"""
# Path to your model weights
model_path = "."
# Check if DEBUG mode is enabled
debug_minimal_layers = os.getenv('DEBUG_MINIMAL_LAYERS', '0') == '1'
if debug_minimal_layers:
print("🔧 DEBUG MINIMAL LAYERS: Using only 3 layers and minimal safetensors")
        # Build the debug copy of the model directory
model_path = create_debug_model_path(model_path, debug_layers=3)
print("Loading model configuration...")
try:
config = AutoConfig.from_pretrained(model_path)
print(f"✓ Config loaded successfully: {config.__class__.__name__}")
print(f" Model type: {config.model_type}")
print(f" Architecture: {config.architectures}")
print(f" Number of hidden layers: {config.num_hidden_layers}")
# Check if quantization config exists and is fp8
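        # Illustrative shape of the dict this check expects (example values,
        # not read from this checkpoint):
        #   {"quant_method": "fp8", "fmt": "e4m3",
        #    "weight_block_size": [128, 128], "activation_scheme": "dynamic"}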
if hasattr(config, 'quantization_config') and config.quantization_config is not None:
quant_config = config.quantization_config
if isinstance(quant_config, dict) and quant_config.get('quant_method') == 'fp8':
print(" Detected FP8 quantization configuration")
print(f" Format: {quant_config.get('fmt', 'unknown')}")
print(f" Weight block size: {quant_config.get('weight_block_size', 'unknown')}")
print(f" Activation scheme: {quant_config.get('activation_scheme', 'unknown')}")
except Exception as e:
print(f"✗ Error loading config: {e}")
return False
print("\nTesting processor loading...")
try:
# Test if processor can be loaded
try:
processor = AutoProcessor.from_pretrained(model_path)
print(f"✓ AutoProcessor loaded successfully: {processor.__class__.__name__}")
except Exception as e:
print(f"⚠️ AutoProcessor failed, trying direct import: {e}")
# Fallback to direct processor creation
processor = DotsVLMProcessor.from_pretrained(model_path)
print(f"✓ Direct processor loaded successfully: {processor.__class__.__name__}")
# Check processor attributes
if hasattr(processor, 'image_token'):
print(f" Image token: {processor.image_token}")
if hasattr(processor, 'tokenizer'):
print(f" Tokenizer type: {processor.tokenizer.__class__.__name__}")
if hasattr(processor, 'image_processor'):
print(f" Image processor type: {processor.image_processor.__class__.__name__}")
except Exception as e:
print(f"⚠️ Processor loading failed (this is OK for now): {e}")
processor = None
print("\nLoading model with auto-fixing vision_tower quantization...")
try:
# Use the custom model class directly for elegant handling
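        # torch_dtype="auto" takes the dtype from the checkpoint config, and
        # device_map="auto" lets accelerate place/shard the weights across the
        # available devices, so no explicit .to(device) call is needed here.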
model = DotsVLMForCausalLM.from_pretrained(
model_path,
config=config,
torch_dtype="auto",
device_map="auto",
trust_remote_code=True
)
print(f"✓ Model loaded successfully: {model.__class__.__name__}")
print(f" Number of parameters: {model.num_parameters():,}")
# Check vision_tower dtype
if hasattr(model, 'vision_tower') and model.vision_tower is not None:
vision_sample_param = list(model.vision_tower.parameters())[0]
print(f" Vision tower dtype: {vision_sample_param.dtype}")
except Exception as e:
traceback.print_exc()
print(f"✗ Error loading model: {e}")
return False
# print("\nLoading vision weights...")
# try:
# vision_weights_path = f"{model_path}/ve/visual.pt"
# vision_state_dict = torch.load(vision_weights_path, map_location='cpu', weights_only=False)
# # Extract vision_encoder weights and remove vision_tower prefix
# vision_encoder_weights = vision_state_dict['vision_encoder']
# cleaned_weights = {}
# for key, value in vision_encoder_weights.items():
# if key.startswith('vision_tower.'):
# new_key = key[len('vision_tower.'):] # Remove 'vision_tower.' prefix
# cleaned_weights[new_key] = value
# else:
# cleaned_weights[key] = value
# # Convert weights to match the vision_tower dtype
# target_dtype = list(model.vision_tower.parameters())[0].dtype
# for key, value in cleaned_weights.items():
# if value.dtype != target_dtype and value.dtype in [torch.float32, torch.float16, torch.bfloat16]:
# cleaned_weights[key] = value.to(target_dtype)
# # Use assign=True to avoid meta tensor copying warnings
# model.vision_tower.load_state_dict(cleaned_weights, assign=True, strict=False)
# print("✓ Vision weights loaded successfully")
# print(f" Vision tower final dtype: {list(model.vision_tower.parameters())[0].dtype}")
# except Exception as e:
# print(f"✗ Error loading vision weights: {e}")
# return False
print("\nTesting model forward pass...")
try:
# Full model forward pass
batch_size = 1
seq_len = 10
input_ids = torch.randint(0, 1000, (batch_size, seq_len))
        # Move the inputs to the same device as the model's first parameters
        # (device_map="auto" may have sharded the model across devices)
        device = next(model.parameters()).device
        input_ids = input_ids.to(device)
with torch.no_grad():
outputs = model(input_ids=input_ids)
print(f"✓ Forward pass successful")
print(f" Output shape: {outputs.logits.shape}")
print(f" Output dtype: {outputs.logits.dtype}")
except Exception as e:
print(f"✗ Error in forward pass: {e}")
traceback.print_exc()
return False
    # Clean up / report on the temporary debug directory
    if debug_minimal_layers and model_path.endswith("_debug_temp"):
        try:
            # shutil.rmtree(model_path)  # left disabled so copied shards can be reused across runs
            print(f"🧹 Temporary debug directory kept for reuse: {model_path}")
        except Exception as e:
            print(f"⚠️ Failed to clean up debug directory: {e}")
        print("\n🎉 DEBUG MINIMAL LAYERS: 3-layer model test completed!")
        print("💡 To test the full 61-layer model, run without DEBUG_MINIMAL_LAYERS=1")
    else:
        print("\n🎉 All tests passed! Model is working correctly.")
    return True
if __name__ == "__main__":
    test_model_loading()