# processing_opencua.py
from transformers import Qwen2_5_VLProcessor, AutoTokenizer, AutoImageProcessor


class OpenCUAProcessor(Qwen2_5_VLProcessor):
    """Processor for OpenCUA checkpoints.

    Pairs the checkpoint's TikTokenV3 tokenizer (loaded via remote code in
    the model directory) with the Qwen2.5-VL image processor, reusing
    Qwen2_5_VLProcessor for the combined text+image processing logic.
    """

    # A class-name string is sufficient here; from_pretrained below loads the
    # tokenizer explicitly, so no string-based reflection is ever performed.
    tokenizer_class = "TikTokenV3"

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Build the processor from a local directory or hub repo id.

        Args:
            pretrained_model_name_or_path: checkpoint directory or hub id.
            **kwargs: standard Hugging Face loading options (e.g.
                ``trust_remote_code``, ``cache_dir``, ``revision``,
                ``token``); forwarded to both sub-loaders.

        Returns:
            An ``OpenCUAProcessor`` wrapping the loaded tokenizer and
            image processor.

        Fix vs. original: extra loading kwargs were silently discarded and
        never reached the sub-loaders; they are now forwarded. NOTE(review):
        the TikTokenV3 tokenizer lives in remote code (tokenization_opencua.py),
        so callers must pass ``trust_remote_code=True`` — the default stays
        ``False`` to preserve the original behavior.
        """
        # pop (not get) so trust_remote_code is not passed twice via **kwargs.
        trust_remote_code = kwargs.pop("trust_remote_code", False)

        # 1) Load the tokenizer explicitly: tokenizer_config.json in the model
        #    directory resolves to TikTokenV3 + tokenization_opencua.py.
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )

        # 2) Load the image processor (remains Qwen2VLImageProcessor).
        image_processor = AutoImageProcessor.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )

        # 3) Assemble and return the Qwen2.5-VL processor instance.
        return cls(image_processor=image_processor, tokenizer=tokenizer)