File size: 1,133 Bytes
64ff90d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# processing_opencua.py
from transformers import Qwen2_5_VLProcessor, AutoTokenizer, AutoImageProcessor

class OpenCUAProcessor(Qwen2_5_VLProcessor):
    """Qwen2.5-VL processor whose components are loaded via the Auto classes.

    ``from_pretrained`` is overridden so the tokenizer and image processor are
    loaded explicitly with ``AutoTokenizer`` / ``AutoImageProcessor`` rather
    than being resolved from the ``tokenizer_class`` string.
    """

    # A plain string is enough here; from_pretrained below loads the tokenizer
    # manually, which avoids resolving this string through reflection.
    tokenizer_class = "TikTokenV3"

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Build a processor from a model directory or hub repository.

        Args:
            pretrained_model_name_or_path: Location handed unchanged to both
                ``AutoTokenizer.from_pretrained`` and
                ``AutoImageProcessor.from_pretrained``.
            **kwargs: ``trust_remote_code`` (default ``False``) plus the
                hub/download options ``cache_dir``, ``force_download``,
                ``local_files_only``, ``proxies``, ``revision``, ``subfolder``
                and ``token`` are forwarded to both loaders. Any other keyword
                argument is ignored.

        Returns:
            An instance of ``cls`` wrapping the loaded image processor and
            tokenizer.
        """
        # Make sure remote code can be used when the caller allows it.
        trust_remote_code = kwargs.get("trust_remote_code", False)

        # Forward only the options that select *where/how* files are fetched.
        # Previously these were dropped, so e.g. a pinned ``revision`` or an
        # auth ``token`` never reached the component loaders.
        hub_option_names = (
            "cache_dir",
            "force_download",
            "local_files_only",
            "proxies",
            "revision",
            "subfolder",
            "token",
        )
        hub_kwargs = {
            name: kwargs[name] for name in hub_option_names if name in kwargs
        }

        # 1) Load the tokenizer manually (per the model directory's
        #    tokenizer_config.json -> TikTokenV3 + tokenization_opencua.py).
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **hub_kwargs,
        )

        # 2) Load the image processor manually (intended to stay
        #    Qwen2VLImageProcessor).
        image_processor = AutoImageProcessor.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **hub_kwargs,
        )

        # 3) Construct and return the Qwen2.5-VL processor instance.
        return cls(image_processor=image_processor, tokenizer=tokenizer)