OpenCUA-7B-vllm / processing_opencua.py
zhiyuanhucs's picture
Upload model files
64ff90d verified
# processing_opencua.py
from transformers import Qwen2_5_VLProcessor, AutoTokenizer, AutoImageProcessor
class OpenCUAProcessor(Qwen2_5_VLProcessor):
    """Qwen2.5-VL processor whose sub-components are loaded explicitly.

    ``from_pretrained`` is overridden so that the tokenizer and the image
    processor are loaded through ``AutoTokenizer`` / ``AutoImageProcessor``
    and then handed to the regular processor constructor.
    """

    # A plain string is enough here: from_pretrained below loads the tokenizer
    # manually, which avoids resolving the class through string reflection.
    tokenizer_class = "TikTokenV3"

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Build a processor from a model directory or hub identifier.

        Args:
            pretrained_model_name_or_path: Location passed unchanged to both
                ``AutoTokenizer.from_pretrained`` and
                ``AutoImageProcessor.from_pretrained``.
            **kwargs: ``trust_remote_code`` (default ``False``) and the
                standard hub-loading options (``cache_dir``,
                ``force_download``, ``local_files_only``, ``proxies``,
                ``revision``, ``subfolder``, ``token``) are forwarded to both
                loaders when supplied. Any other keyword is ignored.

        Returns:
            An instance of ``cls`` wrapping the loaded image processor and
            tokenizer.
        """
        # Remote code must be allowed by the caller for the custom tokenizer.
        trust_remote_code = kwargs.get("trust_remote_code", False)

        # Forward only the options that select *which* files are fetched, and
        # only when the caller actually passed them, so that default calls
        # behave exactly as before while e.g. a pinned ``revision`` or an
        # offline ``local_files_only`` request is no longer silently dropped.
        hub_option_names = (
            "cache_dir",
            "force_download",
            "local_files_only",
            "proxies",
            "revision",
            "subfolder",
            "token",
        )
        hub_options = {
            name: kwargs[name] for name in hub_option_names if name in kwargs
        }

        # 1) Load the tokenizer manually (resolved via the model directory's
        #    tokenizer_config.json -> TikTokenV3 + tokenization_opencua.py).
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **hub_options,
        )

        # 2) Load the image processor manually (kept as Qwen2VLImageProcessor).
        image_processor = AutoImageProcessor.from_pretrained(
            pretrained_model_name_or_path,
            trust_remote_code=trust_remote_code,
            **hub_options,
        )

        # 3) Construct and return the Qwen2.5-VL processor instance.
        return cls(image_processor=image_processor, tokenizer=tokenizer)