Commit
·
409338f
1
Parent(s):
131865f
feat: adapter switching
Browse files
Signed-off-by: jupyterjazz <[email protected]>
- {adapter → adapters/old_retrieval}/adapter_config.json +1 -1
- {adapter → adapters/old_retrieval}/adapter_model.safetensors +0 -0
- adapters/retrieval/adapter_config.json +31 -0
- adapters/retrieval/adapter_model.safetensors +3 -0
- adapters/text-matching/adapter_config.json +26 -0
- adapters/text-matching/adapter_model.safetensors +3 -0
- config.json +1 -1
- modeling_colqwen_duo.py +59 -21
{adapter → adapters/old_retrieval}/adapter_config.json
RENAMED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 32,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(
|
23 |
"task_type": "FEATURE_EXTRACTION",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 32,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
|
23 |
"task_type": "FEATURE_EXTRACTION",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
{adapter → adapters/old_retrieval}/adapter_model.safetensors
RENAMED
File without changes
|
adapters/retrieval/adapter_config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "jinaai/colqwen25-duo-base",
|
5 |
+
"bias": "none",
|
6 |
+
"corda_config": null,
|
7 |
+
"eva_config": null,
|
8 |
+
"exclude_modules": null,
|
9 |
+
"fan_in_fan_out": false,
|
10 |
+
"inference_mode": true,
|
11 |
+
"init_lora_weights": "gaussian",
|
12 |
+
"layer_replication": null,
|
13 |
+
"layers_pattern": null,
|
14 |
+
"layers_to_transform": null,
|
15 |
+
"loftq_config": {},
|
16 |
+
"lora_alpha": 32,
|
17 |
+
"lora_bias": false,
|
18 |
+
"lora_dropout": 0.1,
|
19 |
+
"megatron_config": null,
|
20 |
+
"megatron_core": "megatron.core",
|
21 |
+
"modules_to_save": null,
|
22 |
+
"peft_type": "LORA",
|
23 |
+
"r": 32,
|
24 |
+
"rank_pattern": {},
|
25 |
+
"revision": null,
|
26 |
+
"target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
|
27 |
+
"task_type": "FEATURE_EXTRACTION",
|
28 |
+
"trainable_token_indices": null,
|
29 |
+
"use_dora": false,
|
30 |
+
"use_rslora": false
|
31 |
+
}
|
adapters/retrieval/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23759be51c8b6d9d744ec73ec7e0801b1a1d56ba61c201b4c67ab11a170cd6e0
|
3 |
+
size 120138416
|
adapters/text-matching/adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "jinaai/colqwen25-duo-base",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": "gaussian",
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 32,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
|
23 |
+
"task_type": "FEATURE_EXTRACTION",
|
24 |
+
"use_dora": false,
|
25 |
+
"use_rslora": false
|
26 |
+
}
|
adapters/text-matching/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3677815cef695c54aae2358c574c046d6d9a5787fd96ca457ee00ac656576985
|
3 |
+
size 120138416
|
config.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
],
|
7 |
"auto_map": {
|
8 |
"AutoConfig": "configuration_colqwen_duo.ColQwen25DuoConfig",
|
9 |
-
"AutoModel": "modeling_colqwen_duo.
|
10 |
},
|
11 |
"attention_dropout": 0.0,
|
12 |
"bos_token_id": 151643,
|
|
|
6 |
],
|
7 |
"auto_map": {
|
8 |
"AutoConfig": "configuration_colqwen_duo.ColQwen25DuoConfig",
|
9 |
+
"AutoModel": "modeling_colqwen_duo.JinaEmbeddingsV4Model"
|
10 |
},
|
11 |
"attention_dropout": 0.0,
|
12 |
"bos_token_id": 151643,
|
modeling_colqwen_duo.py
CHANGED
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
|
|
6 |
from dataclasses import dataclass
|
7 |
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union, cast
|
8 |
from typing_extensions import Unpack
|
9 |
-
|
10 |
import torch
|
11 |
from torch import nn
|
12 |
from torch.utils.data import DataLoader
|
@@ -15,6 +15,7 @@ from functools import partial
|
|
15 |
from PIL import Image
|
16 |
from tqdm import tqdm
|
17 |
from enum import Enum
|
|
|
18 |
|
19 |
from transformers import BatchEncoding, BatchFeature
|
20 |
|
@@ -642,32 +643,69 @@ class ColQwenDuoBase(AbstractHybridModel, QwenVLEmbeddingBase):
|
|
642 |
)
|
643 |
|
644 |
|
645 |
-
class
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
self._init_projection_layers(config)
|
650 |
-
self.post_init()
|
651 |
-
self.processor = ColQwen25DuoProcessor.from_pretrained(self.name_or_path, trust_remote_code=True)
|
652 |
|
|
|
|
|
|
|
|
|
653 |
@classmethod
|
654 |
def from_pretrained(
|
655 |
cls,
|
|
|
656 |
*args,
|
657 |
**kwargs,
|
658 |
):
|
659 |
-
if
|
660 |
kwargs["torch_dtype"] = "auto"
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from dataclasses import dataclass
|
7 |
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union, cast
|
8 |
from typing_extensions import Unpack
|
9 |
+
from peft import LoraConfig, PeftModel
|
10 |
import torch
|
11 |
from torch import nn
|
12 |
from torch.utils.data import DataLoader
|
|
|
15 |
from PIL import Image
|
16 |
from tqdm import tqdm
|
17 |
from enum import Enum
|
18 |
+
from peft.utils.hotswap import hotswap_adapter
|
19 |
|
20 |
from transformers import BatchEncoding, BatchFeature
|
21 |
|
|
|
643 |
)
|
644 |
|
645 |
|
646 |
+
class JinaEmbeddingsV4Model:
    """
    Wrapper class for ColQwen25Duo that handles the loading of models and adapters.

    The wrapper keeps the PEFT-wrapped model in ``self.model`` and the
    directory holding the per-task adapters in ``self.adapter_dir``. Any
    attribute that is not defined on the wrapper itself is looked up on the
    wrapped model (see ``__getattr__``).
    """

    # Task names accepted by ``set_task``.
    # NOTE(review): the file list of this commit only contains
    # ``adapters/retrieval``, ``adapters/text-matching`` and
    # ``adapters/old_retrieval`` -- confirm an ``adapters/code`` directory
    # exists before relying on the 'code' task.
    _SUPPORTED_TASKS = ('retrieval', 'text-matching', 'code')

    def __init__(self, model, adapter_dir):
        """
        Args:
            model: The (adapter-wrapped) model that attribute access is delegated to.
            adapter_dir: Directory that contains one sub-directory per task adapter.
        """
        self.model = model
        self.adapter_dir = adapter_dir

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path,
        *args,
        **kwargs,
    ):
        """
        Load the base ColQwen25Duo model and attach the adapter for one task.

        Args:
            pretrained_model_name_or_path: Forwarded to ``ColQwen25Duo.from_pretrained``.
            *args: Forwarded to ``ColQwen25Duo.from_pretrained``.
            **kwargs: Forwarded to ``ColQwen25Duo.from_pretrained``, except for
                ``task`` (str, default ``'retrieval'``), which is consumed here
                and selects the adapter sub-directory. ``torch_dtype`` defaults
                to ``"auto"`` when the caller does not pass it.

        Returns:
            A ``JinaEmbeddingsV4Model`` wrapping the adapter-loaded model.
        """
        # Only fill in a dtype when the caller did not choose one.
        kwargs.setdefault("torch_dtype", "auto")

        # ``task`` is a wrapper-level option; remove it before forwarding kwargs.
        task = kwargs.pop('task', 'retrieval')

        model = ColQwen25Duo.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)

        if os.path.isdir(model.name_or_path):
            # Local checkout: adapters sit next to the model files.
            adapter_dir = os.path.join(model.name_or_path, 'adapters')
        else:
            # Otherwise treat name_or_path as a hub repo id and fetch the
            # adapter files (restricted via allow_patterns) into the local cache.
            adapter_cache_path = snapshot_download(
                repo_id=model.name_or_path,
                allow_patterns=['adapters/*']
            )
            adapter_dir = os.path.join(adapter_cache_path, 'adapters')

        model = PeftModel.from_pretrained(model, os.path.join(adapter_dir, task))
        return cls(model, adapter_dir)

    def set_task(self, task: str):
        """
        Set the task adapter for the model.

        Args:
            task (str): The task name. Must be one of ['retrieval', 'text-matching', 'code']

        Raises:
            ValueError: If ``task`` is not one of the supported task names.
        """
        if task not in self._SUPPORTED_TASKS:
            raise ValueError(
                f"Invalid task: {task}. Must be one of {list(self._SUPPORTED_TASKS)}"
            )

        adapter_path = os.path.join(self.adapter_dir, task)
        # Load the adapter stored at adapter_path into the adapter named 'default'.
        hotswap_adapter(self.model, adapter_path, adapter_name='default')

    def __getattr__(self, name):
        """
        Delegate attribute access to the underlying model.

        Only called when normal lookup on the wrapper fails.

        Raises:
            AttributeError: If neither the wrapper nor the wrapped model has ``name``.
        """
        missing = f"'{self.__class__.__name__}' object has no attribute '{name}'"

        # Read ``model`` from the instance dict directly: going through
        # ``self.model`` would re-enter __getattr__ when the attribute has not
        # been set yet (e.g. an instance created via __new__ by copy/pickle)
        # and recurse until RecursionError.
        try:
            model = self.__dict__["model"]
        except KeyError:
            raise AttributeError(missing) from None

        try:
            return getattr(model, name)
        except AttributeError:
            raise AttributeError(missing) from None
|
702 |
+
|
703 |
|
704 |
+
class ColQwen25Duo(ColQwenDuoBase, Qwen2_5_VLForConditionalGeneration):
    """
    Concrete model class combining ColQwenDuoBase with Qwen2_5_VLForConditionalGeneration.

    Construction initialises the Qwen2.5-VL parent, adds the projection
    layers, runs ``post_init`` and attaches a ``ColQwen25DuoProcessor``.
    """

    config_class = ColQwen25DuoConfig

    def __init__(self, config: ColQwen25DuoConfig):
        """
        Args:
            config: Model configuration; also passed to ``_init_projection_layers``.
        """
        # The Qwen2.5-VL initialiser is invoked explicitly on that class
        # (not via super()), exactly as in the original code.
        Qwen2_5_VLForConditionalGeneration.__init__(self, config)

        self._init_projection_layers(config)
        self.post_init()

        # The processor is loaded from the same location as the model itself.
        self.processor = ColQwen25DuoProcessor.from_pretrained(
            self.name_or_path,
            trust_remote_code=True,
        )
|
711 |
+
|