hermeschen-ezcon committed
Commit d8c3062 · verified · 1 Parent(s): 1d9f9c5

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,19 +1,23 @@
  ---
- license: apache-2.0
+ base_model: Qwen/Qwen2-VL-2B-Instruct
  language:
  - en
+ library_name: transformers
  pipeline_tag: image-text-to-text
+ license: apache-2.0
  tags:
  - multimodal
+ - qwen
+ - qwen2
+ - unsloth
+ - transformers
+ - vision
  - mlx
- library_name: transformers
- base_model:
- - Qwen/Qwen2-VL-2B
  ---

  # EZCon/Qwen2-VL-2B-Instruct-8bit-mlx
- This model was converted to MLX format from [`Qwen/Qwen2-VL-2B-Instruct`]() using mlx-vlm version **0.3.2**.
- Refer to the [original model card](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) for more details on the model.
+ This model was converted to MLX format from [`unsloth/Qwen2-VL-2B-Instruct`]() using mlx-vlm version **0.3.2**.
+ Refer to the [original model card](https://huggingface.co/unsloth/Qwen2-VL-2B-Instruct) for more details on the model.
  ## Use with mlx

  ```bash
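
The hunk ends at the opening fence, so the usage snippet itself is not part of this diff. For context, converted repos like this one are driven through mlx-vlm; below is a minimal sketch of its Python API (the `load`/`generate`/`apply_chat_template` helpers follow mlx-vlm's documented interface, but exact signatures vary between releases, so treat this as an assumption to check against your installed version):

```python
# Sketch: run the 8-bit MLX conversion with mlx-vlm (assumed API).
from mlx_vlm import load, generate
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config

model_path = "EZCon/Qwen2-VL-2B-Instruct-8bit-mlx"
model, processor = load(model_path)
config = load_config(model_path)

# Build a Qwen2-VL chat prompt for a single image, then decode.
image = ["path/to/image.jpg"]  # placeholder path
prompt = apply_chat_template(processor, config, "Describe this image.", num_images=len(image))
output = generate(model, processor, prompt, image, verbose=False)
print(output)
```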
chat_template.json CHANGED
@@ -1,3 +1,3 @@
  {
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
- }
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
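
The template text is unchanged here (the rewrite appears to touch only line endings). As a sanity check of what it renders, a minimal sketch assuming this repo loads with transformers' `AutoProcessor`:

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("EZCon/Qwen2-VL-2B-Instruct-8bit-mlx")

# One user turn with an image part and a text part; per the template,
# a default system prompt is injected when the first message is not a
# system message, and each image expands to vision pad tokens.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is in this picture?"},
        ],
    }
]
text = processor.apply_chat_template(messages, add_generation_prompt=True)
print(text)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>What is in this picture?<|im_end|>
# <|im_start|>assistant
```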
config.json CHANGED
@@ -6,7 +6,7 @@
   "attention_dropout": 0.0,
   "bad_words_ids": null,
   "begin_suppress_tokens": null,
- "bos_token_id": 151643,
+ "bos_token_id": null,
   "chunk_size_feed_forward": 0,
   "cross_attention_hidden_size": null,
   "decoder_start_token_id": null,
@@ -50,7 +50,7 @@
   "output_attentions": false,
   "output_hidden_states": false,
   "output_scores": false,
- "pad_token_id": null,
+ "pad_token_id": 151654,
   "prefix": null,
   "problem_type": null,
   "pruned_heads": {},
@@ -1036,13 +1036,13 @@
   "return_dict_in_generate": false,
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
- "type": "default",
   "mrope_section": [
   16,
   24,
   24
   ],
- "rope_type": "default"
+ "rope_type": "default",
+ "type": "default"
   },
   "rope_theta": 1000000.0,
   "sep_token_id": null,
@@ -1068,13 +1068,13 @@
   "rope_theta": 1000000.0,
   "attention_dropout": 0.0,
   "rope_scaling": {
- "type": "default",
   "mrope_section": [
   16,
   24,
   24
   ],
- "rope_type": "default"
+ "rope_type": "default",
+ "type": "default"
   },
   "layer_types": [
   "full_attention",
@@ -1136,8 +1136,8 @@
   "problem_type": null,
   "tokenizer_class": null,
   "prefix": null,
- "bos_token_id": 151643,
- "pad_token_id": null,
+ "bos_token_id": null,
+ "pad_token_id": 151654,
   "eos_token_id": 151645,
   "sep_token_id": null,
   "decoder_start_token_id": null,
@@ -1166,11 +1166,12 @@
   "exponential_decay_length_penalty": null,
   "suppress_tokens": null,
   "begin_suppress_tokens": null,
- "_name_or_path": "",
- "vision_start_token_id": 151652,
+ "_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
+ "model_type": "qwen2_vl_text",
+ "unsloth_fixed": true,
   "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652,
   "vision_token_id": 151654,
- "model_type": "qwen2_vl_text",
   "tf_legacy_loss": false,
   "use_bfloat16": false,
   "output_attentions": false
@@ -1184,6 +1185,7 @@
   "torchscript": false,
   "transformers_version": "4.55.0.dev0",
   "typical_p": 1.0,
+ "unsloth_fixed": true,
   "use_bfloat16": false,
   "use_cache": true,
   "use_sliding_window": false,
@@ -1192,7 +1194,7 @@
   "return_dict": true,
   "output_hidden_states": false,
   "torchscript": false,
- "torch_dtype": null,
+ "torch_dtype": "bfloat16",
   "pruned_heads": {},
   "tie_word_embeddings": true,
   "chunk_size_feed_forward": 0,
@@ -1247,6 +1249,7 @@
   "begin_suppress_tokens": null,
   "_name_or_path": "",
   "in_chans": 3,
+ "model_type": "qwen2_vl",
   "spatial_patch_size": 14,
   "tf_legacy_loss": false,
   "use_bfloat16": false,
@@ -1261,7 +1264,6 @@
   "spatial_merge_size": 2,
   "temporal_patch_size": 2,
   "initializer_range": 0.02,
- "model_type": "qwen2_vl",
   "output_attentions": false
   },
   "vision_end_token_id": 151653,
generation_config.json CHANGED
@@ -1,12 +1,14 @@
  {
- "cache_implementation": "offloaded_hybrid_chunked",
- "eos_token_id": 151645,
- "forced_eos_token_id": 151645,
- "low_memory": true,
- "max_new_tokens": 8,
- "min_new_tokens": 5,
- "pad_token_id": 151643,
- "remove_invalid_values": true,
- "renormalize_logits": true,
- "transformers_version": "4.55.0.dev0"
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "max_length": 32768,
+ "pad_token_id": 151654,
+ "temperature": 0.01,
+ "top_k": 1,
+ "top_p": 0.001,
+ "transformers_version": "4.49.0"
  }
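
Note that the new defaults (`do_sample: true` with `temperature: 0.01`, `top_k: 1`, `top_p: 0.001`) make sampling effectively greedy. A sketch of inspecting them, assuming transformers' `GenerationConfig`:

```python
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("EZCon/Qwen2-VL-2B-Instruct-8bit-mlx")

# do_sample=True, but top_k=1 keeps only the single most likely token,
# so decoding behaves like greedy search despite sampling being on.
print(gen.do_sample, gen.temperature, gen.top_k, gen.top_p)
# True 0.01 1 0.001
```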
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:21326527da58f616b6ca7e40b5866baee4e16c5375bc33473d86aa15130e922d
- size 2971011425
+ oid sha256:641709b32bcc69c5c56c0ba31cc688ed118ecb626697dcbaa2f841135bc3db6b
+ size 2971011801
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
   "single_word": false
   },
   "pad_token": {
- "content": "<|endoftext|>",
+ "content": "<|vision_pad|>",
   "lstrip": false,
   "normalized": false,
   "rstrip": false,
tokenizer_config.json CHANGED
@@ -135,7 +135,7 @@
   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 32768,
- "pad_token": "<|endoftext|>",
+ "pad_token": "<|vision_pad|>",
   "padding_side": "left",
   "processor_class": "Qwen2VLProcessor",
   "split_special_tokens": false,