tc-mb committed · Commit f174180 · 0 parents

Initial commit: MiniCPM-V-4-gguf model

.DS_Store ADDED
Binary file (6.15 kB).
 
.gitattributes ADDED
@@ -0,0 +1,47 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Model-3.6B-F16.gguf filter=lfs diff=lfs merge=lfs -text
37
+ ggml-model-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
38
+ ggml-model-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
39
+ ggml-model-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
40
+ ggml-model-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
41
+ ggml-model-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
42
+ ggml-model-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
43
+ ggml-model-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
44
+ ggml-model-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
45
+ ggml-model-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
46
+ ggml-model-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
47
+ mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text
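The patterns above are ordinary gitattributes globs that route matching files through the LFS filter. Which paths they capture can be approximated with Python's `fnmatch`; this is a simplified sketch (real gitattributes matching has extra rules for `**`, path separators, and attribute precedence), and the pattern subset below is illustrative:

```python
from fnmatch import fnmatch
from pathlib import PurePosixPath

# Illustrative subset of the LFS patterns declared in .gitattributes above.
LFS_PATTERNS = ["*.bin", "*.zip", "*.safetensors", "mmproj-model-f16.gguf"]

def is_lfs_tracked(path: str) -> bool:
    """Approximate gitattributes matching: compare the file's base name
    against each pattern (real Git also matches full paths and `**`)."""
    name = PurePosixPath(path).name
    return any(fnmatch(name, pattern) for pattern in LFS_PATTERNS)

print(is_lfs_tracked("ane_minicpmv4_vit_f16.mlmodelc/weights/weight.bin"))  # True
print(is_lfs_tracked("README.md"))  # False
```

Files matched this way are stored in the repo as small pointer stubs, with the actual payload kept in LFS storage.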
README.md ADDED
@@ -0,0 +1,649 @@
1
+ ---
2
+ pipeline_tag: image-text-to-text
3
+ datasets:
4
+ - openbmb/RLAIF-V-Dataset
5
+ library_name: transformers
6
+ language:
7
+ - multilingual
8
+ tags:
9
+ - minicpm-v
10
+ - vision
11
+ - ocr
12
+ - multi-image
13
+ - video
14
+ - custom_code
15
+ ---
16
+
17
+ <h1>A GPT-4V Level MLLM for Single Image, Multi Image and Video on Your Phone</h1>
18
+
19
+ [GitHub](https://github.com/OpenBMB/MiniCPM-o) | [Demo](https://minicpm-v.openbmb.cn/)
20
+
21
+
22
+
23
+ ## MiniCPM-V 4.0
24
+
25
+ **MiniCPM-V 4.0** is the latest model in the MiniCPM-V series. The model is built in an end-to-end fashion on SigLip2-400M and MiniCPM4-3B, with a total of 4.1B parameters. It inherits the strong single-image, multi-image, and video understanding performance of MiniCPM-V 2.6, while largely improving efficiency. Notable features of MiniCPM-V 4.0 include:
26
+
27
+ - 🔥 **Leading Visual Capability.**
28
+ MiniCPM-V 4.0 achieves an average score of 69.0 on OpenCompass, a comprehensive evaluation of 8 popular benchmarks, outperforming both MiniCPM-V 2.6 (8.1B, 65.2) and Qwen2.5-VL-3B-Instruct (3.8B, 64.5). **With only 4.1B parameters, it surpasses the widely used proprietary model GPT-4.1-mini-20250414** for single image understanding. It also outperforms MiniCPM-V 2.6 on both multi-image understanding and video understanding.
29
+
30
+ - 🚀 **Superior Efficiency.**
31
+ Designed for performance on end devices, MiniCPM-V 4.0 runs smoothly on the **iPhone 16 Pro Max, delivering a decoding speed of 17.9 tokens/s**. Compared with the already efficient MiniCPM-V 2.6, MiniCPM-V 4.0 achieves a further 30% throughput boost while offering enhanced visual understanding.
32
+
33
+ - 💫 **Easy Usage.**
34
+ MiniCPM-V 4.0 can be easily used in various ways, including **llama.cpp, Ollama, vLLM, SGLang, LLaMA-Factory, and a local web demo**. Get started easily with our **well-structured [Cookbook](https://github.com/OpenSQZ/MiniCPM-V-CookBook)**, featuring detailed instructions and practical examples.
35
+
36
+
37
+ ### Evaluation
38
+
39
+ <details>
40
+ <summary>Click to view single-image results on OpenCompass.</summary>
41
+ <div align="center">
42
+ <table style="margin: 0px auto;">
43
+ <thead>
44
+ <tr>
45
+ <th nowrap="nowrap" align="left">model</th>
46
+ <th>Size</th>
47
+ <th>OpenCompass</th>
48
+ <th>OCRBench</th>
49
+ <th>MathVista</th>
50
+ <th>HallusionBench</th>
51
+ <th>MMMU</th>
52
+ <th>MMVet</th>
53
+ <th>MMBench V1.1</th>
54
+ <th>MMStar</th>
55
+ <th>AI2D</th>
56
+ </tr>
57
+ </thead>
58
+ <tbody align="center">
59
+ <tr>
60
+ <td colspan="11" align="left"><strong>Proprietary</strong></td>
61
+ </tr>
62
+ <tr>
63
+ <td nowrap="nowrap" align="left">GPT-4v-20240409</td>
64
+ <td>-</td>
65
+ <td>63.5</td>
66
+ <td>656</td>
67
+ <td>55.2</td>
68
+ <td>43.9</td>
69
+ <td>61.7</td>
70
+ <td>67.5</td>
71
+ <td>79.8</td>
72
+ <td>56.0</td>
73
+ <td>78.6</td>
74
+ </tr>
75
+ <tr>
76
+ <td nowrap="nowrap" align="left">Gemini-1.5-Pro</td>
77
+ <td>-</td>
78
+ <td>64.5</td>
79
+ <td>754</td>
80
+ <td>58.3</td>
81
+ <td>45.6</td>
82
+ <td>60.6</td>
83
+ <td>64.0</td>
84
+ <td>73.9</td>
85
+ <td>59.1</td>
86
+ <td>79.1</td>
87
+ </tr>
88
+ <tr>
89
+ <td nowrap="nowrap" align="left">GPT-4.1-mini-20250414</td>
90
+ <td>-</td>
91
+ <td>68.9</td>
92
+ <td>840</td>
93
+ <td>70.9</td>
94
+ <td>49.3</td>
95
+ <td>55.0</td>
96
+ <td>74.3</td>
97
+ <td>80.9</td>
98
+ <td>60.9</td>
99
+ <td>76.0</td>
100
+ </tr>
101
+ <tr>
102
+ <td nowrap="nowrap" align="left">Claude 3.5 Sonnet-20241022</td>
103
+ <td>-</td>
104
+ <td>70.6</td>
105
+ <td>798</td>
106
+ <td>65.3</td>
107
+ <td>55.5</td>
108
+ <td>66.4</td>
109
+ <td>70.1</td>
110
+ <td>81.7</td>
111
+ <td>65.1</td>
112
+ <td>81.2</td>
113
+ </tr>
114
+ <tr>
115
+ <td colspan="11" align="left"><strong>Open-source</strong></td>
116
+ </tr>
117
+ <tr>
118
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-3B-Instruct</td>
119
+ <td>3.8B</td>
120
+ <td>64.5</td>
121
+ <td>828</td>
122
+ <td>61.2</td>
123
+ <td>46.6</td>
124
+ <td>51.2</td>
125
+ <td>60.0</td>
126
+ <td>76.8</td>
127
+ <td>56.3</td>
128
+ <td>81.4</td>
129
+ </tr>
130
+ <tr>
131
+ <td nowrap="nowrap" align="left">InternVL2.5-4B</td>
132
+ <td>3.7B</td>
133
+ <td>65.1</td>
134
+ <td>820</td>
135
+ <td>60.8</td>
136
+ <td>46.6</td>
137
+ <td>51.8</td>
138
+ <td>61.5</td>
139
+ <td>78.2</td>
140
+ <td>58.7</td>
141
+ <td>81.4</td>
142
+ </tr>
143
+ <tr>
144
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-7B-Instruct</td>
145
+ <td>8.3B</td>
146
+ <td>70.9</td>
147
+ <td>888</td>
148
+ <td>68.1</td>
149
+ <td>51.9</td>
150
+ <td>58.0</td>
151
+ <td>69.7</td>
152
+ <td>82.2</td>
153
+ <td>64.1</td>
154
+ <td>84.3</td>
155
+ </tr>
156
+ <tr>
157
+ <td nowrap="nowrap" align="left">InternVL2.5-8B</td>
158
+ <td>8.1B</td>
159
+ <td>68.1</td>
160
+ <td>821</td>
161
+ <td>64.5</td>
162
+ <td>49.0</td>
163
+ <td>56.2</td>
164
+ <td>62.8</td>
165
+ <td>82.5</td>
166
+ <td>63.2</td>
167
+ <td>84.6</td>
168
+ </tr>
169
+ <tr>
170
+ <td nowrap="nowrap" align="left">MiniCPM-V-2.6</td>
171
+ <td>8.1B</td>
172
+ <td>65.2</td>
173
+ <td>852</td>
174
+ <td>60.8</td>
175
+ <td>48.1</td>
176
+ <td>49.8</td>
177
+ <td>60.0</td>
178
+ <td>78.0</td>
179
+ <td>57.5</td>
180
+ <td>82.1</td>
181
+ </tr>
182
+ <tr>
183
+ <td nowrap="nowrap" align="left">MiniCPM-o-2.6</td>
184
+ <td>8.7B</td>
185
+ <td>70.2</td>
186
+ <td>889</td>
187
+ <td>73.3</td>
188
+ <td>51.1</td>
189
+ <td>50.9</td>
190
+ <td>67.2</td>
191
+ <td>80.6</td>
192
+ <td>63.3</td>
193
+ <td>86.1</td>
194
+ </tr>
195
+ <tr>
196
+ <td nowrap="nowrap" align="left">MiniCPM-V-4.0</td>
197
+ <td>4.1B</td>
198
+ <td>69.0</td>
199
+ <td>894</td>
200
+ <td>66.9</td>
201
+ <td>50.8</td>
202
+ <td>51.2</td>
203
+ <td>68.0</td>
204
+ <td>79.7</td>
205
+ <td>62.8</td>
206
+ <td>82.9</td>
207
+ </tr>
208
+ </tbody>
209
+ </table>
210
+ </div>
211
+
212
+ </details>
213
+
214
+ <details>
215
+ <summary>Click to view single-image results on ChartQA, MME, RealWorldQA, TextVQA, DocVQA, MathVision, DynaMath, WeMath, Object HalBench, and MM HalBench.</summary>
216
+
217
+ <div align="center">
218
+ <table style="margin: 0px auto;">
219
+ <thead>
220
+ <tr>
221
+ <th nowrap="nowrap" align="left">model</th>
222
+ <th>Size</th>
223
+ <th>ChartQA</th>
224
+ <th>MME</th>
225
+ <th>RealWorldQA</th>
226
+ <th>TextVQA</th>
227
+ <th>DocVQA</th>
228
+ <th>MathVision</th>
229
+ <th>DynaMath</th>
230
+ <th>WeMath</th>
231
+ <th colspan="2">Obj Hal</th>
232
+ <th colspan="2">MM Hal</th>
233
+ </tr>
234
+ </thead>
235
+ <tbody>
236
+ <tr>
237
+ <td></td>
238
+ <td></td>
239
+ <td></td>
240
+ <td></td>
241
+ <td></td>
242
+ <td></td>
243
+ <td></td>
244
+ <td></td>
245
+ <td></td>
246
+ <td></td>
247
+ <td>CHAIRs↓</td>
248
+ <td>CHAIRi↓</td>
249
+ <td nowrap="nowrap">score avg@3↑</td>
250
+ <td nowrap="nowrap">hall rate avg@3↓</td>
251
+ </tr>
252
+ <tbody align="center">
253
+ <tr>
254
+ <td colspan="14" align="left"><strong>Proprietary</strong></td>
255
+ </tr>
256
+ <tr>
257
+ <td nowrap="nowrap" align="left">GPT-4v-20240409</td>
258
+ <td>-</td>
259
+ <td>78.5</td>
260
+ <td>1927</td>
261
+ <td>61.4</td>
262
+ <td>78.0</td>
263
+ <td>88.4</td>
264
+ <td>-</td>
265
+ <td>-</td>
266
+ <td>-</td>
267
+ <td>-</td>
268
+ <td>-</td>
269
+ <td>-</td>
270
+ <td>-</td>
271
+ </tr>
272
+ <tr>
273
+ <td nowrap="nowrap" align="left">Gemini-1.5-Pro</td>
274
+ <td>-</td>
275
+ <td>87.2</td>
276
+ <td>-</td>
277
+ <td>67.5</td>
278
+ <td>78.8</td>
279
+ <td>93.1</td>
280
+ <td>41.0</td>
281
+ <td>31.5</td>
282
+ <td>50.5</td>
283
+ <td>-</td>
284
+ <td>-</td>
285
+ <td>-</td>
286
+ <td>-</td>
287
+ </tr>
288
+ <tr>
289
+ <td nowrap="nowrap" align="left">GPT-4.1-mini-20250414</td>
290
+ <td>-</td>
291
+ <td>-</td>
292
+ <td>-</td>
293
+ <td>-</td>
294
+ <td>-</td>
295
+ <td>-</td>
296
+ <td>45.3</td>
297
+ <td>47.7</td>
298
+ <td>-</td>
299
+ <td>-</td>
300
+ <td>-</td>
301
+ <td>-</td>
302
+ <td>-</td>
303
+ </tr>
304
+ <tr>
305
+ <td nowrap="nowrap" align="left">Claude 3.5 Sonnet-20241022</td>
306
+ <td>-</td>
307
+ <td>90.8</td>
308
+ <td>-</td>
309
+ <td>60.1</td>
310
+ <td>74.1</td>
311
+ <td>95.2</td>
312
+ <td>35.6</td>
313
+ <td>35.7</td>
314
+ <td>44.0</td>
315
+ <td>-</td>
316
+ <td>-</td>
317
+ <td>-</td>
318
+ <td>-</td>
319
+ </tr>
320
+ <tr>
321
+ <td colspan="14" align="left"><strong>Open-source</strong></td>
322
+ </tr>
323
+ <tr>
324
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-3B-Instruct</td>
325
+ <td>3.8B</td>
326
+ <td>84.0</td>
327
+ <td>2157</td>
328
+ <td>65.4</td>
329
+ <td>79.3</td>
330
+ <td>93.9</td>
331
+ <td>21.9</td>
332
+ <td>13.2</td>
333
+ <td>22.9</td>
334
+ <td>18.3</td>
335
+ <td>10.8</td>
336
+ <td>3.9 </td>
337
+ <td>33.3 </td>
338
+ </tr>
339
+ <tr>
340
+ <td nowrap="nowrap" align="left">InternVL2.5-4B</td>
341
+ <td>3.7B</td>
342
+ <td>84.0</td>
343
+ <td>2338</td>
344
+ <td>64.3</td>
345
+ <td>76.8</td>
346
+ <td>91.6</td>
347
+ <td>18.4</td>
348
+ <td>15.2</td>
349
+ <td>21.2</td>
350
+ <td>13.7</td>
351
+ <td>8.7</td>
352
+ <td>3.2 </td>
353
+ <td>46.5 </td>
354
+ </tr>
355
+ <tr>
356
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-7B-Instruct</td>
357
+ <td>8.3B</td>
358
+ <td>87.3</td>
359
+ <td>2347</td>
360
+ <td>68.5</td>
361
+ <td>84.9</td>
362
+ <td>95.7</td>
363
+ <td>25.4</td>
364
+ <td>21.8</td>
365
+ <td>36.2</td>
366
+ <td>13.3</td>
367
+ <td>7.9</td>
368
+ <td>4.1 </td>
369
+ <td>31.6 </td>
370
+ </tr>
371
+ <tr>
372
+ <td nowrap="nowrap" align="left">InternVL2.5-8B</td>
373
+ <td>8.1B</td>
374
+ <td>84.8</td>
375
+ <td>2344</td>
376
+ <td>70.1</td>
377
+ <td>79.1</td>
378
+ <td>93.0</td>
379
+ <td>17.0</td>
380
+ <td>9.4</td>
381
+ <td>23.5</td>
382
+ <td>18.3</td>
383
+ <td>11.6</td>
384
+ <td>3.6 </td>
385
+ <td>37.2</td>
386
+ </tr>
387
+ <tr>
388
+ <td nowrap="nowrap" align="left">MiniCPM-V-2.6</td>
389
+ <td>8.1B</td>
390
+ <td>79.4</td>
391
+ <td>2348</td>
392
+ <td>65.0</td>
393
+ <td>80.1</td>
394
+ <td>90.8</td>
395
+ <td>17.5</td>
396
+ <td>9.0</td>
397
+ <td>20.4</td>
398
+ <td>7.3</td>
399
+ <td>4.7</td>
400
+ <td>4.0 </td>
401
+ <td>29.9 </td>
402
+ </tr>
403
+ <tr>
404
+ <td nowrap="nowrap" align="left">MiniCPM-o-2.6</td>
405
+ <td>8.7B</td>
406
+ <td>86.9</td>
407
+ <td>2372</td>
408
+ <td>68.1</td>
409
+ <td>82.0</td>
410
+ <td>93.5</td>
411
+ <td>21.7</td>
412
+ <td>10.4</td>
413
+ <td>25.2</td>
414
+ <td>6.3</td>
415
+ <td>3.4</td>
416
+ <td>4.1 </td>
417
+ <td>31.3 </td>
418
+ </tr>
419
+ <tr>
420
+ <td nowrap="nowrap" align="left">MiniCPM-V-4.0</td>
421
+ <td>4.1B</td>
422
+ <td>84.4</td>
423
+ <td>2298</td>
424
+ <td>68.5</td>
425
+ <td>80.8</td>
426
+ <td>92.9</td>
427
+ <td>20.7</td>
428
+ <td>14.2</td>
429
+ <td>32.7</td>
430
+ <td>6.3</td>
431
+ <td>3.5</td>
432
+ <td>4.1 </td>
433
+ <td>29.2 </td>
434
+ </tr>
435
+ </tbody>
436
+ </table>
437
+ </div>
438
+
439
+ </details>
440
+
441
+ <details>
442
+ <summary>Click to view multi-image and video understanding results on Mantis, Blink and Video-MME. </summary>
443
+ <div align="center">
444
+ <table style="margin: 0px auto;">
445
+ <thead>
446
+ <tr>
447
+ <th nowrap="nowrap" align="left">model</th>
448
+ <th>Size</th>
449
+ <th>Mantis</th>
450
+ <th>Blink</th>
451
+ <th nowrap="nowrap" colspan="2" >Video-MME</th>
452
+ </tr>
453
+ </thead>
454
+ <tbody>
455
+ <tr>
456
+ <td></td>
457
+ <td></td>
458
+ <td></td>
459
+ <td></td>
460
+ <td>w/o subs</td>
461
+ <td>w/ subs</td>
462
+ </tr>
463
+ <tbody align="center">
464
+ <tr>
465
+ <td colspan="6" align="left"><strong>Proprietary</strong></td>
466
+ </tr>
467
+ <tr>
468
+ <td nowrap="nowrap" align="left">GPT-4v-20240409</td>
469
+ <td>-</td>
470
+ <td>62.7</td>
471
+ <td>54.6</td>
472
+ <td>59.9</td>
473
+ <td>63.3</td>
474
+ </tr>
475
+ <tr>
476
+ <td nowrap="nowrap" align="left">Gemini-1.5-Pro</td>
477
+ <td>-</td>
478
+ <td>-</td>
479
+ <td>59.1</td>
480
+ <td>75.0</td>
481
+ <td>81.3</td>
482
+ </tr>
483
+ <tr>
484
+ <td nowrap="nowrap" align="left">GPT-4o-20240513</td>
485
+ <td>-</td>
486
+ <td>-</td>
487
+ <td>68.0</td>
488
+ <td>71.9</td>
489
+ <td>77.2</td>
490
+ </tr>
491
+ <tr>
492
+ <td colspan="6" align="left"><strong>Open-source</strong></td>
493
+ </tr>
494
+ <tr>
495
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-3B-Instruct</td>
496
+ <td>3.8B</td>
497
+ <td>-</td>
498
+ <td>47.6</td>
499
+ <td>61.5</td>
500
+ <td>67.6</td>
501
+ </tr>
502
+ <tr>
503
+ <td nowrap="nowrap" align="left">InternVL2.5-4B</td>
504
+ <td>3.7B</td>
505
+ <td>62.7</td>
506
+ <td>50.8</td>
507
+ <td>62.3</td>
508
+ <td>63.6</td>
509
+ </tr>
510
+ <tr>
511
+ <td nowrap="nowrap" align="left">Qwen2.5-VL-7B-Instruct</td>
512
+ <td>8.3B</td>
513
+ <td>-</td>
514
+ <td>56.4</td>
515
+ <td>65.1</td>
516
+ <td>71.6</td>
517
+ </tr>
518
+ <tr>
519
+ <td nowrap="nowrap" align="left">InternVL2.5-8B</td>
520
+ <td>8.1B</td>
521
+ <td>67.7</td>
522
+ <td>54.8</td>
523
+ <td>64.2</td>
524
+ <td>66.9</td>
525
+ </tr>
526
+ <tr>
527
+ <td nowrap="nowrap" align="left">MiniCPM-V-2.6</td>
528
+ <td>8.1B</td>
529
+ <td>69.1</td>
530
+ <td>53.0</td>
531
+ <td>60.9</td>
532
+ <td>63.6</td>
533
+ </tr>
534
+ <tr>
535
+ <td nowrap="nowrap" align="left">MiniCPM-o-2.6</td>
536
+ <td>8.7B</td>
537
+ <td>71.9</td>
538
+ <td>56.7</td>
539
+ <td>63.9</td>
540
+ <td>69.6</td>
541
+ </tr>
542
+ <tr>
543
+ <td nowrap="nowrap" align="left">MiniCPM-V-4.0</td>
544
+ <td>4.1B</td>
545
+ <td>71.4</td>
546
+ <td>54.0</td>
547
+ <td>61.2</td>
548
+ <td>65.8</td>
549
+ </tr>
550
+ </tbody>
551
+ </table>
552
+ </div>
553
+
554
+ </details>
555
+
556
+ ### Examples
557
+
558
+ <div style="display: flex; flex-direction: column; align-items: center;">
559
+ <img src="https://raw.githubusercontent.com/openbmb/MiniCPM-o/main/assets/minicpmv4/minicpm-v-4-case.png" alt="math" style="margin-bottom: 5px;">
560
+ </div>
561
+
562
+ Run locally on iPhone 16 Pro Max with [iOS demo](https://github.com/OpenSQZ/MiniCPM-V-CookBook/blob/main/demo/ios_demo/ios.md).
563
+
564
+ <div align="center">
565
+ <img src="https://raw.githubusercontent.com/openbmb/MiniCPM-o/main/assets/minicpmv4/iphone_en.gif" width="45%" style="display: inline-block; margin: 0 10px;"/>
566
+ <img src="https://raw.githubusercontent.com/openbmb/MiniCPM-o/main/assets/minicpmv4/iphone_en_information_extraction.gif" width="45%" style="display: inline-block; margin: 0 10px;"/>
567
+ </div>
568
+
569
+ <div align="center">
570
+ <img src="https://raw.githubusercontent.com/openbmb/MiniCPM-o/main/assets/minicpmv4/iphone_cn.gif" width="45%" style="display: inline-block; margin: 0 10px;"/>
571
+ <img src="https://raw.githubusercontent.com/openbmb/MiniCPM-o/main/assets/minicpmv4/iphone_cn_funny_points.gif" width="45%" style="display: inline-block; margin: 0 10px;"/>
572
+ </div>
573
+
574
+ ## Usage
575
+
576
+ ```python
577
+ from PIL import Image
578
+ import torch
579
+ from transformers import AutoModel, AutoTokenizer
580
+
581
+ model_path = 'openbmb/MiniCPM-V-4'
582
+ model = AutoModel.from_pretrained(model_path, trust_remote_code=True,
583
+ # sdpa or flash_attention_2, no eager
584
+ attn_implementation='sdpa', torch_dtype=torch.bfloat16)
585
+ model = model.eval().cuda()
586
+ tokenizer = AutoTokenizer.from_pretrained(
587
+ model_path, trust_remote_code=True)
588
+
589
+
590
+
591
+ image = Image.open('./assets/single.png').convert('RGB')
592
+ # Optional preview: display() works only in notebooks; use image.show() in a plain script.
593
+
594
+ # First round chat
595
+ question = "What is the landform in the picture?"
596
+ msgs = [{'role': 'user', 'content': [image, question]}]
597
+
598
+ answer = model.chat(
599
+ msgs=msgs,
600
+ image=image,
601
+ tokenizer=tokenizer
602
+ )
603
+ print(answer)
604
+
605
+
606
+ # Second round chat, pass history context of multi-turn conversation
607
+ msgs.append({"role": "assistant", "content": [answer]})
608
+ msgs.append({"role": "user", "content": [
609
+ "What should I pay attention to when traveling here?"]})
610
+
611
+ answer = model.chat(
612
+ msgs=msgs,
613
+ image=None,
614
+ tokenizer=tokenizer
615
+ )
616
+ print(answer)
617
+ ```
618
+
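Since this repo ships GGUF files, the model can also be run directly with llama.cpp instead of transformers. A hedged sketch of the invocation — the `llama-mtmd-cli` binary name and flags below are assumptions that vary across llama.cpp versions, so check `--help` for your build:

```shell
# Assumed llama.cpp multimodal CLI; adjust the binary name and flags for your build.
./llama-mtmd-cli \
  -m ggml-model-Q4_K_M.gguf \
  --mmproj mmproj-model-f16.gguf \
  --image ./assets/single.png \
  -p "What is the landform in the picture?"
```

The `mmproj-model-f16.gguf` file carries the vision projector and must be passed alongside the quantized language model.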
619
+
620
+ ## License
621
+ #### Model License
622
+ * The code in this repo is released under the [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE) License.
623
+ * The usage of MiniCPM-V series model weights must strictly follow [MiniCPM Model License.md](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md).
624
+ * The models and weights of MiniCPM are completely free for academic research. After filling out a ["questionnaire"](https://modelbest.feishu.cn/share/base/form/shrcnpV5ZT9EJ6xYjh3Kx0J6v8g) for registration, MiniCPM-V 2.6 weights are also available for free commercial use.
625
+
626
+
627
+ #### Statement
628
+ * As an LMM, MiniCPM-V 4.0 generates content by learning from a large amount of multimodal corpora, but it cannot comprehend, express personal opinions, or make value judgments. Anything generated by MiniCPM-V 4.0 does not represent the views and positions of the model developers.
629
+ * We will not be liable for any problems arising from the use of the MiniCPM-V models, including but not limited to data security issues, risks of public opinion, or any risks and problems arising from misdirection, misuse, or dissemination of the models.
630
+
631
+ ## Key Techniques and Other Multimodal Projects
632
+
633
+ 👏 Welcome to explore the key techniques behind MiniCPM-V and other multimodal projects from our team:
634
+
635
+ [VisCPM](https://github.com/OpenBMB/VisCPM/tree/main) | [RLHF-V](https://github.com/RLHF-V/RLHF-V) | [LLaVA-UHD](https://github.com/thunlp/LLaVA-UHD) | [RLAIF-V](https://github.com/RLHF-V/RLAIF-V)
636
+
637
+ ## Citation
638
+
639
+ If you find our work helpful, please consider citing our papers 📝 and liking this project ❤️!
640
+
641
+ ```bib
642
+ @article{yao2024minicpm,
643
+ title={MiniCPM-V: A GPT-4V Level MLLM on Your Phone},
644
+ author={Yao, Yuan and Yu, Tianyu and Zhang, Ao and Wang, Chongyi and Cui, Junbo and Zhu, Hongji and Cai, Tianchi and Li, Haoyu and Zhao, Weilin and He, Zhihui and others},
645
+ journal={Nature Communications},
+ volume={16},
+ pages={5509},
646
+ year={2025}
647
+ }
648
+ ```
649
+
ane_minicpmv4_vit_f16.mlmodelc.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7751a5f889396c72622d90113f80b0ce0abd8efeeb63011388047a77a1fc1482
3
+ size 635559301
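Each of the `ADDED` stubs like the one above is a Git LFS pointer file: three `key value` lines giving the spec version, the SHA-256 object ID, and the payload size in bytes. A minimal parser:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file (version / oid / size lines)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    fields["size"] = int(fields["size"])  # payload size in bytes
    return fields

# Pointer content copied from ane_minicpmv4_vit_f16.mlmodelc.zip above.
pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:7751a5f889396c72622d90113f80b0ce0abd8efeeb63011388047a77a1fc1482
size 635559301
"""
info = parse_lfs_pointer(pointer)
print(info["size"])  # 635559301
```

`git lfs pull` replaces these stubs with the actual binaries on checkout.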
ane_minicpmv4_vit_f16.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5bb3712cb631ef6c4a430b02994c3b12eeb007b64c486a4b1e752fea26652c4
3
+ size 243
ane_minicpmv4_vit_f16.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9892bfc4a858d381faa4176acce8733b89eb4f0260b90432184e24be7d44e9c2
3
+ size 713
ane_minicpmv4_vit_f16.mlmodelc/metadata.json ADDED
@@ -0,0 +1,85 @@
1
+ [
2
+ {
3
+ "shortDescription" : "MiniCPM-V 4.0 vit on ANE",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1024 × 1152)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1024, 1152]",
13
+ "name" : "output",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "4.0.0",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "tianchi",
22
+ "specificationVersion" : 6,
23
+ "storagePrecision" : "Float16",
24
+ "license" : "Apache 2.0",
25
+ "mlProgramOperationTypeHistogram" : {
26
+ "Linear" : 162,
27
+ "Matmul" : 54,
28
+ "Cast" : 2,
29
+ "Softmax" : 27,
30
+ "Mul" : 27,
31
+ "Transpose" : 108,
32
+ "LayerNorm" : 55,
33
+ "Add" : 54,
34
+ "Reshape" : 108,
35
+ "Gelu" : 27
36
+ },
37
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
38
+ "stateSchema" : [
39
+
40
+ ],
41
+ "isUpdatable" : "0",
42
+ "availability" : {
43
+ "macOS" : "12.0",
44
+ "tvOS" : "15.0",
45
+ "visionOS" : "1.0",
46
+ "watchOS" : "8.0",
47
+ "iOS" : "15.0",
48
+ "macCatalyst" : "15.0"
49
+ },
50
+ "modelType" : {
51
+ "name" : "MLModelType_mlProgram"
52
+ },
53
+ "inputSchema" : [
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Float32",
58
+ "formattedType" : "MultiArray (Float32 1 × 1024 × 1152)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 1024, 1152]",
61
+ "name" : "input",
62
+ "type" : "MultiArray"
63
+ }
64
+ ],
65
+ "userDefinedMetadata" : {
66
+ "converter" : "coremltools",
67
+ "compute_units" : "ALL",
68
+ "deployment_target" : "iOS15+",
69
+ "model_id" : "61a76759-ea68-463d-bbdf-bb8ded301a81",
70
+ "com.github.apple.coremltools.version" : "8.3.0",
71
+ "base_model" : "MiniCPM-V4",
72
+ "owner" : "tianchi",
73
+ "input_shape" : "torch.Size([1, 1024, 1152])",
74
+ "batch_size" : "1",
75
+ "precision" : "float16",
76
+ "target_device" : "ANE",
77
+ "com.github.apple.coremltools.source" : "torch==2.6.0",
78
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
79
+ "framework" : "pytorch",
80
+ "model_type" : "vision_transformer"
81
+ },
82
+ "generatedClassName" : "ane_minicpmv4_vit_f16",
83
+ "method" : "predict"
84
+ }
85
+ ]
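The `mlProgramOperationTypeHistogram` in the metadata above can be tallied to see how the ANE-compiled ViT program is composed. A small sketch with the histogram values copied inline:

```python
# Operation-type histogram copied from metadata.json above.
op_histogram = {
    "Linear": 162, "Matmul": 54, "Cast": 2, "Softmax": 27, "Mul": 27,
    "Transpose": 108, "LayerNorm": 55, "Add": 54, "Reshape": 108, "Gelu": 27,
}

total_ops = sum(op_histogram.values())           # total MIL operations in the program
most_common = max(op_histogram, key=op_histogram.get)
print(total_ops, most_common)  # 624 Linear
```

The counts are consistent with a 27-block transformer: 27 each of Softmax and Gelu (one attention and one MLP activation per block), with Linear layers dominating.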
ane_minicpmv4_vit_f16.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
ane_minicpmv4_vit_f16.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd718dae817f3582db314ab114d4fab3cdc3e8e066bd4b5041113b2ca8a16ad
3
+ size 822966528
ggml-model-Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3eab8d69253a22ae2d5b06e2732be10c792b20b07141a51e30768bb03cc6ccc
3
+ size 2079023456
ggml-model-Q4_1.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97c6ad1f690c9528835ee0faa87dc1dbc40a6fb707d82b2cce2dd616a7d9cc3
3
+ size 2292626016
ggml-model-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ff610e9c92b30389ff1e0dd40fffed3c1f02a9d34a735fd5fba6a5ad25672b
3
+ size 2189861216
ggml-model-Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b02ff32d3537207c7a160f8dcf2aa792c40fe1be2fed7ef9001aae4e802058f
3
+ size 2092458336
ggml-model-Q5_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da0b86200388b15ac8c33940abfb961f7e51d7107a0776ed6ddaa819e4ae29a
3
+ size 2506228576
ggml-model-Q5_1.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d67ed4cc239c82736f5beb185e84f449dbf1044658668aaf4281bcec5e0a4253
3
+ size 2719831136
ggml-model-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd643b2cc310df9b6b2ce3a6bdf18eb16a59485bd4c6bb962ff700ad6a153b03
3
+ size 2563326816
ggml-model-Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2643b82737a05b5149f440e276c281918a579f7ecbf06a48a7ae5ef7f96162ec
3
+ size 2506228576
ggml-model-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc78a557d5bcc3cb21f99667f97fe3e0381fc129d586d1532fffe1926ec53bd1
3
+ size 2960134016
ggml-model-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4618297c8a2ec285c84dd219d6daaaecd4359a8c92a1fc9bb0d629928be44bad
3
+ size 3833381696
mmproj-model-f16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0faa9ae63532300999c86a196f140c716cd0fbb08bbbd81850f1f9a631f7761
3
+ size 958777792
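The LFS pointer sizes above give a rough bits-per-weight figure for each quantization. This sketch assumes the language model holds about 3.6B parameters (suggested by the `Model-3.6B-F16.gguf` naming in `.gitattributes` — an assumption, since GGUF files also carry metadata and some tensors kept at higher precision, so the numbers are slightly inflated estimates):

```python
# File sizes in bytes, copied from the LFS pointers above.
gguf_sizes = {
    "Q4_0": 2_079_023_456,
    "Q8_0": 3_833_381_696,
}
N_PARAMS = 3.6e9  # assumed parameter count of the language model

for quant, size in gguf_sizes.items():
    bpw = size * 8 / N_PARAMS  # rough bits per weight
    print(f"{quant}: ~{bpw:.2f} bits/weight")
```

The estimates land near the nominal costs of these formats (about 4.5 bpw for Q4_0 and 8.5 bpw for Q8_0), which is a quick sanity check that a file downloaded from this repo is complete.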