[WIP] Transformers.js improvements
Browse files
- default to q4 since the model is quite large
- support external data format (see [transformers.js PR #1212](https://github.com/huggingface/transformers.js/pull/1212))
- config.json +5 -0
config.json
CHANGED
|
@@ -25,9 +25,14 @@
|
|
| 25 |
"torch_dtype": "bfloat16",
|
| 26 |
"transformers_version": "4.42.3",
|
| 27 |
"transformers.js_config": {
|
|
|
|
| 28 |
"kv_cache_dtype": {
|
| 29 |
"q4f16": "float16",
|
| 30 |
"fp16": "float16"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
}
|
| 32 |
},
|
| 33 |
"use_cache": true,
|
|
|
|
| 25 |
"torch_dtype": "bfloat16",
|
| 26 |
"transformers_version": "4.42.3",
|
| 27 |
"transformers.js_config": {
|
| 28 |
+
"dtype": "q4",
|
| 29 |
"kv_cache_dtype": {
|
| 30 |
"q4f16": "float16",
|
| 31 |
"fp16": "float16"
|
| 32 |
+
},
|
| 33 |
+
"use_external_data_format": {
|
| 34 |
+
"model.onnx": true,
|
| 35 |
+
"model_fp16.onnx": true
|
| 36 |
}
|
| 37 |
},
|
| 38 |
"use_cache": true,
|