Add/update the quantized ONNX model files and README.md for Transformers.js v3 (#1)
- Add/update the quantized ONNX model files and README.md for Transformers.js v3 (b73940d05c3dbd2307a3e19bff424663ed0417f3)
Co-authored-by: Yuichiro Tachibana <[email protected]>
- README.md +4 -5
- onnx/model_bnb4.onnx +3 -0
- onnx/model_int8.onnx +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_uint8.onnx +3 -0
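The added `onnx/model_*.onnx` files are quantized variants (bnb4, int8, q4, q4f16, uint8) of the model. As a minimal sketch of how one of them could be selected, assuming Transformers.js v3's `dtype` option for `from_pretrained` maps to these file suffixes (the option value shown is an illustration, not part of this commit):

```js
// Sketch: loading a specific quantized variant with Transformers.js v3.
// Assumes `dtype` resolves to the matching onnx/model_<dtype>.onnx file added here.
import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
const model = await AutoModelForSequenceClassification.from_pretrained(
  'Xenova/ms-marco-TinyBERT-L-2-v2',
  { dtype: 'q4' }, // e.g. 'int8', 'uint8', 'bnb4', or 'q4f16' for the other added files
);
```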
README.md
CHANGED

@@ -7,14 +7,14 @@ https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2-v2 with ONNX weights
 
 ## Usage (Transformers.js)
 
-If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
+If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
 ```bash
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 **Example:** Information Retrieval w/ `Xenova/ms-marco-TinyBERT-L-2-v2`.
 ```js
-import { AutoTokenizer, AutoModelForSequenceClassification } from '@xenova/transformers';
+import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';
 
 const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
 const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');

@@ -31,7 +31,7 @@ const features = tokenizer(
 }
 )
 
-const scores = await model(features)
+const scores = await model(features);
 console.log(scores);
 // quantized: [ 7.210887908935547, -11.559350967407227 ]
 // unquantized: [ 7.235750675201416, -11.562294006347656 ]

@@ -39,5 +39,4 @@ console.log(scores);
 
 ---
 
-
 Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
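The hunks above show only fragments of the README's re-ranking example. A self-contained sketch of the full flow follows; the query and passage strings and the `text_pair`/`padding`/`truncation` tokenizer arguments are assumptions based on typical cross-encoder usage, not taken verbatim from this diff:

```js
// Sketch of the re-ranking flow the README hunks refer to.
// The texts and tokenizer options below are illustrative assumptions.
import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';

const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-TinyBERT-L-2-v2');

const query = 'How many people live in Berlin?';
const docs = [
  'Berlin has a population of 3,520,031 registered inhabitants.',
  'New York City is famous for the Metropolitan Museum of Art.',
];

// Each (query, document) pair is scored jointly by the cross-encoder.
const features = tokenizer(
  new Array(docs.length).fill(query),
  { text_pair: docs, padding: true, truncation: true },
);

const scores = await model(features);
console.log(scores); // a higher logit indicates a more relevant passage
```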
onnx/model_bnb4.onnx
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a5570406cb76fde0137ed4b3976375e2724e9c8115630644ef8e6530e04df9
+size 16254069

onnx/model_int8.onnx
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f24d6dcf08df3d26b8fba3886942575b64856deba7ac2aa0962c2fb2ccd6d895
+size 4475667

onnx/model_q4.onnx
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eefe2de2bae1f47563d1fea3e9688f0fb2ff453430b2591e4eb76b95eafc30fa
+size 16278557

onnx/model_q4f16.onnx
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc16aa0fbdb620cf4723bdf4cea73a2734930c18c4a40fdda351a860ff78dcc
+size 8268506

onnx/model_uint8.onnx
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6547876c4b12cdc52082dc3db0ae896e0332d81ce5afa79baf5a063db2382a31
+size 4475674
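Since the model files are committed as Git LFS pointers, a downloaded copy can be checked against the pointer's `oid` and `size`. A small Node.js sketch, where the local file path is a hypothetical example:

```js
// Sketch: verify a downloaded ONNX file against its Git LFS pointer above.
import { createHash } from 'node:crypto';
import { readFileSync, statSync } from 'node:fs';

const path = 'onnx/model_uint8.onnx'; // hypothetical local copy
const expectedOid = '6547876c4b12cdc52082dc3db0ae896e0332d81ce5afa79baf5a063db2382a31';
const expectedSize = 4475674;

const actualOid = createHash('sha256').update(readFileSync(path)).digest('hex');
const actualSize = statSync(path).size;

console.log('sha256 match:', actualOid === expectedOid);
console.log('size match:', actualSize === expectedSize);
```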