Add files using upload-large-folder tool
Browse files
.gitattributes
CHANGED
|
@@ -39,3 +39,5 @@ Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task filter=lfs diff=lfs merg
|
|
| 39 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
| 40 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
|
| 41 |
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 39 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
| 40 |
Qwen2.5-1.5B-Instruct_seq128_q8_ekv1280.task filter=lfs diff=lfs merge=lfs -text
|
| 41 |
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm filter=lfs diff=lfs merge=lfs -text
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.litertlm
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:708483abb477c10dd8db8cc6b4995bbd9cd2deda5539249a37c218028a669391
|
| 3 |
+
size 6182436864
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv4096.task
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:addf45e6d75f60c9cd34bdf42d84f896f5b7b4faba6ee9f631bf6139de6af087
|
| 3 |
+
size 6182391796
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.litertlm
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ac49902133f729199bbd23ad6714df32b20585c786f2546d2293cc371f35278
|
| 3 |
+
size 1598603264
|
Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82968d0a6c3872cf016fdbcfc591571605f4c7fd2b0f64d2533df502cc6596b3
|
| 3 |
+
size 1598556720
|
README.md
CHANGED
|
@@ -62,8 +62,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
| 62 |
<th></th>
|
| 63 |
</tr>
|
| 64 |
<tr>
|
| 65 |
-
<td rowspan="
|
| 66 |
-
<td><p style="text-align: left">fp32 (baseline)</p></td>
|
| 67 |
<td><p style="text-align: right">1280</p></td>
|
| 68 |
<td><p style="text-align: right">27 tk/s</p></td>
|
| 69 |
<td><p style="text-align: right">6 tk/s</p></td>
|
|
@@ -74,8 +74,7 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
| 74 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">🔗</a></p></td>
|
| 75 |
</tr>
|
| 76 |
<tr>
|
| 77 |
-
<td rowspan="
|
| 78 |
-
<td><p style="text-align: right">1280</p></td>
|
| 79 |
<td><p style="text-align: right">106 tk/s</p></td>
|
| 80 |
<td><p style="text-align: right">23 tk/s</p></td>
|
| 81 |
<td><p style="text-align: right">2.74 s</p></td>
|
|
@@ -85,7 +84,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
| 85 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
| 86 |
</tr>
|
| 87 |
<tr>
|
| 88 |
-
<td><p style="text-align: right">4096</p></td>
|
| 89 |
<td><p style="text-align: right">63 tk/s</p></td>
|
| 90 |
<td><p style="text-align: right">20 tk/s</p></td>
|
| 91 |
<td><p style="text-align: right">4.40 s</p></td>
|
|
@@ -95,8 +93,8 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
| 95 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">🔗</a></p></td>
|
| 96 |
</tr>
|
| 97 |
<tr>
|
| 98 |
-
<td rowspan="2"><p style="text-align: left">
|
| 99 |
-
<td><p style="text-align: right">1280</p></td>
|
| 100 |
<td><p style="text-align: right">706 tk/s</p></td>
|
| 101 |
<td><p style="text-align: right">24 tk/s</p></td>
|
| 102 |
<td><p style="text-align: right">6.94 s</p></td>
|
|
@@ -106,7 +104,6 @@ Note that all benchmark stats are from a Samsung S24 Ultra and multiple prefill
|
|
| 106 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
| 107 |
</tr>
|
| 108 |
<tr>
|
| 109 |
-
<td><p style="text-align: right">4096</p></td>
|
| 110 |
<td><p style="text-align: right">417 tk/s</p></td>
|
| 111 |
<td><p style="text-align: right">22 tk/s</p></td>
|
| 112 |
<td><p style="text-align: right">7.93 s</p></td>
|
|
|
|
| 62 |
<th></th>
|
| 63 |
</tr>
|
| 64 |
<tr>
|
| 65 |
+
<td rowspan="5"><p style="text-align: left">CPU</p></td>
|
| 66 |
+
<td rowspan="3"><p style="text-align: left">fp32 (baseline)</p></td>
|
| 67 |
<td><p style="text-align: right">1280</p></td>
|
| 68 |
<td><p style="text-align: right">27 tk/s</p></td>
|
| 69 |
<td><p style="text-align: right">6 tk/s</p></td>
|
|
|
|
| 74 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_f32_ekv1280.task">🔗</a></p></td>
|
| 75 |
</tr>
|
| 76 |
<tr>
|
| 77 |
+
<td rowspan="2"><p style="text-align: right">1280</p></td>
|
|
|
|
| 78 |
<td><p style="text-align: right">106 tk/s</p></td>
|
| 79 |
<td><p style="text-align: right">23 tk/s</p></td>
|
| 80 |
<td><p style="text-align: right">2.74 s</p></td>
|
|
|
|
| 84 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
| 85 |
</tr>
|
| 86 |
<tr>
|
|
|
|
| 87 |
<td><p style="text-align: right">63 tk/s</p></td>
|
| 88 |
<td><p style="text-align: right">20 tk/s</p></td>
|
| 89 |
<td><p style="text-align: right">4.40 s</p></td>
|
|
|
|
| 93 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv4096.task">🔗</a></p></td>
|
| 94 |
</tr>
|
| 95 |
<tr>
|
| 96 |
+
<td rowspan="2"><p style="text-align: left">dynamic_int8</p></td>
|
| 97 |
+
<td rowspan="2"><p style="text-align: right">1280</p></td>
|
| 98 |
<td><p style="text-align: right">706 tk/s</p></td>
|
| 99 |
<td><p style="text-align: right">24 tk/s</p></td>
|
| 100 |
<td><p style="text-align: right">6.94 s</p></td>
|
|
|
|
| 104 |
<td><p style="text-align: left"><a style="text-decoration: none" href="https://huggingface.co/litert-community/Qwen2.5-1.5B-Instruct/resolve/main/Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task">🔗</a></p></td>
|
| 105 |
</tr>
|
| 106 |
<tr>
|
|
|
|
| 107 |
<td><p style="text-align: right">417 tk/s</p></td>
|
| 108 |
<td><p style="text-align: right">22 tk/s</p></td>
|
| 109 |
<td><p style="text-align: right">7.93 s</p></td>
|