JingzeShi committed
Commit f415235 · verified · 1 parent: ab5430f

Upload tokenizer

chat_template.jinja CHANGED
@@ -1,6 +1,6 @@
-{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
-You are a helpful assistant<|im_end|>
-' }}{% endif %}{{'<|im_start|>' + message['role'] + '
-' + message['content'] + '<|im_end|>' + '
-'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
+{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
+You are a helpful assistant<|im_end|>
+' }}{% endif %}{{'<|im_start|>' + message['role'] + '
+' + message['content'] + '<|im_end|>' + '
+'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
 ' }}{% endif %}
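
Rendered, this template produces the standard ChatML layout, injecting a default system message when the conversation doesn't start with one. A minimal sketch of the rendering, using plain jinja2 rather than the sandboxed environment transformers uses internally (the template string is the one from the diff above, with its embedded newlines written as \n; the example messages are illustrative):

from jinja2 import Template

# The committed template, with the literal newlines inside its
# string constants escaped as \n for readability.
chat_template = (
    "{% for message in messages %}"
    "{% if loop.first and messages[0]['role'] != 'system' %}"
    "{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}"
    "{% endif %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

messages = [{"role": "user", "content": "Hello!"}]
print(Template(chat_template).render(messages=messages, add_generation_prompt=True))

which prints:

<|im_start|>system
You are a helpful assistant<|im_end|>
<|im_start|>user
Hello!<|im_end|>
<|im_start|>assistant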
qwen.tiktoken CHANGED
The diff for this file is too large to render. See raw diff
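
qwen.tiktoken is the BPE vocabulary consumed by the custom QWenTokenizer configured below. For orientation, a hedged sketch of how this file format is read in the original Qwen repo (one base64-encoded token plus its merge rank per line); treat the details as an assumption, since the file itself isn't rendered here:

import base64

# Assumed format (as in Qwen's tokenization_qwen.py):
# "base64(token_bytes) rank", one pair per line.
ranks = {}
with open("qwen.tiktoken", "rb") as f:
    for line in f:
        if not line.strip():
            continue
        token_b64, rank = line.split()
        ranks[base64.b64decode(token_b64)] = int(rank)

print(len(ranks))  # BPE vocabulary size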
 
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
-{
-  "bos_token": "<|extra_203|>",
-  "eos_token": "<|extra_204|>",
-  "pad_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}
+{
+  "bos_token": "<|extra_203|>",
+  "eos_token": "<|extra_204|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
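
Note that bos/eos are mapped onto two reserved extra tokens while pad and unk share <|endoftext|>. A quick sanity check of the committed file:

import json

with open("special_tokens_map.json") as f:
    special = json.load(f)

assert special["bos_token"] == "<|extra_203|>"
assert special["eos_token"] == "<|extra_204|>"
# pad and unk deliberately resolve to the same token.
assert special["pad_token"] == special["unk_token"] == "<|endoftext|>"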
tokenizer_config.json CHANGED
@@ -1,17 +1,17 @@
-{
-  "added_tokens_decoder": {},
-  "auto_map": {
-    "AutoTokenizer": [
-      "tokenization_qwen.QWenTokenizer",
-      null
-    ]
-  },
-  "bos_token": "<|extra_203|>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|extra_204|>",
-  "extra_special_tokens": {},
-  "model_max_length": 8192,
-  "pad_token": "<|endoftext|>",
-  "tokenizer_class": "QWenTokenizer",
-  "unk_token": "<|endoftext|>"
-}
+{
+  "added_tokens_decoder": {},
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_qwen.QWenTokenizer",
+      null
+    ]
+  },
+  "bos_token": "<|extra_203|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|extra_204|>",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "QWenTokenizer",
+  "unk_token": "<|endoftext|>"
+}
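
Because auto_map routes AutoTokenizer to the repo-local tokenization_qwen.QWenTokenizer class, loading this tokenizer requires trust_remote_code=True. A minimal loading sketch (the repo id is hypothetical; substitute the actual model id):

from transformers import AutoTokenizer

# Hypothetical repo id; auto_map dispatches to the custom
# QWenTokenizer shipped alongside these tokenizer files.
tok = AutoTokenizer.from_pretrained(
    "JingzeShi/some-model",  # assumption: replace with the real repo id
    trust_remote_code=True,
)

print(tok.bos_token)         # <|extra_203|>
print(tok.eos_token)         # <|extra_204|>
print(tok.model_max_length)  # 8192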