bchenfireworks committed on
Commit
c8430fc
·
verified ·
1 Parent(s): 841a184

Update tokenizer_config.json

Browse files

Support tool calls. At temperature 0, tau2-airline went from 14% to 50%. The old template ignored all previous tool calls, breaking all multi-step tool-call conversations.

Files changed (1) hide show
  1. tokenizer_config.json +1 -1
tokenizer_config.json CHANGED
@@ -151,7 +151,7 @@
151
  "clean_up_tokenization_spaces": false,
152
  "eos_token": "[EOS]",
153
  "extra_special_tokens": {},
154
- "chat_template": "{% if tools -%}\n {{ '<|im_system|>tool_declare<|im_middle|>' -}}\n {{- tools | tojson -}}\n {{ '<|im_end|>' -}}\n{%- endif -%}\n\n{%- for message in messages -%}\n {%- if loop.first and messages[0]['role'] != 'system' -%}\n {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}\n {%- endif -%}\n {%- if message['role'] == 'system' -%}\n {{ '<|im_system|>system<|im_middle|>' }}\n {%- elif message['role'] == 'user' -%}\n {{ '<|im_user|>user<|im_middle|>' }}\n {%- elif message['role'] == 'assistant' -%}\n {{ '<|im_assistant|>assistant<|im_middle|>' }}\n {%- elif message['role'] == 'tool' -%}\n {{ '<|im_system|>tool<|im_middle|>' }}\n {%- endif -%}\n\n {%- if message['content'] is string -%}\n {{- message['content'] + '<|im_end|>' -}}\n {%- else -%}\n {%- for content in message['content'] -%}\n {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\n {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}\n {%- else -%}\n {{ content['text'] }}\n {%- endif -%}\n {%- endfor -%}\n {{ '<|im_end|>' }}\n {%- endif -%}\n{%- endfor -%}\n\n{%- if add_generation_prompt -%}\n {{ '<|im_assistant|>assistant<|im_middle|>' }}\n{%- endif -%}",
155
  "model_max_length": 1000000000000000019884624838656,
156
  "pad_token": "[PAD]",
157
  "tokenizer_class": "TikTokenTokenizer",
 
151
  "clean_up_tokenization_spaces": false,
152
  "eos_token": "[EOS]",
153
  "extra_special_tokens": {},
154
+ "chat_template": "{% if tools -%}\n {{ '<|im_system|>tool_declare<|im_middle|>' -}}\n {{- tools | tojson -}}\n {{ '<|im_end|>' -}}\n{%- endif -%}\n\n{%- for message in messages -%}\n {%- if loop.first and messages[0]['role'] != 'system' -%}\n {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}\n {%- endif -%}\n {%- if message['role'] == 'system' -%}\n {{ '<|im_system|>system<|im_middle|>' }}\n {%- elif message['role'] == 'user' -%}\n {{ '<|im_user|>user<|im_middle|>' }}\n {%- elif message['role'] == 'assistant' -%}\n {{ '<|im_assistant|>assistant<|im_middle|>' }}\n {%- elif message['role'] == 'tool' -%}\n {{ '<|im_system|>tool<|im_middle|>' }}\n {%- endif -%}\n\n {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}\n {%- if message['content'] -%}\n {{ message['content'] }}\n {%- endif -%}\n {{ '<|tool_calls_section_begin|>' }}\n {%- for tool_call in message['tool_calls'] -%}\n {%- set func_name = tool_call['function']['name'] -%}\n {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}\n {{ '<|tool_call_begin|>' }}{{ formatted_id }}{{ '<|tool_call_argument_begin|>' }}{{ tool_call['function']['arguments'] }}{{ '<|tool_call_end|>' }}\n {%- endfor -%}\n {{ '<|tool_calls_section_end|>' }}\n {%- elif message['content'] is string -%}\n {{- message['content'] -}}\n {%- elif message['content'] is not none -%}\n {%- for content in message['content'] -%}\n {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\n {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}\n {%- else -%}\n {{ content['text'] }}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {{ '<|im_end|>' }}\n{%- endfor -%}\n\n{%- if add_generation_prompt -%}\n {{ '<|im_assistant|>assistant<|im_middle|>' }}\n{%- endif -%}",
155
  "model_max_length": 1000000000000000019884624838656,
156
  "pad_token": "[PAD]",
157
  "tokenizer_class": "TikTokenTokenizer",