Update tokenizer_config.json
Browse files- tokenizer_config.json +1 -0
tokenizer_config.json
CHANGED
@@ -22,6 +22,7 @@
|
|
22 |
"name_or_path": "cerebras/Cerebras-GPT-256M",
|
23 |
"pad_token": null,
|
24 |
"special_tokens_map_file": null,
|
|
|
25 |
"tokenizer_class": "GPT2Tokenizer",
|
26 |
"unk_token": {
|
27 |
"__type": "AddedToken",
|
|
|
22 |
"name_or_path": "cerebras/Cerebras-GPT-256M",
|
23 |
"pad_token": null,
|
24 |
"special_tokens_map_file": null,
|
25 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{ add_bos_token and bos_token or '' }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{ '<|user|>' + message['content'] }}{%- elif message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{{ '<|assistant|><|tool_calls_begin|><|tool_call_begin|>' + tool['type'] + '<|tool_sep|>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```' + '<|tool_call_end|>' }}{% set ns.is_first = true %}{%- else %}{{ '\\n' + '<|tool_call_begin|>' + tool['type'] + '<|tool_sep|>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```' + '<|tool_call_end|>' + '<|tool_calls_end|>' + '<|endoftext|>' }}{%- endif %}{%- endfor %}{%- elif message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{ '<|tool_outputs_end|>' + message['content'] + '<|endoftext|>' }}{% set ns.is_tool = false %}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{ '<|assistant|>' + content + '<|endoftext|>' }}{%- endif %}{%- elif message['role'] == 'tool' %}{%- set ns.is_tool = true %}{%- if ns.is_output_first %}{{ '<|tool_outputs_begin|><|tool_output_begin|>' + message['content'] + '<|tool_output_end|>' }}{% set ns.is_output_first = false %}{%- else %}{{ '\\n<|tool_output_begin|>' + message['content'] + '<|tool_output_end|>' }}{%- endif %}{%- endif %}{%- endfor %}{% if ns.is_tool %}{{ '<|tool_outputs_end|>' }}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{ '<|assistant|>' }}{% endif %}",
|
26 |
"tokenizer_class": "GPT2Tokenizer",
|
27 |
"unk_token": {
|
28 |
"__type": "AddedToken",
|