Midm-2.0-Base-Instruct-128K / tokenizer_config.json
Bingsu's picture
Add files using upload-large-folder tool
c44dc38 verified
{
"added_tokens_decoder": {
"0": {
"content": "<|begin_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|end_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131301": {
"content": "<|eot_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131302": {
"content": "<|start_header_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131303": {
"content": "<|end_header_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131304": {
"content": "<|eop_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131305": {
"content": "<|begin_of_passage|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131306": {
"content": "<|end_of_passage|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131307": {
"content": "<img>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131308": {
"content": "</img>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131309": {
"content": "<ref>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131310": {
"content": "</ref>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131311": {
"content": "<box>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131312": {
"content": "</box>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131313": {
"content": "<quad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131314": {
"content": "</quad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131315": {
"content": "<imgpad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131316": {
"content": "<table>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131317": {
"content": "</table>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131318": {
"content": "<tr>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131319": {
"content": "</tr>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131320": {
"content": "<td>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131321": {
"content": "</td>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131322": {
"content": "<chart>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131323": {
"content": "</chart>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131324": {
"content": "<caption>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131325": {
"content": "<thead>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131326": {
"content": "<tbody>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131327": {
"content": "<tfoot>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131328": {
"content": "<th>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131329": {
"content": "</caption>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131330": {
"content": "</thead>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131331": {
"content": "</tbody>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131332": {
"content": "</tfoot>\"",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131333": {
"content": "</th>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131334": {
"content": "<h1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131335": {
"content": "<h2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131336": {
"content": "<h3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131337": {
"content": "<h4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131338": {
"content": "<h5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131339": {
"content": "<h6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131340": {
"content": "<blockquote>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131341": {
"content": "</h1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131342": {
"content": "</h2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131343": {
"content": "</h4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131344": {
"content": "</h5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131345": {
"content": "</h6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131346": {
"content": "</blockquote>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131347": {
"content": "<strong>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131348": {
"content": "<em>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131349": {
"content": "<b>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131350": {
"content": "<i>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131351": {
"content": "<u>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131352": {
"content": "<sub>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131353": {
"content": "<sup>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131354": {
"content": "<code>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131355": {
"content": "</strong>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131356": {
"content": "</em>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131357": {
"content": "</b>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131358": {
"content": "</i>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131359": {
"content": "</u>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131360": {
"content": "</sub>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131361": {
"content": "</sup>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131362": {
"content": "</code>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131363": {
"content": "<|finetune_right_pad_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131364": {
"content": "<|eom_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131365": {
"content": "<|python_tag|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131366": {
"content": "#@이름@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131367": {
"content": "#@ID@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131368": {
"content": "#@주민번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131369": {
"content": "#@이메일@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131370": {
"content": "#@계좌번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131371": {
"content": "#@전화번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131372": {
"content": "#@주소@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131373": {
"content": "#@자동차번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131374": {
"content": "#@사업자등록번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131375": {
"content": "#@자동차운전면허번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131376": {
"content": "#@여권번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131377": {
"content": "#@외국인등록번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131378": {
"content": "#@건보번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131379": {
"content": "#@신용카드번호@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131380": {
"content": "#@IP@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131381": {
"content": "#@MAC주소@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131382": {
"content": "#@SNS계정@#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"131383": {
"content": "#@통관번호#",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|begin_of_text|>",
"clean_up_tokenization_spaces": true,
"content": "<|end_of_text|>",
"eos_token": "<|end_of_text|>",
"extra_special_tokens": {},
"legacy": false,
"lstrip": false,
"model_max_length": 1000000000000000019884624838656,
"normalized": false,
"pad_token": "<|end_of_text|>",
"rstrip": false,
"single_word": false,
"tokenizer_class": "PreTrainedTokenizerFast"
}