{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 1
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
1
],
"tokens": [
"</s>"
]
},
"<s>": {
"id": "<s>",
"ids": [
0
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<s>": 0,
"</s>": 1,
"<unk>": 2,
"<pad>": 3,
"<mask>": 4,
"-": 5,
"a": 6,
"b": 7,
"c": 8,
"d": 9,
"e": 10,
"f": 11,
"g": 12,
"h": 13,
"i": 14,
"j": 15,
"k": 16,
"l": 17,
"m": 18,
"n": 19,
"o": 20,
"p": 21,
"q": 22,
"r": 23,
"s": 24,
"t": 25,
"u": 26,
"v": 27,
"w": 28,
"x": 29,
"y": 30,
"z": 31,
"¡": 32,
"£": 33,
"¤": 34,
"¥": 35,
"¦": 36,
"§": 37,
"¨": 38,
"©": 39,
"«": 40,
"¬": 41,
"¯": 42,
"±": 43,
"³": 44,
"¶": 45,
"¸": 46,
"º": 47,
"¼": 48,
"½": 49,
"¾": 50,
"Ã": 51,
"Ä": 52,
"Å": 53,
"Ì": 54,
"â": 55,
"Ġ": 56,
"Ģ": 57,
"ģ": 58,
"Ĥ": 59,
"ĥ": 60,
"Ħ": 61,
"ĩ": 62,
"Ī": 63,
"į": 64,
"ı": 65,
"ij": 66,
"ĵ": 67,
"Ķ": 68,
"Ĺ": 69,
"Ļ": 70,
"Ľ": 71,
"ľ": 72,
"ŀ": 73,
"Ł": 74,
"ł": 75,
"Ń": 76,
"ÃŃ": 77,
"á": 78,
"Ġp": 79,
"Ġs": 80,
"ÄĽ": 81,
"Ġt": 82,
"Ġv": 83,
"Ġn": 84,
"ÅĻ": 85,
"Ġj": 86,
"nÃŃ": 87,
"é": 88,
"st": 89,
"ž": 90,
"Ġz": 91,
"Ġd": 92,
"ro": 93,
"Ġa": 94,
"ch": 95,
"ov": 96,
"Äį": 97,
"Ġm": 98,
"Ġk": 99,
"Ġpo": 100,
"ý": 101,
"Ġo": 102,
"ed": 103,
"Å¡": 104,
"la": 105,
"en": 106,
"Ġb": 107,
"ra": 108,
"ou": 109,
"ak": 110,
"em": 111,
"li": 112,
"ů": 113,
"te": 114,
"le": 115,
"ho": 116,
"Ġna": 117,
"Ġto": 118,
"ĠpÅĻ": 119,
"nÄĽ": 120,
"Ġje": 121,
"Ġpro": 122,
"Ġse": 123,
"Ġne": 124,
"ce": 125,
"že": 126,
"to": 127,
"in": 128,
"an": 129,
"sk": 130,
"Ġdo": 131,
"at": 132,
"ÃŃm": 133,
"rá": 134,
"Ġby": 135,
"Ġza": 136,
"ĠÄį": 137,
"uj": 138,
"lo": 139,
"no": 140,
"it": 141,
"Ġst": 142,
"Ġu": 143,
"ÅĻe": 144,
"Ġže": 145,
"Ġtak": 146,
"ni": 147,
"po": 148,
"ad": 149,
"ci": 150,
"al": 151,
"Ġko": 152,
"ko": 153,
"Ġná": 154,
"ná": 155,
"na": 156,
"Ġro": 157,
"vo": 158,
"ru": 159,
"ku": 160,
"ti": 161,
"Ġvy": 162,
"va": 163,
"Ġh": 164,
"re": 165,
"ých": 166,
"de": 167,
"Ġjs": 168,
"ck": 169,
"né": 170,
"lá": 171,
"Ġve": 172,
"cÃŃ": 173,
"Ġzá": 174,
"Ġkte": 175,
"ne": 176,
"by": 177,
"ky": 178,
"ÅĻÃŃ": 179,
"vÄĽ": 180,
"Ġob": 181,
"ĠpÅĻed": 182,
"át": 183,
"Ġf": 184,
"mi": 185,
"ka": 186,
"me": 187,
"Ġpos": 188,
"Ġpod": 189,
"dy": 190,
"ú": 191,
"še": 192,
"mÄĽ": 193,
"Ġpan": 194,
"Ġjak": 195,
"Ġjed": 196,
"Ġú": 197,
"ová": 198,
"ĠpÅĻi": 199,
"Å¡ÃŃ": 200,
"mu": 201,
"jÃŃ": 202,
"ské": 203,
"vr": 204,
"Ġvý": 205,
"bo": 206,
"vá": 207,
"Ġod": 208,
"sti": 209,
"lu": 210,
"Ġe": 211,
"ze": 212,
"ÄĽk": 213,
"Ġposla": 214,
"Ġi": 215,
"ÅĻi": 216,
"pra": 217,
"Ġroz": 218,
"Ġkter": 219,
"tÄĽ": 220,
"da": 221,
"vrh": 222,
"ist": 223,
"ové": 224,
"dÄĽ": 225,
"Ġre": 226,
"du": 227,
"uji": 228,
"ar": 229,
"Ġbu": 230,
"ova": 231,
"Ġvá": 232,
"vnÃŃ": 233,
"Ġmo": 234,
"Ġch": 235,
"il": 236,
"or": 237,
"ĠmÄĽ": 238,
"er": 239,
"is": 240,
"ĠpÅĻÃŃ": 241,
"ovat": 242,
"uje": 243,
"ob": 244,
"sta": 245,
"ny": 246,
"vÃŃ": 247,
"sl": 248,
"Ġmá": 249,
"ĠvÄĽ": 250,
"Ġnávrh": 251,
"ent": 252,
"Ġc": 253,
"Ġjá": 254,
"ĠnÄĽ": 255,
"am": 256,
"Ġale": 257,
"Ġsi": 258,
"ct": 259,
"Ġaby": 260,
"Ġbyl": 261,
"ÅĪ": 262,
"nÃŃm": 263,
"cho": 264,
"ĠpÅĻe": 265,
"Ġpr": 266,
"cké": 267,
"nu": 268,
"ál": 269,
"Ġmin": 270,
"nost": 271,
"je": 272,
"Ġsou": 273,
"ým": 274,
"lé": 275,
"nÃŃch": 276,
"Ġpoz": 277,
"ĠdÄĽk": 278,
"Äįe": 279,
"se": 280,
"Ġž": 281,
"Ġde": 282,
"eme": 283,
"Ġpra": 284,
"ji": 285,
"ady": 286,
"hod": 287,
"Ġjako": 288,
"ká": 289,
"Ġten": 290,
"tÃŃ": 291,
"Ġpa": 292,
"las": 293,
"ĠdÄĽkuji": 294,
"Ġjsou": 295,
"Ġprá": 296,
"sed": 297,
"ty": 298,
"Ġnej": 299,
"prav": 300,
"Ġdů": 301,
"tu": 302,
"pe": 303,
"nou": 304,
"Ġproto": 305,
"Ġle": 306,
"eno": 307,
"Ġjsem": 308,
"ĠÅ¡": 309,
"Ġposlan": 310,
"zi": 311,
"do": 312,
"ry": 313,
"Ġdva": 314,
"Ġté": 315,
"Ġspo": 316,
"Ġkon": 317,
"Ġin": 318,
"ovánÃŃ": 319,
"Ġtady": 320,
"ĠÄįe": 321,
"lov": 322,
"Ġmy": 323,
"ve": 324,
"Ġkteré": 325,
"ÅĻed": 326,
"dá": 327,
"Ġsv": 328,
"stu": 329,
"sÃŃm": 330,
"ÄįnÃŃ": 331,
"kla": 332,
"Ġmi": 333,
"Ġos": 334,
"Ġni": 335,
"Ġminist": 336,
"Ġco": 337,
"Ġvo": 338,
"Ġmu": 339,
"tel": 340,
"Ġzáko": 341,
"ný": 342,
"as": 343,
"Ġev": 344,
"Ġnem": 345,
"ri": 346,
"Ġtaké": 347,
"rop": 348,
"Ġte": 349,
"Ġvel": 350,
"Ġbo": 351,
"Ġvlá": 352,
"ĠmÃŃ": 353,
"Å¡tÄĽ": 354,
"dnÃŃ": 355,
"ly": 356,
"Ġli": 357,
"Ġposlane": 358,
"ĠpÅĻedsed": 359,
"ĠtÄĽ": 360,
"Ġce": 361,
"led": 362,
"Ġkdy": 363,
"mÃŃ": 364,
"pad": 365,
"di": 366,
"ĠÅĻÃŃ": 367,
"Ġtoho": 368,
"Ġtom": 369,
"len": 370,
"pu": 371,
"bu": 372,
"ta": 373,
"ujÃŃ": 374,
"lou": 375,
"Ġevrop": 376,
"Ġstát": 377,
"ád": 378,
"prá": 379,
"Ġtu": 380,
"vy": 381,
"sto": 382,
"sát": 383,
"vi": 384,
"Ġty": 385,
"Ġjsme": 386,
"žen": 387,
"ĠÅĻe": 388,
"Ġta": 389,
"ÅĻej": 390,
"ba": 391,
"nosti": 392,
"Ġhlas": 393,
"Ġnebo": 394,
"mo": 395,
"Ġji": 396,
"my": 397,
"ajÃŃ": 398,
"tá": 399,
"oval": 400,
"ého": 401,
"Ġbud": 402,
"leg": 403,
"Ġsta": 404,
"Ġpane": 405,
"isk": 406,
"žÃŃ": 407,
"Ġho": 408,
"ste": 409,
"ĠnenÃŃ": 410,
"stup": 411,
"vÄĽt": 412,
"ĠtÅĻi": 413,
"mov": 414,
"Ġdal": 415,
"Ġprost": 416,
"ez": 417,
"Ġkoleg": 418,
"Ġbude": 419,
"Ġka": 420,
"Ġvz": 421,
"lÃŃ": 422,
"ĠpanÃŃ": 423,
"nÃŃho": 424,
"cet": 425,
"za": 426,
"Ġkterý": 427,
"Ġprotože": 428,
"Ġslov": 429,
"chá": 430,
"Ġdob": 431,
"men": 432,
"Ġpot": 433,
"ruh": 434,
"ži": 435,
"sÃŃ": 436,
"Ġze": 437,
"Ġtomu": 438,
"ÄįnÄĽ": 439,
"Ġpoli": 440,
"ĠtÃŃm": 441,
"ĠvÅ¡": 442,
"rov": 443,
"ĠsnÄĽ": 444,
"Ġvýbo": 445,
"Ġdá": 446,
"Ġbylo": 447,
"ÄĽt": 448,
"Ġsam": 449,
"Ġbych": 450,
"Ġbyla": 451,
"ĠsnÄĽmov": 452,
"isÃŃ": 453,
"Ġg": 454,
"Ġbý": 455,
"ĠnÄĽk": 456,
"Ġsto": 457,
"dÃŃ": 458,
"ků": 459,
"Ġtakže": 460,
"ÄįÃŃ": 461,
"sa": 462,
"Ġdne": 463,
"ma": 464,
"ĠprosÃŃm": 465,
"zÃŃ": 466,
"Ġjedno": 467,
"ter": 468,
"Ġdruh": 469,
"ĠvÅ¡e": 470,
"Ġuž": 471,
"Ġjeho": 472,
"ných": 473,
"edy": 474,
"Ġprob": 475,
"ĠdalÅ¡ÃŃ": 476,
"chom": 477,
"Ġzd": 478,
"kou": 479,
"rů": 480,
"Ġtedy": 481,
"Ġsku": 482,
"Å¡ÃŃm": 483,
"Ġpou": 484,
"ÅĻad": 485,
"Ġpoku": 486,
"vnÄĽ": 487,
"Ġsed": 488,
"ovÄĽ": 489,
"Ġzem": 490,
"ĠtisÃŃ": 491,
"Ġsamo": 492,
"vod": 493,
"žit": 494,
"bli": 495,
"ém": 496,
"Ġstra": 497,
"tick": 498,
"Ġmož": 499
},
"merges": [
"Ã Ń",
"Ã ¡",
"Ġ p",
"Ġ s",
"Ä Ľ",
"Ġ t",
"Ġ v",
"Ġ n",
"Å Ļ",
"Ġ j",
"n ÃŃ",
"Ã ©",
"s t",
"Å ¾",
"Ġ z",
"Ġ d",
"r o",
"Ġ a",
"c h",
"o v",
"Ä į",
"Ġ m",
"Ġ k",
"Ġp o",
"Ã ½",
"Ġ o",
"e d",
"Å ¡",
"l a",
"e n",
"Ġ b",
"r a",
"o u",
"a k",
"e m",
"l i",
"Å ¯",
"t e",
"l e",
"h o",
"Ġn a",
"Ġt o",
"Ġp ÅĻ",
"n ÄĽ",
"Ġj e",
"Ġp ro",
"Ġs e",
"Ġn e",
"c e",
"ž e",
"t o",
"i n",
"a n",
"s k",
"Ġd o",
"a t",
"ÃŃ m",
"r á",
"Ġb y",
"Ġz a",
"Ġ Äį",
"u j",
"l o",
"n o",
"i t",
"Ġs t",
"Ġ u",
"ÅĻ e",
"Ġ že",
"Ġt ak",
"n i",
"p o",
"a d",
"c i",
"a l",
"Ġk o",
"k o",
"Ġn á",
"n á",
"n a",
"Ġ ro",
"v o",
"r u",
"k u",
"t i",
"Ġv y",
"v a",
"Ġ h",
"r e",
"ý ch",
"d e",
"Ġj s",
"c k",
"n é",
"l á",
"Ġv e",
"c ÃŃ",
"Ġz á",
"Ġk te",
"n e",
"b y",
"k y",
"ÅĻ ÃŃ",
"v ÄĽ",
"Ġo b",
"ĠpÅĻ ed",
"á t",
"Ġ f",
"m i",
"k a",
"m e",
"Ġpo s",
"Ġpo d",
"d y",
"Ã º",
"Å¡ e",
"m ÄĽ",
"Ġp an",
"Ġj ak",
"Ġj ed",
"Ġ ú",
"ov á",
"ĠpÅĻ i",
"Å¡ ÃŃ",
"m u",
"j ÃŃ",
"sk é",
"v r",
"Ġv ý",
"b o",
"v á",
"Ġo d",
"st i",
"l u",
"Ġ e",
"z e",
"ÄĽ k",
"Ġpos la",
"Ġ i",
"ÅĻ i",
"p ra",
"Ġro z",
"Ġkte r",
"t ÄĽ",
"d a",
"vr h",
"i st",
"ov é",
"d ÄĽ",
"Ġ re",
"d u",
"uj i",
"a r",
"Ġb u",
"ov a",
"Ġv á",
"v nÃŃ",
"Ġm o",
"Ġ ch",
"i l",
"o r",
"Ġm ÄĽ",
"e r",
"i s",
"ĠpÅĻ ÃŃ",
"ov at",
"uj e",
"o b",
"st a",
"n y",
"v ÃŃ",
"s l",
"Ġm á",
"Ġv ÄĽ",
"Ġná vrh",
"en t",
"Ġ c",
"Ġj á",
"Ġn ÄĽ",
"a m",
"Ġa le",
"Ġs i",
"c t",
"Ġa by",
"Ġby l",
"Å Ī",
"nÃŃ m",
"ch o",
"ĠpÅĻ e",
"Ġp r",
"ck é",
"n u",
"á l",
"Ġm in",
"no st",
"j e",
"Ġs ou",
"ý m",
"l é",
"nÃŃ ch",
"Ġpo z",
"Ġd ÄĽk",
"Äį e",
"s e",
"Ġ ž",
"Ġd e",
"em e",
"Ġp ra",
"j i",
"ad y",
"ho d",
"Ġjak o",
"k á",
"Ġt en",
"t ÃŃ",
"Ġp a",
"la s",
"ĠdÄĽk uji",
"Ġjs ou",
"Ġp rá",
"s ed",
"t y",
"Ġne j",
"pra v",
"Ġd ů",
"t u",
"p e",
"n ou",
"Ġpro to",
"Ġ le",
"en o",
"Ġjs em",
"Ġ Å¡",
"Ġposla n",
"z i",
"d o",
"r y",
"Ġd va",
"Ġt é",
"Ġs po",
"Ġko n",
"Ġ in",
"ová nÃŃ",
"Ġt ady",
"ĠÄį e",
"l ov",
"Ġm y",
"v e",
"Ġkter é",
"ÅĻ ed",
"d á",
"Ġs v",
"st u",
"s ÃŃm",
"Äį nÃŃ",
"k la",
"Ġm i",
"Ġo s",
"Ġn i",
"Ġmin ist",
"Ġc o",
"Ġv o",
"Ġm u",
"te l",
"Ġzá ko",
"n ý",
"a s",
"Ġe v",
"Ġn em",
"r i",
"Ġtak é",
"ro p",
"Ġt e",
"Ġve l",
"Ġb o",
"Ġv lá",
"Ġm ÃŃ",
"Å¡ tÄĽ",
"d nÃŃ",
"l y",
"Ġ li",
"Ġposla ne",
"ĠpÅĻed sed",
"Ġt ÄĽ",
"Ġ ce",
"l ed",
"Ġk dy",
"m ÃŃ",
"p ad",
"d i",
"Ġ ÅĻÃŃ",
"Ġto ho",
"Ġto m",
"l en",
"p u",
"b u",
"t a",
"uj ÃŃ",
"l ou",
"Ġev rop",
"Ġst át",
"á d",
"p rá",
"Ġt u",
"v y",
"st o",
"s át",
"v i",
"Ġt y",
"Ġjs me",
"ž en",
"Ġ ÅĻe",
"Ġt a",
"ÅĻe j",
"b a",
"no sti",
"Ġh las",
"Ġne bo",
"m o",
"Ġj i",
"m y",
"a jÃŃ",
"t á",
"ov al",
"é ho",
"Ġbu d",
"le g",
"Ġst a",
"Ġpan e",
"i sk",
"ž ÃŃ",
"Ġ ho",
"st e",
"Ġne nÃŃ",
"stu p",
"vÄĽ t",
"Ġt ÅĻi",
"m ov",
"Ġd al",
"Ġpro st",
"e z",
"Ġko leg",
"Ġbu de",
"Ġk a",
"Ġv z",
"l ÃŃ",
"Ġpa nÃŃ",
"nÃŃ ho",
"ce t",
"z a",
"Ġkter ý",
"Ġproto že",
"Ġs lov",
"ch á",
"Ġdo b",
"m en",
"Ġpo t",
"ru h",
"ž i",
"s ÃŃ",
"Ġz e",
"Ġto mu",
"Äį nÄĽ",
"Ġpo li",
"Ġt ÃŃm",
"Ġv Å¡",
"ro v",
"Ġs nÄĽ",
"Ġvý bo",
"Ġd á",
"Ġby lo",
"ÄĽ t",
"Ġs am",
"Ġby ch",
"Ġby la",
"ĠsnÄĽ mov",
"is ÃŃ",
"Ġ g",
"Ġb ý",
"Ġn ÄĽk",
"Ġs to",
"d ÃŃ",
"k ů",
"Ġtak že",
"Äį ÃŃ",
"s a",
"Ġd ne",
"m a",
"Ġpro sÃŃm",
"z ÃŃ",
"Ġjed no",
"te r",
"Ġd ruh",
"Ġv Å¡e",
"Ġu ž",
"Ġje ho",
"n ých",
"ed y",
"Ġpro b",
"Ġdal Å¡ÃŃ",
"cho m",
"Ġz d",
"k ou",
"r ů",
"Ġt edy",
"Ġs ku",
"Å¡ ÃŃm",
"Ġpo u",
"ÅĻ ad",
"Ġpo ku",
"v nÄĽ",
"Ġs ed",
"ov ÄĽ",
"Ġz em",
"Ġt isÃŃ",
"Ġsam o",
"vo d",
"ž it",
"b li",
"é m",
"Ġst ra",
"ti ck",
"Ġmo ž"
]
}
}