Upload folder using huggingface_hub
Browse files
fw57Mmulti_Entropy_thresholdB_32000/tokenizer.json
CHANGED
@@ -40,7 +40,10 @@
|
|
40 |
]
|
41 |
},
|
42 |
"pre_tokenizer": {
|
43 |
-
"type": "
|
|
|
|
|
|
|
44 |
},
|
45 |
"post_processor": {
|
46 |
"type": "ByteLevel",
|
@@ -318,262 +321,262 @@
|
|
318 |
"Ł": 255,
|
319 |
"ł": 256,
|
320 |
"Ń": 257,
|
321 |
-
"##
|
322 |
-
"
|
323 |
-
"
|
324 |
-
"
|
325 |
-
"
|
326 |
-
"
|
327 |
-
"
|
328 |
-
"
|
329 |
-
"
|
330 |
-
"
|
331 |
-
"
|
332 |
-
"
|
333 |
-
"
|
334 |
-
"
|
335 |
-
"
|
336 |
-
"
|
337 |
-
"
|
338 |
-
"
|
339 |
-
"
|
340 |
-
"
|
341 |
-
"
|
342 |
-
"
|
343 |
-
"
|
344 |
-
"
|
345 |
-
"
|
346 |
-
"
|
347 |
-
"
|
348 |
-
"
|
349 |
-
"
|
350 |
-
"
|
351 |
-
"
|
352 |
-
"
|
353 |
-
"
|
354 |
-
"
|
355 |
-
"
|
356 |
-
"
|
357 |
-
"
|
358 |
-
"
|
359 |
-
"
|
360 |
-
"
|
361 |
-
"
|
362 |
-
"
|
363 |
-
"
|
364 |
-
"
|
365 |
-
"
|
366 |
-
"
|
367 |
-
"
|
368 |
-
"
|
369 |
-
"
|
370 |
-
"
|
371 |
-
"
|
372 |
-
"
|
373 |
-
"
|
374 |
-
"
|
375 |
-
"
|
376 |
-
"
|
377 |
-
"
|
378 |
-
"
|
379 |
-
"
|
380 |
-
"##
|
381 |
-
"
|
382 |
-
"
|
383 |
-
"
|
384 |
-
"
|
385 |
-
"
|
386 |
-
"
|
387 |
-
"
|
388 |
-
"
|
389 |
-
"##
|
390 |
-
"
|
391 |
-
"
|
392 |
-
"
|
393 |
-
"
|
394 |
-
"
|
395 |
-
"
|
396 |
-
"
|
397 |
-
"
|
398 |
-
"##
|
399 |
-
"
|
400 |
-
"
|
401 |
-
"
|
402 |
-
"
|
403 |
-
"
|
404 |
-
"
|
405 |
-
"
|
406 |
-
"
|
407 |
-
"
|
408 |
-
"
|
409 |
-
"
|
410 |
-
"
|
411 |
-
"##
|
412 |
-
"
|
413 |
-
"
|
414 |
-
"##
|
415 |
-
"
|
416 |
-
"
|
417 |
-
"
|
418 |
-
"
|
419 |
-
"
|
420 |
-
"
|
421 |
-
"
|
422 |
-
"
|
423 |
-
"
|
424 |
-
"
|
425 |
-
"
|
426 |
-
"
|
427 |
-
"
|
428 |
-
"
|
429 |
-
"
|
430 |
"##6": 367,
|
431 |
-
"
|
432 |
-
"
|
433 |
-
"
|
434 |
-
"
|
435 |
-
"
|
436 |
-
"
|
437 |
-
"
|
438 |
-
"
|
439 |
-
"
|
440 |
-
"
|
441 |
-
"
|
442 |
-
"
|
443 |
-
"
|
444 |
-
"
|
445 |
-
"
|
446 |
-
"
|
447 |
-
"
|
448 |
-
"
|
449 |
-
"
|
450 |
-
"
|
451 |
-
"
|
452 |
-
"
|
453 |
-
"
|
454 |
-
"
|
455 |
-
"
|
456 |
-
"
|
457 |
-
"
|
458 |
-
"
|
459 |
-
"
|
460 |
-
"
|
461 |
-
"
|
462 |
-
"
|
463 |
-
"
|
464 |
-
"
|
465 |
-
"
|
466 |
-
"
|
467 |
-
"
|
468 |
-
"
|
469 |
-
"
|
470 |
-
"
|
471 |
-
"
|
472 |
-
"
|
473 |
-
"
|
474 |
-
"
|
475 |
-
"
|
476 |
-
"
|
477 |
-
"
|
478 |
-
"
|
479 |
-
"
|
480 |
-
"
|
481 |
-
"
|
482 |
-
"
|
483 |
-
"##
|
484 |
-
"##
|
485 |
-
"
|
486 |
-
"##
|
487 |
-
"
|
488 |
-
"##
|
489 |
-
"
|
490 |
-
"
|
491 |
-
"
|
492 |
-
"
|
493 |
-
"
|
494 |
-
"##
|
495 |
-
"
|
496 |
-
"
|
497 |
-
"
|
498 |
-
"
|
499 |
-
"
|
500 |
-
"
|
501 |
-
"
|
502 |
-
"
|
503 |
-
"
|
504 |
-
"
|
505 |
-
"
|
506 |
-
"##
|
507 |
-
"
|
508 |
-
"
|
509 |
-
"
|
510 |
-
"
|
511 |
-
"
|
512 |
-
"
|
513 |
-
"
|
514 |
-
"
|
515 |
-
"
|
516 |
-
"
|
517 |
-
"
|
518 |
-
"
|
519 |
-
"
|
520 |
-
"
|
521 |
-
"
|
522 |
-
"
|
523 |
-
"##
|
524 |
-
"
|
525 |
-
"
|
526 |
-
"##
|
527 |
-
"
|
528 |
-
"
|
529 |
-
"##
|
530 |
-
"
|
531 |
-
"
|
532 |
-
"
|
533 |
-
"
|
534 |
-
"
|
535 |
-
"
|
536 |
-
"
|
537 |
-
"
|
538 |
-
"
|
539 |
-
"
|
540 |
-
"
|
541 |
-
"
|
542 |
-
"
|
543 |
-
"
|
544 |
-
"
|
545 |
-
"
|
546 |
-
"
|
547 |
-
"
|
548 |
-
"
|
549 |
-
"
|
550 |
-
"
|
551 |
-
"
|
552 |
-
"
|
553 |
-
"
|
554 |
-
"
|
555 |
-
"
|
556 |
-
"
|
557 |
-
"
|
558 |
-
"
|
559 |
-
"
|
560 |
-
"
|
561 |
-
"##
|
562 |
-
"
|
563 |
-
"
|
564 |
-
"
|
565 |
-
"
|
566 |
-
"
|
567 |
-
"
|
568 |
-
"
|
569 |
-
"
|
570 |
-
"
|
571 |
-
"
|
572 |
-
"
|
573 |
-
"##
|
574 |
-
"
|
575 |
-
"
|
576 |
-
"
|
577 |
"<|unk|>": 514,
|
578 |
"##�": 515,
|
579 |
"sa": 516,
|
|
|
40 |
]
|
41 |
},
|
42 |
"pre_tokenizer": {
|
43 |
+
"type": "ByteLevel",
|
44 |
+
"add_prefix_space": true,
|
45 |
+
"trim_offsets": true,
|
46 |
+
"use_regex": true
|
47 |
},
|
48 |
"post_processor": {
|
49 |
"type": "ByteLevel",
|
|
|
321 |
"Ł": 255,
|
322 |
"ł": 256,
|
323 |
"Ń": 257,
|
324 |
+
"##)": 258,
|
325 |
+
"##ĝ": 259,
|
326 |
+
"##¶": 260,
|
327 |
+
"##|": 261,
|
328 |
+
"##Ē": 262,
|
329 |
+
"##Ě": 263,
|
330 |
+
"##`": 264,
|
331 |
+
"##ç": 265,
|
332 |
+
"##÷": 266,
|
333 |
+
"##p": 267,
|
334 |
+
"##=": 268,
|
335 |
+
"##2": 269,
|
336 |
+
"##U": 270,
|
337 |
+
"##s": 271,
|
338 |
+
"##¢": 272,
|
339 |
+
"##Í": 273,
|
340 |
+
"##Ê": 274,
|
341 |
+
"##ŀ": 275,
|
342 |
+
"##ú": 276,
|
343 |
+
"##û": 277,
|
344 |
+
"##ij": 278,
|
345 |
+
"##3": 279,
|
346 |
+
"##k": 280,
|
347 |
+
"##1": 281,
|
348 |
+
"##o": 282,
|
349 |
+
"##T": 283,
|
350 |
+
"##é": 284,
|
351 |
+
"##[": 285,
|
352 |
+
"##ċ": 286,
|
353 |
+
"##-": 287,
|
354 |
+
"##ħ": 288,
|
355 |
+
"##,": 289,
|
356 |
+
"##Ë": 290,
|
357 |
+
"##ĺ": 291,
|
358 |
+
"##I": 292,
|
359 |
+
"##đ": 293,
|
360 |
+
"##Ü": 294,
|
361 |
+
"##Ļ": 295,
|
362 |
+
"##ß": 296,
|
363 |
+
"##;": 297,
|
364 |
+
"##Ñ": 298,
|
365 |
+
"##R": 299,
|
366 |
+
"##@": 300,
|
367 |
+
"##\\": 301,
|
368 |
+
"##r": 302,
|
369 |
+
"##]": 303,
|
370 |
+
"##»": 304,
|
371 |
+
"##Ð": 305,
|
372 |
+
"##M": 306,
|
373 |
+
"##â": 307,
|
374 |
+
"##Ô": 308,
|
375 |
+
"##Õ": 309,
|
376 |
+
"##Đ": 310,
|
377 |
+
"##ę": 311,
|
378 |
+
"##ì": 312,
|
379 |
+
"##ð": 313,
|
380 |
+
"##¸": 314,
|
381 |
+
"##ı": 315,
|
382 |
+
"##_": 316,
|
383 |
+
"##4": 317,
|
384 |
+
"##ļ": 318,
|
385 |
+
"##Ğ": 319,
|
386 |
+
"##<": 320,
|
387 |
+
"##n": 321,
|
388 |
+
"##É": 322,
|
389 |
+
"##'": 323,
|
390 |
+
"##ò": 324,
|
391 |
+
"##0": 325,
|
392 |
+
"##C": 326,
|
393 |
+
"##ď": 327,
|
394 |
+
"##a": 328,
|
395 |
+
"##Ą": 329,
|
396 |
+
"##Ĭ": 330,
|
397 |
+
"##Æ": 331,
|
398 |
+
"##Ā": 332,
|
399 |
+
"##ģ": 333,
|
400 |
+
"##q": 334,
|
401 |
+
"##N": 335,
|
402 |
+
"##·": 336,
|
403 |
+
"##¬": 337,
|
404 |
+
"##Ý": 338,
|
405 |
+
"##½": 339,
|
406 |
+
"##Ď": 340,
|
407 |
+
"##ė": 341,
|
408 |
+
"##į": 342,
|
409 |
+
"##¤": 343,
|
410 |
+
"##Ĵ": 344,
|
411 |
+
"##Q": 345,
|
412 |
+
"##Ò": 346,
|
413 |
+
"##Ķ": 347,
|
414 |
+
"##L": 348,
|
415 |
+
"##ĭ": 349,
|
416 |
+
"##Ĉ": 350,
|
417 |
+
"##}": 351,
|
418 |
+
"##°": 352,
|
419 |
+
"##~": 353,
|
420 |
+
"##ĉ": 354,
|
421 |
+
"##^": 355,
|
422 |
+
"##!": 356,
|
423 |
+
"##ĕ": 357,
|
424 |
+
"##e": 358,
|
425 |
+
"##ä": 359,
|
426 |
+
"##þ": 360,
|
427 |
+
"##ö": 361,
|
428 |
+
"##Ć": 362,
|
429 |
+
"##Ċ": 363,
|
430 |
+
"##X": 364,
|
431 |
+
"##«": 365,
|
432 |
+
"##ľ": 366,
|
433 |
"##6": 367,
|
434 |
+
"##l": 368,
|
435 |
+
"##à": 369,
|
436 |
+
"##Ń": 370,
|
437 |
+
"##Ú": 371,
|
438 |
+
"##5": 372,
|
439 |
+
"##Č": 373,
|
440 |
+
"##Ę": 374,
|
441 |
+
"##¡": 375,
|
442 |
+
"##W": 376,
|
443 |
+
"##ī": 377,
|
444 |
+
"##+": 378,
|
445 |
+
"##>": 379,
|
446 |
+
"##Ĝ": 380,
|
447 |
+
"##ł": 381,
|
448 |
+
"##®": 382,
|
449 |
+
"##%": 383,
|
450 |
+
"##:": 384,
|
451 |
+
"##&": 385,
|
452 |
+
"##Ç": 386,
|
453 |
+
"##ĸ": 387,
|
454 |
+
"##/": 388,
|
455 |
+
"##Ã": 389,
|
456 |
+
"##á": 390,
|
457 |
+
"##c": 391,
|
458 |
+
"##ē": 392,
|
459 |
+
"##Ï": 393,
|
460 |
+
"##¹": 394,
|
461 |
+
"##O": 395,
|
462 |
+
"##Ó": 396,
|
463 |
+
"##ñ": 397,
|
464 |
+
"##Ī": 398,
|
465 |
+
"##d": 399,
|
466 |
+
"##ù": 400,
|
467 |
+
"##Ă": 401,
|
468 |
+
"##æ": 402,
|
469 |
+
"##¾": 403,
|
470 |
+
"##ó": 404,
|
471 |
+
"##õ": 405,
|
472 |
+
"##ü": 406,
|
473 |
+
"##D": 407,
|
474 |
+
"##¯": 408,
|
475 |
+
"##Ġ": 409,
|
476 |
+
"##G": 410,
|
477 |
+
"##v": 411,
|
478 |
+
"##£": 412,
|
479 |
+
"##è": 413,
|
480 |
+
"##Ø": 414,
|
481 |
+
"##ý": 415,
|
482 |
+
"##ĩ": 416,
|
483 |
+
"##*": 417,
|
484 |
+
"##{": 418,
|
485 |
+
"##Â": 419,
|
486 |
+
"##h": 420,
|
487 |
+
"##w": 421,
|
488 |
+
"##¿": 422,
|
489 |
+
"##B": 423,
|
490 |
+
"##Á": 424,
|
491 |
+
"##t": 425,
|
492 |
+
"##ô": 426,
|
493 |
+
"###": 427,
|
494 |
+
"##?": 428,
|
495 |
+
"##¨": 429,
|
496 |
+
"##Ä": 430,
|
497 |
+
"##8": 431,
|
498 |
+
"##P": 432,
|
499 |
+
"##Û": 433,
|
500 |
+
"##ÿ": 434,
|
501 |
+
"##ā": 435,
|
502 |
+
"##Y": 436,
|
503 |
+
"##©": 437,
|
504 |
+
"##ě": 438,
|
505 |
+
"##Ì": 439,
|
506 |
+
"##ĥ": 440,
|
507 |
+
"##Ĥ": 441,
|
508 |
+
"##º": 442,
|
509 |
+
"##y": 443,
|
510 |
+
"##²": 444,
|
511 |
+
"##µ": 445,
|
512 |
+
"##E": 446,
|
513 |
+
"##¦": 447,
|
514 |
+
"##´": 448,
|
515 |
+
"##F": 449,
|
516 |
+
"##Ĕ": 450,
|
517 |
+
"##İ": 451,
|
518 |
+
"##Ľ": 452,
|
519 |
+
"##î": 453,
|
520 |
+
"##g": 454,
|
521 |
+
"##ª": 455,
|
522 |
+
"##¼": 456,
|
523 |
+
"##7": 457,
|
524 |
+
"##×": 458,
|
525 |
+
"##Å": 459,
|
526 |
+
"##b": 460,
|
527 |
+
"##§": 461,
|
528 |
+
"##ê": 462,
|
529 |
+
"##f": 463,
|
530 |
+
"##Ö": 464,
|
531 |
+
"##å": 465,
|
532 |
+
"##Z": 466,
|
533 |
+
"##ă": 467,
|
534 |
+
"##H": 468,
|
535 |
+
"##ġ": 469,
|
536 |
+
"##Ĩ": 470,
|
537 |
+
"##x": 471,
|
538 |
+
"##í": 472,
|
539 |
+
"##³": 473,
|
540 |
+
"##9": 474,
|
541 |
+
"##Þ": 475,
|
542 |
+
"##Ħ": 476,
|
543 |
+
"##J": 477,
|
544 |
+
"##Ł": 478,
|
545 |
+
"##IJ": 479,
|
546 |
+
"##ø": 480,
|
547 |
+
"##z": 481,
|
548 |
+
"##¥": 482,
|
549 |
+
"##j": 483,
|
550 |
+
"##È": 484,
|
551 |
+
"##č": 485,
|
552 |
+
"##ķ": 486,
|
553 |
+
"##V": 487,
|
554 |
+
"##Ù": 488,
|
555 |
+
"##S": 489,
|
556 |
+
"##$": 490,
|
557 |
+
"##ć": 491,
|
558 |
+
"##m": 492,
|
559 |
+
"##(": 493,
|
560 |
+
"##ï": 494,
|
561 |
+
"##Į": 495,
|
562 |
+
"##.": 496,
|
563 |
+
"##Ģ": 497,
|
564 |
+
"##i": 498,
|
565 |
+
"##Ŀ": 499,
|
566 |
+
"##Ĺ": 500,
|
567 |
+
"##A": 501,
|
568 |
+
"##±": 502,
|
569 |
+
"##ğ": 503,
|
570 |
+
"##Ė": 504,
|
571 |
+
"##u": 505,
|
572 |
+
"##Î": 506,
|
573 |
+
"##À": 507,
|
574 |
+
"##ë": 508,
|
575 |
+
"##ã": 509,
|
576 |
+
"##K": 510,
|
577 |
+
"##\"": 511,
|
578 |
+
"##ą": 512,
|
579 |
+
"##ĵ": 513,
|
580 |
"<|unk|>": 514,
|
581 |
"##�": 515,
|
582 |
"sa": 516,
|
fw57Mmulti_Entropy_thresholdB_32000/vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|