codebyzeb commited on
Commit
62a0567
·
verified ·
1 Parent(s): f9c3417

Upload folder using huggingface_hub

Browse files
fw57Mmulti_Entropy_thresholdB_32000/tokenizer.json CHANGED
@@ -318,262 +318,262 @@
318
  "Ł": 255,
319
  "ł": 256,
320
  "Ń": 257,
321
- "##q": 258,
322
- "##¡": 259,
323
- "##ă": 260,
324
- "##W": 261,
325
- "##S": 262,
326
- "##Ď": 263,
327
- "##Ė": 264,
328
- "##Ĝ": 265,
329
- "##y": 266,
330
- "##J": 267,
331
- "###": 268,
332
- "##I": 269,
333
- "##Ø": 270,
334
- "##N": 271,
335
- "##Ò": 272,
336
- "##ý": 273,
337
- "##Y": 274,
338
- "##¦": 275,
339
- "##\\": 276,
340
- "##È": 277,
341
- "##ğ": 278,
342
- "##¨": 279,
343
- "##>": 280,
344
- "##Á": 281,
345
- "##Î": 282,
346
- "##µ": 283,
347
- "##;": 284,
348
- "##¹": 285,
349
- "##IJ": 286,
350
- "##ij": 287,
351
- "##±": 288,
352
- "##M": 289,
353
- "##°": 290,
354
- "##á": 291,
355
- "##<": 292,
356
- "##é": 293,
357
- "##Ę": 294,
358
- "##ĭ": 295,
359
- "##Ķ": 296,
360
- "##Ġ": 297,
361
- "##H": 298,
362
- "##Ě": 299,
363
- "##ę": 300,
364
- "##7": 301,
365
- "##¬": 302,
366
- "##đ": 303,
367
- "##@": 304,
368
- "##6": 305,
369
- "##|": 306,
370
- "##Ì": 307,
371
- "##ä": 308,
372
- "##ć": 309,
373
- "##=": 310,
374
- "##'": 311,
375
- "##û": 312,
376
- "##ī": 313,
377
- "##Û": 314,
378
- "##Ĕ": 315,
379
- "##.": 316,
380
  "##A": 317,
381
- "##Ī": 318,
382
- "##ě": 319,
383
- "##£": 320,
384
- "##ü": 321,
385
- "##å": 322,
386
- "##%": 323,
387
- "##ò": 324,
388
- "##Ŀ": 325,
389
- "##ė": 326,
390
- "##ñ": 327,
391
- "##«": 328,
392
- "##ē": 329,
393
- "##ĸ": 330,
394
- "##t": 331,
395
- "##3": 332,
396
- "##l": 333,
397
- "##{": 334,
398
- "##L": 335,
399
- "##-": 336,
400
- "##s": 337,
401
- "##Ý": 338,
402
- "##Ĥ": 339,
403
- "##Å": 340,
404
- "##¸": 341,
405
- "##2": 342,
406
- "##ª": 343,
407
- "##Ð": 344,
408
- "##ģ": 345,
409
- "##æ": 346,
410
- "##h": 347,
411
- "##`": 348,
412
- "##ġ": 349,
413
- "##*": 350,
414
- "##®": 351,
415
- "##ĉ": 352,
416
- "##Į": 353,
417
- "##½": 354,
418
- "##Ñ": 355,
419
- "##í": 356,
420
- "##5": 357,
421
- "##î": 358,
422
- "##\"": 359,
423
- "##Ó": 360,
424
- "##i": 361,
425
- "##]": 362,
426
- "##Ħ": 363,
427
- "##Ĺ": 364,
428
- "##à": 365,
429
- "##c": 366,
430
- "##×": 367,
431
- "##Ú": 368,
432
- "##ļ": 369,
433
- "##§": 370,
434
- "##ı": 371,
435
- "##º": 372,
436
- "##Þ": 373,
437
- "##Đ": 374,
438
- "##!": 375,
439
- "##Ĩ": 376,
440
- "##ï": 377,
441
- "##ì": 378,
442
- "##ô": 379,
443
- "##Č": 380,
444
- "##ö": 381,
445
- "##Í": 382,
446
- "##ħ": 383,
447
- "##Ù": 384,
448
- "##_": 385,
449
- "##8": 386,
450
- "##ĺ": 387,
451
- "##u": 388,
452
- "##Ç": 389,
453
- "##Ĭ": 390,
454
- "##d": 391,
455
- "##ð": 392,
456
- "##Ö": 393,
457
- "##n": 394,
458
- "##?": 395,
459
- "##Ń": 396,
460
- "##¼": 397,
461
- "##B": 398,
462
- "##³": 399,
463
- "##ľ": 400,
464
- "##¥": 401,
465
- "##À": 402,
466
- "##p": 403,
467
- "##Ë": 404,
468
- "##F": 405,
469
- "##Ą": 406,
470
- "##Ĉ": 407,
471
- "##Ē": 408,
472
- "##ó": 409,
473
- "##Ā": 410,
474
- "##a": 411,
475
- "##X": 412,
476
- "##Ć": 413,
477
- "##~": 414,
478
- "##ø": 415,
479
- "##f": 416,
480
- "##ĩ": 417,
481
- "##©": 418,
482
- "##D": 419,
483
- "##&": 420,
484
- "##V": 421,
485
- "##^": 422,
486
- "##0": 423,
487
- "##1": 424,
488
- "##j": 425,
489
- "##Ô": 426,
490
- "##į": 427,
491
- "##P": 428,
492
- "##w": 429,
493
- "##Ģ": 430,
494
- "##g": 431,
495
  "##Ĵ": 432,
496
- "##ù": 433,
497
- "##č": 434,
498
- "##ĥ": 435,
499
- "##ŀ": 436,
500
- "##Ê": 437,
501
- "##[": 438,
502
- "##)": 439,
503
- "##O": 440,
504
- "##ā": 441,
505
- "##Ã": 442,
506
- "##ÿ": 443,
507
- "##ł": 444,
508
- "##e": 445,
509
- "##x": 446,
510
- "##ĝ": 447,
511
- "##Ċ": 448,
512
- "##´": 449,
513
- "##Q": 450,
514
- "##m": 451,
515
- "##ĕ": 452,
516
- "##Z": 453,
517
- "##G": 454,
518
- "##¾": 455,
519
- "##ã": 456,
520
- "##ë": 457,
521
- "##ċ": 458,
522
- "##Õ": 459,
523
  "##v": 460,
524
- "##è": 461,
525
- "##z": 462,
526
- "##Â": 463,
527
- "##÷": 464,
528
- "##Ü": 465,
529
- "##¤": 466,
530
- "##Æ": 467,
531
- "##:": 468,
532
- "##b": 469,
533
- "##Ï": 470,
534
- "##o": 471,
535
- "##r": 472,
536
- "##õ": 473,
537
- "##¶": 474,
538
- "##Ļ": 475,
539
- "##}": 476,
540
- "##¿": 477,
541
- "##T": 478,
542
- "##²": 479,
543
- "##E": 480,
544
- "##k": 481,
545
- "##9": 482,
546
- "##ç": 483,
547
- "##(": 484,
548
- "##¢": 485,
549
- "##ď": 486,
550
- "##Ł": 487,
551
- "##U": 488,
552
- "##ĵ": 489,
553
- "##C": 490,
554
- "##·": 491,
555
- "##4": 492,
556
- "##þ": 493,
557
- "##/": 494,
558
- "##»": 495,
559
- "##Ä": 496,
560
- "##ß": 497,
561
- "##ą": 498,
562
- "##R": 499,
563
- "##ú": 500,
564
- "##İ": 501,
565
- "##ê": 502,
566
- "##Ľ": 503,
567
- "##â": 504,
568
- "##K": 505,
569
- "##ķ": 506,
570
- "##$": 507,
571
- "##+": 508,
572
- "##,": 509,
573
- "##Ă": 510,
574
- "##¯": 511,
575
- "##É": 512,
576
- "##Ğ": 513,
577
  "<|unk|>": 514,
578
  "##�": 515,
579
  "sa": 516,
 
318
  "Ł": 255,
319
  "ł": 256,
320
  "Ń": 257,
321
+ "##Z": 258,
322
+ "##s": 259,
323
+ "##L": 260,
324
+ "##à": 261,
325
+ "##O": 262,
326
+ "##t": 263,
327
+ "##ĸ": 264,
328
+ "##+": 265,
329
+ "##Đ": 266,
330
+ "##×": 267,
331
+ "##,": 268,
332
+ "##Á": 269,
333
+ "##ě": 270,
334
+ "##ķ": 271,
335
+ "##K": 272,
336
+ "##n": 273,
337
+ "##đ": 274,
338
+ "##İ": 275,
339
+ "##¢": 276,
340
+ "##Õ": 277,
341
+ "##í": 278,
342
+ "##Ą": 279,
343
+ "##Ý": 280,
344
+ "##¼": 281,
345
+ "##Ģ": 282,
346
+ "##¶": 283,
347
+ "##X": 284,
348
+ "##ā": 285,
349
+ "##ē": 286,
350
+ "##!": 287,
351
+ "##¸": 288,
352
+ "##Å": 289,
353
+ "##H": 290,
354
+ "##¾": 291,
355
+ "##Į": 292,
356
+ "##j": 293,
357
+ "##ß": 294,
358
+ "##Y": 295,
359
+ "##Î": 296,
360
+ "##Ł": 297,
361
+ "##Ļ": 298,
362
+ "##±": 299,
363
+ "##R": 300,
364
+ "##i": 301,
365
+ "##\"": 302,
366
+ "##;": 303,
367
+ "##0": 304,
368
+ "##o": 305,
369
+ "##Ô": 306,
370
+ "##ì": 307,
371
+ "##ð": 308,
372
+ "##?": 309,
373
+ "##Č": 310,
374
+ "##.": 311,
375
+ "##ĝ": 312,
376
+ "##*": 313,
377
+ "##î": 314,
378
+ "##Ğ": 315,
379
+ "##Ġ": 316,
380
  "##A": 317,
381
+ "##ij": 318,
382
+ "##¥": 319,
383
+ "##C": 320,
384
+ "##Æ": 321,
385
+ "##µ": 322,
386
+ "##ë": 323,
387
+ "##f": 324,
388
+ "##«": 325,
389
+ "##Q": 326,
390
+ "##Ú": 327,
391
+ "##Â": 328,
392
+ "##ğ": 329,
393
+ "##u": 330,
394
+ "##J": 331,
395
+ "##_": 332,
396
+ "##ċ": 333,
397
+ "##å": 334,
398
+ "##{": 335,
399
+ "##V": 336,
400
+ "##Ħ": 337,
401
+ "##E": 338,
402
+ "##Ĉ": 339,
403
+ "##ñ": 340,
404
+ "##į": 341,
405
+ "##7": 342,
406
+ "##5": 343,
407
+ "##Ķ": 344,
408
+ "##Ò": 345,
409
+ "##%": 346,
410
+ "##Í": 347,
411
+ "##q": 348,
412
+ "##č": 349,
413
+ "##2": 350,
414
+ "##a": 351,
415
+ "##ł": 352,
416
+ "##²": 353,
417
+ "##£": 354,
418
+ "##>": 355,
419
+ "##¿": 356,
420
+ "##h": 357,
421
+ "##Ĭ": 358,
422
+ "##Ċ": 359,
423
+ "##Þ": 360,
424
+ "##û": 361,
425
+ "##w": 362,
426
+ "##F": 363,
427
+ "##|": 364,
428
+ "##Ó": 365,
429
+ "##Ĕ": 366,
430
+ "##6": 367,
431
+ "##ď": 368,
432
+ "##Ę": 369,
433
+ "##d": 370,
434
+ "##9": 371,
435
+ "##/": 372,
436
+ "##Ě": 373,
437
+ "##-": 374,
438
+ "##p": 375,
439
+ "##ĵ": 376,
440
+ "##ç": 377,
441
+ "##b": 378,
442
+ "##ļ": 379,
443
+ "##´": 380,
444
+ "##Ŀ": 381,
445
+ "##ĉ": 382,
446
+ "##º": 383,
447
+ "##c": 384,
448
+ "##È": 385,
449
+ "##è": 386,
450
+ "##Ê": 387,
451
+ "##^": 388,
452
+ "##ú": 389,
453
+ "##â": 390,
454
+ "##ò": 391,
455
+ "##Ľ": 392,
456
+ "##»": 393,
457
+ "##Ø": 394,
458
+ "##ö": 395,
459
+ "##Ã": 396,
460
+ "##á": 397,
461
+ "##é": 398,
462
+ "##ı": 399,
463
+ "##É": 400,
464
+ "##Û": 401,
465
+ "##Ī": 402,
466
+ "##§": 403,
467
+ "##ø": 404,
468
+ "##Ă": 405,
469
+ "##ŀ": 406,
470
+ "##Ĥ": 407,
471
+ "##m": 408,
472
+ "##ġ": 409,
473
+ "##Ď": 410,
474
+ "##`": 411,
475
+ "##Ć": 412,
476
+ "##)": 413,
477
+ "##æ": 414,
478
+ "##ħ": 415,
479
+ "##Ä": 416,
480
+ "###": 417,
481
+ "##Ð": 418,
482
+ "##ê": 419,
483
+ "##x": 420,
484
+ "##U": 421,
485
+ "##ã": 422,
486
+ "##z": 423,
487
+ "##ė": 424,
488
+ "##e": 425,
489
+ "##¹": 426,
490
+ "##Ė": 427,
491
+ "##ĺ": 428,
492
+ "##1": 429,
493
+ "##Ù": 430,
494
+ "##M": 431,
495
  "##Ĵ": 432,
496
+ "##Ĝ": 433,
497
+ "##½": 434,
498
+ "##G": 435,
499
+ "##<": 436,
500
+ "##Ē": 437,
501
+ "##õ": 438,
502
+ "##T": 439,
503
+ "##l": 440,
504
+ "##&": 441,
505
+ "##}": 442,
506
+ "##D": 443,
507
+ "##ä": 444,
508
+ "##ý": 445,
509
+ "##:": 446,
510
+ "##©": 447,
511
+ "##k": 448,
512
+ "##¨": 449,
513
+ "##8": 450,
514
+ "##ô": 451,
515
+ "##ĩ": 452,
516
+ "##P": 453,
517
+ "##ĭ": 454,
518
+ "##®": 455,
519
+ "##þ": 456,
520
+ "##ľ": 457,
521
+ "##ć": 458,
522
+ "##I": 459,
523
  "##v": 460,
524
+ "##4": 461,
525
+ "##ª": 462,
526
+ "##r": 463,
527
+ "##·": 464,
528
+ "##W": 465,
529
+ "##[": 466,
530
+ "##B": 467,
531
+ "##Ü": 468,
532
+ "##~": 469,
533
+ "##3": 470,
534
+ "##ÿ": 471,
535
+ "##@": 472,
536
+ "##ă": 473,
537
+ "##Ĩ": 474,
538
+ "##ģ": 475,
539
+ "##¯": 476,
540
+ "##Ń": 477,
541
+ "##¦": 478,
542
+ "##Ì": 479,
543
+ "##Ç": 480,
544
+ "##Ë": 481,
545
+ "##ī": 482,
546
+ "##ù": 483,
547
+ "##÷": 484,
548
+ "##³": 485,
549
+ "##g": 486,
550
+ "##ĕ": 487,
551
+ "##ą": 488,
552
+ "##Ö": 489,
553
+ "##IJ": 490,
554
+ "##¤": 491,
555
+ "##ü": 492,
556
+ "##À": 493,
557
+ "##\\": 494,
558
+ "##=": 495,
559
+ "##ï": 496,
560
+ "##ę": 497,
561
+ "##y": 498,
562
+ "##$": 499,
563
+ "##N": 500,
564
+ "##Ï": 501,
565
+ "##¡": 502,
566
+ "##Ñ": 503,
567
+ "##°": 504,
568
+ "##Ā": 505,
569
+ "##¬": 506,
570
+ "##ĥ": 507,
571
+ "##(": 508,
572
+ "##S": 509,
573
+ "##]": 510,
574
+ "##Ĺ": 511,
575
+ "##ó": 512,
576
+ "##'": 513,
577
  "<|unk|>": 514,
578
  "##�": 515,
579
  "sa": 516,
fw57Mmulti_Entropy_thresholdB_32000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff