Zeb committed on
Commit
583e43f
·
1 Parent(s): 8b5bd01

Rename all tokenizers

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +3 -0
  3. {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/special_tokens_map.json +0 -0
  4. {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer.json +0 -0
  5. {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer_config.json +0 -0
  6. {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/vocab.json +0 -0
  7. {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/special_tokens_map.json +0 -0
  8. {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer.json +0 -0
  9. {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer_config.json +0 -0
  10. {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/vocab.json +0 -0
  11. {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/special_tokens_map.json +0 -0
  12. {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer.json +0 -0
  13. {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer_config.json +0 -0
  14. {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/vocab.json +0 -0
  15. {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/special_tokens_map.json +0 -0
  16. {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer.json +0 -0
  17. {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer_config.json +0 -0
  18. {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/vocab.json +0 -0
  19. {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/special_tokens_map.json +0 -0
  20. {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer.json +0 -0
  21. {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer_config.json +0 -0
  22. {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/vocab.json +0 -0
  23. {frequency_128000 → BPE_128000}/merges.txt +0 -0
  24. {frequency_128000 → BPE_128000}/merges_data.csv +0 -0
  25. {frequency_128000 → BPE_128000}/special_tokens_map.json +0 -0
  26. {frequency_128000 → BPE_128000}/tokenizer.json +0 -0
  27. {frequency_128000 → BPE_128000}/tokenizer_config.json +0 -0
  28. {frequency_128000 → BPE_128000}/vocab.json +0 -0
  29. {frequency_16000 → BPE_16000}/merges.txt +0 -0
  30. {frequency_16000 → BPE_16000}/merges_data.csv +0 -0
  31. {frequency_16000 → BPE_16000}/special_tokens_map.json +0 -0
  32. {frequency_16000 → BPE_16000}/tokenizer.json +0 -0
  33. {frequency_16000 → BPE_16000}/tokenizer_config.json +0 -0
  34. {frequency_16000 → BPE_16000}/vocab.json +0 -0
  35. {frequency_32000 → BPE_32000}/merges.txt +0 -0
  36. {frequency_32000 → BPE_32000}/merges_data.csv +0 -0
  37. {frequency_32000 → BPE_32000}/special_tokens_map.json +0 -0
  38. {frequency_32000 → BPE_32000}/tokenizer.json +0 -0
  39. {frequency_32000 → BPE_32000}/tokenizer_config.json +0 -0
  40. {frequency_32000 → BPE_32000}/vocab.json +0 -0
  41. {frequency_64000 → BPE_64000}/merges.txt +0 -0
  42. {frequency_64000 → BPE_64000}/merges_data.csv +0 -0
  43. {frequency_64000 → BPE_64000}/special_tokens_map.json +0 -0
  44. {frequency_64000 → BPE_64000}/tokenizer.json +0 -0
  45. {frequency_64000 → BPE_64000}/tokenizer_config.json +0 -0
  46. {frequency_64000 → BPE_64000}/vocab.json +0 -0
  47. {frequency_8064 → BPE_8064}/merges.txt +0 -0
  48. {frequency_8064 → BPE_8064}/merges_data.csv +0 -0
  49. {frequency_8064 → BPE_8064}/special_tokens_map.json +0 -0
  50. {frequency_8064 → BPE_8064}/tokenizer.json +0 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -64,3 +64,6 @@ frequencymultirussian_256000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
64
  frequencymultirussian_256000/vocab.json filter=lfs diff=lfs merge=lfs -text
65
  fw57M_Surprisal_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
66
  fw57M_Entropy_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
64
  frequencymultirussian_256000/vocab.json filter=lfs diff=lfs merge=lfs -text
65
  fw57M_Surprisal_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
66
  fw57M_Entropy_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
67
+ ByteSpanEntropyGlobalIncrement_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
68
+ ByteSpanSurprisalGlobalIncrement_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
69
+ MultiBPE_256000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/special_tokens_map.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer_config.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/vocab.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/special_tokens_map.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer_config.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/vocab.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/special_tokens_map.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer_config.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/vocab.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/special_tokens_map.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer_config.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/vocab.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/special_tokens_map.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer_config.json RENAMED
File without changes
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/vocab.json RENAMED
File without changes
{frequency_128000 → BPE_128000}/merges.txt RENAMED
File without changes
{frequency_128000 → BPE_128000}/merges_data.csv RENAMED
File without changes
{frequency_128000 → BPE_128000}/special_tokens_map.json RENAMED
File without changes
{frequency_128000 → BPE_128000}/tokenizer.json RENAMED
File without changes
{frequency_128000 → BPE_128000}/tokenizer_config.json RENAMED
File without changes
{frequency_128000 → BPE_128000}/vocab.json RENAMED
File without changes
{frequency_16000 → BPE_16000}/merges.txt RENAMED
File without changes
{frequency_16000 → BPE_16000}/merges_data.csv RENAMED
File without changes
{frequency_16000 → BPE_16000}/special_tokens_map.json RENAMED
File without changes
{frequency_16000 → BPE_16000}/tokenizer.json RENAMED
File without changes
{frequency_16000 → BPE_16000}/tokenizer_config.json RENAMED
File without changes
{frequency_16000 → BPE_16000}/vocab.json RENAMED
File without changes
{frequency_32000 → BPE_32000}/merges.txt RENAMED
File without changes
{frequency_32000 → BPE_32000}/merges_data.csv RENAMED
File without changes
{frequency_32000 → BPE_32000}/special_tokens_map.json RENAMED
File without changes
{frequency_32000 → BPE_32000}/tokenizer.json RENAMED
File without changes
{frequency_32000 → BPE_32000}/tokenizer_config.json RENAMED
File without changes
{frequency_32000 → BPE_32000}/vocab.json RENAMED
File without changes
{frequency_64000 → BPE_64000}/merges.txt RENAMED
File without changes
{frequency_64000 → BPE_64000}/merges_data.csv RENAMED
File without changes
{frequency_64000 → BPE_64000}/special_tokens_map.json RENAMED
File without changes
{frequency_64000 → BPE_64000}/tokenizer.json RENAMED
File without changes
{frequency_64000 → BPE_64000}/tokenizer_config.json RENAMED
File without changes
{frequency_64000 → BPE_64000}/vocab.json RENAMED
File without changes
{frequency_8064 → BPE_8064}/merges.txt RENAMED
File without changes
{frequency_8064 → BPE_8064}/merges_data.csv RENAMED
File without changes
{frequency_8064 → BPE_8064}/special_tokens_map.json RENAMED
File without changes
{frequency_8064 → BPE_8064}/tokenizer.json RENAMED
File without changes