Zeb
committed on
Commit
·
583e43f
1
Parent(s):
8b5bd01
Rename all tokenizers
Browse files · This view is limited to 50 files because it contains too many changes.
See raw diff
- .DS_Store +0 -0
- .gitattributes +3 -0
- {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/special_tokens_map.json +0 -0
- {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer.json +0 -0
- {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer_config.json +0 -0
- {fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/vocab.json +0 -0
- {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/special_tokens_map.json +0 -0
- {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer.json +0 -0
- {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer_config.json +0 -0
- {fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/vocab.json +0 -0
- {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/special_tokens_map.json +0 -0
- {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer.json +0 -0
- {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer_config.json +0 -0
- {fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/vocab.json +0 -0
- {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/special_tokens_map.json +0 -0
- {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer.json +0 -0
- {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer_config.json +0 -0
- {fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/vocab.json +0 -0
- {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/special_tokens_map.json +0 -0
- {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer.json +0 -0
- {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer_config.json +0 -0
- {fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/vocab.json +0 -0
- {frequency_128000 → BPE_128000}/merges.txt +0 -0
- {frequency_128000 → BPE_128000}/merges_data.csv +0 -0
- {frequency_128000 → BPE_128000}/special_tokens_map.json +0 -0
- {frequency_128000 → BPE_128000}/tokenizer.json +0 -0
- {frequency_128000 → BPE_128000}/tokenizer_config.json +0 -0
- {frequency_128000 → BPE_128000}/vocab.json +0 -0
- {frequency_16000 → BPE_16000}/merges.txt +0 -0
- {frequency_16000 → BPE_16000}/merges_data.csv +0 -0
- {frequency_16000 → BPE_16000}/special_tokens_map.json +0 -0
- {frequency_16000 → BPE_16000}/tokenizer.json +0 -0
- {frequency_16000 → BPE_16000}/tokenizer_config.json +0 -0
- {frequency_16000 → BPE_16000}/vocab.json +0 -0
- {frequency_32000 → BPE_32000}/merges.txt +0 -0
- {frequency_32000 → BPE_32000}/merges_data.csv +0 -0
- {frequency_32000 → BPE_32000}/special_tokens_map.json +0 -0
- {frequency_32000 → BPE_32000}/tokenizer.json +0 -0
- {frequency_32000 → BPE_32000}/tokenizer_config.json +0 -0
- {frequency_32000 → BPE_32000}/vocab.json +0 -0
- {frequency_64000 → BPE_64000}/merges.txt +0 -0
- {frequency_64000 → BPE_64000}/merges_data.csv +0 -0
- {frequency_64000 → BPE_64000}/special_tokens_map.json +0 -0
- {frequency_64000 → BPE_64000}/tokenizer.json +0 -0
- {frequency_64000 → BPE_64000}/tokenizer_config.json +0 -0
- {frequency_64000 → BPE_64000}/vocab.json +0 -0
- {frequency_8064 → BPE_8064}/merges.txt +0 -0
- {frequency_8064 → BPE_8064}/merges_data.csv +0 -0
- {frequency_8064 → BPE_8064}/special_tokens_map.json +0 -0
- {frequency_8064 → BPE_8064}/tokenizer.json +0 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
.gitattributes
CHANGED
@@ -64,3 +64,6 @@ frequencymultirussian_256000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
64 |
frequencymultirussian_256000/vocab.json filter=lfs diff=lfs merge=lfs -text
|
65 |
fw57M_Surprisal_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
66 |
fw57M_Entropy_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
64 |
frequencymultirussian_256000/vocab.json filter=lfs diff=lfs merge=lfs -text
|
65 |
fw57M_Surprisal_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
66 |
fw57M_Entropy_thresholdB_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
67 |
+
ByteSpanEntropyGlobalIncrement_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
68 |
+
ByteSpanSurprisalGlobalIncrement_64000/stats.csv filter=lfs diff=lfs merge=lfs -text
|
69 |
+
MultiBPE_256000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/special_tokens_map.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/tokenizer_config.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_128000 → BPEWP_128000}/vocab.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/special_tokens_map.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/tokenizer_config.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_16000 → BPEWP_16000}/vocab.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/special_tokens_map.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/tokenizer_config.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_32000 → BPEWP_32000}/vocab.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/special_tokens_map.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/tokenizer_config.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_64000 → BPEWP_64000}/vocab.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/special_tokens_map.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/tokenizer_config.json
RENAMED
File without changes
|
{fw57M_Entropy_bytespanP0-0_8064 → BPEWP_8064}/vocab.json
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/merges.txt
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/merges_data.csv
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/special_tokens_map.json
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/tokenizer.json
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/tokenizer_config.json
RENAMED
File without changes
|
{frequency_128000 → BPE_128000}/vocab.json
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/merges.txt
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/merges_data.csv
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/special_tokens_map.json
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/tokenizer.json
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/tokenizer_config.json
RENAMED
File without changes
|
{frequency_16000 → BPE_16000}/vocab.json
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/merges.txt
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/merges_data.csv
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/special_tokens_map.json
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/tokenizer.json
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/tokenizer_config.json
RENAMED
File without changes
|
{frequency_32000 → BPE_32000}/vocab.json
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/merges.txt
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/merges_data.csv
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/special_tokens_map.json
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/tokenizer.json
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/tokenizer_config.json
RENAMED
File without changes
|
{frequency_64000 → BPE_64000}/vocab.json
RENAMED
File without changes
|
{frequency_8064 → BPE_8064}/merges.txt
RENAMED
File without changes
|
{frequency_8064 → BPE_8064}/merges_data.csv
RENAMED
File without changes
|
{frequency_8064 → BPE_8064}/special_tokens_map.json
RENAMED
File without changes
|
{frequency_8064 → BPE_8064}/tokenizer.json
RENAMED
File without changes
|