Zeb committed on
Commit
fde09f8
·
1 Parent(s): ce169aa

Rename frequency tokenizers

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. frequency_128000/merges.txt +0 -0
  2. frequency_128000/merges_data.csv +0 -0
  3. frequency_128000/tokenizer.json +0 -0
  4. frequency_128000/vocab.json +0 -0
  5. frequency_16000/merges.txt +0 -0
  6. frequency_16000/merges_data.csv +0 -0
  7. frequency_16000/tokenizer.json +0 -0
  8. frequency_16000/vocab.json +0 -0
  9. frequency_256000/merges.txt +0 -0
  10. frequency_256000/merges_data.csv +0 -0
  11. frequency_256000/tokenizer.json +2 -2
  12. frequency_256000/vocab.json +0 -0
  13. frequency_32000/merges.txt +0 -0
  14. frequency_32000/merges_data.csv +0 -0
  15. frequency_32000/tokenizer.json +0 -0
  16. frequency_32000/vocab.json +0 -0
  17. frequency_64000/merges.txt +0 -0
  18. frequency_64000/merges_data.csv +0 -0
  19. frequency_64000/tokenizer.json +0 -0
  20. frequency_64000/vocab.json +0 -0
  21. frequency_8064/merges.txt +0 -0
  22. frequency_8064/merges_data.csv +0 -0
  23. frequency_8064/tokenizer.json +0 -0
  24. frequency_8064/vocab.json +0 -0
  25. fw57M_Entropy_frequency_128000/merges.txt +0 -0
  26. fw57M_Entropy_frequency_128000/merges_data.csv +0 -0
  27. fw57M_Entropy_frequency_128000/special_tokens_map.json +0 -4
  28. fw57M_Entropy_frequency_128000/tokenizer.json +0 -0
  29. fw57M_Entropy_frequency_128000/tokenizer_config.json +0 -29
  30. fw57M_Entropy_frequency_128000/vocab.json +0 -0
  31. fw57M_Entropy_frequency_16000/merges.txt +0 -0
  32. fw57M_Entropy_frequency_16000/merges_data.csv +0 -0
  33. fw57M_Entropy_frequency_16000/special_tokens_map.json +0 -4
  34. fw57M_Entropy_frequency_16000/tokenizer.json +0 -0
  35. fw57M_Entropy_frequency_16000/tokenizer_config.json +0 -29
  36. fw57M_Entropy_frequency_16000/vocab.json +0 -0
  37. fw57M_Entropy_frequency_256000/merges.txt +0 -0
  38. fw57M_Entropy_frequency_256000/merges_data.csv +0 -0
  39. fw57M_Entropy_frequency_256000/special_tokens_map.json +0 -4
  40. fw57M_Entropy_frequency_256000/tokenizer.json +0 -3
  41. fw57M_Entropy_frequency_256000/tokenizer_config.json +0 -29
  42. fw57M_Entropy_frequency_256000/vocab.json +0 -0
  43. fw57M_Entropy_frequency_32000/merges.txt +0 -0
  44. fw57M_Entropy_frequency_32000/merges_data.csv +0 -0
  45. fw57M_Entropy_frequency_32000/special_tokens_map.json +0 -4
  46. fw57M_Entropy_frequency_32000/tokenizer.json +0 -0
  47. fw57M_Entropy_frequency_32000/tokenizer_config.json +0 -29
  48. fw57M_Entropy_frequency_32000/vocab.json +0 -0
  49. fw57M_Entropy_frequency_64000/merges.txt +0 -0
  50. fw57M_Entropy_frequency_64000/merges_data.csv +0 -0
frequency_128000/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_128000/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_128000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_128000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_16000/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_16000/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_16000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_16000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_256000/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_256000/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_256000/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f820f33e2063231e65c2449292054629b3ece73ab9b4aba8d740f5c9186741d
3
- size 19624004
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47a19d76ccd8400f78de8cb1bb0cc81e948596a5a9337c90d739c81a7c59ce8b
3
+ size 19623518
frequency_256000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_32000/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_32000/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_32000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_32000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_64000/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_64000/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_64000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_64000/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_8064/merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_8064/merges_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_8064/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
frequency_8064/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_128000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_128000/merges_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_128000/special_tokens_map.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "eos_token": "<|endoftext|>",
3
- "pad_token": "<|padding|>"
4
- }
 
 
 
 
 
fw57M_Entropy_frequency_128000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_128000/tokenizer_config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- }
20
- },
21
- "bos_token": null,
22
- "clean_up_tokenization_spaces": false,
23
- "eos_token": "<|endoftext|>",
24
- "extra_special_tokens": {},
25
- "model_max_length": 1000000000000000019884624838656,
26
- "pad_token": "<|padding|>",
27
- "tokenizer_class": "PreTrainedTokenizer",
28
- "unk_token": null
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_frequency_128000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_16000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_16000/merges_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_16000/special_tokens_map.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "eos_token": "<|endoftext|>",
3
- "pad_token": "<|padding|>"
4
- }
 
 
 
 
 
fw57M_Entropy_frequency_16000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_16000/tokenizer_config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- }
20
- },
21
- "bos_token": null,
22
- "clean_up_tokenization_spaces": false,
23
- "eos_token": "<|endoftext|>",
24
- "extra_special_tokens": {},
25
- "model_max_length": 1000000000000000019884624838656,
26
- "pad_token": "<|padding|>",
27
- "tokenizer_class": "PreTrainedTokenizer",
28
- "unk_token": null
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_frequency_16000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_256000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_256000/merges_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_256000/special_tokens_map.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "eos_token": "<|endoftext|>",
3
- "pad_token": "<|padding|>"
4
- }
 
 
 
 
 
fw57M_Entropy_frequency_256000/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:47a19d76ccd8400f78de8cb1bb0cc81e948596a5a9337c90d739c81a7c59ce8b
3
- size 19623518
 
 
 
 
fw57M_Entropy_frequency_256000/tokenizer_config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- }
20
- },
21
- "bos_token": null,
22
- "clean_up_tokenization_spaces": false,
23
- "eos_token": "<|endoftext|>",
24
- "extra_special_tokens": {},
25
- "model_max_length": 1000000000000000019884624838656,
26
- "pad_token": "<|padding|>",
27
- "tokenizer_class": "PreTrainedTokenizer",
28
- "unk_token": null
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_frequency_256000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_32000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_32000/merges_data.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_32000/special_tokens_map.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "eos_token": "<|endoftext|>",
3
- "pad_token": "<|padding|>"
4
- }
 
 
 
 
 
fw57M_Entropy_frequency_32000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_32000/tokenizer_config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- }
20
- },
21
- "bos_token": null,
22
- "clean_up_tokenization_spaces": false,
23
- "eos_token": "<|endoftext|>",
24
- "extra_special_tokens": {},
25
- "model_max_length": 1000000000000000019884624838656,
26
- "pad_token": "<|padding|>",
27
- "tokenizer_class": "PreTrainedTokenizer",
28
- "unk_token": null
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_frequency_32000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_64000/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_frequency_64000/merges_data.csv DELETED
The diff for this file is too large to render. See raw diff