nsthorat commited on
Commit
7351822
·
1 Parent(s): ae97a29
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +0 -114
  2. data/.cache/lilac/concept/lilac/legal-termination/cohere.pkl +0 -0
  3. data/.cache/lilac/concept/lilac/legal-termination/gte-base.pkl +0 -0
  4. data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl +0 -0
  5. data/.cache/lilac/concept/lilac/legal-termination/openai.pkl +0 -0
  6. data/.cache/lilac/concept/lilac/legal-termination/palm.pkl +0 -0
  7. data/.cache/lilac/concept/lilac/legal-termination/sbert.pkl +0 -0
  8. data/.cache/lilac/concept/lilac/negative-sentiment/cohere.pkl +0 -3
  9. data/.cache/lilac/concept/lilac/negative-sentiment/gte-base.pkl +0 -0
  10. data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
  11. data/.cache/lilac/concept/lilac/negative-sentiment/openai.pkl +0 -0
  12. data/.cache/lilac/concept/lilac/negative-sentiment/palm.pkl +0 -0
  13. data/.cache/lilac/concept/lilac/negative-sentiment/sbert.pkl +0 -0
  14. data/.cache/lilac/concept/lilac/positive-sentiment/cohere.pkl +0 -3
  15. data/.cache/lilac/concept/lilac/positive-sentiment/gte-base.pkl +0 -0
  16. data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
  17. data/.cache/lilac/concept/lilac/positive-sentiment/openai.pkl +0 -0
  18. data/.cache/lilac/concept/lilac/positive-sentiment/palm.pkl +0 -0
  19. data/.cache/lilac/concept/lilac/positive-sentiment/sbert.pkl +0 -0
  20. data/.cache/lilac/concept/lilac/profanity/cohere.pkl +0 -3
  21. data/.cache/lilac/concept/lilac/profanity/gte-base.pkl +0 -3
  22. data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
  23. data/.cache/lilac/concept/lilac/profanity/openai.pkl +0 -3
  24. data/.cache/lilac/concept/lilac/profanity/palm.pkl +0 -3
  25. data/.cache/lilac/concept/lilac/profanity/sbert.pkl +0 -3
  26. data/.cache/lilac/concept/lilac/toxicity/cohere.pkl +0 -3
  27. data/.cache/lilac/concept/lilac/toxicity/gte-base.pkl +0 -3
  28. data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
  29. data/.cache/lilac/concept/lilac/toxicity/openai.pkl +0 -3
  30. data/.cache/lilac/concept/lilac/toxicity/palm.pkl +0 -3
  31. data/.cache/lilac/concept/lilac/toxicity/sbert.pkl +0 -3
  32. data/datasets/lilac/databricks-dolly-15k-curated-en/config.yml +0 -67
  33. data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet +0 -3
  34. data/datasets/lilac/databricks-dolly-15k-curated-en/manifest.json +0 -87
  35. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin +0 -3
  36. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.lookup.pkl +0 -0
  37. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/signal_manifest.json +0 -40
  38. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/spans.pkl +0 -0
  39. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/lang_detection/data-00000-of-00001.parquet +0 -0
  40. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/lang_detection/signal_manifest.json +0 -36
  41. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/near_dup/data-00000-of-00001.parquet +0 -0
  42. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/near_dup/signal_manifest.json +0 -41
  43. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/pii/data-00000-of-00001.parquet +0 -0
  44. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/pii/signal_manifest.json +0 -50
  45. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/text_statistics/data-00000-of-00001.parquet +0 -0
  46. data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/text_statistics/signal_manifest.json +0 -64
  47. data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/lang_detection/data-00000-of-00001.parquet +0 -0
  48. data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/lang_detection/signal_manifest.json +0 -36
  49. data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/near_dup/data-00000-of-00001.parquet +0 -0
  50. data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/near_dup/signal_manifest.json +0 -41
.gitattributes DELETED
@@ -1,114 +0,0 @@
1
- data/.cache/lilac/concept/lilac/negative-sentiment/cohere.pkl filter=lfs diff=lfs merge=lfs -text
2
- data/.cache/lilac/concept/lilac/positive-sentiment/cohere.pkl filter=lfs diff=lfs merge=lfs -text
3
- data/.cache/lilac/concept/lilac/profanity/cohere.pkl filter=lfs diff=lfs merge=lfs -text
4
- data/.cache/lilac/concept/lilac/profanity/gte-base.pkl filter=lfs diff=lfs merge=lfs -text
5
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
6
- data/.cache/lilac/concept/lilac/profanity/openai.pkl filter=lfs diff=lfs merge=lfs -text
7
- data/.cache/lilac/concept/lilac/profanity/palm.pkl filter=lfs diff=lfs merge=lfs -text
8
- data/.cache/lilac/concept/lilac/profanity/sbert.pkl filter=lfs diff=lfs merge=lfs -text
9
- data/.cache/lilac/concept/lilac/toxicity/cohere.pkl filter=lfs diff=lfs merge=lfs -text
10
- data/.cache/lilac/concept/lilac/toxicity/gte-base.pkl filter=lfs diff=lfs merge=lfs -text
11
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
12
- data/.cache/lilac/concept/lilac/toxicity/openai.pkl filter=lfs diff=lfs merge=lfs -text
13
- data/.cache/lilac/concept/lilac/toxicity/palm.pkl filter=lfs diff=lfs merge=lfs -text
14
- data/.cache/lilac/concept/lilac/toxicity/sbert.pkl filter=lfs diff=lfs merge=lfs -text
15
- data/datasets/local/OpenOrca-100k/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
16
- data/datasets/local/OpenOrca-100k/question/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
17
- data/datasets/local/OpenOrca-100k/question/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
18
- data/datasets/local/OpenOrca-100k/question/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
19
- data/datasets/local/OpenOrca-100k/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
20
- data/datasets/local/OpenOrca-100k/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
21
- data/datasets/local/OpenOrca-100k/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
22
- data/datasets/local/OpenOrca-100k/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
23
- data/datasets/local/OpenOrca-100k/response/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
24
- data/datasets/local/OpenOrca-100k/response/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
25
- data/datasets/local/OpenOrca-100k/response/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
26
- data/datasets/local/OpenOrca-100k/response/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
27
- data/datasets/local/OpenOrca-100k/response/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
28
- data/datasets/local/OpenOrca-100k/response/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
29
- data/datasets/local/OpenOrca-100k/response/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
30
- data/datasets/lilac/piqa/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
31
- data/datasets/lilac/piqa/goal/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
32
- data/datasets/lilac/piqa/goal/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
33
- data/datasets/lilac/piqa/goal/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
34
- data/datasets/lilac/piqa/sol1/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
35
- data/datasets/lilac/piqa/sol1/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
36
- data/datasets/lilac/piqa/sol1/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
37
- data/datasets/lilac/piqa/sol2/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
38
- data/datasets/lilac/piqa/sol2/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
39
- data/datasets/lilac/piqa/sol2/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
40
- data/datasets/lilac/pile-of-law-atticus-contracts/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
41
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
42
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
43
- data/datasets/lilac/mmlu_professional_law/choices/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
44
- data/datasets/lilac/mmlu_professional_law/choices/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
45
- data/datasets/lilac/mmlu_professional_law/choices/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
46
- data/datasets/lilac/mmlu_professional_law/choices/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
47
- data/datasets/lilac/mmlu_professional_law/choices/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
48
- data/datasets/lilac/mmlu_professional_law/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
49
- data/datasets/lilac/mmlu_professional_law/question/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
50
- data/datasets/lilac/mmlu_professional_law/question/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
51
- data/datasets/lilac/mmlu_professional_law/question/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
52
- data/datasets/lilac/mmlu_professional_law/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
53
- data/datasets/lilac/mmlu_professional_law/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
54
- data/datasets/lilac/mmlu_professional_law/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
55
- data/datasets/lilac/mmlu_professional_law/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
56
- data/datasets/lilac/pile-of-law-r-legaladvice/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
57
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
58
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
59
- data/datasets/lilac/pile-of-law-r-legaladvice/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
60
- data/datasets/lilac/pile-of-law-r-legaladvice/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
61
- data/datasets/lilac/pile-of-law-r-legaladvice/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
62
- data/datasets/lilac/pile-of-law-r-legaladvice/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
63
- data/datasets/lilac/pile-of-law-r-legaladvice/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
64
- data/datasets/lilac/open-asssistant-conversations/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
65
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
66
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
67
- data/datasets/lilac/open-asssistant-conversations/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
68
- data/datasets/lilac/open-asssistant-conversations/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
69
- data/datasets/lilac/open-asssistant-conversations/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
70
- data/datasets/lilac/open-asssistant-conversations/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
71
- data/datasets/lilac/open-asssistant-conversations/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
72
- data/datasets/lilac/squad_v2/answers/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
73
- data/datasets/lilac/squad_v2/answers/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
74
- data/datasets/lilac/squad_v2/answers/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
75
- data/datasets/lilac/squad_v2/answers/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
76
- data/datasets/lilac/squad_v2/context/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
77
- data/datasets/lilac/squad_v2/context/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
78
- data/datasets/lilac/squad_v2/context/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
79
- data/datasets/lilac/squad_v2/context/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
80
- data/datasets/lilac/squad_v2/context/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
81
- data/datasets/lilac/squad_v2/context/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
82
- data/datasets/lilac/squad_v2/context/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
83
- data/datasets/lilac/squad_v2/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
84
- data/datasets/lilac/squad_v2/question/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
85
- data/datasets/lilac/squad_v2/question/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
86
- data/datasets/lilac/squad_v2/question/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
87
- data/datasets/lilac/squad_v2/question/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
88
- data/datasets/lilac/imdb/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
89
- data/datasets/lilac/imdb/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
90
- data/datasets/lilac/imdb/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
91
- data/datasets/lilac/imdb/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
92
- data/datasets/lilac/imdb/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
93
- data/datasets/lilac/imdb/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
94
- data/datasets/lilac/imdb/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
95
- data/datasets/lilac/imdb/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
96
- data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
97
- data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
98
- data/datasets/lilac/databricks-dolly-15k-curated-en/original-context/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
99
- data/datasets/lilac/wikitext-2-raw-v1/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
100
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
101
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
102
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
103
- data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
104
- data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
105
- data/datasets/lilac/wikitext-2-raw-v1/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
106
- data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
107
- data/datasets/lilac/medical_dialog/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
108
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
109
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
110
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
111
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
112
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
113
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
114
- data/datasets/lilac/medical_dialog/dialogue_turns/utterance/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/.cache/lilac/concept/lilac/legal-termination/cohere.pkl DELETED
Binary file (610 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-base.pkl DELETED
Binary file (117 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/openai.pkl DELETED
Binary file (231 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/palm.pkl DELETED
Binary file (117 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/sbert.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/cohere.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be0a6613703986f991d3795a6b3cfdccd40b75ce1c6cbf36e852403c8d8a6b7b
3
- size 2088021
 
 
 
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-base.pkl DELETED
Binary file (397 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/openai.pkl DELETED
Binary file (787 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/palm.pkl DELETED
Binary file (397 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/sbert.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/cohere.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d654cd9eb459aba5509d6b21d48e2f2560c84457608a0ddefc0d7e26a685b7ef
3
- size 1857652
 
 
 
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-base.pkl DELETED
Binary file (353 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/openai.pkl DELETED
Binary file (700 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/palm.pkl DELETED
Binary file (353 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/sbert.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/cohere.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d68aeb0d88c4a501e21173ed3e5e53fa4998447ece6812d046c1c48b2595739
3
- size 17413997
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/gte-base.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c4454090ac980db7f02e5f2ff974b0fe75e26982c40e323891fae4b3785d38
3
- size 3301326
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef20b176c6faec04280e7f4fcc5edfd01c8261b4838eab2955885a171e46590d
3
- size 1672960
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/openai.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a8e4c306cbbdbfa1d5dde1504fa67ee8e5ffa02ab77f1e8d06d15b9ea777398
3
- size 6558076
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/palm.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0450ba56a6af797838a696e5d1046e84dbca2d6c6a00724c3bf403465980b1f5
3
- size 3301322
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/sbert.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:44f41da69be02fb8ffe4db5d7d16cdd0e8f4357f8147d4259ae5518863577087
3
- size 1672951
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/cohere.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ec3b151a149f0664813f50ba599001bc3a44cc7b6f8c35a48e213ee9c9682fb
3
- size 19602537
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/gte-base.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd428cfda7f66401cf3b6ec9f1b356c3e0d79ab5bce8675c271e562c24de3580
3
- size 3719127
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9bab578121263c97f51049e82846b7b517b79655bfd9b6ad5fe861dbefd1212
3
- size 1886446
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/openai.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e9be59c226a117c78c32cdb0f4befaf771b74a7af820af25f86053e1cf80614
3
- size 7384512
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/palm.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b334cd21b80823eb0b7a6c177ba2f6ed8cadc504b642e7b89481f0e583dcfece
3
- size 3719128
 
 
 
 
data/.cache/lilac/concept/lilac/toxicity/sbert.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:64540327c019de743fa40b494f21cf1a5a7b0af80df6243eb44b56cf3810ccbf
3
- size 1886440
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/config.yml DELETED
@@ -1,67 +0,0 @@
1
- embeddings:
2
- - embedding: gte-small
3
- path: [new-context, value, '*']
4
- - {embedding: gte-small, path: original-context}
5
- name: databricks-dolly-15k-curated-en
6
- namespace: lilac
7
- settings:
8
- preferred_embedding: gte-small
9
- ui:
10
- media_paths:
11
- - original-instruction
12
- - original-context
13
- - original-response
14
- - [new-instruction, value, '*']
15
- - [new-context, value, '*']
16
- - [new-response, value, '*']
17
- signals:
18
- - path: original-instruction
19
- signal: {signal_name: near_dup}
20
- - path: original-instruction
21
- signal: {signal_name: text_statistics}
22
- - path: original-instruction
23
- signal: {signal_name: pii}
24
- - path: original-instruction
25
- signal: {signal_name: lang_detection}
26
- - path: original-context
27
- signal: {signal_name: near_dup}
28
- - path: original-context
29
- signal: {signal_name: text_statistics}
30
- - path: original-context
31
- signal: {signal_name: lang_detection}
32
- - path: original-context
33
- signal: {signal_name: pii}
34
- - path: original-response
35
- signal: {signal_name: near_dup}
36
- - path: original-response
37
- signal: {signal_name: text_statistics}
38
- - path: original-response
39
- signal: {signal_name: pii}
40
- - path: original-response
41
- signal: {signal_name: lang_detection}
42
- - path: [new-instruction, value, '*']
43
- signal: {signal_name: near_dup}
44
- - path: [new-instruction, value, '*']
45
- signal: {signal_name: text_statistics}
46
- - path: [new-instruction, value, '*']
47
- signal: {signal_name: pii}
48
- - path: [new-instruction, value, '*']
49
- signal: {signal_name: lang_detection}
50
- - path: [new-context, value, '*']
51
- signal: {signal_name: near_dup}
52
- - path: [new-context, value, '*']
53
- signal: {signal_name: text_statistics}
54
- - path: [new-context, value, '*']
55
- signal: {signal_name: lang_detection}
56
- - path: [new-context, value, '*']
57
- signal: {signal_name: pii}
58
- - path: [new-response, value, '*']
59
- signal: {signal_name: near_dup}
60
- - path: [new-response, value, '*']
61
- signal: {signal_name: text_statistics}
62
- - path: [new-response, value, '*']
63
- signal: {signal_name: pii}
64
- - path: [new-response, value, '*']
65
- signal: {signal_name: lang_detection}
66
- source: {dataset_name: argilla/databricks-dolly-15k-curated-en, source_name: huggingface}
67
- tags: [machine-learning]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/data-00000-of-00001.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad225b50d5880a097ea66eb4ca70fc529c0321cf8a5652bd8fbe7a638d016851
3
- size 15882489
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/manifest.json DELETED
@@ -1,87 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "data_schema": {
6
- "fields": {
7
- "id": {
8
- "dtype": "string"
9
- },
10
- "category": {
11
- "dtype": "string"
12
- },
13
- "original-instruction": {
14
- "dtype": "string"
15
- },
16
- "original-context": {
17
- "dtype": "string"
18
- },
19
- "original-response": {
20
- "dtype": "string"
21
- },
22
- "new-instruction": {
23
- "fields": {
24
- "user_id": {
25
- "repeated_field": {
26
- "dtype": "string"
27
- }
28
- },
29
- "value": {
30
- "repeated_field": {
31
- "dtype": "string"
32
- }
33
- },
34
- "status": {
35
- "repeated_field": {
36
- "dtype": "string"
37
- }
38
- }
39
- }
40
- },
41
- "new-context": {
42
- "fields": {
43
- "user_id": {
44
- "repeated_field": {
45
- "dtype": "string"
46
- }
47
- },
48
- "value": {
49
- "repeated_field": {
50
- "dtype": "string"
51
- }
52
- },
53
- "status": {
54
- "repeated_field": {
55
- "dtype": "string"
56
- }
57
- }
58
- }
59
- },
60
- "new-response": {
61
- "fields": {
62
- "user_id": {
63
- "repeated_field": {
64
- "dtype": "string"
65
- }
66
- },
67
- "value": {
68
- "repeated_field": {
69
- "dtype": "string"
70
- }
71
- },
72
- "status": {
73
- "repeated_field": {
74
- "dtype": "string"
75
- }
76
- }
77
- }
78
- },
79
- "external_id": {
80
- "dtype": "string"
81
- },
82
- "__hfsplit__": {
83
- "dtype": "string"
84
- }
85
- }
86
- }
87
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.hnswlib.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd30fd8659fca42a569c12fa3fb37e62b97ae62349f535a5e2aac97d8a5b2750
3
- size 32553584
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/hnsw.lookup.pkl DELETED
Binary file (522 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/signal_manifest.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "files": [],
3
- "parquet_id": "new-context.value.gte-small",
4
- "data_schema": {
5
- "fields": {
6
- "new-context": {
7
- "fields": {
8
- "value": {
9
- "repeated_field": {
10
- "fields": {
11
- "gte-small": {
12
- "repeated_field": {
13
- "fields": {
14
- "embedding": {
15
- "dtype": "embedding"
16
- }
17
- },
18
- "dtype": "string_span"
19
- },
20
- "signal": {
21
- "signal_name": "gte-small"
22
- }
23
- }
24
- }
25
- }
26
- }
27
- }
28
- }
29
- }
30
- },
31
- "signal": {
32
- "signal_name": "gte-small"
33
- },
34
- "enriched_path": [
35
- "new-context",
36
- "value",
37
- "*"
38
- ],
39
- "vector_store": "hnsw"
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/gte-small/spans.pkl DELETED
Binary file (351 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/lang_detection/data-00000-of-00001.parquet DELETED
Binary file (521 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/lang_detection/signal_manifest.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-context.value.lang_detection",
6
- "data_schema": {
7
- "fields": {
8
- "new-context": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "lang_detection": {
14
- "dtype": "string",
15
- "signal": {
16
- "split_by_paragraph": false,
17
- "signal_name": "lang_detection"
18
- }
19
- }
20
- }
21
- }
22
- }
23
- }
24
- }
25
- }
26
- },
27
- "signal": {
28
- "split_by_paragraph": false,
29
- "signal_name": "lang_detection"
30
- },
31
- "enriched_path": [
32
- "new-context",
33
- "value",
34
- "*"
35
- ]
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/near_dup/data-00000-of-00001.parquet DELETED
Binary file (550 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/near_dup/signal_manifest.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-context.value.near_dup",
6
- "data_schema": {
7
- "fields": {
8
- "new-context": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "near_dup": {
14
- "fields": {
15
- "cluster_id": {
16
- "dtype": "uint32",
17
- "categorical": true
18
- }
19
- },
20
- "signal": {
21
- "threshold": 0.85,
22
- "signal_name": "near_dup"
23
- }
24
- }
25
- }
26
- }
27
- }
28
- }
29
- }
30
- }
31
- },
32
- "signal": {
33
- "threshold": 0.85,
34
- "signal_name": "near_dup"
35
- },
36
- "enriched_path": [
37
- "new-context",
38
- "value",
39
- "*"
40
- ]
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/pii/data-00000-of-00001.parquet DELETED
Binary file (519 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/pii/signal_manifest.json DELETED
@@ -1,50 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-context.value.pii",
6
- "data_schema": {
7
- "fields": {
8
- "new-context": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "pii": {
14
- "fields": {
15
- "emails": {
16
- "repeated_field": {
17
- "dtype": "string_span"
18
- }
19
- },
20
- "ip_addresses": {
21
- "repeated_field": {
22
- "dtype": "string_span"
23
- }
24
- },
25
- "secrets": {
26
- "repeated_field": {
27
- "dtype": "string_span"
28
- }
29
- }
30
- },
31
- "signal": {
32
- "signal_name": "pii"
33
- }
34
- }
35
- }
36
- }
37
- }
38
- }
39
- }
40
- }
41
- },
42
- "signal": {
43
- "signal_name": "pii"
44
- },
45
- "enriched_path": [
46
- "new-context",
47
- "value",
48
- "*"
49
- ]
50
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/text_statistics/data-00000-of-00001.parquet DELETED
Binary file (603 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-context/value/text_statistics/signal_manifest.json DELETED
@@ -1,64 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-context.value.text_statistics",
6
- "data_schema": {
7
- "fields": {
8
- "new-context": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "text_statistics": {
14
- "fields": {
15
- "num_characters": {
16
- "dtype": "int32"
17
- },
18
- "readability": {
19
- "dtype": "float32"
20
- },
21
- "log(type_token_ratio)": {
22
- "dtype": "float32"
23
- },
24
- "frac_non_ascii": {
25
- "dtype": "float32",
26
- "bins": [
27
- [
28
- "Low",
29
- null,
30
- 0.15
31
- ],
32
- [
33
- "Medium",
34
- 0.15,
35
- 0.3
36
- ],
37
- [
38
- "High",
39
- 0.3,
40
- null
41
- ]
42
- ]
43
- }
44
- },
45
- "signal": {
46
- "signal_name": "text_statistics"
47
- }
48
- }
49
- }
50
- }
51
- }
52
- }
53
- }
54
- }
55
- },
56
- "signal": {
57
- "signal_name": "text_statistics"
58
- },
59
- "enriched_path": [
60
- "new-context",
61
- "value",
62
- "*"
63
- ]
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/lang_detection/data-00000-of-00001.parquet DELETED
Binary file (521 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/lang_detection/signal_manifest.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-instruction.value.lang_detection",
6
- "data_schema": {
7
- "fields": {
8
- "new-instruction": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "lang_detection": {
14
- "dtype": "string",
15
- "signal": {
16
- "split_by_paragraph": false,
17
- "signal_name": "lang_detection"
18
- }
19
- }
20
- }
21
- }
22
- }
23
- }
24
- }
25
- }
26
- },
27
- "signal": {
28
- "split_by_paragraph": false,
29
- "signal_name": "lang_detection"
30
- },
31
- "enriched_path": [
32
- "new-instruction",
33
- "value",
34
- "*"
35
- ]
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/near_dup/data-00000-of-00001.parquet DELETED
Binary file (602 kB)
 
data/datasets/lilac/databricks-dolly-15k-curated-en/new-instruction/value/near_dup/signal_manifest.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "files": [
3
- "data-00000-of-00001.parquet"
4
- ],
5
- "parquet_id": "new-instruction.value.near_dup",
6
- "data_schema": {
7
- "fields": {
8
- "new-instruction": {
9
- "fields": {
10
- "value": {
11
- "repeated_field": {
12
- "fields": {
13
- "near_dup": {
14
- "fields": {
15
- "cluster_id": {
16
- "dtype": "uint32",
17
- "categorical": true
18
- }
19
- },
20
- "signal": {
21
- "threshold": 0.85,
22
- "signal_name": "near_dup"
23
- }
24
- }
25
- }
26
- }
27
- }
28
- }
29
- }
30
- }
31
- },
32
- "signal": {
33
- "threshold": 0.85,
34
- "signal_name": "near_dup"
35
- },
36
- "enriched_path": [
37
- "new-instruction",
38
- "value",
39
- "*"
40
- ]
41
- }