diff --git "a/data/lilac.yml" "b/data/lilac.yml" --- "a/data/lilac.yml" +++ "b/data/lilac.yml" @@ -30,6 +30,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -37,6 +38,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: text @@ -44,6 +46,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: text @@ -51,6 +54,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: text @@ -58,6 +62,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: text @@ -65,6 +70,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: text @@ -72,6 +78,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -79,11 +86,148 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: text signal: signal_name: text_statistics + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score settings: ui: media_paths: @@ -121,6 +265,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -128,6 +273,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: text @@ -135,6 +281,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: text @@ -142,6 +289,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: text @@ -149,6 +297,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: text @@ -156,6 +305,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: text @@ -163,6 +313,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -170,6 +321,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -177,11 +329,76 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: text signal: signal_name: text_statistics + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score settings: ui: media_paths: @@ -222,6 +439,71 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: text + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: text @@ -229,6 +511,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -236,6 +519,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: text @@ -243,6 +527,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: text @@ -250,6 +535,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: text @@ -257,6 +543,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: text @@ -264,6 +551,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: text @@ -271,6 +559,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score settings: @@ -424,6 +713,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: original-instruction @@ -431,6 +721,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: original-instruction @@ -438,6 +729,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: original-instruction @@ -445,6 +737,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: original-instruction @@ -452,6 +745,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: original-instruction @@ -459,6 +753,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: original-instruction @@ -466,6 +761,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: original-instruction @@ -473,6 +769,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: original-context @@ -480,6 +777,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: original-context @@ -487,6 +785,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: original-context @@ -494,6 +793,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: original-context @@ -501,6 +801,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: original-context @@ -508,6 +809,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: original-context @@ -515,6 +817,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: original-context @@ -522,6 +825,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: original-context @@ -529,6 +833,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: original-response @@ -536,6 +841,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: original-response @@ -543,6 +849,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: original-response @@ -550,6 +857,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: original-response @@ -557,6 +865,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: original-response @@ -564,6 +873,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: original-response @@ -571,6 +881,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: original-response @@ -578,6 +889,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: original-response @@ -585,6 +897,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: @@ -595,6 +908,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: @@ -605,6 +919,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: @@ -615,6 +930,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: @@ -625,6 +941,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: @@ -635,6 +952,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: @@ -645,6 +963,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: @@ -655,6 +974,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: @@ -665,6 +985,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: @@ -675,6 +996,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: @@ -685,6 +1007,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: @@ -695,6 +1018,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: @@ -705,6 +1029,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: @@ -715,6 +1040,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: @@ -725,6 +1051,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: @@ -735,6 +1062,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: @@ -745,6 +1073,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - path: @@ -755,6 +1084,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - path: @@ -765,6 +1095,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - path: @@ -775,6 +1106,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - path: @@ -785,6 +1117,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - path: @@ -795,6 +1128,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - path: @@ -805,6 +1139,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - path: @@ -815,6 +1150,7 @@ datasets: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - path: @@ -825,649 +1161,3154 @@ datasets: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - settings: - ui: - media_paths: - - original-instruction - - original-context - - original-response - - - new-instruction - - value - - '*' - - - new-context - - value - - '*' - - - new-response - - value - - '*' - markdown_paths: [] - preferred_embedding: gte-small - - namespace: lilac - name: piqa - tags: [] - source: - dataset_name: piqa - config_name: null - split: null - sample_size: null + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-context + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-context + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-instruction + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: original-response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-instruction + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - new-response + - value + - '*' + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + settings: + ui: + media_paths: + - original-instruction + - original-context + - original-response + - - new-instruction + - value + - '*' + - - new-context + - value + - '*' + - - new-response + - value + - '*' + markdown_paths: [] + preferred_embedding: gte-small + - namespace: lilac + name: piqa + tags: [] + source: + dataset_name: piqa + config_name: null + split: null + sample_size: null + revision: null + load_from_disk: false + source_name: huggingface + embeddings: + - path: goal + embedding: gte-small + - path: sol1 + embedding: gte-small + - path: sol2 + embedding: gte-small + signals: + - path: goal + signal: + threshold: 0.85 + signal_name: near_dup + - path: goal + signal: + signal_name: pii + - path: goal + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: goal + signal: + signal_name: text_statistics + - path: sol1 + signal: + threshold: 0.85 + signal_name: near_dup + - path: sol1 + signal: + signal_name: pii + - path: sol1 + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: sol1 + signal: + signal_name: text_statistics + - path: sol2 + signal: + threshold: 0.85 + signal_name: near_dup + - path: sol2 + signal: + signal_name: pii + - path: sol2 + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: sol2 + signal: + signal_name: text_statistics + settings: + ui: + media_paths: + - sol1 + - sol2 + - goal + markdown_paths: [] + preferred_embedding: gte-small + - namespace: lilac + name: OpenOrca-100k + tags: [] + source: + dataset_name: Open-Orca/OpenOrca + config_name: null + split: null + sample_size: 100000 + revision: null + load_from_disk: false + source_name: huggingface + embeddings: + - path: question + embedding: gte-small + - path: response + embedding: gte-small + signals: + - path: question + signal: + threshold: 0.85 + signal_name: near_dup + - path: question + signal: + signal_name: pii + - path: question + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: question + signal: + signal_name: text_statistics + - path: response + signal: + threshold: 0.85 + signal_name: near_dup + - path: response + signal: + signal_name: pii + - path: response + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: response + signal: + signal_name: text_statistics + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: question + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: response + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + settings: + ui: + media_paths: + - question + - response + markdown_paths: [] + preferred_embedding: gte-small + - namespace: lilac + name: opus100-en-es-validation + tags: [] + source: + dataset_name: opus100 + config_name: en-es + split: validation + sample_size: null revision: null load_from_disk: false source_name: huggingface embeddings: - - path: goal - embedding: gte-small - - path: sol1 + - path: + - translation + - en embedding: gte-small - - path: sol2 + - path: + - translation + - es embedding: gte-small signals: - - path: goal + - path: + - translation + - en signal: threshold: 0.85 signal_name: near_dup - - path: goal + - path: + - translation + - en signal: signal_name: pii - - path: goal + - path: + - translation + - en + signal: + split_by_paragraph: false + signal_name: lang_detection + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + signal_name: text_statistics + - path: + - translation + - es + signal: + threshold: 0.85 + signal_name: near_dup + - path: + - translation + - es + signal: + signal_name: pii + - path: + - translation + - es signal: split_by_paragraph: false signal_name: lang_detection - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - - path: goal + - path: + - translation + - es signal: signal_name: text_statistics - - path: sol1 + - path: + - translation + - es signal: - threshold: 0.85 - signal_name: near_dup - - path: sol1 + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: + - translation + - es signal: - signal_name: pii - - path: sol1 + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - translation + - es signal: - split_by_paragraph: false - signal_name: lang_detection - - path: sol1 + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - translation + - es signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - es signal: embedding: gte-small namespace: lilac - concept_name: non-english + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - es signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: question + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - es signal: embedding: gte-small namespace: lilac - concept_name: question + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - es signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - en signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - en signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en signal: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - - path: sol1 + - path: + - translation + - en signal: - signal_name: text_statistics - - path: sol2 + embedding: gte-small + namespace: lilac + concept_name: question + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: + - translation + - en + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + settings: + ui: + media_paths: + - - translation + - es + - - translation + - en + markdown_paths: [] + preferred_embedding: gte-small + - namespace: lilac + name: science-qa-derek-thomas + tags: [] + source: + dataset_name: derek-thomas/ScienceQA + config_name: null + split: null + sample_size: null + revision: null + load_from_disk: false + source_name: huggingface + embeddings: + - path: lecture + embedding: gte-small + signals: + - path: lecture signal: threshold: 0.85 signal_name: near_dup - - path: sol2 + - path: lecture signal: signal_name: pii - - path: sol2 + - path: lecture signal: split_by_paragraph: false signal_name: lang_detection - - path: sol2 + - path: lecture + signal: + signal_name: text_statistics + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: positive-sentiment + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: lecture signal: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: profanity + version: null + draft: main + signal_name: concept_score + - path: lecture signal: embedding: gte-small namespace: lilac concept_name: question + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: source-code + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: toxicity + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: legal-termination + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: negative-sentiment + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: non-english + version: null + draft: main + signal_name: concept_score + - path: lecture + signal: + embedding: gte-small + namespace: lilac + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: question + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: negative-sentiment + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: sol2 + - path: lecture signal: embedding: gte-small namespace: lilac - concept_name: profanity + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: sol2 - signal: - signal_name: text_statistics settings: ui: media_paths: - - sol1 - - sol2 - - goal + - lecture markdown_paths: [] preferred_embedding: gte-small - namespace: lilac - name: OpenOrca-100k + name: enron-emails tags: [] source: - dataset_name: Open-Orca/OpenOrca - config_name: null + dataset_name: EleutherAI/pile + config_name: enron_emails split: null sample_size: 100000 revision: null load_from_disk: false source_name: huggingface embeddings: - - path: question - embedding: gte-small - - path: response + - path: text embedding: gte-small signals: - - path: question + - path: text signal: threshold: 0.85 signal_name: near_dup - - path: question + - path: text signal: signal_name: pii - - path: question + - path: text signal: split_by_paragraph: false signal_name: lang_detection - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - - path: question + - path: text signal: signal_name: text_statistics - - path: response - signal: - threshold: 0.85 - signal_name: near_dup - - path: response - signal: - signal_name: pii - - path: response - signal: - split_by_paragraph: false - signal_name: lang_detection - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: positive-sentiment + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: non-english + concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: non-english + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: question + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: question + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: negative-sentiment + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: response + - path: text signal: embedding: gte-small namespace: lilac - concept_name: profanity + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: response - signal: - signal_name: text_statistics settings: ui: media_paths: - - question - - response + - text markdown_paths: [] preferred_embedding: gte-small - namespace: lilac - name: opus100-en-es-validation + name: the_movies_dataset tags: [] source: - dataset_name: opus100 - config_name: en-es - split: validation - sample_size: null - revision: null - load_from_disk: false - source_name: huggingface + filepaths: + - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv + delim: ',' + header: true + names: null + source_name: csv embeddings: - - path: - - translation - - en - embedding: gte-small - - path: - - translation - - es + - path: overview embedding: gte-small signals: - - path: - - translation - - en + - path: overview signal: threshold: 0.85 signal_name: near_dup - - path: - - translation - - en + - path: overview signal: signal_name: pii - - path: - - translation - - en + - path: overview signal: split_by_paragraph: false signal_name: lang_detection - - path: - - translation - - en + - path: overview + signal: + signal_name: text_statistics + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: positive-sentiment + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: non-english + concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: non-english + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: question + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: question + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: negative-sentiment + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: - - translation - - en + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: profanity + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: - - translation - - en - signal: - signal_name: text_statistics - - path: - - translation - - es - signal: - threshold: 0.85 - signal_name: near_dup - - path: - - translation - - es - signal: - signal_name: pii - - path: - - translation - - es - signal: - split_by_paragraph: false - signal_name: lang_detection - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: positive-sentiment + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: non-english + concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: non-english + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: question + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: question + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: negative-sentiment + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: - - translation - - es + - path: overview signal: embedding: gte-small namespace: lilac - concept_name: profanity + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: - - translation - - es - signal: - signal_name: text_statistics settings: ui: media_paths: - - - translation - - es - - - translation - - en + - overview markdown_paths: [] preferred_embedding: gte-small - namespace: lilac - name: science-qa-derek-thomas + name: textbook_quality_programming tags: [] source: - dataset_name: derek-thomas/ScienceQA + dataset_name: vikp/textbook_quality_programming config_name: null split: null sample_size: null @@ -1475,289 +4316,299 @@ datasets: load_from_disk: false source_name: huggingface embeddings: - - path: lecture + - path: + - outline + - '*' + embedding: gte-small + - path: + - concepts + - '*' + embedding: gte-small + - path: markdown embedding: gte-small signals: - - path: lecture + - path: + - outline + - '*' + signal: + signal_name: pii + - path: + - outline + - '*' + signal: + signal_name: text_statistics + - path: + - outline + - '*' signal: threshold: 0.85 signal_name: near_dup - - path: lecture - signal: - signal_name: pii - - path: lecture + - path: + - outline + - '*' signal: split_by_paragraph: false signal_name: lang_detection - - path: lecture - signal: - signal_name: text_statistics - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - - path: lecture + - path: + - outline + - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - settings: - ui: - media_paths: - - lecture - markdown_paths: [] - preferred_embedding: gte-small - - namespace: lilac - name: enron-emails - tags: [] - source: - dataset_name: EleutherAI/pile - config_name: enron_emails - split: null - sample_size: 100000 - revision: null - load_from_disk: false - source_name: huggingface - embeddings: - - path: text - embedding: gte-small - signals: - - path: text + - path: + - concepts + - '*' + signal: + signal_name: pii + - path: + - concepts + - '*' + signal: + signal_name: text_statistics + - path: + - concepts + - '*' signal: threshold: 0.85 signal_name: near_dup - - path: text - signal: - signal_name: pii - - path: text + - path: + - concepts + - '*' signal: split_by_paragraph: false signal_name: lang_detection - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: positive-sentiment + concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: non-english + concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: toxicity + concept_name: non-english + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: question + concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: legal-termination + concept_name: profanity + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: source-code + concept_name: question + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: negative-sentiment + concept_name: source-code + version: null draft: main signal_name: concept_score - - path: text + - path: + - concepts + - '*' signal: embedding: gte-small namespace: lilac - concept_name: profanity + concept_name: toxicity + version: null draft: main signal_name: concept_score - - path: text + - path: markdown + signal: + signal_name: pii + - path: markdown signal: signal_name: text_statistics - settings: - ui: - media_paths: - - text - markdown_paths: [] - preferred_embedding: gte-small - - namespace: lilac - name: the_movies_dataset - tags: [] - source: - filepaths: - - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv - delim: ',' - header: true - names: null - source_name: csv - embeddings: - - path: overview - embedding: gte-small - signals: - - path: overview + - path: markdown signal: threshold: 0.85 signal_name: near_dup - - path: overview - signal: - signal_name: pii - - path: overview + - path: markdown signal: split_by_paragraph: false signal_name: lang_detection - - path: overview - signal: - signal_name: text_statistics - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: legal-termination + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: non-english + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: profanity + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: question + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: source-code + version: null draft: main signal_name: concept_score - - path: overview + - path: markdown signal: embedding: gte-small namespace: lilac concept_name: toxicity + version: null draft: main signal_name: concept_score - settings: - ui: - media_paths: - - overview - markdown_paths: [] - preferred_embedding: gte-small - - namespace: lilac - name: textbook_quality_programming - tags: [] - source: - dataset_name: vikp/textbook_quality_programming - config_name: null - split: null - sample_size: null - revision: null - load_from_disk: false - source_name: huggingface - embeddings: - - path: - - outline - - '*' - embedding: gte-small - - path: - - concepts - - '*' - embedding: gte-small - - path: markdown - embedding: gte-small - signals: [] settings: ui: media_paths: