ping98k committed on
Commit
3ebfd79
·
1 Parent(s): 3d2b0c3

Refactor heatmap event handling by modularizing utility functions and improving code organization; update index.html to enhance user input sections.

Browse files
Files changed (2) hide show
  1. heatmap_event.js +57 -75
  2. index.html +3 -0
heatmap_event.js CHANGED
@@ -4,6 +4,53 @@ import { plotHeatmap } from './plotting.js';
4
 
5
  const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  export async function handleHeatmapEvent() {
8
  const progressBar = document.getElementById("progress-bar");
9
  const progressBarInner = document.getElementById("progress-bar-inner");
@@ -65,99 +112,34 @@ export async function handleHeatmapEvent() {
65
  // First group is search then follow by hight sim group
66
  // in each group order by high sim line
67
  if (searchIdx !== -1 && search_by_max_search_line) {
68
- const searchLines = groups[searchIdx]
69
- .split("\n")
70
- .filter(l => l && !l.startsWith("##"));
71
  const searchEmbeds = await getLineEmbeddings(searchLines, task);
72
-
73
- const cleanGroups = groups.map(g =>
74
- g.split("\n").filter(l => l && !l.startsWith("##"))
75
- );
76
- const allLines = cleanGroups.flat();
77
- const allEmbeds = await getLineEmbeddings(allLines, task);
78
-
79
- const cosine = (a, b) => {
80
- let dot = 0, na = 0, nb = 0;
81
- for (let i = 0; i < a.length; i++) {
82
- dot += a[i] * b[i];
83
- na += a[i] * a[i];
84
- nb += b[i] * b[i];
85
- }
86
- return na && nb ? dot / Math.sqrt(na * nb) : 0;
87
- };
88
-
89
- const score = e =>
90
- Math.max(...searchEmbeds.map(se => cosine(se, e)));
91
-
92
- const idxByGroup = [];
93
- let p = 0;
94
- for (const g of cleanGroups) {
95
- idxByGroup.push(Array.from({ length: g.length }, (_, i) => p + i));
96
- p += g.length;
97
- }
98
-
99
  const sorted = order.map(g =>
100
  idxByGroup[g]
101
  .map(i => ({ t: allLines[i], s: score(allEmbeds[i]) }))
102
  .sort((a, b) => b.s - a.s)
103
  .map(o => o.t)
104
  );
105
-
106
- const finalText = order
107
- .map((gIdx, i) => {
108
- const header =
109
- clusterNames?.length === n ? clusterNames[gIdx] : `Group ${i + 1}`;
110
- return `## ${header}\n${sorted[i].join("\n")}`;
111
- })
112
- .join("\n\n\n");
113
-
114
  document.getElementById("input").value = finalText;
115
  }
116
 
117
  if (searchIdx !== -1 && search_by_max_search_group) {
118
  const refEmbed = groupEmbeddings[searchIdx];
119
-
120
- const cleanGroups = groups.map(g =>
121
- g.split("\n").filter(l => l && !l.startsWith("##"))
122
- );
123
-
124
- const allLines = cleanGroups.flat();
125
- const allEmbeds = await getLineEmbeddings(allLines, task);
126
-
127
- const idxByGroup = [];
128
- let p = 0;
129
- for (const g of cleanGroups) {
130
- idxByGroup.push(Array.from({ length: g.length }, (_, i) => p + i));
131
- p += g.length;
132
- }
133
-
134
- const cosine = (a, b) => {
135
- let dot = 0,
136
- na = 0,
137
- nb = 0;
138
- for (let i = 0; i < a.length; i++) {
139
- dot += a[i] * b[i];
140
- na += a[i] * a[i];
141
- nb += b[i] * b[i];
142
- }
143
- return na && nb ? dot / Math.sqrt(na * nb) : 0;
144
- };
145
-
146
  const sortedLines = order.map(gIdx =>
147
  idxByGroup[gIdx]
148
  .map(i => ({ t: allLines[i], s: cosine(refEmbed, allEmbeds[i]) }))
149
  .sort((a, b) => b.s - a.s)
150
  .map(o => o.t)
151
  );
152
-
153
- const finalText = order
154
- .map((gIdx, i) => {
155
- const header =
156
- clusterNames?.length === n ? clusterNames[gIdx] : `Group ${i + 1}`;
157
- return `## ${header}\n${sortedLines[i].join("\n")}`;
158
- })
159
- .join("\n\n\n");
160
-
161
  document.getElementById("input").value = finalText;
162
  }
163
  progressBarInner.style.width = "100%";
 
4
 
5
  const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";
6
 
7
+ // Cosine similarity between two vectors
8
+ function cosine(a, b) {
9
+ let dot = 0, na = 0, nb = 0;
10
+ for (let i = 0; i < a.length; i++) {
11
+ dot += a[i] * b[i];
12
+ na += a[i] * a[i];
13
+ nb += b[i] * b[i];
14
+ }
15
+ return na && nb ? dot / Math.sqrt(na * nb) : 0;
16
+ }
17
+
18
+ // Remove headers and split groups into arrays of lines
19
+ function getCleanGroups(groups) {
20
+ return groups.map(g =>
21
+ g.split("\n").filter(l => l && !l.startsWith("##"))
22
+ );
23
+ }
24
+
25
+ // Flatten all lines and get their embeddings
26
+ async function getAllLinesAndEmbeds(cleanGroups, task) {
27
+ const allLines = cleanGroups.flat();
28
+ const allEmbeds = await getLineEmbeddings(allLines, task);
29
+ return { allLines, allEmbeds };
30
+ }
31
+
32
+ // Build index mapping for each group
33
+ function getIdxByGroup(cleanGroups) {
34
+ const idxByGroup = [];
35
+ let p = 0;
36
+ for (const g of cleanGroups) {
37
+ idxByGroup.push(Array.from({ length: g.length }, (_, i) => p + i));
38
+ p += g.length;
39
+ }
40
+ return idxByGroup;
41
+ }
42
+
43
+ // Helper to build the final output text for reordered groups
44
+ function buildFinalText(order, sortedLines, clusterNames, n) {
45
+ return order
46
+ .map((gIdx, i) => {
47
+ const header =
48
+ clusterNames?.length === n ? clusterNames[gIdx] : `Group ${i + 1}`;
49
+ return `## ${header}\n${sortedLines[i].join("\n")}`;
50
+ })
51
+ .join("\n\n\n");
52
+ }
53
+
54
  export async function handleHeatmapEvent() {
55
  const progressBar = document.getElementById("progress-bar");
56
  const progressBarInner = document.getElementById("progress-bar-inner");
 
112
  // First group is search then follow by hight sim group
113
  // in each group order by high sim line
114
  if (searchIdx !== -1 && search_by_max_search_line) {
115
+ const searchLines = getCleanGroups(groups)[searchIdx];
 
 
116
  const searchEmbeds = await getLineEmbeddings(searchLines, task);
117
+ const cleanGroups = getCleanGroups(groups);
118
+ const { allLines, allEmbeds } = await getAllLinesAndEmbeds(cleanGroups, task);
119
+ const idxByGroup = getIdxByGroup(cleanGroups);
120
+ const score = e => Math.max(...searchEmbeds.map(se => cosine(se, e)));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  const sorted = order.map(g =>
122
  idxByGroup[g]
123
  .map(i => ({ t: allLines[i], s: score(allEmbeds[i]) }))
124
  .sort((a, b) => b.s - a.s)
125
  .map(o => o.t)
126
  );
127
+ const finalText = buildFinalText(order, sorted, clusterNames, n);
 
 
 
 
 
 
 
 
128
  document.getElementById("input").value = finalText;
129
  }
130
 
131
  if (searchIdx !== -1 && search_by_max_search_group) {
132
  const refEmbed = groupEmbeddings[searchIdx];
133
+ const cleanGroups = getCleanGroups(groups);
134
+ const { allLines, allEmbeds } = await getAllLinesAndEmbeds(cleanGroups, task);
135
+ const idxByGroup = getIdxByGroup(cleanGroups);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  const sortedLines = order.map(gIdx =>
137
  idxByGroup[gIdx]
138
  .map(i => ({ t: allLines[i], s: cosine(refEmbed, allEmbeds[i]) }))
139
  .sort((a, b) => b.s - a.s)
140
  .map(o => o.t)
141
  );
142
+ const finalText = buildFinalText(order, sortedLines, clusterNames, n);
 
 
 
 
 
 
 
 
143
  document.getElementById("input").value = finalText;
144
  }
145
  progressBarInner.style.width = "100%";
index.html CHANGED
@@ -68,11 +68,14 @@
68
 
69
  <body>
70
  <h1>Text Embedding Playground</h1>
 
71
  <textarea id="input"></textarea>
72
  <script type="module">
73
  import { sentences } from './sentences.js';
74
  document.getElementById("input").value = sentences.join("\n");
75
  </script>
 
 
76
  <div class="control-group">
77
  <label for="kmeans-k">Clusters:</label>
78
  <input id="kmeans-k" type="number" min="2" max="100" value="7" style="width:60px;">
 
68
 
69
  <body>
70
  <h1>Text Embedding Playground</h1>
71
+ <label for="input">Input Text</label>
72
  <textarea id="input"></textarea>
73
  <script type="module">
74
  import { sentences } from './sentences.js';
75
  document.getElementById("input").value = sentences.join("\n");
76
  </script>
77
+ <label for="search-group-input" style="margin-top:10px;display:block;">Search Group (optional, overrides group with ##search):</label>
78
+ <textarea id="search-group-input" placeholder="Paste or type search group lines here..."></textarea>
79
  <div class="control-group">
80
  <label for="kmeans-k">Clusters:</label>
81
  <input id="kmeans-k" type="number" min="2" max="100" value="7" style="width:60px;">