ping98k committed on
Commit
802de67
·
1 Parent(s): a47a283

Refactor index.html layout for clarity and update K-Means clustering functionality to enhance user experience

Browse files
Files changed (1) hide show
  1. index.html +19 -14
index.html CHANGED
@@ -37,12 +37,14 @@
37
  <body>
38
  <h1>Embedding Similarity Heatmap</h1>
39
  <textarea id="input"></textarea>
40
- <button id="run">Run</button>
41
- <input id="kmeans-k" type="number" min="2" max="20" value="3" style="width:60px; margin-left:10px;"> <button
42
- id="kmeans-btn">K-Means Clustering</button>
 
43
  <div class="plot-container">
44
- <div id="plot-heatmap" style="width:500px; height:500px;"></div>
45
  <div id="plot-scatter" style="width:500px; height:500px;"></div>
 
 
46
  </div>
47
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
48
  <script type="module">
@@ -108,22 +110,19 @@
108
  // K-Means implementation
109
  const k = Math.max(2, Math.min(20, parseInt(document.getElementById("kmeans-k").value) || 3));
110
  const n = embeddings.length, dim = embeddings[0].length;
111
- // Randomly initialize centroids
112
- let centroids = Array.from({length: k}, () => embeddings[Math.floor(Math.random()*n)].slice());
113
  let labels = new Array(n).fill(0);
114
  for (let iter = 0; iter < 20; ++iter) {
115
- // Assign
116
  for (let i = 0; i < n; ++i) {
117
  let best = 0, bestDist = Infinity;
118
  for (let c = 0; c < k; ++c) {
119
  let dist = 0;
120
- for (let d = 0; d < dim; ++d) dist += (embeddings[i][d] - centroids[c][d])**2;
121
  if (dist < bestDist) { bestDist = dist; best = c; }
122
  }
123
  labels[i] = best;
124
  }
125
- // Update
126
- centroids = Array.from({length: k}, () => new Array(dim).fill(0));
127
  const counts = new Array(k).fill(0);
128
  for (let i = 0; i < n; ++i) {
129
  counts[labels[i]]++;
@@ -135,10 +134,10 @@
135
  const umap = new UMAP({ nComponents: 2 });
136
  const proj = umap.fit(embeddings);
137
  // Plot
138
- const colors = ["red","blue","green","orange","purple","cyan","magenta","yellow","brown","black","lime","navy","teal","olive","maroon","pink","gray","gold","aqua","indigo"];
139
- const traces = Array.from({length: k}, (_, c) => ({
140
- x: [], y: [], text: [], mode: "markers", type: "scatter", name: `Cluster ${c+1}`,
141
- marker: { color: colors[c%colors.length], size: 12, line: { width: 1, color: '#333' } }
142
  }));
143
  for (let i = 0; i < n; ++i) {
144
  traces[labels[i]].x.push(proj[i][0]);
@@ -153,6 +152,12 @@
153
  margin: { t: 40, l: 40, r: 10, b: 40 },
154
  title: `K-Means Clustering (k=${k})`
155
  });
 
 
 
 
 
 
156
  };
157
  </script>
158
  </body>
 
37
  <body>
38
  <h1>Embedding Similarity Heatmap</h1>
39
  <textarea id="input"></textarea>
40
+ <label for="kmeans-k" style="margin-left:10px;">Clusters:</label>
41
+ <input id="kmeans-k" type="number" min="2" max="20" value="3" style="width:60px;">
42
+ <button id="kmeans-btn">K-Means Clustering</button>
43
+ <button id="run">Similarity Heatmap</button>
44
  <div class="plot-container">
 
45
  <div id="plot-scatter" style="width:500px; height:500px;"></div>
46
+ <div id="plot-heatmap" style="width:500px; height:500px;"></div>
47
+
48
  </div>
49
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
50
  <script type="module">
 
110
  // K-Means implementation
111
  const k = Math.max(2, Math.min(20, parseInt(document.getElementById("kmeans-k").value) || 3));
112
  const n = embeddings.length, dim = embeddings[0].length;
113
+ let centroids = Array.from({ length: k }, () => embeddings[Math.floor(Math.random() * n)].slice());
 
114
  let labels = new Array(n).fill(0);
115
  for (let iter = 0; iter < 20; ++iter) {
 
116
  for (let i = 0; i < n; ++i) {
117
  let best = 0, bestDist = Infinity;
118
  for (let c = 0; c < k; ++c) {
119
  let dist = 0;
120
+ for (let d = 0; d < dim; ++d) dist += (embeddings[i][d] - centroids[c][d]) ** 2;
121
  if (dist < bestDist) { bestDist = dist; best = c; }
122
  }
123
  labels[i] = best;
124
  }
125
+ centroids = Array.from({ length: k }, () => new Array(dim).fill(0));
 
126
  const counts = new Array(k).fill(0);
127
  for (let i = 0; i < n; ++i) {
128
  counts[labels[i]]++;
 
134
  const umap = new UMAP({ nComponents: 2 });
135
  const proj = umap.fit(embeddings);
136
  // Plot
137
+ const colors = ["red", "blue", "green", "orange", "purple", "cyan", "magenta", "yellow", "brown", "black", "lime", "navy", "teal", "olive", "maroon", "pink", "gray", "gold", "aqua", "indigo"];
138
+ const traces = Array.from({ length: k }, (_, c) => ({
139
+ x: [], y: [], text: [], mode: "markers", type: "scatter", name: `Cluster ${c + 1}`,
140
+ marker: { color: colors[c % colors.length], size: 12, line: { width: 1, color: '#333' } }
141
  }));
142
  for (let i = 0; i < n; ++i) {
143
  traces[labels[i]].x.push(proj[i][0]);
 
152
  margin: { t: 40, l: 40, r: 10, b: 40 },
153
  title: `K-Means Clustering (k=${k})`
154
  });
155
+ // Update textarea: group by cluster, separated by triple newlines
156
+ const clustered = Array.from({ length: k }, (_, c) => []);
157
+ for (let i = 0; i < n; ++i) clustered[labels[i]].push(lines[i]);
158
+ document.getElementById("input").value = clustered.map(g => g.join("\n")).join("\n\n\n");
159
+ // Re-run heatmap after updating textarea
160
+ document.getElementById("run").onclick();
161
  };
162
  </script>
163
  </body>