ping98k commited on
Commit
a47a283
·
1 Parent(s): 45ba71c

Fix README short description capitalization and enhance index.html layout for K-Means clustering visualization

Browse files
Files changed (2) hide show
  1. README.md +5 -1
  2. index.html +80 -3
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: yellow
6
  sdk: static
7
  pinned: false
8
  license: apache-2.0
9
- short_description: 'exploring text embeddings and group similarity '
10
  ---
11
 
12
  # Embedding WebGPU Playground
@@ -23,6 +23,10 @@ This is a browser-based playground for exploring text embeddings and group simil
23
  - Cosine similarity is calculated between all group embeddings, resulting in a group-by-group similarity matrix.
24
  - The similarity matrix is visualized as a heatmap using Plotly (color range locked to 0–1).
25
 
 
 
 
 
26
  ## Tech stack
27
  - [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) (ESM, WebGPU)
28
  - [ONNX Qwen3-Embedding-0.6B-ONNX](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX)
 
6
  sdk: static
7
  pinned: false
8
  license: apache-2.0
9
+ short_description: 'Exploring text embeddings and group similarity '
10
  ---
11
 
12
  # Embedding WebGPU Playground
 
23
  - Cosine similarity is calculated between all group embeddings, resulting in a group-by-group similarity matrix.
24
  - The similarity matrix is visualized as a heatmap using Plotly (color range locked to 0–1).
25
 
26
+
27
+ K-Means Clustering
28
+ Regroup the input texts with K-Means, using the number of groups you choose.
29
+
30
  ## Tech stack
31
  - [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) (ESM, WebGPU)
32
  - [ONNX Qwen3-Embedding-0.6B-ONNX](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX)
index.html CHANGED
@@ -21,10 +21,16 @@
21
  margin-top: 10px
22
  }
23
 
24
- #plot {
 
25
  width: 100%;
26
  height: 600px
27
  }
 
 
 
 
 
28
  </style>
29
  </head>
30
 
@@ -32,10 +38,16 @@
32
  <h1>Embedding Similarity Heatmap</h1>
33
  <textarea id="input"></textarea>
34
  <button id="run">Run</button>
35
- <div id="plot"></div>
 
 
 
 
 
36
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
37
  <script type="module">
38
  import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
 
39
 
40
  const embed = await pipeline(
41
  "feature-extraction",
@@ -75,7 +87,72 @@
75
  sim.push(row);
76
  }
77
  const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0, zmax: 1 }];
78
- Plotly.newPlot("plot", data, { xaxis: { title: "Group" }, yaxis: { title: "Group" } });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  };
80
  </script>
81
  </body>
 
21
  margin-top: 10px
22
  }
23
 
24
+ #plot-heatmap,
25
+ #plot-scatter {
26
  width: 100%;
27
  height: 600px
28
  }
29
+
30
+ .plot-container {
31
+ display: flex;
32
+ gap: 20px;
33
+ }
34
  </style>
35
  </head>
36
 
 
38
  <h1>Embedding Similarity Heatmap</h1>
39
  <textarea id="input"></textarea>
40
  <button id="run">Run</button>
41
+ <input id="kmeans-k" type="number" min="2" max="20" value="3" style="width:60px; margin-left:10px;"> <button
42
+ id="kmeans-btn">K-Means Clustering</button>
43
+ <div class="plot-container">
44
+ <div id="plot-heatmap" style="width:500px; height:500px;"></div>
45
+ <div id="plot-scatter" style="width:500px; height:500px;"></div>
46
+ </div>
47
  <script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
48
  <script type="module">
49
  import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
50
+ import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm";
51
 
52
  const embed = await pipeline(
53
  "feature-extraction",
 
87
  sim.push(row);
88
  }
89
  const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0, zmax: 1 }];
90
+ Plotly.newPlot("plot-heatmap", data, {
91
+ xaxis: { title: "Group", scaleanchor: "y", scaleratio: 1 },
92
+ yaxis: { title: "Group", scaleanchor: "x", scaleratio: 1 },
93
+ width: 500,
94
+ height: 500,
95
+ margin: { t: 40, l: 40, r: 10, b: 40 },
96
+ title: "Group Similarity Heatmap"
97
+ });
98
+ };
99
+
100
+ // --- K-Means Clustering ---
101
/**
 * Convert the embedding pipeline output into an array of per-text vectors.
 *
 * Uses `out.tolist()` when available. Otherwise falls back to `out.data`;
 * if that is a flat numeric buffer it is cut into `rowCount` equal rows.
 * NOTE(review): the flat fallback assumes one contiguous row per prompt in
 * prompt order - confirm against the Tensor layout of the pipeline in use.
 *
 * @param {object} out - Value returned by the `embed` pipeline.
 * @param {number} rowCount - Number of prompts that were embedded.
 * @returns {number[][]} One vector per prompt.
 */
function toEmbeddingRows(out, rowCount) {
  if (typeof out.tolist === "function") {
    return out.tolist();
  }
  const { data } = out;
  if (data.length > 0 && typeof data[0] !== "number") {
    // Already row-shaped; nothing to reshape.
    return data;
  }
  const width = data.length / rowCount;
  return Array.from({ length: rowCount }, (_, r) =>
    Array.from(data.slice(r * width, (r + 1) * width))
  );
}

/**
 * Cluster `points` into `k` groups with Lloyd's k-means iterations using
 * squared Euclidean distance.
 *
 * @param {number[][]} points - Non-empty list of equal-length vectors.
 * @param {number} k - Number of clusters; must satisfy 1 <= k <= points.length.
 * @param {number} [maxIter=20] - Upper bound on assign/update rounds.
 * @returns {number[]} Cluster index (0..k-1) for every point.
 */
function kMeansLabels(points, k, maxIter = 20) {
  const n = points.length;
  const dim = points[0].length;

  // Seed with k *distinct* points (partial Fisher-Yates shuffle). Sampling
  // with replacement can pick the same point twice, which leaves a cluster
  // permanently empty.
  const order = Array.from({ length: n }, (_, i) => i);
  for (let i = 0; i < k; ++i) {
    const j = i + Math.floor(Math.random() * (n - i));
    [order[i], order[j]] = [order[j], order[i]];
  }
  let centroids = order.slice(0, k).map((idx) => [...points[idx]]);

  const labels = new Array(n).fill(-1);
  for (let iter = 0; iter < maxIter; ++iter) {
    // Assignment step: attach every point to its nearest centroid.
    let changed = false;
    for (let i = 0; i < n; ++i) {
      let best = 0;
      let bestDist = Infinity;
      for (let c = 0; c < k; ++c) {
        let dist = 0;
        for (let d = 0; d < dim; ++d) {
          const diff = points[i][d] - centroids[c][d];
          dist += diff * diff;
        }
        if (dist < bestDist) {
          bestDist = dist;
          best = c;
        }
      }
      if (labels[i] !== best) {
        labels[i] = best;
        changed = true;
      }
    }
    // Stable assignment means the centroids would not move either.
    if (!changed) {
      break;
    }

    // Update step: move each centroid to the mean of its members.
    const sums = Array.from({ length: k }, () => new Array(dim).fill(0));
    const counts = new Array(k).fill(0);
    for (let i = 0; i < n; ++i) {
      const c = labels[i];
      counts[c] += 1;
      for (let d = 0; d < dim; ++d) {
        sums[c][d] += points[i][d];
      }
    }
    // An empty cluster keeps its previous centroid instead of collapsing to
    // the zero vector.
    centroids = sums.map((sum, c) =>
      counts[c] > 0 ? sum.map((v) => v / counts[c]) : centroids[c]
    );
  }
  return labels;
}

/**
 * Click handler for the "K-Means Clustering" button.
 *
 * Embeds every non-empty line of the textarea, clusters the embeddings with
 * k-means (k read from the `#kmeans-k` input, clamped to 2..20 and to the
 * number of lines), projects them to 2D and draws one scatter trace per
 * cluster into `#plot-scatter`.
 */
document.getElementById("kmeans-btn").onclick = async () => {
  const text = document.getElementById("input").value;
  const lines = text.split(/\n/).map((x) => x.trim()).filter((x) => x);
  if (lines.length < 2) {
    // Nothing to cluster; previously this crashed on `embeddings[0]`.
    console.warn("K-Means clustering needs at least two non-empty lines.");
    return;
  }

  const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
  const out = await embed(prompts, { pooling: "mean", normalize: true });
  const embeddings = toEmbeddingRows(out, prompts.length);
  const n = embeddings.length;

  // Clamp k to the UI range and never ask for more clusters than points.
  const requested = Number.parseInt(document.getElementById("kmeans-k").value, 10) || 3;
  const k = Math.min(n, Math.max(2, Math.min(20, requested)));
  const labels = kMeansLabels(embeddings, k);

  // UMAP for 2D projection. umap-js needs more points than neighbours
  // (default 15), so shrink nNeighbors for short inputs; with only two
  // points there is nothing to embed, so place them side by side.
  const proj = n > 2
    ? new UMAP({ nComponents: 2, nNeighbors: Math.min(15, n - 1) }).fit(embeddings)
    : embeddings.map((_, i) => [i, 0]);

  // Plot: one trace per cluster so the legend and colours follow the labels.
  const colors = ["red","blue","green","orange","purple","cyan","magenta","yellow","brown","black","lime","navy","teal","olive","maroon","pink","gray","gold","aqua","indigo"];
  const traces = Array.from({ length: k }, (_, c) => ({
    x: [], y: [], text: [], mode: "markers", type: "scatter", name: `Cluster ${c+1}`,
    marker: { color: colors[c%colors.length], size: 12, line: { width: 1, color: '#333' } }
  }));
  for (let i = 0; i < n; ++i) {
    const trace = traces[labels[i]];
    trace.x.push(proj[i][0]);
    trace.y.push(proj[i][1]);
    trace.text.push(lines[i]);
  }
  Plotly.newPlot("plot-scatter", traces, {
    xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
    yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
    width: 500,
    height: 500,
    margin: { t: 40, l: 40, r: 10, b: 40 },
    title: `K-Means Clustering (k=${k})`
  });
};
157
  </script>
158
  </body>