Spaces:
Running
Running
ping98k
committed on
Commit
·
a47a283
1
Parent(s):
45ba71c
Fix README short description capitalization and enhance index.html layout for K-Means clustering visualization
Browse files- README.md +5 -1
- index.html +80 -3
README.md
CHANGED
@@ -6,7 +6,7 @@ colorTo: yellow
|
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
9 |
-
short_description: '
|
10 |
---
|
11 |
|
12 |
# Embedding WebGPU Playground
|
@@ -23,6 +23,10 @@ This is a browser-based playground for exploring text embeddings and group simil
|
|
23 |
- Cosine similarity is calculated between all group embeddings, resulting in a group-by-group similarity matrix.
|
24 |
- The similarity matrix is visualized as a heatmap using Plotly (color range locked to 0–1).
|
25 |
|
|
|
|
|
|
|
|
|
26 |
## Tech stack
|
27 |
- [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) (ESM, WebGPU)
|
28 |
- [ONNX Qwen3-Embedding-0.6B-ONNX](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX)
|
|
|
6 |
sdk: static
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
9 |
+
short_description: 'Exploring text embeddings and group similarity '
|
10 |
---
|
11 |
|
12 |
# Embedding WebGPU Playground
|
|
|
23 |
- Cosine similarity is calculated between all group embeddings, resulting in a group-by-group similarity matrix.
|
24 |
- The similarity matrix is visualized as a heatmap using Plotly (color range locked to 0–1).
|
25 |
|
26 |
+
|
27 |
+
K-Means Clustering
|
28 |
+
Regroup the texts using K-Means with a chosen number of groups.
|
29 |
+
|
30 |
## Tech stack
|
31 |
- [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) (ESM, WebGPU)
|
32 |
- [ONNX Qwen3-Embedding-0.6B-ONNX](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX)
|
index.html
CHANGED
@@ -21,10 +21,16 @@
|
|
21 |
margin-top: 10px
|
22 |
}
|
23 |
|
24 |
-
#plot
|
|
|
25 |
width: 100%;
|
26 |
height: 600px
|
27 |
}
|
|
|
|
|
|
|
|
|
|
|
28 |
</style>
|
29 |
</head>
|
30 |
|
@@ -32,10 +38,16 @@
|
|
32 |
<h1>Embedding Similarity Heatmap</h1>
|
33 |
<textarea id="input"></textarea>
|
34 |
<button id="run">Run</button>
|
35 |
-
<
|
|
|
|
|
|
|
|
|
|
|
36 |
<script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
|
37 |
<script type="module">
|
38 |
import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
|
|
|
39 |
|
40 |
const embed = await pipeline(
|
41 |
"feature-extraction",
|
@@ -75,7 +87,72 @@
|
|
75 |
sim.push(row);
|
76 |
}
|
77 |
const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0, zmax: 1 }];
|
78 |
-
Plotly.newPlot("plot", data, {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
};
|
80 |
</script>
|
81 |
</body>
|
|
|
21 |
margin-top: 10px
|
22 |
}
|
23 |
|
24 |
+
#plot-heatmap,
|
25 |
+
#plot-scatter {
|
26 |
width: 100%;
|
27 |
height: 600px
|
28 |
}
|
29 |
+
|
30 |
+
.plot-container {
|
31 |
+
display: flex;
|
32 |
+
gap: 20px;
|
33 |
+
}
|
34 |
</style>
|
35 |
</head>
|
36 |
|
|
|
38 |
<h1>Embedding Similarity Heatmap</h1>
|
39 |
<textarea id="input"></textarea>
|
40 |
<button id="run">Run</button>
|
41 |
+
<input id="kmeans-k" type="number" min="2" max="20" value="3" style="width:60px; margin-left:10px;"> <button
|
42 |
+
id="kmeans-btn">K-Means Clustering</button>
|
43 |
+
<div class="plot-container">
|
44 |
+
<div id="plot-heatmap" style="width:500px; height:500px;"></div>
|
45 |
+
<div id="plot-scatter" style="width:500px; height:500px;"></div>
|
46 |
+
</div>
|
47 |
<script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
|
48 |
<script type="module">
|
49 |
import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
|
50 |
+
import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm";
|
51 |
|
52 |
const embed = await pipeline(
|
53 |
"feature-extraction",
|
|
|
87 |
sim.push(row);
|
88 |
}
|
89 |
const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0, zmax: 1 }];
|
90 |
+
Plotly.newPlot("plot-heatmap", data, {
|
91 |
+
xaxis: { title: "Group", scaleanchor: "y", scaleratio: 1 },
|
92 |
+
yaxis: { title: "Group", scaleanchor: "x", scaleratio: 1 },
|
93 |
+
width: 500,
|
94 |
+
height: 500,
|
95 |
+
margin: { t: 40, l: 40, r: 10, b: 40 },
|
96 |
+
title: "Group Similarity Heatmap"
|
97 |
+
});
|
98 |
+
};
|
99 |
+
|
100 |
+
// --- K-Means Clustering ---
|
101 |
+
document.getElementById("kmeans-btn").onclick = async () => {
|
102 |
+
const text = document.getElementById("input").value;
|
103 |
+
const lines = text.split(/\n/).map(x => x.trim()).filter(x => x);
|
104 |
+
const prompts = lines.map(s => `Instruct: ${task}\nQuery:${s}`);
|
105 |
+
const out = await embed(prompts, { pooling: "mean", normalize: true });
|
106 |
+
const embeddings = typeof out.tolist === 'function' ? out.tolist() : out.data;
|
107 |
+
|
108 |
+
// K-Means implementation
|
109 |
+
const k = Math.max(2, Math.min(20, parseInt(document.getElementById("kmeans-k").value) || 3));
|
110 |
+
const n = embeddings.length, dim = embeddings[0].length;
|
111 |
+
// Randomly initialize centroids
|
112 |
+
let centroids = Array.from({length: k}, () => embeddings[Math.floor(Math.random()*n)].slice());
|
113 |
+
let labels = new Array(n).fill(0);
|
114 |
+
for (let iter = 0; iter < 20; ++iter) {
|
115 |
+
// Assign
|
116 |
+
for (let i = 0; i < n; ++i) {
|
117 |
+
let best = 0, bestDist = Infinity;
|
118 |
+
for (let c = 0; c < k; ++c) {
|
119 |
+
let dist = 0;
|
120 |
+
for (let d = 0; d < dim; ++d) dist += (embeddings[i][d] - centroids[c][d])**2;
|
121 |
+
if (dist < bestDist) { bestDist = dist; best = c; }
|
122 |
+
}
|
123 |
+
labels[i] = best;
|
124 |
+
}
|
125 |
+
// Update
|
126 |
+
centroids = Array.from({length: k}, () => new Array(dim).fill(0));
|
127 |
+
const counts = new Array(k).fill(0);
|
128 |
+
for (let i = 0; i < n; ++i) {
|
129 |
+
counts[labels[i]]++;
|
130 |
+
for (let d = 0; d < dim; ++d) centroids[labels[i]][d] += embeddings[i][d];
|
131 |
+
}
|
132 |
+
for (let c = 0; c < k; ++c) if (counts[c]) for (let d = 0; d < dim; ++d) centroids[c][d] /= counts[c];
|
133 |
+
}
|
134 |
+
// UMAP for 2D projection
|
135 |
+
const umap = new UMAP({ nComponents: 2 });
|
136 |
+
const proj = umap.fit(embeddings);
|
137 |
+
// Plot
|
138 |
+
const colors = ["red","blue","green","orange","purple","cyan","magenta","yellow","brown","black","lime","navy","teal","olive","maroon","pink","gray","gold","aqua","indigo"];
|
139 |
+
const traces = Array.from({length: k}, (_, c) => ({
|
140 |
+
x: [], y: [], text: [], mode: "markers", type: "scatter", name: `Cluster ${c+1}`,
|
141 |
+
marker: { color: colors[c%colors.length], size: 12, line: { width: 1, color: '#333' } }
|
142 |
+
}));
|
143 |
+
for (let i = 0; i < n; ++i) {
|
144 |
+
traces[labels[i]].x.push(proj[i][0]);
|
145 |
+
traces[labels[i]].y.push(proj[i][1]);
|
146 |
+
traces[labels[i]].text.push(lines[i]);
|
147 |
+
}
|
148 |
+
Plotly.newPlot("plot-scatter", traces, {
|
149 |
+
xaxis: { title: "UMAP-1", scaleanchor: "y", scaleratio: 1 },
|
150 |
+
yaxis: { title: "UMAP-2", scaleanchor: "x", scaleratio: 1 },
|
151 |
+
width: 500,
|
152 |
+
height: 500,
|
153 |
+
margin: { t: 40, l: 40, r: 10, b: 40 },
|
154 |
+
title: `K-Means Clustering (k=${k})`
|
155 |
+
});
|
156 |
};
|
157 |
</script>
|
158 |
</body>
|