Embedding-Playground / embedding.js
ping98k
Refactor K-Means clustering implementation; modularize embedding and clustering logic, enhance heatmap and scatter plot functions, and improve cluster naming process.
12c4198
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0';
// Model repository id handed to the pipeline factory.
const EMBEDDING_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX";

// Runtime options for the pipeline: execution device and weight dtype.
const EMBEDDING_PIPELINE_OPTIONS = { device: "webgpu", dtype: "q4f16" };

// Shared feature-extraction pipeline. It is created once, at module load, via
// top-level await, so importing this module waits until the pipeline is ready.
const embed = await pipeline(
  "feature-extraction",
  EMBEDDING_MODEL_ID,
  EMBEDDING_PIPELINE_OPTIONS,
);
/**
 * Builds one embedding per group by averaging the embeddings of its lines.
 *
 * Each group is split on newlines; every line is trimmed, and blank lines and
 * lines starting with `##` are dropped. The remaining lines are embedded in a
 * single `embed` call per group and averaged component-wise. The average is
 * not re-normalized.
 *
 * NOTE(review): the Qwen3-Embedding model card appears to use last-token
 * pooling; "mean" is kept here so existing outputs do not change — confirm.
 *
 * @param {Iterable<string>} groups - Multi-line text blocks, one per group.
 * @param {string} task - Instruction text placed in every prompt.
 * @returns {Promise<Float32Array[]>} One averaged vector per group, in input order.
 * @throws {Error} If a group has no lines left to embed, or if the model
 *   output cannot be split into one vector per line.
 */
export async function getGroupEmbeddings(groups, task) {
  // Converts the pipeline output into one vector per prompt. When the output
  // has no tolist(), a flat `data` buffer is cut into `rowCount` equal rows;
  // using it as-is would make `embeddings[0].length` undefined and silently
  // produce empty averages.
  const toRows = (out, rowCount) => {
    if (typeof out.tolist === 'function') return out.tolist();
    const { data } = out;
    if (data == null) {
      throw new Error('Embedding output has neither tolist() nor data');
    }
    // Already one vector per row: nothing to split.
    if (data.length > 0 && typeof data[0] !== 'number') return data;
    const width = data.length / rowCount;
    if (!Number.isInteger(width) || width === 0) {
      throw new Error(`Cannot split ${data.length} embedding values into ${rowCount} rows`);
    }
    return Array.from({ length: rowCount }, (_, row) =>
      data.slice(row * width, (row + 1) * width));
  };

  const groupEmbeddings = [];
  let groupIndex = 0;
  for (const g of groups) {
    // Remove blank lines and lines starting with ##
    const lines = g.split(/\n/)
      .map((x) => x.trim())
      .filter((x) => x && !x.startsWith('##'));
    if (lines.length === 0) {
      // Fail with the offending index instead of an opaque error further down.
      throw new Error(`Group ${groupIndex} has no lines to embed (empty or only "##" headings)`);
    }
    const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
    const out = await embed(prompts, { pooling: "mean", normalize: true });
    const embeddings = toRows(out, prompts.length);
    const dim = embeddings[0].length;
    const avg = new Float32Array(dim);
    for (const e of embeddings) {
      for (let i = 0; i < dim; i++) avg[i] += e[i];
    }
    for (let i = 0; i < dim; i++) avg[i] /= embeddings.length;
    groupEmbeddings.push(avg);
    groupIndex += 1;
  }
  return groupEmbeddings;
}
/**
 * Embeds each line separately and returns one vector per line.
 *
 * Every line is wrapped as `Instruct: <task>\nQuery:<line>` and all prompts
 * are sent to `embed` in a single call.
 *
 * NOTE(review): the Qwen3-Embedding model card appears to use last-token
 * pooling; "mean" is kept here so existing outputs do not change — confirm.
 *
 * @param {string[]} lines - Texts to embed.
 * @param {string} task - Instruction text placed in every prompt.
 * @returns {Promise<Array>} One vector per input line, in input order; an
 *   empty array when `lines` is empty.
 * @throws {Error} If the model output cannot be split into one vector per line.
 */
export async function getLineEmbeddings(lines, task) {
  // Nothing to embed: skip the model call entirely.
  if (lines.length === 0) return [];

  const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
  const out = await embed(prompts, { pooling: "mean", normalize: true });
  if (typeof out.tolist === 'function') return out.tolist();

  const { data } = out;
  if (data == null) {
    throw new Error('Embedding output has neither tolist() nor data');
  }
  // Already one vector per row: nothing to split.
  if (data.length > 0 && typeof data[0] !== 'number') return data;

  // A flat buffer holds every row back to back; cut it into one plain array
  // per prompt so the result has the same shape as the tolist() path.
  const width = data.length / prompts.length;
  if (!Number.isInteger(width) || width === 0) {
    throw new Error(`Cannot split ${data.length} embedding values into ${prompts.length} rows`);
  }
  return Array.from({ length: prompts.length }, (_, row) =>
    Array.from(data.slice(row * width, (row + 1) * width)));
}