File size: 1,803 Bytes
f2e1fb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Handles K-Means and Balanced K-Means clustering event
import { getLineEmbeddings } from './embedding.js';
import { kmeans, balancedKMeans } from './clustering.js';

// Instruction string passed to getLineEmbeddings together with the input lines.
const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";

/**
 * Clusters the lines of the "input" textarea and rewrites the textarea with
 * the lines grouped under "## Cluster N" headings.
 *
 * Reads the requested cluster count from "kmeans-k", the algorithm from
 * "kmeans-type" ("balancedKMeans" selects balancedKMeans, anything else
 * selects kmeans) and the beta parameter from "kmeans-beta". The progress bar
 * is shown at 0% on entry and set to 100% when the handler finishes,
 * including the case where there are fewer than two embeddings to cluster.
 *
 * @returns {Promise<void>} Resolves once the textarea has been updated (or
 *     left untouched when there is nothing to cluster).
 */
export async function handleKMeansEvent() {
    const progressBar = document.getElementById("progress-bar");
    const progressBarInner = document.getElementById("progress-bar-inner");
    progressBar.style.display = "block";
    progressBarInner.style.width = "0%";

    const inputElement = document.getElementById("input");
    // Blank lines and "##" lines (such as the cluster headings written below)
    // are dropped so they are never embedded or clustered.
    const lines = inputElement.value
        .split(/\n/)
        .map((line) => line.trim())
        .filter((line) => line && !line.startsWith("##"));
    const embeddings = await getLineEmbeddings(lines, task);
    const n = embeddings.length;
    if (n < 2) {
        // Nothing to cluster: complete the bar so it is not left stuck at 0%.
        progressBarInner.style.width = "100%";
        return;
    }

    // Invalid or zero input falls back to 3; the result is clamped to [2, n].
    const requestedK = Number.parseInt(document.getElementById("kmeans-k").value, 10) || 3;
    const k = Math.max(2, Math.min(requestedK, n));

    // Read clustering type and beta
    const clusteringType = document.getElementById("kmeans-type").value;
    const beta = Number.parseFloat(document.getElementById("kmeans-beta").value) || 0.01;
    const { labels } = clusteringType === "balancedKMeans"
        ? balancedKMeans(embeddings, k, beta)
        : kmeans(embeddings, k);

    // Build clustered text for textarea: one bucket of lines per cluster index.
    const clustered = Array.from({ length: k }, () => []);
    for (let i = 0; i < n; ++i) {
        clustered[labels[i]].push(lines[i]);
    }
    inputElement.value = clustered
        .map((group, c) => `## Cluster ${c + 1}\n${group.join("\n")}`)
        .join("\n\n\n");

    progressBarInner.style.width = "100%";
}