File size: 876 Bytes
5fa1a76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import os
import torch
from speechbrain.pretrained import EncoderClassifier
# Identifier of the pretrained speaker model; passed to `from_hparams` as
# `source` and reused below to build the local save directory.
spk_model_name = "speechbrain/spkrec-xvect-voxceleb"

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Instantiate the encoder once at import time so every call to the embedding
# helper reuses the same model; `savedir` points at /tmp/<model name>.
speaker_model = EncoderClassifier.from_hparams(
    savedir=os.path.join("/tmp", spk_model_name),
    run_opts={"device": device},
    source=spk_model_name,
)
def create_speaker_embedding(waveform):
    """Return a normalized speaker embedding for *waveform* as a NumPy array.

    The waveform is converted with ``torch.tensor`` and encoded by the
    module-level ``speaker_model``. The encoder output is L2-normalized
    along dimension 2, stripped of all size-1 dimensions, moved to the CPU
    and converted to NumPy.

    Args:
        waveform: Audio samples accepted by ``torch.tensor`` (presumably a
            1-D sequence of floats for a single utterance; confirm against
            the caller).

    Returns:
        The embedding as a NumPy array.
    """
    # Inference only: skip autograd bookkeeping for the whole pipeline.
    with torch.no_grad():
        encoded = speaker_model.encode_batch(torch.tensor(waveform))
        unit_norm = torch.nn.functional.normalize(encoded, dim=2)
        return unit_norm.squeeze().cpu().numpy()

Note that the speechbrain/spkrec-xvect-voxceleb model was trained on English speech from the VoxCeleb
dataset, whereas the training examples in this guide are in Dutch, so the speaker embeddings it produces for these examples may be less reliable.