File size: 876 Bytes
5fa1a76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import os import torch from speechbrain.pretrained import EncoderClassifier spk_model_name = "speechbrain/spkrec-xvect-voxceleb" device = "cuda" if torch.cuda.is_available() else "cpu" speaker_model = EncoderClassifier.from_hparams( source=spk_model_name, run_opts={"device": device}, savedir=os.path.join("/tmp", spk_model_name), ) def create_speaker_embedding(waveform): with torch.no_grad(): speaker_embeddings = speaker_model.encode_batch(torch.tensor(waveform)) speaker_embeddings = torch.nn.functional.normalize(speaker_embeddings, dim=2) speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy() return speaker_embeddings It's important to note that the speechbrain/spkrec-xvect-voxceleb model was trained on English speech from the VoxCeleb dataset, whereas the training examples in this guide are in Dutch. |