PyTorch
ssl-aasist
custom_code
ash56's picture
Add files using upload-large-folder tool
9742bb8 verified
raw
history blame
5.8 kB
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import re
import glob
import json
import tempfile
import math
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import numpy as np
import commons
import utils
import argparse
import subprocess
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from scipy.io.wavfile import write
class TextMapper(object):
    """Convert text into the symbol-ID sequences consumed by the TTS model.

    The vocabulary is read from a text file holding one symbol per line; a
    symbol's ID is its zero-based line index in that file.
    """

    def __init__(self, vocab_file):
        """Load the vocabulary and build the symbol <-> ID lookup tables.

        Args:
            vocab_file: path to a UTF-8 text file with one symbol per line.

        Raises:
            ValueError: if the vocabulary contains no single-space symbol.
        """
        # A context manager closes the handle deterministically instead of
        # leaving it to the garbage collector.
        with open(vocab_file, encoding="utf-8") as f:
            self.symbols = [line.replace("\n", "") for line in f]
        self.SPACE_ID = self.symbols.index(" ")
        self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}
        self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}

    def text_to_sequence(self, text, cleaner_names):
        """Convert a string of text to the IDs of its symbols.

        Args:
            text: string to convert; leading/trailing whitespace is stripped.
            cleaner_names: accepted for interface compatibility; not used by
                this implementation.

        Returns:
            List of integers, one per character of the stripped text.

        Raises:
            KeyError: if a character is not in the vocabulary (run
                ``filter_oov`` first to drop such characters).
        """
        clean_text = text.strip()
        return [self._symbol_to_id[symbol] for symbol in clean_text]

    def uromanize(self, text, uroman_pl):
        """Romanize ``text`` by piping it through the uroman perl script.

        Args:
            text: text to romanize.
            uroman_pl: path to ``uroman.pl``.

        Returns:
            The first line of uroman's output with every whitespace run
            collapsed to a single space and the ends stripped, or an empty
            string when uroman produced no output.

        Raises:
            subprocess.CalledProcessError: if perl exits with a non-zero status.
            FileNotFoundError: if the ``perl`` executable cannot be found.
        """
        iso = "xxx"  # language code handed to uroman through its -l option
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact, and piping the text as UTF-8 avoids both the
        # temporary files and the locale-dependent default encoding.
        proc = subprocess.run(
            ["perl", uroman_pl, "-l", iso],
            input=text,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            check=True,
        )
        # Only the first output line is used.
        first_line = proc.stdout.split("\n", 1)[0]
        return re.sub(r"\s+", " ", first_line).strip()

    def get_text(self, text, hps):
        """Encode ``text`` as a ``torch.LongTensor`` of symbol IDs.

        Args:
            text: text whose characters are all in the vocabulary.
            hps: hyper-parameters; ``hps.data.text_cleaners`` and
                ``hps.data.add_blank`` are read.

        Returns:
            1-D ``torch.LongTensor`` of symbol IDs.
        """
        text_norm = self.text_to_sequence(text, hps.data.text_cleaners)
        if hps.data.add_blank:
            # presumably interleaves the blank ID 0 between symbols -- see
            # commons.intersperse to confirm
            text_norm = commons.intersperse(text_norm, 0)
        text_norm = torch.LongTensor(text_norm)
        return text_norm

    def filter_oov(self, text, lang=None):
        """Drop every character that is not in the vocabulary.

        Language-specific character substitutions (``preprocess_char``) are
        applied before filtering. The filtered text is also printed.

        Args:
            text: input text.
            lang: optional language ISO code forwarded to ``preprocess_char``.

        Returns:
            ``text`` restricted to in-vocabulary characters.
        """
        text = self.preprocess_char(text, lang=lang)
        val_chars = self._symbol_to_id
        txt_filt = "".join(ch for ch in text if ch in val_chars)
        print(f"text after filtering OOV: {txt_filt}")
        return txt_filt

    def preprocess_char(self, text, lang=None):
        """Apply special treatment of characters in certain languages.

        For Romanian (``lang == "ron"``) one t-with-diacritic code point is
        replaced by its look-alike variant and the result is printed. Any
        other ``lang`` leaves the text unchanged.

        Args:
            text: input text.
            lang: optional language ISO code.

        Returns:
            The (possibly substituted) text.
        """
        if lang == "ron":
            text = text.replace("ț", "ţ")
            print(f"{lang} (ț -> ţ): {text}")
        return text
def generate():
    """Command-line entry point: synthesize one utterance and save it as a WAV.

    Parses the command line, loads ``config.json``, ``vocab.txt`` and the
    ``G_100000.pth`` generator checkpoint from ``--model-dir``, optionally
    romanizes the input text with uroman (when the configured training file
    name ends in the ``uroman`` extension), runs inference and writes the
    result to ``--wav``.

    Raises:
        FileNotFoundError: if ``config.json`` is missing from the model dir.
        subprocess.CalledProcessError: if cloning or running uroman fails.
    """
    parser = argparse.ArgumentParser(description='TTS inference')
    # These three are needed on every run; marking them required yields a
    # clear usage error instead of a failure on ``None`` further down.
    parser.add_argument('--model-dir', type=str, required=True, help='model checkpoint dir')
    parser.add_argument('--wav', type=str, required=True, help='output wav path')
    parser.add_argument('--txt', type=str, required=True, help='input text')
    parser.add_argument('--uroman-dir', type=str, default=None, help='uroman lib dir (will download if not specified)')
    parser.add_argument('--lang', type=str, default=None, help='language iso code (required for Romanian)')
    args = parser.parse_args()
    ckpt_dir, wav_path, txt = args.model_dir, args.wav, args.txt

    # Prefer CUDA, then Apple MPS (guarded: older torch builds lack the
    # attribute), and fall back to CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available() and torch.backends.mps.is_built():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    print(f"Run inference with {device}")

    vocab_file = f"{ckpt_dir}/vocab.txt"
    config_file = f"{ckpt_dir}/config.json"
    # Raise explicitly rather than assert: asserts are stripped under -O.
    if not os.path.isfile(config_file):
        raise FileNotFoundError(f"{config_file} doesn't exist")
    hps = utils.get_hparams_from_file(config_file)
    text_mapper = TextMapper(vocab_file)
    net_g = SynthesizerTrn(
        len(text_mapper.symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        **hps.model)
    net_g.to(device)
    _ = net_g.eval()

    g_pth = f"{ckpt_dir}/G_100000.pth"
    print(f"load {g_pth}")
    _ = utils.load_checkpoint(g_pth, net_g, None)

    print(f"text: {txt}")
    is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
    if is_uroman:
        # The temporary directory only receives a clone when no uroman
        # checkout was supplied; it is removed once romanization is done.
        with tempfile.TemporaryDirectory() as tmp_dir:
            uroman_dir = args.uroman_dir
            if uroman_dir is None:
                # Clone over HTTPS so the download works without GitHub SSH
                # keys, and pass an argv list so no shell parses the path.
                clone_cmd = ["git", "clone", "https://github.com/isi-nlp/uroman.git", tmp_dir]
                print(" ".join(clone_cmd))
                subprocess.check_output(clone_cmd)
                uroman_dir = tmp_dir
            uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl")
            print("uromanize")
            txt = text_mapper.uromanize(txt, uroman_pl)
            print(f"uroman text: {txt}")

    txt = txt.lower()
    txt = text_mapper.filter_oov(txt, lang=args.lang)
    stn_tst = text_mapper.get_text(txt, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0).to(device)  # add a leading batch axis
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)
        # NOTE(review): [0][0, 0] presumably selects the waveform of the only
        # batch item / channel -- confirm against SynthesizerTrn.infer.
        hyp = net_g.infer(
            x_tst, x_tst_lengths, noise_scale=.667,
            noise_scale_w=0.8, length_scale=1.0
        )[0][0, 0].cpu().float().numpy()

    # A bare file name has an empty dirname, and os.makedirs("") raises, so
    # only create the directory when the path actually has one.
    out_dir = os.path.dirname(wav_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    print(f"wav: {wav_path}")
    write(wav_path, hps.data.sampling_rate, hyp)
# Run the TTS inference CLI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    generate()