import argparse from tqdm import tqdm from multiprocessing import Manager, Pool from scipy.io.wavfile import read from librosa.util import normalize import numpy as np import amfm_decompy.pYAAPT as pYAAPT import amfm_decompy.basic_tools as basic MAX_WAV_VALUE = 32768.0 parser = argparse.ArgumentParser(description="") parser.add_argument("tsv", help="") parser.add_argument("--extractor", choices=["crepe", "pyaapt"], default="pyaapt", help="") parser.add_argument("--interp", action="store_true", help="") parser.add_argument("--n_workers", type=int, default=40, help="") args = parser.parse_args() tsv_lines = open(args.tsv, "r").readlines() root, tsv_lines = tsv_lines[0].strip(), tsv_lines[1:] def extract_f0(tsv_line): wav_path, _ = tsv_line.split("\t") wav_path = root.strip() + "/" + wav_path sr, wav = read(wav_path) wav = wav / MAX_WAV_VALUE wav = normalize(wav) * 0.95 if args.extractor == "pyaapt": frame_length = 20.0 pad = int(frame_length / 1000 * sr) // 2 wav = np.pad(wav.squeeze(), (pad, pad), "constant", constant_values=0) signal = basic.SignalObj(wav, sr) pitch = pYAAPT.yaapt( signal, **{ 'frame_length': frame_length, 'frame_space': 5.0, 'nccf_thresh1': 0.25, 'tda_frame_length': 25.0 }) pitch = pitch.samp_interp[None, None, :] if args.interp else pitch.samp_values[None, None, :] pitch = pitch[0, 0] f0_path = wav_path.replace(".wav", ".yaapt") f0_path += ".interp.f0" if args.interp else ".f0" np.save(f0_path, pitch) def main(): with Pool(args.n_workers) as p: r = list(tqdm(p.imap(extract_f0, tsv_lines), total=len(tsv_lines))) if __name__ == "__main__": main()