PyTorch
ssl-aasist
custom_code
ash56's picture
Add files using upload-large-folder tool
010952f verified
raw
history blame
1.97 kB
import torchaudio
import argparse
import json
def main(argv=None):
    """Print a HiFi-GAN manifest to stdout, one JSON object per utterance.

    Reads the tsv file given by ``--tsv`` and the km file given by ``--km``.
    The first tsv line is taken as the audio root directory; every later
    line must hold a wav path (relative to that root) and a frame count
    separated by a single tab. The km file must contain exactly one line
    per tsv entry, in the same order.

    For every entry the frame count from the tsv is compared with the one
    reported by ``torchaudio.info`` and a JSON line of the form
    ``{"audio": <wav path>, "duration": <num_frames / sample_rate>,
    <km_type>: <km line>}`` is printed.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so calling
            ``main()`` behaves as before.

    Raises:
        AssertionError: If the km file does not have exactly one line per
            tsv entry, or if a tsv frame count differs from the frame
            count reported for the audio file.
    """
    parser = argparse.ArgumentParser(description="example: python create_hifigan_manifest.py --tsv /checkpoint/felixkreuk/datasets/vctk/splits/vctk_16khz/train.tsv --km /checkpoint/felixkreuk/experiments/hubert/hubert_feats/vctk_16khz_km_100/train.km --km_type hubert_100km > ~/tmp/tmp_mani.txt")
    parser.add_argument("--tsv", required=True, help="path to fairseq tsv file")
    parser.add_argument("--km", required=True, help="path to a km file generated by HuBERT clustering")
    parser.add_argument("--km_type", required=True, help="name of the codes in the output json (for example: 'cpc_100km')")
    args = parser.parse_args(argv)

    # Context managers guarantee both handles are closed, even on error.
    with open(args.km, "r") as km_file:
        km_lines = km_file.readlines()
    with open(args.tsv, "r") as tsv_file:
        tsv_lines = tsv_file.readlines()

    # Raised explicitly instead of via `assert` so the check still runs
    # under `python -O`; the exception type and message are unchanged.
    # The "- 1" accounts for the tsv header line holding the audio root.
    if len(km_lines) != len(tsv_lines) - 1:
        raise AssertionError("tsv and km files are not of the same length!")

    wav_root = tsv_lines[0].strip()
    for tsv_line, km_line in zip(tsv_lines[1:], km_lines):
        wav_basename, wav_num_frames = tsv_line.strip().split("\t")
        wav_path = wav_root + "/" + wav_basename
        wav_info = torchaudio.info(wav_path)
        # Guard against a stale tsv: the listed length must match the file.
        if int(wav_num_frames) != wav_info.num_frames:
            raise AssertionError("tsv duration and actual duration don't match!")
        wav_duration = wav_info.num_frames / wav_info.sample_rate
        manifest_line = {"audio": wav_path, "duration": wav_duration, args.km_type: km_line.strip()}
        print(json.dumps(manifest_line))
# Script entry point. Example invocation:
#
#   python create_hifigan_manifest.py \
#       --tsv /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/valid.tsv \
#       --km /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/hubert_km_100/valid.km \
#       --km_type hubert \
#       > /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/hubert_km_100/hifigan_valid_manifest.txt
if __name__ == "__main__":
    main()