ssl-aasist / fairseq /examples /emotion_conversion /preprocess /build_hifigan_manifest.py

Add files using upload-large-folder tool

010952f verified 3 months ago

1.97 kB

	import torchaudio
	import argparse
	import json

	def main():
	    """Print a HiFi-GAN manifest to stdout, one JSON object per line.

	    Reads a tsv manifest whose first line is the wav root directory and whose
	    remaining lines are tab-separated ``<relative wav path>`` and
	    ``<number of frames>`` fields, and pairs each entry with the line at the
	    same position in the km file.  For every pair it prints
	    ``{"audio": <wav path>, "duration": <seconds>, <km_type>: <km line>}``.

	    Command-line arguments:
	        --tsv: path to the tsv manifest.
	        --km: path to the km file (one line per tsv entry).
	        --km_type: key under which the km line is stored in each JSON object.

	    Raises:
	        AssertionError: if the km file does not have exactly one line per tsv
	            entry, or if a frame count in the tsv differs from the frame
	            count torchaudio reports for that file.
	    """
	    parser = argparse.ArgumentParser(description="example: python build_hifigan_manifest.py --tsv /checkpoint/felixkreuk/datasets/vctk/splits/vctk_16khz/train.tsv --km /checkpoint/felixkreuk/experiments/hubert/hubert_feats/vctk_16khz_km_100/train.km --km_type hubert_100km > ~/tmp/tmp_mani.txt")
	    parser.add_argument("--tsv", required=True, help="path to fairseq tsv file")
	    parser.add_argument("--km", required=True, help="path to a km file generated by HuBERT clustering")
	    parser.add_argument("--km_type", required=True, help="name of the codes in the output json (for example: 'cpc_100km')")
	    args = parser.parse_args()

	    # Use context managers so both file handles are closed deterministically
	    # instead of being left for the garbage collector.
	    with open(args.km, "r") as km_file:
	        km_lines = km_file.readlines()
	    with open(args.tsv, "r") as tsv_file:
	        tsv_lines = tsv_file.readlines()
	    # The tsv carries one extra line (the wav root header) compared to the km file.
	    assert len(km_lines) == len(tsv_lines) - 1, "tsv and km files are not of the same length!"

	    wav_root = tsv_lines[0].strip()
	    tsv_lines = tsv_lines[1:]

	    for tsv_line, km_line in zip(tsv_lines, km_lines):
	        tsv_line, km_line = tsv_line.strip(), km_line.strip()
	        wav_basename, wav_num_frames = tsv_line.split("\t")
	        wav_path = wav_root + "/" + wav_basename
	        # Cross-check the manifest against the audio file's own metadata.
	        wav_info = torchaudio.info(wav_path)
	        assert int(wav_num_frames) == wav_info.num_frames, "tsv duration and actual duration don't match!"
	        wav_duration = wav_info.num_frames / wav_info.sample_rate
	        manifest_line = {"audio": wav_path, "duration": wav_duration, args.km_type: km_line}
	        print(json.dumps(manifest_line))

	if __name__ == "__main__":
	    # Example invocation (the manifest is printed to stdout, so redirect it
	    # to the desired output file):
	    #
	    #   python build_hifigan_manifest.py \
	    #       --tsv /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/valid.tsv \
	    #       --km /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/hubert_km_100/valid.km \
	    #       --km_type hubert \
	    #       > /checkpoint/felixkreuk/datasets/vctk/manifests/vctk_16khz/hubert_km_100/hifigan_valid_manifest.txt
	    main()