MVSEP-MDX23-music-separation-model

Running

App Files Files Community

Yeluo0204 committed on Nov 4, 2024

Commit

1a492ad

verified ·

1 Parent(s): c6ed663

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -56

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import librosa
 import librosa.display
 import matplotlib.pyplot as plt
 def download_youtube_video_as_wav(youtube_url):
     output_dir = "downloads"
     os.makedirs(output_dir, exist_ok=True)
@@ -21,22 +21,20 @@ def download_youtube_video_as_wav(youtube_url):
     try:
         yt = YouTube(youtube_url)
         yt.streams.filter(only_audio=True).first().download(filename=output_file)
-        print("Download completed successfully.")
     except Exception as e:
-        print(f"An error occurred while downloading the video: {e}")
         return None
-    # Convert mp4 audio to wav
     wav_file = os.path.join(output_dir, "mixture.wav")
     clip = AudioFileClip(output_file)
     clip.write_audiofile(wav_file)
     return wav_file
 def check_file_readiness(filepath):
-    # If the loop finished, it means the file size has not changed for 5 seconds
-    # which indicates that the file is ready
     num_same_size_checks = 0
     last_size = -1
     while num_same_size_checks < 5:
@@ -49,7 +47,7 @@ def check_file_readiness(filepath):
         time.sleep(0.5)
     return True
 def generate_spectrogram(audio_file_path):
     y, sr = librosa.load(audio_file_path)
     plt.figure(figsize=(10, 4))
@@ -64,17 +62,17 @@ def generate_spectrogram(audio_file_path):
     plt.close()
     return image_path
 def generate_spectrograms(audio_files):
     output_spectrograms = []
     for audio_file in audio_files:
         output_spectrograms.append(generate_spectrogram(audio_file))
     return tuple(output_spectrograms)
 def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
     input_files = []
-    # Validate YouTube URL or directory path
     if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
         output_file = download_youtube_video_as_wav(input_string)
         if output_file is not None:
@@ -83,15 +81,15 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
         input_directory = input_string
         input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
     else:
-        raise ValueError("Invalid input! Please provide a valid YouTube link or a directory path.")
-    # Validate overlap values
     if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
-        raise ValueError("Overlap values must be between 0 and 1.")
-    # Validate chunk size
     if chunk_size <= 0:
-        raise ValueError("Chunk size must be greater than 0.") # not thicc enough
     options = {
         'input_audio': input_files,
@@ -106,7 +104,7 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
     predict_with_model(options)
-    # Clear GPU cache
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
@@ -115,43 +113,42 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
         audio_file_name = os.path.splitext(os.path.basename(f))[0]
         output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
         output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
-        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav") # For the second instrumental output
         output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
         output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
         output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")
-    # Check the readiness of the files
     output_files_ready = []
     for k, v in output_files.items():
         if os.path.exists(v) and check_file_readiness(v):
             output_files_ready.append(v)
         else:
-            empty_data = np.zeros((44100, 2)) # 2 channels, 1 second of silence at 44100Hz
             empty_file = tempfile.mktemp('.wav')
-            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))  # Cast to int16 as wavfile does not support float32
             output_files_ready.append(empty_file)
-    # Generate spectrograms right after separating the audio
     output_spectrograms = generate_spectrograms(output_files_ready)
-    print(len(output_files_ready)) # should print 6
-    print(len(output_spectrograms)) # should print 6
-    print("Before return")
     return tuple(output_files_ready) + output_spectrograms
-    print("After return")
 description = """
 # ZFTurbo Web-UI
 Web-UI by [Ma5onic](https://github.com/Ma5onic)
-## Options:
-- **Use CPU Only:** Select this if you have not enough GPU memory. It will be slower.
-- **Use Single ONNX:** Select this to use a single ONNX model. It will decrease quality a little bit but can help with GPU memory usage.
-- **Large Overlap:** The overlap for large chunks. Adjust as needed.
-- **Small Overlap:** The overlap for small chunks. Adjust as needed.
-- **Chunk Size:** The size of chunks to be processed at a time. Reduce this if facing memory issues.
-- **Use Fast Large GPU Version:** Select this to use the old fast method that requires > 11 GB of GPU memory. It will work faster.
 """
 theme = gr.themes.Base(
     primary_hue="cyan",
@@ -160,27 +157,27 @@ theme = gr.themes.Base(
 with gr.Blocks(theme=theme) as demo:
     gr.Markdown(description)
-    input_string = gr.Text(label="YouTube Link/URL")
-    use_cpu = gr.Checkbox(label="Use CPU Only", value=True)
-    use_single_onnx = gr.Checkbox(label="Use Single ONNX", value=False)
-    large_overlap = gr.Number(label="Large Overlap", value=0.6)
-    small_overlap = gr.Number(label="Small Overlap", value=0.5)
-    chunk_size = gr.Number(label="Chunk Size", value=1000000)
-    use_large_gpu = gr.Checkbox(label="Use Fast Large GPU Version", value=False)
-    process_button = gr.Button("Process Audio")
-    vocals = gr.Audio(label="Vocals")
-    vocals_spectrogram = gr.Image(label="Vocals Spectrogram")
-    instrumental = gr.Audio(label="Instrumental")
-    instrumental_spectrogram = gr.Image(label="Instrumental Spectrogram")
-    instrumental2 = gr.Audio(label="Instrumental 2")
-    instrumental2_spectrogram = gr.Image(label="Instrumental 2 Spectrogram")
-    bass = gr.Audio(label="Bass")
-    bass_spectrogram = gr.Image(label="Bass Spectrogram")
-    drums = gr.Audio(label="Drums")
-    drums_spectrogram = gr.Image(label="Drums Spectrogram")
-    other = gr.Audio(label="Other")
-    other_spectrogram = gr.Image(label="Other Spectrogram")
     process_button.click(
         separate_music_file_wrapper,

 import librosa.display
 import matplotlib.pyplot as plt
+# 下载YouTube视频并转换为WAV格式
 def download_youtube_video_as_wav(youtube_url):
     output_dir = "downloads"
     os.makedirs(output_dir, exist_ok=True)
     try:
         yt = YouTube(youtube_url)
         yt.streams.filter(only_audio=True).first().download(filename=output_file)
+        print("下载成功。")
     except Exception as e:
+        print(f"下载视频时发生错误: {e}")
         return None
+    # 将mp4音频转换为wav
     wav_file = os.path.join(output_dir, "mixture.wav")
     clip = AudioFileClip(output_file)
     clip.write_audiofile(wav_file)
     return wav_file
+# 检查文件是否准备好
 def check_file_readiness(filepath):
     num_same_size_checks = 0
     last_size = -1
     while num_same_size_checks < 5:
         time.sleep(0.5)
     return True
+# 生成音频的频谱图
 def generate_spectrogram(audio_file_path):
     y, sr = librosa.load(audio_file_path)
     plt.figure(figsize=(10, 4))
     plt.close()
     return image_path
+# 为多个音频文件生成频谱图
 def generate_spectrograms(audio_files):
     output_spectrograms = []
     for audio_file in audio_files:
         output_spectrograms.append(generate_spectrogram(audio_file))
     return tuple(output_spectrograms)
+# 音乐分离的包装函数
 def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
     input_files = []
+    # 验证YouTube URL或目录路径
     if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
         output_file = download_youtube_video_as_wav(input_string)
         if output_file is not None:
         input_directory = input_string
         input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
     else:
+        raise ValueError("无效输入！请输入有效的YouTube链接或目录路径。")
+    # 验证重叠值
     if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
+        raise ValueError("重叠值必须在0到1之间。")
+    # 验证块大小
     if chunk_size <= 0:
+        raise ValueError("块大小必须大于0。") # 太小了
     options = {
         'input_audio': input_files,
     predict_with_model(options)
+    # 清除GPU缓存
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         audio_file_name = os.path.splitext(os.path.basename(f))[0]
         output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
         output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
+        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav") # 第二个伴奏输出
         output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
         output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
         output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")
+    # 检查文件是否准备好
     output_files_ready = []
     for k, v in output_files.items():
         if os.path.exists(v) and check_file_readiness(v):
             output_files_ready.append(v)
         else:
+            empty_data = np.zeros((44100, 2)) # 2个声道，1秒的静音，采样率为44100Hz
             empty_file = tempfile.mktemp('.wav')
+            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))  # 转换为int16，因为wavfile不支持float32
             output_files_ready.append(empty_file)
+    # 在分离音频后立即生成频谱图
     output_spectrograms = generate_spectrograms(output_files_ready)
+    print(len(output_files_ready)) # 应输出6
+    print(len(output_spectrograms)) # 应输出6
+    print("返回前")
     return tuple(output_files_ready) + output_spectrograms
+    print("返回后")
 description = """
 # ZFTurbo Web-UI
 Web-UI by [Ma5onic](https://github.com/Ma5onic)
+## 选项:
+- **仅使用CPU:** 如果GPU内存不足，请选择此项。速度会较慢。
+- **使用单一ONNX模型:** 选择此项以使用单一ONNX模型。会稍微降低质量，但可以帮助减少GPU内存使用。
+- **大块重叠:** 大块的重叠。根据需要调整。
+- **小块重叠:** 小块的重叠。根据需要调整。
+- **块大小:** 每次处理的块大小。如果遇到内存问题，请减少此值。
+- **使用快速大GPU版本:** 选择此项以使用旧的快速方法，需要超过11GB的GPU内存。运行速度更快。
 """
 theme = gr.themes.Base(
     primary_hue="cyan",
 with gr.Blocks(theme=theme) as demo:
     gr.Markdown(description)
+    input_string = gr.Text(label="YouTube链接/URL")
+    use_cpu = gr.Checkbox(label="仅使用CPU", value=True)
+    use_single_onnx = gr.Checkbox(label="使用单一ONNX模型", value=False)
+    large_overlap = gr.Number(label="大块重叠", value=0.6)
+    small_overlap = gr.Number(label="小块重叠", value=0.5)
+    chunk_size = gr.Number(label="块大小", value=1000000)
+    use_large_gpu = gr.Checkbox(label="使用快速大GPU版本", value=False)
+    process_button = gr.Button("处理音频")
+    vocals = gr.Audio(label="人声")
+    vocals_spectrogram = gr.Image(label="人声频谱图")
+    instrumental = gr.Audio(label="伴奏")
+    instrumental_spectrogram = gr.Image(label="伴奏频谱图")
+    instrumental2 = gr.Audio(label="伴奏2")
+    instrumental2_spectrogram = gr.Image(label="伴奏2频谱图")
+    bass = gr.Audio(label="贝斯")
+    bass_spectrogram = gr.Image(label="贝斯频谱图")
+    drums = gr.Audio(label="鼓声")
+    drums_spectrogram = gr.Image(label="鼓声频谱图")
+    other = gr.Audio(label="其他")
+    other_spectrogram = gr.Image(label="其他频谱图")
     process_button.click(
         separate_music_file_wrapper,