Yeluo0204's picture
Update app.py
0af3599 verified
raw
history blame
6.39 kB
import os
import tempfile
import time

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.io import wavfile

from inference import EnsembleDemucsMDXMusicSeparationModel, predict_with_model
def check_file_readiness(filepath, stable_checks=5, poll_interval=0.5, timeout=None):
    """Wait until the size of ``filepath`` stops changing.

    The file size is polled repeatedly; the file is considered ready once the
    same size has been observed on ``stable_checks`` consecutive polls after
    the first measurement.

    Args:
        filepath: Path of the file to watch. It must already exist, because
            ``os.path.getsize`` raises ``OSError`` otherwise.
        stable_checks: Number of consecutive unchanged-size polls required.
        poll_interval: Seconds to sleep between polls.
        timeout: Optional upper bound in seconds for the whole wait. ``None``
            (the default) waits indefinitely, as the original code did.

    Returns:
        True once the size is stable, or False if ``timeout`` elapsed first.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    num_same_size_checks = 0
    last_size = -1  # sentinel: no real file has a negative size
    while num_same_size_checks < stable_checks:
        current_size = os.path.getsize(filepath)
        if current_size == last_size:
            num_same_size_checks += 1
        else:
            # Size changed (or first poll): restart the stability count.
            num_same_size_checks = 0
            last_size = current_size
        if num_same_size_checks >= stable_checks:
            # Already confirmed stable; no need for one more sleep.
            break
        if deadline is not None and time.monotonic() >= deadline:
            return False
        time.sleep(poll_interval)
    return True
def generate_spectrogram(audio_file_path):
    """Render a mel spectrogram of an audio file to a temporary PNG.

    The audio is loaded with ``librosa.load`` using its default settings, a
    128-band mel spectrogram capped at 8000 Hz is computed, converted to dB
    relative to its maximum, and drawn on a 10x4 inch matplotlib figure.

    Args:
        audio_file_path: Path of the audio file to visualise.

    Returns:
        Path of the PNG image that was written. The caller owns the file;
        nothing in this function deletes it on success.
    """
    y, sr = librosa.load(audio_file_path)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before we write to it.
    fd, image_path = tempfile.mkstemp(suffix='.png')
    os.close(fd)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                                 y_axis='mel', fmax=8000, x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel spectrogram')
        plt.tight_layout()
        plt.savefig(image_path)
    except Exception:
        # Do not leave an empty or partial image behind when rendering fails.
        os.remove(image_path)
        raise
    finally:
        # Always release the figure, otherwise pyplot keeps it alive.
        plt.close(fig)
    return image_path
def generate_spectrograms(audio_files):
    """Render a spectrogram image for every audio file, preserving order.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        Tuple of image paths, one per input path, in the same order.
    """
    return tuple(generate_spectrogram(path) for path in audio_files)
def separate_music_file_wrapper(uploaded_files, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
    """Separate one uploaded track into stems and render a spectrogram of each.

    Args:
        uploaded_files: The uploaded file, either an object exposing the
            on-disk path as ``.name`` or a plain path string.
        use_cpu: Forwarded to the model as the ``cpu`` option.
        use_single_onnx: Forwarded as the ``single_onnx`` option.
        large_overlap: Forwarded as ``overlap_large``; must be within [0, 1].
        small_overlap: Forwarded as ``overlap_small``; must be within [0, 1].
        chunk_size: Forwarded as ``chunk_size``; must be greater than 0.
        use_large_gpu: Forwarded as the ``large_gpu`` option.

    Returns:
        A 12-tuple: six audio paths (vocals, instrumental, instrumental2,
        bass, drums, other) followed by the six matching spectrogram image
        paths in the same order. A stem that the model did not produce is
        replaced by one second of stereo silence.

    Raises:
        ValueError: If no file was uploaded, an overlap is outside [0, 1],
            or ``chunk_size`` is not positive.
    """
    if uploaded_files is None:
        # Clicking the button without an upload used to fail with an opaque
        # AttributeError on ``None.name``.
        raise ValueError("请先上传音频文件。")
    # Accept both a file object carrying ``.name`` and a plain path string.
    input_path = uploaded_files if isinstance(uploaded_files, str) else uploaded_files.name
    input_files = [input_path]

    # Validate the overlap values.
    if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
        raise ValueError("重叠值必须在0到1之间。")
    # Validate the chunk size.
    if chunk_size <= 0:
        raise ValueError("块大小必须大于0。")

    options = {
        'input_audio': input_files,
        'output_folder': 'results',
        'cpu': use_cpu,
        'single_onnx': use_single_onnx,
        'overlap_large': large_overlap,
        'overlap_small': small_overlap,
        'chunk_size': chunk_size,
        'large_gpu': use_large_gpu,
    }
    predict_with_model(options)

    # Release cached GPU memory once inference is done.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Expected output files, in the order the UI output components are wired.
    audio_file_name = os.path.splitext(os.path.basename(input_path))[0]
    stem_suffixes = ("_vocals", "_instrum", "_instrum2", "_bass", "_drums", "_other")
    expected_paths = [
        os.path.join(options['output_folder'], audio_file_name + suffix + ".wav")
        for suffix in stem_suffixes
    ]

    output_files_ready = []
    for stem_path in expected_paths:
        if os.path.exists(stem_path) and check_file_readiness(stem_path):
            output_files_ready.append(stem_path)
        else:
            # Missing stem: substitute 1 s of stereo silence (44100 Hz, int16)
            # so the result always contains six audio entries.
            # mkstemp replaces the deprecated, race-prone mktemp.
            fd, empty_file = tempfile.mkstemp(suffix='.wav')
            os.close(fd)
            empty_data = np.zeros((44100, 2), dtype=np.int16)
            wavfile.write(empty_file, 44100, empty_data)
            output_files_ready.append(empty_file)

    # Render the spectrograms right after separation.
    output_spectrograms = generate_spectrograms(output_files_ready)

    print(len(output_files_ready))  # expected: 6
    print(len(output_spectrograms))  # expected: 6

    print("返回前")
    return tuple(output_files_ready) + tuple(output_spectrograms)
# Markdown rendered at the top of the page; it explains each option of the UI.
description = """
# ZFTurbo Web-UI
Web-UI by [Ma5onic](https://github.com/Ma5onic)
## 选项:
- **仅使用CPU:** 如果GPU内存不足,请选择此项。速度会较慢。
- **使用单一ONNX模型:** 选择此项以使用单一ONNX模型。会稍微降低质量,但可以帮助减少GPU内存使用。
- **大块重叠:** 大块的重叠。根据需要调整。
- **小块重叠:** 小块的重叠。根据需要调整。
- **块大小:** 每次处理的块大小。如果遇到内存问题,请减少此值。
- **使用快速大GPU版本:** 选择此项以使用旧的快速方法,需要超过11GB的GPU内存。运行速度更快。
"""

# Gradio base theme using cyan for both the primary and the secondary hue.
theme = gr.themes.Base(primary_hue="cyan", secondary_hue="cyan")
with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)

    # Input controls, created in the order they should appear on the page.
    uploaded_file = gr.File(label="上传音频文件", type="file")
    use_cpu = gr.Checkbox(label="仅使用CPU", value=True)
    use_single_onnx = gr.Checkbox(label="使用单一ONNX模型", value=False)
    large_overlap = gr.Number(label="大块重叠", value=0.6)
    small_overlap = gr.Number(label="小块重叠", value=0.5)
    chunk_size = gr.Number(label="块大小", value=1000000)
    use_large_gpu = gr.Checkbox(label="使用快速大GPU版本", value=False)
    process_button = gr.Button("处理音频")

    # One audio player followed by its spectrogram image for every stem.
    vocals = gr.Audio(label="人声")
    vocals_spectrogram = gr.Image(label="人声频谱图")
    instrumental = gr.Audio(label="伴奏")
    instrumental_spectrogram = gr.Image(label="伴奏频谱图")
    instrumental2 = gr.Audio(label="伴奏2")
    instrumental2_spectrogram = gr.Image(label="伴奏2频谱图")
    bass = gr.Audio(label="贝斯")
    bass_spectrogram = gr.Image(label="贝斯频谱图")
    drums = gr.Audio(label="鼓声")
    drums_spectrogram = gr.Image(label="鼓声频谱图")
    other = gr.Audio(label="其他")
    other_spectrogram = gr.Image(label="其他频谱图")

    # The handler returns the six audio paths first and the six spectrogram
    # paths second, so the output components are listed in that same order.
    separation_inputs = [
        uploaded_file,
        use_cpu,
        use_single_onnx,
        large_overlap,
        small_overlap,
        chunk_size,
        use_large_gpu,
    ]
    audio_outputs = [vocals, instrumental, instrumental2, bass, drums, other]
    spectrogram_outputs = [
        vocals_spectrogram,
        instrumental_spectrogram,
        instrumental2_spectrogram,
        bass_spectrogram,
        drums_spectrogram,
        other_spectrogram,
    ]
    process_button.click(
        fn=separate_music_file_wrapper,
        inputs=separation_inputs,
        outputs=audio_outputs + spectrogram_outputs,
    )

# Enable the request queue, then start the server without a public share link.
queued_demo = demo.queue()
queued_demo.launch(debug=True, share=False)