import os
import time
import tempfile

import numpy as np
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt
import gradio as gr
from scipy.io import wavfile

from inference import EnsembleDemucsMDXMusicSeparationModel, predict_with_model


def check_file_readiness(filepath):
    """Block until the file's size stops changing, i.e. writing has finished."""
    num_same_size_checks = 0
    last_size = -1
    # Treat the file as ready once its size has been stable for five
    # consecutive half-second checks. Note there is no timeout: a file
    # that never stabilizes will block forever.
    while num_same_size_checks < 5:
        current_size = os.path.getsize(filepath)
        if current_size == last_size:
            num_same_size_checks += 1
        else:
            num_same_size_checks = 0
        last_size = current_size
        time.sleep(0.5)
    return True

def generate_spectrogram(audio_file_path):
    """Render a mel spectrogram of an audio file and return the PNG path."""
    y, sr = librosa.load(audio_file_path)
    plt.figure(figsize=(10, 4))
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    # mkstemp is used instead of the deprecated, race-prone tempfile.mktemp.
    fd, image_path = tempfile.mkstemp(suffix='.png')
    os.close(fd)
    plt.savefig(image_path)
    plt.close()
    return image_path


def generate_spectrograms(audio_files):
    """Generate one spectrogram image per input audio file."""
    output_spectrograms = []
    for audio_file in audio_files:
        output_spectrograms.append(generate_spectrogram(audio_file))
    return tuple(output_spectrograms)

def separate_music_file_wrapper(uploaded_files, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
    input_files = [uploaded_files.name]

    if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
        raise ValueError("Overlap values must be between 0 and 1.")

    if chunk_size <= 0:
        raise ValueError("Chunk size must be greater than 0.")

    options = {
        'input_audio': input_files,
        'output_folder': 'results',
        'cpu': use_cpu,
        'single_onnx': use_single_onnx,
        'overlap_large': large_overlap,
        'overlap_small': small_overlap,
        'chunk_size': chunk_size,
        'large_gpu': use_large_gpu,
    }

    predict_with_model(options)

    # Release cached GPU memory once separation is done.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # predict_with_model writes one stem file per source into the output
    # folder, named after the input file.
    output_files = {}
    for f in input_files:
        audio_file_name = os.path.splitext(os.path.basename(f))[0]
        output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
        output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav")
        output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
        output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
        output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")

    # Wait until each stem has finished writing; substitute one second of
    # silence for any missing stem so the UI outputs stay aligned.
    output_files_ready = []
    for v in output_files.values():
        if os.path.exists(v) and check_file_readiness(v):
            output_files_ready.append(v)
        else:
            empty_data = np.zeros((44100, 2))
            fd, empty_file = tempfile.mkstemp(suffix='.wav')
            os.close(fd)
            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))
            output_files_ready.append(empty_file)

    output_spectrograms = generate_spectrograms(output_files_ready)

    print(len(output_files_ready))
    print(len(output_spectrograms))

    print("Before return")
    return tuple(output_files_ready) + output_spectrograms

description = """
# ZFTurbo Web-UI
Web-UI by [Ma5onic](https://github.com/Ma5onic)
## Options:
- **Use CPU Only:** Select this if you are running low on GPU memory. It will be slower.
- **Use Single ONNX:** Use a single ONNX model. Quality drops slightly, but it helps reduce GPU memory use.
- **Large Overlap:** Overlap for the large chunks. Adjust as needed.
- **Small Overlap:** Overlap for the small chunks. Adjust as needed.
- **Chunk Size:** Size of the chunks processed at a time. Reduce this if you run into memory issues.
- **Use Fast Large GPU Version:** Use the older, faster method that requires more than 11 GB of GPU memory.
"""
theme = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="cyan",
)

with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)
    uploaded_file = gr.File(label="Upload Audio File", type="file")
    use_cpu = gr.Checkbox(label="Use CPU Only", value=True)
    use_single_onnx = gr.Checkbox(label="Use Single ONNX", value=False)
    large_overlap = gr.Number(label="Large Overlap", value=0.6)
    small_overlap = gr.Number(label="Small Overlap", value=0.5)
    chunk_size = gr.Number(label="Chunk Size", value=1000000)
    use_large_gpu = gr.Checkbox(label="Use Fast Large GPU Version", value=False)
    process_button = gr.Button("Process Audio")

    vocals = gr.Audio(label="Vocals")
    vocals_spectrogram = gr.Image(label="Vocals Spectrogram")
    instrumental = gr.Audio(label="Instrumental")
    instrumental_spectrogram = gr.Image(label="Instrumental Spectrogram")
    instrumental2 = gr.Audio(label="Instrumental 2")
    instrumental2_spectrogram = gr.Image(label="Instrumental 2 Spectrogram")
    bass = gr.Audio(label="Bass")
    bass_spectrogram = gr.Image(label="Bass Spectrogram")
    drums = gr.Audio(label="Drums")
    drums_spectrogram = gr.Image(label="Drums Spectrogram")
    other = gr.Audio(label="Other")
    other_spectrogram = gr.Image(label="Other Spectrogram")
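
    # The outputs below must line up, in order, with the tuple returned by
    # separate_music_file_wrapper: six audio tracks followed by their six
    # spectrogram images.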
    process_button.click(
        separate_music_file_wrapper,
        inputs=[uploaded_file, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu],
        outputs=[vocals, instrumental, instrumental2, bass, drums, other, vocals_spectrogram, instrumental_spectrogram, instrumental2_spectrogram, bass_spectrogram, drums_spectrogram, other_spectrogram],
    )
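
# queue() matters here: it queues incoming requests so that long-running
# separation jobs are less likely to be cut off by request timeouts.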
demo.queue().launch(debug=True, share=False)