import os
import tempfile
import time

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import torch
from moviepy.editor import AudioFileClip
from pytube import YouTube
from scipy.io import wavfile

from inference import EnsembleDemucsMDXMusicSeparationModel, predict_with_model
def download_youtube_video_as_wav(youtube_url):
    """Download the audio track of a YouTube video and convert it to WAV."""
    output_dir = "downloads"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "temp.mp4")
    try:
        yt = YouTube(youtube_url)
        yt.streams.filter(only_audio=True).first().download(filename=output_file)
        print("Download completed successfully.")
    except Exception as e:
        print(f"An error occurred while downloading the video: {e}")
        return None

    # Convert the downloaded MP4 audio to WAV.
    wav_file = os.path.join(output_dir, "mixture.wav")
    clip = AudioFileClip(output_file)
    clip.write_audiofile(wav_file)
    clip.close()  # Release the file handle held by moviepy.
    return wav_file
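
# Example (hypothetical URL) of the expected behavior, assuming pytube can
# reach the video: the converted file always lands at downloads/mixture.wav.
#
#   wav_path = download_youtube_video_as_wav("https://www.youtube.com/watch?v=...")
#   # wav_path == "downloads/mixture.wav" on success, None on failure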
def check_file_readiness(filepath):
    """Wait until the file size stops changing, i.e. the file is fully written."""
    num_same_size_checks = 0
    last_size = -1
    while num_same_size_checks < 5:
        current_size = os.path.getsize(filepath)
        if current_size == last_size:
            num_same_size_checks += 1
        else:
            num_same_size_checks = 0
        last_size = current_size
        time.sleep(0.5)
    # Reaching this point means the size was stable for 5 consecutive checks
    # (2.5 seconds), which indicates that the file is ready.
    return True
def generate_spectrogram(audio_file_path):
    """Render a mel spectrogram of an audio file and return the PNG path."""
    y, sr = librosa.load(audio_file_path)
    plt.figure(figsize=(10, 4))
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    # NamedTemporaryFile avoids the race condition of the deprecated tempfile.mktemp().
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        image_path = tmp.name
    plt.savefig(image_path)
    plt.close()
    return image_path
def generate_spectrograms(audio_files):
    output_spectrograms = []
    for audio_file in audio_files:
        output_spectrograms.append(generate_spectrogram(audio_file))
    return tuple(output_spectrograms)
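
# Example with hypothetical paths: the helper accepts any iterable of WAV
# paths (such as the six stems produced below) and returns one PNG path per
# input, in the same order.
#
#   spectrograms = generate_spectrograms(["results/song_vocals.wav",
#                                         "results/song_drums.wav"])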
def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
    input_files = []

    # Accept either a YouTube URL or a local directory of WAV files.
    if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
        output_file = download_youtube_video_as_wav(input_string)
        if output_file is not None:
            input_files.append(output_file)
    elif os.path.isdir(input_string):
        input_directory = input_string
        input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
    else:
        raise ValueError("Invalid input! Please provide a valid YouTube link or a directory path.")

    # Validate overlap values.
    if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
        raise ValueError("Overlap values must be between 0 and 1.")

    # Validate chunk size.
    if chunk_size <= 0:
        raise ValueError("Chunk size must be greater than 0.")
    options = {
        'input_audio': input_files,
        'output_folder': 'results',
        'cpu': use_cpu,
        'single_onnx': use_single_onnx,
        'overlap_large': large_overlap,
        'overlap_small': small_overlap,
        'chunk_size': chunk_size,
        'large_gpu': use_large_gpu,
    }

    predict_with_model(options)

    # Clear the GPU cache so repeated runs do not accumulate allocated memory.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # Collect the six expected stems. Note that the dict keys are fixed, so
    # with multiple input files only the stems of the last file are kept; the
    # UI below exposes exactly one set of six outputs.
    output_files = {}
    for f in input_files:
        audio_file_name = os.path.splitext(os.path.basename(f))[0]
        output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
        output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav")  # Second instrumental output
        output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
        output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
        output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")
    # Check that each stem exists and is fully written; substitute one second
    # of silence for any stem that is missing so the UI always receives six files.
    output_files_ready = []
    for k, v in output_files.items():
        if os.path.exists(v) and check_file_readiness(v):
            output_files_ready.append(v)
        else:
            empty_data = np.zeros((44100, 2))  # 2 channels, 1 second of silence at 44100 Hz
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
                empty_file = tmp.name
            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))  # 16-bit PCM for broad player compatibility
            output_files_ready.append(empty_file)

    # Generate spectrograms right after separating the audio.
    output_spectrograms = generate_spectrograms(output_files_ready)

    print(len(output_files_ready))   # should print 6
    print(len(output_spectrograms))  # should print 6

    return tuple(output_files_ready) + output_spectrograms
description = """ | |
# ZFTurbo Web-UI | |
Web-UI by [Ma5onic](https://github.com/Ma5onic) | |
## Options: | |
- **Use CPU Only:** Select this if you have not enough GPU memory. It will be slower. | |
- **Use Single ONNX:** Select this to use a single ONNX model. It will decrease quality a little bit but can help with GPU memory usage. | |
- **Large Overlap:** The overlap for large chunks. Adjust as needed. | |
- **Small Overlap:** The overlap for small chunks. Adjust as needed. | |
- **Chunk Size:** The size of chunks to be processed at a time. Reduce this if facing memory issues. | |
- **Use Fast Large GPU Version:** Select this to use the old fast method that requires > 11 GB of GPU memory. It will work faster. | |
""" | |
theme = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="cyan",
)
with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)
    input_string = gr.Text(label="YouTube Link/URL")
    use_cpu = gr.Checkbox(label="Use CPU Only", value=True)
    use_single_onnx = gr.Checkbox(label="Use Single ONNX", value=False)
    large_overlap = gr.Number(label="Large Overlap", value=0.6)
    small_overlap = gr.Number(label="Small Overlap", value=0.5)
    chunk_size = gr.Number(label="Chunk Size", value=1000000)
    use_large_gpu = gr.Checkbox(label="Use Fast Large GPU Version", value=False)
    process_button = gr.Button("Process Audio")
    vocals = gr.Audio(label="Vocals")
    vocals_spectrogram = gr.Image(label="Vocals Spectrogram")
    instrumental = gr.Audio(label="Instrumental")
    instrumental_spectrogram = gr.Image(label="Instrumental Spectrogram")
    instrumental2 = gr.Audio(label="Instrumental 2")
    instrumental2_spectrogram = gr.Image(label="Instrumental 2 Spectrogram")
    bass = gr.Audio(label="Bass")
    bass_spectrogram = gr.Image(label="Bass Spectrogram")
    drums = gr.Audio(label="Drums")
    drums_spectrogram = gr.Image(label="Drums Spectrogram")
    other = gr.Audio(label="Other")
    other_spectrogram = gr.Image(label="Other Spectrogram")

    process_button.click(
        separate_music_file_wrapper,
        inputs=[input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu],
        outputs=[vocals, instrumental, instrumental2, bass, drums, other, vocals_spectrogram, instrumental_spectrogram, instrumental2_spectrogram, bass_spectrogram, drums_spectrogram, other_spectrogram],
    )
demo.queue().launch(debug=True, share=False)
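
# To expose a temporary public link when running locally (e.g. outside a
# Space), launch with share=True instead:
#
#   demo.queue().launch(debug=True, share=True)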