import os
import time
import numpy as np
import tempfile
from scipy.io import wavfile
from pytube import YouTube
import gradio as gr
from moviepy.editor import AudioFileClip
from inference import EnsembleDemucsMDXMusicSeparationModel, predict_with_model
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt


def download_youtube_video_as_wav(youtube_url):
    output_dir = "downloads"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, "temp.mp4")

    try:
        yt = YouTube(youtube_url)
        yt.streams.filter(only_audio=True).first().download(filename=output_file)
        print("Download completed successfully.")
    except Exception as e:
        print(f"An error occurred while downloading the video: {e}")
        return None

    # Convert mp4 audio to wav
    wav_file = os.path.join(output_dir, "mixture.wav")
    clip = AudioFileClip(output_file)
    clip.write_audiofile(wav_file)

    return wav_file


def check_file_readiness(filepath):
    # If the loop finished, it means the file size has not changed for 5 seconds
    # which indicates that the file is ready
    num_same_size_checks = 0
    last_size = -1
    while num_same_size_checks < 5:
        current_size = os.path.getsize(filepath)
        if current_size == last_size:
            num_same_size_checks += 1
        else:
            num_same_size_checks = 0
            last_size = current_size
        time.sleep(0.5)
    return True


def generate_spectrogram(audio_file_path):
    y, sr = librosa.load(audio_file_path)
    plt.figure(figsize=(10, 4))
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.tight_layout()
    image_path = tempfile.mktemp('.png')
    plt.savefig(image_path)
    plt.close()
    return image_path


def generate_spectrograms(audio_files):
    output_spectrograms = []
    for audio_file in audio_files:
        output_spectrograms.append(generate_spectrogram(audio_file))
    return tuple(output_spectrograms)


def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
    input_files = []
    # Validate YouTube URL or directory path
    if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
        output_file = download_youtube_video_as_wav(input_string)
        if output_file is not None:
            input_files.append(output_file)
    elif os.path.isdir(input_string):
        input_directory = input_string
        input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
    else:
        raise ValueError("Invalid input! Please provide a valid YouTube link or a directory path.")

    # Validate overlap values
    if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
        raise ValueError("Overlap values must be between 0 and 1.")

    # Validate chunk size
    if chunk_size <= 0:
        raise ValueError("Chunk size must be greater than 0.") # not thicc enough

    options = {
        'input_audio': input_files,
        'output_folder': 'results',
        'cpu': use_cpu,
        'single_onnx': use_single_onnx,
        'overlap_large': large_overlap,
        'overlap_small': small_overlap,
        'chunk_size': chunk_size,
        'large_gpu': use_large_gpu,
    }

    predict_with_model(options)

    # Clear GPU cache
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    output_files = {}
    for f in input_files:
        audio_file_name = os.path.splitext(os.path.basename(f))[0]
        output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
        output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
        output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav") # For the second instrumental output
        output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
        output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
        output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")

    # Check the readiness of the files
    output_files_ready = []
    for k, v in output_files.items():
        if os.path.exists(v) and check_file_readiness(v):
            output_files_ready.append(v)
        else:
            empty_data = np.zeros((44100, 2)) # 2 channels, 1 second of silence at 44100Hz
            empty_file = tempfile.mktemp('.wav')
            wavfile.write(empty_file, 44100, empty_data.astype(np.int16))  # Cast to int16 as wavfile does not support float32
            output_files_ready.append(empty_file)
    
    # Generate spectrograms right after separating the audio
    output_spectrograms = generate_spectrograms(output_files_ready)

    print(len(output_files_ready)) # should print 6
    print(len(output_spectrograms)) # should print 6

    print("Before return")
    return tuple(output_files_ready) + output_spectrograms
    print("After return") 


description = """
# ZFTurbo Web-UI
Web-UI by [Ma5onic](https://github.com/Ma5onic)
## Options:
- **Use CPU Only:** Select this if you have not enough GPU memory. It will be slower.
- **Use Single ONNX:** Select this to use a single ONNX model. It will decrease quality a little bit but can help with GPU memory usage.
- **Large Overlap:** The overlap for large chunks. Adjust as needed.
- **Small Overlap:** The overlap for small chunks. Adjust as needed.
- **Chunk Size:** The size of chunks to be processed at a time. Reduce this if facing memory issues.
- **Use Fast Large GPU Version:** Select this to use the old fast method that requires > 11 GB of GPU memory. It will work faster.
"""
theme = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="cyan",
)

with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)
    input_string = gr.Text(label="YouTube Link/URL")
    use_cpu = gr.Checkbox(label="Use CPU Only", value=True)
    use_single_onnx = gr.Checkbox(label="Use Single ONNX", value=False)
    large_overlap = gr.Number(label="Large Overlap", value=0.6)
    small_overlap = gr.Number(label="Small Overlap", value=0.5)
    chunk_size = gr.Number(label="Chunk Size", value=1000000)
    use_large_gpu = gr.Checkbox(label="Use Fast Large GPU Version", value=False)    
    process_button = gr.Button("Process Audio")

    vocals = gr.Audio(label="Vocals")
    vocals_spectrogram = gr.Image(label="Vocals Spectrogram")
    instrumental = gr.Audio(label="Instrumental")
    instrumental_spectrogram = gr.Image(label="Instrumental Spectrogram")
    instrumental2 = gr.Audio(label="Instrumental 2")
    instrumental2_spectrogram = gr.Image(label="Instrumental 2 Spectrogram")
    bass = gr.Audio(label="Bass")
    bass_spectrogram = gr.Image(label="Bass Spectrogram")
    drums = gr.Audio(label="Drums")
    drums_spectrogram = gr.Image(label="Drums Spectrogram")
    other = gr.Audio(label="Other")
    other_spectrogram = gr.Image(label="Other Spectrogram")
    
    process_button.click(
        separate_music_file_wrapper,
        inputs=[input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu],
        outputs=[vocals, instrumental, instrumental2, bass, drums, other, vocals_spectrogram, instrumental_spectrogram, instrumental2_spectrogram, bass_spectrogram, drums_spectrogram, other_spectrogram],
    )

demo.queue().launch(debug=True, share=False)