Yeluo0204 committed on
Commit
1a492ad
·
verified ·
1 Parent(s): c6ed663

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -56
app.py CHANGED
@@ -12,7 +12,7 @@ import librosa
12
  import librosa.display
13
  import matplotlib.pyplot as plt
14
 
15
-
16
  def download_youtube_video_as_wav(youtube_url):
17
  output_dir = "downloads"
18
  os.makedirs(output_dir, exist_ok=True)
@@ -21,22 +21,20 @@ def download_youtube_video_as_wav(youtube_url):
21
  try:
22
  yt = YouTube(youtube_url)
23
  yt.streams.filter(only_audio=True).first().download(filename=output_file)
24
- print("Download completed successfully.")
25
  except Exception as e:
26
- print(f"An error occurred while downloading the video: {e}")
27
  return None
28
 
29
- # Convert mp4 audio to wav
30
  wav_file = os.path.join(output_dir, "mixture.wav")
31
  clip = AudioFileClip(output_file)
32
  clip.write_audiofile(wav_file)
33
 
34
  return wav_file
35
 
36
-
37
  def check_file_readiness(filepath):
38
- # If the loop finished, it means the file size has not changed for 5 seconds
39
- # which indicates that the file is ready
40
  num_same_size_checks = 0
41
  last_size = -1
42
  while num_same_size_checks < 5:
@@ -49,7 +47,7 @@ def check_file_readiness(filepath):
49
  time.sleep(0.5)
50
  return True
51
 
52
-
53
  def generate_spectrogram(audio_file_path):
54
  y, sr = librosa.load(audio_file_path)
55
  plt.figure(figsize=(10, 4))
@@ -64,17 +62,17 @@ def generate_spectrogram(audio_file_path):
64
  plt.close()
65
  return image_path
66
 
67
-
68
  def generate_spectrograms(audio_files):
69
  output_spectrograms = []
70
  for audio_file in audio_files:
71
  output_spectrograms.append(generate_spectrogram(audio_file))
72
  return tuple(output_spectrograms)
73
 
74
-
75
  def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
76
  input_files = []
77
- # Validate YouTube URL or directory path
78
  if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
79
  output_file = download_youtube_video_as_wav(input_string)
80
  if output_file is not None:
@@ -83,15 +81,15 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
83
  input_directory = input_string
84
  input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
85
  else:
86
- raise ValueError("Invalid input! Please provide a valid YouTube link or a directory path.")
87
 
88
- # Validate overlap values
89
  if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
90
- raise ValueError("Overlap values must be between 0 and 1.")
91
 
92
- # Validate chunk size
93
  if chunk_size <= 0:
94
- raise ValueError("Chunk size must be greater than 0.") # not thicc enough
95
 
96
  options = {
97
  'input_audio': input_files,
@@ -106,7 +104,7 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
106
 
107
  predict_with_model(options)
108
 
109
- # Clear GPU cache
110
  if torch.cuda.is_available():
111
  torch.cuda.empty_cache()
112
 
@@ -115,43 +113,42 @@ def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_ov
115
  audio_file_name = os.path.splitext(os.path.basename(f))[0]
116
  output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
117
  output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
118
- output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav") # For the second instrumental output
119
  output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
120
  output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
121
  output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")
122
 
123
- # Check the readiness of the files
124
  output_files_ready = []
125
  for k, v in output_files.items():
126
  if os.path.exists(v) and check_file_readiness(v):
127
  output_files_ready.append(v)
128
  else:
129
- empty_data = np.zeros((44100, 2)) # 2 channels, 1 second of silence at 44100Hz
130
  empty_file = tempfile.mktemp('.wav')
131
- wavfile.write(empty_file, 44100, empty_data.astype(np.int16)) # Cast to int16 as wavfile does not support float32
132
  output_files_ready.append(empty_file)
133
-
134
- # Generate spectrograms right after separating the audio
135
  output_spectrograms = generate_spectrograms(output_files_ready)
136
 
137
- print(len(output_files_ready)) # should print 6
138
- print(len(output_spectrograms)) # should print 6
139
 
140
- print("Before return")
141
  return tuple(output_files_ready) + output_spectrograms
142
- print("After return")
143
-
144
 
145
  description = """
146
  # ZFTurbo Web-UI
147
  Web-UI by [Ma5onic](https://github.com/Ma5onic)
148
- ## Options:
149
- - **Use CPU Only:** Select this if you have not enough GPU memory. It will be slower.
150
- - **Use Single ONNX:** Select this to use a single ONNX model. It will decrease quality a little bit but can help with GPU memory usage.
151
- - **Large Overlap:** The overlap for large chunks. Adjust as needed.
152
- - **Small Overlap:** The overlap for small chunks. Adjust as needed.
153
- - **Chunk Size:** The size of chunks to be processed at a time. Reduce this if facing memory issues.
154
- - **Use Fast Large GPU Version:** Select this to use the old fast method that requires > 11 GB of GPU memory. It will work faster.
155
  """
156
  theme = gr.themes.Base(
157
  primary_hue="cyan",
@@ -160,27 +157,27 @@ theme = gr.themes.Base(
160
 
161
  with gr.Blocks(theme=theme) as demo:
162
  gr.Markdown(description)
163
- input_string = gr.Text(label="YouTube Link/URL")
164
- use_cpu = gr.Checkbox(label="Use CPU Only", value=True)
165
- use_single_onnx = gr.Checkbox(label="Use Single ONNX", value=False)
166
- large_overlap = gr.Number(label="Large Overlap", value=0.6)
167
- small_overlap = gr.Number(label="Small Overlap", value=0.5)
168
- chunk_size = gr.Number(label="Chunk Size", value=1000000)
169
- use_large_gpu = gr.Checkbox(label="Use Fast Large GPU Version", value=False)
170
- process_button = gr.Button("Process Audio")
171
-
172
- vocals = gr.Audio(label="Vocals")
173
- vocals_spectrogram = gr.Image(label="Vocals Spectrogram")
174
- instrumental = gr.Audio(label="Instrumental")
175
- instrumental_spectrogram = gr.Image(label="Instrumental Spectrogram")
176
- instrumental2 = gr.Audio(label="Instrumental 2")
177
- instrumental2_spectrogram = gr.Image(label="Instrumental 2 Spectrogram")
178
- bass = gr.Audio(label="Bass")
179
- bass_spectrogram = gr.Image(label="Bass Spectrogram")
180
- drums = gr.Audio(label="Drums")
181
- drums_spectrogram = gr.Image(label="Drums Spectrogram")
182
- other = gr.Audio(label="Other")
183
- other_spectrogram = gr.Image(label="Other Spectrogram")
184
 
185
  process_button.click(
186
  separate_music_file_wrapper,
 
12
  import librosa.display
13
  import matplotlib.pyplot as plt
14
 
15
+ # 下载YouTube视频并转换为WAV格式
16
  def download_youtube_video_as_wav(youtube_url):
17
  output_dir = "downloads"
18
  os.makedirs(output_dir, exist_ok=True)
 
21
  try:
22
  yt = YouTube(youtube_url)
23
  yt.streams.filter(only_audio=True).first().download(filename=output_file)
24
+ print("下载成功。")
25
  except Exception as e:
26
+ print(f"下载视频时发生错误: {e}")
27
  return None
28
 
29
+ # mp4音频转换为wav
30
  wav_file = os.path.join(output_dir, "mixture.wav")
31
  clip = AudioFileClip(output_file)
32
  clip.write_audiofile(wav_file)
33
 
34
  return wav_file
35
 
36
+ # 检查文件是否准备好
37
  def check_file_readiness(filepath):
 
 
38
  num_same_size_checks = 0
39
  last_size = -1
40
  while num_same_size_checks < 5:
 
47
  time.sleep(0.5)
48
  return True
49
 
50
+ # 生成音频的频谱图
51
  def generate_spectrogram(audio_file_path):
52
  y, sr = librosa.load(audio_file_path)
53
  plt.figure(figsize=(10, 4))
 
62
  plt.close()
63
  return image_path
64
 
65
+ # 为多个音频文件生成频谱图
66
  def generate_spectrograms(audio_files):
67
  output_spectrograms = []
68
  for audio_file in audio_files:
69
  output_spectrograms.append(generate_spectrogram(audio_file))
70
  return tuple(output_spectrograms)
71
 
72
+ # 音乐分离的包装函数
73
  def separate_music_file_wrapper(input_string, use_cpu, use_single_onnx, large_overlap, small_overlap, chunk_size, use_large_gpu):
74
  input_files = []
75
+ # 验证YouTube URL或目录路径
76
  if input_string.startswith("https://www.youtube.com") or input_string.startswith("https://youtu.be"):
77
  output_file = download_youtube_video_as_wav(input_string)
78
  if output_file is not None:
 
81
  input_directory = input_string
82
  input_files = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith('.wav')]
83
  else:
84
+ raise ValueError("无效输入!请输入有效的YouTube链接或目录路径。")
85
 
86
+ # 验证重叠值
87
  if not (0 <= large_overlap <= 1) or not (0 <= small_overlap <= 1):
88
+ raise ValueError("重叠值必须在01之间。")
89
 
90
+ # 验证块大小
91
  if chunk_size <= 0:
92
+ raise ValueError("块大小必须大于0") # 太小了
93
 
94
  options = {
95
  'input_audio': input_files,
 
104
 
105
  predict_with_model(options)
106
 
107
+ # 清除GPU缓存
108
  if torch.cuda.is_available():
109
  torch.cuda.empty_cache()
110
 
 
113
  audio_file_name = os.path.splitext(os.path.basename(f))[0]
114
  output_files["vocals"] = os.path.join(options['output_folder'], audio_file_name + "_vocals.wav")
115
  output_files["instrumental"] = os.path.join(options['output_folder'], audio_file_name + "_instrum.wav")
116
+ output_files["instrumental2"] = os.path.join(options['output_folder'], audio_file_name + "_instrum2.wav") # 第二个伴奏输出
117
  output_files["bass"] = os.path.join(options['output_folder'], audio_file_name + "_bass.wav")
118
  output_files["drums"] = os.path.join(options['output_folder'], audio_file_name + "_drums.wav")
119
  output_files["other"] = os.path.join(options['output_folder'], audio_file_name + "_other.wav")
120
 
121
+ # 检查文件是否准备好
122
  output_files_ready = []
123
  for k, v in output_files.items():
124
  if os.path.exists(v) and check_file_readiness(v):
125
  output_files_ready.append(v)
126
  else:
127
+ empty_data = np.zeros((44100, 2)) # 2个声道,1秒的静音,采样率为44100Hz
128
  empty_file = tempfile.mktemp('.wav')
129
+ wavfile.write(empty_file, 44100, empty_data.astype(np.int16)) # 转换为int16,因为wavfile不支持float32
130
  output_files_ready.append(empty_file)
131
+
132
+ # 在分离音频后立即生成频谱图
133
  output_spectrograms = generate_spectrograms(output_files_ready)
134
 
135
+ print(len(output_files_ready)) # 应输出6
136
+ print(len(output_spectrograms)) # 应输出6
137
 
138
+ print("返回前")
139
  return tuple(output_files_ready) + output_spectrograms
140
+ print("返回后")
 
141
 
142
  description = """
143
  # ZFTurbo Web-UI
144
  Web-UI by [Ma5onic](https://github.com/Ma5onic)
145
+ ## 选项:
146
+ - **仅使用CPU:** 如果GPU内存不足,请选择此项。速度会较慢。
147
+ - **使用单一ONNX模型:** 选择此项以使用单一ONNX模型。会稍微降低质量,但可以帮助减少GPU内存使用。
148
+ - **大块重叠:** 大块的重叠。根据需要调整。
149
+ - **小块重叠:** 小块的重叠。根据需要调整。
150
+ - **块大小:** 每次处理的块大小。如果遇到内存问题,请减少此值。
151
+ - **使用快速大GPU版本:** 选择此项以使用旧的快速方法,需要超过11GB的GPU内存。运行速度更快。
152
  """
153
  theme = gr.themes.Base(
154
  primary_hue="cyan",
 
157
 
158
  with gr.Blocks(theme=theme) as demo:
159
  gr.Markdown(description)
160
+ input_string = gr.Text(label="YouTube链接/URL")
161
+ use_cpu = gr.Checkbox(label="仅使用CPU", value=True)
162
+ use_single_onnx = gr.Checkbox(label="使用单一ONNX模型", value=False)
163
+ large_overlap = gr.Number(label="大块重叠", value=0.6)
164
+ small_overlap = gr.Number(label="小块重叠", value=0.5)
165
+ chunk_size = gr.Number(label="块大小", value=1000000)
166
+ use_large_gpu = gr.Checkbox(label="使用快速大GPU版本", value=False)
167
+ process_button = gr.Button("处理音频")
168
+
169
+ vocals = gr.Audio(label="人声")
170
+ vocals_spectrogram = gr.Image(label="人声频谱图")
171
+ instrumental = gr.Audio(label="伴奏")
172
+ instrumental_spectrogram = gr.Image(label="伴奏频谱图")
173
+ instrumental2 = gr.Audio(label="伴奏2")
174
+ instrumental2_spectrogram = gr.Image(label="伴奏2频谱图")
175
+ bass = gr.Audio(label="贝斯")
176
+ bass_spectrogram = gr.Image(label="贝斯频谱图")
177
+ drums = gr.Audio(label="鼓声")
178
+ drums_spectrogram = gr.Image(label="鼓声频谱图")
179
+ other = gr.Audio(label="其他")
180
+ other_spectrogram = gr.Image(label="其他频谱图")
181
 
182
  process_button.click(
183
  separate_music_file_wrapper,