ASesYusuf1 committed
Commit 004da11 · verified · Parent: 30ea38f

Create app.py

Files changed (1): app.py (+314, -0)
app.py ADDED
import gradio as gr
import subprocess
import time
import io
import contextlib
import matplotlib.pyplot as plt
import librosa.display
import gc
import os
import random
import numpy as np
from scipy.signal.windows import hann
from scipy.stats import kurtosis, skew
import soundfile as sf
import torch
import tempfile
import librosa
import noisereduce as nr
from scipy import signal
import warnings
import requests
from pathlib import Path
warnings.filterwarnings("ignore")

os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create the necessary directories next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
output_folder = os.path.join(base_dir, 'output_file')
model_folder = os.path.join(base_dir, 'model')
config_folder = os.path.join(base_dir, 'configs')

for folder in [output_folder, model_folder, config_folder]:
    if not os.path.exists(folder):
        os.makedirs(folder)
        print(f"Created folder: {folder}")

# Checkpoint and config URLs for each selectable model
MODEL_URLS = {
    'MP3 Enhancer': {
        'model': 'https://huggingface.co/JusperLee/Apollo/resolve/main/pytorch_model.bin',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    },
    'Lew Vocal Enhancer': {
        'model': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model.ckpt',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    },
    'Lew Vocal Enhancer v2 (beta)': {
        'model': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model_v2.ckpt',
        'config': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/config_apollo_vocal.yaml'
    },
    'Apollo Universal Model': {
        'model': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/apollo_universal_model.ckpt',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    }
}

def download_file(url, destination):
    """Download url to destination unless the file is already cached."""
    if not os.path.exists(destination):
        print(f"Downloading {os.path.basename(destination)}...")
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print(f"Downloaded {os.path.basename(destination)}")
    else:
        print(f"File already exists: {os.path.basename(destination)}")

def ensure_model_files(model_name):
    """Return local paths to the checkpoint and config, downloading them if needed."""
    model_url = MODEL_URLS[model_name]['model']
    config_url = MODEL_URLS[model_name]['config']

    model_filename = os.path.join(model_folder, os.path.basename(model_url))
    config_filename = os.path.join(config_folder, os.path.basename(config_url))

    download_file(model_url, model_filename)
    download_file(config_url, config_filename)

    return model_filename, config_filename

def process_audio(input_file, model, chunk_size, overlap):
    input_file_path = input_file.name
    original_file_name = os.path.splitext(os.path.basename(input_file_path))[0]
    output_file_path = os.path.join(output_folder, f'{original_file_name}.wav')

    # Download the necessary model files
    ckpt, config = ensure_model_files(model)
    print(f"Using model: {model}")

    print("Processing started. Please wait...")
    # Pass the arguments as a list instead of a shell string so that paths
    # containing spaces or quotes cannot break the command
    command = [
        "python", "inference.py",
        "--in_wav", input_file_path,
        "--out_wav", output_file_path,
        "--chunk_size", str(chunk_size),
        "--overlap", str(overlap),
        "--ckpt", ckpt,
        "--config", config,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

    for line in process.stdout:
        print(f"Processing: {line.strip()}")

    process.stdout.close()
    process.wait()

    if process.returncode != 0:
        # Keep the same (output, original) arity as the success path below,
        # since callers unpack exactly two values
        return "An error occurred while processing the audio.", None

    print("Processing completed.")
    return output_file_path, input_file_path

def mid_side_separation(audio_file):
    """Split a stereo file into Mid ((L+R)/2) and Side ((L-R)/2) channels."""
    y, sr = librosa.load(audio_file.name, sr=None, mono=False)
    if y.ndim == 1:
        raise ValueError("Stereo audio file required!")

    left, right = y[0], y[1]
    mid = (left + right) / 2
    side = (left - right) / 2

    mid_path = os.path.join(output_folder, "mid.wav")
    side_path = os.path.join(output_folder, "side.wav")
    sf.write(mid_path, mid, sr)
    sf.write(side_path, side, sr)

    return mid_path, side_path, sr

def mid_side_combine(mid_file, side_file, output_path):
    """Recombine Mid and Side channels into a stereo file (L = M+S, R = M-S)."""
    mid_data, sr_mid = librosa.load(mid_file, sr=None, mono=True)
    side_data, sr_side = librosa.load(side_file, sr=None, mono=True)

    if sr_mid != sr_side:
        raise ValueError("Mid and Side files have different sample rates!")

    left = mid_data + side_data
    right = mid_data - side_data
    stereo = np.stack([left, right], axis=0)

    sf.write(output_path, stereo.T, sr_mid)
    return output_path

def process_mid_side_upscale(input_file, model, chunk_size, overlap):
    try:
        print("Separating Mid and Side channels...")
        mid_path, side_path, sr = mid_side_separation(input_file)

        print("Processing Mid channel...")
        # process_audio only reads .name, so a throwaway object with a
        # .name attribute stands in for Gradio's file wrapper
        mid_restored, _ = process_audio(
            type('obj', (object,), {'name': mid_path}), model, chunk_size, overlap
        )
        print("Processing Side channel...")
        side_restored, _ = process_audio(
            type('obj', (object,), {'name': side_path}), model, chunk_size, overlap
        )

        original_file_name = os.path.splitext(os.path.basename(input_file.name))[0]
        final_output_path = os.path.join(output_folder, f"{original_file_name}_upscaled.wav")
        print("Combining processed Mid and Side channels...")
        final_audio = mid_side_combine(mid_restored, side_restored, final_output_path)

        print("Mid/Side upscaling completed.")
        return final_audio, input_file.name

    except Exception as e:
        return f"Error: {str(e)}", None

def show_credits():
    return """This Web UI was created using AI tools and written by U.Z.S.

**Apollo-Colab-Inference** (https://github.com/jarredou/Apollo-Colab-Inference):
This project was developed by Jarred Ou and provides a Colab-based inference implementation of the Apollo model for audio enhancement.

**Apollo** (https://github.com/JusperLee/Apollo):
Created by Jusper Lee, Apollo is a deep-learning-based model aimed at improving vocal clarity and overall audio quality in recordings.
"""

def spectrum(audio_file):
    if audio_file is None:
        return None, "No file selected"

    try:
        chunk_duration = 60
        hop_length = 512
        n_fft = 4096

        with sf.SoundFile(audio_file.name) as sf_desc:
            sr = sf_desc.samplerate
            duration = len(sf_desc) / sr

        # Preallocate the full spectrogram, then fill it chunk by chunk
        # to keep memory usage bounded on long files
        num_chunks = int(np.ceil(duration / chunk_duration))
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        total_frames = int(np.ceil(duration * sr / hop_length))
        S_db_full = np.zeros((len(freqs), total_frames))

        for chunk_idx in range(num_chunks):
            start_time = chunk_idx * chunk_duration
            y, _ = librosa.load(audio_file.name, offset=start_time, duration=chunk_duration, sr=None)
            S_chunk = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
            S_db_chunk = librosa.amplitude_to_db(S_chunk, ref=np.max)
            start_frame = int(start_time * sr / hop_length)
            # Clamp to the preallocated buffer so the last chunk cannot overflow it
            end_frame = min(start_frame + S_db_chunk.shape[1], total_frames)
            S_db_full[:, start_frame:end_frame] = S_db_chunk[:, :end_frame - start_frame]
            del S_chunk, S_db_chunk
            gc.collect()

        # Find the highest frequency with content within 60 dB of the peak
        downsample_factor = 4
        S_db_downsampled = S_db_full[:, ::downsample_factor]
        threshold = np.max(S_db_downsampled) - 60
        significant_freqs = freqs[np.any(S_db_downsampled > threshold, axis=1)]
        max_freq = np.max(significant_freqs) if len(significant_freqs) > 0 else sr / 2

        plt.figure(figsize=(30, 16))
        display_hop = 4
        librosa.display.specshow(
            S_db_full[:, ::display_hop],
            sr=sr,
            hop_length=hop_length * display_hop,
            x_axis='time',
            y_axis='hz',
            cmap='magma'
        )

        freq_ticks = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000, 20000, 22000, 24000]
        plt.yticks(freq_ticks, [f"{f/1000:.0f}" for f in freq_ticks])
        plt.colorbar(format='%+2.0f dB')
        plt.title('Frequency Spectrum', fontsize=24)
        plt.xlabel('Time (seconds)', fontsize=20)
        plt.ylabel('Frequency (kHz)', fontsize=20)

        output_image_path = os.path.join(output_folder, 'spectrum.png')
        plt.savefig(output_image_path, bbox_inches='tight', dpi=300)
        plt.close()

        del S_db_full, S_db_downsampled
        gc.collect()

        # Report the tick closest to the detected maximum frequency
        closest_freq = min(freq_ticks, key=lambda x: abs(x - max_freq))
        return output_image_path, f"Maximum Frequency {int(closest_freq)} Hz"

    except Exception as e:
        return None, f"Error: {str(e)}"

# Gradio Interface
with gr.Blocks(css="""
.gradio-container { background-color: black; color: white; font-family: Arial, sans-serif; }
.footer { position: absolute; bottom: 10px; right: 10px; font-size: 12px; color: white; }
.gradio-button { background-color: #6a0dad; color: white; border: 1px solid #5a0b8a; border-radius: 5px; }
.gradio-button:hover { background-color: #5a0b8a; }
.gradio-input { background-color: rgba(106, 13, 173, 0.8); border: 1px solid #5a0b8a; color: white; border-radius: 5px; }
.gradio-input:focus { border-color: #ffffff; box-shadow: 0 0 5px rgba(255, 255, 255, 0.5); }
.gradio-slider { background-color: rgba(106, 13, 173, 0.8); color: white; }
.gradio-label { color: white; }
.gradio-tabs { background-color: rgba(106, 13, 173, 0.8); color: white; }
@media (max-width: 600px) {
    .gradio-button { width: 100%; font-size: 16px; }
    .gradio-input { width: 100%; font-size: 16px; }
    .gradio-slider { width: 100%; }
    .gradio-label { font-size: 14px; }
}
""") as app:

    with gr.Tab("Home"):
        gr.Markdown("# Apollo Audio Enhancement")
        with gr.Row():
            audio_input = gr.File(label="Select Audio File", file_types=["audio"])
            model = gr.Radio(
                ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                label="Select Model"
            )
        gr.Markdown("**For the Universal model, please set Chunk Size to 19**", elem_classes="model-note")
        chunk_size = gr.Slider(minimum=3, maximum=25, step=1, value=25, label="Chunk Size")
        overlap = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Overlap")
        output_audio = gr.Audio(label="Processed Audio")
        original_audio = gr.Audio(label="Original Audio")
        process_button = gr.Button("Process Audio")
        process_button.click(process_audio, inputs=[audio_input, model, chunk_size, overlap], outputs=[output_audio, original_audio])

    with gr.Tab("Spectrum"):
        gr.Markdown("# Spectrum Analysis")
        spectrogram_input = gr.File(label="Select Audio File for Spectrum", file_types=["audio"])
        output_spectrum = gr.Image(label="Frequency Spectrum")
        max_freq_info = gr.Textbox(label="Maximum Frequency Information")
        spectrum_button = gr.Button("Show Spectrum")
        spectrum_button.click(spectrum, inputs=[spectrogram_input], outputs=[output_spectrum, max_freq_info])

    with gr.Tab("Mid/Side Upscale"):
        gr.Markdown("# 🎚️ Mid/Side Audio Upscaling")
        gr.Markdown("Upload a stereo audio file to separate, enhance, and recombine its Mid and Side channels using Apollo.")
        with gr.Row():
            ms_input = gr.File(label="Select Stereo Audio File", file_types=["audio"])
            ms_model = gr.Radio(
                ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                label="Select Model",
                value="Apollo Universal Model"
            )
        ms_chunk_size = gr.Slider(minimum=3, maximum=25, step=1, value=18, label="Chunk Size")
        ms_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Overlap")
        ms_output = gr.Audio(label="Upscaled Audio")
        ms_original = gr.Audio(label="Original Audio")
        ms_process_button = gr.Button("Process Mid/Side Upscale")
        ms_process_button.click(
            process_mid_side_upscale,
            inputs=[ms_input, ms_model, ms_chunk_size, ms_overlap],
            outputs=[ms_output, ms_original]
        )

    with gr.Tab("Credits"):
        gr.Markdown("## Credits")
        gr.Markdown(show_credits())

    gr.Markdown("Developed by U.Z.S using Claude.", elem_classes="footer")

if __name__ == "__main__":
    app.launch()
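
A note on the Mid/Side trick the upscale tab relies on: the encoding mid = (L+R)/2, side = (L-R)/2 is exactly invertible via L = mid+side, R = mid-side, which is why the two channels can be enhanced independently and recombined without structural loss. A minimal, self-contained NumPy sketch of that round trip (the sample values are made up for illustration):

import numpy as np

# Toy stereo signal: two channels of five samples (arbitrary values)
left = np.array([0.1, 0.4, -0.2, 0.0, 0.3])
right = np.array([0.2, -0.1, 0.5, 0.1, -0.3])

# Encode, mirroring mid_side_separation
mid = (left + right) / 2
side = (left - right) / 2

# Decode, mirroring mid_side_combine
left_out = mid + side
right_out = mid - side

# The round trip is exact up to floating-point precision
assert np.allclose(left_out, left) and np.allclose(right_out, right)
print("Mid/Side round trip is exact")

This is the same identity that mid_side_separation and mid_side_combine apply above to real audio buffers loaded with librosa.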