import gradio as gr
import subprocess
import os
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import librosa.display
import gc
import torch
import time
import warnings
import json
from scipy import signal
from scipy.stats import kurtosis, skew
import spaces
import urllib.request
from datetime import timedelta

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

output_folder = "output_file"
os.makedirs(output_folder, exist_ok=True)
print(f"Output folder ready: {output_folder}")
def setup():
    os.makedirs("Apollo/model", exist_ok=True)
    os.makedirs("Apollo/configs", exist_ok=True)
    files_to_download = {
        "Apollo/inference.py": "https://raw.githubusercontent.com/jarredou/Apollo-Colab-Inference/main/inference.py",
        "Apollo/model/pytorch_model.bin": "https://huggingface.co/JusperLee/Apollo/resolve/main/pytorch_model.bin",
        "Apollo/model/apollo_model.ckpt": "https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model.ckpt",
        "Apollo/model/apollo_model_v2.ckpt": "https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model_v2.ckpt",
        "Apollo/model/apollo_universal_model.ckpt": "https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/apollo_universal_model.ckpt",
        "Apollo/configs/config_apollo_vocal.yaml": "https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/config_apollo_vocal.yaml",
        "Apollo/configs/config_apollo.yaml": "https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml",
        "Apollo/configs/apollo.yaml": "https://huggingface.co/JusperLee/Apollo/resolve/main/apollo.yaml",
    }
    for file_path, url in files_to_download.items():
        if not os.path.exists(file_path):
            print(f"Downloading {file_path}...")
            try:
                subprocess.run(["wget", "-O", file_path, url], check=True, capture_output=True, text=True)
                print(f"Downloaded {file_path} with wget")
            except (subprocess.CalledProcessError, FileNotFoundError) as e:
                print(f"wget failed for {file_path}: {e}. Falling back to urllib...")
                try:
                    urllib.request.urlretrieve(url, file_path)
                    print(f"Downloaded {file_path} with urllib")
                except Exception as e:
                    print(f"Failed to download {file_path}: {e}")
                    raise Exception(f"Failed to download {file_path}")

try:
    setup()
except Exception as e:
    print(f"Setup failed: {e}")
    raise
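# Directory layout produced by setup() (derived from files_to_download above):
#   Apollo/
#     inference.py
#     model/
#       pytorch_model.bin
#       apollo_model.ckpt
#       apollo_model_v2.ckpt
#       apollo_universal_model.ckpt
#     configs/
#       config_apollo_vocal.yaml
#       config_apollo.yaml
#       apollo.yaml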
# GPU allocation raised from 60 to 120 seconds
@spaces.GPU(duration=120)
def process_audio(input_file, model, chunk_size, overlap, progress=gr.Progress()):
    if not input_file:
        return "No file uploaded.", None, None, None
    input_file_path = input_file
    original_file_name = os.path.splitext(os.path.basename(input_file_path))[0]
    output_file_path = f'{output_folder}/{original_file_name}_processed.wav'
    model_paths = {
        'MP3 Enhancer': ('Apollo/model/pytorch_model.bin', 'Apollo/configs/apollo.yaml'),
        'Lew Vocal Enhancer': ('Apollo/model/apollo_model.ckpt', 'Apollo/configs/apollo.yaml'),
        'Lew Vocal Enhancer v2 (beta)': ('Apollo/model/apollo_model_v2.ckpt', 'Apollo/configs/config_apollo_vocal.yaml'),
        'Apollo Universal Model': ('Apollo/model/apollo_universal_model.ckpt', 'Apollo/configs/config_apollo.yaml')
    }
    if model not in model_paths:
        return "Invalid model selected.", None, None, None
    ckpt, config = model_paths[model]
    if not os.path.exists(ckpt) or not os.path.exists(config):
        return f"Model files not found: {ckpt} or {config}", None, None, None
    print(f"Model selected: {model}")
    print("Processing started. Please wait...")
    start_time = time.time()
    command = [
        "python", "Apollo/inference.py",
        "--in_wav", input_file_path,
        "--out_wav", output_file_path,
        "--chunk_size", str(chunk_size),
        "--overlap", str(overlap),
        "--ckpt", ckpt,
        "--config", config
    ]
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True
        )
        progress(0.0, desc="Processing started...")
        for line in process.stdout:
            try:
                data = json.loads(line.strip())
                if "percentage" in data:
                    percentage = data["percentage"]
                    elapsed_time = data["elapsed_time"]
                    if percentage > 0:
                        time_remaining = (elapsed_time / percentage) * (100 - percentage)
                        time_remaining_str = str(timedelta(seconds=int(time_remaining)))
                    else:
                        time_remaining_str = "Calculating..."
                    progress(percentage / 100, desc=f"Processing: {percentage:.1f}% | Time remaining: {time_remaining_str}")
                else:
                    print(f"Processing: {line.strip()}")
            except json.JSONDecodeError:
                print(f"Processing: {line.strip()}")
        process.stdout.close()
        process.wait()
        if process.returncode != 0:
            return f"Error processing audio: Non-zero exit code {process.returncode}.", None, None, None
        total_duration = str(timedelta(seconds=int(time.time() - start_time)))
        progress(1.0, desc=f"Processing completed. Total time: {total_duration}")
        return output_file_path, input_file_path, None, f"Processing completed. Total time: {total_duration}"
    except Exception as e:
        return f"Error in process_audio: {str(e)}", None, None, None
def mid_side_separation(audio_file):
    try:
        print(f"Loading audio file: {audio_file}")
        y, sr = librosa.load(audio_file, sr=None, mono=False)
        print(f"Audio shape: {y.shape}, Sample rate: {sr}")
        if y.ndim == 1:
            raise ValueError("Stereo audio file required! Please upload a stereo .wav or .mp3 file.")
        left, right = y[0], y[1]
        print("Performing Mid/Side separation...")
        mid = (left + right) / 2
        side = (left - right) / 2
        mid_path = os.path.join(output_folder, "mid.wav")
        side_path = os.path.join(output_folder, "side.wav")
        print(f"Saving Mid to {mid_path} and Side to {side_path}")
        sf.write(mid_path, mid, sr)
        sf.write(side_path, side, sr)
        print("Mid/Side separation completed.")
        return mid_path, side_path, sr
    except Exception as e:
        print(f"Error in mid/side separation: {str(e)}")
        raise ValueError(f"Error in mid/side separation: {str(e)}")
def mid_side_combine(mid_file, side_file, output_path):
    try:
        print(f"Combining Mid: {mid_file} and Side: {side_file}")
        mid_data, sr_mid = librosa.load(mid_file, sr=None, mono=True)
        side_data, sr_side = librosa.load(side_file, sr=None, mono=True)
        if sr_mid != sr_side:
            raise ValueError("Mid and Side sample rates do not match!")
        # Guard against small length mismatches between the processed Mid and Side files
        n = min(len(mid_data), len(side_data))
        mid_data, side_data = mid_data[:n], side_data[:n]
        left = mid_data + side_data
        right = mid_data - side_data
        stereo = np.stack([left, right], axis=0)
        print(f"Saving combined audio to {output_path}")
        sf.write(output_path, stereo.T, sr_mid)
        return output_path
    except Exception as e:
        print(f"Error in mid/side combination: {str(e)}")
        raise ValueError(f"Error in mid/side combination: {str(e)}")
# GPU allocation raised from 60 to 120 seconds
@spaces.GPU(duration=120)
def process_mid_side_upscale(input_file, model, chunk_size, overlap, progress=gr.Progress()):
    if not input_file:
        return "No file uploaded.", None, None, None
    try:
        total_start_time = time.time()
        print(f"Starting Mid/Side upscale for: {input_file}")
        # Mid/Side separation
        print("Separating Mid and Side channels...")
        mid_path, side_path, sr = mid_side_separation(input_file)
        print(f"Mid path: {mid_path}, Side path: {side_path}, Sample rate: {sr}")
        # Process the Mid channel
        print("Processing Mid channel...")
        mid_restored, _, _, mid_status = process_audio(mid_path, model, chunk_size, overlap, progress=progress)
        if not mid_restored.endswith(".wav"):
            return f"Mid channel processing failed: {mid_status}", None, None, None
        print(f"Mid channel processed: {mid_restored}")
        # Process the Side channel
        print("Processing Side channel...")
        side_restored, _, _, side_status = process_audio(side_path, model, chunk_size, overlap, progress=progress)
        if not side_restored.endswith(".wav"):
            return f"Side channel processing failed: {side_status}", None, None, None
        print(f"Side channel processed: {side_restored}")
        # Build the output path from the original file name
        original_file_name = os.path.splitext(os.path.basename(input_file))[0]
        final_output_path = os.path.join(output_folder, f"{original_file_name}_upscaled.wav")
        # Recombine the processed Mid and Side channels
        print("Combining processed Mid and Side channels...")
        final_audio = mid_side_combine(mid_restored, side_restored, final_output_path)
        print(f"Final audio saved: {final_audio}")
        total_duration = str(timedelta(seconds=int(time.time() - total_start_time)))
        progress(1.0, desc=f"Mid/Side upscaling completed. Total time: {total_duration}")
        return final_audio, input_file, None, f"Mid/Side upscaling completed. Total time: {total_duration}"
    except Exception as e:
        error_msg = f"Error in Mid/Side upscale: {str(e)}"
        print(error_msg)
        return error_msg, None, None, None
def spectrum(audio_file):
    if not audio_file:
        return None, "No file selected"
    try:
        chunk_duration = 30
        hop_length = 512
        n_fft = 2048
        with sf.SoundFile(audio_file) as sf_desc:
            duration = len(sf_desc) / sf_desc.samplerate
            num_chunks = int(np.ceil(duration / chunk_duration))
            freqs = librosa.fft_frequencies(sr=sf_desc.samplerate, n_fft=n_fft)
            total_frames = int(np.ceil(duration * sf_desc.samplerate / hop_length))
        S_db_full = np.zeros((len(freqs), total_frames))
        for chunk_idx in range(num_chunks):
            start_time = chunk_idx * chunk_duration
            y, sr = librosa.load(audio_file, offset=start_time, duration=chunk_duration, sr=None)
            S_chunk = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
            S_db_chunk = librosa.amplitude_to_db(S_chunk, ref=np.max)
            start_frame = int(start_time * sr / hop_length)
            # Clamp so the final chunk cannot overrun the preallocated frame buffer
            end_frame = min(start_frame + S_db_chunk.shape[1], total_frames)
            S_db_full[:, start_frame:end_frame] = S_db_chunk[:, :end_frame - start_frame]
            del S_chunk, S_db_chunk
            gc.collect()
        downsample_factor = 4
        S_db_downsampled = S_db_full[:, ::downsample_factor]
        threshold = np.max(S_db_downsampled) - 60
        significant_freqs = freqs[np.any(S_db_downsampled > threshold, axis=1)]
        max_freq = np.max(significant_freqs) if len(significant_freqs) > 0 else sr / 2
        plt.figure(figsize=(15, 8))
        display_hop = 4
        librosa.display.specshow(
            S_db_full[:, ::display_hop],
            sr=sr,
            hop_length=hop_length * display_hop,
            x_axis='time',
            y_axis='hz',
            cmap='magma'
        )
        freq_ticks = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000, 20000]
        plt.yticks(freq_ticks, [f"{f/1000:.0f}" for f in freq_ticks])
        plt.colorbar(format='%+2.0f dB')
        plt.title('Frequency Spectrum', fontsize=16)
        plt.xlabel('Time (seconds)', fontsize=12)
        plt.ylabel('Frequency (kHz)', fontsize=12)
        output_image_path = os.path.join(output_folder, 'spectrum.png')
        plt.savefig(output_image_path, bbox_inches='tight', dpi=150)
        plt.close()
        del S_db_full, S_db_downsampled
        gc.collect()
        closest_freq = min(freq_ticks, key=lambda x: abs(x - max_freq))
| return output_image_path, f"Maximum Frequency {int(closest_freq)} Hz" | |
    except Exception as e:
        return None, f"Error: {str(e)}"
def show_credits():
    return """This Web UI was created using AI tools and written by U.Z.S.
**Apollo-Colab-Inference** (https://github.com/jarredou/Apollo-Colab-Inference):
Developed by Jarred Ou, this project provides a Colab-based inference implementation of the Apollo model.
**Apollo** (https://github.com/JusperLee/Apollo):
Created by Jusper Lee, Apollo is a deep learning-based model for vocal clarity and audio quality.
"""
app = gr.Blocks(
    css="""
    .gradio-container { background-color: #121212; color: white; font-family: Arial, sans-serif; }
    .gradio-button {
        background-color: #6a0dad;
        color: white;
        border: 1px solid #5a0b8a;
        border-radius: 5px;
        padding: 10px 20px;
    }
    .gradio-button:hover { background-color: #5a0b8a; }
    .gradio-input, .gradio-file {
        background-color: rgba(106, 13, 173, 0.2);
        border: 1px solid #5a0b8a;
        color: white;
        border-radius: 5px;
    }
    .gradio-input:focus, .gradio-file:focus {
        border-color: #ffffff;
        box-shadow: 0 0 5px rgba(255, 255, 255, 0.5);
    }
    .gradio-slider {
        background-color: rgba(106, 13, 173, 0.2);
        color: white;
    }
    .gradio-label { color: white; font-weight: bold; }
    .gradio-tabs { background-color: rgba(106, 13, 173, 0.2); }
    .gradio-tab { padding: 15px; }
    .model-note { color: #ff9800; font-size: 0.9em; }
    /* Hide footer elements */
    footer {display: none !important;}
    #footer {display: none !important;}
    .gradio-footer {display: none !important;}
    @media (max-width: 600px) {
        .gradio-button { width: 100%; font-size: 16px; }
        .gradio-input, .gradio-file { width: 100%; font-size: 16px; }
        .gradio-slider { width: 100%; }
        .gradio-label { font-size: 14px; }
    }
    """
)
with app:
    with gr.Tab("Audio Enhancer"):
        gr.Markdown("# 🎵 Audio Enhancement Tool")
        with gr.Row():
            with gr.Column():
                audio_input = gr.File(
                    label="Select Audio File",
                    file_types=[".wav", ".mp3"],
                    elem_classes=["gradio-file"]
                )
                model = gr.Radio(
                    ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                    label="Select Model",
                    value="Apollo Universal Model"
                )
| gr.Markdown("**For Universal model, please set Chunk Size to 19**", elem_classes="model-note") | |
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=3,
                        maximum=25,
                        step=1,
                        value=19,
                        label="Chunk Size",
                        interactive=True
                    )
                    overlap = gr.Slider(
                        minimum=2,
                        maximum=10,
                        step=1,
                        value=2,
                        label="Overlap",
                        interactive=True
                    )
                process_button = gr.Button("Process Audio", variant="primary")
            with gr.Column():
                output_audio = gr.Audio(label="Processed Audio")
                original_audio = gr.Audio(label="Original Audio")
                status_message = gr.Textbox(label="Status", interactive=False)
        process_button.click(
            process_audio,
            inputs=[audio_input, model, chunk_size, overlap],
            outputs=[output_audio, original_audio, status_message, status_message]
        )

    with gr.Tab("Spectrum Analyzer"):
        gr.Markdown("# 📊 Frequency Spectrum Analysis")
        with gr.Row():
            with gr.Column():
                spectrogram_input = gr.File(
                    label="Select Audio File",
                    file_types=[".wav", ".mp3"],
                    elem_classes=["gradio-file"]
                )
                spectrum_button = gr.Button("Analyze Spectrum", variant="primary")
            with gr.Column():
                output_spectrum = gr.Image(label="Frequency Spectrum", interactive=False)
                max_freq_info = gr.Textbox(label="Frequency Analysis", interactive=False)
        spectrum_button.click(
            spectrum,
            inputs=[spectrogram_input],
            outputs=[output_spectrum, max_freq_info]
        )

    with gr.Tab("Mid/Side Processor"):
        gr.Markdown("# 🎚️ Mid/Side Channel Processing")
        gr.Markdown("Upload a stereo audio file to separate, enhance, and recombine its Mid and Side channels.")
        with gr.Row():
            with gr.Column():
                ms_input = gr.File(
                    label="Select Stereo Audio File",
                    file_types=[".wav", ".mp3"],
                    elem_classes=["gradio-file"]
                )
                ms_model = gr.Radio(
                    ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                    label="Select Model",
                    value="Apollo Universal Model"
                )
                with gr.Row():
                    ms_chunk_size = gr.Slider(
                        minimum=3,
                        maximum=25,
                        step=1,
                        value=19,
                        label="Chunk Size"
                    )
                    ms_overlap = gr.Slider(
                        minimum=2,
                        maximum=10,
                        step=1,
                        value=2,
                        label="Overlap"
                    )
                ms_process_button = gr.Button("Process Mid/Side", variant="primary")
            with gr.Column():
                ms_output = gr.Audio(label="Processed Audio")
                ms_original = gr.Audio(label="Original Audio")
                ms_status_message = gr.Textbox(label="Status", interactive=False)
        ms_process_button.click(
            process_mid_side_upscale,
            inputs=[ms_input, ms_model, ms_chunk_size, ms_overlap],
            outputs=[ms_output, ms_original, ms_status_message, ms_status_message]
        )

    with gr.Tab("About"):
        gr.Markdown("## ℹ️ About This Tool")
        gr.Markdown(show_credits())
        gr.Markdown("### 🚀 Features")
        gr.Markdown("""
- High-quality audio enhancement using Apollo models
- Frequency spectrum visualization
- Advanced Mid/Side channel processing
- GPU-accelerated processing
""")
        gr.Markdown("<div class='footer'>Developed by U.Z.S using AI tools</div>")
| if __name__ == "__main__": | |
| app.launch( | |
| server_name="0.0.0.0", | |
| server_port=7860, | |
| show_api=False, | |
| ) |