ASesYusuf1 committed
Commit 004da11 · verified · Parent: 30ea38f

Create app.py

Files changed (1): app.py (+314, -0)
app.py ADDED
import gradio as gr
import subprocess
import time
import io
import contextlib
import matplotlib.pyplot as plt
import librosa.display
import gc
import os
import random
import numpy as np
from scipy.signal.windows import hann
from scipy.stats import kurtosis, skew
import soundfile as sf
import torch
import tempfile
import librosa
import noisereduce as nr
from scipy import signal
import warnings
import requests
from pathlib import Path
warnings.filterwarnings("ignore")

os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create the necessary directories next to this script
base_dir = os.path.dirname(os.path.abspath(__file__))
output_folder = os.path.join(base_dir, 'output_file')
model_folder = os.path.join(base_dir, 'model')
config_folder = os.path.join(base_dir, 'configs')

for folder in [output_folder, model_folder, config_folder]:
    if not os.path.exists(folder):
        os.makedirs(folder)
        print(f"Created folder: {folder}")

# Checkpoint and config URLs for each selectable model
MODEL_URLS = {
    'MP3 Enhancer': {
        'model': 'https://huggingface.co/JusperLee/Apollo/resolve/main/pytorch_model.bin',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    },
    'Lew Vocal Enhancer': {
        'model': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model.ckpt',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    },
    'Lew Vocal Enhancer v2 (beta)': {
        'model': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/apollo_model_v2.ckpt',
        'config': 'https://huggingface.co/jarredou/lew_apollo_vocal_enhancer/resolve/main/config_apollo_vocal.yaml'
    },
    'Apollo Universal Model': {
        'model': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/apollo_universal_model.ckpt',
        'config': 'https://huggingface.co/ASesYusuf1/Apollo_universal_model/resolve/main/config_apollo.yaml'
    }
}

def download_file(url, destination):
    """Download url to destination unless the file is already cached."""
    if not os.path.exists(destination):
        print(f"Downloading {os.path.basename(destination)}...")
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print(f"Downloaded {os.path.basename(destination)}")
    else:
        print(f"File already exists: {os.path.basename(destination)}")

def ensure_model_files(model_name):
    """Return local paths to the checkpoint and config, downloading them if needed."""
    model_url = MODEL_URLS[model_name]['model']
    config_url = MODEL_URLS[model_name]['config']

    model_filename = os.path.join(model_folder, os.path.basename(model_url))
    config_filename = os.path.join(config_folder, os.path.basename(config_url))

    download_file(model_url, model_filename)
    download_file(config_url, config_filename)

    return model_filename, config_filename

def process_audio(input_file, model, chunk_size, overlap):
    input_file_path = input_file.name
    original_file_name = os.path.splitext(os.path.basename(input_file_path))[0]
    output_file_path = os.path.join(output_folder, f'{original_file_name}.wav')

    # Download the necessary model files
    ckpt, config = ensure_model_files(model)
    print(f"Using model: {model}")

    print("Processing started. Please wait...")
    # Pass the arguments as a list instead of a shell string so that paths
    # containing spaces or quotes cannot break the command
    command = [
        "python", "inference.py",
        "--in_wav", input_file_path,
        "--out_wav", output_file_path,
        "--chunk_size", str(chunk_size),
        "--overlap", str(overlap),
        "--ckpt", ckpt,
        "--config", config,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

    for line in process.stdout:
        print(f"Processing: {line.strip()}")

    process.stdout.close()
    process.wait()

    if process.returncode != 0:
        # Keep the same (output, original) arity as the success path below,
        # since callers unpack exactly two values
        return "An error occurred while processing the audio.", None

    print("Processing completed.")
    return output_file_path, input_file_path

def mid_side_separation(audio_file):
    """Split a stereo file into Mid ((L+R)/2) and Side ((L-R)/2) channels."""
    y, sr = librosa.load(audio_file.name, sr=None, mono=False)
    if y.ndim == 1:
        raise ValueError("Stereo audio file required!")

    left, right = y[0], y[1]
    mid = (left + right) / 2
    side = (left - right) / 2

    mid_path = os.path.join(output_folder, "mid.wav")
    side_path = os.path.join(output_folder, "side.wav")
    sf.write(mid_path, mid, sr)
    sf.write(side_path, side, sr)

    return mid_path, side_path, sr

def mid_side_combine(mid_file, side_file, output_path):
    """Recombine Mid and Side channels into a stereo file (L = M+S, R = M-S)."""
    mid_data, sr_mid = librosa.load(mid_file, sr=None, mono=True)
    side_data, sr_side = librosa.load(side_file, sr=None, mono=True)

    if sr_mid != sr_side:
        raise ValueError("Mid and Side files have different sample rates!")

    left = mid_data + side_data
    right = mid_data - side_data
    stereo = np.stack([left, right], axis=0)

    sf.write(output_path, stereo.T, sr_mid)
    return output_path

def process_mid_side_upscale(input_file, model, chunk_size, overlap):
    try:
        print("Separating Mid and Side channels...")
        mid_path, side_path, sr = mid_side_separation(input_file)

        print("Processing Mid channel...")
        # process_audio only reads .name, so a throwaway object with a
        # .name attribute stands in for Gradio's file wrapper
        mid_restored, _ = process_audio(
            type('obj', (object,), {'name': mid_path}), model, chunk_size, overlap
        )
        print("Processing Side channel...")
        side_restored, _ = process_audio(
            type('obj', (object,), {'name': side_path}), model, chunk_size, overlap
        )

        original_file_name = os.path.splitext(os.path.basename(input_file.name))[0]
        final_output_path = os.path.join(output_folder, f"{original_file_name}_upscaled.wav")
        print("Combining processed Mid and Side channels...")
        final_audio = mid_side_combine(mid_restored, side_restored, final_output_path)

        print("Mid/Side upscaling completed.")
        return final_audio, input_file.name

    except Exception as e:
        return f"Error: {str(e)}", None

def show_credits():
    return """This Web UI was created using AI tools and written by U.Z.S.

**Apollo-Colab-Inference** (https://github.com/jarredou/Apollo-Colab-Inference):
This project was developed by Jarred Ou and provides a Colab-based inference implementation of the Apollo model for audio enhancement.

**Apollo** (https://github.com/JusperLee/Apollo):
Created by Jusper Lee, Apollo is a deep-learning-based model aimed at improving vocal clarity and overall audio quality in recordings.
"""

def spectrum(audio_file):
    if audio_file is None:
        return None, "No file selected"

    try:
        chunk_duration = 60
        hop_length = 512
        n_fft = 4096

        with sf.SoundFile(audio_file.name) as sf_desc:
            sr = sf_desc.samplerate
            duration = len(sf_desc) / sr

        # Preallocate the full spectrogram, then fill it chunk by chunk
        # to keep memory usage bounded on long files
        num_chunks = int(np.ceil(duration / chunk_duration))
        freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
        total_frames = int(np.ceil(duration * sr / hop_length))
        S_db_full = np.zeros((len(freqs), total_frames))

        for chunk_idx in range(num_chunks):
            start_time = chunk_idx * chunk_duration
            y, _ = librosa.load(audio_file.name, offset=start_time, duration=chunk_duration, sr=None)
            S_chunk = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
            S_db_chunk = librosa.amplitude_to_db(S_chunk, ref=np.max)
            start_frame = int(start_time * sr / hop_length)
            # Clamp to the preallocated buffer so the last chunk cannot overflow it
            end_frame = min(start_frame + S_db_chunk.shape[1], total_frames)
            S_db_full[:, start_frame:end_frame] = S_db_chunk[:, :end_frame - start_frame]
            del S_chunk, S_db_chunk
            gc.collect()

        # Find the highest frequency with content within 60 dB of the peak
        downsample_factor = 4
        S_db_downsampled = S_db_full[:, ::downsample_factor]
        threshold = np.max(S_db_downsampled) - 60
        significant_freqs = freqs[np.any(S_db_downsampled > threshold, axis=1)]
        max_freq = np.max(significant_freqs) if len(significant_freqs) > 0 else sr / 2

        plt.figure(figsize=(30, 16))
        display_hop = 4
        librosa.display.specshow(
            S_db_full[:, ::display_hop],
            sr=sr,
            hop_length=hop_length * display_hop,
            x_axis='time',
            y_axis='hz',
            cmap='magma'
        )

        freq_ticks = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000, 20000, 22000, 24000]
        plt.yticks(freq_ticks, [f"{f/1000:.0f}" for f in freq_ticks])
        plt.colorbar(format='%+2.0f dB')
        plt.title('Frequency Spectrum', fontsize=24)
        plt.xlabel('Time (seconds)', fontsize=20)
        plt.ylabel('Frequency (kHz)', fontsize=20)

        output_image_path = os.path.join(output_folder, 'spectrum.png')
        plt.savefig(output_image_path, bbox_inches='tight', dpi=300)
        plt.close()

        del S_db_full, S_db_downsampled
        gc.collect()

        # Report the tick closest to the detected maximum frequency
        closest_freq = min(freq_ticks, key=lambda x: abs(x - max_freq))
        return output_image_path, f"Maximum Frequency {int(closest_freq)} Hz"

    except Exception as e:
        return None, f"Error: {str(e)}"

# Gradio Interface
with gr.Blocks(css="""
.gradio-container { background-color: black; color: white; font-family: Arial, sans-serif; }
.footer { position: absolute; bottom: 10px; right: 10px; font-size: 12px; color: white; }
.gradio-button { background-color: #6a0dad; color: white; border: 1px solid #5a0b8a; border-radius: 5px; }
.gradio-button:hover { background-color: #5a0b8a; }
.gradio-input { background-color: rgba(106, 13, 173, 0.8); border: 1px solid #5a0b8a; color: white; border-radius: 5px; }
.gradio-input:focus { border-color: #ffffff; box-shadow: 0 0 5px rgba(255, 255, 255, 0.5); }
.gradio-slider { background-color: rgba(106, 13, 173, 0.8); color: white; }
.gradio-label { color: white; }
.gradio-tabs { background-color: rgba(106, 13, 173, 0.8); color: white; }
@media (max-width: 600px) {
    .gradio-button { width: 100%; font-size: 16px; }
    .gradio-input { width: 100%; font-size: 16px; }
    .gradio-slider { width: 100%; }
    .gradio-label { font-size: 14px; }
}
""") as app:

    with gr.Tab("Home"):
        gr.Markdown("# Apollo Audio Enhancement")
        with gr.Row():
            audio_input = gr.File(label="Select Audio File", file_types=["audio"])
            model = gr.Radio(
                ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                label="Select Model"
            )
        gr.Markdown("**For the Universal model, please set Chunk Size to 19**", elem_classes="model-note")
        chunk_size = gr.Slider(minimum=3, maximum=25, step=1, value=25, label="Chunk Size")
        overlap = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Overlap")
        output_audio = gr.Audio(label="Processed Audio")
        original_audio = gr.Audio(label="Original Audio")
        process_button = gr.Button("Process Audio")
        process_button.click(process_audio, inputs=[audio_input, model, chunk_size, overlap], outputs=[output_audio, original_audio])

    with gr.Tab("Spectrum"):
        gr.Markdown("# Spectrum Analysis")
        spectrogram_input = gr.File(label="Select Audio File for Spectrum", file_types=["audio"])
        output_spectrum = gr.Image(label="Frequency Spectrum")
        max_freq_info = gr.Textbox(label="Maximum Frequency Information")
        spectrum_button = gr.Button("Show Spectrum")
        spectrum_button.click(spectrum, inputs=[spectrogram_input], outputs=[output_spectrum, max_freq_info])

    with gr.Tab("Mid/Side Upscale"):
        gr.Markdown("# 🎚️ Mid/Side Audio Upscaling")
        gr.Markdown("Upload a stereo audio file to separate, enhance, and recombine its Mid and Side channels using Apollo.")
        with gr.Row():
            ms_input = gr.File(label="Select Stereo Audio File", file_types=["audio"])
            ms_model = gr.Radio(
                ["MP3 Enhancer", "Lew Vocal Enhancer", "Lew Vocal Enhancer v2 (beta)", "Apollo Universal Model"],
                label="Select Model",
                value="Apollo Universal Model"
            )
        ms_chunk_size = gr.Slider(minimum=3, maximum=25, step=1, value=18, label="Chunk Size")
        ms_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Overlap")
        ms_output = gr.Audio(label="Upscaled Audio")
        ms_original = gr.Audio(label="Original Audio")
        ms_process_button = gr.Button("Process Mid/Side Upscale")
        ms_process_button.click(
            process_mid_side_upscale,
            inputs=[ms_input, ms_model, ms_chunk_size, ms_overlap],
            outputs=[ms_output, ms_original]
        )

    with gr.Tab("Credits"):
        gr.Markdown("## Credits")
        gr.Markdown(show_credits())

    gr.Markdown("Developed by U.Z.S using Claude.", elem_classes="footer")

if __name__ == "__main__":
    app.launch()
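
A note on the Mid/Side trick the upscale tab relies on: the encoding mid = (L+R)/2, side = (L-R)/2 is exactly invertible via L = mid+side, R = mid-side, which is why the two channels can be enhanced independently and recombined without structural loss. A minimal, self-contained NumPy sketch of that round trip (the sample values are made up for illustration):

import numpy as np

# Toy stereo signal: two channels of five samples (arbitrary values)
left = np.array([0.1, 0.4, -0.2, 0.0, 0.3])
right = np.array([0.2, -0.1, 0.5, 0.1, -0.3])

# Encode, mirroring mid_side_separation
mid = (left + right) / 2
side = (left - right) / 2

# Decode, mirroring mid_side_combine
left_out = mid + side
right_out = mid - side

# The round trip is exact up to floating-point precision
assert np.allclose(left_out, left) and np.allclose(right_out, right)
print("Mid/Side round trip is exact")

This is the same identity that mid_side_separation and mid_side_combine apply above to real audio buffers loaded with librosa.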