Spaces:

gauravvjhaa
/

affecto-inference

Running

File size: 13,686 Bytes

#!/usr/bin/env python3
"""
Affecto - Real MagicFace Emotion Transformation
Made by Gaurav Jha and Raj Shakya
"""

# ============================================
# CRITICAL: Fix huggingface-hub version FIRST
# ============================================
import subprocess
import sys
import os

# Exact huggingface-hub release this script pins before loading anything else.
REQUIRED_HF_HUB_VERSION = "0.25.2"

print("=" * 60)
print("🔧 FIXING DEPENDENCY VERSIONS")
print("=" * 60)

# Best-effort pin: compare the installed huggingface-hub with the required
# release and, on a mismatch, swap it with pip. Any failure (package missing,
# pip error, re-import error) is reported and start-up continues.
try:
    import huggingface_hub
    current_version = huggingface_hub.__version__
    print(f"📦 Current huggingface-hub version: {current_version}")

    if current_version != REQUIRED_HF_HUB_VERSION:
        print(f"❌ Version {current_version} doesn't match required {REQUIRED_HF_HUB_VERSION}!")
        print(f"🔄 Force installing {REQUIRED_HF_HUB_VERSION}...")

        # Uninstall current version (pip output is silenced; a non-zero exit
        # status raises CalledProcessError, handled below).
        subprocess.check_call(
            [sys.executable, "-m", "pip", "uninstall", "huggingface-hub", "-y"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Install correct version; --no-deps keeps pip from installing the
        # package's dependencies.
        subprocess.check_call(
            [
                sys.executable, "-m", "pip", "install",
                f"huggingface-hub=={REQUIRED_HF_HUB_VERSION}", "--no-deps",
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # The mismatched release was imported above and is still cached in
        # sys.modules, so every later import in this process would keep
        # getting the old package object. Drop the cached modules, refresh the
        # import system's view of site-packages and import the new files.
        import importlib

        _stale_modules = [
            _mod_name for _mod_name in sys.modules
            if _mod_name == "huggingface_hub" or _mod_name.startswith("huggingface_hub.")
        ]
        for _mod_name in _stale_modules:
            sys.modules.pop(_mod_name, None)
        importlib.invalidate_caches()
        huggingface_hub = importlib.import_module("huggingface_hub")

        print(f"✅ Installed huggingface-hub {REQUIRED_HF_HUB_VERSION}")
    else:
        print(f"✅ Version {current_version} is correct!")

except Exception as e:
    # Deliberately broad: this is a start-up convenience, not a hard requirement.
    print(f"⚠️  Error fixing huggingface-hub: {e}")
    print("⚠️  Proceeding anyway...")

print("=" * 60)
print()

# ============================================
# HF Hub compatibility shim
# ============================================
import importlib
import importlib.util

# Look the package up without importing it, so a missing install is only
# reported instead of raising.
hf_spec = importlib.util.find_spec("huggingface_hub")
if hf_spec is None:
    print("shim: huggingface_hub not present at import-time")
else:
    hf = importlib.import_module("huggingface_hub")
    # Only add the alias when the package lacks `cached_download` but does
    # offer `hf_hub_download` to forward to.
    if not hasattr(hf, "cached_download"):
        if hasattr(hf, "hf_hub_download"):
            def cached_download(*args, **kwargs):
                """Forward every argument unchanged to ``hf.hf_hub_download``."""
                return hf.hf_hub_download(*args, **kwargs)
            hf.cached_download = cached_download
    print("shim: huggingface_hub cached_download patched:", hasattr(hf, "cached_download"))

# ============================================
# NOW SAFE TO IMPORT OTHER MODULES
# ============================================

import gradio as gr
import torch
from PIL import Image
import base64
from io import BytesIO
import json
import traceback

print("🚀 Starting Affecto - Real MagicFace Inference Service...")

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🖥️  Device: {device}")

print("📥 Loading models...")

# Project-local modules, imported only now (after the banner above) exactly as
# in the original start-up order.
from preprocessor import FacePreprocessor
from magicface_model import MagicFaceModel

# Build the two module-level singletons used by transform_emotion. A failure
# is logged with its traceback and then re-raised, so start-up aborts.
try:
    preprocessor = FacePreprocessor(device=device)
    model = MagicFaceModel(device=device)
except Exception as load_error:
    print(f"❌ Error loading models: {load_error}")
    traceback.print_exc()
    raise
else:
    print("✅ All models loaded successfully!")

# ============================================
# UTILITY FUNCTIONS
# ============================================

def pil_to_base64(image):
    """Encode a PIL image as a base64 JPEG string.

    Args:
        image: PIL Image to encode. Images whose mode the JPEG writer cannot
            store (for example RGBA, LA or P) are converted to RGB first;
            the caller's image object is not modified.

    Returns:
        The JPEG bytes (quality 95) base64-encoded and decoded to ``str``.
        No ``data:`` URI prefix is added.
    """
    # Modes Pillow's JPEG writer accepts; saving any other mode raises
    # OSError("cannot write mode ... as JPEG").
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG", quality=95)
    return base64.b64encode(buffered.getvalue()).decode()

def base64_to_pil(base64_str):
    """Decode a base64 string (optionally a ``data:image...`` URI) into an RGB PIL image."""
    payload = base64_str
    if payload.startswith('data:image'):
        # Keep the segment that follows the first comma of the data URI.
        payload = payload.split(',')[1]
    raw_bytes = base64.b64decode(payload)
    stream = BytesIO(raw_bytes)
    decoded = Image.open(stream)
    return decoded.convert('RGB')

# ============================================
# INFERENCE FUNCTION
# ============================================

def transform_emotion(image, au_params_str, steps, seed):
    """
    Main transformation function

    Validates the request, runs the module-level ``preprocessor`` on the
    image and passes the result to the module-level ``model``. Every failure
    is reported through the returned status message instead of being raised.

    Args:
        image: PIL Image
        au_params_str: JSON string holding an object of AU parameters,
            e.g. '{"AU6": 2.0, "AU12": 2.0}'
        steps: Number of inference steps
        seed: Random seed

    Returns:
        result_image: Transformed PIL Image, or None on any failure
        status_msg: Status message describing success or the failure
    """
    try:
        if image is None:
            return None, "❌ No image provided"

        print("\n" + "="*60)
        print("🎭 NEW TRANSFORMATION REQUEST")
        print("="*60)

        # Parse AU params. TypeError covers a missing (None) or non-string
        # payload, which json.loads rejects before it even starts parsing.
        try:
            au_params = json.loads(au_params_str)
        except (json.JSONDecodeError, TypeError) as e:
            return None, f"❌ Invalid JSON: {e}"

        # Valid JSON is not enough: a list, number, string or null carries no
        # AU names, so reject it here with a clear message.
        if not isinstance(au_params, dict):
            return None, '❌ AU parameters must be a JSON object, e.g. {"AU6": 2.0, "AU12": 2.0}'
        print(f"✅ AU Parameters: {au_params}")

        # Step 1: Preprocess (detect face, crop, extract background)
        print("\n📸 STEP 1: Preprocessing...")
        try:
            source_img, bg_img = preprocessor.preprocess(image)
        except Exception as e:
            # Log with a traceback, the same way transformation failures are.
            print(f"\n❌ Preprocessing failed: {e}")
            traceback.print_exc()
            return None, f"❌ Preprocessing failed: {e}"
        print("✅ Preprocessing complete")

        # Step 2: Transform with MagicFace
        print("\n🎨 STEP 2: MagicFace Transformation...")
        print(f"   Inference steps: {steps}")
        print(f"   Seed: {seed}")
        print(f"   Expected time: ~{steps * 10} seconds on CPU")

        try:
            result_img = model.transform(
                source_image=source_img,
                bg_image=bg_img,
                au_params=au_params,
                num_inference_steps=steps,
                seed=seed
            )
        except Exception as e:
            print(f"\n❌ Transformation failed: {e}")
            traceback.print_exc()
            return None, f"❌ Transformation failed: {e}"

        print("\n✅ TRANSFORMATION SUCCESSFUL!")
        print("="*60 + "\n")

        return result_img, "✅ Transformation successful!"

    except Exception as e:
        # Last-resort boundary so the UI always receives a status message.
        print(f"\n❌ Unexpected error: {e}")
        traceback.print_exc()
        return None, f"❌ Error: {e}"

# ============================================
# GRADIO INTERFACE
# ============================================

# Build the Gradio UI and bind it to `demo`, which the __main__ guard at the
# bottom of the file queues and launches.
with gr.Blocks(theme=gr.themes.Soft(), title="Affecto - Real MagicFace") as demo:
    # Page header: title, one-line description, then a status line.
    gr.Markdown("# 🎭 Affecto - Real MagicFace Emotion Transformation")
    gr.Markdown("Transform facial emotions using Action Units with the complete MagicFace diffusion model")
    # Only the device name is dynamic (`device` is the "cuda"/"cpu" string
    # chosen at start-up); the time estimate is fixed text that refers to CPU.
    gr.Markdown(f"⚡ Running on: **{device.upper()}** | ⏱️ Estimated time: ~10-15 minutes per transformation on CPU")
    
    # Main tab: request inputs in the first column, result in the second,
    # then the preset examples and the button wiring.
    with gr.Tab("🖼️ Transform"):
        with gr.Row():
            # First column: everything the user supplies for one request.
            with gr.Column():
                # type="pil" so the handler receives a PIL image.
                input_image = gr.Image(type="pil", label="Upload Face Image")

                gr.Markdown("### 🎛️ Action Unit Parameters")
                # Free-form JSON text; transform_emotion parses it with json.loads.
                au_params_input = gr.Textbox(
                    label="AU Parameters (JSON)",
                    value='{"AU6": 2.0, "AU12": 2.0}',
                    lines=4,
                    placeholder='{"AU6": 2.0, "AU12": 2.0}'
                )

                with gr.Row():
                    # Inference step count: 20 to 100 in increments of 10, default 50.
                    steps_slider = gr.Slider(
                        minimum=20, maximum=100, value=50, step=10,
                        label="Inference Steps (20=fast/lower quality, 50=balanced, 100=slow/high quality)"
                    )

                with gr.Row():
                    seed_input = gr.Number(value=424, label="Seed (for reproducibility)", precision=0)

                transform_btn = gr.Button("✨ Transform Emotion", variant="primary", size="lg")

                gr.Markdown("⚠️ **Note:** Transformation takes ~10-15 minutes on CPU. Please be patient!")

            # Second column: the two values returned by transform_emotion.
            with gr.Column():
                output_image = gr.Image(type="pil", label="Transformed Result")
                status_text = gr.Textbox(label="Status", interactive=False, lines=2)

        gr.Markdown("### 🎨 Emotion Presets (click to use):")
        # Each preset row supplies values for the three components listed in
        # `inputs` (AU JSON, steps, seed); the image input is not part of a preset.
        gr.Examples(
            examples=[
                ['{"AU6": 2.5, "AU12": 3.0}', 50, 424],  # Happy
                ['{"AU1": 2.0, "AU4": 2.5, "AU15": 2.5}', 50, 424],  # Sad
                ['{"AU4": 3.0, "AU5": 2.0, "AU7": 2.5, "AU23": 2.0}', 50, 424],  # Angry
                ['{"AU1": 3.0, "AU2": 2.5, "AU5": 3.0, "AU26": 2.5}', 50, 424],  # Surprised
                ['{"AU6": 1.5, "AU12": 1.5}', 50, 424],  # Slight smile
                ['{"AU4": 1.5, "AU15": 1.5}', 50, 424],  # Slight frown
            ],
            inputs=[au_params_input, steps_slider, seed_input],
        )

        # The input order mirrors transform_emotion's parameters
        # (image, au_params_str, steps, seed) and the outputs mirror its
        # (result_image, status_msg) return value.
        # NOTE(review): no api_name is set on this event, while the API tab
        # documents `/api/predict` - confirm the exposed endpoint name against
        # the installed Gradio version.
        transform_btn.click(
            fn=transform_emotion,
            inputs=[input_image, au_params_input, steps_slider, seed_input],
            outputs=[output_image, status_text]
        )
    
    # Static reference tab: a single Markdown block holding an example HTTP
    # call, the Action Unit catalogue, example AU combinations and pipeline /
    # performance notes. The text is only displayed; none of it is executed.
    # NOTE(review): the `/api/predict` path and the base64 data-URI payload
    # shown in the example depend on the installed Gradio version - confirm
    # they still match what the running app exposes.
    with gr.Tab("📡 API Documentation"):
        gr.Markdown("""
        ## 🔗 API Usage
        
        ### Gradio API Endpoint
        
        **POST** `https://gauravvjhaa-affecto-inference.hf.space/api/predict`
        
        ```python
        import requests
        import base64
        
        # Prepare image
        with open("face.jpg", "rb") as f:
            image_base64 = base64.b64encode(f.read()).decode()
        
        # Call API
        response = requests.post(
            "https://gauravvjhaa-affecto-inference.hf.space/api/predict",
            json={
                "data": [
                    f"data:image/jpeg;base64,{image_base64}",
                    '{"AU6": 2.0, "AU12": 2.0}',
                    50,  # steps
                    424  # seed
                ]
            }
        )
        
        result = response.json()
        # Result image is in result["data"][0]
        ```
        
        ### 🎭 Available Action Units (AU):
        
        **😊 Happiness:**
        - **AU6** (Cheek Raiser): 0-4 → Raises cheeks, squints eyes
        - **AU12** (Lip Corner Puller): 0-4 → Pulls lip corners up (smile)
        
        **😢 Sadness:**
        - **AU1** (Inner Brow Raiser): 0-4 → Raises inner brows
        - **AU4** (Brow Lowerer): 0-4 → Lowers brows (frown)
        - **AU15** (Lip Corner Depressor): 0-4 → Pulls lip corners down
        
        **😠 Anger:**
        - **AU4** (Brow Lowerer): 0-4
        - **AU5** (Upper Lid Raiser): 0-4
        - **AU7** (Lid Tightener): 0-4
        - **AU23** (Lip Tightener): 0-4
        
        **😮 Surprise:**
        - **AU1** (Inner Brow Raiser): 0-4
        - **AU2** (Outer Brow Raiser): 0-4
        - **AU5** (Upper Lid Raiser): 0-4
        - **AU26** (Jaw Drop): 0-4
        
        **😨 Fear:**
        - **AU1** (Inner Brow Raiser): 0-4
        - **AU2** (Outer Brow Raiser): 0-4
        - **AU4** (Brow Lowerer): 0-4
        - **AU5** (Upper Lid Raiser): 0-4
        - **AU20** (Lip Stretcher): 0-4
        - **AU25** (Lips Part): 0-4
        
        ### 📊 Example Combinations:
        
        ```json
        # Happy (smile)
        {"AU6": 2.5, "AU12": 3.0}
        
        # Sad
        {"AU1": 2.0, "AU4": 2.5, "AU15": 2.5}
        
        # Angry
        {"AU4": 3.0, "AU5": 2.0, "AU7": 2.5, "AU23": 2.0}
        
        # Surprised
        {"AU1": 3.0, "AU2": 2.5, "AU5": 3.0, "AU26": 2.5}
        
        # Fear
        {"AU1": 2.5, "AU2": 2.5, "AU4": 2.0, "AU5": 3.0, "AU20": 2.0, "AU25": 2.0}
        ```
        
        ### ⚙️ Pipeline Details:
        
        1. **Face Detection** → InsightFace detects face
        2. **Cropping** → Crop to 512×512 with alignment
        3. **Background Extraction** → BiSeNet segments face from background
        4. **Diffusion** → MagicFace transforms expression
        5. **Output** → 512×512 transformed face
        
        ### ⏱️ Performance:
        
        - **CPU**: ~10-15 minutes per image
        - **GPU (T4)**: ~15-20 seconds per image
        
        ### 📝 Notes:
        
        - Input images should contain at least one face
        - If multiple faces, largest face is used
        - AU values typically range 0-4 (higher = stronger)
        - Seed ensures reproducibility
        """)
    
    # Static "About" tab: a single Markdown block listing the components,
    # references and credits. The text is only displayed; it has no effect on
    # inference.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## 🎭 Affecto - MagicFace Implementation
        
        ### Architecture:
        
        This is a complete implementation of the **MagicFace** diffusion model for facial emotion transformation.
        
        **Components:**
        - **InsightFace**: Face detection and landmark extraction
        - **BiSeNet**: Face segmentation for background extraction
        - **Stable Diffusion 1.5**: Base diffusion model
        - **Custom UNets**: ID encoder + Denoising UNet from MagicFace
        - **Action Units**: FACS-based emotion parameters
        
        ### References:
        
        - **MagicFace Paper**: [arxiv.org/abs/2408.00623](https://arxiv.org/abs/2408.00623)
        - **MagicFace Code**: [github.com/weimengting/MagicFace](https://github.com/weimengting/MagicFace)
        - **HuggingFace Model**: [huggingface.co/mengtingwei/magicface](https://huggingface.co/mengtingwei/magicface)
        
        ### Project Info:
        
        - **Project**: Affecto - Facial Emotion Transformation System
        - **Implementation**: Complete MagicFace pipeline with preprocessing
        - **Hardware**: CPU (free tier) - upgrade to GPU for faster inference
        
        ---
        
        **Made by Gaurav Jha and Raj Shakya** 🎓
        
        Built using Gradio, PyTorch, and Diffusers
        """)

# Ready banner: printed whenever this module is executed or imported.
for banner_line in (
    "✅ Affecto - Real MagicFace API Ready!",
    "🌐 URL: https://gauravvjhaa-affecto-inference.hf.space/",
):
    print(banner_line)

if __name__ == "__main__":
    # Queue for long-running tasks, capped at 5 entries.
    demo.queue(max_size=5)
    # Listen on all interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)