gauravvjhaa committed on
Commit 3ece5ec · 1 Parent(s): b6a80c5

Add real MagicFace model structure (simplified for now)

Files changed (3)
  1. app.py +99 -181
  2. magicface_model.py +130 -0
  3. requirements.txt +4 -3
app.py CHANGED
@@ -1,68 +1,27 @@
 import gradio as gr
 import torch
-import numpy as np
 from PIL import Image
 import base64
 from io import BytesIO
 import json
-from huggingface_hub import hf_hub_download
 
 print("🚀 Starting Affecto Inference Service...")
 
-# ============================================
-# MODEL LOADING
-# ============================================
+# Import our MagicFace model
+from magicface_model import MagicFaceModel
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Initialize model
+device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🖥️ Device: {device}")
 
-# Download your model
-print("📥 Downloading MagicFace model...")
-model_path = hf_hub_download(
-    repo_id="gauravvjhaa/magicface-affecto-model",
-    filename="79999_iter.pth",
-    cache_dir="./models"
-)
-print(f"✅ Model downloaded to: {model_path}")
-
-# Load checkpoint
-checkpoint = torch.load(model_path, map_location=device)
-print(f"📦 Checkpoint loaded successfully")
+print("📥 Loading MagicFace model...")
+model = MagicFaceModel(device=device)
+print("✅ Model ready!")
 
 # ============================================
-# IMAGE PROCESSING UTILITIES
+# UTILITY FUNCTIONS
 # ============================================
 
-import torchvision.transforms as transforms
-
-def preprocess_image(image):
-    """Convert PIL image to tensor"""
-    if not isinstance(image, Image.Image):
-        image = Image.fromarray(image)
-
-    if image.mode != 'RGB':
-        image = image.convert('RGB')
-
-    transform = transforms.Compose([
-        transforms.Resize((256, 256)),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-    ])
-
-    tensor = transform(image).unsqueeze(0)
-    return tensor.to(device)
-
-def postprocess_tensor(tensor):
-    """Convert tensor to PIL image"""
-    tensor = tensor.squeeze(0).cpu()
-    tensor = tensor * 0.5 + 0.5
-    tensor = torch.clamp(tensor, 0, 1)
-
-    numpy_image = tensor.numpy().transpose(1, 2, 0)
-    numpy_image = (numpy_image * 255).astype(np.uint8)
-
-    return Image.fromarray(numpy_image)
-
 def pil_to_base64(image):
     """Convert PIL to base64"""
     buffered = BytesIO()
@@ -75,115 +34,76 @@ def base64_to_pil(base64_str):
     return Image.open(BytesIO(image_bytes))
 
 # ============================================
-# TRANSFORMATION
+# INFERENCE FUNCTIONS
 # ============================================
 
-def apply_emotion_transform(input_tensor, au_params):
-    """Apply emotion transformation (placeholder)"""
-    print(f"🎭 Applying transformation with AU params: {au_params}")
-
-    output = input_tensor.clone()
-
-    if "AU12" in au_params:
-        intensity = au_params["AU12"]
-        output = output * (1.0 + intensity * 0.2)
-
-    if "AU4" in au_params:
-        intensity = au_params["AU4"]
-        output = output * (1.0 - intensity * 0.15)
-
-    output = torch.clamp(output, -1, 1)
-    return output
-
-# ============================================
-# API FUNCTIONS
-# ============================================
+def transform_gradio(image, au_params_str):
+    """Gradio interface function"""
+    try:
+        # Parse AU params
+        au_params = json.loads(au_params_str)
+
+        # Ensure image is 512x512
+        if image.size != (512, 512):
+            image = image.resize((512, 512), Image.LANCZOS)
+
+        # Transform
+        result_image = model.transform(image, au_params)
+
+        return result_image
+    except Exception as e:
+        print(f"❌ Error: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return image
 
-def transform_api(data):
+def transform_api(image_base64, au_params_str):
     """API function for external calls"""
     try:
-        image_base64 = data["image"]
-        au_params = data["au_params"]
-
-        print(f"📥 Received API request with AU params: {au_params}")
+        print(f"📥 Received API request")
 
+        # Decode image
         image = base64_to_pil(image_base64)
         print(f"📸 Image size: {image.size}")
 
-        input_tensor = preprocess_image(image)
-        output_tensor = apply_emotion_transform(input_tensor, au_params)
-        result_image = postprocess_tensor(output_tensor)
+        # Parse AU params
+        au_params = json.loads(au_params_str)
+
+        # Ensure 512x512
+        if image.size != (512, 512):
+            image = image.resize((512, 512), Image.LANCZOS)
+
+        # Transform
+        result_image = model.transform(image, au_params)
+
+        # Encode result
         result_base64 = pil_to_base64(result_image)
 
        print("✅ Transformation complete")
 
-        return {
-            "success": True,
-            "transformed_image": result_base64,
-            "au_params": au_params,
-            "message": "Transformation successful"
-        }
+        return result_base64
     except Exception as e:
         print(f"❌ API Error: {str(e)}")
         import traceback
         traceback.print_exc()
-        return {
-            "success": False,
-            "error": str(e),
-            "message": "Transformation failed"
-        }
-
-def health_check():
-    """Health check function"""
-    return {
-        "status": "healthy",
-        "model": "magicface",
-        "device": str(device),
-        "version": "1.0.0"
-    }
-
-def root_info():
-    """Root info function"""
-    return {
-        "message": "Affecto Inference API",
-        "status": "running",
-        "version": "1.0.0",
-        "endpoints": {
-            "health": "/health",
-            "transform": "/transform"
-        }
-    }
+        raise
 
 # ============================================
 # GRADIO INTERFACE
 # ============================================
 
-def transform_gradio(image, au_params_str):
-    """Gradio interface function"""
-    try:
-        au_params = json.loads(au_params_str)
-        input_tensor = preprocess_image(image)
-        output_tensor = apply_emotion_transform(input_tensor, au_params)
-        result_image = postprocess_tensor(output_tensor)
-        return result_image
-    except Exception as e:
-        print(f"❌ Error: {str(e)}")
-        import traceback
-        traceback.print_exc()
-        return image
-
-# Build Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), title="Affecto Inference API") as demo:
-    gr.Markdown("# 🎭 Affecto - Emotion Transformation API")
-    gr.Markdown("Transform facial emotions using MagicFace Action Units")
+with gr.Blocks(theme=gr.themes.Soft(), title="Affecto MagicFace API") as demo:
+    gr.Markdown("# 🎭 Affecto - MagicFace Emotion Transformation")
+    gr.Markdown("Transform facial emotions using Action Units (AU)")
+    gr.Markdown("⚠️ **Note:** Currently using simplified model. Full MagicFace pipeline coming soon!")
 
     with gr.Tab("🖼️ Web Interface"):
         with gr.Row():
            with gr.Column():
-                input_image = gr.Image(type="pil", label="Upload Image")
+                input_image = gr.Image(type="pil", label="Upload Face Image (512x512 recommended)")
                 au_params_input = gr.Textbox(
                     label="AU Parameters (JSON)",
-                    value='{"AU6": 1.0, "AU12": 1.0}',
+                    value='{"AU6": 2.0, "AU12": 2.0}',
                     lines=3
                 )
                 transform_btn = gr.Button("✨ Transform", variant="primary", size="lg")
@@ -191,15 +111,16 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Affecto Inference API") as demo:
             with gr.Column():
                 output_image = gr.Image(type="pil", label="Transformed Result")
 
-        gr.Markdown("### 🎨 Emotion Presets:")
+        gr.Markdown("### 🎨 Emotion Presets (click to use):")
         gr.Examples(
             examples=[
-                ['{"AU6": 1.0, "AU12": 1.0}'],
-                ['{"AU1": 1.0, "AU4": 1.0, "AU15": 1.0}'],
-                ['{"AU4": 1.0, "AU5": 1.0, "AU7": 1.0, "AU23": 1.0}'],
-                ['{"AU1": 1.0, "AU2": 1.0, "AU5": 1.0, "AU26": 1.0}'],
+                ['{"AU6": 2.0, "AU12": 2.0}'],  # Happy
+                ['{"AU1": 2.0, "AU4": 2.0, "AU15": 2.0}'],  # Sad
+                ['{"AU4": 3.0, "AU5": 2.0, "AU7": 2.0}'],  # Angry
+                ['{"AU1": 3.0, "AU2": 2.0, "AU5": 3.0, "AU26": 2.0}'],  # Surprised
             ],
             inputs=[au_params_input],
+            label="Emotion Presets"
         )
 
         transform_btn.click(
@@ -210,60 +131,57 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Affecto Inference API") as demo:
 
     with gr.Tab("📡 API Documentation"):
         gr.Markdown("""
-        ## API Endpoints
-
-        ### Transform Image
-        **POST** `/api/transform`
-
-        ```json
-        {
-            "image": "base64_encoded_image",
-            "au_params": {"AU6": 1.0, "AU12": 1.0}
-        }
+        ## API Usage
+
+        ### Gradio API Endpoint
+
+        ```python
+        import requests
+        import base64
+        import json
+
+        # Prepare image
+        with open("face.jpg", "rb") as f:
+            image_base64 = base64.b64encode(f.read()).decode()
+
+        # Call API
+        response = requests.post(
+            "https://gauravvjhaa-affecto-inference.hf.space/api/predict",
+            json={
+                "data": [
+                    image_base64,
+                    '{"AU6": 2.0, "AU12": 2.0}'
+                ]
+            }
+        )
+
+        result = response.json()
+        result_image = result["data"][0]  # base64 string
        ```
 
-        ### Health Check
-        **GET** `/api/health`
-
-        Returns service status and model information.
+        ### Available Action Units:
+        - **AU1** (0): Inner Brow Raiser - Values: 0-4
+        - **AU2** (1): Outer Brow Raiser - Values: 0-4
+        - **AU4** (2): Brow Lowerer - Values: 0-4
+        - **AU5** (3): Upper Lid Raiser - Values: 0-4
+        - **AU6** (4): Cheek Raiser - Values: 0-4
+        - **AU9** (5): Nose Wrinkler - Values: 0-4
+        - **AU12** (6): Lip Corner Puller (Smile) - Values: 0-4
+        - **AU15** (7): Lip Corner Depressor - Values: 0-4
+        - **AU17** (8): Chin Raiser - Values: 0-4
+        - **AU20** (9): Lip Stretcher - Values: 0-4
+        - **AU25** (10): Lips Part - Values: 0-4
+        - **AU26** (11): Jaw Drop - Values: 0-4
+
+        ### Example Combinations:
+        - **Happy**: `{"AU6": 2, "AU12": 2}`
+        - **Sad**: `{"AU1": 2, "AU4": 2, "AU15": 2}`
+        - **Angry**: `{"AU4": 3, "AU5": 2, "AU7": 2}`
+        - **Surprised**: `{"AU1": 3, "AU2": 2, "AU5": 3, "AU26": 2}`
         """)
-
-    # API endpoints as Gradio functions
-    with gr.Tab("🔌 API"):
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### POST /api/transform")
-                api_input = gr.Textbox(
-                    label="Request JSON",
-                    value='{"image": "BASE64_STRING", "au_params": {"AU6": 1.0}}',
-                    lines=5
-                )
-                api_btn = gr.Button("Test API")
-                api_output = gr.JSON(label="Response")
-
-                api_btn.click(
-                    fn=lambda x: transform_api(json.loads(x)),
-                    inputs=[api_input],
-                    outputs=[api_output]
-                )
-
-            with gr.Column():
-                gr.Markdown("### GET /api/health")
-                health_btn = gr.Button("Check Health")
-                health_output = gr.JSON(label="Health Status")
-
-                health_btn.click(
-                    fn=health_check,
-                    inputs=[],
-                    outputs=[health_output]
-                )
-
-# Add API routes using Gradio's API
-demo.api_names = ["transform", "health", "root"]
 
-print("✅ Affecto Inference API Ready!")
+print("✅ Affecto MagicFace API Ready!")
 print(f"🌐 Gradio UI: https://gauravvjhaa-affecto-inference.hf.space/")
 
-# Launch
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
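Note that `transform_api` now returns a bare base64 string rather than the old JSON envelope with `success`/`transformed_image` fields, so callers must decode `result["data"][0]` themselves. A minimal client-side sketch of that last step (illustrative only, not part of this commit; the file name is an assumption):

```python
# decode_result.py - illustrative helper, not part of this commit
import base64
from io import BytesIO
from PIL import Image

def save_result(result_base64: str, path: str = "result.png") -> None:
    """Decode the base64 string returned by the API and save it as an image."""
    image = Image.open(BytesIO(base64.b64decode(result_base64)))  # mirrors base64_to_pil in app.py
    image.save(path)
```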
magicface_model.py ADDED
@@ -0,0 +1,130 @@
+import torch
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+from diffusers import AutoencoderKL, UniPCMultistepScheduler
+from transformers import CLIPTextModel, CLIPTokenizer
+
+# We'll need to implement these custom UNet classes
+# For now, we'll use a simplified version
+
+class MagicFaceModel:
+    def __init__(self, device='cuda'):
+        self.device = device if torch.cuda.is_available() else 'cpu'
+        print(f"🖥️ Initializing MagicFace on: {self.device}")
+
+        # AU mapping (same as original)
+        self.ind_dict = {
+            'AU1': 0, 'AU2': 1, 'AU4': 2, 'AU5': 3, 'AU6': 4, 'AU9': 5,
+            'AU12': 6, 'AU15': 7, 'AU17': 8, 'AU20': 9, 'AU25': 10, 'AU26': 11
+        }
+
+        self.load_models()
+
+    def load_models(self):
+        """Load all required models"""
+        print("📥 Loading Stable Diffusion components...")
+
+        # Load VAE
+        self.vae = AutoencoderKL.from_pretrained(
+            'runwayml/stable-diffusion-v1-5',
+            subfolder="vae",
+            torch_dtype=torch.float16 if self.device == 'cuda' else torch.float32
+        ).to(self.device)
+
+        # Load Text Encoder
+        self.text_encoder = CLIPTextModel.from_pretrained(
+            'runwayml/stable-diffusion-v1-5',
+            subfolder="text_encoder",
+        ).to(self.device)
+
+        # Load Tokenizer
+        self.tokenizer = CLIPTokenizer.from_pretrained(
+            'runwayml/stable-diffusion-v1-5',
+            subfolder="tokenizer",
+        )
+
+        # TODO: Load custom UNets from mengtingwei/magicface
+        # For now, we'll use a simplified approach
+        print("⚠️ Using simplified model (custom UNets not yet loaded)")
+
+        self.vae.requires_grad_(False)
+        self.text_encoder.requires_grad_(False)
+
+        print("✅ Models loaded successfully")
+
+    def preprocess_image(self, image: Image.Image):
+        """Preprocess image for inference"""
+        transform = transforms.Compose([
+            transforms.Resize((512, 512)),
+            transforms.ToTensor(),
+        ])
+        return transform(image).unsqueeze(0).to(self.device)
+
+    def prepare_au_vector(self, au_params: dict):
+        """Convert AU parameters dict to a 12-dim tensor"""
+        au_prompt = np.zeros((12,))
+
+        for au_name, value in au_params.items():
+            if au_name in self.ind_dict:
+                au_prompt[self.ind_dict[au_name]] = value
+
+        return torch.from_numpy(au_prompt).float().unsqueeze(0).to(self.device)
+
+    def tokenize_caption(self, caption: str):
+        """Tokenize text prompt"""
+        inputs = self.tokenizer(
+            caption,
+            max_length=self.tokenizer.model_max_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt"
+        )
+        return inputs.input_ids.to(self.device)
+
+    @torch.no_grad()
+    def transform(self, image: Image.Image, au_params: dict):
+        """
+        Transform facial expression based on AU parameters
+
+        Args:
+            image: PIL Image (512x512)
+            au_params: dict like {"AU6": 1.0, "AU12": 1.0}
+
+        Returns:
+            PIL Image (transformed)
+        """
+        print(f"🎭 Transforming with AU params: {au_params}")
+
+        # Preprocess
+        source_tensor = self.preprocess_image(image)
+        au_vector = self.prepare_au_vector(au_params)
+
+        # Get text embeddings
+        prompt = "A close up of a person."
+        prompt_ids = self.tokenize_caption(prompt)
+        prompt_embeds = self.text_encoder(prompt_ids)[0]
+
+        # TODO: Implement full diffusion pipeline with custom UNets
+        # For now, return a simple transformation
+        print("⚠️ Using simplified transformation (full pipeline not yet implemented)")
+
+        # Placeholder: apply a simple brightness adjustment based on AUs
+        output_tensor = source_tensor.clone()
+
+        # AU12 (smile) - brighten
+        if "AU12" in au_params:
+            output_tensor = output_tensor * (1.0 + au_params["AU12"] * 0.3)
+
+        # AU4 (frown) - darken
+        if "AU4" in au_params:
+            output_tensor = output_tensor * (1.0 - au_params["AU4"] * 0.2)
+
+        output_tensor = torch.clamp(output_tensor, 0, 1)
+
+        # Convert back to PIL
+        output_np = output_tensor.squeeze(0).cpu().numpy().transpose(1, 2, 0)
+        output_np = (output_np * 255).astype(np.uint8)
+        result_image = Image.fromarray(output_np)
+
+        return result_image
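For a quick local check of the new module, a minimal smoke test might look like this (a sketch, not part of this commit: it assumes the pinned dependencies below are installed, that a `face.jpg` exists locally, and that the first run can download the Stable Diffusion components from the Hub):

```python
# smoke_test.py - illustrative only, not part of this commit
from PIL import Image
from magicface_model import MagicFaceModel

model = MagicFaceModel(device="cuda")  # falls back to CPU automatically if CUDA is unavailable
image = Image.open("face.jpg").convert("RGB")  # resized to 512x512 inside transform()

# "Happy" preset: cheek raiser (AU6) + lip corner puller (AU12)
result = model.transform(image, {"AU6": 2.0, "AU12": 2.0})
result.save("face_happy.jpg")
```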
requirements.txt CHANGED
@@ -1,9 +1,10 @@
 torch==2.0.1
 torchvision==0.15.2
 gradio==4.16.0
-fastapi==0.109.0
-uvicorn[standard]==0.27.0
+diffusers==0.21.4
+transformers==4.35.2
+accelerate==0.24.1
+safetensors==0.4.1
 Pillow==10.2.0
 numpy==1.26.3
 huggingface-hub==0.20.3
-python-multipart==0.0.6
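The new pins add the diffusion stack that `magicface_model.py` imports, while dropping the FastAPI/uvicorn dependencies that the removed custom API endpoints needed. A quick sanity check that the pinned versions install and import together (illustrative only; run after `pip install -r requirements.txt`):

```python
# env_check.py - illustrative only, not part of this commit
import torch, torchvision, gradio, diffusers, transformers, accelerate, safetensors

# Print the versions actually resolved in the environment
print(torch.__version__, diffusers.__version__, transformers.__version__)
```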