Spaces:
Running on Zero
Update inference.py
Browse files- inference.py +9 -49
inference.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import argparse
|
| 2 |
import os
|
| 3 |
-
import shutil
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Dict, Optional, List, Tuple
|
| 6 |
from collections import defaultdict
|
|
@@ -36,10 +35,6 @@ session = new_session(providers=providers)
|
|
| 36 |
weight_dtype = torch.float16
|
| 37 |
|
| 38 |
|
| 39 |
-
# ============================================================
|
| 40 |
-
# Config
|
| 41 |
-
# ============================================================
|
| 42 |
-
|
| 43 |
@dataclass
|
| 44 |
class TestConfig:
|
| 45 |
pretrained_model_name_or_path: str
|
|
@@ -63,16 +58,11 @@ class TestConfig:
|
|
| 63 |
with_smpl: Optional[bool]
|
| 64 |
recon_opt: Dict
|
| 65 |
|
| 66 |
-
# New two-stage fields
|
| 67 |
run_mode: str = "full" # full | generate | reconstruct
|
| 68 |
multiview_tmp_dir: str = "./multiview"
|
| 69 |
prefer_edited_views: bool = True
|
| 70 |
|
| 71 |
|
| 72 |
-
# ============================================================
|
| 73 |
-
# Image helpers
|
| 74 |
-
# ============================================================
|
| 75 |
-
|
| 76 |
def convert_to_numpy(tensor):
|
| 77 |
return tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
|
| 78 |
|
|
@@ -92,10 +82,6 @@ def save_image_tensor(tensor, fp):
|
|
| 92 |
return ndarr
|
| 93 |
|
| 94 |
|
| 95 |
-
# ============================================================
|
| 96 |
-
# Multiview storage helpers
|
| 97 |
-
# ============================================================
|
| 98 |
-
|
| 99 |
def ensure_dir(path: Path):
|
| 100 |
path.mkdir(parents=True, exist_ok=True)
|
| 101 |
|
|
@@ -108,24 +94,20 @@ def save_multiview_scene(multiview_root: str, scene: str, colors: List[Image.Ima
|
|
| 108 |
ensure_dir(raw_dir)
|
| 109 |
ensure_dir(edit_dir)
|
| 110 |
|
| 111 |
-
# Clean previous files to avoid stale leftovers
|
| 112 |
for folder in (raw_dir, edit_dir):
|
| 113 |
for p in folder.glob("*"):
|
| 114 |
if p.is_file():
|
| 115 |
p.unlink()
|
| 116 |
|
| 117 |
for idx, img in enumerate(colors):
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
img.save(raw_color)
|
| 121 |
-
img.save(edit_color)
|
| 122 |
|
| 123 |
for idx, img in enumerate(normals):
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
img.save(raw_normal)
|
| 127 |
-
img.save(edit_normal)
|
| 128 |
|
|
|
|
| 129 |
meta = {
|
| 130 |
"scene": scene,
|
| 131 |
"num_colors": len(colors),
|
|
@@ -133,7 +115,6 @@ def save_multiview_scene(multiview_root: str, scene: str, colors: List[Image.Ima
|
|
| 133 |
"source": "PSHuman two-stage inference",
|
| 134 |
}
|
| 135 |
with open(scene_dir / "meta.json", "w", encoding="utf-8") as f:
|
| 136 |
-
import json
|
| 137 |
json.dump(meta, f, indent=2)
|
| 138 |
|
| 139 |
|
|
@@ -159,10 +140,6 @@ def load_multiview_scene(multiview_root: str, scene: str, prefer_edit=True) -> T
|
|
| 159 |
return colors, normals
|
| 160 |
|
| 161 |
|
| 162 |
-
# ============================================================
|
| 163 |
-
# Pipeline helpers
|
| 164 |
-
# ============================================================
|
| 165 |
-
|
| 166 |
def load_pshuman_pipeline(cfg):
|
| 167 |
pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
|
| 168 |
cfg.pretrained_model_name_or_path,
|
|
@@ -174,25 +151,17 @@ def load_pshuman_pipeline(cfg):
|
|
| 174 |
return pipeline
|
| 175 |
|
| 176 |
|
| 177 |
-
def extract_scene_views_for_case(
|
| 178 |
-
batch,
|
| 179 |
-
out,
|
| 180 |
-
imgs_in,
|
| 181 |
-
i: int,
|
| 182 |
-
num_views: int,
|
| 183 |
-
):
|
| 184 |
normals_pred = out[: out.shape[0] // 2]
|
| 185 |
images_pred = out[out.shape[0] // 2:]
|
| 186 |
|
| 187 |
scene = batch['filename'][i].split('.')[0]
|
| 188 |
-
|
| 189 |
normals, colors = [], []
|
| 190 |
|
| 191 |
for j in range(num_views):
|
| 192 |
idx = i * num_views + j
|
| 193 |
normal = normals_pred[idx]
|
| 194 |
|
| 195 |
-
# Fix from original code: use scene-local first input image
|
| 196 |
if j == 0:
|
| 197 |
color = imgs_in[i * num_views].to(out.device)
|
| 198 |
else:
|
|
@@ -214,35 +183,29 @@ def extract_scene_views_for_case(
|
|
| 214 |
|
| 215 |
normals.append(normal)
|
| 216 |
|
| 217 |
-
# Preserve original PSHuman behavior
|
| 218 |
if len(normals) >= 2:
|
| 219 |
normals[0][:, :256, 256:512] = normals[-1]
|
| 220 |
|
| 221 |
-
# Original code keeps first 6 views only
|
| 222 |
colors_pil = [remove(convert_to_pil(tensor), session=session) for tensor in colors[:6]]
|
| 223 |
normals_pil = [remove(convert_to_pil(tensor), session=session) for tensor in normals[:6]]
|
| 224 |
|
| 225 |
return scene, colors_pil, normals_pil
|
| 226 |
|
| 227 |
|
| 228 |
-
# ============================================================
|
| 229 |
-
# Main inference logic
|
| 230 |
-
# ============================================================
|
| 231 |
-
|
| 232 |
def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save_dir):
|
| 233 |
-
pipeline
|
|
|
|
| 234 |
|
| 235 |
if cfg.seed is None:
|
| 236 |
generator = None
|
| 237 |
else:
|
| 238 |
-
generator = torch.Generator(device=
|
| 239 |
|
| 240 |
images_cond, pred_cat = [], defaultdict(list)
|
| 241 |
|
| 242 |
for case_id, batch in tqdm(enumerate(dataloader)):
|
| 243 |
images_cond.append(batch['imgs_in'][:, 0])
|
| 244 |
|
| 245 |
-
# Reconstruct-only path: skip diffusion, load saved views instead
|
| 246 |
if cfg.run_mode == "reconstruct":
|
| 247 |
scene = batch['filename'][0].split('.')[0]
|
| 248 |
colors, normals = load_multiview_scene(
|
|
@@ -315,7 +278,6 @@ def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save
|
|
| 315 |
vis_ = make_grid(vis_, nrow=len(vis_), padding=0, value_range=(0, 1))
|
| 316 |
save_image_tensor(vis_, out_filename)
|
| 317 |
|
| 318 |
-
# concat mode is only for legacy visualization
|
| 319 |
continue
|
| 320 |
|
| 321 |
elif cfg.save_mode == 'rgb':
|
|
@@ -332,7 +294,6 @@ def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save
|
|
| 332 |
save_multiview_scene(cfg.multiview_tmp_dir, scene, colors, normals)
|
| 333 |
continue
|
| 334 |
|
| 335 |
-
# full mode: original one-pass behavior
|
| 336 |
pose = econdata.__getitem__(case_id)
|
| 337 |
carving.optimize_case(scene, pose, colors, normals)
|
| 338 |
torch.cuda.empty_cache()
|
|
@@ -342,7 +303,6 @@ def main(cfg: TestConfig):
|
|
| 342 |
if cfg.seed is not None:
|
| 343 |
set_seed(cfg.seed)
|
| 344 |
|
| 345 |
-
# Reconstruct mode does not need the diffusion pipeline at all
|
| 346 |
pipeline = None if cfg.run_mode == "reconstruct" else load_pshuman_pipeline(cfg)
|
| 347 |
|
| 348 |
if cfg.with_smpl:
|
|
|
|
| 1 |
import argparse
|
| 2 |
import os
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Dict, Optional, List, Tuple
|
| 5 |
from collections import defaultdict
|
|
|
|
| 35 |
weight_dtype = torch.float16
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
@dataclass
|
| 39 |
class TestConfig:
|
| 40 |
pretrained_model_name_or_path: str
|
|
|
|
| 58 |
with_smpl: Optional[bool]
|
| 59 |
recon_opt: Dict
|
| 60 |
|
|
|
|
| 61 |
run_mode: str = "full" # full | generate | reconstruct
|
| 62 |
multiview_tmp_dir: str = "./multiview"
|
| 63 |
prefer_edited_views: bool = True
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def convert_to_numpy(tensor):
    """Convert a CHW float image tensor (values assumed in [0, 1] — clamped anyway) to an HWC uint8 numpy array."""
    # mul() allocates a fresh tensor, so the in-place add_/clamp_ below never mutate the caller's data.
    scaled = tensor.mul(255).add_(0.5).clamp_(0, 255)
    return scaled.permute(1, 2, 0).to("cpu", torch.uint8).numpy()
|
| 68 |
|
|
|
|
| 82 |
return ndarr
|
| 83 |
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
def ensure_dir(path: Path) -> None:
    """Create directory *path*, including any missing parents; a no-op when it already exists."""
    path.mkdir(exist_ok=True, parents=True)
|
| 87 |
|
|
|
|
| 94 |
ensure_dir(raw_dir)
|
| 95 |
ensure_dir(edit_dir)
|
| 96 |
|
|
|
|
| 97 |
for folder in (raw_dir, edit_dir):
|
| 98 |
for p in folder.glob("*"):
|
| 99 |
if p.is_file():
|
| 100 |
p.unlink()
|
| 101 |
|
| 102 |
for idx, img in enumerate(colors):
|
| 103 |
+
img.save(raw_dir / f"color_{idx:02d}.png")
|
| 104 |
+
img.save(edit_dir / f"color_{idx:02d}.png")
|
|
|
|
|
|
|
| 105 |
|
| 106 |
for idx, img in enumerate(normals):
|
| 107 |
+
img.save(raw_dir / f"normal_{idx:02d}.png")
|
| 108 |
+
img.save(edit_dir / f"normal_{idx:02d}.png")
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
import json
|
| 111 |
meta = {
|
| 112 |
"scene": scene,
|
| 113 |
"num_colors": len(colors),
|
|
|
|
| 115 |
"source": "PSHuman two-stage inference",
|
| 116 |
}
|
| 117 |
with open(scene_dir / "meta.json", "w", encoding="utf-8") as f:
|
|
|
|
| 118 |
json.dump(meta, f, indent=2)
|
| 119 |
|
| 120 |
|
|
|
|
| 140 |
return colors, normals
|
| 141 |
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
def load_pshuman_pipeline(cfg):
|
| 144 |
pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
|
| 145 |
cfg.pretrained_model_name_or_path,
|
|
|
|
| 151 |
return pipeline
|
| 152 |
|
| 153 |
|
| 154 |
+
def extract_scene_views_for_case(batch, out, imgs_in, i: int, num_views: int):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
normals_pred = out[: out.shape[0] // 2]
|
| 156 |
images_pred = out[out.shape[0] // 2:]
|
| 157 |
|
| 158 |
scene = batch['filename'][i].split('.')[0]
|
|
|
|
| 159 |
normals, colors = [], []
|
| 160 |
|
| 161 |
for j in range(num_views):
|
| 162 |
idx = i * num_views + j
|
| 163 |
normal = normals_pred[idx]
|
| 164 |
|
|
|
|
| 165 |
if j == 0:
|
| 166 |
color = imgs_in[i * num_views].to(out.device)
|
| 167 |
else:
|
|
|
|
| 183 |
|
| 184 |
normals.append(normal)
|
| 185 |
|
|
|
|
| 186 |
if len(normals) >= 2:
|
| 187 |
normals[0][:, :256, 256:512] = normals[-1]
|
| 188 |
|
|
|
|
| 189 |
colors_pil = [remove(convert_to_pil(tensor), session=session) for tensor in colors[:6]]
|
| 190 |
normals_pil = [remove(convert_to_pil(tensor), session=session) for tensor in normals[:6]]
|
| 191 |
|
| 192 |
return scene, colors_pil, normals_pil
|
| 193 |
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save_dir):
|
| 196 |
+
if pipeline is not None:
|
| 197 |
+
pipeline.set_progress_bar_config(disable=True)
|
| 198 |
|
| 199 |
if cfg.seed is None:
|
| 200 |
generator = None
|
| 201 |
else:
|
| 202 |
+
generator = torch.Generator(device='cuda' if torch.cuda.is_available() else 'cpu').manual_seed(cfg.seed)
|
| 203 |
|
| 204 |
images_cond, pred_cat = [], defaultdict(list)
|
| 205 |
|
| 206 |
for case_id, batch in tqdm(enumerate(dataloader)):
|
| 207 |
images_cond.append(batch['imgs_in'][:, 0])
|
| 208 |
|
|
|
|
| 209 |
if cfg.run_mode == "reconstruct":
|
| 210 |
scene = batch['filename'][0].split('.')[0]
|
| 211 |
colors, normals = load_multiview_scene(
|
|
|
|
| 278 |
vis_ = make_grid(vis_, nrow=len(vis_), padding=0, value_range=(0, 1))
|
| 279 |
save_image_tensor(vis_, out_filename)
|
| 280 |
|
|
|
|
| 281 |
continue
|
| 282 |
|
| 283 |
elif cfg.save_mode == 'rgb':
|
|
|
|
| 294 |
save_multiview_scene(cfg.multiview_tmp_dir, scene, colors, normals)
|
| 295 |
continue
|
| 296 |
|
|
|
|
| 297 |
pose = econdata.__getitem__(case_id)
|
| 298 |
carving.optimize_case(scene, pose, colors, normals)
|
| 299 |
torch.cuda.empty_cache()
|
|
|
|
| 303 |
if cfg.seed is not None:
|
| 304 |
set_seed(cfg.seed)
|
| 305 |
|
|
|
|
| 306 |
pipeline = None if cfg.run_mode == "reconstruct" else load_pshuman_pipeline(cfg)
|
| 307 |
|
| 308 |
if cfg.with_smpl:
|