Spaces:

dqj5182
/

HACO

Running

App Files Files Community

HACO / demo.py

dqj5182

init

5732928 9 months ago

raw

history blame contribute delete

4.89 kB

	import os
	import cv2
	import torch
	import argparse
	import numpy as np
	from tqdm import tqdm

	import mediapipe as mp
	from mediapipe.tasks.python import vision
	from mediapipe.tasks.python import BaseOptions

	from lib.core.config import cfg, update_config
	from lib.models.model import HACO
	from lib.utils.human_models import mano
	from lib.utils.contact_utils import get_contact_thres
	from lib.utils.vis_utils import ContactRenderer, draw_landmarks_on_image
	from lib.utils.preprocessing import augmentation_contact
	from lib.utils.demo_utils import remove_small_contact_components


	# Command-line interface of the demo: backbone choice, optional checkpoint
	# file, and the directory holding the input images.
	_backbone_choices = [
	    'hamer',
	    'vit-l-16', 'vit-b-16', 'vit-s-16',
	    'handoccnet',
	    'hrnet-w48', 'hrnet-w32',
	    'resnet-152', 'resnet-101', 'resnet-50', 'resnet-34', 'resnet-18',
	]

	parser = argparse.ArgumentParser(description='Demo HACO')
	parser.add_argument(
	    '--backbone',
	    type=str,
	    default='hamer',
	    choices=_backbone_choices,
	    help='backbone model',
	)
	parser.add_argument(
	    '--checkpoint',
	    type=str,
	    default='',
	    help='model path for demo',
	)
	parser.add_argument(
	    '--input_path',
	    type=str,
	    default='asset/example_images',
	    help='image path for demo',
	)
	args = parser.parse_args()


	# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'


	# Experiment directory name passed to the config
	experiment_dir = 'experiments_demo_image'


	# Load config for the backbone selected on the command line
	update_config(backbone_type=args.backbone, exp_dir=experiment_dir)


	# Initialize renderer
	contact_renderer = ContactRenderer()


	# Load demo images: keep only .png/.jpg/.jpeg files (case-insensitive).
	# os.listdir() returns entries in arbitrary order, so sort the names to make
	# the processing order (and the printed frame indices) reproducible.
	input_dir = args.input_path
	images = sorted(f for f in os.listdir(input_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg')))
	if not images:
	    print(f"No .png/.jpg/.jpeg images found in {input_dir}")


	# Initialize MediaPipe HandLandmarker (num_hands=2: detect up to two hands)
	base_options = BaseOptions(model_asset_path=cfg.MODEL.hand_landmarker_path)
	hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=2)
	detector = vision.HandLandmarker.create_from_options(hand_options)


	############# Model #############
	model = HACO().to(device)
	model.eval()
	############# Model #############


	# Load model checkpoint if provided (an empty --checkpoint skips loading)
	if args.checkpoint:
	    # NOTE(review): torch.load unpickles the file, so only load checkpoints
	    # from a trusted source. The checkpoint is expected to be a mapping with
	    # the weights stored under the 'state_dict' key.
	    checkpoint = torch.load(args.checkpoint, map_location=device)
	    model.load_state_dict(checkpoint['state_dict'])


	############################### Demo Loop ###############################
	# For every input image: detect hand landmarks, crop around the right-hand
	# box, run the model, and write the annotated detection, the crop and the
	# rendered contact mask under outputs/.

	# ---- Loop-invariant setup (done once, not once per image) ----
	from torchvision import transforms

	# makedirs creates the parent 'outputs' directory as well.
	for out_dir in ('outputs/detection', 'outputs/crop_img', 'outputs/contact'):
	    os.makedirs(out_dir, exist_ok=True)

	# Contact threshold for the selected backbone
	# (assumed to depend only on the backbone name, hence computed once).
	eval_thres = get_contact_thres(args.backbone)

	# Pick the input conversion once; the backbone does not change between images.
	backbone_type = cfg.MODEL.backbone_type
	to_tensor = transforms.ToTensor()
	if args.backbone in ['handoccnet'] or 'resnet' in backbone_type or 'hrnet' in backbone_type:
	    # These backbones take the crop scaled by 1/255 without mean/std normalization.
	    normalize = None
	elif args.backbone in ['hamer'] or 'vit' in backbone_type:
	    # These backbones take the crop scaled by 1/255 and then mean/std normalized.
	    normalize = transforms.Normalize(mean=cfg.MODEL.img_mean, std=cfg.MODEL.img_std)
	else:
	    raise NotImplementedError(f"Unsupported backbone: {args.backbone}")

	for i, frame_name in tqdm(enumerate(images), total=len(images)):
	    print(f"Processing: {frame_name}")

	    # Load and convert image
	    frame_path = os.path.join(input_dir, frame_name)
	    frame = cv2.imread(frame_path)
	    if frame is None:
	        # cv2.imread returns None (it does not raise) when the file is
	        # missing, unreadable or not a decodable image.
	        print(f"Skipping {frame_name} - could not read image.")
	        continue
	    orig_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	    frame_name_base = os.path.splitext(frame_name)[0]

	    # Hand landmark detection
	    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=orig_img.copy())
	    detection_result = detector.detect(mp_image)
	    annotated_image, right_hand_bbox = draw_landmarks_on_image(orig_img.copy(), detection_result)

	    if right_hand_bbox is None:
	        print(f"Skipping {frame_name} - no hand detected.")
	        continue

	    print(f"Frame {i}: Right hand bbox: {right_hand_bbox}")

	    # Image preprocessing ('test' mode, no enforced flip); only the crop is
	    # used below, the remaining return values are kept for reference.
	    crop_img, img2bb_trans, bb2img_trans, rot, do_flip, color_scale = augmentation_contact(orig_img.copy(), right_hand_bbox, 'test', enforce_flip=False)

	    # Convert to model input format
	    if normalize is None:
	        img_tensor = to_tensor(crop_img.astype(np.float32) / 255.0)
	    else:
	        img_tensor = crop_img.transpose(2, 0, 1) / 255.0
	        img_tensor = normalize(torch.from_numpy(img_tensor)).float()

	    ############# Run model #############
	    with torch.no_grad():
	        # img_tensor[None] adds the leading batch dimension
	        outputs = model({'input': {'image': img_tensor[None].to(device)}}, mode="test")
	    ############# Run model #############

	    # Save result (images are RGB here; reverse the channel axis for OpenCV)
	    crop_bgr = crop_img[..., ::-1]
	    cv2.imwrite(f'outputs/detection/{frame_name_base}.png', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
	    cv2.imwrite(f'outputs/crop_img/{frame_name_base}.png', crop_bgr)

	    # Threshold the first (only) batch element, drop small connected
	    # components (min_size=20) on the right-hand MANO faces, then render.
	    contact_mask = (outputs['contact_out'][0] > eval_thres).detach().cpu().numpy()
	    contact_mask = remove_small_contact_components(contact_mask, faces=mano.watertight_face['right'], min_size=20)
	    contact_rendered = contact_renderer.render_contact(crop_bgr, contact_mask)
	    cv2.imwrite(f'outputs/contact/{frame_name_base}.png', contact_rendered)
	############################### Demo Loop ###############################