import torch.nn as nn
from ..utils import log
import comfy.model_management as mm
from comfy.utils import ProgressBar
from tqdm import tqdm


def update_transformer(transformer, state_dict):
    """Attach the UniAnimate pose embedders to *transformer* and load their weights.

    Two ``nn.Sequential`` stacks are created and assigned to
    ``transformer.dwpose_embedding`` (3D convolutions) and
    ``transformer.randomref_embedding_pose`` (2D convolutions). Every entry of
    *state_dict* whose key contains one of those attribute names is popped
    from *state_dict* and loaded (strictly) into the matching stack, keyed by
    the text following ``"<attribute name>."``.

    Args:
        transformer: Object that receives the two new sub-modules.
        state_dict: Mutable mapping of weights; matching entries are removed.

    Returns:
        The same *transformer* object.
    """
    hidden = 4 * 4  # concat_dim * 4
    randomref_dim = 20

    # 3D stack: three stride-1 convs, one spatial-only stride-2 conv, two
    # full stride-2 convs, then the projection to 5120 channels.
    layers_3d = [nn.Conv3d(3, hidden, (3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)), nn.SiLU()]
    for _ in range(2):
        layers_3d += [nn.Conv3d(hidden, hidden, (3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)), nn.SiLU()]
    layers_3d += [nn.Conv3d(hidden, hidden, (3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1)), nn.SiLU()]
    for _ in range(2):
        layers_3d += [nn.Conv3d(hidden, hidden, 3, stride=(2, 2, 2), padding=1), nn.SiLU()]
    layers_3d.append(nn.Conv3d(hidden, 5120, (1, 2, 2), stride=(1, 2, 2), padding=0))
    transformer.dwpose_embedding = nn.Sequential(*layers_3d)

    # 2D stack: three stride-1 convs, two stride-2 convs, then a stride-2
    # projection to ``randomref_dim`` channels.
    layers_2d = [nn.Conv2d(3, hidden, 3, stride=1, padding=1), nn.SiLU()]
    for _ in range(2):
        layers_2d += [nn.Conv2d(hidden, hidden, 3, stride=1, padding=1), nn.SiLU()]
    for _ in range(2):
        layers_2d += [nn.Conv2d(hidden, hidden, 3, stride=2, padding=1), nn.SiLU()]
    layers_2d.append(nn.Conv2d(hidden, randomref_dim, 3, stride=2, padding=1))
    transformer.randomref_embedding_pose = nn.Sequential(*layers_2d)

    # Move the matching weights out of ``state_dict`` and into each stack.
    for attr_name in ("dwpose_embedding", "randomref_embedding_pose"):
        weights = {}
        for key in list(state_dict.keys()):
            if attr_name in key:
                weights[key.split(attr_name + ".")[1]] = state_dict.pop(key)
        getattr(transformer, attr_name).load_state_dict(weights, strict=True)

    return transformer


# Openpose
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
# 2nd Edited by
# https://github.com/Hzzone/pytorch-openpose
# 3rd Edited by ControlNet
# 4th Edited by ControlNet (added face and correct hands)

import os
import torch
import numpy as np
import copy
import math
from .dwpose.wholebody import Wholebody


def smoothing_factor(t_e, cutoff):
    """Return the smoothing coefficient for elapsed time *t_e* and *cutoff*."""
    r = 2 * math.pi * cutoff * t_e
    return r / (r + 1)


def exponential_smoothing(a, x, x_prev):
    """Blend the new sample *x* with *x_prev*, weighting *x* by *a*."""
    return a * x + (1 - a) * x_prev


class OneEuroFilter:
    def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0,
                 d_cutoff=1.0):
        """Initialize the one euro filter."""
        # The parameters.
        self.min_cutoff = float(min_cutoff)
        self.beta = float(beta)
        self.d_cutoff = float(d_cutoff)
        # Previous values.
        self.x_prev = x0
        self.dx_prev = float(dx0)
        self.t_prev = float(t0)

    def __call__(self, t, x):
        """Compute the filtered signal.

        Note: *t* must differ from the previously supplied time stamp,
        because the derivative is divided by the elapsed time.
        """
        t_e = t - self.t_prev

        # The filtered derivative of the signal.
        a_d = smoothing_factor(t_e, self.d_cutoff)
        dx = (x - self.x_prev) / t_e
        dx_hat = exponential_smoothing(a_d, dx, self.dx_prev)

        # The filtered signal.
        cutoff = self.min_cutoff + self.beta * abs(dx_hat)
        a = smoothing_factor(t_e, cutoff)
        x_hat = exponential_smoothing(a, x, self.x_prev)

        # Memorize the previous values.
        self.x_prev = x_hat
        self.dx_prev = dx_hat
        self.t_prev = t

        return x_hat


class DWposeDetector:
    """Callable that runs ``Wholebody`` on one image and returns a pose dict."""

    def __init__(self, model_det, model_pose):
        self.pose_estimation = Wholebody(model_det, model_pose)

    def __call__(self, oriImg, score_threshold=0.3):
        """Detect the pose in *oriImg* (an ``(H, W, C)`` array).

        Returns:
            ``dict(bodies=..., hands=..., faces=...)`` where ``bodies`` holds
            ``candidate`` (20 body+foot points, coordinates divided by the
            image width/height), and ``subset``/``score`` (per-point index,
            or -1 when the point's score is not above *score_threshold*).
        """
        oriImg = oriImg.copy()
        H, W, C = oriImg.shape
        with torch.no_grad():
            candidate, subset = self.pose_estimation(oriImg)
            # Only the first entry returned by the estimator is used.
            candidate = candidate[0][np.newaxis, :, :]
            subset = subset[0][np.newaxis, :]
            nums, keys, locs = candidate.shape
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)

            # NOTE(review): the text between ``subset <`` and
            # ``> score_threshold:`` was missing from the file (it read
            # ``un_visible = subset score_threshold:``). The mask below and
            # the loop header were reconstructed from the surviving loop
            # body -- confirm against the upstream implementation.
            un_visible = subset < score_threshold
            candidate[un_visible] = -1

            bodyfoot_score = subset[:, :24].copy()
            for i in range(len(bodyfoot_score)):
                for j in range(len(bodyfoot_score[i])):
                    if bodyfoot_score[i][j] > score_threshold:
                        bodyfoot_score[i][j] = int(18*i+j)
                    else:
                        bodyfoot_score[i][j] = -1
            # Points 18/19 and 21/22 are merged pairwise into slots 18 and 19;
            # a merged slot is valid only when both of its sources are.
            if -1 not in bodyfoot_score[:, 18] and -1 not in bodyfoot_score[:, 19]:
                bodyfoot_score[:, 18] = np.array([18.])
            else:
                bodyfoot_score[:, 18] = np.array([-1.])
            if -1 not in bodyfoot_score[:, 21] and -1 not in bodyfoot_score[:, 22]:
                bodyfoot_score[:, 19] = np.array([19.])
            else:
                bodyfoot_score[:, 19] = np.array([-1.])
            bodyfoot_score = bodyfoot_score[:, :20]

            bodyfoot = candidate[:, :24].copy()
            for i in range(nums):
                if -1 not in bodyfoot[i][18] and -1 not in bodyfoot[i][19]:
                    bodyfoot[i][18] = (bodyfoot[i][18]+bodyfoot[i][19])/2
                else:
                    bodyfoot[i][18] = np.array([-1., -1.])
                if -1 not in bodyfoot[i][21] and -1 not in bodyfoot[i][22]:
                    bodyfoot[i][19] = (bodyfoot[i][21]+bodyfoot[i][22])/2
                else:
                    bodyfoot[i][19] = np.array([-1., -1.])
            bodyfoot = bodyfoot[:, :20, :]
            bodyfoot = bodyfoot.reshape(nums*20, locs)

            faces = candidate[:, 24:92]

            hands = candidate[:, 92:113]
            hands = np.vstack([hands, candidate[:, 113:]])

            # bodies = dict(candidate=body, subset=score)
            bodies = dict(candidate=bodyfoot, subset=bodyfoot_score, score=bodyfoot_score)
            pose = dict(bodies=bodies, hands=hands, faces=faces)

            # return draw_pose(pose, H, W)
            return pose


def draw_pose(pose, H, W, stick_width=4, draw_body=True, draw_hands=True, draw_feet=True,
              body_keypoint_size=4, hand_keypoint_size=4, draw_head=True):
    """Render *pose* onto a black ``(H, W, 3)`` uint8 canvas.

    Returns:
        ``(canvas_without_face, canvas)``: the drawing before and after the
        face keypoints are added.
    """
    from .dwpose.util import draw_body_and_foot, draw_handpose, draw_facepose
    bodies = pose['bodies']
    faces = pose['faces']
    hands = pose['hands']
    candidate = bodies['candidate']
    subset = bodies['subset']
    score = bodies['score']

    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    canvas = draw_body_and_foot(canvas, candidate, subset, score, draw_body=draw_body, stick_width=stick_width,
                                draw_feet=draw_feet, draw_head=draw_head, body_keypoint_size=body_keypoint_size)
    canvas = draw_handpose(canvas, hands, draw_hands=draw_hands, hand_keypoint_size=hand_keypoint_size)
    canvas_without_face = copy.deepcopy(canvas)
    canvas = draw_facepose(canvas, faces)

    return canvas_without_face, canvas


# Limbs rescaled during reference alignment, in application order. Each entry
# is (parent keypoint, child keypoint, body keypoints shifted together with
# the child, index into pose['hands'] shifted with the child or None).
_LIMB_CHAIN = (
    (1, 0, (0, 14, 15, 16, 17), None),  # neck
    (1, 2, (2, 3, 4), 1),               # shoulder 2
    (1, 5, (5, 6, 7), 0),               # shoulder 5
    (2, 3, (3, 4), 1),                  # arm 3
    (3, 4, (4,), 1),                    # arm 4
    (5, 6, (6, 7), 0),                  # arm 6
    (6, 7, (7,), 0),                    # arm 7
    (0, 14, (14, 16), None),            # head 14
    (0, 15, (15, 17), None),            # head 15
    (14, 16, (16,), None),              # head 16
    (15, 17, (17,), None),              # head 17
    (8, 9, (9, 10, 19), None),          # left leg, first segment
    (9, 10, (10, 19), None),            # left leg, second segment
    (11, 12, (12, 13, 18), None),       # right leg, first segment
    (12, 13, (13, 18), None),           # right leg, second segment
)


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 1.0 when that is undefined."""
    if denominator == 0:
        return 1.0
    ratio = numerator / denominator
    return ratio if np.isfinite(ratio) else 1.0


def _limb_length(points, a, b):
    """Return the Euclidean distance between keypoints *a* and *b*."""
    return ((points[a][0] - points[b][0]) ** 2 + (points[a][1] - points[b][1]) ** 2) ** 0.5


def _fit_scale(ref_candidate, candidate):
    """Return the (x, y) factors mapping *candidate* onto *ref_candidate*.

    x uses the horizontal span between keypoints 2 and 5; y uses the vertical
    span between the 2/5 midpoint and the 8/11 midpoint.
    """
    x_ratio = _safe_ratio(ref_candidate[5][0] - ref_candidate[2][0],
                          candidate[5][0] - candidate[2][0])
    ref_center1 = 0.5*(ref_candidate[2]+ref_candidate[5])
    ref_center2 = 0.5*(ref_candidate[8]+ref_candidate[11])
    center1 = 0.5*(candidate[2]+candidate[5])
    center2 = 0.5*(candidate[8]+candidate[11])
    y_ratio = _safe_ratio(ref_center2[1] - ref_center1[1], center2[1] - center1[1])
    return x_ratio, y_ratio


def _retarget_pose(pose, x_ratio, y_ratio, ref_candidate=None, limb_ratios=None):
    """Scale *pose* in place, then stretch each limb of ``_LIMB_CHAIN``.

    When *limb_ratios* is ``None`` the per-limb ratios are measured against
    *ref_candidate* as the chain is walked (each measurement sees the shifts
    already applied by earlier limbs). Otherwise the supplied ratios are
    reused.

    Returns:
        The list of limb ratios that were applied, in ``_LIMB_CHAIN`` order.
    """
    candidate = pose['bodies']['candidate']
    hands = pose['hands']
    for points in (candidate, pose['faces'], hands):
        points[..., 0] *= x_ratio
        points[..., 1] *= y_ratio

    applied = []
    for index, (parent, child, moved, hand) in enumerate(_LIMB_CHAIN):
        if limb_ratios is None:
            ratio = _safe_ratio(_limb_length(ref_candidate, child, parent),
                                _limb_length(candidate, child, parent))
        else:
            ratio = limb_ratios[index]
        applied.append(ratio)

        # Computed before any keypoint of this limb is moved.
        x_offset = (candidate[parent][0] - candidate[child][0]) * (1. - ratio)
        y_offset = (candidate[parent][1] - candidate[child][1]) * (1. - ratio)
        for keypoint in moved:
            candidate[keypoint, 0] += x_offset
            candidate[keypoint, 1] += y_offset
        if hand is not None:
            hands[hand, :, 0] += x_offset
            hands[hand, :, 1] += y_offset
    return applied


def _align_poses_to_reference(poses, pose_ref):
    """Retarget every detected pose in *poses* (in place) onto *pose_ref*.

    Scale factors, limb ratios and the final translation are fitted on the
    first detected pose and then reused for all the others. ``None`` entries
    (undetected frames) are skipped.
    """
    detected = [pose for pose in poses if pose is not None]
    if not detected:
        return

    ref_candidate = pose_ref['bodies']['candidate']
    basis = detected[0]
    x_ratio, y_ratio = _fit_scale(ref_candidate, basis['bodies']['candidate'])
    limb_ratios = _retarget_pose(basis, x_ratio, y_ratio, ref_candidate=ref_candidate)
    # Translation that puts the basis pose's keypoint 1 on the reference's.
    offset = ref_candidate[1] - basis['bodies']['candidate'][1]

    for pose in detected:
        if pose is not basis:
            _retarget_pose(pose, x_ratio, y_ratio, limb_ratios=limb_ratios)
        pose['bodies']['candidate'] += offset[np.newaxis, :]
        pose['faces'] += offset[np.newaxis, np.newaxis, :]
        pose['hands'] += offset[np.newaxis, np.newaxis, :]


def pose_extract(pose_images, ref_image, dwpose_model, height, width, score_threshold, stick_width,
                 draw_body=True, draw_hands=True, hand_keypoint_size=4, draw_feet=True,
                 body_keypoint_size=4, handle_not_detected="repeat", draw_head=True):
    """Detect, optionally align, and draw the pose of every image.

    Args:
        pose_images: Sequence of ``(H, W, C)`` images.
        ref_image: ``None`` or an array with a leading singleton axis; when
            given, all detected poses are retargeted onto its pose.
        dwpose_model: Callable ``(image, score_threshold=...) -> pose dict``.
        height, width: Size of the drawn canvases.
        score_threshold: Forwarded to *dwpose_model*.
        stick_width, draw_body, draw_hands, hand_keypoint_size, draw_feet,
        body_keypoint_size, draw_head: Forwarded to ``draw_pose``.
        handle_not_detected: ``"repeat"`` reuses the previous detection for a
            frame whose detection fails; any other value yields a black frame.

    Returns:
        ``(poses, reference)``: a stacked uint8 tensor with one face-less
        drawing per input image, and the face-less drawing of the reference
        pose (``None`` without *ref_image*).

    Raises:
        ValueError: If detection fails on *ref_image*.
    """
    draw_options = dict(H=height, W=width, stick_width=stick_width, draw_body=draw_body,
                        draw_hands=draw_hands, hand_keypoint_size=hand_keypoint_size,
                        draw_feet=draw_feet, body_keypoint_size=body_keypoint_size,
                        draw_head=draw_head)
    comfy_pbar = ProgressBar(len(pose_images))

    pose_ref = None
    if ref_image is not None:
        try:
            pose_ref = dwpose_model(ref_image.squeeze(0), score_threshold=score_threshold)
        except Exception as err:
            raise ValueError("No pose detected in reference image") from err

    results_vis = []
    prev_pose = None
    for img in tqdm(pose_images, desc="Pose Extraction", unit="image", total=len(pose_images)):
        try:
            pose = dwpose_model(img, score_threshold=score_threshold)
        except Exception:
            # Deep copy so the in-place alignment below is not applied more
            # than once to arrays shared between frames. ``prev_pose`` stays
            # None (-> black frame) unless handle_not_detected == "repeat".
            pose = copy.deepcopy(prev_pose)
        else:
            if handle_not_detected == "repeat":
                prev_pose = pose
        results_vis.append(pose)
        comfy_pbar.update(1)

    if pose_ref is not None:
        _align_poses_to_reference(results_vis, pose_ref)

    dwpose_woface_list = []
    for pose in results_vis:
        if pose is None:
            result = torch.zeros((height, width, 3), dtype=torch.uint8)
        else:
            dwpose_woface, dwpose_wface = draw_pose(pose, **draw_options)
            result = torch.from_numpy(dwpose_woface)
        dwpose_woface_list.append(result)
    dwpose_woface_tensor = torch.stack(dwpose_woface_list, dim=0)

    dwpose_woface_ref_tensor = None
    if pose_ref is not None:
        dwpose_woface_ref, dwpose_wface_ref = draw_pose(pose_ref, **draw_options)
        dwpose_woface_ref_tensor = torch.from_numpy(dwpose_woface_ref)

    return dwpose_woface_tensor, dwpose_woface_ref_tensor


class WanVideoUniAnimateDWPoseDetector:
    """ComfyUI node: run DWPose on images and output the drawn skeletons."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                "pose_images": ("IMAGE", {"tooltip": "Pose images"}),
                "score_threshold": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Score threshold for pose detection"}),
                "stick_width": ("INT", {"default": 4, "min": 1, "max": 100, "step": 1, "tooltip": "Stick width for drawing keypoints"}),
                "draw_body": ("BOOLEAN", {"default": True, "tooltip": "Draw body keypoints"}),
                "body_keypoint_size": ("INT", {"default": 4, "min": 0, "max": 100, "step": 1, "tooltip": "Body keypoint size"}),
                "draw_feet": ("BOOLEAN", {"default": True, "tooltip": "Draw feet keypoints"}),
                "draw_hands": ("BOOLEAN", {"default": True, "tooltip": "Draw hand keypoints"}),
                "hand_keypoint_size": ("INT", {"default": 4, "min": 0, "max": 100, "step": 1, "tooltip": "Hand keypoint size"}),
                "colorspace": (["RGB", "BGR"], {"tooltip": "Color space for the output image"}),
                "handle_not_detected": (["empty", "repeat"], {"default": "empty", "tooltip": "How to handle undetected poses, empty inserts black and repeat inserts previous detection"}),
                "draw_head": ("BOOLEAN", {"default": True, "tooltip": "Draw head keypoints"}),
            },
            "optional": {
                "reference_pose_image": ("IMAGE", {"tooltip": "Reference pose image"}),
            },
        }

    RETURN_TYPES = ("IMAGE", "IMAGE", )
    RETURN_NAMES = ("poses", "reference_pose",)
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"

    def process(self, pose_images, score_threshold, stick_width, reference_pose_image=None, draw_body=True,
                body_keypoint_size=4, draw_feet=True, draw_hands=True, hand_keypoint_size=4,
                colorspace="RGB", handle_not_detected="empty", draw_head=True):
        """Load (downloading if missing) the models and draw all poses.

        Returns:
            ``(poses, reference_pose)`` scaled to the 0..1 range; without a
            reference image ``reference_pose`` is a zero ``1x64x64x3`` tensor.
        """
        device = mm.get_torch_device()

        #model loading
        dw_pose_model = "dw-ll_ucoco_384_bs5.torchscript.pt"
        yolo_model = "yolox_l.torchscript.pt"

        script_directory = os.path.dirname(os.path.abspath(__file__))
        model_base_path = os.path.join(script_directory, "models", "DWPose")

        model_det = os.path.join(model_base_path, yolo_model)
        model_pose = os.path.join(model_base_path, dw_pose_model)

        if not os.path.exists(model_det):
            log.info(f"Downloading yolo model to: {model_base_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id="hr16/yolox-onnx",
                              allow_patterns=[f"*{yolo_model}*"],
                              local_dir=model_base_path,
                              local_dir_use_symlinks=False)

        if not os.path.exists(model_pose):
            log.info(f"Downloading dwpose model to: {model_base_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id="hr16/DWPose-TorchScript-BatchSize5",
                              allow_patterns=[f"*{dw_pose_model}*"],
                              local_dir=model_base_path,
                              local_dir_use_symlinks=False)

        # The TorchScript models are loaded once and cached on the node.
        if not hasattr(self, "det") or not hasattr(self, "pose"):
            self.det = torch.jit.load(model_det, map_location=device)
            self.pose = torch.jit.load(model_pose, map_location=device)
            self.dwpose_detector = DWposeDetector(self.det, self.pose)

        #model inference
        height, width = pose_images.shape[1:3]

        pose_np = pose_images.cpu().numpy() * 255
        ref_np = None
        if reference_pose_image is not None:
            ref = reference_pose_image
            ref_np = ref.cpu().numpy() * 255

        poses, reference_pose = pose_extract(pose_np, ref_np, self.dwpose_detector, height, width, score_threshold,
                                             stick_width=stick_width, draw_body=draw_body,
                                             body_keypoint_size=body_keypoint_size, draw_feet=draw_feet,
                                             draw_hands=draw_hands, hand_keypoint_size=hand_keypoint_size,
                                             handle_not_detected=handle_not_detected, draw_head=draw_head)
        poses = poses / 255.0
        if reference_pose_image is not None:
            reference_pose = reference_pose.unsqueeze(0) / 255.0
        else:
            reference_pose = torch.zeros(1, 64, 64, 3, device=torch.device("cpu"))

        if colorspace == "BGR":
            poses = torch.flip(poses, dims=[-1])

        return (poses, reference_pose, )


class WanVideoUniAnimatePoseInput:
    """ComfyUI node: pack pose images and settings into a UNIANIMATE_POSE dict."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                "pose_images": ("IMAGE", {"tooltip": "Pose images"}),
                "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Strength of the pose control"}),
                "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Start percentage for the pose control"}),
                "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "End percentage for the pose control"}),
            },
            "optional": {
                "reference_pose_image": ("IMAGE", {"tooltip": "Reference pose image"}),
            },
        }

    RETURN_TYPES = ("UNIANIMATE_POSE", )
    RETURN_NAMES = ("unianimate_poses",)
    FUNCTION = "process"
    CATEGORY = "WanVideoWrapper"

    def process(self, pose_images, strength, start_percent, end_percent, reference_pose_image=None):
        """Permute the images to channel-first layout and bundle the settings."""
        # Last axis moves to the front, then a leading singleton axis is added.
        pose = pose_images.permute(3, 0, 1, 2).unsqueeze(0).contiguous()
        ref = None
        if reference_pose_image is not None:
            ref = reference_pose_image.permute(0, 3, 1, 2).contiguous()

        unianim_poses = {
            "pose": pose,
            "ref": ref,
            "strength": strength,
            "start_percent": start_percent,
            "end_percent": end_percent
        }

        return (unianim_poses,)


NODE_CLASS_MAPPINGS = {
    "WanVideoUniAnimatePoseInput": WanVideoUniAnimatePoseInput,
    "WanVideoUniAnimateDWPoseDetector": WanVideoUniAnimateDWPoseDetector,
    }
NODE_DISPLAY_NAME_MAPPINGS = {
    "WanVideoUniAnimatePoseInput": "WanVideo UniAnimate Pose Input",
    "WanVideoUniAnimateDWPoseDetector": "WanVideo UniAnimate DWPose Detector",
    }