from __future__ import print_function
import os
import cv2
import numpy as np
import random
import torch
from torch.utils import data
from torchvision import transforms
import torch.nn.functional as F
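
# Angle convention assumed below (it matches how the maps are built in
# cassini2Equirec): an ERP pixel carries longitude theta in (-pi, pi) and
# latitude phi in (-pi/2, pi/2); the Cassini projection is the same sphere
# with the polar axis rotated onto the equator, giving
#   theta_c = atan2(tan(phi_e), cos(theta_e))
#   phi_c   = asin(cos(phi_e) * sin(theta_e))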
def cassini2Equirec(cassini):
    """Remap an image from Cassini projection to equirectangular projection (ERP)."""
    if cassini.ndim == 2:
        cassini = np.expand_dims(cassini, axis=-1)
        source_image = torch.FloatTensor(cassini).unsqueeze(0).transpose(1, 3).transpose(2, 3)
    elif cassini.ndim == 3:
        source_image = torch.FloatTensor(cassini).unsqueeze(0).transpose(1, 3).transpose(2, 3)
    else:
        source_image = cassini
    # Cassini and ERP swap the roles of height and width.
    erp_h = source_image.shape[-1]
    erp_w = source_image.shape[-2]
    # Longitude (theta) and latitude (phi) at every ERP pixel center.
    theta_erp_start = np.pi - (np.pi / erp_w)
    theta_erp_end = -np.pi
    theta_erp_step = 2 * np.pi / erp_w
    theta_erp_range = np.arange(theta_erp_start, theta_erp_end, -theta_erp_step)
    theta_erp_map = np.array([theta_erp_range for i in range(erp_h)]).astype(np.float32)
    phi_erp_start = 0.5 * np.pi - (0.5 * np.pi / erp_h)
    phi_erp_end = -0.5 * np.pi
    phi_erp_step = np.pi / erp_h
    phi_erp_range = np.arange(phi_erp_start, phi_erp_end, -phi_erp_step)
    phi_erp_map = np.array([phi_erp_range for j in range(erp_w)]).astype(np.float32).T
    # Convert the ERP angles to Cassini angles.
    theta_cassini_map = np.arctan2(np.tan(phi_erp_map), np.cos(theta_erp_map))
    phi_cassini_map = np.arcsin(np.cos(phi_erp_map) * np.sin(theta_erp_map))
    # Normalize the Cassini coordinates to [-1, 1] as required by grid_sample.
    grid_x = torch.FloatTensor(np.clip(-phi_cassini_map / (0.5 * np.pi), -1, 1)).unsqueeze(-1)
    grid_y = torch.FloatTensor(np.clip(-theta_cassini_map / np.pi, -1, 1)).unsqueeze(-1)
    grid = torch.cat([grid_x, grid_y], dim=-1).unsqueeze(0).repeat_interleave(source_image.shape[0], dim=0)
    sampled_image = F.grid_sample(source_image, grid, mode='bilinear', align_corners=True, padding_mode='border')  # B, ch, erp_h, erp_w
    if cassini.ndim == 3:
        # Numpy input: return H x W (x C) in the input dtype.
        erp = sampled_image.transpose(1, 3).transpose(1, 2).data.numpy()[0].astype(cassini.dtype)
        return erp.squeeze()
    else:
        # Tensor input: return B (x C) x H x W as a numpy array.
        erp = sampled_image.numpy()
        return erp.squeeze(1)
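
# Usage sketch for cassini2Equirec; the 1024x512 Cassini size is an assumption
# based on the Deep360 layout, where Cassini frames are the 90-degree-rotated
# counterpart of 512x1024 ERP frames:
#
#   cassini_rgb = np.zeros((1024, 512, 3), dtype=np.uint8)  # H_c x W_c x C
#   erp_rgb = cassini2Equirec(cassini_rgb)                  # -> (512, 1024, 3)
#
# Height and width swap because erp_h/erp_w are read from the transposed axes.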

def read_list(list_file):
    rgb_depth_list = []
    with open(list_file) as f:
        lines = f.readlines()
    for line in lines:
        rgb_depth_list.append(line.strip().split(" "))
    return rgb_depth_list
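
# A sketch of the list-file format this loader assumes: one sample per line,
# an RGB path and a depth (.npz) path separated by a single space, both
# relative to root_dir. The paths below are hypothetical placeholders:
#
#   training/rgb/000000.png training/depth/000000.npz
#
# read_list would then return
# [['training/rgb/000000.png', 'training/depth/000000.npz'], ...].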

class Deep360(data.Dataset):
    """The Deep360 Dataset"""

    def __init__(self, root_dir, list_file, height=504, width=1008, color_augmentation=True,
                 LR_flip_augmentation=True, yaw_rotation_augmentation=True, repeat=1, is_training=False):
        """
        Args:
            root_dir (string): Directory of the Deep360 Dataset.
            list_file (string): Path to the txt file containing the list of image and depth files.
            height, width: input size.
            color_augmentation, LR_flip_augmentation,
            yaw_rotation_augmentation: augmentation options.
            is_training (bool): True if the dataset is the training set.
        """
        self.root_dir = root_dir
        self.rgb_depth_list = read_list(list_file)
        self.w = width
        self.h = height
        self.max_depth_meters = 100.0
        self.min_depth_meters = 0.01
        self.color_augmentation = color_augmentation
        self.LR_flip_augmentation = LR_flip_augmentation
        self.yaw_rotation_augmentation = yaw_rotation_augmentation
        self.is_training = is_training
        if self.color_augmentation:
            try:
                # Newer torchvision accepts (min, max) ranges for ColorJitter.
                self.brightness = (0.8, 1.2)
                self.contrast = (0.8, 1.2)
                self.saturation = (0.8, 1.2)
                self.hue = (-0.1, 0.1)
                self.color_aug = transforms.ColorJitter(
                    self.brightness, self.contrast, self.saturation, self.hue)
            except TypeError:
                # Older torchvision only accepts scalar jitter factors.
                self.brightness = 0.2
                self.contrast = 0.2
                self.saturation = 0.2
                self.hue = 0.1
                self.color_aug = transforms.ColorJitter(
                    self.brightness, self.contrast, self.saturation, self.hue)
        self.to_tensor = transforms.ToTensor()
        # ImageNet mean/std, matching the usual pretrained-backbone convention.
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __len__(self):
        return len(self.rgb_depth_list)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        inputs = {}
        # Load the RGB image and convert it from Cassini to ERP.
        rgb_name = self.root_dir + self.rgb_depth_list[idx][0]
        rgb = cv2.imread(rgb_name)
        if rgb is None:
            raise FileNotFoundError("Failed to read image: " + rgb_name)
        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        rgb = cassini2Equirec(rgb)
        rgb = cv2.resize(rgb, dsize=(self.w, self.h), interpolation=cv2.INTER_CUBIC)
        # Load the ground-truth depth and convert it the same way.
        depth_name = self.root_dir + self.rgb_depth_list[idx][1]
        gt_depth = np.load(depth_name)['arr_0'].astype(np.float32)
        gt_depth = cassini2Equirec(gt_depth)
        gt_depth = cv2.resize(gt_depth, dsize=(self.w, self.h), interpolation=cv2.INTER_NEAREST)
        gt_depth[gt_depth > self.max_depth_meters] = self.max_depth_meters
        if self.is_training and self.yaw_rotation_augmentation:
            # Random yaw rotation: a horizontal roll is seamless on a 360-degree image.
            roll_idx = random.randint(0, self.w - 1)
            rgb = np.roll(rgb, roll_idx, 1)
            gt_depth = np.roll(gt_depth, roll_idx, 1)
        if self.is_training and self.LR_flip_augmentation and random.random() > 0.5:
            rgb = cv2.flip(rgb, 1)
            gt_depth = cv2.flip(gt_depth, 1)
        if self.is_training and self.color_augmentation and random.random() > 0.5:
            aug_rgb = np.asarray(self.color_aug(transforms.ToPILImage()(rgb)))
        else:
            aug_rgb = rgb.copy()
        aug_rgb = self.to_tensor(aug_rgb.copy())
        gt_depth = torch.from_numpy(np.expand_dims(gt_depth, axis=0)).to(torch.float32)
        val_mask = ((gt_depth > 0) & (gt_depth <= self.max_depth_meters) & ~torch.isnan(gt_depth))
        mask_100 = (gt_depth < 100.0)
        # Normalize depth to (0, 1] by the maximum depth.
        # An alternative per-sample quantile normalization was left disabled:
        # _min, _max = torch.quantile(gt_depth[val_mask], torch.tensor([0.02, 1 - 0.02]))
        # gt_depth_norm = (gt_depth - 0.01) / (_max - 0.01)
        # gt_depth_norm = torch.clip(gt_depth_norm, 0.01, 1.0)
        gt_depth_norm = gt_depth / self.max_depth_meters
        gt_depth_norm = torch.clip(gt_depth_norm, 0.001, 1.0)
        # Assemble the output dict.
        inputs["rgb"] = self.normalize(aug_rgb)
        inputs["gt_depth"] = gt_depth_norm
        inputs["val_mask"] = val_mask
        inputs["mask_100"] = mask_100
        inputs["touying"] = False  # "touying" is Chinese pinyin for "projection"
        return inputs
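
# DataLoader usage sketch (batch size and worker count are illustrative):
#
#   loader = data.DataLoader(
#       Deep360(root_dir, list_file, is_training=True),
#       batch_size=4, shuffle=True, num_workers=4)
#   batch = next(iter(loader))
#   batch["rgb"]       # B x 3 x h x w, ImageNet-normalized
#   batch["gt_depth"]  # B x 1 x h x w, depth normalized to (0, 1]
#   batch["val_mask"]  # B x 1 x h x w, True where depth is valid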

if __name__ == "__main__":
    # Quick smoke test; the paths below are specific to the author's machine.
    dataset = Deep360(root_dir='/hpc2hdd/home/zcao740/Documents/Dataset/Deep360',
                      list_file='/hpc2hdd/home/zcao740/Documents/360Depth/Semi-supervision/datasets/deep360_train.txt')
    print(len(dataset))
    for i in range(2):
        print(dataset[i])