Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| # -*- coding:utf-8 -*- | |
| # Copyright (c) Megvii, Inc. and its affiliates. | |
| """ | |
| Data augmentation functionality. Passed as callable transformations to | |
| Dataset classes. | |
| The data augmentation procedures were interpreted from @weiliu89's SSD paper | |
| http://arxiv.org/abs/1512.02325 | |
| """ | |
| import cv2 | |
| import numpy as np | |
| import torch | |
| from yolox.utils import xyxy2cxcywh | |
| import math | |
| import random | |
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    """Randomly jitter the hue, saturation and value of a BGR image in place.

    One multiplicative gain per HSV channel is drawn from ``1 +/- gain`` and
    applied through a 256-entry lookup table. The result is converted back to
    BGR and written into ``img`` itself (``dst=img``); nothing is returned.

    Args:
        img: BGR image array; overwritten with the augmented image.
        hgain: Half-width of the random gain interval for hue.
        sgain: Half-width of the random gain interval for saturation.
        vgain: Half-width of the random gain interval for value.
    """
    out_dtype = img.dtype  # lookup tables and merged image keep the input dtype

    # Single draw of three numbers in [-1, 1), scaled per channel, centred on 1.
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1

    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)  # hue wraps at 180
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    remapped = (
        cv2.LUT(hue_ch, hue_table),
        cv2.LUT(sat_ch, sat_table),
        cv2.LUT(val_ch, val_table),
    )
    img_hsv = cv2.merge(remapped).astype(out_dtype)

    # Writing through ``dst`` is what makes the augmentation in-place.
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
    """Return a boolean mask of boxes that are still usable after augmentation.

    Both inputs are laid out as ``(4, n)``: rows are x1, y1, x2, y2 and each
    column is one box. ``box1`` holds the boxes before augmentation and
    ``box2`` the same boxes afterwards.

    A box is kept only when all of the following hold:
      * its augmented width and height both exceed ``wh_thr`` (pixels),
      * its augmented area is more than ``area_thr`` times the original area,
      * its augmented aspect ratio (long side / short side) is below ``ar_thr``.

    Returns:
        Boolean array of length ``n``.
    """
    eps = 1e-16  # keeps the divisions finite for degenerate boxes

    w_before = box1[2] - box1[0]
    h_before = box1[3] - box1[1]
    w_after = box2[2] - box2[0]
    h_after = box2[3] - box2[1]

    large_enough = (w_after > wh_thr) & (h_after > wh_thr)
    area_kept = w_after * h_after / (w_before * h_before + eps) > area_thr
    aspect = np.maximum(w_after / (h_after + eps), h_after / (w_after + eps))

    return large_enough & area_kept & (aspect < ar_thr)
def random_perspective(
    img,
    targets=(),
    degrees=10,
    translate=0.1,
    scale=0.1,
    shear=10,
    perspective=0.0,
    border=(0, 0),
):
    """Apply a random rotation/scale/shear/translation to an image and its boxes.

    Args:
        img: Image array of shape ``(h, w, c)``.
        targets: Box array whose first four columns are ``x1, y1, x2, y2`` in
            pixels; any further columns are carried through unchanged. An
            empty value (the default) skips the label transform.
        degrees: Rotation is drawn uniformly from ``[-degrees, degrees]``.
        translate: The image centre is placed at a uniformly drawn fraction
            ``0.5 +/- translate`` of the output width and height.
        scale: Either a ``(low, high)`` pair giving the range of the scale
            factor, or a single number ``g`` meaning the range
            ``[1 - g, 1 + g]``.
        shear: Shear angle in degrees, drawn from ``[-shear, shear]``
            independently for x and y.
        perspective: When truthy, the image is warped with
            ``cv2.warpPerspective`` and box corners are divided by their
            homogeneous coordinate; otherwise ``cv2.warpAffine`` is used.
        border: ``(y, x)`` padding; the output is ``2 * border`` larger than
            the input along each axis.

    Returns:
        ``(img, targets)``: the warped image and the surviving boxes. Boxes
        are not clipped to the canvas; those rejected by ``box_candidates`` or
        lying entirely outside the output are dropped.
    """
    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center: move the image centre to the origin so rotation/scale act about it.
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    try:
        scale_low, scale_high = scale[0], scale[1]
    except (TypeError, IndexError):
        # A bare number (including the default 0.1) cannot be indexed; treat
        # it as a symmetric gain around 1 instead of failing.
        scale_low, scale_high = 1 - scale, 1 + scale
    s = random.uniform(scale_low, scale_high)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation: put the (origin-centred) image back near the canvas centre.
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y (pixels)

    # Combined matrix; order of operations (right to left) is IMPORTANT.
    M = T @ S @ R @ C

    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # Transform label coordinates
    n = len(targets)
    if n:
        # Warp all four corners of every box as homogeneous points.
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
            n * 4, 2
        )  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # New axis-aligned boxes enclosing the warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Filter candidates; the original boxes are scaled by ``s`` so the
        # area comparison is made at the augmented scale.
        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, :4] = xy[i]

        # Drop boxes that fall completely outside the output canvas.
        targets = targets[targets[:, 0] < width]
        targets = targets[targets[:, 2] > 0]
        targets = targets[targets[:, 1] < height]
        targets = targets[targets[:, 3] > 0]

    return img, targets
def _distort(image):
    """Return a photometrically distorted copy of a BGR image.

    Four independent steps are each applied only when ``random.randrange(2)``
    comes up 1, in this order: additive brightness shift in ``[-32, 32]``,
    contrast gain in ``[0.5, 1.5]``, hue shift by an integer in ``[-18, 18]``
    (wrapped modulo 180), and saturation gain in ``[0.5, 1.5]``. The hue and
    saturation steps operate in HSV space. The input array is not modified.
    """

    def _scale_shift(arr, alpha=1, beta=0):
        # Compute ``arr * alpha + beta`` in float, saturate to [0, 255] and
        # write the result back into ``arr`` in place.
        adjusted = arr.astype(float) * alpha + beta
        adjusted[adjusted < 0] = 0
        adjusted[adjusted > 255] = 255
        arr[:] = adjusted

    bgr = image.copy()

    if random.randrange(2):
        _scale_shift(bgr, beta=random.uniform(-32, 32))

    if random.randrange(2):
        _scale_shift(bgr, alpha=random.uniform(0.5, 1.5))

    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)

    if random.randrange(2):
        shifted_hue = hsv[:, :, 0].astype(int) + random.randint(-18, 18)
        hsv[:, :, 0] = shifted_hue % 180

    if random.randrange(2):
        # Passing the channel view lets the helper update ``hsv`` in place.
        _scale_shift(hsv[:, :, 1], alpha=random.uniform(0.5, 1.5))

    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
def _mirror(image, boxes):
    """Flip an image and its boxes left-to-right when ``random.randrange(2)`` is 1.

    Args:
        image: Array of shape ``(h, w, c)``.
        boxes: Array whose even columns hold x-coordinates (x1, x2).

    Returns:
        ``(image, boxes)``. When no flip happens the inputs are returned
        untouched; otherwise the image is a reversed view along the width axis
        and the boxes are a modified copy.
    """
    _, width, _ = image.shape

    if not random.randrange(2):
        return image, boxes

    flipped_boxes = boxes.copy()
    # New x1 = width - old x2 and new x2 = width - old x1, so x1 <= x2 still holds.
    flipped_boxes[:, 0::2] = width - boxes[:, 2::-2]
    return image[:, ::-1], flipped_boxes
def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
    """Resize an image to fit ``input_size``, pad with 114 and normalise.

    The image is scaled by the largest ratio that keeps it inside
    ``input_size`` (aspect ratio preserved) and pasted into the top-left
    corner of a canvas filled with 114. The canvas then has its last axis
    reversed, is divided by 255, optionally has ``mean`` subtracted and is
    divided by ``std``, and is finally transposed by ``swap``.

    Args:
        image: Input image; converted with ``np.array`` before resizing.
        input_size: ``(height, width)`` of the output canvas.
        mean: Value(s) subtracted after scaling to [0, 1]; ``None`` skips it.
        std: Value(s) the result is divided by; ``None`` skips it.
        swap: Axis permutation applied at the end.

    Returns:
        ``(padded_img, r)``: the contiguous float32 array and the resize ratio.
    """
    # NOTE(review): for a non-3-D image the canvas is allocated with shape
    # ``input_size`` only, yet the channel reversal below indexes three
    # axes — confirm whether 2-D input is actually supported.
    if len(image.shape) == 3:
        canvas_shape = (input_size[0], input_size[1], 3)
    else:
        canvas_shape = input_size
    canvas = np.ones(canvas_shape) * 114.0

    src = np.array(image)
    ratio = min(input_size[0] / src.shape[0], input_size[1] / src.shape[1])
    new_h = int(src.shape[0] * ratio)
    new_w = int(src.shape[1] * ratio)

    resized = cv2.resize(
        src, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.float32)
    canvas[:new_h, :new_w] = resized

    canvas = canvas[:, :, ::-1]  # reverse the channel axis
    canvas /= 255.0
    if mean is not None:
        canvas -= mean
    if std is not None:
        canvas /= std

    canvas = np.ascontiguousarray(canvas.transpose(swap), dtype=np.float32)
    return canvas, ratio
class TrainTransform:
    """Training-time transform: distort, mirror, resize/pad and pack labels.

    Args:
        p: Stored on the instance as ``p``; not read by ``__call__``.
        rgb_means: Forwarded to ``preproc`` as ``mean`` (``None`` skips it).
        std: Forwarded to ``preproc`` as ``std`` (``None`` skips it).
        max_labels: Number of rows in the fixed-size label array returned.
    """

    def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100):
        self.means = rgb_means
        self.std = std
        self.p = p
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        """Augment one sample.

        Args:
            image: Image array passed to ``_distort``, ``_mirror`` and ``preproc``.
            targets: Array with columns ``[x1, y1, x2, y2, label, id]``.
            input_dim: Output size handed to ``preproc``.

        Returns:
            ``(image, labels)``: a contiguous float32 image and a float32 array
            of shape ``(max_labels, 6)`` with rows ``[label, box(4), id]``,
            zero-padded (or truncated) to ``max_labels`` rows. The box columns
            are whatever ``xyxy2cxcywh`` returns, scaled by the resize ratio
            (presumably cx, cy, w, h — confirm against ``yolox.utils``).
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()

        if len(boxes) == 0:
            # Nothing to augment: only preprocess and return all-zero labels.
            empty_labels = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, _ = preproc(image, input_dim, self.means, self.std)
            return np.ascontiguousarray(image, dtype=np.float32), empty_labels

        # Untouched copies, used as a fallback if augmentation leaves no box.
        image_o = image.copy()
        targets_o = targets.copy()
        boxes_o = xyxy2cxcywh(targets_o[:, :4])
        labels_o = targets_o[:, 4]
        ids_o = targets_o[:, 5]

        image_t = _distort(image)
        image_t, boxes = _mirror(image_t, boxes)
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)

        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Keep boxes whose smaller side (columns 2 and 3) exceeds one pixel
        # after resizing.
        keep = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[keep]
        labels_t = labels[keep]
        ids_t = ids[keep]

        if len(boxes_t) == 0:
            # Every box was filtered out: fall back to the un-augmented sample.
            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o
            ids_t = ids_o

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)
        targets_t = np.hstack((labels_t, boxes_t, ids_t))

        # Copy at most ``max_labels`` rows into the zero-padded output.
        padded_labels = np.zeros((self.max_labels, 6))
        kept_rows = targets_t[: self.max_labels]
        padded_labels[: len(kept_rows)] = kept_rows
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)

        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
class ValTransform:
    """Evaluation-time transform: preprocessing only, no augmentation.

    Arguments:
        rgb_means: forwarded to ``preproc`` as ``mean`` (``None`` skips it)
        std: forwarded to ``preproc`` as ``std`` (``None`` skips it)
        swap ((int,int,int)): axis order for the final transpose in ``preproc``

    Returns:
        transform (transform) : callable transform to be applied to test/val
        data
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)):
        self.means, self.std, self.swap = rgb_means, std, swap

    # assume input is cv2 img for now
    def __call__(self, img, res, input_size):
        """Preprocess ``img`` to ``input_size``; ``res`` is accepted but unused.

        Returns the preprocessed image together with a ``(1, 5)`` array of
        zeros in place of labels.
        """
        processed, _ratio = preproc(
            img, input_size, self.means, self.std, self.swap
        )
        label_placeholder = np.zeros((1, 5))
        return processed, label_placeholder