빛의 방향 및 밝기에 대한 이미지 augmentation 방법
Real Scene data problem
필자가 다루고 있는 indoor, outdoor image 의 distribution 은 정말정말 Open Long-Tailed 하다.
Open Long-Tailed Recognition (aka. OLTR) 에 관해서는 차후 다루기로 하고…
특히 계절과 빛에 의한 환경 변화가 매우매우 심한데 이중 빛에 대해서 조금이나마 극복해보자 하여 Adjust Local Brightness 한 image augmentation 방법에 대해 적고자 한다.
간략히 설명하자면 일반적으로 RGB 의 각 채널 값은 빛의 강도에 따라 변한다.
다 알고 있는 이야기이지만 예를 들어 (G, B 값이 0 일 때) R 값이 0 이면 우리 눈에는 검은색이고 R 값이 255 이면 순수한 빨간색이 된다.
그럼 현실세계에서의 이미지를 생각해보자.
다음의 사진을 보았을때 각기 다른 3명의 사람이 빛의 방향 에 따라 밝기 변화가 이뤄지고 그에따른 RGB 값이 변함을 알수 있다.
태양은 동쪽에서 떠서 서쪽으로 진다. outdoor 의 경우 위치는 고정이지만 이 빛의 움직임에 따라 각기 고유의 RGB 값이 변하게 되는것이다.
그럼 만약 한 이미지에대해 특정 방향에서 빛 (spot light) 에 대한 data augmentation 을 할수 있다면??
정확하진 않지만 비슷한 느낌의 이미지들을 사전에 얻을 수 있을 것이고 이는 scene classification 또는 scene recognition 에 분명 도움이 될 것이다. (아직 안 해봤음.., 가설)
그럼 구현은?? 세상은 넓고 역시 누군가 해놓았다. Reference
빛의 방향 과 종류
3D 개발을 해보면 초반에 배우는것이 Light 에 대한것이다. 빛에 의해 그림자가 생성되는것은 Parallel light 이냐 Spot light 냐에 따라 구분 된다고 생각할수 있다.
나와라 빛이여!
빛을 골고루 입힌다라.. 생각해보자..
Gaussian 으로 줄수 있을것이고.. linear 하게 줄수 있을것이다.
설명은 필요없다. 코드 들어간다.
def _decayed_value_in_norm(x, max_value, min_value, center, range):
    """
    Map a position to a brightness that falls off as a Gaussian around ``center``.

    The normal density at ``x`` is divided by the density at ``center``, so the
    result equals ``max_value`` at the center and approaches ``min_value`` far
    away from it.

    Args:
        x: position to evaluate
        max_value: brightness returned at ``center``
        min_value: brightness approached far from ``center``
        center: position of the brightness peak
        range: spread of the light; one third of it is used as the standard deviation
    Return:
        brightness at ``x``
    """
    sigma = range / 3
    peak_density = norm.pdf(center, center, sigma)
    # relative density is 1 at the center and tends to 0 in the tails
    relative_density = norm.pdf(x, center, sigma) / peak_density
    return relative_density * (max_value - min_value) + min_value
def _decayed_value_in_linear(x, max_value, padding_center, decay_rate):
    """
    Map a position to a brightness that decays linearly away from a center.

    The brightness is ``max_value`` at ``padding_center`` and drops by
    ``decay_rate`` per unit of distance, never going below 0.

    Args:
        x: position to evaluate
        max_value: brightness at ``padding_center``
        padding_center: position of the brightness peak
        decay_rate: brightness lost per unit of distance from the center
    Return:
        brightness at ``x``, floored at 0
    """
    x_value = max_value - abs(padding_center - x) * decay_rate
    # Floor at 0 rather than 1: a floor of 1 made the profile jump back up
    # once it crossed zero (e.g. 0.5 stayed 0.5 but -0.5 became 1).
    return max(x_value, 0)
Parallel light data augmentation method
Mask generation
import cv2
import numpy as np
import random
from scipy.stats import norm
def generate_parallel_light_mask(mask_size,
                                 position=None,
                                 direction=None,
                                 max_brightness=255,
                                 min_brightness=0,
                                 mode="gaussian",
                                 linear_decay_rate=None):
    """
    Generate a brightness mask for a rotated strip of light.

    A horizontal strip whose brightness decays with the row distance from the
    light position is drawn on a padded canvas, rotated around the light
    position, cropped back to ``mask_size``, median-blurred and finally
    inverted (``255 - mask``).

    Args:
        mask_size: tuple of integers (w, h) defining generated mask size
        position: tuple of integers (x, y) defining the center of light strip position,
                  which is the reference point during rotating. Random when None.
        direction: rotation of the light strip in degrees. Random in [0, 360] when None.
        max_brightness: brightness at the center of the strip (before inversion)
        min_brightness: brightness far from the strip in gaussian mode; in
                        linear_dynamic mode it only enters the default decay rate
        mode: how brightness decays from the strip center:
              "gaussian", "linear_static" or "linear_dynamic"
        linear_decay_rate: brightness lost per row in the linear modes. When None it is
                           drawn from [0.2, 2] for linear_static and derived from the
                           brightness span and the larger mask side for linear_dynamic.
    Return:
        light_mask: uint8 ndarray of shape (h, w)
    """
    assert mode in ["linear_dynamic", "linear_static", "gaussian"], \
        "mode must be linear_dynamic, linear_static or gaussian"
    if position is None:
        pos_x = random.randint(0, mask_size[0])
        pos_y = random.randint(0, mask_size[1])
    else:
        pos_x = position[0]
        pos_y = position[1]
    if direction is None:
        direction = random.randint(0, 360)
        print("Rotate degree: ", direction)
    if linear_decay_rate is None:
        if mode == "linear_static":
            linear_decay_rate = random.uniform(0.2, 2)
        if mode == "linear_dynamic":
            linear_decay_rate = (max_brightness - min_brightness) / max(mask_size)
    # add padding to satisfy cropping after rotating
    padding = int(max(mask_size) * np.sqrt(2))
    canvas_x = padding * 2 + mask_size[0]
    canvas_y = padding * 2 + mask_size[1]
    # initial mask's up left corner and bottom right corner coordinate
    init_mask_ul = (padding, padding)
    init_mask_br = (padding + mask_size[0], padding + mask_size[1])
    # plain floats so OpenCV accepts the rotation center whatever numeric type
    # the caller used for `position`
    init_light_pos = (float(padding + pos_x), float(padding + pos_y))
    # every row of the canvas has a single brightness that depends only on its
    # distance from the light row
    if mode == "gaussian":
        # the helper is element-wise, so all rows are evaluated in one call
        row_values = _decayed_value_in_norm(np.arange(canvas_y), max_brightness, min_brightness,
                                            init_light_pos[1], mask_size[1])
    else:
        # "linear_static" and "linear_dynamic" differ only in how the decay
        # rate was chosen above
        row_values = [_decayed_value_in_linear(i, max_brightness, init_light_pos[1], linear_decay_rate)
                      for i in range(canvas_y)]
    mask = np.empty(shape=(canvas_y, canvas_x), dtype=np.float32)
    mask[:] = np.asarray(row_values, dtype=np.float32)[:, np.newaxis]
    # rotate mask
    rotate_M = cv2.getRotationMatrix2D(init_light_pos, direction, 1)
    mask = cv2.warpAffine(mask, rotate_M, (canvas_x, canvas_y))
    # crop
    mask = mask[init_mask_ul[1]:init_mask_br[1], init_mask_ul[0]:init_mask_br[0]]
    # clip first so out-of-range brightness cannot wrap around in the uint8 cast
    mask = np.clip(mask, 0, 255).astype(np.uint8)
    # add median blur
    mask = cv2.medianBlur(mask, 9)
    mask = 255 - mask
    return mask
Merging
def add_parallel_light(image, light_position=None, direction=None, max_brightness=255, min_brightness=0,
                       mode="gaussian", linear_decay_rate=None, transparency=None):
    """
    Add mask generated from parallel light to given image

    The image is read from disk, converted to HSV, and its V channel is
    replaced by ``V * transparency + mask * (1 - transparency)``.

    Args:
        image: path of the image file, passed to ``cv2.imread``
        light_position: forwarded to ``generate_parallel_light_mask`` as ``position``
        direction: forwarded to ``generate_parallel_light_mask``
        max_brightness: forwarded to ``generate_parallel_light_mask``
        min_brightness: forwarded to ``generate_parallel_light_mask``
        mode: forwarded to ``generate_parallel_light_mask``
        linear_decay_rate: forwarded to ``generate_parallel_light_mask``
        transparency: weight of the original V channel in the blend.
                      Drawn from [0.5, 0.85] when None.
    Return:
        frame: uint8 BGR image with the light mask blended in
    Raises:
        IOError: if the image cannot be read
    """
    if transparency is None:
        transparency = random.uniform(0.5, 0.85)
    frame = cv2.imread(image)
    if frame is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError("cannot read image: {}".format(image))
    height, width = frame.shape[:2]
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = generate_parallel_light_mask(mask_size=(width, height),
                                        position=light_position,
                                        direction=direction,
                                        max_brightness=max_brightness,
                                        min_brightness=min_brightness,
                                        mode=mode,
                                        linear_decay_rate=linear_decay_rate)
    blended = hsv[:, :, 2] * transparency + mask * (1 - transparency)
    # clip before writing back into the uint8 channel so a transparency
    # outside [0, 1] cannot wrap around
    hsv[:, :, 2] = np.clip(blended, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
Spot light data augmentation method
import cv2
import numpy as np
import random
from scipy.stats import norm
def generate_spot_light_mask(mask_size,
                             position=None,
                             max_brightness=255,
                             min_brightness=0,
                             mode="gaussian",
                             linear_decay_rate=None,
                             speedup=False):
    """
    Generate a brightness mask for one or more spot lights.

    Brightness decays radially from every light position, contributions of
    multiple lights are summed, and the result is clipped to [0, 255],
    median-blurred and finally inverted (``255 - mask``).

    Args:
        mask_size: tuple of integers (w, h) defining generated mask size
        position: list of tuple of integers (x, y) defining the spot light centers.
                  A single (x, y) pair is also accepted. One random position is
                  used when None.
        max_brightness: brightness at a light center (before inversion)
        min_brightness: brightness far from the lights in gaussian mode;
                        not used in linear mode
        mode: how brightness decays from a center: "linear" or "gaussian"
        linear_decay_rate: brightness lost per pixel of distance in linear mode.
                           Drawn from [0.25, 1] when None.
        speedup: currently unused; kept for interface compatibility
    Return:
        light_mask: uint8 ndarray of shape (h, w)
    """
    assert mode in ["linear", "gaussian"], \
        "mode must be linear or gaussian"
    if position is None:
        position = [(random.randint(0, mask_size[0]), random.randint(0, mask_size[1]))]
    elif len(position) == 2 and np.ndim(position) == 1:
        # a bare (x, y) pair was given instead of a list of pairs
        position = [position]
    if linear_decay_rate is None and mode == "linear":
        linear_decay_rate = random.uniform(0.25, 1)
    if mode == "gaussian":
        # standard deviation is tied to the mask diagonal
        mu = np.sqrt(mask_size[1] ** 2 + mask_size[0] ** 2)
        dev = mu / 3.5
        mask = _decay_value_radically_norm_in_matrix(mask_size, position, max_brightness, min_brightness, dev)
    else:
        # linear mode: each light loses `linear_decay_rate` brightness per
        # pixel of distance and contributes nothing once it reaches 0
        coord_x = np.arange(mask_size[0])[np.newaxis, :]
        coord_y = np.arange(mask_size[1])[:, np.newaxis]
        mask = np.zeros(shape=(mask_size[1], mask_size[0]), dtype=np.float32)
        for center in position:
            dist = np.sqrt((coord_x - center[0]) ** 2 + (coord_y - center[1]) ** 2)
            mask += np.maximum(max_brightness - dist * linear_decay_rate, 0)
    # clip first so summed or out-of-range brightness cannot wrap around in the uint8 cast
    mask = np.clip(mask, 0, 255).astype(np.uint8)
    # add median blur
    mask = cv2.medianBlur(mask, 5)
    mask = 255 - mask
    return mask
def _decay_value_radically_norm_in_matrix(mask_size, centers, max_value, min_value, dev):
    """
    Compute Gaussian radial decay from one or more centers for a whole mask.

    For every pixel the normal density of its distance to each center is
    divided by the density at distance 0 and summed over all centers. The sum
    is scaled to ``[min_value, max_value]`` and capped at 255.

    Args:
        mask_size: tuple of integers (w, h) defining the mask size
        centers: iterable of (x, y) light centers
        max_value: brightness produced at a single center
        min_value: brightness approached far from every center
        dev: standard deviation of the Gaussian decay
    Return:
        mask: float ndarray of shape (h, w) with values capped at 255
    """
    center_prob = norm.pdf(0, 0, dev)
    # The pixel grid is the same for every center, so build it once. The
    # sparse form (a row vector and a column vector) broadcasts to (h, w).
    xv, yv = np.meshgrid(np.arange(mask_size[0]), np.arange(mask_size[1]), sparse=True)
    x_value_rate = np.zeros((mask_size[1], mask_size[0]))
    for center in centers:
        dist = np.sqrt(np.power(xv - center[0], 2) + np.power(yv - center[1], 2))
        x_value_rate += norm.pdf(dist, 0, dev) / center_prob
    mask = x_value_rate * (max_value - min_value) + min_value
    # overlapping lights can add up to more than the displayable maximum
    mask[mask > 255] = 255
    return mask
def _decay_value_radically_norm(x, centers, max_value, min_value, dev):
    """
    Compute the brightness of a single point under Gaussian radial decay.

    The normal density of the point's distance to each center is divided by
    the density at distance 0 and summed over all centers. The sum is scaled
    to ``[min_value, max_value]`` and capped at 255.
    NOTE: every center is treated as an identical light (same brightness and
    same decay rate).

    Args:
        x: (x, y) coordinates of the point
        centers: iterable of (x, y) light centers
        max_value: brightness produced at a single center
        min_value: brightness approached far from every center
        dev: standard deviation of the Gaussian decay
    Return:
        brightness of the point, at most 255
    """
    peak_density = norm.pdf(0, 0, dev)
    accumulated_rate = sum(
        norm.pdf(np.sqrt((c[0] - x[0]) ** 2 + (c[1] - x[1]) ** 2), 0, dev) / peak_density
        for c in centers
    )
    brightness = accumulated_rate * (max_value - min_value) + min_value
    if brightness > 255:
        return 255
    return brightness
def add_spot_light(image, light_position=None, max_brightness=255, min_brightness=0,
                   mode='gaussian', linear_decay_rate=None, transparency=None):
    """
    Add mask generated from spot light to given image

    The image is read from disk, converted to HSV, and its V channel is
    replaced by ``V * transparency + mask * (1 - transparency)``.

    Args:
        image: path of the image file, passed to ``cv2.imread``
        light_position: forwarded to ``generate_spot_light_mask`` as ``position``
        max_brightness: forwarded to ``generate_spot_light_mask``
        min_brightness: forwarded to ``generate_spot_light_mask``
        mode: forwarded to ``generate_spot_light_mask``
        linear_decay_rate: forwarded to ``generate_spot_light_mask``
        transparency: weight of the original V channel in the blend.
                      Drawn from [0.5, 0.85] when None.
    Return:
        frame: uint8 BGR image with the light mask blended in
    Raises:
        IOError: if the image cannot be read
    """
    if transparency is None:
        transparency = random.uniform(0.5, 0.85)
    frame = cv2.imread(image)
    if frame is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError("cannot read image: {}".format(image))
    height, width = frame.shape[:2]
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = generate_spot_light_mask(mask_size=(width, height),
                                    position=light_position,
                                    max_brightness=max_brightness,
                                    min_brightness=min_brightness,
                                    mode=mode,
                                    linear_decay_rate=linear_decay_rate)
    blended = hsv[:, :, 2] * transparency + mask * (1 - transparency)
    # clip before writing back into the uint8 channel so a transparency
    # outside [0, 1] cannot wrap around
    hsv[:, :, 2] = np.clip(blended, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)