import argparse
|
import cv2
|
import imgaug as ia
|
from imgaug import augmenters as iaa
|
from imgaug import parameters as iap
|
import imutils
|
import math
|
import numpy as np
|
import os
|
import pandas as pd
|
import random
|
from shapely import geometry
|
|
import fetch_data
|
import generate_data
|
from config import Config
|
|
|
def key_pts_to_yolo(key_pts, w_img, h_img):
|
"""
|
Convert a list of keypoints into a yolo training format
|
:param key_pts: list of keypoints
|
:param w_img: width of the entire image
|
:param h_img: height of the entire image
|
:return: <x> <y> <width> <height>
|
"""
|
x1 = max(0, min([pt[0] for pt in key_pts]))
|
x2 = min(w_img, max([pt[0] for pt in key_pts]))
|
y1 = max(0, min([pt[1] for pt in key_pts]))
|
y2 = min(h_img, max([pt[1] for pt in key_pts]))
|
x = (x2 + x1) / 2 / w_img
|
y = (y2 + y1) / 2 / h_img
|
width = (x2 - x1) / w_img
|
height = (y2 - y1) / h_img
|
return x, y, width, height
|
|
|
class ImageGenerator:
|
"""
|
A template for generating a training image
|
An ImageGenerator contains a background image, list of cards, and other environmental parameters to
|
set up a training image for YOLO network
|
"""
|
def __init__(self, img_bg, class_ids, width, height, skew=None, cards=None):
|
"""
|
:param img_bg: background (textile) image
|
:param width: width of the training image
|
:param height: height of the training image
|
:param skew: 4 coordinates that indicates the corners (in normalized form) for perspective transform
|
:param cards: list of Card objects
|
"""
|
self.img_bg = img_bg
|
self.class_ids = class_ids
|
self.img_result = None
|
self.width = width
|
self.height = height
|
if cards is None:
|
self.cards = []
|
else:
|
self.cards = cards
|
|
# Compute transform matrix for perspective transform (used for skewing the final result)
|
if skew is not None:
|
orig_corner = np.array([[0, 0], [0, height], [width, height], [width, 0]], dtype=np.float32)
|
new_corner = np.array([[width * s[0], height * s[1]] for s in skew], dtype=np.float32)
|
self.M = cv2.getPerspectiveTransform(orig_corner, new_corner)
|
pass
|
else:
|
self.M = None
|
pass
|
|
def add_card(self, card, x=None, y=None, theta=0.0, scale=1.0):
|
"""
|
Add a card to this generator scenario.
|
:param card: card to be added
|
:param x: new X-coordinate for the centre of the card
|
:param y: new Y-coordinate for the centre of the card
|
:param theta: new angle for the card
|
:param scale: new scale for the card
|
:return: none
|
"""
|
# If the position isn't given, push it out of the image so that it won't be visible during rendering
|
if x is None:
|
x = -len(card.img[0]) / 2
|
if y is None:
|
y = -len(card.img) / 2
|
self.cards.append(card)
|
card.x = x
|
card.y = y
|
card.theta = theta
|
card.scale = scale
|
pass
|
|
def render(self, visibility=0.5, aug=None, display=False, debug=False):
|
"""
|
Display the current state of the generator.
|
:param visibility: portion of the card's image that must not be overlapped by other cards for the card to be
|
considered as visible
|
:param aug: image augmentator to apply during rendering
|
:param display: flag for displaying the rendering result
|
:param debug: flag for debug
|
:return: none
|
"""
|
self.check_visibility(visibility=visibility)
|
img_result = np.zeros((self.height, self.width, 3), dtype=np.uint8)
|
card_mask = cv2.imread(Config.card_mask_path)
|
|
for card in self.cards:
|
card_x = int(card.x + 0.5)
|
card_y = int(card.y + 0.5)
|
|
# Scale & rotate card image
|
img_card = cv2.resize(card.img, (int(len(card.img[0]) * card.scale), int(len(card.img) * card.scale)))
|
# Add a random glaring on individual card - it happens frequently in real life as MTG cards can reflect
|
# the lights very well.
|
if aug is not None:
|
seq = iaa.Sequential([
|
iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(128)), size_px_max=[1, 3],
|
upscale_method="cubic"), # Lighting
|
])
|
img_card = seq.augment_image(img_card)
|
mask_scale = cv2.resize(card_mask, (int(len(card_mask[0]) * card.scale), int(len(card_mask) * card.scale)))
|
img_mask = cv2.bitwise_and(img_card, mask_scale)
|
img_rotate = imutils.rotate_bound(img_mask, card.theta / math.pi * 180)
|
|
# Calculate the position of the card image in relation to the background
|
# Crop the card image if it's out of boundary
|
card_w = len(img_rotate[0])
|
card_h = len(img_rotate)
|
card_crop_x1 = max(0, card_w // 2 - card_x)
|
card_crop_x2 = min(card_w, card_w // 2 + len(img_result[0]) - card_x)
|
card_crop_y1 = max(0, card_h // 2 - card_y)
|
card_crop_y2 = min(card_h, card_h // 2 + len(img_result) - card_y)
|
img_card_crop = img_rotate[card_crop_y1:card_crop_y2, card_crop_x1:card_crop_x2]
|
|
# Calculate the position of the corresponding area in the background
|
bg_crop_x1 = max(0, card_x - (card_w // 2))
|
bg_crop_x2 = min(len(img_result[0]), int(card_x + (card_w / 2) + 0.5))
|
bg_crop_y1 = max(0, card_y - (card_h // 2))
|
bg_crop_y2 = min(len(img_result), int(card_y + (card_h / 2) + 0.5))
|
img_result_crop = img_result[bg_crop_y1:bg_crop_y2, bg_crop_x1:bg_crop_x2]
|
|
# Override the background with the current card
|
img_result_crop = np.where(img_card_crop, img_card_crop, img_result_crop)
|
img_result[bg_crop_y1:bg_crop_y2, bg_crop_x1:bg_crop_x2] = img_result_crop
|
|
if debug:
|
for ext_obj in card.objects:
|
if ext_obj.visible:
|
for pt in ext_obj.key_pts:
|
cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (1, 1, 255), 10)
|
bounding_box = card.bb_in_generator(ext_obj.key_pts)
|
cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (1, 255, 1), 5)
|
|
img_result = cv2.GaussianBlur(img_result, (5, 5), 0)
|
|
# Skew the cards if it's provided
|
if self.M is not None:
|
img_result = cv2.warpPerspective(img_result, self.M, (self.width, self.height))
|
if debug:
|
for card in self.cards:
|
for ext_obj in card.objects:
|
if ext_obj.visible:
|
new_pts = np.array([[list(card.coordinate_in_generator(pt[0], pt[1]))]
|
for pt in ext_obj.key_pts], dtype=np.float32)
|
new_pts = cv2.perspectiveTransform(new_pts, self.M)
|
for pt in new_pts:
|
cv2.circle(img_result, (pt[0][0], pt[0][1]), 2, (255, 1, 1), 10)
|
|
img_bg = cv2.resize(self.img_bg, (self.width, self.height))
|
img_result = np.where(img_result, img_result, img_bg)
|
|
# Apply image augmentation
|
if aug is not None:
|
img_result = aug.augment_image(img_result)
|
|
if display or debug:
|
cv2.imshow('Result', img_result)
|
cv2.waitKey(0)
|
|
self.img_result = img_result
|
pass
|
|
def generate_horizontal_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
|
"""
|
Generating the first scenario where the cards are laid out in a straight horizontal line
|
:param gap: horizontal offset between each adjacent cards
|
:param scale: scale of each cards in the generator
|
:param theta: rotation of the entire span in radian
|
:param shift: range of arbitrary offset for each card
|
:param jitter: range of in-place rotation for each card in radian
|
:return: True if successfully generated, otherwise False
|
"""
|
# Set scale of the cards, variance of shift & jitter to be applied if they're not given
|
card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
|
if scale is None:
|
# Scale the cards so that card takes about 50% of the image's height
|
coverage_ratio = 0.5
|
scale = self.height * coverage_ratio / card_size[1]
|
if shift is None:
|
# Plus minus 5% of the card's height
|
shift = [-card_size[1] * scale * 0.05, card_size[1] * scale * 0.05]
|
pass
|
if jitter is None:
|
# Plus minus 10 degrees
|
jitter = [-math.pi / 18, math.pi / 18]
|
if gap is None:
|
# 25% of the card's width - set symbol and 1-2 mana symbols will be visible on each card
|
gap = card_size[0] * scale * 0.4
|
|
# Determine the location of the first card
|
# The cards will cover (width of a card + (# of cards - 1) * gap) pixels wide and (height of a card) pixels high
|
x_anchor = int(self.width / 2 + (len(self.cards) - 1) * gap / 2)
|
y_anchor = self.height // 2
|
for card in self.cards:
|
card.scale = scale
|
card.x = x_anchor
|
card.y = y_anchor
|
card.theta = 0
|
card.shift(shift, shift)
|
card.rotate(jitter)
|
card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
|
x_anchor -= gap
|
|
return True
|
|
def generate_vertical_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
|
"""
|
Generating the second scenario where the cards are laid out in a straight vertical line
|
:param gap: horizontal offset between each adjacent cards
|
:param scale: scale of each cards in the generator
|
:param theta: rotation of the entire span in radian
|
:param shift: range of arbitrary offset for each card
|
:param jitter: range of in-place rotation for each card in radian
|
:return: True if successfully generated, otherwise False
|
:return: True if successfully generated, otherwise False
|
"""
|
# Set scale of the cards, variance of shift & jitter to be applied if they're not given
|
card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
|
if scale is None:
|
# Scale the cards so that card takes about 50% of the image's height
|
coverage_ratio = 0.5
|
scale = self.height * coverage_ratio / card_size[1]
|
if shift is None:
|
# Plus minus 5% of the card's height
|
shift = [-card_size[1] * scale * 0.05, card_size[1] * scale * 0.05]
|
pass
|
if jitter is None:
|
# Plus minus 5 degrees
|
jitter = [-math.pi / 36, math.pi / 36]
|
if gap is None:
|
# 15% of the card's height - the title bar (with mana symbols) will be visible
|
gap = card_size[1] * scale * 0.25
|
|
# Determine the location of the first card
|
# The cards will cover (width of a card) pixels wide and (height of a card + (# of cards - 1) * gap) pixels high
|
x_anchor = self.width // 2
|
y_anchor = int(self.height / 2 - (len(self.cards) - 1) * gap / 2)
|
for card in self.cards:
|
card.scale = scale
|
card.x = x_anchor
|
card.y = y_anchor
|
card.theta = 0
|
card.shift(shift, shift)
|
card.rotate(jitter)
|
card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
|
y_anchor += gap
|
return True
|
|
def generate_fan_out(self, centre, theta_between_cards=None, scale=None, shift=None, jitter=None):
|
"""
|
Generating the third scenario where the cards are laid out in a fan shape
|
:return: True if successfully generated, otherwise False
|
"""
|
# TODO
|
return False
|
|
def generate_non_obstructive(self, tolerance=0.90, scale=None):
|
"""
|
Generating the fourth scenario where the cards are laid in arbitrary position that doesn't obstruct other cards
|
:param tolerance: minimum level of visibility for each cards
|
:param scale: scale of each cards in generator
|
:return: True if successfully generated, otherwise False
|
"""
|
card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
|
if scale is None:
|
# Total area of the cards should cover about 25-40% of the entire image, depending on the number of cards
|
scale = math.sqrt(self.width * self.height * min(0.25 + 0.02 * len(self.cards), 0.4)
|
/ (card_size[0] * card_size[1] * len(self.cards)))
|
# Position each card at random location that doesn't obstruct other cards
|
i = 0
|
while i < len(self.cards):
|
card = self.cards[i]
|
card.scale = scale
|
rep = 0
|
while True:
|
card.x = random.uniform(card_size[1] * scale / 2, self.width - card_size[1] * scale)
|
card.y = random.uniform(card_size[1] * scale / 2, self.height - card_size[1] * scale)
|
card.theta = random.uniform(-math.pi, math.pi)
|
self.check_visibility(self.cards[:i + 1], visibility=tolerance)
|
# This position is not obstructive if all of the cards are visible
|
is_visible = [other_card.objects[0].visible for other_card in self.cards[:i + 1]]
|
non_obstructive = all(is_visible)
|
if non_obstructive:
|
i += 1
|
break
|
rep += 1
|
if rep >= 1000:
|
# Reassign previous card's position
|
i -= 1
|
break
|
return True
|
|
def check_visibility(self, cards=None, i_check=None, visibility=0.5):
|
"""
|
Check whether if extracted objects in a card is visible in the current scenario, and update their status
|
:param cards: list of cards (in a correct Z-order). All cards in this Generator are checked by default.
|
:param i_check: indices of cards that needs to be checked. Cards that aren't in this list will only be used
|
to check visibility of other cards. All cards are checked by default.
|
:param visibility: minimum ratio of the object's area that aren't covered by another card to be visible
|
:return: none
|
"""
|
if cards is None:
|
cards = self.cards
|
if i_check is None:
|
i_check = range(len(cards))
|
|
# Create a polygon of each card
|
card_poly_list = [geometry.Polygon([card.coordinate_in_generator(0, 0),
|
card.coordinate_in_generator(0, len(card.img)),
|
card.coordinate_in_generator(len(card.img[0]), len(card.img)),
|
card.coordinate_in_generator(len(card.img[0]), 0)]) for card in self.cards]
|
template_poly = geometry.Polygon([(0, 0), (self.width, 0), (self.width, self.height), (0, self.height)])
|
|
# First card in the list is overlaid on the bottom of the card pile
|
for i in i_check:
|
card = cards[i]
|
for ext_obj in card.objects:
|
obj_poly = geometry.Polygon([card.coordinate_in_generator(pt[0], pt[1]) for pt in ext_obj.key_pts])
|
obj_area = obj_poly.area
|
# Check if the other cards are blocking this object or if it's out of the template
|
# If there are other polygons with higher indices in the list, that card is overlapping this object
|
# We assume that no objects from the same card is on top of each other
|
for card_poly in card_poly_list[i + 1:]:
|
obj_poly = obj_poly.difference(card_poly)
|
obj_poly = obj_poly.intersection(template_poly)
|
visible_area = obj_poly.area
|
ext_obj.visible = obj_area * visibility <= visible_area
|
|
def export_training_data(self, out_name, visibility=0.5, aug=None):
|
"""
|
Export the generated training image along with the txt file for all bounding boxes
|
:param out_name: path of the output file (without extension)
|
:param visibility: portion of the card's image that must not be overlapped by other cards for the card to be
|
considered as visible
|
:param aug: image augmentator to be applied
|
:return: none
|
"""
|
self.render(visibility, aug=aug)
|
cv2.imwrite(out_name + '.jpg', self.img_result)
|
out_txt = open(out_name + '.txt', 'w')
|
for card in self.cards:
|
for ext_obj in card.objects:
|
if not ext_obj.visible:
|
continue
|
coords_in_gen = [card.coordinate_in_generator(key_pt[0], key_pt[1]) for key_pt in ext_obj.key_pts]
|
obj_yolo_info = key_pts_to_yolo(coords_in_gen, self.width, self.height)
|
if ext_obj.label == 'card':
|
#class_id = self.class_ids[card.info['name']]
|
class_id = 0 # since only the entire card is used
|
out_txt.write(str(class_id) + ' %.6f %.6f %.6f %.6f\n' % obj_yolo_info)
|
out_txt.close()
|
|
|
class Card:
|
"""
|
A class for storing required information about a card in relation to the ImageGenerator
|
"""
|
def __init__(self, img, card_info, objects, x=None, y=None, theta=None, scale=None):
|
"""
|
:param img: image of the card
|
:param card_info: details like name, mana cost, type, set, etc
|
:param objects: list of ExtractedObjects like mana & set symbol, etc
|
:param x: X-coordinate of the card's centre in relation to the generator
|
:param y: Y-coordinate of the card's centre in relation to the generator
|
:param theta: angle of rotation of the card in relation to the generator
|
:param scale: scale of the card in the generator in relation to the original image
|
"""
|
self.img = img
|
self.info = card_info
|
self.objects = objects
|
self.x = x
|
self.y = y
|
self.theta = theta
|
self.scale = scale
|
pass
|
|
def shift(self, x, y):
|
"""
|
Apply a X/Y translation on this image
|
:param x: amount of X-translation. If range is given, translate by a random amount within that range
|
:param y: amount of Y-translation. If range is given, translate by a random amount within that range
|
:return: none
|
"""
|
if isinstance(x, tuple) or (isinstance(x, list) and len(x) == 2):
|
self.x += random.uniform(x[0], x[1])
|
else:
|
self.x += x
|
if isinstance(y, tuple) or (isinstance(y, list) and len(y) == 2):
|
self.y += random.uniform(y[0], y[1])
|
else:
|
self.y += y
|
pass
|
|
def rotate(self, theta, centre=(0, 0)):
|
"""
|
Apply a rotation on this image with a centre
|
:param theta: amount of rotation in radian (clockwise). If a range is given, rotate by a random amount within
|
that range
|
:param centre: coordinate of the centre of the rotation in relation to the centre of this card
|
:return: none
|
"""
|
if isinstance(theta, tuple) or (isinstance(theta, list) and len(theta) == 2):
|
theta = random.uniform(theta[0], theta[1])
|
|
# If the centre given is the centre of this card, the whole math simplifies a bit
|
# (This still works without the if statement, but let's not do useless trigs if we know the answer already)
|
if centre is not (0, 0):
|
# Rotation math
|
self.x -= -centre[1] * math.sin(theta) + centre[0] * math.cos(theta)
|
self.y -= centre[1] * math.cos(theta) + centre[0] * math.sin(theta)
|
|
# Offset for the coordinate translation
|
self.x += centre[0]
|
self.y += centre[1]
|
|
self.theta += theta
|
pass
|
|
def coordinate_in_generator(self, x, y):
|
"""
|
Converting coordinate within the card into the coordinate in the generator it is associated with
|
:param x: x coordinate within the card
|
:param y: y coordinate within the card
|
:return: (x, y) coordinate in the generator
|
"""
|
# Relative distance in X & Y axis, if the centre of the card is at the origin (0, 0)
|
rel_x = x - len(self.img[0]) // 2
|
rel_y = y - len(self.img) // 2
|
|
# Scaling
|
rel_x *= self.scale
|
rel_y *= self.scale
|
|
# Rotation
|
rot_x = rel_x - rel_y * math.sin(self.theta) + rel_x * math.cos(self.theta)
|
rot_y = rel_y + rel_y * math.cos(self.theta) + rel_x * math.sin(self.theta)
|
|
# Negate offset
|
rot_x -= rel_x
|
rot_y -= rel_y
|
|
# Shift
|
gen_x = rot_x + self.x
|
gen_y = rot_y + self.y
|
|
return int(gen_x), int(gen_y)
|
|
def bb_in_generator(self, key_pts):
|
"""
|
Convert a keypoints of bounding box in card into the coordinate in the generator
|
:param key_pts: keypoints of the bounding box
|
:return: bounding box represented by 4 points in the generator
|
"""
|
coords_in_gen = [self.coordinate_in_generator(key_pt[0], key_pt[1]) for key_pt in key_pts]
|
x1 = min([pt[0] for pt in coords_in_gen])
|
x2 = max([pt[0] for pt in coords_in_gen])
|
y1 = min([pt[1] for pt in coords_in_gen])
|
y2 = max([pt[1] for pt in coords_in_gen])
|
return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
|
|
|
class ExtractedObject:
|
"""
|
Simple struct to hold information about an extracted object
|
"""
|
def __init__(self, label, key_pts):
|
self.label = label
|
self.key_pts = key_pts
|
self.visible = False
|
|
|
def main(args):
|
random.seed()
|
ia.seed(random.randrange(10000))
|
|
bg_images = generate_data.load_dtd(dtd_dir='%s/dtd/images' % Config.data_dir, dump_it=False)
|
background = generate_data.Backgrounds(images=bg_images)
|
|
card_pool = pd.DataFrame()
|
for set_name in Config.all_set_list:
|
df = fetch_data.load_all_cards_text('%s/csv/%s.csv' % (Config.data_dir, set_name))
|
card_pool = card_pool.append(df)
|
class_ids = {}
|
with open('%s/obj.names' % Config.data_dir) as names_file:
|
class_name_list = names_file.read().splitlines()
|
for i in range(len(class_name_list)):
|
class_ids[class_name_list[i]] = i
|
|
for i in range(args.num_gen):
|
# Arbitrarily select top left and right corners for perspective transformation
|
# Since the training image are generated with random rotation, don't need to skew all four sides
|
skew = [[random.uniform(0, 0.25), 0], [0, 1], [1, 1],
|
[random.uniform(0.75, 1), 0]]
|
generator = ImageGenerator(background.get_random(), class_ids, args.width, args.height, skew=skew)
|
out_name = ''
|
|
# Use 2 to 5 cards per generator
|
for _, card_info in card_pool.sample(random.randint(2, 5)).iterrows():
|
img_name = '%s/card_img/png/%s/%s_%s.png' % (Config.data_dir, card_info['set'],
|
card_info['collector_number'],
|
fetch_data.get_valid_filename(card_info['name']))
|
out_name += '%s%s_' % (card_info['set'], card_info['collector_number'])
|
card_img = cv2.imread(img_name)
|
if card_img is None:
|
fetch_data.fetch_card_image(card_info, out_dir='%s/card_img/png/%s' % (Config.data_dir,
|
card_info['set']))
|
card_img = cv2.imread(img_name)
|
if card_img is None:
|
print('WARNING: card %s is not found!' % img_name)
|
detected_object_list = generate_data.apply_bounding_box(card_img, card_info)
|
card = Card(card_img, card_info, detected_object_list)
|
generator.add_card(card)
|
|
for j in range(args.num_iter):
|
seq = iaa.Sequential([
|
iaa.Multiply((0.8, 1.2)), # darken / brighten the whole image
|
iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(64)), per_channel=0.1, size_px_max=[3, 6],
|
upscale_method="cubic"), # Lighting
|
iaa.AdditiveGaussianNoise(scale=random.uniform(0, 0.05) * 255, per_channel=0.1), # Noises
|
iaa.Dropout(p=[0, 0.05], per_channel=0.1) # Dropout
|
])
|
|
if i % 3 == 0:
|
generator.generate_non_obstructive()
|
generator.export_training_data(visibility=0.0, out_name='%s/train/non_obstructive_update/%s%d'
|
% (Config.data_dir, out_name, j), aug=seq)
|
elif i % 3 == 1:
|
generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi))
|
generator.export_training_data(visibility=0.0, out_name='%s/train/horizontal_span_update/%s%d'
|
% (Config.data_dir, out_name, j), aug=seq)
|
else:
|
generator.generate_vertical_span(theta=random.uniform(-math.pi, math.pi))
|
generator.export_training_data(visibility=0.0, out_name='%s/train/vertical_span_update/%s%d'
|
% (Config.data_dir, out_name, j), aug=seq)
|
|
#generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi))
|
#generator.render(display=True, aug=seq, debug=True)
|
print('Generated %s%d' % (out_name, j))
|
generator.img_bg = background.get_random()
|
pass
|
|
|
if __name__ == '__main__':
|
parser = argparse.ArgumentParser()
|
parser.add_argument('-n', '--num_gen', dest='num_gen', help='Number of training images to generate',
|
type=int, required=True)
|
parser.add_argument('-ni', '--num_iter', dest='num_iter', help='Number of iterations to generate each config',
|
type=int, default=1)
|
parser.add_argument('-w', '--width', dest='width', help='Width of the training image', type=int, default=1440)
|
parser.add_argument('-ht', '--height', dest='height', help='Height of the training image', type=int, default=960)
|
args = parser.parse_args()
|
main(args)
|