import argparse
import math
import os
import random

import cv2
import imgaug as ia
from imgaug import augmenters as iaa
from imgaug import parameters as iap
import imutils
import numpy as np
import pandas as pd
from shapely import geometry

import fetch_data
import generate_data
from config import Config


def key_pts_to_yolo(key_pts, w_img, h_img):
    """
    Convert a list of keypoints into a yolo training format

    The axis-aligned bounding box of the keypoints is clipped to the image and then expressed as a
    centre point and a size, each divided by the image dimensions.

    :param key_pts: list of keypoints, each indexable as (x, y)
    :param w_img: width of the entire image
    :param h_img: height of the entire image
    :return: tuple (x, y, width, height) - centre and size of the bounding box, normalized by the image size
    """
    xs = [pt[0] for pt in key_pts]
    ys = [pt[1] for pt in key_pts]

    # Clamp both edges into the image so that an object lying entirely outside the frame collapses to a
    # zero-sized box instead of yielding a negative width / height.
    x1 = min(max(0, min(xs)), w_img)
    x2 = max(min(w_img, max(xs)), 0)
    y1 = min(max(0, min(ys)), h_img)
    y2 = max(min(h_img, max(ys)), 0)

    x = (x2 + x1) / 2 / w_img
    y = (y2 + y1) / 2 / h_img
    width = (x2 - x1) / w_img
    height = (y2 - y1) / h_img
    return x, y, width, height


class ImageGenerator:
    """
    A template for generating a training image

    An ImageGenerator contains a background image, list of cards, and other environmental parameters to set up
    a training image for YOLO network
    """

    def __init__(self, img_bg, class_ids, width, height, skew=None, cards=None):
        """
        :param img_bg: background (textile) image
        :param class_ids: mapping from class name to class index (stored on the generator)
        :param width: width of the training image
        :param height: height of the training image
        :param skew: 4 coordinates that indicates the corners (in normalized form) for perspective transform
        :param cards: list of Card objects
        """
        self.img_bg = img_bg
        self.class_ids = class_ids
        self.img_result = None
        self.width = width
        self.height = height
        self.cards = [] if cards is None else cards

        # Compute transform matrix for perspective transform (used for skewing the final result)
        if skew is not None:
            orig_corner = np.array([[0, 0], [0, height], [width, height], [width, 0]], dtype=np.float32)
            new_corner = np.array([[width * s[0], height * s[1]] for s in skew], dtype=np.float32)
            self.M = cv2.getPerspectiveTransform(orig_corner, new_corner)
        else:
            self.M = None

    def add_card(self, card, x=None, y=None, theta=0.0, scale=1.0):
        """
        Add a card to this generator scenario.

        :param card: card to be added
        :param x: new X-coordinate for the centre of the card
        :param y: new Y-coordinate for the centre of the card
        :param theta: new angle for the card
        :param scale: new scale for the card
        :return: none
        """
        # If the position isn't given, push it out of the image so that it won't be visible during rendering
        if x is None:
            x = -len(card.img[0]) / 2
        if y is None:
            y = -len(card.img) / 2
        self.cards.append(card)
        card.x = x
        card.y = y
        card.theta = theta
        card.scale = scale

    def render(self, visibility=0.5, aug=None, display=False, debug=False):
        """
        Render the current state of the generator into self.img_result, optionally displaying it.

        :param visibility: portion of the card's image that must not be overlapped by other cards for the card to
                           be considered as visible
        :param aug: image augmentator to apply during rendering
        :param display: flag for displaying the rendering result
        :param debug: flag for debug
        :return: none
        :raises FileNotFoundError: if there are cards to draw but the card mask image can't be read
        """
        self.check_visibility(visibility=visibility)
        img_result = np.zeros((self.height, self.width, 3), dtype=np.uint8)
        card_mask = cv2.imread(Config.card_mask_path)
        if card_mask is None and self.cards:
            # cv2.imread signals failure by returning None; fail here with a readable message
            raise FileNotFoundError('Unable to read card mask image: %s' % Config.card_mask_path)

        for card in self.cards:
            card_x = int(card.x + 0.5)
            card_y = int(card.y + 0.5)

            # Scale & rotate card image
            img_card = cv2.resize(card.img, (int(len(card.img[0]) * card.scale), int(len(card.img) * card.scale)))
            # Add a random glaring on individual card - it happens frequently in real life as MTG cards can reflect
            # the lights very well.
            if aug is not None:
                seq = iaa.Sequential([
                    iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(128)), size_px_max=[1, 3],
                                          upscale_method="cubic"),  # Lighting
                ])
                img_card = seq.augment_image(img_card)
            mask_scale = cv2.resize(card_mask, (int(len(card_mask[0]) * card.scale),
                                                int(len(card_mask) * card.scale)))
            img_mask = cv2.bitwise_and(img_card, mask_scale)
            # card.theta is in radians; it is converted to degrees for rotate_bound
            img_rotate = imutils.rotate_bound(img_mask, card.theta / math.pi * 180)

            # Calculate the position of the card image in relation to the background
            # Crop the card image if it's out of boundary
            card_w = len(img_rotate[0])
            card_h = len(img_rotate)
            card_crop_x1 = max(0, card_w // 2 - card_x)
            card_crop_x2 = min(card_w, card_w // 2 + len(img_result[0]) - card_x)
            card_crop_y1 = max(0, card_h // 2 - card_y)
            card_crop_y2 = min(card_h, card_h // 2 + len(img_result) - card_y)
            img_card_crop = img_rotate[card_crop_y1:card_crop_y2, card_crop_x1:card_crop_x2]

            # Calculate the position of the corresponding area in the background
            bg_crop_x1 = max(0, card_x - (card_w // 2))
            bg_crop_x2 = min(len(img_result[0]), int(card_x + (card_w / 2) + 0.5))
            bg_crop_y1 = max(0, card_y - (card_h // 2))
            bg_crop_y2 = min(len(img_result), int(card_y + (card_h / 2) + 0.5))
            img_result_crop = img_result[bg_crop_y1:bg_crop_y2, bg_crop_x1:bg_crop_x2]

            # Override the background with the current card
            # (zero-valued elements of the card image keep whatever was drawn underneath)
            img_result_crop = np.where(img_card_crop, img_card_crop, img_result_crop)
            img_result[bg_crop_y1:bg_crop_y2, bg_crop_x1:bg_crop_x2] = img_result_crop

            if debug:
                for ext_obj in card.objects:
                    if ext_obj.visible:
                        for pt in ext_obj.key_pts:
                            cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (1, 1, 255), 10)
                        bounding_box = card.bb_in_generator(ext_obj.key_pts)
                        cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (1, 255, 1), 5)

        img_result = cv2.GaussianBlur(img_result, (5, 5), 0)

        # Skew the cards if it's provided
        if self.M is not None:
            img_result = cv2.warpPerspective(img_result, self.M, (self.width, self.height))
            if debug:
                for card in self.cards:
                    for ext_obj in card.objects:
                        if ext_obj.visible:
                            new_pts = np.array([[list(card.coordinate_in_generator(pt[0], pt[1]))]
                                                for pt in ext_obj.key_pts], dtype=np.float32)
                            new_pts = cv2.perspectiveTransform(new_pts, self.M)
                            for pt in new_pts:
                                # perspectiveTransform returns float32 values; drawing needs integer pixels
                                cv2.circle(img_result, (int(pt[0][0]), int(pt[0][1])), 2, (255, 1, 1), 10)

        # Every element that is still zero after drawing the cards is filled in from the background image
        img_bg = cv2.resize(self.img_bg, (self.width, self.height))
        img_result = np.where(img_result, img_result, img_bg)

        # Apply image augmentation
        if aug is not None:
            img_result = aug.augment_image(img_result)

        if display or debug:
            cv2.imshow('Result', img_result)
            cv2.waitKey(0)

        self.img_result = img_result

    def generate_horizontal_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
        """
        Generating the first scenario where the cards are laid out in a straight horizontal line

        :param gap: horizontal offset between each adjacent cards
        :param scale: scale of each cards in the generator
        :param theta: rotation of the entire span in radian
        :param shift: range of arbitrary offset for each card
        :param jitter: range of in-place rotation for each card in radian
        :return: True if successfully generated, otherwise False
        """
        if not self.cards:
            # Nothing to lay out; the defaults below are derived from the first card's size
            return False

        # Set scale of the cards, variance of shift & jitter to be applied if they're not given
        card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
        if scale is None:
            # Scale the cards so that card takes about 50% of the image's height
            coverage_ratio = 0.5
            scale = self.height * coverage_ratio / card_size[1]
        if shift is None:
            # Plus minus 5% of the card's height
            shift = [-card_size[1] * scale * 0.05, card_size[1] * scale * 0.05]
        if jitter is None:
            # Plus minus 10 degrees
            jitter = [-math.pi / 18, math.pi / 18]
        if gap is None:
            # 40% of the card's width - set symbol and 1-2 mana symbols will be visible on each card
            gap = card_size[0] * scale * 0.4

        # Determine the location of the first card
        # The cards will cover (width of a card + (# of cards - 1) * gap) pixels wide and (height of a card) pixels high
        x_anchor = int(self.width / 2 + (len(self.cards) - 1) * gap / 2)
        y_anchor = self.height // 2
        for card in self.cards:
            card.scale = scale
            card.x = x_anchor
            card.y = y_anchor
            card.theta = 0
            card.shift(shift, shift)
            card.rotate(jitter)
            # Rotate the whole span around the centre of the image (given relative to this card's anchor)
            card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
            x_anchor -= gap

        return True

    def generate_vertical_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
        """
        Generating the second scenario where the cards are laid out in a straight vertical line

        :param gap: vertical offset between each adjacent cards
        :param scale: scale of each cards in the generator
        :param theta: rotation of the entire span in radian
        :param shift: range of arbitrary offset for each card
        :param jitter: range of in-place rotation for each card in radian
        :return: True if successfully generated, otherwise False
        """
        if not self.cards:
            # Nothing to lay out; the defaults below are derived from the first card's size
            return False

        # Set scale of the cards, variance of shift & jitter to be applied if they're not given
        card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
        if scale is None:
            # Scale the cards so that card takes about 50% of the image's height
            coverage_ratio = 0.5
            scale = self.height * coverage_ratio / card_size[1]
        if shift is None:
            # Plus minus 5% of the card's height
            shift = [-card_size[1] * scale * 0.05, card_size[1] * scale * 0.05]
        if jitter is None:
            # Plus minus 5 degrees
            jitter = [-math.pi / 36, math.pi / 36]
        if gap is None:
            # 25% of the card's height - the title bar (with mana symbols) will be visible
            gap = card_size[1] * scale * 0.25

        # Determine the location of the first card
        # The cards will cover (width of a card) pixels wide and (height of a card + (# of cards - 1) * gap) pixels high
        x_anchor = self.width // 2
        y_anchor = int(self.height / 2 - (len(self.cards) - 1) * gap / 2)
        for card in self.cards:
            card.scale = scale
            card.x = x_anchor
            card.y = y_anchor
            card.theta = 0
            card.shift(shift, shift)
            card.rotate(jitter)
            # Rotate the whole span around the centre of the image (given relative to this card's anchor)
            card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
            y_anchor += gap
        return True

    def generate_fan_out(self, centre, theta_between_cards=None, scale=None, shift=None, jitter=None):
        """
        Generating the third scenario where the cards are laid out in a fan shape

        Not implemented yet; all parameters are currently ignored.

        :return: True if successfully generated, otherwise False
        """
        # TODO
        return False

    def generate_non_obstructive(self, tolerance=0.90, scale=None):
        """
        Generating the fourth scenario where the cards are laid in arbitrary position that doesn't obstruct other cards

        :param tolerance: minimum level of visibility for each cards
        :param scale: scale of each cards in generator
        :return: True if successfully generated, otherwise False
        """
        if not self.cards:
            # Avoids indexing an empty list and dividing by zero in the default scale below
            return False

        card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
        if scale is None:
            # Total area of the cards should cover about 25-40% of the entire image, depending on the number of cards
            scale = math.sqrt(self.width * self.height * min(0.25 + 0.02 * len(self.cards), 0.4)
                              / (card_size[0] * card_size[1] * len(self.cards)))

        # Position each card at random location that doesn't obstruct other cards
        i = 0
        while i < len(self.cards):
            card = self.cards[i]
            card.scale = scale
            rep = 0
            while True:
                # NOTE(review): the lower bound leaves half a card height of margin while the upper bound leaves
                # a full card height - kept as-is, confirm whether the asymmetry is intended.
                card.x = random.uniform(card_size[1] * scale / 2, self.width - card_size[1] * scale)
                card.y = random.uniform(card_size[1] * scale / 2, self.height - card_size[1] * scale)
                card.theta = random.uniform(-math.pi, math.pi)
                self.check_visibility(self.cards[:i + 1], visibility=tolerance)

                # This position is not obstructive if all of the cards are visible
                is_visible = [other_card.objects[0].visible for other_card in self.cards[:i + 1]]
                non_obstructive = all(is_visible)
                if non_obstructive:
                    i += 1
                    break

                rep += 1
                if rep >= 1000:
                    # Reassign previous card's position
                    # (never step below the first card, a negative index would wrap around to the last one)
                    i = max(i - 1, 0)
                    break
        return True

    def check_visibility(self, cards=None, i_check=None, visibility=0.5):
        """
        Check whether if extracted objects in a card is visible in the current scenario, and update their status

        :param cards: list of cards (in a correct Z-order). All cards in this Generator are checked by default.
        :param i_check: indices of cards that needs to be checked. Cards that aren't in this list will only be used
                        to check visibility of other cards. All cards are checked by default.
        :param visibility: minimum ratio of the object's area that aren't covered by another card to be visible
        :return: none
        """
        if cards is None:
            cards = self.cards
        if i_check is None:
            i_check = range(len(cards))

        # Create a polygon of each card
        # Only the cards handed in take part, so cards outside of the given list can't act as occluders
        card_poly_list = [geometry.Polygon([card.coordinate_in_generator(0, 0),
                                            card.coordinate_in_generator(0, len(card.img)),
                                            card.coordinate_in_generator(len(card.img[0]), len(card.img)),
                                            card.coordinate_in_generator(len(card.img[0]), 0)])
                          for card in cards]
        template_poly = geometry.Polygon([(0, 0), (self.width, 0), (self.width, self.height), (0, self.height)])

        # First card in the list is overlaid on the bottom of the card pile
        for i in i_check:
            card = cards[i]
            for ext_obj in card.objects:
                obj_poly = geometry.Polygon([card.coordinate_in_generator(pt[0], pt[1]) for pt in ext_obj.key_pts])
                obj_area = obj_poly.area
                # Check if the other cards are blocking this object or if it's out of the template
                # If there are other polygons with higher indices in the list, that card is overlapping this object
                # We assume that no objects from the same card is on top of each other
                for card_poly in card_poly_list[i + 1:]:
                    obj_poly = obj_poly.difference(card_poly)
                obj_poly = obj_poly.intersection(template_poly)
                visible_area = obj_poly.area
                ext_obj.visible = obj_area * visibility <= visible_area

    def export_training_data(self, out_name, visibility=0.5, aug=None):
        """
        Export the generated training image along with the txt file for all bounding boxes

        :param out_name: path of the output file (without extension)
        :param visibility: portion of the card's image that must not be overlapped by other cards for the card to
                           be considered as visible
        :param aug: image augmentator to be applied
        :return: none
        """
        self.render(visibility, aug=aug)
        cv2.imwrite(out_name + '.jpg', self.img_result)

        # The context manager closes the label file even if a card fails halfway through
        with open(out_name + '.txt', 'w') as out_txt:
            for card in self.cards:
                for ext_obj in card.objects:
                    if not ext_obj.visible:
                        continue
                    coords_in_gen = [card.coordinate_in_generator(key_pt[0], key_pt[1])
                                     for key_pt in ext_obj.key_pts]
                    obj_yolo_info = key_pts_to_yolo(coords_in_gen, self.width, self.height)
                    if ext_obj.label == 'card':
                        # Per-name ids (self.class_ids[card.info['name']]) are not used for now:
                        # since only the entire card is used, every card is written as class 0
                        class_id = 0
                        out_txt.write(str(class_id) + ' %.6f %.6f %.6f %.6f\n' % obj_yolo_info)


class Card:
    """
    A class for storing required information about a card in relation to the ImageGenerator
    """

    def __init__(self, img, card_info, objects, x=None, y=None, theta=None, scale=None):
        """
        :param img: image of the card
        :param card_info: details like name, mana cost, type, set, etc
        :param objects: list of ExtractedObjects like mana & set symbol, etc
        :param x: X-coordinate of the card's centre in relation to the generator
        :param y: Y-coordinate of the card's centre in relation to the generator
        :param theta: angle of rotation of the card in relation to the generator
        :param scale: scale of the card in the generator in relation to the original image
        """
        self.img = img
        self.info = card_info
        self.objects = objects
        self.x = x
        self.y = y
        self.theta = theta
        self.scale = scale

    def shift(self, x, y):
        """
        Apply a X/Y translation on this image

        :param x: amount of X-translation. If range is given, translate by a random amount within that range
        :param y: amount of Y-translation. If range is given, translate by a random amount within that range
        :return: none
        """
        if isinstance(x, tuple) or (isinstance(x, list) and len(x) == 2):
            self.x += random.uniform(x[0], x[1])
        else:
            self.x += x
        if isinstance(y, tuple) or (isinstance(y, list) and len(y) == 2):
            self.y += random.uniform(y[0], y[1])
        else:
            self.y += y

    def rotate(self, theta, centre=(0, 0)):
        """
        Apply a rotation on this image with a centre

        :param theta: amount of rotation in radian (clockwise). If a range is given, rotate by a random amount
                      within that range
        :param centre: coordinate of the centre of the rotation in relation to the centre of this card
        :return: none
        """
        if isinstance(theta, tuple) or (isinstance(theta, list) and len(theta) == 2):
            theta = random.uniform(theta[0], theta[1])

        # If the centre given is the centre of this card, the whole math simplifies a bit
        # (This still works without the if statement, but let's not do useless trigs if we know the answer already)
        # The check compares values: an identity test against a tuple literal would never match
        if centre[0] != 0 or centre[1] != 0:
            # Rotation math
            self.x -= -centre[1] * math.sin(theta) + centre[0] * math.cos(theta)
            self.y -= centre[1] * math.cos(theta) + centre[0] * math.sin(theta)

            # Offset for the coordinate translation
            self.x += centre[0]
            self.y += centre[1]

        self.theta += theta

    def coordinate_in_generator(self, x, y):
        """
        Converting coordinate within the card into the coordinate in the generator it is associated with

        :param x: x coordinate within the card
        :param y: y coordinate within the card
        :return: (x, y) coordinate in the generator, truncated to integers
        """
        # Relative distance in X & Y axis, if the centre of the card is at the origin (0, 0)
        rel_x = x - len(self.img[0]) // 2
        rel_y = y - len(self.img) // 2

        # Scaling
        rel_x *= self.scale
        rel_y *= self.scale

        # Rotation
        sin_t = math.sin(self.theta)
        cos_t = math.cos(self.theta)
        rot_x = rel_x * cos_t - rel_y * sin_t
        rot_y = rel_y * cos_t + rel_x * sin_t

        # Shift
        gen_x = rot_x + self.x
        gen_y = rot_y + self.y

        return int(gen_x), int(gen_y)

    def bb_in_generator(self, key_pts):
        """
        Convert a keypoints of bounding box in card into the coordinate in the generator

        :param key_pts: keypoints of the bounding box
        :return: bounding box represented by 4 points in the generator
        """
        coords_in_gen = [self.coordinate_in_generator(key_pt[0], key_pt[1]) for key_pt in key_pts]
        xs = [pt[0] for pt in coords_in_gen]
        ys = [pt[1] for pt in coords_in_gen]
        x1, x2 = min(xs), max(xs)
        y1, y2 = min(ys), max(ys)
        return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]


class ExtractedObject:
    """
    Simple struct to hold information about an extracted object
    """

    def __init__(self, label, key_pts):
        """
        :param label: label of the object
        :param key_pts: keypoints outlining the object, in the card's own coordinates
        """
        self.label = label
        self.key_pts = key_pts
        # Updated by ImageGenerator.check_visibility()
        self.visible = False


def main(args):
    """
    Generate training images and their label files under Config.data_dir

    :param args: parsed command line arguments, providing num_gen, num_iter, width and height
    :return: none
    """
    random.seed()
    ia.seed(random.randrange(10000))

    bg_images = generate_data.load_dtd(dtd_dir='%s/dtd/images' % Config.data_dir, dump_it=False)
    background = generate_data.Backgrounds(images=bg_images)

    # DataFrame.append() no longer exists in pandas 2.x, so the per-set frames are concatenated once instead
    set_frames = [fetch_data.load_all_cards_text('%s/csv/%s.csv' % (Config.data_dir, set_name))
                  for set_name in Config.all_set_list]
    card_pool = pd.concat(set_frames) if set_frames else pd.DataFrame()

    with open('%s/obj.names' % Config.data_dir) as names_file:
        class_name_list = names_file.read().splitlines()
    # A name listed more than once keeps its last index
    class_ids = {class_name: class_id for class_id, class_name in enumerate(class_name_list)}

    for i in range(args.num_gen):
        # Arbitrarily select top left and right corners for perspective transformation
        # Since the training image are generated with random rotation, don't need to skew all four sides
        skew = [[random.uniform(0, 0.25), 0], [0, 1], [1, 1], [random.uniform(0.75, 1), 0]]
        generator = ImageGenerator(background.get_random(), class_ids, args.width, args.height, skew=skew)
        out_name = ''

        # Use 2 to 5 cards per generator
        for _, card_info in card_pool.sample(random.randint(2, 5)).iterrows():
            img_name = '%s/card_img/png/%s/%s_%s.png' % (Config.data_dir, card_info['set'],
                                                         card_info['collector_number'],
                                                         fetch_data.get_valid_filename(card_info['name']))
            card_img = cv2.imread(img_name)
            if card_img is None:
                # Not available locally yet - try to download it, then read it again
                fetch_data.fetch_card_image(card_info,
                                            out_dir='%s/card_img/png/%s' % (Config.data_dir, card_info['set']))
                card_img = cv2.imread(img_name)
            if card_img is None:
                # A card without an image can't be placed or rendered, so leave it out of this scene
                print('WARNING: card %s is not found!' % img_name)
                continue
            out_name += '%s%s_' % (card_info['set'], card_info['collector_number'])
            detected_object_list = generate_data.apply_bounding_box(card_img, card_info)
            card = Card(card_img, card_info, detected_object_list)
            generator.add_card(card)

        if not generator.cards:
            print('WARNING: no card image is available for this scene, skipping')
            continue

        for j in range(args.num_iter):
            seq = iaa.Sequential([
                iaa.Multiply((0.8, 1.2)),  # darken / brighten the whole image
                iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(64)), per_channel=0.1, size_px_max=[3, 6],
                                      upscale_method="cubic"),  # Lighting
                iaa.AdditiveGaussianNoise(scale=random.uniform(0, 0.05) * 255, per_channel=0.1),  # Noises
                iaa.Dropout(p=[0, 0.05], per_channel=0.1)  # Dropout
            ])

            # Cycle through the three implemented layouts
            if i % 3 == 0:
                generator.generate_non_obstructive()
                generator.export_training_data(visibility=0.0,
                                               out_name='%s/train/non_obstructive_update/%s%d'
                                                        % (Config.data_dir, out_name, j), aug=seq)
            elif i % 3 == 1:
                generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi))
                generator.export_training_data(visibility=0.0,
                                               out_name='%s/train/horizontal_span_update/%s%d'
                                                        % (Config.data_dir, out_name, j), aug=seq)
            else:
                generator.generate_vertical_span(theta=random.uniform(-math.pi, math.pi))
                generator.export_training_data(visibility=0.0,
                                               out_name='%s/train/vertical_span_update/%s%d'
                                                        % (Config.data_dir, out_name, j), aug=seq)

            print('Generated %s%d' % (out_name, j))
            # Use a different background for the next iteration of the same card set
            generator.img_bg = background.get_random()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--num_gen', dest='num_gen', help='Number of training images to generate',
                        type=int, required=True)
    parser.add_argument('-ni', '--num_iter', dest='num_iter', help='Number of iterations to generate each config',
                        type=int, default=1)
    parser.add_argument('-w', '--width', dest='width', help='Width of the training image', type=int, default=1440)
    parser.add_argument('-ht', '--height', dest='height', help='Height of the training image', type=int,
                        default=960)
    args = parser.parse_args()
    main(args)