| | |
| | | import os |
| | | import random |
| | | import math |
| | | import cv2 |
| | |
| | | import generate_data |
| | | from shapely import geometry |
| | | import pytesseract |
| | | import imgaug as ia |
| | | from imgaug import augmenters as iaa |
| | | from imgaug import parameters as iap |
| | | |
| | | card_mask = cv2.imread('data/mask.png') |
| | | data_dir = os.path.abspath('/media/edmond/My Passport/data') |
| | | darknet_dir = os.path.abspath('darknet') |
| | | |
| | | |
| | | def key_pts_to_yolo(key_pts, w_img, h_img): |
| | |
| | | """ |
| | | A template for generating a training image. |
| | | """ |
| | | def __init__(self, img_bg, width, height, cards=None): |
| | | def __init__(self, img_bg, width, height, skew=None, cards=None): |
| | | """ |
| | | :param img_bg: background (textile) image |
| | | :param width: width of the training image |
| | | :param height: height of the training image |
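| | | :param skew: four corner positions in normalized form (fractions of width and height), ordered top-left, |
| | | bottom-left, bottom-right, top-right, that the image corners are mapped to by the perspective transform |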
| | | :param cards: list of Card objects |
| | | """ |
| | | self.img_bg = img_bg |
| | |
| | | self.cards = [] |
| | | else: |
| | | self.cards = cards |
| | | |
| | | # Compute transform matrix for perspective transform |
| | | if skew is not None: |
| | | orig_corner = np.array([[0, 0], [0, height], [width, height], [width, 0]], dtype=np.float32) |
| | | new_corner = np.array([[width * s[0], height * s[1]] for s in skew], dtype=np.float32) |
| | | self.M = cv2.getPerspectiveTransform(orig_corner, new_corner) |
| | | pass |
| | | else: |
| | | self.M = None |
| | | pass |
| | | |
| | | def add_card(self, card, x=None, y=None, theta=0.0, scale=1.0): |
| | |
| | | card.scale = scale |
| | | pass |
| | | |
| | | def render(self, visibility=0.5, display=False, debug=False): |
| | | def render(self, visibility=0.5, display=False, debug=False, aug=None): |
| | | """ |
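| | | Render the current state of the generator and store the image in self.img_result |
| | | :param visibility: threshold passed on to check_visibility() before drawing |
| | | :param display: if True, show the result in a window and wait for a key press |
| | | :param debug: if True, mark the key points of visible objects on the result |
| | | :param aug: optional augmenter; when given, its augment_image() is applied to the composed image |
| | | :return: none |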
| | | """ |
| | | self.check_visibility(visibility=visibility) |
| | | img_result = cv2.resize(self.img_bg, (self.width, self.height)) |
| | | #img_result = cv2.resize(self.img_bg, (self.width, self.height)) |
| | | img_result = np.zeros((self.height, self.width, 3), dtype=np.uint8) |
| | | |
| | | for card in self.cards: |
| | | if card.x == 0.0 and card.y == 0.0 and card.theta == 0.0 and card.scale == 1.0: |
| | |
| | | for ext_obj in card.objects: |
| | | if ext_obj.visible: |
| | | for pt in ext_obj.key_pts: |
| | | cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (0, 0, 255), 2) |
| | | cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (1, 1, 255), 10) |
| | | bounding_box = card.bb_in_generator(ext_obj.key_pts) |
| | | cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (0, 255, 0), 2) |
| | | cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (1, 255, 1), 5) |
| | | |
| | | ''' |
| | | try: |
| | | text = pytesseract.image_to_string(img_result, output_type=pytesseract.Output.DICT) |
| | |
| | | ''' |
| | | img_result = cv2.GaussianBlur(img_result, (5, 5), 0) |
| | | |
| | | if self.M is not None: |
| | | img_result = cv2.warpPerspective(img_result, self.M, (self.width, self.height)) |
| | | if debug: |
| | | for card in self.cards: |
| | | for ext_obj in card.objects: |
| | | if ext_obj.visible: |
| | | new_pts = np.array([[list(card.coordinate_in_generator(pt[0], pt[1]))] |
| | | for pt in ext_obj.key_pts], dtype=np.float32) |
| | | new_pts = cv2.perspectiveTransform(new_pts, self.M) |
| | | for pt in new_pts: |
| | | cv2.circle(img_result, (pt[0][0], pt[0][1]), 2, (255, 1, 1), 10) |
| | | |
| | | img_bg = cv2.resize(self.img_bg, (self.width, self.height)) |
| | | img_result = np.where(img_result, img_result, img_bg) |
| | | |
| | | if aug is not None: |
| | | img_result = aug.augment_image(img_result) |
| | | |
| | | if display: |
| | | cv2.imshow('Result', img_result) |
| | | cv2.waitKey(0) |
| | |
| | | self.img_result = img_result |
| | | pass |
| | | |
| | | def generate_horizontal_span(self, gap=None, scale=None, shift=None, jitter=None): |
| | | def generate_horizontal_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None): |
| | | """ |
| | | Generating the first scenario where the cards are laid out in a straight horizontal line |
| | | :return: True if successfully generated, otherwise False |
| | |
| | | card.theta = 0 |
| | | card.shift(shift, shift) |
| | | card.rotate(jitter) |
| | | card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor)) |
| | | x_anchor -= gap |
| | | |
| | | return True |
| | | |
| | | def generate_vertical_span(self, gap=None, scale=None, shift=None, jitter=None): |
| | | def generate_vertical_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None): |
| | | """ |
| | | Generating the second scenario where the cards are laid out in a straight vertical line |
| | | :return: True if successfully generated, otherwise False |
| | |
| | | card.theta = 0 |
| | | card.shift(shift, shift) |
| | | card.rotate(jitter) |
| | | card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor)) |
| | | y_anchor += gap |
| | | return True |
| | | |
| | |
| | | #print("%s: %.1f visible" % (ext_obj.label, visible_area / obj_area * 100)) |
| | | ext_obj.visible = obj_area * visibility <= visible_area |
| | | |
| | | def export_training_data(self, out_name, visibility=0.5): |
| | | def export_training_data(self, out_name, visibility=0.5, aug=None): |
| | | """ |
| | | Export the generated training image along with the txt file for all bounding boxes |
| | | :return: none |
| | | """ |
| | | self.render(visibility) |
| | | self.render(visibility, aug=aug) |
| | | cv2.imwrite(out_name + '.jpg', self.img_result) |
| | | out_txt = open(out_name+ '.txt', 'w') |
| | | for card in self.cards: |
| | |
| | | |
| | | def main(): |
| | | random.seed() |
| | | ia.seed(random.randrange(10000)) |
| | | |
| | | bg_images = generate_data.load_dtd(dump_it=False) |
| | | bg_images = generate_data.load_dtd(dtd_dir='%s/dtd/images' % data_dir, dump_it=False) |
| | | #bg_images = [cv2.imread('data/frilly_0007.jpg')] |
| | | background = generate_data.Backgrounds(images=bg_images) |
| | | |
| | | card_pool = pd.DataFrame() |
| | | for set_name in fetch_data.all_set_list: |
| | | df = fetch_data.load_all_cards_text('data/csv/%s.csv' % set_name) |
| | | df = fetch_data.load_all_cards_text('%s/csv/%s.csv' % (data_dir, set_name)) |
| | | card_pool = card_pool.append(df) |
| | | |
| | | num_gen = 25600 |
| | | num_iter = 3 |
| | | num_gen = 60000 |
| | | num_iter = 1 |
| | | |
| | | for i in range(num_gen): |
| | | generator = ImageGenerator(background.get_random(), 1440, 960) |
| | | out_name = 'data/train/non_obstructive/' |
| | | # Randomly move only the top-left and top-right corners inward for the perspective transformation. |
| | | # Since the training images are generated with random rotation, there is no need to skew all four corners. |
| | | skew = [[random.uniform(0, 0.25), 0], [0, 1], [1, 1], |
| | | [random.uniform(0.75, 1), 0]] |
| | | generator = ImageGenerator(background.get_random(), 1440, 960, skew=skew) |
| | | out_name = '' |
| | | for _, card_info in card_pool.sample(random.randint(2, 5)).iterrows(): |
| | | img_name = '../usb/data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'], |
| | | img_name = '%s/card_img/png/%s/%s_%s.png' % (data_dir, card_info['set'], card_info['collector_number'], |
| | | fetch_data.get_valid_filename(card_info['name'])) |
| | | out_name += '%s%s_' % (card_info['set'], card_info['collector_number']) |
| | | card_img = cv2.imread(img_name) |
| | | if card_img is None: |
| | | fetch_data.fetch_card_image(card_info, out_dir='../usb/data/png/%s' % card_info['set']) |
| | | fetch_data.fetch_card_image(card_info, out_dir='%s/card_img/png/%s' % (data_dir, card_info['set'])) |
| | | card_img = cv2.imread(img_name) |
| | | if card_img is None: |
| | | print('WARNING: card %s is not found!' % img_name) |
| | |
| | | card = Card(card_img, card_info, detected_object_list) |
| | | generator.add_card(card) |
| | | for j in range(num_iter): |
| | | generator.generate_non_obstructive() |
| | | #generator.generate_horizontal_span() |
| | | generator.export_training_data(visibility=0.0, out_name=out_name + str(j)) |
| | | seq = iaa.Sequential([ |
| | | iaa.Multiply((0.8, 1.2)), # darken / brighten the whole image |
| | | iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(64)), per_channel=0.1, size_px_max=[3, 6], |
| | | upscale_method="cubic"), # Lighting |
| | | iaa.AdditiveGaussianNoise(scale=random.uniform(0, 0.05) * 255, per_channel=0.1), # Noises |
| | | iaa.Dropout(p=[0, 0.05], per_channel=0.1) |
| | | ]) |
| | | if i % 3 == 0: |
| | | generator.generate_non_obstructive() |
| | | generator.export_training_data(visibility=0.0, out_name='%s/train/non_obstructive/%s_%d' |
| | | % (data_dir, out_name, j), aug=seq) |
| | | elif i % 3 == 1: |
| | | generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi)) |
| | | generator.export_training_data(visibility=0.0, out_name='%s/train/horizontal_span/%s_%d' |
| | | % (data_dir, out_name, j), aug=seq) |
| | | else: |
| | | generator.generate_vertical_span(theta=random.uniform(-math.pi, math.pi)) |
| | | generator.export_training_data(visibility=0.0, out_name='%s/train/vertical_span/%s_%d' |
| | | % (data_dir, out_name, j), aug=seq) |
| | | #generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi)) |
| | | #generator.render(display=True, aug=seq, debug=True) |
| | | print('Generated %s%d' % (out_name, j)) |
| | | generator.img_bg = background.get_random() |
| | | |
| | | ''' |
| | | #img_bg = cv2.imread('data/frilly_0007.jpg') |
| | | #generator = ImageGenerator(img_bg, 1440, 960) |
| | | card_pool = pd.DataFrame() |
| | | for set_name in fetch_data.all_set_list: |
| | | df = fetch_data.load_all_cards_text('data/csv/%s.csv' % set_name) |
| | | card_info = df.iloc[random.randint(0, df.shape[0] - 1)] |
| | | # Currently ignoring planeswalker cards due to their different card layout |
| | | is_planeswalker = 'Planeswalker' in card_info['type_line'] |
| | | if not is_planeswalker: |
| | | card_pool = card_pool.append(card_info) |
| | | for i in [random.randrange(0, card_pool.shape[0] - 1, 1) for _ in range(4)]: |
| | | card_info = card_pool.iloc[i] |
| | | img_name = '../usb/data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'], |
| | | fetch_data.get_valid_filename(card_info['name'])) |
| | | print(img_name) |
| | | card_img = cv2.imread(img_name) |
| | | if card_img is None: |
| | | fetch_data.fetch_card_image(card_info, out_dir='../usb/data/png/%s' % card_info['set']) |
| | | card_img = cv2.imread(img_name) |
| | | detected_object_list = generate_data.apply_bounding_box(card_img, card_info) |
| | | card = Card(card_img, card_info, detected_object_list) |
| | | |
| | | generator.add_card(card) |
| | | #generator.add_card(card, x=random.uniform(200, generator.width - 200), |
| | | # y=random.uniform(200, generator.height - 200), theta=random.uniform(-math.pi, math.pi), scale=0.5) |
| | | #card.shift([-100, 100], [-100, 100]) |
| | | #card.rotate((0, 0), [-math.pi / 4, math.pi / 4]) |
| | | import time |
| | | |
| | | for i in range(100): |
| | | generator.generate_vertical_span() |
| | | generator.render(debug=False) |
| | | generator.export_training_data(out_name='data/test') |
| | | #generator.generate_horizontal_span() |
| | | #generator.render(debug=True) |
| | | #generator.generate_vertical_span() |
| | | #generator.render(debug=True) |
| | | ''' |
| | | pass |
| | | |
| | | |