From dea64611730c84a59c711c61f7f80948f82bcd31 Mon Sep 17 00:00:00 2001
From: Edmond Yoo <hj3yoo@uwaterloo.ca>
Date: Fri, 12 Oct 2018 20:12:47 +0000
Subject: [PATCH] Commit before removing YOLO
---
transform_data.py | 170 +++++++++++++++++++++++++++++++++++---------------------
1 files changed, 105 insertions(+), 65 deletions(-)
diff --git a/transform_data.py b/transform_data.py
index bd6668a..b22084d 100644
--- a/transform_data.py
+++ b/transform_data.py
@@ -1,3 +1,4 @@
+import os
import random
import math
import cv2
@@ -8,8 +9,13 @@
import generate_data
from shapely import geometry
import pytesseract
+import imgaug as ia
+from imgaug import augmenters as iaa
+from imgaug import parameters as iap
card_mask = cv2.imread('data/mask.png')
+data_dir = os.path.abspath('/media/win10/data')
+darknet_dir = os.path.abspath('.')
def key_pts_to_yolo(key_pts, w_img, h_img):
@@ -20,10 +26,10 @@
:param h_img: height of the entire image
:return: <x> <y> <width> <height>
"""
- x1 = min([pt[0] for pt in key_pts])
- x2 = max([pt[0] for pt in key_pts])
- y1 = min([pt[1] for pt in key_pts])
- y2 = max([pt[1] for pt in key_pts])
+ x1 = max(0, min([pt[0] for pt in key_pts]))
+ x2 = min(w_img, max([pt[0] for pt in key_pts]))
+ y1 = max(0, min([pt[1] for pt in key_pts]))
+ y2 = min(h_img, max([pt[1] for pt in key_pts]))
x = (x2 + x1) / 2 / w_img
y = (y2 + y1) / 2 / h_img
width = (x2 - x1) / w_img
@@ -35,14 +41,16 @@
"""
A template for generating a training image.
"""
- def __init__(self, img_bg, width, height, cards=None):
+ def __init__(self, img_bg, class_ids, width, height, skew=None, cards=None):
"""
:param img_bg: background (textile) image
:param width: width of the training image
:param height: height of the training image
+ :param skew: 4 coordinates that indicate the corners (in normalized form) for the perspective transform
:param cards: list of Card objects
"""
self.img_bg = img_bg
+ self.class_ids = class_ids
self.img_result = None
self.width = width
self.height = height
@@ -50,6 +58,15 @@
self.cards = []
else:
self.cards = cards
+
+ # Compute transform matrix for perspective transform
+ if skew is not None:
+ orig_corner = np.array([[0, 0], [0, height], [width, height], [width, 0]], dtype=np.float32)
+ new_corner = np.array([[width * s[0], height * s[1]] for s in skew], dtype=np.float32)
+ self.M = cv2.getPerspectiveTransform(orig_corner, new_corner)
+ pass
+ else:
+ self.M = None
pass
def add_card(self, card, x=None, y=None, theta=0.0, scale=1.0):
@@ -73,13 +90,14 @@
card.scale = scale
pass
- def render(self, visibility=0.5, display=False, debug=False):
+ def render(self, visibility=0.5, display=False, debug=False, aug=None):
"""
Display the current state of the generator
:return: none
"""
self.check_visibility(visibility=visibility)
- img_result = cv2.resize(self.img_bg, (self.width, self.height))
+ #img_result = cv2.resize(self.img_bg, (self.width, self.height))
+ img_result = np.zeros((self.height, self.width, 3), dtype=np.uint8)
for card in self.cards:
if card.x == 0.0 and card.y == 0.0 and card.theta == 0.0 and card.scale == 1.0:
@@ -90,6 +108,12 @@
# Scale & rotate card image
img_card = cv2.resize(card.img, (int(len(card.img[0]) * card.scale), int(len(card.img) * card.scale)))
+ if aug is not None:
+ seq = iaa.Sequential([
+ iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(128)), size_px_max=[1, 3],
+ upscale_method="cubic"), # Lighting
+ ])
+ img_card = seq.augment_image(img_card)
mask_scale = cv2.resize(card_mask, (int(len(card_mask[0]) * card.scale), int(len(card_mask) * card.scale)))
img_mask = cv2.bitwise_and(img_card, mask_scale)
img_rotate = imutils.rotate_bound(img_mask, card.theta / math.pi * 180)
@@ -119,9 +143,10 @@
for ext_obj in card.objects:
if ext_obj.visible:
for pt in ext_obj.key_pts:
- cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (0, 0, 255), 2)
+ cv2.circle(img_result, card.coordinate_in_generator(pt[0], pt[1]), 2, (1, 1, 255), 10)
bounding_box = card.bb_in_generator(ext_obj.key_pts)
- cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (0, 255, 0), 2)
+ cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (1, 255, 1), 5)
+
'''
try:
text = pytesseract.image_to_string(img_result, output_type=pytesseract.Output.DICT)
@@ -131,6 +156,24 @@
'''
img_result = cv2.GaussianBlur(img_result, (5, 5), 0)
+ if self.M is not None:
+ img_result = cv2.warpPerspective(img_result, self.M, (self.width, self.height))
+ if debug:
+ for card in self.cards:
+ for ext_obj in card.objects:
+ if ext_obj.visible:
+ new_pts = np.array([[list(card.coordinate_in_generator(pt[0], pt[1]))]
+ for pt in ext_obj.key_pts], dtype=np.float32)
+ new_pts = cv2.perspectiveTransform(new_pts, self.M)
+ for pt in new_pts:
+ cv2.circle(img_result, (pt[0][0], pt[0][1]), 2, (255, 1, 1), 10)
+
+ img_bg = cv2.resize(self.img_bg, (self.width, self.height))
+ img_result = np.where(img_result, img_result, img_bg)
+
+ if aug is not None:
+ img_result = aug.augment_image(img_result)
+
if display:
cv2.imshow('Result', img_result)
cv2.waitKey(0)
@@ -138,7 +181,7 @@
self.img_result = img_result
pass
- def generate_horizontal_span(self, gap=None, scale=None, shift=None, jitter=None):
+ def generate_horizontal_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
"""
Generating the first scenario where the cards are laid out in a straight horizontal line
:return: True if successfully generated, otherwise False
@@ -170,10 +213,12 @@
card.theta = 0
card.shift(shift, shift)
card.rotate(jitter)
+ card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
x_anchor -= gap
+
return True
- def generate_vertical_span(self, gap=None, scale=None, shift=None, jitter=None):
+ def generate_vertical_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
"""
Generating the second scenario where the cards are laid out in a straight vertical line
:return: True if successfully generated, otherwise False
@@ -206,6 +251,7 @@
card.theta = 0
card.shift(shift, shift)
card.rotate(jitter)
+ card.rotate(theta, centre=(self.width // 2 - x_anchor, self.height // 2 - y_anchor))
y_anchor += gap
return True
@@ -286,12 +332,12 @@
#print("%s: %.1f visible" % (ext_obj.label, visible_area / obj_area * 100))
ext_obj.visible = obj_area * visibility <= visible_area
- def export_training_data(self, out_name, visibility=0.5):
+ def export_training_data(self, out_name, visibility=0.5, aug=None):
"""
Export the generated training image along with the txt file for all bounding boxes
:return: none
"""
- self.render(visibility)
+ self.render(visibility, aug=aug)
cv2.imwrite(out_name + '.jpg', self.img_result)
out_txt = open(out_name+ '.txt', 'w')
for card in self.cards:
@@ -301,7 +347,9 @@
coords_in_gen = [card.coordinate_in_generator(key_pt[0], key_pt[1]) for key_pt in ext_obj.key_pts]
obj_yolo_info = key_pts_to_yolo(coords_in_gen, self.width, self.height)
if ext_obj.label == 'card':
- out_txt.write('0 %.6f %.6f %.6f %.6f\n' % obj_yolo_info)
+ #class_id = self.class_ids[card.info['name']]
+ class_id = 0
+ out_txt.write(str(class_id) + ' %.6f %.6f %.6f %.6f\n' % obj_yolo_info)
pass
elif ext_obj.label[:ext_obj.label.find[':']] == 'mana_symbol':
# TODO
@@ -446,27 +494,39 @@
def main():
random.seed()
+ ia.seed(random.randrange(10000))
- bg_images = generate_data.load_dtd(dump_it=False)
+ bg_images = generate_data.load_dtd(dtd_dir='%s/dtd/images' % data_dir, dump_it=False)
+ #bg_images = [cv2.imread('data/frilly_0007.jpg')]
background = generate_data.Backgrounds(images=bg_images)
+
card_pool = pd.DataFrame()
for set_name in fetch_data.all_set_list:
- df = fetch_data.load_all_cards_text('data/csv/%s.csv' % set_name)
+ df = fetch_data.load_all_cards_text('%s/csv/%s.csv' % (data_dir, set_name))
card_pool = card_pool.append(df)
+ class_ids = {}
+ with open('%s/obj.names' % data_dir) as names_file:
+ class_name_list = names_file.read().splitlines()
+ for i in range(len(class_name_list)):
+ class_ids[class_name_list[i]] = i
- num_gen = 25600
- num_iter = 3
+ num_gen = 60000
+ num_iter = 1
for i in range(num_gen):
- generator = ImageGenerator(background.get_random(), 1440, 960)
- out_name = 'data/train/non_obstructive/'
+ # Arbitrarily select the top-left and top-right corners for the perspective transformation
+ # Since the training images are generated with random rotation, there is no need to skew all four sides
+ skew = [[random.uniform(0, 0.25), 0], [0, 1], [1, 1],
+ [random.uniform(0.75, 1), 0]]
+ generator = ImageGenerator(background.get_random(), class_ids, 1440, 960, skew=skew)
+ out_name = ''
for _, card_info in card_pool.sample(random.randint(2, 5)).iterrows():
- img_name = '../usb/data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'],
+ img_name = '%s/card_img/png/%s/%s_%s.png' % (data_dir, card_info['set'], card_info['collector_number'],
fetch_data.get_valid_filename(card_info['name']))
out_name += '%s%s_' % (card_info['set'], card_info['collector_number'])
card_img = cv2.imread(img_name)
if card_img is None:
- fetch_data.fetch_card_image(card_info, out_dir='../usb/data/png/%s' % card_info['set'])
+ fetch_data.fetch_card_image(card_info, out_dir='%s/card_img/png/%s' % (data_dir, card_info['set']))
card_img = cv2.imread(img_name)
if card_img is None:
print('WARNING: card %s is not found!' % img_name)
@@ -474,51 +534,31 @@
card = Card(card_img, card_info, detected_object_list)
generator.add_card(card)
for j in range(num_iter):
- generator.generate_non_obstructive()
- #generator.generate_horizontal_span()
- generator.export_training_data(visibility=0.0, out_name=out_name + str(j))
+ seq = iaa.Sequential([
+ iaa.Multiply((0.8, 1.2)), # darken / brighten the whole image
+ iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(64)), per_channel=0.1, size_px_max=[3, 6],
+ upscale_method="cubic"), # Lighting
+ iaa.AdditiveGaussianNoise(scale=random.uniform(0, 0.05) * 255, per_channel=0.1), # Noises
+ iaa.Dropout(p=[0, 0.05], per_channel=0.1)
+ ])
+
+ if i % 3 == 0:
+ generator.generate_non_obstructive()
+ generator.export_training_data(visibility=0.0, out_name='%s/train/non_obstructive_update/%s%d'
+ % (data_dir, out_name, j), aug=seq)
+ elif i % 3 == 1:
+ generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi))
+ generator.export_training_data(visibility=0.0, out_name='%s/train/horizontal_span_update/%s%d'
+ % (data_dir, out_name, j), aug=seq)
+ else:
+ generator.generate_vertical_span(theta=random.uniform(-math.pi, math.pi))
+ generator.export_training_data(visibility=0.0, out_name='%s/train/vertical_span_update/%s%d'
+ % (data_dir, out_name, j), aug=seq)
+
+ #generator.generate_horizontal_span(theta=random.uniform(-math.pi, math.pi))
+ #generator.render(display=True, aug=seq, debug=True)
print('Generated %s%d' % (out_name, j))
generator.img_bg = background.get_random()
-
- '''
- #img_bg = cv2.imread('data/frilly_0007.jpg')
- #generator = ImageGenerator(img_bg, 1440, 960)
- card_pool = pd.DataFrame()
- for set_name in fetch_data.all_set_list:
- df = fetch_data.load_all_cards_text('data/csv/%s.csv' % set_name)
- card_info = df.iloc[random.randint(0, df.shape[0] - 1)]
- # Currently ignoring planeswalker cards due to their different card layout
- is_planeswalker = 'Planeswalker' in card_info['type_line']
- if not is_planeswalker:
- card_pool = card_pool.append(card_info)
- for i in [random.randrange(0, card_pool.shape[0] - 1, 1) for _ in range(4)]:
- card_info = card_pool.iloc[i]
- img_name = '../usb/data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'],
- fetch_data.get_valid_filename(card_info['name']))
- print(img_name)
- card_img = cv2.imread(img_name)
- if card_img is None:
- fetch_data.fetch_card_image(card_info, out_dir='../usb/data/png/%s' % card_info['set'])
- card_img = cv2.imread(img_name)
- detected_object_list = generate_data.apply_bounding_box(card_img, card_info)
- card = Card(card_img, card_info, detected_object_list)
-
- generator.add_card(card)
- #generator.add_card(card, x=random.uniform(200, generator.width - 200),
- # y=random.uniform(200, generator.height - 200), theta=random.uniform(-math.pi, math.pi), scale=0.5)
- #card.shift([-100, 100], [-100, 100])
- #card.rotate((0, 0), [-math.pi / 4, math.pi / 4])
- import time
-
- for i in range(100):
- generator.generate_vertical_span()
- generator.render(debug=False)
- generator.export_training_data(out_name='data/test')
- #generator.generate_horizontal_span()
- #generator.render(debug=True)
- #generator.generate_vertical_span()
- #generator.render(debug=True)
- '''
pass
--
Gitblit v1.10.0