From c59db54775606349f6ba5c6cab7fcb34498bb31d Mon Sep 17 00:00:00 2001
From: Edmond Yoo <hj3yoo@uwaterloo.ca>
Date: Sat, 13 Oct 2018 18:18:42 +0000
Subject: [PATCH] Cleaning & commenting #2 - updating comments & docstrings
---
fetch_data.py | 67 ++++++++---
transform_data.py | 104 ++++++++--------
card_detector.py | 10 +
generate_data.py | 141 ++++-------------------
opencv_dnn.py | 9 +
README.md | 3
6 files changed, 146 insertions(+), 188 deletions(-)
diff --git a/README.md b/README.md
index 0ccd80e..059a3c5 100644
--- a/README.md
+++ b/README.md
@@ -127,12 +127,14 @@
In order to identify the card from the snippet of the card image, I'm using perceptual hashing. When a card is detected in YOLO, I compute the pHash value of its image and compare it against the pHash of every card in the database to find the match. This process takes O(n * m) time, where n is the number of cards detected in the image and m is the number of cards in the database. With more than 10000 different cards printed in MTG history, this computation was the first bottleneck. For the 50ms increment per detected card mentioned above, the majority of that time was spent subtracting two 1024-bit hashes 10000+ times - that's more than 10^7 bit comparisons for every single detection!
Although I couldn't cut down on the number of arithmetic operations, I did find another place that was unnecessarily slowing things down. The following is the elapsed time for subtracting the pHash of all 10000 entries in the pandas dataframe:
+
| hash_size | elapsed_time (ms) |
|---|---|
| 8 | 23.01 |
| 16 | 25.72 |
| 32 | 33.38 |
| 64 | 65.98 |
+
If you plot (hash_size)^2 against elapsed_time, you get a nearly linear graph with a huge constant y-intercept:
<img src="https://github.com/hj3yoo/mtg_card_detector/blob/master/figures/6_time_plot_1.png">
@@ -163,6 +165,7 @@
The execution time of that code snippet is 11.65ms on average, which is slightly over half of the 22.4ms constant delay. That's a lot of time that can be cut out.
By pre-emptively flattening the hashes and inlining the hash subtraction code (yes, I know it's not good OOP design, but the performance gain is too large to pass up), that constant time can be trimmed significantly - a sketch of the idea follows the table below:
+
| hash_size | elapsed_time (ms) |
|---|---|
| 8 | 9.9 |
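
Here's a minimal sketch of the flattening idea, assuming the pool's hashes come from `imagehash.phash()` (the variable names are illustrative, not the actual ones in this repo):

```python
import numpy as np

# Slow path: ImageHash.__sub__ re-flattens and re-validates the underlying
# boolean arrays on every single subtraction
def match_naive(query_hash, pool_hashes):
    diffs = [query_hash - h for h in pool_hashes]
    return int(np.argmin(diffs))

# Fast path: flatten every hash exactly once, up front, into one 2D boolean
# array, then compute all 10000+ Hamming distances in a single vectorized pass
def flatten_pool(pool_hashes):
    return np.stack([h.hash.flatten() for h in pool_hashes])

def match_flat(query_hash, pool_flat):
    query = query_hash.hash.flatten()
    diffs = np.count_nonzero(pool_flat != query, axis=1)
    return int(np.argmin(diffs))
```

The arithmetic is identical either way; only the per-call overhead changes, which is exactly the constant y-intercept in the plot above.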
diff --git a/card_detector.py b/card_detector.py
index aa8bd6a..5b8bd81 100644
--- a/card_detector.py
+++ b/card_detector.py
@@ -1,9 +1,17 @@
import cv2
import numpy as np
-import pandas as pd
import math
from screeninfo import get_monitors
+"""
+This was the first attempt at identifying MTG cards using only classical computer vision techniques.
+Most of the process is similar to the one used in opencv_dnn.py, but it instead tries to use the
+Hough transform to identify the straight edges of the card.
+However, there were difficulties in associating multiple edges into a rectangle, as some of them
+either didn't show up or were too short to intersect.
+There was also no way to dynamically adjust the various thresholds; even finding all the edges was
+highly conditional.
+"""
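+
+# Illustrative sketch of the Hough-based approach described above (an assumption
+# about the general pattern, not this repo's exact pipeline): blur, run Canny
+# edge detection, then extract segments with the probabilistic Hough transform.
+def _hough_edges_sketch(img, min_hyst=80, max_hyst=200):
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+    edges = cv2.Canny(blurred, min_hyst, max_hyst)
+    # Each element is [[x1, y1, x2, y2]]; grouping these segments into a
+    # card-shaped rectangle is the step that proved unreliable here
+    lines = cv2.HoughLinesP(edges, 1, math.pi / 180, threshold=50,
+                            minLineLength=100, maxLineGap=10)
+    return [] if lines is None else [line[0] for line in lines]
+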
def detect_a_card(img, thresh_val=80, blur_radius=None, dilate_radius=None, min_hyst=80, max_hyst=200,
min_line_length=None, max_line_gap=None, debug=False):
diff --git a/fetch_data.py b/fetch_data.py
index db7d30c..4f36181 100644
--- a/fetch_data.py
+++ b/fetch_data.py
@@ -5,9 +5,13 @@
import re
import os
import transform_data
-import time
+"""
+Note: All code in this file relies on the Scryfall API to aggregate the card database and card images.
+Scryfall API doc is available at: https://scryfall.com/docs/api
+"""
+# List of all black-bordered sets printed from 8th edition onwards (8ed and 9ed themselves are white-bordered)
# Core & expansion sets with 2003 frame
set_2003_list = ['mrd', 'dst', '5dn', 'chk', 'bok', 'sok', 'rav', 'gpt', 'dis', 'csp', 'tsp', 'plc', 'fut', '10e',
'lrw', 'mor', 'shm', 'eve', 'ala', 'con', 'arb', 'm10', 'zen', 'wwk', 'roe', 'm11', 'som', 'mbs',
@@ -28,7 +32,13 @@
def fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=layout:normal+format:modern+lang:en+frame:2003',
- csv_name=''):
+ csv_name=None):
+ """
+    Given a Scryfall API query URL, aggregate all card information across pages and convert it from JSON to a table
+ :param url: query URL
+ :param csv_name: path of the csv file to save the result
+    :return: pandas dataframe of the fetched cards
+ """
has_more = True
cards = []
# get cards dataset as a json from the query
@@ -45,25 +55,19 @@
# Convert them into a dataframe, and truncate unnecessary columns
df = pd.DataFrame.from_dict(cards)
- if csv_name != '':
+ if csv_name is not None:
#df = df[['artist', 'border_color', 'collector_number', 'color_identity', 'colors', 'flavor_text', 'image_uris',
# 'mana_cost', 'legalities', 'name', 'oracle_text', 'rarity', 'type_line', 'set', 'set_name', 'power',
# 'toughness']]
- #df.to_json(csv_name)
- df.to_csv(csv_name, sep=';') # Comma doesn't work, since some columns are saved as a dict
-
+        df.to_csv(csv_name, sep=';')    # Comma separator doesn't work, since some columns are saved as a dict
return df
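+
+# For reference, the pagination above boils down to the pattern below - a
+# simplified, unused sketch: Scryfall list responses carry 'data', 'has_more',
+# and 'next_page' fields, and the docs ask for a short delay between requests.
+def _pagination_sketch(url):
+    import time
+    import requests
+    cards = []
+    while url is not None:
+        page = requests.get(url).json()
+        cards += page['data']
+        # Follow the cursor until the API reports there are no more pages
+        url = page.get('next_page') if page.get('has_more') else None
+        time.sleep(0.1)  # courtesy delay between requests
+    return cards
+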
def load_all_cards_text(csv_name):
- #with open(csv_name, 'r') as json_file:
- # cards = json.loads(json_file.read())
- #df = pd.DataFrame.from_dict(cards)
- df = pd.read_csv(csv_name, sep=';')
+    df = pd.read_csv(csv_name, sep=';')    # Comma separator doesn't work, since some columns are saved as a dict
return df
-# Pulled from Django framework (https://github.com/django/django/blob/master/django/utils/text.py)
def get_valid_filename(s):
"""
Return the given string converted to a string that can be used for a clean
@@ -72,28 +76,52 @@
underscore, or dot.
>>> get_valid_filename("john's portrait in 2004.jpg")
'johns_portrait_in_2004.jpg'
+ From: https://github.com/django/django/blob/master/django/utils/text.py
+ :param s: input string
+ :return: string of valid filename
"""
s = str(s).strip().replace(' ', '_')
return re.sub(r'(?u)[^-\w.]', '', s)
-def fetch_all_cards_image(df, out_dir='', size='png'):
+def fetch_all_cards_image(df, out_dir=None, size='png'):
+ """
+ Download card images from Scryfall database
+ :param df: pandas dataframe (or series) of cards
+ :param out_dir: path of output directory
+ :param size: Image format given by Scryfall API - 'png', 'large', 'normal', 'small', 'art_crop', 'border_crop'
+ :return:
+ """
+ if size != 'png':
+ print('Note: this repo has been implemented using only \'png\' size. '
+              'Using %s may result in unexpected behaviour in other parts of this repo.' % size)
if isinstance(df, pd.Series):
+        # df is a single card entry (one row)
fetch_card_image(df, out_dir, size)
else:
+        # df is a dataframe containing a list of cards
for ind, row in df.iterrows():
fetch_card_image(row, out_dir, size)
-def fetch_card_image(row, out_dir='', size='png'):
- if out_dir == '':
+def fetch_card_image(row, out_dir=None, size='png'):
+ """
+ Download a card's image from Scryfall database
+ :param row: pandas series including the card's information
+ :param out_dir: path of the output directory
+ :param size: Image format given by Scryfall API - 'png', 'large', 'normal', 'small', 'art_crop', 'border_crop'
+ :return:
+ """
+ if out_dir is None:
out_dir = '%s/card_img/%s/%s' % (transform_data.data_dir, size, row['set'])
if not os.path.exists(out_dir):
os.makedirs(out_dir)
+    # Extract the card's name and its image URL accordingly
+    # Double-faced cards have a different format, and result in two separate card images
png_urls = []
card_names = []
- if row['layout'] == 'transform' or row['layout'] == 'double_faced_token':
+ if row['layout'] in ['transform', 'double_faced_token']:
if isinstance(row['card_faces'], str):  # the dict is stored as a string in the csv, so parse it back
card_faces = ast.literal_eval(row['card_faces'])
else:
@@ -116,21 +144,22 @@
def main():
+    # Query card data for each set, then download the corresponding card images
for set_name in all_set_list:
csv_name = '%s/csv/%s.csv' % (transform_data.data_dir, set_name)
print(csv_name)
if not os.path.isfile(csv_name):
- df = fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=set:%s+lang:en'
- % set_name, csv_name=csv_name)
+ df = fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=set:%s+lang:en' % set_name,
+ csv_name=csv_name)
else:
df = load_all_cards_text(csv_name)
df = df.sort_values('collector_number')  # sort_values returns a new dataframe, so re-assign it
fetch_all_cards_image(df, out_dir='%s/card_img/png/%s' % (transform_data.data_dir, set_name))
+
#df = fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=layout:normal+lang:en+frame:2003',
# csv_name='data/csv/all.csv')
- pass
+ return
if __name__ == '__main__':
main()
- pass
diff --git a/generate_data.py b/generate_data.py
index 7a2ce87..c283b97 100644
--- a/generate_data.py
+++ b/generate_data.py
@@ -5,16 +5,18 @@
import math
import random
import os
-import re
import cv2
import fetch_data
-import sys
import numpy as np
import pandas as pd
import transform_data
-# Referenced from geaxgx's playing-card-detection: https://github.com/geaxgx/playing-card-detection
+
class Backgrounds:
+ """
+    Container class for all background images used by the generator
+ Referenced from geaxgx's playing-card-detection: https://github.com/geaxgx/playing-card-detection
+ """
def __init__(self, images=None, dumps_dir='data/dtd/images'):
if images is not None:
self._images = images
@@ -40,8 +42,15 @@
def load_dtd(dtd_dir='data/dtd/images', dump_it=True, dump_batch_size=1000):
+ """
+    Load the Describable Textures Dataset (DTD) from local storage
+    :param dtd_dir: path of the DTD images folder
+    :param dump_it: flag for pickling the loaded images
+    :param dump_batch_size: number of images stored per pickle file
+ :return: list of all DTD images
+ """
if not os.path.exists(dtd_dir):
- print('Warning: directory for DTD 5s doesn\'t exist.' % dtd_dir)
+ print('Warning: directory for DTD %s doesn\'t exist.' % dtd_dir)
print('You can download the dataset using this command:'
'!wget https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz')
return []
@@ -64,121 +73,19 @@
def apply_bounding_box(img, card_info, display=False):
+ """
+ Given a card image, extract specific features that can be used to train a model.
+    Note: Mana & set symbols have been dropped from the feature list. Refer to previous commits for their implementation:
+ https://github.com/hj3yoo/mtg_card_detector/tree/bb34d4e13da0f4753fbdefee837f54b16149d3ef
+ :param img: image of the card
+ :param card_info: characteristics of this card
+ :param display: flag for displaying the extracted features
+ :return:
+ """
# List of detected objects to be fed into the neural net
# The first object is the entire card
- detected_object_list = [transform_data.ExtractedObject('card', [(0, 0), (len(img[0]), 0), (len(img[0]), len(img)), (0, len(img))])]
- '''
- # Mana symbol - They are located on the top right side of the card, next to the name
- # Their position is stationary, and is right-aligned.
- has_mana_cost = isinstance(card_info['mana_cost'], str) # Cards with no mana cost will have nan
- if has_mana_cost:
- mana_cost = re.findall('\{(.*?)\}', card_info['mana_cost'])
- x_anchor = 683
- y_anchor = 65
-
- # Cards with specific type or from old sets have their symbol at a different position
- if card_info['set'] in ['8ed', 'mrd', 'dst', '5dn']:
- y_anchor -= 2
-
- for i in reversed(range(len(mana_cost))):
- # Hybrid mana symbol are larger than a normal symbol
- is_hybrid = '/' in mana_cost[i]
- if is_hybrid:
- x1 = x_anchor - 47
- x2 = x_anchor + 2
- y1 = y_anchor - 8
- y2 = y_anchor + 43
- x_anchor -= 45
- else:
- x1 = x_anchor - 39
- x2 = x_anchor
- y1 = y_anchor
- y2 = y_anchor + 43
- x_anchor -= 37
- # Append them to the list of bounding box with the appropriate label
- symbol_name = 'mana_symbol:' + mana_cost[i]
- key_pts = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
- detected_object_list.append(transform_data.ExtractedObject(symbol_name, key_pts))
-
- if display:
- img_symbol = img[y1:y2, x1:x2]
- cv2.imshow('symbol', img_symbol)
- cv2.waitKey(0)
-
- # Set symbol - located on the right side of the type box in the centre of the card, next to the card type
- # Only one symbol exists, and its colour varies by rarity.
- if card_info['set'] in ['8ed']:
- x1 = 622
- x2 = 670
- elif card_info['set'] in ['mrd', 'm10', 'm11', 'm12', 'm13', 'm14']:
- x1 = 602
- x2 = 684
- elif card_info['set'] in ['dst']:
- x1 = 636
- x2 = 673
- elif card_info['set'] in ['5dn']:
- x1 = 630
- x2 = 675
- elif card_info['set'] in ['bok', 'rtr']:
- x1 = 633
- x2 = 683
- elif card_info['set'] in ['sok', 'mbs']:
- x1 = 638
- x2 = 683
- elif card_info['set'] in ['rav']:
- x1 = 640
- x2 = 678
- elif card_info['set'] in ['csp']:
- x1 = 650
- x2 = 683
- elif card_info['set'] in ['tsp', 'lrw', 'zen', 'wwk', 'ths']:
- x1 = 640
- x2 = 683
- elif card_info['set'] in ['plc', 'fut', 'shm', 'eve']:
- x1 = 625
- x2 = 685
- elif card_info['set'] in ['10e']:
- x1 = 623
- x2 = 680
- elif card_info['set'] in ['mor', 'roe', 'bng']:
- x1 = 637
- x2 = 687
- elif card_info['set'] in ['ala', 'arb']:
- x1 = 635
- x2 = 680
- elif card_info['set'] in ['nph']:
- x1 = 642
- x2 = 678
- elif card_info['set'] in ['gtc']:
- x1 = 610
- x2 = 683
- elif card_info['set'] in ['dgm']:
- x1 = 618
- x2 = 678
- else:
- x1 = 630
- x2 = 683
- y1 = 589
- y2 = 636
- # Append them to the list of bounding box with the appropriate label
- symbol_name = 'set_symbol:' + card_info['set']
- key_pts = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
- detected_object_list.append(transform_data.ExtractedObject(symbol_name, key_pts))
-
- if display:
- img_symbol = img[y1:y2, x1:x2]
- cv2.imshow('symbol', img_symbol)
- cv2.waitKey(0)
-
- # Name box - The long bar on the top with card name and mana symbols
- # TODO
-
- # Type box - The long bar on the middle with card type and set symbols
- # TODO
-
- # Image box - the large image on the top half of the card
- # TODO
- '''
+ detected_object_list = [transform_data.ExtractedObject('card', [(0, 0), (len(img[0]), 0), (len(img[0]), len(img)),
+ (0, len(img))])]
return detected_object_list
diff --git a/opencv_dnn.py b/opencv_dnn.py
index 624aea8..44503ef 100644
--- a/opencv_dnn.py
+++ b/opencv_dnn.py
@@ -13,6 +13,15 @@
import transform_data
+"""
+As of the current version, the YOLO network has been removed from this code during optimization.
+It turned out that YOLO added too much processing delay, and its benefits couldn't justify
+such a heavy cost.
+If you're interested in the implementation using YOLO, please check out the previous commit:
+https://github.com/hj3yoo/mtg_card_detector/tree/dea64611730c84a59c711c61f7f80948f82bcd31
+"""
+
+
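+# A minimal sketch of the per-card hashing that calc_image_hashes() performs,
+# assuming the pHash comes from the imagehash library (whose phash() parameters
+# match the hash_size/highfreq_factor arguments below); `img_path` is an
+# illustrative name, not a variable used elsewhere in this repo.
+def _phash_sketch(img_path, hash_size=32, highfreq_factor=4):
+    from PIL import Image
+    import imagehash
+    card_hash = imagehash.phash(Image.open(img_path), hash_size=hash_size,
+                                highfreq_factor=highfreq_factor)
+    # Pre-flatten once so matching can subtract the raw bit arrays directly
+    return card_hash.hash.flatten()
+
+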
def calc_image_hashes(card_pool, save_to=None, hash_size=32, highfreq_factor=4):
"""
Calculate the perceptual hash (pHash) value for each card in the database, then store them if needed
diff --git a/transform_data.py b/transform_data.py
index b22084d..6b5f477 100644
--- a/transform_data.py
+++ b/transform_data.py
@@ -8,7 +8,6 @@
import fetch_data
import generate_data
from shapely import geometry
-import pytesseract
import imgaug as ia
from imgaug import augmenters as iaa
from imgaug import parameters as iap
@@ -39,7 +38,9 @@
class ImageGenerator:
"""
- A template for generating a training image.
+    A template for generating a training image.
+    An ImageGenerator contains a background image, a list of cards, and other environmental parameters used to
+    set up a training image for the YOLO network.
"""
def __init__(self, img_bg, class_ids, width, height, skew=None, cards=None):
"""
@@ -59,7 +60,7 @@
else:
self.cards = cards
- # Compute transform matrix for perspective transform
+ # Compute transform matrix for perspective transform (used for skewing the final result)
if skew is not None:
orig_corner = np.array([[0, 0], [0, height], [width, height], [width, 0]], dtype=np.float32)
new_corner = np.array([[width * s[0], height * s[1]] for s in skew], dtype=np.float32)
@@ -79,6 +80,7 @@
:param scale: new scale for the card
:return: none
"""
+ # If the position isn't given, push it out of the image so that it won't be visible during rendering
if x is None:
x = -len(card.img[0]) / 2
if y is None:
@@ -90,24 +92,27 @@
card.scale = scale
pass
- def render(self, visibility=0.5, display=False, debug=False, aug=None):
+ def render(self, visibility=0.5, aug=None, display=False, debug=False):
"""
- Display the current state of the generator
+ Display the current state of the generator.
+        :param visibility: portion of the card's image that must not be overlapped by other cards for the card to
+                           be considered visible
+        :param aug: image augmenter to apply during rendering
+        :param display: flag for displaying the rendered result
+        :param debug: flag for extra debug output
:return: none
"""
self.check_visibility(visibility=visibility)
- #img_result = cv2.resize(self.img_bg, (self.width, self.height))
img_result = np.zeros((self.height, self.width, 3), dtype=np.uint8)
for card in self.cards:
- if card.x == 0.0 and card.y == 0.0 and card.theta == 0.0 and card.scale == 1.0:
- continue
card_x = int(card.x + 0.5)
card_y = int(card.y + 0.5)
- #print(card_x, card_y, card.theta, card.scale)
# Scale & rotate card image
img_card = cv2.resize(card.img, (int(len(card.img[0]) * card.scale), int(len(card.img) * card.scale)))
+            # Add random glare to each card - this happens frequently in real life, as MTG cards reflect
+            # light very well
if aug is not None:
seq = iaa.Sequential([
iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(128)), size_px_max=[1, 3],
@@ -147,15 +152,9 @@
bounding_box = card.bb_in_generator(ext_obj.key_pts)
cv2.rectangle(img_result, bounding_box[0], bounding_box[2], (1, 255, 1), 5)
- '''
- try:
- text = pytesseract.image_to_string(img_result, output_type=pytesseract.Output.DICT)
- print(text)
- except pytesseract.pytesseract.TesseractError:
- pass
- '''
img_result = cv2.GaussianBlur(img_result, (5, 5), 0)
+        # Skew the result if a perspective transform was provided
if self.M is not None:
img_result = cv2.warpPerspective(img_result, self.M, (self.width, self.height))
if debug:
@@ -171,10 +170,11 @@
img_bg = cv2.resize(self.img_bg, (self.width, self.height))
img_result = np.where(img_result, img_result, img_bg)
+ # Apply image augmentation
if aug is not None:
img_result = aug.augment_image(img_result)
- if display:
+ if display or debug:
cv2.imshow('Result', img_result)
cv2.waitKey(0)
@@ -184,6 +184,11 @@
def generate_horizontal_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
"""
Generating the first scenario where the cards are laid out in a straight horizontal line
+        :param gap: horizontal offset between adjacent cards
+        :param scale: scale of each card in the generator
+        :param theta: rotation of the entire span, in radians
+        :param shift: range of arbitrary offset for each card
+        :param jitter: range of in-place rotation for each card, in radians
:return: True if successfully generated, otherwise False
"""
# Set scale of the cards, variance of shift & jitter to be applied if they're not given
@@ -197,7 +202,8 @@
shift = [-card_size[1] * scale * 0.05, card_size[1] * scale * 0.05]
pass
if jitter is None:
- jitter = [-math.pi / 18, math.pi / 18] # Plus minus 10 degrees
+ # Plus minus 10 degrees
+ jitter = [-math.pi / 18, math.pi / 18]
if gap is None:
# 25% of the card's width - set symbol and 1-2 mana symbols will be visible on each card
gap = card_size[0] * scale * 0.4
@@ -221,6 +227,12 @@
def generate_vertical_span(self, gap=None, scale=None, theta=0, shift=None, jitter=None):
"""
Generating the second scenario where the cards are laid out in a straight vertical line
+        :param gap: vertical offset between adjacent cards
+        :param scale: scale of each card in the generator
+        :param theta: rotation of the entire span, in radians
+        :param shift: range of arbitrary offset for each card
+        :param jitter: range of in-place rotation for each card, in radians
:return: True if successfully generated, otherwise False
"""
# Set scale of the cards, variance of shift & jitter to be applied if they're not given
@@ -260,12 +272,14 @@
Generating the third scenario where the cards are laid out in a fan shape
:return: True if successfully generated, otherwise False
"""
+ # TODO
return False
def generate_non_obstructive(self, tolerance=0.90, scale=None):
"""
Generating the fourth scenario where the cards are laid in arbitrary position that doesn't obstruct other cards
:param tolerance: minimum level of visibility for each card
+        :param scale: scale of each card in the generator
:return: True if successfully generated, otherwise False
"""
card_size = (len(self.cards[0].img[0]), len(self.cards[0].img))
@@ -276,7 +290,6 @@
# Position each card at random location that doesn't obstruct other cards
i = 0
while i < len(self.cards):
- #for i in range(len(self.cards)):
card = self.cards[i]
card.scale = scale
rep = 0
@@ -300,8 +313,8 @@
def check_visibility(self, cards=None, i_check=None, visibility=0.5):
"""
- Check whether if extracted objects in each card are visible in the current scenario, and update their status
- :param cards: list of cards (in a correct order)
+        Check whether the extracted objects in each card are visible in the current scenario, and update their status
+        :param cards: list of cards (in correct Z-order). All cards in this generator are checked by default.
:param i_check: indices of cards that need to be checked. Cards that aren't in this list will only be used
to check visibility of other cards. All cards are checked by default.
:param visibility: minimum ratio of the object's area that aren't covered by another card to be visible
@@ -311,6 +324,8 @@
cards = self.cards
if i_check is None:
i_check = range(len(cards))
+
+ # Create a polygon of each card
card_poly_list = [geometry.Polygon([card.coordinate_in_generator(0, 0),
card.coordinate_in_generator(0, len(card.img)),
card.coordinate_in_generator(len(card.img[0]), len(card.img)),
@@ -324,22 +339,26 @@
obj_poly = geometry.Polygon([card.coordinate_in_generator(pt[0], pt[1]) for pt in ext_obj.key_pts])
obj_area = obj_poly.area
# Check if the other cards are blocking this object or if it's out of the template
+                # Any card polygon later in the list is rendered on top, and may therefore cover this object
+                # We assume that no objects from the same card sit on top of each other
for card_poly in card_poly_list[i + 1:]:
obj_poly = obj_poly.difference(card_poly)
obj_poly = obj_poly.intersection(template_poly)
visible_area = obj_poly.area
- #print(visible_area, obj_area, len(card.img[0]) * len(card.img) * card.scale * card.scale)
- #print("%s: %.1f visible" % (ext_obj.label, visible_area / obj_area * 100))
ext_obj.visible = obj_area * visibility <= visible_area
def export_training_data(self, out_name, visibility=0.5, aug=None):
"""
Export the generated training image along with the txt file for all bounding boxes
+ :param out_name: path of the output file (without extension)
+        :param visibility: portion of the card's image that must not be overlapped by other cards for the card to
+                           be considered visible
+        :param aug: image augmenter to be applied
:return: none
"""
self.render(visibility, aug=aug)
cv2.imwrite(out_name + '.jpg', self.img_result)
- out_txt = open(out_name+ '.txt', 'w')
+ out_txt = open(out_name + '.txt', 'w')
for card in self.cards:
for ext_obj in card.objects:
if not ext_obj.visible:
@@ -348,17 +367,9 @@
obj_yolo_info = key_pts_to_yolo(coords_in_gen, self.width, self.height)
if ext_obj.label == 'card':
#class_id = self.class_ids[card.info['name']]
- class_id = 0
+ class_id = 0 # since only the entire card is used
out_txt.write(str(class_id) + ' %.6f %.6f %.6f %.6f\n' % obj_yolo_info)
- pass
- elif ext_obj.label[:ext_obj.label.find[':']] == 'mana_symbol':
- # TODO
- pass
- elif ext_obj.label[:ext_obj.label.find[':']] == 'set_symbol':
- # TODO
- pass
out_txt.close()
- pass
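+
+# For reference, a YOLO label line is a class id followed by the box centre and
+# size, all normalized to [0, 1]. A sketch of the kind of conversion that
+# key_pts_to_yolo() performs (an assumption, not necessarily its exact code):
+def _key_pts_to_yolo_sketch(key_pts, img_w, img_h):
+    xs = [pt[0] for pt in key_pts]
+    ys = [pt[1] for pt in key_pts]
+    x_centre = (min(xs) + max(xs)) / 2 / img_w
+    y_centre = (min(ys) + max(ys)) / 2 / img_h
+    return (x_centre, y_centre,
+            (max(xs) - min(xs)) / img_w, (max(ys) - min(ys)) / img_h)
+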
class Card:
@@ -370,7 +381,6 @@
:param img: image of the card
:param card_info: details like name, mana cost, type, set, etc
:param objects: list of ExtractedObjects like mana & set symbol, etc
- :param generator: ImageGenerator object that the card is bound to
:param x: X-coordinate of the card's centre in relation to the generator
:param y: Y-coordinate of the card's centre in relation to the generator
:param theta: angle of rotation of the card in relation to the generator
@@ -389,7 +399,7 @@
"""
Apply a X/Y translation on this image
:param x: amount of X-translation. If range is given, translate by a random amount within that range
- :param y: amount of Y-translation. Refer to x when a range is given.
+ :param y: amount of Y-translation. If range is given, translate by a random amount within that range
:return: none
"""
if isinstance(x, tuple) or (isinstance(x, list) and len(x) == 2):
@@ -406,8 +416,8 @@
"""
Apply a rotation on this image with a centre
:param theta: amount of rotation in radian (clockwise). If a range is given, rotate by a random amount within
+ that range
:param centre: coordinate of the centre of the rotation in relation to the centre of this card
- that range
:return: none
"""
if isinstance(theta, tuple) or (isinstance(theta, list) and len(theta) == 2):
@@ -467,18 +477,6 @@
x2 = max([pt[0] for pt in coords_in_gen])
y1 = min([pt[1] for pt in coords_in_gen])
y2 = max([pt[1] for pt in coords_in_gen])
- '''
- x1 = -math.inf
- x2 = math.inf
- y1 = -math.inf
- y2 = math.inf
- for key_pt in key_pts:
- coord_in_gen = self.coordinate_in_generator(key_pt[0], key_pt[1])
- x1 = max(x1, coord_in_gen[0])
- x2 = min(x2, coord_in_gen[0])
- y1 = max(y1, coord_in_gen[1])
- y2 = min(y2, coord_in_gen[1])
- '''
return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
@@ -497,7 +495,6 @@
ia.seed(random.randrange(10000))
bg_images = generate_data.load_dtd(dtd_dir='%s/dtd/images' % data_dir, dump_it=False)
- #bg_images = [cv2.imread('data/frilly_0007.jpg')]
background = generate_data.Backgrounds(images=bg_images)
card_pool = pd.DataFrame()
@@ -512,14 +509,18 @@
num_gen = 60000
num_iter = 1
+ w_gen = 1440
+ h_gen = 960
for i in range(num_gen):
# Arbitrarily select top left and right corners for perspective transformation
# Since the training images are generated with random rotation, there's no need to skew all four sides
skew = [[random.uniform(0, 0.25), 0], [0, 1], [1, 1],
[random.uniform(0.75, 1), 0]]
- generator = ImageGenerator(background.get_random(), class_ids, 1440, 960, skew=skew)
+ generator = ImageGenerator(background.get_random(), class_ids, w_gen, h_gen, skew=skew)
out_name = ''
+
+ # Use 2 to 5 cards per generator
for _, card_info in card_pool.sample(random.randint(2, 5)).iterrows():
img_name = '%s/card_img/png/%s/%s_%s.png' % (data_dir, card_info['set'], card_info['collector_number'],
fetch_data.get_valid_filename(card_info['name']))
@@ -533,13 +534,14 @@
detected_object_list = generate_data.apply_bounding_box(card_img, card_info)
card = Card(card_img, card_info, detected_object_list)
generator.add_card(card)
+
for j in range(num_iter):
seq = iaa.Sequential([
iaa.Multiply((0.8, 1.2)), # darken / brighten the whole image
iaa.SimplexNoiseAlpha(first=iaa.Add(random.randrange(64)), per_channel=0.1, size_px_max=[3, 6],
upscale_method="cubic"), # Lighting
iaa.AdditiveGaussianNoise(scale=random.uniform(0, 0.05) * 255, per_channel=0.1), # Noises
- iaa.Dropout(p=[0, 0.05], per_channel=0.1)
+ iaa.Dropout(p=[0, 0.05], per_channel=0.1) # Dropout
])
if i % 3 == 0:
--
Gitblit v1.10.0