Edmond Yoo
2018-08-28 d6d65c65cc3b4a063cb3b632437359f64974518c
Now can extract from all modern-bordered cards (8ED - JOU)
2 files modified
1 file deleted
485 ■■■■ changed files
data/all_cards.csv 420 ●●●●● patch | view | raw | blame | history
fetch_data.py 36 ●●●● patch | view | raw | blame | history
generate_data.py 29 ●●●●● patch | view | raw | blame | history
data/all_cards.csv
File was deleted
fetch_data.py
@@ -1,4 +1,5 @@
from urllib import request
import ast
import json
import pandas as pd
import re
@@ -12,7 +13,7 @@
    # get cards dataset as a json from the query
    while has_more:
        res_file_dir, http_message = request.urlretrieve(url)
        with open(res_file_dir) as res_file:
        with open(res_file_dir, 'r') as res_file:
            res_json = json.loads(res_file.read())
            cards += res_json['data']
            has_more = res_json['has_more']
@@ -27,12 +28,16 @@
        df = df[['artist', 'border_color', 'collector_number', 'color_identity', 'colors', 'flavor_text', 'image_uris',
                 'mana_cost', 'legalities', 'name', 'oracle_text', 'rarity', 'type_line', 'set', 'set_name', 'power',
                 'toughness']]
        #df.to_json(csv_name)
        df.to_csv(csv_name, sep=';')  # Comma doesn't work, since some columns are saved as a dict
    return df
def load_all_cards_text(csv_name):
    """Read a card table back from a semicolon-separated CSV file.

    Args:
        csv_name: Path of the CSV file to read.

    Returns:
        The ``pd.DataFrame`` parsed from ``csv_name`` using ``;`` as the
        field separator.
    """
    # ';' is the delimiter because the file is written with sep=';' as well.
    return pd.read_csv(csv_name, sep=';')
@@ -51,8 +56,18 @@
    return re.sub(r'(?u)[^-\w.]', '', s)
def fetch_all_cards_image(df, out_dir='', size='png'):
    """Fetch the image of every card described by ``df``.

    Args:
        df: Either a ``pd.DataFrame`` holding one card per row, or a single
            ``pd.Series`` describing one card.
        out_dir: Forwarded unchanged to ``fetch_card_image``.
        size: Forwarded unchanged to ``fetch_card_image``.
    """
    if isinstance(df, pd.Series):
        # A single row was passed in: handle it directly, nothing to iterate.
        fetch_card_image(df, out_dir, size)
        return
    for _, row in df.iterrows():
        fetch_card_image(row, out_dir, size)


# Previous name of this function, kept so existing callers keep working.
fetch_cards_image = fetch_all_cards_image
def fetch_card_image(row, out_dir='', size='png'):
    if isinstance(row['image_uris'], str):  # For some reason, dict isn't being parsed in the previous step
        png_url = ast.literal_eval(row['image_uris'])[size]
    else:
        png_url = row['image_uris'][size]
        if out_dir == '':
            out_dir = 'data/%s/%s' % (size, row['set'])
@@ -61,13 +76,22 @@
        img_name = '%s/%s_%s.png' % (out_dir, row['collector_number'], get_valid_filename(row['name']))
        request.urlretrieve(png_url, filename=img_name)
        print(img_name)
    pass
def main():
    """Fetch card tables and card images for a fixed list of set codes.

    For each set code, the card table is loaded from ``data/csv/<set>.csv``
    when that file exists and fetched from the Scryfall card-search URL
    otherwise. Images are then fetched only when the directory
    ``data/png/<set>`` does not exist yet.

    Relies on ``os`` being available at module level (its import is not
    visible in this excerpt).
    """
    # NOTE(review): the value returned by this initial 'rtr' fetch is overwritten by the loop below
    # before it is ever read, and the commit header lists data/all_cards.csv as deleted -- presumably
    # a leftover of the pre-commit body; confirm before relying on it.
    df = fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=layout:normal+set:rtr+lang:en',
                              csv_name='data/all_cards.csv')
    #fetch_cards_image(df)
    for set_name in ['mrd', 'dst', '5dn', 'chk', 'bok', 'sok', 'rav', 'gpt', 'dis', 'csp', 'tsp', 'plc', 'fut', 'lrw',
                     'mor', 'shm', 'eve', 'ala', 'con', 'arb', 'zen', 'wwk', 'roe', 'som', 'mbs', 'nph', 'isd', 'dka',
                     'avr', 'rtr', 'gtc', 'dgm', 'ths', 'bng', 'jou', '8ed', '9ed', '10e', 'm10', 'm11', 'm12', 'm13',
                     'm14']:
        csv_name = 'data/csv/%s.csv' % set_name
        # Reuse the per-set CSV when it is already on disk; otherwise query the API for that set.
        if not os.path.isfile(csv_name):
            df = fetch_all_cards_text(url='https://api.scryfall.com/cards/search?q=layout:normal+set:%s+lang:en' % set_name,
                                      csv_name=csv_name)
        else:
            df = load_all_cards_text(csv_name)
        print(csv_name)
        # An existing image directory for the set is treated as "images already fetched".
        if not os.path.exists('data/png/%s' % set_name):
            fetch_all_cards_image(df)
    pass
generate_data.py
@@ -10,6 +10,7 @@
import fetch_data
import sys
import numpy as np
import pandas as pd
# Referenced from geaxgx's playing-card-detection: https://github.com/geaxgx/playing-card-detection
class Backgrounds:
@@ -69,11 +70,16 @@
    if has_mana_cost:
        mana_cost = re.findall('\{(.*?)\}', card_info['mana_cost'])
        x2 = 683
        if is_planeswalker:
            y1 = 50
        else:
            y1 = 67
        # Cards with specific type or from old sets have their symbol at a different position
        if is_planeswalker:
            y1 -= 17
        if card_info['set'] in ['8ed', 'mrd', 'dst', '5dn']:
            y1 -= 2
        for i in reversed(range(len(mana_cost))):
            # Hybrid mana symbol are larger than a normal symbol
            is_hybrid = '/' in mana_cost[i]
            if is_hybrid:
                box = [(x2 - 47, y1 - 8), (x2 + 2, y1 + 43)]  # (x1, y1), (x2, y2)
@@ -81,7 +87,6 @@
            else:
                box = [(x2 - 39, y1), (x2, y1 + 41)]  # (x1, y1), (x2, y2)
                x2 -= 37
            img_symbol = img[box[0][1]:box[1][1], box[0][0]:box[1][0]]
            if display:
                cv2.imshow('symbol', img_symbol)
@@ -92,15 +97,19 @@
    #bg_images = load_dtd()
    #bg = Backgrounds()
    #bg.get_random(display=True)
    df = fetch_data.load_all_cards_text('data/all_cards.csv')
    df = fetch_data.load_all_cards_text('data/csv/dgm.csv')
    #repeat = 'y'
    while True:
        rand_card = df.iloc[random.randint(0, df.shape[0] - 1)]
        card_img = cv2.imread('data/png/%s/%s_%s.png' % (rand_card['set'], rand_card['collector_number'],
                                                         fetch_data.get_valid_filename(rand_card['name'])))
        print(rand_card['name'])
        card_info = df.iloc[random.randint(0, df.shape[0] - 1)]
        print(card_info['name'])
        card_img = cv2.imread('data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'],
                                                         fetch_data.get_valid_filename(card_info['name'])))
        if card_img is None:
            fetch_data.fetch_card_image(card_info)
            card_img = cv2.imread('data/png/%s/%s_%s.png' % (card_info['set'], card_info['collector_number'],
                                                             fetch_data.get_valid_filename(card_info['name'])))
        sys.stdout.flush()
        apply_bounding_box(card_img, rand_card, display=True)
        apply_bounding_box(card_img, card_info, display=True)
        #repeat = input('y to repeat, n to finish')
    return