"""
https://github.com/hj3yoo/mtg_card_detector/tree/dea64611730c84a59c711c61f7f80948f82bcd31
"""
import os
import time
from multiprocessing import Pool

import cv2
import numpy as np
import pandas as pd
import imagehash as ih
from PIL import Image

from config import Config
import fetch_data

# Since some double-faced cards may result in two different cards, create a new dataframe to store the result
def do_calc(args):
    card_pool, hash_size = args
    new_pool = pd.DataFrame(columns=list(card_pool.columns.values))
    for hs in hash_size:
        new_pool['card_hash_%d' % hs] = np.NaN
        #new_pool['art_hash_%d' % hs] = np.NaN
    for ind, card_info in card_pool.iterrows():
        if ind % 100 == 0:
            print('Calculating hashes: %dth card' % ind)
        # Here the card image is loaded and ih.phash is computed for every size in
        # hash_size, filling card_info['card_hash_%d' % hs] (see the sketch below)
        #art_hash = ih.phash(img_art, hash_size=hs)
        #card_info['art_hash_%d' % hs] = art_hash
        new_pool.loc[0 if new_pool.empty else new_pool.index.max() + 1] = card_info
    return new_pool
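

# Illustrative sketch (not part of the original script): the per-card hashing step
# referenced inside do_calc above. The function name and the PIL image argument are
# assumptions; only ih.phash and the 'card_hash_%d' column naming come from the code.
def phash_for_sizes(img_card, hash_sizes):
    """Return a dict mapping 'card_hash_<size>' to the pHash of a PIL image."""
    return {'card_hash_%d' % hs: ih.phash(img_card, hash_size=hs) for hs in hash_sizes}

# Example: phash_for_sizes(Image.open('some_card.png'), [16, 32])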


def calc_image_hashes(card_pool, save_to=None, hash_size=None):
    """
    Calculate the perceptual hash (pHash) value for each card in the database, then store the result if needed
    :param card_pool: pandas dataframe containing all card information
    :param save_to: path for the pickle file to be saved
    :param hash_size: parameter for the pHash algorithm (int or list of ints)
    :return: pandas dataframe with one 'card_hash_<size>' column per hash size
    """
    if hash_size is None:
        hash_size = [16, 32]
    elif isinstance(hash_size, int):
        hash_size = [hash_size]

    # Hash the pool in parallel: split into more partitions than workers so slow
    # cards do not stall a whole process
    num_cores = 15
    num_partitions = 60
    pool = Pool(num_cores)
    df_split = np.array_split(card_pool, num_partitions)
    new_pool = pd.concat(pool.map(do_calc, [(split, hash_size) for split in df_split]))
    pool.close()
    pool.join()

    if save_to is not None:
        new_pool.to_pickle(save_to)
    return new_pool
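

# Usage sketch (illustrative, not from the original script). The CSV filename is a
# placeholder; calc_image_hashes, 'card_pool.pck' and the 'card_hash_<size>' columns
# come from the code in this file.
#
#   pool_df = pd.read_csv('cards.csv')
#   pool_df = calc_image_hashes(pool_df, save_to='card_pool.pck', hash_size=[16, 32])
#   print(pool_df.filter(like='card_hash_').head())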


    img_erode = cv2.erode(img_dilate, kernel, iterations=1)

    # Find the contours (OpenCV 4.x returns (contours, hierarchy))
    cnts, hier = cv2.findContours(img_erode, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(cnts) == 0:
        #print('no contours')
        return []
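
    # Illustrative continuation (not from the original file): a typical way a detector
    # turns a contour into the flattened card image (img_warp) drawn on below. The
    # area threshold, output size, and use of the full frame 'img' are assumptions.
    for i, cnt in enumerate(cnts):
        if cv2.contourArea(cnt) < 10000:
            continue
        peri = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
        if len(approx) != 4:
            continue
        # The four corners would normally be ordered (top-left, top-right,
        # bottom-right, bottom-left) before computing the transform.
        src = np.float32(approx.reshape(4, 2))
        dst = np.float32([[0, 0], [314, 0], [314, 439], [0, 439]])
        img_warp = cv2.warpPerspective(img, cv2.getPerspectiveTransform(src, dst), (315, 440))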

        if debug:
            # cv2.rectangle(img_warp, (22, 47), (294, 249), (0, 255, 0), 2)
            cv2.putText(img_warp, card_name + ':' + card_set + ', ' + str(hash_diff), (0, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
            cv2.imshow('card#%d' % i, img_warp)
        if display:

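
# Sketch (not from the original script) of how a warped card image can be matched
# against the hashed pool to produce the card_name / card_set / hash_diff used above.
# The function name and structure are assumptions; ih.phash, the 'card_hash_<size>'
# columns and the 'name' / 'set' columns appear in the original code.
def match_card(img_warp, card_pool, hash_size=16):
    """Return (name, set, hash_diff) of the pool entry whose pHash is closest."""
    query_hash = ih.phash(Image.fromarray(cv2.cvtColor(img_warp, cv2.COLOR_BGR2RGB)),
                          hash_size=hash_size)
    # Subtracting two ImageHash objects yields their Hamming distance
    diffs = card_pool['card_hash_%d' % hash_size].apply(lambda h: query_hash - h)
    best = diffs.idxmin()
    return card_pool.loc[best, 'name'], card_pool.loc[best, 'set'], diffs[best]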


def main(args):
    # Specify paths for all necessary files

    hash_sizes = {16, 32}
    hash_sizes.add(args.hash_size)
    pck_path = os.path.abspath('card_pool.pck')
    if os.path.isfile(pck_path):
        card_pool = pd.read_pickle(pck_path)
    else:
        # df_list: per-set card dataframes assembled via fetch_data (not shown here)
        card_pool = pd.concat(df_list, sort=True)
        card_pool.reset_index(drop=True, inplace=True)
        card_pool.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')
        card_pool = calc_image_hashes(card_pool, save_to=pck_path, hash_size=hash_sizes)
    ch_key = 'card_hash_%d' % args.hash_size
    if ch_key not in card_pool.columns:
        # We did not generate this hash_size yet
        print('We need to add hash_size=%d' % (args.hash_size,))
        card_pool = calc_image_hashes(card_pool, save_to=pck_path, hash_size=[args.hash_size])

    card_pool = card_pool[['name', 'set', 'collector_number', ch_key]]

    # Processing time is almost linear in the size of the database
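

# Minimal entry-point sketch (assumption): the original argument parser is not shown
# in this excerpt, but main(args) only relies on an integer args.hash_size.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Build pHash values for the card pool')
    parser.add_argument('--hash_size', type=int, default=16,
                        help='pHash size used for card matching')
    main(parser.parse_args())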