Constantin Wenger
2019-06-13 32dd89caead0dff1c8f23c3535cd357f814bb9a9
fixed the cv2.findContours call for OpenCV 4 and made hash generation (multiprocessing.Pool) and image download (ThreadPoolExecutor) parallel
also added set icon download (it needs the sets.json from Scryfall in the data dir)
2 files modified
1 file added
89 lines changed
fetch_data.py 7
fetch_icons.py 21
opencv_dnn.py 61
fetch_data.py
@@ -81,9 +81,14 @@
         # df is a single row of card
         fetch_card_image(df, out_dir, size)
     else:
+        from concurrent.futures import ThreadPoolExecutor, wait as fwait
+        executor = ThreadPoolExecutor(5)
         # df is a dataframe containing list of cards
+        arglist = []
         for ind, row in df.iterrows():
-            fetch_card_image(row, out_dir, size)
+            arglist.append(executor.submit(fetch_card_image, row, out_dir, size))
+        fwait(arglist)
+        #    fetch_card_image(row, out_dir, size)
 
 
 def fetch_card_image(row, out_dir=None, size='png'):
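
Note: the loop above now submits each download to a five-thread ThreadPoolExecutor and blocks on wait() until every future completes, which suits an I/O-bound task like fetching card images. A minimal standalone sketch of the same pattern (fetch_one and the URL list are hypothetical stand-ins for fetch_card_image and the dataframe rows):

from concurrent.futures import ThreadPoolExecutor, wait

def fetch_one(url):
    # placeholder for the real download work done by fetch_card_image
    print('fetching', url)

urls = ['https://example.com/a.png', 'https://example.com/b.png']
executor = ThreadPoolExecutor(max_workers=5)
futures = [executor.submit(fetch_one, url) for url in urls]
wait(futures)        # blocks until all futures are done or failed
executor.shutdown()

One caveat: wait() does not re-raise worker exceptions; calling result() on each finished future is what would surface a failed download.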
fetch_icons.py
New file
@@ -0,0 +1,21 @@
+#!/bin/python3
+import sys
+import json
+import os
+
+from config import Config
+from urllib import request
+
+
+def main(args):
+    setdata = None
+    with open(os.path.join(Config.data_dir, 'sets.json'), 'rt') as setfile:
+        setdata = json.load(setfile)
+    for mset in setdata['data']:
+        if len(mset['code']) > 3:  # not an official set
+            continue
+        request.urlretrieve(mset['icon_svg_uri'], filename=os.path.join(Config.data_dir, 'icons', mset['code']+'.svg'))
+
+
+if __name__ == '__main__':
+    main(sys.argv)
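
Note: the script assumes sets.json has Scryfall's list-object shape ({"data": [{"code": ..., "icon_svg_uri": ...}, ...]}) and that the icons directory under Config.data_dir already exists, since urllib.request.urlretrieve does not create intermediate directories. A hedged sketch of the guard that would make the target directory safe to write into (icon_dir is a placeholder path, not part of this repo):

import os
from urllib import request

icon_dir = os.path.join('data', 'icons')  # stands in for Config.data_dir/icons
os.makedirs(icon_dir, exist_ok=True)      # no-op if the directory already exists
request.urlretrieve('https://example.com/icon.svg',
                    filename=os.path.join(icon_dir, 'xyz.svg'))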
opencv_dnn.py
@@ -9,7 +9,7 @@
 import pandas as pd
 from PIL import Image
 import time
+from multiprocessing import Pool
 from config import Config
 import fetch_data
@@ -22,25 +22,13 @@
 https://github.com/hj3yoo/mtg_card_detector/tree/dea64611730c84a59c711c61f7f80948f82bcd31
 """
-def calc_image_hashes(card_pool, save_to=None, hash_size=None):
-    """
-    Calculate perceptual hash (pHash) value for each cards in the database, then store them if needed
-    :param card_pool: pandas dataframe containing all card information
-    :param save_to: path for the pickle file to be saved
-    :param hash_size: param for pHash algorithm
-    :return: pandas dataframe
-    """
-    if hash_size is None:
-        hash_size = [16, 32]
-    elif isinstance(hash_size, int):
-        hash_size = [hash_size]
-    # Since some double-faced cards may result in two different cards, create a new dataframe to store the result
+def do_calc(args):
+    card_pool = args[0]
+    hash_size = args[1]
     new_pool = pd.DataFrame(columns=list(card_pool.columns.values))
     for hs in hash_size:
-            new_pool['card_hash_%d' % hs] = np.NaN
-            #new_pool['art_hash_%d' % hs] = np.NaN
+        new_pool['card_hash_%d' % hs] = np.NaN
+        #new_pool['art_hash_%d' % hs] = np.NaN
     for ind, card_info in card_pool.iterrows():
         if ind % 100 == 0:
             print('Calculating hashes: %dth card' % ind)
@@ -82,6 +70,29 @@
                 #art_hash = ih.phash(img_art, hash_size=hs)
                 #card_info['art_hash_%d' % hs] = art_hash
             new_pool.loc[0 if new_pool.empty else new_pool.index.max() + 1] = card_info
+    return new_pool
+
+
+def calc_image_hashes(card_pool, save_to=None, hash_size=None):
+    """
+    Calculate perceptual hash (pHash) value for each cards in the database, then store them if needed
+    :param card_pool: pandas dataframe containing all card information
+    :param save_to: path for the pickle file to be saved
+    :param hash_size: param for pHash algorithm
+    :return: pandas dataframe
+    """
+    if hash_size is None:
+        hash_size = [16, 32]
+    elif isinstance(hash_size, int):
+        hash_size = [hash_size]
+    num_cores = 15
+    num_partitions = 60
+    pool = Pool(num_cores)
+    df_split = np.array_split(card_pool, num_partitions)
+    new_pool = pd.concat(pool.map(do_calc, [(split, hash_size) for split in df_split]))
+    pool.close()
+    pool.join()
+
     # Since some double-faced cards may result in two different cards, create a new dataframe to store the result
     if save_to is not None:
         new_pool.to_pickle(save_to)
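
Note: calc_image_hashes now follows the common split-apply-concat recipe: np.array_split cuts the dataframe into more partitions than workers (so faster workers pick up extra chunks from Pool.map's task queue), do_calc hashes each chunk, and pd.concat stitches the results back together. The hard-coded num_cores = 15 presumably matches the author's machine; os.cpu_count() would be the portable choice. A self-contained sketch of the pattern with a trivial worker in place of the pHash loop:

import numpy as np
import pandas as pd
from multiprocessing import Pool

def square_chunk(args):
    chunk, col = args
    chunk = chunk.copy()          # avoid writing into a view of the parent frame
    chunk[col] = chunk['x'] ** 2  # stands in for the per-card hash loop
    return chunk

if __name__ == '__main__':  # required guard for multiprocessing on spawn platforms
    df = pd.DataFrame({'x': range(100)})
    splits = np.array_split(df, 8)  # more partitions than workers
    with Pool(4) as pool:
        df = pd.concat(pool.map(square_chunk, [(s, 'x2') for s in splits]))
    print(df.head())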
@@ -217,7 +228,7 @@
     img_erode = cv2.erode(img_dilate, kernel, iterations=1)
 
     # Find the contour
-    _, cnts, hier = cv2.findContours(img_erode, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    cnts, hier = cv2.findContours(img_erode, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
     if len(cnts) == 0:
         #print('no contours')
         return []
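
Note: the one-line change above tracks the cv2.findContours signature change: OpenCV 3.x returned (image, contours, hierarchy), while OpenCV 2.4 and 4.x return (contours, hierarchy), so the new code requires OpenCV 4 (or 2.4). If both environments had to be supported, a version-tolerant wrapper is a common workaround (find_contours_compat is a hypothetical helper, not part of this repo):

import cv2

def find_contours_compat(img_bin, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE):
    result = cv2.findContours(img_bin, mode, method)
    # contours and hierarchy are always the last two elements of the returned tuple
    return result[-2], result[-1]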
@@ -358,7 +369,7 @@
                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
         if debug:
             # cv2.rectangle(img_warp, (22, 47), (294, 249), (0, 255, 0), 2)
-            cv2.putText(img_warp, card_name + ', ' + str(hash_diff), (0, 20),
+            cv2.putText(img_warp, card_name + ':' + card_set + ', ' + str(hash_diff), (0, 20),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
             cv2.imshow('card#%d' % i, img_warp)
     if display:
@@ -467,7 +478,8 @@
 def main(args):
     # Specify paths for all necessary files
+    hash_sizes = {16, 32}
+    hash_sizes.add(args.hash_size)
     pck_path = os.path.abspath('card_pool.pck')
     if os.path.isfile(pck_path):
         card_pool = pd.read_pickle(pck_path)
@@ -482,8 +494,13 @@
         card_pool = pd.concat(df_list, sort=True)
         card_pool.reset_index(drop=True, inplace=True)
         card_pool.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')
-        calc_image_hashes(card_pool, save_to=pck_path)
+        card_pool = calc_image_hashes(card_pool, save_to=pck_path, hash_size=hash_sizes)
 
+    ch_key = 'card_hash_%d' % args.hash_size
+    if ch_key not in card_pool.columns:
+        # we did not generate this hash_size yet
+        print('We need to add hash_size=%d' % (args.hash_size,))
+        card_pool = calc_image_hashes(card_pool, save_to=pck_path, hash_size=[args.hash_size])
 
     card_pool = card_pool[['name', 'set', 'collector_number', ch_key]]
     # Processing time is almost linear to the size of the database
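
Note: main() now treats card_pool.pck as a cache that can grow new hash-size columns on demand: hashes for {16, 32} plus the requested size are computed on a cold start, and if a later run asks for a size whose card_hash_%d column is missing, only that size is recomputed and the pickle is re-saved. A condensed sketch of the pattern (build_pool and compute_hashes are hypothetical stand-ins for the CSV-loading code and calc_image_hashes):

import os
import pandas as pd

def load_hashed_pool(pck_path, hash_size, build_pool, compute_hashes):
    if os.path.isfile(pck_path):
        card_pool = pd.read_pickle(pck_path)  # warm start: reuse the cache
    else:
        card_pool = compute_hashes(build_pool(), save_to=pck_path)
    key = 'card_hash_%d' % hash_size
    if key not in card_pool.columns:
        # requested size was never generated; extend the cache and re-save
        card_pool = compute_hashes(card_pool, save_to=pck_path, hash_size=[hash_size])
    return card_pool[['name', 'set', 'collector_number', key]]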