Constantin Wenger
2022-02-03 b95bf33cb5b296efb70a0c4b1c82c0f62286f52a
added options to flip/rotate the input and to specify different input resolutions
also capped the displayed image at 800x800; anything larger is scaled down while keeping the aspect ratio

added an option to run tesseract edition detection (it can only detect M20 for now);
the result is not used other than being displayed, as this option is still in development
1 file modified
opencv_dnn.py (135 lines changed)
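
In rough strokes, the new frame handling amounts to the following (a minimal sketch, not the exact code from the patch; preprocess_frame and its defaults are illustrative):

    import cv2

    def preprocess_frame(frame, flip=None, rotate=None, crop_x=0, crop_y=0, max_side=800):
        # flip first, then rotate, matching the order used in detect_video below
        if flip is not None:
            frame = cv2.flip(frame, flip)      # -1: both axes, 0: x-axis, 1: y-axis
        if rotate is not None:
            frame = cv2.rotate(frame, rotate)  # 0/1/2 == cv2.ROTATE_* constants
        h, w = frame.shape[:2]
        frame = frame[crop_y:h - crop_y, crop_x:w - crop_x]  # symmetric crop
        h, w = frame.shape[:2]
        if max(h, w) > max_side:
            scale = max_side / float(max(h, w))  # keeps the aspect ratio
            frame = cv2.resize(frame, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR)
        return frame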
opencv_dnn.py
@@ -12,6 +12,7 @@
 from multiprocessing import Pool
 from config import Config
 import fetch_data
+import pytesseract
 """
@@ -79,7 +80,7 @@
             cnts2 = sorted(cnts, key=cv2.contourArea, reverse=True)
             cnts2 = cnts2[:10]
             if True:
-                cv2.rawContours(img_cc, cnts2, -1, (0, 255, 0), 3)
+                cv2.drawContours(img_cc, cnts2, -1, (0, 255, 0), 3)
                 #cv2.imshow('Contours', card_img)
                 #cv2.waitKey(10000)
             """
@@ -336,7 +337,7 @@
     return cnts_rect

-def draw_card_graph(exist_cards, card_pool, f_len):
+def draw_card_graph(exist_cards, card_pool, f_len, text_scale=0.8):
     """
     Given the history of detected cards in the current and several previous frames, draw a simple graph
     displaying the detected cards with their confidence levels
@@ -352,7 +353,7 @@
     gap_sm = 10  # Small offset
     w_bar = 300  # Length of the confidence bar at 100%
     h_bar = 12
-    txt_scale = 0.8
+    txt_scale = text_scale
     n_cards_p_col = 4  # Number of cards displayed per one column
     w_img = gap + (w_card + gap + w_bar + gap) * 2  # Dimension of the entire graph (for 2 columns)
     h_img = 480
@@ -398,7 +399,7 @@
 def detect_frame(img, card_pool, hash_size=32, size_thresh=10000,
-                 out_path=None, display=True, debug=False):
+                 out_path=None, display=True, debug=False, scale=1.0, tesseract=False):
     """
     Identify all cards in the input frame, display or save the frame if needed
     :param img: input frame
@@ -436,6 +437,8 @@
         '''
         img_card = Image.fromarray(img_warp.astype('uint8'), 'RGB')
         img_card_size = img_warp.shape
+        # cut out the part of the image that has the set icon
+        #print(img_card_size)
         cut = [round(img_card_size[0]*0.57), round(img_card_size[0]*0.615), round(img_card_size[1]*0.81), round(img_card_size[1]*0.940)]
         #print(cut)
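
The cut list holds fractional row/column bounds of the set-icon region in the warped card image. A quick worked example (the 600x500 size is purely illustrative):

    h, w = 600, 500  # hypothetical (height, width) of img_warp
    cut = [round(h*0.57), round(h*0.615), round(w*0.81), round(w*0.940)]
    print(cut)  # [342, 369, 405, 470]
    # so img_set_part = img_warp[342:369, 405:470], a 27x65 px strip around the set icon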
@@ -445,6 +448,52 @@
         #print('img set')
         if debug:
             cv2.imshow("Set Img#%d" % i, img_set_part)
+        # tesseract takes a long time (200ms+), so if we use it at all we should collect pictures
+        # and then, if a card is detected successfully, add it to the detected cards and run a background check with
+        # tesseract; if the identification with tesseract fails, mark it somehow
+        # or only use tesseract in case of edition conflicts, not sure yet
+        # we will need to see what is needed
+        # it is also hard to detect anything on a bad 500x600 px image
+        # maybe training it on the font or getting better resolution images would make it better
+        prefilter = True
+        if tesseract:
+            height, width, channels = img_warp.shape
+            blank_image = np.zeros((height, width, 3), np.uint8)
+            threshold = 70
+            athreshold = -30
+            athreshold = -cv2.getTrackbarPos("Threshold", "mainwindow")
+            cut = [round(img_card_size[0]*0.94), round(img_card_size[0]*0.98), round(img_card_size[1]*0.02), round(img_card_size[1]*0.3)]
+            blank_image = img_warp[cut[0]:cut[1], cut[2]:cut[3]]
+            cv2.imshow("Tesseract Image", blank_image)
+            if prefilter:
+                blank_image = cv2.cvtColor(blank_image, cv2.COLOR_BGR2GRAY)
+                blank_image = cv2.normalize(blank_image, None, 0, 255, cv2.NORM_MINMAX)
+                cv2.imshow("Normalized", blank_image)
+                result_image = cv2.adaptiveThreshold(blank_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 501, athreshold)
+                #_, result_image = cv2.threshold(blank_image, threshold, 255, cv2.THRESH_BINARY_INV)
+                cv2.imshow("TessImg", result_image)
+                tesseract_output = pytesseract.image_to_string(cv2.cvtColor(result_image, cv2.COLOR_GRAY2RGB))
+            else:
+                tesseract_output = pytesseract.image_to_string(cv2.cvtColor(blank_image, cv2.COLOR_BGR2RGB))
+            if "M20" in tesseract_output or 'm20' in tesseract_output:
+                tesseract_output = "M20"
+                print(tesseract_output)
+            else:
+                print(tesseract_output)
+                tesseract_output = "Set not detected"
+            #cv2.imshow("Tesseract Image", img_warp)
+            #img_gray = cv2.cvtColor(img_warp, cv2.COLOR_BGR2GRAY)
+            #img_blur = cv2.medianBlur(img_gray, 5)
+            #img_thresh = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 5)
+            #cv2.imshow('Thres', img_thresh)
+            #tesseract_output = pytesseract.image_to_string(cv2.cvtColor(img_thresh, cv2.COLOR_GRAY2RGB))
+            #if "M20" in tesseract_output or 'm20' in tesseract_output:
+            #    tesseract_output = "M20"
+            #    print(tesseract_output)
+            #else:
+            #    print(tesseract_output)
+            #    tesseract_output = "Set not detected"
         # the hash values stored in the dataframe are already flattened to minimize computation time
         card_hash = ih.phash(img_card, hash_size=hash_size).hash.flatten()
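
As the comment block above notes, a pytesseract call costs 200 ms or more, which would stall the frame loop if run on every card. One way to move it off the hot path is a small background worker; the sketch below uses Python's standard ThreadPoolExecutor and is illustrative only (ocr_set_code_async and its callback protocol are not part of this patch):

    from concurrent.futures import ThreadPoolExecutor

    import cv2
    import pytesseract

    _ocr_pool = ThreadPoolExecutor(max_workers=1)

    def ocr_set_code_async(img_bgr, callback):
        """Run tesseract on a card crop in the background; pass the verdict to callback."""
        def _work():
            text = pytesseract.image_to_string(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
            # same M20-only matching as the patch above
            callback("M20" if "m20" in text.lower() else "Set not detected")
        _ocr_pool.submit(_work)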
@@ -481,13 +530,16 @@
         det_cards.append((card_name, card_set))
         # Render the result and display it if needed
+        image_header = card_name
+        if tesseract:
+            image_header += ' TS: ' + tesseract_output
         cv2.drawContours(img_result, [cnt], -1, (0, 255, 0), 2)
-        cv2.putText(img_result, card_name, (int(min(pts[0][0], pts[1][0])), int(min(pts[0][1], pts[1][1]))),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
+        cv2.putText(img_result, image_header, (int(min(pts[0][0], pts[1][0])), int(min(pts[0][1], pts[1][1]))),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5*scale+0.1, (255, 255, 255), 2)
         if debug:
             # cv2.rectangle(img_warp, (22, 47), (294, 249), (0, 255, 0), 2)
             cv2.putText(img_warp, card_name + ':' + card_set + ', ' + str(hash_diff), (0, 20),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.4*scale+0.1, (255, 255, 255), 1)
             cv2.imshow('card#%d' % i, img_warp)
     if display:
         cv2.imshow('Result', img_result)
@@ -498,9 +550,12 @@
         cv2.imwrite(out_path, img_result.astype(np.uint8))
     return det_cards, img_result

+def trackbardummy(v):
+    pass
+
 def detect_video(capture, card_pool, hash_size=32, size_thresh=10000,
-                 out_path=None, display=True, show_graph=True, debug=False, crop_x=0, crop_y=0):
+                 out_path=None, display=True, show_graph=True, debug=False,
+                 crop_x=0, crop_y=0, rotate=None, flip=None, tesseract=False):
     """
     Identify all cards in the continuous video stream, display or save the result if needed
     :param capture: input video stream
@@ -514,22 +569,40 @@
     :return: list of detected cards' names/sets and the resulting image
     """
+    if tesseract:
+        cv2.namedWindow('mainwindow')
+        cv2.createTrackbar("Threshold", "mainwindow", 30, 255, trackbardummy)
+    list_names_from = 0
+    # get some frame numbers
+    f_width = 0
+    f_height = 0
+    f_scale = 1.0
+    if rotate is not None and (rotate == 0 or rotate == 2):
+        # 90-degree rotations swap width and height
+        f_height = round(capture.get(cv2.CAP_PROP_FRAME_WIDTH) - 2*crop_y)
+        f_width = round(capture.get(cv2.CAP_PROP_FRAME_HEIGHT) - 2*crop_x)
+    else:
+        f_width = round(capture.get(cv2.CAP_PROP_FRAME_WIDTH) - 2*crop_x)
+        f_height = round(capture.get(cv2.CAP_PROP_FRAME_HEIGHT) - 2*crop_y)
+    if f_width > 800 or f_height > 800:
+        f_max = max(f_width, f_height)
+        f_scale = 800.0 / float(f_max)
     # Get the dimension of the output video, and set it up
     if show_graph:
         img_graph = draw_card_graph({}, pd.DataFrame(), -1)  # Black image of the graph just to get the dimension
-        width = round(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) - 2*crop_x + img_graph.shape[1]
-        height = max(round(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) - 2*crop_y, img_graph.shape[0])
+        width = int(f_width * f_scale) + img_graph.shape[1]
+        height = max(int(f_height * f_scale), img_graph.shape[0])
+        height += 200  # some space to display the last detected cards
     else:
-        width = round(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = round(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        width = int(f_width * f_scale)
+        height = int(f_height * f_scale)
     if out_path is not None:
         vid_writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'MJPG'), 10.0, (width, height))
     max_num_obj = 0
     f_len = 10  # number of frames to consider to check for existing cards
     exist_cards = {}
+    #print(f"fw{f_width} fh{f_height} w{width} h{height} fs{f_scale}")
     exist_card_single = {}
     written_out_cards = set()
     found_cards = []
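
To make the scaling arithmetic concrete (the numbers are purely illustrative): for a 1920x1080 stream with no cropping and no 90-degree rotation,

    f_width, f_height = 1920, 1080
    f_scale = 800.0 / max(f_width, f_height)                # 0.41666...
    print(int(f_width * f_scale), int(f_height * f_scale))  # 800 450

so the frame is displayed at 800x450 and the graph is concatenated to the right of it.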
@@ -538,6 +611,12 @@
             ret, frame = capture.read()
             if not ret:
                 continue
+            if flip is not None:
+                frame = cv2.flip(frame, flip)
+            if rotate is not None:
+                frame = cv2.rotate(frame, rotate)
             y_max_index = -crop_y
             if crop_y == 0:
                 y_max_index = frame.shape[0]
@@ -546,7 +625,7 @@
                 x_max_index = frame.shape[1]
             croped_img = frame[crop_y:y_max_index, crop_x:x_max_index]
-            fimg = cv2.flip(croped_img, -1)
+            fimg = croped_img
             start_time = time.time()
             if not ret:
                 # End of video
@@ -558,7 +637,7 @@
                 break
             # Detect all cards from the current frame
             det_cards, img_result = detect_frame(fimg, card_pool, hash_size=hash_size, size_thresh=size_thresh,
-                                                 out_path=None, display=False, debug=debug)
+                                                 out_path=None, display=False, debug=debug, scale=1.0/f_scale, tesseract=tesseract)
             if show_graph:
                 # If the card was already detected in the previous frame, append 1 to the list
                 # If the card previously detected was not found in this frame, append 0 to the list
@@ -620,6 +699,11 @@
                 # Draw the graph based on the history of detected cards, then concatenate it with the result image
                 img_graph = draw_card_graph(exist_cards, card_pool, f_len)
                 img_save = np.zeros((height, width, 3), dtype=np.uint8)
+                # resize the result to fit our predefined area
+                if f_scale != 1.0:
+                    img_result = cv2.resize(img_result, (min(800, int(img_result.shape[1]*f_scale)), min(800, int(img_result.shape[0]*f_scale))), interpolation=cv2.INTER_LINEAR)
+                #print(f'ri_w{img_result.shape[1]} ri_h{img_result.shape[0]}')
+                #print(f"gi_w{img_graph.shape[1]} gi_h{img_graph.shape[0]}")
                 img_save[0:img_result.shape[0], 0:img_result.shape[1]] = img_result
                 img_save[0:img_graph.shape[0], img_result.shape[1]:img_result.shape[1] + img_graph.shape[1]] = img_graph
                 start_at = max(0, list_names_from - 10)
@@ -720,20 +804,22 @@
             print("Using webcam")
             capture = cv2.VideoCapture(0)
             capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"MJPG"))
-            capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
-            capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
+            capture.set(cv2.CAP_PROP_FRAME_WIDTH, args.rx)
+            capture.set(cv2.CAP_PROP_FRAME_HEIGHT, args.ry)
         else:
-            print(f"Using streami {args.stream_url}")
+            print(f"Using stream {args.stream_url}")
             capture = cv2.VideoCapture(args.stream_url)
-        thres = int((1920-2*args.crop_x)*(1080-2*args.crop_y)*(float(args.threshold_percent)/100))
+        thres = int((args.rx-2*args.crop_x)*(args.ry-2*args.crop_y)*(float(args.threshold_percent)/100))
         print('Threshold:', thres)
         if args.out_path is None:
             out_path = None
         else:
             out_path = '%s/result.avi' % args.out_path
         detect_video(capture, card_pool, hash_size=args.hash_size, out_path=out_path,
-                     display=args.display, show_graph=args.show_graph, debug=args.debug, crop_x=args.crop_x, crop_y=args.crop_y, size_thresh=thres)
+                     display=args.display, show_graph=args.show_graph, debug=args.debug,
+                     crop_x=args.crop_x, crop_y=args.crop_y, size_thresh=thres,
+                     rotate=args.rotate, flip=args.flip, tesseract=args.tesseract)
         capture.release()
     else:
         print(f"Using image or video {args.in_path}")
@@ -760,7 +846,9 @@
             # Test file is a video
             capture = cv2.VideoCapture(args.in_path)
             detect_video(capture, card_pool, hash_size=args.hash_size, out_path=out_path, display=args.display,
-                         show_graph=args.show_graph, debug=args.debug)
+                         show_graph=args.show_graph, debug=args.debug,
+                         rotate=args.rotate, flip=args.flip, tesseract=args.tesseract)
             capture.release()
     pass
@@ -782,6 +870,11 @@
     parser.add_argument('-cx', '--crop-x', dest='crop_x', help='crop x amount of pixels on each side of the x-axis', type=int, default=0)
     parser.add_argument('-cy', '--crop-y', dest='crop_y', help='crop y amount of pixels on each side of the y-axis', type=int, default=0)
     parser.add_argument('-tp', '--threshold-percent', dest='threshold_percent', help='percentage of the frame a card image needs to take up to be detected', type=int, default=5)
+    parser.add_argument('-r', '--rotate', dest='rotate', help='rotate image before use: 0 = 90 clockwise, 1 = 180, 2 = 90 counter-clockwise', type=int, default=None)
+    parser.add_argument('-f', '--flip', dest='flip', help='flip image before use (applied before rotation): -1 = both axes, 0 = x-axis, 1 = y-axis', type=int, default=None)
+    parser.add_argument('-rx', '--resolution-x', dest='rx', help='x-resolution of the source, defaults to 1920', type=int, default=1920)
+    parser.add_argument('-ry', '--resolution-y', dest='ry', help='y-resolution of the source, defaults to 1080', type=int, default=1080)
+    parser.add_argument('-t', '--tesseract', dest='tesseract', help='enable tesseract edition detection (result is only displayed, not used)', action='store_true', default=False)
     args = parser.parse_args()
     if not args.display and args.out_path is None:
         # Then why the heck are you running this thing in the first place?
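
For reference, the integer codes taken by -r and -f line up with OpenCV's own constants; a small sanity check (the example invocation at the end is illustrative, and any other flags your setup needs still apply):

    import cv2

    # -r codes match cv2's rotate constants:
    assert cv2.ROTATE_90_CLOCKWISE == 0
    assert cv2.ROTATE_180 == 1
    assert cv2.ROTATE_90_COUNTERCLOCKWISE == 2

    # -f codes follow cv2.flip's flipCode convention:
    # 0 flips around the x-axis, 1 around the y-axis, -1 around both.

    # e.g.: python opencv_dnn.py -rx 1280 -ry 720 -r 0 -f -1 -t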