fritz-c/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Yakuza 0 Karaoke Singer

A computer-vision-based program to automatically play the Yakuza 0 karaoke minigame.

(Full video on reddit)
Setup


Download the karaoke_singer.py and trigger_key.py files, along with the following image:

(save it as notes.png)
and place all three files in the same directory.


Install dependencies:
python3 -m pip install -U --user Pillow mss opencv-python


Change the value of IS_NEEDLE_BLUE in karaoke_singer.py according to the song
you're playing (the accompaniment parts use a pink needle,
while Kiryu's parts use a blue one).


Usage


Load Yakuza 0 in windowed mode, at 1280x720 resolution, and move the window into the far upper-left corner of the screen.
Run the following from PowerShell:
python3 karaoke_singer.py

Use the keyboard to select and start the karaoke song of your choice (don't use a gamepad, or the note icons will change)


## karaoke_singer.py
import time
import mss
from trigger_key import (
    PressKey,
    ReleaseKey,
    VK_UP_ARROW_KEY,
    VK_DOWN_ARROW_KEY,
    VK_LEFT_ARROW_KEY,
    VK_RIGHT_ARROW_KEY,
)
from PIL import Image, ImageStat
import cv2 as cv
import numpy as np

# Set to True to show OpenCV preview windows of what the detector sees
# DEBUG = True
DEBUG = False

# When doing the Kiryu singing parts, this should be True;
# for the accompaniment songs (with the pink needle), set it
# to False
IS_NEEDLE_BLUE = True
# IS_NEEDLE_BLUE = False

# Note: there are plenty of hard-coded pixel values in this code, so you're
# best off matching these dimensions as closely as you can (maybe adjust the
# top if necessary)
GAME_WINDOW_DIMENSIONS = {"top": 45, "left": 0, "width": 1280, "height": 720}

# How long a tapped key stays down before it is released
KEYPRESS_DURATION_SEC = 0.02
# Minimum pause between two key presses while drumming a note
DRUM_DOWNTIME_SEC = 0.1
# Height in pixels of the strip captured for one row of notes and lyrics
ROW_HEIGHT = int(GAME_WINDOW_DIMENSIONS["height"] * 15 / 72)
# Minimum template-matching score for a location to count as a note
TEMPLATE_MATCH_THRESHOLD = 0.80
# Upper and lower bounds (in pixels) for the distance at which a note is played
MAX_PIXELS_BEFORE_PLAYING_NOTE = 18
MIN_PIXELS_BEFORE_PLAYING_NOTE = 5

# For each note: the x offset of its icon inside notes.png and the key to press.
# The loop below adds "template_left" and "template_right" entries.
notes_info = {
    "down": {"x": 5, "key": VK_DOWN_ARROW_KEY},
    "right": {"x": 47, "key": VK_RIGHT_ARROW_KEY},
    "up": {"x": 88, "key": VK_UP_ARROW_KEY},
    "left": {"x": 130, "key": VK_LEFT_ARROW_KEY},
}

NOTE_TEMPLATES_PATH = "notes.png"
# Flag 0 loads the image as grayscale, which is what template matching uses
note_templates = cv.imread(NOTE_TEMPLATES_PATH, 0)
if note_templates is None:
    # cv.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an obscure TypeError in the slicing below.
    raise FileNotFoundError(
        f"Could not read {NOTE_TEMPLATES_PATH!r}; make sure the note icon "
        "image is saved under that name in the current directory"
    )
TEMPLATE_WIDTH = 13
TEMPLATE_HEIGHT = 19
RIGHT_TEMPLATE_OFFSET = 12
for note_info in notes_info.values():
    x = note_info["x"]
    y = 9
    # Two overlapping crops of the same icon: its left part and its right part
    note_info["template_left"] = note_templates[
        y : y + TEMPLATE_HEIGHT, x : x + TEMPLATE_WIDTH
    ]
    note_info["template_right"] = note_templates[
        y : y + TEMPLATE_HEIGHT,
        x + RIGHT_TEMPLATE_OFFSET : x + RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
    ]


# Shared screen-capture handle used by the screenshot helpers
sct = mss.mss()


def get_left_side_image():
    """Capture the leftmost quarter of the game window.

    Used to look for the needle again after it has moved on to another row.
    Returns the screenshot converted to a NumPy array.
    """
    capture_region = dict(GAME_WINDOW_DIMENSIONS)
    capture_region["width"] = int(GAME_WINDOW_DIMENSIONS["width"] / 4)
    screenshot = sct.grab(capture_region)
    return np.asarray(screenshot)


def get_row_image(y_offset):
    """Capture one ROW_HEIGHT-tall strip (a row of notes and lyrics).

    y_offset is measured from the top of the game window; negative values
    are treated as 0. Returns the screenshot converted to a NumPy array.
    """
    row_top = GAME_WINDOW_DIMENSIONS["top"] + max(0, y_offset)
    capture_region = dict(GAME_WINDOW_DIMENSIONS)
    capture_region["top"] = int(row_top)
    capture_region["height"] = ROW_HEIGHT
    return np.asarray(sct.grab(capture_region))


def extract_blue_needle_region(img_hsv):
    """Return a mask of the pixels whose HSV values fall in the blue needle range."""
    # Bounds are (hue, saturation, value) triples
    lower_bound = (92, 118, 71)
    upper_bound = (115, 255, 255)
    return cv.inRange(img_hsv, lower_bound, upper_bound)


def extract_pink_needle_region(img_hsv):
    """Return a mask of the pixels whose HSV values fall in the pink needle range."""
    # Bounds are (hue, saturation, value) triples
    lower_bound = (158, 143, 107)
    upper_bound = (169, 186, 223)
    return cv.inRange(img_hsv, lower_bound, upper_bound)


def get_needle_location(img_hsv, min_x=None):
    """Locate the needle using its color and line detection.

    img_hsv is an HSV image to search. When min_x is given, only the
    120-pixel-wide band starting at that column is searched, and the
    returned x is translated back into full-image coordinates.

    Returns (x, y, found). When no line is detected the result is
    (0, 0, False).
    """
    # It's a bit hacky, but the bottom 1/8 of the image (containing the
    # notes) is trimmed off to make it less likely to detect lines in the
    # row below.
    usable_height = int(img_hsv.shape[0] * 7 / 8)
    if min_x is None:
        search_area = img_hsv[:usable_height]
    else:
        search_area = img_hsv[:usable_height, min_x : min_x + 120]

    # Isolate the needle by color, then run Canny edge detection so the
    # Hough transform has clean outlines to work with.
    if IS_NEEDLE_BLUE:
        needle_mask = extract_blue_needle_region(search_area)
    else:
        needle_mask = extract_pink_needle_region(search_area)
    img_outlines = cv.Canny(needle_mask, 70, 200, None, 3)

    # Positional arguments: rho=1, theta=pi, threshold=30, lines=None,
    # minLineLength=30, maxLineGap=10.
    # NOTE(review): a theta step of pi presumably limits detection to
    # vertical lines -- confirm against the OpenCV documentation.
    lines = cv.HoughLinesP(img_outlines, 1, np.pi, 30, None, 30, 10)

    if DEBUG and search_area.shape[0] <= ROW_HEIGHT:
        cv.imshow("outline", img_outlines)
        cv.moveWindow("outline", 10, 720 + ROW_HEIGHT)
        if cv.waitKey(25) & 0xFF == ord("q"):
            cv.destroyAllWindows()

    if lines is None:
        return 0, 0, False

    # Account for the x offset introduced by working with a trimmed image
    if min_x is not None:
        lines[:, :, 0] += min_x
        lines[:, :, 2] += min_x

    # Very rough approximation of the needle's center: the mean of the first
    # x coordinate and of both y coordinates over all detected segments.
    # Anything else wrongly detected as a line will skew it.
    x1_mean = np.mean(lines[:, 0, 0])
    y1_mean = np.mean(lines[:, 0, 1])
    y2_mean = np.mean(lines[:, 0, 3])
    avg_x = int(x1_mean)
    avg_y = int((y1_mean + y2_mean) / 2)

    return avg_x, avg_y, True


def get_template_match(img, template):
    """Find where template matches img with a score above the threshold.

    Returns a tuple of two index arrays, row (y) indices first and column (x)
    indices second, one entry per location whose normalized correlation
    score exceeds TEMPLATE_MATCH_THRESHOLD.
    """
    scores = cv.matchTemplate(img, template, cv.TM_CCOEFF_NORMED)
    above_threshold = scores > TEMPLATE_MATCH_THRESHOLD
    return np.nonzero(above_threshold)


def find_notes(row_img):
    """Locate the notes in a row image and classify them as held or drummed.

    row_img is a row screenshot such as the one returned by get_row_image.
    Only its bottom quarter, which contains the notes, is examined.

    Returns a list of (x, note_name, has_drum, has_hold) tuples sorted from
    left to right. The detected notes are also printed.
    """
    # Only look at the very bottom part of the image, which contains the notes
    img_bottom = row_img[int(row_img.shape[0] * 3 / 4) :, :]

    # HSV copy, used to check the hue to the right of each note and decide
    # whether the note should be drummed or held
    img_hsv = cv.cvtColor(img_bottom, cv.COLOR_BGR2HSV)

    # Grayscale copy, necessary for template matching
    img = cv.cvtColor(img_bottom, cv.COLOR_BGRA2GRAY)

    img_height, img_width = img_hsv.shape[:2]

    notes = []
    for note_name, note_info in notes_info.items():
        # To avoid missing notes that are overlapped on either their right
        # or left sides, we match on both sides independently and then
        # combine the results
        left_side_matches = get_template_match(img, note_info["template_left"])
        right_side_matches = get_template_match(img, note_info["template_right"])

        xs = np.sort(
            np.concatenate(
                [
                    # The match position is the left edge of the template,
                    # so add TEMPLATE_WIDTH to shift it to the right
                    left_side_matches[1] + TEMPLATE_WIDTH,
                    # For the right-side template, first undo its offset so
                    # both kinds of hit refer to the same position
                    right_side_matches[1] - RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
                ]
            )
        )
        ys = np.sort(np.concatenate([left_side_matches[0], right_side_matches[0]]))

        # Nothing matched for this note
        if len(xs) == 0:
            continue

        # The topmost hit's y is used for every hit of this note
        y = ys[0]
        check_y = int(y + TEMPLATE_HEIGHT / 2)

        last_x = -100
        for x in xs:
            if x - last_x < TEMPLATE_WIDTH:
                # Skip repeated hits of the same template in the same space
                continue
            last_x = x

            # Using a single pixel slightly to the right of the note, check
            # whether the color looks like a HOLD or DRUM bar.
            check_x = x + 35
            if 0 <= check_x < img_width and 0 <= check_y < img_height:
                hue, saturation, value = img_hsv[check_y, check_x]
                is_colored = value > 20 and saturation > 20
                has_drum = is_colored and 147 < hue < 167
                has_hold = is_colored and 82 < hue < 102
            else:
                # A note this close to the edge has no pixel to probe;
                # indexing there would raise IndexError, so treat it as a
                # plain note instead.
                has_drum = False
                has_hold = False

            notes.append((x, note_name, has_drum, has_hold))

    # Sort the notes from left to right
    notes.sort(key=lambda n: n[0])

    # Print out the notes detected
    print(
        "new line",
        ",".join(
            f"{f[1]}{'-drum' if f[2] else ''}{'-hold' if f[3] else ''}" for f in notes
        ),
    )

    return notes


def karaoke_singer():
    """Run the capture / detect / press loop that plays the karaoke minigame.

    Each iteration screenshots the row the needle was last seen on, locates
    the needle and, when the needle is within the strike zone of a detected
    note, presses that note's key. When the needle disappears from the
    current row, the left side of the window is searched to find the row it
    moved to, and the notes of that row are detected afresh.

    The loop only ends when "q" is pressed in the DEBUG window or when an
    exception (for example KeyboardInterrupt) propagates. In both cases a key
    that is still held down for a HOLD note is released before leaving.
    """
    last_known_row_y = 0
    notes = None

    fps = 0
    last_time = time.time()
    last_start_x = None
    last_needle_x = None
    frames_since_row_change = 0
    last_drum_time = time.time()
    drum_key = None
    hold_key = None
    missed_frames = 0

    try:
        # The main karaoke-playing loop
        while True:
            frames_since_row_change += 1
            img = get_row_image(last_known_row_y)
            img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
            needle_x, needle_y, needle_was_found = get_needle_location(
                img_hsv, last_needle_x
            )

            if not needle_was_found:
                # The needle was not found on the row we expected, so try to
                # find it at the beginning of another row.

                # Allow the needle to go missing just for a bit before
                # searching other rows
                if missed_frames < 1:
                    missed_frames += 1
                    continue

                last_needle_x = None
                img = get_left_side_image()
                img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
                needle_x, needle_y, needle_was_found = get_needle_location(
                    img_hsv, 80
                )

                if not needle_was_found:
                    # If we still couldn't find it, give up for this frame
                    continue

                # Reset the notes we identified on the previous row
                notes = None
                last_start_x = needle_x
                frames_since_row_change = 0

                # Snap the needle y value up to the closest row y
                screen_top_padding = int(ROW_HEIGHT * 90 / 300)
                last_known_row_y = (
                    (needle_y - screen_top_padding) // ROW_HEIGHT
                ) * ROW_HEIGHT + screen_top_padding

                # Prepare the row image for note-finding
                img = get_row_image(last_known_row_y)

            # Reset some stuff since we found the needle
            missed_frames = 0
            last_needle_x = needle_x

            # When the needle is at the beginning of a row,
            # scope out all the notes coming up
            if notes is None:
                notes = find_notes(img)

            # Get a very rough approximation of the speed per frame of the
            # needle. This is super finicky, and it is unclear whether it
            # makes a difference in the long run.
            needle_speed = (
                (needle_x - last_start_x) / frames_since_row_change
                if frames_since_row_change != 0 and last_start_x is not None
                else 10
            )

            # Maximum distance the needle has to be from a note before
            # we attempt to play it
            strike_zone = min(
                MAX_PIXELS_BEFORE_PLAYING_NOTE,
                max(needle_speed * 3, MIN_PIXELS_BEFORE_PLAYING_NOTE),
            )

            # Rapid-fire key presses when drummed notes are active
            if (
                drum_key is not None
                and last_drum_time < time.time() - DRUM_DOWNTIME_SEC
            ):
                PressKey(drum_key)
                time.sleep(KEYPRESS_DURATION_SEC)
                ReleaseKey(drum_key)
                last_drum_time = time.time()

            for index, note in enumerate(notes):
                x, note_name, has_drum, has_hold = note

                # If we're close enough to a note, play it.
                if abs(x - needle_x) < strike_zone:
                    print("you're welcome", note_name)
                    # Safe despite iterating: every path below breaks out
                    notes.pop(index)

                    if drum_key is not None:
                        # just stop drumming
                        drum_key = None
                        break

                    if hold_key is not None:
                        # release the held key
                        ReleaseKey(hold_key)
                        hold_key = None
                        break

                    PressKey(notes_info[note_name]["key"])
                    if not has_hold:
                        time.sleep(KEYPRESS_DURATION_SEC)
                        ReleaseKey(notes_info[note_name]["key"])

                    hold_key = notes_info[note_name]["key"] if has_hold else None
                    drum_key = notes_info[note_name]["key"] if has_drum else None
                    break

            # Keep track of FPS for the detection, sometimes handy for debugging
            fps += 1
            if time.time() - last_time >= 1:
                last_time = time.time()
                # print(f"fps: {fps}")
                fps = 0

            # If debugging, draw the current row with the perceived location
            # of the needle drawn as a green line
            if DEBUG:
                cv.line(
                    img,
                    (needle_x - int(strike_zone), ROW_HEIGHT - 10),
                    (needle_x + int(strike_zone), ROW_HEIGHT - 10),
                    (0, 255, 0),
                    5,
                )
                cv.line(
                    img,
                    (needle_x, ROW_HEIGHT - 30),
                    (needle_x, ROW_HEIGHT - 10),
                    (0, 255, 0),
                    5,
                )
                window_title = "Test"
                cv.imshow(window_title, img)
                cv.moveWindow(window_title, 10, 720)
                if cv.waitKey(25) & 0xFF == ord("q"):
                    cv.destroyAllWindows()
                    break
    finally:
        # Stopping in the middle of a HOLD note would otherwise leave the
        # key pressed after the program exits.
        if hold_key is not None:
            ReleaseKey(hold_key)


# Only start playing when run as a script, so that importing this module
# (for example to reuse the detection helpers) does not start the key-pressing loop.
if __name__ == "__main__":
    karaoke_singer()

## trigger_key.py
# Code from https://stackoverflow.com/a/13615802

import ctypes
from ctypes import wintypes
import time

# Handle to user32.dll; use_last_error=True lets ctypes.get_last_error()
# report the error code of a failed call (see _check_count).
user32 = ctypes.WinDLL("user32", use_last_error=True)

# Values for the "type" field of INPUT
INPUT_MOUSE = 0
INPUT_KEYBOARD = 1
INPUT_HARDWARE = 2

# Bit flags for the "dwFlags" field of KEYBDINPUT
KEYEVENTF_EXTENDEDKEY = 0x0001
KEYEVENTF_KEYUP = 0x0002
KEYEVENTF_UNICODE = 0x0004
KEYEVENTF_SCANCODE = 0x0008

# Mapping type passed to MapVirtualKeyExW (virtual-key code -> scan code)
MAPVK_VK_TO_VSC = 0

# Virtual-key codes, see msdn.microsoft.com/en-us/library/dd375731
VK_TAB = 0x09
VK_MENU = 0x12

VK_DOWN_ARROW_KEY = 0x28
VK_LEFT_ARROW_KEY = 0x25
VK_RIGHT_ARROW_KEY = 0x27
VK_UP_ARROW_KEY = 0x26

VK_A_KEY = 0x41
VK_B_KEY = 0x42
VK_C_KEY = 0x43
VK_D_KEY = 0x44
VK_E_KEY = 0x45
VK_F_KEY = 0x46
VK_G_KEY = 0x47
VK_H_KEY = 0x48
VK_I_KEY = 0x49
VK_J_KEY = 0x4A
VK_K_KEY = 0x4B
VK_L_KEY = 0x4C
VK_M_KEY = 0x4D
VK_N_KEY = 0x4E
VK_O_KEY = 0x4F
VK_P_KEY = 0x50
VK_Q_KEY = 0x51
VK_R_KEY = 0x52
VK_S_KEY = 0x53
VK_T_KEY = 0x54
VK_U_KEY = 0x55
VK_V_KEY = 0x56
VK_W_KEY = 0x57
VK_X_KEY = 0x58
VK_Y_KEY = 0x59
VK_Z_KEY = 0x5A

# C struct definitions

# Attach a ULONG_PTR alias to wintypes so the structures below can use it
# for their dwExtraInfo fields.
wintypes.ULONG_PTR = wintypes.WPARAM


class MOUSEINPUT(ctypes.Structure):
    """ctypes layout for the mouse member ("mi") of the INPUT union.

    Presumably mirrors the Win32 MOUSEINPUT structure -- confirm against the
    SendInput documentation. It is not filled in by PressKey or ReleaseKey.
    """

    _fields_ = (
        ("dx", wintypes.LONG),
        ("dy", wintypes.LONG),
        ("mouseData", wintypes.DWORD),
        ("dwFlags", wintypes.DWORD),
        ("time", wintypes.DWORD),
        ("dwExtraInfo", wintypes.ULONG_PTR),
    )


class KEYBDINPUT(ctypes.Structure):
    """ctypes layout for the keyboard member ("ki") of the INPUT union.

    Unless KEYEVENTF_UNICODE is set in dwFlags, the constructor fills in
    wScan with the scan code that MapVirtualKeyExW reports for wVk.
    """

    _fields_ = (
        ("wVk", wintypes.WORD),
        ("wScan", wintypes.WORD),
        ("dwFlags", wintypes.DWORD),
        ("time", wintypes.DWORD),
        ("dwExtraInfo", wintypes.ULONG_PTR),
    )

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        if self.dwFlags & KEYEVENTF_UNICODE:
            return
        # Some programs use the scan code even if KEYEVENTF_SCANCODE
        # isn't set in dwFlags, so attempt to map the correct code.
        scan_code = user32.MapVirtualKeyExW(self.wVk, MAPVK_VK_TO_VSC, 0)
        self.wScan = scan_code


class HARDWAREINPUT(ctypes.Structure):
    """ctypes layout for the hardware member ("hi") of the INPUT union.

    Presumably mirrors the Win32 HARDWAREINPUT structure -- confirm against
    the SendInput documentation. It is not filled in by PressKey or ReleaseKey.
    """

    _fields_ = (
        ("uMsg", wintypes.DWORD),
        ("wParamL", wintypes.WORD),
        ("wParamH", wintypes.WORD),
    )


class INPUT(ctypes.Structure):
    """Event record passed to SendInput: a type tag plus a union of payloads.

    "type" takes one of the INPUT_* constants. The union is anonymous, so its
    members can be set directly on INPUT, e.g. INPUT(type=..., ki=...).
    """

    class _INPUT(ctypes.Union):
        # Keyboard, mouse and hardware payloads share the same storage
        _fields_ = (("ki", KEYBDINPUT), ("mi", MOUSEINPUT), ("hi", HARDWAREINPUT))

    # Expose the fields of "_input" as attributes of INPUT itself
    _anonymous_ = ("_input",)
    _fields_ = (("type", wintypes.DWORD), ("_input", _INPUT))


# Pointer-to-INPUT type, used as the second argument type of SendInput
LPINPUT = ctypes.POINTER(INPUT)


def _check_count(result, func, args):
    if result == 0:
        raise ctypes.WinError(ctypes.get_last_error())
    return args


# Route every SendInput result through _check_count, which raises when the
# call returns 0, and declare the argument types ctypes should convert to.
user32.SendInput.errcheck = _check_count
user32.SendInput.argtypes = (
    wintypes.UINT,  # nInputs
    LPINPUT,  # pInputs
    ctypes.c_int,  # cbSize
)

# Functions


def PressKey(hexKeyCode):
    """Send a key-down event for the virtual-key code hexKeyCode."""
    keyboard_event = KEYBDINPUT(wVk=hexKeyCode)
    key_down = INPUT(type=INPUT_KEYBOARD, ki=keyboard_event)
    user32.SendInput(1, ctypes.byref(key_down), ctypes.sizeof(key_down))


def ReleaseKey(hexKeyCode):
    """Send a key-up event for the virtual-key code hexKeyCode."""
    keyboard_event = KEYBDINPUT(wVk=hexKeyCode, dwFlags=KEYEVENTF_KEYUP)
    key_up = INPUT(type=INPUT_KEYBOARD, ki=keyboard_event)
    user32.SendInput(1, ctypes.byref(key_up), ctypes.sizeof(key_up))
	import time
	import mss
	from trigger_key import (
	PressKey,
	ReleaseKey,
	VK_UP_ARROW_KEY,
	VK_DOWN_ARROW_KEY,
	VK_LEFT_ARROW_KEY,
	VK_RIGHT_ARROW_KEY,
	)
	from PIL import Image, ImageStat
	import cv2 as cv
	import numpy as np

	# Set to True to show OpenCV preview windows of what the detector sees
	# DEBUG = True
	DEBUG = False

	# When doing the Kiryu singing parts, this should be True;
	# for the accompaniment songs (with the pink needle), set it
	# to False
	IS_NEEDLE_BLUE = True
	# IS_NEEDLE_BLUE = False

	# Note: there are plenty of hard-coded pixel values in this code, so you're
	# best off matching these dimensions as closely as you can (maybe adjust the
	# top if necessary)
	GAME_WINDOW_DIMENSIONS = {"top": 45, "left": 0, "width": 1280, "height": 720}

	# How long a tapped key stays down before it is released
	KEYPRESS_DURATION_SEC = 0.02
	# Minimum pause between two key presses while drumming a note
	DRUM_DOWNTIME_SEC = 0.1
	# Height in pixels of the strip captured for one row of notes and lyrics
	ROW_HEIGHT = int(GAME_WINDOW_DIMENSIONS["height"] * 15 / 72)
	# Minimum template-matching score for a location to count as a note
	TEMPLATE_MATCH_THRESHOLD = 0.80
	# Upper and lower bounds (in pixels) for the distance at which a note is played
	MAX_PIXELS_BEFORE_PLAYING_NOTE = 18
	MIN_PIXELS_BEFORE_PLAYING_NOTE = 5

	# For each note: the x offset of its icon inside notes.png and the key to press.
	# The loop below adds "template_left" and "template_right" entries.
	notes_info = {
	    "down": {"x": 5, "key": VK_DOWN_ARROW_KEY},
	    "right": {"x": 47, "key": VK_RIGHT_ARROW_KEY},
	    "up": {"x": 88, "key": VK_UP_ARROW_KEY},
	    "left": {"x": 130, "key": VK_LEFT_ARROW_KEY},
	}

	NOTE_TEMPLATES_PATH = "notes.png"
	# Flag 0 loads the image as grayscale, which is what template matching uses
	note_templates = cv.imread(NOTE_TEMPLATES_PATH, 0)
	if note_templates is None:
	    # cv.imread reports failure by returning None rather than raising, which
	    # would otherwise surface as an obscure TypeError in the slicing below.
	    raise FileNotFoundError(
	        f"Could not read {NOTE_TEMPLATES_PATH!r}; make sure the note icon "
	        "image is saved under that name in the current directory"
	    )
	TEMPLATE_WIDTH = 13
	TEMPLATE_HEIGHT = 19
	RIGHT_TEMPLATE_OFFSET = 12
	for note_info in notes_info.values():
	    x = note_info["x"]
	    y = 9
	    # Two overlapping crops of the same icon: its left part and its right part
	    note_info["template_left"] = note_templates[
	        y : y + TEMPLATE_HEIGHT, x : x + TEMPLATE_WIDTH
	    ]
	    note_info["template_right"] = note_templates[
	        y : y + TEMPLATE_HEIGHT,
	        x + RIGHT_TEMPLATE_OFFSET : x + RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
	    ]


	# Shared screen-capture handle used by the screenshot helpers
	sct = mss.mss()


	def get_left_side_image():
	    """Capture the leftmost quarter of the game window.

	    Used to look for the needle again after it has moved on to another row.
	    Returns the screenshot converted to a NumPy array.
	    """
	    capture_region = dict(GAME_WINDOW_DIMENSIONS)
	    capture_region["width"] = int(GAME_WINDOW_DIMENSIONS["width"] / 4)
	    screenshot = sct.grab(capture_region)
	    return np.asarray(screenshot)


	def get_row_image(y_offset):
	    """Capture one ROW_HEIGHT-tall strip (a row of notes and lyrics).

	    y_offset is measured from the top of the game window; negative values
	    are treated as 0. Returns the screenshot converted to a NumPy array.
	    """
	    row_top = GAME_WINDOW_DIMENSIONS["top"] + max(0, y_offset)
	    capture_region = dict(GAME_WINDOW_DIMENSIONS)
	    capture_region["top"] = int(row_top)
	    capture_region["height"] = ROW_HEIGHT
	    return np.asarray(sct.grab(capture_region))


	def extract_blue_needle_region(img_hsv):
	    """Return a mask of the pixels whose HSV values fall in the blue needle range."""
	    # Bounds are (hue, saturation, value) triples
	    lower_bound = (92, 118, 71)
	    upper_bound = (115, 255, 255)
	    return cv.inRange(img_hsv, lower_bound, upper_bound)


	def extract_pink_needle_region(img_hsv):
	    """Return a mask of the pixels whose HSV values fall in the pink needle range."""
	    # Bounds are (hue, saturation, value) triples
	    lower_bound = (158, 143, 107)
	    upper_bound = (169, 186, 223)
	    return cv.inRange(img_hsv, lower_bound, upper_bound)


	def get_needle_location(img_hsv, min_x=None):
	    """Locate the needle using its color and line detection.

	    img_hsv is an HSV image to search. When min_x is given, only the
	    120-pixel-wide band starting at that column is searched, and the
	    returned x is translated back into full-image coordinates.

	    Returns (x, y, found). When no line is detected the result is
	    (0, 0, False).
	    """
	    # It's a bit hacky, but the bottom 1/8 of the image (containing the
	    # notes) is trimmed off to make it less likely to detect lines in the
	    # row below.
	    usable_height = int(img_hsv.shape[0] * 7 / 8)
	    if min_x is None:
	        search_area = img_hsv[:usable_height]
	    else:
	        search_area = img_hsv[:usable_height, min_x : min_x + 120]

	    # Isolate the needle by color, then run Canny edge detection so the
	    # Hough transform has clean outlines to work with.
	    if IS_NEEDLE_BLUE:
	        needle_mask = extract_blue_needle_region(search_area)
	    else:
	        needle_mask = extract_pink_needle_region(search_area)
	    img_outlines = cv.Canny(needle_mask, 70, 200, None, 3)

	    # Positional arguments: rho=1, theta=pi, threshold=30, lines=None,
	    # minLineLength=30, maxLineGap=10.
	    lines = cv.HoughLinesP(img_outlines, 1, np.pi, 30, None, 30, 10)

	    if DEBUG and search_area.shape[0] <= ROW_HEIGHT:
	        cv.imshow("outline", img_outlines)
	        cv.moveWindow("outline", 10, 720 + ROW_HEIGHT)
	        if cv.waitKey(25) & 0xFF == ord("q"):
	            cv.destroyAllWindows()

	    if lines is None:
	        return 0, 0, False

	    # Account for the x offset introduced by working with a trimmed image
	    if min_x is not None:
	        lines[:, :, 0] += min_x
	        lines[:, :, 2] += min_x

	    # Very rough approximation of the needle's center: the mean of the first
	    # x coordinate and of both y coordinates over all detected segments.
	    x1_mean = np.mean(lines[:, 0, 0])
	    y1_mean = np.mean(lines[:, 0, 1])
	    y2_mean = np.mean(lines[:, 0, 3])
	    avg_x = int(x1_mean)
	    avg_y = int((y1_mean + y2_mean) / 2)

	    return avg_x, avg_y, True


	def get_template_match(img, template):
	    """Find where template matches img with a score above the threshold.

	    Returns a tuple of two index arrays, row (y) indices first and column (x)
	    indices second, one entry per location whose normalized correlation
	    score exceeds TEMPLATE_MATCH_THRESHOLD.
	    """
	    res = cv.matchTemplate(img, template, cv.TM_CCOEFF_NORMED)
	    return np.where(res > TEMPLATE_MATCH_THRESHOLD)


	def find_notes(row_img):
	    """Locate the notes in a row image and classify them as held or drummed.

	    Only the bottom quarter of row_img, which contains the notes, is
	    examined. Returns a list of (x, note_name, has_drum, has_hold) tuples
	    sorted from left to right. The detected notes are also printed.
	    """
	    # Only look at the very bottom part of the image, which contains the notes
	    img_bottom = row_img[int(row_img.shape[0] * 3 / 4) :, :]

	    # HSV copy, used to check the hue to the right of each note and decide
	    # whether the note should be drummed or held
	    img_hsv = cv.cvtColor(img_bottom, cv.COLOR_BGR2HSV)

	    # Grayscale copy, necessary for template matching
	    img = cv.cvtColor(img_bottom, cv.COLOR_BGRA2GRAY)

	    img_height, img_width = img_hsv.shape[:2]

	    notes = []
	    for note_name, note_info in notes_info.items():
	        # To avoid missing notes that are overlapped on either their right
	        # or left sides, we match on both sides independently and then
	        # combine the results
	        left_side_matches = get_template_match(img, note_info["template_left"])
	        right_side_matches = get_template_match(img, note_info["template_right"])

	        xs = np.sort(
	            np.concatenate(
	                [
	                    # The match position is the left edge of the template,
	                    # so add TEMPLATE_WIDTH to shift it to the right
	                    left_side_matches[1] + TEMPLATE_WIDTH,
	                    # For the right-side template, first undo its offset so
	                    # both kinds of hit refer to the same position
	                    right_side_matches[1] - RIGHT_TEMPLATE_OFFSET + TEMPLATE_WIDTH,
	                ]
	            )
	        )
	        ys = np.sort(np.concatenate([left_side_matches[0], right_side_matches[0]]))

	        # Nothing matched for this note
	        if len(xs) == 0:
	            continue

	        # The topmost hit's y is used for every hit of this note
	        y = ys[0]
	        check_y = int(y + TEMPLATE_HEIGHT / 2)

	        last_x = -100
	        for x in xs:
	            if x - last_x < TEMPLATE_WIDTH:
	                # Skip repeated hits of the same template in the same space
	                continue
	            last_x = x

	            # Using a single pixel slightly to the right of the note, check
	            # whether the color looks like a HOLD or DRUM bar.
	            check_x = x + 35
	            if 0 <= check_x < img_width and 0 <= check_y < img_height:
	                hue, saturation, value = img_hsv[check_y, check_x]
	                is_colored = value > 20 and saturation > 20
	                has_drum = is_colored and 147 < hue < 167
	                has_hold = is_colored and 82 < hue < 102
	            else:
	                # A note this close to the edge has no pixel to probe;
	                # indexing there would raise IndexError, so treat it as a
	                # plain note instead.
	                has_drum = False
	                has_hold = False

	            notes.append((x, note_name, has_drum, has_hold))

	    # Sort the notes from left to right
	    notes.sort(key=lambda n: n[0])

	    # Print out the notes detected
	    print(
	        "new line",
	        ",".join(
	            f"{f[1]}{'-drum' if f[2] else ''}{'-hold' if f[3] else ''}" for f in notes
	        ),
	    )

	    return notes


	def karaoke_singer():
	    """Run the capture / detect / press loop that plays the karaoke minigame.

	    Each iteration screenshots the row the needle was last seen on, locates
	    the needle and, when the needle is within the strike zone of a detected
	    note, presses that note's key. When the needle disappears from the
	    current row, the left side of the window is searched to find the row it
	    moved to, and the notes of that row are detected afresh.

	    The loop only ends when "q" is pressed in the DEBUG window or when an
	    exception (for example KeyboardInterrupt) propagates. In both cases a key
	    that is still held down for a HOLD note is released before leaving.
	    """
	    last_known_row_y = 0
	    notes = None

	    fps = 0
	    last_time = time.time()
	    last_start_x = None
	    last_needle_x = None
	    frames_since_row_change = 0
	    last_drum_time = time.time()
	    drum_key = None
	    hold_key = None
	    missed_frames = 0

	    try:
	        # The main karaoke-playing loop
	        while True:
	            frames_since_row_change += 1
	            img = get_row_image(last_known_row_y)
	            img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
	            needle_x, needle_y, needle_was_found = get_needle_location(
	                img_hsv, last_needle_x
	            )

	            if not needle_was_found:
	                # The needle was not found on the row we expected, so try to
	                # find it at the beginning of another row.

	                # Allow the needle to go missing just for a bit before
	                # searching other rows
	                if missed_frames < 1:
	                    missed_frames += 1
	                    continue

	                last_needle_x = None
	                img = get_left_side_image()
	                img_hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
	                needle_x, needle_y, needle_was_found = get_needle_location(
	                    img_hsv, 80
	                )

	                if not needle_was_found:
	                    # If we still couldn't find it, give up for this frame
	                    continue

	                # Reset the notes we identified on the previous row
	                notes = None
	                last_start_x = needle_x
	                frames_since_row_change = 0

	                # Snap the needle y value up to the closest row y
	                screen_top_padding = int(ROW_HEIGHT * 90 / 300)
	                last_known_row_y = (
	                    (needle_y - screen_top_padding) // ROW_HEIGHT
	                ) * ROW_HEIGHT + screen_top_padding

	                # Prepare the row image for note-finding
	                img = get_row_image(last_known_row_y)

	            # Reset some stuff since we found the needle
	            missed_frames = 0
	            last_needle_x = needle_x

	            # When the needle is at the beginning of a row,
	            # scope out all the notes coming up
	            if notes is None:
	                notes = find_notes(img)

	            # Get a very rough approximation of the speed per frame of the
	            # needle. This is super finicky, and it is unclear whether it
	            # makes a difference in the long run.
	            needle_speed = (
	                (needle_x - last_start_x) / frames_since_row_change
	                if frames_since_row_change != 0 and last_start_x is not None
	                else 10
	            )

	            # Maximum distance the needle has to be from a note before
	            # we attempt to play it
	            strike_zone = min(
	                MAX_PIXELS_BEFORE_PLAYING_NOTE,
	                max(needle_speed * 3, MIN_PIXELS_BEFORE_PLAYING_NOTE),
	            )

	            # Rapid-fire key presses when drummed notes are active
	            if (
	                drum_key is not None
	                and last_drum_time < time.time() - DRUM_DOWNTIME_SEC
	            ):
	                PressKey(drum_key)
	                time.sleep(KEYPRESS_DURATION_SEC)
	                ReleaseKey(drum_key)
	                last_drum_time = time.time()

	            for index, note in enumerate(notes):
	                x, note_name, has_drum, has_hold = note

	                # If we're close enough to a note, play it.
	                if abs(x - needle_x) < strike_zone:
	                    print("you're welcome", note_name)
	                    # Safe despite iterating: every path below breaks out
	                    notes.pop(index)

	                    if drum_key is not None:
	                        # just stop drumming
	                        drum_key = None
	                        break

	                    if hold_key is not None:
	                        # release the held key
	                        ReleaseKey(hold_key)
	                        hold_key = None
	                        break

	                    PressKey(notes_info[note_name]["key"])
	                    if not has_hold:
	                        time.sleep(KEYPRESS_DURATION_SEC)
	                        ReleaseKey(notes_info[note_name]["key"])

	                    hold_key = notes_info[note_name]["key"] if has_hold else None
	                    drum_key = notes_info[note_name]["key"] if has_drum else None
	                    break

	            # Keep track of FPS for the detection, sometimes handy for debugging
	            fps += 1
	            if time.time() - last_time >= 1:
	                last_time = time.time()
	                # print(f"fps: {fps}")
	                fps = 0

	            # If debugging, draw the current row with the perceived location
	            # of the needle drawn as a green line
	            if DEBUG:
	                cv.line(
	                    img,
	                    (needle_x - int(strike_zone), ROW_HEIGHT - 10),
	                    (needle_x + int(strike_zone), ROW_HEIGHT - 10),
	                    (0, 255, 0),
	                    5,
	                )
	                cv.line(
	                    img,
	                    (needle_x, ROW_HEIGHT - 30),
	                    (needle_x, ROW_HEIGHT - 10),
	                    (0, 255, 0),
	                    5,
	                )
	                window_title = "Test"
	                cv.imshow(window_title, img)
	                cv.moveWindow(window_title, 10, 720)
	                if cv.waitKey(25) & 0xFF == ord("q"):
	                    cv.destroyAllWindows()
	                    break
	    finally:
	        # Stopping in the middle of a HOLD note would otherwise leave the
	        # key pressed after the program exits.
	        if hold_key is not None:
	            ReleaseKey(hold_key)


	# Only start playing when run as a script, so that importing this module
	# does not start the key-pressing loop.
	if __name__ == "__main__":
	    karaoke_singer()
	# Code from https://stackoverflow.com/a/13615802

	import ctypes
	from ctypes import wintypes
	import time

	# Handle to user32.dll; use_last_error=True lets ctypes.get_last_error()
	# report the error code of a failed call (see _check_count).
	user32 = ctypes.WinDLL("user32", use_last_error=True)

	# Values for the "type" field of INPUT
	INPUT_MOUSE = 0
	INPUT_KEYBOARD = 1
	INPUT_HARDWARE = 2

	# Bit flags for the "dwFlags" field of KEYBDINPUT
	KEYEVENTF_EXTENDEDKEY = 0x0001
	KEYEVENTF_KEYUP = 0x0002
	KEYEVENTF_UNICODE = 0x0004
	KEYEVENTF_SCANCODE = 0x0008

	# Mapping type passed to MapVirtualKeyExW (virtual-key code -> scan code)
	MAPVK_VK_TO_VSC = 0

	# Virtual-key codes, see msdn.microsoft.com/en-us/library/dd375731
	VK_TAB = 0x09
	VK_MENU = 0x12

	VK_DOWN_ARROW_KEY = 0x28
	VK_LEFT_ARROW_KEY = 0x25
	VK_RIGHT_ARROW_KEY = 0x27
	VK_UP_ARROW_KEY = 0x26

	VK_A_KEY = 0x41
	VK_B_KEY = 0x42
	VK_C_KEY = 0x43
	VK_D_KEY = 0x44
	VK_E_KEY = 0x45
	VK_F_KEY = 0x46
	VK_G_KEY = 0x47
	VK_H_KEY = 0x48
	VK_I_KEY = 0x49
	VK_J_KEY = 0x4A
	VK_K_KEY = 0x4B
	VK_L_KEY = 0x4C
	VK_M_KEY = 0x4D
	VK_N_KEY = 0x4E
	VK_O_KEY = 0x4F
	VK_P_KEY = 0x50
	VK_Q_KEY = 0x51
	VK_R_KEY = 0x52
	VK_S_KEY = 0x53
	VK_T_KEY = 0x54
	VK_U_KEY = 0x55
	VK_V_KEY = 0x56
	VK_W_KEY = 0x57
	VK_X_KEY = 0x58
	VK_Y_KEY = 0x59
	VK_Z_KEY = 0x5A

	# C struct definitions

	# Attach a ULONG_PTR alias to wintypes so the structures below can use it
	# for their dwExtraInfo fields.
	wintypes.ULONG_PTR = wintypes.WPARAM


	class MOUSEINPUT(ctypes.Structure):
	    """ctypes layout for the mouse member ("mi") of the INPUT union."""

	    _fields_ = (
	        ("dx", wintypes.LONG),
	        ("dy", wintypes.LONG),
	        ("mouseData", wintypes.DWORD),
	        ("dwFlags", wintypes.DWORD),
	        ("time", wintypes.DWORD),
	        ("dwExtraInfo", wintypes.ULONG_PTR),
	    )


	class KEYBDINPUT(ctypes.Structure):
	    """ctypes layout for the keyboard member ("ki") of the INPUT union.

	    Unless KEYEVENTF_UNICODE is set in dwFlags, the constructor fills in
	    wScan with the scan code that MapVirtualKeyExW reports for wVk.
	    """

	    _fields_ = (
	        ("wVk", wintypes.WORD),
	        ("wScan", wintypes.WORD),
	        ("dwFlags", wintypes.DWORD),
	        ("time", wintypes.DWORD),
	        ("dwExtraInfo", wintypes.ULONG_PTR),
	    )

	    # The signature must accept keyword arguments: PressKey and ReleaseKey
	    # construct this as KEYBDINPUT(wVk=..., dwFlags=...).
	    def __init__(self, *args, **kwds):
	        super(KEYBDINPUT, self).__init__(*args, **kwds)
	        # Some programs use the scan code even if KEYEVENTF_SCANCODE
	        # isn't set in dwFlags, so attempt to map the correct code.
	        if not self.dwFlags & KEYEVENTF_UNICODE:
	            self.wScan = user32.MapVirtualKeyExW(self.wVk, MAPVK_VK_TO_VSC, 0)


	class HARDWAREINPUT(ctypes.Structure):
	    """ctypes layout for the hardware member ("hi") of the INPUT union."""

	    _fields_ = (
	        ("uMsg", wintypes.DWORD),
	        ("wParamL", wintypes.WORD),
	        ("wParamH", wintypes.WORD),
	    )


	class INPUT(ctypes.Structure):
	    """Event record passed to SendInput: a type tag plus a union of payloads.

	    "type" takes one of the INPUT_* constants. The union is anonymous, so its
	    members can be set directly on INPUT, e.g. INPUT(type=..., ki=...).
	    """

	    class _INPUT(ctypes.Union):
	        # Keyboard, mouse and hardware payloads share the same storage
	        _fields_ = (("ki", KEYBDINPUT), ("mi", MOUSEINPUT), ("hi", HARDWAREINPUT))

	    # Expose the fields of "_input" as attributes of INPUT itself
	    _anonymous_ = ("_input",)
	    _fields_ = (("type", wintypes.DWORD), ("_input", _INPUT))


	# Pointer-to-INPUT type, used as the second argument type of SendInput
	LPINPUT = ctypes.POINTER(INPUT)


	def _check_count(result, func, args):
	if result == 0:
	raise ctypes.WinError(ctypes.get_last_error())
	return args


	# Route every SendInput result through _check_count, which raises when the
	# call returns 0, and declare the argument types ctypes should convert to.
	user32.SendInput.errcheck = _check_count
	user32.SendInput.argtypes = (
	wintypes.UINT, # nInputs
	LPINPUT, # pInputs
	ctypes.c_int, # cbSize
	)

	# Functions


	def PressKey(hexKeyCode):
	    """Send a key-down event for the virtual-key code hexKeyCode."""
	    x = INPUT(type=INPUT_KEYBOARD, ki=KEYBDINPUT(wVk=hexKeyCode))
	    user32.SendInput(1, ctypes.byref(x), ctypes.sizeof(x))


	def ReleaseKey(hexKeyCode):
	    """Send a key-up event for the virtual-key code hexKeyCode."""
	    x = INPUT(
	        type=INPUT_KEYBOARD, ki=KEYBDINPUT(wVk=hexKeyCode, dwFlags=KEYEVENTF_KEYUP)
	    )
	    user32.SendInput(1, ctypes.byref(x), ctypes.sizeof(x))