Skip to content

Instantly share code, notes, and snippets.

@banjun
Last active January 31, 2024 18:44
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save banjun/5179290fce141c44c54f63f31acc34f5 to your computer and use it in GitHub Desktop.
Save banjun/5179290fce141c44c54f63f31acc34f5 to your computer and use it in GitHub Desktop.
Implementation memo: capturing the contents of one specific window, written for https://github.com/mzp/HeartVoice
import Cocoa
import CoreGraphics
import Vision
/// A single window, looked up by owning application name and window title,
/// whose contents can be captured as a bitmap or an `NSImage`.
struct TargetWindow {
    /// Window number reported by the window list, used to address the window when capturing.
    let id: CGWindowID
    /// Window rectangle as reported under `kCGWindowBounds` at lookup time.
    let bounds: CGRect

    /// Finds the first window whose owner name and title both match exactly.
    ///
    /// - Parameters:
    ///   - appName: Expected value of `kCGWindowOwnerName`.
    ///   - windowTitle: Expected value of `kCGWindowName`.
    /// - Returns: `nil` when the window list is unavailable, no window matches,
    ///   or the matched entry lacks a usable window number or bounds.
    init?(appName: String, windowTitle: String) {
        guard let infoList = CGWindowListCopyWindowInfo(.optionAll, kCGNullWindowID) as? [[String: Any]] else {
            return nil
        }
        let match = infoList.first(where: { info in
            let owner = info[kCGWindowOwnerName as String] as? String
            let title = info[kCGWindowName as String] as? String
            return owner == appName && title == windowTitle
        })
        guard let info = match,
              let number = info[kCGWindowNumber as String] as? Int,
              let boundsDictionary = info[kCGWindowBounds as String] as? NSDictionary,
              let frame = CGRect(dictionaryRepresentation: boundsDictionary) else {
            return nil
        }
        self.id = CGWindowID(number)
        self.bounds = frame
    }

    /// Captures the window (or a part of it) as a bitmap representation.
    ///
    /// - Parameter relativeBounds: Region to capture, expressed relative to the window's
    ///   `bounds` origin. When `nil`, `CGRect.null` is passed to the capture call
    ///   (per Core Graphics documentation this requests the minimal rectangle enclosing the window).
    /// - Returns: The captured bitmap, or `nil` if the capture call produced no image.
    func captureBitmap(relativeBounds: CGRect? = nil) -> NSBitmapImageRep? {
        let captureRect: CGRect
        if let relative = relativeBounds {
            // Shift the window-relative rectangle by the window's own origin.
            captureRect = relative.offsetBy(dx: bounds.minX, dy: bounds.minY)
        } else {
            captureRect = .null
        }
        return CGWindowListCreateImage(captureRect, .optionIncludingWindow, id, [])
            .map { NSBitmapImageRep(cgImage: $0) }
    }

    /// Captures the window (or a part of it) and wraps the bitmap in an `NSImage`
    /// of the same size.
    ///
    /// - Parameter relativeBounds: See `captureBitmap(relativeBounds:)`.
    /// - Returns: The captured image, or `nil` if the bitmap capture failed.
    func capture(relativeBounds: CGRect? = nil) -> NSImage? {
        return captureBitmap(relativeBounds: relativeBounds).map { representation -> NSImage in
            let wrapper = NSImage(size: representation.size)
            wrapper.addRepresentation(representation)
            return wrapper
        }
    }
}
/// Captures a small strip of the "Mac Blu-ray Player" window once per second,
/// shows it in an image view, detects character boxes in it with Vision and
/// logs the digits classified by the project's `MNIST` model as a
/// colon-separated string.
final class ViewController: NSViewController {
    /// Failures specific to turning a detected character into a digit.
    private enum RecognitionError: Error {
        /// The 28×28 glyph image could not be converted to a pixel buffer.
        case pixelBufferUnavailable
    }

    let imageView = NSImageView(frame: .zero)
    var timer: Timer?

    /// Digit classifier, created on first use and reused for every character
    /// instead of being rebuilt for each one.
    private lazy var digitClassifier = MNIST()

    override func viewDidLoad() {
        super.viewDidLoad()
        imageView.autoresizingMask = [.width, .height]
        imageView.frame = view.bounds
        view.addSubview(imageView)
    }

    override func viewDidAppear() {
        super.viewDidAppear()
        // Never leave a previous timer running if the view appears again.
        timer?.invalidate()
        timer = Timer.scheduledTimer(withTimeInterval: 1, repeats: true) { [weak self] _ in
            self?.capture()
        }
    }

    override func viewWillDisappear() {
        super.viewWillDisappear()
        timer?.invalidate()
        timer = nil
    }

    /// Captures the readout strip, displays it and logs the recognised digits.
    ///
    /// Does nothing when the player window cannot be found. When the capture
    /// itself fails the image view is cleared and recognition is skipped.
    func capture() {
        guard let player = TargetWindow(appName: "Mac Blu-ray Player", windowTitle: "Mac Blu-ray Player") else { return }
        // NOTE(review): the 45×12 strip at this offset is presumably the playback-position
        // readout of the player window — confirm against the actual UI.
        let readoutRect = CGRect(x: 0, y: player.bounds.height - 12 - 64, width: 45, height: 12)
        let image = player.capture(relativeBounds: readoutRect)
        imageView.image = image

        // A failed capture or a non-bitmap representation ends this pass instead of crashing.
        guard let sourceImage = image,
              let cgImage = sourceImage.representations
                .compactMap({ ($0 as? NSBitmapImageRep)?.cgImage })
                .first else { return }

        let textRequest = VNDetectTextRectanglesRequest { request, error in
            if let error = error {
                NSLog("%@", "text detection failed: \(error)")
                return
            }
            guard let observations = request.results as? [VNTextObservation] else { return }
            let characterBoxes = observations.flatMap { $0.characterBoxes ?? [] }
            do {
                let digits = try self.recognizeDigits(in: characterBoxes, of: sourceImage)
                NSLog("%@", self.positionString(from: digits))
            } catch {
                NSLog("%@", "digit recognition failed: \(error)")
            }
        }
        textRequest.reportCharacterBoxes = true

        do {
            try VNImageRequestHandler(cgImage: cgImage).perform([textRequest])
        } catch {
            NSLog("%@", "text detection request failed: \(error)")
        }
    }

    /// Classifies every sufficiently tall character box as a digit.
    ///
    /// - Parameters:
    ///   - characterBoxes: Character boxes in coordinates normalised to the image size.
    ///   - sourceImage: The image the boxes were detected in.
    /// - Returns: One class label per accepted box, in the order given.
    /// - Throws: `RecognitionError.pixelBufferUnavailable` or any error thrown by the model.
    private func recognizeDigits(in characterBoxes: [VNRectangleObservation], of sourceImage: NSImage) throws -> [Int64] {
        return try characterBoxes
            // Keep only boxes taller than half of the captured strip.
            .filter { abs($0.topLeft.y - $0.bottomRight.y) > 0.5 }
            .map { try classifyDigit(in: $0, of: sourceImage) }
    }

    /// Copies one character box onto the centre of a black 28×28 image and
    /// returns the class label the model predicts for it.
    private func classifyDigit(in box: VNRectangleObservation, of sourceImage: NSImage) throws -> Int64 {
        // Scale the normalised box to the source image's size.
        let size = CGSize(width: abs(box.bottomRight.x - box.topLeft.x) * sourceImage.size.width,
                          height: abs(box.bottomRight.y - box.topLeft.y) * sourceImage.size.height)
        let sourceRect = CGRect(x: min(box.bottomRight.x, box.topLeft.x) * sourceImage.size.width,
                                y: min(box.bottomRight.y, box.topLeft.y) * sourceImage.size.height,
                                width: size.width,
                                height: size.height)

        let glyph = NSImage(size: CGSize(width: 28, height: 28))
        glyph.lockFocus()
        NSColor.black.set()
        CGRect(origin: .zero, size: glyph.size).fill()
        sourceImage.draw(at: CGPoint(x: (28 - size.width) / 2, y: (28 - size.height) / 2),
                         from: sourceRect,
                         operation: .copy,
                         fraction: 1)
        glyph.unlockFocus()

        guard let pixels = glyph.pixelBuffer() else {
            throw RecognitionError.pixelBufferUnavailable
        }
        return try digitClassifier.prediction(input: MNISTInput(image: pixels)).classLabel
    }

    /// Groups digits into pairs from the right and joins the pairs with ":".
    ///
    /// A leading `0` is prepended first so that an odd number of digits still
    /// forms complete pairs (e.g. `[1, 2, 3]` becomes `"01:23"`); with an even
    /// number of digits the extra `0` is left unpaired and dropped.
    private func positionString(from digits: [Int64]) -> String {
        let prefixedReversedDigits = Array(([0] + digits).reversed())
        let components = stride(from: 0, to: prefixedReversedDigits.count - 1, by: 2)
            .map { (prefixedReversedDigits[$0 + 1], prefixedReversedDigits[$0]) }
            .reversed()
        return components.map { "\($0)\($1)" }.joined(separator: ":")
    }
}
// https://gist.github.com/DennisWeidmann/7c4b4bb72062bd1a40c714aa5d95a0d7
extension NSImage {
    /// Renders the image into a newly created single-channel, 8-bit pixel buffer
    /// (`kCVPixelFormatType_OneComponent8`) using a device-gray bitmap context.
    ///
    /// The buffer's dimensions are the image's `size` truncated to whole pixels.
    ///
    /// - Returns: The filled pixel buffer, or `nil` if the buffer could not be
    ///   created or locked, or if no bitmap context could be made over its memory.
    func pixelBuffer() -> CVPixelBuffer? {
        let pixelWidth = Int(size.width)
        let pixelHeight = Int(size.height)
        let attrs = [kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                     kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue] as CFDictionary

        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                         pixelWidth,
                                         pixelHeight,
                                         kCVPixelFormatType_OneComponent8,
                                         attrs,
                                         &pixelBuffer)
        guard let resultPixelBuffer = pixelBuffer, status == kCVReturnSuccess else {
            return nil
        }

        guard CVPixelBufferLockBaseAddress(resultPixelBuffer, []) == kCVReturnSuccess else {
            return nil
        }
        // Unlock on every exit path, including the early return below.
        defer { CVPixelBufferUnlockBaseAddress(resultPixelBuffer, []) }

        guard let context = CGContext(data: CVPixelBufferGetBaseAddress(resultPixelBuffer),
                                      width: pixelWidth,
                                      height: pixelHeight,
                                      bitsPerComponent: 8,
                                      bytesPerRow: CVPixelBufferGetBytesPerRow(resultPixelBuffer),
                                      space: CGColorSpaceCreateDeviceGray(),
                                      bitmapInfo: CGImageAlphaInfo.none.rawValue) else {
            return nil
        }

        // Draw through AppKit into the buffer-backed context, then restore the
        // previously current graphics context.
        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = NSGraphicsContext(cgContext: context, flipped: false)
        draw(in: CGRect(origin: .zero, size: size))
        NSGraphicsContext.restoreGraphicsState()

        return resultPixelBuffer
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment