Create a StoryTeller in Swift with Firebase ML Kit Text Recognition OCR | Lil ‘Bits | https://www.youtube.com/watch?v=kpzyFZYI1PQ
import UIKit
import AVFoundation
import Firebase
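
// StoryTellerViewController streams camera frames into Firebase ML Kit's
// on-device text recognizer and reads any detected text aloud.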
class StoryTellerViewController: UIViewController {
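    // Live camera preview backed by the capture session below.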
    private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
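
    // Back wide-angle camera delivering video frames on a dedicated queue;
    // returned without inputs if the camera is unavailable.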
    private lazy var captureSession: AVCaptureSession = {
        let captureSession = AVCaptureSession()
        guard let captureDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
            let input = try? AVCaptureDeviceInput(device: captureDevice) else {
                return captureSession
        }
        captureSession.addInput(input)
        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "SampleBuffer"))
        captureSession.addOutput(output)
        return captureSession
    }()

    let speechSynthesizer = AVSpeechSynthesizer()
    lazy var vision = Vision.vision()
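    // Non-nil while a detection is in flight; doubles as a "busy" flag so
    // only one frame is processed at a time.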
    var textDetector: VisionTextDetector?
    private var textsToRead = [String]()

    override func viewDidLoad() {
        super.viewDidLoad()
        speechSynthesizer.delegate = self
        view.layer.addSublayer(previewLayer)
        try? setPlaybackAudioSession()
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        previewLayer.frame = view.bounds
        checkCameraAccess()
    }
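
    // Starts the session when the camera is authorized, prompts the user the
    // first time, and does nothing when access is denied or restricted.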
    func checkCameraAccess() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized: captureSession.startRunning()
        case .notDetermined: requestCameraAccess()
        default: break
        }
    }

    func requestCameraAccess() {
        AVCaptureDevice.requestAccess(for: .video) { _ in
            self.checkCameraAccess()
        }
    }
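
    // Configures the shared audio session for speech playback.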
    func setPlaybackAudioSession() throws {
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(AVAudioSessionCategoryPlayback)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    }

    // Queues the detected texts and starts reading the first one.
    func read(_ texts: [String]) {
        textsToRead.append(contentsOf: texts)
        guard !textsToRead.isEmpty else { return }
        read(textsToRead.removeFirst())
    }
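
    // Speaks one string, using a voice that matches its dominant language;
    // when no matching voice is found, the system default voice is used.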
    func read(_ text: String) {
        let speechUtterance = AVSpeechUtterance(string: text)
        let dominantLanguage = NSLinguisticTagger.dominantLanguage(for: text)
        speechUtterance.voice = AVSpeechSynthesisVoice(language: dominantLanguage)
        speechSynthesizer.speak(speechUtterance)
    }
}

extension StoryTellerViewController: AVSpeechSynthesizerDelegate {
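    // Reads the next pending text, or resumes scanning once the queue is empty.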
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        if textsToRead.isEmpty {
            textDetector = nil
            captureSession.startRunning()
        } else {
            read(textsToRead.removeFirst())
        }
    }
}

extension StoryTellerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
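    // Called on the "SampleBuffer" queue for every captured frame; frames that
    // arrive while a detection or speech is in progress are dropped.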
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        if textDetector == nil {
            let metadata = VisionImageMetadata()
            metadata.orientation = .rightTop // Back camera held in portrait.
            let visionImage = VisionImage(buffer: sampleBuffer)
            visionImage.metadata = metadata
            textDetector = vision.textDetector()
            textDetector?.detect(in: visionImage, completion: handleDetect)
        }
    }
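
    // On success, pauses the camera and queues the recognized texts for speech;
    // on failure, releases the detector so the next frame can be processed.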
    func handleDetect(features: [VisionText]?, error: Error?) {
        guard error == nil, let features = features, !features.isEmpty else {
            textDetector = nil
            return
        }
        captureSession.stopRunning()
        read(features.compactMap { $0.text })
    }
}
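
For Vision.vision() to return a working detector, Firebase must be configured before the view controller first touches it, and the camera prompt requires an NSCameraUsageDescription entry in Info.plist. A minimal sketch of the accompanying AppDelegate, assuming the standard setup for this era of the SDK (the 'Firebase/Core' and 'Firebase/MLVision' pods and a GoogleService-Info.plist in the app bundle):

import UIKit
import Firebase

@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
    var window: UIWindow?

    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
        // Reads GoogleService-Info.plist and initializes ML Kit's Vision services.
        FirebaseApp.configure()
        return true
    }
}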