Catfish-Man/tweetstorm.swift

## tweetstorm.swift
//  Created by David Smith on 5/29/17.
//  Copyright © 2017 Unseen University. All rights reserved.
//
//  Localization- and encoding-safe solution to Coraline Ada's challenge here: https://twitter.com/CoralineAda/status/869204799027372032
//  Very lightly tested. Probably contains bugs.

import Foundation

/// Splits `uncanonicalizedInput` into a sequence of tweets of at most 140 Unicode scalars each.
///
/// The text is first converted to NFC. Every tweet starts with `handle` followed by a space
/// (when a handle is given) and that prefix counts against the 140-scalar limit. Tweets are
/// broken between words as reported by Foundation's `.byWords` enumeration; every tweet that is
/// followed by another one ends in "…". A run of text that cannot fit in a tweet on its own is
/// split across tweets with a trailing "-" (naive hyphenation, preferring grapheme boundaries).
///
/// - Parameters:
///   - uncanonicalizedInput: The text to split, in any normalization form.
///   - handle: Optional handle to prepend to every tweet.
/// - Returns: The tweets in order; empty when the input contains no words.
/// - Precondition: The handle must leave room for at least one scalar of text plus a
///   one-scalar suffix.
func tweetStorm(input uncanonicalizedInput:String, handle:String?) -> [String] {
    let maxTweetLength = 140 //counted in Unicode scalars of the NFC text
    let input = uncanonicalizedInput.precomposedStringWithCanonicalMapping //twitter requires NFC
    let handlePrefix = handle.map { $0 + " " } ?? ""
    let scalars = input.unicodeScalars
    let whitespace = CharacterSet.whitespacesAndNewlines

    var wordRanges = [Range<String.Index>]()
    input.enumerateSubstrings(in: input.startIndex ..< input.endIndex, options: .byWords) { (_, wordRange, _, _) in
        wordRanges.append(wordRange)
    }
    guard let firstWord = wordRanges.first else { return [] }

    //Scalars of input text a tweet can hold when nothing is appended after the text.
    //Tweets that end in "…" or "-" can hold one less.
    let budget = maxTweetLength - handlePrefix.unicodeScalars.count
    precondition(budget >= 2, "handle is too long to leave room for any tweet text")

    //Walks forward from `start`, stopping at `limit` or at the first scalar failing `predicate`.
    func advance(from start: String.Index, upTo limit: String.Index, while predicate: (Unicode.Scalar) -> Bool) -> String.Index {
        var position = start
        while position < limit && predicate(scalars[position]) {
            position = scalars.index(after: position)
        }
        return position
    }

    //Walks backward from `end` over trailing whitespace, never retreating past `floor`.
    func retreat(from end: String.Index, downTo floor: String.Index) -> String.Index {
        var position = end
        while position > floor {
            let previous = scalars.index(before: position)
            if !whitespace.contains(scalars[previous]) { break }
            position = previous
        }
        return position
    }

    //Picks where to cut an over-long run that starts at `start`. The caller guarantees more than
    //`budget - 1` scalars follow `start`, so the offset below cannot run off the end of the text.
    func hyphenationPoint(after start: String.Index) -> String.Index {
        let hardLimit = scalars.index(start, offsetBy: budget - 1) //leave one scalar for the hyphen
        var candidate = hardLimit
        while candidate > start {
            //Prefer not to cut through the middle of a grapheme cluster.
            if candidate.samePosition(in: input) != nil { return candidate }
            candidate = scalars.index(before: candidate)
        }
        //A single grapheme is longer than a whole tweet; cut between scalars so we still make progress.
        return hardLimit
    }

    var output = [String]()
    //Where the text of the tweet currently being assembled begins.
    var tweetStart = advance(from: scalars.startIndex, upTo: firstWord.lowerBound, while: { whitespace.contains($0) })
    //End of the text already accepted into that tweet; nil while the tweet is still empty.
    var pendingEnd: String.Index? = nil

    for (wordIdx, word) in wordRanges.enumerated() {
        let isLast = wordIdx == wordRanges.count - 1

        //A chunk is a word plus whatever is glued to its end (e.g. punctuation). For the last
        //word it is everything up to the end of the text, minus trailing whitespace.
        let chunkEnd: String.Index
        if isLast {
            chunkEnd = retreat(from: scalars.endIndex, downTo: word.upperBound)
        } else {
            chunkEnd = advance(from: word.upperBound, upTo: wordRanges[wordIdx + 1].lowerBound, while: { !whitespace.contains($0) })
        }

        //Only a tweet ending at the very last chunk goes out without a one-scalar suffix.
        let capacity = isLast ? budget : budget - 1

        if let end = pendingEnd, scalars.distance(from: tweetStart, to: chunkEnd) > capacity {
            //This chunk does not fit after what is already pending: emit the pending text.
            output.append(handlePrefix + String(input[tweetStart ..< end]) + "…")
            tweetStart = advance(from: end, upTo: word.lowerBound, while: { whitespace.contains($0) })
            pendingEnd = nil
        }

        //Either the chunk fits, or the tweet is empty and the chunk is too long on its own.
        //TODO: Doing the hyphenation properly without a typesetting engine would be tricky to say the least…
        while scalars.distance(from: tweetStart, to: chunkEnd) > capacity {
            let splitPoint = hyphenationPoint(after: tweetStart)
            output.append(handlePrefix + String(input[tweetStart ..< splitPoint]) + "-")
            tweetStart = splitPoint
        }

        pendingEnd = chunkEnd
    }

    if let end = pendingEnd {
        output.append(handlePrefix + String(input[tweetStart ..< end]))
    }

    assert(output.allSatisfy { $0.unicodeScalars.count <= maxTweetLength })

    return output
}

//Command-line driver: reads the UTF-8 file named by the single argument, splits it into tweets
//and prints each tweet between a "[" line and a "]" line.

//Expect exactly the process name and the input file path.
guard CommandLine.arguments.count == 2 else {
    let processName = CommandLine.arguments.first ?? "tweetstorm"
    FileHandle.standardError.write(Data("usage: \(processName) <input-file>\n".utf8))
    exit(EXIT_FAILURE)
}

let inputURL = URL(fileURLWithPath: CommandLine.arguments[1])

//Report an unreadable or non-UTF-8 file on stderr instead of trapping.
let input: String = {
    do {
        return try String(contentsOf: inputURL, encoding: .utf8)
    } catch {
        FileHandle.standardError.write(Data("error: could not read \(inputURL.path) as UTF-8: \(error.localizedDescription)\n".utf8))
        exit(EXIT_FAILURE)
    }
}()

let output = tweetStorm(input: input, handle: "@Catfish_Man")
for tweet in output {
    print("[")
    print(tweet)
    print("]")
}
	// Created by David Smith on 5/29/17.
	// Copyright © 2017 Unseen University. All rights reserved.
	//
	// Localization- and encoding-safe solution to Coraline Ada's challenge here: https://twitter.com/CoralineAda/status/869204799027372032
	// Very lightly tested. Probably contains bugs.

	import Foundation

	/// Splits `uncanonicalizedInput` into a sequence of tweets of at most 140 Unicode scalars each.
	///
	/// The text is first converted to NFC. Every tweet starts with `handle` followed by a space
	/// (when a handle is given) and that prefix counts against the 140-scalar limit. Tweets are
	/// broken between words as reported by Foundation's `.byWords` enumeration; every tweet that is
	/// followed by another one ends in "…". A run of text that cannot fit in a tweet on its own is
	/// split across tweets with a trailing "-" (naive hyphenation, preferring grapheme boundaries).
	///
	/// - Parameters:
	///   - uncanonicalizedInput: The text to split, in any normalization form.
	///   - handle: Optional handle to prepend to every tweet.
	/// - Returns: The tweets in order; empty when the input contains no words.
	/// - Precondition: The handle must leave room for at least one scalar of text plus a
	///   one-scalar suffix.
	func tweetStorm(input uncanonicalizedInput:String, handle:String?) -> [String] {
	    let maxTweetLength = 140 //counted in Unicode scalars of the NFC text
	    let input = uncanonicalizedInput.precomposedStringWithCanonicalMapping //twitter requires NFC
	    let handlePrefix = handle.map { $0 + " " } ?? ""
	    let scalars = input.unicodeScalars
	    let whitespace = CharacterSet.whitespacesAndNewlines

	    var wordRanges = [Range<String.Index>]()
	    input.enumerateSubstrings(in: input.startIndex ..< input.endIndex, options: .byWords) { (_, wordRange, _, _) in
	        wordRanges.append(wordRange)
	    }
	    guard let firstWord = wordRanges.first else { return [] }

	    //Scalars of input text a tweet can hold when nothing is appended after the text.
	    //Tweets that end in "…" or "-" can hold one less.
	    let budget = maxTweetLength - handlePrefix.unicodeScalars.count
	    precondition(budget >= 2, "handle is too long to leave room for any tweet text")

	    //Walks forward from `start`, stopping at `limit` or at the first scalar failing `predicate`.
	    func advance(from start: String.Index, upTo limit: String.Index, while predicate: (Unicode.Scalar) -> Bool) -> String.Index {
	        var position = start
	        while position < limit && predicate(scalars[position]) {
	            position = scalars.index(after: position)
	        }
	        return position
	    }

	    //Walks backward from `end` over trailing whitespace, never retreating past `floor`.
	    func retreat(from end: String.Index, downTo floor: String.Index) -> String.Index {
	        var position = end
	        while position > floor {
	            let previous = scalars.index(before: position)
	            if !whitespace.contains(scalars[previous]) { break }
	            position = previous
	        }
	        return position
	    }

	    //Picks where to cut an over-long run that starts at `start`. The caller guarantees more than
	    //`budget - 1` scalars follow `start`, so the offset below cannot run off the end of the text.
	    func hyphenationPoint(after start: String.Index) -> String.Index {
	        let hardLimit = scalars.index(start, offsetBy: budget - 1) //leave one scalar for the hyphen
	        var candidate = hardLimit
	        while candidate > start {
	            //Prefer not to cut through the middle of a grapheme cluster.
	            if candidate.samePosition(in: input) != nil { return candidate }
	            candidate = scalars.index(before: candidate)
	        }
	        //A single grapheme is longer than a whole tweet; cut between scalars so we still make progress.
	        return hardLimit
	    }

	    var output = [String]()
	    //Where the text of the tweet currently being assembled begins.
	    var tweetStart = advance(from: scalars.startIndex, upTo: firstWord.lowerBound, while: { whitespace.contains($0) })
	    //End of the text already accepted into that tweet; nil while the tweet is still empty.
	    var pendingEnd: String.Index? = nil

	    for (wordIdx, word) in wordRanges.enumerated() {
	        let isLast = wordIdx == wordRanges.count - 1

	        //A chunk is a word plus whatever is glued to its end (e.g. punctuation). For the last
	        //word it is everything up to the end of the text, minus trailing whitespace.
	        let chunkEnd: String.Index
	        if isLast {
	            chunkEnd = retreat(from: scalars.endIndex, downTo: word.upperBound)
	        } else {
	            chunkEnd = advance(from: word.upperBound, upTo: wordRanges[wordIdx + 1].lowerBound, while: { !whitespace.contains($0) })
	        }

	        //Only a tweet ending at the very last chunk goes out without a one-scalar suffix.
	        let capacity = isLast ? budget : budget - 1

	        if let end = pendingEnd, scalars.distance(from: tweetStart, to: chunkEnd) > capacity {
	            //This chunk does not fit after what is already pending: emit the pending text.
	            output.append(handlePrefix + String(input[tweetStart ..< end]) + "…")
	            tweetStart = advance(from: end, upTo: word.lowerBound, while: { whitespace.contains($0) })
	            pendingEnd = nil
	        }

	        //Either the chunk fits, or the tweet is empty and the chunk is too long on its own.
	        //TODO: Doing the hyphenation properly without a typesetting engine would be tricky to say the least…
	        while scalars.distance(from: tweetStart, to: chunkEnd) > capacity {
	            let splitPoint = hyphenationPoint(after: tweetStart)
	            output.append(handlePrefix + String(input[tweetStart ..< splitPoint]) + "-")
	            tweetStart = splitPoint
	        }

	        pendingEnd = chunkEnd
	    }

	    if let end = pendingEnd {
	        output.append(handlePrefix + String(input[tweetStart ..< end]))
	    }

	    assert(output.allSatisfy { $0.unicodeScalars.count <= maxTweetLength })

	    return output
	}

	//Command-line driver: reads the UTF-8 file named by the single argument, splits it into tweets
	//and prints each tweet between a "[" line and a "]" line.

	//Expect exactly the process name and the input file path.
	guard CommandLine.arguments.count == 2 else {
	    let processName = CommandLine.arguments.first ?? "tweetstorm"
	    FileHandle.standardError.write(Data("usage: \(processName) <input-file>\n".utf8))
	    exit(EXIT_FAILURE)
	}

	let inputURL = URL(fileURLWithPath: CommandLine.arguments[1])

	//Report an unreadable or non-UTF-8 file on stderr instead of trapping.
	let input: String = {
	    do {
	        return try String(contentsOf: inputURL, encoding: .utf8)
	    } catch {
	        FileHandle.standardError.write(Data("error: could not read \(inputURL.path) as UTF-8: \(error.localizedDescription)\n".utf8))
	        exit(EXIT_FAILURE)
	    }
	}()

	let output = tweetStorm(input: input, handle: "@Catfish_Man")
	for tweet in output {
	    print("[")
	    print(tweet)
	    print("]")
	}