Skip to content

Instantly share code, notes, and snippets.

@jarsen
Created October 10, 2014 20:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save jarsen/bc029f71363f28b4e4f7 to your computer and use it in GitHub Desktop.
Save jarsen/bc029f71363f28b4e4f7 to your computer and use it in GitHub Desktop.
Univariate Linear Regression
import Cocoa
/// A sequence of `Double` values parsed from a delimited text file.
///
/// The file is read and parsed once, in `init(path:)`; iterating the sequence
/// afterwards only walks the in-memory `values` array.
class CSVDoubleSequence: SequenceType {
    typealias GeneratorType = IndexingGenerator<Array<Double>>

    /// The file location exactly as it was passed to `init(path:)`.
    let path: String

    /// Every parsed number in file order. Newlines are treated as delimiters
    /// just like commas, so no row structure is kept.
    let values: [Double]

    /// Reads the file at `path` as UTF-8 and converts each token to a `Double`.
    ///
    /// Tokens are separated by commas, whitespace, or newlines; empty tokens
    /// are dropped.
    ///
    /// - parameter path: Path of the file to load.
    init(path: String) {
        self.path = path

        var readError: NSError?
        let contents = String.stringWithContentsOfFile(path, encoding: NSUTF8StringEncoding, error: &readError)
        if let failure = readError {
            // `assert` only fires when assertions are enabled; otherwise
            // execution continues and a nil `contents` yields empty `values`.
            assert(false, "Error opening \(path): \(failure.localizedDescription)")
        }

        // Split on whitespace, newlines and commas.
        let separators = NSMutableCharacterSet.whitespaceAndNewlineCharacterSet()
        separators.addCharactersInString(",")

        if let text = contents {
            var parsed = [Double]()
            for token in text.componentsSeparatedByCharactersInSet(separators) {
                // Consecutive delimiters (e.g. ", " or a trailing newline)
                // produce empty tokens, which are skipped.
                if token != "" {
                    // NOTE(review): NSString.doubleValue presumably yields 0.0
                    // for non-numeric text (e.g. a header row) — confirm.
                    parsed.append((token as NSString).doubleValue)
                }
            }
            values = parsed
        } else {
            values = [Double]()
        }
    }

    /// Returns a generator over the parsed values, in file order.
    func generate() -> IndexingGenerator<Array<Double>> {
        return values.generate()
    }
}
/// Computes the mean squared error between predicted and observed values.
///
/// Elements are paired by index. If the arrays differ in length, only the
/// leading pairs that exist in both arrays are used, and the mean is taken
/// over that same number of pairs so the result is not skewed by unpaired
/// elements.
///
/// - parameter predictions: Predicted values.
/// - parameter values: Observed values, in the same order as `predictions`.
/// - returns: The average of the squared differences, or NaN when there are
///   no pairs to compare (the mean is undefined in that case).
func meanSquaredError(predictions: [Double], values: [Double]) -> Double {
    let pairCount = min(predictions.count, values.count)

    var squaredErrorSum = 0.0
    for i in 0..<pairCount {
        let residual = predictions[i] - values[i]
        squaredErrorSum += residual * residual
    }

    // Divide by the number of pairs actually summed, not by the length of
    // either input, so the numerator and denominator always agree.
    return squaredErrorSum / Double(pairCount)
}
/// Fits a straight line `y = w0 + w1 * x` to the samples by ordinary least
/// squares, using the closed-form solution.
///
/// Samples are paired by index. If `x` and `y` differ in length, only the
/// leading pairs present in both arrays are used, so every sum below is
/// taken over the same set of samples.
///
/// When the slope is not uniquely determined — there are no samples, or the
/// denominator `n * Σx² − (Σx)²` is exactly zero (e.g. a single sample or all
/// `x` identical) — a constant model is returned instead of NaN/infinite
/// weights: slope 0 and intercept equal to the mean of `y` (0 when there are
/// no samples).
///
/// - parameter x: Input values.
/// - parameter y: Target values, in the same order as `x`.
/// - returns: The tuple `(w0, w1)`: intercept first, then slope.
func learnUnivariateWeights(x: [Double], y: [Double]) -> (Double, Double) {
    let sampleCount = min(x.count, y.count)
    if sampleCount == 0 {
        return (0.0, 0.0)
    }

    // Accumulate all four sums in one pass over the paired samples.
    var sumX = 0.0
    var sumY = 0.0
    var sumXY = 0.0
    var sumXSquared = 0.0
    for i in 0..<sampleCount {
        let xi = x[i]
        let yi = y[i]
        sumX += xi
        sumY += yi
        sumXY += xi * yi
        sumXSquared += xi * xi
    }

    let n = Double(sampleCount)
    let denominator = (n * sumXSquared) - (sumX * sumX)
    if denominator == 0 {
        // No spread in x: any slope fits equally well, so use the flat line
        // through the mean of y.
        return (sumY / n, 0.0)
    }

    let w1 = ((n * sumXY) - (sumX * sumY)) / denominator
    let w0 = (sumY - (w1 * sumX)) / n
    return (w0, w1)
}
/// Builds a prediction function for a linear model with the given weights.
///
/// The returned function computes the dot product of `weights` and its input
/// vector. Elements are paired by index; if the two arrays differ in length,
/// the unpaired trailing elements are ignored.
///
/// - parameter weights: Model weights.
/// - returns: A function mapping an input vector to the predicted value.
func linearRegression(weights: [Double]) -> ([Double]) -> (Double) {
    return { features in
        // For weights[0] to act as the intercept, the caller is expected to
        // pass 1 as the first element of the input vector.
        let pairCount = min(weights.count, features.count)
        var total = 0.0
        for i in 0..<pairCount {
            total += weights[i] * features[i]
        }
        return total
    }
}
// load data
// NOTE(review): `manager` is not referenced by any other visible code in this file.
let manager = NSFileManager.defaultManager()
// Both data sets are loaded from hard-coded absolute paths.
let trainingData = CSVDoubleSequence(path: "/Users/jarsen/Desktop/learning.playground/Resources/univariate_data-train.csv")
// NOTE(review): this is the same file path as `trainingData`, so the model is
// evaluated on the data it was trained on — presumably a separate test CSV
// was intended; confirm.
let testData = CSVDoubleSequence(path: "/Users/jarsen/Desktop/learning.playground/Resources/univariate_data-train.csv")
// process data
/// Splits a flat stream of values into two arrays by alternating position.
///
/// Values at even positions (0, 2, 4, ...) go to `x` and values at odd
/// positions go to `y`. If the sequence holds an odd number of values, `x`
/// ends up one element longer than `y`.
///
/// - parameter data: The sequence of values to split.
/// - returns: The tuple `(x, y)` of even-position and odd-position values.
func processTwoColumns(data: CSVDoubleSequence) -> (x: [Double], y: [Double]) {
    var firstColumn = [Double]()
    var secondColumn = [Double]()

    // Toggles after every value so consecutive values alternate columns.
    var nextIsFirstColumn = true
    for value in data {
        if nextIsFirstColumn {
            firstColumn.append(value)
        } else {
            secondColumn.append(value)
        }
        nextIsFirstColumn = !nextIsFirstColumn
    }

    return (firstColumn, secondColumn)
}
// Split each data set into its x values and y values.
let (trainX, trainY) = processTwoColumns(trainingData)
let (testX, testY) = processTwoColumns(testData)
// learn function
// `weights` is the (w0, w1) tuple returned by learnUnivariateWeights.
let weights = learnUnivariateWeights(trainX, trainY)
let learnedFunction = linearRegression([weights.0, weights.1])
// test function on test data
// Each input is prefixed with a constant 1 so that weights.0 is added as-is
// (the intercept) and weights.1 multiplies x.
let predictions = testX.map { x in learnedFunction([1, x]) }
// calculate loss function on results of test data
// The result is not bound to a name; presumably it is meant to be read from
// the playground's results sidebar.
meanSquaredError(predictions, testY)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment