Skip to content

Instantly share code, notes, and snippets.

@mathias-brandewinder
Created March 27, 2016 23:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mathias-brandewinder/4830fe68ebb208329ebd to your computer and use it in GitHub Desktop.
Save mathias-brandewinder/4830fe68ebb208329ebd to your computer and use it in GitHub Desktop.
kaggle home depot notes
/// One search/product pair to be scored: the text of the search and the
/// title of the product it is being matched against.
type Observation =
    { /// Text of the search terms.
      SearchTerms: string
      /// Title of the product.
      ProductTitle: string }

    /// Number of characters in the search terms, converted to a float.
    member obs.SearchLength = float obs.SearchTerms.Length
/// Relevance score of a product for a search, represented as a float.
type Relevance = float
/// A prediction function: takes an observation, returns its estimated relevance.
type Predictor = Observation -> Relevance
/// A labelled case: a relevance value paired with the observation it belongs to.
type Example = Relevance * Observation
/// A training procedure: takes an array of labelled examples and returns a predictor.
type Learner = Example [] -> Predictor
/// Baseline learner: the returned predictor ignores its observation and
/// always answers with the mean relevance of the training sample.
/// The mean is computed once, when the learner is applied to the sample,
/// not on every prediction.
let trivialModel : Learner =
    fun examples ->
        // Label is the first component of each (relevance, observation) pair.
        let meanRelevance = examples |> Array.averageBy fst
        fun (_: Observation) -> meanRelevance
/// A feature: a function that turns an observation into a single float value.
type Feature = Observation -> float
/// Applies every feature to a single observation and returns the resulting
/// values as an array, in the same order as `features`.
let extractFeatures (features: Feature[]) (obs: Observation) =
    Array.map (fun feature -> obs |> feature) features
/// Feature: the number of characters in the search terms, as a float.
let ``Search Terms characters`` : Feature =
    fun { SearchTerms = terms } -> float terms.Length
/// Feature: how many distinct characters occur in both the search terms and
/// the product title. Each string is reduced to its set of characters first,
/// so repeated characters count once and no character is filtered out.
let ``Matching characters between title and search terms`` : Feature =
    fun { SearchTerms = terms; ProductTitle = title } ->
        let charsInSearch = Set.ofSeq terms
        let charsInTitle = Set.ofSeq title
        let shared = Set.intersect charsInSearch charsInTitle
        float shared.Count
#I "../packages"
#r @"FSharp.Data/lib/net40/FSharp.Data.dll"
open FSharp.Data
// Typed access to the training CSV through the FSharp.Data CSV type provider.
// NOTE(review): the file path is relative; confirm which directory it is
// resolved against when this script is loaded.
type Training = CsvProvider<"""../data/train.csv""">
/// Labelled examples, one per row of the file behind `Training.GetSample()`:
/// the row's relevance converted to float, paired with an observation built
/// from its search-term and product-title columns. Materialised as an array.
let training =
    [| for row in Training.GetSample().Rows ->
        (float row.Relevance,
         { SearchTerms = row.Search_term; ProductTitle = row.Product_title }) |]
#r @"Accord/lib/net45/Accord.dll"
#r @"Accord.Math/lib/net45/Accord.Math.dll"
#r @"Accord.Statistics/lib/net45/Accord.Statistics.dll"
open Accord.Statistics.Models.Regression
open Accord.Statistics.Models.Regression.Fitting
/// The features used by the logistic learner. Their order here is the order
/// of the values in each input vector produced by `extractFeatures model`.
let model =
    [| ``Search Terms characters``; ``Matching characters between title and search terms`` |]
/// Learner that fits an Accord.NET logistic regression on the features listed
/// in `model` and returns a predictor on the original relevance scale.
///
/// Labels are rescaled with (x - 1) / 2 before fitting and predictions are
/// mapped back with x * 2 + 1, so 1.0 <-> 0.0 and 3.0 <-> 1.0.
/// NOTE(review): this presumes relevance labels lie in [1, 3] - confirm
/// against the training data.
///
/// Fitting repeats `teacher.Run` until the value it returns drops below
/// `tolerance`, or until `maxIterations` passes have been made, in which case
/// the regression is returned as fitted so far. Previously the loop had no
/// bound and would spin forever on a fit that never reached the tolerance.
/// Raises an exception if a pass reports NaN, since `NaN < tolerance` can
/// never become true.
let logisticModel : Learner =
    fun sample ->
        // Convergence threshold on the value returned by each pass.
        let tolerance = 0.01
        // Safety cap on the number of passes.
        let maxIterations = 100

        let regression = LogisticRegression(model.Length)
        let teacher = IterativeReweightedLeastSquares(regression)

        let labelNormalize x = (x - 1.) / 2.
        let labelDenormalize x = (x * 2.) + 1.

        // Feature vectors and rescaled labels, kept in matching order.
        let input, output =
            sample
            |> Array.map (fun (label, obs) -> extractFeatures model obs, labelNormalize label)
            |> Array.unzip

        let rec learn iteration =
            let error = teacher.Run(input, output)
            if System.Double.IsNaN error then
                failwithf "Logistic regression training failed: pass %i reported NaN" iteration
            elif error < tolerance || iteration >= maxIterations then
                regression
            else
                learn (iteration + 1)

        let logPredictor = learn 1

        let predictor (obs: Observation) =
            obs
            |> extractFeatures model
            |> logPredictor.Compute
            |> labelDenormalize
        predictor
/// Predictor obtained by running the logistic learner on the training examples.
let logisticPredictor = logisticModel training

// Spot-check: print actual vs. predicted relevance for the first examples.
// Seq.truncate yields at most 10 items and, unlike Seq.take, does not throw
// when the training set holds fewer than 10 examples.
training
|> Seq.truncate 10
|> Seq.iter (fun (actual, obs) ->
    printfn "Actual: %.2f, Predicted: %.2f" actual (logisticPredictor obs))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment