Skip to content

Instantly share code, notes, and snippets.

@wareya
Last active September 24, 2018 03:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wareya/f73505a048b965517c00e358aa5fc69d to your computer and use it in GitHub Desktop.
Save wareya/f73505a048b965517c00e358aa5fc69d to your computer and use it in GitHub Desktop.
// Unoptimized multiple regression in Rust.
use std::vec::Vec;
use std::fs::File;
use std::io::Read;
use std::result::Result;
/// Reads `info.txt` from the current working directory and parses it into rows of numbers.
///
/// Each non-blank line becomes one row. Fields are separated by any run of
/// whitespace and parsed as `f64`.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read, and an
/// error of kind `InvalidData` (naming the line and the offending token) if a
/// field is not a valid number.
fn get_data() -> Result<Vec<Vec<f64>>, std::io::Error> {
    let mut file = File::open("info.txt")?;
    let mut text = String::new();
    file.read_to_string(&mut text)?;

    let mut rows = Vec::new();
    for (index, line) in text.lines().enumerate() {
        let line = line.trim();
        // Blank lines carry no record; skipping them keeps stray empty lines harmless.
        if line.is_empty() {
            continue;
        }
        let fields = line
            .split_whitespace()
            .map(|token| {
                // Report bad input as an error so the caller can decide what to do,
                // rather than aborting the whole process.
                token.parse::<f64>().map_err(|err| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidData,
                        format!("line {}: invalid number {:?}: {}", index + 1, token, err),
                    )
                })
            })
            .collect::<Result<Vec<f64>, std::io::Error>>()?;
        rows.push(fields);
    }
    Ok(rows)
}
/// Arithmetic mean of the values in `x`.
///
/// An empty vector yields NaN, because a zero sum is divided by a zero length.
fn mean(x: &Vec<f64>) -> f64 {
    let count = x.len() as f64;
    let total = x.iter().sum::<f64>();
    total / count
}
/// Turns raw rows into regression features and targets.
///
/// For every row, columns 0..3 are read as three counts and columns 4..7 as three
/// run totals (presumably han / hiragana / katakana, going by the original local
/// names; confirm against the data file). Each feature vector holds, in order:
///
/// 1. the three count-to-run ratios,
/// 2. the natural log of each count divided by the mean of the three counts,
/// 3. the natural log of each run total divided by the mean of the three run totals,
/// 4. a constant `1.0`, so the matching model weight acts as an intercept.
///
/// The target for a row is column 12 divided by 15000.
///
/// # Panics
///
/// Panics if a row has fewer than 13 columns.
fn to_real_data(indata: Vec<Vec<f64>>) -> (Vec<Vec<f64>>, Vec<f64>) {
    let mut features = Vec::<Vec<f64>>::new();
    let mut targets = Vec::<f64>::new();
    for row in indata {
        let counts = row[0..3].to_vec();
        let runs = row[4..7].to_vec();
        let count_mean = mean(&counts);
        let run_mean = mean(&runs);

        let mut sample: Vec<f64> = Vec::new();
        sample.extend(counts.iter().zip(runs.iter()).map(|(count, run)| count / run));
        sample.extend(counts.iter().map(|count| (count / count_mean).ln()));
        sample.extend(runs.iter().map(|run| (run / run_mean).ln()));
        sample.push(1.0);

        features.push(sample);
        targets.push(row[12] / 15000.0);
    }
    (features, targets)
}
/// Performs one batch gradient-descent step on a linear model, in place.
///
/// For every sample the prediction is the sum of `model[j] * sample[j]`, and the
/// error is the target minus that prediction. Each weight `j` is then increased by
/// `rate` times the mean over all samples of `sample[j] * error`.
///
/// # Panics
///
/// Panics if `outdata` is shorter than `indata`, or if a sample's length differs
/// from `model`'s length.
fn fit(model: &mut Vec<f64>, indata: &Vec<Vec<f64>>, outdata: &Vec<f64>, rate: f64) {
    // Per-sample contributions: each feature scaled by that sample's error.
    let deltas: Vec<Vec<f64>> = indata
        .iter()
        .enumerate()
        .map(|(row, sample)| {
            let prediction = sample
                .iter()
                .enumerate()
                .map(|(col, &value)| value * model[col])
                .sum::<f64>();
            let error = outdata[row] - prediction;
            sample.iter().map(|&value| value * error).collect()
        })
        .collect();

    for (col, weight) in model.iter_mut().enumerate() {
        let column: Vec<f64> = deltas.iter().map(|delta| delta[col]).collect();
        *weight += mean(&column) * rate;
    }
}
/// Prints the values of `x` on one line of standard output, separated by tabs and
/// terminated by a newline.
///
/// An empty vector prints just the newline. The line is built by joining the
/// formatted values, so no index arithmetic is involved and empty input cannot
/// underflow.
fn print_vec(x: &Vec<f64>) {
    let line = x
        .iter()
        .map(|value| value.to_string())
        .collect::<Vec<String>>()
        .join("\t");
    println!("{}", line);
}
/// Coefficient of determination of `preds` against `truth`.
///
/// Computed as one minus the ratio of the summed squared residuals
/// (`truth[i] - preds[i]`) to the summed squared deviations of `truth` from its
/// mean. If `truth` has no variance the division is by zero and the result is not
/// finite.
///
/// # Panics
///
/// Panics if `preds` is shorter than `truth`.
fn rsq(truth: &Vec<f64>, preds: &Vec<f64>) -> f64 {
    let center = mean(truth);
    let (residual, total) = truth.iter().enumerate().fold(
        (0.0_f64, 0.0_f64),
        |(residual, total), (idx, &actual)| {
            (
                residual + (actual - preds[idx]).powi(2),
                total + (actual - center).powi(2),
            )
        },
    );
    1.0 - residual / total
}
/// Applies the linear `model` to every sample and scores the result.
///
/// Returns `(predictions, r_squared)`: one prediction per sample (the sum of
/// `model[j] * sample[j]`), and the value of `rsq(outdata, &predictions)`.
///
/// # Panics
///
/// Panics if a sample has more entries than `model`.
fn predict(model: &Vec<f64>, indata: &Vec<Vec<f64>>, outdata: &Vec<f64>) -> (Vec<f64>, f64) {
    let predictions: Vec<f64> = indata
        .iter()
        .map(|sample| {
            sample
                .iter()
                .enumerate()
                .map(|(col, &value)| value * model[col])
                .sum::<f64>()
        })
        .collect();
    let score = rsq(outdata, &predictions);
    (predictions, score)
}
/// Loads the data set, derives the regression features, and runs repeated `fit`
/// steps from a hard-coded starting model, printing the model every 1000
/// iterations and once more when the loop ends.
///
/// If `get_data` fails, a single error line is printed and nothing else is done.
fn main() {
    let raw_rows = match get_data() {
        Ok(rows) => rows,
        Err(_) => {
            println!("error: data is not formatted correctly");
            return;
        }
    };
    let (features, targets) = to_real_data(raw_rows);

    // Alternative starting point: all-zero weights, with the last weight set to
    // the mean of the targets.
    /*
    let mut weights = Vec::<f64>::new();
    while weights.len() < features[0].len()
    {
        weights.push(0.0);
    }
    let weight_count = weights.len();
    weights[weight_count-1] = mean(&targets);
    */

    // Alternative starting point kept from an earlier run.
    /*
    let mut weights : Vec<f64> = vec!
    [ 0.992494146529402,
    -0.013714790333185,
    -0.002360355821806,
    -2.13749281707757,
    -6.08396128173098,
    -0.135511050646753,
    1.22356744966759,
    0.626388927579391,
    -0.120468027439253,
    1.74580127799164
    ];
    */

    // fitted here 100000000 iters
    let mut weights: Vec<f64> = vec![
        0.753160896617854,
        0.100542016774891,
        -0.001514824068502,
        -1.77280423644666,
        -6.4006913369764,
        -0.137534864314251,
        0.849608497919147,
        0.950797701504978,
        -0.118258915821494,
        1.99965543700808,
    ];

    // fitted by keras
    /*
    let mut weights : Vec<f64> = vec!
    [ 1.253433 ,
    -0.14630389,
    0.00329586,
    -2.4908898 ,
    -5.5842404 ,
    -0.14600338,
    1.5507061 ,
    0.13142511,
    -0.10953176,
    1.4638389
    ];
    */

    let learning_rate: f64 = 0.0622;
    for step in 0..100000000 {
        fit(&mut weights, &features, &targets, learning_rate);
        if step % 1000 == 0 {
            print_vec(&weights);
            // Uncomment to also report the fit quality:
            //let (_, rsq) = predict(&weights, &features, &targets);
            //println!("{}", rsq);
        }
    }
    print_vec(&weights);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment