Last active
September 24, 2018 03:28
-
-
Save wareya/f73505a048b965517c00e358aa5fc69d to your computer and use it in GitHub Desktop.
unoptimized multiple regression in rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::vec::Vec; | |
use std::fs::File; | |
use std::io::Read; | |
use std::result::Result; | |
/// Reads `info.txt` from the current working directory and parses it into rows of numbers.
///
/// Every non-blank line becomes one row. Fields are separated by any run of
/// whitespace and each field must parse as an `f64`. Blank lines are skipped.
///
/// # Errors
/// Returns the underlying I/O error if the file cannot be opened or read, and an
/// error of kind `InvalidData` (naming the line number and the offending token)
/// if a field is not a valid number.
fn get_data() -> Result<Vec<Vec<f64>>, std::io::Error>
{
    let mut file = File::open("info.txt")?;
    let mut text = String::new();
    file.read_to_string(&mut text)?;

    let mut ret = Vec::<Vec<f64>>::new();
    for (index, line) in text.lines().enumerate()
    {
        // A blank (or whitespace-only) line carries no fields; skip it instead of
        // treating the empty token as a malformed number.
        if line.trim().is_empty()
        {
            continue;
        }
        // `split_whitespace` tolerates repeated spaces and tabs between fields.
        let fields = line
            .split_whitespace()
            .map(|token| {
                token.parse::<f64>().map_err(|err| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidData,
                        format!(
                            "info.txt line {}: cannot parse {:?} as a number: {}",
                            index + 1,
                            token,
                            err
                        ),
                    )
                })
            })
            .collect::<Result<Vec<f64>, std::io::Error>>()?;
        ret.push(fields);
    }
    Ok(ret)
}
/// Arithmetic mean of the values in `x`.
///
/// The sum is divided by the element count converted to `f64`, so an empty
/// vector yields NaN (zero divided by zero) rather than panicking.
fn mean(x : &Vec<f64>) -> f64
{
    let total : f64 = x.iter().sum();
    let count = x.len() as f64;
    total / count
}
fn to_real_data(indata : Vec<Vec<f64>>) -> (Vec<Vec<f64>>, Vec<f64>) | |
{ | |
let mut real_indata = Vec::<Vec<f64>>::new(); | |
let mut outdata = Vec::<f64>::new(); | |
for entry in indata | |
{ | |
let counts = entry[0..3].to_vec(); | |
let runs = entry[4..7].to_vec(); | |
let runlen_han = counts[0]/runs[0]; | |
let runlen_hira = counts[1]/runs[1]; | |
let runlen_kata = counts[2]/runs[2]; | |
let avg_counts = mean(&counts); | |
let prop_han = counts[0]/avg_counts; | |
let prop_hira = counts[1]/avg_counts; | |
let prop_kata = counts[2]/avg_counts; | |
let avg_runs = mean(&runs); | |
let prop_runs_han = runs[0]/avg_runs; | |
let prop_runs_hira = runs[1]/avg_runs; | |
let prop_runs_kata = runs[2]/avg_runs; | |
real_indata.push(vec![ | |
runlen_han, | |
runlen_hira, | |
runlen_kata, | |
prop_han.ln(), | |
prop_hira.ln(), | |
prop_kata.ln(), | |
prop_runs_han.ln(), | |
prop_runs_hira.ln(), | |
prop_runs_kata.ln(), | |
1.0 | |
]); | |
outdata.push(entry[12]/15000.0); | |
} | |
return (real_indata, outdata); | |
} | |
/// Performs one batch gradient-descent step of a linear least-squares fit,
/// updating `model` in place.
///
/// For every row the prediction is the dot product of the row with `model`, and
/// the row contributes `row[j] * (target - prediction)` to weight `j`. The
/// contributions are averaged over all rows, multiplied by `rate`, and added to
/// the corresponding weight.
///
/// An empty `indata` leaves `model` untouched; averaging over zero rows would
/// otherwise write NaN into every weight.
///
/// # Panics
/// Panics if a row's length differs from `model.len()`, or if `outdata` has
/// fewer entries than `indata`.
fn fit(model : &mut Vec<f64>, indata : &Vec<Vec<f64>>, outdata : &Vec<f64>, rate : f64)
{
    if indata.is_empty()
    {
        return;
    }

    // One running sum per weight replaces the per-row delta vectors, so a step
    // allocates a single buffer regardless of the number of rows.
    let mut gradient = vec![0.0_f64; model.len()];
    for (i, row) in indata.iter().enumerate()
    {
        // A length mismatch was always a panic (out-of-bounds index); state it explicitly.
        assert_eq!(
            row.len(),
            model.len(),
            "fit: row {} has {} features but the model has {} weights",
            i,
            row.len(),
            model.len()
        );
        let prediction = row.iter().zip(model.iter()).map(|(x, w)| x * w).sum::<f64>();
        let error = outdata[i] - prediction;
        for (slot, x) in gradient.iter_mut().zip(row.iter())
        {
            *slot += x * error;
        }
    }

    let rows = indata.len() as f64;
    for (weight, total) in model.iter_mut().zip(gradient.iter())
    {
        *weight += total / rows * rate;
    }
}
/// Prints the values of `x` on one line of standard output, separated by tabs
/// and terminated by a newline.
///
/// An empty vector prints just the newline. The earlier index loop computed
/// `x.len()-1`, which underflows for an empty vector and panicked before the
/// empty case could ever be reached.
fn print_vec(x : &Vec<f64>)
{
    let line = x
        .iter()
        .map(|value| value.to_string())
        .collect::<Vec<String>>()
        .join("\t");
    println!("{}", line);
}
fn rsq(truth : &Vec<f64>, preds : &Vec<f64>) -> f64 | |
{ | |
let truemean = mean(truth); | |
let mut res : f64 = 0.0; | |
let mut tot : f64 = 0.0; | |
for i in 0..truth.len() | |
{ | |
res += (truth[i]-preds[i]).powi(2); | |
tot += (truth[i]-truemean).powi(2); | |
} | |
return 1.0 - res/tot; | |
} | |
fn predict(model : &Vec<f64>, indata : &Vec<Vec<f64>>, outdata : &Vec<f64>) -> (Vec<f64>, f64) // output, rsq | |
{ | |
let mut predictions = Vec::<f64>::new(); | |
for i in 0..indata.len() | |
{ | |
let data = &indata[i]; | |
//activations = [model[j] * indata[i][j] for j in rlen(model)] | |
let mut activations = data.clone(); | |
for j in 0..data.len() | |
{ | |
activations[j] *= model[j]; | |
} | |
let prediction = activations.iter().sum::<f64>(); | |
predictions.push(prediction); | |
} | |
let myrsq = rsq(outdata, &predictions); | |
return (predictions, myrsq); | |
} | |
fn main () | |
{ | |
if let Ok(indata) = get_data() | |
{ | |
let (real_indata, outdata) = to_real_data(indata); | |
/* | |
let mut model = Vec::<f64>::new(); | |
while model.len() < real_indata[0].len() | |
{ | |
model.push(0.0); | |
} | |
let modellen = model.len(); | |
model[modellen-1] = mean(&outdata); | |
*/ | |
/* | |
let mut model : Vec<f64> = vec! | |
[ 0.992494146529402, | |
-0.013714790333185, | |
-0.002360355821806, | |
-2.13749281707757, | |
-6.08396128173098, | |
-0.135511050646753, | |
1.22356744966759, | |
0.626388927579391, | |
-0.120468027439253, | |
1.74580127799164 | |
]; | |
*/ | |
// fitted here 100000000 iters | |
let mut model : Vec<f64> = vec! | |
[ 0.753160896617854, | |
0.100542016774891, | |
-0.001514824068502, | |
-1.77280423644666, | |
-6.4006913369764, | |
-0.137534864314251, | |
0.849608497919147, | |
0.950797701504978, | |
-0.118258915821494, | |
1.99965543700808 | |
]; | |
// fitted by keras | |
/* | |
let mut model : Vec<f64> = vec! | |
[ 1.253433 , | |
-0.14630389, | |
0.00329586, | |
-2.4908898 , | |
-5.5842404 , | |
-0.14600338, | |
1.5507061 , | |
0.13142511, | |
-0.10953176, | |
1.4638389 | |
]; | |
*/ | |
let lr : f64 = 0.0622; | |
for i in 0..100000000 | |
{ | |
fit(&mut model, &real_indata, &outdata, lr); | |
if i%1000 == 0 | |
{ | |
print_vec(&model); | |
//let (_, rsq) = predict(&model, &real_indata, &outdata); | |
//println!("{}", rsq); | |
} | |
} | |
print_vec(&model); | |
} | |
else | |
{ | |
println!("error: data is not formatted correctly"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment