-
-
Save samuell/6f0ecbe4c5e88c04c387 to your computer and use it in GitHub Desktop.
use std::io::BufferedReader; | |
use std::io::File; | |
use std::str::StrSlice; | |
fn main() { | |
let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa"); | |
let mut file = BufferedReader::new(File::open(&path)); | |
let mut gc = 0i; | |
let mut at = 0i; | |
for line in file.lines() { | |
for c in line.unwrap().as_slice().chars() { | |
match c { | |
'G' => gc += 1, | |
'C' => gc += 1, | |
'A' => at += 1, | |
'T' => at += 1, | |
_ => {} | |
} | |
} | |
} | |
let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64)); | |
println!("GC fraction: {}", gc_frac) | |
} |
@huonw: Cool, that's a 25% speedup of the fastest Go version I have on http://saml.rilspace.org/moar-languagez-gc-content-in-python-d-fpc-c-and-c ! :) Good job!
Hope to post updated benchmarks and graphs soon!
Aha, only I had forgotten the check on whether each line starts with '>', which we have in https://gist.github.com/samuell/5591369 ... Do you think you could update your code with that, @huonw? (I'm unfortunately not versed enough in Rust to know how to do it ...)
use std::io::File;
// Prints the GC fraction of the sequence in a FASTA file: the number of
// G and C bases divided by the number of A, C, G and T bases.
//
// The file is read as raw bytes in large chunks and scanned with a small
// state machine, so lines (and '>' header lines) may span chunk boundaries.
fn main() {
    let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa");
    // Stop right away if the file cannot be opened. Without the `unwrap`
    // the failure would only surface as a failed first read, the loop
    // would end immediately and the program would print a NaN fraction.
    let mut file = File::open(&path).unwrap();
    let mut gc = 0i;
    let mut at = 0i;
    // large buffer of bytes
    let mut buf = [0u8, .. 1 << 16];
    // True when the previous byte was a newline, and at the start of input.
    let mut start_of_line = true;
    // True while scanning the remainder of a line that started with '>'.
    let mut inside_comment = false;
    loop {
        let n = match file.read(buf) {
            Ok(n) => n,
            // EOF etc.: any read error ends the scan.
            Err(_) => break
        };
        // Only the first `n` bytes of the buffer were filled by this read.
        for b in buf.slice_to(n).iter() {
            let is_newline = *b == b'\n';
            if inside_comment {
                // Skip everything up to and including the end of the header.
                if is_newline {
                    inside_comment = false;
                }
            } else {
                match *b {
                    b'G' | b'C' => gc += 1,
                    b'A' | b'T' => at += 1,
                    // '>' only starts a header as the first byte of a line.
                    b'>' if start_of_line => inside_comment = true,
                    _ => {}
                }
            }
            // Carried over to the next byte, including across chunk reads.
            start_of_line = is_newline;
        }
    }
    // If no A/C/G/T was seen this is 0.0 / 0.0, which prints as NaN.
    let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64));
    println!("GC fraction: {}", gc_frac)
}
By the way, it seems some programs in your blog post skip the first line, and some others don't handle `>` line comments.
Did you catch the memory mapped version at https://gist.github.com/Blei/32d22fb92a3365da86b6?
@huonw: I think you're right about some unfortunate differences in the code examples. I really should compile an updated benchmark soon.
The following is about 3× faster for me:
(Unlike Go, Rust ensures that strings are valid UTF-8: the validator shows up high in the profiles when using `lines`.)