Last active
August 29, 2015 14:04
-
-
Save samuell/6f0ecbe4c5e88c04c387 to your computer and use it in GitHub Desktop.
To be included in: http://saml.rilspace.org/moar-languagez-gc-content-in-python-d-fpc-c-and-c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io::BufferedReader; | |
use std::io::File; | |
use std::str::StrSlice; | |
/// Prints the GC fraction of the sequence stored in a hard-coded FASTA file.
///
/// The file is read line by line. Lines that start with `>` are treated as
/// header/comment lines and skipped, so letters in a description never count
/// as bases. On the remaining lines, `G`/`C` and `A`/`T` are tallied
/// separately and every other byte (for example `N` or the line terminator)
/// is ignored.
///
/// The printed value is `gc / (at + gc)`. If the file holds no counted bases
/// the division is `0.0 / 0.0` and `NaN` is printed.
///
/// If the file cannot be opened or read, a message is written to stderr and
/// the process exits with status 1.
fn main() {
    use std::fs::File;
    use std::io::{BufRead, BufReader};

    let path = "Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa";
    let file = match File::open(path) {
        Ok(file) => file,
        Err(err) => {
            eprintln!("cannot open {}: {}", path, err);
            std::process::exit(1);
        }
    };
    let mut reader = BufReader::new(file);

    let mut gc = 0u64;
    let mut at = 0u64;

    // One byte buffer reused for every line: avoids a fresh allocation per
    // line and does not require the input to be valid UTF-8.
    let mut line = Vec::new();
    loop {
        line.clear();
        match reader.read_until(b'\n', &mut line) {
            // Zero bytes read means end of file.
            Ok(0) => break,
            Ok(_) => {}
            Err(err) => {
                eprintln!("cannot read {}: {}", path, err);
                std::process::exit(1);
            }
        }

        // Header/comment line: none of its characters are sequence data.
        if line.first() == Some(&b'>') {
            continue;
        }

        for &base in &line {
            match base {
                b'G' | b'C' => gc += 1,
                b'A' | b'T' => at += 1,
                _ => {}
            }
        }
    }

    let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64));
    println!("GC fraction: {}", gc_frac)
}
use std::io::File;
/// Prints the GC fraction of the sequence stored in a hard-coded FASTA file,
/// scanning the file in large raw byte chunks instead of line by line.
///
/// A `>` that appears as the first byte of a line starts a header/comment
/// that runs to the next newline; bytes inside it are not counted. Elsewhere
/// `G`/`C` and `A`/`T` are tallied separately and every other byte is ignored.
/// The line-start and in-comment flags live outside the read loop, so a
/// header that straddles two chunks is still handled correctly.
///
/// The printed value is `gc / (at + gc)`. If the file holds no counted bases
/// the division is `0.0 / 0.0` and `NaN` is printed.
///
/// If the file cannot be opened or read, a message is written to stderr and
/// the process exits with status 1 rather than reporting a result computed
/// from partial or missing data.
fn main() {
    use std::fs::File;
    use std::io::{ErrorKind, Read};

    let path = "Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa";
    let mut file = match File::open(path) {
        Ok(file) => file,
        Err(err) => {
            eprintln!("cannot open {}: {}", path, err);
            std::process::exit(1);
        }
    };

    let mut gc = 0u64;
    let mut at = 0u64;

    // Large buffer of bytes (64 KiB) filled by each read call.
    let mut buf = [0u8; 1 << 16];

    // The very first byte of the file is at the start of a line.
    let mut start_of_line = true;
    let mut inside_comment = false;

    loop {
        let n = match file.read(&mut buf) {
            // A zero-length read into a non-empty buffer signals end of file.
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read is transient; try again.
            Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => {
                eprintln!("cannot read {}: {}", path, err);
                std::process::exit(1);
            }
        };

        for &b in &buf[..n] {
            let is_newline = b == b'\n';
            if inside_comment {
                // Stay in the comment until the line ends.
                if is_newline {
                    inside_comment = false;
                }
            } else {
                match b {
                    b'G' | b'C' => gc += 1,
                    b'A' | b'T' => at += 1,
                    b'>' if start_of_line => inside_comment = true,
                    _ => {}
                }
            }
            // The next byte starts a line exactly when this one ended one.
            start_of_line = is_newline;
        }
    }

    let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64));
    println!("GC fraction: {}", gc_frac)
}
By the way, it seems some programs in your blog post skip the first line, and some others don't handle `>` line comments.
Did you catch the memory mapped version at https://gist.github.com/Blei/32d22fb92a3365da86b6?
@huonw: I think you're right about some unfortunate differences in the code examples. I really should compile an updated benchmark soon.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Aha, I had just forgotten the check on whether each line starts with '>', which we have in https://gist.github.com/samuell/5591369 ... Do you think you could update your code with that, @huonw? (I'm unfortunately not versed enough in Rust to know how to do it ...)