Created
August 7, 2014 20:10
-
-
Save Blei/32d22fb92a3365da86b6 to your computer and use it in GitHub Desktop.
Rust GC Count -- Memmap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Based on https://gist.github.com/samuell/5555803
extern crate native; | |
extern crate rustrt; | |
use native::io::file; | |
use rustrt::rtio; | |
use rustrt::rtio::RtioFileStream; | |
use std::os; | |
// Where the byte scanner in `main` currently is relative to the line it is reading.
#[deriving(PartialEq,Eq,Show)] | |
enum State { | |
// Start of a line: the initial state, and the state set again after every `\n`.
Begin, | |
// NOTE(review): presumably "partway through a sequence line" -- confirm against the scanning loop in `main`.
Read, | |
// Header line, entered when `>` is seen while in `Begin`; A/T/G/C bytes are not counted in this state.
Ignore, | |
} | |
fn main() { | |
let path = "Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa".to_c_str(); | |
let mut fd = file::open(&path, rtio::Open, rtio::Read).ok().expect("couldn't open file"); | |
let stat = fd.fstat().ok().expect("couldn't stat file"); | |
let mm = os::MemoryMap::new(stat.size as uint, &[os::MapFd(fd.fd()), os::MapReadable]).ok().expect("mmap failed"); | |
let mut gc = 0u; | |
let mut at = 0u; | |
let mut state = Begin; | |
let data = mm.data(); | |
for i in range(0, mm.len()) { | |
let c = unsafe{*((data as uint + i) as *mut u8)}; | |
match c { | |
b'>' => { | |
if state == Begin { | |
state = Ignore; | |
} | |
} | |
b'\n' => { | |
state = Begin; | |
} | |
b'A' | b'T' => if state != Ignore { at += 1 }, | |
b'G' | b'C' => if state != Ignore { gc += 1 }, | |
_ => () | |
} | |
} | |
println!("{:.10}", 100.0 * gc as f64 / (gc + at) as f64); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It seems to me that the State enum includes a third state (Read) that isn't used... if it doesn't already, I wonder if removing that would allow LLVM to optimize out the branch (if state != Ignore) as addition of an integer (0 or 1)? If not, you could probably make it even faster by changing State to a C-like enum such that Begin = 1 and Ignore = 0, then just add state to at and gc. Of course, it's also possible that for a simple I/O bound program like this, CPU isn't enough of a bottleneck for this to make a difference :)