Skip to content

Instantly share code, notes, and snippets.

@samuell
Last active Aug 29, 2015
Embed
What would you like to do?
use std::io::BufferedReader;
use std::io::File;
use std::str::StrSlice;
/// Prints the GC fraction of the sequence stored in the hard-coded input file.
///
/// The file is read line by line. Every 'G' or 'C' character increments the
/// GC counter and every 'A' or 'T' increments the AT counter; all other
/// characters (including lower-case letters and 'N') are ignored. The result
/// printed is `gc / (at + gc)` as an `f64`.
///
/// Lines that start with '>' are header/comment lines rather than sequence
/// data, so they are skipped; otherwise the letters in the header text would
/// be counted as bases and skew the result.
fn main() {
    let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa");
    let mut file = BufferedReader::new(File::open(&path));
    let mut gc = 0i;
    let mut at = 0i;
    for line in file.lines() {
        // Panics if the line could not be read.
        let line = line.unwrap();
        // Header/comment line: not part of the sequence, do not count it.
        if line.as_slice().starts_with(">") {
            continue;
        }
        for c in line.as_slice().chars() {
            match c {
                'G' | 'C' => gc += 1,
                'A' | 'T' => at += 1,
                _ => {}
            }
        }
    }
    // Floating-point division: yields NaN (not a panic) if no bases were counted.
    let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64));
    println!("GC fraction: {}", gc_frac)
}
@huonw
Copy link

huonw commented Aug 8, 2014

@samuell

use std::io::File;

fn main() {
    let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa");
    let mut file = File::open(&path);
    let mut gc = 0i;
    let mut at = 0i;
    // large buffer of bytes
    let mut buf = [0u8, .. 1 << 16];

    let mut start_of_line = true;
    let mut inside_comment = false;

    loop {
        let n = match file.read(buf) {
            Ok(n) => n,

            // EOF etc.
            Err(_) => break
        };

        for b in buf.slice_to(n).iter() {
            let is_newline = *b == b'\n';
            if inside_comment {
                if is_newline {
                    inside_comment = false;
                }
            } else {
                match *b {
                    b'G' | b'C' => gc += 1,
                    b'A' | b'T' => at += 1,
                    b'>' if start_of_line => inside_comment = true,
                    _ => {}
                }
            }
            start_of_line = is_newline;
        }
    }

    let gc_frac: f64 = (gc as f64) / ((at as f64) + (gc as f64));
    println!("GC fraction: {}", gc_frac)
}

By the way, it seems some programs in your blog post skip the first line, and some others don't handle > line comments.

@pythonesque
Copy link

pythonesque commented Aug 8, 2014

Did you catch the memory-mapped version at https://gist.github.com/Blei/32d22fb92a3365da86b6?

@samuell
Copy link
Author

samuell commented Aug 6, 2015

@huonw: I think you're right about some unfortunate differences in the code examples. I really should compile an updated benchmark soon.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment