Created
June 12, 2013 01:53
-
-
Save binarybana/5762324 to your computer and use it in GitHub Desktop.
Buffered IO in Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
    read_buf(fname::AbstractString, copybuf::Bool)

Scan the file `fname` in fixed-size 4096-byte chunks and count newline bytes.
The returned counter is incremented each time the newline ending line
1, 5, 9, … is seen (i.e. once per four-line group).

When `copybuf` is `true`, every chunk is additionally duplicated with `copy`
so the cost of copying the buffer is included in the measurement.

Prints the label `"buffered: "` (without a newline) before returning, so that a
surrounding `@time` report is appended to the same output line.
"""
function read_buf(fname::AbstractString, copybuf::Bool)
    bufsize = 4096
    buf = zeros(UInt8, bufsize)
    mydata = zeros(UInt8, bufsize)
    newline = UInt8('\n')
    line = 0
    reads = 0
    fid = open(fname)
    try
        # Read until end of file so that the final, possibly short, chunk is
        # scanned as well; `readbytes!` reports how many bytes are valid.
        while !eof(fid)
            nread = readbytes!(fid, buf, bufsize)
            if copybuf
                # Deliberate allocation: this is the overhead being benchmarked.
                mydata = copy(buf)
            end
            # Only the first `nread` bytes belong to this chunk; anything after
            # them is stale data from a previous iteration.
            for c in view(buf, 1:nread)
                if c == newline
                    line += 1
                    if (line - 1) % 4 == 0
                        reads += 1
                    end
                end
            end
        end
    finally
        # Release the handle even if reading fails part-way through.
        close(fid)
    end
    print("buffered: ")
    return reads
end
"""
    read_native(fname)

Iterate over the file `fname` line by line with `eachline` and count every
line whose 1-based index is 1, 5, 9, … (the first line of each four-line
group). Returns that count.

Prints the label `"eachline: "` (without a newline) before returning, so that a
surrounding `@time` report is appended to the same output line.
"""
function read_native(fname)
    nreads = 0
    fid = open(fname)
    try
        for (i, _) in enumerate(eachline(fid))
            if (i - 1) % 4 == 0
                nreads += 1
            end
        end
    finally
        # Close the handle even if iteration throws.
        close(fid)
    end
    print("eachline: ")
    return nreads
end
# Benchmark driver. Every measurement is taken twice in a row: the first call
# of each function also pays for JIT compilation, so the second timing is the
# representative one.
fname = "myfile.fastq"

# Chunked reader without, then with, a per-chunk buffer copy.
@time read_buf(fname, false)
@time read_buf(fname, false)
@time read_buf(fname, true)
@time read_buf(fname, true)

# Line iterator provided by Base.
@time read_native(fname)
@time read_native(fname)

# External baseline: let `wc -l` count the lines and capture its output.
@time read(`wc -l $fname`, String)
@time read(`wc -l $fname`, String)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
def read_native(fname):
    """Count every fourth line of the text file ``fname`` and print the total.

    Lines are numbered from 1; the counter is incremented on lines 4, 8, 12, ...
    The result is printed to standard output; nothing is returned.
    """
    reads = 0
    # The context manager closes the file as soon as iteration finishes,
    # including when an exception interrupts it.
    with open(fname) as handle:
        for count, _line in enumerate(handle, start=1):
            if count % 4 == 0:
                reads += 1
    print(reads)
# Time a single pass of read_native over the test file and print the elapsed
# seconds. perf_counter() is a monotonic, high-resolution clock, so the
# measured interval is not affected by system clock adjustments.
fname = "/mnt/datab/DLVR2Chapkin/test-24.med.fastq"
t1 = time.perf_counter()
read_native(fname)
print(time.perf_counter() - t1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# All of the timings below were measured on a FASTQ test file of 400k lines formatted like the records in sample.fastq | |
$ python fastq.py | |
0.0622370243073 | |
$ julia misc-scripts/fasta-speed.jl # 64 byte buffer | |
buffered: elapsed time: 0.056056226 seconds | |
buffered: elapsed time: 0.029347927 seconds | |
buffered: elapsed time: 0.074274421 seconds | |
buffered: elapsed time: 0.061128665 seconds | |
eachline: elapsed time: 0.166149137 seconds | |
eachline: elapsed time: 0.133473847 seconds | |
elapsed time: 0.095022658 seconds | |
elapsed time: 0.020793529 seconds | |
$ julia fastq.jl # 4096 byte buffer | |
buffered: elapsed time: 0.041787645 seconds | |
buffered: elapsed time: 0.015024399 seconds | |
buffered: elapsed time: 0.021850903 seconds | |
buffered: elapsed time: 0.029343111 seconds | |
eachline: elapsed time: 0.162843544 seconds | |
eachline: elapsed time: 0.142045988 seconds | |
elapsed time: 0.088005462 seconds | |
elapsed time: 0.019529357 seconds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@HWI-ST3395:36:C1544ACXX:2:1101:1098:2048 1:N:0:GATCAG | |
GCAATGTGNAGCTGGTTCCCATCTAGAATCTCACTGAAGTGGAAGAACAT | |
+ | |
8:?;;===#2<AACGC<<FH@<AEE4AE<?EGHIGEFE?:9??D9BBDG# | |
@HWI-ST3395:36:C1544ACXX:2:1101:1248:2115 1:N:0:GATCAG | |
CAGGGGGGATGCCCTCTTTATCCTGGATCTTGGCCTTCACGTTCTCGATG | |
+ | |
=@@DDAD/,32<8@@?BBBBABBA388@>>>@(8>@A>@B88(28A8@9A | |
@HWI-ST3395:36:C1544ACXX:2:1101:1349:2057 1:N:0:GATCAG | |
GCCTGAGCCCTTCTCTGAGCCTTTCTCCAGAAAATCCACTCACAGCAACT | |
+ | |
@@@DFFBDHHBHHIHHGGGIJGHGIIEGFACEDHHE<GHAFHGHHIIIID | |
@HWI-ST3395:36:C1544ACXX:2:1101:1441:2066 1:N:0:GATCAG | |
AGGCAATGTCATTTCCATTGAAGCTGTTCTGAAAGTTCACCACAAACCTT | |
+ | |
@<@DADFBFBFHFEGHIIHEHEGGGCFHHHHDHHG2?CFEBCH9CGHEE4 | |
@HWI-ST3395:36:C1544ACXX:2:1101:1485:2155 1:N:0:GATCAG | |
CAGTCCCTGCGGCTGATCATTGTCATGATGATCTGGTACTGGAAGTACAT | |
+ | |
?@@=BDDAFD?CAGF@GB@9G4:CGECGFCD4CDFG??BDBDF>?9BFC? | |
@HWI-ST3395:36:C1544ACXX:2:1101:1460:2172 1:N:0:GATCAG | |
AAGAACTGCAGAGAGTACTGGTACAGGAAGTGAACCTGCTTCAAGGACTC | |
+ | |
@@CFFFEDDDFHGIIBFHHCDCFHIGIJJID9FGHIGEHIIIIIAHIIBH | |
@HWI-ST3395:36:C1544ACXX:2:1101:1357:2174 1:N:0:GATCAG | |
ATAGAATGCTGTGTCATTCTTTAAGTGTGCACCTACACTCAGCACCTCAG | |
+ | |
=@@=D>,+=DADFF>@FBFHDFEGC,33<A?<E3AEBHEHHCHGIIIII? | |
@HWI-ST3395:36:C1544ACXX:2:1101:1389:2196 1:N:0:GATCAG | |
CTCAGCATCTGGTCAGGCTCCCGTGGCCTCCGCGGCTGCCATCCGCTGCT | |
+ | |
BBBFFEFBHHHHFGHIIIHGHIF<CEGHEFHBDDFGGGCHE@E@/;89>@ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment