Skip to content

Instantly share code, notes, and snippets.

@binarybana
Created June 12, 2013 01:53
Show Gist options
  • Save binarybana/5762324 to your computer and use it in GitHub Desktop.
Save binarybana/5762324 to your computer and use it in GitHub Desktop.
Buffered IO in Julia
# Benchmark helper: scan the whole of `fname` in chunks of up to `bufsize`
# bytes, counting newline bytes as it goes, then print the "buffered: " label.
#
# Arguments:
#   fname   - path of the file to scan.
#   copybuf - when true, every chunk is additionally copied into a fresh
#             array, so the cost of that copy is part of the work done.
#
# The counters are bookkeeping only: `line` is the number of newline bytes
# seen so far and `reads` counts newlines number 1, 5, 9, ... (one per group
# of four lines). Neither is returned; the function returns whatever `print`
# returns.
function read_buf(fname::ASCIIString, copybuf::Bool)
    fsize = filesize(fname)
    fid = open(fname)
    line = 0
    reads = 0
    bufsize = 4096
    buf = zeros(Uint8, bufsize)
    mydata = zeros(Uint8, bufsize)
    newline = uint8('\n')  # loop-invariant, converted once
    remaining = fsize      # bytes of the file not yet consumed
    try
        while remaining > 0
            if remaining >= bufsize
                # Full chunk: refill the preallocated buffer in place.
                read(fid, buf)
            else
                # Final short chunk: read exactly the bytes that are left so
                # the tail of the file is scanned too and no read runs past
                # end-of-file.
                buf = read(fid, Uint8, remaining)
            end
            if copybuf
                mydata = copy(buf)
            end
            for c in buf
                if c == newline
                    line += 1
                    if (line-1)%4 == 0
                        reads += 1
                    end
                end
            end
            remaining -= length(buf)
        end
    finally
        # Release the file handle even if a read fails part-way through.
        close(fid)
    end
    print("buffered: ")
end
# Benchmark helper: walk `fname` one line at a time with `eachline`, tallying
# the lines at positions 1, 5, 9, ... (the first line of every group of four),
# then close the file and print the "eachline: " label.
#
# Arguments:
#   fname - path of the file to iterate over.
#
# The tally is not returned; the function returns whatever `print` returns.
function read_native(fname)
    stream = open(fname)
    nrecords = 0
    lineno = 0
    for seq in eachline(stream)
        lineno += 1
        # Same positions as testing (lineno - 1) % 4 == 0.
        if lineno % 4 == 1
            nrecords += 1
        end
    end
    close(stream)
    print("eachline: ")
end
# Benchmark driver: every variant below is timed twice in a row on the same
# input file.
# NOTE(review): the first call of each pair presumably also pays one-time
# compilation/start-up cost, so the second timing is likely the one to
# compare - confirm.

# Input file, resolved relative to the current working directory.
fname = "myfile.fastq"
# Chunked reader without the per-chunk copy.
@time read_buf(fname, false)
@time read_buf(fname, false)
# Chunked reader with the per-chunk copy enabled.
@time read_buf(fname, true)
@time read_buf(fname, true)
# Line-by-line reader built on eachline.
@time read_native(fname)
@time read_native(fname)
# Baseline: run the external `wc -l` command on the file and read all of its
# output.
@time readall(`wc -l $fname`)
@time readall(`wc -l $fname`)
import time
def read_native(fname):
    """Print the number of complete four-line groups in a text file.

    The file is iterated line by line; every fourth line (lines 4, 8, 12, ...)
    completes one group. The total is printed to standard output.

    Args:
        fname: Path of the file to scan.

    Returns:
        None. The count is printed, not returned.
    """
    count = 0
    reads = 0
    # The context manager closes the file even if iteration raises.
    with open(fname) as handle:
        for line in handle:
            count += 1
            if count % 4 == 0:
                reads += 1
    print(reads)
# Time one pass of read_native over the benchmark file and print the elapsed
# wall-clock seconds.
fname = "/mnt/datab/DLVR2Chapkin/test-24.med.fastq"
started = time.time()
read_native(fname)
elapsed = time.time() - started
print(elapsed)
# All of the timings below were measured on a 400k-line FASTQ test file whose lines look like those in sample.fastq
$ python fastq.py
0.0622370243073
$ julia misc-scripts/fasta-speed.jl # 64 byte buffer
buffered: elapsed time: 0.056056226 seconds
buffered: elapsed time: 0.029347927 seconds
buffered: elapsed time: 0.074274421 seconds
buffered: elapsed time: 0.061128665 seconds
eachline: elapsed time: 0.166149137 seconds
eachline: elapsed time: 0.133473847 seconds
elapsed time: 0.095022658 seconds
elapsed time: 0.020793529 seconds
$ julia fastq.jl # 4096 byte buffer
buffered: elapsed time: 0.041787645 seconds
buffered: elapsed time: 0.015024399 seconds
buffered: elapsed time: 0.021850903 seconds
buffered: elapsed time: 0.029343111 seconds
eachline: elapsed time: 0.162843544 seconds
eachline: elapsed time: 0.142045988 seconds
elapsed time: 0.088005462 seconds
elapsed time: 0.019529357 seconds
@HWI-ST3395:36:C1544ACXX:2:1101:1098:2048 1:N:0:GATCAG
GCAATGTGNAGCTGGTTCCCATCTAGAATCTCACTGAAGTGGAAGAACAT
+
8:?;;===#2<AACGC<<FH@<AEE4AE<?EGHIGEFE?:9??D9BBDG#
@HWI-ST3395:36:C1544ACXX:2:1101:1248:2115 1:N:0:GATCAG
CAGGGGGGATGCCCTCTTTATCCTGGATCTTGGCCTTCACGTTCTCGATG
+
=@@DDAD/,32<8@@?BBBBABBA388@>>>@(8>@A>@B88(28A8@9A
@HWI-ST3395:36:C1544ACXX:2:1101:1349:2057 1:N:0:GATCAG
GCCTGAGCCCTTCTCTGAGCCTTTCTCCAGAAAATCCACTCACAGCAACT
+
@@@DFFBDHHBHHIHHGGGIJGHGIIEGFACEDHHE<GHAFHGHHIIIID
@HWI-ST3395:36:C1544ACXX:2:1101:1441:2066 1:N:0:GATCAG
AGGCAATGTCATTTCCATTGAAGCTGTTCTGAAAGTTCACCACAAACCTT
+
@<@DADFBFBFHFEGHIIHEHEGGGCFHHHHDHHG2?CFEBCH9CGHEE4
@HWI-ST3395:36:C1544ACXX:2:1101:1485:2155 1:N:0:GATCAG
CAGTCCCTGCGGCTGATCATTGTCATGATGATCTGGTACTGGAAGTACAT
+
?@@=BDDAFD?CAGF@GB@9G4:CGECGFCD4CDFG??BDBDF>?9BFC?
@HWI-ST3395:36:C1544ACXX:2:1101:1460:2172 1:N:0:GATCAG
AAGAACTGCAGAGAGTACTGGTACAGGAAGTGAACCTGCTTCAAGGACTC
+
@@CFFFEDDDFHGIIBFHHCDCFHIGIJJID9FGHIGEHIIIIIAHIIBH
@HWI-ST3395:36:C1544ACXX:2:1101:1357:2174 1:N:0:GATCAG
ATAGAATGCTGTGTCATTCTTTAAGTGTGCACCTACACTCAGCACCTCAG
+
=@@=D>,+=DADFF>@FBFHDFEGC,33<A?<E3AEBHEHHCHGIIIII?
@HWI-ST3395:36:C1544ACXX:2:1101:1389:2196 1:N:0:GATCAG
CTCAGCATCTGGTCAGGCTCCCGTGGCCTCCGCGGCTGCCATCCGCTGCT
+
BBBFFEFBHHHHFGHIIIHGHIF<CEGHEFHBDDFGGGCHE@E@/;89>@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment