Skip to content

Instantly share code, notes, and snippets.

@doobwa
Created September 25, 2012 01:01
Show Gist options
  • Save doobwa/3779377 to your computer and use it in GitHub Desktop.
Save doobwa/3779377 to your computer and use it in GitHub Desktop.
An attempt at faster Julia IO
# Chris DuBois
# September 2012
# Inspired by: http://vijayinterviewquestions.blogspot.com/2007/07/write-your-own-c-program-to-implement.html
# Parse a base-10 integer from the front of the string `s`, in the manner of
# C's atoi.
#
# Leading spaces are skipped, an optional '+' or '-' sign is honoured, and
# parsing stops at the first character that is not a decimal digit, so any
# trailing text is ignored. Returns 0 when no digits are found, which includes
# an empty or all-space string. Overflow is not detected.
function atoi(s)
    L = length(s)
    # skip preceding spaces; the bounds check keeps empty and all-space input
    # from indexing past the end of the string
    j = 1
    while j <= L && s[j] == ' '
        j += 1
    end
    # optional sign
    negative = false
    if j <= L && (s[j] == '-' || s[j] == '+')
        negative = s[j] == '-'
        j += 1
    end
    # accumulate digits, stopping at the first non-digit
    i = 0
    while j <= L && s[j] >= '0' && s[j] <= '9'
        i = 10 * i + (s[j] - '0')
        j += 1
    end
    return negative ? -i : i
end
# atoi must ignore leading spaces and anything that follows the digits.
for text in ("390709", " 390709", " 390709 ", " 390709 lxkjd")
    @assert atoi(text) == 390709
end
# Convert the text in `s` to a Float64 by calling the runtime routine
# `jl_strtod` through ccall.
#
# A one-element Float64 array serves as the out-parameter that `jl_strtod`
# writes into. The Int32 value returned by `jl_strtod` is not inspected.
function atof(s)
    result = Array(Float64, 1)
    ccall(:jl_strtod, Int32, (Ptr{Uint8}, Ptr{Float64}), s, result)
    return result[1]
end
# atof must accept spaces on either side of the number...
for text in ("1.234", " 1.234", " 1.234 ")
    @assert atof(text) == 1.234
end
# ...and text without a fractional part.
@assert atof(" 1 ") == 1.0
# Read a delimited text file whose column types are known in advance and
# return a Dict holding one array per column, keyed by column name.
#
# Arguments:
#   filename - path handed to open()
#   types    - one element type per column, in column order. Int64 columns are
#              parsed with atoi, Float64 columns with atof, and columns of any
#              other type store the field text as-is.
#   dlm      - the field delimiter character
#   header   - if true, the first line supplies the column names; otherwise
#              the columns are named "column_1", "column_2", ...
#
# Blank lines (such as an empty final line) are skipped. A line with fewer
# fields than there are columns raises an error so that the column arrays
# cannot drift out of alignment. The file is closed before returning, and
# also when an error interrupts the read.
function dlm2dict(filename, types, dlm::Char, header)
    d = Dict()
    io = open(filename)
    try
        if header
            # drop the line terminator so it does not end up in the last name
            cn = split(chomp(readline(io)), dlm)
        else
            cn = [strcat("column_", x) for x in 1:length(types)]
        end
        for i in 1:length(cn)
            d[cn[i]] = Array(types[i], 0)
        end
        for raw in each_line(io)
            # chomp only trims the end, so positions in `line` and `raw` agree
            line = chomp(raw)
            if length(line) == 0
                continue
            end
            pos = 1
            for i in 1:length(cn)
                e = strchr(line, dlm, pos)
                # a result before `pos` means no delimiter is left, i.e. this
                # is the final field of the line
                atend = e < pos
                if atend && i < length(cn)
                    error("dlm2dict: expected $(length(cn)) fields in line: $line")
                end
                if types[i] == Int64
                    # atoi is handed the remainder of the line and stops at
                    # the first character that is not a digit
                    push(d[cn[i]], atoi(raw[pos:end]))
                elseif types[i] == Float64
                    # atof is likewise handed the remainder of the line
                    push(d[cn[i]], atof(raw[pos:end]))
                elseif atend
                    push(d[cn[i]], line[pos:end])
                else
                    # exclude the delimiter itself from the stored text
                    push(d[cn[i]], line[pos:(e - 1)])
                end
                pos = e + 1
            end
        end
    catch err
        # release the file handle before propagating the failure
        close(io)
        throw(err)
    end
    close(io)
    return d
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment