Skip to content

Instantly share code, notes, and snippets.

@joshbode
Created May 11, 2017 15:54
Show Gist options
  • Save joshbode/09413919bde54d035a28f20e51bb8467 to your computer and use it in GitHub Desktop.
Save joshbode/09413919bde54d035a28f20e51bb8467 to your computer and use it in GitHub Desktop.
TextParse Support for GZip
using GZip
# Number of bytes fetched from the underlying stream when a single character
# has to be decoded (4 bytes is the longest UTF-8 encoding of one code point).
# Declared `const` so that the methods reading it do not pay for an untyped,
# reassignable global on every character access.
const MAX_WIDTH = 4
# Return the uncompressed size (in bytes) of the gzip file behind `io`.
#
# The value is taken from the ISIZE field, i.e. the last four bytes of the
# file named by `io.name`. Per the gzip format that field is an *unsigned*
# 32-bit little-endian integer holding the uncompressed length modulo 2^32, so:
#   * it is read as `UInt32` (reading it as `Int32` yields a negative size for
#     payloads of 2 GiB or more);
#   * it is converted with `ltoh` so the result does not depend on host
#     byte order;
#   * payloads of 4 GiB or more, or multi-member archives, cannot be sized
#     exactly this way.
#
# The file is opened with the `do` form so the handle is closed even if
# seeking or reading throws.
function Base.filesize(io::GZip.GZipStream)
    return open(io.name, "r") do f
        seekend(f)
        skip(f, -4)  # back up over the 4-byte ISIZE trailer
        Int64(ltoh(read(f, UInt32)))
    end
end
# String-like wrapper around a seekable stream.
#   io   -- the wrapped stream that the string methods seek and read from
#   size -- byte length reported for the string (used as its last index)
type StringIO{T<:IO} <: AbstractString
    io::T
    size::Int64
end

# Convenience constructor: take the length from `filesize(io)`.
function StringIO(io::IO)
    nbytes = filesize(io)
    return StringIO(io, nbytes)
end
# Write a short tag for `s` to `io`: the literal "StringIO(", the printed form
# of the wrapped stream, and a closing ")". The stream contents are not read.
function Base.print(io::IO, s::StringIO)
    print(io, "StringIO(")
    print(io, s.io)
    print(io, ")")
    return nothing
end
# Display a `StringIO` using exactly the text that `print` emits for it.
function Base.show(io::IO, x::StringIO)
    return print(io, x)
end
# Iteration over a `StringIO` begins at index 1.
function Base.start(s::StringIO)
    return 1
end
# The last index of a `StringIO` is its stored `size` field.
# NOTE(review): `size` is a byte count, so this is a valid character index
# only when the final character is a single byte -- confirm for non-ASCII data.
function Base.endof(s::StringIO)
    return s.size
end
# Decode the character at index `i` and return `(char, next_index)`.
#
# Index `i` maps to byte offset `i - 1` in the stream (clamped at 0). A window
# of `MAX_WIDTH` bytes is read from there and its first character is decoded;
# the window-local follow-up index is then shifted back into stream indices.
# The stream position is left wherever the read ended.
function Base.next(s::StringIO, i::Int)
    offset = max(i - 1, 0)
    seek(s.io, offset)
    window = String(read(s.io, MAX_WIDTH))
    ch, local_next = next(window, 1)
    return (ch, i + local_next - 1)
end
# Return the index of the character that precedes index `i`.
#
# A small window is read starting `MAX_WIDTH` bytes before `i` (clamped to the
# start of the stream), `prevind` is evaluated on that window, and the result
# is translated back into stream indices.
# The stream position is left wherever the read ended.
function Base.prevind(s::StringIO, i::Int)
    window_start = max(i - 1 - MAX_WIDTH, 0)
    seek(s.io, window_start)
    # NOTE(review): 8 looks like 2 * MAX_WIDTH spelled as a literal -- confirm.
    window = String(read(s.io, 8))
    # Where stream index `i` falls inside the window: `MAX_WIDTH + 1` when the
    # window starts MAX_WIDTH bytes back, or `i` itself when clamped to offset 0.
    local_i = min(i, MAX_WIDTH + 1)
    local_prev = prevind(window, local_i)
    return i + local_prev - local_i
end
# Read a chunk of the stream for range `r` and return it as a `String`.
#
# `lo` is the index before `first(r)` and `hi` the index after `last(r)`;
# the read starts at byte offset `lo - 1` (clamped at 0) and spans
# `hi - lo - 1` bytes. The stream position is left wherever the read ended.
# NOTE(review): because the read starts at `lo` rather than `first(r)`, the
# returned bytes appear shifted one character to the left of `r` whenever
# `first(r) > 1`; callers that pass `offset + 2` as the start seem to rely on
# this -- confirm before changing either side.
function Base.getindex(s::StringIO, r::UnitRange{Int})
    lo = prevind(s, first(r))
    hi = nextind(s, last(r))
    seek(s.io, max(lo - 1, 0))
    return String(read(s.io, hi - lo - 1))
end
using PooledArrays
using TextParse
# Materialise the field described by `r` as a `String` by indexing `str`
# with the range `(r.offset + 2):(r.length + r.offset + 1)`.
@inline function TextParse.alloc_string(str::StringIO, r::TextParse.StrRange)
    lo = r.offset + 2
    hi = r.length + r.offset + 1
    return str[lo:hi]
end
# Store the field described by `rng` at position `i` of the pooled array `pa`.
#
# The field text is looked up in `pa.pool`; only when it is not found is it
# converted to `T` and pushed onto the pool. `pa.refs[i]` is then set to the
# pool index, which is also the value of the final expression.
@inline function TextParse.nonallocating_setindex!{T}(pa::PooledArrays.PooledArray{T}, i, rng::TextParse.StrRange, str::StringIO)
    lo = rng.offset + 2
    hi = rng.length + rng.offset + 1
    field = str[lo:hi]
    slot = findfirst(pa.pool, field)
    if !(slot > 0)
        # Not pooled yet: this is the only branch that converts and pushes.
        slot = PooledArrays.unsafe_pool_push!(pa, convert(T, field))
    end
    pa.refs[i] = slot
end
# `csvread` entry point for a `StringIO`: forwards the source, the delimiter
# (default ',') and all keyword arguments to `TextParse._csvread`.
TextParse.csvread(file::StringIO, delim=','; kwargs...) =
    TextParse._csvread(file, delim; kwargs...)
# Example usage: wrap a gzip-compressed, tab-separated file and parse it.
# The module is `GZip` (as imported with `using GZip`); the previous spelling
# `Gzip` raised an UndefVarError.
s = StringIO(GZip.open("foo.txt.gz"))
csvread(s, '\t')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment