Skip to content

Instantly share code, notes, and snippets.

@thautwarm
Created May 10, 2020 15:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thautwarm/5cd15424d93c35117166185c5383e044 to your computer and use it in GitHub Desktop.
Save thautwarm/5cd15424d93c35117166185c5383e044 to your computer and use it in GitHub Desktop.
time efficient julia string
# A string stored as a plain vector of characters, so that element access by
# position is an ordinary array lookup. The struct itself is immutable; the
# wrapped `buf` vector is not copied on construction.
struct AString
    buf::Vector{Char}
end
# Build an `AString` from a `String` by collecting its characters, in order,
# into a fresh `Vector{Char}`.
#
# NOTE(review): this is written as an unqualified `convert`, so unless
# `Base.convert` is explicitly imported somewhere it is a function local to
# this module rather than an extension of `Base.convert` — confirm intended.
function convert(::Type{AString}, str::String)
    chars = Char[]
    for c in str
        push!(chars, c)
    end
    return AString(chars)
end
# Map the character after a backslash to the character that the escape denotes,
# e.g. `Val('n')` gives the newline character.
#
# The generator assembles the source text of a character literal (quote,
# backslash, `Ch`, quote) and parses it; the parsed literal becomes the body of
# the method generated for that `Val{Ch}`. Whatever the parser rejects as a
# character literal is an error here too.
@generated function control_char(::Val{Ch}) where {Ch}
    quote_mark = '\''
    literal = String([quote_mark, '\\', Ch, quote_mark])
    return Meta.parse(literal)
end
# String macro for `AString` literals: a"..." .
#
# The literal text `s` is scanned left to right:
#   * a backslash followed by another backslash yields one backslash;
#   * a backslash followed by any other character `c` yields
#     `control_char(Val(c))` (single-character escapes only);
#   * every other character is copied as is.
#
# A backslash that is the very last character of `s` has nothing to escape.
# It used to be dropped silently (so a literal ending in a backslash lost that
# character); it is now kept as a literal backslash.
#
# The macro returns the finished `AString` value itself rather than an
# expression that builds one, so the work is done at macro-expansion time.
macro a_str(s::String)
    chs = Char[]
    n = ncodeunits(s)
    i = 1
    escaped = false   # true when the previous character was an unescaped backslash
    while i <= n
        c = s[i]
        if escaped
            push!(chs, c === '\\' ? '\\' : control_char(Val(c)))
            escaped = false
        elseif c === '\\'
            escaped = true
        else
            push!(chs, c)
        end
        # Advance by the encoded width of the character just consumed
        # (a backslash is one code unit wide).
        i += ncodeunits(c)
    end
    # Dangling backslash at the end of the literal: keep it instead of losing it.
    escaped && push!(chs, '\\')
    return AString(chs)
end
# Print an `AString` to `io` wrapped in double quotes. A double-quote character
# inside the string is preceded by a backslash; all other characters are
# written unchanged.
function Base.show(io::IO, a::AString)
    dquote = '"'
    print(io, dquote)
    for ch in a.buf
        ch === dquote && print(io, '\\')
        print(io, ch)
    end
    print(io, dquote)
end
# Two `AString`s are equal when their character buffers are equal element-wise.
Base.:(==)(x::AString, y::AString) = x.buf == y.buf

# Keep `hash` consistent with `==`: without this method, two equal `AString`s
# backed by different vectors hash differently, which breaks their use as
# `Dict` keys, in `Set`s and with `unique`. The symbol is mixed in so that an
# `AString` does not hash like its bare buffer.
Base.hash(x::AString, h::UInt) = hash(x.buf, hash(:AString, h))
# Number of characters held in the `AString` (the length of its buffer).
function Base.length(a::AString)
    return length(a.buf)
end
# Knuth-Morris-Pratt search for `pat` inside `txt`.
#
# Returns a `Vector{Int32}` holding the 1-based start position of every match,
# in increasing order. After a match the pattern cursor restarts at 1, so the
# reported matches never overlap.
#
# The fallback table filled in by `computeLPSArray!` stores 1-based pattern
# positions: `fallback[k]` is the pattern index to resume at after a mismatch
# that follows a successful match of the first `k` pattern characters.
function KMPSearch(pat::AString, txt::AString)
    patlen = length(pat)
    txtlen = length(txt)
    fallback = fill(1, patlen)
    computeLPSArray!(pat, patlen, fallback)
    needle = pat.buf
    haystack = txt.buf
    hits = Int32[]
    p = 1   # cursor into the pattern
    t = 1   # cursor into the text
    while t <= txtlen
        if needle[p] === haystack[t]
            t += 1
            p += 1
        end
        if p === patlen + 1
            # Whole pattern consumed: the match started `patlen` characters back.
            push!(hits, t - p + 1)
            p = 1
        elseif t <= txtlen && needle[p] !== haystack[t]
            if p === 1
                # Nothing matched so far; just move on in the text.
                t += 1
            else
                # Reuse the already-matched prefix instead of rescanning the text.
                p = fallback[p - 1]
            end
        end
    end
    return hits
end
# Fill `lps` in place with the KMP fallback table for the first `M` characters
# of `pat.buf`, using 1-based positions: each stored entry is one more than the
# length of the longest proper prefix that is also a suffix of the pattern up
# to that index. `lps[1]` is never written, so the caller must pre-fill it
# (with 1). Returns `nothing`.
function computeLPSArray!(pat, M, lps)
    k = 1     # one past the length of the prefix/suffix currently being extended
    pos = 2
    while pos <= M
        if pat.buf[pos] == pat.buf[k]
            k += 1
            lps[pos] = k
            pos += 1
        elseif k == 1
            # No shorter candidate left: this position has no prefix/suffix overlap.
            lps[pos] = 1
            pos += 1
        else
            # Fall back to the next shorter candidate and retry the same position.
            k = lps[k - 1]
        end
    end
    return nothing
end
# Split `s` at every (non-overlapping) occurrence of `sep` and return the
# pieces as a `Vector{AString}`.
#
# When `sep` does not occur, the result is a one-element vector containing `s`
# itself (not a copy). Otherwise each piece owns a fresh copy of its slice of
# `s.buf`; adjacent separators produce empty pieces.
function split′(s::AString, sep::AString)
    hits = KMPSearch(sep, s)
    isempty(hits) && return AString[s]

    seplen = length(sep)
    chars = s.buf
    # Sentinel "match" just past the end so the final piece needs no special case.
    push!(hits, length(s) + 1)
    pieces = Vector{AString}(undef, length(hits))
    start = 1
    for (k, hit) in enumerate(hits)
        pieces[k] = AString(chars[start:hit-1])
        start = hit + seplen
    end
    return pieces
end
# Return `true` when the first `length(prefix)` characters of `a` are exactly
# the characters of `prefix`, and `false` otherwise (including when `a` is
# shorter than `prefix`). An empty `prefix` always matches.
function startswith′(a::AString, prefix::AString)
    k = length(prefix)
    length(a) >= k || return false
    for i in 1:k
        a.buf[i] === prefix.buf[i] || return false
    end
    return true
end
# Path of the input file: the first command-line argument.
const a = ARGS[1]

# Scan the file named by `a` line by line and print one number.
#
# Lines that start with "##" are skipped. Every other line is split on tabs;
# fields 10 through 42 (33 fields) are then examined in order. For each field
# the part before the first ':' is taken, and as soon as one of them is neither
# "./." nor "0/0" the line's length in characters is added to the total and the
# rest of that line is ignored. A line with fewer than 42 tab-separated fields
# raises a `BoundsError` unless an earlier field already ended the scan.
#
# NOTE(review): the column layout looks like a VCF-style genotype table —
# confirm against the actual input.
function f()
    z = 0
    for line in eachline(a)
        # A separate name keeps `line::String` and `l::AString` apart instead of
        # rebinding one variable to two different types.
        l = convert(AString, line)
        startswith′(l, a"##") && continue
        d = split′(l, a"\t")
        for i in 1:33
            # Index the field directly (no `d[10:end]` copy) and split it once
            # rather than once per comparison.
            head = split′(d[9 + i], a":")[1]
            if head != a"./." && head != a"0/0"
                z += length(l)
                break
            end
        end
    end
    println(z)
end

@time f()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment