Skip to content

Instantly share code, notes, and snippets.

@brentp
Created December 1, 2011 22:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brentp/1420220 to your computer and use it in GitHub Desktop.
Save brentp/1420220 to your computer and use it in GitHub Desktop.
trim fastq reads based on quality with luajit.
--[[
-- trim fastq reads from stdin e.g.:
-- $ luajit trim.lua 15 25 < input.fastq > output.trimmed.fastq
-- will trim 5' bases with a quality below 15 and only print reads
-- that are longer than 25 bases after trimming.
-- IN PROGRESS: adaptive trimming...
--]]
local function set_quality(t)
-- turn the quality string into numbers
local qs = t[4]
local quals = t[5]
for i=1,#qs do
quals[i] = qs:byte(i) - 33
end
-- in case previous qual string was shorter...
while quals[#qs + 1] do
table.remove(quals, #qs + 1)
end
end
local function get_record(t)
-- read data in to t{}
for i = 1, 4 do
t[i] = io.read()
end
if t[4] == nil then
os.exit(0)
end
-- always just update entries in the table
set_quality(t)
end
local function trim_record(t, cutoff)
local quals = t[5]
local i=#quals
while quals[i] < cutoff and i ~= 1 do
i = i - 1
end
-- trim seq and quals to that index.
t[4] = t[4]:sub(1, i)
t[2] = t[2]:sub(1, i)
end
--[[ use a moving window approach. --]]
local function trim_adaptive_right(t, cutoff, window_len)
local quals = t[5]
local L = #quals
-- multiply here and use sum inside window (wsum), rather than average.
cutoff = cutoff * window_len
wsum = 0
for i= L - window_len, L do
wsum = wsum + quals[i]
end
i = L
-- TODO center on i ?
while i - window_len ~= 1 and wsum < cutoff do
i = i - 1
wsum = wsum - quals[i] -- take out the right-most
wsum = wsum + quals[i - window_len] -- add the left-most
end
t[4] = t[4]:sub(1, i + 1)
t[2] = t[2]:sub(1, i + 1)
end
local function print_record(t, min_len)
if #t[4] < min_len then return end
print(table.concat(t, "\n", 1, 4))
end
function main(min_qual, min_len)
-- only create the table once.
local t = {true, nil, nil, nil, {}}
while t[1] do
get_record(t)
trim_adaptive_right(t, 15, 5)
--trim_record(t, min_qual)
print_record(t, min_len)
end
end
main(tonumber(arg[1]), tonumber(arg[2]))
@brentp
Copy link
Author

brentp commented Oct 18, 2012

if you use this for something other than streaming, make sure not to re-use t as in main above.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment