Created
December 1, 2011 22:02
-
-
Save brentp/1420220 to your computer and use it in GitHub Desktop.
trim fastq reads based on quality with luajit.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[ | |
-- trim fastq reads from stdin e.g.: | |
-- $ luajit trim.lua 15 25 < input.fastq > output.trimmed.fastq | |
-- will trim 5' bases with a quality below 15 and only print reads | |
-- that are longer than 25 bases after trimming. | |
-- IN PROGRESS: adaptive trimming... | |
--]] | |
local function set_quality(t) | |
-- turn the quality string into numbers | |
local qs = t[4] | |
local quals = t[5] | |
for i=1,#qs do | |
quals[i] = qs:byte(i) - 33 | |
end | |
-- in case previous qual string was shorter... | |
while quals[#qs + 1] do | |
table.remove(quals, #qs + 1) | |
end | |
end | |
local function get_record(t) | |
-- read data in to t{} | |
for i = 1, 4 do | |
t[i] = io.read() | |
end | |
if t[4] == nil then | |
os.exit(0) | |
end | |
-- always just update entries in the table | |
set_quality(t) | |
end | |
local function trim_record(t, cutoff) | |
local quals = t[5] | |
local i=#quals | |
while quals[i] < cutoff and i ~= 1 do | |
i = i - 1 | |
end | |
-- trim seq and quals to that index. | |
t[4] = t[4]:sub(1, i) | |
t[2] = t[2]:sub(1, i) | |
end | |
--[[ use a moving window approach. --]] | |
local function trim_adaptive_right(t, cutoff, window_len) | |
local quals = t[5] | |
local L = #quals | |
-- multiply here and use sum inside window (wsum), rather than average. | |
cutoff = cutoff * window_len | |
wsum = 0 | |
for i= L - window_len, L do | |
wsum = wsum + quals[i] | |
end | |
i = L | |
-- TODO center on i ? | |
while i - window_len ~= 1 and wsum < cutoff do | |
i = i - 1 | |
wsum = wsum - quals[i] -- take out the right-most | |
wsum = wsum + quals[i - window_len] -- add the left-most | |
end | |
t[4] = t[4]:sub(1, i + 1) | |
t[2] = t[2]:sub(1, i + 1) | |
end | |
local function print_record(t, min_len) | |
if #t[4] < min_len then return end | |
print(table.concat(t, "\n", 1, 4)) | |
end | |
function main(min_qual, min_len) | |
-- only create the table once. | |
local t = {true, nil, nil, nil, {}} | |
while t[1] do | |
get_record(t) | |
trim_adaptive_right(t, 15, 5) | |
--trim_record(t, min_qual) | |
print_record(t, min_len) | |
end | |
end | |
main(tonumber(arg[1]), tonumber(arg[2])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
if you use this for something other than streaming, make sure not to re-use t as in main above.