Skip to content

Instantly share code, notes, and snippets.

@jelber2
Last active March 7, 2024 09:45
Show Gist options
  • Save jelber2/53a95d44667cf1a8d84af6cde6931817 to your computer and use it in GitHub Desktop.
Save jelber2/53a95d44667cf1a8d84af6cde6931817 to your computer and use it in GitHub Desktop.
Stitch FASTQ reads shredded with shred.sh from BBTools back together
# tested on Julialang v.1.10.2 , DataStructures v0.18.16, and FASTX v2.1.4
# Usage
# $ julia stitch-fastq.jl chr20.herro.fasta.Q30.recal.shred.fastq > chr20.herro.fasta.Q30.recal.fastq
#
import Pkg; Pkg.add("FASTX")
import Pkg; Pkg.add("DataStructures")
using DataStructures
using FASTX
function process_fastq_file(filename::String)
reader = FASTQReader(open(filename))
sequencesS = Dict{String, String}()
sequencesQ = Dict{String, String}()
sequences1 = Dict{String, String}()
sequences2 = Dict{String, String}()
for record in reader
# Get identifier
main_id = string(identifier(record))
seq = sequence(record)
qual = quality(record)
sequencesS[main_id] = seq
sequencesQ[main_id] = qual
end
close(reader)
sorted_pairs1 = OrderedDict(sort(collect(pairs(sequencesS))))
sorted_pairs2 = OrderedDict(sort(collect(pairs(sequencesQ))))
for (id, seq) in sorted_pairs1
main_id = split(id, '_')[1]
# Aggregate sequences by the main identifier
if haskey(sequences1, main_id)
sequences1[main_id] *= seq
else
sequences1[main_id] = seq
end
end
for (id, qual) in sorted_pairs2
main_id = split(id, '_')[1]
# Aggregate sequences by the main identifier
if haskey(sequences2, main_id)
sequences2[main_id] *= qual
else
sequences2[main_id] = qual
end
end
# Print the aggregated sequences
for id in keys(sequences1)
seq = sequences1[id]
qual = sequences2[id]
println("@$id")
println(seq)
println("+")
println(qual)
end
end
# Check if a filename is passed as a command line argument
if length(ARGS) < 1
println("Usage: julia ", PROGRAM_FILE, " <filename.fastq>")
return
end
# Example usage with command line argument
process_fastq_file(ARGS[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment