Last active
November 4, 2023 13:36
-
-
Save iam28th/418dc7d5048067af194a76ffb5840c90 to your computer and use it in GitHub Desktop.
Shuffle PE fastq files with preserved pairing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Usage: ./shuf_pe.sh <reads1> <reads2>
#
# Shuffle a pair of paired-end FASTQ files while preserving pairing.
# The output files 1_shuffled.fastq and 2_shuffled.fastq are written to the
# current directory: record k of the first is still the mate of record k of
# the second, but the order of the records is randomized.
#
# Requirements on the input:
#   - both files are uncompressed FASTQ with exactly 4 lines per record,
#     hold the same number of records, and list mates in the same order;
#   - no line contains a tab character (tab is used as a temporary
#     separator while shuffling).
#
# Exit status: 0 on success, 1 on unreadable/malformed input, 2 on bad usage.
#
# Related gist (the original note said "for shuffling PE reads", but it links
# to a different gist than this one -- presumably the single-end variant;
# verify before relying on it):
# https://gist.github.com/iam28th/49a245427ea2b8ed5f1f9889c13468bf

set -eu

# Without pipefail only the last pipeline stage decides the exit status, so a
# failing paste/awk/shuf would go unnoticed. Not every /bin/sh supports the
# option, hence the probe in a subshell before enabling it.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

if [ "$#" -ne 2 ]; then
  printf 'Usage: %s <reads1> <reads2>\n' "${0##*/}" >&2
  exit 2
fi

# Fail early with a clear message; "-" is let through because paste treats it
# as standard input.
for reads in "$1" "$2"; do
  if [ "$reads" != "-" ] && [ ! -r "$reads" ]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$reads" >&2
    exit 1
  fi
done

# Remove leftovers from a previous run, so that an empty input does not leave
# stale results behind.
rm -f -- 1_shuffled.fastq 2_shuffled.fastq

# Interleave the two files line by line: 8 consecutive lines of the stream
# hold one read pair (the 4 lines of each mate, alternating reads1/reads2).
paste -d '\n' -- "$1" "$2" |
  # Collapse every group of 8 lines into a single tab-separated line.
  awk '
    function fail(msg) {
      print "shuf_pe: " msg " (interleaved line " NR ")" > "/dev/stderr"
      exit 1
    }
    {
      # $0 is the reads1 header; getline into a variable leaves $0 untouched.
      record = $0
      mate_header = ""
      for (i = 2; i <= 8; ++i) {
        if ((getline line) <= 0)
          fail("input ends in the middle of a read pair")
        if (i == 2)
          mate_header = line
        record = record "\t" line
      }
      # paste substitutes empty lines for the shorter file, so an empty
      # header line means the inputs differ in length (or contain blanks).
      if ($0 == "" || mate_header == "")
        fail("empty header line: inputs differ in length or contain blank lines")
      print record
    }
  ' |
  # Shuffle whole read pairs.
  shuf |
  # Turn the tabs back into newlines, restoring the interleaved layout.
  tr '\t' '\n' |
  # Odd lines came from reads1, even lines from reads2. Inside awk, ">"
  # truncates only when the file is first opened, later prints append.
  awk '
    NR % 2 == 1 { print > "1_shuffled.fastq" }
    NR % 2 == 0 { print > "2_shuffled.fastq" }
  '
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment