Skip to content

Instantly share code, notes, and snippets.

@nievergeltlab
Created November 17, 2020 20:25
Show Gist options
  • Save nievergeltlab/6b15e5369183211a7bc68d50aca0a604 to your computer and use it in GitHub Desktop.
Save nievergeltlab/6b15e5369183211a7bc68d50aca0a604 to your computer and use it in GitHub Desktop.
#LC_ALL=C join <(awk '{print $1}' w_hm3.snplist.sorted) <(awk '{print $22,$1}' eur_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.txt.annotated | LC_ALL=C sort -k1b,1) | awk '{print $2,$1}' | LC_ALL=C sort -k1b,1 > w_hm3.snplist.linked
for anc in eur aam asn his # "$anc"
do
#important to use ref and alt1 for this!
cat vcf_header1b.txt vcf_header2.txt <(awk '{OFS="\t"}{if (NR>1) print $1,$2, ".", $4,$5 , "100", "PASS", "MVP="$3}' "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa | sort -n -k1r,1 -k2,2 ) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf
bcftools view "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf -Oz -o "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz
tabix -f "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz
#Some will not be annotated, need to identify these for my merge steps..
bcftools annotate -a All_20180423.vcf.gz -c INFO "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz -o "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated
echo "ID SNP" | awk 'BEGIN {OFS="\t"} {print $1,$2}' > snpheader.txt
tail -n+82 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | grep RS | awk '{print $8}' | awk 'BEGIN {FS=";"}{OFS="\t"} {print $1,$2}' | sed 's/RS=/rs/g' | sed 's/MVP=//g' | cat snpheader.txt - | LC_ALL=C sort -k1b,1 > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success
tail -n+82 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | grep -v RS | awk '{print $8}' | awk 'BEGIN {FS=";"}{OFS="\t"} {print $1,$1}' | sed 's/MVP=//g' | cat snpheader.txt - | LC_ALL=C sort -k1b,1 > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed
LC_ALL=C join -1 1 -2 3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success <(LC_ALL=C sort -k3b,3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success
LC_ALL=C join -1 1 -2 3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed <(LC_ALL=C sort -k3b,3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed
cat "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed | grep -v 2:27849893:CTGT | sort -g -k 22 | awk '{if(NR>1) print $2,$3,$4,$8,$9,$10,$13,$16,$17,$18,$21,$22}' | gzip > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.fuma.gz
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment