Created
November 17, 2020 20:25
-
-
Save nievergeltlab/6b15e5369183211a7bc68d50aca0a604 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#LC_ALL=C join <(awk '{print $1}' w_hm3.snplist.sorted) <(awk '{print $22,$1}' eur_broad_mar172020_allchr.broad_tinnitus.maf01.resultsa.txt.annotated | LC_ALL=C sort -k1b,1) | awk '{print $2,$1}' | LC_ALL=C sort -k1b,1 > w_hm3.snplist.linked | |
for anc in eur aam asn his # "$anc" | |
do | |
#important to use ref and alt1 for this! | |
cat vcf_header1b.txt vcf_header2.txt <(awk '{OFS="\t"}{if (NR>1) print $1,$2, ".", $4,$5 , "100", "PASS", "MVP="$3}' "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa | sort -n -k1r,1 -k2,2 ) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf | |
bcftools view "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf -Oz -o "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz | |
tabix -f "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz | |
#Some will not be annotated, need to identify these for my merge steps.. | |
bcftools annotate -a All_20180423.vcf.gz -c INFO "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz -o "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | |
echo "ID SNP" | awk 'BEGIN {OFS="\t"} {print $1,$2}' > snpheader.txt | |
tail -n+82 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | grep RS | awk '{print $8}' | awk 'BEGIN {FS=";"}{OFS="\t"} {print $1,$2}' | sed 's/RS=/rs/g' | sed 's/MVP=//g' | cat snpheader.txt - | LC_ALL=C sort -k1b,1 > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success | |
tail -n+82 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | grep -v RS | awk '{print $8}' | awk 'BEGIN {FS=";"}{OFS="\t"} {print $1,$1}' | sed 's/MVP=//g' | cat snpheader.txt - | LC_ALL=C sort -k1b,1 > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed | |
LC_ALL=C join -1 1 -2 3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.success <(LC_ALL=C sort -k3b,3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success | |
LC_ALL=C join -1 1 -2 3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.vcf.gz.annotated.failed <(LC_ALL=C sort -k3b,3 "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa) > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success | |
wc -l "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed | |
cat "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.success "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.failed | grep -v 2:27849893:CTGT | sort -g -k 22 | awk '{if(NR>1) print $2,$3,$4,$8,$9,$10,$13,$16,$17,$18,$21,$22}' | gzip > "$anc"_broad_mar172020_allchr.hearing.maf01.resultsa.fuma.gz | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment