Adam X. Maihofer nievergeltlab

## ldsc_results_extractor.sh

for files in $(ls | grep log | grep -v munge | grep ldsc)
do
 h2snp=$(cat $files | grep "Observed scale" | sed 's/(//g' | sed 's/)//g' | awk '{print $5,$6, $5/$6}')
 intercept=$(cat $files | grep "Intercept" | sed 's/(//g' | sed 's/)//g' | awk '{print $2,$3, ($2-1)/$3}' )
 ratio=$(cat $files | grep "Ratio" | sed 's/(//g' | sed 's/)//g' | awk '{print $2,$3, $2/$3}' )
 lambda=$(cat $files | grep "Lambda" | sed 's/(//g' | sed 's/)//g' | awk '{print $3}' )
 chisq=$(cat $files | grep "Mean Chi" | sed 's/(//g' | sed 's/)//g' | awk '{print $3}' )
 outname=$(echo $files | sed 's/.log//g')

## replicate_region.r
d1$replicated <- NA
#All 3 must be true
for (i in 1:dim(d1)[1])
{
 d1[i,]$replicated <- any(d1[i,]$chr == d2$CHR & d1[i,]$pos >= d2$START & d1[i,]$pos <= d2$STOP)
 }


## nb_logodds.r
#Y is an Nx1 vector of the phenotype
#x is a NxN matrix of covariates
#b is beta (supplied by optim)
#d is a dispersion parameter (supplied by optim)
likfun1 <- function(y,x,theta)
{
 #the beta parameters will be the first N-1 inputs supplied by optim, dispersion the last
 b=theta[-length(theta)]
 d=theta[length(theta)]
 loglik1 <- sum ( log(gamma(y+d)) -log(gamma(y + 1)) - log(gamma(d)) +

## get_ns.r
for file in $(ls pheno | grep .pheno)
do
 fname=$(echo $file | awk 'BEGIN {FS="_"}{print $2}')
 fname2=$(echo $file | awk 'BEGIN {FS="_"}{print $3}' | sed 's/.pheno//g')

# scount=$(awk '{print $3}' pheno/$file | grep -v NA | wc -l | awk '{print $1}')
 scount=$(LC_ALL=C join <(awk '{print $1"_"$2,$3}' pheno/$file  | LC_ALL=C sort -k1b,1 ) <( awk '{print $1"_"$2}' pheno/p2_"$fname"_eur_"$fname2"_agesex.cov | LC_ALL=C sort -k1b,1) | awk '{print $2}' | grep -v NA | wc -l | awk '{print $1}')

 echo $fname $fname2 $scount
 head -n1 pheno/$file

## 0_forest_plot.r
args <- commandArgs(trailingOnly = TRUE)
 snpresults <- args[1]
 header_classes <- args[2]
 descriptor <-args[3]
 outfile <- args[4]


# snpresults="results_filtered/forest_plots/rs10266297"
# header_classes="results_filtered/forest_plots/header_classes.csv"
# descriptor="results_filtered/forest_plots/forestplot_descriptor.csv"

## optimal_p.r
library(MASS)
library(data.table)
require(foreign)
require(nnet)
require(ggplot2)
require(reshape2)
library(lmtest)
#if it complains about this package:
 #install.packages('zoo',repos="https://cran.r-project.org")
 #install.packages('lmtest',repos="https://cran.r-project.org")

## job_example.sh

inc1=0
inc2=0
while [ $inc1 -le 100 ]
do
 inc1=$(($inc2 + 1))
 inc2=$((inc1 + 5))
 echo $inc1 $inc2
 for i in $(seq $inc1 1 $inc2 )
 do

## 00_launch_plink.sh
   for geno in $(ls genotypes | grep gz )
    do
    echo running $geno
    date
    Rscript 00_plinkR_v2.txt  dos_pts_mrsc_mix_am-qc.hg19.ch.fl.chr15_093_103.out.dosage.fam \
                             1_MRSC_C_V2_2.5_PCL4.txt \
                             pts_mrsc_mix_am-qc-eur_pca.menv.mds_cov \
                             $geno \
                             "genotypes" \
                             "outputs" \

## prs_nl.r
library(MASS)
library(data.table)
require(foreign)
require(nnet)
require(ggplot2)
require(reshape2)
library(lmtest)
#if it complains about this package:
 #install.packages('zoo',repos="https://cran.r-project.org")
 #install.packages('lmtest',repos="https://cran.r-project.org")

## ukbb_gwas

#PLINK2 with dosages
for chr in 2 # {1..22}
do
 ./plink2 --bgen /mnt/ukbb/adam/bgen/ukb_imp_chr"$chr"_v3.bgen ref-first\
        --maf 0.01 \
        --geno 0.05 \
        --keep UKB_ptsd_eur_unrelated_m0_may13_2021.pheno \
        --sample /mnt/ukbb/adam/ptsd/preimputation_genotypes/ukb41209_imp_chr1_v3_s487320.sample \
        --pheno UKB_ptsd_eur_unrelated_m0_may13_2021.pheno --pheno-name PCL_sum_imp \

	for files in $(ls \| grep log \| grep -v munge \| grep ldsc)
	do
	h2snp=$(cat $files \| grep "Observed scale" \| sed 's/(//g' \| sed 's/)//g' \| awk '{print $5,$6, $5/$6}')
	intercept=$(cat $files \| grep "Intercept" \| sed 's/(//g' \| sed 's/)//g' \| awk '{print $2,$3, ($2-1)/$3}' )
	ratio=$(cat $files \| grep "Ratio" \| sed 's/(//g' \| sed 's/)//g' \| awk '{print $2,$3, $2/$3}' )
	lambda=$(cat $files \| grep "Lambda" \| sed 's/(//g' \| sed 's/)//g' \| awk '{print $3}' )
	chisq=$(cat $files \| grep "Mean Chi" \| sed 's/(//g' \| sed 's/)//g' \| awk '{print $3}' )
	outname=$(echo $files \| sed 's/.log//g')
	d1$replicated <- NA
	#All 3 must be true
	for (i in 1:dim(d1)[1])
	{
	d1[i,]$replicated <- any(d1[i,]$chr == d2$CHR & d1[i,]$pos >= d2$START & d1[i,]$pos <= d2$STOP)
	}
	#Y is an Nx1 vector of the phenotype
	#x is a NxN matrix of covariates
	#b is beta (supplied by optim)
	#d is a dispersion parameter (supplied by optim)
	likfun1 <- function(y,x,theta)
	{
	#the beta parameters will be the first N-1 inputs supplied by optim, dispersion the last
	b=theta[-length(theta)]
	d=theta[length(theta)]
	loglik1 <- sum ( log(gamma(y+d)) -log(gamma(y + 1)) - log(gamma(d)) +
	for file in $(ls pheno \| grep .pheno)
	do
	fname=$(echo $file \| awk 'BEGIN {FS="_"}{print $2}')
	fname2=$(echo $file \| awk 'BEGIN {FS="_"}{print $3}' \| sed 's/.pheno//g')

	# scount=$(awk '{print $3}' pheno/$file \| grep -v NA \| wc -l \| awk '{print $1}')
	scount=$(LC_ALL=C join <(awk '{print $1"_"$2,$3}' pheno/$file \| LC_ALL=C sort -k1b,1 ) <( awk '{print $1"_"$2}' pheno/p2_"$fname"_eur_"$fname2"_agesex.cov \| LC_ALL=C sort -k1b,1) \| awk '{print $2}' \| grep -v NA \| wc -l \| awk '{print $1}')

	echo $fname $fname2 $scount
	head -n1 pheno/$file
	args <- commandArgs(trailingOnly = TRUE)
	snpresults <- args[1]
	header_classes <- args[2]
	descriptor <-args[3]
	outfile <- args[4]


	# snpresults="results_filtered/forest_plots/rs10266297"
	# header_classes="results_filtered/forest_plots/header_classes.csv"
	# descriptor="results_filtered/forest_plots/forestplot_descriptor.csv"
	library(MASS)
	library(data.table)
	require(foreign)
	require(nnet)
	require(ggplot2)
	require(reshape2)
	library(lmtest)
	#if it complains about this package:
	#install.packages('zoo',repos="https://cran.r-project.org")
	#install.packages('lmtest',repos="https://cran.r-project.org")

	inc1=0
	inc2=0
	while [ $inc1 -le 100 ]
	do
	inc1=$(($inc2 + 1))
	inc2=$((inc1 + 5))
	echo $inc1 $inc2
	for i in $(seq $inc1 1 $inc2 )
	do
	for geno in $(ls genotypes \| grep gz )
	do
	echo running $geno
	date
	Rscript 00_plinkR_v2.txt dos_pts_mrsc_mix_am-qc.hg19.ch.fl.chr15_093_103.out.dosage.fam \
	1_MRSC_C_V2_2.5_PCL4.txt \
	pts_mrsc_mix_am-qc-eur_pca.menv.mds_cov \
	$geno \
	"genotypes" \
	"outputs" \

	#PLINK2 with dosages
	for chr in 2 # {1..22}
	do
	./plink2 --bgen /mnt/ukbb/adam/bgen/ukb_imp_chr"$chr"_v3.bgen ref-first\
	--maf 0.01 \
	--geno 0.05 \
	--keep UKB_ptsd_eur_unrelated_m0_may13_2021.pheno \
	--sample /mnt/ukbb/adam/ptsd/preimputation_genotypes/ukb41209_imp_chr1_v3_s487320.sample \
	--pheno UKB_ptsd_eur_unrelated_m0_may13_2021.pheno --pheno-name PCL_sum_imp \