This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Looking up p-values in the decasted table: | |
get_pv_cond(){ | |
trait="$1" | |
gene="$2" | |
fgrep -e ${trait}-${gene} -e tp /lustre/scratch115/realdata/mdt0/projects/t144_helic_15x/analysis/HP/burdentesting/alltraits/merged_missing/Pooled.dcast.txt | ~ag15/transpose.sh | sort -k2,2nr | column -t; | |
} | |
# Testing if two variables are in LD in our dataset | |
# 1) a bcftools view is piped into a plink command, as there is no plink files generated from the POMAK dataset so far. | |
# 2) the output of the bcftools has to be adjusted as in the vcf file there is no IDs for the variants required by plink. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### | |
### Conditionals | |
### | |
if [[ -d "${DIRECTORY}" ]]; then ... fi # Returnes true if the directory exists | |
if [[ ! -d "${DIRECTORY}" ]]; then ... fi # Returnes true if the directory does not exists | |
if [[ ! -e "${file}" ]]; then ... fi # Returnes true if the file/directory does not exists | |
if [[ -z ${variable} ]]; then ... fi # Returns true if variable is not set. | |
if [[ $? -ne 0 ]]; then .. fi # Returns true if the previous command has failed. | |
# Create directory if not already exists: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vcfFile="${1}" # Has to be a properly formatted vcf file. Only the first five columns will be used. The ID will be lost. | |
refFasta="${2}" # A reference, indexable fasta file downloaded eg. from Ensembl: | |
# curl -s ftp://ftp.ensembl.org/pub/grch37/release-90/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz | gunzip > Homo_sapiens.GRCh37.dna.primary_assembly.fa | |
nucBed -fi ${refFasta} -bed <( grep -v "#" $vcfFile | awk '{printf "%s\t%s\t%s\t%s/%s\n", $1, $2 -1, $2, $4, $5}' | sed -e 's/chr//' ) -seq | head | grep -v "#" | perl -lane 'print join "\t", qw(#CHROM POS ID REF ALT) if $. == 1; ($ref, $alt) = split("/", $F[3]); ($ref, $alt) = ($alt, $ref) if $ref ne $F[13]; printf "chr%s\t%s\t.\t%s\t%s\n", $F[0], $F[2], $ref, $alt' > ${vcfFile/.vcf/.aligned.vcf} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generating qq plots from p-values: | |
qqplot(-log10(ppoints(length(-log10(pvals)))), -log10(pvals), xlab = "expected", ylab = "observed") | |
# Fine tuning colors: | |
library(grDevices) | |
adjustcolor( 'firebrick', alpha.f = 0.2) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## | |
## Based on a trait name and the corresponding p-values, a QQ plot is generated with a | |
## marked genomic inflation factor. | |
## | |
QQ_plot = function(pvals, trait){ | |
mlogpv = -log10(pvals); # Observed data | |
mlogpv = mlogpv[! is.na(mlogpv)] # Remving Na-s | |
mlogexp = -log10(qunif(ppoints(length(pvals)))); # Generating expected data | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get overlapping protein-coding genes for a set of rsIDs: | |
variants="rs6796106 | |
rs191957455 | |
rs41264153 | |
rs145265828" | |
# This script loops through a set of variants and returns the overlapping gene(s): | |
for v in $variants ; do wget -q http://ec2-54-91-140-228.compute-1.amazonaws.com:5000/variation/human/${v}.json -O - | jq -r '"\(.name) \(.mappings[0].location)"'; done | while read rsID location; do genes=$(wget -q "http://ec2-54-91-140-228.compute-1.amazonaws.com:5000/overlap/region/human/${location}?feature=gene&content-type=application/json" -O - | jq -r '[ .[] | select(.biotype == "protein_coding").external_name ] | join(",")') ; echo $rsID $genes; done | |
# This script looks up a variant in the eQTL database (through the Ensembl REST API), and returns all the genes in which the p-value was lower than1e-9 (Also returns the gene names as well from a separate REST query) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find failed lsf jobs based on the output logs, also why did it failed: | |
find . -type f -name "*.o" -exec grep -L "Successfully completed" {} \; | xargs grep TERM | |
# Extract failed jobs, restart: | |
find . -type f -name "*.o" -exec grep -L "Successfully completed" {} \; | while read f; do | |
command=$(echo $f | xargs -I {} grep -H -A1 "User input" {} | sed -r -e 's/ +/ /g' | grep -v "#" | cut -d- -f2-) | |
chunk=$(grep -i "Subject: Job" $f | perl -lane '$_ =~ /\d+\[(\d+)\]/; print $1') | |
echo "$command -c $chunk" | |
done | ~ag15/array 5g restart |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A bash function to retrieve COSMIC variants (ID, location(b38), consequence and phenotype) based on | |
# genomic location using Ensembl's REST API. | |
# The first solution uses only bash's wget and Perl with using the JSON package, | |
# while the second, more compact solution utilizes jq: https://stedolan.github.io/jq/manual/ | |
# The output of the solutions are expected to be the same. | |
# Without jq: | |
function get_COSMIC { wget -q "http://rest.ensembl.org/overlap/region/human/${1}?feature=somatic_variation;content-type=application/json" -O - | perl -MJSON -lane 'foreach $v (@{decode_json($_)}){ print join "\t", $v->{id} if $v->{source} == "COSMIC"}' | while read ID; do wget -q "http://rest.ensembl.org/variation/human/${ID}?content-type=application/json&phenotypes=1" -O - | perl -MJSON -lane '$v = decode_json($_); $id = $v->{name}; $loc = $v->{mappings}->[0]->{location}; $cons = $v->{most_severe_consequence}; my @pheno = (); foreach $p (@{$v->{phenotypes}}){push @pheno, $p->{trait}}; $pheno = join "|", @pheno; $pheno |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function getInteractiveJob (){ | |
MEMLIM=1000 | |
DEFAULTSHELL=$(which bash) | |
if [[ ! -z "${1}" ]]; then | |
MEMLIM=$(( $1 * 1000 )) | |
fi | |
# Submitting a request to get interactive shell: | |
bsub -M${MEMLIM} -R"select[mem>${MEMLIM}] rusage[mem=${MEMLIM}]" -Ip "${DEFAULTSHELL}" | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
awk ' | |
{ | |
for (i=1; i<=NF; i++) { | |
a[NR,i] = $i | |
} | |
} | |
NF>p { p = NF } | |
END { | |
for(j=1; j<=p; j++) { |
OlderNewer