Dave Tang davetang

## plot_gemini.sh
#!/bin/bash

if type -P gemini > /dev/null

   then
      # Produce one summary table and one plot per impact severity level.
      for SEVERITY in HIGH MED LOW; do
         # Count variants per impact_so term for the current severity, ordered by
         # that count, and write the result to <SEVERITY>.tsv.
         # NOTE(review): --header presumably emits a column-name row -- confirm
         # against the gemini documentation.
         # NOTE(review): *.db is expanded by the shell to every matching file in
         # the current directory; presumably exactly one database is expected.
         gemini query -q "select impact_so, count(impact_so) from variants where impact_severity == \"$SEVERITY\" group by impact_so order by count(impact_so)" --header *.db > $SEVERITY.tsv
         # Pass the table to the plotting script, then delete the intermediate file.
         plot_gemini.R $SEVERITY.tsv
         rm -f $SEVERITY.tsv
      done

## plot_gemini.R
#!/usr/bin/env Rscript
#
# Usage: plot_gemini.R <file.tsv>
#

my_required <- c('ggplot2', 'reshape2', 'ggthemes')
for (my_package in my_required){
   if(my_package %in% rownames(installed.packages()) == FALSE){
      stop(paste("Please install", my_package, "first"))
   }

## get_sequence.R
#I want to fetch sequences from
#my_random_loci and my_refseq_tss
head(my_random_loci,2)
#     chr    start      end strand
# 1 chr18 59415403 59415407      +
# 2 chr22  8535632  8535636      -

#install if necessary
#biocLite.R has been retired by Bioconductor; BiocManager (from CRAN) is the
#supported installer for Bioconductor 3.8 and later
#the genome package is only installed when it is not already available
if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)){
   if (!requireNamespace("BiocManager", quietly = TRUE)){
      install.packages("BiocManager")
   }
   BiocManager::install("BSgenome.Hsapiens.UCSC.hg19")
}

## hpo_to_term.pl
#!/usr/bin/env perl

# Script to output names and synonyms of HPO terms

use strict;
use warnings;

my $usage = "Usage: $0 <HPO term> [HPO terms]\n";

if (scalar(@ARGV) == 0){

## text_to_hpo_term.pl
#!/usr/bin/env perl

# Strings (the query) present on each line of a file are matched to Human Phenotype Ontology (HPO) terms (the subject)
# If a direct match between the query and subject could not be found, a global alignment is performed
# Alignments will only take place between queries and subjects that are not longer than each
# other by a length of 5 characters (including spaces)
# For example, 'short' and 'microphones' differ by a length of 6 and will not be compared
# The terms 'short' and 'computer' will be aligned because they differ by a length of 3
# Change $threshold if you want to change the length difference threshold

## split_chr.pl
#!/usr/bin/env perl
# env is looked up in /usr/bin because many systems do not provide /bin/env

use strict;
use warnings;

#hash for filehandles
my %fh = ();

#read from stream
while (<>){

## random_bed.pl
#!/usr/bin/env perl
# env is looked up in /usr/bin because many systems do not provide /bin/env

use strict;
use warnings;

# The first command-line argument is the input BED file; without it the usage
# message is printed and the script exits.
# The argument is checked with defined/length rather than for truthiness so
# that a file literally named "0" is accepted.
my $usage = "Usage: $0 <infile.bed>\n";
my $infile = shift;
die $usage unless defined($infile) && length($infile);

my %bed = ();

## copy_directory.pl
#!/usr/bin/env perl
# env is looked up in /usr/bin because many systems do not provide /bin/env

use strict;
use warnings;

# Two command-line arguments are required: <old_dir> followed by <new_dir>.
# If either is missing the usage message is printed and the script exits.
# Arguments are checked with defined/length rather than for truthiness so that
# a directory literally named "0" is accepted.
my $usage = "Usage: $0 <old_dir> <new_dir>\n";
my $old = shift;
die $usage unless defined($old) && length($old);
my $new = shift;
die $usage unless defined($new) && length($new);

my %current = ();

## transfac_to_tess.pl
#!/usr/bin/env perl
# env is looked up in /usr/bin because many systems do not provide /bin/env

use strict;
use warnings;

# The first command-line argument is the input matrix file; without it the
# usage message is printed and the script exits.
# The argument is checked with defined/length rather than for truthiness so
# that a file literally named "0" is accepted.
my $usage = "Usage: $0 <matrix.dat>\n";
my $infile = shift;
die $usage unless defined($infile) && length($infile);

# Parser state, initialised empty / zero before the input is read.
my $accession = '';
my $start = 0;

## random_forest.R
#install if necessary
#only download the package when it is not already available, so that
#re-running the script does not reinstall it every time
if (!requireNamespace("randomForest", quietly = TRUE)){
   install.packages("randomForest")
}

#load library
library(randomForest)

#I have two sets of dinucleotide counts stored in
#my_random_loci_seq_di and my_refseq_tss_seq_di

head(my_refseq_tss_seq_di,2)
	#!/bin/bash

	if type -P gemini > /dev/null

	then
	# Produce one summary table and one plot per impact severity level.
	for SEVERITY in HIGH MED LOW; do
	# Count variants per impact_so term for the current severity, ordered by
	# that count, and write the result to <SEVERITY>.tsv.
	# NOTE(review): --header presumably emits a column-name row -- confirm
	# against the gemini documentation.
	# NOTE(review): *.db is expanded by the shell to every matching file in
	# the current directory; presumably exactly one database is expected.
	gemini query -q "select impact_so, count(impact_so) from variants where impact_severity == \"$SEVERITY\" group by impact_so order by count(impact_so)" --header *.db > $SEVERITY.tsv
	# Pass the table to the plotting script, then delete the intermediate file.
	plot_gemini.R $SEVERITY.tsv
	rm -f $SEVERITY.tsv
	done
	#!/usr/bin/env Rscript
	#
	# Usage: plot_gemini.R <file.tsv>
	#

	my_required <- c('ggplot2', 'reshape2', 'ggthemes')
	for (my_package in my_required){
	if(my_package %in% rownames(installed.packages()) == FALSE){
	stop(paste("Please install", my_package, "first"))
	}
	#I want to fetch sequences from
	#my_random_loci and my_refseq_tss
	head(my_random_loci,2)
	# chr start end strand
	# 1 chr18 59415403 59415407 +
	# 2 chr22 8535632 8535636 -

	#install if necessary
	#biocLite.R has been retired by Bioconductor; BiocManager (from CRAN) is the
	#supported installer for Bioconductor 3.8 and later
	#the genome package is only installed when it is not already available
	if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)){
	   if (!requireNamespace("BiocManager", quietly = TRUE)){
	      install.packages("BiocManager")
	   }
	   BiocManager::install("BSgenome.Hsapiens.UCSC.hg19")
	}
	#!/usr/bin/env perl

	# Script to output names and synonyms of HPO terms

	use strict;
	use warnings;

	my $usage = "Usage: $0 <HPO term> [HPO terms]\n";

	if (scalar(@ARGV) == 0){
	#!/usr/bin/env perl

	# Strings (the query) present on each line of a file are matched to Human Phenotype Ontology (HPO) terms (the subject)
	# If a direct match between the query and subject could not be found, a global alignment is performed
	# Alignments will only take place between queries and subjects that are not longer than each
	# other by a length of 5 characters (including spaces)
	# For example, 'short' and 'microphones' differ by a length of 6 and will not be compared
	# The terms 'short' and 'computer' will be aligned because they differ by a length of 3
	# Change $threshold if you want to change the length difference threshold
	#!/usr/bin/env perl
	# env is looked up in /usr/bin because many systems do not provide /bin/env

	use strict;
	use warnings;

	#hash for filehandles
	my %fh = ();

	#read from stream
	while (<>){
	#install if necessary
	#only download the package when it is not already available, so that
	#re-running the script does not reinstall it every time
	if (!requireNamespace("randomForest", quietly = TRUE)){
	   install.packages("randomForest")
	}

	#load library
	library(randomForest)

	#I have two sets of dinucleotide counts stored in
	#my_random_loci_seq_di and my_refseq_tss_seq_di

	head(my_refseq_tss_seq_di,2)