This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Rscript normalize_featurecounts.R counts_table.txt tpm ; | |
#Rscript normalize_featurecounts.R counts_table.txt rpkm ; | |
#Sample table### | |
#GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length | |
#Gene1<TAB>10<TAB>4<TAB>5<TAB>1500 | |
#Gene2<TAB>20<TAB>43<TAB>60<TAB>4300 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
echo `date`; | |
dir=(`dirname $1`) | |
base=(`basename $1`) | |
echo "1/7 Preparing files"; | |
cat $1 | sed 's/ "/\t/g' | sed 's/"; /\t/g' | cut -f1,4,5,7,10,12,16,18,22 | sed 's/";//' | awk '!x[$6]++' | awk '{print $6"\t"$0;}' > $dir/enst_annotation.tmp; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# ./CheckPhredScale.sh <FASTQ_FILE> <NUMBER OF LINES TO CHECK> | |
echo "Shell:"; | |
zcat $1 | head -n $2 | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding";}'; | |
echo ""; | |
echo "python:"; | |
zcat $1 | awk 'NR % 4 == 0' | head -n $2 | python ./guess-encoding.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
$/=undef; | |
## Run script: perl CpG_feature_extraction.pl CpG_islands.bed shores/shelves (This gives warnings for 'rm' command, just ignore it) | |
# Reference for definition of shores and shelves from: | |
# http://www.mdpi.com/2073-4425/5/2/347/htm | |
# http://www.mdpi.com/genes/genes-05-00347/article_deploy/html/images/genes-05-00347-g001-1024.png | |
extend_coordinates($ARGV[0] ,$ARGV[1]); | |
sub extend_coordinates | |
{ | |
$type=$ARGV[1]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#USAGE: CI_normal(matrix[,c(x:y)],97.5) - multiple column from a normal distribution | |
#USAGE: CI_tdist(matrix[,c(x:y)],97.5) - multiple column from a t distribution | |
#USAGE: CI_normal(matrix[,x],97.5) - single column from a normal distribution | |
#USAGE: CI_tdist(matrix[,x],97.5) - single column from a t distribution | |
#Created and updated by Santhilal Subhash on 2016/06/07 | |
CI_normal <- function(li,stat) | |
{ | |
cat(paste0("CI","\t","column","\t","Lower.limit","\t","Upper.limit","\n")) | |
if(length(colnames(li))>1) | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This R script is to generate the TF or histone modification heatmap | |
# at certain genomic features (TSS, enhancers) from the ChIP-seq data | |
# the input matrix is got from Homer software. alternative to R, use cluster3 to cluster, and visualize by # java Treeviewer | |
# generate the matrix by Homer: annotatePeaks.pl peak_file.txt hg19 -size 6000 -hist 10 -ghist -d TF1/ # > outputfile_matrix.txt | |
# see several posts for heatmap: | |
# http://davetang.org/muse/2010/12/06/making-a-heatmap-with-r/ | |
# http://www.r-bloggers.com/r-using-rcolorbrewer-to-colour-your-figures-in-r/ | |
# 08/20/13 by Tommy Tang | |
# it is such a simple script but took me several days to get it work...I mean the desired |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#The X axis is -3 kb to 3 kb around TSS. | |
#It turns out that the heatmap.2 function use default hclust ( Hierachical Clustering) to cluster the #matrix. | |
#alternatively, we can use K-means clustering to cluster the data and to see what's the pattern look like. | |
km<- kmeans(m1,2) # determine how many cluster you want, I specify 2 here | |
m1.kmeans<- cbind(m1, km$cluster) # combine the cluster with the matrix | |
dim(m1.kmeans) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
m.row.sum<- cbind(m1, rowSums(m1)) | |
o1<- rev(order(m.row.sum[,602])) | |
m.row.sum<- m.row.sum[o1,] | |
bk = unique(c(seq(-0.1,3, length=100),seq(3,10.35,length=100))) | |
hmcols<- colorRampPalette(c("white","red"))(length(bk)-1) | |
pheatmap( m.row.sum[,1:601], cluster_rows = F, cluster_cols = F, col= hmcols, breaks = bk, legend=FALSE, show_rownames=FALSE, show_colnames=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chr1 processed_transcript exon 11869 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002234944"; | |
chr1 processed_transcript exon 12613 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00003582793"; | |
chr1 processed_transcript exon 13221 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002312635"; | |
chr1 transcribed_unprocessed_pseudogene exon 11872 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002234632"; | |
chr1 transcribed_unprocessed_pseudogene exon 12613 12721 . + . gene_id "ENSG000002 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ENSG00000223972 chr1 11869 14412 + DDX11L1 pseudogene | |
ENSG00000227232 chr1 14363 29806 - WASH7P pseudogene | |
ENSG00000243485 chr1 29554 31109 + MIR1302-11 lincRNA |
NewerOlder