This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Rscript normalize_featurecounts.R counts_table.txt tpm ; | |
#Rscript normalize_featurecounts.R counts_table.txt rpkm ; | |
#Sample table### | |
#GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length | |
#Gene1<TAB>10<TAB>4<TAB>5<TAB>1500 | |
#Gene2<TAB>20<TAB>43<TAB>60<TAB>4300 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# ./CheckPhredScale.sh <FASTQ_FILE> <NUMBER OF LINES TO CHECK> | |
echo "Shell:"; | |
zcat $1 | head -n $2 | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding";}'; | |
echo ""; | |
echo "python:"; | |
zcat $1 | awk 'NR % 4 == 0' | head -n $2 | python ./guess-encoding.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
$/=undef; | |
## Run script: perl CpG_feature_extraction.pl CpG_islands.bed shores/shelves (This gives warnings for 'rm' command, just ignore it) | |
# Reference for definition of shores and shelves from: | |
# http://www.mdpi.com/2073-4425/5/2/347/htm | |
# http://www.mdpi.com/genes/genes-05-00347/article_deploy/html/images/genes-05-00347-g001-1024.png | |
extend_coordinates($ARGV[0] ,$ARGV[1]); | |
sub extend_coordinates | |
{ | |
$type=$ARGV[1]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#USAGE: CI_normal(matrix[,c(x:y)],97.5) - multiple column from a normal distribution | |
#USAGE: CI_tdist(matrix[,c(x:y)],97.5) - multiple column from a t distribution | |
#USAGE: CI_normal(matrix[,x],97.5) - single column from a normal distribution | |
#USAGE: CI_tdist(matrix[,x],97.5) - single column from a t distribution | |
#Created and updated by Santhilal Subhash on 2016/06/07 | |
CI_normal <- function(li,stat) | |
{ | |
cat(paste0("CI","\t","column","\t","Lower.limit","\t","Upper.limit","\n")) | |
if(length(colnames(li))>1) | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
m.row.sum<- cbind(m1, rowSums(m1)) | |
o1<- rev(order(m.row.sum[,602])) | |
m.row.sum<- m.row.sum[o1,] | |
bk = unique(c(seq(-0.1,3, length=100),seq(3,10.35,length=100))) | |
hmcols<- colorRampPalette(c("white","red"))(length(bk)-1) | |
pheatmap( m.row.sum[,1:601], cluster_rows = F, cluster_cols = F, col= hmcols, breaks = bk, legend=FALSE, show_rownames=FALSE, show_colnames=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#The X axis is -3 kb to 3 kb around TSS. | |
#It turns out that the heatmap.2 function use default hclust ( Hierachical Clustering) to cluster the #matrix. | |
#alternatively, we can use K-means clustering to cluster the data and to see what's the pattern look like. | |
km<- kmeans(m1,2) # determine how many cluster you want, I specify 2 here | |
m1.kmeans<- cbind(m1, km$cluster) # combine the cluster with the matrix | |
dim(m1.kmeans) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This R script is to generate the TF or histone modification heatmap | |
# at certain genomic features (TSS, enhancers) from the ChIP-seq data | |
# the input matrix is got from Homer software. alternative to R, use cluster3 to cluster, and visualize by # java Treeviewer | |
# generate the matrix by Homer: annotatePeaks.pl peak_file.txt hg19 -size 6000 -hist 10 -ghist -d TF1/ # > outputfile_matrix.txt | |
# see several posts for heatmap: | |
# http://davetang.org/muse/2010/12/06/making-a-heatmap-with-r/ | |
# http://www.r-bloggers.com/r-using-rcolorbrewer-to-colour-your-figures-in-r/ | |
# 08/20/13 by Tommy Tang | |
# it is such a simple script but took me several days to get it work...I mean the desired |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ENST00000423562 chr1 14363 29370 - ENSG00000227232 WASH7P pseudogene | |
ENST00000438504 chr1 14363 29370 - ENSG00000227232 WASH7P pseudogene | |
ENST00000450305 chr1 12010 13670 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000456328 chr1 11869 14409 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000469289 chr1 30267 31109 + ENSG00000243485 MIR1302-11 lincRNA | |
ENST00000473358 chr1 29554 31097 + ENSG00000243485 MIR1302-11 lincRNA | |
ENST00000488147 chr1 14404 29570 - ENSG00000227232 WASH7P pseudogene | |
ENST00000515242 chr1 11872 14412 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000518655 chr1 11874 14409 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000538476 chr1 14411 29806 - ENSG00000227232 WASH7P pseudogene |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ENSG00000223972 chr1 11869 14412 + DDX11L1 pseudogene | |
ENSG00000227232 chr1 14363 29806 - WASH7P pseudogene | |
ENSG00000243485 chr1 29554 31109 + MIR1302-11 lincRNA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chr1 processed_transcript exon 11869 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002234944"; | |
chr1 processed_transcript exon 12613 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00003582793"; | |
chr1 processed_transcript exon 13221 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002312635"; | |
chr1 transcribed_unprocessed_pseudogene exon 11872 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002234632"; | |
chr1 transcribed_unprocessed_pseudogene exon 12613 12721 . + . gene_id "ENSG000002 |
NewerOlder