View normalize_read_counts.R
#Rscript normalize_featurecounts.R counts_table.txt tpm ; | |
#Rscript normalize_featurecounts.R counts_table.txt rpkm ; | |
#Sample table### | |
#GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length | |
#Gene1<TAB>10<TAB>4<TAB>5<TAB>1500 | |
#Gene2<TAB>20<TAB>43<TAB>60<TAB>4300 | |
View CheckPhredScale.sh
#!/bin/bash | |
# ./CheckPhredScale.sh <FASTQ_FILE> <NUMBER OF LINES TO CHECK> | |
echo "Shell:"; | |
zcat $1 | head -n $2 | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding";}'; | |
echo ""; | |
echo "python:"; | |
zcat $1 | awk 'NR % 4 == 0' | head -n $2 | python ./guess-encoding.py |
View CpG_feature_extraction.pl
#!/usr/bin/perl | |
$/=undef; | |
## Run script: perl CpG_feature_extraction.pl CpG_islands.bed shores/shelves (This gives warnings for 'rm' command, just ignore it) | |
# Reference for definition of shores and shelves from: | |
# http://www.mdpi.com/2073-4425/5/2/347/htm | |
# http://www.mdpi.com/genes/genes-05-00347/article_deploy/html/images/genes-05-00347-g001-1024.png | |
extend_coordinates($ARGV[0] ,$ARGV[1]); | |
sub extend_coordinates | |
{ | |
$type=$ARGV[1]; |
View confidence_interval.R
#USAGE: CI_normal(matrix[,c(x:y)],97.5) - multiple column from a normal distribution | |
#USAGE: CI_tdist(matrix[,c(x:y)],97.5) - multiple column from a t distribution | |
#USAGE: CI_normal(matrix[,x],97.5) - single column from a normal distribution | |
#USAGE: CI_tdist(matrix[,x],97.5) - single column from a t distribution | |
#Created and updated by Santhilal Subhash on 2016/06/07 | |
CI_normal <- function(li,stat) | |
{ | |
cat(paste0("CI","\t","column","\t","Lower.limit","\t","Upper.limit","\n")) | |
if(length(colnames(li))>1) | |
{ |
View heatmap_ChIP-seq_3.r
m.row.sum<- cbind(m1, rowSums(m1)) | |
o1<- rev(order(m.row.sum[,602])) | |
m.row.sum<- m.row.sum[o1,] | |
bk = unique(c(seq(-0.1,3, length=100),seq(3,10.35,length=100))) | |
hmcols<- colorRampPalette(c("white","red"))(length(bk)-1) | |
pheatmap( m.row.sum[,1:601], cluster_rows = F, cluster_cols = F, col= hmcols, breaks = bk, legend=FALSE, show_rownames=FALSE, show_colnames=FALSE) |
View heatmap_ChIP-seq_2.r
#The X axis is -3 kb to 3 kb around TSS. | |
#It turns out that the heatmap.2 function use default hclust ( Hierachical Clustering) to cluster the #matrix. | |
#alternatively, we can use K-means clustering to cluster the data and to see what's the pattern look like. | |
km<- kmeans(m1,2) # determine how many cluster you want, I specify 2 here | |
m1.kmeans<- cbind(m1, km$cluster) # combine the cluster with the matrix | |
dim(m1.kmeans) |
View heatmap_ChIP-seq.r
# This R script is to generate the TF or histone modification heatmap | |
# at certain genomic features (TSS, enhancers) from the ChIP-seq data | |
# the input matrix is got from Homer software. alternative to R, use cluster3 to cluster, and visualize by # java Treeviewer | |
# generate the matrix by Homer: annotatePeaks.pl peak_file.txt hg19 -size 6000 -hist 10 -ghist -d TF1/ # > outputfile_matrix.txt | |
# see several posts for heatmap: | |
# http://davetang.org/muse/2010/12/06/making-a-heatmap-with-r/ | |
# http://www.r-bloggers.com/r-using-rcolorbrewer-to-colour-your-figures-in-r/ | |
# 08/20/13 by Tommy Tang | |
# it is such a simple script but took me several days to get it work...I mean the desired |
View sample.GTF_enst_annotation.txt
ENST00000423562 chr1 14363 29370 - ENSG00000227232 WASH7P pseudogene | |
ENST00000438504 chr1 14363 29370 - ENSG00000227232 WASH7P pseudogene | |
ENST00000450305 chr1 12010 13670 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000456328 chr1 11869 14409 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000469289 chr1 30267 31109 + ENSG00000243485 MIR1302-11 lincRNA | |
ENST00000473358 chr1 29554 31097 + ENSG00000243485 MIR1302-11 lincRNA | |
ENST00000488147 chr1 14404 29570 - ENSG00000227232 WASH7P pseudogene | |
ENST00000515242 chr1 11872 14412 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000518655 chr1 11874 14409 + ENSG00000223972 DDX11L1 pseudogene | |
ENST00000538476 chr1 14411 29806 - ENSG00000227232 WASH7P pseudogene |
View sample.GTF_ensg_annotation.txt
ENSG00000223972 chr1 11869 14412 + DDX11L1 pseudogene | |
ENSG00000227232 chr1 14363 29806 - WASH7P pseudogene | |
ENSG00000243485 chr1 29554 31109 + MIR1302-11 lincRNA |
View sample.GTF
chr1 processed_transcript exon 11869 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002234944"; | |
chr1 processed_transcript exon 12613 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00003582793"; | |
chr1 processed_transcript exon 13221 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002312635"; | |
chr1 transcribed_unprocessed_pseudogene exon 11872 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002234632"; | |
chr1 transcribed_unprocessed_pseudogene exon 12613 12721 . + . gene_id "ENSG000002 |
NewerOlder