This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for i in {1..22} X Y | |
do | |
same=$(comm -12 <(cut -f1,2,4,5 keep_original_bam/TCGA-06-0125_$i.call.keep.txt | sort) <(cut -f1,2,4,5 keep_realigned_bam/TCGA-06-0125_$i.call.keep.txt| sort) | wc -l | tr -d " ") | |
original=`wc -l keep_original_bam/TCGA-06-0125_$i.call.keep.txt | awk '{print $1}'` | |
realigned=`wc -l keep_realigned_bam/TCGA-06-0125_$i.call.keep.txt | awk '{print $1}'` | |
printf "chromosome $i, original_bam:$original, realigned_bam:$realigned, common:$same \n" | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## imagine we have a file with one line header, and we want to keep the header after sorting | |
## use subshells http://bash.cyberciti.biz/guide/What_is_a_Subshell%3F | |
(sed -n '1p' your_file; cat your_file | sed '1d' | sort) > sort_header.txt | |
## if you have two header lines and want to keep both of them: | |
(sed -n '1,2p' your_file; cat your_file | sed '1,2d' | sort) > sort_header.txt | |
## if you have many lines starting with "#" as header, like vcf files | |
(grep "^#" my_vcf; grep -v "^#" my_vcf | sort -k1,1V -k2,2n) > sorted.vcf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(Biobase) | |
library(GEOquery) | |
# load series and platform data from GEO | |
gset <- getGEO("GSE34412", GSEMatrix =TRUE) | |
gset<- gset[[1]] | |
# make proper column names to match toptable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with open ("C:/Users/Tang Ming/Desktop/anotation.txt", "r") as annotation: | |
anotation_dict = {} | |
for line in annotation: | |
line = line.split() | |
if line: #test whether it is an empty line | |
anotation_dict[line[0]]=line[1:] | |
else: | |
continue | |
# really should not parse the fasta file by myself. there are |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#2014 MSU NGS R basics tutorial | |
#http://angus.readthedocs.org/en/2014/R_Introductory_tutorial_2014.html | |
#https://github.com/jrherr/quick_basic_R_tutorial/blob/master/R_tutorial.md | |
#pick one language, and learn it well! | |
#pick up a dataset, play with it! | |
#object-oriented programming | |
#functional programming |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!# /usr/bin/bash | |
while read name | |
do | |
echo "Name read from file - $name" | |
touch $name | |
done < $1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for fspec1 in *.gz | |
do | |
#echo $fspec1 | |
fspec2=$(echo ${fspec1} | sed "s/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub.fastq.gz/\1_R\3_00\2.fastq.gz/") | |
echo $fspec2 | |
mv ${fspec1} ${fspec2} | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### use bioconductor annotation packages ####### | |
source("http://Bioconductor.org/biocLite.R") | |
biocLite("org.Hs.eg.db") | |
biocLite(c("GenomicFeatures", "AnnotationDbi")) | |
library("org.Hs.eg.db") | |
library("AnnotationDbi") | |
library("GenomicFeatures") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
setwd("/home/tommy/annotations/human/ensemble/") | |
# set the colClasses for faster reading in the data | |
gtf_cols <- c(seqname="factor", source="factor", feature="factor", | |
start="integer", end="integer", score="character", | |
strand="factor", frame="factor", attribute="character") | |
hs_gtf <- read.delim('Homo_sapiens.GRCh37.74.gtf.gz', header=FALSE, | |
col.names=names(gtf_cols), comment.char="#") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# search pubmed contains "glioblastoma enhancer" | |
$esearch -db pubmed -query "glioblastoma enhancer" | |
<ENTREZ_DIRECT> | |
<Db>pubmed</Db> | |
<WebEnv>NCID_1_539964707_130.14.18.34_9001_1422280320_2091337226_0MetA0_S_MegaStore_F_1</WebEnv> | |
<QueryKey>1</QueryKey> | |
<Count>97</Count> | |
<Step>1</Step> | |
</ENTREZ_DIRECT> |
OlderNewer