Skip to content

Instantly share code, notes, and snippets.

View jergosh's full-sized avatar

Greg Slodkowicz jergosh

  • MRC Laboratory of Molecular Biology
  • Cambridge
View GitHub Profile
# Load two per-transcript measurements as named numeric vectors.
# All relative paths below are resolved against the project directory.
setwd("~/Documents/projects/paper-pat-seq/")

# `runs_ma` column of the WT1-vs-MUT1 table, keyed by transcript ID.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
patsy_table <- read.table("Log/TEST_WT1_vs_MUT1_gcf.tab", header = TRUE)
patsy_data <- setNames(
  patsy_table$runs_ma,
  as.character(patsy_table$transcripts)
)

# `weighted_length` column of the PASTA ranking table, keyed by transcript ID.
pasta_table <- read.table("ExtData/PASTA_polyAranking.tab", header = TRUE)
pasta_data <- setNames(
  pasta_table$weighted_length,
  as.character(pasta_table$transcripts)
)
seed used = 1386212133
4 1644
GLN_GLV_PISO0N06151G --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ATG GAG AAA GGC CCT TCA TCA CTT CAG GGT AAG AAA TAC CCT GCT AAG AAC CAT GCC CGC AGC GTA TAT AGG CAC TTT AAA GAC AAA AAG AAT CTG GTA CTG GAC GAA ACC GTT GCA TTT TTT GTT AGC GGT GAG GCA TTG GAA CTA TAT CAG TAC TGT GAT CAG ACG AAG CCC TTG CGA CAA AAT AGA TAC TTT TTT TAT TTA AGT GGA GTC GCT ATT CCT GGC TCA CAC GTT CTT TAT GAA CCT CTG AAG GAT AAG CTA ACG CTT TTC TTA CCT GAT GTT GAT GAG GAC GAT ATT ATG TGG TCG GGT CTT CCC CTC AGC GCT AAG GAG GCC GCC GCA AAA TAT GAC ATT GAC CAT GTC CTT TAT GCC TCA GAT ATT CCC CAG ACA TTA GAG GCT GTG AAT ACT AAG GCG AAA GTC TAC ACC ACC GAT GTG AAC AAA TTC AAT TCC AAT TAT GCT AAG TAT --- CTT GTT GAA GGT GAT GCT GAC TTT TTC TAT GCA TTG GAT GAA TCT CGT TTA TGT AAG GAC T
import glob

# Print every entry found directly under the trees directory.
for d in glob.glob("/nfs/research2/goldman/gregs/slr_pipeline/data/ens/78/trees/*"):
    # A single pre-formatted argument prints the same text under Python 2 and 3.
    print("Directory name %s" % d)

# Probably more useful but would print a lot of stuff
# for f in glob.glob("/nfs/research2/goldman/gregs/slr_pipeline/data/ens/78/trees/*/*.nh"):
#     print("File name %s" % f)
#!/usr/bin/perl
# Point the Ensembl API registry at the ensembldb.ensembl.org database host.
use strict;
# NOTE(review): Bio::SeqIO is loaded but not used in the lines visible here;
# presumably it is needed further down - confirm before removing.
use Bio::SeqIO;
use Bio::EnsEMBL::Registry;
# Load the registry from the given host, logging in as the 'anonymous' user.
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
# Shell pipeline that replaces every space in the dataset list with
# `") && done("`, so that `A B` becomes `A") && done("B`.
JOB_DEP_CMD = echo $(DATASETS) | sed 's/ /"\) \&\& done("/g'
# Wraps the joined names into -w 'done("A") && done("B")'.
# NOTE(review): this looks like a scheduler dependency expression that waits
# for the per-dataset jobs named below - confirm against SUBMIT_JOB.
JOB_DEP = -w 'done("$(shell $(JOB_DEP_CMD))")'
# For each dataset, submit a job named after the dataset that runs
# `samtools view -b -h` on its SPIKEIN_NVTR file with SPIKEIN_NAMES and writes
# the result to spikeins.bam in the same dataset directory.
@$(foreach DS,$(DATASETS), $(SUBMIT_JOB) -J $(DS) "samtools view -b -h $(ALN_DIR)/$(DS)/$(SPIKEIN_NVTR) $(SPIKEIN_NAMES) -o $(ALN_DIR)/$(DS)/spikeins.bam";)
@jergosh
jergosh / biomart
Last active January 1, 2016 14:39
library(biomaRt)

# Connect to the Ensembl BioMart service and select the human gene dataset.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org",
  path = "/biomart/martservice"
)

# Attributes to fetch for the queried gene.
attrs <- c("ensembl_gene_id", "upstream_flank")

# Filters are given as a named list (filter name = value), so no separate
# `values` argument is passed. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
gene_ann <- getBM(
  attributes = attrs,
  filters = list(ensembl_gene_id = "ENSG00000165702", upstream_flank = 100),
  mart = ensembl,
  checkFilters = FALSE
)

# Show the query result.
gene_ann
library(ggplot2)
library(grid)
library(gtable)
ex <- data.frame(xmin=1, xmax=100, ymin=0, ymax=15)
p <- ggplot(ex, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)) +
theme_minimal() +
theme(plot.margin=margin(l=2.0, unit="cm"),
panel.margin=margin(l=2.0, r=0, unit="cm"),
def make_layout(ann_dict):
def layout(node):
if node.is_leaf():
labelFace = ete3.faces.TextFace(str(ann_dict[node.name]))
ete3.faces.add_face_to_node(labelFace, node, column=2, position="aligned")
esnFace = ete3.faces.RectFace(height=100*ann_dict[node.name],
width=10,
fgcolor="white",
bgcolor="red")
# Shuffle the group members into a random order and write it to
# meeting_order.txt as a single `person` column.
members <- read.table(
  "~/Documents/group_members.txt",
  stringsAsFactors = FALSE,
  header = FALSE
)$V1

# Index with sample.int() instead of calling sample(members) directly:
# sample() treats a single numeric value n as "sample from 1:n", which would
# give the wrong result for a one-line numeric file. For any other input this
# draws the same permutation as sample(members, replace = FALSE).
members_ordered <- members[sample.int(length(members))]

write.table(
  data.frame(person = members_ordered),
  file = "meeting_order.txt",
  quote = FALSE,
  row.names = FALSE
)
library("seqinr")
library("Biostrings")
library("MASS")
library("GenomicRanges")
library("dndscv")
## Helper methods copied from dndscv
# The four nucleotides, in alphabetical order.
nt <- c("A", "C", "G", "T")
# Lookup vector whose names are `nt` and whose values are `nt` reversed
# (A -> T, C -> G, G -> C, T -> A).
compnt <- rev(nt)
names(compnt) <- nt