Skip to content

Instantly share code, notes, and snippets.

View jergosh's full-sized avatar

Greg Slodkowicz jergosh

  • MRC Laboratory of Molecular Biology
  • Cambridge
View GitHub Profile
class SklearnWrapper(object):
    """Thin wrapper that instantiates an estimator and proxies attribute access to it.

    Attributes that are not found on the wrapper itself are looked up on the
    wrapped instance stored in ``self.clf``.
    """

    def __init__(self, clf, params=None):
        """Instantiate ``clf`` with ``params`` as keyword arguments.

        Args:
            clf: Callable (typically a class) invoked as ``clf(**params)``.
            params: Optional mapping of keyword arguments; ``None`` means no
                keyword arguments.
        """
        # A None sentinel avoids sharing one mutable default dict across calls.
        self.clf = clf(**(params or {}))

    def __getattr__(self, name):
        """Forward a lookup that failed on the wrapper to the wrapped object.

        Raises:
            AttributeError: if ``clf`` has not been set on this instance yet,
                or if the wrapped object has no attribute ``name``.
        """
        # __getattr__ only runs after normal lookup has failed. If "clf" itself
        # is the missing attribute (e.g. copy/pickle probing an instance whose
        # __init__ never ran), reading self.clf below would re-enter this
        # method without end, so fail fast instead.
        if name == "clf":
            raise AttributeError(name)
        # getattr() follows the full lookup protocol of the wrapped object,
        # including any __getattr__ fallback it defines.
        return getattr(self.clf, name)
#!/usr/bin/env perl
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
library("seqinr")
library("Biostrings")
library("MASS")
library("GenomicRanges")
library("dndscv")
## Nucleotide helpers copied from dndscv
# The four DNA bases, in alphabetical order.
nt <- c("A", "C", "G", "T")
# Lookup vector keyed by base: reversing the alphabetical order pairs
# A with T and C with G.
compnt <- rev(nt)
names(compnt) <- nt
# Draw a random order of group members for a meeting.
# Takes the first column (V1) of the header-less table in
# ~/Documents/group_members.txt, shuffles it without replacement, and writes
# the result to "meeting_order.txt" under a "person" column header.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
members <- read.table(
  "~/Documents/group_members.txt",
  stringsAsFactors = FALSE,
  header = FALSE
)$V1
members_ordered <- sample(members, replace = FALSE)
write.table(
  data.frame(person = members_ordered),
  file = "meeting_order.txt",
  quote = FALSE,
  row.names = FALSE
)
def make_layout(ann_dict):
def layout(node):
if node.is_leaf():
labelFace = ete3.faces.TextFace(str(ann_dict[node.name]))
ete3.faces.add_face_to_node(labelFace, node, column=2, position="aligned")
esnFace = ete3.faces.RectFace(height=100*ann_dict[node.name],
width=10,
fgcolor="white",
bgcolor="red")
library(ggplot2)
library(grid)
library(gtable)
ex <- data.frame(xmin=1, xmax=100, ymin=0, ymax=15)
p <- ggplot(ex, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)) +
theme_minimal() +
theme(plot.margin=margin(l=2.0, unit="cm"),
panel.margin=margin(l=2.0, r=0, unit="cm"),
@jergosh
jergosh / biomart
Last active January 1, 2016 14:39
library(biomaRt)
# Query the Ensembl BioMart service for the "upstream_flank" attribute of
# gene ENSG00000165702 in the human gene dataset.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org",
  path = "/biomart/martservice"
)
attrs <- c("ensembl_gene_id", "upstream_flank")
# Filters are passed as a named list (filter name -> value).
# NOTE(review): checkFilters is switched off presumably because
# "upstream_flank" is not among the filters biomaRt validates against; confirm.
# FALSE is spelled out because F is a reassignable variable, not a constant.
gene_ann <- getBM(
  attributes = attrs,
  filters = list(ensembl_gene_id = "ENSG00000165702", upstream_flank = 100),
  mart = ensembl,
  checkFilters = FALSE
)
# Auto-print the query result when the script is run at top level.
gene_ann
# Shell pipeline that joins the space-separated $(DATASETS) names with
# `") && done("`, so the names "a b" become `a") && done("b`.
JOB_DEP_CMD = echo $(DATASETS) | sed 's/ /"\) \&\& done("/g'
# Wraps the joined names so the flag reads: -w 'done("a") && done("b")'.
# NOTE(review): looks like a job-dependency flag meant for $(SUBMIT_JOB) -- confirm.
JOB_DEP = -w 'done("$(shell $(JOB_DEP_CMD))")'
@$(foreach DS,$(DATASETS), $(SUBMIT_JOB) -J $(DS) "samtools view -b -h $(ALN_DIR)/$(DS)/$(SPIKEIN_NVTR) $(SPIKEIN_NAMES) -o $(ALN_DIR)/$(DS)/spikeins.bam";)
#!/usr/bin/perl
use strict;
use Bio::SeqIO;
use Bio::EnsEMBL::Registry;
# Load the Ensembl registry from the database server 'ensembldb.ensembl.org',
# connecting as user 'anonymous' (no password is passed here).
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
import glob
# List every entry directly under the trees directory.
# print() receives a single pre-formatted string so that the line is valid and
# prints the same text under both Python 2 and Python 3; the former
# `print "...", d` statement is a SyntaxError on Python 3.
for d in glob.glob("/nfs/research2/goldman/gregs/slr_pipeline/data/ens/78/trees/*"):
    print("Directory name %s" % d)

# Probably more useful but would print a lot of stuff
# for f in glob.glob("/nfs/research2/goldman/gregs/slr_pipeline/data/ens/78/trees/*/*.nh"):
#     print("File name %s" % f)