Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / uniprot_go_terms_at_level.py
Last active March 22, 2016 19:25
Get all of the GO terms at the specified level and type associated with a UniProt ID. Uses Python 3.5 Typing.
# Requires https://gist.github.com/dmyersturnbull/efe32052bf4cf06df915
import pandas as pd
from typing import Iterable, Union, Mapping
from goatools import obo_parser # uses https://github.com/tanghaibao/goatools
from goatools.obo_parser import GOTerm # NOT the same as FlatGoTerm, which has no knowledge of hierarchy
if not os.path.exists('gene_ontology.1_2.obo'):
import wget
@dmyersturnbull
dmyersturnbull / converge.py
Created March 28, 2016 17:15
Repeatedly sample something until the mean (or other statistic) converges to within ε.
import warnings
import numpy as np
from typing import Iterable, Mapping, Callable, Any, Tuple
def converge(sampler: Callable[[None], Iterable[float]],
statistic:Callable[[np.ndarray], float]=np.mean,
ε:float=0.01, min_iters:int=3, max_iters:int=50,
noter:Callable[[int, float, float, Iterable[float]], Any]=lambda i, estimate, delta, samples: print('Iteration {}: {:.3f}, δ=={:.3f}'.format(i, estimate, delta))
) -> Tuple[float, Iterable[float]]:
"""Repeatedly sample something until the mean (or other statistic) converges to within ε.
@dmyersturnbull
dmyersturnbull / uniprot_go_terms.py
Last active March 30, 2016 18:33
Build a Pandas DataFrame of Gene Ontology term objects from a UniProt ID. Uses Python 3.5 typing.
# Requires https://gist.github.com/dmyersturnbull/c0717406eb46158401a9:
from silenced import silenced
import re
import uniprot # from https://github.com/boscoh/uniprot
import pandas as pd
import contextlib
import sys
from io import StringIO
from typing import Iterable, Union, Mapping
@dmyersturnbull
dmyersturnbull / matlab_to_hdf5.jl
Created April 2, 2016 02:33
Convert a proprietary .mat file to a more modern HDF5-compatible version 7 .mat file.
using MAT
using Lumberjack
@doc """
Using MAT.jl, converts any MATLAB version >=5 .mat file to an HDF5-compatible MATLAB version 7 .mat file.
Warns if the file already exists.
""" ->
function convert_to_matlab7(input_file:: AbstractString, output_file:: AbstractString)
if ispath(output_file)
warn("File $output_file already exists")
@dmyersturnbull
dmyersturnbull / MinimalScalaCheck.scala
Last active April 3, 2016 04:59
A tiny example of using ScalaCheck and ScalaTest together.
import org.scalacheck.Gen
import org.scalatest.{PropSpec, Matchers}
import org.scalatest.prop.PropertyChecks
class MinimalScalaCheckExample extends PropSpec with PropertyChecks with Matchers {
property("A string's length should be constant") {
forAll { (s: String) =>
s.length should equal(s.length)
}
}
@dmyersturnbull
dmyersturnbull / search_hgnc.py
Last active April 19, 2016 20:48
Search HGNC when you can't remember the official gene symbol.
from http_get import http_get # uses https://gist.github.com/dmyersturnbull/fade1a5901beeb1003680f8267454640
from typing import Mapping, Union, Iterable
import json
# Names of the HGNC record fields that a search may be restricted to.
# NOTE: HGNC spells the Ensembl identifier field `ensembl_gene_id` (no
# trailing "e" on "ensembl"); the previous `ensemble_gene_id` spelling does
# not correspond to any HGNC field.
searchable_fields = {
    'alias_name', 'alias_symbol', 'ccds_id', 'ena', 'ensembl_gene_id',
    'entrez_id', 'hgnc_id', 'locus_group', 'locus_type', 'mgd_id',
    'name', 'prev_name', 'prev_symbol', 'refseq_accession', 'rgd_id',
    'status', 'symbol', 'ucsc_id', 'uniprot_ids', 'vega_id',
}
@dmyersturnbull
dmyersturnbull / tissue_expression_level.py
Last active April 19, 2016 21:59
Display per-tissue or per-cell type gene expression data from the Human Protein Atlas.
from typing import Callable
import pandas as pd
from dl_and_rezip import dl_and_rezip # see https://gist.github.com/dmyersturnbull/a6591676fc98da355c5250d48e26844e
def _load(filter_fn: Callable[[pd.DataFrame], pd.DataFrame]=pd.DataFrame.dropna) -> pd.DataFrame:
"""Get a DataFrame of Human Protein Atlas tissue expression data, indexed by Gene name and with the 'Gene' and 'Reliability' columns dropped.
The expression level ('Level') is replaced using this map: {'Not detected': 0, 'Low': 1, 'Medium': 2, 'High': 3}.
Downloads the file from http://www.proteinatlas.org/download/normal_tissue.csv.zip and reloads from normal_tissue.csv.gz thereafter.
@dmyersturnbull
dmyersturnbull / UnorderedPair.scala
Created March 21, 2016 20:29
Unordered pair in Scala
/** A pair whose two elements are interchangeable:
  * `UnorderedPair(x, y) == UnorderedPair(y, x)`.
  *
  * @param a one element of the pair
  * @param b the other element of the pair
  */
case class UnorderedPair[A](a: A, b: A) {

  /** Two pairs are equal when they hold the same two elements, in either order. */
  override def equals(o: Any): Boolean = o match {
    // The element type is erased at runtime, so match on a wildcard instead of
    // the unchecked (and misleading) `UnorderedPair[A]`.
    case that: UnorderedPair[_] =>
      (that.a == a && that.b == b) || (that.a == b && that.b == a)
    case _ => false
  }

  /** Order-independent hash that is consistent with `equals`.
    *
    * The two element hashes are put in ascending order and then hashed as a
    * tuple, so swapping `a` and `b` cannot change the result. Unlike a plain
    * product of the hashes, this does not collapse to 0 whenever one element
    * hashes to 0, and `##` is used so that `null` elements do not throw.
    * Hash codes are not unique; distinct pairs may still collide.
    */
  override def hashCode: Int = {
    val (x, y) = (a.##, b.##)
    if (x <= y) (x, y).## else (y, x).##
  }
}
@dmyersturnbull
dmyersturnbull / Paddable.scala
Last active May 23, 2016 17:47
Left- and right-pad a string with ^ and $, for rare cases where it makes operations much clearer.
private implicit class Paddable(string: String) {

  /** Spaces needed to bring this string up to `width` characters; empty if it is already that long. */
  private def filler(width: Int): String = {
    val missing = width - string.length
    " " * missing
  }

  /** Left-pad this string with spaces to a total width of `end`. */
  def ^(end: Int): String = filler(end) + string

  /** Right-pad this string with spaces to a total width of `end`. */
  def $(end: Int): String = string + filler(end)
}
@dmyersturnbull
dmyersturnbull / interpolation_experiment.jl
Created June 14, 2016 01:23
Julia string interpolation security experiments.
# This prints thisismypassword:
password = "thisismypassword"
# Print `post_text` to standard output wrapped in an HTML <p> element,
# followed by a newline. The text is inserted as-is; no escaping is applied.
function display_post(post_text)
    markup = "<p>$post_text</p>"
    println(markup)
end
display_post("$pass" * "word")
# This doesn't print the password when given $password (escaped) as an argument:
display_post(ARGS[1])