Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / UnorderedPair.scala
Created March 21, 2016 20:29
Unordered pair in Scala
/**
 * A pair whose two elements are interchangeable:
 * `UnorderedPair(x, y)` is equal to `UnorderedPair(y, x)`.
 *
 * @param a one element of the pair
 * @param b the other element of the pair
 */
case class UnorderedPair[A](a: A, b: A) {

  /**
   * Compares two pairs irrespective of element order.
   *
   * @param o the object to compare against
   * @return true iff `o` is an `UnorderedPair` holding the same two elements, in either order
   */
  override def equals(o: Any): Boolean = o match {
    // The element type is erased at runtime, so match on a wildcard instead of
    // the uncheckable UnorderedPair[A]; element equality is decided by == below.
    case that: UnorderedPair[_] =>
      (that.a == a && that.b == b) || (that.a == b && that.b == a)
    case _ => false
  }

  /**
   * Order-independent hash code, consistent with `equals`.
   *
   * Built only from symmetric operations (+ and ^) so swapping `a` and `b` never
   * changes the result. Unlike a plain product, a zero element hash does not
   * collapse every such pair onto the same bucket. Hash codes are NOT unique:
   * distinct pairs may still collide.
   *
   * `##` is used rather than `hashCode` because it is null-safe and agrees with `==`
   * on boxed numeric values.
   */
  override def hashCode: Int = {
    val ha = a.##
    val hb = b.##
    31 * (ha + hb) + (ha ^ hb)
  }
}
@dmyersturnbull
dmyersturnbull / uniprot_go_terms.py
Last active March 30, 2016 18:33
Build a Pandas DataFrame of Gene Ontology term objects from a UniProt ID. Uses Python 3.5 typing.
# Requires https://gist.github.com/dmyersturnbull/c0717406eb46158401a9:
from silenced import silenced
import re
import uniprot # from https://github.com/boscoh/uniprot
import pandas as pd
import contextlib
import sys
from io import StringIO
from typing import Iterable, Union, Mapping
@dmyersturnbull
dmyersturnbull / uniprot_go_terms_at_level.py
Last active March 22, 2016 19:25
Get all of the GO terms at the specified level and type associated with a UniProt ID. Uses Python 3.5 Typing.
# Requires https://gist.github.com/dmyersturnbull/efe32052bf4cf06df915
import pandas as pd
from typing import Iterable, Union, Mapping
from goatools import obo_parser # uses https://github.com/tanghaibao/goatools
from goatools.obo_parser import GOTerm # NOT the same as FlatGoTerm, which has no knowledge of hierarchy
if not os.path.exists('gene_ontology.1_2.obo'):
import wget
@dmyersturnbull
dmyersturnbull / silenced.py
Last active August 16, 2016 17:29
Silence stdout and/or stderr in Python 3 using with statement
import contextlib
import sys
from io import StringIO
@contextlib.contextmanager
def silenced(no_stdout=True, no_stderr=True):
"""
Suppresses output to stdout and/or stderr.
Always resets stdout and stderr, even on an exception.
Usage:
@dmyersturnbull
dmyersturnbull / converge.py
Created March 28, 2016 17:15
Repeatedly sample something until the mean (or other statistic) converges to within ε.
import warnings
import numpy as np
from typing import Iterable, Mapping, Callable, Any, Tuple
def converge(sampler: Callable[[None], Iterable[float]],
statistic:Callable[[np.ndarray], float]=np.mean,
ε:float=0.01, min_iters:int=3, max_iters:int=50,
noter:Callable[[int, float, float, Iterable[float]], Any]=lambda i, estimate, delta, samples: print('Iteration {}: {:.3f}, δ=={:.3f}'.format(i, estimate, delta))
) -> Tuple[float, Iterable[float]]:
"""Repeatedly sample something until the mean (or other statistic) converges to within ε.
@dmyersturnbull
dmyersturnbull / chemspider_unique_search.py
Last active January 20, 2017 04:12
Fetch unique compounds from ChemSpider.
# Douglas Myers-Turnbull wrote this while at UCSF. Because of this, the list of copyright owners is unknown and the code is not licensed (sorry!).
import chemspipy
from chemspipy import ChemSpider
import warnings
from typing import Iterable, Mapping, Optional
import warnings
import time
# use your API key for fetching from ChemSpider
@dmyersturnbull
dmyersturnbull / lines.py
Last active July 20, 2016 18:04
Lazily read a text file, gunzip based on filename extension, and return newline-stripped lines.
import gzip, io
from typing import Iterator
def lines(file_name: str, known_encoding='utf-8') -> Iterator[str]:
"""Lazily read a text file or gzipped text file, decode, and strip any newline character (\n or \r).
If the file name ends with '.gz' or '.gzip', assumes the file is Gzipped.
Arguments:
known_encoding: Applied only when decoding gzip
"""
if file_name.endswith('.gz') or file_name.endswith('.gzip'):
@dmyersturnbull
dmyersturnbull / matlab_to_hdf5.jl
Created April 2, 2016 02:33
Convert a proprietary .mat file to a more modern HDF5-compatible version 7.3 .mat file.
using MAT
using Lumberjack
@doc """
Using MAT.jl, converts any MATLAB version >=5 .mat file to an HDF5-compatible MATLAB version 7 .mat file.
Warns if the file already exists.
""" ->
function convert_to_matlab7(input_file:: AbstractString, output_file:: AbstractString)
if ispath(output_file)
warn("File $output_file already exists")
@dmyersturnbull
dmyersturnbull / Paddable.scala
Last active May 23, 2016 17:47
Left- and right-pad a string with ^ and $, for rare cases where it makes operations much clearer.
private implicit class Paddable(string: String) {

  /** Spaces needed to widen this string to `width` characters (empty if it is already that wide). */
  private def filler(width: Int): String = {
    val shortfall = width - string.length
    " " * shortfall
  }

  /**
   * Left-pad this string with spaces.
   *
   * @param end the desired total width; a string already at least this wide is returned unchanged
   */
  def ^(end: Int): String = filler(end) + string

  /**
   * Right-pad this string with spaces.
   *
   * @param end the desired total width; a string already at least this wide is returned unchanged
   */
  def $(end: Int): String = string + filler(end)
}
@dmyersturnbull
dmyersturnbull / MinimalScalaCheck.scala
Last active April 3, 2016 04:59
A tiny example of using ScalaCheck and ScalaTest together.
import org.scalacheck.Gen
import org.scalatest.{PropSpec, Matchers}
import org.scalatest.prop.PropertyChecks
class MinimalScalaCheckExample extends PropSpec with PropertyChecks with Matchers {
property("A string's length should be constant") {
forAll { (s: String) =>
s.length should equal(s.length)
}
}