Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / sliding_window.py
Last active June 21, 2016 03:08
Quickly calculate a sliding-window (moving) average of a NumPy array.
import numpy as np
def sliding_window(x: np.ndarray, n: int) -> np.ndarray:
    """Return the moving average of x over a sliding window of n elements.

    Each output element is the mean of n consecutive elements of x, so the
    result has len(x) - n + 1 elements (np.convolve with mode='valid').

    Arguments:
        x: One-dimensional array of values to average.
        n: Window size; must satisfy 1 <= n <= len(x).

    Raises:
        ValueError: If n < 1 or n > len(x).
    """
    if n < 1:
        # Fail with a clear message instead of an incidental NumPy error.
        raise ValueError("n must be at least 1")
    if n > len(x):
        # n == len(x) is valid: it yields a single window covering all of x.
        raise ValueError("n must not exceed the array length")
    # Convolving with a uniform kernel of weight 1/n yields the window means.
    # Courtesy of https://stackoverflow.com/questions/13728392/moving-average-or-running-mean
    return np.convolve(x, np.ones((n,)) / n, mode='valid')
@dmyersturnbull
dmyersturnbull / head.py
Last active July 6, 2016 23:05
Pretty-print the head of a Pandas table in a Jupyter notebook and show its dimensions. Allows showing multiple tables per cell.
from IPython.display import display, Markdown
import pandas as pd
def head(df: pd.DataFrame, n_rows: int = 1) -> None:
    """Show a one-line size summary of df, then render its first n_rows rows.

    Both pieces are emitted through display(), so several tables can be shown
    from a single Jupyter notebook cell.

    Arguments:
        df: Table to preview.
        n_rows: Number of leading rows to render.
    """
    row_count = len(df)
    column_count = len(df.columns)
    summary = "**whole table (below):** {} rows × {} columns".format(row_count, column_count)
    display(Markdown(summary))
    display(df.head(n_rows))
@dmyersturnbull
dmyersturnbull / exists.py
Created July 11, 2016 18:05
Efficient existential quantifier for a filter() predicate.
from typing import Callable, Generic, TypeVar, Iterable
T = TypeVar('T')


def exists(keep_predicate: Callable[[T], bool], seq: Iterable[T]) -> bool:
    """Efficient existential quantifier for a filter() predicate.

    Arguments:
        keep_predicate: Called on each element of seq in turn.
        seq: Iterable to search; consumed only up to the first match.

    Returns:
        True iff keep_predicate is truthy for one or more elements
        (so False for an empty iterable).
    """
    # any() stops at the first truthy result, so the short-circuit is kept.
    return any(keep_predicate(e) for e in seq)
@dmyersturnbull
dmyersturnbull / scantree.py
Last active July 14, 2016 00:38
List the full path of every meaningful file in a directory recursively.
import os
from typing import Iterator
def is_proper_file(path: str) -> bool:
    """Return True iff the final component of path is a "meaningful" name.

    A name counts as meaningful when it is non-empty and does not begin with
    '.', '~', or '_'. Only the path string is inspected.

    Arguments:
        path: File path whose last component is checked.
    """
    name = os.path.basename(path)
    # An empty name (e.g. path ends with a separator) is never a proper file.
    return bool(name) and not name.startswith(('.', '~', '_'))
def scantree(path: str, follow_symlinks: bool=False) -> Iterator[str]:
"""List the full path of every file not beginning with '.', '~', or '_' in a directory, recursively."""
for entry in os.scandir(path):
@dmyersturnbull
dmyersturnbull / lines.py
Last active July 20, 2016 18:04
Lazily read a text file, gunzip based on filename extension, and return newline-stripped lines.
import gzip, io
from typing import Iterator
def lines(file_name: str, known_encoding='utf-8') -> Iterator[str]:
"""Lazily read a text file or gzipped text file, decode, and strip any newline character (\n or \r).
If the file name ends with '.gz' or '.gzip', assumes the file is Gzipped.
Arguments:
known_encoding: Applied only when decoding gzip
"""
if file_name.endswith('.gz') or file_name.endswith('.gzip'):
@dmyersturnbull
dmyersturnbull / zip_strict.py
Created July 29, 2016 00:30
Zip function that requires the same lengths (and still works with generators).
def zip_strict(*args):
"""Same as zip(), but raises a ValueError if the lengths don't match."""
iters = [iter(axis) for axis in args]
n_elements = 0
failures = []
while len(failures) == 0:
n_elements += 1
values = []
failures = []
@dmyersturnbull
dmyersturnbull / timestamp.py
Last active August 1, 2016 18:16
YYYY-mm-dd_HH-MM-SS timestamp.
import datetime
def format_time(time: datetime.datetime) -> str:
    """Standard timestamp format, YYYY-mm-dd_HH-MM-SS. Ex: 2016-05-02_22-35-56.

    Arguments:
        time: The datetime to format.

    Returns:
        The date joined to the time by an underscore, with hyphens separating
        the fields within each part.
    """
    return time.strftime("%Y-%m-%d_%H-%M-%S")
def timestamp() -> str:
    """Standard timestamp of the time now, as given by datetime.datetime.now().

    Ex: 2016-05-02_22-35-56.
    """
    return format_time(datetime.datetime.now())
@dmyersturnbull
dmyersturnbull / silenced.py
Last active August 16, 2016 17:29
Silence stdout and/or stderr in Python 3 using a with statement.
import contextlib
import sys
from io import StringIO
@contextlib.contextmanager
def silenced(no_stdout=True, no_stderr=True):
"""
Suppresses output to stdout and/or stderr.
Always resets stdout and stderr, even on an exception.
Usage:
@dmyersturnbull
dmyersturnbull / LICENSE
Created November 21, 2016 23:13
Apache License, Version 2.0 for all Gists under dmyersturnbull. The copyright owners may be different for each Gist.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
@dmyersturnbull
dmyersturnbull / SpiderRecovery.py
Last active November 21, 2016 23:16
Makes a best-effort attempt to recover SMILES strings from compound names unambiguously by searching ChemSpider.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import re
import warnings
import time
from chemspipy import ChemSpider