Skip to content

Instantly share code, notes, and snippets.

View dmyersturnbull's full-sized avatar

Douglas Myers-Turnbull dmyersturnbull

  • Stanford University
  • Stanford, CA
View GitHub Profile
@dmyersturnbull
dmyersturnbull / connected.py
Last active November 21, 2016 23:16
Context with convenience functions for MySQL/MariaDB calls with pymysql.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import pymysql
import contextlib
from typing import Tuple, List, Dict, Iterator
@contextlib.contextmanager
def connected(connection: pymysql.connections.Connection):
@dmyersturnbull
dmyersturnbull / plot_dose_response.py
Last active November 21, 2016 23:17
Plot a grid of nice-looking dose-response curves.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import matplotlib
import pandas as pd
from typing import Callable, Tuple, Dict, Optional, Union
@dmyersturnbull
dmyersturnbull / marker_iterator.py
Last active November 21, 2016 23:17
Iterate over Matplotlib marker shapes.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import pandas as pd
import itertools
from typing import Iterator
def marker_iterator(df: pd.DataFrame, class_column: str='class') -> Iterator[str]:
"""Returns an iterator of decent marker shapes. The order is such that similar markers aren't used unless they're needed."""
@dmyersturnbull
dmyersturnbull / clustering_plot.py
Last active November 21, 2016 23:18
Calculate tSNE or MDS+PCA and plot the results in Seaborn in a way that doesn't look terrible.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import squareform
from sklearn import manifold
from scipy.spatial.distance import pdist
from sklearn.decomposition import PCA
@dmyersturnbull
dmyersturnbull / colored_barplot.py
Last active November 21, 2016 23:18
Colored barplot in Matplotlib that makes sense and doesn't look terrible.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import matplotlib.pyplot as plt
import numpy as np
from typing import Optional, Tuple
def colored_barplot(x: np.ndarray, y: np.ndarray, colors: np.ndarray, y_ticks: Optional[np.ndarray]=None, fig_size: Tuple[float, float]=(10.0, 10.0), label_rotation: float=75):
index = np.arange(0, len(x))
@dmyersturnbull
dmyersturnbull / TryWith.scala
Created August 4, 2016 20:47
Scala try-with-resources
import java.io.Closeable
import scala.util.control.NonFatal
import scala.util.{Failure, Try}
/**
* Taken from user Morgen on Code Review Stack Exchange with no substantive modifications:
* https://codereview.stackexchange.com/questions/79267/scala-trywith-that-closes-resources-automatically
* The original is licensed under cc-wiki with attribution required.
*/
object TryWith {
@dmyersturnbull
dmyersturnbull / zip_strict.py
Created July 29, 2016 00:30
Zip function that requires the same lengths (and still works with generators).
def zip_strict(*args):
"""Same as zip(), but raises a ValueError if the lengths don't match."""
iters = [iter(axis) for axis in args]
n_elements = 0
failures = []
while len(failures) == 0:
n_elements += 1
values = []
failures = []
@dmyersturnbull
dmyersturnbull / groupyby_parallel.py
Last active February 6, 2024 00:43
Performs a Pandas groupby operation in parallel
import pandas as pd
import itertools
import time
import multiprocessing
from typing import Callable, Tuple, Union
def groupby_parallel(
groupby_df: pd.core.groupby.DataFrameGroupBy,
func: Callable[[Tuple[str, pd.DataFrame]], Union[pd.DataFrame, pd.Series]],
num_cpus: int = multiprocessing.cpu_count() - 1,
@dmyersturnbull
dmyersturnbull / two_layer_palette.py
Last active November 21, 2016 23:18
Generate a color palette that has main colors for classes and generated variations of those colors for subclasses.
# coding=utf-8
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
import warnings
import numpy as np
from typing import List, Optional, Tuple, Callable, Dict, Any
from collections import OrderedDict
@dmyersturnbull
dmyersturnbull / find_only_file_matching.py
Last active November 21, 2016 23:19
Find a unique file in a directory.
# Douglas Myers-Turnbull wrote this for the Kokel Lab, which has released it under the Apache Software License, Version 2.0
# See the license file here: https://gist.github.com/dmyersturnbull/bfa1c3371e7449db553aaa1e7cd3cac1
# The list of copyright owners is unknown
from .scan_for_files import scan_for_files # see https://gist.github.com/dmyersturnbull/80845ba9ebab2da83963
from typing import Callable, Iterator
def find_only_file_matching(directory: str, matcher: Callable[[str], bool], file_iterator: Callable[[str], Iterator[str]]=scan_for_files) -> str:
"""Returns the full path of the matching file and raises an exception if none are found or more than 1 is found."""
file = None