# pavlin-policar/plot_markers.py

## plot_markers.py
import scipy.sparse as sp
import numpy as np


def plot_marker(
    marker,
    dataset,
    embedding: np.ndarray,
    binary=True,
    s=1,
    alpha=0.1,
    colors=None,
    threshold=0,
    zorder=1,
    ax=None,
):
    """Scatter-plot an embedding, coloured by expression of marker gene(s).

    Parameters
    ----------
    marker
        A single gene name or a sequence of gene names. When several names
        are given, a point counts as expressing if *any* of them does.
    dataset
        Object exposing ``var_names`` (with an ``isin`` method) and a matrix
        ``X`` that can be indexed as ``X[:, mask]``; dense or scipy-sparse.
        NOTE(review): presumably an AnnData object -- confirm with callers.
    embedding
        Array indexed as ``embedding[rows, 0]`` / ``embedding[rows, 1]``; its
        rows must line up with the rows of ``dataset.X``.
    binary
        If true, draw two flat-coloured groups (not expressed / expressed,
        split by ``threshold``). Otherwise colour each point by its maximum
        value over the selected genes using a two-colour gradient.
    s, alpha, zorder
        Forwarded unchanged to ``ax.scatter``.
    colors
        Pair ``[off_colour, on_colour]``. Defaults to black / red.
    threshold
        Expression cut-off, used only when ``binary`` is true. The continuous
        mode always splits the two draw groups at 0.
    ax
        Axes to draw on. A new 8x8 inch figure is created when omitted.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        # Imported lazily: callers that supply their own axes do not need
        # pyplot (and its backend selection) at all.
        import matplotlib.pyplot as plt

        _, ax = plt.subplots(figsize=(8, 8))

    marker = np.atleast_1d(marker)
    gene_mask = dataset.var_names.isin(marker)

    data = dataset.X[:, gene_mask]
    if sp.issparse(data):
        data = data.toarray()

    if colors is None:
        colors = ["#000000", "#EA4736"]

    point_style = dict(s=s, alpha=alpha, rasterized=True, zorder=zorder)

    # Non-expressing points are always drawn first so that the expressing
    # points end up on top of them.
    if binary:
        expressed = np.any(data > threshold, axis=1)
        ax.scatter(
            embedding[~expressed, 0],
            embedding[~expressed, 1],
            c=colors[0],
            **point_style,
        )
        ax.scatter(
            embedding[expressed, 0],
            embedding[expressed, 1],
            c=colors[1],
            **point_style,
        )
    else:
        import matplotlib.colors as clr

        level = np.max(data, axis=1)
        expressed = np.any(data > 0, axis=1)

        cmap = clr.LinearSegmentedColormap.from_list(
            "expression", [colors[0], colors[1]], N=256
        )
        # Both scatter calls must share one colour scale. Without explicit
        # limits each call normalises its own subset, so the lowest expressing
        # point would get the "off" colour and, for data with negative values,
        # non-expressing points could get the "on" colour.
        if level.size:
            point_style.update(vmin=level.min(), vmax=level.max())

        ax.scatter(
            embedding[~expressed, 0],
            embedding[~expressed, 1],
            c=level[~expressed],
            cmap=cmap,
            **point_style,
        )
        ax.scatter(
            embedding[expressed, 0],
            embedding[expressed, 1],
            c=level[expressed],
            cmap=cmap,
            **point_style,
        )

    # Gene names are shown as e.g. "Cd3e"; the surrounding "$" makes
    # matplotlib typeset the title as mathtext.
    marker_str = ", ".join(m.lower().capitalize() for m in marker)
    title = f"${marker_str}$"
    ax.set_title(title)

    return ax


def plot_markers(
    markers,
    dataset,
    embedding: np.ndarray,
    per_row=4,
    figwidth=16,
    binary=True,
    s=1,
    alpha=0.1,
    colors=None,
    threshold=0,
    return_ax=False,
    zorder=1,
):
    """Draw a grid of marker-expression scatter plots, one panel per entry.

    Parameters
    ----------
    markers
        Either a list whose items are a gene name or a group of gene names,
        or a dict mapping a panel label to such an item. For a dict, the
        label is placed on a line above the gene names in the panel title.
    dataset
        Passed through to ``plot_marker``; ``dataset.var_names`` must support
        the ``in`` operator so requested genes can be validated.
    embedding
        Passed through to ``plot_marker``.
    per_row
        Number of panels per grid row.
    figwidth
        Figure width in inches; the height is derived so panels are square.
    binary, s, alpha, colors, threshold, zorder
        Passed through to ``plot_marker`` for every panel.
    return_ax
        If true, return ``(fig, axes)`` where ``axes`` is the flattened array
        of all grid axes (including unused ones). Otherwise return ``None``.

    Raises
    ------
    ValueError
        If ``markers`` is neither a list nor a dict, is empty, or names a
        gene that is not present in ``dataset.var_names``.
    """
    import matplotlib.pyplot as plt

    # All validation happens before the figure is created so that bad input
    # does not leave an orphaned figure behind.
    if isinstance(markers, dict):
        panel_labels = list(markers)
        panel_markers = list(markers.values())
    elif isinstance(markers, list):
        panel_labels = None
        panel_markers = markers
    else:
        raise ValueError("markers cannot be instance of `%s`" % type(markers))

    if not panel_markers:
        raise ValueError("markers must contain at least one marker gene")

    # Flatten grouped markers the same way `plot_marker` interprets them.
    requested = [
        gene for group in panel_markers for gene in np.atleast_1d(group).tolist()
    ]
    missing = [gene for gene in requested if gene not in dataset.var_names]
    if missing:
        # Explicit raise instead of `assert`, which is stripped under `-O`.
        raise ValueError(
            "One or more of the specified marker genes was not found in dataset: "
            + ", ".join(str(gene) for gene in missing)
        )

    n_rows = (len(panel_markers) + per_row - 1) // per_row  # ceiling division
    figheight = figwidth / per_row * n_rows
    # squeeze=False guarantees a 2-D array of axes even for a 1x1 grid.
    fig, ax = plt.subplots(
        nrows=n_rows,
        ncols=per_row,
        figsize=(figwidth, figheight),
        squeeze=False,
    )

    ax = ax.ravel()
    for axi in ax:
        axi.set_axis_off()

    if colors is None:
        colors = ["#000000", "#EA4736"]

    for idx, marker in enumerate(panel_markers):
        plot_marker(
            marker,
            dataset,
            embedding,
            binary=binary,
            s=s,
            alpha=alpha,
            colors=colors,
            threshold=threshold,
            zorder=zorder,
            ax=ax[idx],
        )

        if panel_labels is not None:
            # Read the title back from this panel's axes, not from the array.
            gene_title = ax[idx].get_title()
            ax[idx].set_title(f"{panel_labels[idx]}\n{gene_title}")

    # Lay out once, after every panel has received its final title.
    fig.tight_layout()

    if return_ax:
        return fig, ax
    return None
	import scipy.sparse as sp
	import numpy as np


	def plot_marker(
		marker,
		dataset,
		embedding: np.ndarray,
		binary=True,
		s=1,
		alpha=0.1,
		colors=None,
		threshold=0,
		zorder=1,
		ax=None,
	):
		"""Scatter-plot an embedding, coloured by expression of marker gene(s).

		Parameters
		----------
		marker
			A single gene name or a sequence of gene names. When several names
			are given, a point counts as expressing if *any* of them does.
		dataset
			Object exposing ``var_names`` (with an ``isin`` method) and a matrix
			``X`` that can be indexed as ``X[:, mask]``; dense or scipy-sparse.
			NOTE(review): presumably an AnnData object -- confirm with callers.
		embedding
			Array indexed as ``embedding[rows, 0]`` / ``embedding[rows, 1]``; its
			rows must line up with the rows of ``dataset.X``.
		binary
			If true, draw two flat-coloured groups (not expressed / expressed,
			split by ``threshold``). Otherwise colour each point by its maximum
			value over the selected genes using a two-colour gradient.
		s, alpha, zorder
			Forwarded unchanged to ``ax.scatter``.
		colors
			Pair ``[off_colour, on_colour]``. Defaults to black / red.
		threshold
			Expression cut-off, used only when ``binary`` is true. The continuous
			mode always splits the two draw groups at 0.
		ax
			Axes to draw on. A new 8x8 inch figure is created when omitted.

		Returns
		-------
		The axes that were drawn on.
		"""
		if ax is None:
			# Imported lazily: callers that supply their own axes do not need
			# pyplot (and its backend selection) at all.
			import matplotlib.pyplot as plt

			_, ax = plt.subplots(figsize=(8, 8))

		marker = np.atleast_1d(marker)
		gene_mask = dataset.var_names.isin(marker)

		data = dataset.X[:, gene_mask]
		if sp.issparse(data):
			data = data.toarray()

		if colors is None:
			colors = ["#000000", "#EA4736"]

		point_style = dict(s=s, alpha=alpha, rasterized=True, zorder=zorder)

		# Non-expressing points are always drawn first so that the expressing
		# points end up on top of them.
		if binary:
			expressed = np.any(data > threshold, axis=1)
			ax.scatter(
				embedding[~expressed, 0],
				embedding[~expressed, 1],
				c=colors[0],
				**point_style,
			)
			ax.scatter(
				embedding[expressed, 0],
				embedding[expressed, 1],
				c=colors[1],
				**point_style,
			)
		else:
			import matplotlib.colors as clr

			level = np.max(data, axis=1)
			expressed = np.any(data > 0, axis=1)

			cmap = clr.LinearSegmentedColormap.from_list(
				"expression", [colors[0], colors[1]], N=256
			)
			# Both scatter calls must share one colour scale. Without explicit
			# limits each call normalises its own subset, so the lowest
			# expressing point would get the "off" colour and, for data with
			# negative values, non-expressing points could get the "on" colour.
			if level.size:
				point_style.update(vmin=level.min(), vmax=level.max())

			ax.scatter(
				embedding[~expressed, 0],
				embedding[~expressed, 1],
				c=level[~expressed],
				cmap=cmap,
				**point_style,
			)
			ax.scatter(
				embedding[expressed, 0],
				embedding[expressed, 1],
				c=level[expressed],
				cmap=cmap,
				**point_style,
			)

		# Gene names are shown as e.g. "Cd3e"; the surrounding "$" makes
		# matplotlib typeset the title as mathtext.
		marker_str = ", ".join(m.lower().capitalize() for m in marker)
		title = f"${marker_str}$"
		ax.set_title(title)

		return ax


	def plot_markers(
		markers,
		dataset,
		embedding: np.ndarray,
		per_row=4,
		figwidth=16,
		binary=True,
		s=1,
		alpha=0.1,
		colors=None,
		threshold=0,
		return_ax=False,
		zorder=1,
	):
		"""Draw a grid of marker-expression scatter plots, one panel per entry.

		Parameters
		----------
		markers
			Either a list whose items are a gene name or a group of gene names,
			or a dict mapping a panel label to such an item. For a dict, the
			label is placed on a line above the gene names in the panel title.
		dataset
			Passed through to ``plot_marker``; ``dataset.var_names`` must support
			the ``in`` operator so requested genes can be validated.
		embedding
			Passed through to ``plot_marker``.
		per_row
			Number of panels per grid row.
		figwidth
			Figure width in inches; the height is derived so panels are square.
		binary, s, alpha, colors, threshold, zorder
			Passed through to ``plot_marker`` for every panel.
		return_ax
			If true, return ``(fig, axes)`` where ``axes`` is the flattened array
			of all grid axes (including unused ones). Otherwise return ``None``.

		Raises
		------
		ValueError
			If ``markers`` is neither a list nor a dict, is empty, or names a
			gene that is not present in ``dataset.var_names``.
		"""
		import matplotlib.pyplot as plt

		# All validation happens before the figure is created so that bad input
		# does not leave an orphaned figure behind.
		if isinstance(markers, dict):
			panel_labels = list(markers)
			panel_markers = list(markers.values())
		elif isinstance(markers, list):
			panel_labels = None
			panel_markers = markers
		else:
			raise ValueError("markers cannot be instance of `%s`" % type(markers))

		if not panel_markers:
			raise ValueError("markers must contain at least one marker gene")

		# Flatten grouped markers the same way `plot_marker` interprets them.
		requested = [
			gene for group in panel_markers for gene in np.atleast_1d(group).tolist()
		]
		missing = [gene for gene in requested if gene not in dataset.var_names]
		if missing:
			# Explicit raise instead of `assert`, which is stripped under `-O`.
			raise ValueError(
				"One or more of the specified marker genes was not found in dataset: "
				+ ", ".join(str(gene) for gene in missing)
			)

		n_rows = (len(panel_markers) + per_row - 1) // per_row  # ceiling division
		figheight = figwidth / per_row * n_rows
		# squeeze=False guarantees a 2-D array of axes even for a 1x1 grid.
		fig, ax = plt.subplots(
			nrows=n_rows,
			ncols=per_row,
			figsize=(figwidth, figheight),
			squeeze=False,
		)

		ax = ax.ravel()
		for axi in ax:
			axi.set_axis_off()

		if colors is None:
			colors = ["#000000", "#EA4736"]

		for idx, marker in enumerate(panel_markers):
			plot_marker(
				marker,
				dataset,
				embedding,
				binary=binary,
				s=s,
				alpha=alpha,
				colors=colors,
				threshold=threshold,
				zorder=zorder,
				ax=ax[idx],
			)

			if panel_labels is not None:
				# Read the title back from this panel's axes, not from the array.
				gene_title = ax[idx].get_title()
				ax[idx].set_title(f"{panel_labels[idx]}\n{gene_title}")

		# Lay out once, after every panel has received its final title.
		fig.tight_layout()

		if return_ax:
			return fig, ax
		return None