Skip to content

Instantly share code, notes, and snippets.

@peterk87
Created May 17, 2021 22:37
Show Gist options
  • Save peterk87/8cb494bdff433e2afe51a9d44ecf42af to your computer and use it in GitHub Desktop.
Save peterk87/8cb494bdff433e2afe51a9d44ecf42af to your computer and use it in GitHub Desktop.
Create coverage plots for multiple genomes from "samtools depth" tabular outputs
#!/usr/bin/env python
from pathlib import Path
import logging
import typer
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import pylab
from rich.logging import RichHandler
logger = logging.getLogger(__name__)
def read_depths(fpath: Path, name: str = None) -> pd.DataFrame:
df = pd.read_table(fpath, names=["reference", "pos", "depth"], header=None)
if name:
df["genome"] = name
else:
df["genome"] = fpath.stem
return df
def main(
depths_dir: Path,
depth_file_pattern: str = typer.Option(
"**/*.tsv", help="samtools depth file glob pattern"
),
width: float = typer.Option(16.0, help="Output figure width"),
height: float = typer.Option(10.0, help="Output figure height"),
xlabel: str = typer.Option("SARS-CoV-2 Wuhan-Hu-1 Genome Position"),
output: str = typer.Option(
"covplot.pdf",
help='Output figure file path. ".pdf" for PDF output, ".png" for PNG output',
),
verbose: bool = typer.Option(False, help="Verbose logs"),
):
from rich.traceback import install
install(show_locals=True)
logging.basicConfig(
format="%(message)s",
datefmt="[%Y-%m-%d %X]",
level=logging.INFO if not verbose else logging.DEBUG,
handlers=[RichHandler(rich_tracebacks=True, tracebacks_show_locals=True)],
)
depths_paths = Path(depths_dir)
df_depths = pd.concat(
[read_depths(p, p.stem) for p in depths_paths.glob(depth_file_pattern)]
)
df_depths.sort_values(["genome", "pos"], inplace=True, ascending=True)
samples = df_depths.genome.unique()
logger.info(f"Read depths for {len(samples)}")
df_depths_log = df_depths.copy()
df_depths_log.depth = np.log10(df_depths_log.depth)
df_depths_log_pivot = pd.pivot(
df_depths_log, index="genome", columns="pos", values="depth"
)
sns.set_context("talk", font_scale=0.8)
fig = plt.figure(figsize=(width, height))
count = 0
pylab.subplots_adjust(hspace=0.1)
n_samples = df_depths_log_pivot.index.size
gs = gridspec.GridSpec(n_samples * 4, 1)
for idx in samples:
row = df_depths_log_pivot.loc[idx, :]
ax1 = fig.add_subplot(gs[count : count + 3, 0])
ax1.set_ylim(top=df_depths_log_pivot.values.max())
ax1.set_xlim(left=1, right=row.size)
ax1.set_yticks([0, 1, 2, 3, 4])
ax1.set_yticklabels([1, 10, 100, 1000, 10000])
ax1.fill_between(np.arange(row.size), row, 0, color="steelblue")
ax1.set_xticks([])
ax1.set_xticklabels([])
ax1.set_ylabel(idx, rotation=0, labelpad=70, fontdict=dict(fontsize=16))
for edge, spine in ax1.spines.items():
spine.set_visible(False)
ax2 = fig.add_subplot(gs[count + 3, 0])
arr = row.values.copy()
arr[arr > 1.0] = 2
arr[(arr > -np.inf) & (arr <= 1.0)] = 1
arr[arr == -np.inf] = 0
arr.shape = (1, arr.shape[0])
im = ax2.imshow(arr, aspect="auto", cmap=plt.cm.Greys_r)
ax2.set_yticks([])
ax2.set_yticklabels([])
for edge, spine in ax2.spines.items():
spine.set_visible(False)
count += 4
if count < n_samples * 4:
ax2.set_xticks([])
ax2.set_xticklabels([])
else:
ticks = list(range(0, row.values.size, 2000))
ticks[0] = 1
mticks = range(1000, row.values.size, 500)
ax2.set_xticks(ticks)
ax2.set_xticks(mticks, minor=True)
ax2.set_xlabel(xlabel, fontdict=dict(fontsize=16), labelpad=15)
plt.savefig(output)
logger.info(f'Saved covplot to "{output}"')
if __name__ == "__main__":
typer.run(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment