Skip to content

Instantly share code, notes, and snippets.

@sminot
Created June 26, 2020 15:29
Show Gist options
  • Save sminot/099630a5e44df7e821b7fe7d29179672 to your computer and use it in GitHub Desktop.
Save sminot/099630a5e44df7e821b7fe7d29179672 to your computer and use it in GitHub Desktop.
Plot CAG summary figures from geneshot results HDF5
# Plot the distribution of CAG sizes
def plot_cag_size(hdf_fp, pdf=None, min_size=5, alpha=0.25):
cag_annot = pd.read_hdf(hdf_fp, "/annot/cag/all").set_index("CAG")
# Calculate the log10 size (number of genes per CAG)
cag_annot = cag_annot.assign(
size_log10 = cag_annot["size"].apply(np.log10)
)
# Filter by CAG size
if min_size is not None and min_size >= 1:
cag_annot = cag_annot.query(
"size >= {}".format(min_size)
)
# Make a weighted histogram of CAG sizes (weighted by CAG size)
cag_annot["size_log10"].hist(
weights=cag_annot["size"],
bins=100,
linewidth=0,
)
plt.xlabel("CAG size (number of genes)")
plt.ylabel("Total number of genes per bin")
if pdf is not None:
pdf.savefig(bbox_inches="tight")
plt.show()
for col_name, axis_label in [
("mean_abundance", "Mean Abundance"),
("entropy", "Entropy"),
("prevalence", "Prevalence"),
]:
g = sns.scatterplot(
data=cag_annot,
x="size_log10",
y=col_name,
linewidth=0,
alpha=alpha
)
plt.ylabel(axis_label)
plt.xlabel("CAG size (number of genes)")
if pdf is not None:
pdf.savefig(bbox_inches="tight")
plt.show()
g = sns.jointplot(
data=cag_annot,
x="size_log10",
y=col_name,
kind="hex",
height=4,
)
g.set_axis_labels("CAG size (number of genes)", axis_label)
if pdf is not None:
pdf.savefig(bbox_inches="tight")
plt.show()
plot_cag_size(hdf_fp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment