Skip to content

Instantly share code, notes, and snippets.

@adamewing
Created April 16, 2019 01:17
Show Gist options
  • Save adamewing/931cc44d717959073fa5b09078e7e4b3 to your computer and use it in GitHub Desktop.
Save adamewing/931cc44d717959073fa5b09078e7e4b3 to your computer and use it in GitHub Desktop.
Generate violin plots for Stevenson et al.
#!/usr/bin/env python
import sys
import pandas as pd
import numpy as np
from collections import defaultdict as dd
import matplotlib as mpl
# Force matplotlib to not use any Xwindows backend.
mpl.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
import logging
FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(format=FORMAT)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if len(sys.argv) == 2:
cohorts = []
samples = []
for comp in ('C12C14','C13C14'):
logger.info("loading cpm.%s.csv..." % comp)
cohorts.append(pd.read_csv('cpm.%s.csv.gz' % comp, header=0, index_col=0))
logger.info("loading samples.%s.csv..." % comp)
samples.append(pd.read_csv('samples.%s.csv.gz' % comp, header=0, index_col=0))
sc = dd(list)
for i, cpm in enumerate(cohorts):
for tag in cpm.columns:
assert tag in samples[i].index
sc[samples[i]['SubClusterNumbers'][tag]].append(tag)
with open(sys.argv[1]) as _:
for line in _:
gene = line.strip()
logger.info('plotting %s' % gene)
plotdata = dd(dict)
for cluster in ('C12','C13','C14'):
tags = sc[cluster]
for tag in tags:
if cluster in ('C12'):
if cohorts[0][tag].ix[gene] > 0.0:
plotdata[tag]['Cluster'] = cluster
plotdata[tag][gene] = gene
plotdata[tag]['CPM'] = np.log2(cohorts[0][tag].ix[gene]+1)
else:
if cohorts[1][tag].ix[gene] > 0.0:
plotdata[tag]['Cluster'] = cluster
plotdata[tag][gene] = gene
plotdata[tag]['CPM'] = np.log2(cohorts[1][tag].ix[gene]+1)
plotdata = pd.DataFrame.from_dict(plotdata).T
plotdata = pd.DataFrame(plotdata.to_dict())
plt.figure()
ax = sns.violinplot(x="Cluster", y="CPM", data=plotdata, order=['C12', 'C13', 'C14'])
ax.set_title(gene)
fig = ax.figure
outfn = '%s.plotlogcpm.eps' % gene
fig.savefig(outfn, bbox_inches='tight')
plt.clf()
else:
sys.exit('usage: %s <gene name>' % sys.argv[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment