Skip to content

Instantly share code, notes, and snippets.

@ppliu
Created April 7, 2014 18:15
Show Gist options
  • Save ppliu/1914ddeccfbabb7810e8 to your computer and use it in GitHub Desktop.
Save ppliu/1914ddeccfbabb7810e8 to your computer and use it in GitHub Desktop.
single_cell_k_means
# set group colors
k_means_colors = {}
# run k-means clustering
km = KMeans(init='k-means++', n_clusters=3, n_init=100)
km.fit(log(expr_df.transpose()))
# determine group membership
km_membership = dict(zip(expr_df.columns, km.labels_))
km_groups = defaultdict(list)
for sample in km_membership:
km_groups[km_membership[sample]].append(sample[0])
# determine group outliers based on majority membership of cell type
km_outliers = []
km_colors = []
group_to_color = {}
for label in km_groups:
lst = km_groups[label]
most_common = max(set(lst), key=lst.count)
if most_common == 'M':
group_to_color[label] = 'red'
elif most_common == 'N':
group_to_color[label] = 'green'
elif most_common == 'P':
group_to_color[label] = 'blue'
for sample in km_membership:
if sample[0] != most_common and km_membership[sample] == label:
km_outliers.append(sample)
km_colors = [group_to_color[label] for label in km.labels_]
km_colors_dict = dict(zip(expr_df.transpose().index, km_colors))
# color outliers red
for outlier in km_outliers:
km_colors_dict[outlier] = 'magenta'
_=plot_pca(log(expr_df.transpose()), \
figsize=(20, 20), \
colors_dict=km_colors_dict, \
markers_dict=markers_dict, \
num_vectors=50, \
column_ids_dict=ens_to_gene_sym, \
show_point_labels=False, \
show_vectors=True,\
vector_width=1, \
marker_size=200, \
save_as='pca_kmeans.pdf', \
save_format='pdf')
print 'outliers', km_outliers
sample_ids_to_celltype = dict((sample_id, celltype_to_long_name(celltype)) for celltype,
sample_ids in celltype_to_ids.iteritems() for sample_id in sample_ids)
fig, axes = plt.subplots(ncols=5, nrows=4, figsize=(20,20))
for num, (vector, ax) in enumerate(zip(_, axes.flatten())):
gene_name = vector[2]
try:
series = expr_df.ix[gene_sym_to_ens[gene_name]]
except:
pass
sns.violinplot((series), sample_ids_to_celltype, \
color=['blue', 'green', 'red', 'pink', 'darkgrey'], \
inner='points', \
order=['iPSCs', 'NPCs', 'Motor Neurons'], \
alpha=0.75, \
bw=(max(series)-min(series))/50, \
linewidth=0, ax=ax)
#ax.set_yscale('log')
#ax.set_ylim(0.001)
ax.set_title(gene_name)
sns.despine()
fig.tight_layout()
# fig.savefig('vp_all_gencode.pdf')
group1 = ['POU5F1', 'PAX6', 'KLF4', 'IKZF1', 'IKZF2', 'IKZF3', 'IKZF4', 'IKZF5', 'ISL1', 'ATOH1']
group2 = ['NLGN4X', 'NRXN1', 'IGF2BP1', 'IGF2BP2', 'IGF2BP3', 'MYL6', 'NOVA1', 'NOVA2', 'MBNL1', \
'MBNL2', 'MBNL3', 'PTBP1', 'RBFOX2']
fig, axes = plt.subplots(ncols=5, nrows=2, figsize=(20,12))
for gene_name, ax in zip(group2, axes.flatten()):
gene_sym = gene_name
try:
gene_name = gene_sym_to_ens[gene_name]
except Exception as e:
pass
print gene_name
series = expr_df.ix[gene_name]
sns.violinplot((series), sample_ids_to_celltype, \
color=['blue', 'green', 'red', 'pink', 'darkgrey'], \
inner='points', \
order=['iPSCs', 'NPCs', 'Motor Neurons'], \
alpha=0.75, \
bw=(max(series)-min(series))/50, \
linewidth=0, ax=ax)
#ax.set_yscale('log')
#ax.set_ylim(0.001)
ax.set_title(gene_sym)
sns.despine()
fig.tight_layout()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment