-
-
Save ppliu/1914ddeccfbabb7810e8 to your computer and use it in GitHub Desktop.
single_cell_k_means
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set group colors | |
k_means_colors = {} | |
# run k-means clustering | |
km = KMeans(init='k-means++', n_clusters=3, n_init=100) | |
km.fit(log(expr_df.transpose())) | |
# determine group membership | |
km_membership = dict(zip(expr_df.columns, km.labels_)) | |
km_groups = defaultdict(list) | |
for sample in km_membership: | |
km_groups[km_membership[sample]].append(sample[0]) | |
# determine group outliers based on majority membership of cell type | |
km_outliers = [] | |
km_colors = [] | |
group_to_color = {} | |
for label in km_groups: | |
lst = km_groups[label] | |
most_common = max(set(lst), key=lst.count) | |
if most_common == 'M': | |
group_to_color[label] = 'red' | |
elif most_common == 'N': | |
group_to_color[label] = 'green' | |
elif most_common == 'P': | |
group_to_color[label] = 'blue' | |
for sample in km_membership: | |
if sample[0] != most_common and km_membership[sample] == label: | |
km_outliers.append(sample) | |
km_colors = [group_to_color[label] for label in km.labels_] | |
km_colors_dict = dict(zip(expr_df.transpose().index, km_colors)) | |
# color outliers red | |
for outlier in km_outliers: | |
km_colors_dict[outlier] = 'magenta' | |
_=plot_pca(log(expr_df.transpose()), \ | |
figsize=(20, 20), \ | |
colors_dict=km_colors_dict, \ | |
markers_dict=markers_dict, \ | |
num_vectors=50, \ | |
column_ids_dict=ens_to_gene_sym, \ | |
show_point_labels=False, \ | |
show_vectors=True,\ | |
vector_width=1, \ | |
marker_size=200, \ | |
save_as='pca_kmeans.pdf', \ | |
save_format='pdf') | |
print 'outliers', km_outliers | |
sample_ids_to_celltype = dict((sample_id, celltype_to_long_name(celltype)) for celltype, | |
sample_ids in celltype_to_ids.iteritems() for sample_id in sample_ids) | |
fig, axes = plt.subplots(ncols=5, nrows=4, figsize=(20,20)) | |
for num, (vector, ax) in enumerate(zip(_, axes.flatten())): | |
gene_name = vector[2] | |
try: | |
series = expr_df.ix[gene_sym_to_ens[gene_name]] | |
except: | |
pass | |
sns.violinplot((series), sample_ids_to_celltype, \ | |
color=['blue', 'green', 'red', 'pink', 'darkgrey'], \ | |
inner='points', \ | |
order=['iPSCs', 'NPCs', 'Motor Neurons'], \ | |
alpha=0.75, \ | |
bw=(max(series)-min(series))/50, \ | |
linewidth=0, ax=ax) | |
#ax.set_yscale('log') | |
#ax.set_ylim(0.001) | |
ax.set_title(gene_name) | |
sns.despine() | |
fig.tight_layout() | |
# fig.savefig('vp_all_gencode.pdf') | |
group1 = ['POU5F1', 'PAX6', 'KLF4', 'IKZF1', 'IKZF2', 'IKZF3', 'IKZF4', 'IKZF5', 'ISL1', 'ATOH1'] | |
group2 = ['NLGN4X', 'NRXN1', 'IGF2BP1', 'IGF2BP2', 'IGF2BP3', 'MYL6', 'NOVA1', 'NOVA2', 'MBNL1', \ | |
'MBNL2', 'MBNL3', 'PTBP1', 'RBFOX2'] | |
fig, axes = plt.subplots(ncols=5, nrows=2, figsize=(20,12)) | |
for gene_name, ax in zip(group2, axes.flatten()): | |
gene_sym = gene_name | |
try: | |
gene_name = gene_sym_to_ens[gene_name] | |
except Exception as e: | |
pass | |
print gene_name | |
series = expr_df.ix[gene_name] | |
sns.violinplot((series), sample_ids_to_celltype, \ | |
color=['blue', 'green', 'red', 'pink', 'darkgrey'], \ | |
inner='points', \ | |
order=['iPSCs', 'NPCs', 'Motor Neurons'], \ | |
alpha=0.75, \ | |
bw=(max(series)-min(series))/50, \ | |
linewidth=0, ax=ax) | |
#ax.set_yscale('log') | |
#ax.set_ylim(0.001) | |
ax.set_title(gene_sym) | |
sns.despine() | |
fig.tight_layout() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment