# Source: GitHub Gist ethanagb/c8080dc20b3d060b9b44f153d1f8bf9e
# Author: @ethanagb
# Created: February 12, 2022 21:26
import numpy as np
from sympy import root
import simulate_classes as sim
import pandas as pd
from numpy.random import default_rng
# Seeded generator (seed 512).
# NOTE(review): `rng` is not referenced again in the visible script -- confirm
# whether anything else depends on it.
rng = default_rng(512)

# Arrays saved by the circle-packing run. Only A's first dimension (one row per
# cell) is used below; C is copied into the x/y columns of every output table.
A = np.load('/Users/ethan/Dropbox (MIT)/Lab/spatial-data-generation_brokengit/circle_packing/denis_tissue_20211217/A_2021-12-17-145404.npy')
C = np.load('/Users/ethan/Dropbox (MIT)/Lab/spatial-data-generation_brokengit/circle_packing/denis_tissue_20211217/C_2021-12-17-145404.npy')

# Directory holding the per-tissue B arrays; the output CSVs are written to a
# subdirectory of it.
rootdir = './denis_tissue_20211217/'
Bnames = [
    'tissue1_B.npy',
    'tissue2_B.npy',
    'tissue3_B.npy',
    'tissue4_B.npy',
    'tissue5_1-1+1-3_B.npy',
    'tissue6.npy',
    'tissue7.npy',
]

# Simulation dimensions.
n_classes = 4
n_genes = 100
n_informative = 75
# Simulate four times as many cells as the tissue contains: a lazy way of
# making sure there are enough samples of every type, since the generator was
# not originally set up to take per-type counts.
n_cells = 4 * len(A)

# Earlier approach, kept for reference:
#X, y = datasets.make_classification(n_samples=n_cells, n_features=n_genes, n_informative=n_informative, n_classes=n_classes, shift=1)

# Generate the parameters and the marker pool once so they stay constant
# across all tissues.
means, variances = sim.generate_means(n_genes, n_informative)
X, y = sim.generate_data(n_cells, n_genes, n_classes, means, variances)
# Build one marker table per tissue file. Each row describes one cell:
# cell_id, cell_type, x, y, followed by n_genes marker values taken from the
# simulated pool (X, y) for that cell's type.

# Column names are identical for every tissue, so build them once.
cols = ['cell_id', 'cell_type', 'x', 'y'] + ['gene_' + str(j) for j in range(n_genes)]
labels = np.asarray(y)

for b in Bnames:
    basename = b.split('.')[0]
    B = np.load(rootdir + b)
    # A cell's type is the index of the largest entry in its row of B.
    # NOTE(review): presumably B holds per-cell type scores -- confirm.
    types = np.argmax(B, axis=1)

    # Start from NaN rather than uninitialised memory: a cell whose type falls
    # outside range(n_classes) is never assigned markers below, and should show
    # up as missing values instead of arbitrary numbers.
    data = np.full((A.shape[0], n_genes + 4), np.nan)

    for t in range(n_classes):
        # Row indices of the cells assigned to type t in this tissue.
        idx, = np.where(types == t)
        # All simulated samples labelled with type t.
        type_markers = X[labels == t]
        if len(type_markers) < len(idx):
            raise ValueError(
                'not enough simulated samples of type ' + str(t) + ' for ' + b
                + ': need ' + str(len(idx)) + ', have ' + str(len(type_markers))
            )
        # Give the first len(idx) samples of this type to the cells, in order.
        data[idx, 4:] = type_markers[:len(idx)]

    data[:, 2:4] = C
    data[:, 1] = types
    data[:, 0] = np.arange(A.shape[0])

    df = pd.DataFrame(data, columns=cols)
    df.to_csv(rootdir + 'markers_20220211/' + basename + '_markers.csv')
# Save the generative parameters shared by every tissue next to the marker
# tables, as comma-separated text.
for out_name, values in (('variance.csv', variances), ('celltype_means.csv', means)):
    np.savetxt(rootdir + 'markers_20220211/' + out_name, values, delimiter=',')
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment