# Source: GitHub gist ethanagb/c8080dc20b3d060b9b44f153d1f8bf9e
import os

import numpy as np
import pandas as pd
from numpy.random import default_rng
from sympy import root

import simulate_classes as sim
# ---------------------------------------------------------------------------
# Setup: load the tissue arrays, fix the simulation sizes, and simulate the
# marker data once so the same values are reused for every tissue.
# ---------------------------------------------------------------------------

# NOTE(review): `rng` is seeded here but is not passed to any call visible in
# this script; whether the simulation below is reproducible depends on how
# `simulate_classes` draws its random numbers -- confirm.
rng = default_rng(512)

# NOTE(review): absolute, machine-specific paths; only `A.shape[0]` (the cell
# count) and `C` (written to the x/y columns) are used further down.
A = np.load('/Users/ethan/Dropbox (MIT)/Lab/spatial-data-generation_brokengit/circle_packing/denis_tissue_20211217/A_2021-12-17-145404.npy')
C = np.load('/Users/ethan/Dropbox (MIT)/Lab/spatial-data-generation_brokengit/circle_packing/denis_tissue_20211217/C_2021-12-17-145404.npy')

# Directory holding the per-tissue B matrices; outputs go to a subdirectory.
rootdir = './denis_tissue_20211217/'
Bnames = [
    'tissue1_B.npy',
    'tissue2_B.npy',
    'tissue3_B.npy',
    'tissue4_B.npy',
    'tissue5_1-1+1-3_B.npy',
    'tissue6.npy',
    'tissue7.npy',
]

# Very lazy oversampling: simulate 4x as many cells as the tissue has so that
# there are enough of each type (the generator was not initially set up to
# take per-type counts).
n_cells = A.shape[0] * 4
n_classes = 4
n_genes = 100
n_informative = 75

# Earlier approach, kept for reference:
# X, y = datasets.make_classification(n_samples=n_cells, n_features=n_genes, n_informative=n_informative, n_classes=n_classes, shift=1)

# Generate once to hold constant across all tissues.
means, variances = sim.generate_means(n_genes, n_informative)
X, y = sim.generate_data(n_cells, n_genes, n_classes, means, variances)
# ---------------------------------------------------------------------------
# For every tissue: derive each cell's type from its B matrix, hand each cell
# a simulated marker vector of the matching type, and write one CSV per
# tissue with columns cell_id, cell_type, x, y, gene_0 .. gene_{n_genes-1}.
# ---------------------------------------------------------------------------
out_dir = rootdir + 'markers_20220211/'
# Create the output directory up front instead of failing on the first write.
os.makedirs(out_dir, exist_ok=True)

n_rows = A.shape[0]
cols = ['cell_id', 'cell_type', 'x', 'y'] + [f'gene_{j}' for j in range(n_genes)]

for b in Bnames:
    basename = b.split('.')[0]
    B = np.load(rootdir + b)
    # Cell type = column index of the largest entry in each row of B.
    # (Presumably B is a cells x types assignment matrix -- confirm.)
    types = np.argmax(B, axis=1)
    if types.shape[0] != n_rows:
        raise ValueError(
            f"{b}: B has {types.shape[0]} rows but A has {n_rows} cells"
        )

    # NaN-filled rather than np.empty: any cell whose type is >= n_classes
    # never receives markers, and must not end up with uninitialised memory.
    data = np.full((n_rows, n_genes + 4), np.nan)

    for t in range(n_classes):
        idx, = np.where(types == t)  # which cells are assigned that type
        type_markers = X[np.where(y == t)[0]]  # simulated cells of that type
        if type_markers.shape[0] < idx.size:
            raise ValueError(
                f"{b}: need {idx.size} simulated cells of type {t}, "
                f"only {type_markers.shape[0]} available"
            )
        # The i-th cell of this type gets the i-th simulated marker vector.
        data[idx, 4:] = type_markers[:idx.size]

    data[:, 2:4] = C
    data[:, 1] = types
    data[:, 0] = np.arange(n_rows)

    df = pd.DataFrame(data, columns=cols)
    df.to_csv(out_dir + basename + '_markers.csv')

# The generating parameters are shared by all tissues; save them once.
np.savetxt(out_dir + 'variance.csv', variances, delimiter=',')
np.savetxt(out_dir + 'celltype_means.csv', means, delimiter=',')
# (GitHub web-page footer omitted.)