Last active
August 29, 2015 14:12
-
-
Save afrendeiro/7425a73e808dc90449ed to your computer and use it in GitHub Desktop.
Playing with CellProfiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/env/python | |
import os | |
import h5py | |
import pandas as pd | |
projectDir = '/home/afr/workspace/cellprofiler/' | |
# open hdf5 | |
hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r') | |
# get experiment name | |
experiment = str(hdf5['/Measurements'].items()[0][0]) | |
objects = ['FilteredNuclei', 'Cells', 'Cytoplasm', 'Mitochondria'] | |
measurementsToSkip = [ | |
"ObjectNumber", | |
"Neighbors_FirstClosestObjectNumber_Adjacent", | |
"Neighbors_SecondClosestObjectNumber_Adjacent" | |
] | |
for obj in objects: | |
# initialize dataframe | |
df = pd.DataFrame() | |
measurements = hdf5['/Measurements/' + experiment + '/' + obj].items() | |
# prepare output directory | |
outputDir = projectDir + experiment + '-measurements/' | |
if not os.path.exists(outputDir): | |
os.makedirs(outputDir) | |
# iterate through measurements and append new data to dataframe as columns | |
for meas in measurements: | |
meas = meas[0] | |
if meas not in measurementsToSkip: | |
data = hdf5['/Measurements/' + experiment + '/' + obj][meas]['data'].value | |
tmpDF = pd.DataFrame({meas : data}) | |
df[meas] = hdf5['/Measurements/' + experiment + '/' + obj][meas]['data'].value | |
# output as csv | |
df.to_csv(outputDir + obj + '.csv') | |
# Get relashionships | |
obj = 'Relationship' | |
relationships = hdf5['/Measurements/' + experiment + '/' + obj] | |
df = pd.DataFrame() | |
df['Cells-Cells-Neighbours_ObjectNumber_First'] = relationships['12']['Neighbors']['Cells']['Cells']['ObjectNumber_First'].value | |
df['Cells-Cells-Neighbours_ObjectNumber_Second'] = relationships['12']['Neighbors']['Cells']['Cells']['ObjectNumber_Second'].value | |
df.to_csv(outputDir + 'Cells-Cells-Neighbours' + '.csv') | |
df = pd.DataFrame() | |
df['Cells-Mitochondria-Parent_ObjectNumber_First'] = relationships['13']['Parent']['Cells']['Mitochondria']['ObjectNumber_First'].value | |
df['Cells-Mitochondria-Parent_ObjectNumber_Second'] = relationships['13']['Parent']['Cells']['Mitochondria']['ObjectNumber_Second'].value | |
df.to_csv(outputDir + 'Cells-Mitochondria-Parent' + '.csv') | |
df = pd.DataFrame() | |
df['Nuclei-Cells-Parent_ObjectNumber_First'] = relationships['7']['Parent']['Nuclei']['Cells']['ObjectNumber_First'].value | |
df['Nuclei-Cells-Parent_ObjectNumber_Second'] = relationships['7']['Parent']['Nuclei']['Cells']['ObjectNumber_Second'].value | |
df.to_csv(outputDir + 'Nuclei-Cells-Parent' + '.csv') | |
df = pd.DataFrame() | |
df['Cells-Cytoplasm-Parent_ObjectNumber_First'] = relationships['8']['Parent']['Cells']['Cytoplasm']['ObjectNumber_First'].value | |
df['Cells-Cytoplasm-Parent_ObjectNumber_Second'] = relationships['8']['Parent']['Cells']['Cytoplasm']['ObjectNumber_Second'].value | |
df.to_csv(outputDir + 'Cells-Cytoplasm-Parent' + '.csv') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/env/python | |
import os | |
import h5py | |
import pandas as pd | |
import string | |
import math | |
import scipy.stats as stats | |
import pylab | |
import matplotlib.pyplot as plt | |
import matplotlib.cm as cm | |
import matplotlib | |
import mlpy | |
projectDir = '/home/afr/workspace/cellprofiler/2014-10-21-16-10-40-measurements/' | |
# change font of plots to look like LATEX | |
#matplotlib.rc('font', family='serif') | |
#matplotlib.rc('text', usetex='false') | |
#matplotlib.rcParams.update({'font.size': 14}) | |
def imageNumber2wellNumber(image): | |
""" | |
Calculates the index of the well from which an image belongs. | |
""" | |
return int(np.around((int(image) + 1)/4.)) if int(image) != 1 else 1 | |
def alphanumeric2numeric(well): | |
""" | |
Converts a well from alphanumeric (e.g. 'B06') to numeric (e. g. 30). | |
""" | |
return ((string.ascii_uppercase.index(well[0])) * 24) + int(well[1:]) | |
def wellNumber2Position(well): | |
""" | |
Calculates well position in plate (e. g. x, y = (2, 6) from its numeric index. | |
""" | |
row = 1 + math.trunc((well - 1) / 24) | |
col = ((well - 1) % 24) + 1 | |
return (row, col) | |
def normalizeWells(wells, measurement): | |
""" | |
Requires dataframe column with Measurements, indexed with well number. | |
Returns same dataframe normalized (subtraction) by row and column. | |
""" | |
normalized = [] | |
for well in range(1, 385): | |
# get median of all wells in row | |
row = wells.ix[well]['rowIndex'] | |
col = wells.ix[well]['colIndex'] | |
rowMeasurement = list(wells[(wells['rowIndex'] == row) & (wells['drugType'] != 'PosCon') & (wells['drugType'] != np.NaN)][measurement]) | |
colMeasurement = list(wells[(wells['colIndex'] == row) & (wells['drugType'] != 'PosCon') & (wells['drugType'] != np.NaN)][measurement]) | |
# normalize by row and col | |
#normalized.append(wells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) | |
# normalize by row and col and globally | |
#normalized.append((wells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) / np.median(wells[measurement])) | |
# normalize by row and col and globally and logaritmize | |
#normalized.append(log2((wells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) / np.median(wells[measurement]))) | |
# normalize by row and col and globally and logaritmize | |
normalized.append(log2(wells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement) + 1)) | |
wells[measurement + 'Normalized'] = normalized | |
return wells | |
def normalizeCells(cells, measurement): | |
""" | |
Requires dataframe column with Measurements, indexed with well number. | |
Returns same dataframe normalized (subtraction) by row and column. | |
""" | |
normalized = [] | |
for cell in range(len(cells)): | |
well = cells['wellNumber'][cell] | |
row, col = wellNumber2Position(well) | |
# get median of all cells in row | |
rowMeasurement = list(cells[(cells['rowIndex'] == row) & (cells['drugType'] != 'PosCon') & (cells['drugType'] != np.NaN)][measurement]) | |
colMeasurement = list(cells[(cells['colIndex'] == col) & (cells['drugType'] != 'PosCon') & (cells['drugType'] != np.NaN)][measurement]) | |
# normalize by row and col | |
#normalized.append(cells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) | |
# normalize by row and col and globally | |
#normalized.append((cells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) / np.median(cells[measurement])) | |
# normalize by row and col and globally and logaritmize | |
#normalized.append(log2((cells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) / np.median(cells[measurement]))) | |
# normalize by row and col and globally and logaritmize | |
normalized.append(cells.ix[well][measurement] - np.median(rowMeasurement + colMeasurement)) | |
cells[measurement + 'Normalized'] = normalized | |
return cells | |
def zFactor(meanPos, meanNeg, stdPos, stdNeg): | |
""" | |
Calculates the Z-factor of measure for a plate based on controls (pos and neg). | |
""" | |
return 1 - (3 * (stdPos + stdNeg) / abs(meanPos - meanNeg)) | |
### read in csvs | |
nuclei = pd.io.parsers.read_csv(projectDir + 'FilteredNuclei.csv') | |
cells = pd.io.parsers.read_csv(projectDir + 'Cells.csv') | |
Cytoplasm = pd.io.parsers.read_csv(projectDir + 'Cytoplasm.csv') | |
mitochondria = pd.io.parsers.read_csv(projectDir + 'Mitochondria.csv') | |
### read in plate metadata | |
metadata = pd.io.parsers.read_csv(projectDir + '20140206_transferlist_001_064_annotated_PLATE_1315001.csv') | |
# remember which wells are used | |
usedWells = [] | |
for well in metadata['DestWell']: | |
usedWells.append(alphanumeric2numeric(well)) | |
metadata.index = usedWells | |
# fill in missing rows with NaNs | |
for row in range(1, 385): | |
if row not in metadata.index: | |
metadata.ix[row] = np.NaN | |
metadata.sort() | |
# convert well number to numeric | |
rowIndex = [] | |
colIndex = [] | |
for well in range(1, 385): | |
x, y = wellNumber2Position(well) | |
rowIndex.append(x) | |
colIndex.append(y) | |
metadata['rowIndex'] = rowIndex | |
metadata['colIndex'] = colIndex | |
### Annotate cells with well number, coordinates, drug type | |
wellN = []; drugType = []; rowIndex = []; colIndex = [] | |
for i in range(len(cells['ImageNumber'])): | |
well = imageNumber2wellNumber(cells.ix[i]['ImageNumber']) | |
row, col = wellNumber2Position(well) | |
wellN.append(well) | |
rowIndex.append(row) | |
colIndex.append(col) | |
drugType.append(cellMeans.ix[well]['drugType']) | |
cells['wellNumber'] = wellN; cells['drugType'] = drugType; cells['rowIndex'] = rowIndex; cells['colIndex'] = colIndex | |
##################### Analyze with averaged measurements per well | |
measurements = ['cellNumber', 'cellAreaMean', 'cellAreaMeanStd', 'cellPerimeterMean', 'cellPerimeterStd', 'cellFormFactorMean', 'cellFormFactorStd', 'cellEccentricityMean', 'cellEccentricityStd'] | |
### get drug type metadata | |
cellMeans = metadata[['rowIndex', 'colIndex','Type_A']] | |
cellMeans = cellMeans.rename(columns = {'Type_A' : 'drugType'}) | |
### Get average and std for each measurement | |
for well in range(1, 385): | |
cellsWell = cells[cells['wellNumber'] == well] | |
if measurement == 'cellNumber': | |
cellMeans[measurement] = len(cellsWell) | |
else: | |
cellMeans[measurement + 'Mean'] = np.mean(cellsWell) | |
cellMeans[measurement + 'Std'] = np.std(cellsWell) | |
### plot measurements | |
measurements = ['cellNumber', 'cellAreaMean', 'cellAreaMeanStd', 'cellPerimeterMean', 'cellPerimeterStd', 'cellFormFactorMean', 'cellFormFactorStd', 'cellEccentricityMean', 'cellEccentricityStd'] | |
for measurement in measurements: | |
# normalize measurements | |
cellMeans = normalizeWells(cellMeans, measurement) | |
# plot plate heatmap | |
# create np array | |
for row in range(1, 17): | |
if row == 1: | |
measurementArray = np.array([cellMeans[cellMeans['rowIndex'] == row][measurement]]) | |
measurementArrayNormalized = np.array([cellMeans[cellMeans['rowIndex'] == row][measurement + 'Normalized']]) | |
else: | |
measurementArray = np.append(measurementArray, [cellMeans[cellMeans['rowIndex'] == row][measurement]], axis = 0) | |
measurementArrayNormalized = np.append(measurementArrayNormalized, [cellMeans[cellMeans['rowIndex'] == row][measurement + 'Normalized']], axis = 0) | |
### calculate z-factor | |
meanPos = np.mean(cellMeans[cellMeans['drugType'] == 'PosCon'][measurement]) | |
meanNeg = np.mean(cellMeans[cellMeans['drugType'] == 'DMSO'][measurement]) | |
stdPos = np.std(cellMeans[cellMeans['drugType'] == 'PosCon'][measurement]) | |
stdNeg = np.std(cellMeans[cellMeans['drugType'] == 'DMSO'][measurement]) | |
z = zFactor(meanPos, meanNeg, stdPos, stdNeg) | |
### Plot | |
# unnormalized | |
plt.subplot(2, 1, 1) | |
plt.title(measurement + ' per well') | |
plt.xlabel('Z-factor: %s' % z) | |
plt.pcolor(measurementArray, vmin=cellMeans[measurement].min(), vmax=cellMeans[measurement].max()) | |
plt.colorbar() | |
plt.xticks(np.arange(0, 24) + 0.5, range(1, 25)) | |
plt.yticks(np.arange(0, 16) + 0.5, range(1, 17)) | |
plt.xlim(0, 24); plt.ylim(0, 16) | |
# normalized | |
meanPos = np.mean(cellMeans[cellMeans['drugType'] == 'PosCon'][measurement + 'Normalized']) | |
meanNeg = np.mean(cellMeans[cellMeans['drugType'] == 'DMSO'][measurement + 'Normalized']) | |
stdPos = np.std(cellMeans[cellMeans['drugType'] == 'PosCon'][measurement + 'Normalized']) | |
stdNeg = np.std(cellMeans[cellMeans['drugType'] == 'DMSO'][measurement + 'Normalized']) | |
z = zFactor(meanPos, meanNeg, stdPos, stdNeg) | |
plt.subplot(2, 1, 2) | |
plt.title(measurement + ' per well') | |
plt.xlabel('Z-factor: %s' % z) | |
plt.pcolor(measurementArrayNormalized, vmin=cellMeans[measurement + 'Normalized'].min(), vmax=cellMeans[measurement + 'Normalized'].max()) | |
plt.colorbar() | |
plt.xticks(np.arange(0, 24) + 0.5, range(1, 25)) | |
plt.yticks(np.arange(0, 16) + 0.5, range(1, 17)) | |
plt.xlim(0, 24); plt.ylim(0, 16) | |
plt.subplots_adjust(hspace=.5) | |
plt.savefig(projectDir + "plots/" + measurement + "Plate.png", dpi = 600, bbox_inches = 'tight') | |
plt.close() | |
#### plot cell number per well | |
plt.subplot(2, 1, 1) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'Cpd'][measurement], | |
x = cellMeans[cellMeans['drugType'] == 'Cpd'].index, | |
c = 'red', | |
label = 'compounds' | |
) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'DMSO'][measurement], | |
x = cellMeans[cellMeans['drugType'] == 'DMSO'].index, | |
c = 'blue', | |
label = 'DMSO' | |
) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'PosCon'][measurement], | |
x = cellMeans[cellMeans['drugType'] == 'PosCon'].index, | |
c = 'green', | |
label = 'positive' | |
) | |
plt.title(measurement + ' per well'); plt.ylabel(measurement); plt.xlabel('Well n.') | |
plt.axis([cellMeans.index[0], cellMeans.index[-1], cellMeans[measurement].min(), cellMeans[measurement].max()]) | |
plt.subplot(2, 1, 2) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'Cpd'][measurement + 'Normalized'], | |
x = cellMeans[cellMeans['drugType'] == 'Cpd'].index, | |
c = 'red', | |
label = 'compounds' | |
) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'DMSO'][measurement + 'Normalized'], | |
x = cellMeans[cellMeans['drugType'] == 'DMSO'].index, | |
c = 'blue', | |
label = 'DMSO' | |
) | |
plt.scatter( | |
y = cellMeans[cellMeans['drugType'] == 'PosCon'][measurement + 'Normalized'], | |
x = cellMeans[cellMeans['drugType'] == 'PosCon'].index, | |
c = 'green', | |
label = 'positive' | |
) | |
plt.title('Normalized ' + measurement + ' per well'); plt.ylabel('Normalized ' + measurement); plt.xlabel('Well n.') | |
plt.axis([cellMeans.index[0], cellMeans.index[-1], cellMeans[measurement + 'Normalized'].min(), cellMeans[measurement + 'Normalized'].max()]) | |
plt.subplots_adjust(hspace=.5) | |
plt.legend(bbox_to_anchor=(1.05, -0.05), loc='best', borderaxespad=0.) | |
plt.savefig(projectDir + "plots/" + measurement + "_perWell.png", dpi = 600, bbox_inches='tight') | |
plt.close() | |
# plot cell Area vs Perimeter per well | |
#plt.scatter( | |
# x = cellMeans[cellMeans['drugType'] == 'Cpd']['cellAreaMean'], | |
# y = cellMeans[cellMeans['drugType'] == 'Cpd']['cellPerimeterMean'], | |
# c = 'red', | |
# label = 'compounds' | |
#) | |
#plt.scatter( | |
# x = cellMeans[cellMeans['drugType'] == 'DMSO']['cellAreaMean'], | |
# y = cellMeans[cellMeans['drugType'] == 'DMSO']['cellPerimeterMean'], | |
# c = 'blue', | |
# label = 'DMSO' | |
#) | |
#plt.scatter( | |
# x = cellMeans[cellMeans['drugType'] == 'PosCon']['cellAreaMean'], | |
# y = cellMeans[cellMeans['drugType'] == 'PosCon']['cellPerimeterMean'], | |
# c = 'green', | |
# label = 'positive' | |
#) | |
#plt.legend(); plt.title('Cell area vs Perimeter'); plt.xlabel('Cell area'); plt.ylabel('Cell perimeter') | |
#plt.xlim(0, 10000); plt.ylim(100,450) | |
#plt.savefig(projectDir + "/plots/cellAreaPerimeter.png", dpi = 600) | |
######################### SINGLE-CELL STUFF | |
### Normalize | |
cells = normalizeCells(cells, 'AreaShape_Area') | |
### Plot scatter for all cells in all wells | |
# colors according to drug type | |
colors = [] | |
for i in cellMeans.drugType: | |
if i == 'Cpd': | |
colors.append('r') | |
elif i == 'DMSO': | |
colors.append('b') | |
elif i == 'PosCon': | |
colors.append('g') | |
else: | |
colors.append('r') | |
# plot density | |
plt.subplot(2, 1, 1) | |
for well in range(1, 385): | |
# get cells from well | |
cellsWell = cells[cells['wellNumber'] == well] | |
density = gaussian_kde(cellsWell[measurement]) | |
xs = np.linspace(np.min(cellsWell[measurement]), np.max(cellsWell[measurement]), len(cellsWell[measurement])) | |
#density.covariance_factor = lambda : .25 | |
#density._compute_covariance() | |
plt.plot(xs, density(xs), c = colors[well - 1], label = cellMeans.ix[well]['drugType']) | |
plt.subplot(2, 1, 2) | |
for well in range(1, 385): | |
# get cells from well | |
cellsWell = cells[cells['wellNumber'] == well] | |
density = gaussian_kde(cellsWell[measurement + 'Normalized']) | |
xs = np.linspace(np.min(cellsWell[measurement + 'Normalized']), np.max(cellsWell[measurement + 'Normalized']), len(cellsWell[measurement + 'Normalized'])) | |
#density.covariance_factor = lambda : .25 | |
#density._compute_covariance() | |
plt.plot(xs, density(xs), c = colors[well - 1], label = cellMeans.ix[well]['drugType']) | |
plt.title(measurement) | |
plt.xlabel(measurement) | |
plt.ylabel('Density') | |
plt.legend() | |
plt.savefig(projectDir + "/plots/AreaShape_Area_density.png", dpi = 600) | |
plt.close() | |
##### Classify nuclei using a SVM | |
# annotate nuclei with well n. and type of drug | |
wellN = [] | |
drugType = [] | |
for i in range(len(nuclei['ImageNumber'])): | |
well = imageNumber2wellNumber(nuclei.ix[i]['ImageNumber']) | |
drug = cellMeans.ix[well]['drugType'] | |
wellN.append(well) | |
drugType.append(drug) | |
nuclei['wellNumber'] = wellN | |
nuclei['drugType'] = drugType | |
# take only nucleous shape measurements | |
nucleiMeasurements = ['AreaShape_Area', 'AreaShape_Center_X', 'AreaShape_Center_Y', 'AreaShape_Compactness', 'AreaShape_Eccentricity', 'AreaShape_EulerNumber', 'AreaShape_Extent', 'AreaShape_FormFactor', 'AreaShape_MajorAxisLength', 'AreaShape_MaxFeretDiameter', 'AreaShape_MaximumRadius', 'AreaShape_MeanRadius', 'AreaShape_MedianRadius', 'AreaShape_MinFeretDiameter', 'AreaShape_MinorAxisLength', 'AreaShape_Orientation', 'AreaShape_Perimeter', 'AreaShape_Solidity', 'AreaShape_Zernike_0_0', 'AreaShape_Zernike_1_1', 'AreaShape_Zernike_2_0', 'AreaShape_Zernike_2_2', 'AreaShape_Zernike_3_1', 'AreaShape_Zernike_3_3', 'AreaShape_Zernike_4_0', 'AreaShape_Zernike_4_2', 'AreaShape_Zernike_4_4', 'AreaShape_Zernike_5_1', 'AreaShape_Zernike_5_3', 'AreaShape_Zernike_5_5', 'AreaShape_Zernike_6_0', 'AreaShape_Zernike_6_2', 'AreaShape_Zernike_6_4', 'AreaShape_Zernike_6_6', 'AreaShape_Zernike_7_1', 'AreaShape_Zernike_7_3', 'AreaShape_Zernike_7_5', 'AreaShape_Zernike_7_7', 'AreaShape_Zernike_8_0', 'AreaShape_Zernike_8_2', 'AreaShape_Zernike_8_4', 'AreaShape_Zernike_8_6', 'AreaShape_Zernike_8_8', 'AreaShape_Zernike_9_1', 'AreaShape_Zernike_9_3', 'AreaShape_Zernike_9_5', 'AreaShape_Zernike_9_7', 'AreaShape_Zernike_9_9'] | |
y = nuclei['drugType'] | |
x = nuclei[nucleiMeasurements] | |
# reduce dimentionality with a PCA | |
pca = mlpy.PCA() # new PCA instance | |
pca.learn(x) # learn from data | |
z = pca.transform(x, k=2) # transform into 2D space | |
# plot pca | |
colors = [] | |
for i in y: | |
if i == 'Cpd': | |
colors.append('r') | |
if i == 'DMSO': | |
colors.append('b') | |
if i == 'PosCon': | |
colors.append('g') | |
plt.scatter(x = z[:, 0], y = z[:, 1], marker = '.', c = colors, alpha = 1) | |
plt.title("PCA on 48 nucleous features") | |
plt.xlabel("First component") | |
plt.ylabel("Second component") | |
plt.savefig(projectDir + "/plots/nucleousPCA.png", dpi = 600) | |
plt.close() | |
# new linear SVM instance | |
linearSVM = mlpy.LibSvm(kernel_type = 'linear') | |
linearSVM.learn(z, y) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment