jcasado/ome-tiff-quantification.py

## ome-tiff-quantification.py
# Instructions to run this on one sample:
# conda env create -n quant -f quantification.yml
# conda activate quant
# python quantification.py -M /path/to/cellMask.tif -in /path/to/Sample_01.ome.tif -o /path/to/output/ -ch /path/to/channel_names.csv -c 46
# <julia.casado@helsinki.fi>
import os
import argparse
from pathlib import Path
import csv
import time
import skimage.io
import pandas as pd
import numpy as np
import skimage.measure as measure
import multiprocessing as mp
import tifffile

# Global variables
global CHANNEL_PROPS
CHANNEL_PROPS=['mean_intensity']
global MORPH_PROPS
MORPH_PROPS=['label','centroid','area','eccentricity']
global IMAGEPATH
global CURRENT_MASK
global CHANNELS

def channelQuantification(channel):
    """This function is called inside the multiprocessing Pool.map call
     'IMAGEPATH' and 'CURRENT_MASK'  must be shared resources"""
    # load the channel image
    channel_image_loaded = tifffile.imread(IMAGEPATH, key = channel)
    # first channel for each new mask will also have morphologicla features
    if channel == 0:
        props = MORPH_PROPS + CHANNEL_PROPS
    else:
        props = CHANNEL_PROPS
    properties = measure.regionprops_table(CURRENT_MASK, channel_image_loaded, properties=props)
    result = pd.DataFrame(properties)
    # Default properties: label,mean_intensity,centroid-0,centroid-1,area,eccentricity
    # Should be CellID but Cycif_viewer uses ID
    result.rename(columns={'mean_intensity':CHANNELS[channel], 'label':'ID', 'centroid-0':'X Position', 'centroid-1':'Y Position', 'area':'Area', 'eccentricity':'Eccentricity'},inplace=True)

    return result

def imageQuantification(masks_loaded,threads):
    mask_names = list(masks_loaded.keys())
    #Create empty dictionary to store channel results per mask
    result = {m_name: [] for m_name in mask_names}
    # New pool for each time we modify the global variable: 'CURRENT_MASK'
    pool = mp.Pool(threads)
    res = pool.map(channelQuantification, range(len(CHANNELS)))
    pool.close()
    pool.join()

    result[mask_names[0]] = pd.concat(res,axis=1)
    if len(mask_names) > 1:
        print('THIS PRINT SHOULD NOT SHOW')
        for mask in range(len(mask_names)):
            CURRENT_MASK = masks_loaded[mask_names[mask]]
            # New pool for each time we modify the global variable: 'CURRENT_MASK'
            pool = mp.Pool(threads)
            res = pool.map(channelQuantification, range(len(CHANNELS)))
            pool.close()
            pool.join()
            result[mask] = pd.concat(res)
        #Concatenate all data from all masks to return
        merged_data = pd.concat([result[mask] for mask in mask_names],axis=1)
    else:
        merged_data = result[mask_names[0]]
    return merged_data

def checkChannelNames(channel_names):
    """Check valid and unique names. Expects a single column with no header"""
    channel_names_loaded = pd.read_csv(channel_names, header = None)
    channel_names_loaded.columns = ["marker"]
    channel_names_loaded_list = list(channel_names_loaded.marker)

    #Check for unique marker names -- create new list to store new names
    channel_names_loaded_checked = []
    for idx,val in enumerate(channel_names_loaded_list):
        #Check for unique value
        if channel_names_loaded_list.count(val) > 1:
            #If unique count greater than one, add suffix
            channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))
        else:
            #Otherwise, leave channel name
            channel_names_loaded_checked.append(val)

    channel_names_loaded, channel_names_loaded_list = None, None
    return channel_names_loaded_checked

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--maskPaths','-m',nargs='+')
    parser.add_argument('--imagePath', '-in')
    parser.add_argument('--channelNamesFile','-ch')
    parser.add_argument('--outputFolder','-o')
    parser.add_argument('--threads','-c',type=int)
    args = parser.parse_args()

    t = time.time()
    CHANNELS = checkChannelNames(args.channelNamesFile)
    #Read the masks
    masks_loaded = {}
    for m in args.maskPaths:
        m_full_name = os.path.basename(m)
        m_name = m_full_name.split('.')[0]
        masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')})
    CURRENT_MASK = masks_loaded[list(masks_loaded.keys())[0]] # load first mask to run by default unless there are more
    IMAGEPATH = args.imagePath
    # Call to main function to quantify image
    scdata = imageQuantification(masks_loaded,args.threads)

    #Write output
    output = Path(args.outputFolder)
    im_full_name = os.path.basename(IMAGEPATH)
    im_name = im_full_name.split('.')[0]
    scdata.to_csv(str(Path(os.path.join(str(args.outputFolder),str(im_name+".csv")))),index=False)
    print('Sample {} quantified in {:.2f} seconds'.format(im_name, time.time() - t))

#EOF
	# Instructions to run this on one sample:
	# conda env create -n quant -f quantification.yml
	# conda activate quant
	# python quantification.py -M /path/to/cellMask.tif -in /path/to/Sample_01.ome.tif -o /path/to/output/ -ch /path/to/channel_names.csv -c 46
	# <julia.casado@helsinki.fi>
	import os
	import argparse
	from pathlib import Path
	import csv
	import time
	import skimage.io
	import pandas as pd
	import numpy as np
	import skimage.measure as measure
	import multiprocessing as mp
	import tifffile

	# Global variables
	global CHANNEL_PROPS
	CHANNEL_PROPS=['mean_intensity']
	global MORPH_PROPS
	MORPH_PROPS=['label','centroid','area','eccentricity']
	global IMAGEPATH
	global CURRENT_MASK
	global CHANNELS

	def channelQuantification(channel):
	"""This function is called inside the multiprocessing Pool.map call
	'IMAGEPATH' and 'CURRENT_MASK' must be shared resources"""
	# load the channel image
	channel_image_loaded = tifffile.imread(IMAGEPATH, key = channel)
	# first channel for each new mask will also have morphologicla features
	if channel == 0:
	props = MORPH_PROPS + CHANNEL_PROPS
	else:
	props = CHANNEL_PROPS
	properties = measure.regionprops_table(CURRENT_MASK, channel_image_loaded, properties=props)
	result = pd.DataFrame(properties)
	# Default properties: label,mean_intensity,centroid-0,centroid-1,area,eccentricity
	# Should be CellID but Cycif_viewer uses ID
	result.rename(columns={'mean_intensity':CHANNELS[channel], 'label':'ID', 'centroid-0':'X Position', 'centroid-1':'Y Position', 'area':'Area', 'eccentricity':'Eccentricity'},inplace=True)

	return result

	def imageQuantification(masks_loaded,threads):
	mask_names = list(masks_loaded.keys())
	#Create empty dictionary to store channel results per mask
	result = {m_name: [] for m_name in mask_names}
	# New pool for each time we modify the global variable: 'CURRENT_MASK'
	pool = mp.Pool(threads)
	res = pool.map(channelQuantification, range(len(CHANNELS)))
	pool.close()
	pool.join()

	result[mask_names[0]] = pd.concat(res,axis=1)
	if len(mask_names) > 1:
	print('THIS PRINT SHOULD NOT SHOW')
	for mask in range(len(mask_names)):
	CURRENT_MASK = masks_loaded[mask_names[mask]]
	# New pool for each time we modify the global variable: 'CURRENT_MASK'
	pool = mp.Pool(threads)
	res = pool.map(channelQuantification, range(len(CHANNELS)))
	pool.close()
	pool.join()
	result[mask] = pd.concat(res)
	#Concatenate all data from all masks to return
	merged_data = pd.concat([result[mask] for mask in mask_names],axis=1)
	else:
	merged_data = result[mask_names[0]]
	return merged_data

	def checkChannelNames(channel_names):
	"""Check valid and unique names. Expects a single column with no header"""
	channel_names_loaded = pd.read_csv(channel_names, header = None)
	channel_names_loaded.columns = ["marker"]
	channel_names_loaded_list = list(channel_names_loaded.marker)

	#Check for unique marker names -- create new list to store new names
	channel_names_loaded_checked = []
	for idx,val in enumerate(channel_names_loaded_list):
	#Check for unique value
	if channel_names_loaded_list.count(val) > 1:
	#If unique count greater than one, add suffix
	channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))
	else:
	#Otherwise, leave channel name
	channel_names_loaded_checked.append(val)

	channel_names_loaded, channel_names_loaded_list = None, None
	return channel_names_loaded_checked

	if __name__ == '__main__':
	parser = argparse.ArgumentParser()
	parser.add_argument('--maskPaths','-m',nargs='+')
	parser.add_argument('--imagePath', '-in')
	parser.add_argument('--channelNamesFile','-ch')
	parser.add_argument('--outputFolder','-o')
	parser.add_argument('--threads','-c',type=int)
	args = parser.parse_args()

	t = time.time()
	CHANNELS = checkChannelNames(args.channelNamesFile)
	#Read the masks
	masks_loaded = {}
	for m in args.maskPaths:
	m_full_name = os.path.basename(m)
	m_name = m_full_name.split('.')[0]
	masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')})
	CURRENT_MASK = masks_loaded[list(masks_loaded.keys())[0]] # load first mask to run by default unless there are more
	IMAGEPATH = args.imagePath
	# Call to main function to quantify image
	scdata = imageQuantification(masks_loaded,args.threads)

	#Write output
	output = Path(args.outputFolder)
	im_full_name = os.path.basename(IMAGEPATH)
	im_name = im_full_name.split('.')[0]
	scdata.to_csv(str(Path(os.path.join(str(args.outputFolder),str(im_name+".csv")))),index=False)
	print('Sample {} quantified in {:.2f} seconds'.format(im_name, time.time() - t))

	#EOF