adyprat/labeledtif2geojson.py

## labeledtif2geojson.py

from geojson import Point, Feature, FeatureCollection,  Polygon
import geojson
import numpy as np
import tifffile
import sys
import pandas as pd
from tqdm import tqdm
import cv2
from skimage.morphology import binary_dilation
from skimage import io

# Requires (pip install) opencv-python==4.6.0, tifffile, geojson==2.5.0, tqdm, pandas, scikit-image
# The labeled tif requires 1-based indexing, stored as uint32 values,
# i.e., in the labeled image 0 is background and everything else is an object.
# Labels in the labeled image are uint32 values starting at 1 and are in sequential order.
# Example usage: python labeledtif2geojson.py --mask /path/to/tiff
# Example usage: python labeledtif2geojson.py --mask /path/to/tiff --pxSize 0.3774 --objType d --annotCSV /path/to/annotdf.csv --output /path/to/output.geojson

from tifffile import imread
from skimage.measure import label, regionprops, regionprops_table
import pandas as pd

import argparse

def get_parser() -> argparse.ArgumentParser:
    '''
    Build the command line parser of the labeled tif to GeoJSON converter.

    :return: an argparse ArgumentParser object for parsing command
        line parameters
    '''
    parser = argparse.ArgumentParser(
        description='Convert a labeled tif mask into a GeoJSON file of '
                    'QuPath objects.')

    # --mask is mandatory, so no default is declared: argparse never
    # falls back to a default for a required option.
    parser.add_argument('--mask', '-m', required=True,
        help='Path to tif file')

    parser.add_argument('--pxSize', '-p', default=1.0, type=float,
        help='Pixel size', required=False)

    parser.add_argument('--objType', '-t', default='d',
        help='Qupath object type a=annotation, d=detection', required=False)

    parser.add_argument('--annotCSV', '-a', default=None,
        help='Path to csv file with cell coordinate data', required=False)

    parser.add_argument('--output', '-o', default='output.geojson',
        help='Path of the GeoJSON file to write', required=False)
    return parser

def parse_arguments():
    '''
    Parse the command line with the parser built by get_parser
    :return: the argparse namespace holding the parsed command line arguments
    '''
    return get_parser().parse_args()

# the function below does all the heavy lifting
def getOutline(labeled_image, idx, offsetX, offsetY):
    '''
    Trace the outline of one labeled object as a closed polygon ring.

    :param labeled_image: labeled image (or a window cut from it) in which
        the object is stored with the value idx+1
    :param idx: zero-based object index; pixels equal to idx+1 form the object
    :param offsetX: value added to every x coordinate of the outline
    :param offsetY: value added to every y coordinate of the outline
    :return: a list holding one ring, itself a list of (x, y) float tuples
        whose last point repeats the first
    :raises IndexError: if no contour is found, i.e. no pixel of
        labeled_image equals idx+1
    '''
    # Dilate the binary mask of the object and scale it to 0/255 for OpenCV
    subIm = binary_dilation(labeled_image==idx+1)*255
    #io.imsave('temp.png',subIm)
    found = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST,
                             cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1))
    # OpenCV 4 returns (contours, hierarchy), OpenCV 3 returns
    # (image, contours, hierarchy): [-2] is the contour list in both.
    contours = found[-2]
    if len(contours) == 0:
        raise IndexError('label %s not found in labeled_image' % (idx+1))
    # RETR_LIST also reports holes and disconnected fragments; keep the
    # contour enclosing the largest area instead of an arbitrary first one.
    outer = max(contours, key=cv2.contourArea)
    pts = outer.reshape(-1, 2)
    poly = [(offsetX+float(x), offsetY+float(y)) for x, y in pts]
    # Close the ring by repeating its first point
    poly.append(poly[0])
    return [poly]

def mask_to_geojson(
    labeled_image: np.ndarray,
    pxSize = 1,
    objType = 'PathDetectionObject',
    inDF = None,
    output_file = ''):
    '''
    Write a GeoJSON FeatureCollection with one polygon per row of inDF.

    The index value idx of each row selects the object stored as idx+1 in
    labeled_image, and also the slot of the feature in the output list.

    :param labeled_image: labeled mask indexed as [row (y), column (x)]
    :param pxSize: spatial_X and spatial_Y are divided by this value to get
        pixel coordinates
    :param objType: value stored in the 'object_type' property of each feature
    :param inDF: DataFrame with the columns spatial_X, spatial_Y and phenotype
    :param output_file: path of the GeoJSON file to write
    :raises ValueError: if inDF is None
    '''
    if inDF is None:
        raise ValueError('inDF is required: one row per labeled object with '
                         'spatial_X, spatial_Y and phenotype columns')

    # Assumption: Largest object is 200 pixels wide.
    # larger values are slower
    # This could be improved by using the bounding box of the object instead
    width = 200
    # width/2 since offset is calculated from centroid
    halfWidth = width // 2

    features = [None]*int(labeled_image.max())
    for idx,row in tqdm(inDF.iterrows(), total=inDF.shape[0]):
        # change it to another column with centroid's x and y coordinates
        # convert the centroid from physical units to pixel indices
        x0 = int(round(row['spatial_X']/pxSize))
        y0 = int(round(row['spatial_Y']/pxSize))
        # Clamp each axis on its own so the window never starts before the
        # image; a shared offset would clip an object that is close to
        # only one of the two borders along the other axis.
        startX = x0 - min(x0, halfWidth)
        startY = y0 - min(y0, halfWidth)
        subImg = labeled_image[startY:startY+width, startX:startX+width]
        poly = getOutline(subImg, idx, startX, startY)
        # Change from row.phenotype to another column name if needed
        # To add other metadata, pass it as a dictionary of measurement key-value pairs
        features[idx] = Feature(geometry=Polygon(poly),
                                properties= {'object_type':objType,"classification": {"name": str(row.phenotype)},
                                "isLocked": False})
    # Drop the slots of labels that have no row in inDF
    features = [val for val in features if val is not None]
    feature_collection = FeatureCollection(features)
    # write geojson
    with open(output_file, "w") as fp:
        geojson.dump(feature_collection, fp, indent=2)
import sys
def main():
    '''
    Command line entry point: read the labeled tif, collect one centroid
    row per object and write the outlines as GeoJSON.
    '''
    opts = parse_arguments()
    print(opts)

    # Translate the one-letter code first, so a wrong value stops the run
    # before the image is read.
    qupathTypes = {'a': 'PathAnnotationObject', 'd': 'PathDetectionObject'}
    if opts.objType not in qupathTypes:
        sys.exit('Err. objType must be a or d')
    objType = qupathTypes[opts.objType]

    iF = tifffile.imread(opts.mask).astype(np.uint32)

    if opts.annotCSV is None:
        # The image is transposed so that the first centroid column is x
        inDF = pd.DataFrame(regionprops_table(iF.T,properties=['label','centroid']))
        inDF.loc[:,'phenotype'] = 'cells'
        inDF.columns = ['CellID','spatial_X','spatial_Y','phenotype']
        # These centroids are already pixel coordinates, so they must not
        # be divided by the pixel size again.
        pxSize = 1.0
    else:
        # read in csv file with cell coordinate data
        inDF = pd.read_csv(opts.annotCSV)
        pxSize = opts.pxSize

    print(inDF.head())
    mask_to_geojson(iF, pxSize = pxSize,
                     objType=objType,inDF=inDF, output_file= opts.output,)

    print("Done.")


if __name__ == '__main__':
    main()

	from geojson import Point, Feature, FeatureCollection, Polygon
	import geojson
	import numpy as np
	import tifffile
	import sys
	import pandas as pd
	from tqdm import tqdm
	import cv2
	from skimage.morphology import binary_dilation
	from skimage import io

	# Requires (pip install) opencv-python==4.6.0, tifffile, geojson==2.5.0, tqdm, pandas, scikit-image
	# The labeled tif requires 1-based indexing, stored as uint32 values,
	# i.e., in the labeled image 0 is background and everything else is an object.
	# Labels in the labeled image are uint32 values starting at 1 and are in sequential order.
	# Example usage: python labeledtif2geojson.py --mask /path/to/tiff
	# Example usage: python labeledtif2geojson.py --mask /path/to/tiff --pxSize 0.3774 --objType d --annotCSV /path/to/annotdf.csv --output /path/to/output.geojson

	from tifffile import imread
	from skimage.measure import label, regionprops, regionprops_table
	import pandas as pd

	import argparse

	def get_parser() -> argparse.ArgumentParser:
		'''
		Build the command line parser of the labeled tif to GeoJSON converter.

		:return: an argparse ArgumentParser object for parsing command
			line parameters
		'''
		parser = argparse.ArgumentParser(
			description='Convert a labeled tif mask into a GeoJSON file of '
						'QuPath objects.')

		# --mask is mandatory, so no default is declared: argparse never
		# falls back to a default for a required option.
		parser.add_argument('--mask', '-m', required=True,
			help='Path to tif file')

		parser.add_argument('--pxSize', '-p', default=1.0, type=float,
			help='Pixel size', required=False)

		parser.add_argument('--objType', '-t', default='d',
			help='Qupath object type a=annotation, d=detection', required=False)

		parser.add_argument('--annotCSV', '-a', default=None,
			help='Path to csv file with cell coordinate data', required=False)

		parser.add_argument('--output', '-o', default='output.geojson',
			help='Path of the GeoJSON file to write', required=False)
		return parser

	def parse_arguments():
		'''
		Parse the command line with the parser built by get_parser
		:return: the argparse namespace holding the parsed command line arguments
		'''
		return get_parser().parse_args()

	# the function below does all the heavy lifting
	def getOutline(labeled_image, idx, offsetX, offsetY):
		'''
		Trace the outline of one labeled object as a closed polygon ring.

		:param labeled_image: labeled image (or a window cut from it) in which
			the object is stored with the value idx+1
		:param idx: zero-based object index; pixels equal to idx+1 form the object
		:param offsetX: value added to every x coordinate of the outline
		:param offsetY: value added to every y coordinate of the outline
		:return: a list holding one ring, itself a list of (x, y) float tuples
			whose last point repeats the first
		:raises IndexError: if no contour is found, i.e. no pixel of
			labeled_image equals idx+1
		'''
		# Dilate the binary mask of the object and scale it to 0/255 for OpenCV
		subIm = binary_dilation(labeled_image==idx+1)*255
		#io.imsave('temp.png',subIm)
		found = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST,
			cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1))
		# OpenCV 4 returns (contours, hierarchy), OpenCV 3 returns
		# (image, contours, hierarchy): [-2] is the contour list in both.
		contours = found[-2]
		if len(contours) == 0:
			raise IndexError('label %s not found in labeled_image' % (idx+1))
		# RETR_LIST also reports holes and disconnected fragments; keep the
		# contour enclosing the largest area instead of an arbitrary first one.
		outer = max(contours, key=cv2.contourArea)
		pts = outer.reshape(-1, 2)
		poly = [(offsetX+float(x), offsetY+float(y)) for x, y in pts]
		# Close the ring by repeating its first point
		poly.append(poly[0])
		return [poly]

	def mask_to_geojson(
		labeled_image: np.ndarray,
		pxSize = 1,
		objType = 'PathDetectionObject',
		inDF = None,
		output_file = ''):
		'''
		Write a GeoJSON FeatureCollection with one polygon per row of inDF.

		The index value idx of each row selects the object stored as idx+1 in
		labeled_image, and also the slot of the feature in the output list.

		:param labeled_image: labeled mask indexed as [row (y), column (x)]
		:param pxSize: spatial_X and spatial_Y are divided by this value to get
			pixel coordinates
		:param objType: value stored in the 'object_type' property of each feature
		:param inDF: DataFrame with the columns spatial_X, spatial_Y and phenotype
		:param output_file: path of the GeoJSON file to write
		:raises ValueError: if inDF is None
		'''
		if inDF is None:
			raise ValueError('inDF is required: one row per labeled object with '
				'spatial_X, spatial_Y and phenotype columns')

		# Assumption: Largest object is 200 pixels wide.
		# larger values are slower
		# This could be improved by using the bounding box of the object instead
		width = 200
		# width/2 since offset is calculated from centroid
		halfWidth = width // 2

		features = [None]*int(labeled_image.max())
		for idx,row in tqdm(inDF.iterrows(), total=inDF.shape[0]):
			# change it to another column with centroid's x and y coordinates
			# convert the centroid from physical units to pixel indices
			x0 = int(round(row['spatial_X']/pxSize))
			y0 = int(round(row['spatial_Y']/pxSize))
			# Clamp each axis on its own so the window never starts before the
			# image; a shared offset would clip an object that is close to
			# only one of the two borders along the other axis.
			startX = x0 - min(x0, halfWidth)
			startY = y0 - min(y0, halfWidth)
			subImg = labeled_image[startY:startY+width, startX:startX+width]
			poly = getOutline(subImg, idx, startX, startY)
			# Change from row.phenotype to another column name if needed
			# To add other metadata, pass it as a dictionary of measurement key-value pairs
			features[idx] = Feature(geometry=Polygon(poly),
				properties= {'object_type':objType,"classification": {"name": str(row.phenotype)},
				"isLocked": False})
		# Drop the slots of labels that have no row in inDF
		features = [val for val in features if val is not None]
		feature_collection = FeatureCollection(features)
		# write geojson
		with open(output_file, "w") as fp:
			geojson.dump(feature_collection, fp, indent=2)
	import sys
	def main():
		'''
		Command line entry point: read the labeled tif, collect one centroid
		row per object and write the outlines as GeoJSON.
		'''
		opts = parse_arguments()
		print(opts)

		# Translate the one-letter code first, so a wrong value stops the run
		# before the image is read.
		qupathTypes = {'a': 'PathAnnotationObject', 'd': 'PathDetectionObject'}
		if opts.objType not in qupathTypes:
			sys.exit('Err. objType must be a or d')
		objType = qupathTypes[opts.objType]

		iF = tifffile.imread(opts.mask).astype(np.uint32)

		if opts.annotCSV is None:
			# The image is transposed so that the first centroid column is x
			inDF = pd.DataFrame(regionprops_table(iF.T,properties=['label','centroid']))
			inDF.loc[:,'phenotype'] = 'cells'
			inDF.columns = ['CellID','spatial_X','spatial_Y','phenotype']
			# These centroids are already pixel coordinates, so they must not
			# be divided by the pixel size again.
			pxSize = 1.0
		else:
			# read in csv file with cell coordinate data
			inDF = pd.read_csv(opts.annotCSV)
			pxSize = opts.pxSize

		print(inDF.head())
		mask_to_geojson(iF, pxSize = pxSize,
			objType=objType,inDF=inDF, output_file= opts.output,)

		print("Done.")


	if __name__ == '__main__':
		main()