adyprat/labeledmat2qpjson.py

## labeledmat2qpjson.py

from geojson import Point, Feature, FeatureCollection,  Polygon
import geojson
import numpy as np
import sys
import pandas as pd
from tqdm import tqdm
import cv2
from skimage.morphology import binary_dilation
from skimage import io
import h5py


# Requires (pip install) opencv-python, h5py, geojson, tqdm, pandas, scikit-image
# Labeled tif requires 1-based indexing, stored as uint32 values
# i.e., labeled image 0: background, everything else is an object
# labels in labeled image are unit32 values starting at 1 and are in sequential order
# Example usage: python labeledmat2qpjson.py DAPI_cell_area_labeled.mat 1 d 20221011_Fox_008175_Scan2_Annotations_Sheet2.csv 18 qpImport.geojson

# colors needed for qp v0.4 and above
cList = [[2, 62, 255],
 [255, 124, 0],
 [26, 201, 56],
 [232, 0, 11],
 [139, 43, 226],
 [159, 72, 0],
 [241, 76, 193],
 [163, 163, 163],
 [255, 196, 0],
 [0, 215, 255]]


# the function below does all the heavy lifting
def getOutline(labeled_image, idx, offsetX, offsetY):
    subIm = binary_dilation(labeled_image==idx)*255
    #io.imsave('temp.png',subIm)
    polygons = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1))
    #print(polygons)
    pts = polygons[0][0].flatten().reshape(-1, 2).round().astype(int)
    poly = [(offsetX+float(x[0]),offsetY+float(x[1])) for x in pts]
    poly.append(poly[0])
    poly = [poly]
    return poly

def mask_to_geojson(
    labeled_image: np.ndarray,
    pxSize = 1,
    objType = 'detection',
    inDF = None,
    output_file = '',
    colID = 18):
    # for all cells
    #features = [None]*labeled_image.max()
    # for cells in inDF
    features = [None]*inDF.shape[0]
    # for all cells use below loop,
    # else use labels from inDF
    #for idx in tqdm(range(labeled_image.max())):
        #row = inDF.iloc[idx]
    cMap = {x:cList[ix%10] for ix,x in enumerate(inDF.Annotation.unique())}
    for idx, row in tqdm(inDF.iterrows()):
        # change it to another column with centroid's x and y coordinates
        # convert it to pixel ID form um.
        # MIA coordinates are already in pixels!!
        #print(objID)
        x0 = round(row.X/pxSize)
        y0 = round(row.Y/pxSize)
        objID = labeled_image[y0,x0]
        # Assumption: Largest object is 200 pixels wide.
        # larger values are slower
        # This could be improved by using the bounding box of the object instead
        width = 200
        # width/2 since offset is calculated from centroid
        imgOffset0 = int(min([x0,y0,width/2]))
        imgOffset1 = width-imgOffset0
        subImg = labeled_image[y0-imgOffset0:y0+imgOffset1,x0-imgOffset0:x0+imgOffset1]
        #print(x0,y0,subImg.max())
        poly = getOutline(subImg, objID, x0-imgOffset0, y0-imgOffset0)
        # Change from row.phenotype to another column name if needed
        # To add other metadata, pass it as a dictionary of measurement key-value pairs
        # e.g., {"x: Cel: Mean:": y for ix, x, y in enumerate(row.items()) if ix >= 19}
        features[idx] = Feature(geometry=Polygon(poly),
                                properties= {'objectType':objType,"classification": {"name": str(row.Annotation), "color":cMap[row.Annotation]},
                                "measurements": {f"{x[0]}: Mean": x[1] for ix, x in enumerate(row.items()) if ix >= colID},
                                "isLocked": False})

    features = [val for val in features if val is not None]
    feature_collection = FeatureCollection(features)
    # write geojson
    with open(output_file, "w") as fp:
        geojson.dump(feature_collection, fp, indent=2)

import sys
# labeled mat file from MIA output
iF = sys.argv[1]
#print(iF)
mat = h5py.File(iF,'r')
data = mat.get('NCl')
data = np.array(data).astype(np.uint32).T # For converting to a NumPy array
print(data.shape)

# pixel size
# if X, Y in csv are in pixels, set pixelSize to 1
# MIA export has X and Y in pixels!!
pixSize = sys.argv[2]
# object type: a or d
# default is d (detection)
objType = sys.argv[3]
# csv file with cell data
# Should contain centroids: spatial_X, spatial_Y, and an optional phenotype column
# This can be modified to include other columns
annotCSV = sys.argv[4]

# column index for avg. expression values.
# 18 is the index of the first measurement column in the csv file shared.
colID = int(sys.argv[5])

# output geojson file name
oF = sys.argv[6]


inDF = pd.read_csv(annotCSV, index_col=None)
if objType == 'a':
    objType = 'annotation'
elif objType == 'd':
    objType = 'detection'
else:
    sys.exit('Err. objType must be a or d')

mask_to_geojson(data, pxSize = np.float(pixSize),
                 objType=objType,inDF=inDF, output_file= oF,colID=colID)

print("Done.")

	from geojson import Point, Feature, FeatureCollection, Polygon
	import geojson
	import numpy as np
	import sys
	import pandas as pd
	from tqdm import tqdm
	import cv2
	from skimage.morphology import binary_dilation
	from skimage import io
	import h5py


	# Requires (pip install) opencv-python, h5py, geojson, tqdm, pandas, scikit-image
	# Labeled tif requires 1-based indexing, stored as uint32 values
	# i.e., labeled image 0: background, everything else is an object
	# labels in labeled image are unit32 values starting at 1 and are in sequential order
	# Example usage: python labeledmat2qpjson.py DAPI_cell_area_labeled.mat 1 d 20221011_Fox_008175_Scan2_Annotations_Sheet2.csv 18 qpImport.geojson

	# colors needed for qp v0.4 and above
	cList = [[2, 62, 255],
	[255, 124, 0],
	[26, 201, 56],
	[232, 0, 11],
	[139, 43, 226],
	[159, 72, 0],
	[241, 76, 193],
	[163, 163, 163],
	[255, 196, 0],
	[0, 215, 255]]


	# the function below does all the heavy lifting
	def getOutline(labeled_image, idx, offsetX, offsetY):
	subIm = binary_dilation(labeled_image==idx)*255
	#io.imsave('temp.png',subIm)
	polygons = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1))
	#print(polygons)
	pts = polygons[0][0].flatten().reshape(-1, 2).round().astype(int)
	poly = [(offsetX+float(x[0]),offsetY+float(x[1])) for x in pts]
	poly.append(poly[0])
	poly = [poly]
	return poly

	def mask_to_geojson(
	labeled_image: np.ndarray,
	pxSize = 1,
	objType = 'detection',
	inDF = None,
	output_file = '',
	colID = 18):
	# for all cells
	#features = [None]*labeled_image.max()
	# for cells in inDF
	features = [None]*inDF.shape[0]
	# for all cells use below loop,
	# else use labels from inDF
	#for idx in tqdm(range(labeled_image.max())):
	#row = inDF.iloc[idx]
	cMap = {x:cList[ix%10] for ix,x in enumerate(inDF.Annotation.unique())}
	for idx, row in tqdm(inDF.iterrows()):
	# change it to another column with centroid's x and y coordinates
	# convert it to pixel ID form um.
	# MIA coordinates are already in pixels!!
	#print(objID)
	x0 = round(row.X/pxSize)
	y0 = round(row.Y/pxSize)
	objID = labeled_image[y0,x0]
	# Assumption: Largest object is 200 pixels wide.
	# larger values are slower
	# This could be improved by using the bounding box of the object instead
	width = 200
	# width/2 since offset is calculated from centroid
	imgOffset0 = int(min([x0,y0,width/2]))
	imgOffset1 = width-imgOffset0
	subImg = labeled_image[y0-imgOffset0:y0+imgOffset1,x0-imgOffset0:x0+imgOffset1]
	#print(x0,y0,subImg.max())
	poly = getOutline(subImg, objID, x0-imgOffset0, y0-imgOffset0)
	# Change from row.phenotype to another column name if needed
	# To add other metadata, pass it as a dictionary of measurement key-value pairs
	# e.g., {"x: Cel: Mean:": y for ix, x, y in enumerate(row.items()) if ix >= 19}
	features[idx] = Feature(geometry=Polygon(poly),
	properties= {'objectType':objType,"classification": {"name": str(row.Annotation), "color":cMap[row.Annotation]},
	"measurements": {f"{x[0]}: Mean": x[1] for ix, x in enumerate(row.items()) if ix >= colID},
	"isLocked": False})

	features = [val for val in features if val is not None]
	feature_collection = FeatureCollection(features)
	# write geojson
	with open(output_file, "w") as fp:
	geojson.dump(feature_collection, fp, indent=2)

	import sys
	# labeled mat file from MIA output
	iF = sys.argv[1]
	#print(iF)
	mat = h5py.File(iF,'r')
	data = mat.get('NCl')
	data = np.array(data).astype(np.uint32).T # For converting to a NumPy array
	print(data.shape)

	# pixel size
	# if X, Y in csv are in pixels, set pixelSize to 1
	# MIA export has X and Y in pixels!!
	pixSize = sys.argv[2]
	# object type: a or d
	# default is d (detection)
	objType = sys.argv[3]
	# csv file with cell data
	# Should contain centroids: spatial_X, spatial_Y, and an optional phenotype column
	# This can be modified to include other columns
	annotCSV = sys.argv[4]

	# column index for avg. expression values.
	# 18 is the index of the first measurement column in the csv file shared.
	colID = int(sys.argv[5])

	# output geojson file name
	oF = sys.argv[6]


	inDF = pd.read_csv(annotCSV, index_col=None)
	if objType == 'a':
	objType = 'annotation'
	elif objType == 'd':
	objType = 'detection'
	else:
	sys.exit('Err. objType must be a or d')

	mask_to_geojson(data, pxSize = np.float(pixSize),
	objType=objType,inDF=inDF, output_file= oF,colID=colID)

	print("Done.")