Skip to content

Instantly share code, notes, and snippets.

@adyprat
Created March 31, 2023 18:59
Show Gist options
  • Save adyprat/2c2760e6cf8764cdfce88a3a26e3568d to your computer and use it in GitHub Desktop.
Save adyprat/2c2760e6cf8764cdfce88a3a26e3568d to your computer and use it in GitHub Desktop.
from geojson import Point, Feature, FeatureCollection, Polygon
import geojson
import numpy as np
import sys
import pandas as pd
from tqdm import tqdm
import cv2
from skimage.morphology import binary_dilation
from skimage import io
import h5py
# Requires (pip install) opencv-python, h5py, geojson, tqdm, pandas, scikit-image
# Labeled tif requires 1-based indexing, stored as uint32 values
# i.e., labeled image 0: background, everything else is an object
# labels in labeled image are unit32 values starting at 1 and are in sequential order
# Example usage: python labeledmat2qpjson.py DAPI_cell_area_labeled.mat 1 d 20221011_Fox_008175_Scan2_Annotations_Sheet2.csv 18 qpImport.geojson
# colors needed for qp v0.4 and above
cList = [[2, 62, 255],
[255, 124, 0],
[26, 201, 56],
[232, 0, 11],
[139, 43, 226],
[159, 72, 0],
[241, 76, 193],
[163, 163, 163],
[255, 196, 0],
[0, 215, 255]]
# the function below does all the heavy lifting
def getOutline(labeled_image, idx, offsetX, offsetY):
subIm = binary_dilation(labeled_image==idx)*255
#io.imsave('temp.png',subIm)
polygons = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1))
#print(polygons)
pts = polygons[0][0].flatten().reshape(-1, 2).round().astype(int)
poly = [(offsetX+float(x[0]),offsetY+float(x[1])) for x in pts]
poly.append(poly[0])
poly = [poly]
return poly
def mask_to_geojson(
labeled_image: np.ndarray,
pxSize = 1,
objType = 'detection',
inDF = None,
output_file = '',
colID = 18):
# for all cells
#features = [None]*labeled_image.max()
# for cells in inDF
features = [None]*inDF.shape[0]
# for all cells use below loop,
# else use labels from inDF
#for idx in tqdm(range(labeled_image.max())):
#row = inDF.iloc[idx]
cMap = {x:cList[ix%10] for ix,x in enumerate(inDF.Annotation.unique())}
for idx, row in tqdm(inDF.iterrows()):
# change it to another column with centroid's x and y coordinates
# convert it to pixel ID form um.
# MIA coordinates are already in pixels!!
#print(objID)
x0 = round(row.X/pxSize)
y0 = round(row.Y/pxSize)
objID = labeled_image[y0,x0]
# Assumption: Largest object is 200 pixels wide.
# larger values are slower
# This could be improved by using the bounding box of the object instead
width = 200
# width/2 since offset is calculated from centroid
imgOffset0 = int(min([x0,y0,width/2]))
imgOffset1 = width-imgOffset0
subImg = labeled_image[y0-imgOffset0:y0+imgOffset1,x0-imgOffset0:x0+imgOffset1]
#print(x0,y0,subImg.max())
poly = getOutline(subImg, objID, x0-imgOffset0, y0-imgOffset0)
# Change from row.phenotype to another column name if needed
# To add other metadata, pass it as a dictionary of measurement key-value pairs
# e.g., {"x: Cel: Mean:": y for ix, x, y in enumerate(row.items()) if ix >= 19}
features[idx] = Feature(geometry=Polygon(poly),
properties= {'objectType':objType,"classification": {"name": str(row.Annotation), "color":cMap[row.Annotation]},
"measurements": {f"{x[0]}: Mean": x[1] for ix, x in enumerate(row.items()) if ix >= colID},
"isLocked": False})
features = [val for val in features if val is not None]
feature_collection = FeatureCollection(features)
# write geojson
with open(output_file, "w") as fp:
geojson.dump(feature_collection, fp, indent=2)
import sys
# labeled mat file from MIA output
iF = sys.argv[1]
#print(iF)
mat = h5py.File(iF,'r')
data = mat.get('NCl')
data = np.array(data).astype(np.uint32).T # For converting to a NumPy array
print(data.shape)
# pixel size
# if X, Y in csv are in pixels, set pixelSize to 1
# MIA export has X and Y in pixels!!
pixSize = sys.argv[2]
# object type: a or d
# default is d (detection)
objType = sys.argv[3]
# csv file with cell data
# Should contain centroids: spatial_X, spatial_Y, and an optional phenotype column
# This can be modified to include other columns
annotCSV = sys.argv[4]
# column index for avg. expression values.
# 18 is the index of the first measurement column in the csv file shared.
colID = int(sys.argv[5])
# output geojson file name
oF = sys.argv[6]
inDF = pd.read_csv(annotCSV, index_col=None)
if objType == 'a':
objType = 'annotation'
elif objType == 'd':
objType = 'detection'
else:
sys.exit('Err. objType must be a or d')
mask_to_geojson(data, pxSize = np.float(pixSize),
objType=objType,inDF=inDF, output_file= oF,colID=colID)
print("Done.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment