Created
March 31, 2023 18:59
-
-
Save adyprat/2c2760e6cf8764cdfce88a3a26e3568d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from geojson import Point, Feature, FeatureCollection, Polygon | |
import geojson | |
import numpy as np | |
import sys | |
import pandas as pd | |
from tqdm import tqdm | |
import cv2 | |
from skimage.morphology import binary_dilation | |
from skimage import io | |
import h5py | |
# Requires (pip install) opencv-python, h5py, geojson, tqdm, pandas, scikit-image | |
# Labeled tif requires 1-based indexing, stored as uint32 values | |
# i.e., labeled image 0: background, everything else is an object | |
# labels in labeled image are unit32 values starting at 1 and are in sequential order | |
# Example usage: python labeledmat2qpjson.py DAPI_cell_area_labeled.mat 1 d 20221011_Fox_008175_Scan2_Annotations_Sheet2.csv 18 qpImport.geojson | |
# colors needed for qp v0.4 and above | |
cList = [[2, 62, 255], | |
[255, 124, 0], | |
[26, 201, 56], | |
[232, 0, 11], | |
[139, 43, 226], | |
[159, 72, 0], | |
[241, 76, 193], | |
[163, 163, 163], | |
[255, 196, 0], | |
[0, 215, 255]] | |
# the function below does all the heavy lifting | |
def getOutline(labeled_image, idx, offsetX, offsetY): | |
subIm = binary_dilation(labeled_image==idx)*255 | |
#io.imsave('temp.png',subIm) | |
polygons = cv2.findContours(subIm.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE, offset=(-1, -1)) | |
#print(polygons) | |
pts = polygons[0][0].flatten().reshape(-1, 2).round().astype(int) | |
poly = [(offsetX+float(x[0]),offsetY+float(x[1])) for x in pts] | |
poly.append(poly[0]) | |
poly = [poly] | |
return poly | |
def mask_to_geojson( | |
labeled_image: np.ndarray, | |
pxSize = 1, | |
objType = 'detection', | |
inDF = None, | |
output_file = '', | |
colID = 18): | |
# for all cells | |
#features = [None]*labeled_image.max() | |
# for cells in inDF | |
features = [None]*inDF.shape[0] | |
# for all cells use below loop, | |
# else use labels from inDF | |
#for idx in tqdm(range(labeled_image.max())): | |
#row = inDF.iloc[idx] | |
cMap = {x:cList[ix%10] for ix,x in enumerate(inDF.Annotation.unique())} | |
for idx, row in tqdm(inDF.iterrows()): | |
# change it to another column with centroid's x and y coordinates | |
# convert it to pixel ID form um. | |
# MIA coordinates are already in pixels!! | |
#print(objID) | |
x0 = round(row.X/pxSize) | |
y0 = round(row.Y/pxSize) | |
objID = labeled_image[y0,x0] | |
# Assumption: Largest object is 200 pixels wide. | |
# larger values are slower | |
# This could be improved by using the bounding box of the object instead | |
width = 200 | |
# width/2 since offset is calculated from centroid | |
imgOffset0 = int(min([x0,y0,width/2])) | |
imgOffset1 = width-imgOffset0 | |
subImg = labeled_image[y0-imgOffset0:y0+imgOffset1,x0-imgOffset0:x0+imgOffset1] | |
#print(x0,y0,subImg.max()) | |
poly = getOutline(subImg, objID, x0-imgOffset0, y0-imgOffset0) | |
# Change from row.phenotype to another column name if needed | |
# To add other metadata, pass it as a dictionary of measurement key-value pairs | |
# e.g., {"x: Cel: Mean:": y for ix, x, y in enumerate(row.items()) if ix >= 19} | |
features[idx] = Feature(geometry=Polygon(poly), | |
properties= {'objectType':objType,"classification": {"name": str(row.Annotation), "color":cMap[row.Annotation]}, | |
"measurements": {f"{x[0]}: Mean": x[1] for ix, x in enumerate(row.items()) if ix >= colID}, | |
"isLocked": False}) | |
features = [val for val in features if val is not None] | |
feature_collection = FeatureCollection(features) | |
# write geojson | |
with open(output_file, "w") as fp: | |
geojson.dump(feature_collection, fp, indent=2) | |
import sys | |
# labeled mat file from MIA output | |
iF = sys.argv[1] | |
#print(iF) | |
mat = h5py.File(iF,'r') | |
data = mat.get('NCl') | |
data = np.array(data).astype(np.uint32).T # For converting to a NumPy array | |
print(data.shape) | |
# pixel size | |
# if X, Y in csv are in pixels, set pixelSize to 1 | |
# MIA export has X and Y in pixels!! | |
pixSize = sys.argv[2] | |
# object type: a or d | |
# default is d (detection) | |
objType = sys.argv[3] | |
# csv file with cell data | |
# Should contain centroids: spatial_X, spatial_Y, and an optional phenotype column | |
# This can be modified to include other columns | |
annotCSV = sys.argv[4] | |
# column index for avg. expression values. | |
# 18 is the index of the first measurement column in the csv file shared. | |
colID = int(sys.argv[5]) | |
# output geojson file name | |
oF = sys.argv[6] | |
inDF = pd.read_csv(annotCSV, index_col=None) | |
if objType == 'a': | |
objType = 'annotation' | |
elif objType == 'd': | |
objType = 'detection' | |
else: | |
sys.exit('Err. objType must be a or d') | |
mask_to_geojson(data, pxSize = np.float(pixSize), | |
objType=objType,inDF=inDF, output_file= oF,colID=colID) | |
print("Done.") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment