Skip to content

Instantly share code, notes, and snippets.

@manthey
Created May 30, 2022 18:05
Show Gist options
  • Save manthey/af1afa20c255cbef6eed6ed4ada38555 to your computer and use it in GitHub Desktop.
Save manthey/af1afa20c255cbef6eed6ed4ada38555 to your computer and use it in GitHub Desktop.
Tasks to run test jobs comparing MONAI Label pathology with HistomicsTK
import sys
import time
import girder_client
# Base URL of your Girder instance, including the api/v1 path
apiUrl = 'http://abc.kitware.com:8085/api/v1'
# A Girder authentication token that has permission to run jobs
apiToken = '64characterGirderTokenString'
# The folder that holds all of the images to process; it is passed to each
# job as inputImageFile_folder
inputImageFileFolder = '628b8d1b422a5f5080a087a1'
# The folder where annotation files are stored.  Depending on settings, they
# may be auto-deleted after ingest
outputFolder = '6061cc5caf8f3ca46922387b'

# The jobs to run.  Each record names the endpoint 'path' to post to, the base
# 'parameters' shared by all of its conditions, and a list of 'variations'
# whose entries are laid over the base parameters for one condition each.  If
# a variation's 'skip' is True, that condition will be skipped.
# The conditions are (in order):
#   HistomicsTK with 1k x 1k ROI
#   HistomicsTK with 4k x 4k ROI
#   HistomicsTK with WSI
#   segmentation_nuclei with 1k x 1k ROI
#   segmentation_nuclei with 4k x 4k ROI
#   segmentation_nuclei with WSI
#   deepedit_nuclei with 1k x 1k ROI
#   deepedit_nuclei with 4k x 4k ROI
#   deepedit_nuclei with WSI
# Note that for the MONAILabel tasks,
#   'extra_params': '{"max_workers": ...}',
# is defined to allow them to run on an 18-core system with a 16 GB GPU.
# Also, note that min_fgnd_frac and min_nucleus_area / min_poly_area are set
# to functionally match between the algorithms (the foreground fraction is
# ignored in HistomicsTK if an ROI other than the whole image is specified).
jobs = [
    {
        # HistomicsTK nuclei detection
        'path': 'slicer_cli_web/dsarchive_histomicstk_latest/NucleiDetection/run',
        'parameters': {
            'inputImageFile_folder': inputImageFileFolder,
            'inputImageFile': '.*',
            'outputNucleiAnnotationFile_folder': outputFolder,
            'outputNucleiAnnotationFile': 'Detects Nuclei-outputNucleiAnnotationFile.anot',
            'min_fgnd_frac': '0.25',
            'min_nucleus_area': 80,
            'analysis_tile_size': 1024,
        },
        'variations': [
            {
                # 1k x 1k ROI (min_fgnd_frac repeats the base value)
                'skip': False,
                'min_fgnd_frac': '0.25',
                'analysis_roi': '[10000,10000,1024,1024]',
            },
            {
                # 4k x 4k ROI
                'skip': False,
                'analysis_roi': '[10000,10000,4096,4096]',
            },
            {
                # Whole slide image
                'skip': False,
                'analysis_roi': '[-1,-1,-1,-1]',
            },
        ],
    },
    {
        # MONAILabel annotation, once per model
        'path': 'slicer_cli_web/projectmonai_monailabel-dsa_latest/MONAILabelAnnotation/run',
        'parameters': {
            'inputImageFile_folder': inputImageFileFolder,
            'inputImageFile': '.*',
            'server': '__default__',
            'outputAnnotationFile_folder': outputFolder,
            'outputAnnotationFile': 'MONAILabel Annotations-outputAnnotationFile.anot',
            'min_poly_area': 80,
            'analysis_tile_size': 1024,
            'extra_params': '{"max_workers": 9}',
        },
        'variations': [
            {
                # segmentation_nuclei, 1k x 1k ROI
                'skip': False,
                'model_name': 'segmentation_nuclei',
                'analysis_roi': '[10000,10000,1024,1024]',
            },
            {
                # segmentation_nuclei, 4k x 4k ROI
                'skip': False,
                'model_name': 'segmentation_nuclei',
                'analysis_roi': '[10000,10000,4096,4096]',
            },
            {
                # segmentation_nuclei, whole slide image
                'skip': False,
                'min_fgnd_frac': '0.25',
                'model_name': 'segmentation_nuclei',
                'analysis_roi': '[-1,-1,-1,-1]',
            },
            {
                # deepedit_nuclei, 1k x 1k ROI
                'skip': False,
                'model_name': 'deepedit_nuclei',
                'analysis_roi': '[10000,10000,1024,1024]',
            },
            {
                # deepedit_nuclei, 4k x 4k ROI
                'skip': False,
                'model_name': 'deepedit_nuclei',
                'analysis_roi': '[10000,10000,4096,4096]',
            },
            {
                # deepedit_nuclei, whole slide image
                'skip': False,
                'min_fgnd_frac': '0.25',
                'model_name': 'deepedit_nuclei',
                'analysis_roi': '[-1,-1,-1,-1]',
            },
        ],
    },
]
# Job status values for which a job is still waiting or running.  These appear
# to match INACTIVE (0), QUEUED (1), and RUNNING (2) in the JobStatus
# enumeration of Girder's jobs plugin -- confirm against your Girder version.
pendingStatuses = (0, 1, 2)
# Status value treated as a successful finish (presumably JobStatus.SUCCESS).
successStatus = 3


def _waitForJob(client, job, pollInterval=1):
    """
    Poll a job until it leaves the pending statuses.

    A '.' is written to stdout for each poll that finds the job still pending,
    and a newline is written once polling stops.

    :param client: an authenticated girder client.
    :param job: the job document returned when the job was created; only its
        '_id' is used.
    :param pollInterval: seconds to sleep between polls.
    :returns: the job document from the final poll.
    """
    while True:
        job = client.get('/job/%s' % job['_id'])
        if job['status'] not in pendingStatuses:
            break
        sys.stdout.write('.')
        sys.stdout.flush()
        time.sleep(pollInterval)
    sys.stdout.write('\n')
    return job


gc = girder_client.GirderClient(apiUrl=apiUrl)
gc.token = apiToken
for jobRecord in jobs:
    for vari in jobRecord['variations']:
        if vari.get('skip'):
            continue
        # Start from the shared parameters and lay the variation over them.
        # Only the 'skip' control key itself is withheld; a substring test
        # here would also drop any real parameter whose name contains "skip".
        param = jobRecord['parameters'].copy()
        param.update({k: v for k, v in vari.items() if k != 'skip'})
        # Conditions run one at a time: each job is waited on before the next
        # one is submitted.
        job = _waitForJob(gc, gc.post(jobRecord['path'], parameters=param))
        if job['status'] != successStatus:
            # Surface failures now rather than discovering them later as
            # missing timings.
            sys.stderr.write('Job %s for %s ended with status %s\n' % (
                job['_id'], jobRecord['path'], job['status']))
import json
import sys
import dateutil.parser
import girder_client
import pandas as pd
# Base URL of your Girder instance, including the api/v1 path
apiUrl = 'http://abc.kitware.com:8085/api/v1'
# A Girder authentication token.  This script uses it to list jobs, look up
# items by path, and (with --purge) delete annotations.
apiToken = '64characterGirderTokenString'
# The resource path to the input image folder; each image name below is
# appended to it to look the item up
resourceRoot = '/collection/TCGA/area/'
# The location to store the CSV
outputCSV = '/mnt/transfer/summary.csv'

# The images to summarize, one (name, w, h) tuple per image.  w and h are
# multiplied together to report the image size in megapixels.
itemList = [
    ('TCGA-06-0130-01Z-00-DX1.0391b65f-4e1d-4444-abb0-e5804606d461.svs', 6000, 7350),
    ('TCGA-AA-3663-01Z-00-DX1.9AEDC003-2062-4876-8993-A5CEE4DDE1A9.svs', 10000, 9869),
    ('TCGA-HC-7080-01Z-00-DX1.c979be6a-e7c9-4840-8555-6f34499dd2bf.svs', 13429, 16750),
    ('TCGA-06-0138-01Z-00-DX3.2767efef-7d5f-40ff-9b36-5329d0fa6829.svs', 16000, 25013),
    ('TCGA-28-1746-01Z-00-DX1.06f187d2-b5e8-4b37-bb23-a707d0059944.svs', 24001, 26093),
    ('TCGA-DU-7010-01Z-00-DX1.542F36CC-9685-4780-94EB-B664CECFF09D.svs', 27888, 32276),
    ('TCGA-EM-A2CJ-01Z-00-DX1.D6F4716C-D6C7-4087-9B17-E1D89A3EEA8F.svs', 31723, 38601),
    ('TCGA-27-1831-01Z-00-DX4.b8a6fef5-9ba3-40b4-b32a-31485dbaa153.svs', 40001, 39991),
    ('TCGA-AP-A0LT-01Z-00-DX1.74C269EA-3118-4E65-AAFE-C1D186EAC207.svs', 51930, 38999),
    ('TCGA-HU-8244-01Z-00-DX1.EF15C805-A823-46EA-B737-2EC4A8C5C278.svs', 55775, 44812),
    ('TCGA-06-0195-01Z-00-DX2.5327662a-89b0-4297-ac6e-7af80f06cb3a.svs', 67917, 44541),
    ('TCGA-HT-7620-01Z-00-DX5.E88271BE-B362-4F17-96DF-31E421AA3143.svs', 75695, 47533),
    ('TCGA-FF-8047-01Z-00-DX1.75aa745c-bbe3-4869-a37b-c18ee50c14d5.svs', 76739, 55047),
    ('TCGA-BB-A6UO-01Z-00-DX1.11D049DC-EFC3-47EB-B390-A694BFD304A2.svs', 91632, 53467),
    ('TCGA-85-8072-01Z-00-DX1.3a0ad5a6-c93e-428c-94e7-809ceaf01ef1.svs', 80576, 69789),
    ('TCGA-V4-A9E7-01Z-00-DX1.465EFC95-3B6C-4836-A8BC-0A4F0BBFA601.svs', 81672, 78369),
    ('TCGA-C5-A3HD-01Z-00-DX1.11EECACD-371A-4B16-A21A-8E2A2258D3A9.svs', 135360, 53378),
    ('TCGA-OR-A5J6-01Z-00-DX1.C3F415F4-B679-433F-B8C2-33ED940272FB.svs', 91631, 88418),
    ('TCGA-26-1799-01Z-00-DX1.630B7217-0B01-4CDD-8ABF-0EC4CF293476.svs', 116825, 77258),
    ('TCGA-V1-A8WV-01Z-00-DX1.1419FDEA-BA02-42C4-9FF0-6F1F284BC6F3.svs', 113543, 88075),
    ('TCGA-AX-A1C4-01Z-00-DX1.237A4C5C-E87E-4904-83F2-B76196A247F0.svs', 123759, 89134),
    ('TCGA-DX-A6YR-01Z-00-DX1.8329CE17-C02B-4C56-8D02-54F40D95D624.svs', 137448, 88062),
    ('TCGA-DJ-A2PP-01Z-00-DX1.5BC2A5F2-1918-44E9-9544-1972974BA7BC.svs', 129472, 102134),
    ('TCGA-D8-A1JK-01Z-00-DX1.3190C919-A403-460D-9F6C-D2AB5FD3FD05.svs', 163743, 87914),
    ('TCGA-50-5068-01Z-00-DX2.0492A5C6-09CB-424B-BE20-10A1CBEA2E57.svs', 169320, 92215),
    ('TCGA-T7-A92I-01Z-00-DX1.3B036C1D-F8A7-475F-9830-C0972AD3889F.svs', 102912, 164096),
    ('TCGA-5N-A9KM-01Z-00-DX1.5197F750-D17F-459B-B74D-846F5F50F7B7.svs', 119040, 152832),
    ('TCGA-P3-A6T4-01Z-00-DX1.5DC1C4B4-7BB2-44AE-8D7A-FFFA3CB4BE63.svs', 203183, 97499),
    ('TCGA-T3-A92N-01Z-00-DX2.A08786DD-AF48-4551-BF71-E41C371C97C7.svs', 102656, 197888),
    ('TCGA-OL-A6VO-01Z-00-DX1.291D54D6-EBAF-4622-BD42-97AA5997F014.svs', 126464, 199936),
    ('TCGA-OL-A66J-01Z-00-DX1.661F7F70-E4D4-4875-B8C4-556F7927F3BA.svs', 130304, 247552),
]
gc = girder_client.GirderClient(apiUrl=apiUrl)
gc.token = apiToken
# Ask for the jobs of the two CLI types being compared.  A limit of 0 is
# presumably "no limit" -- confirm against the job endpoint's documentation.
jobTypes = [
    'projectmonai/monailabel-dsa:latest#MONAILabelAnnotation',
    'dsarchive/histomicstk:latest#NucleiDetection',
]
jobList = gc.get('/job', parameters={'types': json.dumps(jobTypes), 'limit': 0})

# jobSummary[image name][roi size][model] = duration in seconds, where the
# roi size is 1024, 4096, or -1 when neither ROI is found in the arguments
jobSummary = {}
for job in jobList:
    ca = job['kwargs']['container_args']
    # The model is whichever MONAILabel model name appears as an argument;
    # anything else is taken to be HistomicsTK's NucleiDetection
    if 'segmentation_nuclei' in ca:
        model = 'segmentation_nuclei'
    elif 'deepedit_nuclei' in ca:
        model = 'deepedit_nuclei'
    else:
        model = 'NucleiDetection'
    try:
        caText = str(ca)
        # The image name is the text from the first 'TCGA' up to and
        # including the next 'svs'
        imageName = 'TCGA' + caText.split('TCGA')[1].split('svs')[0] + 'svs'
        if '1024, 1024' in caText:
            size = 1024
        elif '4096, 4096' in caText:
            size = 4096
        else:
            size = -1
        # First timestamp entries for status 2 and status 3; these look like
        # the running and success statuses -- confirm against Girder's
        # JobStatus values
        started = next(t for t in job['timestamps'] if t.get('status') == 2)
        finished = next(t for t in job['timestamps'] if t.get('status') == 3)
    except Exception:
        # Jobs that cannot be parsed or never reached both statuses are
        # left out of the summary
        continue
    startTime = dateutil.parser.parse(started['time']).timestamp()
    endTime = dateutil.parser.parse(finished['time']).timestamp()
    duration = endTime - startTime
    if model != 'NucleiDetection':
        # NOTE(review): MONAILabel durations are halved; the reason is not
        # evident from this file -- confirm with the author
        duration = duration / 2
    # Only the first job seen for a given image / size / model is kept
    jobSummary.setdefault(imageName, {}).setdefault(size, {}).setdefault(model, duration)
# With a single --purge argument, the script deletes the annotations of every
# listed image and exits without writing a CSV.  Decide this once up front.
purge = len(sys.argv) == 2 and sys.argv[1] == '--purge'

# One row per image, with a column for each model / ROI size combination
dataList = []
for name, w, h in itemList:
    item = gc.get('/resource/lookup', parameters={'path': '%s%s' % (resourceRoot, name)})
    itemId = item['_id']
    if purge:
        print('Purging annotations for %s (%s)' % (itemId, name))
        gc.delete('/annotation/item/%s' % (itemId))
        continue
    # An image with no recorded job gets an empty summary (and None in every
    # timing column) instead of aborting the whole run with a KeyError
    timings = jobSummary.get(name, {})
    megapixels = int(w * h // 1e6)
    print(name, megapixels, timings)
    data = {
        'name': name,
        'w': w,
        'h': h,
        'megapixels': megapixels,
    }
    for size in [1024, 4096, -1]:
        for model in 'NucleiDetection', 'segmentation_nuclei', 'deepedit_nuclei':
            # The -1 size bucket is reported as the whole slide image column
            key = model + ' - ' + (str(size) if size != -1 else 'wsi')
            # None marks a condition that has no recorded duration
            data[key] = timings.get(size, {}).get(model)
    dataList.append(data)
if purge:
    sys.exit(0)
df = pd.DataFrame(dataList)
df.to_csv(outputCSV)
@manthey
Copy link
Author

manthey commented May 30, 2022

Use this with the 31 files from the TCGA. See notes at the top of each file for hard-coded values that need to be changed for your configuration.

You can purge existing annotations from the files by running python summary_monai.py --purge.

Then, run all of the jobs (set 'skip': True for conditions you don't want to run): python jobs_monai.py

Then, make a CSV file with the summary: python summary_monai.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment