Last active
June 22, 2021 09:51
-
-
Save will-moore/be0ca736882f44413e2ed14fcf13466c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import pandas | |
import omero.clients | |
import omero.cli | |
import omero | |
import os | |
# This script combines all the [imgname]_other_measurements.tsv files (for all
# the images with ROIs under the idr0079 Project) with the IDs of the ROIs and
# Shapes on those Images, to create a single CSV file for use with
# parade-crossfilter.
# Check out https://github.com/IDR/idr0079-hartmann-lateralline
# and update tables_path to point to that location.
# Path template for the per-image measurements table. Both "%s" placeholders
# are filled with the image name: once for the per-image directory and once
# for the file name prefix.
tables_path = os.path.join(
    "/path/to/idr0079-hartmann-lateralline",
    "experimentA/idr0079_experimentA_extracted_measurements/%s/",
    "%s_other_measurements.tsv",
)

# Name used to look up the Project whose images are processed.
project_name = "idr0079-hartmann-lateralline/experimentA"

print("tables_path", tables_path)
def get_mid_shape_id(roi):
    """Return the ID of the shape in the middle of the ROI's shape list.

    The shape at index ``len(shapes) // 2`` of ``roi.copyShapes()`` is used,
    so for an even number of shapes the later of the two central shapes is
    chosen.
    """
    shape_list = roi.copyShapes()
    middle = len(shape_list) // 2
    return shape_list[middle].id.val
def process_images(conn, images):
    """Combine the per-image measurement tables into one CSV of ROI rows.

    For every image, the tab-separated table located via ``tables_path`` is
    read and each of its rows is paired, by position, with the ROI at the
    same index in the list returned by the ROI service for that image. The
    output row holds the measurement values (column names with spaces
    replaced by underscores) plus the "Roi", "Shape" and "Image" IDs.

    Args:
        conn: connection object providing ``getRoiService()``.
        images: sequence of image objects exposing ``name`` and ``id``.

    The combined table is written to "idr0079_all_rois.csv" in the current
    working directory. Nothing is written when ``images`` is empty.
    """
    print("Processing %s images..." % len(images))

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append() was removed in pandas 2.0 and copied the
    # whole table on every call.
    header = None
    rows = []

    for image in images:
        # Read the measurements table for this image
        image_name = image.name
        print(image_name)
        table_pth = tables_path % (image_name, image_name)
        df = pandas.read_csv(table_pth, delimiter="\t")

        col_names = list(df.columns)
        # Output columns are named with_underscores (no spaces)
        renamed = {name: name.replace(" ", "_") for name in col_names}

        # Column order of the output is fixed by the first table read
        if header is None:
            header = ["Roi", "Shape", "Image"] + [renamed[c] for c in col_names]

        # Get all ROIs - The order seems to correspond to the order they were
        # created in, which matches the order of rows in the .tsv
        result = conn.getRoiService().findByImage(image.id, None)
        rois = result.rois

        # Go through every row, find Shape ID and ROI ID
        for index, row in df.iterrows():
            if (index + 1) > len(rois):
                print("WARNING! ROW greater than rois count", index, len(rois))
                continue
            roi = rois[index]
            new_row = {renamed[c]: row[c] for c in col_names}
            new_row["Roi"] = roi.id.val
            new_row["Shape"] = get_mid_shape_id(roi)
            new_row["Image"] = image.id
            rows.append(new_row)

    if header is None:
        # No image was processed, so there is no table (or header) to write
        print("No images to process, nothing written")
        return

    combined = pandas.DataFrame(rows)
    # Keep the header order; columns that only appear in later tables are
    # placed after it, in order of first appearance.
    extra_cols = [c for c in combined.columns if c not in header]
    combined = combined.reindex(columns=header + extra_cols)

    csv_name = "idr0079_all_rois.csv"
    print("writing", csv_name)
    # NOTE(review): mode="a" appends to an existing file, so running the
    # script twice leaves a second header line and duplicate rows in the
    # CSV - confirm whether overwriting was intended.
    combined.to_csv(csv_name, mode="a", index=False)
def main(conn):
    """Collect the images of the configured Project and process them.

    Looks up the Project named ``project_name``, switches the connection to
    the Project's group, gathers every image from the Project's datasets
    whose name contains no underscore and which has at least one ROI, and
    passes the resulting list to ``process_images``.

    Args:
        conn: gateway connection used for all lookups.

    Raises:
        ValueError: if no Project named ``project_name`` is found.
    """
    project = conn.getObject("Project", attributes={"name": project_name})
    if project is None:
        # Fail with a clear message instead of an AttributeError on None
        raise ValueError("Project not found: %s" % project_name)
    print("Project", project.id)
    conn.SERVICE_OPTS.setOmeroGroup(project.getDetails().group.id.val)

    # Gather the images to process from every Dataset in the Project
    images = []
    for dataset in project.listChildren():
        print("Dataset: ", dataset.name)
        for image in dataset.listChildren():
            # ignore _seg images etc.
            if '_' in image.name:
                continue
            if image.getROICount() == 0:
                print("No ROIs for image", image.name)
                continue
            images.append(image)

    process_images(conn, images)
if __name__ == "__main__":
    # NB: Assumes that the segmentation ROIs are added to the image
    # (*before* any other ROIs)
    #
    # USAGE:
    # $ python scripts/csv_rois_to_parade_csv
    with omero.cli.cli_login() as c:
        conn = omero.gateway.BlitzGateway(client_obj=c.get_client())
        try:
            main(conn)
        finally:
            # Close the gateway connection even when main() raises
            conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment