@DragaDoncila · Created September 26, 2018
import pandas as pd
import os
import re
from sklearn.decomposition import PCA
from microscopium import io
from microscopium.preprocess import montage_stream
from microscopium.preprocess import correct_multiimage_illumination
from microscopium.preprocess import find_background_illumination
from microscopium.features import default_feature_map
FILE_NAME_PREFIX = "Week1_150607_"
IMAGE_FILE_PATH = "/data/BBBC021/data.broadinstitute.org/bbbc/BBBC021/"
OUTPUT_FILE_PATH = "/data/bbbc_out/"
FEATURES_FILE = "./all_features.csv"
DATA_FILE = OUTPUT_FILE_PATH + "Data.csv"
# Wells to exclude when building filename groups in make_groups(); the original exclusion
# list is not included in this gist, so an empty placeholder is used here.
BAD_GROUPS = set()
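# Expected layout (illustrative; inferred from the filename pattern used below): IMAGE_FILE_PATH
# holds one sub-directory per plate (e.g. "Week1_22123/"), each containing TIFFs named like
# "Week1_150607_B02_s1_w1<hash>.tif", where B02 is the well, s1 the site/quadrant and w1 the
# channel. Corrected and montaged images and Data.csv are written under OUTPUT_FILE_PATH, and
# the feature table to FEATURES_FILE in the working directory.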
def main():
    ## get valid filenames and build output filenames
    # file_name_groups = make_groups(NUM_IMAGES)
    filenames, names_illum = get_valid_file_names(IMAGE_FILE_PATH)
    for i, directory_files in enumerate(filenames):
        run_illum(directory_files, names_illum[i])
        print("Directory processed: ", directory_files[0])

    # # concatenate names including file path to output directory, keeping channel and quadrant information
    # names_illum = [OUTPUT_FILE_PATH + filename[26:48] + "_illum.tif" for filename in filenames]
    ## illum images
    # run_illum(filenames, names_illum)

    ## montage illumed images
    # names_montage = [OUTPUT_FILE_PATH + FILE_NAME_PREFIX + group + "_montaged.tif" for group in file_name_groups]
    # run_montage(sorted(names_illum), names_montage)

    ## run features on images
    # ims = map(io.imread, names_montage)
    # output_features(ims, names_montage, FEATURES_FILE)

    ## get x y coordinates
    # coords = pca_transform(FEATURES_FILE)

    ## generate CSV of coordinates
    # generate_bokeh_csv(coords, file_name_groups, names_montage)
def get_valid_file_names(filepath):
    """
    Get full filenames relative to the top level directory for each file in the BBBC trial,
    and construct matching filenames with paths for saving output.

    :param filepath: path to the directory containing folders of images
    :return (valid_filenames, illum_filenames): tuple of lists of filenames for reading and
        saving images, one list per plate directory
    """
    filename_reg = r'(^Week._.*)(_..._s._w.)(.*)(\.tif)$'
    valid_filenames = []
    illum_filenames = []
    for root, directories, filenames in os.walk(filepath):
        current_subdir = root
        new_subdir = []
        new_subdir_illum = []
        for filename in os.listdir(current_subdir):
            match = re.search(filename_reg, filename)
            if match:
                new_subdir.append(os.path.join(root, match.group(1) + match.group(2) + match.group(3) + match.group(4)))
                # prefix the output name with the plate directory (the part of root after
                # filepath) so corrected images from different plates do not collide
                new_subdir_illum.append("".join([OUTPUT_FILE_PATH, root[len(filepath):]]) + "_" + match.group(1) + match.group(2) + match.group(3) + "_illum" + match.group(4))
        if len(new_subdir) != 0 and len(new_subdir_illum) != 0:
            valid_filenames.append(new_subdir)
            illum_filenames.append(new_subdir_illum)
    return (valid_filenames, illum_filenames)
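# Example (hypothetical filename, for illustration only): a file such as
#     /data/BBBC021/data.broadinstitute.org/bbbc/BBBC021/Week1_22123/Week1_150607_B02_s1_w1<hash>.tif
# would be collected into valid_filenames, and the matching output name
#     /data/bbbc_out/Week1_22123_Week1_150607_B02_s1_w1<hash>_illum.tif
# into illum_filenames, grouped per plate directory.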
def run_illum(filenames, names_out):
    """
    Find background illumination and correct all images corresponding to elements in filenames.
    Save corrected images using names_out, which includes a relative path from the top level directory.

    :param filenames: list of valid filenames with relative paths from top level directory
    :param names_out: list of valid filenames for saving output with relative paths from top level directory
    """
    illum = find_background_illumination(filenames)
    corrected_images = correct_multiimage_illumination(filenames, illum=illum)
    for (image, name) in zip(corrected_images, names_out):
        io.imsave(name, image)
def run_montage(filenames, names_out):
    """
    Read images from filenames, stitch and stack their quadrants and channels, then save to new
    files using names_out.

    :param filenames: list of filenames with relative paths to top level, sorted by well, quadrant
        and channel, e.g.
        filenames = ['B02_s1_w1_illum.tif', 'B02_s1_w2_illum.tif', 'B02_s1_w4_illum.tif',
                     'B02_s2_w1_illum.tif', 'B02_s2_w2_illum.tif', 'B02_s2_w4_illum.tif',
                     'B02_s3_w1_illum.tif', 'B02_s3_w2_illum.tif', 'B02_s3_w4_illum.tif',
                     'B02_s4_w1_illum.tif', 'B02_s4_w2_illum.tif', 'B02_s4_w4_illum.tif']
        will result in one image (B02) with quadrants [[s1, s2], [s3, s4]] where each quadrant
        is stacked in the order [w4, w2, w1]. This example assumes files at the top level directory.
    :param names_out: list of filenames with relative paths to top level for output
    """
    illumed_ims = map(io.imread, filenames)
    montaged_ims = montage_stream(illumed_ims, montage_order=[[0, 1], [2, 3]], channel_order=[2, 1, 0])
    for (image, name) in zip(montaged_ims, names_out):
        io.imsave(name, image)
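# Example usage (hypothetical variable names, for illustration only): the twelve sorted
# *_illum.tif files for well B02 from the docstring above collapse into one stitched,
# channel-stacked image.
# run_montage(
#     sorted(b02_illum_names),
#     [OUTPUT_FILE_PATH + "Week1_150607_B02_montaged.tif"],
# )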
def output_features(ims, filenames, out_file):
    """
    Build a default feature map for each image in ims and output a dataframe of
    [filenames, features] to out_file as CSV for reading in later.

    :param ims: iterable of images as numpy arrays
    :param filenames: filenames corresponding to each image in ims, with relative path to top level directory
    :param out_file: name of CSV file to save dataframe, with relative path to top level directory
    """
    # generate filenames column to exist as first column of feature DF
    filenames_col = ["Filenames"]
    filenames_col.extend(filenames)
    filenames_col = pd.DataFrame(filenames_col)

    all_image_features = pd.DataFrame()
    # set up flag to only add header row once
    flag = True
    for im, im_name in zip(ims, filenames):
        image_features, feature_names = default_feature_map(im)
        # make sure header row is added to dataframe in first iteration
        if flag:
            all_image_features = all_image_features.append(pd.DataFrame(feature_names).transpose())
            flag = False
        image_features = pd.DataFrame(image_features).transpose()
        all_image_features = all_image_features.append(image_features, ignore_index=True)
    # concatenate filenames column to the features and save to CSV
    all_image_features = pd.concat([filenames_col, all_image_features], axis=1)
    all_image_features.to_csv(out_file)
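# Layout of the CSV written above: column 0 is the pandas index, column 1 holds "Filenames"
# followed by the image names, and the remaining columns hold the feature values, with the
# feature names stored in the first data row. pca_transform() below depends on this layout
# when it slices with .iloc[1:, 2:].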
def make_groups(num_images):
    """
    Concatenate strings corresponding to the filename IDs in the BBBC trial dataset,
    skipping any IDs listed in BAD_GROUPS. Generates at most num_images groups.

    :param num_images: maximum number of filename groups to generate
    :return: list of filename groups e.g. ["B02", "B03", "D03"]
    """
    file_name_groups = []
    for letter in "BCDEFG":
        for num in range(2, 12):
            group = letter + "{:02}".format(num)
            if group not in BAD_GROUPS:
                file_name_groups.append(group)
            if len(file_name_groups) == num_images:
                return file_name_groups
    return file_name_groups
def pca_transform(features_filename):
    """
    Read a file of image features into a dataframe and perform a 2-component PCA, returning the
    two component values of each image.

    :param features_filename: filename of CSV containing image features
    :return coords: np array of 2 components for each image
    """
    all_image_features = pd.read_csv(features_filename)
    pca = PCA(2)
    # skip the feature-name row and the index/filename columns written by output_features
    coords = pca.fit_transform(all_image_features.iloc[1:, 2:])
    return coords
def generate_bokeh_csv(coords, file_name_groups, names):
    """
    Generate a CSV with columns
        index,info,url,x,y
    to work with the Bokeh app.

    :param coords: the x,y components of each data point
    :param file_name_groups: the valid filename IDs generated for this application e.g. ["B02", "B03", "D02"]
    :param names: the names of the images you wish to load into Bokeh, relative to the top level directory
    """
    coords_df = pd.DataFrame(coords)
    indices = pd.DataFrame([FILE_NAME_PREFIX + group for group in file_name_groups])
    info = pd.DataFrame([FILE_NAME_PREFIX + group + "_info" for group in file_name_groups])
    # strip the path from each filename since the CSV will be stored in the same folder as the images
    urls = pd.DataFrame([os.path.basename(name) for name in names])
    coord_csv = pd.concat([indices, info, urls, coords_df], axis=1)
    coord_csv.columns = ["index", "info", "url", "x", "y"]
    coord_csv.to_csv(DATA_FILE)
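# Resulting Data.csv (illustrative): one row per montaged image with columns index, info, url,
# x and y, e.g. index "Week1_150607_B02", info "Week1_150607_B02_info",
# url "Week1_150607_B02_montaged.tif", and the two PCA components as x and y.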
if __name__ == "__main__":
    main()