Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
The code from https://codereview.stackexchange.com/questions/192027/genetic-sequence-visualizer-generating-large-images, slightly optimized. It avoids storing values that can easily be computed on the fly, which reduces memory use roughly 40-fold.
#!/usr/bin/env python3
# Python3
#
# Run from command line
#
import logging
from argparse import ArgumentParser
from copy import deepcopy, copy
from datetime import timedelta
from math import ceil
from os import remove, makedirs
from os.path import dirname, exists, splitext
from re import sub
from time import time

import numpy as np
from PIL import Image, ImageDraw

from largearray import Array
# Per-run identifier; it is only used to name the default debug log and is
# written to the log on start-up.
uuid = id(time())

# ---- Command-line interface ------------------------------------------------
parser = ArgumentParser()
parser.add_argument("file", help="Location of input file. path/to/file (FASTA file formats supported)")
parser.add_argument("-i", "--image-name",
                    help="Where to save finished image file. path/to/file (Default: Name_of_input_file.png)")
parser.add_argument("-s", "--dump-size", help="The size of temp files to dump to disk. (Default & Max: 5)", type=int)
parser.add_argument("-t", "--temp", help="Where to dump temp files. path/to/directory/ (Default: .cache/)", type=str)
parser.add_argument("-d", "--debug-file",
                    help="Where to store debug file. path/to/file (Default: .cache/debug<id>.log)")
args = parser.parse_args()

# ---- Resolve options, falling back to defaults -----------------------------
filepath = args.file
# splitext() strips only a genuine file extension, so "genome" becomes
# "genome.png" (not ".png") and dots in directory names are left alone.
ipath = splitext(filepath)[0] + ".png"
if args.image_name:
    ipath = args.image_name
print(ipath)
dsize = 5
if args.dump_size:
    dsize = args.dump_size
cachedir = ".cache/"
if args.temp:
    cachedir = args.temp
debugpath = '.cache/debug%d.log' % uuid
if args.debug_file:
    debugpath = args.debug_file

if not exists(filepath):
    raise Exception("Path of input file does not exist")
print("Debug at %s" % debugpath)
if exists(debugpath):
    remove(debugpath)
if not exists(cachedir):
    makedirs(cachedir)
# The debug log does not necessarily live inside cachedir (the default is
# always under .cache/, and -d may point anywhere), so its directory has to be
# created separately or logging.basicConfig() fails on a missing directory.
debugdir = dirname(debugpath)
if debugdir and not exists(debugdir):
    makedirs(debugdir)
logging.basicConfig(filename=debugpath, level=logging.DEBUG)
logging.info("Init: %d" % uuid)
# Drop names that are no longer needed once set-up is complete.
del parser, ArgumentParser, remove, exists
print("Generating vizualization of %s" % filepath)
starttime = time()

# Read the input inside a context manager so the handle is closed even if
# reading fails part-way through.
with open(filepath, 'r') as fasta:
    logging.info("File opened")
    logging.info("Serializing %s ..." % filepath)
    # Lines starting with '>' are FASTA description lines, not sequence data.
    # Iterating the handle directly avoids the extra list that readlines()
    # would build.
    raw = ''.join([line for line in fasta if not line.startswith('>')]).replace('\n', "").lower()
    logging.info("Replaced FASTA info")
del fasta

raw = sub(r'[rykmswbdhv-]', "n", raw)  # Handles miscellaneous FASTA characters
raw = sub(r'[^atgcn]', "", raw)  # Handles 4 bases and not-known

# One list item per base. NOTE(review): Array comes from the project's
# largearray module; it presumably spills to files under cachedir once
# maxitems is exceeded, and trim() presumably flushes the tail - confirm there.
sequence = Array(name="sequence", cachedirectory=cachedir, a=list(raw), maxitems=(dsize * 10))
sequence.trim()
logging.info("Parsed characters (%d items)" % len(sequence))
del sub, raw

# endtime collects one timestamp per finished stage; index 0 is "file parsed".
endtime = [time()]
print("The input file has been serialized. %s (%d items) Calculating path..." % (
    str(timedelta(seconds=(endtime[0] - starttime))), len(sequence)))
# Second Array that receives the bases again while the path is computed.
# NOTE(review): presumably disk-backed like `sequence`; see largearray.
pendingactions = Array(name="pendingactions", cachedirectory=cachedir, maxitems=dsize)
expected_dumps = ceil(len(sequence) / pendingactions.maxitems)
logging.info("%d temp files will be created [pendingactions]" % expected_dumps)
# Cursor displacement (x, y) applied for each base; "n" leaves the cursor
# where it is.
action_lookup = {
    base: np.array(step)
    for base, step in (
        ("a", (0, -1)),
        ("t", (0, 1)),
        ("g", (-1, 0)),
        ("c", (1, 0)),
        ("n", (0, 0)),
    )
}

# RGB colour of the pixel drawn for each base.
color_lookup = dict(
    a=(0, 255, 0),
    t=(255, 0, 0),
    g=(255, 0, 255),
    c=(0, 0, 255),
    n=(0, 0, 0),
)
# Walk the sequence once from (0, 0), tracking the smallest and largest
# coordinates the cursor reaches; together they give the path's bounding box.
top_left = np.zeros(2, dtype=int)
bottom_right = np.zeros(2, dtype=int)
cursor = np.zeros(2, dtype=int)
for base in sequence:
    # Move the cursor by the step associated with this base.
    cursor += action_lookup[base]
    np.maximum(bottom_right, cursor, out=bottom_right)
    np.minimum(top_left, cursor, out=top_left)
    pendingactions.append(base)
pendingactions.trim()

# Final image size: bounding box plus a 10px border on each axis side.
border = np.array((10, 10))
dim = 2 * border + (bottom_right - top_left)

endtime.append(time())  # index 1: path calculated
elapsed = str(timedelta(seconds=(endtime[1] - starttime)))
size_label = "(" + str(dim[0]) + "x" + str(dim[1]) + ")"
print("The path has been calculated. %s Rendering image... %s" % (elapsed, size_label))
with Image.new("RGBA", tuple(dim), None) as img:
    logging.info("Initial image created. (%d x %d)" % (dim[0], dim[1]))
    draw = ImageDraw.Draw(img)
    logging.info("Draw object created")

    # Shift the path so that its most negative coordinate lands on the border.
    start_cursor = np.abs(top_left) + border
    cursor = start_cursor.copy()
    for base in pendingactions:
        cursor += action_lookup[base]
        draw.point(tuple(cursor), fill=color_lookup[base])
    logging.info("Path Drawn")

    # Start and End points are dynamically sized to the dimensions of the final image
    dot_size = np.ceil(np.mean(dim) / 500)
    size = np.array((dot_size, dot_size))
    yellow = (255, 255, 0)
    neon_blue = (51, 255, 255)
    # Start marker at the origin of the walk, end marker at the final cursor.
    draw.ellipse([tuple(start_cursor - size), tuple(start_cursor + size)],
                 fill=yellow, outline=yellow)
    draw.ellipse([tuple(cursor - size), tuple(cursor + size)],
                 fill=neon_blue, outline=neon_blue)
    logging.info("Start and End points drawn")

    endtime.append(time())  # index 2: image rendered
    render_time = str(timedelta(seconds=(endtime[2] - endtime[1])))
    print("The image has been rendered. %s Saving..." % render_time)
    img.save(ipath, "PNG", optimize=True)
    logging.info("Image saved at %s" % ipath)

endtime.append(time())  # index 3: image saved
del img, Image
# Console summary: time spent saving, then total run time.
save_time = str(timedelta(seconds=(endtime[3] - endtime[2])))
total_time = str(timedelta(seconds=(endtime[3] - starttime)))
print("Done! %s Image is saved as: %s" % (save_time, ipath))
print("Program took %s to run" % total_time)

# Log the four stage timestamps, each measured from the start of the run.
checkpoints = tuple(str(timedelta(seconds=(endtime[k] - starttime))) for k in range(4))
logging.info("%s | %s | %s | %s # Parsing File | Computing Path | Rendering | Saving" % checkpoints)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.