Last active
March 2, 2017 13:54
-
-
Save afrendeiro/c20b6b8ebad5afc48c58d95c902bfe43 to your computer and use it in GitHub Desktop.
Make a UCSC trackHub from a looper.Project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from looper.models import Project | |
import argparse | |
import os | |
import pandas as pd | |
import re | |
import string | |
import textwrap | |
def get_colors(level, pallete="gist_rainbow", nan_color=[0.5, 0.5, 0.5]): | |
""" | |
Given a level (list or iterable) with length n, | |
return a same-sized list of RGB colors for each unique value in the level. | |
""" | |
import matplotlib.pyplot as plt | |
pallete = plt.get_cmap(pallete) | |
level_unique = list(set(level)) | |
n = len(level_unique) | |
# get n equidistant colors | |
p = [pallete(1. * i / n) for i in range(n)] | |
# convert to integer RGB strings with no alpha channel | |
p = [",".join([str(int(y * 255)) for y in x[:-1]]) for x in p] | |
color_dict = dict(zip(list(set(level)), p)) | |
# color for nan cases | |
color_dict[pd.np.nan] = nan_color | |
return [color_dict[x] for x in level] | |
parser = argparse.ArgumentParser() | |
parser.add_argument(dest="project_config_file") | |
parser.add_argument( | |
'-a', '--attrs', dest="attributes", nargs="?", | |
help="Sample attributes (annotation sheet columns) to use to order tracks.") | |
parser.add_argument( | |
'-c', '--color-attr', dest="color_attribute", | |
help="Sample attribute to use to color tracks with. Default is first attribute passed.") | |
parser.add_argument( | |
'-r', '--overlay-replicates', dest="overlay_replicates", action="store_true", | |
help="Whether replicate samples should be overlaied in same track. Default=False.") | |
# args = parser.parse_args("-a patient_id,timepoint,cell_type -c patient_id metadata/project_config.yaml".split(" ")) | |
args = parser.parse_args() | |
args.attributes = args.attributes.split(',') | |
if args.color_attribute is None: | |
args.color_attribute = args.attributes[0] | |
print(args) | |
# Start project object | |
prj = Project(args.project_config_file) | |
# Setup paths and hub files | |
bigwig_dir = os.path.join(prj.trackhubs.trackhub_dir) | |
track_hub = os.path.join(bigwig_dir, "hub.txt") | |
genomes_hub = os.path.join(bigwig_dir, "genomes.txt") | |
open(genomes_hub, 'w').write("") | |
# Setup attributes | |
config = prj['config'] | |
proj_name = config['name'] if "name" in config else os.path.basename(config['paths']['output_dir']) | |
proj_desc = config['description'] if "description" in config else proj_name | |
user_email = config['email'] if "email" in config else "" | |
# Start building hub | |
text = """hub {proj} | |
shortLabel {description} | |
longLabel {description} | |
genomesFile genomes.txt | |
email {email} | |
""".format(proj=proj_name, description=proj_desc, email=user_email) | |
with open(track_hub, 'w') as handle: | |
handle.write(text) | |
os.chmod(track_hub, 0755) | |
# Templates for various levels | |
track_parent = """track {proj} | |
container multiWig | |
shortLabel {description} | |
longLabel {description} | |
container multiWig | |
aggregate none | |
showSubtrackColorOnUi on | |
type bigWig | |
autoScale on | |
visibility full | |
maxHeighPixels 100:32:8{0}{1}{2} | |
""" | |
track_middle = """ | |
track {track} | |
shortLabel {desc} | |
longLabel {desc} | |
parent {parent} | |
container multiWig | |
aggregate transparentOverlay | |
showSubtrackColorOnUi on | |
type bigWig | |
visibility full | |
maxHeighPixels 100:32:8{subgroups} | |
""" | |
track_final = """ | |
track {name} | |
shortLabel {name} | |
longLabel {name} | |
parent {parent} | |
type bigWig | |
graphTypeDefault bar | |
visibility full | |
height 32 | |
maxHeighPixels 100:32:8 | |
windowingFunction mean | |
autoScale on | |
bigDataUrl {bigwig} | |
color {color}{subgroups} | |
""" | |
# Make dataframe for groupby | |
df = pd.DataFrame([s.as_series() for s in prj.samples]).fillna("none") | |
# Keep only samples that have appropriate types to be displayed | |
df = df[df['library'].isin(["ATAC-seq", "ChIP-seq", "ChIPmentation"])] | |
# Create a trackHub for each genome | |
for genome in df['genome'].unique(): | |
if not os.path.exists(os.path.join(bigwig_dir, genome)): | |
os.makedirs(os.path.join(bigwig_dir, genome)) | |
os.chmod(os.path.join(bigwig_dir, genome), 0755) | |
df_g = df[(df['genome'] == genome)] | |
# Genomes | |
text = """genome {g} | |
trackDb {g}/trackDb.txt | |
""".format(g=genome) | |
with open(genomes_hub, 'a') as handle: | |
handle.write(text) | |
os.chmod(genomes_hub, 0755) | |
# TrackDB | |
track_db = os.path.join(os.path.join(bigwig_dir, genome, "trackDb.txt")) | |
open(track_db, 'w').write("") | |
# Create subgroups, an attribute sort order and an experiment matrix | |
subgroups = "\n" + "\n".join([ | |
"""subGroup{0} {1} {1} \\\n {2}""".format(i + 1, attr, " \\\n ".join( | |
[x + "=" + x for x in sorted(df_g[attr].unique())])) for i, attr in enumerate(args.attributes)]) | |
sort_order = "\n" + "sortOrder " + " ".join([x + "=+" for x in args.attributes]) | |
dimensions = "\n" + "dimensions " + " ".join( | |
["".join(x) for x in zip(["dim{}=".format(x) for x in ["X", "Y"] + list(string.ascii_uppercase[:-3])], args.attributes)]) | |
track = track_parent.format(subgroups, | |
sort_order, | |
dimensions, | |
proj=proj_name, description=proj_desc) | |
# Get unique colors for the given attribute | |
df_g['track_color'] = get_colors(df[args.color_attribute]) | |
# Group by requested attributes, add tracks | |
for labels, indices in df_g.groupby(args.attributes).groups.items(): | |
subgroups = "\n subGroups " + " ".join(["{}={}".format(k, v) for k, v in zip(args.attributes, labels)]) | |
if len(indices) == 1: | |
sample_attrs = df_g.ix[indices].squeeze() | |
track += textwrap.dedent(track_final.format( | |
name=sample_attrs["sample_name"], | |
color=sample_attrs['track_color'], | |
parent=proj_name, | |
bigwig=os.path.basename(sample_attrs['bigwig']), | |
subgroups=subgroups)) | |
# Make symbolic link to bigWig | |
dest = os.path.join(os.path.join(bigwig_dir, genome, os.path.basename(sample_attrs['bigwig']))) | |
if not os.path.exists(dest): | |
os.symlink(sample_attrs['bigwig'], dest) | |
os.chmod(dest, 0655) | |
else: | |
name = "_".join(labels) | |
desc = " ".join(labels) | |
track += track_middle.format( | |
track=name, | |
desc=desc, | |
parent=proj_name, | |
subgroups=subgroups) | |
for index in indices: | |
sample_attrs = df_g.ix[indices].squeeze() | |
track += track_final.format( | |
name=sample_attrs["sample_name"], | |
color=sample_attrs['track_color'], | |
parent=name, | |
bigwig=os.path.basename(sample_attrs['bigwig']), | |
subgroups=subgroups) | |
# Make symbolic link to bigWig | |
dest = os.path.join(os.path.join(bigwig_dir, genome, os.path.basename(sample_attrs['bigwig']))) | |
if not os.path.exists(dest): | |
os.symlink(sample_attrs['bigwig'], dest) | |
os.chmod(dest, 0755) | |
track = re.sub("_none", "", track) | |
# write trackDb to file | |
with open(track_db, 'w') as handle: | |
handle.write(textwrap.dedent(track)) | |
msg = "\n".join([ | |
"Finished producing trackHub!", | |
"----------------------------", | |
"Add the following URL to your UCSC trackHubs:", | |
"{url}/hub.txt".format(url=prj['config']['trackhubs']['url']), | |
"or alternatively follow this URL: " | |
+ "http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&hubClear={url}/hub.txt".format(url=prj['config']['trackhubs']['url']) | |
]) | |
if 'trackhubs' in prj['config']: | |
if "url" in prj['config']['trackhubs']: | |
print(msg) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment