Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Last active March 2, 2017 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/c20b6b8ebad5afc48c58d95c902bfe43 to your computer and use it in GitHub Desktop.
Save afrendeiro/c20b6b8ebad5afc48c58d95c902bfe43 to your computer and use it in GitHub Desktop.
Make a UCSC trackHub from a looper.Project
from looper.models import Project
import argparse
import os
import pandas as pd
import re
import string
import textwrap
def get_colors(level, pallete="gist_rainbow", nan_color=[0.5, 0.5, 0.5]):
"""
Given a level (list or iterable) with length n,
return a same-sized list of RGB colors for each unique value in the level.
"""
import matplotlib.pyplot as plt
pallete = plt.get_cmap(pallete)
level_unique = list(set(level))
n = len(level_unique)
# get n equidistant colors
p = [pallete(1. * i / n) for i in range(n)]
# convert to integer RGB strings with no alpha channel
p = [",".join([str(int(y * 255)) for y in x[:-1]]) for x in p]
color_dict = dict(zip(list(set(level)), p))
# color for nan cases
color_dict[pd.np.nan] = nan_color
return [color_dict[x] for x in level]
parser = argparse.ArgumentParser()
parser.add_argument(dest="project_config_file")
parser.add_argument(
'-a', '--attrs', dest="attributes", nargs="?",
help="Sample attributes (annotation sheet columns) to use to order tracks.")
parser.add_argument(
'-c', '--color-attr', dest="color_attribute",
help="Sample attribute to use to color tracks with. Default is first attribute passed.")
parser.add_argument(
'-r', '--overlay-replicates', dest="overlay_replicates", action="store_true",
help="Whether replicate samples should be overlaied in same track. Default=False.")
# args = parser.parse_args("-a patient_id,timepoint,cell_type -c patient_id metadata/project_config.yaml".split(" "))
args = parser.parse_args()
args.attributes = args.attributes.split(',')
if args.color_attribute is None:
args.color_attribute = args.attributes[0]
print(args)
# Start project object
prj = Project(args.project_config_file)
# Setup paths and hub files
bigwig_dir = os.path.join(prj.trackhubs.trackhub_dir)
track_hub = os.path.join(bigwig_dir, "hub.txt")
genomes_hub = os.path.join(bigwig_dir, "genomes.txt")
open(genomes_hub, 'w').write("")
# Setup attributes
config = prj['config']
proj_name = config['name'] if "name" in config else os.path.basename(config['paths']['output_dir'])
proj_desc = config['description'] if "description" in config else proj_name
user_email = config['email'] if "email" in config else ""
# Start building hub
text = """hub {proj}
shortLabel {description}
longLabel {description}
genomesFile genomes.txt
email {email}
""".format(proj=proj_name, description=proj_desc, email=user_email)
with open(track_hub, 'w') as handle:
handle.write(text)
os.chmod(track_hub, 0755)
# Templates for various levels
track_parent = """track {proj}
container multiWig
shortLabel {description}
longLabel {description}
container multiWig
aggregate none
showSubtrackColorOnUi on
type bigWig
autoScale on
visibility full
maxHeighPixels 100:32:8{0}{1}{2}
"""
track_middle = """
track {track}
shortLabel {desc}
longLabel {desc}
parent {parent}
container multiWig
aggregate transparentOverlay
showSubtrackColorOnUi on
type bigWig
visibility full
maxHeighPixels 100:32:8{subgroups}
"""
track_final = """
track {name}
shortLabel {name}
longLabel {name}
parent {parent}
type bigWig
graphTypeDefault bar
visibility full
height 32
maxHeighPixels 100:32:8
windowingFunction mean
autoScale on
bigDataUrl {bigwig}
color {color}{subgroups}
"""
# Make dataframe for groupby
df = pd.DataFrame([s.as_series() for s in prj.samples]).fillna("none")
# Keep only samples that have appropriate types to be displayed
df = df[df['library'].isin(["ATAC-seq", "ChIP-seq", "ChIPmentation"])]
# Create a trackHub for each genome
for genome in df['genome'].unique():
if not os.path.exists(os.path.join(bigwig_dir, genome)):
os.makedirs(os.path.join(bigwig_dir, genome))
os.chmod(os.path.join(bigwig_dir, genome), 0755)
df_g = df[(df['genome'] == genome)]
# Genomes
text = """genome {g}
trackDb {g}/trackDb.txt
""".format(g=genome)
with open(genomes_hub, 'a') as handle:
handle.write(text)
os.chmod(genomes_hub, 0755)
# TrackDB
track_db = os.path.join(os.path.join(bigwig_dir, genome, "trackDb.txt"))
open(track_db, 'w').write("")
# Create subgroups, an attribute sort order and an experiment matrix
subgroups = "\n" + "\n".join([
"""subGroup{0} {1} {1} \\\n {2}""".format(i + 1, attr, " \\\n ".join(
[x + "=" + x for x in sorted(df_g[attr].unique())])) for i, attr in enumerate(args.attributes)])
sort_order = "\n" + "sortOrder " + " ".join([x + "=+" for x in args.attributes])
dimensions = "\n" + "dimensions " + " ".join(
["".join(x) for x in zip(["dim{}=".format(x) for x in ["X", "Y"] + list(string.ascii_uppercase[:-3])], args.attributes)])
track = track_parent.format(subgroups,
sort_order,
dimensions,
proj=proj_name, description=proj_desc)
# Get unique colors for the given attribute
df_g['track_color'] = get_colors(df[args.color_attribute])
# Group by requested attributes, add tracks
for labels, indices in df_g.groupby(args.attributes).groups.items():
subgroups = "\n subGroups " + " ".join(["{}={}".format(k, v) for k, v in zip(args.attributes, labels)])
if len(indices) == 1:
sample_attrs = df_g.ix[indices].squeeze()
track += textwrap.dedent(track_final.format(
name=sample_attrs["sample_name"],
color=sample_attrs['track_color'],
parent=proj_name,
bigwig=os.path.basename(sample_attrs['bigwig']),
subgroups=subgroups))
# Make symbolic link to bigWig
dest = os.path.join(os.path.join(bigwig_dir, genome, os.path.basename(sample_attrs['bigwig'])))
if not os.path.exists(dest):
os.symlink(sample_attrs['bigwig'], dest)
os.chmod(dest, 0655)
else:
name = "_".join(labels)
desc = " ".join(labels)
track += track_middle.format(
track=name,
desc=desc,
parent=proj_name,
subgroups=subgroups)
for index in indices:
sample_attrs = df_g.ix[indices].squeeze()
track += track_final.format(
name=sample_attrs["sample_name"],
color=sample_attrs['track_color'],
parent=name,
bigwig=os.path.basename(sample_attrs['bigwig']),
subgroups=subgroups)
# Make symbolic link to bigWig
dest = os.path.join(os.path.join(bigwig_dir, genome, os.path.basename(sample_attrs['bigwig'])))
if not os.path.exists(dest):
os.symlink(sample_attrs['bigwig'], dest)
os.chmod(dest, 0755)
track = re.sub("_none", "", track)
# write trackDb to file
with open(track_db, 'w') as handle:
handle.write(textwrap.dedent(track))
msg = "\n".join([
"Finished producing trackHub!",
"----------------------------",
"Add the following URL to your UCSC trackHubs:",
"{url}/hub.txt".format(url=prj['config']['trackhubs']['url']),
"or alternatively follow this URL: "
+ "http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&hubClear={url}/hub.txt".format(url=prj['config']['trackhubs']['url'])
])
if 'trackhubs' in prj['config']:
if "url" in prj['config']['trackhubs']:
print(msg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment