david-a-parry/manhattan_plot.py

## manhattan_plot.py
#!/usr/bin/env python3
from collections import OrderedDict
import os
import requests
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

URL = 'https://portals.broadinstitute.org/collaboration/giant/images/8/80/Height_AA_add_SV.txt.gz'
SUGGESTIVE = -np.log10(5e-6)
SIGNIFICANT = -np.log10(3.3e-7)
REF_GENOME = 'hg19'


def get_cumulative_chrom_pos(chrom_sizes):
    '''
    Return OrderedDict of chromosome names to cumulative position in the
    genome.
    '''
    chrom_starts = OrderedDict()
    pos = 0
    for chrom, size in chrom_sizes.items():
        chrom_starts[chrom] = pos
        pos += size
    return chrom_starts


def get_chrom_sizes(ref_genome, chrom_order):
    ''' Return OrderedDict of chromosome names to lengths from UCSC.'''
    r = requests.get(
        "http://api.genome.ucsc.edu/list/chromosomes?genome={}".format(
            ref_genome))
    if not r.ok:
        r.raise_for_status()
    chrom_data = r.json()
    return OrderedDict(
        (x, chrom_data['chromosomes']['chr' + x]) for x in chrom_order)


def get_data(url):
    ''' Retrieve GWAS results from url.'''
    fname = os.path.basename(url)
    r = requests.get(url)
    if not r.ok:
        r.raise_for_status()
    with open(fname, "wb") as fh:
        fh.write(r.content)
    df = pd.read_csv(fname, sep='\t', dtype={'CHR': str, 'POS': int})
    df['-log10P'] = -np.log10(df['Pvalue'])
    return df


def get_xticks_and_labels(chrom_pos, chrom_sizes):
    ''' Return tick positions and labels for manhattan plot'''
    ticks = []
    labels = []
    for chrom in chrom_pos:
        start = chrom_pos[chrom]
        end = start + chrom_sizes[chrom]
        ticks.append((start + end) / 2)
        labels.append(chrom)
    return ticks, labels


def manhattan_plot(df,
                   ref_genome,
                   title=None,
                   plotname='manhattan.png',
                   suggestive=None,
                   significant=None):
    ''' Create a manhattan plot from dataframe.'''
    chrom_order = sorted(df.CHR.unique(),
                         key=lambda x: int(
                             x.split('chr')[-1].replace('X', '23').replace(
                                 'Y', '24').replace('M', '25')))
    chrom_size = get_chrom_sizes(ref_genome, chrom_order)
    chrom_pos = get_cumulative_chrom_pos(chrom_size)
    df['xpos'] = df.apply(lambda x: chrom_pos[x.CHR] + x.POS, axis=1)
    fig, ax = plt.subplots(figsize=(24, 6))
    pair_pal = sns.color_palette("Blues", 2)
    pal = [pair_pal[i % 2] for i in range(1, len(chrom_order) + 1)]
    sns.scatterplot(ax=ax,
                    data=df,
                    x='xpos',
                    y='-log10P',
                    hue='CHR',
                    hue_order=chrom_order,
                    palette=pal,
                    edgecolor='none',
                    size=100)
    plt.legend([], [], frameon=False)
    x_end = chrom_pos[chrom_order[-1]] + chrom_size[chrom_order[-1]]
    if significant is not None:
        ax.plot([0, x_end], [significant, significant],
                c='r',
                linestyle='--',
                alpha=0.7)
    if suggestive is not None:
        ax.plot([0, x_end], [suggestive, suggestive],
                c='k',
                linestyle='--',
                alpha=0.7)
    if title:
        ax.set_title(title)
    ticks, labels = get_xticks_and_labels(chrom_pos, chrom_size)
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_xlabel('chromosome')
    sns.despine()
    fig.savefig(plotname)


def main():
    ''' Retrieve data and make a manhattan plot.'''
    df = get_data(URL)
    manhattan_plot(df,
                   REF_GENOME,
                   title='Height GWAS',
                   suggestive=SUGGESTIVE,
                   significant=SIGNIFICANT)


if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	from collections import OrderedDict
	import os
	import requests
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns

	URL = 'https://portals.broadinstitute.org/collaboration/giant/images/8/80/Height_AA_add_SV.txt.gz'
	SUGGESTIVE = -np.log10(5e-6)
	SIGNIFICANT = -np.log10(3.3e-7)
	REF_GENOME = 'hg19'


	def get_cumulative_chrom_pos(chrom_sizes):
	'''
	Return OrderedDict of chromosome names to cumulative position in the
	genome.
	'''
	chrom_starts = OrderedDict()
	pos = 0
	for chrom, size in chrom_sizes.items():
	chrom_starts[chrom] = pos
	pos += size
	return chrom_starts


	def get_chrom_sizes(ref_genome, chrom_order):
	''' Return OrderedDict of chromosome names to lengths from UCSC.'''
	r = requests.get(
	"http://api.genome.ucsc.edu/list/chromosomes?genome={}".format(
	ref_genome))
	if not r.ok:
	r.raise_for_status()
	chrom_data = r.json()
	return OrderedDict(
	(x, chrom_data['chromosomes']['chr' + x]) for x in chrom_order)


	def get_data(url):
	''' Retrieve GWAS results from url.'''
	fname = os.path.basename(url)
	r = requests.get(url)
	if not r.ok:
	r.raise_for_status()
	with open(fname, "wb") as fh:
	fh.write(r.content)
	df = pd.read_csv(fname, sep='\t', dtype={'CHR': str, 'POS': int})
	df['-log10P'] = -np.log10(df['Pvalue'])
	return df


	def get_xticks_and_labels(chrom_pos, chrom_sizes):
	''' Return tick positions and labels for manhattan plot'''
	ticks = []
	labels = []
	for chrom in chrom_pos:
	start = chrom_pos[chrom]
	end = start + chrom_sizes[chrom]
	ticks.append((start + end) / 2)
	labels.append(chrom)
	return ticks, labels


	def manhattan_plot(df,
	ref_genome,
	title=None,
	plotname='manhattan.png',
	suggestive=None,
	significant=None):
	''' Create a manhattan plot from dataframe.'''
	chrom_order = sorted(df.CHR.unique(),
	key=lambda x: int(
	x.split('chr')[-1].replace('X', '23').replace(
	'Y', '24').replace('M', '25')))
	chrom_size = get_chrom_sizes(ref_genome, chrom_order)
	chrom_pos = get_cumulative_chrom_pos(chrom_size)
	df['xpos'] = df.apply(lambda x: chrom_pos[x.CHR] + x.POS, axis=1)
	fig, ax = plt.subplots(figsize=(24, 6))
	pair_pal = sns.color_palette("Blues", 2)
	pal = [pair_pal[i % 2] for i in range(1, len(chrom_order) + 1)]
	sns.scatterplot(ax=ax,
	data=df,
	x='xpos',
	y='-log10P',
	hue='CHR',
	hue_order=chrom_order,
	palette=pal,
	edgecolor='none',
	size=100)
	plt.legend([], [], frameon=False)
	x_end = chrom_pos[chrom_order[-1]] + chrom_size[chrom_order[-1]]
	if significant is not None:
	ax.plot([0, x_end], [significant, significant],
	c='r',
	linestyle='--',
	alpha=0.7)
	if suggestive is not None:
	ax.plot([0, x_end], [suggestive, suggestive],
	c='k',
	linestyle='--',
	alpha=0.7)
	if title:
	ax.set_title(title)
	ticks, labels = get_xticks_and_labels(chrom_pos, chrom_size)
	ax.set_xticks(ticks)
	ax.set_xticklabels(labels)
	ax.set_xlabel('chromosome')
	sns.despine()
	fig.savefig(plotname)


	def main():
	''' Retrieve data and make a manhattan plot.'''
	df = get_data(URL)
	manhattan_plot(df,
	REF_GENOME,
	title='Height GWAS',
	suggestive=SUGGESTIVE,
	significant=SIGNIFICANT)


	if __name__ == '__main__':
	main()