Last active
May 9, 2022 16:07
-
-
Save david-a-parry/e92947e885a89f3788000682cee55246 to your computer and use it in GitHub Desktop.
Example python code for creating a Manhattan plot using pandas and seaborn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import OrderedDict | |
import os | |
import requests | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
URL = 'https://portals.broadinstitute.org/collaboration/giant/images/8/80/Height_AA_add_SV.txt.gz' | |
SUGGESTIVE = -np.log10(5e-6) | |
SIGNIFICANT = -np.log10(3.3e-7) | |
REF_GENOME = 'hg19' | |
def get_cumulative_chrom_pos(chrom_sizes): | |
''' | |
Return OrderedDict of chromosome names to cumulative position in the | |
genome. | |
''' | |
chrom_starts = OrderedDict() | |
pos = 0 | |
for chrom, size in chrom_sizes.items(): | |
chrom_starts[chrom] = pos | |
pos += size | |
return chrom_starts | |
def get_chrom_sizes(ref_genome, chrom_order): | |
''' Return OrderedDict of chromosome names to lengths from UCSC.''' | |
r = requests.get( | |
"http://api.genome.ucsc.edu/list/chromosomes?genome={}".format( | |
ref_genome)) | |
if not r.ok: | |
r.raise_for_status() | |
chrom_data = r.json() | |
return OrderedDict( | |
(x, chrom_data['chromosomes']['chr' + x]) for x in chrom_order) | |
def get_data(url): | |
''' Retrieve GWAS results from url.''' | |
fname = os.path.basename(url) | |
r = requests.get(url) | |
if not r.ok: | |
r.raise_for_status() | |
with open(fname, "wb") as fh: | |
fh.write(r.content) | |
df = pd.read_csv(fname, sep='\t', dtype={'CHR': str, 'POS': int}) | |
df['-log10P'] = -np.log10(df['Pvalue']) | |
return df | |
def get_xticks_and_labels(chrom_pos, chrom_sizes): | |
''' Return tick positions and labels for manhattan plot''' | |
ticks = [] | |
labels = [] | |
for chrom in chrom_pos: | |
start = chrom_pos[chrom] | |
end = start + chrom_sizes[chrom] | |
ticks.append((start + end) / 2) | |
labels.append(chrom) | |
return ticks, labels | |
def manhattan_plot(df, | |
ref_genome, | |
title=None, | |
plotname='manhattan.png', | |
suggestive=None, | |
significant=None): | |
''' Create a manhattan plot from dataframe.''' | |
chrom_order = sorted(df.CHR.unique(), | |
key=lambda x: int( | |
x.split('chr')[-1].replace('X', '23').replace( | |
'Y', '24').replace('M', '25'))) | |
chrom_size = get_chrom_sizes(ref_genome, chrom_order) | |
chrom_pos = get_cumulative_chrom_pos(chrom_size) | |
df['xpos'] = df.apply(lambda x: chrom_pos[x.CHR] + x.POS, axis=1) | |
fig, ax = plt.subplots(figsize=(24, 6)) | |
pair_pal = sns.color_palette("Blues", 2) | |
pal = [pair_pal[i % 2] for i in range(1, len(chrom_order) + 1)] | |
sns.scatterplot(ax=ax, | |
data=df, | |
x='xpos', | |
y='-log10P', | |
hue='CHR', | |
hue_order=chrom_order, | |
palette=pal, | |
edgecolor='none', | |
size=100) | |
plt.legend([], [], frameon=False) | |
x_end = chrom_pos[chrom_order[-1]] + chrom_size[chrom_order[-1]] | |
if significant is not None: | |
ax.plot([0, x_end], [significant, significant], | |
c='r', | |
linestyle='--', | |
alpha=0.7) | |
if suggestive is not None: | |
ax.plot([0, x_end], [suggestive, suggestive], | |
c='k', | |
linestyle='--', | |
alpha=0.7) | |
if title: | |
ax.set_title(title) | |
ticks, labels = get_xticks_and_labels(chrom_pos, chrom_size) | |
ax.set_xticks(ticks) | |
ax.set_xticklabels(labels) | |
ax.set_xlabel('chromosome') | |
sns.despine() | |
fig.savefig(plotname) | |
def main(): | |
''' Retrieve data and make a manhattan plot.''' | |
df = get_data(URL) | |
manhattan_plot(df, | |
REF_GENOME, | |
title='Height GWAS', | |
suggestive=SUGGESTIVE, | |
significant=SIGNIFICANT) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment