Skip to content

Instantly share code, notes, and snippets.

@david-a-parry
Last active May 9, 2022 16:07
Show Gist options
  • Save david-a-parry/e92947e885a89f3788000682cee55246 to your computer and use it in GitHub Desktop.
Save david-a-parry/e92947e885a89f3788000682cee55246 to your computer and use it in GitHub Desktop.
Example python code for creating a Manhattan plot using pandas and seaborn
#!/usr/bin/env python3
from collections import OrderedDict
import os
import requests
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
URL = 'https://portals.broadinstitute.org/collaboration/giant/images/8/80/Height_AA_add_SV.txt.gz'
SUGGESTIVE = -np.log10(5e-6)
SIGNIFICANT = -np.log10(3.3e-7)
REF_GENOME = 'hg19'
def get_cumulative_chrom_pos(chrom_sizes):
'''
Return OrderedDict of chromosome names to cumulative position in the
genome.
'''
chrom_starts = OrderedDict()
pos = 0
for chrom, size in chrom_sizes.items():
chrom_starts[chrom] = pos
pos += size
return chrom_starts
def get_chrom_sizes(ref_genome, chrom_order):
''' Return OrderedDict of chromosome names to lengths from UCSC.'''
r = requests.get(
"http://api.genome.ucsc.edu/list/chromosomes?genome={}".format(
ref_genome))
if not r.ok:
r.raise_for_status()
chrom_data = r.json()
return OrderedDict(
(x, chrom_data['chromosomes']['chr' + x]) for x in chrom_order)
def get_data(url):
''' Retrieve GWAS results from url.'''
fname = os.path.basename(url)
r = requests.get(url)
if not r.ok:
r.raise_for_status()
with open(fname, "wb") as fh:
fh.write(r.content)
df = pd.read_csv(fname, sep='\t', dtype={'CHR': str, 'POS': int})
df['-log10P'] = -np.log10(df['Pvalue'])
return df
def get_xticks_and_labels(chrom_pos, chrom_sizes):
''' Return tick positions and labels for manhattan plot'''
ticks = []
labels = []
for chrom in chrom_pos:
start = chrom_pos[chrom]
end = start + chrom_sizes[chrom]
ticks.append((start + end) / 2)
labels.append(chrom)
return ticks, labels
def manhattan_plot(df,
ref_genome,
title=None,
plotname='manhattan.png',
suggestive=None,
significant=None):
''' Create a manhattan plot from dataframe.'''
chrom_order = sorted(df.CHR.unique(),
key=lambda x: int(
x.split('chr')[-1].replace('X', '23').replace(
'Y', '24').replace('M', '25')))
chrom_size = get_chrom_sizes(ref_genome, chrom_order)
chrom_pos = get_cumulative_chrom_pos(chrom_size)
df['xpos'] = df.apply(lambda x: chrom_pos[x.CHR] + x.POS, axis=1)
fig, ax = plt.subplots(figsize=(24, 6))
pair_pal = sns.color_palette("Blues", 2)
pal = [pair_pal[i % 2] for i in range(1, len(chrom_order) + 1)]
sns.scatterplot(ax=ax,
data=df,
x='xpos',
y='-log10P',
hue='CHR',
hue_order=chrom_order,
palette=pal,
edgecolor='none',
size=100)
plt.legend([], [], frameon=False)
x_end = chrom_pos[chrom_order[-1]] + chrom_size[chrom_order[-1]]
if significant is not None:
ax.plot([0, x_end], [significant, significant],
c='r',
linestyle='--',
alpha=0.7)
if suggestive is not None:
ax.plot([0, x_end], [suggestive, suggestive],
c='k',
linestyle='--',
alpha=0.7)
if title:
ax.set_title(title)
ticks, labels = get_xticks_and_labels(chrom_pos, chrom_size)
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.set_xlabel('chromosome')
sns.despine()
fig.savefig(plotname)
def main():
''' Retrieve data and make a manhattan plot.'''
df = get_data(URL)
manhattan_plot(df,
REF_GENOME,
title='Height GWAS',
suggestive=SUGGESTIVE,
significant=SIGNIFICANT)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment