Skip to content

Instantly share code, notes, and snippets.

@fxia22
Created May 14, 2018 20:01
Show Gist options
  • Save fxia22/e6d96a3a9a89e813cb9169005d729ba8 to your computer and use it in GitHub Desktop.
Save fxia22/e6d96a3a9a89e813cb9169005d729ba8 to your computer and use it in GitHub Desktop.
process_ukbb
import os
import numpy as np
import matplotlib.pyplot as plt
import sys
from tqdm import tqdm
import pandas as pd
# Build a per-variant feature table for chromosome 1 of a UK Biobank
# association file.
#
# For every row of the input whose first field starts with "1:", the script
# looks up two per-base tracks at the variant position (a ChromHMM state code
# and a value from the phastcon file) and writes them, together with the
# row's last column parsed as a float, to "<input stem>.csv".

CHROMHMM_BED = 'wgEncodeAwgSegmentationChromhmmGm12878.bed'
PHASTCON_FILE = 'chr1phastcon.txt'
# Length of the per-base state track; indices at or beyond it are never set.
CHR1_TRACK_SIZE = 250000000


def parse_association_lines(lines, prefix='1:'):
    """Extract variant names and p-values from tab-separated lines.

    Args:
        lines: iterable of text lines (e.g. an open file).
        prefix: only lines starting with this string are kept.

    Returns:
        ``(names, pvals)``: the first field of every kept line, and the last
        field of every kept line converted to ``float``.

    Raises:
        ValueError: if the last field of a kept line is not a number.
    """
    names = []
    pvals = []
    for line in lines:
        if not line.startswith(prefix):
            continue
        fields = line.strip().split('\t')
        # Only the first and last columns are used downstream, so the rest of
        # the row is not retained.
        names.append(fields[0])
        pvals.append(float(fields[-1]))
    return names, pvals


def build_chrom_state_track(bed_lines, chrom='chr1', size=CHR1_TRACK_SIZE):
    """Turn BED-style segmentation rows into a per-base state-code array.

    Each row is whitespace-split into ``chrom start end label ...``. Labels
    from *all* rows (every chromosome) are sorted and numbered from 0; rows
    whose first field equals ``chrom`` then write their label's number into
    ``track[start:end]``, in file order.

    Args:
        bed_lines: iterable of text lines.
        chrom: value of the first field selecting the rows to paint.
        size: length of the returned array.

    Returns:
        ``(labels, track)``: the sorted list of distinct labels and a float
        array of length ``size``. Bases covered by no row stay 0.
        NOTE(review): 0 is therefore also the code of the first sorted
        label, so "uncovered" and that label are indistinguishable.
    """
    labels = set()
    intervals = []
    for line in bed_lines:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        labels.add(fields[3])
        # Filter rather than stop at the first foreign chromosome, so the
        # result does not depend on `chrom` rows being first and contiguous.
        if fields[0] == chrom:
            intervals.append((int(fields[1]), int(fields[2]), fields[3]))

    ordered_labels = sorted(labels)
    code_of = {label: code for code, label in enumerate(ordered_labels)}

    track = np.zeros(size)
    for start, end, label in intervals:
        track[start:end] = code_of[label]
    return ordered_labels, track


def variant_positions(names):
    """Return the integer between the first and second ':' of each name."""
    return np.array([int(n.split(':')[1]) for n in names], dtype=np.int64)


def build_features(names, chrom_state, phastcon):
    """Look up both tracks at every variant position.

    Args:
        names: variant names of the form ``"<chrom>:<position>..."``.
        chrom_state: per-base state-code array.
        phastcon: per-base score array.

    Returns:
        Float array of shape ``(len(names), 2)``; column 0 is
        ``chrom_state[position]`` and column 1 is ``phastcon[position]``.

    Raises:
        IndexError: if a position lies beyond the end of either track.
    """
    # NOTE(review): BED starts are conventionally 0-based while variant
    # positions are usually 1-based; the position is used as a direct index
    # here, exactly as before -- confirm this is intended.
    positions = variant_positions(names)
    features = np.zeros((len(names), 2))
    features[:, 0] = np.asarray(chrom_state)[positions]
    features[:, 1] = np.asarray(phastcon)[positions]
    return features


def output_csv_path(input_path):
    """Return the CSV path written for ``input_path``.

    The file name is cut at its first '.', '.csv' is appended, and the
    directory part is preserved. Splitting only the base name keeps dots in
    directory components (``./data/x.tsv``, ``my.dir/x.tsv``) from
    truncating the path.
    """
    directory = os.path.dirname(input_path)
    stem = os.path.basename(input_path).split('.')[0]
    return os.path.join(directory, '{}.csv'.format(stem))


def main(argv=None):
    """Run the pipeline; ``argv[1]`` is the association file to process."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: {} <ukbb_association_file>'.format(argv[0] if argv else 'process_ukbb'))
    main_ukbb_file = argv[1]

    with open(main_ukbb_file) as f:
        name, pval = parse_association_lines(tqdm(f))
    print(len(name))

    # NOTE(review): read_csv's default treats the first line as a header; if
    # the file has no header row the first score is consumed as the column
    # name and the track is shifted by one -- confirm against the data file.
    phastcon = np.array(pd.read_csv(PHASTCON_FILE)).flatten()

    with open(CHROMHMM_BED) as f:
        state, chrom_state = build_chrom_state_track(tqdm(f))
    print(state)

    x = build_features(name, chrom_state, phastcon)
    p = np.array(pval, dtype=float)

    with open(output_csv_path(main_ukbb_file), 'w') as f:
        f.write('x1, x2, p_value, h\n')
        # The last column is a placeholder and is always written as nan.
        for (x1, x2), p_value in zip(x, p):
            f.write("{}, {}, {}, {}\n".format(x1, x2, p_value, np.nan))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment