Skip to content

Instantly share code, notes, and snippets.

@pgsin

pgsin/hipsci.py

Created May 24, 2017
Embed
What would you like to do?
import requests
import json
import pandas as pd
import os
from IPython.display import display, HTML
# Limit DataFrame rendering to at most 10 rows.
pd.options.display.max_rows = 10

# Directory containing the input files named in ``fnames``.
base_dir = "C:/Users/Pavel/Documents/Projects/hipsci/data/"

# HipSci cell line API endpoint; a cell line name is appended to it.
base_url = "http://www.hipsci.org/lines/api/cellLine/"

# SNP identifiers. Each one is also the name of an input file in ``base_dir``
# and the prefix of the per-SNP columns added to the output table.
fnames = [
    "rs1800547",
    "rs8070723",
    "rs1052553",
    "rs2583988",
    "rs356181",
]
# Download the HipSci record of every cell line listed in the first SNP file.
# The first tab-separated field of each row holds the cell line name followed
# by a dot-separated suffix; only the part before the first dot is queried.
cell_lines_json = []
with open(os.path.join(base_dir, fnames[0]), "r") as fstream:
    for line in fstream:
        cell_line_name = line.rstrip().split("\t")[0].split(".")[0]
        if not cell_line_name:
            # Blank line: nothing to look up, and an empty name would query
            # the bare endpoint.
            continue
        response = requests.get(base_url + cell_line_name)
        if response.status_code != requests.codes.ok:
            # status_code is an int, so it must be converted before being
            # concatenated into the message.
            print("ERROR: " + cell_line_name + ", status.code is " + str(response.status_code))
            # Do not parse or keep the body of a failed request.
            continue
        cell_lines_json.append(json.loads(response.text))
# Flatten the nested API records into one row per cell line, indexed by "_id".
# pandas >= 1.0 exposes json_normalize at the top level; older releases only
# provide it as pd.io.json.json_normalize, so fall back to that when needed.
_json_normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize
cell_line_df = _json_normalize(cell_lines_json).set_index(["_id"])

# Remove the "_source." text that flattening puts into the column names.
cell_line_df.columns = [col.replace("_source.", "") for col in cell_line_df.columns.values]

# Pre-create three empty string columns per SNP; they are filled in later
# from the per-SNP input files.
for snp in fnames:
    cell_line_df[snp + ".haplotype"] = ""
    cell_line_df[snp + ".vcf_info"] = ""
    cell_line_df[snp + ".vcf_additional_info"] = ""
# Translation from the first three characters of the additional-info field
# (a phased genotype such as "0|1") to a haplotype label.
dd = {"0|0" : "H1|H1", "0|1" : "H1|H2", "1|0" : "H1|H2", "1|1" : "H2|H2"}

# Fill the per-SNP columns of cell_line_df from each SNP's tab-separated file.
for fname in fnames:
    with open(os.path.join(base_dir, fname), "r") as fstream:
        for line in fstream:
            stripped = line.rstrip()
            if not stripped:
                # Blank lines carry no fields to index into.
                continue
            ll = stripped.split("\t")
            # Cell line name is the part of the first field before the first dot.
            cell_name = ll[0].split(".")[0]
            genotype = ll[10][:3]
            haplotype = dd.get(genotype)
            if haplotype is None:
                # Genotype not covered by the table: keep the raw VCF fields
                # but leave the haplotype empty instead of aborting the run.
                print("WARNING: " + cell_name + ", unexpected genotype " + genotype + " in " + fname)
                haplotype = ""
            # DataFrame.at replaces DataFrame.set_value, which was removed in
            # pandas 1.0.
            cell_line_df.at[cell_name, fname + ".haplotype"] = haplotype  # haplo info add.info
            cell_line_df.at[cell_name, fname + ".vcf_info"] = ll[8]  # info add.info
            cell_line_df.at[cell_name, fname + ".vcf_additional_info"] = ll[10]  # add.info

# Write the annotated table as a tab-separated file in the working directory.
cell_line_df.to_csv('output.txt', sep='\t')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.