This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Filename: merge_snp.py | |
# Date: 2016-09-23 | |
# Author: suqingdong | |
class MergeSNP: | |
''' | |
Merge all the samples according to the second column(pos), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#====================== | |
# Date: 2016-10-10 | |
# Author: suqingdong | |
# Introduction: get the gene list from an ANNOVAR annotation result file, then generate a BED file from the original BED file. | |
# Usage: python getGeneBed.py <genelist> <originbed> [outbed] | |
#====================== | |
import re | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Extract columns: 'CHROM POS ID REF ALT GeneName' + samples' columns | |
def safe_open(infile): | |
try: | |
if infile.endswith('.gz'): | |
import gzip | |
return gzip.open(infile) | |
else: | |
return open(infile) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
# One gene may exist on different chromosomes | |
# One position may belong to different genes | |
# geneList format: | |
# {'genename1': { | |
# "chr1": [(start1,stop1), (start2, stop2)], | |
# "chr2": [(start1,stop1), (start2, stop2)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#======================================================= | |
# convert pos and samples | |
# row name is sample name, and column name is position | |
# value is the number of alt (0,1,2) | |
#======================================================= | |
# sampledict: {'sample1':['0','2',...], ...} | |
def convertSNP(infile, outfile): | |
with open(infile) as f: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#====================================== | |
# merge file1 and file2 of replaced.xls | |
# if no pos, mark '0' | |
# output coverted.xls | |
#====================================== | |
# sampledict structure: {'sample1': {'pos1':'snp1','pos2':'snp2',... }, ...} | |
def getSampleDict(infile): | |
sampledict = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
def get_sample_list(infofile, listfile): | |
sampledict = {} | |
with open(infofile) as f: | |
for line in f: | |
sampleid,novoid = line.strip().split('\t')[2:4] | |
sampledict[novoid] = sampleid | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
def get_sample_info(infofile, pn, disease): | |
header = '#B1\n#FamilyID\tSampleID\tSex\tNormal/Patient\tPN' | |
if disease: | |
header += '\tDisease' | |
header += '\n' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#!-*- coding: utf-8 -*- | |
import sys | |
def add_count_samples(infile, outfile=None): | |
outfile = outfile or infile+'.addCountSamples' | |
with open(infile) as f, open(outfile, 'w') as out: | |
for line in f: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# A simple example of crawler with requests and BeautifulSoup | |
# Pay attention to encoding | |
import bs4 | |
import requests | |
def main(genelist): | |
with open(genelist) as f: |
OlderNewer