Qingdong Su suqingdong

## merge_snp.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: merge_snp.py
# Date: 2016-09-23
# Author: suqingdong


class MergeSNP:
    '''
    Merge all the samples according to the second column(pos),

## getGeneBed.py
#!/usr/bin/env python
#======================
# Date: 2016-10-10
# Author: suqingdong
# Introduction: get the gene list from an ANNOVAR annotation result file, then generate a BED file from the original BED file.
# Usage: python getGeneBed.py <genelist> <originbed> [outbed]
#======================
import re


## replaceSNP.py
#!/usr/bin/env python
# Extract columns: 'ChROM POS ID REF ALT GeneName' + samples' columns

def safe_open(infile):
    try:
        if infile.endswith('.gz'):
            import gzip
            return gzip.open(infile)
        else:
            return open(infile)

## depthStatByGene.py
#!/usr/bin/env python

import re

# One gene may exist on different chromosomes
# One position may belong to different genes
# geneList format:
# {'genename1': {
#    "chr1": [(start1,stop1), (start2, stop2)],
#    "chr2": [(start1,stop1), (start2, stop2)]

## convertSNP.py
#!/usr/bin/env python
#=======================================================
# convert pos and samples
# row name is sample name, and column name is position
# value is the number of alt (0,1,2)
#=======================================================

# sampledict: {'sample1':['0','2',...], ...}
def convertSNP(infile, outfile):
    with open(infile) as f:

## mergeSNP.py
#!/usr/bin/env python
#======================================
# merge file1 and file2 of replaced.xls
# if no pos, mark '0'
# output coverted.xls
#======================================

# sampledict structure: {'sample1': {'pos1':'snp1','pos2':'snp2',... }, ...}
def getSampleDict(infile):
    sampledict = {}

## get_sample_list.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-

def get_sample_list(infofile, listfile):
    '''
    Read a tab-separated info file and build a dict that maps the fourth
    field of every line (bound to `novoid`) to the third field (bound to
    `sampleid`).

    infofile: path of the text file to read; every line is stripped and
              split on tab characters.
    listfile: not used in the visible part of this function.

    NOTE(review): the dict is neither returned nor written anywhere in the
    lines shown here -- this excerpt looks truncated; confirm against the
    full script before relying on this description.
    '''
    sampledict = {}
    with open(infofile) as f:
        for line in f:
            # Fields at index 2 and 3 are taken; a line with fewer than four
            # tab-separated fields makes this unpacking raise ValueError.
            sampleid,novoid = line.strip().split('\t')[2:4]
            # Keyed by novoid, so a repeated novoid keeps only the last sampleid.
            sampledict[novoid] = sampleid


## get_sample_info.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-


def get_sample_info(infofile, pn, disease):
    '''
    Build the header text for a sample-info table.

    The header starts with a '#B1' line followed by a tab-separated column
    line (FamilyID, SampleID, Sex, Normal/Patient, PN). When `disease` is
    truthy, a 'Disease' column is appended. The header always ends with a
    newline.

    infofile, pn: not used in the visible part of this function.

    NOTE(review): the header is not returned or written in the lines shown
    here -- this excerpt looks truncated; confirm against the full script.
    '''
    header = '#B1\n#FamilyID\tSampleID\tSex\tNormal/Patient\tPN'
    # Optional trailing column, added only when a disease value was supplied.
    if disease:
        header += '\tDisease'
    header += '\n'

## add_count_samples.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-

import sys


def add_count_samples(infile, outfile=None):
    outfile = outfile or infile+'.addCountSamples'
    with open(infile) as f, open(outfile, 'w') as out:
        for line in f:

## get_blue
#!/usr/bin/env python
# A simple example of crawler with requests and BeautifulSoup
# Pay attention to encoding

import bs4
import requests


def main(genelist):
    with open(genelist) as f:
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# Filename: merge_snp.py
	# Date: 2016-09-23
	# Author: suqingdong


	class MergeSNP:
	'''
	Merge all the samples according to the second column(pos),
	#!/usr/bin/env python
	#======================
	# Date: 2016-10-10
	# Author: suqingdong
	# Introduction: get the gene list from an ANNOVAR annotation result file, then generate a BED file from the original BED file.
	# Usage: python getGeneBed.py <genelist> <originbed> [outbed]
	#======================
	import re
	#!/usr/bin/env python
	# Extract columns: 'ChROM POS ID REF ALT GeneName' + samples' columns

	def safe_open(infile):
	try:
	if infile.endswith('.gz'):
	import gzip
	return gzip.open(infile)
	else:
	return open(infile)
	#!/usr/bin/env python

	import re

	# One gene may exist on different chromosomes
	# One position may belong to different genes
	# geneList format:
	# {'genename1': {
	# "chr1": [(start1,stop1), (start2, stop2)],
	# "chr2": [(start1,stop1), (start2, stop2)]
	#!/usr/bin/env python
	#=======================================================
	# convert pos and samples
	# row name is sample name, and column name is position
	# value is the number of alt (0,1,2)
	#=======================================================

	# sampledict: {'sample1':['0','2',...], ...}
	def convertSNP(infile, outfile):
	with open(infile) as f:
	#!/usr/bin/env python
	#======================================
	# merge file1 and file2 of replaced.xls
	# if no pos, mark '0'
	# output coverted.xls
	#======================================

	# sampledict structure: {'sample1': {'pos1':'snp1','pos2':'snp2',... }, ...}
	def getSampleDict(infile):
	sampledict = {}
	#!/usr/bin/env python
	#-*- coding: utf-8 -*-

	def get_sample_list(infofile, listfile):
	sampledict = {}
	with open(infofile) as f:
	for line in f:
	sampleid,novoid = line.strip().split('\t')[2:4]
	sampledict[novoid] = sampleid
	#!/usr/bin/env python
	#-*- coding: utf-8 -*-


	def get_sample_info(infofile, pn, disease):
	header = '#B1\n#FamilyID\tSampleID\tSex\tNormal/Patient\tPN'
	if disease:
	header += '\tDisease'
	header += '\n'
	#!/usr/bin/env python
	#-*- coding: utf-8 -*-

	import sys


	def add_count_samples(infile, outfile=None):
	outfile = outfile or infile+'.addCountSamples'
	with open(infile) as f, open(outfile, 'w') as out:
	for line in f:
	#!/usr/bin/env python
	# A simple example of crawler with requests and BeautifulSoup
	# Pay attention to encoding

	import bs4
	import requests


	def main(genelist):
	with open(genelist) as f: