heliy/seqcutloc.py

## seqcutloc.py
#coding:UTF-8
#!/usr/bin/python

#使用方法：
#python seqcutloc.py  fasta文件 序列 输出文件
#./seqcutloc.py fasta文件 序列 输出文件

#算法是通过将长序列按碱基切开 在拼接回原序列
#eg seq:ATAATCGGCGATTAATCGAAT  cut:AAT
#切完后得到子序列 AT CGGCGATT CG 长度为2 8 2
#拼接得到 第一个位点:2+1=3 第二个位点:2+3+8+1=14 第三个位点:2+3+8+3+2+1=19

import string
import sys

# fasta文件类
class fasta(object):
    def __init__(self,head,seq):
	self.head=head
	self.seq=seq

#处理单个fasta文件数据
def sinfasta(sfas,target):
    targloc=[]
    name=sfas.head.split('|')[3]

    sub=sfas.seq.split(target)
    if len(sub)<1:
        return     #没有╮(╯_╰)╭

    i=1            #找到的序号
    before=1       #之前的位置标记

    tarlen=len(target)
    for s in sub[:-1]:     #最末尾的不用考虑
	before+=len(s)     #该切点开始位置
	targloc.append([name,str(i),str(before),str(before+tarlen)])
	before+=tarlen     #该切点结束位置
	i+=1

    return targloc

#将fna文件中的单个fasta分开
def file2singles(filename):
    fl=[]
    f=open(filename,'r')
    fastalist=f.read().split('>')
    f.close()

    for fas in fastalist:
	if len(fas)<10:
	    continue
	head=fas.split("\n")[0]
	seq=fas.replace(head,'').replace('\n','')

	fa=fasta(head,seq)
	fl.append(fa)
    return fl

#主处理过程
def process(filename,target,tofile):
    w=open(tofile,'w+')
    falist=file2singles(filename)
    for fa in falist:
        result=sinfasta(fa,target)
        lines=[string.join(item,'\t')+'\n' for item in result]

        w.writelines(lines)
        w.write('\n')
    w.close()
    return

if __name__=="__main__":
    [filename,target,tofile]=sys.argv[1:]
    process(filename,target,tofile)
	#coding:UTF-8
	#!/usr/bin/python

	#使用方法：
	#python seqcutloc.py fasta文件序列输出文件
	#./seqcutloc.py fasta文件序列输出文件

	#算法是通过将长序列按碱基切开在拼接回原序列
	#eg seq:ATAATCGGCGATTAATCGAAT cut:AAT
	#切完后得到子序列 AT CGGCGATT CG 长度为2 8 2
	#拼接得到第一个位点:2+1=3 第二个位点:2+3+8+1=14 第三个位点:2+3+8+3+2+1=19

	import string
	import sys

	# fasta文件类
	class fasta(object):
	def __init__(self,head,seq):
	self.head=head
	self.seq=seq

	#处理单个fasta文件数据
	def sinfasta(sfas,target):
	targloc=[]
	name=sfas.head.split('\|')[3]

	sub=sfas.seq.split(target)
	if len(sub)<1:
	return #没有╮(╯_╰)╭

	i=1 #找到的序号
	before=1 #之前的位置标记

	tarlen=len(target)
	for s in sub[:-1]: #最末尾的不用考虑
	before+=len(s) #该切点开始位置
	targloc.append([name,str(i),str(before),str(before+tarlen)])
	before+=tarlen #该切点结束位置
	i+=1

	return targloc

	#将fna文件中的单个fasta分开
	def file2singles(filename):
	fl=[]
	f=open(filename,'r')
	fastalist=f.read().split('>')
	f.close()

	for fas in fastalist:
	if len(fas)<10:
	continue
	head=fas.split("\n")[0]
	seq=fas.replace(head,'').replace('\n','')

	fa=fasta(head,seq)
	fl.append(fa)
	return fl

	#主处理过程
	def process(filename,target,tofile):
	w=open(tofile,'w+')
	falist=file2singles(filename)
	for fa in falist:
	result=sinfasta(fa,target)
	lines=[string.join(item,'\t')+'\n' for item in result]

	w.writelines(lines)
	w.write('\n')
	w.close()
	return

	if __name__=="__main__":
	[filename,target,tofile]=sys.argv[1:]
	process(filename,target,tofile)