Skip to content

Instantly share code, notes, and snippets.

@brentp
Created February 16, 2012 06:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brentp/1842507 to your computer and use it in GitHub Desktop.
Save brentp/1842507 to your computer and use it in GitHub Desktop.
simpler pyfasta interface using fai
import os.path as op
class Fasta(object):
def __init__(self, fname):
assert op.exists(fname) and op.exists(fname + ".fai")
self.fname = fname
self._fh = open(self.fname)
self.fai = dict(self._parse_fai(fname + ".fai"))
def _parse_fai(self, fai):
for line in open(fai):
seq, seq_len, start, len1, len2 = line.split("\t")
yield seq, {"seq": seq,
"seq_len": int(seq_len),
"start": int(start),
"len": int(len1)}
def __getitem__(self, chrom):
return Seq(self.fai[chrom], self._fh)
def __repr__(self):
return "Fasta('%s')" % self.fname
def keys(self): return self.fai.keys()
def iterkeys(self):
for k in self.keys():
yield self
def values(self): return self.fai.values()
def itervalues(self):
for v in self.values(): yield v
def items(self): return self.fai.items()
def iteritems(self):
for i in self.fai.iteritems(): yield i
class Seq(object):
def __init__(self, fai_line, fh):
self.fai_line = fai_line
self._fh = fh
def __len__(self):
return self.fai_line['seq_len']
def __repr__(self):
return "Seq('%s')" % self.fai_line['seq']
def __getitem__(self, slice):
ss = slice.start or 0
se = slice.stop or len(self) + 2
assert se >= ss >= 0, (se, ss)
assert slice.step is None
LL = self.fai_line['len']
lines, inline = divmod(ss, LL)
start = self.fai_line['start'] + inline + lines * LL + lines
L = len(self) + 2
end = self.fai_line['start'] + lines + min(slice.stop or L, L)
self._fh.seek(start)
l = end - start
assert l <= L, ("requested sequence out of range!", l, L)
seqs = []
seq_len, rl = 0, self._fh.readline
LL += 1
while seq_len <= l:
s = rl(LL) #.rstrip(LL)
if seqs and not s: break
seqs.append(s)
seq_len += LL
if len(seqs):
seqs[-1] = seqs[-1].rstrip()
return "".join(seqs)[:l]
if __name__ == "__main__":
from pyfasta import Fasta as F
f = Fasta('c.fasta')
p = F('c.fasta')
assert f['2'][:4] == p['2'][:4]
print len(f['2'])
print f
print f['2']
if f['3'][620:630] != p['3'][620:]:
print( f['3'][620:630], p['3'][620:])
print f['3'][620:630]
print f['3'][630:630]
assert f['2'][100:106] == p['2'][100:106]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment