Skip to content

Instantly share code, notes, and snippets.

@Nanguage
Created June 27, 2019 03:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Nanguage/9fdf0ba44f4ef4021d770831f8899c20 to your computer and use it in GitHub Desktop.
Save Nanguage/9fdf0ba44f4ef4021d770831f8899c20 to your computer and use it in GitHub Desktop.
import attr
import typing
from collections import defaultdict
@attr.s(auto_attribs=True)
class GFF3Record:
    """A single GFF3 feature together with its links in the feature tree.

    Every field has a default, so a record can be created empty and filled
    in afterwards.  ``parent`` and ``childs`` connect records to each other:
    ``parent`` is ``None`` until a parent record is assigned, and ``childs``
    starts as a fresh empty list for every instance.
    """

    # Column-derived fields, in the order the positional constructor
    # arguments are accepted.
    seq_id: str = ""
    source: str = ""
    type_: str = ""
    start: int = 0
    end: int = 0
    # "." is the placeholder used when score / strand carry no value.
    score: str = "."
    strand: str = "."
    # Parsed key/value pairs of the attribute column; one dict per instance.
    attributes: typing.Dict[str, str] = attr.Factory(dict)
    # Identifier under which the record is indexed.
    id_: str = ""
    # Optional because a record without a parent keeps the ``None`` default.
    parent: typing.Optional["GFF3Record"] = None
    # Records that name this record as their parent; one list per instance.
    childs: typing.List["GFF3Record"] = attr.Factory(list)
def parse_attributes(attrs_str):
    """Parse a GFF3 attribute column (``key=value;key=value``) into a dict.

    Args:
        attrs_str: Raw text of the attribute column.

    Returns:
        A dict mapping each attribute key to its value (both strings).
        Empty input and the ``.`` placeholder yield an empty dict.  When a
        key occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-empty segment contains no ``=`` separator.
    """
    attrs = {}
    text = attrs_str.strip()
    # "." is the GFF3 placeholder for "no value"; treat it like empty input.
    if not text or text == ".":
        return attrs
    for segment in text.split(";"):
        segment = segment.strip()
        # A trailing or doubled ";" produces empty segments; ignore them.
        if not segment:
            continue
        # Split on the first "=" only so values may themselves contain "=".
        key, sep, value = segment.partition("=")
        if not sep:
            raise ValueError(
                "malformed GFF3 attribute (missing '='): %r" % segment
            )
        attrs[key] = value
    return attrs
def parse_gff3(path):
    """Read a GFF3 file and build linked ``GFF3Record`` objects.

    Args:
        path: Path of the GFF3 file to read.

    Returns:
        A ``(records, collection_by_type)`` tuple.  ``records`` maps each
        record id to its ``GFF3Record``; ``collection_by_type`` is a
        ``defaultdict`` mapping a feature type to the list of records of
        that type, in file order.

    Raises:
        ValueError: If a feature line has fewer than nine tab-separated
            columns, or its start/end columns are not integers.
        KeyError: If a record's ``Parent`` attribute names a record that has
            not appeared earlier in the file.
    """
    records = {}
    collection_by_type = defaultdict(list)
    with open(path) as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # Everything after the ##FASTA directive is sequence data, not
            # feature lines, so stop reading there.
            if line.startswith("##FASTA"):
                break
            # Skip blank lines as well as comments / directives.
            if not line or line.startswith("#"):
                continue
            itms = line.split("\t")
            if len(itms) < 9:
                raise ValueError(
                    "line %d: expected 9 tab-separated columns, got %d"
                    % (line_no, len(itms))
                )
            attrs = parse_attributes(itms[-1])
            rec = GFF3Record(
                seq_id=itms[0],
                source=itms[1],
                type_=itms[2],
                start=int(itms[3]),
                end=int(itms[4]),
                score=itms[5],  # previously dropped, leaving score at "."
                strand=itms[6],
                attributes=attrs,
            )

            parent_id = attrs.get("Parent")
            if "ID" in attrs:
                rec.id_ = attrs["ID"]
            elif parent_id is not None:
                # Derive an id from the parent, e.g. an "exon" whose parent
                # is "transcript:T1" becomes "exon:T1".  A parent id without
                # ":" is used whole instead of raising IndexError.
                # NOTE: ID-less siblings of the same type under one parent
                # derive the same id, so ``records`` keeps only the last of
                # them; all of them stay reachable through ``parent.childs``
                # and ``collection_by_type``.
                pieces = parent_id.split(":")
                suffix = pieces[1] if len(pieces) > 1 else pieces[0]
                rec.id_ = rec.type_ + ":" + suffix
            else:
                # Neither ID nor Parent: fall back to a positional id rather
                # than failing with a KeyError.
                rec.id_ = f"{rec.type_}:{rec.seq_id}:{rec.start}-{rec.end}"

            if parent_id is not None:
                try:
                    rec.parent = records[parent_id]
                except KeyError:
                    raise KeyError(
                        "line %d: Parent %r not defined earlier in the file"
                        % (line_no, parent_id)
                    ) from None
                rec.parent.childs.append(rec)

            records[rec.id_] = rec
            collection_by_type[rec.type_].append(rec)
    return records, collection_by_type
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment