Skip to content

Instantly share code, notes, and snippets.

@dpryan79
Created April 29, 2016 20:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpryan79/ce3d119113646bb1b7752a721fd7eb79 to your computer and use it in GitHub Desktop.
Save dpryan79/ce3d119113646bb1b7752a721fd7eb79 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Build GRCh38 chromosome-name conversion tables from an assembly report.

Run as a script, this reads ``GCA_000001405.22_GRCh38.p7_assembly_report.txt``
from the current directory and writes six two-column, tab-separated files
(``GRCh38_<from>2<to>.txt``) converting between Ensembl, GENCODE and UCSC
sequence names.
"""

GRCH38_REPORT = "GCA_000001405.22_GRCh38.p7_assembly_report.txt"

# (mapping key, output file) in the order the files are written.
# e = Ensembl, g = GENCODE, u = UCSC.
GRCH38_OUTPUTS = (
    ("e2g", "GRCh38_ensembl2gencode.txt"),
    ("g2e", "GRCh38_gencode2ensembl.txt"),
    ("u2g", "GRCh38_UCSC2gencode.txt"),
    ("u2e", "GRCh38_UCSC2ensembl.txt"),
    ("e2u", "GRCh38_ensembl2UCSC.txt"),
    ("g2u", "GRCh38_gencode2UCSC.txt"),
)


def build_grch38_maps(report_lines):
    """Return the six name mappings for the rows of an assembly report.

    Args:
        report_lines: iterable of report lines (an open file works). Lines
            starting with ``#`` and blank lines are skipped. Each remaining
            line is split on tabs; columns 0, 1, 2, 4 and 9 are read.
            NOTE(review): in NCBI assembly reports these are presumably
            Sequence-Name, Sequence-Role, Assigned-Molecule, GenBank-Accn and
            UCSC-style-name -- confirm against the report's header line.

    Returns:
        dict mapping each key of ``GRCH38_OUTPUTS`` to a list of
        ``"source\\ttarget\\n"`` strings in input order (unsorted).

    Rows whose role (column 1) is not recognised are reported on stdout and
    contribute nothing.
    """
    maps = {key: [] for key, _ in GRCH38_OUTPUTS}

    def add(key, source, target):
        maps[key].append("{}\t{}\n".format(source, target))

    for line in report_lines:
        # Blank lines would otherwise fail with IndexError on cols[1].
        if line.startswith("#") or not line.strip():
            continue
        cols = line.strip().split("\t")
        ctype = cols[1]
        if ctype == "assembled-molecule":
            # Ensembl uses column 2; GENCODE and UCSC both use column 9.
            ensembl, ucsc = cols[2], cols[9]
            add("e2g", ensembl, ucsc)
            add("g2e", ucsc, ensembl)
            add("e2u", ensembl, ucsc)
            add("u2e", ucsc, ensembl)
            add("u2g", ucsc, ucsc)
            add("g2u", ucsc, ucsc)
        elif ctype in ("unlocalized-scaffold", "unplaced-scaffold"):
            # UCSC: column 9; GENCODE and Ensembl: column 4.
            accession, ucsc = cols[4], cols[9]
            add("e2g", accession, accession)
            add("g2e", accession, accession)
            add("e2u", accession, ucsc)
            add("u2e", ucsc, accession)
            add("u2g", ucsc, accession)
            add("g2u", accession, ucsc)
        elif ctype in ("fix-patch", "novel-patch"):
            # GENCODE: column 4; Ensembl: "CHR_" + column 0.
            # No UCSC name is used for patches: the *2UCSC tables get an
            # empty target and the UCSC2* tables get no entry at all.
            ensembl, accession = "CHR_" + cols[0], cols[4]
            add("e2g", ensembl, accession)
            add("g2e", accession, ensembl)
            add("e2u", ensembl, "")
            add("g2u", accession, "")
        elif ctype == "alt-scaffold":
            # UCSC: column 9; GENCODE: column 4; Ensembl: "CHR_" + column 0.
            ensembl, accession, ucsc = "CHR_" + cols[0], cols[4], cols[9]
            add("e2g", ensembl, accession)
            add("g2e", accession, ensembl)
            add("e2u", ensembl, ucsc)
            add("u2e", ucsc, ensembl)
            add("u2g", ucsc, accession)
            add("g2u", accession, ucsc)
        else:
            print("can't handle {}".format(ctype))
    return maps


def write_grch38_maps():
    """Read the GRCh38 report and write every mapping file, sorted.

    Lines are sorted case-insensitively. All files are handled through
    ``with`` so they are closed even if parsing or writing fails.
    """
    with open(GRCH38_REPORT, "r") as report:
        maps = build_grch38_maps(report)
    for key, path in GRCH38_OUTPUTS:
        with open(path, "w") as out:
            out.writelines(sorted(maps[key], key=str.lower))


if __name__ == "__main__":
    write_grch38_maps()
#!/usr/bin/env python
"""Build GRCm38 chromosome-name conversion tables from an assembly report.

Run as a script, this reads ``GCA_000001635.6_GRCm38.p4_assembly_report.txt``
from the current directory and writes six two-column, tab-separated files
(``GRCm38_<from>2<to>.txt``) converting between Ensembl, GENCODE and UCSC
sequence names.
"""

GRCM38_REPORT = "GCA_000001635.6_GRCm38.p4_assembly_report.txt"

# (mapping key, output file) in the order the files are written.
# e = Ensembl, g = GENCODE, u = UCSC.
GRCM38_OUTPUTS = (
    ("e2g", "GRCm38_ensembl2gencode.txt"),
    ("g2e", "GRCm38_gencode2ensembl.txt"),
    ("u2g", "GRCm38_UCSC2gencode.txt"),
    ("u2e", "GRCm38_UCSC2ensembl.txt"),
    ("e2u", "GRCm38_ensembl2UCSC.txt"),
    ("g2u", "GRCm38_gencode2UCSC.txt"),
)


def build_grcm38_maps(report_lines):
    """Return the six name mappings for the rows of an assembly report.

    Args:
        report_lines: iterable of report lines (an open file works). Lines
            starting with ``#`` and blank lines are skipped. Each remaining
            line is split on tabs; columns 0, 1, 2, 4 and 9 are read.
            NOTE(review): in NCBI assembly reports these are presumably
            Sequence-Name, Sequence-Role, Assigned-Molecule, GenBank-Accn and
            UCSC-style-name -- confirm against the report's header line.

    Returns:
        dict mapping each key of ``GRCM38_OUTPUTS`` to a list of
        ``"source\\ttarget\\n"`` strings in input order (unsorted).

    ``alt-scaffold`` rows are skipped silently; rows with any other
    unrecognised role (column 1) are reported on stdout and contribute
    nothing.
    """
    maps = {key: [] for key, _ in GRCM38_OUTPUTS}

    def add(key, source, target):
        maps[key].append("{}\t{}\n".format(source, target))

    for line in report_lines:
        # Blank lines would otherwise fail with IndexError on cols[1].
        if line.startswith("#") or not line.strip():
            continue
        cols = line.strip().split("\t")
        ctype = cols[1]
        if ctype == "alt-scaffold":
            # Alternate scaffolds are deliberately left out of every table.
            continue
        if ctype == "assembled-molecule":
            # Ensembl uses column 2; GENCODE and UCSC both use column 9.
            ensembl, ucsc = cols[2], cols[9]
            add("e2g", ensembl, ucsc)
            add("g2e", ucsc, ensembl)
            add("e2u", ensembl, ucsc)
            add("u2e", ucsc, ensembl)
            add("u2g", ucsc, ucsc)
            add("g2u", ucsc, ucsc)
        elif ctype in ("unlocalized-scaffold", "unplaced-scaffold"):
            # UCSC: column 9; GENCODE and Ensembl: column 4.
            accession, ucsc = cols[4], cols[9]
            add("e2g", accession, accession)
            add("g2e", accession, accession)
            add("e2u", accession, ucsc)
            add("u2e", ucsc, accession)
            add("u2g", ucsc, accession)
            add("g2u", accession, ucsc)
        elif ctype in ("novel-patch", "fix-patch"):
            # UCSC: ignored; GENCODE: column 4; Ensembl: "CHR_" + column 0.
            # The *2UCSC tables get an empty target and the UCSC2* tables
            # get no entry at all.
            ensembl, accession = "CHR_" + cols[0], cols[4]
            add("e2g", ensembl, accession)
            add("g2e", accession, ensembl)
            add("e2u", ensembl, "")
            add("g2u", accession, "")
        else:
            print("can't handle {}".format(ctype))
    return maps


def write_grcm38_maps():
    """Read the GRCm38 report and write every mapping file, sorted.

    Lines are sorted with the default (case-sensitive) string ordering. All
    files are handled through ``with`` so they are closed even if parsing or
    writing fails.
    """
    with open(GRCM38_REPORT, "r") as report:
        maps = build_grcm38_maps(report)
    for key, path in GRCM38_OUTPUTS:
        with open(path, "w") as out:
            out.writelines(sorted(maps[key]))


if __name__ == "__main__":
    write_grcm38_maps()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment