# julianhess/parse_cytoband.py Secret

## parse_cytoband.py
import numpy as np
import pandas as pd

# Lookup from chromosome names ("chr1" ... "chr22", "chrX", "chrY") to the
# integers 1 ... 24 that parse_cytoband reports in its "chr" column.
_chrmap = {
    "chr" + str(name): number
    for number, name in enumerate(list(range(1, 23)) + ["X", "Y"], start=1)
}


def parse_cytoband(cytoband):
    """Split each chromosome of a cytoband table into its two arms.

    Parameters
    ----------
    cytoband : str, path object or file-like object
        Passed straight to ``pandas.read_csv``. The input is read as a
        headerless, tab-separated table whose columns are taken to be
        chromosome name, start, end, band name and stain.

    Returns
    -------
    pandas.DataFrame
        Columns ``chr``, ``start``, ``end``; two rows per chromosome found
        in the input, ordered by chromosome number. The first row runs from
        0 to the start of the first band stained ``"acen"`` (the centromere
        label in UCSC cytoband files); the second runs from the start of the
        first band after the ``"acen"`` run to the end of the chromosome's
        last band. If the table has no rows, an empty frame with these
        columns is returned.

    Raises
    ------
    KeyError
        If a chromosome name is not a key of ``_chrmap``.
    ValueError
        If a chromosome does not yield exactly four distinct breakpoints
        (0, centromere start, centromere end, chromosome end), e.g. because
        it has no ``"acen"`` band or the ``"acen"`` run is its last band.
    """
    cband = pd.read_csv(
        cytoband, sep="\t", names=["chr", "start", "end", "band", "stain"]
    )
    # Explicit lookup (rather than Series.map) so unknown names raise KeyError
    # instead of silently becoming NaN.
    cband["chr"] = [_chrmap[name] for name in cband["chr"]]

    arms = []
    # Handle every chromosome on its own: no state leaks from one chromosome
    # to the next, and only chromosomes present in the input produce rows.
    for chrom, bands in cband.groupby("chr", sort=True):
        breakpoints = {0, int(bands["end"].iloc[-1])}
        in_centromere = False
        for start, stain in zip(bands["start"], bands["stain"]):
            is_acen = stain == "acen"
            # A change of "acen" status marks the start or end of the centromere.
            if is_acen != in_centromere:
                breakpoints.add(int(start))
            in_centromere = is_acen

        if len(breakpoints) != 4:
            raise ValueError(
                "expected 4 breakpoints (0, centromere start, centromere end, "
                "chromosome end) for chromosome {}, found {}".format(
                    chrom, sorted(breakpoints)
                )
            )

        arm1_start, arm1_end, arm2_start, arm2_end = sorted(breakpoints)
        arms.append((int(chrom), arm1_start, arm1_end))
        arms.append((int(chrom), arm2_start, arm2_end))

    return pd.DataFrame(arms, columns=["chr", "start", "end"])
	import numpy as np
	import pandas as pd

	# Lookup from chromosome names ("chr1" ... "chr22", "chrX", "chrY") to the
	# integers 1 ... 24 that parse_cytoband reports in its "chr" column.
	_chrmap = {
		"chr" + str(name): number
		for number, name in enumerate(list(range(1, 23)) + ["X", "Y"], start=1)
	}


	def parse_cytoband(cytoband):
		"""Split each chromosome of a cytoband table into its two arms.

		Parameters
		----------
		cytoband : str, path object or file-like object
			Passed straight to ``pandas.read_csv``. The input is read as a
			headerless, tab-separated table whose columns are taken to be
			chromosome name, start, end, band name and stain.

		Returns
		-------
		pandas.DataFrame
			Columns ``chr``, ``start``, ``end``; two rows per chromosome found
			in the input, ordered by chromosome number. The first row runs from
			0 to the start of the first band stained ``"acen"`` (the centromere
			label in UCSC cytoband files); the second runs from the start of the
			first band after the ``"acen"`` run to the end of the chromosome's
			last band. If the table has no rows, an empty frame with these
			columns is returned.

		Raises
		------
		KeyError
			If a chromosome name is not a key of ``_chrmap``.
		ValueError
			If a chromosome does not yield exactly four distinct breakpoints
			(0, centromere start, centromere end, chromosome end), e.g. because
			it has no ``"acen"`` band or the ``"acen"`` run is its last band.
		"""
		cband = pd.read_csv(
			cytoband, sep="\t", names=["chr", "start", "end", "band", "stain"]
		)
		# Explicit lookup (rather than Series.map) so unknown names raise KeyError
		# instead of silently becoming NaN.
		cband["chr"] = [_chrmap[name] for name in cband["chr"]]

		arms = []
		# Handle every chromosome on its own: no state leaks from one chromosome
		# to the next, and only chromosomes present in the input produce rows.
		for chrom, bands in cband.groupby("chr", sort=True):
			breakpoints = {0, int(bands["end"].iloc[-1])}
			in_centromere = False
			for start, stain in zip(bands["start"], bands["stain"]):
				is_acen = stain == "acen"
				# A change of "acen" status marks the start or end of the centromere.
				if is_acen != in_centromere:
					breakpoints.add(int(start))
				in_centromere = is_acen

			if len(breakpoints) != 4:
				raise ValueError(
					"expected 4 breakpoints (0, centromere start, centromere end, "
					"chromosome end) for chromosome {}, found {}".format(
						chrom, sorted(breakpoints)
					)
				)

			arm1_start, arm1_end, arm2_start, arm2_end = sorted(breakpoints)
			arms.append((int(chrom), arm1_start, arm1_end))
			arms.append((int(chrom), arm2_start, arm2_end))

		return pd.DataFrame(arms, columns=["chr", "start", "end"])