Skip to content

Instantly share code, notes, and snippets.

@hanfeisun
Created April 13, 2013 07:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hanfeisun/5377516 to your computer and use it in GitHub Desktop.
Save hanfeisun/5377516 to your computer and use it in GitHub Desktop.
Two duplicate function
def _peaks_parse(input):
total = 0
fc20n = 0
fc10n = 0
peaks_info = {}
with open(input) as peaks_xls:
for line in peaks_xls:
if line.startswith('# tags after filtering in treatment'):
# tags after filtering in treatment: 13438948
peaks_info["uniloc"] = int(line.strip().split(":")[1])
if line.startswith('# d'):
peaks_info["distance"] = int(line.strip().split("=")[1])
if line.strip() != "" and not line.startswith("#") and not line.startswith("chr\t"):
l = line.strip().split("\t")
total += 1
## column 7th denotes fold change value
fc = float(l[7])
if fc >= 20:
fc20n += 1
if fc >= 10:
fc10n += 1
peaks_info["totalpeak"] = total
peaks_info["peaksge20"] = fc20n
peaks_info["peaksge10"] = fc10n
peaks_info["peaksge20ratio"] = peaks_info["peaksge20"] / peaks_info["totalpeak"]
peaks_info["peaksge10ratio"] = peaks_info["peaksge10"] / peaks_info["totalpeak"]
return peaks_info
def _macs2_summary_parse(input={"macs2_peaks_xls": ""}, param={"id": ""}):
"""Basic statistic of peak calling result."""
name = 'dataset' + param["id"]
shift_size = 0
with open(input["macs2_peaks_xls"], "rU") as fhd:
fold_enrichment_list = []
for i in fhd:
i = i.strip()
if i.startswith("# qvalue cutoff"):
q_value_cutoff = float(i.split('=')[1])
continue
if i.startswith("#") or i.startswith("chr\t") or not i:
continue
if i.startswith("# d"): # parse shift-size, # d =
shift_size = int(i.strip().split("=")[1]) / 2
fold_enrichment = float(i.split("\t")[7]) #8th column is fold change
fold_enrichment_list.append(fold_enrichment)
fold_greater_than_10_peaks = [x for x in fold_enrichment_list if x >= 10]
total_peak_count = len(fold_enrichment_list)
fold_greater_than_10_peaks_count = len(fold_greater_than_10_peaks)
peaks_summary = [name, q_value_cutoff, total_peak_count, fold_greater_than_10_peaks_count, shift_size]
return {"peaks_summary": peaks_summary, "fold_gt_10_peaks_count": fold_greater_than_10_peaks_count}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment