Created
November 11, 2011 13:42
-
-
Save vals/1358028 to your computer and use it in GitHub Desktop.
Plot most common barcodes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import with_statement

import os

import matplotlib.pyplot as plt
import yaml

plt.rc('xtick', labelsize='x-small')
def convert(yaml_file):
    """Convert barcode counts from a YAML file into sorted fractions.

    Reads the ``matched`` and ``unmatched`` sections of ``yaml_file`` (a
    file generated by count_barcodes.py), divides each barcode's ``count``
    by the total count over both sections, and writes the result to a
    ``.dat`` file in the same directory as the input.  The output file name
    is the input file name up to its first "." plus ``.dat``.

    The ``.dat`` file is itself YAML: a mapping with the keys ``matched``
    and ``unmatched``, each holding a list of (barcode, fraction) tuples
    sorted by descending fraction.  The total count is printed to stdout.
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects, so
    # only use this on files from a trusted source.  Passing the loader
    # explicitly keeps the historical yaml.load() behaviour and is required
    # by recent PyYAML releases.
    with open(yaml_file) as in_handle:
        yaml_data = yaml.load(in_handle, Loader=yaml.Loader)

    counts = {}
    for section in ("matched", "unmatched"):
        counts[section] = dict(
            (barcode, info["count"])
            for barcode, info in yaml_data[section].items())

    # float() so that the divisions below are true divisions on Python 2.
    total = float(sum(counts["matched"].values()) +
                  sum(counts["unmatched"].values()))

    fractions = {}
    for section, section_counts in counts.items():
        pairs = [(barcode, count / total)
                 for barcode, count in section_counts.items()]
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        fractions[section] = pairs

    print(total)

    # Only strip the extension from the file name itself, so that dots in
    # the directory part (e.g. "./run.yaml") do not truncate the path.
    directory, file_name = os.path.split(yaml_file)
    dat_path = os.path.join(directory, file_name.split(".")[0] + ".dat")
    with open(dat_path, "w") as dat_handle:
        yaml.dump(fractions, dat_handle)
def plot_dat(dat_file, max_bars=40, threshold=0.02):
    """Plot the sorted (barcode, fraction) lists stored in ``dat_file``.

    ``dat_file`` is a YAML file with the keys ``matched`` and ``unmatched``,
    each holding a list of (barcode, fraction) pairs.  At most ``max_bars``
    bars are drawn: matched barcodes first (green, tick label ending in
    "#"), then unmatched barcodes (blue) in the remaining slots.  A red
    horizontal line is drawn at ``threshold``.

    The defaults reproduce the previously hard-coded values (40 bars, line
    at 0.02).
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects, so
    # only use this on files from a trusted source.  The full loader is
    # presumably needed because the pairs are dumped as Python tuples.
    with open(dat_file, "r") as in_handle:
        tuple_lists = yaml.load(in_handle, Loader=yaml.Loader)

    matched = tuple_lists["matched"]
    unmatched = tuple_lists["unmatched"]
    top_num = min(max_bars, len(matched) + len(unmatched))

    # Clamp both lists to the available slots.  Without clamping the matched
    # list, more than ``max_bars`` matched barcodes made the unmatched slice
    # length negative, so bars and tick labels no longer lined up.
    matched = matched[:top_num]
    unmatched = unmatched[:top_num - len(matched)]

    y_matched = [float(value) for _, value in matched]
    y_unmatched = [float(value) for _, value in unmatched]

    fig = plt.figure()
    fig.subplots_adjust(bottom=0.2)
    ax = fig.add_subplot(111)

    # align='edge' puts each bar's left edge at its x value, which is what
    # the "+ .4" tick offset below assumes (default bar width is 0.8).
    ax.bar(list(range(len(y_matched))), y_matched,
           color='green', align='edge')
    ax.bar(list(range(len(y_matched), top_num)), y_unmatched,
           color='blue', align='edge')
    ax.plot([0, top_num], [threshold, threshold], 'r-', linewidth=3)

    # One character per line so that long barcodes fit under narrow bars.
    ticks = ['\n'.join(barcode) + '\n#' for barcode, _ in matched]
    ticks += ['\n'.join(barcode) for barcode, _ in unmatched]
    plt.xticks([x + .4 for x in range(top_num)], ticks)

    fig.show()
def sort_out_N(yaml_file, out_yaml):
    """Re-key or demote matched barcodes that contain 'N'.

    Loads ``yaml_file`` and goes through the entries of its ``matched``
    section.  For every barcode containing 'N', the first entry of its
    ``variants`` that does not contain 'N' becomes the new key for that
    barcode's info.  If no such variant exists, the info is moved to the
    ``unmatched`` section instead.  In both cases the original 'N' key is
    removed from ``matched``.  The modified data is written to ``out_yaml``.
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects, so
    # only use this on files from a trusted source.
    with open(yaml_file) as in_handle:
        yaml_data = yaml.load(in_handle, Loader=yaml.Loader)

    matched = yaml_data["matched"]

    # Iterate over a snapshot: the loop adds and removes keys, which is not
    # allowed while iterating a live dict view on Python 3.
    for barcode, info in list(matched.items()):
        if 'N' not in barcode:
            continue

        for variant in info["variants"]:
            if 'N' not in variant:
                # NOTE(review): if ``variant`` is already a key in
                # ``matched`` its existing info is overwritten, not merged
                # -- confirm this is intended.
                matched[variant] = info
                break
        else:
            # No N-free variant was found.
            yaml_data["unmatched"][barcode] = info

        del matched[barcode]

    with open(out_yaml, "w") as out_handle:
        yaml.dump(yaml_data, out_handle)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment