Skip to content

Instantly share code, notes, and snippets.

@vals
Created November 11, 2011 13:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vals/1358028 to your computer and use it in GitHub Desktop.
Save vals/1358028 to your computer and use it in GitHub Desktop.
Plot most common barcodes
from __future__ import with_statement

import os

import matplotlib.pyplot as plt
import yaml

plt.rc('xtick', labelsize='x-small')
def convert(yaml_file):
    """Convert a count_barcodes.py YAML report into a sorted ``.dat`` file.

    Reads the ``count`` of every barcode under the "matched" and
    "unmatched" keys of ``yaml_file``, expresses each count as a fraction
    of the combined total, sorts each group by descending fraction and
    dumps both lists (as YAML) to the output file.  The combined total is
    printed to stdout.

    The output file is written in the same directory as the input; its
    name is the input's base name up to the first "." plus ".dat"
    (``run/sample.yaml`` -> ``run/sample.dat``).

    Args:
        yaml_file: Path to the YAML file generated by count_barcodes.py.
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects.
    # Only use this on trusted files.
    with open(yaml_file) as in_handle:
        yaml_data = yaml.load(in_handle, Loader=yaml.Loader)

    matched_counts = dict(
        (barcode, info["count"])
        for barcode, info in yaml_data["matched"].items())
    unmatched_counts = dict(
        (barcode, info["count"])
        for barcode, info in yaml_data["unmatched"].items())
    total = float(sum(matched_counts.values()) +
                  sum(unmatched_counts.values()))

    def sorted_fractions(counts):
        # A zero total means every count is zero; report 0.0 instead of
        # dividing by zero.
        fractions = [(barcode, count / total if total else 0.0)
                     for barcode, count in counts.items()]
        fractions.sort(key=lambda pair: pair[1], reverse=True)
        return fractions

    print(total)

    # Strip the extension from the base name only, so dots in directory
    # components (e.g. "./data/x.yaml") do not truncate the path.
    directory, base_name = os.path.split(yaml_file)
    dat_path = os.path.join(directory, base_name.split(".")[0] + ".dat")
    with open(dat_path, "w") as out_handle:
        yaml.dump({'matched': sorted_fractions(matched_counts),
                   'unmatched': sorted_fractions(unmatched_counts)},
                  out_handle)
def plot_dat(dat_file):
    """Plot the sorted barcode fractions stored in a ``.dat`` file.

    ``dat_file`` is a YAML document with "matched" and "unmatched" keys,
    each holding a list of ``(barcode, fraction)`` pairs.  All matched
    barcodes are drawn as green bars; unmatched barcodes (blue) fill the
    plot up to 40 bars in total.  A red horizontal line marks the 0.02
    level.  Tick labels spell each barcode vertically; matched barcodes
    get a trailing "#".

    Args:
        dat_file: Path to the ``.dat`` file to plot.
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects.
    # Only use this on trusted files.  The full loader is kept because
    # PyYAML writes tuples with Python-specific tags that safe_load
    # rejects.
    with open(dat_file, "r") as in_handle:
        tuple_lists = yaml.load(in_handle, Loader=yaml.Loader)

    matched = tuple_lists["matched"]
    unmatched = tuple_lists["unmatched"]

    top_num = min(40, len(matched) + len(unmatched))
    n_matched = len(matched)
    # Clamp at zero: with more matched barcodes than top_num a negative
    # slice bound would select the wrong unmatched entries and leave the
    # bar positions, heights and tick labels with different lengths.
    n_unmatched = max(0, top_num - n_matched)
    shown_unmatched = unmatched[:n_unmatched]
    n_bars = n_matched + n_unmatched

    y_matched = [float(value) for _, value in matched]
    y_unmatched = [float(value) for _, value in shown_unmatched]

    fig = plt.figure()
    fig.subplots_adjust(bottom=0.2)
    ax = fig.add_subplot(111)

    percentile = 0.02
    # align='edge' keeps every bar spanning [x, x + 0.8], so the tick
    # offset of 0.4 below lands on the bar centres.
    ax.bar(list(range(n_matched)), y_matched, color='green', align='edge')
    ax.bar(list(range(n_matched, n_bars)), y_unmatched, color='blue',
           align='edge')
    ax.plot([0, n_bars], [percentile, percentile], 'r-', linewidth=3)

    ticks = ['\n'.join(barcode) + '\n#' for barcode, _ in matched]
    ticks += ['\n'.join(barcode) for barcode, _ in shown_unmatched]
    plt.xticks([x + .4 for x in range(n_bars)], ticks)
    fig.show()
def sort_out_N(yaml_file, out_yaml):
    """Re-key or demote matched barcodes that contain an 'N'.

    For every barcode under "matched" whose name contains 'N', the first
    entry of its ``variants`` that has no 'N' becomes the new key for that
    barcode's info.  If no such variant exists, the barcode is moved to
    the "unmatched" category instead.  The result is written to
    ``out_yaml``.

    Args:
        yaml_file: Path to the YAML file to read.
        out_yaml: Path of the YAML file to write.
    """
    # NOTE(security): yaml.Loader can construct arbitrary Python objects.
    # Only use this on trusted files.
    with open(yaml_file) as in_handle:
        yaml_data = yaml.load(in_handle, Loader=yaml.Loader)

    matched = yaml_data["matched"]
    # Iterate over a snapshot: keys are added to and deleted from
    # `matched` inside the loop, which is an error on a live dict view.
    for barcode, info in list(matched.items()):
        if 'N' not in barcode:
            continue
        for variant in info["variants"]:
            if 'N' not in variant:
                # NOTE(review): this overwrites any existing matched entry
                # stored under `variant` -- confirm that is intended.
                matched[variant] = info
                break
        else:
            # No N-free variant was found.
            yaml_data["unmatched"][barcode] = info
        del matched[barcode]

    with open(out_yaml, "w") as out_handle:
        yaml.dump(yaml_data, out_handle)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment