Skip to content

Instantly share code, notes, and snippets.

@mpkocher
Forked from natechols/classify_stats.json
Last active September 3, 2015 03:46
Show Gist options
  • Save mpkocher/f6d24468a83121d9ea60 to your computer and use it in GitHub Desktop.
{
"num_3_seen": 13,
"num_5_seen": 11,
"num_filtered_short_reads": 0,
"num_fl": 9,
"num_flc": 0,
"num_flnc": 9,
"num_flnc_bases": 35563,
"num_nfl": 13,
"num_nflc": null,
"num_nflnc": null,
"num_polyA_seen": 13,
"num_reads": 22
}
import sys
import json
import warnings
import itertools
from collections import defaultdict
from pbcommand.models.report import Report, Attribute, Table, Column
def to_attributes(raw_d, namespace):
    """Convert a raw metrics dict into a list of Attribute instances.

    Each key is lower-cased and prefixed with *namespace* to build the
    attribute id. Entries whose value is None are skipped with a warning
    (Attribute values can't be null).
    """
    attrs = []
    for key, value in raw_d.items():
        if value is None:
            # These can't be none for some reason
            warnings.warn("skipping null entry {k}->{v}".format(k=key, v=value))
            continue
        attr_id = "_".join([namespace, key.lower()])
        attrs.append(Attribute(attr_id, value, name=key))
    return attrs
def _merge_attributes_d(attributes_list):
attrs = defaultdict(lambda : [])
for ax in attributes_list:
for a in ax:
attrs[a.id].append(a.value)
return attrs
def sum_attributes(attributes_list):
    """Sum per-id attribute values from several chunks into merged Attributes.

    :param attributes_list: list of lists of Attribute instances (one
        inner list per chunk)
    :return: list of Attribute, one per unique id, with the summed value
    """
    merged = _merge_attributes_d(attributes_list)
    # .items() (not the Python-2-only .iteritems()) keeps this 2/3 compatible
    return [Attribute(k, sum(values), name=k) for k, values in merged.items()]
def attributes_to_table(attributes_list, table_id, title):
    """Merge the Attributes from different chunks to create a table.

    Each unique attribute id becomes one Column whose values are the
    per-chunk values for that id.

    :param attributes_list: list of lists of Attribute instances
    :param table_id: id for the resulting Table
    :param title: human-readable table title
    :return: pbcommand Table
    """
    attrs = _merge_attributes_d(attributes_list)
    # .items() (not the Python-2-only .iteritems()) keeps this 2/3 compatible
    columns = [Column(k.lower(), header=k, values=values)
               for k, values in attrs.items()]
    return Table(table_id, title=title, columns=columns)
def load_raw(file_name):
    """Load and parse a JSON file.

    :param file_name: path to a JSON file
    :return: the deserialized object (a dict for the classify stats file)
    """
    with open(file_name, 'r') as f:
        # json.load reads the stream directly; no need for loads(f.read())
        return json.load(f)
def main(stats_json='classify_stats.json', report_json='report.json',
         merged_report_json='merged-report.json'):
    """Build an iso-seq classify report, then a mock merged/chunked report.

    The input/output paths are parameters (with the original values as
    defaults) so the function can be reused against other files.

    :param stats_json: path to the classify stats JSON to load
    :param report_json: output path for the plain report
    :param merged_report_json: output path for the merged report
    :return: 0 on success (suitable as a process exit code)
    """
    d = load_raw(stats_json)
    report_id = "iso_seq_stats"
    namespace = "classify_stats"
    attributes = to_attributes(d, namespace)
    report = Report(report_id, attributes=attributes)
    report.write_json(report_json)

    # Create new Report with Table and Merged stats
    # mock chunks: reuse the same attribute list twice
    chunk_attrs = [attributes, attributes]
    table = attributes_to_table(chunk_attrs, 'chunk_metrics', "Chunk Metrics")
    # parenthesized print works in both Python 2 and 3 for a single argument
    print(table)
    merged_attributes = sum_attributes(chunk_attrs)
    mreport = Report(report_id, attributes=merged_attributes, tables=[table])
    mreport.write_json(merged_report_json)
    return 0


if __name__ == '__main__':
    sys.exit(main())
{
"_changelist": "UNKNOWN",
"_version": "0.2.13",
"attributes": [
{
"id": "iso_seq_stats.classify_stats_num_flc",
"name": "classify_stats_num_flc",
"value": 0
},
{
"id": "iso_seq_stats.classify_stats_num_5_seen",
"name": "classify_stats_num_5_seen",
"value": 22
},
{
"id": "iso_seq_stats.classify_stats_num_flnc_bases",
"name": "classify_stats_num_flnc_bases",
"value": 71126
},
{
"id": "iso_seq_stats.classify_stats_num_polya_seen",
"name": "classify_stats_num_polya_seen",
"value": 26
},
{
"id": "iso_seq_stats.classify_stats_num_fl",
"name": "classify_stats_num_fl",
"value": 18
},
{
"id": "iso_seq_stats.classify_stats_num_nfl",
"name": "classify_stats_num_nfl",
"value": 26
},
{
"id": "iso_seq_stats.classify_stats_num_3_seen",
"name": "classify_stats_num_3_seen",
"value": 26
},
{
"id": "iso_seq_stats.classify_stats_num_reads",
"name": "classify_stats_num_reads",
"value": 44
},
{
"id": "iso_seq_stats.classify_stats_num_flnc",
"name": "classify_stats_num_flnc",
"value": 18
},
{
"id": "iso_seq_stats.classify_stats_num_filtered_short_reads",
"name": "classify_stats_num_filtered_short_reads",
"value": 0
}
],
"dataset_uuids": [],
"id": "iso_seq_stats",
"plotGroups": [],
"tables": [
{
"columns": [
{
"header": "classify_stats_num_flc",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_flc",
"values": [
0,
0
]
},
{
"header": "classify_stats_num_5_seen",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_5_seen",
"values": [
11,
11
]
},
{
"header": "classify_stats_num_flnc_bases",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_flnc_bases",
"values": [
35563,
35563
]
},
{
"header": "classify_stats_num_polya_seen",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_polya_seen",
"values": [
13,
13
]
},
{
"header": "classify_stats_num_fl",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_fl",
"values": [
9,
9
]
},
{
"header": "classify_stats_num_nfl",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_nfl",
"values": [
13,
13
]
},
{
"header": "classify_stats_num_3_seen",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_3_seen",
"values": [
13,
13
]
},
{
"header": "classify_stats_num_reads",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_reads",
"values": [
22,
22
]
},
{
"header": "classify_stats_num_flnc",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_flnc",
"values": [
9,
9
]
},
{
"header": "classify_stats_num_filtered_short_reads",
"id": "iso_seq_stats.chunk_metrics.classify_stats_num_filtered_short_reads",
"values": [
0,
0
]
}
],
"id": "iso_seq_stats.chunk_metrics",
"title": "Chunk Metrics"
}
]
}
{
"_changelist": "UNKNOWN",
"_version": "0.2.13",
"attributes": [
{
"id": "iso_seq_stats.classify_stats_num_polya_seen",
"name": "num_polyA_seen",
"value": 13
},
{
"id": "iso_seq_stats.classify_stats_num_flc",
"name": "num_flc",
"value": 0
},
{
"id": "iso_seq_stats.classify_stats_num_flnc_bases",
"name": "num_flnc_bases",
"value": 35563
},
{
"id": "iso_seq_stats.classify_stats_num_3_seen",
"name": "num_3_seen",
"value": 13
},
{
"id": "iso_seq_stats.classify_stats_num_reads",
"name": "num_reads",
"value": 22
},
{
"id": "iso_seq_stats.classify_stats_num_fl",
"name": "num_fl",
"value": 9
},
{
"id": "iso_seq_stats.classify_stats_num_flnc",
"name": "num_flnc",
"value": 9
},
{
"id": "iso_seq_stats.classify_stats_num_filtered_short_reads",
"name": "num_filtered_short_reads",
"value": 0
},
{
"id": "iso_seq_stats.classify_stats_num_5_seen",
"name": "num_5_seen",
"value": 11
},
{
"id": "iso_seq_stats.classify_stats_num_nfl",
"name": "num_nfl",
"value": 13
}
],
"dataset_uuids": [],
"id": "iso_seq_stats",
"plotGroups": [],
"tables": []
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment