Skip to content

Instantly share code, notes, and snippets.

@bactisme
Created October 27, 2015 10:18
Show Gist options
  • Save bactisme/abb2283274f826e06b71 to your computer and use it in GitHub Desktop.
Save bactisme/abb2283274f826e06b71 to your computer and use it in GitHub Desktop.
Cut Tags Stats
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import operator, sys
def split_tags(string):
    """Return the non-empty tag names found in a brace-wrapped tag string.

    Every ``{`` is dropped, the remainder is cut at each ``}``, and empty
    pieces (such as the one after the final ``}``) are discarded.

    >>> split_tags("{free}{mobile}")
    ['free', 'mobile']
    """
    without_openers = string.replace('{', '')
    tags = []
    for piece in without_openers.split('}'):
        if piece:
            tags.append(piece)
    return tags
class CutTagsStats:
    """Sum the counts attached to tags in a comma-separated text file.

    Each line handled by ``read_csv_file`` is split on commas; the first
    field is turned into tags with ``split_tags`` and the second field is
    parsed as an integer that is added to the total of every one of those
    tags. Totals are kept in ``self.data``.
    """

    def __init__(self, filename):
        """Store the input path and start with empty totals.

        Args:
            filename: path of the file that ``read_csv_file`` will open.
        """
        self.filename = filename
        # Maps tag -> sum of the counts of all handled lines carrying it.
        self.data = {}

    def handle_line(self, tags, count):
        """Add ``count`` to the running total of every tag in ``tags``.

        Args:
            tags: iterable of tag names (a tag repeated in ``tags`` is
                counted once per occurrence).
            count: amount to add for each tag.
        """
        for tag in tags:
            self.data[tag] = self.data.get(tag, 0) + count

    def read_csv_file(self):
        """Read ``self.filename`` and accumulate its tag counts.

        Reading stops at the first line that is a single character long
        (i.e. a bare newline). Lines with more than two comma-separated
        fields are skipped.

        Raises:
            IOError/OSError: if the file cannot be opened.
            IndexError: if a line contains no comma.
            ValueError: if the second field is not an integer.
        """
        # The context manager closes the file even if a line fails to parse.
        with open(self.filename, "r") as fp:
            for line in fp:
                if len(line) == 1:
                    # Blank line: treated as the end of the data.
                    break
                fields = line.split(',')
                if len(fields) > 2:
                    # Old format like
                    # "{business},{3g},{arcep},{femtocell},{free},{free-mobile},{mobile}",4
                    # puts commas between tags; such lines are ignored.
                    continue
                tags = split_tags(fields[0])
                # int() tolerates the trailing newline left on the field.
                count = int(fields[1])
                self.handle_line(tags, count)

    def sort_print(self):
        """Print one ``tag,total`` line per tag, largest total first."""
        ranked = sorted(self.data.items(), key=operator.itemgetter(1),
                        reverse=True)
        for entry in ranked:
            # Single parenthesised argument: prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%s,%d" % entry)
if __name__ == '__main__':
    # Script entry point: expects the input file path as the first
    # command-line argument; without it, only a usage hint is printed.
    if len(sys.argv) < 2:
        # Parenthesised single argument keeps this valid on Python 2 and 3.
        print("python cut_tags_stats.py file.csv")
    else:
        cut = CutTagsStats(sys.argv[1])
        cut.read_csv_file()
        cut.sort_print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment