Skip to content

Instantly share code, notes, and snippets.

@DeusFigendi
Created July 11, 2017 10:55
Show Gist options
  • Save DeusFigendi/32f5ef33e267b4393d97fb310e76ba7c to your computer and use it in GitHub Desktop.
Save DeusFigendi/32f5ef33e267b4393d97fb310e76ba7c to your computer and use it in GitHub Desktop.
This Python script generates statistics about your diaspora* profile. Usage: python diaspora_profile_stats.py profile_export_file.json
import json
import os
from datetime import datetime
import collections
import unicodedata
import sys
# Useful data structures in the export:
#   profile_dc['user']['posts'][0..n]['created_at']
#   profile_dc['user']['posts'][0..n]['type']
#   profile_dc['user']['comments'][0..n]['post_guid']


def _parse_created_at(stamp):
    """Turn a 'YYYY-MM-DD HH:MM:SS ...' string into a naive datetime.

    Only the first two space-separated fields are used (date, then time);
    anything after them is ignored.
    """
    date_part, time_part = stamp.split(" ")[:2]
    year, month, day = (int(piece) for piece in date_part.split("-")[:3])
    hour, minute, second = (int(piece) for piece in time_part.split(":")[:3])
    return datetime(year, month, day, hour, minute, second)


def _tally(buckets, key, text_length):
    """Add one post of *text_length* characters to ``buckets[key]``."""
    entry = buckets.setdefault(key, {'count': 0, 'byte': 0})
    entry['count'] += 1
    entry['byte'] += text_length


def _peak(buckets, field):
    """Return the largest ``entry[field]`` over all buckets (0 if empty)."""
    return max([entry[field] for entry in buckets.values()] or [0])


def _bar(count, byte, max_count, max_byte):
    """Render one chart row, scaled so the busiest bucket is 100 chars wide.

    ':' covers the part shared by both measures; the excess is drawn with
    "'" when the post count dominates and '.' when the text size dominates.
    """
    # Floor division keeps the lengths integral: true division would give
    # floats, and a float cannot be used to repeat a string.  A zero
    # maximum (e.g. every post has empty text) yields an empty bar instead
    # of a ZeroDivisionError.
    count_dots = count * 100 // max_count if max_count else 0
    byte_dots = byte * 100 // max_byte if max_byte else 0
    if count_dots >= byte_dots:
        return ':' * byte_dots + "'" * (count_dots - byte_dots)
    return ':' * count_dots + '.' * (byte_dots - count_dots)


def _print_chart(buckets):
    """Print one '`key` bar' line per bucket, in ascending key order."""
    max_count = _peak(buckets, 'count')
    max_byte = _peak(buckets, 'byte')
    for key, entry in sorted(buckets.items()):
        bar = _bar(entry['count'], entry['byte'], max_count, max_byte)
        print('`{0}` {1}'.format(key, bar))


def _write_csv(path, header, rows):
    """Write *header* and *rows* to *path* as ';'-separated, quoted fields.

    Records are separated by a newline; no newline follows the last one.
    """
    lines = [
        ';'.join('"{0}"'.format(value) for value in record)
        for record in [header] + list(rows)
    ]
    with open(path, 'w') as csv_filehandler:
        csv_filehandler.write('\n'.join(lines))


def main(argv=None):
    """Print activity charts and write year.csv, month.csv and day.csv.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the path of the JSON
    profile export.  The CSV files are created in the current working
    directory.  Returns 0 on success and 2 when the path argument is
    missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write(
            'Usage: python {0} profile_export_file.json\n'.format(argv[0] if argv else 'diaspora_profile_stats.py')
        )
        return 2

    # Binary mode lets the json module detect the encoding itself instead
    # of depending on the locale's default text encoding.
    with open(argv[1], 'rb') as profile_fh:
        profile_dc = json.load(profile_fh)

    post_dates_day = {}
    post_dates_month = {}
    post_dates_year = {}
    oldest_post = None
    for post_dc in profile_dc['user']['posts']:
        this_date = _parse_created_at(post_dc['created_at'])
        if oldest_post is None or this_date < oldest_post:
            oldest_post = this_date
        # NOTE(review): presumably some post types (e.g. reshares) carry no
        # text or a null text -- such posts are counted with length 0.
        # The 'byte' field is a character count (len of the decoded text).
        text_length = len(post_dc.get('text') or '')
        _tally(post_dates_day, this_date.date(), text_length)
        _tally(post_dates_month, this_date.year * 100 + this_date.month, text_length)
        _tally(post_dates_year, this_date.year, text_length)

    print('## Oldes Post: `' + str(oldest_post if oldest_post is not None else 'n/a') + '`')

    print("\n\n## Activity by year")
    _print_chart(post_dates_year)
    _write_csv(
        'year.csv',
        ['Year', 'Posting count', 'Character count'],
        [
            [this_year, counter['count'], counter['byte']]
            for this_year, counter in sorted(post_dates_year.items())
        ],
    )

    print("\n\n## Activity by month")
    _print_chart(post_dates_month)
    _write_csv(
        'month.csv',
        ['Year', 'Month', 'Posting count', 'Character count'],
        [
            # Month keys are year*100+month; floor division recovers the year.
            [this_month // 100, this_month % 100, counter['count'], counter['byte']]
            for this_month, counter in sorted(post_dates_month.items())
        ],
    )

    # Per-day numbers are only exported, not charted.
    _write_csv(
        'day.csv',
        ['Date', 'Posting count', 'Character count'],
        [
            [this_day, counter['count'], counter['byte']]
            for this_day, counter in sorted(post_dates_day.items())
        ],
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment