Created
November 2, 2020 19:07
-
-
Save say4n/bfead16052ec68b7217bbf6e43fd803f to your computer and use it in GitHub Desktop.
Google Takeout Analyzer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
"""Summarise how much space each top-level folder of an archive listing uses.

Reads a text listing (``files.txt``) with one entry per line, adds up the
sizes per top-level folder and pretty-prints the totals in megabytes,
largest first.

NOTE(review): the listing format is not defined in this file.  The parsing
below takes the third whitespace-separated field as the size in bytes and
everything from the sixth field onwards as the path, which looks like
``tar -tv`` style output -- confirm against however files.txt is produced.
"""

from collections import defaultdict
from pprint import pprint

fname = "files.txt"

BYTES_PER_MEGABYTE = 1024 * 1024

# Number of leading path characters dropped before the top-level folder name
# is extracted.  Presumably this is the length of a "Takeout/" prefix --
# TODO confirm.
PATH_PREFIX_LENGTH = 8


def aggregate_sizes(lines, prefix_length=PATH_PREFIX_LENGTH):
    """Return total size in bytes per top-level folder.

    Args:
        lines: Iterable of listing lines (an open text file works).
        prefix_length: Number of leading path characters to discard before
            taking the first ``/``-separated component.

    Returns:
        A dict mapping folder name to the summed size of its entries.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the size field is not an integer.
    """
    totals = defaultdict(int)
    for line in lines:
        # Split at most five times so the path keeps its internal spaces;
        # joining the pieces with "" would turn "Google Photos" into
        # "GooglePhotos".
        fields = line.split(None, 5)
        if not fields:
            continue  # blank line, e.g. a trailing newline at end of file
        size = int(fields[2])
        # The last field keeps trailing whitespace (the newline); strip it.
        path = fields[5].rstrip() if len(fields) > 5 else ""
        folder = path[prefix_length:].split("/", 1)[0]
        totals[folder] += size
    return dict(totals)


def summarize(lines):
    """Return ``(folder, megabytes)`` pairs sorted by size, largest first."""
    megabytes = {
        folder: size / BYTES_PER_MEGABYTE  # bytes to megabytes
        for folder, size in aggregate_sizes(lines).items()
    }
    return sorted(megabytes.items(), key=lambda item: item[1], reverse=True)


def main():
    """Read the listing named by ``fname`` and print the per-folder summary."""
    # Iterate the file directly instead of loading every line up front.
    with open(fname, "rt") as f:
        pprint(summarize(f))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment