Skip to content

Instantly share code, notes, and snippets.

@kissge
Created April 29, 2023 13:05
Show Gist options
  • Save kissge/ff4595a5f7228454d5f0a369999b9945 to your computer and use it in GitHub Desktop.
Save kissge/ff4595a5f7228454d5f0a369999b9945 to your computer and use it in GitHub Desktop.
import re
from dataclasses import dataclass
# Usage:
# dbxcli ls -lR > dropbox.txt
# python3 dropbox-large-files.py
# Load the whole listing up front: it is scanned once here to discover the
# column layout and again further down to aggregate sizes.
# The encoding is pinned so decoding does not depend on the platform locale.
# NOTE(review): assumes dbxcli writes UTF-8 - confirm if the listing was
# produced through a shell that re-encodes redirected output.
with open('dropbox.txt', encoding='utf-8') as fp:
    lines = fp.readlines()

# The listing is column-aligned text. A row whose first three cells are all
# "-" followed by padding (presumably a directory entry - confirm against
# real dbxcli output) reveals the width of each padded column.
for line in lines:
    if match := re.fullmatch(r'(((- +)- +)- +)/.*\n', line):
        # The groups are nested widest-first, so reversing them gives the
        # end offset of column 1, then column 2, then column 3.
        end_offsets = [len(g) for g in reversed(match.groups())]
        # Pair every column start with its end. The last column (the path)
        # runs to the end of the line, hence the trailing None.
        offsets = list(zip([0] + end_offsets, end_offsets + [None]))
        break
else:
    # No row matched, so the column layout is unknown and nothing can be
    # sliced reliably.
    raise ValueError('Could not find offsets')
@dataclass
class Node:
    """Running totals for one path of the listing.

    Instances are created with zero totals and then incremented in place
    while the listing is aggregated, so the class is deliberately mutable.
    """

    # Sum, in bytes, of every file size added to this path.
    size: int
    # Number of file entries counted toward this path.
    file_count: int
# Multipliers for the binary-prefixed suffixes that may follow the number.
_BINARY_UNIT_FACTORS = {
    ' KiB': 1024,
    ' MiB': 1024 ** 2,
    ' GiB': 1024 ** 3,
    ' TiB': 1024 ** 4,
    ' PiB': 1024 ** 5,
    ' EiB': 1024 ** 6,
}


def parse_size(size_str: str) -> int:
    """Convert a human-readable size such as ``'1.5 GiB'`` into bytes.

    Args:
        size_str: A number, one space, and a unit: ``B``, ``KiB``, ``MiB``,
            ``GiB``, ``TiB``, ``PiB`` or ``EiB``. Prefixed units may carry
            a fractional part; plain bytes must be an integer.

    Returns:
        The size in bytes, truncated toward zero.

    Raises:
        ValueError: If the unit is not recognised or the numeric part
            cannot be parsed.
    """
    for suffix, factor in _BINARY_UNIT_FACTORS.items():
        if size_str.endswith(suffix):
            # Scaling by a power of two is exact in floating point, so the
            # only rounding is the final truncation to an integer.
            return int(float(size_str[:-len(suffix)]) * factor)
    if size_str.endswith(' B'):
        return int(size_str[:-2])
    # Raise instead of assert: asserts are stripped when running with -O.
    raise ValueError(f'Unrecognized size: {size_str!r}')
# Accumulate totals for every path prefix: each file contributes its size to
# its own path and to every ancestor directory above it.
tree = dict[str, Node]()
for line in lines:
    # Slice the fixed-width columns; only the size and the path are used.
    _, size_str, _, path = (line[x:y].strip() for x, y in offsets)
    # '-' marks rows that carry no size. '' covers blank lines.
    # NOTE(review): 'Size' covers the header row that dbxcli's long listing
    # appears to print first - confirm. Skipping a literal 'Size' cell is
    # harmless either way, since no real size value looks like that.
    if size_str in ('-', '', 'Size'):
        continue
    size = parse_size(size_str)
    path_partial = ''
    # The path starts with '/', so the first split element is empty and is
    # dropped; the remaining segments rebuild each prefix in turn.
    for segment in path.split('/')[1:]:
        path_partial += '/' + segment
        node = tree.get(path_partial)
        if node is None:
            node = tree[path_partial] = Node(0, 0)
        node.size += size
        node.file_count += 1

# Report largest first as tab-separated path, total bytes, and file count.
# sorted() is stable, so equal sizes keep their first-seen order.
for path, node in sorted(tree.items(), key=lambda item: item[1].size, reverse=True):
    print(path, node.size, node.file_count, sep='\t')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment