Created
May 23, 2022 16:04
-
-
Save Ortham/dc10183a0f2442adc5b9fba67b9a6a76 to your computer and use it in GitHub Desktop.
Create CSV files from Acronis backup notification emails.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# Create CSV files from Acronis backup notification emails. | |
import csv | |
import re | |
import sys | |
from dataclasses import dataclass | |
from pathlib import Path | |
from typing import Generator, Pattern | |
# Captures the text between the end of a style attribute with these exact
# font-size/line-height declarations and a closing </div> on the same line.
# get_host_data() uses the captured text as the backed-up machine's name.
HOST_REGEX = re.compile(r'font-size:14px; line-height:20px;">(.+)</div>')

# Captures an HH:MM:SS value that is the entire text before a closing </div>.
DURATION_REGEX = re.compile(r'>(\d\d:\d\d:\d\d)</div>')

# Captures a number followed by a single space and a size unit, e.g. '12.5 MB'.
# The leading digit may be followed by zero or more digits/dots, so that
# single-digit values such as '5 GB' or '0 Bytes' are matched too.
SIZE_REGEX = re.compile(r'(\d[\d\.]* (?:Bytes|KB|MB|GB|TB))')
@dataclass
class BackupData:
    """Details of a single backup, as read from one notification email."""

    date: str  # taken from the email's filename
    status: str  # 'Succeeded' or 'Failed'
    duration: str  # HH:MM:SS text captured from the email body
    size: float  # backup size in bytes
def get_date_from_filename(filename: str) -> str:
    """Return the second-to-last space-separated part of ``filename``.

    The caller treats this part as the backup date.

    Raises:
        IndexError: if ``filename`` contains no space.
    """
    # Only the last two separators matter, so split from the right.
    trailing_parts = filename.rsplit(' ', 2)
    return trailing_parts[-2]
def get_multiplier(unit: str) -> float:
    """Return the number of bytes represented by one ``unit``.

    Units are decimal (powers of 1000), not binary.

    Raises:
        RuntimeError: if ``unit`` is not one of Bytes, KB, MB, GB or TB.
    """
    bytes_per_unit = {
        'Bytes': 1,
        'KB': 1e3,
        'MB': 1e6,
        'GB': 1e9,
        'TB': 1e12,
    }
    multiplier = bytes_per_unit.get(unit)
    if multiplier is None:
        raise RuntimeError('Unrecognised size unit: {}'.format(unit))
    return multiplier
def get_size_in_bytes(size: str) -> float:
    """Convert a size string such as ``'1.5 GB'`` into a number of bytes.

    Args:
        size: a number and a unit separated by exactly one space.

    Returns:
        The numeric part multiplied by the unit's byte multiplier.

    Raises:
        RuntimeError: if ``size`` is not two space-separated parts, if the
            numeric part is not a valid number, or if the unit is not
            recognised.
    """
    parts = size.split(' ')
    if len(parts) != 2:
        raise RuntimeError('Size string has unexpected format: {}'.format(size))

    number, unit = parts
    try:
        significand = float(number)
    except ValueError:
        # Report malformed numbers (e.g. '1..5') the same way as every other
        # parse failure, so callers only need to handle RuntimeError.
        raise RuntimeError(
            'Size string has unexpected format: {}'.format(size)
        ) from None

    return significand * get_multiplier(unit)
def extract_value(content: str, extraction_regex: Pattern) -> str:
    """Return the first capture group of the first match in ``content``.

    Raises:
        RuntimeError: if ``extraction_regex`` does not match anywhere in
            ``content``.
    """
    found = extraction_regex.search(content)
    if found is None:
        raise RuntimeError('Failed to find value in content')
    return found.group(1)
def _extract_or_exit(
    content: str, extraction_regex: Pattern, description: str, path: Path
) -> str:
    """Return the value captured from ``content``, or report it missing and exit."""
    try:
        return extract_value(content, extraction_regex)
    except RuntimeError:
        print('Failed to find {} in {}'.format(description, path))
        sys.exit(1)


def get_host_data(path: Path) -> tuple[str, BackupData]:
    """Read one notification email and return its host name and backup data.

    The date is taken from the file's name; the host name, duration and size
    are extracted from the file's content. The status is 'Succeeded' when the
    content contains 'Backup succeeded' and 'Failed' otherwise.

    Args:
        path: path of the email file, read as UTF-8 text.

    Returns:
        A ``(hostname, BackupData)`` tuple.

    If any value cannot be found or the size cannot be parsed, a message
    naming the file is printed and the process exits with status 1.
    """
    date = get_date_from_filename(path.name)
    content = path.read_text(encoding='utf-8')

    hostname = _extract_or_exit(
        content, HOST_REGEX, 'the name of the backed-up PC', path
    )
    duration = _extract_or_exit(content, DURATION_REGEX, 'the backup duration', path)
    size = _extract_or_exit(content, SIZE_REGEX, 'the backup size', path)

    try:
        size_in_bytes = get_size_in_bytes(size)
    except RuntimeError as e:
        print('Failed to read backup size in {}: {}'.format(path, e))
        sys.exit(1)

    status = 'Succeeded' if 'Backup succeeded' in content else 'Failed'
    return (hostname, BackupData(date, status, duration, size_in_bytes))
def get_data_by_host(paths: Generator[Path, None, None]) -> dict[str, list[BackupData]]:
    """Group the backup data read from each email file by host name.

    Each host's entries are kept in the order in which ``paths`` yields
    their files.
    """
    grouped: dict[str, list[BackupData]] = {}
    for email_path in paths:
        host, backup = get_host_data(email_path)
        # Start a new list the first time a host is seen.
        grouped.setdefault(host, []).append(backup)
    return grouped
if __name__ == "__main__":
    # Usage: <script> <input dir> <output dir>
    if len(sys.argv) < 3:
        print('Incorrect number of arguments: expected <input dir> <output dir>')
        sys.exit(1)

    input_dir, output_dir = sys.argv[1], sys.argv[2]

    paths = Path(input_dir).glob('*.eml')
    data_by_host = get_data_by_host(paths)

    # Write one CSV file per host, named after the host.
    for hostname, rows in data_by_host.items():
        file_path = Path(output_dir) / (hostname + '-backups.csv')
        # newline='' lets the csv module control line endings itself.
        with file_path.open('w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Date', 'Status', 'Duration', 'Size / B'])
            csv_writer.writerows(
                [row.date, row.status, row.duration, row.size] for row in rows
            )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment