Skip to content

Instantly share code, notes, and snippets.

@Ortham
Created May 23, 2022 16:04
Show Gist options
  • Save Ortham/dc10183a0f2442adc5b9fba67b9a6a76 to your computer and use it in GitHub Desktop.
Save Ortham/dc10183a0f2442adc5b9fba67b9a6a76 to your computer and use it in GitHub Desktop.
Create CSV files from Acronis backup notification emails.
#!/usr/bin/env python3
#
# Create CSV files from Acronis backup notification emails.
import csv
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Generator, Pattern
# Captures the name of the backed-up PC: the text of a <div> whose opening
# tag ends with the inline font-size/line-height style below.
HOST_REGEX = re.compile(r'font-size:14px; line-height:20px;">(.+)</div>')
# Captures the backup duration: three colon-separated two-digit fields
# (e.g. 01:23:45) that close a <div>.
DURATION_REGEX = re.compile(r'>(\d\d:\d\d:\d\d)</div>')
# Captures the backup size as "<number> <unit>". The run of digits/dots after
# the leading digit is optional so that single-digit sizes such as "5 MB" or
# "0 Bytes" are matched too.
SIZE_REGEX = re.compile(r'(\d[\d\.]* (?:Bytes|KB|MB|GB|TB))')
@dataclass
class BackupData:
    """One backup run, as described by a single notification email."""

    # Date token taken from the email's filename.
    date: str
    # Either 'Succeeded' or 'Failed'.
    status: str
    # Duration text exactly as it appears in the email, e.g. 01:23:45.
    duration: str
    # Backup size converted to bytes.
    size: float
def get_date_from_filename(filename: str) -> str:
    """Return the second-to-last space-separated token of *filename*.

    The callers treat that token as the date of the backup.

    Raises IndexError if *filename* contains no space.
    """
    # Only the last two separators matter, so split from the right.
    tokens = filename.rsplit(' ', 2)
    return tokens[-2]
def get_multiplier(unit: str) -> float:
    """Return the number of bytes that one *unit* stands for.

    Units are decimal: each step from Bytes to TB is a factor of 1000.

    Raises RuntimeError if *unit* is not one of Bytes, KB, MB, GB or TB.
    """
    factors = {
        'Bytes': 1,
        'KB': 1e3,
        'MB': 1e6,
        'GB': 1e9,
        'TB': 1e12,
    }
    factor = factors.get(unit)
    if factor is None:
        raise RuntimeError('Unrecognised size unit: {}'.format(unit))
    return factor
def get_size_in_bytes(size: str) -> float:
    """Convert a size string such as '1.5 GB' into a number of bytes.

    *size* must consist of a number and a unit separated by one space.

    Raises RuntimeError if the string does not have exactly two
    space-separated parts, if the number cannot be parsed, or if the unit
    is rejected by get_multiplier.
    """
    parts = size.split(' ')
    if len(parts) != 2:
        raise RuntimeError('Size string has unexpected format: {}'.format(size))
    number, unit = parts
    try:
        significand = float(number)
    except ValueError as err:
        # Report an unparsable number (e.g. '1.2.3') with the same exception
        # type as the other format errors, so that a caller handling
        # RuntimeError covers every malformed-size case.
        raise RuntimeError(
            'Size string has unexpected format: {}'.format(size)) from err
    return significand * get_multiplier(unit)
def extract_value(content: str, extraction_regex: Pattern) -> str:
    """Return capture group 1 of the first match of *extraction_regex*.

    Raises RuntimeError if the pattern matches nowhere in *content*.
    """
    found = extraction_regex.search(content)
    if found is None:
        raise RuntimeError('Failed to find value in content')
    return found.group(1)
def _extract_or_exit(content: str, regex: Pattern, description: str, path: Path) -> str:
    """Extract a value from *content*, or report the failure and exit with 1."""
    try:
        return extract_value(content, regex)
    except RuntimeError:
        print('Failed to find {} in {}'.format(description, path))
        sys.exit(1)


def get_host_data(path: Path) -> tuple[str, BackupData]:
    """Read one notification email and return its host name and backup data.

    The date is taken from the file's name. The host name, duration and size
    are extracted from the file's content, which is read as UTF-8. The status
    is 'Succeeded' when the content contains 'Backup succeeded' and 'Failed'
    otherwise.

    If any value cannot be found or parsed, a message naming *path* is
    printed and the process exits with status 1.
    """
    try:
        date = get_date_from_filename(path.name)
    except IndexError:
        # The filename has too few space-separated parts to hold a date.
        print('Failed to find the backup date in the name of {}'.format(path))
        sys.exit(1)

    with open(path, encoding='utf-8') as file:
        content = file.read()

    hostname = _extract_or_exit(
        content, HOST_REGEX, 'the name of the backed-up PC', path)
    duration = _extract_or_exit(
        content, DURATION_REGEX, 'the backup duration', path)
    size = _extract_or_exit(content, SIZE_REGEX, 'the backup size', path)

    try:
        size_in_bytes = get_size_in_bytes(size)
    except (RuntimeError, ValueError) as e:
        # ValueError covers a matched number that float() cannot parse.
        print('Failed to read backup size in {}: {}'.format(path, e))
        sys.exit(1)

    status = 'Succeeded' if 'Backup succeeded' in content else 'Failed'
    return (hostname, BackupData(date, status, duration, size_in_bytes))
def get_data_by_host(paths: Generator[Path, None, None]) -> dict[str, list[BackupData]]:
    """Group the backup data parsed from *paths* by host name.

    Each path is parsed with get_host_data. Entries for the same host keep
    the order in which their paths were yielded.
    """
    grouped: dict[str, list[BackupData]] = {}
    for email_path in paths:
        host, backup = get_host_data(email_path)
        # Start a new list the first time a host is seen, then append.
        grouped.setdefault(host, []).append(backup)
    return grouped
if __name__ == "__main__":
    # Usage: <script> <input dir> <output dir>
    if len(sys.argv) < 3:
        print('Incorrect number of arguments: expected <input dir> <output dir>')
        sys.exit(1)

    input_dir, output_dir = sys.argv[1], sys.argv[2]

    # Every .eml file directly inside the input directory is one notification.
    data_by_host = get_data_by_host(Path(input_dir).glob('*.eml'))

    # Write one CSV file per host, named after the host.
    header = ['Date', 'Status', 'Duration', 'Size / B']
    for hostname, rows in data_by_host.items():
        file_path = Path(output_dir, hostname + '-backups.csv')
        # newline='' lets the csv module control line endings itself.
        with open(file_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(header)
            csv_writer.writerows(
                [row.date, row.status, row.duration, row.size] for row in rows
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment