Last active
September 19, 2024 03:07
-
-
Save ckhung/1af9d473013115cf930505a3093a098d to your computer and use it in GitHub Desktop.
split-gmail.pl: extract bank statements: 把 zimbra mail server 下載回來的 *.eml 檔當中的銀行/信用卡/電費/電信費帳單抓出來、移除密碼,成為單純的 pdf 檔 + split-gmail.pl 把 gmail 的一個 *.mbox 拆成每封信一個檔案
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# usage: split-gmail.pl test.mbox
#
# Split test.mbox, a file exported from gmail, into one mail per file,
# named m0001.mbox, m0002.mbox, ... in the current directory.
#
# Lines that appear before the first "From " separator do not belong to any
# mail.  They are first collected in m0001.mbox; when the first separator
# shows up they are moved to m0000.mbox (with a warning) instead of being
# silently overwritten by the first mail.  If the input contains no
# separator at all, everything stays in m0001.mbox.
use strict;
use warnings;

# Open the output file for part number $n for writing.
# Returns the file handle and the file name; dies if the file cannot be created.
sub open_part {
    my ($n) = @_;
    my $name = sprintf("m%04d.mbox", $n);
    open(my $fh, '>', $name) or die "cannot write $name: $!";
    return ($fh, $name);
}

my $count = 0;    # number of separator lines (i.e. mails) seen so far
my $stray = 0;    # number of lines seen before the first separator
my ($out, $name) = open_part(1);

while (my $line = <>) {
    # a sample string we are matching against:
    # From 1789598446958704853@xxx Wed Jan 31 09:41:15 +0000 2024^M
    if ($line =~ /^From\s+\S+\s+\w+\s+\w+\s+\d+\s+[\d:]+\s+\S+\s+\d+\s*$/) {
        close($out) or die "cannot close $name: $!";
        if ($count == 0 && $stray > 0) {
            # m0001.mbox currently holds the stray leading lines; keep them
            # under another name because the first mail reuses m0001.mbox.
            rename($name, 'm0000.mbox')
                or die "cannot rename $name to m0000.mbox: $!";
            warn "$stray line(s) before the first mail saved to m0000.mbox\n";
        }
        ++$count;
        ($out, $name) = open_part($count);
    } elsif ($count == 0) {
        ++$stray;
    }
    # The separator line itself is the first line of the new part.
    print {$out} $line or die "cannot write $name: $!";
}
# Buffered data is flushed on close, so a full disk is reported here.
close($out) or die "cannot close $name: $!";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# xtbankst.sh -- extract bank statements
#
# Usage: xtbankst.sh SRC_DIR DST_DIR
#
# Looks in SRC_DIR for statement mails (*.eml files whose names match the
# patterns listed in main, plus every *.mbox file), unpacks each mail with
# munpack in a scratch directory, and uses qpdf to write a password-free
# copy of every PDF attachment to DST_DIR as "<YYMM>-<attachment name>".
# A single password, read from standard input, is used for all PDFs.
#
# External tools used: realpath, mktemp, perl, munpack, qpdf, sed, base64.

set -u

# Scratch directory of the mail currently being processed ('' when none).
# Kept global so the EXIT trap can still see it after main has returned.
work_dir=''

# Remove the scratch directory, if any; installed as the EXIT trap by main.
cleanup() {
  if [[ -n "$work_dir" ]]; then
    rm -rf -- "$work_dir"
  fi
}

#######################################
# Print the YYMM tag derived from a mail's path.
# The first "<digits>年<digits>月" in the path wins, then the first
# "20YY_MM"; if neither is present the tag is 9999.  Years above 2000 have
# 2000 subtracted, other years have 89 subtracted (e.g. 113年 -> 24).
# The whole path is matched, not just the file name.
# Arguments: $1 - path of the mail file
# Outputs:   the four-digit tag on stdout
#######################################
statement_tag() {
  printf '%s\n' "$1" | perl -ne '
    ($y, $m) = /(\d+)年(\d+)月/ or
    ($y, $m) = /\b(20\d\d)_(\d\d)\b/ or
    ($y, $m) = (2099,99);
    $y = $y>2000 ? $y-2000 : $y-89;
    printf("%02d%02d\n", $y, $m)'
}

#######################################
# Decode an attachment file name of the form "=XUTF-8XBX<base64>X=".
# Arguments: $1 - the encoded file name
# Outputs:   the decoded name on stdout
#######################################
decode_attachment_name() {
  printf '%s\n' "$1" \
    | sed 's/=X[Uu][Tt][Ff]-8XBX//; s/X=//' \
    | base64 -d
}

#######################################
# Decrypt every PDF attachment found in the current directory.
# Arguments: $1 - YYMM tag used as output file name prefix
#            $2 - destination directory (absolute path)
#            $3 - PDF password
#######################################
extract_pdfs() {
  local tag=$1 dst_dir=$2 password=$3
  local att name

  # NOTE(review): the password is visible to other local users through the
  # process list while qpdf runs; consider qpdf's --password-file option if
  # the installed qpdf version supports it.

  # "./" keeps a file name that starts with "-" from being read as an option.
  for att in *.pdf; do
    qpdf --decrypt --password="$password" "./$att" "$dst_dir/$tag-$att"
  done

  # Attachments whose names were left base64-encoded.
  for att in *X[Uu][Tt][Ff]-8XBX*X=; do
    name=$(decode_attachment_name "$att")
    # The name comes from the mail itself: drop any directory part so it
    # cannot point outside the destination directory.
    name=${name##*/}
    if [[ "$name" == *.pdf ]]; then
      qpdf --decrypt --password="$password" "./$att" "$dst_dir/$tag-$name"
    fi
  done
}

#######################################
# Entry point.
# Arguments: $1 - source directory, $2 - destination directory
# Returns:   0 on success, 2 on usage error, 1 on setup failure
#######################################
main() {
  if (( $# < 2 )); then
    printf 'Usage: %s SRC_DIR DST_DIR\n' "${0##*/}" >&2
    return 2
  fi

  local src_dir dst_dir password=''
  src_dir=$(realpath -- "$1") || return 1
  dst_dir=$(realpath -- "$2") || return 1
  printf '%s\n' "$src_dir => $dst_dir"

  printf 'Password: '
  # -r and an empty IFS keep backslashes and surrounding blanks intact.
  IFS= read -rs password
  printf '\n'

  # With nullglob a pattern that matches nothing expands to nothing.
  # https://www.endpointdev.com/blog/2016/12/bash-loop-wildcards-nullglob-failglob/
  shopt -s nullglob
  local -a mails=(
    "$src_dir"/*-玩很大銀行*月份『綜合對帳單』.eml
    "$src_dir"/*-真好貸銀行*月信用卡電子帳單.eml
    "$src_dir"/*-抄底高手證券電子綜合月對帳單\(20*_*\).eml
    "$src_dir"/*-台電e-Bill*電費繳費憑證*.eml
    "$src_dir"/*-中華電信*年*月電信費用通知單*.eml
    "$src_dir"/*.mbox
  )

  mkdir -p -- "$dst_dir" || return 1
  # Nothing to do; also avoids expanding an empty array under "set -u" on
  # old bash versions.
  (( ${#mails[@]} > 0 )) || return 0

  trap cleanup EXIT

  local mail tag
  for mail in "${mails[@]}"; do
    tag=$(statement_tag "$mail")
    printf '%s\n' "[$tag] [$mail]"

    work_dir=$(mktemp -d "$dst_dir/xtbankst-tmp.XXXXXX") || return 1
    (
      # munpack writes the attachments into the current directory.
      cd -- "$work_dir" || exit 1
      munpack -qf "$mail"
      extract_pdfs "$tag" "$dst_dir" "$password"
    )
    rm -rf -- "${work_dir:?}"
    work_dir=''
  done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment