Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Download mailman archive
#!/usr/bin/env python3
from datetime import date
import gzip
import shutil
import time
import urllib.request
import urllib.error
# Root URL of the pipermail archive. It is fetched as-is to obtain the index
# page and is also used as the prefix for every monthly archive file name.
BASE_URL = 'http://lists.alioth.debian.org/pipermail/pkg-ime-devel/'
# First and last calendar years to scan for monthly archives (both inclusive).
START_YEAR = 2004
END_YEAR = 2018
# Path of the gzip-compressed output file that receives all downloaded months.
ARCHIVE = 'archive.mbox.gz'
def download(fname, fout):
    """Fetch one gzip-compressed archive file and append its contents to *fout*.

    Args:
        fname: File name relative to ``BASE_URL`` (e.g. ``2004-January.txt.gz``).
        fout: Writable binary file object that receives the decompressed bytes.

    An HTTP error response is reported on stdout and otherwise ignored so the
    caller can carry on with the next file; any other exception propagates.
    """
    url = '{base}{fname}'.format(fname=fname, base=BASE_URL)
    try:
        # Decompress straight from the HTTP response stream into the output.
        with urllib.request.urlopen(url) as response, \
                gzip.GzipFile(fileobj=response) as unzipped:
            shutil.copyfileobj(unzipped, fout)
    except urllib.error.HTTPError as err:
        message = 'Error: {code} for {fname}'.format(code=err.code, fname=fname)
        print(message)
def main():
    """Download every monthly archive listed on the index page into ARCHIVE.

    The index page at ``BASE_URL`` is fetched once. For each month between
    ``START_YEAR`` and ``END_YEAR`` (inclusive) a file name of the form
    ``YYYY-Month.txt.gz`` is built; months whose file name appears in the
    index page are downloaded and appended, decompressed, to the single
    gzip-compressed output file ``ARCHIVE``.

    If the index page cannot be fetched because of an HTTP error, the error
    is printed and no month is considered listed, so nothing is downloaded.
    """
    # Must be a str: it is searched with str file names below. Starting from
    # bytes made the membership test raise TypeError when the fetch failed.
    archive_list = ''
    try:
        with urllib.request.urlopen(BASE_URL) as res:
            # Decode to real text instead of taking the repr of the bytes;
            # errors='replace' keeps this from failing on odd bytes, and the
            # ASCII file names searched for are unaffected by it.
            archive_list = res.read().decode('utf-8', errors='replace')
    except urllib.error.HTTPError as e:
        print('Error: {code} for {fname}'.format(code=e.code, fname=BASE_URL))

    with gzip.open(ARCHIVE, 'wb') as fout:
        for y in range(START_YEAR, END_YEAR + 1):
            for m in range(1, 13):
                # %B is the full month name (locale-dependent; English in the
                # default "C" locale), e.g. '2004-January.txt.gz'.
                fname = '{:%Y-%B}.txt.gz'.format(date(y, m, 1))
                if fname not in archive_list:
                    continue
                print('Downloading: {}'.format(fname))
                download(fname, fout)
                # Pause between downloads so the server is not hammered.
                time.sleep(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.