Skip to content

Instantly share code, notes, and snippets.

@charlesreid1
Last active October 3, 2018 20:34
Show Gist options
  • Save charlesreid1/a08d3eefed1373fd01c0f8adfeba4669 to your computer and use it in GitHub Desktop.
Save charlesreid1/a08d3eefed1373fd01c0f8adfeba4669 to your computer and use it in GitHub Desktop.
Get mail archives for Groups.io subgroups

Get Groups.io Mbox Email Archives

This Python script demonstrates how to download the email archives for all subgroups of a Groups.io group using the Groups.io API. Each subgroup's email archive is saved as a separate .mbox file in the mboxes/ directory. This write-to-disk step is not strictly necessary, because the mbox file is already available in memory inside the downloaded zip file.

To run this, you need a Groups.io secret token. Provide this to the script using an environment variable:

GROUPSIO_SECRET_TOKEN="XYZ" python get_mbox_archives.py
import json, os, io, requests
from zipfile import ZipFile, BadZipFile
"""
Get Mbox Email Archives
For All Groups.io Subgroups
"""
# Value sent as the `limit` field of the getsubgroups API request.
MAX_SUBGROUPS=100
def main():
    """
    Fetch the list of subgroups, then write each subgroup's
    archive to its own mbox file (one file per subgroup).
    """
    subgroups = get_all_subgroups()
    # subgroups maps subgroup id -> subgroup name. save_mbox_file
    # downloads the zipped archive itself via get_archive_zip.
    for subgroup_id, subgroup_name in subgroups.items():
        save_mbox_file(subgroup_name, subgroup_id)
def save_mbox_file(group_name, group_id):
    """
    Download the zipped archive for one subgroup and write its
    mailbox to mboxes/<group_name>.mbox.

    Parameters:
        group_name: subgroup name, used to build the output file name.
        group_id: subgroup id, passed through to get_archive_zip.

    Returns:
        None. Nothing is written when get_archive_zip returns None
        (the response was not a valid zip file).

    Raises:
        KeyError: the archive has no 'messages.mbox' member.
    """
    z = get_archive_zip(group_name, group_id)
    if z is None:
        return

    # Read only the member we need rather than loading every file
    # in the archive into memory.
    mbox_bytes = z.read('messages.mbox')

    # Save the contents of the mbox file to our mboxes folder.
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs('mboxes', exist_ok=True)

    fname = os.path.join('mboxes', "%s.mbox" % (group_name))
    with open(fname, 'wb') as f:
        f.write(mbox_bytes)
def get_all_subgroups(group_name='dcppc'):
    """
    Returns a dictionary where keys are subgroup ids
    and values are subgroup names.

    Parameters:
        group_name: name of the parent group whose subgroups are
            listed. Defaults to 'dcppc'.

    Raises:
        Exception: the GROUPSIO_SECRET_TOKEN environment variable
            is not set.
        requests.HTTPError: the API answered with an HTTP error status.
    """
    url = 'https://api.groups.io/v1/getsubgroups'

    key = os.environ.get('GROUPSIO_SECRET_TOKEN')
    if key is None:
        err = "ERROR: You must set the GROUPSIO_SECRET_TOKEN environment variable. See README.md"
        raise Exception(err)

    # NOTE(review): a single request is made, so presumably at most
    # MAX_SUBGROUPS subgroups are returned - confirm whether the API
    # paginates beyond `limit`.
    data = [('group_name', group_name),
            ('limit', MAX_SUBGROUPS)]

    response = requests.post(url, data=data, auth=(key, ''))
    # Fail with the HTTP status instead of an opaque JSON/KeyError
    # further down when the API rejects the request.
    response.raise_for_status()

    subgroups = response.json()['data']
    return {group['id']: group['name'] for group in subgroups}
def get_archive_zip(group_name,group_id):
    """
    Use the API to download the zipped .mbox email archive
    for one subgroup.

    Parameters:
        group_name: subgroup name, used only in progress messages.
        group_id: subgroup id sent to the downloadarchives endpoint.

    Returns:
        An open ZipFile backed by the in-memory response body, or
        None if the response body is not a valid zip file (the body
        is printed in that case).

    Raises:
        Exception: the GROUPSIO_SECRET_TOKEN environment variable
            is not set.
    """
    url = "https://api.groups.io/v1/downloadarchives"

    key = os.environ.get('GROUPSIO_SECRET_TOKEN')
    if key is None:
        err = "ERROR: You must set the GROUPSIO_SECRET_TOKEN environment variable. See README.md"
        raise Exception(err)

    data = [('group_id',group_id)]

    print("get_archive_zip(): getting .mbox archive for subgroup %s (%s)"%(group_name,group_id))
    r = requests.post(url,data=data,auth=(key,''),stream=True)

    try:
        z = ZipFile(io.BytesIO(r.content))
        # NOTE(review): extractall() unpacks the archive members into
        # the current working directory in addition to returning the
        # in-memory zip. Kept for backward compatibility - confirm
        # whether this on-disk copy is actually wanted.
        z.extractall()
    except BadZipFile:
        print("ABORTING: subgroup %s failed"%(group_name))
        # errors='replace' keeps a non-UTF-8 body from raising
        # UnicodeDecodeError and hiding the failure report.
        print(r.content.decode('utf-8', errors='replace'))
        print("")
        return None

    print("SUCCESS: subgroup %s worked"%(group_name))
    print("")
    return z
# Run the download only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment