Skip to content

Instantly share code, notes, and snippets.

Created June 23, 2011 10:04
Show Gist options
  • Save anonymous/1042292 to your computer and use it in GitHub Desktop.
Save anonymous/1042292 to your computer and use it in GitHub Desktop.
A Python script for removing duplicate messages from an Outlook folder
import sys
import hashlib
from win32com.client import *
def getMD5(str):
    """Return the hexadecimal MD5 digest of ``str``.

    ``str`` may be a byte string or a text (unicode) string.  Text is
    encoded as UTF-8 before hashing, so non-ASCII text is hashed instead of
    raising an encoding/type error; byte strings are hashed unchanged.

    Returns the digest as a 32-character lowercase hex string.
    """
    # The parameter name shadows the builtin; it is kept so existing callers
    # (including keyword callers) keep working.  Use a clearer local name.
    data = str
    # type(u"") is the text type on both Python 2 (unicode) and Python 3 (str).
    if isinstance(data, type(u"")):
        data = data.encode('utf-8')
    return hashlib.md5(data).hexdigest()
def getFolder(folder, path):
    """Descend from ``folder`` through the sub-folders named in ``path``.

    ``path`` is a sequence of folder names.  Each name is looked up with
    ``Folders.Item(name)`` on the folder reached so far.  An empty (or
    otherwise falsy) ``path`` returns ``folder`` itself.
    """
    current = folder
    remaining = path
    while remaining:
        # Step into the next named child, then drop that name from the path.
        current = current.Folders.Item(remaining[0])
        remaining = remaining[1:]
    return current
def getFolders(folderPaths):
    """Resolve '/'-separated Outlook folder paths to folder objects.

    Obtains the Outlook application through ``gencache.EnsureDispatch`` and
    its "MAPI" namespace, then resolves every entry of ``folderPaths`` by
    walking from the namespace down through the named folders with
    ``getFolder``.

    Path components are stripped of surrounding whitespace and empty
    components are ignored, so "/Working/Inbox/" and " Working / Inbox "
    resolve the same way as "Working/Inbox".

    Returns a list of folder objects in the same order as ``folderPaths``.
    """
    app = gencache.EnsureDispatch("Outlook.Application")
    ns = app.GetNamespace("MAPI")
    folders = []
    for folderPath in folderPaths:
        # Use the cleaned component list for the lookup; passing the raw
        # split would hand empty or space-padded names to Folders.Item().
        path = [part.strip() for part in folderPath.split('/') if part.strip()]
        folders.append(getFolder(ns, path))
    return folders
# Outlook folders to de-duplicate, as '/'-separated paths resolved by
# getFolders().
Folders = [
    u"Working/Inbox/pvcs-tracker",
]
# When True, duplicates are only reported and counted; nothing is deleted.
dryRun = True
# Number of duplicates found (counted in dry-run mode as well).
removedCount = 0
for f in getFolders(Folders):
    # Fetch the Items collection once per folder instead of once per message,
    # and use its explicit Count / Item(index) members.
    items = f.Items
    count = items.Count
    # (subject, received time, body digest) of every message kept so far.
    keys = set()
    # Walk from the last index down to 1 so that deleting a message does not
    # shift the indexes that are still to be visited.
    for i in range(count, 0, -1):
        msg = items.Item(i)
        subject = msg.Subject.encode('utf-8')
        # Hash the body so the key stays small even for large messages.
        digest = getMD5(msg.Body.encode('utf-8'))
        received = msg.ReceivedTime
        # Trailing comma: the line is finished by one of the prints below.
        print >>sys.stderr, "message %6d of %6d: %s" % (count-i+1, count, digest),
        key = (subject, received, digest)
        if key in keys:
            removedCount += 1
            print >>sys.stderr, " - REMOVED"
            if not dryRun:
                msg.Delete()
        else:
            keys.add(key)
            print >>sys.stderr
print >>sys.stderr, "Done, %d messages removed." % removedCount
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment