Skip to content

Instantly share code, notes, and snippets.

@zhasm
Created November 3, 2011 14:19
Show Gist options
  • Save zhasm/1336591 to your computer and use it in GitHub Desktop.
Save zhasm/1336591 to your computer and use it in GitHub Desktop.
Get unread Gmail messages and print their Subject, From, To, and Message-ID headers without modifying the messages' flags.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import imaplib2 as imaplib
from email import Header
from email.parser import Parser
## USER CONFIG
# Host name of the IMAP server to connect to.
HOST = 'imap.gmail.com'
# IMAP-over-SSL port, kept as a string.
# NOTE(review): not referenced by the visible code, which connects with the
# library's default port -- confirm before relying on it.
PORT = '993'
# Account credentials passed to login(); fill these in before running.
USER = ''
PAWD = ''
# Mailbox that is selected and scanned for messages.
HAM_F = 'INBOX'
# Label handed to Move() for messages classified as spam.
SPAM_F = '0_learn_spam' #'[Gmail]/Spam' by default
# Upper bound on the number of messages examined per run.
LIMIT = 20 #how many mails to read each time.
## Filter config (regex knowledge required).
## Maps a header name to the compiled pattern that marks a message as spam
## when it matches that header's value (see Filter). The multi-line patterns
## use verbose mode (?x), so whitespace and newlines inside them are ignored.
filters = {
    # Recipient addresses that only ever receive automated mail.
    "To": re.compile(r"""(?xi)
        msn\d{3,8}\@fanfou\.com |
        (?:coderex|evernote)\@zhasm\.com
        """),
    # Sender domains/addresses and keywords treated as bulk mail.
    # Every literal dot is escaped so that "." cannot match arbitrary
    # characters (the newegg entry previously left its dots unescaped).
    "From": re.compile(r"""(?xi)
        vipcareer\.net |
        calendar-notification\@google\.com |
        Tmall\@newsletter\.mail\.taobao\.com |
        newsletter\@newegg\.com\.cn |
        培训
        """),
    # Case-sensitive on purpose: no (?i) flag here.
    # NOTE(review): the first alternative already matches any "AD" substring,
    # which makes the escaped "\(AD\)" alternative redundant -- confirm
    # whether a stricter pattern was intended.
    "Subject": re.compile(r"""(?x)
        (AD)|
        \(AD\)|
        ^\.$
        """),
    # Message-IDs whose host part ends in ".ru".
    "Message-ID": re.compile(r"\.ru>$"),
}
## system config
# Captures the digits that directly follow "UID " in a FETCH response line.
# Compiled once at import time instead of on every call.
_UID_PATTERN = re.compile(r'(?<=UID )(\d+)')


def GetUID(s):
    """Return the message UID found in an IMAP FETCH response line.

    s: text that is expected to contain ``UID <digits>``.

    Returns the first UID as a string of digits. When no UID is present, or
    ``s`` is not text the pattern can scan, the error is printed and an empty
    string is returned so the caller can skip the message.
    """
    try:
        return _UID_PATTERN.findall(s)[0]
    except (IndexError, TypeError) as e:
        # IndexError: no "UID <digits>" in s; TypeError: s is not a string.
        print("Error! %s" % e)
        return ""
def Filter(headers):
    """Decide whether a message is spam from its decoded headers.

    headers: dict mapping a header name to its value.

    Returns True as soon as any header value matches the pattern configured
    for that header name in ``filters``; otherwise False. Aggressive, but
    simple to handle. Headers that have no configured pattern are ignored
    instead of raising KeyError, so entries can be removed from the filter
    config without breaking classification.
    """
    for header_name, header_value in headers.items():
        pattern = filters.get(header_name)
        if pattern is not None and pattern.search(header_value):
            return True
    return False
def DecodeSingleHeader(s):
    """Decode an RFC 2047 encoded header value into one native string.

    s: raw header value, possibly containing ``=?charset?enc?data?=`` words.

    Returns the decoded fragments joined by single spaces. On Python 2 the
    result is a UTF-8 encoded byte string (as before); on Python 3 it is
    text. Malformed bytes are replaced rather than raising, and an unknown
    charset falls back to latin-1, so one badly encoded message cannot abort
    the whole run.
    """
    # Imported locally so the function does not depend on the Python-2-only
    # ``email.Header`` module alias.
    from email.header import decode_header

    pieces = []
    for raw, charset in decode_header(s):
        if isinstance(raw, str) and not charset:
            # Unencoded fragment that is already a native string.
            pieces.append(raw)
            continue
        try:
            text = raw.decode(charset or 'ascii', 'replace')
        except LookupError:
            # Charset name unknown to Python: latin-1 can decode any byte.
            text = raw.decode('latin-1')
        if not isinstance(text, str):
            # Python 2: ``text`` is unicode; keep returning UTF-8 bytes.
            text = text.encode('utf8')
        pieces.append(text)
    return " ".join(pieces)
def StringToHeaders(s):
    """Parse raw header text and return the resulting message object."""
    header_parser = Parser()
    message = header_parser.parsestr(s)
    return message
def TrimFrom(s):
    """Normalise a raw "From" header value before it is decoded.

    Inserts a space between the end of an encoded word (``?=``) and the
    ``<`` that follows its closing quote, then drops every single and double
    quote character. Only needed for the "From" header.
    """
    cleaned = re.sub(r'''\?=["']<''', '?= <', s)
    for quote in ('"', "'"):
        cleaned = cleaned.replace(quote, '')
    return cleaned
def MarkUnread(imap, uids):
    """Mark the given messages as unread by removing their Seen flag.

    imap: connection object exposing ``uid(command, *args)``.
    uids: list of message UID strings; an empty list is a no-op.

    Prints the UID set being changed and the server's reply.
    """
    if not uids:
        return
    uid_set = ','.join(uids)
    print("marking unread %s" % uid_set)
    # Raw string: "\S" is not a valid escape sequence in a normal literal.
    typ, data = imap.uid('STORE', uid_set, '-FLAGS', r'\SEEN')
    print("%s %s" % (typ, data))
def Move(imap, ham_f, spam_f, uids):
    """Move messages out of the selected mailbox into the ``spam_f`` label.

    Implementation: add ``spam_f`` as a label to the messages and, if that
    succeeds, flag them as deleted and expunge the mailbox.

    imap:   connection object exposing ``uid(command, *args)`` and
            ``expunge()``.
    ham_f:  name of the source folder (currently unused; kept for callers).
    spam_f: name of the destination label; it is sent double-quoted.
    uids:   list of message UID strings; an empty list is a no-op.

    Errors are printed rather than raised (best effort).
    """
    if not uids:
        # An empty UID set would produce a malformed STORE command.
        return
    quoted_label = '"%s"' % spam_f
    uid_set = ",".join(uids)
    print("moving spams: %s" % uid_set)
    try:
        typ, data = imap.uid('STORE', uid_set, '+X-GM-LABELS', quoted_label)
        print(data)
        if typ == 'OK':
            # Raw string: "\D" is not a valid escape sequence.
            typ, data = imap.uid('STORE', uid_set, '+FLAGS', r'(\Deleted)')
            print(data)
            imap.expunge()
    except Exception as e:
        print(e)
# --- Script body -----------------------------------------------------------
# Connect, classify the first LIMIT messages of HAM_F as spam or ham from
# their headers, move the spam to SPAM_F and restore the unread state of ham.
g = imaplib.IMAP4_SSL(HOST)
try:
    r, info = g.login(USER, PAWD)
except Exception as e:
    # Every later command needs an authenticated session, so stop here
    # instead of failing on the next call with a less helpful error.
    raise SystemExit("login failed: %s" % e)

spam = []
ham = []
try:
    g.select(HAM_F)
    # Use 'UNSEEN' instead of 'ALL' to scan only unread mail.
    typ, msg_ids = g.search(None, 'ALL')
    if typ == 'OK':
        # split() with no argument yields [] for an empty result, whereas
        # split(' ') would yield [''] and trigger a bogus FETCH.
        for msg_id in (msg_ids[0] or '').split()[:LIMIT]:
            # BODY[...] implicitly sets the Seen flag on the message, which
            # is why ham is re-marked as unread once the loop is done.
            # NOTE(review): BODY.PEEK[...] would leave flags untouched; with
            # the 'ALL' search the current flow also un-reads mail that had
            # already been read -- confirm the intended behaviour.
            r, msg = g.fetch(
                msg_id,
                '(UID BODY[HEADER.FIELDS (SUBJECT FROM TO MESSAGE-ID)])')
            if r != 'OK' or not msg or not isinstance(msg[0], tuple):
                # No usable (envelope, header-text) pair for this message.
                continue
            uid = GetUID(msg[0][0])
            if not uid:
                # Without a UID the message can be neither moved nor re-flagged.
                continue
            parsed = StringToHeaders(msg[0][1])
            header = {}
            for name in ['From', 'To', 'Subject', 'Message-ID']:
                if name in parsed:
                    value = parsed[name]
                    if name.lower() == 'from':
                        value = TrimFrom(value)
                    header[name] = DecodeSingleHeader(value)
            if Filter(header):
                spam.append(uid)
                print("SPAM:")
            else:
                ham.append(uid)
                print("HAM:")
            for k, v in header.items():
                print("%s :  %s" % (k, v))
            print("")
    if spam:
        Move(g, HAM_F, SPAM_F, spam)
    MarkUnread(g, ham)
finally:
    # Always close the session, even if classification fails part-way.
    g.logout()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment