Skip to content

Instantly share code, notes, and snippets.

@mbiette
Last active December 15, 2015 20:59
Show Gist options
  • Save mbiette/5322301 to your computer and use it in GitHub Desktop.
Save mbiette/5322301 to your computer and use it in GitHub Desktop.
This is a really simple script, written in Python, that sends by email the job offers published on an RSS feed. (I left in the RSS feed that works well with the field names I used.) It runs every 5 minutes. It downloads an RSS feed with feedparser, puts the content in a simple object named offer, tracks new offers and saves them in a pickle file, sends an email with…
from offer import offer
import psycopg2
class dbOffer:
    ''' Stores and loads offer objects for one feed source in the PostgreSQL table "offers".

    Every row is tagged with the source name given at construction, so one
    table can hold several feeds. Writes are committed per call; if a statement
    fails the transaction is rolled back before the error is re-raised, so the
    shared connection stays usable for the next polling round.
    '''

    # Explicit column list for SELECTs. convertToOffer indexes rows by
    # position, so the order here must match the indexes it uses.
    _COLUMNS = "source,title,link,description,date,sent_message_id,reply_message_id,deleted"

    def __init__(self,sourcename,host="localhost",port="5432",database="rsstracker",user="rsstracker",password="rsstracker"):
        ''' Open the database connection.

        sourcename -- value written to / filtered on in the "source" column.
        host, port, database, user, password -- passed to psycopg2.connect.
        '''
        # Make psycopg2 hand back unicode objects (and arrays of them) for text columns.
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
        psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
        self.conn = psycopg2.connect(host=host,port=port,database=database,user=user,password=password)
        self.sourcename = sourcename

    def prepareOffer(self,offer):
        ''' Return the parameter tuple for one offer.

        The order (link, description, date, sent_message_id, reply_message_id,
        deleted, source, title) matches the placeholders of both the INSERT
        and the UPDATE statement used by this class.
        '''
        return (
            offer.link,
            offer.description,
            offer.date,
            offer.sent_message_id,
            offer.reply_message_id,
            offer.deleted,
            self.sourcename,
            offer.title,
        )

    def convertToOffer(self,sqlArray):
        ''' Build an offer from one row laid out as in _COLUMNS (index 0, the source, is skipped). '''
        o = offer(sqlArray[1],sqlArray[2],sqlArray[3],sqlArray[4])
        o.sent_message_id = sqlArray[5]
        o.reply_message_id = sqlArray[6]
        o.deleted = sqlArray[7]
        return o

    def _executeMany(self,statement,offerList):
        ''' Run statement once per offer in offerList and commit.

        On any error the transaction is rolled back and the error re-raised;
        the cursor is closed in every case.
        '''
        rows = [self.prepareOffer(o) for o in offerList]
        cur = self.conn.cursor()
        try:
            cur.executemany(statement,rows)
            self.conn.commit()
        except Exception:
            # Without a rollback the connection would stay in an aborted
            # transaction and reject every later statement.
            self.conn.rollback()
            raise
        finally:
            cur.close()

    def insertOfferList(self,offerList):
        ''' Insert every offer of offerList as a new row and commit. '''
        self._executeMany("INSERT INTO offers (link,description,date,sent_message_id,reply_message_id,deleted,source,title) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)",offerList)

    def updateOfferList(self,offerList):
        ''' Update the rows matching each offer's (source, title) and commit. '''
        self._executeMany("UPDATE offers SET link=%s, description=%s, date=%s, sent_message_id=%s, reply_message_id=%s, deleted=%s WHERE source=%s AND title=%s",offerList)

    def getValidOfferList(self):
        ''' Return the offers of this source whose "deleted" flag is not TRUE. '''
        cur = self.conn.cursor()
        try:
            cur.execute("SELECT "+self._COLUMNS+" FROM offers WHERE source=%s AND deleted IS NOT TRUE",(self.sourcename,))
            rows = cur.fetchall()
        except Exception:
            self.conn.rollback()
            raise
        finally:
            cur.close()
        return [self.convertToOffer(row) for row in rows]
-- Create the application database: UTF-8 encoded, with no cap on the number
-- of concurrent connections (CONNECTION LIMIT -1 means "no limit").
-- NOTE(review): the statements that follow do not switch to this database;
-- presumably they are run after connecting to rsstracker -- confirm.
CREATE DATABASE rsstracker
WITH ENCODING='UTF8'
CONNECTION LIMIT=-1;
-- Login role for the application; the Python dbOffer class connects as
-- "rsstracker" by default. The password literal starts with "md5", which
-- looks like a pre-computed md5 hash rather than a clear-text password.
CREATE ROLE rsstracker LOGIN ENCRYPTED PASSWORD 'md530f6cd783e03a6ab3e9e3a601400cbfa'
VALID UNTIL 'infinity';
-- Group role (no LOGIN) that receives the default privileges below.
-- NOTE(review): nothing in this script makes rsstracker a member of
-- rsstracker_group -- confirm the membership is granted elsewhere, otherwise
-- these privileges never reach the login role.
CREATE ROLE rsstracker_group
VALID UNTIL 'infinity';
-- Default privileges: objects created from now on grant these rights to the
-- group. No FOR ROLE clause is given, so per the PostgreSQL documentation this
-- applies to objects created by the role executing the script.
-- Tables: full read/write access plus TRUNCATE, REFERENCES and TRIGGER.
ALTER DEFAULT PRIVILEGES
GRANT INSERT, SELECT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER ON TABLES
TO rsstracker_group;
-- Sequences: read, update and use.
ALTER DEFAULT PRIVILEGES
GRANT SELECT, UPDATE, USAGE ON SEQUENCES
TO rsstracker_group;
-- Functions: permission to execute.
ALTER DEFAULT PRIVILEGES
GRANT EXECUTE ON FUNCTIONS
TO rsstracker_group;
-- One row per tracked job offer. (source, title) identifies an offer; the
-- Python dbOffer class uses the same pair in its UPDATE ... WHERE clause.
-- Any reader using SELECT * receives the columns in the order declared here.
-- NOTE(review): the statement has no terminating semicolon -- add one if more
-- statements are appended to this script.
CREATE TABLE offers
(
-- Name of the feed the offer came from (the script passes 'socgen').
source varchar(10),
-- Offer title; limited to 100 characters.
title varchar(100),
-- URL of the offer; limited to 200 characters.
link varchar(200),
-- Offer body as delivered by the feed.
description text,
-- Kept as text, not as a date/timestamp type.
date varchar(50),
-- Message-ID of the announcement mail, if one was sent.
sent_message_id varchar(50),
-- Message-ID of the "DELETED" follow-up mail, if one was sent.
reply_message_id varchar(50),
-- Set once the offer has disappeared from the feed; may be NULL.
deleted boolean,
primary key (source,title)
)
import feedparser
import time,datetime
from offer import offer
from db import dbOffer
from listOffers import listOffers
from mailOffer import mailOffer
#import pprint
if __name__ == '__main__':
# Url for the RSS feed
rss_url = "https://careers.societegenerale.com/groupe/fr/rss-offre.html?0&zone=83&lang=fr&JOBTYPE_VIE=34"
# Setting up objects
dbo = dbOffer('socgen')
lo = listOffers(dbo)
mo = mailOffer(email_from = '',
email_to = '',
server_smtp = '',
login = '',
password = '',
subject_keyword = 'SG')
while(True):
# Getting information from the web
print datetime.datetime.now()
feed = feedparser.parse(rss_url)
if len(feed['items']) > 0: # Avoid resending ALL the offer when feed is empty.
# Traking changes
for item in feed['items']:
#pprint.pprint(item)
o = offer(item['title'],item['link'],item['description'],item['updated'])
lo.trackOffer(o)
# Simple output
print 'list',len(lo.listOffers)
print 'new',len(lo.newOffers)
print 'del',len(lo.delOffers)
# Sending new offers by email
for item in lo.newOffers:
mo.sendOffer(item)
time.sleep(2) # Wait in order not to flood STMP server and/or the recipient mail server
pass
# Sending del offers by email
for item in lo.delOffers:
mo.delOffer(item)
time.sleep(2)
pass
# Reseting the traking and saving
lo.saveList()
lo.resetTracking()
# Waiting
print
time.sleep(300) # every 5 minutes = 300 seconds
import cPickle as pickle
from db import dbOffer
class listOffers:
    ''' listOffers tracks which offers are new and which have disappeared between two polls.

    The known offers are loaded from, and saved to, the db object given at
    construction. Within one polling round:
      listOffers -- offers known before the round started
      newOffers  -- offers seen this round that were not known before
      delOffers  -- known offers not (yet) seen this round
    Offers are compared with ==.
    '''

    def __init__(self,db):
        ''' db must provide getValidOfferList, insertOfferList and updateOfferList. '''
        self.db = db
        self.listOffers = None
        self.newOffers = []
        self.delOffers = []
        self.loadList()
        self.resetTracking()

    def loadList(self):
        ''' Replace the known offers with the list returned by the database. '''
        self.listOffers = self.db.getValidOfferList()

    def saveList(self):
        ''' Insert the new offers and flag the disappeared ones as deleted in the database. '''
        self.db.insertOfferList(self.newOffers)
        for o in self.delOffers:
            o.deleted = True
        self.db.updateOfferList(self.delOffers)

    def resetTracking(self):
        ''' Fold this round's result into the known list and start a new round. '''
        self.listOffers.extend(self.newOffers)
        for off in self.delOffers:
            self.listOffers.remove(off)
        self.newOffers = []
        # Every known offer counts as deleted until trackOffer sees it again.
        self.delOffers = list(self.listOffers)

    def trackOffer(self,offer):
        ''' Record that offer is present in the feed during the current round. '''
        try:
            self.delOffers.remove(offer)
        except ValueError:
            # Not pending any more: it is a repeat of an offer already seen
            # this round (known or new), or it is genuinely new. Only the
            # latter is queued, so a title occurring twice in the feed is not
            # announced or inserted twice.
            if offer not in self.listOffers and offer not in self.newOffers:
                self.newOffers.append(offer)
import smtplib
import pprint
from bs4 import BeautifulSoup
from email.utils import make_msgid
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email import charset
from offer import offer
class mailOffer:
    ''' mailOffer sets up everything needed to send offers by email.

    sendOffer announces an offer; delOffer sends a "DELETED" follow-up that
    references the announcement so mail clients can thread the two.
    '''

    def __init__(self, email_from, email_to, server_smtp, login, password, subject_keyword):
        ''' Store the mail settings.

        email_from, email_to -- sender and recipient addresses.
        server_smtp          -- SMTP server, passed to smtplib.SMTP.
        login, password      -- SMTP credentials.
        subject_keyword      -- keyword inserted into every subject line.
        '''
        self.email_from = email_from
        self.email_to = email_to
        self.server_smtp = server_smtp
        self.login = login
        self.password = password
        self.subject_keyword = subject_keyword
        # Register utf-8 with quoted-printable body encoding for the email package.
        charset.add_charset('utf-8', charset.SHORTEST, charset.QP)

    def _buildMessage(self, subject, text, html, message_id, in_reply_to=None):
        ''' Return a multipart/alternative message with a plain-text and an HTML part. '''
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = self.email_from
        msg['To'] = self.email_to
        if in_reply_to is not None:
            msg['References'] = in_reply_to
            msg['In-Reply-To'] = in_reply_to
        msg['Message-ID'] = message_id
        # According to RFC 2046, the last part of a multipart message, in this
        # case the HTML version, is best and preferred - so it is attached last.
        msg.attach(MIMEText(text.encode('utf-8'), 'plain', _charset='utf-8'))
        msg.attach(MIMEText(html.encode('utf-8'), 'html', _charset='utf-8'))
        return msg

    def _deliver(self, msg):
        ''' Send msg through the configured SMTP server over STARTTLS.

        The connection is always released: quit() after a successful send,
        close() if any step fails (the original error is re-raised).
        '''
        s = smtplib.SMTP(self.server_smtp)
        try:
            s.starttls()
            s.login(self.login,self.password)
            # sendmail takes the sender's address, the recipient's address and
            # the message serialized as one string.
            s.sendmail(self.email_from, self.email_to, msg.as_string())
        except Exception:
            s.close()
            raise
        s.quit()

    def sendOffer(self,o):
        ''' Mail offer o; a Message-ID is generated and stored in o.sent_message_id if it has none. '''
        if o.sent_message_id is None:
            o.sent_message_id = make_msgid()
        subject = "[VIE "+self.subject_keyword+" "+o.date+"] "+o.title
        # Plain-text version: the link followed by the description stripped of markup.
        text = o.link + "\n\n" + BeautifulSoup(o.description).get_text()
        html = """\
<html>
<head></head>
<body>
<p>"""+o.link+"""</p><br>
"""+o.description+"""
</body>
</html>
"""
        msg = self._buildMessage(subject, text, html, o.sent_message_id)
        print(msg['Message-ID'])
        self._deliver(msg)
        print('Sent email %s' % (o.title,))

    def delOffer(self,o):
        ''' Mail a "DELETED" notice for offer o, as a reply to its announcement when one was sent.

        A Message-ID is generated and stored in o.reply_message_id if it has none.
        '''
        if o.reply_message_id is None:
            o.reply_message_id = make_msgid()
        subject = "RE: [VIE "+self.subject_keyword+" "+o.date+"] "+o.title
        text = "DELETED"
        html = """\
<html>
<head></head>
<body>
<h1>DELETED</h1>
</body>
</html>
"""
        msg = self._buildMessage(subject, text, html, o.reply_message_id, in_reply_to=o.sent_message_id)
        self._deliver(msg)
        print('Sent email DEL %s' % (o.title,))
class offer:
    ''' Container for offers' information.

    Two offers are considered equal when their titles are equal; link,
    description, date and the mail bookkeeping fields are ignored.
    '''

    def __init__(self,title,link,description,date):
        self.title = title
        self.link = link
        self.description = description
        self.date = date
        # Message-ID of the announcement mail, set once it has been sent.
        self.sent_message_id = None
        # Message-ID of the "DELETED" follow-up mail, set once it has been sent.
        self.reply_message_id = None
        self.deleted = False

    def __eq__(self, other):
        try:
            return self.title == other.title
        except AttributeError:
            # Not offer-like: let Python fall back to its default comparison
            # instead of raising.
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Keep hashing consistent with __eq__: equal titles, equal hashes.
        return hash(self.title)

    def __str__(self):
        return str(self.title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment