Skip to content

Instantly share code, notes, and snippets.

@show0k
Forked from fclairamb/lbca.py
Last active January 7, 2024 23:19
Show Gist options
  • Save show0k/ff386eb6a67b4c86a2c6 to your computer and use it in GitHub Desktop.
Save show0k/ff386eb6a67b4c86a2c6 to your computer and use it in GitHub Desktop.
Parseur de leboncoin : permet de s'envoyer des emails lors de nouvelles annonces sur une recherche précise
#!/usr/bin/python
import urllib
import requests
import re
import sqlite3
import os
import argparse
import gettext
import logging
import logging.handlers
from datetime import datetime
from bs4 import BeautifulSoup as bs
# Application-wide logger: every record is written both to a stream handler
# (logging.StreamHandler defaults to stderr) and to a rotating log file kept
# in the per-user ".lbca" directory.
logger = logging.getLogger("debug")
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s | %(levelname)8s | %(message)s")

stStdout = logging.StreamHandler()
stStdout.setFormatter(formatter)
logger.addHandler(stStdout)

# Per-user working directory. expanduser() is used instead of
# os.environ['HOME'] so that an unset HOME variable does not abort the script
# with a KeyError; when HOME is set the resulting path is the same.
lbcaDir = os.path.expanduser("~") + "/.lbca"
logsDir = lbcaDir + "/logs"
if not os.path.isdir(logsDir):
    # makedirs() creates lbcaDir as well when it does not exist yet.
    os.makedirs(logsDir)

# Rotating file output: 256 KiB per file, up to 10 backups.
stLogfile = logging.handlers.RotatingFileHandler(logsDir + '/log', maxBytes=256*1024, backupCount=10)
stLogfile.setFormatter(formatter)
#stLogfile.doRollover()
logger.addHandler(stLogfile)
# Command-line interface. Each option is described as a (flag, settings)
# pair and registered in one loop; the list order is the registration order.
parser = argparse.ArgumentParser(description="Le bon coin alert generator")
_cliOptions = [
    ("-r", dict(metavar="region", dest="region", help="Region",
                default="ile_de_france")),
    ("-s", dict(metavar="search", dest="searches", nargs="+",
                help="Searches to perform",
                default=["Atlantic F18", "Sauter Lucki"])),
    ("-e", dict(metavar="email-to", dest="email_to",
                help="Email to send it to",
                default="florent@clairambault.fr")),
    ("-f", dict(metavar="email-from", dest="email_from",
                help="Email to send it from",
                default="florent@clairambault.fr")),
    ("-u", dict(metavar="email-subject", dest="email_subject",
                help="Email's subject",
                default="An article matched your search")),
    ("--smtp-server", dict(metavar="smtp-server", dest="server",
                           help="SMTP server to use", default="localhost")),
    ("--url", dict(metavar="url", dest="url", nargs='+',
                   help="Listes d'Url")),
    ("--gui", dict(dest="gui", action="store_true",
                   help="Use WxWidget GUI")),
]
for _flag, _settings in _cliOptions:
    parser.add_argument(_flag, **_settings)

# Parsed once at import time; the rest of the script reads this namespace.
args = parser.parse_args()
# LBC specific
# Search URL prefix for the region chosen on the command line; it ends with
# "?" so that a query string can be appended to it directly.
baseUrl = "".join(["http://www.leboncoin.fr/annonces/offres/", args.region, "/?"])
# Pattern for links worth recording: the leboncoin host, one lowercase
# alphanumeric path segment, then an 8 to 12 digit number followed by ".htm".
# It is meant to be used with match(), which anchors at the start of the
# string only, so trailing text (e.g. a query string) is tolerated.
# The host-name dots are escaped so that they match a literal "." instead of
# any character.
linkRegex = re.compile(r"http://www\.leboncoin\.fr/[a-z0-9]+/[0-9]{8,12}\.htm")
# This is for requests handling
def searchToLinks(search, timeout=30):
    """Run one keyword search and return the matching links of the result page.

    The search text is URL-encoded as the ``q`` parameter and appended to
    ``baseUrl``. Every ``<a>`` tag of the fetched page whose ``href`` matches
    ``linkRegex`` contributes one entry to the result.

    Args:
        search: Text to search for.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the run indefinitely.

    Returns:
        A list of href strings, in the order the tags were found.

    Raises:
        Whatever ``requests.get`` raises (connection errors, timeouts, ...)
        is propagated to the caller.
    """
    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    logger.info("Searching for {search}".format(search=search))
    url = baseUrl + urlencode({"q": search})
    response = requests.get(url, timeout=timeout)
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed, which can differ between machines.
    pageSoup = bs(response.text, "html.parser")
    hrefs = (aTag.get('href') for aTag in pageSoup.findAll('a'))
    # Keep only anchors that have an href pointing at a matching target.
    return [href for href in hrefs if href and linkRegex.match(href)]
def searchesToLinks(searches):
    """Return the links found for every search text, concatenated in order.

    Each element of ``searches`` is passed to ``searchToLinks`` and the
    resulting links are appended to a single flat list. Duplicates are kept.
    """
    collected = []
    for term in searches:
        collected.extend(searchToLinks(term))
    return collected
def searchToLinksFromUrl(url, timeout=30):
    """Fetch a ready-made search URL and return the matching links of the page.

    Every ``<a>`` tag of the fetched page whose ``href`` matches
    ``linkRegex`` contributes one entry to the result.

    Args:
        url: Complete URL of the result page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the run indefinitely.

    Returns:
        A list of href strings, in the order the tags were found.

    Raises:
        Whatever ``requests.get`` raises (connection errors, timeouts, ...)
        is propagated to the caller.
    """
    logger.info("Searching for {url}".format(url=url))
    response = requests.get(url, timeout=timeout)
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed, which can differ between machines.
    pageSoup = bs(response.text, "html.parser")
    hrefs = (aTag.get('href') for aTag in pageSoup.findAll('a'))
    # Keep only anchors that have an href pointing at a matching target.
    return [href for href in hrefs if href and linkRegex.match(href)]
def searchesToLinksFromUrl(urls):
    """Return the links found on every given URL, concatenated in order.

    Each element of ``urls`` is passed to ``searchToLinksFromUrl`` and the
    resulting links are appended to a single flat list. Duplicates are kept.
    """
    collected = []
    for pageUrl in urls:
        collected.extend(searchToLinksFromUrl(pageUrl))
    return collected
# DB Preparation
# The SQLite database file is stored in the per-user working directory.
db = sqlite3.connect(lbcaDir+"/db")
# One row per link. The UNIQUE constraint on url is what lets
# "insert or ignore" skip links that are already stored.
# nb_views defaults to 0: without a default the column starts as NULL and
# "nb_views+1" stays NULL forever. Because of IF NOT EXISTS this only affects
# newly created databases; existing tables are left untouched.
db.execute("""
CREATE TABLE IF NOT EXISTS links (
url TEXT UNIQUE,
date DATETIME,
seen BOOL DEFAULT 0,
nb_views INTEGER DEFAULT 0,
emailed BOOL DEFAULT 0
);
""")
# Makes the _() translation function available as a builtin.
gettext.install("lbca")
logger.info("Start !")
if args.gui:
    # GUI mode: We should use a timer and a notification to make it really useful
    import subprocess
    import wx
    import gettext

    class LBCMainFrame(wx.Frame):
        """Main window: a list of the stored links plus a "Refresh" menu entry.

        Activating a list entry opens the link in a browser and marks it as
        seen; "Refresh" runs the configured searches and reloads the list.
        """

        def __init__(self, *args, **kwds):
            # begin wxGlade: MyFrame1.__init__
            wx.Frame.__init__(self, *args, **kwds)
            self.itemsList = wx.ListCtrl(self, wx.ID_ANY, style=wx.LC_REPORT | wx.SUNKEN_BORDER)
            self.itemsList.InsertColumn(0, "ID")
            self.itemsList.InsertColumn(1, "URL")
            self.itemsList.InsertColumn(2, "Date")
            self.itemsList.InsertColumn(3, "Seen")
            self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.selectItem, self.itemsList)
            # Menu Bar
            self.generalMenuBar = wx.MenuBar()
            self.General = wx.Menu()
            self.Update = wx.MenuItem(self.General, 2, _("Refresh"), _("Refresh"), wx.ITEM_NORMAL)
            self.General.AppendItem(self.Update)
            self.generalMenuBar.Append(self.General, _("General"))
            self.SetMenuBar(self.generalMenuBar)
            # Menu Bar end
            self.__set_properties()
            self.__do_layout()
            self.Bind(wx.EVT_MENU, self.refreshItems, self.Update)
            # end wxGlade

        def __set_properties(self):
            # begin wxGlade: MyFrame1.__set_properties
            self.SetTitle(_("LBC Alert"))
            self.itemsList.SetMinSize((600, 400))
            #self.SetSize((400, 300))
            # end wxGlade

        def __do_layout(self):
            # begin wxGlade: MyFrame1.__do_layout
            self.itemsList.SetSize(wx.Size(400, 300))
            itemsListSize = wx.BoxSizer(wx.VERTICAL)
            itemsListSize.Add(self.itemsList, 1, wx.EXPAND, 0)
            self.SetSizer(itemsListSize)
            itemsListSize.Fit(self)
            self.Layout()
            # end wxGlade

        def loadItems(self, event):
            """Reload the list control from the database (20 rows at most)."""
            self.itemsList.DeleteAllItems()
            query = "select rowid, url, date, seen from links order by seen desc, rowid desc limit 20;"
            for rowId, url, date, seen in db.execute(query):
                # Each row is inserted at index 0, i.e. above the previous one.
                p = self.itemsList.InsertStringItem(0, str(rowId))
                self.itemsList.SetStringItem(p, 1, url)
                self.itemsList.SetStringItem(p, 2, date)
                self.itemsList.SetStringItem(p, 3, "Yes" if seen else "No")

        def selectItem(self, event):
            """Open the activated link in a browser and record the visit."""
            rowId = event.GetText()
            row = db.execute("select url from links where rowid=?;", (rowId,)).fetchone()
            if row is None:
                # The entry is no longer in the database: nothing to open.
                event.Skip()
                return
            # The URL was scraped from a web page, so it is passed as a single
            # argument instead of being concatenated into a shell command.
            try:
                subprocess.call(["sensible-browser", row[0]])
            except OSError:
                # Keep the previous best-effort behaviour when the browser
                # launcher is missing: log it and still record the visit.
                logger.exception("Could not start sensible-browser")
            # COALESCE: nb_views may be NULL on existing rows, and NULL+1
            # would stay NULL.
            db.execute("update links set seen=1, nb_views=COALESCE(nb_views, 0)+1 where rowid=?;", (rowId,))
            db.commit()
            event.Skip()
            self.loadItems(event)

        def refreshItems(self, event):
            """Run the configured searches, store new links, reload the list."""
            # We save all links
            for link in searchesToLinks(args.searches):
                db.execute("insert or ignore into links ('url','date') values (?,?);", (link, datetime.now()))
            db.commit()
            self.loadItems(event)

    if __name__ == "__main__":
        app = wx.PySimpleApp(0)
        wx.InitAllImageHandlers()
        mainFrame = LBCMainFrame(None, wx.ID_ANY, "")
        app.SetTopWindow(mainFrame)
        mainFrame.Show()
        mainFrame.loadItems(None)
        app.MainLoop()
else:
    # Email mode
    import smtplib
    from email.mime.text import MIMEText

    # We save all links. Explicit URLs take precedence over keyword searches.
    if args.url:
        # Logged only when URLs were given: args.url is None otherwise and
        # indexing it would raise a TypeError.
        logger.info("Url = " + args.url[0])
        links = searchesToLinksFromUrl(args.url)
    else:
        links = searchesToLinks(args.searches)
    for link in links:
        db.execute("insert or ignore into links ('url','date') values (?,?);", (link, datetime.now()))
    db.commit()

    # Fetch the pending rows up front so the table is not updated while the
    # SELECT over it is still being iterated.
    pending = db.execute("select rowid, url from links where emailed=0;").fetchall()
    nb = len(pending)
    if nb > 0:
        items = []
        for rowid, url in pending:
            logger.info("We have new link : {link}.".format(link=url))
            items.append('<li><a href="{url}">{url} (#{id})</a></li>\n'.format(id=rowid, url=url))
        text = '<ul>\n' + ''.join(items) + '</ul>\n'
        # Translate the template first, then substitute the count; formatting
        # before the lookup would hand gettext a msgid it can never match.
        text = _("We found {nb} articles matching your searches:<br />").format(nb=nb) + '\n' + text
        smtp = smtplib.SMTP(args.server)
        msg = MIMEText(text, 'html')
        msg['Subject'] = args.email_subject
        msg['From'] = args.email_from
        msg['To'] = args.email_to
        smtp.sendmail(msg['From'], [msg['To']], msg.as_string())
        logger.info("We are sending the following email:\n {email}".format(email=msg.as_string()))
        # Mark the links as emailed only once the message has been handed over.
        db.executemany("update links set emailed=1 where rowid=?", [(row[0],) for row in pending])
        db.commit()
        smtp.quit()
    else:
        logger.info("We don't have anything to send !")

# Common shutdown for both modes.
db.close()
logger.info("End !")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment