Last active August 31, 2021 23:05
A simple IRC bot which displays urls titles
#!/usr/bin/env python3
#-*- encoding: utf-8 -*-
# Simple bot which displays urls titles
# Copyright (c) 2010 Mick@el and Zopieux
# Copyright (c) 2014 gustavi
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import time
from urllib import parse
import re
import unicodedata
import difflib
import requests
from pypeul import *
from bs4 import BeautifulSoup
from itertools import groupby
# Bot information
BOT_NAME = 'SimpleB00t'
BOT_CHAN = '##gustavi'
# NOTE(review): BOT_SERVER is read when connecting but its definition is not
# present in this file; the value below is a placeholder guess -- confirm it.
BOT_SERVER = 'irc.freenode.net'
BOT_PORT = 6667

# Main settings
settings = {
    # Similarity threshold between the last URL path segment and the
    # slugified title; the title is only shown below this ratio
    # (see SimpleUrlBot.show_url()).
    'ratio': 0.6,
    # Maximum number of recently announced urls kept in ``buffer_url``.
    'buffer_url_len': 25,
    # Titles shorter than this are never displayed.
    'title_len_min': 18,
    # Titles longer than this are truncated and suffixed with '...'.
    'title_len_max': 136,
    # Pages announcing a larger Content-Length (2 MiB) are not downloaded.
    'content_length_max': 2097152,
}

# Contains the last announced urls (oldest first)
buffer_url = []

# The urls for which we never display a title
# NOTE(review): the original entries of this list are not visible in the
# source; fill them in again if some were expected.
exceptions_domain = []
class SimpleUrlBot(IRC):
    """IRC bot that announces the page title of the urls posted on a channel."""

    # Pattern used to pick http(s) urls out of a channel message.
    _URL_RE = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )

    def get_title(self, url):
        """
        Fetch ``url`` and build the line to announce for it.

        Returns a ``(message, final_url)`` tuple where ``message`` is
        ``'[redirection path] title'`` and ``final_url`` is the address
        reached once redirections have been followed.

        Returns ``('', '')`` when the request fails, when the announced
        Content-Length exceeds ``settings['content_length_max']`` or when the
        document has no usable ``<title>``.
        """
        try:
            # stream=True defers the body download so the size can be checked
            # from the headers first. The timeout keeps a silent server from
            # blocking the bot forever.
            req = requests.get(url, stream=True, verify=False, timeout=10)
        except requests.exceptions.RequestException:
            return '', ''

        try:
            too_big = (int(req.headers.get('content-length'))
                       > settings['content_length_max'])
        except (TypeError, ValueError):
            # Header missing (None) or malformed: size unknown, fetch anyway.
            too_big = False
        if too_big:
            # The body was never read, so release the connection explicitly.
            req.close()
            return '', ''

        try:
            body = req.content
        except requests.exceptions.RequestException:
            return '', ''

        soup = BeautifulSoup(body, 'html.parser')
        # Exclude binary / title-less documents
        try:
            title = soup.title.string.replace('\n', '')
        except AttributeError:
            return '', ''

        # Cut the title if too big
        if len(title) > settings['title_len_max']:
            title = title[:settings['title_len_max']] + '...'

        # Compute a compact redirection path: consecutive hops on the same
        # host are collapsed into 'host ×N'.
        hist = (parse.urlsplit(u.url).netloc for u in [*req.history, req])
        nodup_hist = ((k, sum(1 for _ in g)) for k, g in groupby(hist))
        path = ' → '.join(
            '%s%s' % (host, ' ×%d' % c if c > 1 else '')
            for host, c in nodup_hist
        )
        return '[{}] {}'.format(path, title), req.url

    def slugify(self, value):
        """
        Converts to lowercase, removes non-word characters (everything but
        alphanumerics, underscores, whitespace and hyphens) and converts
        runs of spaces/hyphens to a single hyphen. Also strips leading and
        trailing whitespace. Non-ASCII characters are dropped after NFKD
        normalization.
        """
        value = unicodedata.normalize('NFKD', value)
        value = value.encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return re.sub(r'[-\s]+', '-', value)

    def show_url(self, url, title, final_url):
        """
        Tell whether ``title`` should be displayed for ``url``.

        Returns True only if:
            - length of title is at least 'title_len_min' (in settings)
            - final_url (or its host) is not in the exceptions list
            - url does not already contain the title
            - final_url is not recent (is not in 'buffer_url')

        When True is returned, ``final_url`` is recorded in ``buffer_url``
        (the oldest entry is dropped once the buffer is full).
        """
        # Check the min length
        if len(title) < settings['title_len_min']:
            return False

        # Check if not in exceptions list (full url or bare host name)
        if (final_url in exceptions_domain
                or parse.urlsplit(final_url).netloc in exceptions_domain):
            return False

        url_dif = parse.urlsplit(url).path.split('/')[-1]
        title_dif = self.slugify(title)

        # Check if url does not contain title
        similarity = difflib.SequenceMatcher(None, url_dif, title_dif).ratio()
        if similarity >= settings['ratio']:
            return False

        # Check if is recent
        if final_url in buffer_url:
            return False

        # Check if buffer is over, then remember this url
        if len(buffer_url) >= settings['buffer_url_len']:
            del buffer_url[0]
        buffer_url.append(final_url)
        return True

    def on_ready(self):
        """
        If you want your bot to join a channel when it connects, you should do
        that in the on_ready event handler.
        """
        # NOTE(review): the original body is not visible in the source;
        # joining BOT_CHAN is reconstructed from the docstring -- confirm.
        self.join(BOT_CHAN)

    def on_channel_message(self, umask, target, msg):
        """
        Main event handler, called when someone speaks on a channel where the
        bot is.
        Just displays the title for each url.
        """
        for url in self._URL_RE.findall(msg):
            title, final_url = self.get_title(url)
            if title != '' and self.show_url(url, title, final_url):
                self.message(target, title)

    def on_ctcp_version_request(self, umask, value):
        """
        There are event handlers for CTCP too.
        Here the bot replies its own version string on a CTCP "version".
        """
        # NOTE(review): only the last argument of this call is visible in the
        # source; the ctcp_reply(...) call is reconstructed -- confirm.
        self.ctcp_reply(umask.nick, 'VERSION',
                        "{}, powered by pypeul and <3".format(BOT_NAME))

    def on_disconnected(self):
        """
        Try to reconnect forever, waiting 30s longer after each failure.
        """
        # NOTE(review): the try/except and the sleep are reconstructed (the
        # source is truncated here); it is assumed connect() raises OSError
        # on failure, and any post-connect step the library needs is not
        # visible -- confirm.
        logger.info('Disconnected. Trying to reconnect...')
        time_sleep = 30
        while True:
            try:
                self.connect(BOT_SERVER, BOT_PORT)
            except OSError:
                logger.error(
                    'Attempt failed. Retrying in {}s...'.format(time_sleep))
                time.sleep(time_sleep)
                time_sleep += 30
            else:
                break
if __name__ == '__main__':
    # Enable debug-level logging
    import logging
    logging.basicConfig(level=logging.DEBUG)

    # Instantiate our SimpleUrlBot class and let it run
    bot = SimpleUrlBot()
    bot.connect(BOT_SERVER, BOT_PORT)
    # NOTE(review): the ident/run calls are not visible in the source; they
    # are reconstructed from the "let it run" comment -- confirm.
    bot.ident(BOT_NAME)
    bot.run()
