Skip to content

Instantly share code, notes, and snippets.

@lieuwex
Last active February 25, 2018 19:29
Show Gist options
  • Save lieuwex/f487c9986a81024c3f1d2693fc703b26 to your computer and use it in GitHub Desktop.
Save lieuwex/f487c9986a81024c3f1d2693fc703b26 to your computer and use it in GitHub Desktop.
WeeChat plugin that fetches the page title whenever a URL is printed in a buffer
import re

import weechat

try:
    import htmllib  # Python 2 only; the module was removed in Python 3
except ImportError:
    htmllib = None
# Per-buffer line counters keyed by the buffer's full name: bumped for every
# printed line and reset to 0 once a page title has been printed there.
bufcounts = dict()

# Matches an http(s) URL: the scheme followed by everything up to the next
# space character.
regex_url = re.compile(r"https?://[^ ]+")
def unescape(s):
    """Unescape HTML entities in ``s`` and collapse runs of whitespace.

    On Python 3 (where ``htmllib`` no longer exists) this uses the standard
    ``html.unescape``; on Python 2 it falls back to the original
    ``htmllib.HTMLParser`` based implementation.

    Args:
        s: Text that may contain HTML entities such as ``&amp;``.

    Returns:
        The text with entities replaced, leading/trailing whitespace removed
        and every whitespace run reduced to a single space (this mirrors what
        ``HTMLParser.save_end()`` does on the Python 2 path).
    """
    try:
        # Imported here so the function stays self-contained on both major
        # Python versions.
        from html import unescape as html_unescape
    except ImportError:
        html_unescape = None

    if html_unescape is None:
        # Python 2 path. NOTE: htmllib also interprets markup, so tags
        # embedded in ``s`` are consumed rather than returned verbatim.
        parser = htmllib.HTMLParser(None)
        parser.save_bgn()
        parser.feed(s)
        # close() forces the parser to process whatever it is still holding
        # back (e.g. a trailing bare "&word" it was waiting to complete);
        # without it that tail could be missing from the result.
        parser.close()
        return parser.save_end()

    # save_end() normalises whitespace on the htmllib path; do the same here
    # so both paths return comparably shaped text.
    return " ".join(html_unescape(s).split())
def incBuf(name):
    """Add one to the line counter kept for the buffer called ``name``.

    A buffer that has no counter yet starts from zero, so its first call
    leaves the counter at 1.
    """
    bufcounts[name] = bufcounts.get(name, 0) + 1
def getPageTitle(html):
    """Retrieve the HTML <title> from a webpage.

    Args:
        html: The page source as a string. Empty or ``None`` input is
            treated as a page without a title.

    Returns:
        The text of the first ``<title>`` element with HTML entities
        unescaped, or ``""`` when the page has no title element.
    """
    if not html:
        return ""

    # Turn line breaks and tabs into spaces so a title that spans several
    # lines can still be matched by ``.`` (which does not cross newlines).
    html = re.sub(r"[\r\n\t ]", " ", html)

    # ``<title\b[^>]*>`` also accepts an opening tag carrying attributes
    # (e.g. <title data-x="1">); a bare ``<title>`` pattern misses those.
    # ``\b`` keeps tags that merely start with "title" from matching.
    title = re.search(r"(?i)<title\b[^>]*>(.*?)</title\s*>", html)
    if title is None:
        return ""
    return unescape(title.group(1))
def page_cb(bufname, command, rc, out, err):
    """Process-hook callback: print the fetched page's title in its buffer.

    Args:
        bufname: Full name of the buffer the URL was seen in (passed as the
            callback data when the hook was created).
        command: The hooked command string (unused).
        rc: Return code reported by the hook (unused).
        out: Output received from the hook; searched for a <title> element.
        err: Error output from the hook (unused).

    Returns:
        Always ``weechat.WEECHAT_RC_OK``.
    """
    page_title = getPageTitle(out)

    # Titles shorter than five characters (including "no title") are ignored.
    if len(page_title) >= 5:
        target = weechat.buffer_search("==", bufname)
        # (A prnt_y variant positioned at -bufcounts[bufname]-1 is disabled
        # in favour of a plain print.)
        weechat.prnt(target, "URL title: " + page_title)
        # Restart the buffer's line counter once a title has been shown.
        bufcounts[bufname] = 0

    return weechat.WEECHAT_RC_OK
def msg_cb(data, buffer, date, tags, displayed, highlight, prefix, message):
    """Print-hook callback: start a title fetch for the first URL in a line.

    Every call bumps the line counter of the buffer the line belongs to.
    When ``message`` contains an http(s) URL, the first one is fetched
    through a ``url:`` process hook (30 second timeout) whose result is
    handed to ``page_cb`` together with the buffer's full name.

    Returns:
        Always ``weechat.WEECHAT_RC_OK``.
    """
    full_name = weechat.buffer_get_string(buffer, "full_name")
    incBuf(full_name)

    found = regex_url.search(message)
    if found is not None:
        # Present a desktop-browser user agent with the request.
        options = {
            'useragent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
        }
        timeout_ms = 30 * 1000
        weechat.hook_process_hashtable(
            "url:" + found.group(), options, timeout_ms, "page_cb", full_name
        )

    return weechat.WEECHAT_RC_OK
# Register the script with WeeChat: name, author, version, license and
# description, followed by two empty trailing arguments.
weechat.register("pagetitle", "lieuwex", "0", "MIT", "Adds page title to messages", "", "")

# Route printed lines to msg_cb; the buffer, tags and message filters are all
# left empty.
weechat.hook_print("", "", "", 1, "msg_cb", "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment