Last active
February 25, 2018 19:29
-
-
Save lieuwex/f487c9986a81024c3f1d2693fc703b26 to your computer and use it in GitHub Desktop.
WeeChat plugin to fetch the page title when a URL is printed in a buffer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import weechat | |
import htmllib | |
import re | |
# Per-buffer line counter, keyed by the buffer's full name. incBuf() bumps it
# for every printed line and page_cb() resets it after printing a title.
bufcounts = dict()

# Matches an http:// or https:// URL running up to the next space character.
regex_url = re.compile("https?://[^ ]+")
def unescape(s):
    """Unescape HTML entities in *s* and return the decoded text.

    Uses the standard library's ``html.unescape`` when it can be imported.
    ``htmllib`` only exists on Python 2, so it is kept purely as a fallback
    for interpreters that lack ``html.unescape``.
    """
    try:
        from html import unescape as _html_unescape
    except ImportError:
        # Legacy path: feed the text through htmllib's parser and capture the
        # character data it emits between save_bgn() and save_end().
        import htmllib

        parser = htmllib.HTMLParser(None)
        parser.save_bgn()
        parser.feed(s)
        return parser.save_end()
    return _html_unescape(s)
def incBuf(name):
    """Add one to the counter stored for buffer *name*, starting from zero."""
    # bufcounts is mutated in place, never rebound, so no ``global`` is needed.
    bufcounts[name] = bufcounts.get(name, 0) + 1
def getPageTitle(html):
    """Retrieve the HTML <title> from a webpage.

    Args:
        html: The page source as a string. ``None`` or an empty string is
            treated as a page without a title.

    Returns:
        The text of the first ``<title>`` element with HTML entities decoded
        and runs of whitespace collapsed to single spaces, or ``""`` when no
        title element is found.
    """
    if not html:
        return ""

    # Raw string avoids invalid-escape warnings. ``[^>]*`` accepts attributes
    # on the tag (e.g. <title lang="en">), and DOTALL lets the title span
    # several lines.
    found = re.search(
        r"<title\b[^>]*>(.*?)</title\s*>", html, re.IGNORECASE | re.DOTALL
    )
    if found is None:
        return ""

    # Collapse line breaks and indentation so padding inside the element does
    # not count toward the caller's minimum-length check.
    return " ".join(unescape(found.group(1)).split())
def page_cb(bufname, command, rc, out, err):
    """Process-hook callback: print the fetched page's title in its buffer.

    Args:
        bufname: Full name of the buffer where the URL was seen (passed as
            the hook's callback data by msg_cb).
        command: The command that was run ("url:..."); unused.
        rc: Return code reported by WeeChat for the fetch.
        out: Page content received from the fetch.
        err: Error output of the fetch; unused.

    Returns:
        Always ``weechat.WEECHAT_RC_OK``.
    """
    # A failed fetch has no page to parse.
    if int(rc) == weechat.WEECHAT_HOOK_PROCESS_ERROR:
        return weechat.WEECHAT_RC_OK

    title = getPageTitle(out)
    # Very short titles are skipped.
    if len(title) < 5:
        return weechat.WEECHAT_RC_OK

    target = weechat.buffer_search("==", bufname)
    if not target:
        # The buffer went away while the page was loading. Printing to an
        # empty pointer would write the title to the core buffer instead.
        return weechat.WEECHAT_RC_OK

    # Disabled alternative that would use the counter as a line offset:
    # weechat.prnt_y(target, -bufcounts[bufname]-1, msg)
    weechat.prnt(target, "URL title: " + title)
    bufcounts[bufname] = 0
    return weechat.WEECHAT_RC_OK
def msg_cb(data, buffer, date, tags, displayed, highlight, prefix, message):
    """Print-hook callback: count the line and fetch the first URL it contains.

    Every call bumps the counter for the buffer's full name. When *message*
    contains an http(s) URL, a fetch is started whose result is delivered to
    page_cb together with the buffer's full name.

    Returns:
        Always ``weechat.WEECHAT_RC_OK``.
    """
    bufname = weechat.buffer_get_string(buffer, "full_name")
    incBuf(bufname)

    found = regex_url.search(message)
    if found is not None:
        # Browser-like user agent sent with the request.
        fetch_options = {
            'useragent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
        }
        weechat.hook_process_hashtable(
            "url:" + found.group(),
            fetch_options,
            30 * 1000,  # timeout: 30 seconds, in milliseconds
            "page_cb",
            bufname,
        )

    return weechat.WEECHAT_RC_OK
# Register the script with WeeChat. The arguments are, in order: name, author,
# version, license, description, and two values left empty here.
weechat.register("pagetitle", "lieuwex", "0", "MIT", "Adds page title to messages", "", "")

# Call msg_cb for printed lines. The first three (empty) arguments apply no
# filter, so lines from every buffer are inspected.
weechat.hook_print("", "", "", 1, "msg_cb", "")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment