lieuwex/pagetitle.py

## pagetitle.py
import weechat
import htmllib
import re

# Per-buffer line counters, keyed by the buffer's full name: bumped by
# incBuf() for every printed line and reset by page_cb() after a title
# has been shown.
bufcounts = dict()

# First http:// or https:// URL in a message, running up to the next space.
regex_url = re.compile(r"https?://[^ ]+")


def unescape(s):
    """Decode HTML entities in *s* and collapse runs of whitespace.

    On Python 2 the text is fed through ``htmllib.HTMLParser`` exactly as
    before.  ``htmllib`` no longer exists on Python 3, so there the function
    falls back to ``html.unescape`` and collapses whitespace itself, which is
    what ``HTMLParser.save_end()`` does for plain text.

    s -- text that may contain entities such as ``&amp;`` or ``&#39;``.

    Returns the decoded text.
    """
    # NOTE(review): the module-level ``import htmllib`` still has to be
    # guarded or removed before the script as a whole can load on Python 3.
    try:
        import htmllib
    except ImportError:
        # Python 3: htmllib is gone, use the modern standard-library helper.
        import html

        return " ".join(html.unescape(s).split())

    p = htmllib.HTMLParser(None)
    p.save_bgn()
    p.feed(s)
    return p.save_end()


def incBuf(name):
    """Add one to the line counter kept in ``bufcounts`` for buffer *name*.

    A buffer that has no counter yet starts from zero, so its counter is 1
    after the first call.
    """
    # Item assignment mutates the module-level dict in place; no rebinding.
    bufcounts[name] = bufcounts.get(name, 0) + 1


def getPageTitle(html):
    """Retrieve the HTML <title> from a webpage.

    html -- page source as text; empty or None input yields "".

    Returns the entity-decoded text of the first <title> element, or ""
    when the page has no title.
    """
    if not html:
        return ""

    # Flatten line breaks and tabs so a title spread over several lines is
    # still matched by `.` below.
    html = re.sub("[\r\n\t ]", " ", html)

    # Raw string: avoids the invalid `\<` / `\>` string escapes.  The
    # optional group lets opening tags with attributes (`<title id="x">`)
    # match without also accepting other tag names starting with "title".
    found = re.search(r"(?i)<title(?:\s[^>]*)?>(.*?)</title>", html)
    if found is None:
        return ""
    return unescape(found.group(1))


def page_cb(bufname, command, rc, out, err):
    """Process-hook callback: print the fetched page's title in its buffer.

    bufname -- full name of the buffer the URL was seen in (passed as the
               hook's callback data by msg_cb).
    command -- the "url:..." command that was run (unused).
    rc      -- return code reported by the hook (unused).
    out     -- output of the fetch, i.e. the page source.
    err     -- error output of the fetch (unused).

    Always returns weechat.WEECHAT_RC_OK.
    """
    title = getPageTitle(out)
    if len(title) < 5:
        # No title found, or one too short to be worth a line.
        return weechat.WEECHAT_RC_OK

    target = weechat.buffer_search("==", bufname)
    if not target:
        # The buffer was closed while the page was being fetched.  Printing
        # with an empty pointer would put the title in the core buffer.
        return weechat.WEECHAT_RC_OK

    weechat.prnt(target, "URL title: " + title)

    # A title has just been shown, so restart this buffer's line counter.
    bufcounts[bufname] = 0
    return weechat.WEECHAT_RC_OK


def msg_cb(data, buffer, date, tags, displayed, highlight, prefix, message):
    """Print-hook callback: fetch the page behind the first URL in a line.

    Only `buffer` and `message` are used; the remaining parameters are part
    of the callback signature and are ignored.

    buffer  -- pointer of the buffer the line was printed in.
    message -- text of the printed line.

    Always returns weechat.WEECHAT_RC_OK.  When a URL is found, an
    asynchronous fetch is started whose result is handed to page_cb.
    """
    bufname = weechat.buffer_get_string(buffer, "full_name")
    incBuf(bufname)

    # Lines printed by page_cb pass through this hook as well.  Without this
    # guard a page whose title contains a URL would be fetched again, print
    # another title line, and so on.
    if message.startswith("URL title: "):
        return weechat.WEECHAT_RC_OK

    found = regex_url.search(message)
    if found is None:
        return weechat.WEECHAT_RC_OK

    options = {
        'useragent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
    }
    # 30 second timeout; the buffer name travels along as callback data so
    # page_cb knows where to print.
    weechat.hook_process_hashtable(
        "url:" + found.group(), options, 30 * 1000, "page_cb", bufname)

    return weechat.WEECHAT_RC_OK


# Announce the script to WeeChat: name, author, version, license and
# description, followed by two empty strings (no shutdown function, default
# charset).
weechat.register(
    'pagetitle', 'lieuwex', '0', 'MIT',
    """Adds page title to messages""",
    "", "",
)

# Call msg_cb for every printed line: the three empty strings leave buffer,
# tags and message text unfiltered, and 1 asks for colours to be stripped.
weechat.hook_print("", "", "", 1, "msg_cb", "")
	import weechat
	import htmllib
	import re

	# Per-buffer line counters, keyed by the buffer's full name: bumped by
	# incBuf() for every printed line and reset by page_cb() after a title
	# has been shown.
	bufcounts = dict()

	# First http:// or https:// URL in a message, running up to the next space.
	regex_url = re.compile(r"https?://[^ ]+")


	def unescape(s):
	    """Decode HTML entities in *s* and collapse runs of whitespace.

	    On Python 2 the text is fed through ``htmllib.HTMLParser``.
	    ``htmllib`` no longer exists on Python 3, so there the function falls
	    back to ``html.unescape`` and collapses whitespace itself, which is
	    what ``HTMLParser.save_end()`` does for plain text.

	    s -- text that may contain entities such as ``&amp;`` or ``&#39;``.

	    Returns the decoded text.
	    """
	    # NOTE(review): the module-level ``import htmllib`` still has to be
	    # guarded or removed before the script as a whole can load on Python 3.
	    try:
	        import htmllib
	    except ImportError:
	        # Python 3: htmllib is gone, use the modern standard-library helper.
	        import html

	        return " ".join(html.unescape(s).split())

	    p = htmllib.HTMLParser(None)
	    p.save_bgn()
	    p.feed(s)
	    return p.save_end()


	def incBuf(name):
	    """Add one to the line counter kept in ``bufcounts`` for buffer *name*.

	    A buffer that has no counter yet starts from zero, so its counter is 1
	    after the first call.
	    """
	    # Item assignment mutates the module-level dict in place; no rebinding.
	    bufcounts[name] = bufcounts.get(name, 0) + 1


	def getPageTitle(html):
	    """Retrieve the HTML <title> from a webpage.

	    html -- page source as text; empty or None input yields "".

	    Returns the entity-decoded text of the first <title> element, or ""
	    when the page has no title.
	    """
	    if not html:
	        return ""

	    # Flatten line breaks and tabs so a title spread over several lines is
	    # still matched by `.` below.
	    html = re.sub("[\r\n\t ]", " ", html)

	    # Raw string: avoids the invalid `\<` / `\>` string escapes.  The
	    # optional group lets opening tags with attributes (`<title id="x">`)
	    # match without also accepting other tag names starting with "title".
	    found = re.search(r"(?i)<title(?:\s[^>]*)?>(.*?)</title>", html)
	    if found is None:
	        return ""
	    return unescape(found.group(1))


	def page_cb(bufname, command, rc, out, err):
	    """Process-hook callback: print the fetched page's title in its buffer.

	    bufname -- full name of the buffer the URL was seen in (passed as the
	               hook's callback data by msg_cb).
	    command -- the "url:..." command that was run (unused).
	    rc      -- return code reported by the hook (unused).
	    out     -- output of the fetch, i.e. the page source.
	    err     -- error output of the fetch (unused).

	    Always returns weechat.WEECHAT_RC_OK.
	    """
	    title = getPageTitle(out)
	    if len(title) < 5:
	        # No title found, or one too short to be worth a line.
	        return weechat.WEECHAT_RC_OK

	    target = weechat.buffer_search("==", bufname)
	    if not target:
	        # The buffer was closed while the page was being fetched.  Printing
	        # with an empty pointer would put the title in the core buffer.
	        return weechat.WEECHAT_RC_OK

	    weechat.prnt(target, "URL title: " + title)

	    # A title has just been shown, so restart this buffer's line counter.
	    bufcounts[bufname] = 0
	    return weechat.WEECHAT_RC_OK


	def msg_cb(data, buffer, date, tags, displayed, highlight, prefix, message):
	    """Print-hook callback: fetch the page behind the first URL in a line.

	    Only `buffer` and `message` are used; the remaining parameters are part
	    of the callback signature and are ignored.

	    buffer  -- pointer of the buffer the line was printed in.
	    message -- text of the printed line.

	    Always returns weechat.WEECHAT_RC_OK.  When a URL is found, an
	    asynchronous fetch is started whose result is handed to page_cb.
	    """
	    bufname = weechat.buffer_get_string(buffer, "full_name")
	    incBuf(bufname)

	    # Lines printed by page_cb pass through this hook as well.  Without this
	    # guard a page whose title contains a URL would be fetched again, print
	    # another title line, and so on.
	    if message.startswith("URL title: "):
	        return weechat.WEECHAT_RC_OK

	    found = regex_url.search(message)
	    if found is None:
	        return weechat.WEECHAT_RC_OK

	    options = {
	        'useragent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
	    }
	    # 30 second timeout; the buffer name travels along as callback data so
	    # page_cb knows where to print.
	    weechat.hook_process_hashtable(
	        "url:" + found.group(), options, 30 * 1000, "page_cb", bufname)

	    return weechat.WEECHAT_RC_OK


	# Announce the script to WeeChat: name, author, version, license and
	# description, followed by two empty strings (no shutdown function,
	# default charset).
	weechat.register(
	    'pagetitle', 'lieuwex', '0', 'MIT',
	    """Adds page title to messages""",
	    "", "",
	)

	# Call msg_cb for every printed line: the three empty strings leave
	# buffer, tags and message text unfiltered, and 1 asks for colours to be
	# stripped.
	weechat.hook_print("", "", "", 1, "msg_cb", "")