Skip to content

Instantly share code, notes, and snippets.

@pilate
Last active December 22, 2015 09:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pilate/6455876 to your computer and use it in GitHub Desktop.
Save pilate/6455876 to your computer and use it in GitHub Desktop.
from HTMLParser import HTMLParser
import requests
import re
# Pattern for an http:// or https:// URL: the scheme followed by every
# character up to (but not including) the next space.  Kept as a raw string so
# the pattern carries no invalid string escapes ("/" needs no escaping in a
# Python regex).
url_regex = r"(https?://[^ ]+)"
class TitleParser(HTMLParser):
    """HTML parser that records the text found inside a ``<title>`` element.

    After ``feed()`` has been called, ``title`` holds the captured text, or
    ``None`` when no title text was seen.
    """

    # True while the parser is positioned just after an opening <title> tag
    # and has not yet seen its text or its closing tag.
    istitle = False
    # Default so that reading ``title`` never raises AttributeError when the
    # document has no <title> element.
    title = None

    def handle_starttag(self, tag, attrs):
        """Arm title capture when an opening ``<title>`` tag is seen."""
        if tag == "title":
            self.istitle = True

    def handle_endtag(self, tag):
        """Disarm title capture at ``</title>``.

        Without this, an empty ``<title></title>`` would leave the flag set
        and the next unrelated text chunk would be stored as the title.
        """
        if tag == "title":
            self.istitle = False

    def handle_data(self, data):
        """Store the first text chunk that follows an opening ``<title>``."""
        if self.istitle:
            self.title = data
            # Only the first chunk is kept; later text is not title content.
            self.istitle = False
def run(data, settings):
    """Return the HTML title of the first URL found in ``data['payload']``.

    Args:
        data: Mapping whose ``'payload'`` entry is the text searched for a URL.
        settings: Not used by this function; kept for the caller's interface.

    Returns:
        The title text of the fetched page, or ``None`` when the payload
        contains no URL or the page has no title.

    Raises:
        requests.exceptions.RequestException: If fetching the URL fails or
            times out.
    """
    match = re.search(url_regex, data['payload'])
    if not match:
        return None

    url = match.group(0)
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(url, timeout=10)

    parser = TitleParser()
    parser.feed(response.text)
    # Read defensively: the parser may never have seen a <title> element.
    return getattr(parser, "title", None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment