Created
July 12, 2018 22:01
-
-
Save baatout/56460cd9d2749b72fc970012150b4271 to your computer and use it in GitHub Desktop.
Part of the crawler tutorial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen | |
from stem import Signal | |
from stem.control import Controller | |
class TorHandler:
    """Fetch web pages through Tor and request new Tor circuits.

    HTTP traffic is sent to a local HTTP proxy (Privoxy in the original
    setup), which is expected to forward it to Tor.  New circuits are
    requested through the Tor control port using ``stem``.
    """

    def __init__(self, proxy='127.0.0.1:8118'):
        """Create a handler.

        Args:
            proxy: ``host:port`` of the local HTTP proxy that forwards
                traffic to Tor.  Defaults to the address the original
                code hard-coded.
        """
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) '
                'Gecko/2009021910 Firefox/3.0.7'
            ),
        }
        self.proxy = proxy

    def open_url(self, url):
        """Download ``url`` through the proxy and return its body as text.

        Args:
            url: The address to fetch.

        Returns:
            The response body decoded to ``str``.  The charset announced
            in the response headers is used when present, otherwise UTF-8.

        Raises:
            urllib.error.URLError: If the request fails.
            UnicodeDecodeError: If the body does not match the charset.
        """
        # Route both schemes through the proxy; proxying only 'http' would
        # let https:// requests bypass Tor and go out directly.
        proxy_support = ProxyHandler({'http': self.proxy, 'https': self.proxy})
        # NOTE: install_opener() changes the process-wide default opener, so
        # every later urlopen() call in this process also uses the proxy.
        # This is kept because the original code had the same side effect.
        install_opener(build_opener(proxy_support))

        request = Request(url, None, self.headers)
        # The context manager closes the connection even if read() fails.
        with urlopen(request) as response:
            charset = response.headers.get_content_charset() or 'utf-8'
            return response.read().decode(charset)

    @staticmethod
    def renew_connection(port=9051, password='btt'):
        """Ask Tor for a new circuit by sending the NEWNYM signal.

        Args:
            port: Tor control port to connect to.
            password: Control-port password.  The default is the value the
                original code hard-coded; pass your own in real use.
        """
        # Leaving the ``with`` block closes the controller, so no explicit
        # close() call is needed.
        with Controller.from_port(port=port) as controller:
            controller.authenticate(password=password)
            controller.signal(Signal.NEWNYM)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment