Last active
October 2, 2018 13:28
-
-
Save norweeg/4f4bb747461906015ac609fa1f5bc55b to your computer and use it in GitHub Desktop.
A simple HTML renderer module, written with PyQt5, for retrieving JavaScript-rendered HTML for scraping.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets | |
from queue import Queue | |
class Renderer(QtWebEngineWidgets.QWebEnginePage):
    """Web engine page that loads a URL and hands back its HTML synchronously.

    ``render()`` starts a load, runs a local Qt event loop until the page's
    HTML has been delivered through ``toHtml()``, and returns that HTML.
    A ``QApplication`` is expected to exist before a ``Renderer`` is used,
    since the page relies on Qt event processing.
    """

    def __init__(self, profile=None, parent=None):
        """Create the page and hook up result delivery.

        Args:
            profile: Optional web engine profile for the page. When ``None``
                the parent-only constructor overload is used, so Qt picks
                its default profile.
            parent: Optional parent ``QObject``.
        """
        # Dispatch explicitly instead of forwarding ``None`` as a null
        # profile pointer to the two-argument overload.
        if profile is None:
            super().__init__(parent)
        else:
            super().__init__(profile, parent)
        # One-slot hand-off between the toHtml() callback and render().
        self._result_queue = Queue(maxsize=1)
        # Event loop of the render() call currently waiting, if any.
        self._wait_loop = None
        self.loadFinished.connect(self._on_load_finished)

    def _on_load_finished(self, ok):
        """Request the page's HTML once a load finishes.

        ``ok`` is intentionally ignored: the HTML is requested whether or
        not the load was reported as successful.
        """
        self.toHtml(self._deliver_html)

    def _deliver_html(self, html):
        """Store *html* for the waiting render() call and wake it up."""
        loop = self._wait_loop
        # Drop results nobody is waiting for, and never block on a full
        # queue: this callback runs on the GUI thread, so a blocking put()
        # (or a leftover item seen by a later render()) would freeze it.
        if loop is None or self._result_queue.full():
            return
        self._result_queue.put_nowait(html)
        loop.quit()

    def render(self, url):
        """Load *url* and return the page's HTML.

        Blocks the caller in a local event loop (other Qt events keep being
        processed) until the HTML has been delivered.

        Args:
            url: Address to load; it is passed to ``QtCore.QUrl``.

        Returns:
            The HTML handed to the ``toHtml()`` callback.

        Raises:
            RuntimeError: If called again while a previous ``render()`` on
                this page is still waiting.
            queue.Empty: If the event loop stopped before any HTML was
                delivered (for example because the application is quitting).
        """
        if self._wait_loop is not None:
            raise RuntimeError("render() is already in progress on this page")
        loop = QtCore.QEventLoop()
        self._wait_loop = loop
        try:
            self.load(QtCore.QUrl(url))
            # Sleeps until events arrive instead of spinning on
            # processEvents(); _deliver_html() calls quit() on this loop.
            loop.exec_()
        finally:
            self._wait_loop = None
        return self._result_queue.get_nowait()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment