Skip to content

Instantly share code, notes, and snippets.

@johnpauljanecek
Created June 1, 2015 05:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnpauljanecek/39c1ab450f3d188af548 to your computer and use it in GitHub Desktop.
Save johnpauljanecek/39c1ab450f3d188af548 to your computer and use it in GitHub Desktop.
Inject JavaScript into Python Selenium
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.common.exceptions import NoSuchElementException"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"driver = webdriver.Firefox()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import time\n",
"\n",
"class DuckDuckGoResults(object):\n",
" def __init__(self,driver):\n",
" self.driver = driver\n",
" \n",
" def search(self,searchTerm):\n",
" self.driver.get(\"https://duckduckgo.com/\")\n",
" inputSearchElm = driver.find_element_by_css_selector('#search_form_input_homepage')\n",
" inputSearchElm.send_keys(\"%s\\n\" % searchTerm)\n",
" \n",
" def scroll_botton(self):\n",
" self.driver.execute_script(\"window.scrollTo(0,document.body.scrollHeight);\")\n",
" return True\n",
" \n",
" def load_all_results(self):\n",
" self.resultElms = self.driver.find_elements_by_css_selector(\"#links>div.results_links_deep\")\n",
" while True :\n",
" self.scroll_botton()\n",
" time.sleep(4)\n",
" newResultElms = self.driver.find_elements_by_css_selector(\"#links>div.results_links_deep\")\n",
" if len(newResultElms) == len(self.resultElms):\n",
" self.resultElms = newResultElms\n",
" break\n",
" self.resultElms = newResultElms\n",
" \n",
" def parse_resultElm(self,resultElm):\n",
" try :\n",
" resultAElm = resultElm.find_element_by_css_selector(\"a.result__a\")\n",
" result = {\n",
" \"title\" : resultAElm.text,\n",
" \"href\" : resultAElm.get_attribute(\"href\"),\n",
" \"snippet\" : resultElm.find_element_by_css_selector(\"div.result__snippet\").text\n",
" }\n",
" return result\n",
" except NoSuchElementException:\n",
" print \"exception\",resultElm.text\n",
" \n",
" def get_results_python(self):\n",
" return map(self.parse_resultElm,self.resultElms)\n",
" \n",
" def get_results_javascript(self):\n",
" jsFunction = \"\"\"\n",
" var resultElms = Array.prototype.slice.call(document.querySelectorAll(\"#links>div.results_links_deep\"))\n",
" return resultElms.map(function(resultElm) {\n",
" var result = [];\n",
" var resultAElm = resultElm.querySelector(\"a.result__a\");\n",
" result.push([\"title\",resultAElm.textContent]);\n",
" result.push([\"href\",resultAElm.getAttribute(\"href\")]);\n",
" result.push([\"snippet\",resultElm.querySelector(\"div.result__snippet\").textContent]);\n",
" return result;\n",
" });\n",
" \"\"\"\n",
" results = self.driver.execute_script(jsFunction)\n",
" return map(dict,results)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"page = DuckDuckGoResults(driver)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"page.search(\"python\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"page.load_all_results()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"jsResults = page.get_results_javascript()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pyResults = page.get_results_python()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(pyResults) == len(jsResults)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment