Skip to content

Instantly share code, notes, and snippets.

View alecxe's full-sized avatar

Alex A. alecxe

View GitHub Profile
@alecxe
alecxe / runner.py
Last active August 23, 2023 14:19
Self-contained minimum example script to run scrapy
import json
from scrapy.crawler import Crawler
from scrapy.contrib.loader import ItemLoader
from scrapy.contrib.loader.processor import Join, MapCompose, TakeFirst
from scrapy import log, signals, Spider, Item, Field
from scrapy.settings import Settings
from twisted.internet import reactor
#!/usr/bin/env python
""" Test menu for Website
"""
import urllib2
from bs4 import BeautifulSoup
print (47 * '-')
print (" C H O I C E L I S T")
var browserstackUser = 'username';
var browserstackKey = 'key';
module.exports = function (config) {
config.set({
basePath: '',
frameworks: [
'jasmine',
'jasmine-matchers'
],
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.select import Select
url = "http://calstate-la.bncollege.com/webapp/wcs/stores/servlet/TBWizardView?catalogId=10001&langId=-1&storeId=30556"
driver = webdriver.Firefox()
import requests
from bs4 import BeautifulSoup
urls = ["https://www304.americanexpress.com/credit-card/compare"]
for url in urls:
website = requests.get(url)
soup = BeautifulSoup(website.content)
print(''.join([element.text for element in soup.body.find_all(lambda tag: tag != 'script', recursive=False)]))
# -*- coding: utf-8 -*-
import mechanize
import cookielib
b = mechanize.Browser()
b.set_handle_refresh(True)
b.set_debug_redirects(True)
b.set_handle_redirect(True)
b.set_debug_http(True)
cj = cookielib.CookieJar()
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
data = """
<table>
<tr align="right"><td>193</td><td>Dalton</td><td>Daisy</td></tr>
<tr align="right"><td>194</td><td>Dakota</td><td>Amelia</td></tr>
<tr align="right"><td>195</td><td>Julio</td><td>Mayra</td></tr>
<tr align="right"><td>196</td><td>Arthur</td><td>Theresa</td></tr>
<tr align="right"><td>197</td><td>Pedro</td><td>Madeline</td></tr>
from bs4 import BeautifulSoup
import re
data = """
<div>
<p>D: string-1.string2 15030 9h7a2m string3.string<br/>
D: string-1.string2 15030 9h7a2m string3.string<br/>
D: string-1.string2 15030 9h7a2m string3.string</p>
<p><span id="more-1203"></span></p>
from scrapy.http import Request
from scrapy.item import Item, Field
from scrapy.selector import Selector
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
class MyItem(Item):
reviewer_ranking = Field()
import urllib2
from bs4 import BeautifulSoup
base_url = "http://www.boostmobile.com/stores/?page={page}&zipcode={zipcode}"
num_pages = 10
zipcodes = [30008, 30009]
for zipcode in zipcodes:
print "Zip Code: %s" % zipcode
for page in xrange(1, num_pages + 1):