Skip to content

Instantly share code, notes, and snippets.

Avatar

Alexander Afanasyev alecxe

View GitHub Profile
@alecxe
alecxe / runner.py
Last active Jan 10, 2020
Self-contained minimal example script to run Scrapy
View runner.py
import json
from scrapy.crawler import Crawler
from scrapy.contrib.loader import ItemLoader
from scrapy.contrib.loader.processor import Join, MapCompose, TakeFirst
from scrapy import log, signals, Spider, Item, Field
from scrapy.settings import Settings
from twisted.internet import reactor
View test_input.py
#!/usr/bin/env python
""" Test menu for Website
"""
import urllib2
from bs4 import BeautifulSoup
print (47 * '-')
print (" C H O I C E L I S T")
View karma.conf.js
var browserstackUser = 'username';
var browserstackKey = 'key';
module.exports = function (config) {
config.set({
basePath: '',
frameworks: [
'jasmine',
'jasmine-matchers'
],
View bncollege.py
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.select import Select
url = "http://calstate-la.bncollege.com/webapp/wcs/stores/servlet/TBWizardView?catalogId=10001&langId=-1&storeId=30556"
driver = webdriver.Firefox()
View amex_parse.py
import requests
from bs4 import BeautifulSoup
# Fetch each page in `urls` and print the concatenated text of the elements
# that are direct children of <body>, leaving out <script> elements.
urls = ["https://www304.americanexpress.com/credit-card/compare"]

for url in urls:
    website = requests.get(url)
    # Name the parser explicitly so the parse result does not depend on
    # which optional parser libraries happen to be installed.
    soup = BeautifulSoup(website.content, "html.parser")
    # A function filter passed to find_all() receives a Tag object, so the
    # element name has to be read from `tag.name`; comparing the Tag itself
    # against a string never matches and would let <script> through.
    top_level_elements = soup.body.find_all(
        lambda tag: tag.name != 'script',
        recursive=False,
    )
    print(''.join(element.text for element in top_level_elements))
View mechanize_test.py
# -*- coding: utf-8 -*-
import mechanize
import cookielib
b = mechanize.Browser()
b.set_handle_refresh(True)
b.set_debug_redirects(True)
b.set_handle_redirect(True)
b.set_debug_http(True)
cj = cookielib.CookieJar()
View 25294354.py
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
data = """
<table>
<tr align="right"><td>193</td><td>Dalton</td><td>Daisy</td></tr>
<tr align="right"><td>194</td><td>Dakota</td><td>Amelia</td></tr>
<tr align="right"><td>195</td><td>Julio</td><td>Mayra</td></tr>
<tr align="right"><td>196</td><td>Arthur</td><td>Theresa</td></tr>
<tr align="right"><td>197</td><td>Pedro</td><td>Madeline</td></tr>
View my_soup
from bs4 import BeautifulSoup
import re
data = """
<div>
<p>D: string-1.string2 15030 9h7a2m string3.string<br/>
D: string-1.string2 15030 9h7a2m string3.string<br/>
D: string-1.string2 15030 9h7a2m string3.string</p>
<p><span id="more-1203"></span></p>
View spider.py
from scrapy.http import Request
from scrapy.item import Item, Field
from scrapy.selector import Selector
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
class MyItem(Item):
reviewer_ranking = Field()
View boost_mobile.py
import urllib2
from bs4 import BeautifulSoup
base_url = "http://www.boostmobile.com/stores/?page={page}&zipcode={zipcode}"
num_pages = 10
zipcodes = [30008, 30009]
for zipcode in zipcodes:
print "Zip Code: %s" % zipcode
for page in xrange(1, num_pages + 1):
You can’t perform that action at this time.