Skip to content

Instantly share code, notes, and snippets.

View Kiwibp's full-sized avatar

Keenan Burke-Pitts Kiwibp

View GitHub Profile
# -*- coding: utf-8 -*-
# Scrapy spider that crawls Craigslist "for sale" (sss) search results for
# Asheville, NC.
# NOTE(review): this gist excerpt is truncated — the body of parse() is not
# visible in this paste. Indentation restored from the whitespace-mangled scrape.
import scrapy
import sys


class CraigslistSpider(scrapy.Spider):
    # Spider identifier used on the command line: `scrapy crawl craigslist`.
    name = 'craigslist'
    # Restrict the crawl to the Asheville Craigslist domain.
    allowed_domains = ['asheville.craigslist.org']
    # Entry point: the combined "for sale" search listing page.
    start_urls = ['https://asheville.craigslist.org/search/sss']

    def parse(self, response):
        """Parse one search-results page.

        Body omitted in this excerpt — see the full gist/repo for the
        extraction logic.
        """
# -*- coding: utf-8 -*-
# Scrapy spider for Letgo listings.
# NOTE(review): this gist excerpt is truncated — only the spider name is
# visible; allowed_domains, start_urls, and parse() are missing from the
# paste. Indentation restored from the whitespace-mangled scrape.
import scrapy
import json
import requests
import re
from time import sleep
import sys


class LetgoSpider(scrapy.Spider):
    # Spider identifier used on the command line: `scrapy crawl letgo`.
    name = 'letgo'
# Pipeline adjustment to export scraped data to MongoDB.
# NOTE(review): this gist excerpt is truncated — the client is opened but
# the database/collection handles and process_item() are not visible here.
# Indentation restored from the whitespace-mangled scrape.
from pymongo import MongoClient
# NOTE(review): `scrapy.conf` is deprecated in modern Scrapy (settings are
# normally obtained via from_crawler) — confirm against the project's
# Scrapy version before reusing.
from scrapy.conf import settings


class MongoDBPipeline(object):
    def __init__(self):
        # Open a MongoDB client using host/port taken from Scrapy settings.
        # NOTE(review): `connection` is a local here and is never stored on
        # self in the visible lines — presumably the omitted remainder binds
        # the db/collection; verify against the full source.
        connection = MongoClient(
            settings['MONGODB_SERVER'],
            settings['MONGODB_PORT'])
@Kiwibp
Kiwibp / BSsnippet.py
Last active June 7, 2018 21:16
NYCDSA DataViz Project -- BeautifulSoup Code Snippet
# BeautifulSoup scrape of advanced NBA guard stats from RotoGrinders.
# NOTE(review): this snippet depends on omitted earlier lines — it assumes
# `import urllib.request`, `import bs4 as bs`, `import pandas as pd`, and a
# `col_names` string were defined before this point; see the full repo.
guards_advanced = urllib.request.urlopen("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885").read()
# Parse the raw HTML with the lxml backend.
# NOTE(review): the name `guards_advancedguards_` looks like a paste typo and
# the soup object is never used in the visible lines — confirm against repo.
guards_advancedguards_ = bs.BeautifulSoup(guards_advanced, 'lxml')
# Leaving out a number of lines needed to extract the data; see the GitHub
# repo for the full code.
# `col_names` comes from the omitted extraction code; split the header
# string into individual column names on whitespace.
guards_advanced_col_names = col_names.split()
print(guards_advanced_col_names)
# Alternative: pandas.read_html fetches and parses every <table> on the page,
# returning a list of DataFrames.
guards_advanced_dfs = pd.read_html("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885")
# Index 2 is presumably the advanced-stats table on this page — TODO confirm.
guards_advanced_stats_df = guards_advanced_dfs[2]
guards_advanced_stats_df.tail()