Skip to content

Instantly share code, notes, and snippets.

View Kiwibp's full-sized avatar

Keenan Burke-Pitts Kiwibp

View GitHub Profile
# Keep only the rows whose Location occurs at least 10 times, restricted to
# the columns of interest.
# len(g) is the number of rows in the group, so the predicate always yields a
# single boolean; comparing value_counts() to 10 yields a Series and only
# works when that Series happens to hold exactly one entry.
locations_ten_or_more = (
    all_items_df
    .groupby('Location')
    .filter(lambda g: len(g) >= 10)
    .loc[:, ['Location', 'Description', 'Price', 'Title', 'Url']]
)

# Count the distinct locations that have 10 or more items.
len_of_locs = locations_ten_or_more['Location'].nunique()
print(f'There are {len_of_locs} cities with 10 items or more.')
print('\n')

# Header for the locations with the most items in this subset.
print('Locations with the most amount of items in this subset:')
# -*- coding: utf-8 -*-
import scrapy
import sys
class CraigslistSpider(scrapy.Spider):
name = 'craigslist'
allowed_domains = ['asheville.craigslist.org']
start_urls = ['https://asheville.craigslist.org/search/sss']
def parse(self, response):
# -*- coding: utf-8 -*-
import scrapy
import json
import requests
import re
from time import sleep
import sys
class LetgoSpider(scrapy.Spider):
name = 'letgo'
#pipeline adjustment to export data to MongoDB
from pymongo import MongoClient
from scrapy.conf import settings
class MongoDBPipeline(object):
def __init__(self):
connection = MongoClient(
settings['MONGODB_SERVER'],
settings['MONGODB_PORT'])