Skip to content

Instantly share code, notes, and snippets.

View Kiwibp's full-sized avatar

Keenan Burke-Pitts Kiwibp

View GitHub Profile
#facebook marketplace
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from pymongo import MongoClient
class App:
# -*- coding: utf-8 -*-
import scrapy
import sys
class CraigslistSpider(scrapy.Spider):
name = 'craigslist'
allowed_domains = ['asheville.craigslist.org']
start_urls = ['https://asheville.craigslist.org/search/sss']
def parse(self, response):
# -*- coding: utf-8 -*-
import scrapy
import json
import requests
import re
from time import sleep
import sys
class LetgoSpider(scrapy.Spider):
name = 'letgo'
# Fit the gradient-boosting classifier and report its confusion matrix on the
# held-out test split. NOTE(review): `gboost`, `X_train`, `y_train`, `X_test`,
# `y_test`, and `metrics` are defined elsewhere in the notebook — presumably
# sklearn objects and train_test_split output; confirm against the full source.
# train the model on the training set
gboost.fit(X_train, y_train)
# make class predictions for the testing set
y_pred_class = gboost.predict(X_test)
# IMPORTANT: first argument is true values, second argument is predicted values
print(metrics.confusion_matrix(y_test, y_pred_class))
binary = np.array([[125, 14],
# Instantiate one of each candidate classifier (all with default
# hyperparameters except SVC, which enables probability estimates so it can
# be used with probability-based metrics/ensembling downstream).
logreg = LogisticRegression()
logreg_cv = LogisticRegressionCV()
rf = RandomForestClassifier()
gboost = GradientBoostingClassifier()
svm = SVC(probability=True)
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier()
# Collected so the rest of the notebook can iterate over all candidates.
models = [logreg, logreg_cv, rf, gboost, svm, knn, dt]
# Tree-based estimators can be used to compute feature importances, which in turn can be used to discard irrelevant features.
# Fit a 50-tree random forest (sqrt(n_features) candidates per split) purely
# to rank features by importance. `train` and `targets` come from earlier in
# the notebook — presumably the feature matrix and label column; confirm.
clf = RandomForestClassifier(n_estimators=50, max_features='sqrt')
clf = clf.fit(train, targets)
# Let's have a look at the importance of each feature.
# Build a (feature, importance) table from the fitted forest.
features = pd.DataFrame()
features['feature'] = train.columns
features['importance'] = clf.feature_importances_
# Sorting values by feature importance.
# NOTE(review): the sort call itself (e.g. features.sort_values(...)) is not
# visible in this excerpt — it appears to have been truncated.
@Kiwibp
Kiwibp / wordcloud.py
Created June 11, 2018 16:20
Craigslist Webscraping Project
# Render a word cloud of the posting names.
# BUG FIX: the original passed str(postings['name']) to generate(), which is
# the Series *repr* — it embeds index numbers, ellipses, and the
# "Name: name, dtype: object" footer into the cloud text. Join the actual
# values instead so only real posting words are counted.
names_text = " ".join(postings['name'].astype(str))
wordcloud = WordCloud(background_color='white', mode="RGB",
                      width=2000, height=1000).generate(names_text)
plt.title("Craigslist Used Items Word Cloud")
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
@Kiwibp
Kiwibp / popular-locations-subset.py
Created June 11, 2018 16:18
Craigslist Webscraping Project
@Kiwibp
Kiwibp / scrapy-items-and-spider-scripts.py
Last active June 11, 2018 16:16
Craigslist Spider for Webscraping Project
import scrapy
class CraigslistWebscrapingItem(scrapy.Item):
    """Container for one scraped Craigslist posting.

    scrapy's Item metaclass collects these Field declarations at class
    creation time; each becomes a dict-style key on the item.
    """
    # Posting title.
    name = scrapy.Field()
    # Listed price (raw text as scraped — type not enforced here).
    price = scrapy.Field()
    # Posting location string.
    location = scrapy.Field()
    # Posting date.
    date = scrapy.Field()
@Kiwibp
Kiwibp / motor-vehicles-subset.py
Created June 11, 2018 16:15
Craigslist Webscraping Project
# Motor vehicles skew the overall price boxplot: every posting priced at or
# above $1.8k appears to be a motor vehicle, so pull that subset out and
# chart it on its own.
is_vehicle_priced = postings.price >= 1800.0
motor_vehicles = postings[is_vehicle_priced]
motor_vehicles.plot.bar(x='name', y='price', figsize=(9, 9))
plt.ylabel("Price")
plt.xlabel("Vehicle")
plt.show()