Keenan Burke-Pitts (Kiwibp)
# Tree-based estimators can be used to compute feature importances, which in turn can be used to discard irrelevant features.
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

clf = RandomForestClassifier(n_estimators=50, max_features='sqrt')
clf = clf.fit(train, targets)
# Let's have a look at the importance of each feature.
features = pd.DataFrame()
features['feature'] = train.columns
features['importance'] = clf.feature_importances_
# Sorting values by feature importance (most important first).
features.sort_values(by='importance', ascending=False, inplace=True)
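The comment above mentions using the importances to discard irrelevant features, a step the snippet itself does not show. A minimal sketch of that step, assuming the same fitted clf and train objects, could use scikit-learn's SelectFromModel, which drops columns whose importance falls below the mean by default:

# Sketch only, not part of the original gist: reuse the fitted forest's importances
# to keep only the features above the default (mean-importance) threshold.
from sklearn.feature_selection import SelectFromModel

selector = SelectFromModel(clf, prefit=True)  # prefit=True reuses clf without refitting
train_reduced = selector.transform(train)
print(train_reduced.shape)  # fewer columns than train if any features were dropped

The reduced matrix could then be fed to a fresh model to check whether accuracy holds up with fewer features.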
@Kiwibp
Kiwibp / popular-locations-subset.py
Created June 11, 2018 16:18
Craigslist Webscraping Project
# Execute the Summary Extractor model.
from monkeylearn import MonkeyLearn

ml = MonkeyLearn('insert api key here')
data = list(nlp_df_sample.iloc[:, 7])
model_id = 'ex_94WD2XxD'
summary_model_results = ml.extractors.extract(model_id, data, production_model=True)
print(summary_model_results.body)
# Execute the Price Extractor model in the same way.
data = list(nlp_df_sample.iloc[:, 7])
model_id = 'ex_wNDME4vE'
price_model_results = ml.extractors.extract(model_id, data, production_model=True)
print(price_model_results.body)
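A short follow-up sketch, not from the original gist: one way to keep the extractor output next to the listings it came from is to attach the raw response entries back onto the sampled dataframe. This assumes the response body holds one entry per input text, in the same order as data; the column names below are made up for illustration.

# Hypothetical post-processing: store each row's raw extraction result alongside the listing text.
nlp_df_sample = nlp_df_sample.copy()
nlp_df_sample['summary_extraction'] = summary_model_results.body
nlp_df_sample['price_extraction'] = price_model_results.body
nlp_df_sample[['summary_extraction', 'price_extraction']].head()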
@Kiwibp
Kiwibp / BSsnippet.py
Last active June 7, 2018 21:16
NYCDSA DataViz Project -- BeautifulSoup Code Snippet
import urllib.request
import bs4 as bs
import pandas as pd

guards_advanced = urllib.request.urlopen("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885").read()
guards_advanced_soup = bs.BeautifulSoup(guards_advanced, 'lxml')
# Leaving out a number of lines needed to extract the data (including building col_names from the soup); see the GitHub repo for the full code.
guards_advanced_col_names = col_names.split()
print(guards_advanced_col_names)
# Could also use pandas' read_html method.
guards_advanced_dfs = pd.read_html("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885")
guards_advanced_stats_df = guards_advanced_dfs[2]
guards_advanced_stats_df.tail()
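As a small hypothetical follow-up, not in the original snippet: the header names scraped with BeautifulSoup could be applied to the read_html table, assuming the two approaches pick up the same columns for this page.

# Hypothetical: apply the scraped header names to the read_html table,
# but only if the column counts actually match.
if len(guards_advanced_col_names) == guards_advanced_stats_df.shape[1]:
    guards_advanced_stats_df.columns = guards_advanced_col_names
guards_advanced_stats_df.head()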