This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from instascrape import Post, Profile | |
def get_post_data(post_object, | |
attributes=['caption', | |
'upload_date', | |
'location', | |
'likes', | |
'comments', | |
'id']): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
class CountWords(BaseEstimator, TransformerMixin): | |
# Creates a DataFrame from a Series of text documents by adding a new column
# (e.g. n_words; the column name is passed in as new_col_name) that contains
# the number of words in each document.
def __init__(self, new_col_name): | |
self.new_col_name = new_col_name | |
def fit(self, series, y=None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import GridSearchCV | |
# params is a list of dictionaries: in each dictionary the keys are hyperparameter
# names and the values are lists of candidate values to search over.
params = [ | |
{ | |
"transform__txt__max_features": [None, 100, 10], | |
"transform__num__selector__attribute_names": [ | |
["n_words"], | |
["mean_word_length"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.compose import ColumnTransformer | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.impute import SimpleImputer | |
from sklearn.pipeline import Pipeline | |
from sklearn.svm import SVC | |
# Define the names of the text and numerical features | |
text_features = "text" | |
numerical_features = ["n_words", "mean_word_length"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.metrics import ( | |
accuracy_score, | |
auc, | |
precision_score, | |
recall_score, | |
roc_curve, | |
) | |
from sklearn.model_selection import StratifiedShuffleSplit | |
from sklearn.pipeline import Pipeline |
NewerOlder