Oliver Batey oliver-batey

## get_post_info.py
import pandas as pd
from instascrape import Post, Profile

def get_post_data(post_object,
             attributes=['caption',
                        'upload_date',
                        'location',
                        'likes',
                        'comments',
                        'id']):

## custom_transformers.py
from sklearn.base import BaseEstimator, TransformerMixin


class CountWords(BaseEstimator, TransformerMixin):
    # Creates a dataframe from a series of text documents by creating a new column named n_words,
    # that contains the number of words in each document
    def __init__(self, new_col_name):
        self.new_col_name = new_col_name

    def fit(self, series, y=None):

## gridsearchcv.py
from sklearn.model_selection import GridSearchCV

# params is a list of dictionaries; in each one, the keys are hyperparameter names and the
# values are lists of candidate values to search over.
params = [
    {
        "transform__txt__max_features": [None, 100, 10],
        "transform__num__selector__attribute_names": [
            ["n_words"],
            ["mean_word_length"],

## pipeline.py
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Define the names of the text and numerical features.
# text_features is a single string, whereas numerical_features is a list of strings.
# NOTE(review): presumably these are column names of the input DataFrame that get
# routed to the text and numeric branches of the ColumnTransformer imported above;
# the code that consumes them is not visible here, so confirm.
text_features = "text"
numerical_features = ["n_words", "mean_word_length"]

## sampling.py
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    auc,
    precision_score,
    recall_score,
    roc_curve,
)
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.pipeline import Pipeline
	import pandas as pd
	from instascrape import Post, Profile

	def get_post_data(post_object,
	attributes=['caption',
	'upload_date',
	'location',
	'likes',
	'comments',
	'id']):
	from sklearn.base import BaseEstimator, TransformerMixin


	class CountWords(BaseEstimator, TransformerMixin):
	# Creates a dataframe from a series of text documents by creating a new column named n_words,
	# that contains the number of words in each document
	def __init__(self, new_col_name):
	self.new_col_name = new_col_name

	def fit(self, series, y=None):
	from sklearn.model_selection import GridSearchCV

	# params is a list of dictionaries; in each one, the keys are hyperparameter names and the
	# values are lists of candidate values to search over.
	params = [
	{
	"transform__txt__max_features": [None, 100, 10],
	"transform__num__selector__attribute_names": [
	["n_words"],
	["mean_word_length"],
	from sklearn.compose import ColumnTransformer
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.impute import SimpleImputer
	from sklearn.pipeline import Pipeline
	from sklearn.svm import SVC

	# Define the names of the text and numerical features
	text_features = "text"
	numerical_features = ["n_words", "mean_word_length"]
	import pandas as pd
	from sklearn.metrics import (
	accuracy_score,
	auc,
	precision_score,
	recall_score,
	roc_curve,
	)
	from sklearn.model_selection import StratifiedShuffleSplit
	from sklearn.pipeline import Pipeline