Laura Langdon LauraLangdon

## gist:54a36a30993f85953fca6084bf89a3ca
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-5ea46c4de867> in <module>
      1 bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
      2 dls = bears.dataloaders(path)
----> 3 dls.train.show_batch(max_n=4, nrows=1, unique=True)

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, **kwargs)
     90         if b is None: b = self.one_batch()
     91         if not show: return self._pre_show_batch(b, max_n=max_n)

## gist:2ac2d336e7257fd860a794bcd705269b
def csv_list_maker(data_file, delimiter=',') -> list:
    """
    Turn data in csv form into list form

    :param data_file: file containing the data
    :param delimiter: character delimiting the data

    :return: data_list: data in list form
    """

## gist:a4cad4d4d13e9a3ec5b9a1a963e6d9da
def read_file(file_name: str, key_name='') -> list:
    """
    Open and read csv.gz, .tsv, .csv, or JSON file; return as list

    :param file_name: Name of file
    :param key_name: Name of JSON key (optional)

    :return: data_list: Data in list form
    """

## gist:633c1ce4213956562c1cfa452b5551df
def clean_text(corpus, input_string: str) -> list:
    """
    Clean text data and add to corpus

    :param corpus: list of all words in the data
    :param input_string: string of words to be added to the corpus

    :return: output_string_as_list: cleaned list of words from input string
    """
    input_string = re.split(r'\W+', input_string)

## gist:a1a8e1deee02479a9c09892f326a51ef
def individual_tweet_vectorizer(corpus, tweet, index=0, author=''):
    """
    Formats a single tweet as a vector

    :param corpus: list of all words in tweets
    :param tweet: tweet to be vectorized
    :param index: index of tweet in main list of tweets
    :param author: Trump or general

    :return: Single tweet in vector form

## gist:4a75924ddc67bbe28b185976bc810ca9
def randomize_vectors(tweet_vectors):
    """
    Randomly shuffle the tweet vectors

    :param tweet_vectors: tweets in vector form

    :return: randomized_tweet_vectors: a Numpy array of tweet vectors that have
                 been randomly shuffled
    """
    #Initialize randomized tweet vectors
    randomized_tweet_vectors = np.zeros((tweet_vectors.shape[0], tweet_vectors.shape[1]), dtype=int)

## gist:32ad8ba990a1fb74a2c20f59551cf590
def split_train_test(tweet_vectors, randomized_tweet_vectors) -> tuple:
    """
    Split into train and test sets

    :param tweet_vectors: tweets in vector form
    :param randomized_tweet_vectors: tweet vectors that have been randomly shuffled

    :return: (train_set, test_set): tuple of train set and test set
    """
    x_train_dim = math.floor(0.8 * tweet_vectors.shape[0])  # Use 80% of data for train set
    x_test_dim = math.ceil(0.2 * tweet_vectors.shape[0])  # Use 20% of data for test set

## gist:3844439fc0151be27dda160801a3ec20
def get_distance(tweet1_vector, tweet2_vector) -> int:
    """
    Implement Minkowski distance metric

    :param tweet1_vector: vector of first tweet
    :param tweet2_vector: vector of second tweet

    :return: Minkowski distance between tweets
    """
    distance = 0

## gist:e06d551e715ecd3774b9d8b63ae4265c
def knn(tweet_vector, train_set, k) -> list:
    """
    Find k nearest neighbors of a given tweet

    :param tweet_vector: vector of tweet whose neighbors we seek
    :param train_set: training set
    :param k: desired number of nearest neighbors

    :return: list of indices in main tweet list of k nearest neighbors, and distances of those
            neighbors to given tweet

## gist:58707f6bc579b6f4c64a717571535737
def majority_vote(tweet_vector, train_set, k) -> str:
    """
    Count how many of the k-NN tweets were written by Trump or not-Trump,
    and return whichever is larger

    :param tweet_vector: vector of given tweet
    :param train_set: training set
    :param k: desired number of nearest neighbors

    :return: Whether tweet was authored by Trump, not Trump, or draw
	---------------------------------------------------------------------------
	AttributeError Traceback (most recent call last)
	<ipython-input-19-5ea46c4de867> in <module>
	1 bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
	2 dls = bears.dataloaders(path)
	----> 3 dls.train.show_batch(max_n=4, nrows=1, unique=True)

	/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in show_batch(self, b, max_n, ctxs, show, **kwargs)
	90 if b is None: b = self.one_batch()
	91 if not show: return self._pre_show_batch(b, max_n=max_n)
	def csv_list_maker(data_file, delimiter=',') -> list:
	"""
	Turn data in csv form into list form

	:param data_file: file containing the data
	:param delimiter: character delimiting the data

	:return: data_list: data in list form
	"""
	def read_file(file_name: str, key_name='') -> list:
	"""
	Open and read csv.gz, .tsv, .csv, or JSON file; return as list

	:param file_name: Name of file
	:param key_name: Name of JSON key (optional)

	:return: data_list: Data in list form
	"""
	def clean_text(corpus, input_string: str) -> list:
	"""
	Clean text data and add to corpus

	:param corpus: list of all words in the data
	:param input_string: string of words to be added to the corpus

	:return: output_string_as_list: cleaned list of words from input string
	"""
	input_string = re.split(r'\W+', input_string)
	def individual_tweet_vectorizer(corpus, tweet, index=0, author=''):
	"""
	Formats a single tweet as a vector

	:param corpus: list of all words in tweets
	:param tweet: tweet to be vectorized
	:param index: index of tweet in main list of tweets
	:param author: Trump or general

	:return: Single tweet in vector form
	def randomize_vectors(tweet_vectors):
	"""
	Randomly shuffle the tweet vectors

	:param tweet_vectors: tweets in vector form

	:return: randomized_tweet_vectors: a Numpy array of tweet vectors that have
	been randomly shuffled
	"""
	#Initialize randomized tweet vectors
	randomized_tweet_vectors = np.zeros((tweet_vectors.shape[0], tweet_vectors.shape[1]), dtype=int)
	def split_train_test(tweet_vectors, randomized_tweet_vectors) -> tuple:
	"""
	Split into train and test sets

	:param tweet_vectors: tweets in vector form
	:param randomized_tweet_vectors: tweet vectors that have been randomly shuffled

	:return: (train_set, test_set): tuple of train set and test set
	"""
	x_train_dim = math.floor(0.8 * tweet_vectors.shape[0]) # Use 80% of data for train set
	x_test_dim = math.ceil(0.2 * tweet_vectors.shape[0]) # Use 20% of data for test set
	def get_distance(tweet1_vector, tweet2_vector) -> int:
	"""
	Implement Minkowski distance metric

	:param tweet1_vector: vector of first tweet
	:param tweet2_vector: vector of second tweet

	:return: Minkowski distance between tweets
	"""
	distance = 0
	def knn(tweet_vector, train_set, k) -> list:
	"""
	Find k nearest neighbors of a given tweet

	:param tweet_vector: vector of tweet whose neighbors we seek
	:param train_set: training set
	:param k: desired number of nearest neighbors

	:return: list of indices in main tweet list of k nearest neighbors, and distances of those
	neighbors to given tweet
	def majority_vote(tweet_vector, train_set, k) -> str:
	"""
	Count how many of the k-NN tweets were written by Trump or not-Trump,
	and return whichever is larger

	:param tweet_vector: vector of given tweet
	:param train_set: training set
	:param k: desired number of nearest neighbors

	:return: Whether tweet was authored by Trump, not Trump, or draw