michaelguia

## map_gen.py
def mapper1(_, line):
    """Emit one ``((word_length, first_letter), 1)`` pair per word in *line*.

    Args:
        _: Input key; ignored.
        line: Either a string of whitespace-separated words or an iterable
            of word strings.

    Yields:
        ``((len(word), word[0]), 1)`` for every non-empty word.
    """
    # Iterating a str directly walks its characters, which would make every
    # key (1, char); split it into whitespace-separated words first.
    words = line.split() if isinstance(line, str) else line
    for word in words:
        if not word:
            # An empty token has no first letter to report.
            continue
        yield (len(word), word[0]), 1

def reducer1(key, values):
    """Collapse every count received for *key* into a single total.

    Args:
        key: The grouping key; passed through unchanged.
        values: Iterable of numeric counts for that key.

    Yields:
        A single ``(key, total)`` pair.
    """
    total = sum(values)
    yield (key, total)

def mapper2(key, value):
    """Re-key a record by its first key component.

    Args:
        key: A two-item ``(length, letter)`` pair.
        value: The value associated with that pair.

    Yields:
        ``(length, (value, letter))``.
    """
    word_length, first_letter = key
    regrouped = (value, first_letter)
    yield word_length, regrouped

## queue.py
# NOTE(review): the header above names this file queue.py; a script with that
# name can shadow the standard-library ``queue`` module imported below --
# confirm the real filename.
import threading
import time
from queue import Queue
import sys
from os import listdir
from collections import Counter

# Queue instance created at import time.
q = Queue()
# Directory path taken from the first command-line argument.
directory = sys.argv[1]
# Names of the entries found in that directory.
files = listdir(directory)

## pool.py
from multiprocessing import Pool
from time import sleep
import numpy as np

def sum_list(numbers):
    """Return the total of all items in *numbers*.

    Args:
        numbers: Iterable of values that can be added together.

    Returns:
        The result of the built-in ``sum`` over *numbers*.
    """
    total = sum(numbers)
    return total

# The integers 0..99,999,999 arranged as 2 rows of 50,000,000 values each.
master_list = np.arange(100000000).reshape(2,50000000)

# Short list of three integers.
l1 = [3, 5, 7]

## hierarchical.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                michaelguia
                / hierarchical.md
            
            
              Created
              June 7, 2018 02:10
            
          
    SUMMARY: Hierarchical cluster analysis


Hierarchical cluster analysis of n objects is defined by a stepwise algorithm which
merges two objects at each step, the two which have the least dissimilarity.


Dissimilarities between clusters of objects can be defined in several ways; for
example, the maximum dissimilarity (complete linkage), minimum dissimilarity
(single linkage) or average dissimilarity (average linkage).


Either rows or columns of a matrix can be clustered – in each case we choose the


## tfidf.py
import pandas as pd
import unicodedata
import string
import numpy as np
from nltk.util import ngrams
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
# List of document strings, starting with a single sample document.
docs = ['code PYTHON code.']

## to_categorical.py
def to_categorical(y, num_classes=None):
    """Converts a class vector (integers) to binary class matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes - 1).
        num_classes: total number of classes. When omitted, it is
            inferred as ``max(y) + 1``.

    # Returns
        A binary matrix representation of the input: a float32 array of
        shape ``y.shape + (num_classes,)``. If ``y`` has more than one
        axis and its last axis has length 1, that axis is dropped first.

    # Raises
        ValueError: if `y` is empty and `num_classes` is not given.
    """
    import numpy as np  # local import keeps this function self-contained

    labels = np.asarray(y, dtype='int')
    leading_shape = labels.shape
    # Treat a column vector of shape (n, 1) like a flat vector of shape (n,).
    if len(leading_shape) > 1 and leading_shape[-1] == 1:
        leading_shape = leading_shape[:-1]
    labels = labels.ravel()

    if num_classes is None:
        if labels.size == 0:
            raise ValueError(
                'num_classes must be given when y is empty')
        num_classes = int(labels.max()) + 1

    sample_count = labels.shape[0]
    one_hot = np.zeros((sample_count, num_classes), dtype=np.float32)
    # Set exactly one cell per row: the column selected by that row's label.
    one_hot[np.arange(sample_count), labels] = 1
    return one_hot.reshape(leading_shape + (num_classes,))

## poly.py
def PolynomialFeatures_labeled(input_df,power):
    '''Basically this is a cover for the sklearn preprocessing function.
    The problem with that function is if you give it a labeled dataframe, it ouputs an unlabeled dataframe with potentially
    a whole bunch of unlabeled columns.

    Inputs:
    input_df = Your labeled pandas dataframe (list of x's not raised to any power)
    power = what order polynomial you want variables up to. (use the same power as you want entered into pp.PolynomialFeatures(power) directly)

    Ouput:
	def mapper1(_, line):
	for word in line:
	yield ((len(word), word[0]), 1)

	def reducer1(key, values):
	yield key, sum(values)

	def mapper2(key, value):
	length, letter = key
	yield (length, (value, letter))
	mport threading
	import time
	from queue import Queue
	import sys
	from os import listdir
	from collections import Counter

	q = Queue()
	directory = sys.argv[1]
	files = listdir(directory)
	from multiprocessing import Pool
	from time import sleep
	import numpy as np

	def sum_list(numbers):
	return sum(numbers)

	master_list = np.arange(100000000).reshape(2,50000000)

	l1 = [3, 5, 7]
	import pandas as pd
	import unicodedata
	import string
	import numpy as np
	from nltk.util import ngrams
	from nltk.tokenize import sent_tokenize, word_tokenize
	from nltk.corpus import stopwords
	from nltk.stem.snowball import SnowballStemmer
	docs = []
	docs.append('code PYTHON code.')
	def to_categorical(y, num_classes=None):
	"""Converts a class vector (integers) to binary class matrix.
	E.g. for use with categorical_crossentropy.
	# Arguments
	y: class vector to be converted into a matrix
	(integers from 0 to num_classes).
	num_classes: total number of classes.
	# Returns
	A binary matrix representation of the input.
	"""
	def PolynomialFeatures_labeled(input_df,power):
	'''Basically this is a cover for the sklearn preprocessing function.
	The problem with that function is if you give it a labeled dataframe, it ouputs an unlabeled dataframe with potentially
	a whole bunch of unlabeled columns.

	Inputs:
	input_df = Your labeled pandas dataframe (list of x's not raised to any power)
	power = what order polynomial you want variables up to. (use the same power as you want entered into pp.PolynomialFeatures(power) directly)

	Ouput: