This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import load_iris | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from sklearn.pipeline import Pipeline | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.metrics import accuracy_score | |
from sklearn.externals import joblib | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.ensemble import RandomForestClassifier |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt, floor | |
import numpy as np | |
def random(ds, k, random_state=42): | |
""" | |
Create random cluster centroids. | |
Parameters | |
---------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re, string, unicodedata | |
import nltk | |
import contractions | |
import inflect | |
from bs4 import BeautifulSoup | |
from nltk import word_tokenize, sent_tokenize | |
from nltk.corpus import stopwords | |
from nltk.stem import LancasterStemmer, WordNetLemmatizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def cluster(ds, k): | |
''' | |
The k-means clustering algorithm | |
Parameters: | |
----------- | |
ds: ndarray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def remove_non_ascii(words):
    """Remove non-ASCII characters from list of tokenized words.

    Each word is NFKD-normalized first, so characters with a compatibility
    or canonical decomposition (accented letters, ligatures, ...) are split
    into their component code points. Encoding to ASCII with
    ``errors='ignore'`` then drops every code point that is still outside
    the ASCII range.

    Parameters
    ----------
    words : iterable of str
        Tokenized words to clean.

    Returns
    -------
    list of str
        One cleaned string per input word, in the same order. A word made
        up entirely of non-decomposable non-ASCII characters becomes ``''``
        (it is not removed from the list).
    """
    return [
        # The encoded bytes are pure ASCII, so decoding as ASCII cannot fail.
        unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('ascii')
        for word in words
    ]
def to_lowercase(words): | |
"""Convert all characters to lowercase from list of tokenized words""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
# Raw sample values to be filtered for low outliers.
arr = [10, 386, 479, 627, 20, 523, 482, 483, 542, 699, 535, 617, 577, 471, 615, 583, 441, 562, 563, 527, 453, 530, 433, 541, 585, 704, 443, 569, 430, 637, 331, 511, 552, 496, 484, 566, 554, 472, 335, 440, 579, 341, 545, 615, 548, 604, 439, 556, 442, 461, 624, 611, 444, 578, 405, 487, 490, 496, 398, 512, 422, 455, 449, 432, 607, 679, 434, 597, 639, 565, 415, 486, 668, 414, 665, 763, 557, 304, 404, 454, 689, 610, 483, 441, 657, 590, 492, 476, 437, 483, 529, 363, 711, 543]
elements = numpy.array(arr)

# Mean and standard deviation of the whole sample (numpy.std is called
# with its default settings apart from the axis).
mean = numpy.mean(elements, axis=0)
sd = numpy.std(elements, axis=0)

# Compute the cut-off once instead of re-evaluating it for every element.
lower_bound = mean - 2 * sd

# Keep only values strictly above two standard deviations below the mean.
# NOTE(review): only the lower tail is filtered here; no upper-bound filter
# is visible in this snippet - confirm whether one is intended.
final_list = [x for x in arr if x > lower_bound]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# Random data: an N x M array of floats drawn by np.random.random.
N = 10  # number of rows
M = 2   # number of columns
# NOTE(review): `input` shadows the builtin of the same name. The name is
# kept because code outside this snippet may refer to it.
input = np.random.random((N, M))
# Function-call form of print works on Python 3 (and prints the same thing
# on Python 2 for a single argument).
print(input)
# Setup matrices |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import load_iris | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from sklearn.pipeline import Pipeline | |
from sklearn.externals import joblib | |
from sklearn.linear_model import LogisticRegression | |
from sklearn import svm | |
from sklearn import tree |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
CASE WHEN TipPercentage < 0 THEN 'No Tip' | |
WHEN TipPercentage BETWEEN 0 AND 5 THEN 'Less but still a Tip' | |
WHEN TipPercentage BETWEEN 5 AND 10 THEN 'Decent Tip' | |
WHEN TipPercentage > 10 THEN 'Good Tip' | |
ELSE 'Something different' | |
END AS TipRange, | |
Hr, | |
Wk, | |
TripMonth, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Vocabulary:
    """Word/index lookup tables that start out with three reserved tokens.

    Only the constructor is defined here; the dictionaries it creates are
    expected to be filled by code that is not part of this snippet.
    """

    PAD_token = 0  # Used for padding short sentences
    SOS_token = 1  # Start-of-sentence token
    EOS_token = 2  # End-of-sentence token

    def __init__(self, name):
        """Create an empty vocabulary labelled ``name``.

        Parameters
        ----------
        name
            Identifier stored unchanged on the instance as ``self.name``.
        """
        self.name = name
        self.word2index = {}  # starts empty
        self.word2count = {}  # starts empty
        # Class attributes are not visible as bare names inside a method,
        # so they must be reached through `self` (or the class); the bare
        # names would raise NameError at construction time.
        self.index2word = {
            self.PAD_token: "PAD",
            self.SOS_token: "SOS",
            self.EOS_token: "EOS",
        }