from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone package
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from math import sqrt, floor
import numpy as np
def random(ds, k, random_state=42):
    """
    Create random cluster centroids.
    Parameters
    ----------
    ds : ndarray of shape (n_samples, n_features)
    k : int, number of centroids
    """
    rng = np.random.default_rng(random_state)
    # one common choice: sample k distinct rows of the dataset as centroids
    return ds[rng.choice(ds.shape[0], size=k, replace=False)]
import re, string, unicodedata
import nltk
import contractions
import inflect
from bs4 import BeautifulSoup
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer, WordNetLemmatizer
import numpy as np
def cluster(ds, k, max_iter=100):
    '''The k-means clustering algorithm
    Parameters:
    -----------
    ds: ndarray of shape (n_samples, n_features)
    k: int, number of clusters
    '''
    centroids = random(ds, k)  # initial centroids: k randomly chosen points
    for _ in range(max_iter):
        # assign points to their nearest centroid, then recompute each cluster mean
        labels = np.linalg.norm(ds[:, None] - centroids, axis=2).argmin(axis=1)
        centroids = np.array([ds[labels == i].mean(axis=0) for i in range(k)])
    return centroids, labels
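# Hypothetical usage of the two k-means helpers above (the data is made up):
pts = np.random.default_rng(0).random((100, 2))
centers, labels = cluster(pts, k=3)
print(centers)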
def remove_non_ascii(words):
    """Remove non-ASCII characters from list of tokenized words"""
    new_words = []
    for word in words:
        new_word = unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8', 'ignore')
        new_words.append(new_word)
    return new_words
def to_lowercase(words):
    """Convert all characters to lowercase from list of tokenized words"""
    return [word.lower() for word in words]
import numpy
arr = [10, 386, 479, 627, 20, 523, 482, 483, 542, 699, 535, 617, 577, 471, 615, 583, 441, 562, 563, 527, 453, 530, 433, 541, 585, 704, 443, 569, 430, 637, 331, 511, 552, 496, 484, 566, 554, 472, 335, 440, 579, 341, 545, 615, 548, 604, 439, 556, 442, 461, 624, 611, 444, 578, 405, 487, 490, 496, 398, 512, 422, 455, 449, 432, 607, 679, 434, 597, 639, 565, 415, 486, 668, 414, 665, 763, 557, 304, 404, 454, 689, 610, 483, 441, 657, 590, 492, 476, 437, 483, 529, 363, 711, 543]
elements = numpy.array(arr)
mean = numpy.mean(elements, axis=0)
sd = numpy.std(elements, axis=0)
# keep only values within two standard deviations of the mean
final_list = [x for x in arr if (x > mean - 2 * sd)]
final_list = [x for x in final_list if (x < mean + 2 * sd)]
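# The same two-standard-deviation cut written as a NumPy boolean mask
# (equivalent result, kept as an array rather than a Python list):
filtered = elements[numpy.abs(elements - mean) < 2 * sd]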
import numpy as np
import matplotlib.pyplot as plt
# Random data
N = 10
M = 2
input = np.random.random((N,M))
print(input)  # Python 3 print; note `input` also shadows the builtin of the same name
# Setup matrices
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone package
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import tree
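# The imports above are not followed by any usage in this snippet; the lines
# below are one common way to wire them together, offered as a sketch (the
# parameter grid, estimator choice, and file name are assumptions, not from
# the original).
from sklearn.model_selection import GridSearchCV

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipe = Pipeline([
    ("scale", StandardScaler()),
    ("pca", PCA()),
    ("clf", LogisticRegression(max_iter=1000)),
])
grid = GridSearchCV(pipe, {"pca__n_components": [2, 3], "clf__C": [0.1, 1.0, 10.0]}, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_, grid.score(X_test, y_test))
joblib.dump(grid.best_estimator_, "iris_pipeline.joblib")  # assumed file name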
SELECT
    CASE WHEN TipPercentage < 0 THEN 'No Tip'
         WHEN TipPercentage BETWEEN 0 AND 5 THEN 'Less but still a Tip'
         WHEN TipPercentage BETWEEN 5 AND 10 THEN 'Decent Tip'
         WHEN TipPercentage > 10 THEN 'Good Tip'
         ELSE 'Something different'
    END AS TipRange,
    Hr,
    Wk,
    TripMonth,
class Vocabulary:
    PAD_token = 0  # Used for padding short sentences
    SOS_token = 1  # Start-of-sentence token
    EOS_token = 2  # End-of-sentence token

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        # the class-level token constants must be qualified with self (or Vocabulary.) here
        self.index2word = {self.PAD_token: "PAD", self.SOS_token: "SOS", self.EOS_token: "EOS"}
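    # The original snippet stops after __init__; the method below is an assumed
    # sketch of the word-registration step such a vocabulary typically needs
    # (its name and behaviour are not from the original).
    def add_word(self, word):
        if word not in self.word2index:
            idx = len(self.index2word)          # next free index after PAD/SOS/EOS
            self.word2index[word] = idx
            self.index2word[idx] = word
            self.word2count[word] = 1
        else:
            self.word2count[word] += 1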