TomHorton TomHortons

## file0.txt
$ brew update
$ brew install hadoop

## boxplot.py
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Box plot of column "A" grouped by "types", split into one box per "sex".
sns.boxplot(
    x="types",
    y="A",
    hue="sex",
    data=data,
    palette="PRGn",
)
# Move the remaining axis spines 10 points outward and trim them to the ticks.
sns.despine(trim=True, offset=10)

## check_ans.py
# Load the predictions stored on disk and score them against the last column
# of `test` (presumably the true labels -- confirm against where `test` is built).
ans = np.loadtxt('./3GB_0.338917307945_16482164617e7c9d188bc75bafc06a08_test.txt')
# A single-argument print(...) parses on both Python 2 and Python 3. The two
# spaces after the colon reproduce the separator the old print statement added.
print("Acc:  {}".format(accuracy_score(test[:, -1], ans)))

## file1.txt
array([[-0.7333268 ,  0.28111396,  0.7734522 , ...,  0.92725609,
         0.54231164,  1.45600466],
       [ 1.00453544,  1.38036959, -0.53947796, ...,  0.26597865,
         1.05617546,  0.69105939],
       [ 0.19488847,  1.41583741, -0.4825291 , ...,  1.44242267,
         2.98111301, -1.13948051],
       ...,
       [-0.26055551,  2.20761146, -0.29844408, ...,  0.35477103,
         0.88205004, -0.12984548],
       [-0.39628748, -0.17146747,  0.3233919 , ...,  0.18283435,

## check_category.py
# Read a single row of the CSV at TRAIN_CAT just to obtain its column names.
cat_cols = pd.read_csv(TRAIN_CAT, nrows=1).columns.values
# Single-argument print(...) parses on both Python 2 and Python 3. The doubled
# space after each colon matches the output of the former print statements.
print('cat_cols:  {}'.format(cat_cols))
print('cat_cols.shape:  {}'.format(cat_cols.shape))

# Re-read the file, loading only its first two columns.
cats = pd.read_csv(TRAIN_CAT, usecols=cat_cols[:2].tolist())
print('cats.shape:  {}'.format(cats.shape))
print(cats)

## CV.py
# 3-fold stratified cross-validation of an XGBoost classifier. `preds` collects
# column 1 of predict_proba for the held-out rows of each fold.
clf = XGBClassifier(max_depth=5, base_score=0.005)
cv = StratifiedKFold(y, n_folds=3)
preds = np.ones(y.shape[0])
for i, (train, test) in enumerate(cv):
    # Refit on this fold's training rows, then score its held-out rows.
    clf.fit(X[train], y[train])
    fold_proba = clf.predict_proba(X[test])
    preds[test] = fold_proba[:, 1]
    fold_auc = roc_auc_score(y[test], preds[test])
    print("fold {}, ROC AUC: {:.3f}".format(i, fold_auc))
# ROC AUC over the pooled out-of-fold predictions.
print(roc_auc_score(y, preds))

## file1.txt
def plot_embedding(X, title=None):
    """Draw each row of a 2-D embedding as a coloured text label.

    Coordinates are min-max scaled per column before plotting. The label
    text is read from the module-level ``digits.target`` and the colour from
    the module-level ``y``; neither is passed in as an argument.

    Args:
        X: 2-D array; columns 0 and 1 give the x/y position of each label.
        title: Optional title for the figure; nothing is set when it is None.
    """
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    span = x_max - x_min
    # A constant column has zero range; divide by 1 there so its points land
    # at 0 instead of turning into NaN.
    span = np.where(span == 0, 1, span)
    X = (X - x_min) / span

    plt.figure()
    plt.subplot(111)
    for i in range(X.shape[0]):
        plt.text(X[i, 0], X[i, 1], str(digits.target[i]),
                 color=plt.cm.Set1(y[i] / 10.),
                 fontdict={'weight': 'bold', 'size': 9})
    # The title argument was previously accepted but never used.
    if title is not None:
        plt.title(title)

## file0.txt
library(data.table)
library(dplyr)


#imports for plotting
require(GGally)
library(network)
library(sna)
library(ggplot2)
require(igraph)

## act_test.csv
activity_id date    activity_category   char_1  char_2  char_3  char_4  char_5  char_6  char_7  char_8  char_9  char_10
people_id
ppl_100004  act1_249281 2022-07-20  type 1  type 5  type 10 type 5  type 1  type 6  type 1  type 1  type 7  type 4  NaN
ppl_100004  act2_230855 2022-07-20  type 5  NaN NaN NaN NaN NaN NaN NaN NaN NaN type 682
ppl_10001   act1_240724 2022-10-14  type 1  type 12 type 1  type 5  type 4  type 6  type 1  type 1  type 13 type 10 NaN

## Keras_benchmark_redhat
import pandas as pd
import numpy as np
from scipy import sparse as ssp
import pylab as plt
from sklearn.preprocessing import LabelEncoder,LabelBinarizer,MinMaxScaler,OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.decomposition import TruncatedSVD,NMF,PCA,FactorAnalysis
from sklearn.feature_selection import SelectFromModel,SelectPercentile,f_classif
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import log_loss,roc_auc_score
	import seaborn as sns
	import pandas as pd
	import matplotlib.pyplot as plt
	%matplotlib inline

	sns.boxplot(x='types', y="A", hue='sex', data=data, palette="PRGn")
	sns.despine(offset=10, trim=True)
	ans = np.loadtxt('./3GB_0.338917307945_16482164617e7c9d188bc75bafc06a08_test.txt')
	print "Acc: ", accuracy_score(test[:, -1], ans)
	array([[-0.7333268 , 0.28111396, 0.7734522 , ..., 0.92725609,
	0.54231164, 1.45600466],
	[ 1.00453544, 1.38036959, -0.53947796, ..., 0.26597865,
	1.05617546, 0.69105939],
	[ 0.19488847, 1.41583741, -0.4825291 , ..., 1.44242267,
	2.98111301, -1.13948051],
	...,
	[-0.26055551, 2.20761146, -0.29844408, ..., 0.35477103,
	0.88205004, -0.12984548],
	[-0.39628748, -0.17146747, 0.3233919 , ..., 0.18283435,
	cat_cols = pd.read_csv(TRAIN_CAT, nrows = 1).columns.values
	print 'cat_cols: ', cat_cols
	print 'cat_cols.shape: ', cat_cols.shape

	cats = pd.read_csv(TRAIN_CAT, usecols=(cat_cols[:2].tolist()))
	print 'cats.shape: ', cats.shape
	print cats
	clf = XGBClassifier(max_depth=5, base_score=0.005)
	cv = StratifiedKFold(y, n_folds=3)
	preds = np.ones(y.shape[0])
	for i, (train, test) in enumerate(cv):
	preds[test] = clf.fit(X[train], y[train]).predict_proba(X[test])[:,1]
	print("fold {}, ROC AUC: {:.3f}".format(i, roc_auc_score(y[test], preds[test])))
	print(roc_auc_score(y, preds))
	def plot_embedding(X, title=None):
	x_min, x_max = np.min(X, 0), np.max(X, 0)
	X = (X - x_min) / (x_max - x_min)

	plt.figure()
	ax = plt.subplot(111)
	for i in range(X.shape[0]):
	plt.text(X[i, 0], X[i, 1], str(digits.target[i]),
	color=plt.cm.Set1(y[i] / 10.),
	fontdict={'weight': 'bold', 'size': 9})
	library(data.table)
	library(dplyr)


	#imports for plotting
	require(GGally)
	library(network)
	library(sna)
	library(ggplot2)
	require(igraph)
	activity_id date activity_category char_1 char_2 char_3 char_4 char_5 char_6 char_7 char_8 char_9 char_10
	people_id
	ppl_100004 act1_249281 2022-07-20 type 1 type 5 type 10 type 5 type 1 type 6 type 1 type 1 type 7 type 4 NaN
	ppl_100004 act2_230855 2022-07-20 type 5 NaN NaN NaN NaN NaN NaN NaN NaN NaN type 682
	ppl_10001 act1_240724 2022-10-14 type 1 type 12 type 1 type 5 type 4 type 6 type 1 type 1 type 13 type 10 NaN
	import pandas as pd
	import numpy as np
	from scipy import sparse as ssp
	import pylab as plt
	from sklearn.preprocessing import LabelEncoder,LabelBinarizer,MinMaxScaler,OneHotEncoder
	from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
	from sklearn.decomposition import TruncatedSVD,NMF,PCA,FactorAnalysis
	from sklearn.feature_selection import SelectFromModel,SelectPercentile,f_classif
	from sklearn.decomposition import TruncatedSVD
	from sklearn.metrics import log_loss,roc_auc_score