This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ brew update | |
$ brew install hadoop |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
sns.boxplot(x='types', y="A", hue='sex', data=data, palette="PRGn") | |
sns.despine(offset=10, trim=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ans = np.loadtxt('./3GB_0.338917307945_16482164617e7c9d188bc75bafc06a08_test.txt') | |
print "Acc: ", accuracy_score(test[:, -1], ans) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array([[-0.7333268 , 0.28111396, 0.7734522 , ..., 0.92725609, | |
0.54231164, 1.45600466], | |
[ 1.00453544, 1.38036959, -0.53947796, ..., 0.26597865, | |
1.05617546, 0.69105939], | |
[ 0.19488847, 1.41583741, -0.4825291 , ..., 1.44242267, | |
2.98111301, -1.13948051], | |
..., | |
[-0.26055551, 2.20761146, -0.29844408, ..., 0.35477103, | |
0.88205004, -0.12984548], | |
[-0.39628748, -0.17146747, 0.3233919 , ..., 0.18283435, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat_cols = pd.read_csv(TRAIN_CAT, nrows = 1).columns.values | |
print 'cat_cols: ', cat_cols | |
print 'cat_cols.shape: ', cat_cols.shape | |
cats = pd.read_csv(TRAIN_CAT, usecols=(cat_cols[:2].tolist())) | |
print 'cats.shape: ', cats.shape | |
print cats |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clf = XGBClassifier(max_depth=5, base_score=0.005) | |
cv = StratifiedKFold(y, n_folds=3) | |
preds = np.ones(y.shape[0]) | |
for i, (train, test) in enumerate(cv): | |
preds[test] = clf.fit(X[train], y[train]).predict_proba(X[test])[:,1] | |
print("fold {}, ROC AUC: {:.3f}".format(i, roc_auc_score(y[test], preds[test]))) | |
print(roc_auc_score(y, preds)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_embedding(X, title=None): | |
x_min, x_max = np.min(X, 0), np.max(X, 0) | |
X = (X - x_min) / (x_max - x_min) | |
plt.figure() | |
ax = plt.subplot(111) | |
for i in range(X.shape[0]): | |
plt.text(X[i, 0], X[i, 1], str(digits.target[i]), | |
color=plt.cm.Set1(y[i] / 10.), | |
fontdict={'weight': 'bold', 'size': 9}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(dplyr) | |
#imports for plotting | |
require(GGally) | |
library(network) | |
library(sna) | |
library(ggplot2) | |
require(igraph) |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
activity_id date activity_category char_1 char_2 char_3 char_4 char_5 char_6 char_7 char_8 char_9 char_10 | |
people_id | |
ppl_100004 act1_249281 2022-07-20 type 1 type 5 type 10 type 5 type 1 type 6 type 1 type 1 type 7 type 4 NaN | |
ppl_100004 act2_230855 2022-07-20 type 5 NaN NaN NaN NaN NaN NaN NaN NaN NaN type 682 | |
ppl_10001 act1_240724 2022-10-14 type 1 type 12 type 1 type 5 type 4 type 6 type 1 type 1 type 13 type 10 NaN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from scipy import sparse as ssp | |
import pylab as plt | |
from sklearn.preprocessing import LabelEncoder,LabelBinarizer,MinMaxScaler,OneHotEncoder | |
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer | |
from sklearn.decomposition import TruncatedSVD,NMF,PCA,FactorAnalysis | |
from sklearn.feature_selection import SelectFromModel,SelectPercentile,f_classif | |
from sklearn.decomposition import TruncatedSVD | |
from sklearn.metrics import log_loss,roc_auc_score |
OlderNewer