Skip to content

Instantly share code, notes, and snippets.

View tengpeng's full-sized avatar

Teng Peng tengpeng

View GitHub Profile
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:58
replace NA py
# Treat the '-unknown-' placeholder in the gender column as a missing value.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the attribute-accessed Series: that form is a
# chained in-place call, which pandas warns about and which does not update
# `users` when Copy-on-Write is enabled.
users['gender'] = users['gender'].replace('-unknown-', np.nan)
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:59
drop NA py
# Percentage of missing values in each column of `users`.
users_nan = users.isnull().sum() / len(users) * 100
# Show only the columns that actually contain missing values, leaving out
# 'country_destination'. errors='ignore' avoids a KeyError when that column
# has no missing values and was therefore already removed by the > 0 filter.
users_nan[users_nan > 0].drop('country_destination', errors='ignore')
@tengpeng
tengpeng / a.rb
Created January 31, 2016 23:07
py
# Summary statistics of the 'age' values that are greater than 122.
users.loc[users['age'] > 122, 'age'].describe()
@tengpeng
tengpeng / a.rb
Created January 31, 2016 23:10
set na py
# Mark every age above 95 as missing (NaN).
age_above_95 = users['age'] > 95
users.loc[age_above_95, 'age'] = np.nan
@tengpeng
tengpeng / a.rb
Created February 2, 2016 20:50
categorical to numerical
## Convert categorical features into numerical features ##
# For every column named in common_vars that is a character vector in train,
# build one level set from the values seen in train and test together, so the
# same string maps to the same integer code in both data sets.
for (f in common_vars) {
  # is.character() always yields a single TRUE/FALSE; class(x) == "character"
  # yields one value per class and `if` rejects a condition of length > 1.
  if (is.character(train[[f]])) {
    shared_levels <- unique(c(train[[f]], test[[f]]))
    train[[f]] <- as.integer(factor(train[[f]], levels = shared_levels))
    test[[f]] <- as.integer(factor(test[[f]], levels = shared_levels))
  }
}
@tengpeng
tengpeng / a.rb
Created February 5, 2016 21:08
subset r
# Three ways to keep only the listed columns of nci.
nci_keep_cols <- c(
  "CNS", "RENAL", "BREAST", "NSCLC",
  "MELANOMA", "OVARIAN", "LEUKEMIA", "COLON"
)

# 1. subset() with a select vector.
nci <- subset(nci, select = nci_keep_cols)

# 2. Index the columns directly by name.
nci <- nci[, nci_keep_cols]

# 3. Index with a logical mask over the column names; this form keeps the
#    columns in the order they already have in nci.
nci <- nci[, colnames(nci) %in% nci_keep_cols]
@tengpeng
tengpeng / a.rb
Created February 5, 2016 21:36
classify r
# Collapse result to a single label: 1 when any element is <= 1.5, else 2.
if (any(result <= 1.5)) {
  result <- 1
} else {
  result <- 2
}

# Element-wise labelling: 1 where result <= 1.5, 2 everywhere else.
result <- ifelse(result <= 1.5, 1, 2)
@tengpeng
tengpeng / a.rb
Created February 5, 2016 23:40
binary classification r
# Turn the values in pred into 0/1 labels: 1 where pred exceeds 0.5, else 0.
prediction <- as.double(pred > 0.5)
@tengpeng
tengpeng / a.rb
Created February 5, 2016 23:49
binary classification measure performance r
# Error rate: the share of 0/1 predictions (pred cut at 0.5) that differ from
# test$label.
predicted_label <- as.numeric(pred > 0.5)
err <- mean(predicted_label != test$label)
@tengpeng
tengpeng / a.rb
Created February 6, 2016 01:32
binary classification xgboost r
library(xgboost)

# Column 58 is used as the label; every other column is used as a feature.
label_col <- 58

# data.matrix() is used for both train and test so the two feature matrices
# are built the same way.
X <- data.matrix(train[, -label_col])
y <- train[, label_col]

# Not used by the binary:logistic fit below.
numberOfClasses <- max(y) + 1

# Argument names are spelled out in full: `nrounds` instead of relying on
# partial matching of `nround`, and `max_depth` instead of the dotted alias.
bst <- xgboost(
  data = X,
  label = y,
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 200,
  objective = "binary:logistic",
  verbose = 1
)

# Predict on the test features, then cut the predictions at 0.5 to get 0/1.
pred <- predict(bst, data.matrix(test[, -label_col]))
pred <- as.numeric(pred > 0.5)