Skip to content

Instantly share code, notes, and snippets.

# Score the test set with the lasso fit at penalty `best.lambda`;
# type = "response" asks predict() for values on the response scale.
pred_lasso <- predict(glmmod, test_sparse, type = "response", s = best.lambda)
pred_lasso
# Turn the predictions into hard 0/1 labels with a 0.5 cut-off
new_pred_lasso <- ifelse(pred_lasso >= 0.5, 1, 0)
new_pred_lasso <- data.frame(new_pred_lasso)
# Pair the labels with column 2 of `test` (presumably the observed diagnosis --
# confirm) and cross-tabulate actual vs. predicted.
data_lasso <- cbind(test[, 2], new_pred_lasso)
names(data_lasso) <- c("actual", "pred")
xtab_lasso <- table(data_lasso$actual, data_lasso$pred)
# Pull the "Accuracy" entry out of the `overall` component of `cm_ols`
# (presumably a caret confusionMatrix object -- confirm where it is built).
overall_accuracy_ols <- cm_ols$overall["Accuracy"]
# Score the test set with the fitted linear model, using columns 3 to 32 of
# `test` as the predictors.
pred_ols <- predict(lmmod, test[, 3:32], type = "response")
pred_ols
# Turn the fitted values into hard 0/1 labels with a 0.5 cut-off
new_pred_ols <- ifelse(pred_ols >= 0.5, 1, 0)
new_pred_ols <- data.frame(new_pred_ols)
# Pair the labels with column 2 of `test` (presumably the observed diagnosis --
# confirm) and cross-tabulate actual vs. predicted.
data_ols <- cbind(test[, 2], new_pred_ols)
names(data_ols) <- c("actual", "pred")
xtab_ols <- table(data_ols$actual, data_ols$pred)
# Train the model (ordinary least squares via lm(); note that lm() fits a
# linear regression, not a logistic one). `diagnosis` is regressed on every
# other column in train[, 2:32].
lmmod <- lm(diagnosis ~ ., data = train[, 2:32])
summary(lmmod)
# Coefficient tests with the covariance estimator vcovHC, type "HC1"
coeftest(lmmod, vcov. = vcovHC, type = "HC1")
# Pull the "Accuracy" entry out of the `overall` component of `cm_lasso`
# (presumably a caret confusionMatrix object -- confirm where it is built).
overall_accuracy_lasso <- cm_lasso$overall["Accuracy"]
# Train the model: binomial glmnet with alpha = 1 (lasso penalty), fitted over
# a whole path of lambda values. The response is column 2 of `train`, coerced
# to a factor.
glmmod <- glmnet(
  x = train_sparse,
  y = as.factor(train[, 2]),
  alpha = 1,
  family = "binomial"
)
plot(glmmod, xvar = "lambda")
glmmod
# Coefficients at the last lambda on the fitted path. glmnet can stop before
# reaching 100 lambdas, in which case a hard-coded column 100 is out of bounds,
# so index by the actual path length instead.
coef(glmmod)[, length(glmmod$lambda)]
# Try cross validation lasso
# NOTE(review): fold assignment is random and no seed is set in this snippet,
# so results may vary between runs -- confirm whether a seed is set elsewhere.
cv.glmmod <- cv.glmnet(
  x = train_sparse,
  y = as.factor(train[, 2]),
  alpha = 1,
  family = "binomial"
)
plot(cv.glmmod)
library(Matrix)
library(glmnet)
library(pROC)
library(caret)
# Import dataset
data1 <- read.csv(file = "./data/input/breast-cancer.csv")
# Recode diagnosis: "M" becomes 1, any other non-missing value becomes 0
data1$diagnosis <- ifelse(data1$diagnosis == "M", 1, 0)
# Numeric-matrix version of the whole data frame
data2 <- data.matrix(data1)
# NOTE(review): the sparse matrix is built but never assigned, so at top level
# it is only auto-printed -- confirm whether the result should be stored.
Matrix(data2, sparse = TRUE)
if __name__ == "__main__":
    # The guarded body is indented here; without indentation the `if` has no
    # suite and Python rejects the file with an IndentationError.
    lexicon = Empath()
    # Analyze one sample sentence with normalize=True; the result is only used
    # below to obtain the set of keys.
    result = lexicon.analyze(
        "the quick brown fox jumps over the lazy dog", normalize=True
    )
    df0 = pd.Series(result, name="KeyValue")
    logging.getLogger().setLevel(logging.INFO)
    # Empty DataFrame with one column per key of the sample result
    col_names = df0.keys()
    df = pd.DataFrame(columns=col_names)
# Load libraries
import os
import logging
from empath import Empath
import pandas as pd
# Set up folder locations
# Starts empty; nothing in this snippet fills it.
source_folder_path_list = []
# Root directory of the extracted review corpus (absolute, machine-specific).
source_folder_path = "C:/Users/Marriane/Documents/GitHub/empath-on-movie-reviews/data/scale_whole_review.tar (with text)/scale_whole_review/scale_whole_review/"
# Sub-folders (relative paths), one entry per listed name.
folder_list = [
    "Dennis+Schwartz/txt.parag",
    "James+Berardinelli/txt.parag",
    "Scott+Renshaw/txt.parag",
    "Steve+Rhodes/txt.parag",
]
# Using map_data() to load the "world" map; the plot below reads its
# long, lat and region columns.
worldmap <- map_data("world")
mapplot1 <- ggplot(worldmap) +
geom_map(data = worldmap, map = worldmap, aes(x=long, y=lat, map_id=region), col = "white", fill = "gray50") +
geom_scatterpie(aes(x=longitude, y=latitude, group = country, r = multiplier*6),
data = final_data, cols = colnames(final_data[,c(2:11)])) +
xlim(-20,60) + ylim(10, 75) +
scale_fill_brewer(palette = "Paired") +
geom_text(aes(x=longitude, y=latitude, group = country, label = country), data = final_data, stat = "identity",