This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## | |
## Linear regression by gradient descent | |
## | |
## A learning exercise to help build intuition about gradient descent. | |
## J. Christopher Bare, 2012 | |
## | |
# Fix the RNG seed so that subsequent random draws are reproducible.
set.seed(12345)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the training and test sets from the working directory.
library(data.table)
train <- fread("train.csv", header = TRUE)
test <- fread("test.csv", header = TRUE)

# Run a garbage collection after the two loads.
gc()

# Preview the first rows of each table.
# The label columns are is_booking and hotel_cluster (per the original note).
head(train)
head(test)
# process the data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
size<-1000 | |
devider<-10 | |
Nt<-numeric(size) | |
result<-numeric(size/devider) | |
for (i in 1:size) { | |
X<-runif(1000, min=0, max=1) | |
S<-cumsum(X) | |
t<-100 | |
Nt[i]<-sum(S<t) | |
result[i]<-sum(Nt)/i |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# forked from https://www.kaggle.com/signochastic/expedia-hotel-recommendations/r-version-of-most-popular-local-hotel | |
## R version of most popular local hotels | |
library(data.table)

# Load the competition data from the ../input directory.
expedia_train <- fread("../input/train.csv", header = TRUE)
expedia_test <- fread("../input/test.csv", header = TRUE)
# Weighted score that blends the total of `x` with the number of elements
# in `x`:
#
#   sum(x) * sum_weight + length(x) * count_weight
#
# Args:
#   x:            numeric (or logical) vector. Presumably a 0/1 booking
#                 indicator, so that sum(x) counts bookings and length(x)
#                 counts all events -- TODO confirm against the caller.
#   sum_weight:   multiplier applied to sum(x). Defaults to 0.835.
#   count_weight: multiplier applied to length(x). Defaults to 0.165.
#
# Returns:
#   A single numeric score. An NA anywhere in `x` makes the result NA,
#   because sum() is called without na.rm.
sum_and_count <- function(x, sum_weight = 0.835, count_weight = 0.165) {
  sum(x) * sum_weight + length(x) * count_weight
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# forked from https://www.kaggle.com/zfturbo/expedia-hotel-recommendations/r-some-tweaks/code | |
## R version of most popular local hotels (change variable) | |
library(data.table)

# Load the competition data from the ../input directory.
expedia_train <- fread("../input/train.csv", header = TRUE)
expedia_test <- fread("../input/test.csv", header = TRUE)
sum_and_count <- function(x){ | |
# take the weight of clicking and browsing | |
sum(x)*0.95 + length(x) *0.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load data ####
library(data.table)
expedia_train <- fread("train.csv", header = TRUE)
# expedia_test <- fread("test.csv", header = TRUE)

# Size of the training partition: 98% of the rows, rounded down.
smp_size <- floor(0.98 * nrow(expedia_train))
## set the seed to make your partition reproducible |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all_user_id <- unique(train$user_id) | |
similar_table<-list() | |
counter<-1 | |
for (user in all_user_id) { | |
similar_user<-c(user) | |
current <- train[user_id == user,] | |
for (next_user in all_user_id[-user]) { | |
# calculate similarity | |
# this is a test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Explore a single hotel market taken from `train`.
hotel_markets <- unique(train$hotel_market)
length(hotel_markets)

# Select the second distinct market (unique() keeps first-appearance order).
market <- train[hotel_market == hotel_markets[2], ]
summary(market)
unique(market$hotel_continent)

# Called without column names; presumably this keys (and sorts) the table
# by all of its columns -- confirm against the data.table documentation.
setkey(market)
market$date_time <- as.POSIXct(market$date_time)

# Drop rows whose srch_ci is the empty string.
# A logical mask is used instead of `-which(...)`: when no row matches,
# `-integer(0)` selects zero rows and would silently empty the table.
# `%in%` returns FALSE for NA, so rows with a missing srch_ci are kept,
# exactly as `which()` kept them.
market <- market[!(market$srch_ci %in% ""), ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)

# Number of rows to hold out as the test sample.
smp_size <- 10000

## Set the seed to make the partition reproducible.
set.seed(1234)

# Draw `smp_size` distinct row positions. sample() raises an error if
# `expedia_train` has fewer rows than `smp_size`.
idx <- sample(seq_len(nrow(expedia_train)), size = smp_size)

# The sampled rows become `test`; all remaining rows become `train`.
test <- expedia_train[idx, ]
train <- expedia_train[-idx, ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)

# read csv
expedia_train <- fread("train.csv", header = TRUE)

# Gather the three date/time columns into one unnamed list, in the order
# date_time, srch_ci, srch_co.
dates <- list(
  expedia_train$date_time,
  expedia_train$srch_ci,
  expedia_train$srch_co
)
OlderNewer