Skip to content

Instantly share code, notes, and snippets.

@alChaCC
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alChaCC/4fee2a25a422dceb40f5 to your computer and use it in GitHub Desktop.
Save alChaCC/4fee2a25a422dceb40f5 to your computer and use it in GitHub Desktop.
setwd("r-playground/R/ga_engagement")
list.of.packages <- c("rjson", "RCurl","RGoogleAnalytics")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
require(rjson)
require(RCurl)
require(RGoogleAnalytics)
require(ggplot2)
token <- Auth('YOUR API USER ID','YOUR API USER PASSWORD')
save(token,file="./token_file")
profile <- GetProfiles(token) # show all your profile
# id name
# XXXXXX HELLO (All)
# YYYYYYYY COOL
# ....
# Your profile number (not the GA ID number)
my_profile <- profile[profile$name == 'HELLO (All)',1]
time_start_seq <- as.Date(ISOdate(2014,seq(1,12),1))
#[1] "2014-01-01" "2014-02-01" "2014-03-01" "2014-04-01" "2014-05-01" "2014-06-01"
#[7] "2014-07-01" "2014-08-01" "2014-09-01" "2014-10-01" "2014-11-01" "2014-12-01"
time_end_seq <- seq(as.Date("2014-02-01"), length=12, by="1 month") - 1
# [1] "2014-01-31" "2014-02-28" "2014-03-31" "2014-04-30" "2014-05-31" "2014-06-30"
# [7] "2014-07-31" "2014-08-31" "2014-09-30" "2014-10-31" "2014-11-30" "2014-12-31"
# 為了做資料紀錄
every_month_2014 <- list()
all_data <- data.frame()
for ( i in 1:length(time_start_seq)) {
query.list <- Init(start.date = as.character(time_start_seq[i]),
end.date = as.character(time_end_seq[i]),
dimensions = "ga:sessionDurationBucket",
metrics = "ga:sessions,ga:pageviews",
sort = "ga:sessionDurationBucket",
max.results = 10000,
table.id = paste("ga:",my_profile,sep="",collapse=",")
)
# 建立一個query等一下就是透過這個query與token拿資料!
ga.query <- QueryBuilder(query.list)
# 向GA抓取資料,存成data frame
ga.data <- GetReportData(ga.query, token)
# ga.data <- GetReportData(ga.query, token,split_daywise = T,paginate_query = TRUE) 另外一種拿法也work
# 資料處理部分,由於抓回來的 “data$sessionDurationBucket” 是個string,所要把它轉成 數字
less_than_10_seconds <- ga.data[as.numeric(ga.data$sessionDurationBucket) <= 10,]
between_11_to_30 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 10 & as.numeric(ga.data$sessionDurationBucket) <= 30,]
between_31_to_60 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 30 & as.numeric(ga.data$sessionDurationBucket) <= 60,]
between_61_to_180 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 60 & as.numeric(ga.data$sessionDurationBucket) <= 180,]
between_181_to_600 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 180 & as.numeric(ga.data$sessionDurationBucket) <= 600,]
between_601_to_1800 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 600 & as.numeric(ga.data$sessionDurationBucket) <= 1800,]
more_than_1801 <- ga.data[as.numeric(ga.data$sessionDurationBucket) > 1800,]
# 處理後資料長這樣
# sessionDurationBucket sessions pageviews
# 0 342840 341561
# 1 1445 1906
# 10 2567 4392
# 2 2210 2704
# 3 2283 2934
# 4 2309 3133
# 5 2368 3384
# 資料整併
every_month_2014[[i]] <- rbind(colSums(less_than_10_seconds[,-1]),colSums(between_11_to_30[,-1]),
colSums(between_31_to_60[,-1]),colSums(between_61_to_180[,-1]),
colSums(between_181_to_600[,-1]),colSums(between_601_to_1800[,-1]),
colSums(more_than_1801[,-1]))
# sessions pageviews
#[1,] 365814 375361
#[2,] 44857 92466
#[3,] 48003 133645
#[4,] 96890 423865
#[5,] 107373 872529
#[6,] 78404 1057353
#[7,] 28183 812783
# 補上一欄時間區間
month_all_data <- cbind(c(as.character(i)),c("< 10s","11 ~ 30","31~60","61~180","181~600","601~1800",">1801"),every_month_2014[[i]])
# sessions pageviews
#[1,] "12" "< 10s" "365814" "375361"
#[2,] "12" "11 ~ 30" "44857" "92466"
#[3,] "12" "31~60" "48003" "133645"
#[4,] "12" "61~180" "96890" "423865"
#[5,] "12" "181~600" "107373" "872529"
#[6,] "12" "601~1800" "78404" "1057353"
#[7,] "12" ">1801" "28183" "812783"
# 將每一年的統整資料放在一起,作圖需要
all_data <- rbind(all_data,month_all_data)
}
# 補上欄位名稱
colnames(all_data) <- c("month","time_interval","sessions","pageviews")
# 先畫pageviews
# 將資料的型態改正,因為上面建立的時候,會將每個欄位的屬性變成"factor",這會對畫圖趙成莫大影響!
all_data <- transform(all_data,
time_interval = factor(time_interval, levels =
c('< 10s','11 ~ 30','31~60', '61~180','181~600', '601~1800','>1801')),
pageviews = as.numeric(as.character(pageviews)))
# 作圖
plot <- ggplot(data = all_data, aes(x = month, y = pageviews, fill = time_interval)) +
geom_bar(stat = "identity", position = "dodge", colour = "black")
# 將圖存出
ggsave(plot,file=paste("2014年engagement-pageviews圖.png",sep=""),width=15, height=10)
# 再畫sessions
all_data <- transform(all_data,
time_interval = factor(time_interval, levels =
c('< 10s','11 ~ 30','31~60', '61~180','181~600', '601~1800','>1801')),
sessions = as.numeric(as.character(sessions)))
plot <- ggplot(data = all_data, aes(x = month, y = sessions, fill = time_interval)) +
geom_bar(stat = "identity", position = "dodge", colour = "black")
ggsave(plot,file=paste("2014年engagement-sessions圖.png",sep=""),width=15, height=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment