Skip to content

Instantly share code, notes, and snippets.

@shuozhang1985
Created August 7, 2016 20:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shuozhang1985/f16e4bcdf456126b6c2f2f37c38c09b3 to your computer and use it in GitHub Desktop.
Save shuozhang1985/f16e4bcdf456126b6c2f2f37c38c09b3 to your computer and use it in GitHub Desktop.
library(dplyr)
library(wordcloud)
library(RColorBrewer)
library(shinythemes)
# ---- Device data --------------------------------------------------------
# Read the raw device records; text columns are kept as character vectors.
device_data <- read.csv('./data/phonedata.csv', header = TRUE,
                        stringsAsFactors = FALSE)
#str(device_data)
# Keep only rows with a known position and group whose coordinates fall in
# longitude [73, 136) and latitude [4, 54).
# NOTE(review): presumably a bounding box around China -- confirm.
device_map <- device_data %>%
  dplyr::filter(!is.na(longitude), !is.na(latitude), !is.na(group)) %>%
  dplyr::filter(longitude >= 73, longitude < 136, latitude >= 4, latitude < 54)
#str(device_map)
# Names of the most frequent phone brands, most common first. head() returns
# at most ten entries and, unlike `[1:10]`, does not pad the result with NA
# when fewer than ten distinct brands are present.
top10 <- head(
  names(sort(table(device_map$phone_brand_English), decreasing = TRUE)),
  10
)
# Distinct gender values found in the filtered data.
gender <- unique(device_map$gender)
# Fixed list of gender/age bucket labels.
# NOTE(review): presumably these match the values of the `group` column --
# verify against the data.
agegroup <- c("F23-", "F24-26", "F27-28", "F29-32", "F33-42", "F43+",
              "M22-", "M23-26", "M27-28", "M29-31", "M32-38", "M39+")
# Row count per phone brand, largest first.
table1 <- device_map %>%
  dplyr::group_by(phone_brand_English) %>%
  dplyr::summarise(n = n()) %>%
  arrange(desc(n))
# Phone price table: only the first four rows of the CSV are kept.
phoneprice <- head(
  read.csv('./data/phoneprice.csv', header = TRUE, stringsAsFactors = FALSE),
  4
)
# Row count per (brand, gender) pair. After summarise() the result is still
# grouped by brand, so `percent` is each gender's share within its brand.
phone_bygender <- device_map %>%
  dplyr::group_by(phone_brand_English, gender) %>%
  dplyr::summarise(n = n()) %>%
  mutate(percent = n / sum(n))
# # further analysis of shiny app
# phone_male=filter(phone_bygender, gender=='M')
# sum(phone_male$n)/sum(phone_bygender$n)
#
# N=nrow(device_map)
# agegroup=rep(0, N)
# for (i in 1:N){
# if (device_map$age[i]<=26){
# agegroup[i]='post-90s'
# }
# else if (device_map$age[i]>26&device_map$age[i]<=36){
# agegroup[i]='post-80s'
# }
# else if (device_map$age[i]>36&device_map$age[i]<=46){
# agegroup[i]='post-70s'
# }
# else if (device_map$age[i]>46&device_map$age[i]<=56){
# agegroup[i]='post-60s'
# }
# else if (device_map$age[i]>56){
# agegroup[i]='post-50s'
# }
# }
# device_map=mutate(device_map, agegroup=agegroup)
#
# agedis=device_map%>%
# dplyr::filter(phone_brand_English %in% c('Xiaomi', 'Huawei', 'OPPO',
# 'vivo', 'samsung')) %>%
# dplyr::group_by(agegroup, phone_brand_English)%>%
# summarise(n=n())%>%
# mutate(percent=n/sum(n))
# ageplot=ggplot(data=agedis, aes(x=agegroup, y=percent,
# fill=phone_brand_English))+
# geom_bar(stat = 'identity')+
# xlab('age group')+
# ylab('percent of number')+
# ggtitle('User Age Group Distribution of Top 5 Phone Brands')+
# theme_bw()
# ggplotly(ageplot)
# ---- App data -----------------------------------------------------------
# Read the raw app records; text columns are kept as character vectors.
app_data <- read.csv('./data/appmap.csv', header = TRUE,
                     stringsAsFactors = FALSE)
#str(app_data)
# Keep only rows with a known position and group whose coordinates fall in
# longitude [73, 136) and latitude [4, 54) -- the same window that is applied
# to the device data.
app_map <- app_data %>%
  dplyr::filter(!is.na(longitude), !is.na(latitude), !is.na(group)) %>%
  dplyr::filter(longitude >= 73, longitude < 136, latitude >= 4, latitude < 54)
#str(app_map)
# Names of the most frequent app categories, most common first. head()
# returns at most ten entries and, unlike `[1:10]`, does not pad the result
# with NA when fewer than ten distinct categories are present.
top10APP <- head(
  names(sort(table(app_map$category), decreasing = TRUE)),
  10
)
# # further analysis of shiny app
# N1=nrow(app_map)
# agegroup1=rep(0, N1)
# for (i in 1:N1){
# if (app_map$age[i]<=26){
# agegroup1[i]='post-90s'
# }
# else if (app_map$age[i]>26&app_map$age[i]<=36){
# agegroup1[i]='post-80s'
# }
# else if (app_map$age[i]>36&app_map$age[i]<=46){
# agegroup1[i]='post-70s'
# }
# else if (app_map$age[i]>46&app_map$age[i]<=56){
# agegroup1[i]='post-60s'
# }
# else if (app_map$age[i]>56){
# agegroup1[i]='post-50s'
# }
# }
# app_map=mutate(app_map, agegroup=agegroup1)
# unique(app_map$category)
#
# agedis1=app_map%>%
# dplyr::filter(category %in% c("Property Industry 2.0", "Industry tag" , "video" ,
# "Services 1" ,"P2P net loan" ) ) %>%
# dplyr::group_by(agegroup, category)%>%
# summarise(n=n())%>%
# mutate(percent=n/sum(n))
# ageplot1=ggplot(data=agedis1, aes(x=agegroup, y=percent,
# fill=category))+
# geom_bar(stat = 'identity')+
# xlab('age group')+
# ylab('percent of number')+
# ggtitle('User Age Group Distribution of APP categories')+
# theme_bw()
# ggplotly(ageplot1)
# Row count per app category, largest first.
table2 <- app_map %>%
  dplyr::group_by(category) %>%
  dplyr::summarise(n = n()) %>%
  arrange(desc(n))
# Row count per (category, gender) pair. After summarise() the result is
# still grouped by category, so `percent` is each gender's share within its
# category.
app_bygender <- app_map %>%
  dplyr::group_by(category, gender) %>%
  dplyr::summarise(n = n()) %>%
  mutate(percent = n / sum(n))
# Row count for every (category, brand, gender, age) combination.
app_byphone <- app_map %>%
  dplyr::group_by(category, phone_brand_English, gender, age) %>%
  dplyr::summarise(n = n())
# Store the activity flag as text (this overwrites the column in app_map),
# then count rows per (category, is_active) pair.
app_map$is_active <- as.character(app_map$is_active)
app_byactive <- app_map %>%
  dplyr::group_by(category, is_active) %>%
  dplyr::summarise(n = n())
# Row count per (category, brand) pair.
app_bybrand <- app_map %>%
  dplyr::group_by(category, phone_brand_English) %>%
  dplyr::summarise(n = n())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment