Created
March 8, 2018 19:37
-
-
Save lmcstro/a0b7b4e46211518b479db271cc74fda7 to your computer and use it in GitHub Desktop.
Blog on Location Analysis: Creating the location and frequency matrices
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Section 1. Load the data from journey file
# This section finds the per-journey CSV files in the working directory,
# selects one of them by position and reads it into a data frame.
#
# `pattern` is a regular expression (not a shell glob), so the dot is escaped
# and the expression is anchored to the end of the file name.
l.journeys <- list.files(pattern = "_journey\\.csv$")
# select a particular car and day to examine
x.journey.number <- 4
# Stop with a readable message instead of letting read.csv() fail on an NA
# file name when fewer files than expected are present.
if (length(l.journeys) < x.journey.number) {
  stop("Journey file number ", x.journey.number, " requested but only ",
       length(l.journeys), " '*_journey.csv' file(s) found in ", getwd(),
       call. = FALSE)
}
df.x <- read.csv(l.journeys[x.journey.number],
                 stringsAsFactors = FALSE)
# | |
# > str(df.x) | |
#'data.frame': 700 obs. of 6 variables: | |
# $ from : int 1 1 1 1 1 1 1 1 1 1 ... | |
#$ to : int 1 1 1 1 1 1 1 2 2 2 ... | |
#$ day : chr "Monday" "Tuesday" "Wednesday" "Thursday" ... | |
#$ journeys : int 0 0 0 0 0 0 0 0 1 1 ... | |
#$ journey.days : int 0 0 0 0 0 0 0 0 1 1 ... | |
#$ non.journey.days: int 6 6 6 6 6 6 6 6 5 5 ... | |
#> head(df.x) | |
#from to day journeys journey.days non.journey.days | |
#1 1 1 Monday 0 0 6 | |
#2 1 1 Tuesday 0 0 6 | |
#3 1 1 Wednesday 0 0 6 | |
#4 1 1 Thursday 0 0 6 | |
#5 1 1 Friday 0 0 6 | |
#6 1 1 Saturday 0 0 6 | |
# | |
# Label vector holding the days of the week in order (position 1 = Monday).
v.days <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
# Largest cluster number that appears in either the "from" or the "to" column.
x.max.from <- max(df.x$from)
x.max.to <- max(df.x$to)
x.max.cluster <- max(c(x.max.from, x.max.to))
rm(x.max.from, x.max.to)
# Section 2. Journey Count
# Build a 3-D array of journey counts, indexed as [from, to, day]:
#   dim 1 (rows)    = "from" place for a journey  ("From.1", "From.2", ...)
#   dim 2 (columns) = "to" place for a journey    ("To.1", "To.2", ...)
#   dim 3 (planes)  = day of week the journey occurred (order of v.days)
# Every cell starts at 0, so (from, to, day) combinations that have no row in
# df.x simply stay 0 instead of raising a "replacement has length zero" error.
x.n.cluster <- max(x.max.cluster, 0)
journey.count.x <- array(
  0,
  dim = c(x.n.cluster, x.n.cluster, length(v.days)),
  dimnames = list(
    paste0("From.", seq_len(x.n.cluster)),
    paste0("To.", seq_len(x.n.cluster)),
    v.days
  )
)
# Load the journey data from the data frame into the array in one vectorised
# step: each row of the 3-column index matrix addresses one [from, to, day]
# cell. Rows whose day is not in v.days, or whose from/to is missing or below
# 1, are skipped. If a combination appears more than once the last row wins.
v.count.day <- match(df.x$day, v.days)
v.count.ok <- !is.na(v.count.day) &
  !is.na(df.x$from) & !is.na(df.x$to) &
  df.x$from >= 1 & df.x$to >= 1
journey.count.x[cbind(df.x$from[v.count.ok],
                      df.x$to[v.count.ok],
                      v.count.day[v.count.ok])] <- df.x$journeys[v.count.ok]
# Here is an example of a query on the array: find which from/to/day
# combinations have more than 5 journeys.
# In the result, dim 1 is row (from), dim 2 is column (to), dim 3 is day.
which(journey.count.x > 5, arr.ind = TRUE)
# Section 3. Days of travel count
#
# Build a 3-D array to examine predictability of travel, using the count of
# days on which at least 1 journey occurred between 2 locations.
# Indexed as [from, to, day]:
#   dim 1 (rows)    = "from" place for a journey  ("From.1", "From.2", ...)
#   dim 2 (columns) = "to" place for a journey    ("To.1", "To.2", ...)
#   dim 3 (planes)  = day of week the journey occurred (order of v.days)
# Every cell starts at 0, so (from, to, day) combinations that have no row in
# df.x stay 0 instead of raising a "replacement has length zero" error.
x.n.cluster <- max(x.max.cluster, 0)
pred.journey.x <- array(
  0,
  dim = c(x.n.cluster, x.n.cluster, length(v.days)),
  dimnames = list(
    paste0("From.", seq_len(x.n.cluster)),
    paste0("To.", seq_len(x.n.cluster)),
    v.days
  )
)
# Fill the array in one vectorised step: each row of the 3-column index matrix
# addresses one [from, to, day] cell. Rows whose day is not in v.days, or whose
# from/to is missing or below 1, are skipped. If a combination appears more
# than once the last row wins.
v.pred.day <- match(df.x$day, v.days)
v.pred.ok <- !is.na(v.pred.day) &
  !is.na(df.x$from) & !is.na(df.x$to) &
  df.x$from >= 1 & df.x$to >= 1
pred.journey.x[cbind(df.x$from[v.pred.ok],
                     df.x$to[v.pred.ok],
                     v.pred.day[v.pred.ok])] <- df.x$journey.days[v.pred.ok]
# Here are example queries on the array to get insights on journeys.
# In each result, dim 1 is row (from), dim 2 is column (to), dim 3 is day.
which(pred.journey.x > 5, arr.ind = TRUE) # journeys on at least 6 days
which(pred.journey.x > 4, arr.ind = TRUE) # journeys on at least 5 days
which(pred.journey.x > 3, arr.ind = TRUE) # journeys on at least 4 days
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment