Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Blog on Location Analysis: Creating the location and frequency matrices
# Section 1. Load the data from journey file
# This section reads one journey data frame in from a CSV file found in the
# current working directory.
#
# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor the suffix so only names ending in "_journey.csv" are matched.
l.journeys <- list.files(pattern = "_journey\\.csv$")
# select a particular car and day to examine
x.journey.number <- 4
# fail with a clear message when fewer files exist than the selected index
# (otherwise read.csv() would be handed NA and fail obscurely)
if (x.journey.number > length(l.journeys)) {
  stop("journey file ", x.journey.number, " requested but only ",
       length(l.journeys), " '*_journey.csv' file(s) found", call. = FALSE)
}
df.x <- read.csv(l.journeys[x.journey.number], stringsAsFactors = FALSE)
#
# Example of the expected structure:
# > str(df.x)
# 'data.frame': 700 obs. of 6 variables:
#  $ from            : int 1 1 1 1 1 1 1 1 1 1 ...
#  $ to              : int 1 1 1 1 1 1 1 2 2 2 ...
#  $ day             : chr "Monday" "Tuesday" "Wednesday" "Thursday" ...
#  $ journeys        : int 0 0 0 0 0 0 0 0 1 1 ...
#  $ journey.days    : int 0 0 0 0 0 0 0 0 1 1 ...
#  $ non.journey.days: int 6 6 6 6 6 6 6 6 5 5 ...
# > head(df.x)
#   from to day       journeys journey.days non.journey.days
# 1 1    1  Monday    0        0            6
# 2 1    1  Tuesday   0        0            6
# 3 1    1  Wednesday 0        0            6
# 4 1    1  Thursday  0        0            6
# 5 1    1  Friday    0        0            6
# 6 1    1  Saturday  0        0            6
#
# find the max cluster number in the data frame (across both endpoints)
x.max.cluster <- max(df.x$from, df.x$to)
# create a label vector of ordered days of week (1 = Monday)
v.days <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
            "Saturday", "Sunday")
# Section 2. Journey Count
# Build a 3-D array holding the journey count for every combination of
# start place, end place and day of week:
#   row    = "from" place for a journey
#   column = "to" place for a journey
#   plane  = day of week the journey occurred (ordered as in v.days)
# Every cell starts at 0; the dimension names are attached at creation.
journey.count.x <- array(
  0,
  dim = c(x.max.cluster, x.max.cluster, length(v.days)),
  dimnames = list(
    paste0("From.", seq_len(x.max.cluster)),
    paste0("To.", seq_len(x.max.cluster)),
    v.days
  )
)
# Load the journey data from the data frame into the array in one vectorised
# step: each row of the index matrix is a (from, to, day) coordinate.
idx.count <- cbind(df.x$from, df.x$to, match(df.x$day, v.days))
# Ignore rows that cannot be placed in the array (NA coordinate, a day not
# listed in v.days, or a non-positive place number).
ok.count <- stats::complete.cases(idx.count) &
  idx.count[, 1] >= 1 & idx.count[, 2] >= 1
idx.count <- idx.count[ok.count, , drop = FALSE]
# A from/to/day combination must be unique, otherwise the count is ambiguous.
if (anyDuplicated(idx.count) > 0) {
  stop("df.x has more than one row for the same from/to/day combination",
       call. = FALSE)
}
# Combinations absent from df.x keep their initial count of 0.
journey.count.x[idx.count] <- df.x$journeys[ok.count]
# Here is an example of a query on the array to find which from/to/day
# combinations have more than 5 journeys.
# In the result, dim 1 is row (from), dim 2 is column (to), dim 3 is day.
which(journey.count.x > 5, arr.ind = TRUE)
# Section 3. Days of travel count
#
# Build a 3-D array to examine predictability of travel, using the count of
# days on which at least 1 journey occurred between 2 locations:
#   row    = "from" place for a journey
#   column = "to" place for a journey
#   plane  = day of week the journey occurred (ordered as in v.days)
# Every cell starts at 0; the dimension names are attached at creation.
pred.journey.x <- array(
  0,
  dim = c(x.max.cluster, x.max.cluster, length(v.days)),
  dimnames = list(
    paste0("From.", seq_len(x.max.cluster)),
    paste0("To.", seq_len(x.max.cluster)),
    v.days
  )
)
# Fill the array in one vectorised step: each row of the index matrix is a
# (from, to, day) coordinate.
idx.pred <- cbind(df.x$from, df.x$to, match(df.x$day, v.days))
# Ignore rows that cannot be placed in the array (NA coordinate, a day not
# listed in v.days, or a non-positive place number).
ok.pred <- stats::complete.cases(idx.pred) &
  idx.pred[, 1] >= 1 & idx.pred[, 2] >= 1
idx.pred <- idx.pred[ok.pred, , drop = FALSE]
# A from/to/day combination must be unique, otherwise the count is ambiguous.
if (anyDuplicated(idx.pred) > 0) {
  stop("df.x has more than one row for the same from/to/day combination",
       call. = FALSE)
}
# Combinations absent from df.x keep their initial count of 0.
pred.journey.x[idx.pred] <- df.x$journey.days[ok.pred]
# Here are example queries on the array to get insights on journeys.
# In each result, dim 1 is row (from), dim 2 is column (to), dim 3 is day.
# The "out of 6" wording assumes 6 observed days per weekday, as in the
# sample data (journey.days + non.journey.days = 6) - confirm for other files.
which(pred.journey.x > 5, arr.ind = TRUE) # journeys on 6 out of 6 days
which(pred.journey.x > 4, arr.ind = TRUE) # journeys on at least 5 out of 6 days
which(pred.journey.x > 3, arr.ind = TRUE) # journeys on at least 4 out of 6 days
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment