# Blog on Location Analysis: Creating the location and frequency matrices
# Section 1. Load the data from journey file ----
# Reads one journey summary CSV from the working directory into a data frame
# and derives the constants (largest cluster number, ordered day labels)
# used to size and label the arrays built from it.

# list.files() takes a regular expression, not a shell glob, so match the
# "_journey.csv" suffix explicitly (escaped dot, anchored at end of name).
l.journeys <- list.files(pattern = "_journey\\.csv$")
# select a particular car and day to examine
x.journey.number <- 4
# Fail early with a clear message instead of handing NA to read.csv().
if (length(l.journeys) < x.journey.number) {
  stop("journey file number ", x.journey.number, " requested but only ",
       length(l.journeys), " '*_journey.csv' file(s) found in ", getwd(),
       call. = FALSE)
}
df.x <- read.csv(l.journeys[x.journey.number],
                 stringsAsFactors = FALSE)
#
# Example of the expected structure:
# > str(df.x)
# 'data.frame': 700 obs. of 6 variables:
#  $ from            : int 1 1 1 1 1 1 1 1 1 1 ...
#  $ to              : int 1 1 1 1 1 1 1 2 2 2 ...
#  $ day             : chr "Monday" "Tuesday" "Wednesday" "Thursday" ...
#  $ journeys        : int 0 0 0 0 0 0 0 0 1 1 ...
#  $ journey.days    : int 0 0 0 0 0 0 0 0 1 1 ...
#  $ non.journey.days: int 6 6 6 6 6 6 6 6 5 5 ...
# > head(df.x)
#   from to       day journeys journey.days non.journey.days
# 1    1  1    Monday        0            0                6
# 2    1  1   Tuesday        0            0                6
# 3    1  1 Wednesday        0            0                6
# 4    1  1  Thursday        0            0                6
# 5    1  1    Friday        0            0                6
# 6    1  1  Saturday        0            0                6
#
# find the max cluster number in the data frame
# (max() already accepts several vectors, no need to nest the calls)
x.max.cluster <- max(df.x$from, df.x$to)
# create a label vector of ordered days of week (1 = Monday)
v.days <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
            "Saturday", "Sunday")
# Section 2. Journey Count ----
# Build a 3-d array holding the "journeys" value of df.x for every
# combination of start place, end place and day of week:
#   dim 1 (row)    = "from" place for a journey
#   dim 2 (column) = "to" place for a journey
#   dim 3 (plane)  = day of week the journey occurred (order of v.days)
# Cells with no matching row in df.x keep their initial value of 0.
journey.count.x <- array(0, dim = c(x.max.cluster, x.max.cluster,
                                    length(v.days)))

# load the journey data from the dataframe into the array.
# Each row of df.x addresses exactly one cell, so a 3-column index matrix
# fills the whole array in one assignment instead of scanning df.x once
# per cell.
m.count.index <- cbind(df.x$from, df.x$to, match(df.x$day, v.days))
# Rows with a missing place or a day label not in v.days address no cell.
v.count.valid <- rowSums(is.na(m.count.index)) == 0
m.count.index <- m.count.index[v.count.valid, , drop = FALSE]
# More than one row for the same cell is ambiguous; refuse to pick one.
if (anyDuplicated(m.count.index) > 0) {
  stop("df.x has more than one row for the same from/to/day combination",
       call. = FALSE)
}
journey.count.x[m.count.index] <- df.x$journeys[v.count.valid]

# add names to the array dimensions.
dimnames(journey.count.x) <- list(
  paste0("From.", seq_len(x.max.cluster)),
  paste0("To.", seq_len(x.max.cluster)),
  v.days
)

# here is an example of a query on the array to find which to/from/days
# have more than 5 journeys
# dim 1 is row, dim 2 is column, dim 3 is days
which(journey.count.x > 5, arr.ind = TRUE)
# Section 3. Days of travel count ----
#
# Build a 3-d array to examine predictability of travel, holding the
# "journey.days" value of df.x (the count of days where at least 1 journey
# occurred between 2 locations) for every combination of start place,
# end place and day of week:
#   dim 1 (row)    = "from" place for a journey
#   dim 2 (column) = "to" place for a journey
#   dim 3 (plane)  = day of week the journey occurred (order of v.days)
# Cells with no matching row in df.x keep their initial value of 0.
pred.journey.x <- array(0, dim = c(x.max.cluster, x.max.cluster,
                                   length(v.days)))

# Each row of df.x addresses exactly one cell, so a 3-column index matrix
# fills the whole array in one assignment instead of scanning df.x once
# per cell.
m.pred.index <- cbind(df.x$from, df.x$to, match(df.x$day, v.days))
# Rows with a missing place or a day label not in v.days address no cell.
v.pred.valid <- rowSums(is.na(m.pred.index)) == 0
m.pred.index <- m.pred.index[v.pred.valid, , drop = FALSE]
# More than one row for the same cell is ambiguous; refuse to pick one.
if (anyDuplicated(m.pred.index) > 0) {
  stop("df.x has more than one row for the same from/to/day combination",
       call. = FALSE)
}
pred.journey.x[m.pred.index] <- df.x$journey.days[v.pred.valid]

# add names to the array dimensions.
dimnames(pred.journey.x) <- list(
  paste0("From.", seq_len(x.max.cluster)),
  paste0("To.", seq_len(x.max.cluster)),
  v.days
)

# here is an example of queries on the array to get insights on
# journeys
# dim 1 is row, dim 2 is column, dim 3 is days
which(pred.journey.x > 5, arr.ind = TRUE) # journeys that occurred on 6 or more days
which(pred.journey.x > 4, arr.ind = TRUE) # journeys that occurred on 5 or more days
which(pred.journey.x > 3, arr.ind = TRUE) # journeys that occurred on 4 or more days
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment