Last active
April 29, 2017 01:32
-
-
Save viveknarang/4c4d4b43ec7de692ae7a7bb62159ad54 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################################################
##################### Assignment A8 - B ############################
######################################### Vivek Narang #############
# Start
# Packages used by this script
library(rmr2)
library(tidyr)
library(readr)
library(dplyr)

# Run rmr2 against its "local" backend
rmr.options(backend = "local")

# Location of the comma-separated input file
url <- "c:\\A8.csv"

# Parse the input file, splitting fields on commas
t <- read_delim(url, delim = ",")

# Convert the parsed table to a plain data frame and hand it to to.dfs();
# the returned handle is what the map-reduce job reads as its input
hdfs.temp <- t %>%
  data.frame() %>%
  to.dfs()
# Mapper: emit the `TaxiOut` column of the input keyed by its `Origin` column,
# so that the taxi-out values end up grouped per origin airport.
#
# k: incoming key; not used by this mapper.
# v: incoming values; must expose `Origin` and `TaxiOut` via `$`.
mapper <- function(k, v) {
  keyval(v$Origin, v$TaxiOut)
}
# Reducer: summarise the taxi-out values collected for one origin airport.
#
# k: key of the group being reduced (the origin emitted by the mapper).
# v: values grouped under that key; may contain NA.
#
# Emits, under key k, three numbers in the fixed order minimum, mean, maximum,
# computed over the non-missing values only. The code that labels the job
# output depends on this order, so do not change it.
#
# If the group has no non-missing value at all, three NA values are emitted.
# Calling min()/max() with na.rm = TRUE on such a group would instead warn and
# return Inf/-Inf (and mean() would return NaN), which would then appear in
# the result table as if they were real measurements.
reducer <- function(k, v) {
  observed <- v[!is.na(v)]
  stats <- if (length(observed) == 0) {
    rep(NA_real_, 3)
  } else {
    c(min(observed), mean(observed), max(observed))
  }
  keyval(k, stats)
}
# Run the map-reduce job over the staged input.
out <- mapreduce(input = hdfs.temp, map = mapper, reduce = reducer)

# Fetch the job output as a data frame (columns `key` and `val`).
t3 <- as.data.frame(from.dfs(out))

# The reducer emits exactly three values per key, in the order min, mean, max.
# Label the rows accordingly, and fail loudly if the row count does not match
# that layout instead of relying on silent recycling.
measures <- c("min", "mean", "max")
stopifnot(nrow(t3) > 0, nrow(t3) %% length(measures) == 0)
t3$measure <- rep(measures, times = nrow(t3) / length(measures))

# Convert to tabular form: one row per key, one column per measure.
stats2 <- spread(t3, measure, val)

# Rename by column name rather than by position, so the labels cannot be
# attached to the wrong statistic if the column order ever changes.
stats2 <- rename(
  stats2,
  `Origin Airport` = key,
  `Max Taxi Out time` = max,
  `Mean Taxi Out time` = mean,
  `Min Taxi Out time` = min
)

# Order by origin airport.
stats3 <- arrange(stats2, `Origin Airport`)

# Show the first 6 rows; print() is explicit so the table also appears when
# the script is run through source(), which does not auto-print.
print(head(stats3))
# End
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment