# gabrielburcea/gist:5a8e274af6650e9027b904d5c42ac028

## gistfile1.txt
##############################
# Admissions and Discharges
##############################

####################
#reading the data ##
####################
# Load the extract; the first row of the CSV supplies the column names.
data <- read.csv(file = "df.csv", header = TRUE)

# Preview the first ten rows.
head(x = data, n = 10)

# Remove the column named X, if there is one.
# NOTE(review): presumably a row-index column left by an earlier write.csv() -
# confirm against the file.
data[["X"]] <- NULL
##################################
# Importing the libraries I need##
##################################
library(lubridate)
library(tidyverse)
##################################
##################################


#######################################################

# Function that converts all column names to lowercase

df_tolower <- function(x) {
  # Lower-case every column name; the values in `x` are left untouched.
  lowered_names <- tolower(colnames(x))
  colnames(x) <- lowered_names
  x
}

#####################################################

# Working copy of the raw data with lower-case column names.
df <- df_tolower(x = data)


########################################
########################################
# Function to convert a date-time column (e.g. character or POSIXlt) to POSIXct
# Convert one column of a data frame to POSIXct.
#
# Arguments:
#   df        A data frame (base data.frame or tibble).
#   col_name  Single string naming the column to convert.
#   tz        Time zone handed to as.POSIXct(). The default "" keeps the
#             previous behaviour (the session's local time zone).
# Returns `df` with column `col_name` replaced by its POSIXct conversion.
# Stops with an error when `col_name` is not a single existing column name.
change_col_to_ct <- function(df, col_name, tz = "") {
  if (!is.character(col_name) || length(col_name) != 1L ||
      !col_name %in% names(df)) {
    stop("`col_name` must name a single existing column of `df`",
         call. = FALSE)
  }
  # `[[` always yields the column as a vector. `df[, col_name]` returns a
  # one-column tibble when `df` is a tibble, which as.POSIXct() cannot convert.
  df[[col_name]] <- as.POSIXct(df[[col_name]], tz = tz)
  df
}

##################################
##################################
# Example: apply change_col_to_ct() to both date-time columns
# Convert the admission and discharge timestamps one after the other.
dt_sub <- change_col_to_ct(df, "start_datetime")
dt_sub <- change_col_to_ct(dt_sub, "end_datetime")

# Inspect the resulting column types.
str(dt_sub)
#################################


################################################################################################################
# subseting data set
################################################################################################################
# Columns used by the rest of the analysis.
dt_rmds <- dt_sub[c(
  "pat_code", "start_datetime", "end_datetime", "spell.type",
  "ward_code", "episode.order", "spell.number"
)]

# Reporting period: 1 Jan 2014 to 31 Mar 2014 inclusive, as stated in the
# chart title. Timestamps are tested against the half-open interval
# [period_start, period_end), so all of 31 March is included and 1 April is
# not (the previous upper bound of "2014-04-01 23:59:00" let 1 April in).
period_days <- seq(as.Date("2014-01-01"), as.Date("2014-03-31"), by = "day")
period_start <- as.POSIXct(paste(min(period_days), "00:00:00"))
period_end <- as.POSIXct(paste(max(period_days) + 1, "00:00:00"))

# TRUE where a timestamp lies inside the reporting period; NA counts as FALSE.
in_period <- function(x) {
  !is.na(x) & x >= period_start & x < period_end
}

# Spells admitted and/or discharged during the reporting period.
dt_JM2016 <- subset(
  dt_rmds,
  in_period(start_datetime) | in_period(end_datetime)
)

#########################################################
### Obtaining the data needed for the two charts 46 & 57
#########################################################
# Keep only rows whose episode.order is 1. Per the original author's note, a
# patient can have several rows across spells, each with its own admission and
# discharge time; only the first episode is of interest here.
dt_spell <- dt_JM2016 %>% dplyr::filter(episode.order == 1)
names(dt_spell)
# The former bare `as.data.frame(dt_spell)` call was dropped: its result was
# never assigned, so all it did was print every row to the console.

# Selecting the variables needed for the event counts.
dt_AdmiD_DischD <- dt_spell[c("pat_code", "start_datetime", "end_datetime", "spell.number")]

# Long format: one row per event. `Event` holds the source column name
# ("start_datetime" = admission, "end_datetime" = discharge) and `EventTime`
# its timestamp. gather() is kept because the script already relies on it;
# tidyr::pivot_longer() is the current equivalent.
# An admission or discharge that falls outside the reporting period is removed
# here; otherwise a spell that merely overlaps the period would contribute
# events dated before or after it.
dt_spell_1 <- dt_AdmiD_DischD %>%
  gather(key = "Event", value = "EventTime", start_datetime, end_datetime) %>%
  dplyr::filter(in_period(EventTime))

# Day of the week of each event; the timestamp itself is not needed afterwards.
dt_spell_1$EventDay <- lubridate::wday(dt_spell_1$EventTime, label = TRUE)
dt_spell_1$EventTime <- NULL

# Total number of events per weekday and event type. `Percent` is the share of
# each event type within its weekday (after summarize() the table is still
# grouped by EventDay, so sum(Count) is per weekday).
sum_for_plot <- dt_spell_1 %>%
  group_by(EventDay, Event) %>%
  dplyr::summarize(Count = n()) %>%
  dplyr::mutate(Percent = Count / sum(Count)) %>%
  dplyr::ungroup()

# Average number of events per calendar day, by weekday and event type.
# Previously this took mean(Count) over groups that each held exactly one
# row, which simply reproduced Count. The mean daily count is the weekday
# total divided by how many times that weekday occurs in the reporting
# period; days without any event therefore count as zero.
days_per_weekday <- table(lubridate::wday(period_days, label = TRUE))

avg_tbl <- sum_for_plot %>%
  dplyr::mutate(
    avg = Count / as.numeric(days_per_weekday[as.character(EventDay)])
  ) %>%
  dplyr::select(EventDay, Event, avg)

#######################################
# check the averages - it is not good #
#######################################

# Values found in avg_tbl$Event (the names of the gathered columns) and the
# label shown for each of them in the legend.
event_labels <- c(start_datetime = "Admissions", end_datetime = "Discharges")

# Bars, lines and points of `avg` per weekday, one series per event type.
p1 <- ggplot(avg_tbl, aes(EventDay, avg, group = Event, shape = Event, colour = Event))
p1 +
  geom_bar(stat = "identity", position = "identity", alpha = 0.1, width = 0.4, colour = "white", lwd = 0.5) +
  geom_line() +
  geom_point() +
  # Breaks have to be values that occur in the data. The earlier breaks
  # ("Admissions", "Discharges") matched nothing in `Event`, so the colours
  # and the legend were not mapped. Values are named to pin blue to
  # admissions and red to discharges regardless of level order.
  scale_colour_manual(name = "Event",
                      breaks = names(event_labels),
                      labels = unname(event_labels),
                      values = c(start_datetime = "blue", end_datetime = "red")) +
  # Same name, breaks and labels for the shape scale so both aesthetics share
  # one legend.
  scale_shape_discrete(name = "Event",
                       breaks = names(event_labels),
                       labels = unname(event_labels)) +
  # Show the week starting on Monday.
  xlim("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") +
  labs(title = "Admissions and Discharges by days of the week, 1st of Jan to 31st of March 2014",
       subtitle = "Number of daily hospital arrivals and discharges(incl.only the emergency cases) by the day of the week.
Note: results are intended for management information only",
       y = "Count", x = "Days of the week", caption = "Source: CLAHRC") +
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 21, b = 0, l = 0)),
        plot.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 7))
	##############################
	# Admissions and Discharges
	##############################

	####################
	#reading the data ##
	####################
	# Load the extract; the first row of the CSV supplies the column names.
	data <- read.csv(file = "df.csv", header = TRUE)

	# Preview the first ten rows.
	head(x = data, n = 10)

	# Remove the column named X, if there is one.
	# NOTE(review): presumably a row-index column left by an earlier write.csv() -
	# confirm against the file.
	data[["X"]] <- NULL
	##################################
	# Importing the libraries I need##
	##################################
	library(lubridate)
	library(tidyverse)
	##################################
	##################################


	#######################################################

	# Function that converts all column names to lowercase

	df_tolower <- function(x) {
	  # Lower-case every column name; the values in `x` are left untouched.
	  lowered_names <- tolower(colnames(x))
	  colnames(x) <- lowered_names
	  x
	}

	#####################################################

	# Working copy of the raw data with lower-case column names.
	df <- df_tolower(x = data)



	########################################
	########################################
	# Function to convert a date-time column (e.g. character or POSIXlt) to POSIXct
	# Convert one column of a data frame to POSIXct.
	#
	# Arguments:
	#   df        A data frame (base data.frame or tibble).
	#   col_name  Single string naming the column to convert.
	#   tz        Time zone handed to as.POSIXct(). The default "" keeps the
	#             previous behaviour (the session's local time zone).
	# Returns `df` with column `col_name` replaced by its POSIXct conversion.
	# Stops with an error when `col_name` is not a single existing column name.
	change_col_to_ct <- function(df, col_name, tz = "") {
	  if (!is.character(col_name) || length(col_name) != 1L ||
	      !col_name %in% names(df)) {
	    stop("`col_name` must name a single existing column of `df`",
	         call. = FALSE)
	  }
	  # `[[` always yields the column as a vector. `df[, col_name]` returns a
	  # one-column tibble when `df` is a tibble, which as.POSIXct() cannot convert.
	  df[[col_name]] <- as.POSIXct(df[[col_name]], tz = tz)
	  df
	}

	##################################
	##################################
	# Example: apply change_col_to_ct() to both date-time columns
	# Convert the admission and discharge timestamps one after the other.
	dt_sub <- change_col_to_ct(df, "start_datetime")
	dt_sub <- change_col_to_ct(dt_sub, "end_datetime")

	# Inspect the resulting column types.
	str(dt_sub)
	#################################


	################################################################################################################
	# subseting data set
	################################################################################################################
	# Columns used by the rest of the analysis.
	dt_rmds <- dt_sub[c(
	  "pat_code", "start_datetime", "end_datetime", "spell.type",
	  "ward_code", "episode.order", "spell.number"
	)]

	# Reporting period: 1 Jan 2014 to 31 Mar 2014 inclusive, as stated in the
	# chart title. Timestamps are tested against the half-open interval
	# [period_start, period_end), so all of 31 March is included and 1 April is
	# not (the previous upper bound of "2014-04-01 23:59:00" let 1 April in).
	period_days <- seq(as.Date("2014-01-01"), as.Date("2014-03-31"), by = "day")
	period_start <- as.POSIXct(paste(min(period_days), "00:00:00"))
	period_end <- as.POSIXct(paste(max(period_days) + 1, "00:00:00"))

	# TRUE where a timestamp lies inside the reporting period; NA counts as FALSE.
	in_period <- function(x) {
	  !is.na(x) & x >= period_start & x < period_end
	}

	# Spells admitted and/or discharged during the reporting period.
	# (The two conditions were previously joined by an escaped `\|`, which is
	# not valid R; the operator is a plain `|`.)
	dt_JM2016 <- subset(
	  dt_rmds,
	  in_period(start_datetime) | in_period(end_datetime)
	)

	#########################################################
	### Obtaining the data needed for the two charts 46 & 57
	#########################################################
	# Keep only rows whose episode.order is 1. Per the original author's note, a
	# patient can have several rows across spells, each with its own admission and
	# discharge time; only the first episode is of interest here.
	dt_spell <- dt_JM2016 %>% dplyr::filter(episode.order == 1)
	names(dt_spell)
	# The former bare `as.data.frame(dt_spell)` call was dropped: its result was
	# never assigned, so all it did was print every row to the console.

	# Selecting the variables needed for the event counts.
	dt_AdmiD_DischD <- dt_spell[c("pat_code", "start_datetime", "end_datetime", "spell.number")]

	# Long format: one row per event. `Event` holds the source column name
	# ("start_datetime" = admission, "end_datetime" = discharge) and `EventTime`
	# its timestamp. gather() is kept because the script already relies on it;
	# tidyr::pivot_longer() is the current equivalent.
	# An admission or discharge that falls outside the reporting period is removed
	# here; otherwise a spell that merely overlaps the period would contribute
	# events dated before or after it.
	dt_spell_1 <- dt_AdmiD_DischD %>%
	  gather(key = "Event", value = "EventTime", start_datetime, end_datetime) %>%
	  dplyr::filter(in_period(EventTime))

	# Day of the week of each event; the timestamp itself is not needed afterwards.
	dt_spell_1$EventDay <- lubridate::wday(dt_spell_1$EventTime, label = TRUE)
	dt_spell_1$EventTime <- NULL

	# Total number of events per weekday and event type. `Percent` is the share of
	# each event type within its weekday (after summarize() the table is still
	# grouped by EventDay, so sum(Count) is per weekday).
	sum_for_plot <- dt_spell_1 %>%
	  group_by(EventDay, Event) %>%
	  dplyr::summarize(Count = n()) %>%
	  dplyr::mutate(Percent = Count / sum(Count)) %>%
	  dplyr::ungroup()

	# Average number of events per calendar day, by weekday and event type.
	# Previously this took mean(Count) over groups that each held exactly one
	# row, which simply reproduced Count. The mean daily count is the weekday
	# total divided by how many times that weekday occurs in the reporting
	# period; days without any event therefore count as zero.
	days_per_weekday <- table(lubridate::wday(period_days, label = TRUE))

	avg_tbl <- sum_for_plot %>%
	  dplyr::mutate(
	    avg = Count / as.numeric(days_per_weekday[as.character(EventDay)])
	  ) %>%
	  dplyr::select(EventDay, Event, avg)

	#######################################
	# check the averages - it is not good #
	#######################################

	# Values found in avg_tbl$Event (the names of the gathered columns) and the
	# label shown for each of them in the legend.
	event_labels <- c(start_datetime = "Admissions", end_datetime = "Discharges")

	# Bars, lines and points of `avg` per weekday, one series per event type.
	p1 <- ggplot(avg_tbl, aes(EventDay, avg, group = Event, shape = Event, colour = Event))
	p1 +
	  geom_bar(stat = "identity", position = "identity", alpha = 0.1, width = 0.4, colour = "white", lwd = 0.5) +
	  geom_line() +
	  geom_point() +
	  # Breaks have to be values that occur in the data. The earlier breaks
	  # ("Admissions", "Discharges") matched nothing in `Event`, so the colours
	  # and the legend were not mapped. Values are named to pin blue to
	  # admissions and red to discharges regardless of level order.
	  scale_colour_manual(name = "Event",
	                      breaks = names(event_labels),
	                      labels = unname(event_labels),
	                      values = c(start_datetime = "blue", end_datetime = "red")) +
	  # Same name, breaks and labels for the shape scale so both aesthetics share
	  # one legend.
	  scale_shape_discrete(name = "Event",
	                       breaks = names(event_labels),
	                       labels = unname(event_labels)) +
	  # Show the week starting on Monday.
	  xlim("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") +
	  labs(title = "Admissions and Discharges by days of the week, 1st of Jan to 31st of March 2014",
	       subtitle = "Number of daily hospital arrivals and discharges(incl.only the emergency cases) by the day of the week.
	Note: results are intended for management information only",
	       y = "Count", x = "Days of the week", caption = "Source: CLAHRC") +
	  theme(axis.title.y = element_text(margin = margin(t = 0, r = 21, b = 0, l = 0)),
	        plot.title = element_text(size = 10, face = "bold"),
	        plot.subtitle = element_text(size = 7))