# mattparker-wf/collapse_time_idea.r

## collapse_time_idea.r
# An alternative approach
# First - a quick function for counting unique values that excludes NAs

#' Count the distinct non-missing values in a vector
#'
#' @param x A vector; `NA` entries are dropped before counting.
#' @return An integer: the number of unique values left once `NA`s are removed.
count.unique <- function(x) {
  non_missing <- x[!is.na(x)]
  length(unique(non_missing))
}

# Compare:
length(unique(NA))  # 1 - unique() keeps NA as a value
count.unique(NA)    # 0 - NA is dropped before counting


# Then, using plyr to aggregate: for each (Account.Name, Quarter.End,
# filing.estimate) group of collapsed_history, summarise the billable timelog
# entries dated between Quarter.End and filing.estimate (inclusive).
collapsed_timelog <- ddply(
  collapsed_history,
  .variables = c("Account.Name", "Quarter.End", "filing.estimate"),
  .fun = function(x) {

    # Every row of x shares the grouping values, so take them once as scalars
    # instead of comparing Date against a (possibly recycled) column.
    window_start <- x$Quarter.End[1]
    window_end <- x$filing.estimate[1]

    # Grab the appropriate subset of timelog. Rows need a known Date inside
    # the window; the previous `is.na(Date)` condition contradicted the range
    # checks and always produced an empty subset.
    x_timelog <- subset(
      timelog,
      subset = Account.Name %in% x$Account.Name &
        Billable %in% 1 &
        !is.na(Date) &
        Date >= window_start &
        Date <= window_end
    )

    # Aggregate using summarise - generates a one-row data.frame with just
    # the two variables named below (both are 0 when no rows matched)
    summarise(
      x_timelog,
      billable_time = sum(Hours),
      concurrent_services = count.unique(Services.ID)
    )
  }
)


# Then merging the aggregated results back onto collapsed_history.
# Join on all three columns the aggregation was grouped by: leaving
# Quarter.End out of `by` makes merge() emit Quarter.End.x / Quarter.End.y
# and cross-matches rows when an account has the same filing.estimate in
# more than one quarter.
collapsed_history_time <- merge(
  x = collapsed_history,
  y = collapsed_timelog,
  by = c("Account.Name", "Quarter.End", "filing.estimate"),
  all = TRUE
)
	# An alternative approach
	# First - a quick function for counting unique values that excludes NAs

	#' Count the distinct non-missing values in a vector
	#'
	#' @param x A vector; `NA` entries are dropped before counting.
	#' @return An integer: the number of unique values left once `NA`s are removed.
	count.unique <- function(x) {
	  non_missing <- x[!is.na(x)]
	  length(unique(non_missing))
	}

	# Compare:
	length(unique(NA))  # 1 - unique() keeps NA as a value
	count.unique(NA)    # 0 - NA is dropped before counting


	# Then, using plyr to aggregate: for each (Account.Name, Quarter.End,
	# filing.estimate) group of collapsed_history, summarise the billable timelog
	# entries dated between Quarter.End and filing.estimate (inclusive).
	collapsed_timelog <- ddply(
	  collapsed_history,
	  .variables = c("Account.Name", "Quarter.End", "filing.estimate"),
	  .fun = function(x) {

	    # Every row of x shares the grouping values, so take them once as scalars
	    # instead of comparing Date against a (possibly recycled) column.
	    window_start <- x$Quarter.End[1]
	    window_end <- x$filing.estimate[1]

	    # Grab the appropriate subset of timelog. Rows need a known Date inside
	    # the window; the previous `is.na(Date)` condition contradicted the range
	    # checks and always produced an empty subset.
	    x_timelog <- subset(
	      timelog,
	      subset = Account.Name %in% x$Account.Name &
	        Billable %in% 1 &
	        !is.na(Date) &
	        Date >= window_start &
	        Date <= window_end
	    )

	    # Aggregate using summarise - generates a one-row data.frame with just
	    # the two variables named below (both are 0 when no rows matched)
	    summarise(
	      x_timelog,
	      billable_time = sum(Hours),
	      concurrent_services = count.unique(Services.ID)
	    )
	  }
	)


	# Then merging the aggregated results back onto collapsed_history.
	# Join on all three columns the aggregation was grouped by: leaving
	# Quarter.End out of `by` makes merge() emit Quarter.End.x / Quarter.End.y
	# and cross-matches rows when an account has the same filing.estimate in
	# more than one quarter.
	collapsed_history_time <- merge(
	  x = collapsed_history,
	  y = collapsed_timelog,
	  by = c("Account.Name", "Quarter.End", "filing.estimate"),
	  all = TRUE
	)