# expersso/ldply_example

## ldply_example
# Scrape one ranking table from distrowatch.com for each `dataspan` value and
# stack the per-span tables into a single data frame.
#
# plyr::ldply() applies the function to every element and row-binds the
# results into one clean data frame, which avoids the
# lapply() + do.call(rbind, ...) idiom. Its .progress argument shows a text
# progress bar while the pages are being fetched.
#
# Requires plyr (ldply), rvest (read_html, html_nodes, html_table),
# dplyr (mutate, as_tibble), stringr (str_replace_all) and the %>% pipe
# to be attached before this runs.
#
# NOTE(review): the values 2002:2014 look like calendar years and
# 52/26/13/4/1 like trailing windows in weeks -- confirm against the site.
df <- ldply(c(2002:2014, 52, 26, 13, 4, 1), function(x) {
  sprintf("http://distrowatch.com/index.php?dataspan=%d", x) %>%
    # read_html() replaces rvest's html(), which was deprecated and later
    # removed from the package.
    read_html() %>%
    html_nodes(
      xpath = "//table[@class = 'News' and @style = 'direction: ltr'][2]"
    ) %>%
    # Keep only the first node matched by the XPath query.
    .[[1]] %>%
    html_table() %>%
    # Drop the first two rows and the first column of the parsed table; the
    # two columns that remain are renamed below.
    .[-(1:2), -1] %>%
    setNames(c("distro", "npd")) %>%
    mutate(
      # Strip every thousands separator before converting. str_replace()
      # would only remove the first comma, turning a value such as
      # "1,234,567" into NA.
      npd = npd %>% str_replace_all(",", "") %>% as.numeric(),
      # Record which dataspan value this table was fetched for.
      time = x
    ) %>%
    as_tibble()
}, .progress = "text")

# First rows of the combined result:
head(df)
#     distro npd time
# 1 Mandrake 473 2002
# 2  Red Hat 453 2002
# 3   Gentoo 326 2002
# 4   Debian 311 2002
# 5 Sorcerer 253 2002
# 6     SuSE 250 2002
	# NOTE(review): this tab-indented section repeats the scrape performed by
	# the section above it, so running the whole file fetches every page twice
	# and reassigns `df`. Presumably a paste/export artifact -- confirm and
	# keep only one copy.
	#
	# Scrape one ranking table from distrowatch.com for each `dataspan` value
	# and stack the per-span tables into a single data frame.
	#
	# plyr::ldply() applies the function to every element and row-binds the
	# results into one clean data frame, which avoids the
	# lapply() + do.call(rbind, ...) idiom. Its .progress argument shows a
	# text progress bar while the pages are being fetched.
	#
	# Requires plyr (ldply), rvest (read_html, html_nodes, html_table),
	# dplyr (mutate, as_tibble), stringr (str_replace_all) and the %>% pipe
	# to be attached before this runs.
	df <- ldply(c(2002:2014, 52, 26, 13, 4, 1), function(x) {
	  sprintf("http://distrowatch.com/index.php?dataspan=%d", x) %>%
	    # read_html() replaces rvest's html(), which was deprecated and later
	    # removed from the package.
	    read_html() %>%
	    html_nodes(
	      xpath = "//table[@class = 'News' and @style = 'direction: ltr'][2]"
	    ) %>%
	    # Keep only the first node matched by the XPath query.
	    .[[1]] %>%
	    html_table() %>%
	    # Drop the first two rows and the first column of the parsed table;
	    # the two columns that remain are renamed below.
	    .[-(1:2), -1] %>%
	    setNames(c("distro", "npd")) %>%
	    mutate(
	      # Strip every thousands separator before converting. str_replace()
	      # would only remove the first comma, turning a value such as
	      # "1,234,567" into NA.
	      npd = npd %>% str_replace_all(",", "") %>% as.numeric(),
	      # Record which dataspan value this table was fetched for.
	      time = x
	    ) %>%
	    as_tibble()
	}, .progress = "text")

	# First rows of the combined result:
	head(df)
	#     distro npd time
	# 1 Mandrake 473 2002
	# 2  Red Hat 453 2002
	# 3   Gentoo 326 2002
	# 4   Debian 311 2002
	# 5 Sorcerer 253 2002
	# 6     SuSE 250 2002