# Scrape every HTML page named in `f` out of the zip archive `z` and build:
#   d: one row per page, with `title` (text of the first <h2>) and `date`
#      (text of the node matched by the XPath //p[2])
#   e: an undirected, weighted edge list of links between pages, with
#      columns `i`, `j` (page names without ".html") and `w` (link count)
# `f` and `z` are defined outside this section.

# Pages are visited in reverse order of `f`, so `d` keeps that row order.
files <- rev(f)

# Collect per-page results in preallocated lists and bind once at the end,
# instead of growing the data frames with rbind() on every iteration.
pages <- vector("list", length(files))
links <- vector("list", length(files))

for (k in seq_along(files)) {
  i <- files[[k]]
  h <- read_html(unz(z, i))

  pages[[k]] <- data_frame(
    title = html_node(h, "h2") %>% html_text(),
    date = html_node(h, xpath = "//p[2]") %>% html_text()
  )

  # Keep only hrefs that start with four digits and end in "html",
  # optionally followed by a "#anchor".
  j <- html_nodes(h, xpath = "//a") %>% html_attr("href")
  j <- j[grepl("^\\d{4}(.*)html(#\\w+)?$", j)]

  if (length(j) > 0) {
    links[[k]] <- data_frame(i, j)
  }
}

d <- bind_rows(pages)
e <- bind_rows(links)

# No page yielded a matching link: start from an empty, correctly typed
# edge list so the steps below still produce the (i, j, w) columns.
if (nrow(e) == 0) {
  e <- data_frame(i = character(0), j = character(0))
}

# Reduce both endpoints to bare page names by dropping ".html" and anything
# after it (e.g. a "#anchor").
e$i <- gsub("(.*)\\.html(.*)?", "\\1", e$i)
e$j <- gsub("(.*)\\.html(.*)?", "\\1", e$j)

# Drop self-links only AFTER normalising the names; otherwise a page that
# links to one of its own anchors ("page.html#sec") slips through as a
# self-loop.
e <- e[e$i != e$j, ]

# Make each edge undirected by ordering its endpoints, then count duplicates.
# Working on the two columns directly (rather than pasting them into an
# "a_b" key and splitting it again) keeps page names that contain
# underscores intact.
e <- data_frame(i = pmin(e$i, e$j), j = pmax(e$i, e$j)) %>%
  count(i, j) %>%
  rename(w = n)