Skip to content

Instantly share code, notes, and snippets.

@Tokubara
Tokubara / little_crawler.R
Last active July 24, 2021 08:51
rcore-tutorial
# Build the list of chapter index pages to crawl, starting from the
# chapter 0 index page of the rCore tutorial book.
# NOTE(review): read_html()/html_nodes()/html_attr() and
# str_subset()/str_replace() are used unqualified here; presumably rvest and
# stringr are attached before this script runs -- confirm.
chap0_index_url <- "https://rcore-os.github.io/rCore-Tutorial-Book-v3/chapter0/index.html"
html <- read_html(chap0_index_url)

# href attribute of every node matching ".current .internal" on that page.
link_sets0 <- html %>%
  html_nodes(".current .internal") %>%
  html_attr("href")

# The original piped `index_urls` into itself before it was ever assigned,
# which fails with "object 'index_urls' not found"; the scraped hrefs live in
# `link_sets0`, so start from there.
# Only hrefs beginning with "../" turn into absolute URLs below, and the loop
# that consumes `index_urls` passes each entry straight to read_html(), so
# the remaining relative hrefs are dropped first.
# NOTE(review): assumes the "../" links are the other chapters' index pages --
# verify against the live page.
index_urls <- link_sets0 %>%
  str_subset("^\\.\\./") %>%
  str_replace("\\.\\./", "https://rcore-os.github.io/rCore-Tutorial-Book-v3/")

# The chapter 0 index page itself is crawled as well.
index_urls <- c(index_urls, chap0_index_url)
for(index_url in index_urls) { # 是完整的
index_page=read_html(index_url)
all_links=index_page%>% html_nodes(".current .internal") %>% html_attr("href")
in_links=all_links%>%str_subset("^\\d")