Created
September 3, 2021 01:27
-
-
Save geotheory/b58cb09b941c0c75016f9e1d71efad73 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest)

#' Extract the text that belongs directly to a node, ignoring its descendants
#'
#' `html_text()` concatenates the text of a node and of everything nested
#' inside it. This helper returns only the node's own text, i.e. the text
#' nodes that are its direct children, concatenated in document order.
#'
#' @param x An `xml_node` (e.g. the result of `html_node()`) or an
#'   `xml_nodeset`.
#' @return A character vector with one string per input node; `""` for a
#'   node that has no direct text.
html_text_parent <- function(x) {
  own_text <- function(node) {
    # "./text()" matches only text nodes that are direct children of `node`.
    # Selecting them (instead of deleting the element children and reading
    # what is left) leaves the document untouched: xml2 nodes have reference
    # semantics, so removing children from a copy would alter `x` as well.
    direct <- xml2::xml_find_all(node, "./text()")
    paste0(xml2::xml_text(direct), collapse = "")
  }

  if (inherits(x, "xml_nodeset")) {
    return(vapply(x, own_text, character(1), USE.NAMES = FALSE))
  }
  own_text(x)
}

#-------------------------
# Usage
h <- '<ul>
<li id="target">
text to extract
<ul><li>text to ignore</li><li>this too</li></ul>
<ul><li>text to ignore</li><li>this too</li></ul>
</li>
</ul>'
doc <- xml2::read_html(h)
x <- html_node(doc, "#target")
html_text(x)
#> [1] "\ntext to extract\ntext to ignorethis tootext to ignorethis too"
html_text_parent(x)
#> [1] "\ntext to extract\n"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment