Skip to content

Instantly share code, notes, and snippets.

@vbkaisetsu
Last active August 12, 2021 07:46
Show Gist options
  • Save vbkaisetsu/2aa73ebec99cb0b71f17097571af0acb to your computer and use it in GitHub Desktop.
Save vbkaisetsu/2aa73ebec99cb0b71f17097571af0acb to your computer and use it in GitHub Desktop.
RustでHTMLをパースし,編集し,書き出す (html5ever)
use std::cell::RefCell;
use std::rc::Rc;
use html5ever::{Attribute, LocalName, QualName};
use html5ever::driver::ParseOpts;
use html5ever::{local_name, ns, namespace_url};
use html5ever::{parse_document, parse_fragment};
use html5ever::rcdom::{Handle, Node, NodeData, RcDom};
use html5ever::serialize;
use html5ever::serialize::SerializeOpts;
use html5ever::tendril::{TendrilSink, StrTendril};
/// Recursively walks the subtree rooted at `node` and prints the contents of
/// every text node to standard output, one `println!` per text node.
///
/// The node itself is examined first, then each of its children in order.
/// Nodes of any kind other than `Text` produce no output of their own, but
/// their children are still visited.
fn search_iter(node: &Handle) {
    match node.data {
        NodeData::Text { ref contents } => {
            // The borrowed tendril is printed directly; no intermediate
            // `String` needs to be allocated just for display.
            println!("{}", contents.borrow());
        }
        // Every other variant is listed explicitly (rather than `_`) so the
        // match stays exhaustive, but no fields are bound because none are used.
        NodeData::Element { .. }
        | NodeData::Document { .. }
        | NodeData::Doctype { .. }
        | NodeData::Comment { .. }
        | NodeData::ProcessingInstruction { .. } => {}
    }

    for child in node.children.borrow().iter() {
        // `child` is already a `&Handle`, so it is passed through as-is.
        search_iter(child);
    }
}
fn create_text_node(s: &str) -> Rc<Node> {
Node::new(NodeData::Text {
contents: RefCell::new(s.into()),
})
}
/// Builds a new element node with the given qualified `name` and `attrs`.
///
/// The element is created with no template contents and with the MathML
/// `annotation-xml` integration-point flag set to `false`. This function
/// does not attach the node to any parent.
fn create_element_node(name: QualName, attrs: Vec<Attribute>) -> Rc<Node> {
    let data = NodeData::Element {
        name,
        attrs: RefCell::new(attrs),
        template_contents: None,
        mathml_annotation_xml_integration_point: false,
    };
    Node::new(data)
}
/// Builds the qualified name for a tag called `name`, using no prefix and
/// the `ns!(html)` namespace.
fn create_tag_name(name: &str) -> QualName {
    let local = LocalName::from(name);
    QualName::new(None, ns!(html), local)
}
/// Builds an attribute called `name` holding `value`.
///
/// The attribute's qualified name has no prefix and uses the namespace
/// produced by `ns!()` with no argument.
fn create_attribute(name: &str, value: &str) -> Attribute {
    let name = QualName::new(None, ns!(), LocalName::from(name));
    let value = StrTendril::from(value);
    Attribute { name, value }
}
/// Demonstrates a parse → edit → serialize round trip with html5ever:
/// parses an HTML fragment, prints its text nodes, replaces the first child
/// of the `<span>` with a new `<a>` element, adds a `style` attribute to the
/// `<span>`, and prints the serialized result.
fn main() {
    let html_data = "これは<span>テスト</span>です。";

    // The input is parsed as a fragment in the context of a <body> element.
    // To parse a complete document instead, build the parser with
    // `parse_document(RcDom::default(), ParseOpts::default())` and serialize
    // `&dom.document` rather than its first child.
    let context_name = QualName::new(None, ns!(html), local_name!("body"));
    let dom = parse_fragment(RcDom::default(), ParseOpts::default(), context_name, vec![])
        .one(html_data);

    // Keep the borrow guard in a named binding so the handle below stays valid.
    // NOTE(review): the first child of the document is presumably the root
    // element wrapping the parsed fragment — confirm against html5ever docs.
    let document_children = dom.document.children.borrow();
    let html_node = &document_children[0];

    // Print every text node before the tree is edited.
    search_iter(html_node);

    // For the input above, the second child of the root is expected to be
    // the <span> (the first being the leading text).
    let root_children = html_node.children.borrow();
    let span_node = &root_children[1];

    // Build <a>アンカー</a> and put it where the span's first child was.
    let a_node = create_element_node(create_tag_name("a"), vec![]);
    a_node.children.borrow_mut().push(create_text_node("アンカー"));
    span_node.children.borrow_mut()[0] = a_node;

    // Attach an inline style to the span, if it really is an element.
    if let NodeData::Element { attrs, .. } = &span_node.data {
        attrs.borrow_mut().push(create_attribute("style", "color: #ff0000"));
    }

    // Serialize starting from the root handle and print the result as UTF-8.
    let mut bytes = vec![];
    serialize(&mut bytes, html_node, SerializeOpts::default()).unwrap();
    println!("{}", String::from_utf8(bytes).unwrap());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment