Created
July 3, 2022 22:27
-
-
Save nashid/9bcdb7a5aac7fd38f90dc8d618caf74d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from zss import Node, simple_distance | |
import lxml.html | |
# Parse the two HTML documents to compare. Each call returns an lxml
# ElementTree; the root element is taken later via getroot().
html1 = lxml.html.parse('file1.html')
# The second input must be a different document from the first: parsing the
# same file twice makes the reported distance trivially 0.
html2 = lxml.html.parse('file2.html')
def tree_from_el(el):
    """Build a zss ``Node`` tree that mirrors the element tree rooted at *el*.

    Each element becomes a ``Node`` labelled with the element's ``tag``; the
    children of an element are converted recursively and attached in
    iteration order.

    NOTE(review): lxml comment / processing-instruction nodes appear to
    report a callable rather than a string as ``.tag`` -- confirm whether
    the labels handed to zss need to be strings.
    """
    root = Node(el.tag)
    # map() is lazy, so each child subtree is built right before it is attached.
    for subtree in map(tree_from_el, el):
        root.addkid(subtree)
    return root
# Convert each parsed document (first html1, then html2) into a zss tree
# starting from its root element, then print the distance zss reports
# between the two trees.
tree1, tree2 = (tree_from_el(doc.getroot()) for doc in (html1, html2))
print(simple_distance(tree1, tree2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment