Last active
November 19, 2021 15:27
-
-
Save pafonta/d33a0d5d849932f8ceab8b711d995497 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For an example of use, see https://gist.github.com/pafonta/d33a0d5d849932f8ceab8b711d995497#gistcomment-3965575. | |
"""Find MeSH terms in the MeSH tree simply (i.e. without using a graph).""" | |
from __future__ import annotations | |
import json | |
from collections.abc import Iterator | |
from xml.etree.ElementTree import Element # nosec | |
from defusedxml import ElementTree | |
def is_child(mesh: Element, roots: list[str]) -> bool:
    """Check if a MeSH term is a child of the given MeSH terms in the MeSH tree.

    The check is a plain string prefix match: the term matches as soon as the
    text of one of its ``TreeNumberList/TreeNumber`` elements starts with one
    of the given roots.

    Parameters
    ----------
    mesh
        The MeSH term.
    roots
        The root MeSH terms (as tree number prefixes) whose children are
        looked for.

    Returns
    -------
    bool
        True, if the MeSH term is under one of the root MeSH terms or is one of them.
    """
    # 'str.startswith' accepts a tuple of candidates, so all the roots are
    # tested in a single call per tree number.
    prefixes = tuple(roots)
    return any(
        # An empty '<TreeNumber/>' element has 'text' set to None: it cannot
        # match any root, so it is ignored instead of raising AttributeError.
        node.text is not None and node.text.startswith(prefixes)
        for node in mesh.iterfind("TreeNumberList/TreeNumber")
    )
def collect(mesh_tree: ElementTree, roots: list[str]) -> Iterator[str]:
    """Collect MeSH terms which are children of the given MeSH terms in the MeSH tree.

    Every ``DescriptorRecord`` element of the tree is tested with ``is_child``.
    For each match, the text of its ``DescriptorName/String`` element is
    yielded with runs of whitespace collapsed to single spaces.

    Parameters
    ----------
    mesh_tree
        The MeSH tree.
    roots
        The root MeSH terms whose children are looked for.

    Yields
    ------
    str
        The MeSH terms which are under the root MeSH terms, including them.
        Matching records without a usable name are skipped.
    """
    for record in mesh_tree.iter("DescriptorRecord"):
        if not is_child(record, roots):
            continue
        # 'findtext' returns the default when the element is missing and an
        # empty string when it has no text, so a malformed record cannot
        # raise AttributeError here.
        raw_name = record.findtext("DescriptorName/String", default="")
        name = " ".join(raw_name.split())
        if name:
            yield name
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example of use
Import utility functions:
Parse the MeSH tree:
Define the MeSH terms to use as roots:
Collect the child MeSH terms (the root ones are included too):
[Optional] Save the collected MeSH terms: