Created
June 3, 2024 19:30
-
-
Save greg-randall/3d7dde3a37271f72f9608cad2d613026 to your computer and use it in GitHub Desktop.
Remove items that aren't from 2024 from a WordPress export.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Trim a WordPress export (WXR) file down to the items published in one year.

Run as a script, this reads ``wordpress_export.xml`` from the current
directory and writes the trimmed copy to ``trimmed.xml``.
"""
import xml.etree.ElementTree as ET

SOURCE_PATH = 'wordpress_export.xml'
OUTPUT_PATH = 'trimmed.xml'
KEEP_YEAR = '2024'


def _register_namespaces(path):
    """Register every namespace prefix declared in *path* with ElementTree.

    Without this, ElementTree serializes namespaced tags with generated
    prefixes (``ns0:``, ``ns1:``, ...) instead of the prefixes used in the
    input file (``wp:``, ``content:``, ``dc:``, ...).
    """
    for _event, (prefix, uri) in ET.iterparse(path, events=('start-ns',)):
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # ElementTree reserves prefixes of the form "ns<digits>" and
            # refuses to register them; fall back to its generated prefix.
            pass


def trim_export(src=SOURCE_PATH, dst=OUTPUT_PATH, year=KEEP_YEAR):
    """Copy the export at *src* to *dst*, dropping items not from *year*.

    An ``<item>`` is removed when it has a ``<pubDate>`` with text and that
    text does not contain *year*. Items without a ``<pubDate>`` (or with an
    empty one) are kept untouched. Each examined date is printed: removed
    items are prefixed with a tab, kept items are printed as-is.

    NOTE: ElementTree's default parser discards XML comments, and CDATA
    sections are written back as ordinary escaped text.

    Args:
        src: Path of the export file to read.
        dst: Path of the trimmed file to write.
        year: Substring that a ``<pubDate>`` must contain for the item to
            be kept.

    Returns:
        The number of items removed.

    Raises:
        ValueError: If the document has no ``<channel>`` element directly
            under its root.
    """
    _register_namespaces(src)

    # Parse the XML file and get the root
    tree = ET.parse(src)
    root = tree.getroot()

    # Find the <channel> element
    channel = root.find('channel')
    if channel is None:
        raise ValueError(f"no <channel> element found in {src!r}")

    removed = 0
    # findall() returns a list, so removing from the channel while looping
    # over that list is safe.
    for item in channel.findall('item'):
        # Find the <pubDate> element and get its text
        pubdate = item.find('pubDate')
        if pubdate is None or not isinstance(pubdate.text, str):
            continue
        if year not in pubdate.text:
            channel.remove(item)
            removed += 1
            print(f"\t{pubdate.text}")
        else:
            print(pubdate.text)

    # Write the modified XML back out, with an XML declaration and UTF-8
    # encoding so non-ASCII post content is stored as real characters.
    tree.write(dst, encoding='utf-8', xml_declaration=True)
    return removed


if __name__ == '__main__':
    trim_export()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment