Skip to content

Instantly share code, notes, and snippets.

@emiliodallatorre
Created May 27, 2021 10:57
Show Gist options
  • Save emiliodallatorre/170ffed5c0ded8f8167c9407b5c3d721 to your computer and use it in GitHub Desktop.
Save emiliodallatorre/170ffed5c0ded8f8167c9407b5c3d721 to your computer and use it in GitHub Desktop.
Simple utility that puts all the children of each <wp:author> tag on a single line in a WordPress WXR/XML export
import os
def main():
    """Collapse every ``<wp:author>`` element of the local WXR exports onto one line.

    Every ``*.xml`` file in the current working directory (except files that
    already end in ``.correct.xml``) is copied to
    ``./trimmed/<name>.correct.xml``.  Lines outside a ``<wp:author>`` element
    are copied verbatim; the lines making up a ``<wp:author>`` element are
    stripped of surrounding whitespace and joined into a single output line.

    The ``trimmed`` directory is created when it does not exist yet.
    """
    source_suffix = ".xml"
    target_suffix = ".correct.xml"
    output_dir = "./trimmed/"

    export_file_names = [
        f for f in os.listdir(".")
        if f.endswith(source_suffix) and not f.endswith(target_suffix)
    ]

    # Opening the output files fails if the target directory is missing.
    os.makedirs(output_dir, exist_ok=True)

    for export_file_name in export_file_names:
        print(f"Going with: {export_file_name}")

        # Swap only the trailing extension; str.replace would also rewrite
        # any ".xml" that happens to occur in the middle of the name.
        edited_file_name = (
            export_file_name[:-len(source_suffix)] + target_suffix
        )

        # WXR exports are UTF-8; being explicit avoids depending on the
        # platform's default encoding.  The context manager closes both
        # handles even when an error interrupts processing.
        with open(export_file_name, "r", encoding="utf-8") as export_file, \
                open(output_dir + edited_file_name, "w",
                     encoding="utf-8") as edited_file:
            # None while outside an author element, otherwise the stripped
            # fragments collected so far.
            author_parts = None

            for export_file_line in export_file:
                if author_parts is None:
                    if "<wp:author>" not in export_file_line:
                        # Ordinary line (this includes a stray closing tag
                        # without a matching opening one): copy it untouched.
                        edited_file.write(export_file_line)
                        continue
                    author_parts = []

                author_parts.append(export_file_line.strip())

                # Checked on the opening line as well, so an element that is
                # already on a single line does not leave the buffer open.
                if "</wp:author>" in export_file_line:
                    edited_file.write("".join(author_parts) + "\n")
                    author_parts = None

            if author_parts is not None:
                # Unterminated element at end of file: emit what was
                # collected instead of silently dropping it.
                edited_file.write("".join(author_parts) + "\n")
# Run the conversion only when this file is executed as a script, so that
# importing the module does not rewrite files in the working directory.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment