Skip to content

Instantly share code, notes, and snippets.

@seanbreckenridge
Last active May 17, 2021 07:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seanbreckenridge/9a5532561bfe24268c368366e5360cfe to your computer and use it in GitHub Desktop.
Save seanbreckenridge/9a5532561bfe24268c368366e5360cfe to your computer and use it in GitHub Desktop.
cleans up my MAL export so that it's importable to anilist
#!/usr/bin/env python3
# cleans up my MAL export according to this thread:
# https://anilist.co/forum/thread/3291
# so that it's importable
# otherwise the graphql request fails with a 413 request entity too large
# since the anilist page just inlines the XML contents into the gql query
import sys
import typing
from pathlib import Path
import lxml.etree as ET
import click
# Child tags of each <anime> element that survive the cleanup;
# any child whose tag is not listed here is removed by fix().
KEEP_ATTRS = {
    "series_animedb_id",
    "series_title",
    "my_watched_episodes",
    "my_start_date",
    "my_finish_date",
    "my_score",
    "my_status",
}
def fix(
    from_file: Path,
    keep_tags: typing.Optional[typing.Collection[str]] = None,
) -> bytes:
    """
    Strip a MAL anime-list XML export down to the fields worth importing.

    Removes the <myinfo> element from the document root (when there is one)
    and, inside every top-level <anime> element, removes each child whose
    tag is not in keep_tags.

    from_file: path to the (uncompressed) XML export
    keep_tags: child tags to keep inside each <anime>; defaults to KEEP_ATTRS

    Returns the serialized document exactly as ET.tostring(root) produces it,
    which is bytes (not str) because no encoding is requested.
    """
    if keep_tags is None:
        keep_tags = KEEP_ATTRS
    root = ET.parse(str(from_file)).getroot()
    # find() returns None when the export has no <myinfo>;
    # passing None to remove() would raise a TypeError
    myinfo = root.find("myinfo")
    if myinfo is not None:
        root.remove(myinfo)
    for anime_tag in root.findall("anime"):
        # iterate over a snapshot of the children so that removing one
        # can never cause a sibling to be skipped
        for anime_info in list(anime_tag):
            if anime_info.tag not in keep_tags:
                anime_tag.remove(anime_info)
    return ET.tostring(root)
@click.command()
@click.argument("XML_FILE")
def main(xml_file: str) -> None:
    """
    pass the xml file from MAL as the first argument
    prints results to STDOUT
    """
    # Validate with an explicit raise rather than assert: asserts are stripped
    # under `python -O`, and click reports a UsageError as a regular CLI error
    # message instead of an AssertionError traceback.
    if not xml_file.endswith(".xml"):
        raise click.UsageError("Pass the xml file, not the .xml.gz file")
    click.echo(fix(Path(xml_file)))
if __name__ == "__main__":
    # run the CLI only when executed as a script; prog_name is handed to
    # click as the program name to use instead of the script's file name
    main(prog_name="anilist_413")
@seanbreckenridge
Copy link
Author

Still times out...

@seanbreckenridge
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment