Skip to content

Instantly share code, notes, and snippets.

@ohaiibuzzle
Last active July 11, 2023 19:24
Show Gist options
  • Save ohaiibuzzle/b21a06b38b12e73153db9504c9931066 to your computer and use it in GitHub Desktop.
Save ohaiibuzzle/b21a06b38b12e73153db9504c9931066 to your computer and use it in GitHub Desktop.
import urllib.request
import html.parser
import json
# Base URL of the daily illustration ranking; the page number is appended to
# it. The literal is written as adjacent string pieces, which Python joins
# into a single string.
TARGET_URL = (
    "http://www.pix" "iv.net/ran" "king.php" "?mode=daily&content=illust&p="
)
# Number of ranking pages to fetch.
PAGES = 4
# Get a UA from useragents.me/api: the "ua" field of the first entry in the
# JSON response's "data" list. The response is read inside a ``with`` block
# so the HTTP connection is closed as soon as the body has been consumed.
# NOTE: this performs a network request at import time.
with urllib.request.urlopen("https://www.useragents.me/api") as _ua_response:
    USER_AGENT = json.loads(_ua_response.read())["data"][0]["ua"]
class AnimePageParser(html.parser.HTMLParser):
    """Collect the ``href`` of every work-link anchor fed to the parser.

    An ``<a>`` tag counts as a work link when its ``class`` attribute
    contains the text ``"work _work"``. Matching hrefs are appended to
    ``self.works`` in the order encountered, and the list keeps growing
    across successive ``feed()`` calls on the same instance.
    """

    def __init__(self):
        super().__init__()
        # hrefs of the matching <a> tags seen so far.
        self.works = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an ``<a>`` tag whose class marks a work link.

        Args:
            tag: Tag name as supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes;
                a value is ``None`` for attributes written without one.
        """
        if tag != "a":
            return
        # Build the attribute mapping once instead of once per lookup.
        attributes = dict(attrs)
        class_attr = attributes.get("class")
        if class_attr is None or "work _work" not in class_attr:
            return
        href = attributes.get("href")
        # A matching anchor without a usable href would otherwise put None
        # (or "") into ``works`` and break consumers that call str methods
        # on every entry.
        if href:
            self.works.append(href)
def main():
    """Print a pixiv.cat image URL for every work link on the ranking pages.

    Requests pages 1 through ``PAGES`` of ``TARGET_URL`` with ``USER_AGENT``
    as the User-Agent header, extracts the work links from each page with
    ``AnimePageParser``, and prints one
    ``http://pixiv.cat/<last path segment>.jpg`` line per link.
    """
    works = []
    for page in range(1, PAGES + 1):
        request = urllib.request.Request(
            TARGET_URL + str(page), headers={"User-Agent": USER_AGENT}
        )
        # Use a fresh parser for every page: AnimePageParser.works keeps
        # growing across feed() calls, so extending from one shared parser
        # after each page would add the earlier pages' links again.
        parser = AnimePageParser()
        with urllib.request.urlopen(request) as res:
            parser.feed(res.read().decode("utf-8"))
        # Process any data still buffered by the parser before reading works.
        parser.close()
        works.extend(parser.works)
    for work in works:
        # An anchor without an href yields no usable link; skip it rather
        # than fail on .rsplit().
        if not work:
            continue
        # convert them to pixiv.cat links
        print("http://pixiv.cat/" + work.rsplit("/", 1)[-1] + ".jpg")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment