Skip to content

Instantly share code, notes, and snippets.

@code-rgb
Created September 5, 2021 07:20
Show Gist options
  • Save code-rgb/b82d7ac1f04d0604f97410e99d896f3f to your computer and use it in GitHub Desktop.
Save code-rgb/b82d7ac1f04d0604f97410e99d896f3f to your computer and use it in GitHub Desktop.
import logging
import os
import re
from pathlib import Path
from typing import Dict, Optional, Pattern, Tuple, Union
from urllib.parse import quote, urlsplit

import requests
from bs4 import BeautifulSoup

try:
    import ujson as json
except ImportError:
    import json
log = logging.getLogger(__name__)
class Pixiv:
    """Resolve pixiv.net artwork IDs/URLs to image URLs and download them."""

    def __init__(self) -> None:
        """Pixiv Downloader"""
        # pixiv's image CDN rejects requests that lack a pixiv Referer, so the
        # same headers are reused for both the page fetch and the download.
        self.headers: Dict[str, str] = {
            "Referer": "https://www.pixiv.net/",
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                " (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
            ),
        }
        self.base_url: str = "https://www.pixiv.net/en/artworks/"
        # Accepts both the "/en/artworks/<id>" and the locale-less
        # "/artworks/<id>" page URLs, with an optional trailing slash.
        self.pixiv_re: Pattern[str] = re.compile(
            r"^https://www\.pixiv\.net/(?:en/)?artworks/(?P<id>\d+)/?$"
        )
        # Must be a tuple of separate suffixes: it is passed to str.endswith().
        self.img_ext: Tuple[str, ...] = (".jpg", ".jpeg", ".png", ".bmp")

    def get(self, query: Union[int, str]) -> Optional[Dict[str, str]]:
        """Get Image from pixiv.net

        Args:
            - query (`Union[int, str]`): Provide Artwork id or URL.

        Raises:
            `ValueError`: On Invalid "query"
            `requests.HTTPError`: When the artwork page answers with an error status.

        Returns:
            `Optional[Dict[str, str]]`: Mapping of size name to image URL on
            success, `None` when the page could not be parsed (the failure
            is logged).

        Example:
            .. code-block:: python

                pixiv_dl = Pixiv()
                img_url = pixiv_dl.get(92166665)
                print(img_url)
                # {
                #   "mini": "https://i.pximg.net/c/48x48/custom-thumb/img/2021/08/22/03/22/47/92166665_p0_custom1200.jpg",
                #   "thumb": "https://i.pximg.net/c/250x250_80_a2/custom-thumb/img/2021/08/22/03/22/47/92166665_p0_custom1200.jpg",
                #   "small": "https://i.pximg.net/c/540x540_70/img-master/img/2021/08/22/03/22/47/92166665_p0_master1200.jpg",
                #   "regular": "https://i.pximg.net/img-master/img/2021/08/22/03/22/47/92166665_p0_master1200.jpg",
                #   "original": "https://i.pximg.net/img-original/img/2021/08/22/03/22/47/92166665_p0.jpg",
                # }
        """
        photo_id: Optional[str] = None
        if isinstance(query, int) and not isinstance(query, bool) and query >= 0:
            photo_id = str(query)
        elif isinstance(query, str):
            if query.isdigit():
                photo_id = query
            elif match := self.pixiv_re.match(query):
                photo_id = match.group("id")
        if not photo_id:
            raise ValueError("InvalidQuery: query must an ID or URL !")

        r = requests.get(self.base_url + photo_id, headers=self.headers)
        # An explicit check instead of `assert`, which is stripped under -O.
        r.raise_for_status()

        soup = BeautifulSoup(r.text, "html.parser")
        try:
            # The page embeds its data as JSON in the "content" attribute of
            # <meta id="meta-preload-data">.
            preload = soup.find("meta", {"id": "meta-preload-data"}).get("content")
            return json.loads(preload)["illust"][photo_id]["urls"]
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing meta tag, missing keys or malformed JSON: best-effort,
            # log the traceback and report failure with None.
            log.exception("Somthing Went Wrong")
            return None

    def _target_path(self, image_url: str, path: Union[Path, str]) -> Path:
        """Work out the file the image at *image_url* should be written to.

        *path* is treated as a directory when it already is one or when it
        has no file suffix (e.g. the default "downloads"); the file name is
        then taken from the URL path. Otherwise *path* is used as the file.
        """
        target = Path(path)
        if not (target.is_dir() or not target.suffix):
            return target
        # Use only the URL's path component so a query string or fragment
        # never ends up in the file name.
        name = quote(os.path.basename(urlsplit(image_url).path)) or "image"
        if not name.lower().endswith(self.img_ext):
            name += ".jpg"
        return target / name

    def download(self, image_url: str, path: Union[Path, str] = "downloads") -> str:
        """Download Image from url

        Args:
            - image_url (`str`): Http Image URL.
            - path (`Union[Path, str]`, *optional*): Directory or filename.
              A path without a file suffix is treated as a directory and is
              created if missing. Defaults to "downloads".

        Raises:
            `requests.HTTPError`: When the server answers with an error status.

        Returns:
            `str`: Downloaded image path (On Success)

        Example:
            .. code-block:: python

                pixiv_dl = Pixiv()
                img_url = pixiv_dl.get(92166665)
                filename = pixiv_dl.download(img_url["original"])
        """
        # The context manager releases the streamed connection even on error.
        with requests.get(image_url, headers=self.headers, stream=True) as r:
            r.raise_for_status()
            filename = self._target_path(image_url, path)
            # Create the destination directory only once the request succeeded.
            filename.parent.mkdir(parents=True, exist_ok=True)
            with filename.open("wb") as outfile:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # skip empty chunks instead of aborting
                        outfile.write(chunk)
        return str(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment