Skip to content

Instantly share code, notes, and snippets.

@thedtvn
Last active March 10, 2024 13:25
Show Gist options
  • Save thedtvn/3182efb114229104c6b9b67599c296e5 to your computer and use it in GitHub Desktop.
Save thedtvn/3182efb114229104c6b9b67599c296e5 to your computer and use it in GitHub Desktop.
Download a PDF from Google Drive when downloading is blocked
import io
import re
import json
import requests
import demjson3
from PIL import Image
import urllib.parse as parse
# Script: rebuild a PDF from a Google Drive viewer page.
#
# Flow: read a viewer URL from stdin, extract the `window.viewerData` object
# embedded in the page, fetch the page metadata it points to, download every
# page as an image, and write all images into one PDF with Pillow.
file = input("file url: ")
file_list = []
with requests.Session() as s:
    s.headers.update({"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"})

    # Fetch the viewer page; fail early on an HTTP error instead of trying to
    # parse an error page.
    html_response = s.get(file)
    html_response.raise_for_status()
    html_data = html_response.text

    viewer_match = re.search("window.viewerData = (.*?);", html_data)
    if viewer_match is None:
        raise SystemExit("window.viewerData not found in the page; check the file url")
    viewerData = viewer_match.group(1)
    # Decoded with demjson3 rather than json — presumably because the payload
    # is a JavaScript object literal, not strict JSON (verify against a page).
    viewerData_json = demjson3.decode(viewerData)

    # Drop None entries so the positional indexes used below are stable:
    # [0] is used as the output file name, [2] as the metadata URL, and
    # [3] is compared against the PDF MIME type.
    clean_viewerData = [i for i in viewerData_json["itemJson"] if i is not None]
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if len(clean_viewerData) < 4 or clean_viewerData[3] != "application/pdf":
        raise SystemExit("the file is not a PDF or the viewer data has an unexpected layout")

    url_raw_obj = parse.urlparse(clean_viewerData[2])
    meta_response = s.get(clean_viewerData[2])
    meta_response.raise_for_status()
    # The metadata body starts with the ")]}'\n" guard, which must be
    # stripped before it can be parsed as JSON.
    page_info_raw = meta_response.text.removeprefix(")]}'\n")
    page_info = json.loads(page_info_raw)

    # parse_qs yields a list per key; keep only the first value of each so
    # the query can be re-encoded for the per-page image requests.
    url_qs_dict = {
        key: values[0]
        for key, values in parse.parse_qs(url_raw_obj.query).items()
    }
    url_qs_dict["w"] = page_info["maxPageWidth"]

    page_count = page_info["pages"]
    for page_int in range(page_count):
        url_qs_dict["page"] = str(page_int)
        qr = parse.urlencode(url_qs_dict)
        page_response = s.get(f"https://drive.google.com/viewer2/prod-01/img?{qr}")
        page_response.raise_for_status()
        datar = page_response.content
        img_obj = Image.open(io.BytesIO(datar))
        file_list.append(img_obj)
        print("save page", page_int+1, "of", page_count)

# Guard against a document that reports zero pages; indexing file_list[0]
# would otherwise raise IndexError.
if not file_list:
    raise SystemExit("the document reports no pages; nothing to save")
# The first image is the base document; the rest are appended as extra pages.
file_list[0].save(clean_viewerData[0], format="pdf", save_all=True, append_images=file_list[1:])
print("done")
requests
demjson3
pillow
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment