Skip to content

Instantly share code, notes, and snippets.

@modalsoul
Last active November 5, 2018 16:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save modalsoul/c37398b7fd52d4d1362a2f75a6aaa7c9 to your computer and use it in GitHub Desktop.
Save modalsoul/c37398b7fd52d4d1362a2f75a6aaa7c9 to your computer and use it in GitHub Desktop.
File download sample with headless Chrome and Docker
# Compose definition for the scraping container.
version: '2'
services:
  scraping:
    # Tag of the image to run; it matches the tag used by the Makefile's
    # `docker build -t scraping` command.
    image: scraping
    volumes:
      # Bind-mount the project directory read-write at the container path
      # the scraper script downloads into (/opt/app/scraping/dst).
      - .:/opt/app/scraping:rw
FROM python:3.6-alpine3.7

# Root directory for application code; the final WORKDIR is derived from it.
ENV APP_ROOT=${APP_ROOT:-/opt/app}

# Register the Alpine v3.7 main and community repositories used to install
# the Chromium packages below.
RUN echo "http://dl-4.alpinelinux.org/alpine/v3.7/main" >> /etc/apk/repositories && \
    echo "http://dl-4.alpinelinux.org/alpine/v3.7/community" >> /etc/apk/repositories

# Chromium, its WebDriver and a base font. `--no-cache` fetches a fresh
# package index and installs in the same layer, so no stale `apk update`
# layer can be reused and no index cache is left in the image.
RUN apk add --no-cache \
        chromium \
        chromium-chromedriver \
        ttf-freefont

# Selenium client. Pinned for reproducible builds; 3.141.0 is the last 3.x
# release and still supports Python 3.6.
RUN pip install --no-cache-dir selenium==3.141.0

# Japanese font (Noto Sans CJK JP). The trailing slash makes ADD create
# /noto and store the archive under its URL filename.
# NOTE(review): the archive is fetched without integrity verification; add
# a checksum (e.g. `ADD --checksum=sha256:...`) once the digest is known.
ADD https://noto-website.storage.googleapis.com/pkgs/NotoSansCJKjp-hinted.zip /noto/
WORKDIR /noto
# Only the font files are made 0644; the directory keeps the mode given by
# mkdir so that it stays traversable for non-root users.
RUN unzip NotoSansCJKjp-hinted.zip && \
    mkdir -p /usr/share/fonts/noto && \
    cp *.otf /usr/share/fonts/noto && \
    chmod 644 /usr/share/fonts/noto/*.otf && \
    fc-cache -fv

WORKDIR $APP_ROOT/scraping
# Container CLI used for the build; override with `make DOCKER=...`.
DOCKER=docker

# `scraping` names an image, not a file, so always run the recipe.
.PHONY: scraping

# Build the `scraping` image from the Dockerfile in the current directory.
scraping:
	$(DOCKER) build -t scraping -f Dockerfile .
import time
from selenium import webdriver
# Page the browser opens; main() clicks one of its links to start a download.
URL = 'https://www.post.japanpost.jp/zipcode/dl/oogaki-zip.html'
# Directory handed to Chrome as its default download directory. It lies under
# /opt/app/scraping, the path the compose file bind-mounts from the host.
DOWNLOAD_PATH = '/opt/app/scraping/dst'
def main():
    """Open URL in headless Chrome and trigger a file download.

    Chrome is configured to save downloads into DOWNLOAD_PATH without
    prompting. The download is started by clicking a link on the page via
    JavaScript, after which the function waits a fixed three seconds before
    shutting the browser down.

    The browser is always shut down, even when loading the page or running
    the script raises.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # NOTE(review): presumably needed because the container runs Chrome as
    # root, which refuses to start with the sandbox enabled - confirm.
    options.add_argument('--no-sandbox')
    prefs = {"download.default_directory": DOWNLOAD_PATH, "download.prompt_for_download": False}
    options.add_experimental_option("prefs", prefs)

    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(URL)
        # Clicks the 28th anchor on the page. The lookup is purely
        # positional, so it breaks if the page layout changes.
        driver.execute_script("document.getElementsByTagName('a')[27].click()")
        # Fixed grace period for the download; completion is not verified.
        time.sleep(3)
    finally:
        # quit() closes every window and stops chromedriver, so a separate
        # close() call is unnecessary.
        driver.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment