Created
June 20, 2021 13:53
-
-
Save shimakaze-git/23677b34393bcd7f3ed2cffda07a0671 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import time | |
import zipfile | |
import sys | |
from selenium import webdriver | |
from webdriver_manager.chrome import ChromeDriverManager | |
def edinet_code_dl():
    """Download the EDINET code list zip into the current working directory.

    Starts a headless Chrome session through Selenium, opens the EDINET
    code-list page and invokes the page's own JavaScript download action.
    The browser is always shut down, even when navigation or the script
    call raises.

    Returns:
        None. The only result is the file Chrome saves to disk.
    """
    chrome_options = webdriver.ChromeOptions()

    # Save downloads next to where the script is run. Chrome expects an
    # absolute path for this preference; a relative "./" is not reliably
    # honoured, so resolve it up front.
    download_dir = os.path.abspath("./")
    chrome_options.add_experimental_option(
        "prefs",
        {"download.default_directory": download_dir},
    )
    # Run without a visible browser window.
    chrome_options.add_argument("--headless")

    url = (
        "https://disclosure.edinet-fsa.go.jp/E01EW/BLMainController.jsp"
        "?uji.bean=ee.bean.W1E62071.EEW1E62071Bean&uji.verb"
        "=W1E62071InitDisplay&TID=W1E62071&"
        "PID=W0EZ0001&SESSIONKEY=&lgKbn=2&dflg=0&iflg=0"
    )

    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        options=chrome_options,
    )
    try:
        driver.maximize_window()
        # Open the EDINET code list page.
        driver.get(url)
        # Trigger the download through the function the page itself exposes.
        driver.execute_script(
            "EEW1E62071EdinetCodeListDownloadAction('lgKbn=2&dflg=0&iflg=0&dispKbn=1');"
        )
        # Give Chrome time to finish writing the file before it is closed.
        time.sleep(5)
    finally:
        # Always release the browser and chromedriver processes.
        driver.quit()
def unzip_edinet_code_dl_info():
    """Extract the most recently created zip in the current directory.

    Only regular ``*.zip`` files are considered, so an unrelated file that
    happens to be newer is never opened or deleted. The archive is extracted
    into the current directory and then removed.

    Returns:
        str: Path of an extracted file. A ``.csv`` member is preferred;
        otherwise the first extracted file is returned.

    Raises:
        FileNotFoundError: If the current directory contains no zip file.
        ValueError: If the archive contains no files.
        zipfile.BadZipFile: If the newest ``*.zip`` is not a valid archive.
    """
    zip_paths = [path for path in glob.glob("./*.zip") if os.path.isfile(path)]
    if not zip_paths:
        raise FileNotFoundError(
            "no downloaded zip archive found in the current directory"
        )

    # The newest archive by creation/change time is taken to be the download.
    latest_zip = max(zip_paths, key=os.path.getctime)

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(latest_zip) as archive:
        # extract() returns the path it actually wrote, so there is no need
        # to guess the result by looking for the newest file afterwards.
        extracted = [archive.extract(member, "./") for member in archive.namelist()]

    extracted_files = [path for path in extracted if os.path.isfile(path)]
    if not extracted_files:
        raise ValueError("zip archive %r contains no files" % latest_zip)

    # Remove the archive only after a successful extraction.
    os.remove(latest_zip)

    csv_files = [path for path in extracted_files if path.lower().endswith(".csv")]
    return csv_files[0] if csv_files else extracted_files[0]
def get_edinet_code_list():
    """Download the EDINET code list and save it as a UTF-8 CSV.

    Runs the browser download, extracts the archive, then rewrites the
    extracted cp932-encoded CSV as ``EdinetcodeDlInfo_utf8.csv`` in the
    current directory, dropping the first line of the source file.

    Returns:
        None. The converted file on disk is the only result.
    """
    edinet_code_dl()

    # Path of the extracted EdinetcodeDlInfo.csv.
    csv_filepath = unzip_edinet_code_dl_info()

    # Read everything first so the output file is only created once the
    # source has decoded successfully; ``with`` closes the handle on error.
    with open(csv_filepath, "r", encoding="cp932") as cp932_file:
        rows = cp932_file.readlines()

    utf8_file_name = "EdinetcodeDlInfo_utf8.csv"
    with open(utf8_file_name, "w", encoding="utf-8") as utf8_file:
        # Skip the first line, which is not needed. Slicing also copes with
        # an empty source file instead of raising IndexError.
        utf8_file.writelines(rows[1:])
def main(args):
    """Script entry point: fetch the EDINET code list CSV.

    ``args`` is accepted so the caller can pass ``sys.argv``, but it is not
    used by this function.
    """
    get_edinet_code_list()
# Run the download-and-convert pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment