Skip to content

Instantly share code, notes, and snippets.

@tak-akashi
Created July 16, 2018 00:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tak-akashi/05296f0de7fb8e0bc3cb6facb739db64 to your computer and use it in GitHub Desktop.
Save tak-akashi/05296f0de7fb8e0bc3cb6facb739db64 to your computer and use it in GitHub Desktop.
東京証券取引所「空売り比率」推移
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = 'https://www.jpx.co.jp/markets/statistics-equities/short-selling/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ISO-8859-1'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch the statistics page. The timeout keeps the cell from hanging forever\n",
"# on a stalled connection, and raise_for_status() stops early on an HTTP error\n",
"# instead of parsing an error page.\n",
"res = requests.get(url, timeout=30)\n",
"res.raise_for_status()\n",
"# Show the encoding that requests picked for the response.\n",
"res.encoding"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# 東証のホームページだとres.encoding = 'ISO-8859-1'となり、res.textが文字化けするため、\n",
"# 以下の行を入れる。そうすると、res.encoding = 'utf-8'となる。\n",
"res.encoding = res.apparent_encoding"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"soup = BeautifulSoup(res.text, 'html.parser')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"s = soup.find('div', {'class': 'component-normal-table'})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"a_tags = s.find_all('a')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Collect the hrefs whose file name ends in '-m.pdf'.\n",
"# Anchors without an href (get() returns None) or with a very short href\n",
"# are skipped instead of raising.\n",
"pdf_list = []\n",
"for a_tag in a_tags:\n",
"    href = a_tag.get('href')\n",
"    if href and href.endswith('-m.pdf'):\n",
"        pdf_list.append(href)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['/markets/statistics-equities/short-selling/nlsgeu0000037ver-att/180713-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037t4d-att/180712-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037pl7-att/180711-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037nj9-att/180710-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037jr9-att/180709-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037h3w-att/180706-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037efl-att/180705-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu00000379yi-att/180704-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu000003764s-att/180703-m.pdf',\n",
" '/markets/statistics-equities/short-selling/nlsgeu0000037115-att/180702-m.pdf']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pdf_list"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"base_url = 'https://www.jpx.co.jp'"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import urllib.request"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"num = len(pdf_list)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Make sure the download directory exists before writing into it.\n",
"os.makedirs('temp', exist_ok=True)\n",
"\n",
"# Save each PDF as temp/shortselling<index>.pdf, following pdf_list order.\n",
"# A separate name is used for the PDF address so the page-level `url` is kept.\n",
"for i, path in enumerate(pdf_list):\n",
"    pdf_url = base_url + path\n",
"    urllib.request.urlretrieve(pdf_url, 'temp/shortselling' + str(i) + '.pdf')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# 一括処理\n",
"from pdfminer.pdfparser import PDFParser\n",
"from pdfminer.pdfparser import PDFDocument\n",
"from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\n",
"from pdfminer.pdfparser import PDFPage\n",
"from pdfminer.pdfdevice import PDFDevice\n",
"from pdfminer.converter import PDFPageAggregator\n",
"from pdfminer.converter import TextConverter\n",
"from pdfminer.layout import LAParams\n",
"from pdfminer.layout import LTTextBoxHorizontal"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"data = []"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Parse the first page of every downloaded PDF and append one\n",
"# [date, value] row per file to `data`.\n",
"for i in range(num):\n",
"    # Open the file in a with-block so the handle is always released,\n",
"    # even when parsing fails.\n",
"    with open('temp/shortselling' + str(i) + '.pdf', 'rb') as fp:\n",
"        # Create a PDF parser for the file and a document object that stores\n",
"        # the document structure, then link the two to each other.\n",
"        parser = PDFParser(fp)\n",
"        document = PDFDocument()\n",
"        parser.set_document(document)\n",
"        document.set_parser(parser)\n",
"\n",
"        # Supply the (empty) password for initialization.\n",
"        password = \"\"\n",
"        document.initialize(password)\n",
"\n",
"        # Create a PDF resource manager object that stores shared resources.\n",
"        rsrcmgr = PDFResourceManager()\n",
"\n",
"        # Set parameters for layout analysis.\n",
"        laparams = LAParams()\n",
"\n",
"        # Create a PDF page aggregator and the interpreter that feeds it.\n",
"        device = PDFPageAggregator(rsrcmgr, laparams=laparams)\n",
"        interpreter = PDFPageInterpreter(rsrcmgr, device)\n",
"\n",
"        # Only the first page is processed.\n",
"        pages = list(document.get_pages())\n",
"        page_1 = pages[0]\n",
"        interpreter.process_page(page_1)\n",
"\n",
"        # Receive the LTPage object for the page; it holds the elements that\n",
"        # make up the page (LTTextBoxHorizontal and others).\n",
"        layout = device.get_result()\n",
"\n",
"        # Collect the text of every horizontal text box, in layout order.\n",
"        text = [\n",
"            element.get_text()\n",
"            for element in layout\n",
"            if isinstance(element, LTTextBoxHorizontal)\n",
"        ]\n",
"\n",
"    # text[11] is split as 'YYYY/MM/DD' and text[16] is read as a percentage.\n",
"    # NOTE(review): both indices depend on the PDF template; presumably the\n",
"    # parsed percentage is the non-short-sale share, so 100 minus it is the\n",
"    # short-selling ratio -- confirm against the report.\n",
"    year, month, day = text[11].split('\\n')[0].split('/')\n",
"    data.append([dt(int(year), int(month), int(day)), 100 - float(text[16].rstrip('%\\n'))])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"x = []\n",
"y = []"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"for li in data:\n",
" x.append(li[0])\n",
" y.append(li[1])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"plt.style.use('ggplot')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0, 100)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(10,8))\n",
"plt.plot(x, y, lw=3, color='blue')\n",
"plt.title('東証「空売り比率」推移', size=18)\n",
"plt.ylim(0,100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment