# -*- coding: utf-8 -*-
import datetime
import json
import re
from urllib.parse import quote_plus

import requests
import mojimoji
from bs4 import BeautifulSoup
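
# Per-request timeout in seconds. This value is an assumption (tune it for the
# target sites); without it, requests waits indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10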

def makejson(page_num=1):
    # Shape of the JSON written to dynamic/youngstock.json:
    '''
    {
        "date": "7/13 15:00",
        "data": [
            {
                "name": "company name",
                "hp_url": "company website URL",
                "yf_url": "company Yahoo! Finance URL",
                "feature": "business summary (特色)"
            },
            {},
            ...,
            {}
        ]
    }
    '''
    ret = {}
    ret["date"] = datetime.datetime.now().strftime("%m/%d %H:%M")
    ret["data"] = []

    # Iterate over the requested number of ranking pages
    for page in range(1, page_num + 1):
        # Fetch one page of the Yahoo! Finance listing-date ranking
        urlstr = 'https://finance.yahoo.co.jp/stocks/ranking/listingDate?market=all&term=daily&page=' + str(page)
        html_text = requests.get(urlstr, timeout=REQUEST_TIMEOUT).text
        soup = BeautifulSoup(html_text, 'html.parser')

        # Locate the ranking table (the one whose text contains the "順位" header);
        # skip the page if no such table is found
        table = None
        for selecttable in soup.find_all("table"):
            if "順位" in selecttable.text:
                table = selecttable
                break
        if table is None:
            continue

        # Walk the anchors in the table; each company row links to its quote page
        for ancs in table.find_all("a"):
            # Skip the "掲示板" (message board) links
            if "掲示板" not in ancs.text:
                #------------------------------
                # Basic company info
                #------------------------------
                company = {}
                # Normalize full-width characters and any stray "&amp;" entities
                company["name"] = mojimoji.zen_to_han(ancs.text, kana=False).replace("&amp;", "&")
                company["yf_url"] = ancs.attrs["href"]
                #------------------------------
                # Business summary (特色)
                #------------------------------
                # The stock code is the last path segment of the quote URL
                company_code = re.sub(".*/", "", company["yf_url"])
                urlstr2 = "https://finance.yahoo.co.jp/quote/" + company_code + "/profile"
                html_text2 = requests.get(urlstr2, timeout=REQUEST_TIMEOUT).text
                soup2 = BeautifulSoup(html_text2, 'html.parser')
                # Find the profile row labelled "特色" (or "概要") and take its cell text
                company["feature"] = ""
                for selecttable in soup2.find_all("table"):
                    if "特色" in selecttable.text or "概要" in selecttable.text:
                        for selecttr in selecttable.find_all("tr"):
                            if "特色" in selecttr.text or "概要" in selecttr.text:
                                company["feature"] = selecttr.find_all("td")[0].text.replace("【特色】", "")
                                break
                        break
                # Skip companies whose summary has not been published yet ("---")
                if company["feature"] == "---":
                    continue
                #------------------------------
                # Company website URL (via a Google search)
                #------------------------------
                # URL-encode the query, since company names contain non-ASCII characters
                urlstr3 = "https://www.google.com/search?q=" + quote_plus(company["name"] + " hp")
                html_text3 = requests.get(urlstr3, timeout=REQUEST_TIMEOUT).text
                soup3 = BeautifulSoup(html_text3, 'html.parser')
                # Take the first organic "/url?q=..." result that is neither Google's
                # own page nor Wikipedia. This relies on Google's no-JavaScript result
                # markup and is inherently fragile if that markup changes.
                company["hp_url"] = ""
                for selectdiv in soup3.select("div > a"):
                    href = selectdiv.get("href", "")
                    if href.startswith("/url?q=") and "google" not in href and "wikipedia" not in href:
                        tmpstr = href.replace("/url?q=", "")
                        tmpstr = re.sub("&sa=U.*", "", tmpstr)
                        company["hp_url"] = tmpstr
                        break
                #------------------------------
                # Progress output
                #------------------------------
                print(company["name"])
                print(company["hp_url"])
                print(company["yf_url"])
                print(company["feature"])
                print("----------------------")
                ret["data"].append(company)

    # Write the JSON file
    with open("dynamic/youngstock.json", "w", encoding='utf-8') as f:
        f.write(json.dumps(ret, ensure_ascii=False))

if __name__ == "__main__":
    # Build the JSON file from the first 15 ranking pages
    makejson(page_num=15)
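    # Minimal sanity check: read the file back and report how many companies
    # were written (assumes makejson completed without raising).
    with open("dynamic/youngstock.json", encoding='utf-8') as f:
        print("companies written:", len(json.load(f)["data"]))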