Created
June 29, 2016 04:54
-
-
Save rb-roomba/cf7fdcddb9c58b72c7d98252aeade612 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# --- 1. Import the required libraries
import json
import os
import urllib

import cv2
import matplotlib.pyplot as plt
import numpy as np
from bs4 import BeautifulSoup
# --- 2. Functions
def maybe_download(url, name):
    """Download ``url`` to the local path ``name`` unless the file already exists.

    Args:
        url: Address of the resource to fetch.
        name: Destination path on the local filesystem.

    Returns:
        ``name`` unchanged, so the call can be used inline.
    """
    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; resolve it here so the function runs on either interpreter.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if os.path.exists(name):
        print("File " + name + " already exists!")
        return name

    # Create the destination directory on demand: the download cannot be
    # written into a directory that does not exist yet.
    directory = os.path.dirname(name)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    urlretrieve(url, name)
    print("File " + name + " downloaded!")
    return name
def parse_html(name, limit=206):
    """Extract flag image URLs from a saved Wikipedia "List of national flags" page.

    Args:
        name: Path of the saved HTML file.
        limit: How many leading ``<img>`` tags to keep. Defaults to 206,
            the count the script originally hard-coded.

    Returns:
        A list with ``"https:"`` prepended to the ``src`` attribute of each
        kept ``<img>`` tag.
    """
    # Read raw bytes and let BeautifulSoup detect the encoding, so the result
    # does not depend on the locale's default text encoding; the ``with``
    # block closes the handle, which the original left open.
    with open(name, "rb") as html_file:
        soup = BeautifulSoup(html_file.read(), "lxml")
    images = soup.find_all("img")[:limit]
    # NOTE(review): prepending "https:" presumes protocol-relative src values
    # ("//host/...") - confirm against the page markup.
    return ["https:" + image.attrs["src"] for image in images]
if __name__ == '__main__':
    # --- 3. Download the flag images
    # URL of the Japanese Wikipedia page "List of national flags"
    url = "https://ja.wikipedia.org/wiki/%E5%9B%BD%E6%97%97%E3%81%AE%E4%B8%80%E8%A6%A7"
    fig_dir = "./figs"
    # The image directory is both written to and listed below, so make sure
    # it exists before either step.
    if not os.path.isdir(fig_dir):
        os.makedirs(fig_dir)
    # download html
    html_name = maybe_download(url, "flag_list.html")
    # parse html
    flag_urls = parse_html(html_name)
    # download figs
    for u in flag_urls:
        # Keep the part after the "<N>px-" thumbnail prefix, cut at its first dot.
        png_name = u.split("px-")[-1].split(".")[0] + ".png"
        maybe_download(u, os.path.join(fig_dir, png_name))

    # --- 4. Load the flag images as NumPy arrays (OpenCV)
    # Kept as a plain list: the images need not share a shape, so they cannot
    # be stacked into one regular ndarray.
    flag_list = []
    for fig_name in sorted(os.listdir(fig_dir)):  # sorted for a stable order
        img = cv2.imread(os.path.join(fig_dir, fig_name))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("Skipping unreadable image " + fig_name)
            continue
        flag_list.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # default: BGR

    # --- 5. Display all the flags
    max_shown = 206  # fits inside the 15 x 15 subplot grid below
    fig = plt.figure(figsize=(12, 15))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=0.5, hspace=0.05, wspace=0.05)
    for i, flag in enumerate(flag_list[:max_shown]):
        ax = fig.add_subplot(15, 15, i + 1, xticks=[], yticks=[])
        ax.imshow(flag, interpolation="none")

    # --- 6. Count how often each hue occurs
    hist = np.zeros(180)
    for flag in flag_list:
        hue = cv2.cvtColor(flag, cv2.COLOR_RGB2HSV)[:, :, 0]
        # 181 integer edges give 180 unit-wide bins, one per hue value.
        hist += np.histogram(hue, bins=np.arange(181))[0]
    plt.figure(figsize=(12, 5))
    plt.bar(np.arange(180) + 1, hist)
    plt.show()

    # --- 7. Save the hue frequencies as JSON
    peak = np.max(hist)
    # normalize; skip the division when nothing was counted
    nhist = hist / peak if peak > 0 else hist
    with open("hue.js", "w") as f:
        f.write("data=")
        # Plain Python ints/floats: json cannot serialise NumPy integer keys.
        json.dump(dict(zip(range(180), nhist.tolist())), f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment