Skip to content

Instantly share code, notes, and snippets.

@rb-roomba
Created June 29, 2016 04:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rb-roomba/cf7fdcddb9c58b72c7d98252aeade612 to your computer and use it in GitHub Desktop.
Save rb-roomba/cf7fdcddb9c58b72c7d98252aeade612 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# --- 1.必要なライブラリのインポート
import json
import os
import urllib

import cv2
import matplotlib.pyplot as plt
import numpy as np
from bs4 import BeautifulSoup
# --- 2. 関数
def maybe_download(url, name):
    """Download ``url`` to the local path ``name`` unless it already exists.

    Args:
        url: Address of the resource to fetch.
        name: Destination file path. An existing file is never overwritten.

    Returns:
        ``name``, unchanged, so the call can be used inline.
    """
    if os.path.exists(name):
        print("File " + name + " already exists!")
        return name

    # urlretrieve lives in ``urllib`` on Python 2 and in ``urllib.request``
    # on Python 3; importing it here keeps the function usable on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # urlretrieve does not create missing directories, so make sure the
    # destination folder exists before writing into it.
    parent = os.path.dirname(name)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)

    urlretrieve(url, name)
    print("File " + name + " downloaded!")
    return name
def parse_html(name, limit=206):
    """Extract image URLs from a saved copy of Wikipedia's list of flags.

    Args:
        name: Path of the downloaded HTML file.
        limit: Number of leading ``<img>`` tags to use. Defaults to 206,
            the value this function previously hard-coded; ``None`` keeps
            every image found.

    Returns:
        A list of URL strings, each built by prefixing ``https:`` to the
        ``src`` attribute of one of the selected ``<img>`` tags.
    """
    # Read raw bytes and let BeautifulSoup work out the encoding, so the
    # result does not depend on the locale; ``with`` closes the handle.
    with open(name, "rb") as html_file:
        soup = BeautifulSoup(html_file.read(), "lxml")
    images = soup.find_all("img")[:limit]
    # NOTE(review): the "https:" prefix presumes every src is
    # protocol-relative ("//host/path") -- confirm against the page.
    return ["https:" + image.attrs["src"] for image in images]
if __name__ == '__main__':
    # --- 3. Download the flag images
    # URL of the Japanese Wikipedia list of national flags
    url = "https://ja.wikipedia.org/wiki/%E5%9B%BD%E6%97%97%E3%81%AE%E4%B8%80%E8%A6%A7"
    figs_dir = "./figs"
    # Create the image directory up front: both the downloads below and
    # the later os.listdir() call need it to exist.
    if not os.path.isdir(figs_dir):
        os.makedirs(figs_dir)
    # download html
    html_name = maybe_download(url, "flag_list.html")
    # parse html
    flag_urls = parse_html(html_name)
    # download figs
    for u in flag_urls:
        # Keep the text after the last "px-" up to the first dot as the
        # base name (presumably the part following the thumbnail-width
        # prefix -- verify against the actual URLs).
        png_name = u.split("px-")[-1].split(".")[0] + ".png"
        maybe_download(u, os.path.join(figs_dir, png_name))

    # --- 4. Load the flag images as NumPy arrays (OpenCV)
    # The images are kept in a plain list: they need not share a shape,
    # and np.array() over differently shaped arrays is an error on
    # current NumPy releases.
    flag_list = []
    for fig_name in sorted(os.listdir(figs_dir)):
        img = cv2.imread(os.path.join(figs_dir, fig_name))
        if img is None:
            # imread signals an unreadable file by returning None.
            continue
        flag_list.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # default: BGR

    # --- 5. Show all flags
    n_cols = 15
    # Ceiling division; stays at the original 15x15 grid for up to 225
    # images and only grows when more were loaded.
    n_rows = max(15, -(-len(flag_list) // n_cols))
    fig = plt.figure(figsize=(12, 15))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=0.5, hspace=0.05, wspace=0.05)
    # Iterate over what was actually loaded instead of assuming 206 images.
    for i, flag in enumerate(flag_list):
        ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])
        ax.imshow(flag, interpolation="none")

    # --- 6. Count how often each hue occurs
    # 180 unit-wide bins, one per integer hue value 0..179.
    hist = np.zeros(180)
    for flag in flag_list:
        hue = cv2.cvtColor(flag, cv2.COLOR_RGB2HSV)[:, :, 0]
        hist += np.histogram(hue, bins=np.arange(181))[0]
    fig = plt.figure(figsize=(12, 5))
    plt.bar(np.arange(180) + 1, hist)
    plt.show()

    # --- 7. Save the hue frequencies as JSON
    peak = np.max(hist)
    # normalize to a maximum of 1; skip the division when nothing was
    # counted so the output holds zeros rather than NaN.
    nhist = hist / peak if peak > 0 else hist
    with open("hue.js", "w") as f:
        f.write("data=")
        # Convert NumPy scalars to built-in int/float: json rejects NumPy
        # integer keys on Python 3.
        json.dump({int(h): float(v) for h, v in enumerate(nhist)}, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment