Skip to content

Instantly share code, notes, and snippets.

@jhorikawa
Last active June 27, 2016 04:42
Show Gist options
  • Save jhorikawa/911ef6e28e695ca1e5e7d3234441d3e2 to your computer and use it in GitHub Desktop.
Save jhorikawa/911ef6e28e695ca1e5e7d3234441d3e2 to your computer and use it in GitHub Desktop.
Download the collection of train icons from http://www.trainfrontview.net/.
import urllib
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import os
# Scrape the icon index of trainfrontview.net and mirror every icon image to
# the current directory as ./<category>/<icon title>/<image file name>.
#
# NOTE(review): this script was recovered from a paste that had lost its
# indentation; the nesting below is reconstructed from the logic (every link
# is filed under the most recent category heading) -- confirm against the
# original gist.

baseUrl = "http://www.trainfrontview.net/"
trainUrl = "sozai.htm"

# Parse the index page; the HTTP response is closed as soon as it is parsed.
with urlopen(baseUrl + trainUrl) as html:
    bsObj = BeautifulSoup(html, "html5lib")

indexTable = bsObj.find("table", {"width": "750", "border": "1"})
if indexTable is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit("Index table not found at " + baseUrl + trainUrl)

# Raw string: "\w" in a normal string literal is an invalid escape sequence.
links = indexTable.findAll("a", {"href": re.compile(r"sozai-\w+.+")})

# Map each category name to the list of page URLs that belong to it.
# A link whose text has no "[" starts a new category (and is itself the
# first page of that category); links containing "[" are filed under the
# category that is current at that point.
pagesByCategory = {}
key = ""
for link in links:
    if "[" not in link.text:
        key = link.text
        pagesByCategory[key] = []
    # setdefault guards against a "[...]" link appearing before any category
    # heading, which would otherwise raise KeyError for the initial "" key.
    pagesByCategory.setdefault(key, []).append(baseUrl + link.get("href"))

for key, pageUrls in pagesByCategory.items():
    path = "./" + key
    os.makedirs(path, exist_ok=True)

    for urlSozai in pageUrls:
        with urlopen(urlSozai) as htmlSozai:
            bsObjSozai = BeautifulSoup(htmlSozai, "html5lib")

        iconTable = bsObjSozai.find("table", {"class": "icon"})
        if iconTable is None:
            # Page has no icon table; skip it rather than abort the whole run.
            print("No icon table found, skipping: " + urlSozai)
            continue

        # Titles already announced for this page (each is printed once).
        seenTitles = set()
        for iconTD in iconTable.findAll("td"):
            images = iconTD.findAll("img")
            if not images:
                # Only cells that contain at least one image are icon cells.
                continue

            # The cell text is used as the directory name: drop the marker
            # characters and newlines, and replace "/" so the title cannot
            # introduce an extra path level.
            title = (
                iconTD.text.strip()
                .replace("*", "")
                .replace("★", "")
                .replace("\n", "")
                .replace("/", "_")
            )
            # Recomputed for every cell so images never land in the directory
            # of a previously processed title.
            pathIcon = path + "/" + title
            if title not in seenTitles:
                seenTitles.add(title)
                os.makedirs(pathIcon, exist_ok=True)
                print("_______________________")
                print(title)

            for eachImage in images:
                iconImageSrc = eachImage.get("src")
                if not iconImageSrc:
                    # <img> without a usable src attribute: nothing to fetch.
                    continue
                iconName = iconImageSrc.split("/")[-1]
                if not iconName:
                    # src ends with "/": there is no file name to save under.
                    continue
                iconUrl = baseUrl + iconImageSrc

                # Download first, then write, so a failed download does not
                # leave an empty file behind; both handles are always closed.
                with urlopen(iconUrl) as response:
                    iconData = response.read()
                with open(pathIcon + "/" + iconName, "wb") as f:
                    f.write(iconData)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment