@shinichi-takayanagi · Created May 6, 2017 23:15
Tokyo Bay Area Map Generator
library(rvest)
library(stringr)
library(leaflet)
library(RCurl)
library(rjson)
# Given an address string, query the Google Geocoding API and return the
# location information as a JSON string.
# NOTE: Google's Geocoding API now requires an API key, so the keyless
# endpoint used here may no longer work as-is.
GetLocationInformation <- function(address)
{
  # URL-encode the address
  address <- RCurl::curlEscape(address)
  # Build the API request URL
  url <- paste0("http://maps.googleapis.com/maps/api/geocode/json?address=",
                address, "&sensor=false&region=JP&language=ja")
  RCurl::getURL(url)
}
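
# A minimal sketch of calling GetLocationInformation on its own (the address
# below is hypothetical, and the call needs a working API endpoint); the
# returned JSON carries a "status" field and a "results" list:
# json <- GetLocationInformation("東京都中央区晴海")
# rjson::fromJSON(json)$status  # "OK" on success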
# Convert a character vector of addresses into latitude/longitude pairs
GetLatitudeAndLongitude <- function(addresses)
{
  # Fetch the location information (JSON) for each address via the Google API
  jsons <- sapply(addresses, GetLocationInformation)
  # Extract the latitude and longitude from one JSON response
  # (parse the JSON once rather than once per field)
  ExtractLatitudeAndLongitude <- function(json)
  {
    location <- rjson::fromJSON(json)$results[[1]]$geometry$location
    c(lat = location$lat, lon = location$lng)
  }
  result <- t(sapply(jsons, ExtractLatitudeAndLongitude))
  rownames(result) <- NULL
  result
}
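
# Example usage (a sketch with hypothetical addresses; the result is a
# matrix with one row per address and columns "lat" and "lon"):
# GetLatitudeAndLongitude(c("東京都中央区晴海", "東京都江東区豊洲"))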
scrape <- function(url)
{
  # Fetch the page at the given URL
  html <- read_html(url)
  # Extract the tower condominium names from the image alt text,
  # dropping the "写真に戻る" ("back to photo") entries
  name <- html %>% html_nodes(xpath='//div[@class="pic-block-child"]/a/img') %>% html_attr("alt")
  name <- Filter(function(x){x != "写真に戻る"}, name)
  # Extract the addresses (the second line of each "col-1st" table)
  address <- html %>% html_nodes("table.col-1st") %>% html_text
  address <- sapply(str_split(address, "\n"), function(x){x[2]})
  # Geocode the addresses
  # GetLatitudeAndLongitude() follows the advice in
  # http://wafdata.hatenablog.com/entry/2015/09/22/114711
  location <- GetLatitudeAndLongitude(address)
  # Extract the image URL for each tower
  url_image <- html %>% html_nodes(xpath='//div[@class="pic-block-child"]/a/img') %>% html_attr("src")
  url_image <- Filter(function(x){str_detect(x, "axel-home.com")}, url_image)
  # Extract the link to each tower's detail page
  url_page <- html %>% html_nodes(xpath="//h3/a") %>% html_attr("href")
  # Return everything as a data.frame
  data.frame(name, address, location, url_page, url_image, stringsAsFactors = FALSE)
}
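
# Example usage for a single listing page (a sketch; the XPath/CSS selectors
# above assume axel-home.com's 2017-era markup, which may have changed since):
# head(scrape("http://www.axel-home.com/towers/chuo/index.html"))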
# Define the URLs to scrape
urls <- c(
  paste0("http://www.axel-home.com/towers/chuo/index", c("", "_2", "_3"), ".html"),
  paste0("http://www.axel-home.com/towers/koto/index", c("", "_2", "_3"), ".html")
)
# Scrape every URL
data <- lapply(urls, scrape)
# Combine the results into a single data.frame
df <- dplyr::bind_rows(data)
# Build the HTML shown in the popup when a marker is clicked
df <- dplyr::mutate(df, popup = paste0("<a target='_blank' href=", url_page, ">", name, "<br><img src=", url_image, " width=150></a>"))
# Visualize with Leaflet
leaflet(df) %>%
  addTiles() %>%
  addMarkers(lng = ~lon, lat = ~lat, popup = ~popup, label = ~name)
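
# Optionally, assign the map to a variable and save it as a standalone HTML
# file (a sketch using htmlwidgets, which leaflet builds on; the output file
# name is arbitrary):
# m <- leaflet(df) %>% addTiles() %>%
#   addMarkers(lng = ~lon, lat = ~lat, popup = ~popup, label = ~name)
# htmlwidgets::saveWidget(m, "tokyo_bay_towers.html")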