Last active
January 10, 2018 03:11
-
-
Save takashifuruya0/0ffae247ddbdb9ddaa00c59f0a2d6b00 to your computer and use it in GitHub Desktop.
GASでスクレイピング:SUUMOから賃貸物件情報を取得 ref: https://qiita.com/tfuruya/items/7798e0f3188a2795319e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Entry point: runs the scraper in main() against one hard-coded SUUMO
 * listing page.
 */
function doCurl() {
  // URL of the SUUMO listing to scrape
  var listingUrl = "https://suumo.jp/chintai/bc_100106732165/?suit=STfr20160902000";
  main(listingUrl);
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// main: fetch a SUUMO listing page and scrape its fields
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/**
 * Fetches a SUUMO rental-listing page and scrapes the property details out of
 * its HTML.
 *
 * The HTML is split into lines and three marker lines are searched for. Every
 * field is read from the line at a fixed offset from its marker, so the
 * offsets below are tied to the page layout and must be revisited whenever
 * the markup changes. When a marker occurs more than once, the values from
 * the last occurrence win.
 *
 * Text extraction is delegated to cut_string(text, prefix, suffix), which is
 * defined outside this block (presumably it returns the text between the two
 * delimiters -- confirm against its definition).
 *
 * All scraped values are kept in a local record. A field whose marker is
 * missing, or whose marker sits too close to the start/end of the page for
 * its offsets to exist, is left as null instead of raising an error.
 *
 * @param {string} url URL of the listing page to fetch.
 * @return {Object} Record with the keys yachin, kanrihi, shikikin, reikin,
 *     madori, hirosa, muki, chikunen, madori_detail, kaisu, kenchikubi,
 *     moyori, moyori_toho, kozo and url. The same record is also written to
 *     the log.
 */
function main(url){
  // Trimmed text of the line `offset` lines away from line `base`.
  function textAt(base, offset){
    return lines[base + offset].trim();
  }

  // Calls handler(i) for every line i that contains `marker`, provided all
  // lines from i+minOffset to i+maxOffset exist; other matches are skipped.
  function forEachMarker(marker, minOffset, maxOffset, handler){
    for(var i = 0; i < lines.length; i++){
      if(lines[i].indexOf(marker) === -1){
        continue;
      }
      if(i + minOffset < 0 || i + maxOffset >= lines.length){
        continue;
      }
      handler(i);
    }
  }

  var html = UrlFetchApp.fetch(url).getContentText();
  var lines = html.split("\n");

  // Every key exists up front so the record has the same shape whether or
  // not the markers are found.
  var res = {
    "yachin": null,
    "kanrihi": null,
    "shikikin": null,
    "reikin": null,
    "madori": null,
    "hirosa": null,
    "muki": null,
    "chikunen": null,
    "madori_detail": null,
    "kaisu": null,
    "kenchikubi": null,
    "moyori": null,
    "moyori_toho": null,
    "kozo": null,
    "url": url
  };

  // Summary block: located via the "add to favourites" link.
  var favoriteTag = '<span class="jj-fr_detail-icon jj-fr_detail-icon--note"></span><span>お気に入りに登録する</span></a>';
  forEachMarker(favoriteTag, 0, 56, function(i){
    Logger.log("================================");

    // Rent: the "万円" suffix is stripped and the number multiplied by 10000.
    var rent = cut_string(textAt(i, 17), "<span>", "</span>");
    res.yachin = Number(cut_string(rent, "", "万円")) * 10000;
    Logger.log("家賃: " + res.yachin);

    // Management fee: only the "円" suffix is stripped.
    var fee = cut_string(textAt(i, 21), "<span>", "</span>");
    res.kanrihi = Number(cut_string(fee, "", "円"));
    Logger.log("管理費: " + res.kanrihi);

    // Deposit, same conversion as the rent.
    var deposit = cut_string(textAt(i, 29), "<span>", "</span>");
    res.shikikin = Number(cut_string(deposit, "", "万円")) * 10000;
    Logger.log("敷金: " + res.shikikin);

    // Key money, same conversion as the rent.
    var keyMoney = cut_string(textAt(i, 33), "<span>", "</span>");
    res.reikin = Number(cut_string(keyMoney, "", "万円")) * 10000;
    Logger.log("礼金: " + res.reikin);

    // Floor plan
    res.madori = cut_string(textAt(i, 44), "<div>", "</div>");
    Logger.log("間取り: " + res.madori);

    // Floor area: text before "m<sup>", rounded to an integer.
    res.hirosa = Math.round(cut_string(textAt(i, 46), "", "m<sup>"));
    Logger.log("広さ: " + res.hirosa);

    // Facing direction
    res.muki = cut_string(textAt(i, 48), "<div>", "</div>");
    Logger.log("向き: " + res.muki);

    // Building age: "新築" (newly built) is recorded as the number 1;
    // otherwise the text between "築" and "年" is kept as returned.
    var age = cut_string(textAt(i, 56), "<div>", "</div>");
    if(age == "新築"){
      res.chikunen = 1;
    }else{
      res.chikunen = cut_string(age, "築", "年");
    }
    Logger.log("築年数: " + res.chikunen);
  });

  // Nearest station: the text sits on the line just before the route-search link.
  var stationTag = '[<a href="javascript:norikaePop(';
  forEachMarker(stationTag, -1, 0, function(i){
    var stationLine = textAt(i, -1);
    res.moyori = cut_string(stationLine, "/", "駅");
    Logger.log("最寄り駅: " + res.moyori);
    // Walking time to the station: text between "歩" and "分".
    res.moyori_toho = cut_string(stationLine, "歩", "分");
    Logger.log("駅徒歩: " + res.moyori_toho);
  });

  // Property overview table.
  var overviewTag = '<h2><span>物件概要</span></h2>';
  forEachMarker(overviewTag, 0, 20, function(i){
    // Detailed floor plan
    res.madori_detail = cut_string(textAt(i, 11), "<td>", "</td>");
    Logger.log("間取り詳細: " + res.madori_detail);
    // Structure
    res.kozo = cut_string(textAt(i, 14), "<td>", "</td>");
    Logger.log("構造: " + res.kozo);
    // Floor
    res.kaisu = cut_string(textAt(i, 18), "<td>", "</td>");
    Logger.log("階: " + res.kaisu);
    // Construction date
    res.kenchikubi = cut_string(textAt(i, 20), "<td>", "</td>");
    Logger.log("建築日: " + res.kenchikubi);
  });

  Logger.log("================================");
  Logger.log(res);
  return res;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment