Last active
October 22, 2017 04:09
-
-
Save a-kuma3/29dd8a7344d7ea791068113ad39a6ae7 to your computer and use it in GitHub Desktop.
q1508143393
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(() => { | |
// Shorthand for the document of the page this script is injected into.
const d_ = document;

// Output columns: record key -> header label. The key order is the column
// order of the result table.
const title_list = {
  company_name: "企業名",
  url: "URL",
  employment_pattern: "雇用形態",
  work_location: "勤務地",
  salary: "給与",
  phone_no: "電話番号",
  mail_address: "メールアドレス",
};

// Anchors of job-detail pages that are still waiting to be fetched.
let job_list = [];

// One record per job-detail page processed so far.
let data_list = [];

// Pattern matching the searched work locations; assigned by
// extract_location_condition().
let re_location_condition;

// Matched location label -> text reported in the "work_location" column.
let location_condition_map = {};
/**
 * Delay, in milliseconds, to wait before requesting the next job-detail page.
 *
 * A randomized pause of 2000-3999 ms is computed and then overwritten with 0,
 * so the function currently always returns 0.
 * NOTE(review): the zero override looks like a debugging leftover that
 * disables the throttling - confirm whether it is intentional.
 *
 * @returns {number} Always 0.
 */
function get_interval_time() {
  let delay_ms = Math.floor(1000 * (2 + 2 * Math.random()));
  delay_ms = 0;
  return delay_ms;
}
// Small DOM helpers.
const U = {
  /**
   * Creates a <tag> element, copies the properties of `prop` onto it and
   * appends it to `parent`.
   *
   * @param {Node} parent - Node that receives the new element.
   * @param {string} tag - Tag name of the element to create.
   * @param {Object} [prop] - Properties assigned onto the new element.
   * @returns {*} Whatever parent.appendChild() returns (the new element).
   */
  append(parent, tag, prop) {
    const element = d_.createElement(tag);
    Object.assign(element, prop || {});
    return parent.appendChild(element);
  },

  /**
   * Detaches `e` from its parent node.
   *
   * @param {Node} e - Node to detach; it must currently have a parent.
   * @returns {*} Whatever parentNode.removeChild() returns (the node).
   */
  remove(e) {
    const holder = e.parentNode;
    return holder.removeChild(e);
  },
};
// initialize
(() => {
  // Remove whatever an earlier run of this script left in the page.
  const stale_selectors = [
    "#scraping-progress",
    "#scraping-output",
    "#scraping-style",
  ];
  for (const selector of stale_selectors) {
    const leftover = d_.querySelector(selector);
    if (leftover) {
      U.remove(leftover);
    }
  }

  // Styles for the result panel (#scraping-output) and the progress badge
  // (#scraping-progress).
  const css = `
#scraping-output {
position: absolute;
top: 0;
right: 0;
/*width: 70%;*/
/*height: 50%;*/
border: 1px solid green;
background-color: white;
z-index: 9999;
display: inline-block;
max-width: 70%;
}
#scraping-output td,
#scraping-output th {
border: 1px solid gray;
}
#scraping-output .toolbar button {
width: initial;
}
#scraping-progress {
position: fixed;
top: 10px;
right: 10px;
background-color: green;
border: 3px solid white;
border-radius: 10px;
color: white;
padding: 0.5ex 2em;
font-size: 16pt;
}
`;
  U.append(d_.head, "style", {
    id: "scraping-style",
    innerHTML: css,
  });
})();
/**
 * Shows "<current> / <total>" in the #scraping-progress badge.
 *
 * The badge is created on the first call, and the queue length at that moment
 * is remembered on it (data-total) as the total. The current position is
 * derived from how many entries are still left in job_list.
 */
function display_progress() {
  let badge = d_.getElementById("scraping-progress");
  const is_first_call = !badge;
  if (is_first_call) {
    badge = U.append(d_.body, "div", { id: "scraping-progress" });
    badge.dataset.total = job_list.length;
  }
  const total = badge.dataset.total;
  const remaining = job_list.length;
  // `total` comes back from the dataset; the subtraction coerces it to a number.
  const position = total - remaining + 1;
  badge.innerHTML = `${position} / ${total}`;
}
/**
 * Renders every record of data_list as a table inside the #scraping-output
 * panel, creating the panel if needed and replacing any previous content.
 *
 * The panel gets a toolbar with a "copy" button (copies the table to the
 * clipboard) and a "close" button (removes the table and offers an "open"
 * button that renders it again).
 *
 * All scraped values are written with textContent and the URL column is built
 * as a real <a> element, so text taken from the scraped pages is never parsed
 * as markup.
 */
function display_data() {
  // The progress badge is obsolete once the results are shown.
  const progress = d_.getElementById("scraping-progress");
  if (progress) {
    U.remove(progress);
  }

  let panel = d_.getElementById("scraping-output");
  if (!panel) {
    panel = U.append(d_.body, "div", {
      id: "scraping-output",
    });
  }
  // Drop whatever a previous render left in the panel.
  panel.innerHTML = "";

  const toolbar = U.append(panel, "div", {
    className: "toolbar",
  });
  const table = U.append(panel, "table");

  U.append(toolbar, "button", {
    textContent: "copy",
    onclick: () => {
      // Select only the table, so the toolbar buttons are not part of the
      // copied content.
      const range = d_.createRange();
      range.selectNode(table);
      const selection = d_.getSelection();
      selection.removeAllRanges();
      selection.addRange(range);
      d_.execCommand("copy");
      alert("コピーしました。");
      selection.removeAllRanges();
    },
  });
  U.append(toolbar, "button", {
    textContent: "close",
    onclick: () => {
      U.remove(table);
      toolbar.innerHTML = "";
      U.append(toolbar, "button", {
        textContent: "open",
        onclick: () => {
          display_data();
        },
      });
    },
  });

  const keys = Object.keys(title_list);

  const header_row = U.append(table, "tr");
  keys.forEach(key => {
    U.append(header_row, "th", {
      textContent: title_list[key],
    });
  });

  data_list.forEach(info => {
    const row = U.append(table, "tr");
    keys.forEach(key => {
      const value = info[key];
      if (key === "url" && value) {
        // Build the link as an element instead of interpolating the URL into
        // an HTML string.
        const cell = U.append(row, "td");
        U.append(cell, "a", {
          href: value,
          target: "_blank",
          rel: "noopener",
          textContent: value,
        });
      } else {
        // Missing values are rendered as an empty cell.
        U.append(row, "td", {
          textContent: (value || "").toString(),
        });
      }
    });
  });

  panel.scrollIntoView();
}
/**
 * Queues every job-detail link found on one search-result page, then either
 * requests the following result page or, when there is none, starts
 * processing the queued jobs.
 *
 * @param {Document} doc - A search-result page.
 */
function get_list_and_next(doc) {
  const detail_links = Array.from(
    doc.querySelectorAll(".rnn-jobOfferList__item a.rnn-button")
  );
  job_list = [...job_list, ...detail_links];

  const next_link = doc.querySelector(".rnn-pagination__next a");
  if (!next_link) {
    // Last result page reached: begin fetching the job details.
    store_data_and_next();
    return;
  }
  get_list(next_link.href);
}
/**
 * Fetches one search-result page as a parsed document and hands it to
 * get_list_and_next().
 *
 * If the page cannot be loaded (network error, HTTP status >= 400, or a
 * response that could not be parsed into a document) the failure is logged
 * and the jobs queued so far are processed anyway, instead of silently
 * stalling the whole run.
 *
 * @param {string} url - URL of the search-result page.
 */
function get_list(url) {
  const give_up = reason => {
    console.error("get_list", url, reason);
    // Continue with whatever has been collected from the earlier pages.
    store_data_and_next();
  };

  const xhr = new XMLHttpRequest();
  xhr.onload = ev => {
    const { status, response } = ev.target;
    if (status < 400 && response) {
      get_list_and_next(response);
    } else {
      give_up(`HTTP ${status}`);
    }
  };
  xhr.onerror = () => give_up("network error");
  xhr.open('GET', url, true);
  xhr.responseType = 'document';
  xhr.send(null);
}
/*
Fields to collect for every job posting:
- Company name
- URL of the page with the "求人情報" (job information) tab open
- Employment type (the label at the top left, e.g. "正社員")
- Work location: "札幌市" when Sapporo (Sapporo branch) is listed among the
  work locations, otherwise blank
- Salary: the line that states the monthly salary; remarks from the second
  line on and annual-income examples are not wanted
- Contact phone number and mail address (blank when neither is listed)
*/
// Extractors that turn one detail-table cell element into the text of a column.
const EXer = {
  /**
   * Reports which searched location is mentioned in the work-location cell.
   *
   * @param {Element} e - Cell holding the work-location text.
   * @returns {string} The mapped location label, or "" when none matches or
   *   when the location pattern has not been built.
   */
  work_location: e => {
    // The pattern stays undefined when no location condition was found on the
    // search page; report "no match" instead of throwing.
    if (!re_location_condition) {
      return "";
    }
    // Ignore everything from the "リクナビNEXT上の地域分類では" remark onwards.
    const text = e.textContent.replace(/リクナビNEXT上の地域分類では(.|\n)*/, "");
    const m = re_location_condition.exec(text);
    return (m && location_condition_map[m[1]]) || "";
  },

  /**
   * Picks the salary line out of the salary cell.
   *
   * @param {Element} e - Cell holding the salary text.
   * @returns {string} The first line containing both "月" and "円"; failing
   *   that, the first line with "年収"/"年俸" followed by "円"; otherwise "".
   */
  salary: e => {
    // Everything from "年収例" (annual-income examples) onwards is discarded.
    const lines = e.textContent.replace(/年収例(.|\n)*/, "").split("\n");
    return (lines.find(line => /^(?=.*月)(?=.*円)/.test(line)) ||
      lines.find(line => /(年収|年俸).*円/.test(line)) ||
      "").trim();
  },

  /**
   * Finds the first phone number in the contact cell's markup.
   *
   * @param {Element} e - Contact cell.
   * @returns {string} The matched number, including an adjacent "(代)" /
   *   "(代表)" marker if present, or "".
   */
  phone_no: e => {
    const m = /([((](代|代表)[))])?\b\d{2,4}-?(\d{2,4}-\d{3,4}|\d{6})(\b|\n)([((](代|代表)[))])?/.exec(e.innerHTML);
    return m ? m[0] : "";
  },

  /**
   * Finds the first mail address in the contact cell's markup.
   *
   * @param {Element} e - Contact cell.
   * @returns {string} The matched address, or "".
   */
  mail_address: e => {
    const m = /\b[\w.-]+@[\w.-]+\b/.exec(e.innerHTML);
    return m ? m[0] : "";
  },
};
/**
 * Builds a record from a job-detail page.
 *
 * @param {Document} d - Job-detail page.
 * @returns {Object} Record with company_name ("?" when the name element is
 *   missing or empty), info_header (texts of the header labels),
 *   employment_pattern (the first header label) and, for each detail-table
 *   row that is present, work_location, salary, phone_no and mail_address.
 */
function extract_recruitment(d) {
  const data = {};

  // Pages without the company-name element fall back to "?" instead of throwing.
  const name_element = d.querySelector(".rnn-offerCompanyName");
  data.company_name = (name_element && name_element.textContent) || "?";

  data.info_header = Array.from(d.querySelectorAll(".rnn-offerInfoHeader__iconList > li > span.rnn-label--large")).map(s => s.textContent);
  data.employment_pattern = data.info_header[0];

  Array.from(d.querySelectorAll(".rnn-offerInfoMain table.rnn-detailTable th.rnn-col-2")).forEach(th => {
    const cell = th.nextElementSibling;
    if (!cell) {
      // A heading without a value cell has nothing to extract.
      return;
    }
    const label = th.textContent;
    if (/勤務地/.test(label)) {
      data.work_location = EXer.work_location(cell);
    } else if (/給与/.test(label)) {
      data.salary = EXer.salary(cell);
    } else if (/連絡先/.test(label)) {
      data.phone_no = EXer.phone_no(cell);
      data.mail_address = EXer.mail_address(cell);
    }
  });
  return data;
}
/**
 * Builds a record from a 転職支援サービス page, which uses different markup
 * from the regular job-detail page.
 *
 * @param {Document} d - Page to read.
 * @returns {Object} Record with company_name ("?" when the name element is
 *   missing or empty), info_header (alt texts of the header images),
 *   employment_pattern (the first of them) and, for each detail entry that is
 *   present, work_location, salary, phone_no and mail_address.
 */
function extract_recruitment_2(d) {
  const data = {};

  // Pages without the company-name element fall back to "?" instead of throwing.
  const name_element = d.querySelector(".QWZ14-140-B");
  data.company_name = (name_element && name_element.textContent) || "?";

  data.info_header = Array.from(d.querySelectorAll(".QWZ12-140-B img")).map(s => s.alt);
  data.employment_pattern = data.info_header[0];

  Array.from(d.querySelectorAll("dl.recruit_details dt")).forEach(dt => {
    const cell = dt.nextElementSibling;
    if (!cell) {
      // A term without a following value element has nothing to extract.
      return;
    }
    const label = dt.textContent;
    if (/勤務地/.test(label)) {
      data.work_location = EXer.work_location(cell);
    } else if (/給与/.test(label)) {
      data.salary = EXer.salary(cell);
    } else if (/連絡先/.test(label)) {
      data.phone_no = EXer.phone_no(cell);
      data.mail_address = EXer.mail_address(cell);
    }
  });
  return data;
}
/**
 * Records the result of the job that was just processed (if any) and moves
 * on: schedules the fetch of the next queued job, or renders the results once
 * the queue is empty.
 *
 * @param {Object} [data] - Record to store; omitted when there is nothing to
 *   record.
 */
function store_data_and_next(data) {
  display_progress();
  if (data) {
    data_list.push(data);
  }

  if (job_list.length === 0) {
    display_data();
    return;
  }

  const delay = get_interval_time();
  // The job is taken off the queue only when the timer fires.
  setTimeout(() => get_detail(job_list.shift().href), delay);
}
/**
 * Fetches one job-detail page, extracts a record from it and passes the
 * record to store_data_and_next().
 *
 * - When the selected tab is "求人情報", the page is read with
 *   extract_recruitment().
 * - When another tab is selected, the first tab link is fetched instead.
 * - When the page has no tab menu, it is read with extract_recruitment_2()
 *   (it may be a 転職支援サービス page).
 *
 * Any failure (network error, HTTP status >= 400, unparsable response, no
 * usable tab link, or an exception while extracting) is logged and the run
 * continues with the next queued job, so a single bad page does not discard
 * everything collected so far.
 *
 * @param {string} url - URL of the job-detail page.
 */
function get_detail(url) {
  const skip = reason => {
    console.error("get_detail", url, reason);
    store_data_and_next();
  };

  const xhr = new XMLHttpRequest();
  xhr.onload = ev => {
    const { status, response, responseURL } = ev.target;
    if (status >= 400 || !response) {
      skip(`HTTP ${status}`);
      return;
    }

    const selected_tab = response.querySelector("ul.rnn-tabMenu__navi .is-current");
    let extract;
    if (!selected_tab) {
      // may be a 転職支援サービス page
      extract = extract_recruitment_2;
    } else if (/求人情報/.test(selected_tab.textContent)) {
      extract = extract_recruitment;
    } else {
      const link = selected_tab.parentNode.parentNode.querySelector("a.rnn-tabMenu__navi__itemlink");
      // A link back to the page that was just fetched would be requested
      // forever, so it is treated as a failure.
      if (link && link.href !== url && link.href !== responseURL) {
        get_detail(link.href);
      } else {
        skip("no usable tab link");
      }
      return;
    }

    let data;
    try {
      data = extract(response);
    } catch (err) {
      skip(err);
      return;
    }
    data.url = responseURL;
    store_data_and_next(data);
  };
  xhr.onerror = () => skip("network error");
  xhr.open('GET', url, true);
  xhr.responseType = 'document';
  xhr.send(null);
}
/**
 * Reads the work-location ("勤務地") condition of the current search page and
 * builds from it:
 *  - re_location_condition: a pattern matching "全国" or any of the searched
 *    location labels, and
 *  - location_condition_map: matched label -> text to report.
 *
 * A condition written as "X【...以外】" is matched through the label
 * "その他X" and reported with its original text.
 *
 * When the page shows no work-location condition, both variables are left
 * untouched.
 */
function extract_location_condition() {
  // Labels are scraped text, so regex metacharacters in them must be matched
  // literally.
  const escape_regexp = text => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const title = Array.from(d_.querySelectorAll(".rnn-conditionalSearch__title"))
    .find(e => /勤務地/.test(e.textContent));
  if (!title) {
    return;
  }

  const conditions = Array.from(title.parentNode.querySelectorAll(".rnn-conditionalSearch__condition .rnn-conditionalSearch__condition__result__item"))
    .map(a => a.textContent.trim())
    // An empty label would add an alternative that matches any text.
    .filter(text => text !== "");

  const alternatives = conditions.map(s => {
    const m = /([^【]*)【.*以外】/.exec(s);
    const label = m ? "その他" + m[1] : s;
    location_condition_map[label] = s;
    return escape_regexp(label);
  });

  re_location_condition = new RegExp(`(${["全国", ...alternatives].join("|")})`);
  location_condition_map["全国"] = "全国";
}
// Entry point. First learn which work locations were searched for.
extract_location_condition();

// Then collect the job links, beginning with the page that is currently open.
get_list_and_next(d_);
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment