Skip to content

Instantly share code, notes, and snippets.

@a-kuma3
Last active October 22, 2017 04:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save a-kuma3/29dd8a7344d7ea791068113ad39a6ae7 to your computer and use it in GitHub Desktop.
Save a-kuma3/29dd8a7344d7ea791068113ad39a6ae7 to your computer and use it in GitHub Desktop.
q1508143393
(() => {
// Shorthand for the page's document object.
const d_ = document;

// Output columns in display order: record key -> header label.
const title_list = {
  company_name: "企業名",
  url: "URL",
  employment_pattern: "雇用形態",
  work_location: "勤務地",
  salary: "給与",
  phone_no: "電話番号",
  mail_address: "メールアドレス",
};

// Links to job-detail pages that still have to be fetched (consumed from the front).
let job_list = [];

// Records extracted so far, one per job-detail page.
const data_list = [];

// Regex matching any of the selected work-location search conditions;
// assigned by extract_location_condition().
let re_location_condition;

// Maps the text captured by re_location_condition to the label that is output.
const location_condition_map = {};
/**
 * Returns how long to wait, in milliseconds, before requesting the next
 * job-detail page.
 *
 * The previous body computed a random 2-4 second delay and then
 * unconditionally overwrote it with 0, so the delay was dead code.
 * The default result is still 0; pass `true` to get the random delay back.
 *
 * @param {boolean} [throttle=false] - when true, return a random delay in
 *   the range [2000, 4000) ms instead of 0.
 * @returns {number} delay in milliseconds.
 */
function get_interval_time(throttle = false) {
  if (!throttle) {
    return 0;
  }
  return Math.floor((Math.random() * 2 + 2) * 1000);
}
// Small DOM helpers.
const U = {
  // Creates a <tag> element, copies the properties of `prop` (if any) onto
  // it, appends it to `parent` and returns the result of appendChild.
  append(parent, tag, prop) {
    const element = d_.createElement(tag);
    Object.assign(element, prop || {});
    return parent.appendChild(element);
  },
  // Detaches `e` from its parent node and returns the result of removeChild.
  remove(e) {
    const owner = e.parentNode;
    return owner.removeChild(e);
  },
};
// initialize: remove what a previous run left behind, then install the stylesheet
(() => {
  // Only the first element matching each selector is removed.
  const leftovers = ["#scraping-progress", "#scraping-output", "#scraping-style"];
  for (const selector of leftovers) {
    const stale = d_.querySelector(selector);
    if (stale) {
      U.remove(stale);
    }
  }

  // Rules for the result panel (#scraping-output) and the progress badge (#scraping-progress).
  const css = `
#scraping-output {
position: absolute;
top: 0;
right: 0;
/*width: 70%;*/
/*height: 50%;*/
border: 1px solid green;
background-color: white;
z-index: 9999;
display: inline-block;
max-width: 70%;
}
#scraping-output td,
#scraping-output th {
border: 1px solid gray;
}
#scraping-output .toolbar button {
width: initial;
}
#scraping-progress {
position: fixed;
top: 10px;
right: 10px;
background-color: green;
border: 3px solid white;
border-radius: 10px;
color: white;
padding: 0.5ex 2em;
font-size: 16pt;
}
`;
  U.append(d_.head, "style", { id: "scraping-style", innerHTML: css });
})();
/**
 * Shows "<current> / <total>" in the #scraping-progress badge.
 *
 * The badge is created on first use; the length of job_list at that moment
 * is stored on the badge as the total. The current position is derived from
 * how many entries have left job_list since then.
 */
function display_progress() {
  let badge = d_.getElementById("scraping-progress");
  if (!badge) {
    badge = U.append(d_.body, "div", { id: "scraping-progress" });
    badge.dataset.total = job_list.length;
  }
  const total = badge.dataset.total;
  const current = total - job_list.length + 1;
  badge.innerHTML = `${current} / ${total}`;
}
/**
 * Renders the collected records (data_list) as a table inside the
 * #scraping-output panel, replacing any previous content of the panel.
 *
 * The panel gets a toolbar with two buttons:
 *  - "copy":  copies the table to the clipboard and shows an alert.
 *  - "close": removes the table and swaps the toolbar for an "open" button
 *             that renders the panel again.
 *
 * Cell values come from scraped pages, so they are written with textContent
 * (and a real <a> element for the URL column) rather than innerHTML; markup
 * contained in the scraped text is shown literally instead of being parsed.
 */
function display_data() {
  // The progress badge is no longer needed once results are shown.
  const progress = d_.getElementById("scraping-progress");
  if (progress) {
    U.remove(progress);
  }

  let panel = d_.getElementById("scraping-output");
  if (!panel) {
    panel = U.append(d_.body, "div", { id: "scraping-output" });
  }
  panel.innerHTML = "";

  const toolbar = U.append(panel, "div", { className: "toolbar" });
  const tbl = U.append(panel, "table");

  U.append(toolbar, "button", {
    textContent: "copy",
    onclick: () => {
      // Select only the table so the toolbar button labels are not copied along.
      const range = d_.createRange();
      range.selectNode(tbl);
      const selection = d_.getSelection();
      selection.removeAllRanges();
      selection.addRange(range);
      d_.execCommand("copy");
      alert("コピーしました。");
      selection.removeAllRanges();
    },
  });
  U.append(toolbar, "button", {
    textContent: "close",
    onclick: () => {
      U.remove(tbl);
      toolbar.innerHTML = "";
      U.append(toolbar, "button", {
        textContent: "open",
        onclick: () => {
          display_data();
        },
      });
    },
  });

  const columns = Object.keys(title_list);

  // Header row, in title_list order.
  const header = U.append(tbl, "tr");
  columns.forEach(k => {
    U.append(header, "th", { textContent: title_list[k] });
  });

  // Per-column cell fillers; columns without an entry use "default".
  const fill_cell_map = {
    default: (td, v) => {
      td.textContent = (v || "").toString();
    },
    url: (td, v) => {
      if (!v) {
        return;
      }
      U.append(td, "a", {
        href: v,
        target: "_blank",
        rel: "noopener",
        textContent: v,
      });
    },
  };

  data_list.forEach(info => {
    const row = U.append(tbl, "tr");
    columns.forEach(k => {
      const fill_cell = fill_cell_map[k] || fill_cell_map["default"];
      fill_cell(U.append(row, "td"), info[k]);
    });
  });

  panel.scrollIntoView();
}
/**
 * Queues every job-detail link found in `doc`, then either follows the
 * pagination "next" link or, when there is none, starts fetching the
 * queued detail pages.
 *
 * @param {Document} doc - a search-result page.
 */
function get_list_and_next(doc) {
  const found = Array.from(doc.querySelectorAll(".rnn-jobOfferList__item a.rnn-button"));
  job_list = [...job_list, ...found];

  const next_link = doc.querySelector(".rnn-pagination__next a");
  if (!next_link) {
    // Debug aid: uncomment to keep only the last 10 queued links.
    // job_list.splice(0, job_list.length - 10);
    store_data_and_next();
    return;
  }
  get_list(next_link.href);
}
/**
 * Fetches a search-result page as a parsed document and passes it to
 * get_list_and_next().
 *
 * If the page cannot be loaded (network error, HTTP status >= 400, or no
 * parsed document), the failure is logged and the detail phase is started
 * with the links collected so far. Previously such a failure stopped the
 * whole run without any message.
 *
 * @param {string} url - address of the search-result page.
 */
function get_list(url) {
  const xhr = new XMLHttpRequest();

  // Log the problem, then continue with whatever has been queued already.
  const give_up = reason => {
    console.error("get_list", url, reason);
    store_data_and_next();
  };

  xhr.onload = () => {
    if (xhr.status < 400 && xhr.response) {
      get_list_and_next(xhr.response);
    } else {
      give_up(`HTTP ${xhr.status}`);
    }
  };
  xhr.onerror = () => {
    give_up("network error");
  };
  xhr.open('GET', url, true);
  xhr.responseType = 'document';
  xhr.send(null);
}
/*
Fields to extract from each job page:
 - company name
 - URL of the page when the "求人情報" (job information) tab is open
 - employment type (the label at the top left, e.g. 正社員)
 - work location: if the work-location text mentions 札幌市 (e.g. a Sapporo
   branch), output 札幌市; otherwise leave it blank
 - salary: the line that states the monthly pay; remarks on the following
   lines and annual-income examples (年収例) are not wanted
 - contact phone number and mail address (blank when neither is listed)
*/
// Field extractors. Each one receives the table cell (element) that holds the
// raw value and returns the extracted string, or "" when nothing matches.
const EXer = {
  // Returns the output label of the first selected search location that is
  // mentioned in the cell. Everything from "リクナビNEXT上の地域分類では" onward is
  // dropped before matching.
  work_location: e => {
    // No location condition was built, so there is nothing to match against.
    if (!re_location_condition) {
      return "";
    }
    const text = e.textContent.replace(/リクナビNEXT上の地域分類では(.|\n)*/, "");
    const m = re_location_condition.exec(text);
    return (m && location_condition_map[m[1]]) || "";
  },
  // Returns the first line containing both 月 and 円; failing that, the first
  // line with 年収/年俸 followed by 円. Text from "年収例" onward is ignored.
  salary: e => {
    const lines = e.textContent.replace(/年収例(.|\n)*/m, "").split("\n");
    return (lines.find(line => /^(?=.*月)(?=.*円)/.test(line)) ||
      lines.find(line => /(年収|年俸).*円/.test(line)) ||
      "").trim();
  },
  // Returns the first phone-number-like string in the cell's HTML, including
  // an adjacent "(代)" / "(代表)" marker written with either half-width or
  // full-width parentheses.
  phone_no: e => {
    const m = /([(（](代|代表)[)）])?\b\d{2,4}-?(\d{2,4}-\d{3,4}|\d{6})(\b|\n)([(（](代|代表)[)）])?/.exec(e.innerHTML);
    return m ? m[0] : "";
  },
  // Returns the first mail-address-like string in the cell's HTML.
  mail_address: e => {
    const m = /\b[\w.-]+@[\w.-]+\b/.exec(e.innerHTML);
    return m ? m[0] : "";
  },
};
/**
 * Extracts one record from a job page laid out with the rnn-* classes.
 *
 * Missing pieces no longer throw: a missing company-name element yields "?",
 * and a header cell without a following value cell is skipped.
 *
 * @param {Document} d - the job page.
 * @returns {Object} record with company_name, info_header (all header
 *   labels), employment_pattern (the first header label) and, for each
 *   matching detail row found, work_location, salary, phone_no, mail_address.
 */
function extract_recruitment(d) {
  const data = {};

  const name_node = d.querySelector(".rnn-offerCompanyName");
  data.company_name = (name_node && name_node.textContent) || "?";

  data.info_header = Array.from(d.querySelectorAll(".rnn-offerInfoHeader__iconList > li > span.rnn-label--large")).map(s => s.textContent);
  data.employment_pattern = data.info_header[0];

  Array.from(d.querySelectorAll(".rnn-offerInfoMain table.rnn-detailTable th.rnn-col-2")).forEach(th => {
    // The value sits in the element right after the header cell.
    const cell = th.nextElementSibling;
    if (!cell) {
      return;
    }
    const label = th.textContent;
    if (/勤務地/.test(label)) {
      data.work_location = EXer.work_location(cell);
    } else if (/給与/.test(label)) {
      data.salary = EXer.salary(cell);
    } else if (/連絡先/.test(label)) {
      data.phone_no = EXer.phone_no(cell);
      data.mail_address = EXer.mail_address(cell);
    }
  });
  return data;
}
/**
 * Extracts one record from a "転職支援サービス" (career-change support service)
 * page, which uses a different layout: QWZ* classes for the header and a
 * definition list (dl.recruit_details) for the details.
 *
 * Missing pieces no longer throw: a missing company-name element yields "?",
 * and a <dt> without a following element is skipped.
 *
 * @param {Document} d - the job page.
 * @returns {Object} record with company_name, info_header (alt texts of the
 *   header images), employment_pattern (the first of them) and, for each
 *   matching detail entry found, work_location, salary, phone_no, mail_address.
 */
function extract_recruitment_2(d) {
  const data = {};

  const name_node = d.querySelector(".QWZ14-140-B");
  data.company_name = (name_node && name_node.textContent) || "?";

  data.info_header = Array.from(d.querySelectorAll(".QWZ12-140-B img")).map(s => s.alt);
  data.employment_pattern = data.info_header[0];

  Array.from(d.querySelectorAll("dl.recruit_details dt")).forEach(dt => {
    // The value sits in the element right after the term.
    const cell = dt.nextElementSibling;
    if (!cell) {
      return;
    }
    const label = dt.textContent;
    if (/勤務地/.test(label)) {
      data.work_location = EXer.work_location(cell);
    } else if (/給与/.test(label)) {
      data.salary = EXer.salary(cell);
    } else if (/連絡先/.test(label)) {
      data.phone_no = EXer.phone_no(cell);
      data.mail_address = EXer.mail_address(cell);
    }
  });
  return data;
}
/**
 * Advances the detail-fetching loop by one step.
 *
 * Refreshes the progress badge, stores `data` when one is given, and then
 * either schedules the fetch of the next queued job page (after the delay
 * returned by get_interval_time()) or, when the queue is empty, shows the
 * result table.
 *
 * @param {Object} [data] - record extracted from the page just processed.
 */
function store_data_and_next(data) {
  display_progress();
  if (data) {
    data_list.push(data);
  }

  if (job_list.length === 0) {
    display_data();
    return;
  }

  // The queue entry is taken only when the timer fires.
  const fetch_next = () => {
    const next_job = job_list.shift();
    get_detail(next_job.href);
  };
  setTimeout(fetch_next, get_interval_time());
}
/**
 * Fetches one job page, extracts a record from it and hands the record to
 * store_data_and_next(), which continues with the next queued job.
 *
 * Page handling:
 *  - the selected tab is "求人情報" (job information): extract_recruitment();
 *  - another tab is selected: follow the first tab link and try again;
 *  - no tab menu at all: treated as a "転職支援サービス" (career-change support
 *    service) page and passed to extract_recruitment_2().
 *
 * Any failure (network error, HTTP status >= 400, missing document, missing
 * tab link, a tab link pointing back at the same URL, or an exception thrown
 * by an extractor) is logged and the job is skipped, so that one bad page no
 * longer stalls the queue and loses the records collected so far.
 *
 * @param {string} url - address of the job page.
 */
function get_detail(url) {
  const xhr = new XMLHttpRequest();

  // Log the problem and move on to the next queued job without a record.
  const skip = reason => {
    console.error("get_detail", url, reason);
    store_data_and_next();
  };

  // Run an extractor; only the extraction itself is guarded, so an error
  // raised later by store_data_and_next() is not mistaken for a bad page.
  const extract_with = (extractor, doc) => {
    let data;
    try {
      data = extractor(doc);
    } catch (err) {
      skip(err);
      return;
    }
    data.url = xhr.responseURL;
    store_data_and_next(data);
  };

  xhr.onload = () => {
    const doc = xhr.response;
    if (xhr.status >= 400 || !doc) {
      skip(`HTTP ${xhr.status}`);
      return;
    }

    const selected_tab = doc.querySelector("ul.rnn-tabMenu__navi .is-current");
    if (!selected_tab) {
      // may be a 転職支援サービス page
      extract_with(extract_recruitment_2, doc);
      return;
    }
    if (/求人情報/.test(selected_tab.textContent)) {
      extract_with(extract_recruitment, doc);
      return;
    }

    const link = selected_tab.parentNode.parentNode.querySelector("a.rnn-tabMenu__navi__itemlink");
    if (!link) {
      skip("tab link not found");
    } else if (link.href === url) {
      // Requesting the same address again would loop forever.
      skip("tab link points back to the same page");
    } else {
      get_detail(link.href);
    }
  };
  xhr.onerror = () => {
    skip("network error");
  };
  xhr.open('GET', url, true);
  xhr.responseType = 'document';
  xhr.send(null);
}
/**
 * Builds re_location_condition and location_condition_map from the
 * "勤務地" (work location) search condition shown on the current page.
 *
 * Each selected location label becomes a regex alternative. A label of the
 * form "X【…以外】" is matched as "その他X" and mapped back to the full label;
 * every other label maps to itself. "全国" is always included.
 *
 * Labels are escaped before being put into the regex, so characters such as
 * parentheses are matched literally. When the page has no 勤務地 condition
 * the regex still gets built (matching only "全国") instead of being left
 * undefined.
 */
function extract_location_condition() {
  // Escapes the characters that have a special meaning in a regular expression.
  const escape_regexp = text => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const title = Array.from(d_.querySelectorAll(".rnn-conditionalSearch__title"))
    .find(e => /勤務地/.test(e.textContent));

  const labels = title
    ? Array.from(title.parentNode.querySelectorAll(".rnn-conditionalSearch__condition .rnn-conditionalSearch__condition__result__item"))
      .map(item => item.textContent.trim())
      .filter(label => label !== "")
    : [];

  const keys = ["全国"];
  location_condition_map["全国"] = "全国";
  labels.forEach(label => {
    const m = /([^【]*)【.*以外】/.exec(label);
    const key = m ? "その他" + m[1] : label;
    location_condition_map[key] = label;
    keys.push(key);
  });

  re_location_condition = new RegExp(`(${keys.map(escape_regexp).join("|")})`);
}
// Entry point.
// 1) Build the work-location matcher from the search conditions on this page.
extract_location_condition();
// Debug aids: uncomment to inspect what was built.
// console.log(re_location_condition);
// console.log(location_condition_map);
// 2) Start collecting job links, beginning with the current document.
get_list_and_next(d_);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment