Skip to content

Instantly share code, notes, and snippets.

@ttsukagoshi
Last active February 22, 2021 01:17
Show Gist options
  • Save ttsukagoshi/620dede952fa7384e098ec5f2d9385f3 to your computer and use it in GitHub Desktop.
Save ttsukagoshi/620dede952fa7384e098ec5f2d9385f3 to your computer and use it in GitHub Desktop.
秋田県庁の部署一覧( https://www.pref.akita.lg.jp/pages/genre/organization )をGoogleスプレッドシートに落とし込むためのGoogle Apps Script
/* global SpreadsheetApp, UrlFetchApp */
/* exported fetchAkitaDeps */
const TARGET_URL = 'https://www.pref.akita.lg.jp/pages/genre/organization'; // クロールするサイトのURL
const SHEET_NAME = 'List'; // データを挿入するシート名
const INSERT_DATA_POS = { row: 4, col: 1 }; // データの挿入開始位置
const INSERT_EDIT_TIME_POS = { row: 1, col: 4 }; // 最終更新日時の挿入位置
const REGEXP_DEPT = RegExp('<h3 class\=\"c-category-panel-item__heading\"><a href\=\"(?<deptUrl>[^\"]+)\">(?<deptName>[^の]+)の関連ジャンル<\/a><\/h3>[ ]*?<ul class\=\"c-category-panel-item__body c-gallery-list c-gallery-list--2\">(?<sectSource>.*?)<\/ul>', 'g'); // 部局名、URLを検出
const REGEXP_SECT = RegExp('<li class\=\"c-gallery-list__item\"><a href=\"(?<sectUrl>[^\"]+)\">(?<sectName>.*?)<\/a><\/li>', 'g'); // 課名、URLを検出
/**
* 秋田県庁のウェブサイトから部署名一覧を取得してスプレッドシートに落とし込む。
*/
function fetchAkitaDeps() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName(SHEET_NAME);
var tz = ss.getSpreadsheetTimeZone();
var now = new Date();
var editTime = `Last Updated: ${Utilities.formatDate(now, tz, 'yyyy-MM-dd HH:mm:ss Z')}`;
// Clear existing data
sheet.getRange(
INSERT_DATA_POS.row,
INSERT_DATA_POS.col,
sheet.getLastRow() - INSERT_DATA_POS.row + 1,
sheet.getLastColumn() - INSERT_DATA_POS.col + 1
).clearContent();
// Get new content
var content = UrlFetchApp.fetch(TARGET_URL).getContentText();
var contentLine = content.replace(/\r|\n/g, '');
var depts = [...contentLine.matchAll(REGEXP_DEPT)];
var deptsArr = [];
depts.forEach(dept => {
let [deptUrl, deptName, sectSource] = [dept.groups.deptUrl, dept.groups.deptName, dept.groups.sectSource];
let sectNames = [...sectSource.matchAll(REGEXP_SECT)];
sectNames.forEach(sect => deptsArr.push([deptName, deptUrl, sect.groups.sectName, sect.groups.sectUrl]));
});
sheet.getRange(INSERT_DATA_POS.row, INSERT_DATA_POS.col, deptsArr.length, deptsArr[0].length).setValues(deptsArr);
sheet.getRange(INSERT_EDIT_TIME_POS.row, INSERT_EDIT_TIME_POS.col)
.setValue(editTime);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment