Last active
December 15, 2021 11:06
-
-
Save bepyan/102c48a36cc2ae45eda13ede35a1f7c7 to your computer and use it in GitHub Desktop.
생명의 삶 QT 본문 크롤러입니다.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import axios from "axios" | |
import cheerio from "cheerio" | |
import iconv from "iconv-lite"; | |
/**
 * Builds a date label such as `2021-12-15 (수)`.
 *
 * The calendar date and the weekday are both read from the same instant in
 * the local time zone. Mixing `toISOString()` (always UTC) with `getDay()`
 * (local) yields a mismatched date/weekday pair whenever the local and UTC
 * calendar days differ, so only local getters are used here.
 *
 * @param {Date} [now=new Date()] - Instant to format; defaults to the current time.
 * @returns {string} `YYYY-MM-DD (<Korean weekday abbreviation>)`.
 */
const getDate = (now = new Date()) => {
  const KO_DAY = ['일', '월', '화', '수', '목', '금', '토'];
  const pad = (value) => String(value).padStart(2, '0');
  // getMonth() is zero-based, hence the + 1.
  const date = `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())}`;
  return `${date} (${KO_DAY[now.getDay()]})`;
};
/** Page URLs, keyed by the page kind that `getHTML` accepts. */
const links = {
  bible: "https://www.duranno.com/qt/view/bible.asp",
  commentary: "https://www.duranno.com/qt/view/explain.asp"
};

/**
 * Downloads one of the pages listed in `links` and returns its markup as a
 * JavaScript string.
 *
 * The body is requested as raw bytes (`responseType: 'arraybuffer'`) so that
 * it can be decoded from EUC-KR by iconv-lite instead of being decoded by
 * axios.
 *
 * @param {string} link - A key of `links` ("bible" or "commentary").
 * @returns {Promise<string>} The decoded HTML.
 * @throws {Error} If `link` is not a key of `links`; request failures from
 *   axios propagate unchanged.
 */
const getHTML = async (link) => {
  // Fail early with a clear message rather than sending `url: undefined` to axios.
  if (!Object.prototype.hasOwnProperty.call(links, link)) {
    throw new Error(
      `Unknown link key: ${String(link)} (expected one of: ${Object.keys(links).join(', ')})`
    );
  }

  const response = await axios({
    url: links[link],
    method: "GET",
    headers: {
      'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
      'Accept': '*/*'
    },
    responseEncoding: 'binary',
    responseType: 'arraybuffer'
  });

  return iconv.decode(response.data, "euc-kr");
};
/**
 * Converts the direct children of the `.bible` element into a list of entries.
 *
 * A `<p>` child becomes `{ text }`. Any other child becomes `{ verse, text }`,
 * where `verse` is the numeric value of its `th` text and `text` is its
 * trimmed `td` text.
 *
 * @param {Function} $ - Cheerio instance loaded with the bible page.
 * @returns {Array<{verse?: number, text: string}>} Entries in document order.
 */
const parseBible = ($) =>
  $(".bible")
    .children()
    .toArray()
    .map((node) => {
      const $node = $(node);
      const isParagraph = node.tagName === "p";
      return isParagraph
        ? { text: $node.text().trim() }
        : {
            verse: Number($node.find('th').text()),
            text: $node.find('td').text().trim(),
          };
    });
/**
 * Returns the trimmed text of every direct child of the `.bible` element.
 *
 * @param {Function} $ - Cheerio instance loaded with the commentary page.
 * @returns {string[]} One string per child, in document order.
 */
const parseCommentaries = ($) =>
  $(".bible")
    .children()
    .toArray()
    .map((node) => $(node).text().trim());
/**
 * Fetches the bible and commentary pages and assembles them into one object.
 *
 * The two pages are independent, so they are requested concurrently.
 *
 * @returns {Promise<{title: string, range: string, date: string,
 *   verses: Array<{verse?: number, text: string}>, commentaries: string[]}
 *   | undefined>} The parsed content, or `undefined` if fetching or parsing
 *   failed (the error is logged to stderr and not rethrown).
 */
export const parseContent = async () => {
  try {
    const [bibleHTML, commentaryHTML] = await Promise.all([
      getHTML("bible"),
      getHTML("commentary"),
    ]);
    const $bible = cheerio.load(bibleHTML);
    const $commentary = cheerio.load(commentaryHTML);

    return {
      title: $bible('h1 span').text().trim(),
      range: $bible('h1 em').text().trim(),
      date: getDate(),
      verses: parseBible($bible),
      commentaries: parseCommentaries($commentary),
    };
  } catch (e) {
    // Best-effort contract: report the failure on stderr and resolve to undefined.
    console.error(e);
    return undefined;
  }
};
// Script entry point: run the crawler once and print the parsed result.
const content = await parseContent();
console.log(content);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "web-crawler", | |
"version": "1.0.0", | |
"description": "", | |
"main": "index.js", | |
"scripts": { | |
"start": "node index.js" | |
}, | |
"author": "bepyan", | |
"license": "ISC", | |
"dependencies": { | |
"axios": "^0.24.0", | |
"cheerio": "^1.0.0-rc.10", | |
"iconv-lite": "^0.6.3" | |
}, | |
"type": "module" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment