Skip to content

Instantly share code, notes, and snippets.

@CS6
Last active September 18, 2020 14:07
Show Gist options
  • Save CS6/2d616acfb51324f3e5b43e5ebb113be2 to your computer and use it in GitHub Desktop.
Save CS6/2d616acfb51324f3e5b43e5ebb113be2 to your computer and use it in GitHub Desktop.
GetIThome_ironman 爬出該組選手發文紀錄
var request = require("request");
var cheerio = require("cheerio");
// Crawl the posting history of the contestants configured below.
// Team sign-up page; GetTheTemaDetail() reads this to find the members to crawl.
const tema_url = "https://ithelp.ithome.com.tw/2020-12th-ironman/signup/team/124";
// A single contestant's series page. Not referenced by any code visible in this file.
const user_url = "https://ithelp.ithome.com.tw/users/20126642/ironman/3419";

// Crawl the team ONCE and print the same result in both forms. Calling
// GetTheTemaDetail() separately for each print would repeat every HTTP request.
// The label that mentions JSON is paired with the serialized string; the plain
// label is paired with the live object.
; (async () => {
  const teamDetail = await GetTheTemaDetail();
  console.log('擷取此網址的隊伍鐵人賽文章:', teamDetail);
  console.log('擷取此網址的隊伍鐵人賽文章 JSON:', JSON.stringify(teamDetail));
})().catch((error) => {
  // Without a handler a failed crawl surfaces only as an unhandled rejection.
  console.error('擷取錯誤:', error);
  process.exitCode = 1;
});
/**
 * Loads the team described by `tema_url` and attaches each member's article
 * list to that member's entry under the `doclist` key.
 *
 * Members are processed one after another, in the order they appear in
 * `numbers`. The member objects are mutated in place.
 *
 * @returns {Promise<object>} whatever getTheMyTemaNumber() resolved with, after
 *   every entry of its `numbers` array has gained a `doclist` property.
 */
async function GetTheTemaDetail() {
  const team = await getTheMyTemaNumber(tema_url);

  // Debug output: the raw member list and how many members were found.
  console.log(team.numbers);
  console.log(team.numbers.length);

  for (const member of team.numbers) {
    const articles = await GetIronmanDocList(member.link);
    Object.assign(member, { doclist: articles });
  }

  return team;
}
/**
 * Fetches a user's article-list page and scrapes the profile header plus one
 * entry per `.profile-list__content` element.
 *
 * @param {string|object} option - Forwarded unchanged to `request` (a URL or an
 *   options object such as `{ url, headers }`).
 * @returns {Promise<{name: string, img: (string|undefined), title: string,
 *   doclist: Array<{title: string, badges: string[], tags: string[], link: string}>}>}
 *   All text fields have every whitespace character removed by stringFMT.
 *   `link` is an empty string when the title link has no `href`.
 *   Rejects with a string message when the request fails or the page cannot be
 *   parsed.
 */
function GetMyDocsNew(option) {
  return new Promise((resolve, reject) => {
    request(option, (error, _response, body) => {
      if (error) {
        return reject('GetMyDocsNew Error.');
      }

      // Anything thrown in this callback would otherwise escape as an uncaught
      // exception and leave the promise pending forever.
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);
        const img = $('.profile-header').find('img').attr('src');
        const name = $('.profile-header__name').text();
        const main = $('.board.leftside.profile-main');

        // Collects the whitespace-stripped text of every match under `scope`.
        const textsOf = (scope, selector) =>
          scope
            .find(selector)
            .map((_, node) => stringFMT($(node).text()))
            .get();

        const doclist = main
          .find('.profile-list__content')
          .map((_, item) => {
            const entry = $(item);
            const titleLink = entry.find('.qa-list__title-link');
            return {
              title: stringFMT(titleLink.text()),
              badges: textsOf(entry, '.qa-list__title .title-badge'),
              tags: textsOf(entry, '.tag'),
              // attr() yields undefined when the attribute is absent.
              link: stringFMT(titleLink.attr('href') || ''),
            };
          })
          .get();

        resolve({
          name: stringFMT(name),
          img: img,
          title: stringFMT(main.find('h2').text()),
          doclist: doclist,
        });
      } catch (parseError) {
        const detail = parseError instanceof Error ? parseError.message : String(parseError);
        reject(`GetMyDocsNew Error: ${detail}`);
      }
    });
  });
}
// Demo run: scrape one user's article list and print it as JSON.
(async () => {
  const option = {
    url: 'https://ithelp.ithome.com.tw/users/20130369/articles',
  };

  // Session cookies are credentials and must not be committed to source.
  // Supply them through the ITHELP_COOKIE environment variable when needed.
  // NOTE(review): presumably this page is readable without a session - confirm.
  if (process.env.ITHELP_COOKIE) {
    option.headers = { 'Cookie': process.env.ITHELP_COOKIE };
  }

  console.log("擷取此人的文章 New: ", JSON.stringify(await GetMyDocsNew(option)));
})().catch((error) => {
  // GetMyDocsNew rejects on failure; report it instead of leaving an
  // unhandled rejection.
  console.error('擷取錯誤:', error);
  process.exitCode = 1;
});
/**
 * Fetches a team page and scrapes the team summary together with one entry per
 * `.team-detail-member` element.
 *
 * @param {string|object} url - Forwarded unchanged to `request`.
 * @returns {Promise<object|string>} On success an object with the keys `tema`,
 *   `img`, `info`, `start`, `state`, `detail` and `numbers` (the member list;
 *   each member has `title`, `type`, `user`, `link`, `img`, `icon`).
 *   On any failure the promise still RESOLVES, with a string that starts with
 *   "擷取錯誤:" followed by the failure reason, so callers must check the type
 *   of the result.
 */
function getTheMyTemaNumber(url) {
  return new Promise((resolve, reject) => {
    request(url, (error, _response, body) => {
      if (error) {
        // Stop here: falling through would run the parser on an undefined body
        // and throw outside the promise.
        return reject('GetTheTemaNumber Error.');
      }

      // Route parser failures into the promise instead of letting them escape
      // the callback as uncaught exceptions.
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);

        const members = $('.team-detail-lists')
          .find('.team-detail-member')
          .map((_, el) => {
            const member = $(el);
            return {
              title: member.find('.contestants-list__title').text(),
              type: stringFMT(member.find('.group-badge__name').text()),
              user: stringFMT(member.find('.team-leader-info__name').text()),
              link: member.find('.contestants-list__title').attr('href'),
              img: member.find('img').attr('src'),
              icon: member.find('.group-badge__img').attr('src'),
            };
          })
          .get();

        // Every team-level field lives under the same frame element.
        const frame = $('.border-frame.border-frame--team.clearfix');
        resolve({
          tema: stringFMT(frame.find('.team-detail__title').text()),
          img: frame.find('#teamicon').attr('src'),
          info: frame.find('.team-detail__badge').text(),
          start: frame.find('.team-dashboard__date').text(),
          state: frame
            .find('.team-dashboard__text.team-dashboard__text--none')
            .text(),
          detail: frame.find('.team-dashboard__num').text(),
          numbers: members,
        });
      } catch (parseError) {
        reject(parseError);
      }
    });
  }).catch(error => ("擷取錯誤:" + error));
}
/**
 * Fetches a contestant's series page and scrapes its list of articles.
 *
 * Status labels (`.ir-qa-list__status`) and title links
 * (`.qa-list__title-link`) are collected by two separate queries and paired by
 * position, so the n-th article gets the n-th status label as its `day`.
 *
 * @param {string|object} url - Forwarded unchanged to `request`.
 * @returns {Promise<Array<{title: string, day: (string|undefined), link: string}>|string>}
 *   On success the article list, with all whitespace stripped from each field.
 *   `link` is an empty string when the title link has no `href`.
 *   On any failure the promise still RESOLVES, with a string that starts with
 *   "擷取錯誤:" followed by the failure reason.
 */
function GetIronmanDocList(url) {
  return new Promise((resolve, reject) => {
    request(url, (error, _response, body) => {
      if (error) {
        // Stop here: falling through would run the parser on an undefined body
        // and throw outside the promise.
        return reject('GetIronmanDocList Error.');
      }

      // Route parser failures into the promise instead of letting them escape
      // the callback as uncaught exceptions.
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);

        const dayList = $('.qa-list.profile-list.ir-profile-list')
          .find('.ir-qa-list__status')
          .map((_, el) => stringFMT($(el).text()))
          .get();

        // Debug output: the status label of the last listed article.
        const LastDay = dayList.length > 0 ? dayList[dayList.length - 1] : '';
        console.log('LastDay', LastDay);

        const articles = $('.board.leftside.profile-main')
          .find('.qa-list__title-link')
          .map((i, el) => {
            const titleLink = $(el);
            return {
              'title': stringFMT(titleLink.text()),
              'day': dayList[i],
              // attr() yields undefined when the attribute is absent.
              'link': stringFMT(titleLink.attr('href') || ''),
            };
          })
          .get();

        resolve(articles);
      } catch (parseError) {
        reject(parseError);
      }
    });
  }).catch(error => ("擷取錯誤:" + error));
}
/**
 * Removes every whitespace character (spaces, tabs, line breaks, and so on)
 * from scraped text, including whitespace between words.
 *
 * @param {*} text - Value to clean. `null` and `undefined` (what cheerio's
 *   `.attr()` returns for a missing attribute) produce an empty string instead
 *   of throwing; other non-string values are converted with `String()`.
 * @returns {string} The input with all whitespace removed.
 */
const stringFMT = (text) =>
  text == null ? "" : String(text).replace(/\s+/g, "");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment