-
-
Save CS6/2d616acfb51324f3e5b43e5ebb113be2 to your computer and use it in GitHub Desktop.
GetIThome_ironman 爬出該組選手發文紀錄
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var request = require("request"); | |
var cheerio = require("cheerio"); | |
// Scrape the posting history of the contestants on one Ironman team.
const tema_url = "https://ithelp.ithome.com.tw/2020-12th-ironman/signup/team/124";
const user_url = "https://ithelp.ithome.com.tw/users/20126642/ironman/3419";

// Crawl the team a single time and print the result twice: once serialized
// with JSON.stringify and once as the raw object. A failure is logged rather
// than left as an unhandled promise rejection.
(async () => {
  try {
    const team = await GetTheTemaDetail();
    console.log('擷取此網址的隊伍鐵人賽文章:', JSON.stringify(team));
    console.log('擷取此網址的隊伍鐵人賽文章 JSON:', team);
  } catch (error) {
    console.error('擷取錯誤:', error);
  }
})();
/**
 * Loads the team page at `tema_url` and attaches an article list to each member.
 *
 * Every entry of `tema.numbers` gains a `doclist` property holding whatever
 * `GetIronmanDocList(member.link)` resolves to. Member pages are requested one
 * at a time, in list order.
 *
 * @returns {Promise<object>} The object resolved by `getTheMyTemaNumber`, with
 *   `doclist` added to each element of `numbers`.
 * @throws {Error} When the team lookup did not produce an object with a
 *   `numbers` array.
 */
async function GetTheTemaDetail() {
  const tema = await getTheMyTemaNumber(tema_url);

  // getTheMyTemaNumber resolves to an error *string* when it fails, so check
  // the shape before reading `numbers` to surface a meaningful error.
  if (!tema || !Array.isArray(tema.numbers)) {
    const reason = typeof tema === 'string' ? tema : 'unexpected response shape';
    throw new Error(`GetTheTemaDetail: ${reason}`);
  }

  console.log(tema.numbers);
  console.log(tema.numbers.length);

  for (const member of tema.numbers) {
    member.doclist = await GetIronmanDocList(member.link);
  }
  return tema;
}
/**
 * Fetches a user's article listing page and extracts the profile header
 * together with the listed articles.
 *
 * @param {string|object} option - Passed unchanged to `request` as its first
 *   argument (a URL or an options object such as `{ url, headers }`).
 * @returns {Promise<{name: string, img: (string|undefined), title: string, doclist: Array<{title: string, badges: string[], tags: string[], link: string}>}>}
 *   Rejects with the string 'GetTheTemaNumber Error.' when the request fails,
 *   or with the thrown error when parsing the response fails.
 */
function GetMyDocsNew(option) {
  return new Promise((resolve, reject) => {
    request(option, (error, response, body) => {
      if (error) {
        return reject('GetTheTemaNumber Error.');
      }
      // An exception thrown inside this callback would bypass the promise and
      // surface as an uncaught exception, so route it through reject().
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);
        const main = $('.board.leftside.profile-main');

        const doclist = main
          .find('.profile-list__content')
          .map((_, el) => {
            const item = $(el);
            const titleLink = item.find('.qa-list__title-link');
            return {
              title: stringFMT(titleLink.text()),
              badges: item
                .find('.qa-list__title .title-badge')
                .map((_, badge) => stringFMT($(badge).text()))
                .get(),
              tags: item
                .find('.tag')
                .map((_, tag) => stringFMT($(tag).text()))
                .get(),
              // .attr() yields undefined when the link has no href.
              link: stringFMT(titleLink.attr('href') || ''),
            };
          })
          .get();

        resolve({
          name: stringFMT($('.profile-header__name').text()),
          img: $('.profile-header').find('img').attr('src'),
          title: stringFMT(main.find('h2').text()),
          doclist,
        });
      } catch (parseError) {
        reject(parseError);
      }
    });
  });
}
// Demo run: fetch one user's article list and print it as JSON. A failure is
// logged instead of becoming an unhandled promise rejection.
// NOTE(review): the Cookie header below is a hard-coded browser session
// (_token, XSRF-TOKEN, ithelp2016_desktop). Consider supplying it from
// configuration rather than committing it to source control.
(async () => {
  try {
    const docs = await GetMyDocsNew({
      url: 'https://ithelp.ithome.com.tw/users/20130369/articles',
      headers: {
        'Cookie': ` __utma=147059567.1935332645.1599489542.1599489542.1599489542.1; __utmc=147059567; __utmz=147059567.1599489542.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _token=07f67b5a86c2402c9eed535b9ca07ad064a456b3; _ga=GA1.3.1935332645.1599489542; _gid=GA1.3.359045945.1599490256; __utmt=1; XSRF-TOKEN=eyJpdiI6IlpRVTBKYnZXY2w5dDl1UmMyTllOT3c9PSIsInZhbHVlIjoiUzdURUZTWGY0RzhCVnpTY1daeW8wR0RTWSt0aXFxT3dXQUVZXC9Dd25iTlRwS2tUZzRGK3BOSmNIUlBFOEVWUmQiLCJtYWMiOiI4NTQyM2E5Y2NhOTZmZDFhN2QwNWIzZjE2MjAwZjdmNjdhOTVmMDBlYTIxOTY1MmViMTQ0YTQzNzQ4ZjBkMGQzIn0%3D; ithelp2016_desktop=eyJpdiI6IlhQVStKbTRTSHZqMkpKY29nM3RJWXc9PSIsInZhbHVlIjoiaHZ0bTVxc1c5czZ6cExzTnNmYzRMY0lrYzJYWUtndklCNVVRaGFheTQzRnZyaFdlNDBFeWN5SjIxUTVCRUV4cyIsIm1hYyI6IjljY2NjODkyNTdlMTRkNzkwNTE5NWYwMWUxNTZjODk4NDJjODlhODA0ZDRhY2ZjNTI0MmZhMTU1ODRkOGRhYWIifQ%3D%3D; __utmb=147059567.21.10.1599489542`,
      },
    });
    console.log("擷取此人的文章 New: ", JSON.stringify(docs));
  } catch (error) {
    console.error("擷取錯誤:", error);
  }
})();
/**
 * Fetches a team page and extracts the team summary plus its member list.
 *
 * @param {string} url - Team page URL, passed to `request`.
 * @returns {Promise<object|string>} On success, an object
 *   `{ tema, img, info, start, state, detail, numbers }` where `numbers` holds
 *   one `{ title, type, user, link, img, icon }` entry per
 *   `.team-detail-member` element. On failure the trailing `.catch` turns the
 *   rejection into a resolved string of the form "擷取錯誤:" + reason, so
 *   callers must check the result's type.
 */
function getTheMyTemaNumber(url) {
  return new Promise((resolve, reject) => {
    request(url, (error, response, body) => {
      if (error) {
        // Return here: otherwise parsing would continue with an undefined
        // body and throw outside the promise.
        return reject('GetTheTemaNumber Error.');
      }
      // Exceptions raised in this callback would not reach the promise on
      // their own, so forward them through reject().
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);

        const members = $('.team-detail-lists')
          .find('.team-detail-member')
          .map((_, el) => {
            const member = $(el);
            const titleLink = member.find('.contestants-list__title');
            return {
              title: titleLink.text(),
              type: stringFMT(member.find('.group-badge__name').text()),
              user: stringFMT(member.find('.team-leader-info__name').text()),
              link: titleLink.attr('href'),
              img: member.find('img').attr('src'),
              icon: member.find('.group-badge__img').attr('src'),
            };
          })
          .get();

        // All summary fields live under the same team frame element.
        const frame = $('.border-frame.border-frame--team.clearfix');
        resolve({
          tema: stringFMT(frame.find('.team-detail__title').text()),
          img: frame.find('#teamicon').attr('src'),
          info: frame.find('.team-detail__badge').text(),
          start: frame.find('.team-dashboard__date').text(),
          state: frame
            .find('.team-dashboard__text.team-dashboard__text--none')
            .text(),
          detail: frame.find('.team-dashboard__num').text(),
          numbers: members,
        });
      } catch (parseError) {
        reject(parseError);
      }
    });
  }).catch(error => ("擷取錯誤:" + error));
}
/**
 * Fetches a contestant's Ironman series page and lists its articles.
 *
 * Logs the last status label found on the page ('LastDay') to the console.
 *
 * @param {string} url - Series page URL, passed to `request`.
 * @returns {Promise<Array<{title: string, day: (string|undefined), link: string}>|string>}
 *   On success, one entry per `.qa-list__title-link` element; `day` is the
 *   `.ir-qa-list__status` text at the same index (undefined when there are
 *   fewer status elements than links). On failure the trailing `.catch` turns
 *   the rejection into a resolved string of the form "擷取錯誤:" + reason.
 */
function GetIronmanDocList(url) {
  return new Promise((resolve, reject) => {
    request(url, (error, response, body) => {
      if (error) {
        // Return here: otherwise parsing would continue with an undefined
        // body and throw outside the promise.
        return reject('GetTheTemaNumber Error.');
      }
      // Exceptions raised in this callback would not reach the promise on
      // their own, so forward them through reject().
      try {
        // Parse the HTML with cheerio.
        const $ = cheerio.load(body);

        const dayList = $('.qa-list.profile-list.ir-profile-list')
          .find('.ir-qa-list__status')
          .map((_, el) => stringFMT($(el).text()))
          .get();
        const lastDay = dayList.length > 0 ? dayList[dayList.length - 1] : '';
        console.log('LastDay', lastDay);

        const docs = $('.board.leftside.profile-main')
          .find('.qa-list__title-link')
          .map((i, el) => {
            const anchor = $(el);
            return {
              title: stringFMT(anchor.text()),
              day: dayList[i],
              // .attr() yields undefined when the link has no href.
              link: stringFMT(anchor.attr('href') || ''),
            };
          })
          .get();

        resolve(docs);
      } catch (parseError) {
        reject(parseError);
      }
    });
  }).catch(error => ("擷取錯誤:" + error));
}
/**
 * Removes every whitespace character (spaces, tabs, newlines, ...) from a value.
 *
 * @param {*} text - Value to clean. `null` and `undefined` (e.g. the result of
 *   reading a missing attribute) are treated as an empty string; anything
 *   else is converted with `String()`.
 * @returns {string} The input with all whitespace removed.
 */
const stringFMT = (text) =>
  (text == null ? '' : String(text)).replace(/\s+/g, '');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment