Last active
April 15, 2017 03:05
-
-
Save barretlee/8a4e3a01b53f220ea7e65edb4edd356d to your computer and use it in GitHub Desktop.
xmq data spider
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs'); | |
const request = require('request'); | |
// Endpoint listing the topics of the group being scraped.
const BASE = 'https://api.xiaomiquan.com/v1.2/groups/4512888248/topics';
// Values sent as the `origin` and `referer` request headers.
const ORIGIN = 'https://wx.xiaomiquan.com';
const REFERER = 'https://wx.xiaomiquan.com/dweb/';
// Header *value* only: the header name is supplied by the `'User-Agent'` key
// of the request options, so it must not be repeated inside the value.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36';
// NOTE(review): 'xxx' is presumably a placeholder for a real token — confirm before running.
const AUTHORIZATION = 'xxx';
// Query-string prefix requesting 20 topics per page.
const pageSize = '?count=20';

/**
 * Builds the `end_time` query-string fragment used for pagination.
 *
 * @param {string} [t] - Timestamp to page back from.
 * @returns {string} `&end_time=<url-encoded t>`, or an empty string when `t`
 *   is falsy (first page), so the result can always be concatenated directly.
 */
const end_time = (t) => (t ? `&end_time=${encodeURIComponent(t)}` : '');
/**
 * Downloads every topic of the group behind `BASE`, dumps the raw list to
 * `./xmq-data.json`, then writes a condensed question/answer map to
 * `./xmq-filter.json`.
 */
class Tool {
  /** Starts with an empty topic list and immediately kicks off the crawl. */
  constructor() {
    this.topics = [];
    this.fetch();
  }

  /**
   * Fetches one page of topics and recurses until the listing is exhausted.
   *
   * @param {string} [st] - `create_time` of the last topic of the previous
   *   page; sent as `end_time`. Omitted for the first page.
   */
  fetch(st) {
    const url = `${BASE}${pageSize}${end_time(st) || ''}`;
    if (st) console.log(st);
    console.log(`Fetch: ${url}`);
    request({
      method: 'GET',
      url,
      headers: {
        origin: ORIGIN,
        referer: REFERER,
        'User-Agent': UA,
        authorization: AUTHORIZATION,
        'x-request-id': st || 1
      }
    }, (err, res, body) => {
      if (err) return console.log(err);

      // A non-JSON body (HTML error page, empty body, ...) must not crash the process.
      let payload;
      try {
        payload = JSON.parse(body);
      } catch (e) {
        console.log(`Fetch failed: response is not valid JSON (${e.message})`);
        return;
      }

      const topics = payload && payload.resp_data && payload.resp_data.topics;
      if (!Array.isArray(topics)) {
        // Report the payload instead of stopping silently (e.g. rejected authorization).
        console.log(`Fetch failed: unexpected response ${JSON.stringify(payload)}`);
        return;
      }

      const len = topics.length;
      console.log(`Len: ${len}`);
      // NOTE(review): a page of 0 or 1 topics ends the crawl and is not stored —
      // presumably because `end_time` is inclusive and such a page only repeats
      // the previous page's last topic. Confirm against the API.
      if (len > 1) {
        this.topics = this.topics.concat(topics);
        this.fetch(topics[len - 1].create_time);
      } else {
        console.log('Fetch Done');
        fs.writeFileSync('./xmq-data.json', JSON.stringify(this.topics, null, 2));
        this.resolve();
      }
    });
  }

  /**
   * Reads attribute `name` from an `<e ...>` tag and URL-decodes it.
   *
   * @param {string} tag - The full matched tag text.
   * @param {string} name - Attribute name (`title`, `href`).
   * @returns {string} The decoded value; the raw value when it is not valid
   *   percent-encoding; `''` when the attribute is absent.
   */
  _attr(tag, name) {
    const found = tag.match(new RegExp(`${name}="([^"]+?)"`));
    if (!found) return '';
    try {
      return decodeURIComponent(found[1]);
    } catch (e) {
      return found[1];
    }
  }

  /** Escapes a value for use inside a double-quoted HTML attribute. */
  _escapeAttr(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /**
   * Converts topic markup into an HTML fragment: `<e type="mention">` tags
   * become the mentioned name, `<e type="web">` tags become links, every
   * other `<e>` tag is dropped, newlines become `<br />` and double quotes
   * become single quotes.
   *
   * @param {string} [text] - Raw text; falsy values are returned unchanged.
   * @returns {string|undefined} The processed text.
   */
  _textProcesser(text) {
    return text && text
      .replace(/<e type="([^"]+?)"[^>]+?>/gi, (tag, type) => {
        if (type === 'mention') {
          return this._attr(tag, 'title');
        }
        if (type === 'web') {
          const href = this._attr(tag, 'href');
          const title = this._attr(tag, 'title');
          // '+' stands for a space in the encoded title; fall back to the URL as label.
          const label = title ? title.replace(/\+/g, ' ') : href;
          return href ? `<a target="_blank" href="${href}">${label}</a>` : label;
        }
        return '';
      })
      .replace(/\n/g, '<br />')
      // NOTE(review): as written this swaps a space for a space (no-op); the
      // replacement was possibly meant to be an HTML entity — confirm intent.
      .replace(/ /g, ' ')
      .replace(/"/g, "'");
  }

  /**
   * Condenses `this.topics` into a map keyed by `topic_id` and writes it to
   * `./xmq-filter.json`. Only `talk` and `q&a` topics are kept; `a` holds the
   * answer (q&a only) followed by the comments authored by '小胡子哥'.
   *
   * @returns {Object} The map that was written.
   */
  resolve() {
    const list = {};
    const imageUrls = (images) => images && images.map((img) => img.large.url);

    for (const item of this.topics) {
      const comments = (item.show_comments || item.comments || [])
        .filter((c) => c.owner && c.owner.name === '小胡子哥')
        .map((c) => this._textProcesser(c.text));

      if (item.type === 'talk' && item.talk) {
        list[item.topic_id] = {
          q: this._textProcesser(item.talk.text),
          qImgs: imageUrls(item.talk.images),
          a: comments
        };
      } else if (item.type === 'q&a' && item.question) {
        // Unanswered questions carry no `answer`; the answer text goes through
        // the same processing as questions and comments.
        const answer = item.answer && item.answer.text;
        list[item.topic_id] = {
          q: this._textProcesser(item.question.text),
          qImgs: imageUrls(item.question.images),
          a: (answer ? [this._textProcesser(answer)] : []).concat(comments)
        };
      }
    }

    fs.writeFileSync('./xmq-filter.json', JSON.stringify(list, null, 2));
    // this.generateHtml(list);
    return list;
  }

  /**
   * Renders the map produced by `resolve()` as an HTML page. When a DOM is
   * available the page is assigned to `document.body.innerHTML`; the markup
   * is returned in every case so the method is also usable outside a browser.
   *
   * SECURITY: `q` and `a` are inserted as HTML on purpose (they contain the
   * links built by `_textProcesser`); only render data from a trusted source.
   *
   * @param {Object} data - Map of `{ q, qImgs, a }` entries.
   * @returns {string} The generated markup.
   */
  generateHtml(data) {
    const str = Object.keys(data).map((key) => {
      const item = data[key];
      const imgs = item.qImgs
        ? `<div>${item.qImgs.map((m) => {
          const url = this._escapeAttr(m);
          return `<a href="${url}" target="_blank"><img width="200" src="${url}"></a>`;
        }).join('')}</div>`
        : '';
      const answers = item.a && item.a.length
        ? `<div class="a">${item.a.map((i) => `<p>${i}</p>`).join('')}</div>`
        : '';
      return `<div class="q">${item.q || ''}${imgs}</div>${answers}`;
    }).join('');

    const html = `<style>.wrapper{text-align:left;width: 800px; line-height:120%; font-family:Consolas; margin: 40px auto;}.q{font-weight: bold;margin-top: 40px;}.a{padding-left: 20px; padding: 8px 30px;padding-right:0;border-left: 4px solid #EEE;margin-top: 20px;}img{margin-top: 20px;display:inline-block;margin-right:20px;}</style><div class="wrapper">${str}</div>`;
    if (typeof document !== 'undefined') {
      document.body.innerHTML = html;
    }
    return html;
  }
}
// Entry point: constructing Tool immediately starts the crawl, because the
// constructor calls fetch().
new Tool();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment