Skip to content

Instantly share code, notes, and snippets.

@barretlee
Last active April 15, 2017 03:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save barretlee/8a4e3a01b53f220ea7e65edb4edd356d to your computer and use it in GitHub Desktop.
Save barretlee/8a4e3a01b53f220ea7e65edb4edd356d to your computer and use it in GitHub Desktop.
xmq data spider
const fs = require('fs');
const request = require('request');
// Endpoint that lists the topics of one group; the group id is hard-coded in the path.
const BASE = 'https://api.xiaomiquan.com/v1.2/groups/4512888248/topics';
// Values sent as the `origin` and `referer` request headers.
const ORIGIN = 'https://wx.xiaomiquan.com';
const REFERER = 'https://wx.xiaomiquan.com/dweb/';
// Header *value* only. The header name comes from the key in the request's
// `headers` object, so repeating "User-Agent: " here would send it twice.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36';
// Sent as the `authorization` header.
// NOTE(review): 'xxx' looks like a redacted placeholder — supply a real token before running.
const AUTHORIZATION = 'xxx';
// Query-string prefix fixing the number of topics requested per page.
const pageSize = '?count=20';

/**
 * Builds the optional `end_time` query-string fragment used for paging.
 *
 * @param {string} [t] - Timestamp to page back from; omitted on the first request.
 * @returns {string} `&end_time=<url-encoded t>` when `t` is truthy, otherwise
 *   an empty string (so the result can always be concatenated safely).
 */
const end_time = (t) => (t ? `&end_time=${encodeURIComponent(t)}` : '');
/**
 * Crawls every topic of the group behind `BASE`, dumps the raw topics to
 * `./xmq-data.json`, then reduces them to a question/answer map written to
 * `./xmq-filter.json`.
 *
 * Relies on the module-level `request`, `fs`, `BASE`, `pageSize`, `end_time`,
 * `ORIGIN`, `REFERER`, `UA` and `AUTHORIZATION` bindings.
 */
class Tool {
  /**
   * @param {Object} [options]
   * @param {string} [options.ownerName='小胡子哥'] - Only comments written by
   *   this user are kept as answers.
   * @param {boolean} [options.autoFetch=true] - Start crawling immediately.
   *   Pass `false` to use the formatting helpers without any network access.
   */
  constructor(options = {}) {
    this.topics = [];
    // Ids of topics already collected, used to drop repeats between pages.
    this.seenTopicIds = new Set();
    this.ownerName = options.ownerName === undefined ? '小胡子哥' : options.ownerName;
    if (options.autoFetch !== false) {
      this.fetch();
    }
  }

  /**
   * Requests one page of topics and recurses with the `create_time` of the
   * page's last topic until a page contributes nothing new. When done, writes
   * the raw topics to `./xmq-data.json` and calls `resolve()`.
   *
   * @param {string} [st] - `create_time` to page back from; omit for the first page.
   * @returns {void}
   */
  fetch(st) {
    st && console.log(st);
    // `|| ''` keeps the URL well-formed even if `end_time` yields a non-string.
    const url = `${BASE}${pageSize}${end_time(st) || ''}`;
    console.log(`Fetch: ${url}`);
    request({
      method: 'GET',
      url,
      headers: {
        origin: ORIGIN,
        referer: REFERER,
        'User-Agent': UA,
        authorization: AUTHORIZATION,
        'x-request-id': st || 1
      }
    }, (err, res, body) => {
      if (err) return console.log(err);

      let payload;
      try {
        payload = JSON.parse(body);
      } catch (parseErr) {
        // A non-JSON body (HTML error page, empty response, ...) must not
        // crash the process from inside the callback.
        console.log(`Fetch failed: response is not valid JSON (${parseErr.message})`);
        return;
      }

      const page = payload && payload.resp_data && payload.resp_data.topics;
      if (!Array.isArray(page)) {
        // Previously this case ended silently with no output at all.
        console.log(`Fetch failed: unexpected response ${JSON.stringify(payload)}`);
        return;
      }
      console.log(`Len: ${page.length}`);

      // Keep only topics not collected yet. This drops a boundary topic that
      // is returned again on the next page and, unlike a `length > 1` check,
      // never discards a page that holds a single new topic.
      const fresh = [];
      for (const topic of page) {
        if (!this.seenTopicIds.has(topic.topic_id)) {
          this.seenTopicIds.add(topic.topic_id);
          fresh.push(topic);
        }
      }

      if (fresh.length > 0) {
        this.topics = this.topics.concat(fresh);
        this.fetch(page[page.length - 1].create_time);
        return;
      }

      console.log('Fetch Done');
      fs.writeFileSync('./xmq-data.json', JSON.stringify(this.topics, null, 2));
      this.resolve();
    });
  }

  /**
   * Converts topic/comment markup into an HTML fragment.
   *
   * `<e type="mention" title="…">` becomes the decoded title,
   * `<e type="web" href="…" title="…">` becomes an anchor, every other
   * `<e …>` tag is removed. Afterwards newlines become `<br />` and every
   * double quote (including those of generated anchors) becomes a single quote.
   *
   * NOTE(review): decoded `href`/`title` values are inserted without HTML
   * escaping, so the result is only as trustworthy as the input.
   *
   * @param {string} text - Raw text; falsy values are returned unchanged.
   * @returns {string} The processed text (or the falsy input).
   */
  _textProcesser(text) {
    if (!text) return text;

    // Malformed percent-escapes would make decodeURIComponent throw; fall
    // back to the raw value instead of aborting the whole conversion.
    const decode = (value) => {
      try {
        return decodeURIComponent(value);
      } catch (e) {
        return value;
      }
    };
    // First capture group of `pattern` in `tag`, or '' when the attribute is absent.
    const pick = (tag, pattern) => {
      const found = tag.match(pattern);
      return found ? found[1] : '';
    };

    return text
      .replace(/<e type="([^"]+?)"[^>]+?>/gi, (tag, type) => {
        if (type === 'mention') {
          return decode(pick(tag, /title="([^"]+?)"/));
        }
        if (type === 'web') {
          const href = decode(pick(tag, /href="([^"]+?)"/));
          // '+' stands for a space in the encoded title.
          const title = decode(pick(tag, /title="([^"]+?)"/)).replace(/\+/g, ' ');
          // Without a target there is nothing to link to; keep just the label.
          return href ? `<a target="_blank" href="${href}">${title}</a>` : title;
        }
        return '';
      })
      .replace(/\n/g, '<br />')
      // NOTE(review): as written this swaps a space for a space (a no-op);
      // the original probably involved a non-breaking space / `&nbsp;` — confirm.
      .replace(/ /g, ' ')
      .replace(/"/g, "'");
  }

  /**
   * Builds the `{ [topic_id]: { q, qImgs, a } }` map from `this.topics`.
   *
   * Only `talk` and `q&a` topics are kept. `a` holds the (processed) answer of
   * a `q&a` topic, if it has one, followed by the processed comments written
   * by `this.ownerName`. `qImgs` is the list of large image URLs, or whatever
   * falsy value `images` had when there are none.
   *
   * @returns {Object} The question/answer map.
   */
  _buildList() {
    const list = {};
    for (const item of this.topics) {
      let source = null;
      if (item.type === 'talk') {
        source = item.talk;
      } else if (item.type === 'q&a') {
        source = item.question;
      }
      if (!source) continue;

      const comments = (item.show_comments || item.comments || [])
        .filter((c) => c.owner && c.owner.name === this.ownerName)
        .map((c) => this._textProcesser(c.text));

      // A `q&a` topic may lack an answer; the answer gets the same text
      // processing as questions and comments so the output is uniform.
      const answers = item.type === 'q&a' && item.answer
        ? [this._textProcesser(item.answer.text)]
        : [];

      list[item.topic_id] = {
        q: this._textProcesser(source.text),
        qImgs: source.images && source.images.map((img) => img.large.url),
        a: answers.concat(comments)
      };
    }
    return list;
  }

  /**
   * Writes the question/answer map built from `this.topics` to
   * `./xmq-filter.json`. `generateHtml` is not called here; it is meant for
   * use with a DOM.
   *
   * @returns {void}
   */
  resolve() {
    const list = this._buildList();
    fs.writeFileSync('./xmq-filter.json', JSON.stringify(list, null, 2));
  }

  /**
   * Renders the question/answer map as an HTML page body.
   *
   * When a global `document` exists the markup is also assigned to
   * `document.body.innerHTML`; elsewhere (e.g. Node) it is only returned.
   *
   * @param {Object} data - Map as produced by `resolve()`.
   * @returns {string} The generated markup.
   */
  generateHtml(data) {
    let str = '';
    for (const key of Object.keys(data)) {
      const item = data[key];
      const imgs = item.qImgs
        ? `<div>${item.qImgs.map((m) => `<a href="${m}" target="_blank"><img width="200" src="${m}"></a>`).join('')}</div>`
        : '';
      str += `<div class="q">${item.q}${imgs}</div>`;
      if (item.a && item.a.length) {
        str += `<div class="a">${item.a.map((answer) => `<p>${answer}</p>`).join('')}</div>`;
      }
    }
    const html = `<style>.wrapper{text-align:left;width: 800px; line-height:120%; font-family:Consolas; margin: 40px auto;}.q{font-weight: bold;margin-top: 40px;}.a{padding-left: 20px; padding: 8px 30px;padding-right:0;border-left: 4px solid #EEE;margin-top: 20px;}img{margin-top: 20px;display:inline-block;margin-right:20px;}</style><div class="wrapper">${str}</div>`;
    if (typeof document !== 'undefined' && document.body) {
      document.body.innerHTML = html;
    }
    return html;
  }
}
// Script entry point: constructing a Tool with no arguments starts the crawl,
// because the constructor itself calls fetch().
new Tool();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment