Skip to content

Instantly share code, notes, and snippets.

@yuanliwei
Created December 5, 2018 14:09
Show Gist options
  • Save yuanliwei/2e7602fe5e1c083ae22a82c6f0067336 to your computer and use it in GitHub Desktop.
Save yuanliwei/2e7602fe5e1c083ae22a82c6f0067336 to your computer and use it in GitHub Desktop.
小说爬虫.js
var request = require('request')
var cheerio = require('cheerio')
var iconv = require('iconv-lite');
/**
 * Fetches one chapter page and extracts its text and the link to the next page.
 *
 * The response body is decoded with iconv-lite as GBK, a newline is inserted
 * after every closing `</p>` tag, and the result is parsed with cheerio.
 *
 * @param {string} url - Absolute URL of the chapter page to download.
 * @returns {Promise<[string, (string|undefined)]>} Resolves to `[result, next]`:
 *   `result` is the chapter formatted as a tab-indented title line followed by
 *   the text of `#content` (with the first occurrence of the title removed);
 *   `next` is the `href` of `li>a.next`, or `undefined` when no such link exists.
 *   Rejects when the request reports an error or the body cannot be
 *   decoded/parsed.
 */
function get(url) {
  return new Promise((resolve, reject) => {
    request.get(url, {
      encoding: null,
      gzip: true
    }, (err, resp, body) => {
      // Without this guard a failed request reaches iconv.decode with no body
      // and throws inside the callback, so the promise would never settle.
      if (err) {
        reject(err);
        return;
      }
      try {
        const decoded = iconv.decode(body, 'gbk');
        // Keep paragraphs on separate lines once the markup is stripped by .text().
        const html = decoded.replace(/<\/p>/gi, '</p>\n');
        const $ = cheerio.load(html);
        const title = $('#content h1').text();
        console.log('title', title);
        // The heading lives inside #content, so drop it from the article text.
        const article = $('#content').text().replace(title, '');
        const result = `\n\t${title}\n\n${article}\n\n\n\n`;
        const next = $('li>a.next').attr('href');
        console.log('next', next);
        resolve([result, next]);
      } catch (parseError) {
        // Surface decode/parse failures to the caller instead of crashing
        // from inside the request callback.
        reject(parseError);
      }
    });
  });
}
/**
 * Crawls the book one chapter at a time, appending every chapter to
 * `books2.txt`.
 *
 * Begins at a fixed chapter on https://m.boquge.com and keeps following the
 * "next" link returned by `get` until a page yields none. Pages are fetched
 * strictly in sequence because each page supplies the address of the next.
 *
 * @returns {Promise<void>} Resolves when a page returns no next link; rejects
 *   if `get` rejects or the file append throws.
 */
async function start() {
  const fs = require('fs');
  let url = 'https://m.boquge.com/wapbook/97027_158708817.html';
  while (url) {
    const [text, next] = await get(url);
    fs.appendFileSync('books2.txt', text, 'utf-8');
    // Resolve the href against the page it came from, so root-relative,
    // page-relative and absolute links all produce a valid URL (plain string
    // concatenation with the host only works for root-relative hrefs).
    url = next ? new URL(next, url).href : undefined;
  }
}
// Kick off the crawl. Report any failure and exit non-zero rather than
// leaving the returned promise's rejection unhandled.
start().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment