Skip to content

Instantly share code, notes, and snippets.

@BonsoirDiep
Created August 7, 2018 07:48
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save BonsoirDiep/ae58fc62777ba0bc8509b4f8b2640513 to your computer and use it in GitHub Desktop.
Puppeteer test 1
const puppeteer = require('puppeteer');
var database = {};
var fs = require('fs');
var cheerio = require('cheerio');
/* utils */
/**
 * Write `data` to disk as JSON.
 *
 * @param {string} path - Destination file, resolved relative to this script's directory.
 * @param {*} data - Any JSON-serializable value.
 */
function save(path, data){
  const target = require('path').join(__dirname, path);
  const serialized = JSON.stringify(data);
  // Synchronous on purpose: the call returns only after the file is written.
  fs.writeFileSync(target, serialized);
}
/**
 * Pause helper for async code: `await sleep(500)`.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that fulfills (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise(function (wake) {
    setTimeout(wake, ms);
  });
}
/**
 * Read a query-string parameter from this string (a URL or a bare query)
 * and return its percent-decoded value.
 *
 * Everything after the first "?" is treated as the query; when there is no
 * "?" the whole string is searched.
 *
 * @param {string} para - Parameter name, matched literally.
 * @returns {string|null} Decoded value, or null when the parameter is absent.
 */
String.prototype.GetValue = function(para) {
  // Escape regex metacharacters so names such as "a.b" or "ids[]" are matched
  // literally instead of being interpreted as a pattern.
  const name = String(para).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp("(^|&)" + name + "=([^&]*)(&|$)");
  const query = this.slice(this.indexOf("?") + 1);
  const match = query.match(pattern);
  if (match === null) return null;
  try {
    // decodeURIComponent understands UTF-8 escapes (e.g. %E5%A3%81), which
    // unescape() would turn into Latin-1 garbage.
    return decodeURIComponent(match[2]);
  } catch (ex) {
    // Legacy (%uXXXX / Latin-1) or malformed escapes: keep the old behavior.
    return unescape(match[2]);
  }
};
/**
 * Read a query-string parameter from this string (a URL or a bare query)
 * and return its value exactly as written, without percent-decoding.
 *
 * Everything after the first "?" is treated as the query; when there is no
 * "?" the whole string is searched.
 *
 * @param {string} para - Parameter name, matched literally.
 * @returns {string|null} Raw (still encoded) value, or null when the parameter is absent.
 */
String.prototype.GetValue2 = function(para) {
  // Escape regex metacharacters so names such as "a.b" or "ids[]" are matched
  // literally instead of being interpreted as a pattern.
  const name = String(para).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp("(^|&)" + name + "=([^&]*)(&|$)");
  const query = this.slice(this.indexOf("?") + 1);
  const match = query.match(pattern);
  return match === null ? null : match[2];
};
// Landing page the browser session navigates to first.
var meUrl = 'https://www.1688.com/';
// Product records collected from the search result page. Filled by the main
// routine and updated in place by addMore().
var datas = [];
/**
 * Attach extra details to a scraped record.
 *
 * Finds the FIRST record in `datas` whose `itemMore` still equals `key`
 * and replaces that field with `newVal`. Only the first match is touched, so
 * records sharing the same key are filled one call at a time.
 *
 * @param {string} key - Value currently stored in the record's `itemMore`.
 * @param {Object} newVal - Details object to store; anything that is not a
 *   non-null object is ignored.
 */
function addMore(key, newVal){
  // typeof null is 'object', so null must be rejected explicitly; otherwise it
  // would wipe the key and the record could never be matched again.
  if (newVal === null || typeof newVal !== 'object') return;
  // A missing key must not match records that simply have no itemMore.
  if (key == null) return;
  const entry = datas.find((el) => el.itemMore === key);
  if (entry) entry.itemMore = newVal;
}
//
/**
 * Main scraping routine (runs immediately).
 *
 * Flow:
 *  1. Launch a visible browser, abort image requests and listen to responses.
 *  2. Open the landing page, dismiss the identity popup if it is shown, and
 *     submit a keyword search.
 *  3. Scroll the result page down in 50px steps; the page height is re-read on
 *     every step because the page may grow while scrolling.
 *  4. Parse the offer list with cheerio and push one record per offer into
 *     `datas`.
 *  5. For every record with a seller nick, fire the member-info JSONP request
 *     from inside the page. The response listener unwraps the payload and
 *     merges it into the matching record through addMore().
 *  6. Print every record that received the extra details.
 *
 * NOTE(review): the browser is not closed by this routine (the close call was
 * already disabled); confirm whether that is still wanted.
 */
(async () => {
  // Prints every record whose itemMore was replaced by a details object.
  const logEnriched = () => {
    datas.forEach((el) => {
      if (el.itemMore && typeof el.itemMore !== 'string') console.log(el);
    });
  };

  // The response URL carries the login id percent-encoded when it contains
  // non-ASCII characters, while the scraped nick may be stored decoded.
  // Prefer the raw value when a record matches it; otherwise try the decoded form.
  const resolveLoginId = (rawValue) => {
    if (rawValue == null) return rawValue;
    if (datas.some((el) => el.itemMore === rawValue)) return rawValue;
    try {
      return decodeURIComponent(rawValue);
    } catch (ex) {
      return rawValue;
    }
  };

  const browser = await puppeteer.launch({
    headless: false,
    args: ["--no-sandbox", "--disable-web-security", `--user-data-dir=data`]
  });
  const page = await browser.newPage();
  await page.setRequestInterception(true);

  page.on('close', (evt) => {
    console.log('close: ', evt);
  });
  page.on('domcontentloaded', () => {
    console.log('domcontentloaded: #');
  });

  // With interception enabled every request must be either aborted or continued.
  page.on('request', (request) => {
    if (request.resourceType() === 'image') {
      request.abort();
    } else {
      request.continue();
    }
  });

  page.on('response', (res) => {
    const url = res.url();
    if (url.includes('/ajax/member_bsr_indexs_json.do')) {
      res.text().then((data) => {
        const callback = url.GetValue2('callback');
        if (data && callback === 'diep12') {
          // Unwrap the JSONP envelope: keep what sits between the first "("
          // and the last ")".
          const payload = data.substring(data.indexOf('(') + 1, data.lastIndexOf(')'));
          try {
            const details = JSON.parse(payload);
            if (details.success) addMore(resolveLoginId(url.GetValue2('loginid')), details);
          } catch (ex) {
            console.log(ex.message);
          }
        } else if (callback === 'bug') {
          logEnriched();
        }
      }).catch((err) => {
        // The body can be unavailable (e.g. after a navigation); do not let
        // that become an unhandled rejection.
        console.log(err.message);
      });
    } else if (url.includes('rpc_async_render.jsonp')) {
      console.log('reason: scroll mouse');
      res.text().then(() => {
        console.log({
          url: url
        });
      }).catch((err) => {
        console.log(err.message);
      });
    }
  });

  await page.goto(meUrl, {
    timeout: 0
  });

  await page.evaluate(() => {
    // Close the popup only when it is actually present.
    const closeButton = document.getElementsByClassName('identity-close')[0];
    if (closeButton) closeButton.click();
    window.addEventListener("beforeunload", function (e) {
      return console.log('agagagag');
    });
  });

  await sleep(1500);
  await page.type('.searchfollow_keywords', '壁橱');
  await page.click('button#alisearch-submit');
  // Fixed wait for the result page to load after the search is submitted.
  await sleep(3000);

  let bodyHeight = await page.evaluate(() => document.body.scrollHeight);
  for (let top = 0; top <= bodyHeight; top += 50) {
    bodyHeight = await page.evaluate((y) => {
      const overlayClose = document.getElementsByClassName('s-overlay-close-b')[0];
      if (overlayClose) overlayClose.click();
      window.scrollTo(0, y);
      return document.body.scrollHeight;
    }, top);
    await sleep(300);
  }

  const listHtml = await page.evaluate(() => {
    // The third layout box is expected to hold the offer list; it may be
    // missing (captcha page, layout change), in which case report null.
    const box = document.getElementsByClassName('sw-layout-box')[2];
    return box ? box.innerHTML : null;
  });

  if (listHtml) {
    const $ = cheerio.load(listHtml);
    $('li').each(function (idx, el) {
      const item = $(el);
      const photo = item.find('div.sw-dpl-offer-photo a img');
      // List entries without a titled photo are not offers; skip them.
      if (!photo.attr('alt')) return;

      const companyLink = item.find('div.sm-offer-company a').eq(0);
      datas.push({
        title: photo.attr('alt'),
        img: photo.attr('src'),
        company: {
          name: companyLink.text().trim(),
          href: companyLink.attr('href'),
          offerid: companyLink.attr('offerid'),
          memberid: companyLink.attr('memberid'),
          gotodetail: companyLink.attr('gotodetail')
        },
        price: item.find('.sm-offer-price').eq(0).text().trim(),
        location: item.find('.sm-offer-location').eq(0).text().trim(),
        // Seller nick; later replaced by the details object in addMore().
        itemMore: item.find('span.sw-ui-flaticon-ww-off-s').eq(0).attr('data-nick')
      });
    });

    for (const el of datas) {
      if (el.itemMore) {
        // Fire-and-forget inside the page: the result is picked up by the
        // 'response' listener above, not by this call.
        await page.evaluate((itemMore) => {
          fetch('https://member.1688.com/member/ajax/member_bsr_indexs_json.do?_input_charset=utf-8&callback=diep12&loginid='+ itemMore
          ).then(r=> r.text()).then(data=> console.log(data)).catch(err=> console.log(err.message))
        }, el.itemMore);
        await sleep(1200);
      }
    }
  } else {
    console.log('capcha or something wrong...')
  }

  logEnriched();
  console.log('closed!!!!!!!!!');
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
@BonsoirDiep
Copy link
Author

Các API có thể sử dụng, cần đọc thêm:
Các URL có thể cần cookie hay gì đó, nhưng phải request với header referer https://s.1688.com/ ...

load list sản phẩm:
https://s.1688.com/selloffer/rpc_async_render.jsonp?keywords=<từ_tìm_kiếm>&n=y&sug=1_0&uniqfield=pic_tag_id&templateConfigName=marketOfferresult&offset=2&pageSize=60&asyncCount=20&startIndex=20&async=true&enableAsync=true&leftP4PIds=569025065725&rpcflag=new&pageName=market&callback=<tên_hàm_xử_lý_JSON>

load các chi tiết thêm về sản phẩm:
https://member.1688.com/member/ajax/member_bsr_indexs_json.do?_input_charset=utf-8&callback=<tên_hàm_xử_lý_JSON>&loginid=<id_của_sản_phẩm>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment