Skip to content

Instantly share code, notes, and snippets.

@huybui02
Last active June 8, 2017 04:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huybui02/68cc0967cab1427b95900569c16726c8 to your computer and use it in GitHub Desktop.
Save huybui02/68cc0967cab1427b95900569c16726c8 to your computer and use it in GitHub Desktop.
getLazada
var express = require("express");
var app = express();
var request = require("request");
var request1 = require("request");
var cheerio = require("cheerio");
var fs = require("fs");
var rp = require("request-promise");
var Promise = require('promise');
// app.use(express.static("public"));
// app.set("view engine","ejs");
// app.set("views","./views");
// app.listen(3000);
// --- MongoDB persistence -----------------------------------------------------
// Opens the connection to the local `lazada` database and declares the model
// that scraped product records are written to.
var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/lazada');
var dbMongo = mongoose.connection;
// Mongoose connections emit 'error'; a listener registered on 'err' would never
// fire, so connection failures went unreported.
dbMongo.on('error', console.error.bind(console, 'Loi ket noi!'));
dbMongo.once('open', function () {
  console.log("Ket noi thanh cong");
});
// One scraped product card. Every field is stored as a plain string.
var Schema = new mongoose.Schema({
  id: String,      // product identifier taken from the product link
  type: String,    // category label read from the page breadcrumb
  link: String,    // URL of the product page
  img: String,     // URL of the product image
  name: String,    // product title
  price: String,   // displayed price text
  comment: String, // not populated by the scraper in this file
  desc: String     // product attribute text
});
// Model used by the scraper to persist records (model name 'data').
var dl = mongoose.model('data', Schema);
// Category listing URL prefixes. Each one ends in "&page=" so that a page number
// can be appended directly to obtain the address of a concrete listing page.
const link_1 = "http://www.lazada.vn/dien-thoai-di-dong/?spm=a2o4n.category-010100000000.0.0.LNbOng&itemperpage=120&page=";
const link_2 = "http://www.lazada.vn/laptop/?spm=a2o4n.category-020100000000.0.0.59I7Wc&itemperpage=120&page=";
const link_3 = "http://www.lazada.vn/may-tinh-bang/?spm=a2o4n.category-010200000000.0.0.JNr58x&itemperpage=120&page=";
const link_4 = "http://www.lazada.vn/phu-kien-dien-thoai-may-tinh-bang/?spm=a2o4n.category-011100000000.0.0.bT0iuT&itemperpage=120&page=";
const link_5 = "http://www.lazada.vn/phu-kien-may-vi-tinh/?spm=a2o4n.category-020300000000.0.0.UrqBPl&itemperpage=120&page=";
const link_6 = "http://www.lazada.vn/may-anh-may-quay-phim/?spm=a2o4n.category-040000000000.0.0.fMn5AI&itemperpage=120&page=";
const link_7 = "http://www.lazada.vn/tivi/?spm=a2o4n.category-030500000000.0.0.1zH25l&itemperpage=120&page=";
const link_8 = "http://www.lazada.vn/thiet-bi-am-thanh-di-dong/?spm=a2o4n.category-030100000000.0.0.oGyCy5&itemperpage=120&page=";
const link_9 = "http://www.lazada.vn/thiet-bi-choi-game/?spm=a2o4n.category-030200000000.0.0.NsOTRx&itemperpage=120&page=";
const link_10 = "http://www.lazada.vn/thiet-bi-so/?spm=a2o4n.category-030300000000.0.0.Pl2afU&itemperpage=120&page=";
const link_11 = "http://www.lazada.vn/video/?spm=a2o4n.category-030400000000.0.0.zz9ebX&itemperpage=120&page=";
const link_12 = "http://www.lazada.vn/thiet-bi-deo-cong-nghe/?spm=a2o4n.category-031000000000.0.0.kqvokv&itemperpage=120&page=";
const link_13 = "http://www.lazada.vn/phu-kien-cho-tv/?spm=a2o4n.category-031100000000.0.0.R6mCy5&itemperpage=120&page=";
const link_14 = "http://www.lazada.vn/thiet-bi-dieu-khien-qua-app/?spm=a2o4n.category-031200000000.0.0.e0W9LL&itemperpage=120&page=";
const link_15 = "http://www.lazada.vn/thoi-trang-nu/?spm=a2o4n.category-090100000000.0.0.ximZPE&itemperpage=120&page=";
const link_16 = "http://www.lazada.vn/thoi-trang-nam/?spm=a2o4n.category-090200000000.0.0.VZl5P2&itemperpage=120&page=";
const link_17 = "http://www.lazada.vn/thoi-trang-danh-cho-be-gai/?spm=a2o4n.category-091600000000.0.0.iGmz3U&itemperpage=120&page=";
const link_18 = "http://www.lazada.vn/thoi-trang-cho-be-trai/?spm=a2o4n.category-091700000000.0.0.lFEC1R&itemperpage=120&page=";
const link_19 = "http://www.lazada.vn/dong-ho/?spm=a2o4n.category-170300000000.0.0.sRBoor&itemperpage=120&page=";
const link_20 = "http://www.lazada.vn/mat-kinh/?spm=a2o4n.category-170100000000.0.0.IRGihX&itemperpage=120&page=";
const link_21 = "http://www.lazada.vn/do-trang-suc/?spm=a2o4n.category-170200000000.0.0.8eu4d7&itemperpage=120&page=";
const link_22 = "http://www.lazada.vn/do-gia-dung-nha-bep/?spm=a2o4n.category-050600000000.0.0.Ya1Yeo&itemperpage=120&page=";
const link_23 = "http://www.lazada.vn/do-gia-dung-lon/?spm=a2o4n.category-050400000000.0.0.8JiYr1&itemperpage=120&page=";
const link_24 = "http://www.lazada.vn/quat-may-nong-lanh/?spm=a2o4n.category-050100000000.0.0.pFRycU&itemperpage=120&page=";
const link_25 = "http://www.lazada.vn/thiet-bi-cham-soc-quan-ao/?spm=a2o4n.category-050200000000.0.0.SAnv93&itemperpage=120&page=";
const link_26 = "http://www.lazada.vn/dung-cu-thiet-bi-gia-dinh-moi/?spm=a2o4n.category-050300000000.0.0.toFrkc&itemperpage=120&page=";
const link_27 = "http://www.lazada.vn/thiet-bi-do-gia-dung/?spm=a2o4n.category-051000000000.0.0.x0yRII&itemperpage=120&page=";
const link_28 = "http://www.lazada.vn/van-phong-pham-gia-dinh/?spm=a2o4n.category-060900000000.0.0.dLZRka&itemperpage=120&page=";
const link_29 = "http://www.lazada.vn/do-dung-phu-kien-phong-tam/?spm=a2o4n.category-060100000000.0.0.HKUrzS&itemperpage=120&page=";
const link_30 = "http://www.lazada.vn/do-dung-bep-phong-an/?spm=a2o4n.category-060600000000.0.0.wyvNRB&itemperpage=120&page=";
const link_31 = "http://www.lazada.vn/do-dung-phong-ngu-gia-dinh/?spm=a2o4n.category-060200000000.0.0.dvwb1O&itemperpage=120&page=";
const link_32 = "http://www.lazada.vn/san-pham-noi-that/?spm=a2o4n.category-060300000000.0.0.oRcBEY&itemperpage=120&page=";
const link_33 = "http://www.lazada.vn/tan-trang-nha-cua/?spm=a2o4n.category-060500000000.0.0.MyaeCZ&itemperpage=120&page=";
const link_34 = "http://www.lazada.vn/tu-dung-va-sap-xep-do/?spm=a2o4n.category-061000000000.0.0.TLOl5L&itemperpage=120&page=";
const link_35 = "http://www.lazada.vn/san-pham-trang-tri-nha-cua/?spm=a2o4n.category-060400000000.0.0.a8UWnH&itemperpage=120&page=";
const link_36 = "http://www.lazada.vn/do-dung-ngoai-troi-san-vuon/?spm=a2o4n.category-060700000000.0.0.lWdBmz&itemperpage=120&page=";
const link_37 = "http://www.lazada.vn/cac-loai-den/?spm=a2o4n.category-061100000000.0.0.D3l2nU&itemperpage=120&page=";
const link_38 = "http://www.lazada.vn/trang-diem/?spm=a2o4n.category-080800000000.0.0.xbsmLt&itemperpage=120&page=";
const link_39 = "http://www.lazada.vn/cham-soc-da-mat/?spm=a2o4n.category-080200000000.0.0.3G38au&itemperpage=120&page=";
const link_40 = "http://www.lazada.vn/san-pham-tam-cham-soc-co-the/?spm=a2o4n.category-080100000000.0.0.eK0INt&itemperpage=120&page=";
const link_41 = "http://www.lazada.vn/san-pham-cham-soc-toc/?spm=a2o4n.category-080600000000.0.0.6WSxHh&itemperpage=120&page=";
const link_42 = "http://www.lazada.vn/nuoc-hoa/?spm=a2o4n.category-080400000000.0.0.QvQhG3&itemperpage=120&page=";
const link_43 = "http://www.lazada.vn/thuc-pham-bo-sung/?spm=a2o4n.category-080300000000.0.0.lGDzhV&itemperpage=120&page=";
const link_44 = "http://www.lazada.vn/dung-cu-cham-soc-sac-dep/?spm=a2o4n.category-080700000000.0.0.wQEk3N&itemperpage=120&page=";
const link_45 = "http://www.lazada.vn/cham-soc-cho-nam-gioi/?spm=a2o4n.category-080900000000.0.0.HDiwGd&itemperpage=120&page=";
const link_46 = "http://www.lazada.vn/cham-soc-ca-nhan/?spm=a2o4n.category-081000000000.0.0.iKtKQ2&itemperpage=120&page=";
const link_47 = "http://www.lazada.vn/thiet-bi-y-te/?spm=a2o4n.category-081300000000.0.0.NksBR3&itemperpage=120&page=";
const link_48 = "http://www.lazada.vn/ho-tro-tinh-duc/?spm=a2o4n.category-081700000000.0.0.RxkAPx&itemperpage=120&page=";
const link_49 = "http://www.lazada.vn/do-dung-bu-sua-an-dam/?spm=a2o4n.category-180100000000.0.0.ux2I2H&itemperpage=120&page=";
const link_50 = "http://www.lazada.vn/quan-ao-phu-kien-cho-be/?spm=a2o4n.category-181100000000.0.0.jXSDql&itemperpage=120&page=";
const link_51 = "http://www.lazada.vn/cham-soc-ba-me-mang-thai/?spm=a2o4n.category-181000000000.0.0.slOZmM&itemperpage=120&page=";
const link_52 = "http://www.lazada.vn/qua-tang/?spm=a2o4n.category-180900000000.0.0.Sfh1u2&itemperpage=120&page=";
const link_53 = "http://www.lazada.vn/xe-ghe-em-be/?spm=a2o4n.category-180600000000.0.0.3vp25Q&itemperpage=120&page=";
const link_54 = "http://www.lazada.vn/suc-khoe-an-toan/?spm=a2o4n.category-180500000000.0.0.UwIrlN&itemperpage=120&page=";
const link_55 = "http://www.lazada.vn/cham-soc-tre-so-sinh-tre-nho/?spm=a2o4n.category-180400000000.0.0.OEEoaU&itemperpage=120&page=";
const link_56 = "http://www.lazada.vn/ta-dung-cu-ve-sinh/?spm=a2o4n.category-180300000000.0.0.OX1SUt&itemperpage=120&page=";
const link_57 = "http://www.lazada.vn/ti-ngam-phu-kien/?spm=a2o4n.category-180200000000.0.0.PvSIxp&itemperpage=120&page=";
const link_58 = "http://www.lazada.vn/do-choi-tro-choi/?spm=a2o4n.category-190000000000.0.0.FYlW2C&itemperpage=120&page=";
const link_59 = "http://www.lazada.vn/tre-so-sinh-tre-nho/?spm=a2o4n.category-180000000000.0.0.Et7TrO&itemperpage=120&page=";
const link_60 = "http://www.lazada.vn/the-thao-da-ngoai/?spm=a2o4n.category-110000000000.0.0.KvFtZd&itemperpage=120&page=";
const link_61 = "http://www.lazada.vn/vali-ba-lo-tui-du-lich/?spm=a2o4n.category-100000000000.0.0.HiY8RX&itemperpage=120&page=";
const link_62 = "http://www.lazada.vn/thiet-bi-phu-kien-o-to-xe-may/?spm=a2o4n.category-120100000000.0.0.ijgBnS&itemperpage=120&page=";
const link_63 = "http://www.lazada.vn/bach-hoa-online/?spm=a2o4n.category-140000000000.0.0.rgx6xR&itemperpage=120&page=";
const link_64 = "http://www.lazada.vn/sach/?spm=a2o4n.category-130100000000.0.0.6p22Xb&itemperpage=120&page=";
const link_65 = "http://www.lazada.vn/nhac-cu-moi/?spm=a2o4n.category-130700000000.0.0.FStpPg&itemperpage=120&page=";
const link_66 = "http://www.lazada.vn/cham-soc-thu-cung/?spm=a2o4n.category-200000000000.0.0.okp1IU&itemperpage=120&page=";
const link_67 = "http://www.lazada.vn/khuyen-mai-dich-vu/?spm=a2o4n.category-160000000000.0.0.xNIJHH&itemperpage=120&page=";
/**
 * Looks up how many result pages a category listing has.
 *
 * Downloads `url`, takes the text of the last `.c-paging__link` element and
 * parses it as a base-10 integer. When that text is not a positive number
 * (for example, the page shows no pager at all) the listing is reported as
 * having a single page, so that short categories are still crawled.
 *
 * @param {string} url - Address of a listing page to inspect.
 * @param {function(?Error, ?number): void} callback - Node-style callback,
 *   called with `(null, pageCount)` on success or `(err, null)` when the
 *   download or the parsing fails.
 */
function getPager(url, callback) {
  // The callback is invoked directly rather than from a promise chain: with the
  // former `.then(callback).catch(callback)` arrangement an exception thrown by
  // the callback itself was caught and the callback was called a second time.
  request(url, function (error, res, body) {
    if (error) {
      callback(error, null);
      return;
    }
    var pageCount;
    try {
      var $ = cheerio.load(body);
      pageCount = parseInt($('.c-paging__link').last().text(), 10);
    } catch (parseError) {
      callback(parseError, null);
      return;
    }
    // `!(x >= 1)` is also true for NaN, which is what an absent pager parses to.
    if (!(pageCount >= 1)) {
      pageCount = 1;
    }
    callback(null, pageCount);
  });
}
// Collect every category URL prefix into one array, in declaration order, so
// the crawler can iterate over them.
var manglink = [
  link_1, link_2, link_3, link_4, link_5, link_6, link_7, link_8, link_9, link_10,
  link_11, link_12, link_13, link_14, link_15, link_16, link_17, link_18, link_19, link_20,
  link_21, link_22, link_23, link_24, link_25, link_26, link_27, link_28, link_29, link_30,
  link_31, link_32, link_33, link_34, link_35, link_36, link_37, link_38, link_39, link_40,
  link_41, link_42, link_43, link_44, link_45, link_46, link_47, link_48, link_49, link_50,
  link_51, link_52, link_53, link_54, link_55, link_56, link_57, link_58, link_59, link_60,
  link_61, link_62, link_63, link_64, link_65, link_66, link_67
];
/**
 * Prints the completion banner. Called once, after every category has had its
 * page count resolved and its page downloads started.
 */
function cb() {
  console.log("------------------------Da ghi xong du lieu--------------------------------");
}

// Crawl driver: for each category, ask getPager how many pages it has (using
// page 1 of the listing), then hand every page URL to layData.
//
// The forEach callback used to name its second parameter `cb`; forEach passes
// the numeric index there, which shadowed the banner function above and made
// `cb()` throw a TypeError instead of printing the banner.
//
// NOTE(review): layData reports no completion, so the banner means "all page
// requests have been issued", not "all records have been saved". All requests
// are also started without a concurrency limit.
var pendingCategories = manglink.length;
manglink.forEach(function (categoryLink) {
  var settled = false; // guards the counter if getPager reports more than once
  getPager(categoryLink + "1", function (err, result) {
    if (settled) {
      return;
    }
    settled = true;
    if (err) {
      // A failed category is reported and skipped; the others keep going.
      console.error("Failed to read the page count for " + categoryLink, err);
    } else {
      for (let i = 1; i <= result; i++) {
        layData(categoryLink + i);
      }
    }
    pendingCategories -= 1;
    if (pendingCategories === 0) {
      cb();
    }
  });
});
/**
 * Downloads one listing page, extracts a record for every product card on it
 * and stores the records through the `dl` model.
 *
 * Each record has the fields `id`, `type`, `link`, `img`, `name`, `price` and
 * `desc`, all strings. Cards without a product link are skipped. Failures of
 * the download or of the save are logged; nothing is returned or thrown to the
 * caller.
 *
 * @param {string} u - Address of the listing page to scrape.
 */
function layData(u) {
  // Host prepended to product links. The previous value "http://ladaza.vn" was
  // a misspelling of the site's domain, so every stored link was wrong.
  // NOTE(review): assumes product hrefs are site-relative — confirm on a live page.
  var siteRoot = "http://www.lazada.vn";

  request(u, function (error, response, body) {
    if (error) {
      console.log("------", u, error);
      return;
    }

    var $ = cheerio.load(body);
    // The last breadcrumb entry is used as the category label.
    var loai = $('.c-breadcrumb__item').last().text().trim();
    var kq = [];

    $('div.c-product-list__item').each(function () {
      var card = $(this);
      var nameLink = card.find('.c-product-card__name');
      var href = nameLink.prop('href');
      if (!href) {
        // Without a link there is no id to derive; skip instead of crashing.
        return;
      }

      // The image URL is embedded in a component-parameter attribute.
      // NOTE(review): the fixed offsets (27 leading, 9 trailing characters) are
      // kept from the original and presumably strip a fixed wrapper — verify.
      var imgParams = card
        .find('div.c-product-card__img-placeholder > a > span')
        .attr('data-js-component-params');
      var urlhinh = imgParams ? imgParams.substr(27).slice(0, -9) : '';

      var mota = [];
      card.find('.c-product-card__attr').each(function () {
        mota.push($(this).text());
      });

      kq.push({
        // The id is the last dash-separated token before ".html".
        id: href.split('.html')[0].split('-').pop(),
        type: loai,
        link: /^https?:\/\//.test(href) ? href : siteRoot + href,
        img: urlhinh,
        name: nameLink.text().trim(),
        price: card.find('.c-product-card__price-final').text().trim(),
        // The schema declares `desc` as a String; an array is not cast to a
        // string by Mongoose, so the attributes are joined into one string.
        desc: mota.join(', ')
      });
    });

    console.log(kq);
    if (kq.length === 0) {
      return;
    }
    // Report a failed save instead of dropping the rejection.
    dl.create(kq).then(null, function (saveError) {
      console.error("Failed to save products from " + u, saveError);
    });
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment