Last active
June 8, 2017 04:10
-
-
Save huybui02/68cc0967cab1427b95900569c16726c8 to your computer and use it in GitHub Desktop.
getLazada
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var express = require("express"); | |
var app = express(); | |
var request = require("request"); | |
var request1 = require("request"); | |
var cheerio = require("cheerio"); | |
var fs = require("fs"); | |
var rp = require("request-promise"); | |
var Promise = require('promise'); | |
// The express web front end is currently disabled; this script only scrapes
// and stores data.
// app.use(express.static("public"));
// app.set("view engine","ejs");
// app.set("views","./views");
// app.listen(3000);

// Open the MongoDB connection (local `lazada` database) that the model
// defined below writes to.
const mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/lazada');
const dbMongo = mongoose.connection;
// Mongoose connections emit 'error'; the previous listener was registered on
// 'err', an event that is never fired, so connection failures went unreported.
dbMongo.on('error', console.error.bind(console, 'Loi ket noi!'));
dbMongo.once('open', function () {
  console.log("Ket noi thanh cong");
});
// Shape of one scraped product record. Every attribute is stored as a string.
const Schema = new mongoose.Schema({
  id: String,      // trailing token of the product URL, before ".html"
  type: String,    // text of the last breadcrumb entry on the listing page
  link: String,    // site root + product href
  img: String,     // image URL cut out of the card's component-params attribute
  name: String,    // product card title
  price: String,   // final price text as shown on the card
  comment: String, // declared but never populated by the scraper in this file
  desc: String,    // product card attribute texts
});

// Model used by layData() to persist the scraped records.
const dl = mongoose.model('data', Schema);
// Category listing URLs to crawl. Each one ends with "page=" so that the
// crawler can append a page number directly to it.
const link_1 = "http://www.lazada.vn/dien-thoai-di-dong/?spm=a2o4n.category-010100000000.0.0.LNbOng&itemperpage=120&page=";
const link_2 = "http://www.lazada.vn/laptop/?spm=a2o4n.category-020100000000.0.0.59I7Wc&itemperpage=120&page=";
const link_3 = "http://www.lazada.vn/may-tinh-bang/?spm=a2o4n.category-010200000000.0.0.JNr58x&itemperpage=120&page=";
const link_4 = "http://www.lazada.vn/phu-kien-dien-thoai-may-tinh-bang/?spm=a2o4n.category-011100000000.0.0.bT0iuT&itemperpage=120&page=";
const link_5 = "http://www.lazada.vn/phu-kien-may-vi-tinh/?spm=a2o4n.category-020300000000.0.0.UrqBPl&itemperpage=120&page=";
const link_6 = "http://www.lazada.vn/may-anh-may-quay-phim/?spm=a2o4n.category-040000000000.0.0.fMn5AI&itemperpage=120&page=";
const link_7 = "http://www.lazada.vn/tivi/?spm=a2o4n.category-030500000000.0.0.1zH25l&itemperpage=120&page=";
const link_8 = "http://www.lazada.vn/thiet-bi-am-thanh-di-dong/?spm=a2o4n.category-030100000000.0.0.oGyCy5&itemperpage=120&page=";
const link_9 = "http://www.lazada.vn/thiet-bi-choi-game/?spm=a2o4n.category-030200000000.0.0.NsOTRx&itemperpage=120&page=";
const link_10 = "http://www.lazada.vn/thiet-bi-so/?spm=a2o4n.category-030300000000.0.0.Pl2afU&itemperpage=120&page=";
const link_11 = "http://www.lazada.vn/video/?spm=a2o4n.category-030400000000.0.0.zz9ebX&itemperpage=120&page=";
const link_12 = "http://www.lazada.vn/thiet-bi-deo-cong-nghe/?spm=a2o4n.category-031000000000.0.0.kqvokv&itemperpage=120&page=";
const link_13 = "http://www.lazada.vn/phu-kien-cho-tv/?spm=a2o4n.category-031100000000.0.0.R6mCy5&itemperpage=120&page=";
const link_14 = "http://www.lazada.vn/thiet-bi-dieu-khien-qua-app/?spm=a2o4n.category-031200000000.0.0.e0W9LL&itemperpage=120&page=";
const link_15 = "http://www.lazada.vn/thoi-trang-nu/?spm=a2o4n.category-090100000000.0.0.ximZPE&itemperpage=120&page=";
const link_16 = "http://www.lazada.vn/thoi-trang-nam/?spm=a2o4n.category-090200000000.0.0.VZl5P2&itemperpage=120&page=";
const link_17 = "http://www.lazada.vn/thoi-trang-danh-cho-be-gai/?spm=a2o4n.category-091600000000.0.0.iGmz3U&itemperpage=120&page=";
const link_18 = "http://www.lazada.vn/thoi-trang-cho-be-trai/?spm=a2o4n.category-091700000000.0.0.lFEC1R&itemperpage=120&page=";
const link_19 = "http://www.lazada.vn/dong-ho/?spm=a2o4n.category-170300000000.0.0.sRBoor&itemperpage=120&page=";
const link_20 = "http://www.lazada.vn/mat-kinh/?spm=a2o4n.category-170100000000.0.0.IRGihX&itemperpage=120&page=";
const link_21 = "http://www.lazada.vn/do-trang-suc/?spm=a2o4n.category-170200000000.0.0.8eu4d7&itemperpage=120&page=";
const link_22 = "http://www.lazada.vn/do-gia-dung-nha-bep/?spm=a2o4n.category-050600000000.0.0.Ya1Yeo&itemperpage=120&page=";
const link_23 = "http://www.lazada.vn/do-gia-dung-lon/?spm=a2o4n.category-050400000000.0.0.8JiYr1&itemperpage=120&page=";
const link_24 = "http://www.lazada.vn/quat-may-nong-lanh/?spm=a2o4n.category-050100000000.0.0.pFRycU&itemperpage=120&page=";
const link_25 = "http://www.lazada.vn/thiet-bi-cham-soc-quan-ao/?spm=a2o4n.category-050200000000.0.0.SAnv93&itemperpage=120&page=";
const link_26 = "http://www.lazada.vn/dung-cu-thiet-bi-gia-dinh-moi/?spm=a2o4n.category-050300000000.0.0.toFrkc&itemperpage=120&page=";
const link_27 = "http://www.lazada.vn/thiet-bi-do-gia-dung/?spm=a2o4n.category-051000000000.0.0.x0yRII&itemperpage=120&page=";
const link_28 = "http://www.lazada.vn/van-phong-pham-gia-dinh/?spm=a2o4n.category-060900000000.0.0.dLZRka&itemperpage=120&page=";
const link_29 = "http://www.lazada.vn/do-dung-phu-kien-phong-tam/?spm=a2o4n.category-060100000000.0.0.HKUrzS&itemperpage=120&page=";
const link_30 = "http://www.lazada.vn/do-dung-bep-phong-an/?spm=a2o4n.category-060600000000.0.0.wyvNRB&itemperpage=120&page=";
const link_31 = "http://www.lazada.vn/do-dung-phong-ngu-gia-dinh/?spm=a2o4n.category-060200000000.0.0.dvwb1O&itemperpage=120&page=";
const link_32 = "http://www.lazada.vn/san-pham-noi-that/?spm=a2o4n.category-060300000000.0.0.oRcBEY&itemperpage=120&page=";
const link_33 = "http://www.lazada.vn/tan-trang-nha-cua/?spm=a2o4n.category-060500000000.0.0.MyaeCZ&itemperpage=120&page=";
const link_34 = "http://www.lazada.vn/tu-dung-va-sap-xep-do/?spm=a2o4n.category-061000000000.0.0.TLOl5L&itemperpage=120&page=";
const link_35 = "http://www.lazada.vn/san-pham-trang-tri-nha-cua/?spm=a2o4n.category-060400000000.0.0.a8UWnH&itemperpage=120&page=";
const link_36 = "http://www.lazada.vn/do-dung-ngoai-troi-san-vuon/?spm=a2o4n.category-060700000000.0.0.lWdBmz&itemperpage=120&page=";
const link_37 = "http://www.lazada.vn/cac-loai-den/?spm=a2o4n.category-061100000000.0.0.D3l2nU&itemperpage=120&page=";
const link_38 = "http://www.lazada.vn/trang-diem/?spm=a2o4n.category-080800000000.0.0.xbsmLt&itemperpage=120&page=";
const link_39 = "http://www.lazada.vn/cham-soc-da-mat/?spm=a2o4n.category-080200000000.0.0.3G38au&itemperpage=120&page=";
const link_40 = "http://www.lazada.vn/san-pham-tam-cham-soc-co-the/?spm=a2o4n.category-080100000000.0.0.eK0INt&itemperpage=120&page=";
const link_41 = "http://www.lazada.vn/san-pham-cham-soc-toc/?spm=a2o4n.category-080600000000.0.0.6WSxHh&itemperpage=120&page=";
const link_42 = "http://www.lazada.vn/nuoc-hoa/?spm=a2o4n.category-080400000000.0.0.QvQhG3&itemperpage=120&page=";
const link_43 = "http://www.lazada.vn/thuc-pham-bo-sung/?spm=a2o4n.category-080300000000.0.0.lGDzhV&itemperpage=120&page=";
const link_44 = "http://www.lazada.vn/dung-cu-cham-soc-sac-dep/?spm=a2o4n.category-080700000000.0.0.wQEk3N&itemperpage=120&page=";
const link_45 = "http://www.lazada.vn/cham-soc-cho-nam-gioi/?spm=a2o4n.category-080900000000.0.0.HDiwGd&itemperpage=120&page=";
const link_46 = "http://www.lazada.vn/cham-soc-ca-nhan/?spm=a2o4n.category-081000000000.0.0.iKtKQ2&itemperpage=120&page=";
const link_47 = "http://www.lazada.vn/thiet-bi-y-te/?spm=a2o4n.category-081300000000.0.0.NksBR3&itemperpage=120&page=";
const link_48 = "http://www.lazada.vn/ho-tro-tinh-duc/?spm=a2o4n.category-081700000000.0.0.RxkAPx&itemperpage=120&page=";
const link_49 = "http://www.lazada.vn/do-dung-bu-sua-an-dam/?spm=a2o4n.category-180100000000.0.0.ux2I2H&itemperpage=120&page=";
const link_50 = "http://www.lazada.vn/quan-ao-phu-kien-cho-be/?spm=a2o4n.category-181100000000.0.0.jXSDql&itemperpage=120&page=";
const link_51 = "http://www.lazada.vn/cham-soc-ba-me-mang-thai/?spm=a2o4n.category-181000000000.0.0.slOZmM&itemperpage=120&page=";
const link_52 = "http://www.lazada.vn/qua-tang/?spm=a2o4n.category-180900000000.0.0.Sfh1u2&itemperpage=120&page=";
const link_53 = "http://www.lazada.vn/xe-ghe-em-be/?spm=a2o4n.category-180600000000.0.0.3vp25Q&itemperpage=120&page=";
const link_54 = "http://www.lazada.vn/suc-khoe-an-toan/?spm=a2o4n.category-180500000000.0.0.UwIrlN&itemperpage=120&page=";
const link_55 = "http://www.lazada.vn/cham-soc-tre-so-sinh-tre-nho/?spm=a2o4n.category-180400000000.0.0.OEEoaU&itemperpage=120&page=";
const link_56 = "http://www.lazada.vn/ta-dung-cu-ve-sinh/?spm=a2o4n.category-180300000000.0.0.OX1SUt&itemperpage=120&page=";
const link_57 = "http://www.lazada.vn/ti-ngam-phu-kien/?spm=a2o4n.category-180200000000.0.0.PvSIxp&itemperpage=120&page=";
const link_58 = "http://www.lazada.vn/do-choi-tro-choi/?spm=a2o4n.category-190000000000.0.0.FYlW2C&itemperpage=120&page=";
const link_59 = "http://www.lazada.vn/tre-so-sinh-tre-nho/?spm=a2o4n.category-180000000000.0.0.Et7TrO&itemperpage=120&page=";
const link_60 = "http://www.lazada.vn/the-thao-da-ngoai/?spm=a2o4n.category-110000000000.0.0.KvFtZd&itemperpage=120&page=";
const link_61 = "http://www.lazada.vn/vali-ba-lo-tui-du-lich/?spm=a2o4n.category-100000000000.0.0.HiY8RX&itemperpage=120&page=";
const link_62 = "http://www.lazada.vn/thiet-bi-phu-kien-o-to-xe-may/?spm=a2o4n.category-120100000000.0.0.ijgBnS&itemperpage=120&page=";
const link_63 = "http://www.lazada.vn/bach-hoa-online/?spm=a2o4n.category-140000000000.0.0.rgx6xR&itemperpage=120&page=";
const link_64 = "http://www.lazada.vn/sach/?spm=a2o4n.category-130100000000.0.0.6p22Xb&itemperpage=120&page=";
const link_65 = "http://www.lazada.vn/nhac-cu-moi/?spm=a2o4n.category-130700000000.0.0.FStpPg&itemperpage=120&page=";
const link_66 = "http://www.lazada.vn/cham-soc-thu-cung/?spm=a2o4n.category-200000000000.0.0.okp1IU&itemperpage=120&page=";
const link_67 = "http://www.lazada.vn/khuyen-mai-dich-vu/?spm=a2o4n.category-160000000000.0.0.xNIJHH&itemperpage=120&page=";
/**
 * Look up how many result pages a category listing has.
 *
 * Downloads `url`, reads the text of the last `.c-paging__link` element and
 * reports it as an integer through a Node-style callback.
 *
 * `callback` is invoked exactly once. Previously an exception thrown by the
 * callback itself was caught by the promise chain and the callback was
 * invoked a second time with that exception as `err`.
 *
 * @param {string} url - Listing page to inspect.
 * @param {function(?Error, ?number): void} callback - Receives
 *   `(null, pageCount)` on success or `(error, null)` when the download or
 *   the parsing fails. `pageCount` is always an integer >= 1.
 */
function getPager(url, callback) {
  // Hand the outcome to the consumer once; a bug in the consumer is logged
  // here instead of re-entering the callback or crashing the crawl.
  function deliver(err, pageCount) {
    try {
      callback(err, pageCount);
    } catch (callbackError) {
      console.error('getPager callback threw:', callbackError);
    }
  }

  request(url, function (error, res, body) {
    if (error) {
      deliver(error, null);
      return;
    }

    let pageCount;
    try {
      const $ = cheerio.load(body);
      pageCount = parseInt($('.c-paging__link').last().text(), 10);
    } catch (parseError) {
      deliver(parseError, null);
      return;
    }

    // A listing without paging links yields NaN; treat it as a single page
    // so the caller still visits page 1 instead of skipping the category.
    if (Number.isNaN(pageCount) || pageCount < 1) {
      pageCount = 1;
    }
    deliver(null, pageCount);
  });
}
// Gather every category URL into one list, in declaration order, for the
// crawl loop.
const manglink = [
  link_1, link_2, link_3, link_4, link_5, link_6, link_7, link_8, link_9, link_10,
  link_11, link_12, link_13, link_14, link_15, link_16, link_17, link_18, link_19, link_20,
  link_21, link_22, link_23, link_24, link_25, link_26, link_27, link_28, link_29, link_30,
  link_31, link_32, link_33, link_34, link_35, link_36, link_37, link_38, link_39, link_40,
  link_41, link_42, link_43, link_44, link_45, link_46, link_47, link_48, link_49, link_50,
  link_51, link_52, link_53, link_54, link_55, link_56, link_57, link_58, link_59, link_60,
  link_61, link_62, link_63, link_64, link_65, link_66, link_67,
];
// console.log(manglink[0]+"1");

/**
 * Progress marker printed once per category, after all of that category's
 * page requests have been handed to layData(). The downloads and database
 * writes started by layData() may still be in flight at that point.
 */
function cb() {
  console.log("------------------------Da ghi xong du lieu--------------------------------");
}

// Crawl every category: ask page 1 how many pages exist, then scrape each page.
// The forEach callback previously named its second parameter `cb`; forEach
// passes the numeric array index there, which shadowed the function above and
// made `cb()` throw a TypeError.
manglink.forEach(function (baseUrl) {
  getPager(baseUrl + "1", function (err, result) {
    if (err != null) {
      console.error("Failed to read page count for " + baseUrl, err);
      return;
    }
    for (let page = 1; page <= result; page++) {
      layData(baseUrl + page);
    }
    cb();
  });
});
/**
 * Download one category listing page, turn every product card on it into a
 * record and store the records through the `dl` model.
 *
 * Cards without a product link are skipped. A card without image parameters
 * is stored with an empty `img`.
 *
 * @param {string} u - Full URL of the listing page to scrape.
 */
function layData(u) {
  request(u, function (error, response, body) {
    if (error) {
      console.error("------ request failed for " + u, error);
      return;
    }

    const $ = cheerio.load(body);
    // Site root prepended to each product href. Fixed from the misspelled
    // "ladaza.vn". Assumes the hrefs are site-relative paths; TODO confirm.
    const lienket = "http://www.lazada.vn";
    // Category name: text of the last breadcrumb entry.
    const loai = $('.c-breadcrumb__item').last().text().trim();
    const kq = [];

    $('div.c-product-list__item').each(function () {
      const card = $(this);
      const nameLink = card.find('.c-product-card__name');
      const lket = nameLink.prop('href');
      if (!lket) {
        // Without an href neither the id nor the link can be built.
        return;
      }

      // The image URL sits inside the attribute at a fixed offset: the first
      // 27 and last 9 characters are dropped. Presumably a constant wrapper
      // around the URL; verify against the live markup.
      const imgParams = card
        .find('div.c-product-card__img-placeholder > a > span')
        .attr('data-js-component-params');
      const urlhinh = typeof imgParams === 'string' ? imgParams.slice(27, -9) : '';

      const mota = [];
      card.find('.c-product-card__attr').each(function () {
        mota.push($(this).text());
      });

      kq.push({
        // Last dash-separated token before ".html" in the product URL.
        id: lket.split('.html')[0].split('-').pop(),
        type: loai,
        link: lienket + lket,
        img: urlhinh,
        name: nameLink.text().trim(),
        price: card.find('.c-product-card__price-final').text().trim(),
        // The schema declares `desc` as String. Joining with "," yields the
        // same text as Array#toString, without relying on implicit casting.
        desc: mota.join(','),
      });
    });

    console.log(kq);
    if (kq.length === 0) {
      return;
    }
    dl.create(kq).catch(function (saveError) {
      console.error("Failed to store products from " + u, saveError);
    });
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment