Last active
August 15, 2019 09:50
-
-
Save SeriaWei/cabcf8e1f9d26c9a3f5e4a7e46611a7e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using HtmlAgilityPack; | |
using Newtonsoft.Json.Linq; | |
using System; | |
using System.Collections.Generic; | |
using System.Dynamic; | |
using System.IO; | |
using System.Linq.Expressions; | |
using System.Net; | |
using System.Net.Http; | |
using System.Reflection; | |
using System.Text; | |
using System.Threading.Tasks; | |
namespace ConsoleApp | |
{ | |
class Program | |
{ | |
/// <summary>
/// Scrapes a single product detail page: loads the HTML, collects the title,
/// brand thumbnail, product images (fetched via a form POST), description and
/// the per-section image markup into a <see cref="JObject"/>, and downloads
/// every referenced file through <c>DownloadFile</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string host = "http://www.nbmyfamily.com";
    // The product id appears both in the page URL and in the POST form body.
    const string productId = "24";
    const string detailXPath = "//div[@class=\"detail\"]";

    var web = new HtmlWeb();
    var doc = web.Load($"{host}/products_detail/productId={productId}.html");
    HtmlNode root = doc.DocumentNode;
    JObject jobj = new JObject();

    // SelectSingleNode returns null when nothing matches, so fall back to an
    // empty value instead of dereferencing a missing node.
    string title = root.SelectSingleNode("/html/head/title")?.InnerText ?? string.Empty;
    jobj.Add("Title", new JValue(title));

    HtmlNode brand = root.SelectSingleNode("//li[@class=\"brandlogo\"]/img");
    string thumbnail = brand?.GetAttributeValue("src", string.Empty) ?? string.Empty;
    jobj.Add("ImageUrl", new JValue(thumbnail));
    DownloadIfPresent(host, thumbnail);

    // Dispose the client, the form content and the response deterministically.
    // GetAwaiter().GetResult() surfaces the real exception instead of an
    // AggregateException wrapper.
    string productImageJson;
    using (var httpClient = new HttpClient())
    using (var form = new FormUrlEncodedContent(new[]
    {
        new KeyValuePair<string, string>("productId", productId),
    }))
    using (HttpResponseMessage response = httpClient
        .PostAsync($"{host}/products_detail/comp-FrontProductsItem_imagesBySpecJson01-001", form)
        .GetAwaiter().GetResult())
    {
        // Fail with the HTTP status rather than a confusing JSON parse error.
        response.EnsureSuccessStatusCode();
        productImageJson = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
    }

    JArray imagesArray = new JArray();
    foreach (JToken item in JArray.Parse(productImageJson))
    {
        // Only scalar entries can be image paths; skip anything else.
        JValue value = item as JValue;
        if (value == null)
        {
            continue;
        }

        string imageUrl = value.ToString();
        if (string.IsNullOrEmpty(imageUrl))
        {
            continue;
        }

        DownloadFile(host, imageUrl);

        JObject imgItem = new JObject();
        imgItem.Add("ImageUrl", new JValue(imageUrl));
        imgItem.Add("Status", new JValue(1));
        imgItem.Add("ActionType", new JValue(1));
        imagesArray.Add(imgItem);
    }
    jobj.Add("ProductImages", imagesArray);

    string description = root.SelectSingleNode(detailXPath + "/div[1]")?.InnerText.Trim() ?? string.Empty;
    jobj.Add("ProductContent", new JValue(description));

    AddImageSection(root, jobj, host, 2, "Gallery");
    AddImageSection(root, jobj, host, 3, "Zoom");

    string doors = root.SelectSingleNode(detailXPath + "/div[4]/div")?.InnerHtml ?? string.Empty;
    jobj.Add("Doors", new JValue(doors));
    DownloadAttributeTargets(root, host, detailXPath + "/div[4]//img", "src");
    DownloadAttributeTargets(root, host, detailXPath + "/div[4]//area", "href");

    AddImageSection(root, jobj, host, 5, "Handles");
    AddImageSection(root, jobj, host, 6, "Colour");
    AddImageSection(root, jobj, host, 7, "TableAndChair");

    // NOTE(review): jobj is fully assembled here but nothing in this method
    // writes it out; confirm whether it should be saved or printed.

    // Keep the console window open until the process is terminated.
    while (true)
    {
        Console.ReadKey();
    }
}

/// <summary>
/// Collects the images under the n-th child div of the "detail" container,
/// downloads each one, and stores them in <paramref name="target"/> under
/// <paramref name="key"/> as one &lt;p&gt;&lt;img /&gt;&lt;/p&gt; line per image.
/// Nothing is added when the section contains no images.
/// </summary>
static void AddImageSection(HtmlNode root, JObject target, string host, int sectionIndex, string key)
{
    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection images = root.SelectNodes($"//div[@class=\"detail\"]/div[{sectionIndex}]//img");
    if (images == null)
    {
        return;
    }

    StringBuilder markup = new StringBuilder();
    foreach (HtmlNode image in images)
    {
        string imgUrl = image.GetAttributeValue("src", string.Empty);
        if (string.IsNullOrEmpty(imgUrl))
        {
            // An <img> without a src has nothing to download or render.
            continue;
        }

        DownloadFile(host, imgUrl);
        markup.AppendLine($"<p><img src=\"{imgUrl}\" /></p>");
    }
    target.Add(key, new JValue(markup.ToString()));
}

/// <summary>
/// Downloads the file referenced by <paramref name="attribute"/> on every node
/// matched by <paramref name="xpath"/>; does nothing when no node matches.
/// </summary>
static void DownloadAttributeTargets(HtmlNode root, string host, string xpath, string attribute)
{
    HtmlNodeCollection nodes = root.SelectNodes(xpath);
    if (nodes == null)
    {
        return;
    }

    foreach (HtmlNode node in nodes)
    {
        DownloadIfPresent(host, node.GetAttributeValue(attribute, string.Empty));
    }
}

/// <summary>
/// Calls <c>DownloadFile</c> only when <paramref name="path"/> is non-empty.
/// </summary>
static void DownloadIfPresent(string host, string path)
{
    if (!string.IsNullOrEmpty(path))
    {
        DownloadFile(host, path);
    }
}
/// <summary>
/// Downloads <paramref name="host"/> + <paramref name="path"/> into a file
/// under the current working directory whose relative location mirrors
/// <paramref name="path"/>, creating the parent directory first.
/// </summary>
/// <param name="host">Scheme and authority of the site, e.g. "http://example.com".</param>
/// <param name="path">Site-relative path of the file to fetch. Empty or null values are skipped.</param>
static void DownloadFile(string host, string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        // Without a path the target file would be the working directory itself.
        Console.WriteLine("Skipping download: empty path");
        return;
    }

    Console.WriteLine("Downloading: {0}", path);

    // Normalise the leading slash so both the URL and the local path are
    // well-formed whether or not the caller supplied one.
    string relativeUrl = path.TrimStart('/');
    string relativeFile = path.TrimStart('/', '\\').Replace('/', Path.DirectorySeparatorChar);
    string filePath = Path.Combine(Directory.GetCurrentDirectory(), relativeFile);
    CreateFileDir(filePath);

    // The using block guarantees disposal even when the download throws.
    using (WebClient webClient = new WebClient())
    {
        webClient.DownloadFile($"{host.TrimEnd('/')}/{relativeUrl}", filePath);
    }
}
/// <summary>
/// Makes sure the directory that would contain <paramref name="filePath"/>
/// exists, creating it when it is missing.
/// </summary>
/// <param name="filePath">Path of a file whose parent directory is required.</param>
static void CreateFileDir(string filePath)
{
    DirectoryInfo parent = new FileInfo(filePath).Directory;
    if (parent.Exists)
    {
        // Nothing to do: the directory is already there.
        return;
    }

    parent.Create();
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment