Skip to content

Instantly share code, notes, and snippets.

@SeriaWei
Last active August 15, 2019 09:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SeriaWei/cabcf8e1f9d26c9a3f5e4a7e46611a7e to your computer and use it in GitHub Desktop.
Save SeriaWei/cabcf8e1f9d26c9a3f5e4a7e46611a7e to your computer and use it in GitHub Desktop.
using HtmlAgilityPack;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Dynamic;
using System.IO;
using System.Linq.Expressions;
using System.Net;
using System.Net.Http;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp
{
class Program
{
    /// <summary>
    /// Scrapes one product detail page, mirrors every referenced image into the
    /// current working directory, and assembles the product data into a JSON object.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        const string host = "http://www.nbmyfamily.com";
        const string productId = "24";

        var web = new HtmlWeb();
        var doc = web.Load($"{host}/products_detail/productId={productId}.html");
        HtmlNode root = doc.DocumentNode;

        // NOTE(review): jobj is populated below but never printed or saved in this
        // file — confirm whether it is meant to be written out somewhere.
        JObject jobj = new JObject();

        // SelectSingleNode returns null when nothing matches, so every lookup is guarded.
        string title = root.SelectSingleNode("/html/head/title")?.InnerText ?? string.Empty;
        jobj.Add("Title", new JValue(title));

        HtmlNode brand = root.SelectSingleNode("//li[@class=\"brandlogo\"]/img");
        string thumbnail = brand?.GetAttributeValue("src", string.Empty) ?? string.Empty;
        jobj.Add("ImageUrl", new JValue(thumbnail));
        DownloadFile(host, thumbnail);

        jobj.Add("ProductImages", FetchProductImages(host, productId));

        string description = root.SelectSingleNode("//div[@class=\"detail\"]/div[1]")?.InnerText.Trim() ?? string.Empty;
        jobj.Add("ProductContent", new JValue(description));

        AddImageSection(root, host, jobj, "Gallery", 2);
        AddImageSection(root, host, jobj, "Zoom", 3);

        // The "Doors" section keeps its raw HTML; its images and image-map targets
        // are only mirrored locally.
        HtmlNode doorsNode = root.SelectSingleNode("//div[@class=\"detail\"]/div[4]/div");
        if (doorsNode != null)
        {
            jobj.Add("Doors", new JValue(doorsNode.InnerHtml));
        }
        DownloadAttributeTargets(root, host, "//div[@class=\"detail\"]/div[4]//img", "src");
        DownloadAttributeTargets(root, host, "//div[@class=\"detail\"]/div[4]//area", "href");

        AddImageSection(root, host, jobj, "Handles", 5);
        AddImageSection(root, host, jobj, "Colour", 6);
        AddImageSection(root, host, jobj, "TableAndChair", 7);

        // Keeps the console open indefinitely; the process ends only when it is
        // terminated from outside (e.g. closing the window).
        while (true)
        {
            Console.ReadKey();
        }
    }

    /// <summary>
    /// Posts the product id to the site's image endpoint, downloads every image the
    /// endpoint returns, and builds the "ProductImages" array.
    /// </summary>
    /// <param name="host">Scheme and host of the site, without a trailing slash.</param>
    /// <param name="productId">Product id sent as the "productId" form field.</param>
    /// <returns>One object per image with "ImageUrl", "Status" and "ActionType".</returns>
    private static JArray FetchProductImages(string host, string productId)
    {
        string productImageJson;
        using (HttpClient httpClient = new HttpClient())
        using (var form = new FormUrlEncodedContent(new[]
        {
            new KeyValuePair<string, string>("productId", productId),
        }))
        // GetAwaiter().GetResult() surfaces the real exception instead of an AggregateException.
        using (HttpResponseMessage response = httpClient
            .PostAsync($"{host}/products_detail/comp-FrontProductsItem_imagesBySpecJson01-001", form)
            .GetAwaiter().GetResult())
        {
            // Fail with a clear HTTP error instead of a JSON parse error on an error page.
            response.EnsureSuccessStatusCode();
            productImageJson = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
        }

        JArray imagesArray = new JArray();
        foreach (JToken item in JArray.Parse(productImageJson))
        {
            // Only plain values are image paths; skip anything else rather than
            // dereferencing a failed cast.
            JValue value = item as JValue;
            if (value == null)
            {
                continue;
            }

            string imageUrl = value.ToString();
            if (string.IsNullOrEmpty(imageUrl))
            {
                continue;
            }

            DownloadFile(host, imageUrl);

            JObject imgItem = new JObject();
            imgItem.Add("ImageUrl", new JValue(imageUrl));
            imgItem.Add("Status", new JValue(1));
            imgItem.Add("ActionType", new JValue(1));
            imagesArray.Add(imgItem);
        }
        return imagesArray;
    }

    /// <summary>
    /// Downloads every image found in the given child div of the "detail" container
    /// and stores the images as paragraph-wrapped img markup under
    /// <paramref name="propertyName"/>. Nothing is added when the section has no images.
    /// </summary>
    /// <param name="root">Document node to query.</param>
    /// <param name="host">Scheme and host of the site, without a trailing slash.</param>
    /// <param name="target">JSON object receiving the generated markup.</param>
    /// <param name="propertyName">Name of the property to add to <paramref name="target"/>.</param>
    /// <param name="sectionIndex">1-based XPath position of the section div inside the detail container.</param>
    private static void AddImageSection(HtmlNode root, string host, JObject target, string propertyName, int sectionIndex)
    {
        HtmlNodeCollection images = root.SelectNodes($"//div[@class=\"detail\"]/div[{sectionIndex}]//img");
        if (images == null)
        {
            return;
        }

        StringBuilder markup = new StringBuilder();
        foreach (HtmlNode image in images)
        {
            string imgUrl = image.GetAttributeValue("src", string.Empty);
            DownloadFile(host, imgUrl);
            markup.AppendLine($"<p><img src=\"{imgUrl}\" /></p>");
        }
        target.Add(propertyName, new JValue(markup.ToString()));
    }

    /// <summary>
    /// Downloads the file referenced by <paramref name="attributeName"/> on every node
    /// matching <paramref name="xpath"/>. Does nothing when no node matches.
    /// </summary>
    /// <param name="root">Document node to query.</param>
    /// <param name="host">Scheme and host of the site, without a trailing slash.</param>
    /// <param name="xpath">XPath selecting the nodes to inspect.</param>
    /// <param name="attributeName">Attribute holding the path to download.</param>
    private static void DownloadAttributeTargets(HtmlNode root, string host, string xpath, string attributeName)
    {
        HtmlNodeCollection nodes = root.SelectNodes(xpath);
        if (nodes == null)
        {
            return;
        }

        foreach (HtmlNode node in nodes)
        {
            DownloadFile(host, node.GetAttributeValue(attributeName, string.Empty));
        }
    }

    /// <summary>
    /// Downloads <paramref name="path"/> from <paramref name="host"/> into the current
    /// working directory, mirroring the URL path as a directory structure.
    /// </summary>
    /// <param name="host">Scheme and host of the site, without a trailing slash.</param>
    /// <param name="path">Path of the resource on the host; empty values are ignored.</param>
    static void DownloadFile(string host, string path)
    {
        // Attributes missing from the HTML arrive here as an empty string.
        if (string.IsNullOrWhiteSpace(path))
        {
            return;
        }

        Console.WriteLine("Downloading: {0}", path);

        // Build the local path with the platform separator instead of a hard-coded backslash.
        string relativePath = path.TrimStart('/', '\\').Replace('/', Path.DirectorySeparatorChar);
        string rootDir = Directory.GetCurrentDirectory();
        string filePath = Path.GetFullPath(Path.Combine(rootDir, relativePath));

        // The path comes from scraped HTML: refuse anything that resolves outside the
        // working directory (e.g. via ".." segments or a rooted path).
        string separator = Path.DirectorySeparatorChar.ToString();
        string rootPrefix = rootDir.EndsWith(separator, StringComparison.Ordinal) ? rootDir : rootDir + separator;
        if (!filePath.StartsWith(rootPrefix, StringComparison.Ordinal))
        {
            Console.WriteLine("Skipped (outside download directory): {0}", path);
            return;
        }

        CreateFileDir(filePath);

        string url = path.StartsWith("/", StringComparison.Ordinal) ? host + path : host + "/" + path;

        // using guarantees the client is disposed even when the download throws.
        using (WebClient webClient = new WebClient())
        {
            webClient.DownloadFile(url, filePath);
        }
    }

    /// <summary>
    /// Ensures the directory that will contain <paramref name="filePath"/> exists.
    /// </summary>
    /// <param name="filePath">Path of the file about to be written.</param>
    static void CreateFileDir(string filePath)
    {
        string dir = Path.GetDirectoryName(filePath);

        // GetDirectoryName yields null/empty when the path has no parent directory.
        if (!string.IsNullOrEmpty(dir))
        {
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(dir);
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment