Skip to content

Instantly share code, notes, and snippets.

@saasindustries
Created February 23, 2021 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saasindustries/ac2ddf9f26911f300808042a6971cf95 to your computer and use it in GitHub Desktop.
Save saasindustries/ac2ddf9f26911f300808042a6971cf95 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using HtmlAgilityPack;
using ScrapySharp.Extensions;
using ScrapySharp.Network;
using System.IO;
using System.Globalization;
using CsvHelper;
namespace ScrapySharp_scraper
{
/// <summary>
/// Console scraper: collects ad links from a Craigslist search page, downloads every ad,
/// keeps the ads whose title or description contains a user-supplied keyword and writes
/// the matches to a CSV file.
/// </summary>
class Program
{
    // Directory the CSV export is written to (created on demand by exportAdsToCsv).
    const string CsvOutputDirectory = "/Users/guest/Desktop/ScrapySharp_scraper/CSVs";

    // Single browser instance shared by every page download in this program.
    static readonly ScrapingBrowser _scrapingbrowser = new ScrapingBrowser();

    /// <summary>
    /// Prompts for a keyword, scrapes the search page and exports the matching ads.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Please enter the Keyword :");
        // Console.ReadLine returns null at end of input (e.g. redirected stdin);
        // fall back to an empty keyword, which matches every ad.
        var Keyword = Console.ReadLine() ?? string.Empty;
        var adLinks = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");
        var lstAdDetails = GetAdDetails(adLinks, Keyword);
        exportAdsToCsv(lstAdDetails, Keyword);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns the distinct http(s) links on that page
    /// whose href contains ".html", in document order.
    /// </summary>
    /// <param name="url">Absolute URL of the listing page.</param>
    /// <returns>Absolute link URLs; empty when the page has no matching anchors.</returns>
    static List<string> GetAdLinks(string url)
    {
        var mainPageAdLinks = new List<string>();
        var seenLinks = new HashSet<string>(StringComparer.Ordinal);
        var pageUri = new Uri(url);
        var html = GetHtml(url);

        foreach (var link in html.CssSelect("a"))
        {
            // Anchors without an href (e.g. <a name="...">) have no such attribute;
            // GetAttributeValue substitutes the default instead of yielding null.
            var href = link.GetAttributeValue("href", string.Empty);
            if (!href.Contains(".html"))
            {
                continue;
            }

            // Resolve against the listing page so relative hrefs become downloadable,
            // and drop anything that is not a web link (mailto:, javascript:, file:, ...).
            Uri adUri;
            if (!Uri.TryCreate(pageUri, href, out adUri))
            {
                continue;
            }
            if (adUri.Scheme != Uri.UriSchemeHttp && adUri.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            // The same ad can be linked from several anchors; keep the first occurrence
            // only so each ad is downloaded and exported once.
            if (seenLinks.Add(adUri.AbsoluteUri))
            {
                mainPageAdLinks.Add(adUri.AbsoluteUri);
            }
        }

        return mainPageAdLinks;
    }

    /// <summary>
    /// Downloads every ad in <paramref name="urls"/> and returns those whose title or
    /// description contains <paramref name="Keyword"/> (case-insensitive).
    /// </summary>
    /// <param name="urls">Absolute ad URLs; null is treated as an empty list.</param>
    /// <param name="Keyword">Text to look for; null or empty matches every ad.</param>
    /// <returns>Details of the matching ads, in the order of <paramref name="urls"/>.</returns>
    static List<AdDetails> GetAdDetails(List<string> urls, string Keyword)
    {
        var lstAdDetails = new List<AdDetails>();
        if (urls == null)
        {
            return lstAdDetails;
        }

        var keyword = Keyword ?? string.Empty;
        foreach (var url in urls)
        {
            var document = GetHtml(url).OwnerDocument.DocumentNode;

            // SelectSingleNode returns null when the XPath matches nothing; treat a page
            // with an unexpected layout as having empty text instead of crashing the run.
            var titleNode = document.SelectSingleNode("//html/head/title");
            var bodyNode = document.SelectSingleNode("//html/body/section/section/section/section");
            var title = titleNode?.InnerText ?? string.Empty;
            var description = bodyNode?.InnerText ?? string.Empty;

            var ad = new AdDetails
            {
                AdTitle = title,
                // Strip the "QR Code Link to This Post" boilerplate from the extracted text.
                AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""),
                AdUrl = url
            };

            // Ordinal, case-insensitive search: independent of the current culture,
            // unlike comparing ToLower() copies.
            var keywordInTitle = ad.AdTitle.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0;
            var keywordInDescription = ad.AdDescription.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0;
            if (keywordInTitle || keywordInDescription)
            {
                lstAdDetails.Add(ad);
            }
        }

        return lstAdDetails;
    }

    /// <summary>
    /// Writes <paramref name="lstAdDetails"/> to a new CSV file named
    /// "{Keyword}_{file-time stamp}.csv" inside the output directory.
    /// </summary>
    /// <param name="lstAdDetails">Ads to export; null is written as an empty list.</param>
    /// <param name="Keyword">Keyword used as the file-name prefix.</param>
    static void exportAdsToCsv(List<AdDetails> lstAdDetails, string Keyword)
    {
        // The keyword is free-form user input: replace characters that are not allowed
        // in a file name (such as '/') so they cannot alter or break the output path.
        var safeKeyword = Keyword ?? string.Empty;
        foreach (var invalidChar in Path.GetInvalidFileNameChars())
        {
            safeKeyword = safeKeyword.Replace(invalidChar, '_');
        }

        // StreamWriter does not create missing directories; CreateDirectory is a no-op
        // when the directory already exists.
        Directory.CreateDirectory(CsvOutputDirectory);
        var csvPath = $@"{CsvOutputDirectory}/{safeKeyword}_{DateTime.Now.ToFileTime()}.csv";

        using (var writer = new StreamWriter(csvPath))
        using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
        {
            csv.WriteRecords(lstAdDetails ?? new List<AdDetails>());
        }
    }

    /// <summary>
    /// Navigates the shared browser to <paramref name="url"/> and returns the page's HTML node.
    /// </summary>
    /// <param name="url">Absolute URL to download.</param>
    /// <returns>The <c>Html</c> node of the downloaded page.</returns>
    static HtmlNode GetHtml(string url)
    {
        WebPage webPage = _scrapingbrowser.NavigateToPage(new Uri(url));
        return webPage.Html;
    }
}
/// <summary>
/// Plain data holder for one scraped ad: its title, its description text and its URL.
/// </summary>
public class AdDetails
{
    /// <summary>Title text of the ad.</summary>
    public string AdTitle
    {
        get;
        set;
    }

    /// <summary>Description text of the ad.</summary>
    public string AdDescription
    {
        get;
        set;
    }

    /// <summary>URL the ad was read from.</summary>
    public string AdUrl
    {
        get;
        set;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment