Skip to content

Instantly share code, notes, and snippets.

@TheDiligentDev
TheDiligentDev / Program.cs
Created January 26, 2020 17:40
C# Scraper - Initial Console
using System;
namespace Scraper
{
    /// <summary>
    /// Initial console skeleton for the scraper project.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Application entry point; prints a greeting to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string greeting = "Hello World!";
            Console.WriteLine(greeting);
        }
    }
}
@TheDiligentDev
TheDiligentDev / launch.json
Created January 26, 2020 17:42
Launch Settings - C# Scraper
{
"version": "0.2.0",
"configurations": [
{
"name": ".NET Core Launch (console)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
"program": "${workspaceFolder}/bin/Debug/netcoreapp2.1/CraigslistScraper.dll",
"args": [],
@TheDiligentDev
TheDiligentDev / Program.cs
Created January 26, 2020 18:26
GetHtml - C# Web Scraper
/// <summary>
/// Navigates the shared <c>_browser</c> instance to <paramref name="url"/>
/// and returns the <c>Html</c> node of the resulting page.
/// </summary>
/// <param name="url">Absolute address of the page to load.</param>
/// <returns>The <c>Html</c> property of the page returned by the browser.</returns>
static HtmlNode GetHtml(string url)
{
    var target = new Uri(url);
    return _browser.NavigateToPage(target).Html;
}
@TheDiligentDev
TheDiligentDev / Program.cs
Last active January 26, 2020 18:29
GetMainPageLinks
static List<string> GetMainPageLinks(string url)
{
var homePageLinks = new List<string>();
var html = GetHtml(url);
var links = html.CssSelect("a");
foreach (var link in links)
{
if (link.Attributes["href"].Value.Contains(".html"))
{
@TheDiligentDev
TheDiligentDev / Program.cs
Created January 26, 2020 18:30
GetMainPageLinks - C# Scraper
static List<string> GetMainPageLinks(string url)
{
var homePageLinks = new List<string>();
var html = GetHtml(url);
var links = html.CssSelect("a");
foreach (var link in links)
{
if (link.Attributes["href"].Value.Contains(".html"))
{
/// <summary>
/// Entry point: collects the links found on the Craigslist New York
/// computer-gigs search page via <c>GetMainPageLinks</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string searchPageUrl = "https://newyork.craigslist.org/d/computer-gigs/search/cpg";

    // The result is not consumed yet at this stage of the program.
    var gigLinks = GetMainPageLinks(searchPageUrl);
}
@TheDiligentDev
TheDiligentDev / PageDetails.cs
Created January 26, 2020 18:33
PageDetails
/// <summary>
/// Simple data holder describing one scraped page.
/// </summary>
public class PageDetails
{
    /// <summary>Title text of the page.</summary>
    public string title { get; set; }

    /// <summary>Description text of the page.</summary>
    public string description { get; set; }

    /// <summary>Address of the page (presumably the URL it was scraped from; confirm against the caller).</summary>
    public string url { get; set; }
}
@TheDiligentDev
TheDiligentDev / Program.cs
Last active January 26, 2020 18:36
GetPageDetails
static List<PageDetails> GetPageDetails(List<string> urls)
{
var lstPageDetails = new List<PageDetails>();
foreach (var url in urls)
{
var htmlNode = GetHtml(url);
var pageDetails = new PageDetails();
pageDetails.title = htmlNode.OwnerDocument.DocumentNode
.SelectSingleNode("//html/head/title").InnerText;
/// <summary>
/// Entry point: gathers the links from the Craigslist New York
/// computer-gigs search page, then passes them to <c>GetPageDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string searchPageUrl = "https://newyork.craigslist.org/d/computer-gigs/search/cpg";

    var gigLinks = GetMainPageLinks(searchPageUrl);

    // The details list is not consumed yet at this stage of the program.
    var gigDetails = GetPageDetails(gigLinks);
}
/// <summary>
/// Entry point: prompts the user for a search term, gathers the links from
/// the Craigslist New York computer-gigs search page, and passes the links
/// together with the search term to <c>GetPageDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Fixed: the original statement was missing its terminating semicolon,
    // which prevented the method from compiling.
    Console.WriteLine("Please enter a search term:");

    // NOTE(review): Console.ReadLine returns null when input is exhausted;
    // confirm that GetPageDetails tolerates a null search term.
    var searchTerm = Console.ReadLine();

    var mainPageLinks = GetMainPageLinks("https://newyork.craigslist.org/d/computer-gigs/search/cpg");

    // The matching gigs are collected but not yet consumed in this method.
    var lstGigs = GetPageDetails(mainPageLinks, searchTerm);
}
static List < PageDetails > GetPageDetails(List < string > urls, string searchTerm) {
var lstPageDetails = new List < PageDetails > ();
foreach(var url in urls) {