View csharp_tutorial_full_snippet.cs
using System; | |
using System.Collections.Generic; | |
using HtmlAgilityPack; | |
using ScrapySharp.Extensions; | |
using ScrapySharp.Network; | |
using System.IO; | |
using System.Globalization; | |
using CsvHelper; | |
namespace ScrapySharp_scraper |
View chsarp_tutorial_8.cs
/// <summary>
/// Program entry point: prompts for a keyword, collects ad links from the
/// Los Angeles Craigslist search page, builds the ad details for those links
/// (passing the keyword along) and exports the result to CSV.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    const string searchUrl = "https://losangeles.craigslist.org/search/bbb?";

    // Ask for the keyword before any page is requested.
    Console.WriteLine("Please enter the Keyword :");
    var keyword = Console.ReadLine();

    var links = GetAdLinks(searchUrl);
    var ads = GetAdDetails(links, keyword);

    // The keyword is also handed to the exporter, which uses it in the file name.
    ExportAdsToCsv(ads, keyword);
}
View csharp_tutorial_7.cs
/// <summary>
/// Writes the given ads to a CSV file named
/// <c>{Keyword}_{DateTime.Now.ToFileTime()}.csv</c> inside
/// <paramref name="outputDirectory"/>.
/// </summary>
/// <param name="lstAdDetails">Records handed to <c>CsvWriter.WriteRecords</c>.</param>
/// <param name="Keyword">
/// Prefix of the file name. Characters that are not valid in a file name are
/// replaced with '_' and <c>null</c> is treated as an empty string.
/// </param>
/// <param name="outputDirectory">
/// Folder that receives the CSV file; it is created when it does not exist.
/// Defaults to the folder this method originally wrote to.
/// </param>
static void ExportAdsToCsv(
    List<AdDetails> lstAdDetails,
    string Keyword,
    string outputDirectory = "/Users/guest/Desktop/ScrapySharp_scraper/CSVs")
{
    // The keyword comes straight from Console.ReadLine in Main, so it may be
    // null or contain characters (such as '/') that would alter the target path.
    var safeKeyword = Keyword ?? string.Empty;
    foreach (var invalidChar in Path.GetInvalidFileNameChars())
    {
        safeKeyword = safeKeyword.Replace(invalidChar, '_');
    }

    // StreamWriter does not create missing folders; CreateDirectory does
    // nothing when the folder is already there.
    Directory.CreateDirectory(outputDirectory);

    var csvPath = Path.Combine(
        outputDirectory,
        $"{safeKeyword}_{DateTime.Now.ToFileTime()}.csv");

    using (var writer = new StreamWriter(csvPath))
    using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
    {
        csv.WriteRecords(lstAdDetails);
    }
}
View csharp_tutorial_6.cs
/// <summary>
/// Program entry point: prompts for a keyword, then collects the ad links
/// from the Los Angeles Craigslist search page and builds the ad details
/// for them, passing the keyword along.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter the Keyword :");
    var searchKeyword = Console.ReadLine();

    // Links are fetched only after the keyword has been read.
    var collectedAds = GetAdDetails(
        GetAdLinks("https://losangeles.craigslist.org/search/bbb?"),
        searchKeyword);
}
View csharp_tutorial_5.cs
static List<AdDetails> GetAdDetails(List<string> urls, string Keyword){ | |
var lstAdDetails = new List<AdDetails>(); | |
foreach (var url in urls){ | |
var htmlNode = GetHtml(url); | |
var AdDetails = new AdDetails(); | |
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
View csharp_tutorial_4.cs
/// <summary>
/// Program entry point: collects the ad links from the Los Angeles
/// Craigslist search page and builds the ad details for each of them.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    const string searchPage = "https://losangeles.craigslist.org/search/bbb?";

    var pageLinks = GetAdLinks(searchPage);

    // The result is not used further in this step.
    var scrapedAds = GetAdDetails(pageLinks);
}
View csharp_tutorial_3.cs
static List<AdDetails> GetAdDetails(List<string> urls){ | |
var lstAdDetails = new List<AdDetails>(); | |
foreach (var url in urls){ | |
var htmlNode = GetHtml(url); | |
var AdDetails = new AdDetails(); | |
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
View csharp_tutorial_2.cs
/// <summary>
/// Plain data holder for one scraped ad: three independently settable
/// string properties.
/// </summary>
public class AdDetails
{
    /// <summary>Title text of the ad.</summary>
    public string AdTitle
    {
        get;
        set;
    }

    /// <summary>Description text of the ad.</summary>
    public string AdDescription
    {
        get;
        set;
    }

    /// <summary>URL of the ad.</summary>
    public string AdUrl
    {
        get;
        set;
    }
}
View csharp_tutorial_1.cs
static List<string> GetAdLinks(string url){ | |
var mainPageAdLinks = new List<string>(); | |
var html = GetHtml(url); | |
var links = html.CssSelect("a"); | |
foreach (var link in links){ | |
if(link.Attributes["href"].Value.Contains(".html")){ | |
mainPageAdLinks.Add(link.Attributes["href"].Value); | |
} | |
} |
View r_web_scraping_1.R
library(rvest) | |
library(dplyr) | |
# Read the page at `movie_link`, select the nodes matching the CSS selector
# ".primary_photo+ td a", take their text, and return all of it joined into
# a single comma-separated string.
get_cast <- function(movie_link) {
  page <- read_html(movie_link)
  cast_nodes <- html_nodes(page, ".primary_photo+ td a")
  cast_names <- html_text(cast_nodes)
  paste(cast_names, collapse = ",")
}
# Start from an empty data frame.
movie_list <- data.frame()
NewerOlder