Skip to content

Instantly share code, notes, and snippets.

@damieng
Created February 3, 2023 15:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save damieng/762c3b34d218aa68815c42d0128adf49 to your computer and use it in GitHub Desktop.
Save damieng/762c3b34d218aa68815c42d0128adf49 to your computer and use it in GitHub Desktop.
Basic scraping example in C#
using HtmlAgilityPack;
namespace ScapeSports;
/// <summary>
/// Console scraper that walks the paginated listing search results and prints
/// the title and subtitle of every listing found on each page.
/// </summary>
internal class Program
{
    /// <summary>
    /// Crawls breadth-first from the start URL, printing the listings on each
    /// page and following "Next" pagination links until none remain.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var start = new Uri("https://www.sportsengine.com/search/listings?code=98052");

        // Every URL that has ever been queued. One set replaces the separate
        // "done" set plus a linear Queue.Contains scan, and guarantees each
        // page is loaded at most once.
        var seen = new HashSet<Uri> { start };
        var queue = new Queue<Uri>();
        var web = new HtmlWeb();
        queue.Enqueue(start);

        while (queue.Count > 0)
        {
            var url = queue.Dequeue();
            var doc = web.Load(url);

            PrintListings(doc);

            foreach (var nextUri in FindNextPages(doc, url))
            {
                if (seen.Add(nextUri))
                {
                    queue.Enqueue(nextUri);
                }
            }
        }
    }

    /// <summary>
    /// Writes the title and subtitle of each listing in <paramref name="doc"/> to the console.
    /// </summary>
    /// <param name="doc">The parsed results page.</param>
    private static void PrintListings(HtmlDocument doc)
    {
        // SelectNodes returns null (not an empty collection) when nothing matches.
        var clubs = doc.DocumentNode.SelectNodes("//div[@class='pl-listing']");
        if (clubs is null)
        {
            return;
        }

        foreach (var club in clubs)
        {
            var name = club.SelectSingleNode(".//*[@class='pl-listing__title']")?.InnerText;
            var subtitle = club.SelectSingleNode(".//*[@class='pl-listing__subtitle']")?.InnerText;
            Console.WriteLine(name + "\n\t" + subtitle);
        }
    }

    /// <summary>
    /// Yields the absolute URI of every "Next" pagination link on the page,
    /// skipping links whose query string contains "map".
    /// </summary>
    /// <param name="doc">The parsed results page.</param>
    /// <param name="pageUrl">URL the page was loaded from; relative hrefs are resolved against it.</param>
    /// <returns>Zero or more candidate next-page URIs (may contain duplicates).</returns>
    private static IEnumerable<Uri> FindNextPages(HtmlDocument doc, Uri pageUrl)
    {
        // Match anchors whose class list contains the pagination-button token.
        // The previous predicate was a bare string literal, which is always
        // true and therefore selected every <a> in the document.
        var pagination = doc.DocumentNode.SelectNodes(
            "//a[contains(concat(' ', normalize-space(@class), ' '), ' pl-pagination__button ')]");
        if (pagination is null)
        {
            yield break;
        }

        foreach (var paginator in pagination)
        {
            if (!paginator.InnerText.Contains("Next", StringComparison.Ordinal))
            {
                continue;
            }

            // Anchors without an href cannot be followed.
            var href = paginator.GetAttributeValue("href", string.Empty);
            if (string.IsNullOrWhiteSpace(href))
            {
                continue;
            }

            // Decode HTML entities (e.g. "&amp;") and resolve the href against
            // the current page, so bare query strings ("?page=2"), relative
            // paths and absolute URLs are all handled correctly.
            if (!Uri.TryCreate(pageUrl, HtmlEntity.DeEntitize(href), out var nextUri))
            {
                continue;
            }

            if (!nextUri.Query.Contains("map", StringComparison.Ordinal))
            {
                yield return nextUri;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment