Skip to content

Instantly share code, notes, and snippets.

@ptupitsyn
Last active September 28, 2023 10:58
Show Gist options
  • Save ptupitsyn/187bcb657047e98d10a9ec00dbb3b54e to your computer and use it in GitHub Desktop.
Save ptupitsyn/187bcb657047e98d10a9ec00dbb3b54e to your computer and use it in GitHub Desktop.
Bamboo directory parser
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console executable targeting .NET 7 with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net7.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- The saved directory page is copied to the build output (only when newer) so the program can open it by relative path. -->
<ItemGroup>
<None Update="bamboo-directory.html">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<!-- Third-party packages: CsvHelper writes the CSV output, HtmlAgilityPack parses the HTML input. -->
<ItemGroup>
<PackageReference Include="CsvHelper" Version="30.0.1" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.53" />
</ItemGroup>
</Project>
using System.Globalization;
using CsvHelper;
using HtmlAgilityPack;
// Entry point: read the saved directory page, parse every employee card,
// echo the parsed records to the console and export them to a CSV file.
const string inputPath = "bamboo-directory.html";

var document = new HtmlDocument();
document.LoadHtml(File.ReadAllText(inputPath));

// Each matching info-container div is turned into one Employee record.
List<HtmlNode> cards = SelectDivByClass(document.DocumentNode, "EmployeeCardContainer__infoContainer").ToList();
List<Employee> employees = cards.ConvertAll(ParseEmployeeCard);

foreach (var employee in employees)
{
    Console.WriteLine(employee);
}

// Save to CSV. The writers are scoped so the file is flushed and closed
// before the summary line is printed.
using (var output = new StreamWriter("bamboo-directory.csv"))
using (var csvWriter = new CsvWriter(output, CultureInfo.InvariantCulture))
{
    csvWriter.WriteRecords(employees);
}

Console.WriteLine($"CSV saved: {employees.Count} employees");
// Builds an Employee from one employee card node.
//
// The card is expected to contain exactly one "JobInfo__name" div (Single() throws
// otherwise). The remaining fields are read positionally from the "JobInfo__text",
// "ReportsTo__item" and "ContactInfo__text" divs:
//   JobInfo__text[0] = title, [1] = "location | ..." (only the part before '|' is kept), [2] = team
//   ContactInfo__text[0] = email; the first entry starting with '+' is the phone number
//   ReportsTo__item[0] = "Reports to <supervisor>"
// A positional entry that is absent from its list produces an empty string instead of
// an exception, so one incomplete card does not abort the whole export.
static Employee ParseEmployeeCard(HtmlNode employeeCard)
{
    var name = SelectDivTextByClass(employeeCard, "JobInfo__name").Single();
    var jobInfo = SelectDivTextByClass(employeeCard, "JobInfo__text").ToList();
    var reports = SelectDivTextByClass(employeeCard, "ReportsTo__item").ToList();
    var contactInfo = SelectDivTextByClass(employeeCard, "ContactInfo__text").ToList();

    return new Employee(
        Name: Decode(name),
        Title: Decode(ItemAt(jobInfo, 0)),
        Email: Decode(ItemAt(contactInfo, 0)),
        // Char overload: ordinal comparison, no culture-sensitive string matching.
        Phone: Decode(contactInfo.FirstOrDefault(x => x.StartsWith('+')) ?? string.Empty),
        // Supervisor is optional: an empty list maps to an empty name.
        SupervisorName: Decode(ItemAt(reports, 0).Replace("Reports to ", string.Empty)),
        // Split the raw text first so a decoded entity can never introduce a '|' separator.
        Location: Decode(ItemAt(jobInfo, 1).Split('|', StringSplitOptions.TrimEntries)[0]),
        Team: Decode(ItemAt(jobInfo, 2)));

    // The text comes from HtmlNode.InnerText; decode every HTML entity in it
    // (not only "&amp;") so values such as "O&#39;Brien" are exported as plain text.
    static string Decode(string raw) => System.Net.WebUtility.HtmlDecode(raw);

    // Returns items[index], or an empty string when the list is too short.
    static string ItemAt(IReadOnlyList<string> items, int index) =>
        index < items.Count ? items[index] : string.Empty;
}
// Projects every div selected by SelectDivByClass(node, cls) to its InnerText.
// The projection is lazy: texts are read as the returned sequence is enumerated.
static IEnumerable<string> SelectDivTextByClass(HtmlNode node, string cls)
{
    var matchingDivs = SelectDivByClass(node, cls);
    return from div in matchingDivs
           select div.InnerText;
}
// Selects every descendant <div> of `node` whose class attribute contains `cls`
// as a substring (XPath contains(), so it is not an exact class-token match).
//
// HtmlNode.SelectNodes returns null, not an empty collection, when nothing matches
// (unless HtmlDocument.OptionEmptyCollection is set). Normalize that to an empty
// collection so callers can chain LINQ operators without a null check.
// `cls` is interpolated into the XPath expression unescaped; it must not contain
// a single quote.
static HtmlNodeCollection SelectDivByClass(HtmlNode node, string cls) =>
    node.SelectNodes($".//div[contains(@class, '{cls}')]") ?? new HtmlNodeCollection(null);
/// <summary>
/// One employee entry parsed from the directory page by <c>ParseEmployeeCard</c>;
/// the list of these records is what gets written to the CSV file.
/// </summary>
/// <param name="Name">Text of the card's "JobInfo__name" div.</param>
/// <param name="Title">First "JobInfo__text" entry of the card.</param>
/// <param name="Email">First "ContactInfo__text" entry of the card.</param>
/// <param name="Phone">First "ContactInfo__text" entry starting with '+', or an empty string when there is none.</param>
/// <param name="SupervisorName">First "ReportsTo__item" entry with the "Reports to " prefix removed.</param>
/// <param name="Location">Part of the second "JobInfo__text" entry that precedes the first '|'.</param>
/// <param name="Team">Third "JobInfo__text" entry of the card.</param>
public record Employee(
string Name,
string Title,
string Email,
string Phone,
string SupervisorName,
string Location,
string Team);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment