Skip to content

Instantly share code, notes, and snippets.

@pitermarx
Last active March 23, 2023 12:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pitermarx/d11e02d627b50e3f35f2463a15fcc5a5 to your computer and use it in GitHub Desktop.
Save pitermarx/d11e02d627b50e3f35f2463a15fcc5a5 to your computer and use it in GitHub Desktop.
Edenred Scraper with Playwright
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the Playwright/NUnit scraper test project. -->
<PropertyGroup>
<!-- Build against .NET 7. -->
<TargetFramework>net7.0</TargetFramework>
<!-- Implicit global usings are enabled, so the C# source may omit some common using directives. -->
<ImplicitUsings>enable</ImplicitUsings>
<!-- Nullable reference type annotations and warnings are enabled. -->
<Nullable>enable</Nullable>
<!-- This project is not meant to be packed as a NuGet package. -->
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<!-- Test platform, Playwright's NUnit integration, NUnit itself plus its adapter and analyzers, and the coverlet collector. -->
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
<PackageReference Include="Microsoft.Playwright.NUnit" Version="1.31.1" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="NUnit.Analyzers" Version="3.3.0" />
<PackageReference Include="coverlet.collector" Version="3.1.2" />
</ItemGroup>
</Project>
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Microsoft.Playwright;
using Microsoft.Playwright.NUnit;
using NUnit.Framework;
namespace PlaywrightTests;
/// <summary>
/// Playwright/NUnit fixture that logs in to https://www.myedenred.pt/, reads every element
/// matched by <c>.table-body .row</c> and appends the rows that are not yet present to a
/// semicolon-separated text file.
/// </summary>
[Parallelizable(ParallelScope.Self)]
[TestFixture]
public class Tests : PageTest
{
    // Placeholder: replace with the path of the file that accumulates the scraped rows.
    const string filePath = "<<filePath>>";

    /// <summary>
    /// Logs in, waits until the account holder name is displayed, converts each table row
    /// into a <c>date;text;value;balance</c> line and appends the lines that the output
    /// file does not already contain.
    /// </summary>
    [Test]
    public async Task ScrapeEnredData()
    {
        await Page.GotoAsync("https://www.myedenred.pt/");

        // Login (the <<...>> values are placeholders to fill in before running).
        await Page.GetByPlaceholder("Inserir Email").FillAsync("<<userName>>");
        await Page.GetByPlaceholder("Inserir Palavra-passe").FillAsync("<<password>>");
        await Page.Locator("#login").ClickAsync();

        // Wait for load: the holder name showing the expected text is used as the
        // signal that the page after login has rendered.
        var name = Page.Locator(".holder-name");
        await Expect(name).ToHaveTextAsync("<<your name>>");

        // Scrape
        var rows = await Page.QuerySelectorAllAsync(".table-body .row");
        List<string> lines = new();
        foreach (var row in rows)
        {
            var date = Truncate((await ReadCellAsync(row, 1))?.Trim(), 10);
            var txt = (await ReadCellAsync(row, 2))?.Trim();
            var value = NormalizeAmount(await ReadCellAsync(row, 3));
            var balance = NormalizeAmount(await ReadCellAsync(row, 4));
            lines.Add($"{date};{txt};{value};{balance}");
        }

        // On the first run the output file may not exist yet; treat that as "nothing saved".
        var known = new HashSet<string>(StringComparer.Ordinal);
        if (File.Exists(filePath))
        {
            known.UnionWith(await File.ReadAllLinesAsync(filePath));
        }

        // Filter every scraped line against the saved ones. SkipWhile would only drop the
        // leading run of known lines and re-append any known line that follows a new one.
        var newLines = lines.Where(line => !known.Contains(line)).ToList();
        await File.AppendAllLinesAsync(filePath, newLines);
    }

    /// <summary>
    /// Returns the text content of the <paramref name="column"/>-th child <c>div</c> of
    /// <paramref name="row"/>, or <c>null</c> when that child does not exist.
    /// </summary>
    private static async Task<string?> ReadCellAsync(IElementHandle row, int column)
    {
        var cell = await row.QuerySelectorAsync($"div:nth-child({column})");
        return cell is null ? null : await cell.TextContentAsync();
    }

    /// <summary>
    /// Returns at most the first <paramref name="maxLength"/> characters of
    /// <paramref name="text"/>; shorter (or null) input is returned unchanged instead of throwing.
    /// </summary>
    private static string? Truncate(string? text, int maxLength)
        => text is not null && text.Length > maxLength ? text[..maxLength] : text;

    /// <summary>
    /// Replaces the comma with a dot, removes the euro sign and trims the result.
    /// Null input yields null.
    /// </summary>
    private static string? NormalizeAmount(string? raw)
        => raw?.Replace(",", ".").Replace("€", "").Trim();
}
/// <summary>
/// LINQ-style <c>Select</c> projections over <see cref="Task{TResult}"/>, so that a
/// transformation can be chained onto a task before it is awaited.
/// </summary>
public static class TaskMonad
{
    /// <summary>
    /// Awaits <paramref name="t"/> and applies the synchronous <paramref name="selector"/> to its result.
    /// </summary>
    /// <returns>The projected value, or the default of <typeparamref name="TOut"/> when the selector yields null.</returns>
    public static async Task<TOut?> Select<TIn, TOut>(this Task<TIn> t, Func<TIn, TOut?> selector)
    {
        var source = await t;
        var projected = selector(source);
        if (projected is null)
        {
            return default(TOut);
        }

        return projected;
    }

    /// <summary>
    /// Awaits <paramref name="t"/>, applies the asynchronous <paramref name="selector"/> to its
    /// result and awaits the task that the selector returns.
    /// </summary>
    /// <returns>The awaited projection, or the default of <typeparamref name="TOut"/> when the selector returns a null task.</returns>
    public static async Task<TOut?> Select<TIn, TOut>(this Task<TIn> t, Func<TIn, Task<TOut?>?> selector)
    {
        var source = await t;
        var pending = selector(source);
        if (pending is null)
        {
            return default(TOut);
        }

        return await pending;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment