Scrapes data from https://www.myedenred.pt/ into a CSV file.
Run with `dotnet test`.
Scrapes data from https://www.myedenred.pt/ into a CSV file.
Run with `dotnet test`.
<!-- MSBuild project file: targets net7.0 with implicit usings and nullable reference types enabled. -->
<Project Sdk="Microsoft.NET.Sdk"> | |
<PropertyGroup> | |
<TargetFramework>net7.0</TargetFramework> | |
<ImplicitUsings>enable</ImplicitUsings> | |
<Nullable>enable</Nullable> | |
<!-- IsPackable=false: this project is not meant to be packed as a NuGet package. -->
<IsPackable>false</IsPackable> | |
</PropertyGroup> | |
<!-- Package references: the .NET test SDK, Playwright's NUnit integration, NUnit with its adapter and analyzers, and the coverlet collector. -->
<ItemGroup> | |
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.2" /> | |
<PackageReference Include="Microsoft.Playwright.NUnit" Version="1.31.1" /> | |
<PackageReference Include="NUnit" Version="3.13.3" /> | |
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" /> | |
<PackageReference Include="NUnit.Analyzers" Version="3.3.0" /> | |
<PackageReference Include="coverlet.collector" Version="3.1.2" /> | |
</ItemGroup> | |
</Project> |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
using Microsoft.Playwright; | |
using Microsoft.Playwright.NUnit; | |
using NUnit.Framework; | |
namespace PlaywrightTests; | |
/// <summary>
/// NUnit/Playwright fixture that logs in to https://www.myedenred.pt/, reads every
/// element matching <c>.table-body .row</c> and appends the rows that are not yet
/// present in <see cref="filePath"/> as semicolon-separated lines.
/// </summary>
[Parallelizable(ParallelScope.Self)]
[TestFixture]
public class Tests : PageTest
{
    // Output file. Replace the placeholder with a real path before running.
    const string filePath = "<<filePath>>";

    // Number of leading characters kept from the first column of each row.
    private const int DateLength = 10;

    /// <summary>
    /// Logs in, waits for the account holder name to appear, scrapes the table rows
    /// and appends the rows that are new to <see cref="filePath"/>.
    /// </summary>
    [Test]
    public async Task ScrapeEnredData()
    {
        await Page.GotoAsync("https://www.myedenred.pt/");

        // Login. The credentials are placeholders; replace them locally and never commit real ones.
        await Page.GetByPlaceholder("Inserir Email").FillAsync("<<userName>>");
        await Page.GetByPlaceholder("Inserir Palavra-passe").FillAsync("<<password>>");
        await Page.Locator("#login").ClickAsync();

        // Wait for load: the holder name is only expected to match once login has completed.
        var name = Page.Locator(".holder-name");
        await Expect(name).ToHaveTextAsync("<<your name>>");

        // Scrape: one output line per row, columns 1-4 joined with ';'.
        var rows = await Page.QuerySelectorAllAsync(".table-body .row");
        List<string> lines = new();
        foreach (var row in rows)
        {
            var date = TakeDate(await ReadCellAsync(row, 1));
            var txt = (await ReadCellAsync(row, 2))?.Trim();
            var value = NormalizeAmount(await ReadCellAsync(row, 3));
            var balance = NormalizeAmount(await ReadCellAsync(row, 4));
            lines.Add($"{date};{txt};{value};{balance}");
        }

        // The output file does not exist on the first run; treat that as "nothing stored yet"
        // instead of letting ReadAllLinesAsync throw FileNotFoundException.
        var known = File.Exists(filePath)
            ? new HashSet<string>(await File.ReadAllLinesAsync(filePath))
            : new HashSet<string>();

        // Filter every scraped line individually. SkipWhile would only drop the leading
        // run of known lines and then re-append everything after the first new one.
        await File.AppendAllLinesAsync(filePath, lines.Where(line => !known.Contains(line)));
    }

    // Returns the raw text content of the row's n-th child div, or null when the div or its text is missing.
    private static Task<string?> ReadCellAsync(IElementHandle row, int column)
        => row.QuerySelectorAsync($"div:nth-child({column})")
              .Select(cell => cell?.TextContentAsync());

    // Trims the text and keeps at most DateLength leading characters; shorter text is returned whole.
    private static string? TakeDate(string? raw)
    {
        var trimmed = raw?.Trim();
        return trimmed is { Length: > DateLength } ? trimmed[..DateLength] : trimmed;
    }

    // Replaces the decimal comma with a dot, removes the euro sign and trims surrounding whitespace.
    private static string? NormalizeAmount(string? raw)
        => raw?.Replace(",", ".").Replace("€", "").Trim();
}
/// <summary>
/// Extension methods that project the eventual result of a <see cref="Task{TResult}"/>
/// through a selector without awaiting it at the call site.
/// </summary>
public static class TaskMonad
{
    /// <summary>
    /// Awaits <paramref name="t"/> and returns the value produced by applying
    /// <paramref name="selector"/> to its result.
    /// </summary>
    /// <param name="t">Task whose result is fed to the selector.</param>
    /// <param name="selector">Synchronous projection; may return null/default.</param>
    /// <returns>A task yielding the selector's output.</returns>
    public static async Task<TOut?> Select<TIn, TOut>(this Task<TIn> t, Func<TIn, TOut?> selector)
    {
        var input = await t;
        return selector(input);
    }

    /// <summary>
    /// Awaits <paramref name="t"/>, applies <paramref name="selector"/> to its result and
    /// awaits the task the selector returns.
    /// </summary>
    /// <param name="t">Task whose result is fed to the selector.</param>
    /// <param name="selector">Asynchronous projection; may return a null task.</param>
    /// <returns>
    /// A task yielding the inner task's result, or the default value of
    /// <typeparamref name="TOut"/> when the selector returns a null task.
    /// </returns>
    public static async Task<TOut?> Select<TIn, TOut>(this Task<TIn> t, Func<TIn, Task<TOut?>?> selector)
    {
        var input = await t;
        var pending = selector(input);
        if (pending is null)
        {
            return default;
        }

        return await pending;
    }
}