Skip to content

Instantly share code, notes, and snippets.

@relyky
Last active January 24, 2024 09:40
Show Gist options
  • Save relyky/dcf2f373629f404a0f587b12acbc8b6d to your computer and use it in GitHub Desktop.
Save relyky/dcf2f373629f404a0f587b12acbc8b6d to your computer and use it in GitHub Desktop.
WebView2 範例, Regex, 網頁爬文
/// <summary>
/// Form load handler: selects the first entry of the status combo box, initializes the
/// WebView2 control (<c>webAgent</c>) and navigates it to the seafarer abandonment list page.
/// </summary>
private async void Form1_Load(object sender, EventArgs e)
{
    const string abandonmentListUrl = "https://www.itfseafarers.org/en/abandonment-list/seafarer-abandonment";

    // Default the status filter to its first entry.
    cboStatus.SelectedIndex = 0;

    // Initialize the WebView2 control before touching its CoreWebView2 object.
    await webAgent.EnsureCoreWebView2Async(null);

    var browser = webAgent.CoreWebView2;
    browser.Navigate(abandonmentListUrl);
}
/// <summary>
/// Applies the status selected in <c>cboStatus</c> to the page's filter form, submits it,
/// waits for the page to reload, then scrapes the result table into
/// <c>SeafarerAbandonment</c> records and shows them as indented JSON in <c>textBox1</c>.
/// Progress is reported through <c>progressBar</c> in steps of 10.
/// </summary>
private async void button2_Click(object sender, EventArgs e)
{
    labelResult.Text = string.Empty;
    progressBar.Value = 0;

    // Status filter value: All | open | disputed | inactive | resolved.
    string status = cboStatus.Text ?? "ALL";

    // Emit the value as a JSON string literal (which is also a valid JavaScript literal)
    // so quotes or backslashes in the combo box text cannot break out of the script.
    string statusLiteral = JsonSerializer.Serialize(status);
    string queryJs = $"document.querySelector('#edit-field-status-list-value').value = {statusLiteral}; "
        + @"document.querySelector('#views-exposed-form-itf-current-cases-block-1').submit();";
    await webAgent.CoreWebView2.ExecuteScriptAsync(queryJs);
    progressBar.Value = 10;

    // Give the submitted form time to reload: five one-second waits, progress 20 -> 60.
    for (int percent = 20; percent <= 60; percent += 10)
    {
        await Task.Delay(1000);
        progressBar.Value = percent;
    }

    // Read the filter values back from the page to confirm the query condition took effect.
    // They are displayed exactly as returned by ExecuteScriptAsync (JSON-encoded).
    string regionResult = await webAgent.CoreWebView2.ExecuteScriptAsync(@"document.querySelector('#edit-field-region-target-id').value");
    string statusResult = await webAgent.CoreWebView2.ExecuteScriptAsync(@"document.querySelector('#edit-field-status-list-value').value");
    labelResult.Text = $"{regionResult} | {statusResult}";
    progressBar.Value = 70;

    // Fetch the result table body as HTML.
    string htmlJson = await webAgent.CoreWebView2.ExecuteScriptAsync(@"document.querySelector('#block-views-block-itf-current-cases-block-1 > div > div > table > tbody').outerHTML");

    // The script result is JSON-encoded, so decode it with a JSON parser rather than
    // Regex.Unescape. A JSON null (e.g. the table was not found) becomes an empty string,
    // which simply yields no rows below.
    string tbodyHtml = JsonSerializer.Deserialize<string>(htmlJson) ?? string.Empty;
    progressBar.Value = 80;

    // Parse each table row into a record; one named group per column.
    const string pattern =
        @"(<td .*-vessel-name"">(?<vesselName>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-imo"">(?<imo>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-flag"">(?<flag>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-port"">(?<port>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-country"">(?<country>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-owed-wages"">(?<owedWages>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-seafarers"">(?<seafarers>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-nationalities"">(?<nationalities>.*)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-reported-to-itf is-active"">(?<reportedToItf>[\s\S]*?)<\/td>)[\s\r\n\t]*" +
        @"(<td .*-status-list"">(?<statusList>.*)<\/td>)";

    // HTML entities are decoded per captured field, after the markup has been matched,
    // so a decoded '<', '>' or line break inside cell text cannot disturb the row parsing.
    static string Field(Match row, string groupName) =>
        WebUtility.HtmlDecode(row.Groups[groupName].Value).Trim();

    List<SeafarerAbandonment> infoList = new();
    foreach (Match tr in Regex.Matches(tbodyHtml, @"<tr>([\s\S\r\n\t]*?)<\/tr>"))
    {
        Match match = Regex.Match(tr.Value, pattern);
        if (!match.Success)
        {
            // Rows that do not have the expected column layout are skipped.
            continue;
        }

        infoList.Add(new SeafarerAbandonment
        {
            VesselName = Field(match, "vesselName"),
            Imo = Field(match, "imo"),
            Flag = Field(match, "flag"),
            Port = Field(match, "port"),
            Country = Field(match, "country"),
            OwedWages = Field(match, "owedWages"),
            Seafarers = Field(match, "seafarers"),
            Nationalities = Field(match, "nationalities"),
            ReportedToItf = Field(match, "reportedToItf"),
            StatusList = Field(match, "statusList"),
        });
    }
    progressBar.Value = 90;

    // Show the parsed records on screen as indented JSON.
    string dataJson = JsonSerializer.Serialize(infoList, new JsonSerializerOptions
    {
        WriteIndented = true,
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping, // keep non-ASCII (e.g. Chinese) text unescaped
    });
    textBox1.Text = dataJson;
    progressBar.Value = 100;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment