Created
April 17, 2018 00:10
-
-
Save kekyo/c093d982dc5040ba8ec4b6c58f4821f9 to your computer and use it in GitHub Desktop.
イラク復興支援群の日報 370日分 - 朝日新聞
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Linq; | |
using System.Net.Http; | |
using System.Threading.Tasks; | |
using AngleSharp.Dom.Html; | |
using AngleSharp.Parser.Html; | |
namespace ConsoleApp1
{
    /// <summary>
    /// Console tool that fetches one article page, collects every link whose
    /// href ends with ".pdf" and downloads each linked file into a local
    /// "pdf" directory.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Single client shared by every request. Creating a new
        /// <see cref="HttpClient"/> per request (and never disposing it) can
        /// exhaust sockets when many downloads are issued.
        /// </summary>
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        /// <summary>
        /// Downloads <paramref name="url"/> and parses the response body as HTML.
        /// </summary>
        /// <param name="url">Absolute URL of the page to fetch.</param>
        /// <returns>The parsed HTML document.</returns>
        private static async Task<IHtmlDocument> GetHtmlDocumentAsync(Uri url)
        {
            using (var hs = await SharedHttpClient.GetStreamAsync(url))
            {
                var parser = new HtmlParser();
                return await parser.ParseAsync(hs);
            }
        }

        /// <summary>
        /// Turns a raw href attribute value into an absolute http(s) URL when it
        /// points at a PDF.
        /// </summary>
        /// <param name="baseUrl">URL of the page the href was found on; relative hrefs are resolved against it.</param>
        /// <param name="href">Raw href attribute value; may be null or blank.</param>
        /// <returns>
        /// The absolute URL, or <c>null</c> when the href is blank, does not end
        /// with ".pdf" (case-insensitive), cannot be resolved, or is not http/https.
        /// </returns>
        private static Uri ResolvePdfLink(Uri baseUrl, string href)
        {
            if (string.IsNullOrWhiteSpace(href))
            {
                return null;
            }

            // Ordinal comparison: this is a file extension check, not linguistic text.
            if (!href.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            // Resolve against the page URL so relative hrefs become downloadable;
            // an already-absolute href is returned as-is by this overload.
            Uri resolved;
            if (!Uri.TryCreate(baseUrl, href, out resolved))
            {
                return null;
            }

            // HttpClient can only fetch http/https; skip anything else (mailto:, file:, ...).
            if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps)
            {
                return null;
            }

            return resolved;
        }

        /// <summary>
        /// Derives the local file name for a download from the last '/'-separated
        /// part of the URL's path and query.
        /// </summary>
        /// <param name="url">Absolute URL being downloaded.</param>
        /// <returns>A file name with characters that are invalid in file names replaced by '_'.</returns>
        private static string GetLocalFileName(Uri url)
        {
            var name = url.PathAndQuery.Split('/').Last();

            // The query part may contain characters such as '?' that are not
            // allowed in file names on some platforms.
            foreach (var invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            return name;
        }

        /// <summary>
        /// Downloads one document and writes it into <paramref name="basePath"/>.
        /// </summary>
        /// <param name="url">Absolute URL of the document to download.</param>
        /// <param name="referrer">Value sent in the request's Referer header.</param>
        /// <param name="basePath">Existing directory the file is written into.</param>
        /// <exception cref="HttpRequestException">The server returned a non-success status code.</exception>
        private static async Task DownloadPdfDocumentAsync(Uri url, Uri referrer, string basePath)
        {
            // The Referer is set on the individual request, not on the shared
            // client's default headers, so concurrent downloads never mutate
            // shared state.
            using (var request = new HttpRequestMessage(HttpMethod.Get, url))
            {
                request.Headers.Referrer = referrer;

                // ResponseHeadersRead streams the body instead of buffering it in memory.
                using (var response = await SharedHttpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead))
                {
                    response.EnsureSuccessStatusCode();

                    var path = Path.Combine(basePath, GetLocalFileName(url));
                    using (var hs = await response.Content.ReadAsStreamAsync())
                    using (var ts = File.Create(path))
                    {
                        await hs.CopyToAsync(ts);
                        await ts.FlushAsync();
                    }
                }
            }
        }

        /// <summary>
        /// Fetches the article page, collects its distinct PDF links and downloads
        /// all of them concurrently into the "pdf" directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static async Task MainAsync(string[] args)
        {
            var target = new Uri("https://www.asahi.com/articles/ASL4J669JL4JUEHF016.html", UriKind.Absolute);
            var document = await GetHtmlDocumentAsync(target);

            var hrefs = document.Links
                .Select(a => ResolvePdfLink(target, a.GetAttribute("href")))
                .Where(link => link != null)
                .Distinct()
                .ToArray();

            var basePath = "pdf";

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(basePath);

            await Task.WhenAll(hrefs.Select(href => DownloadPdfDocumentAsync(href, target, basePath)));
        }

        /// <summary>
        /// Synchronous entry point; blocks until <see cref="MainAsync"/> completes.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to <see cref="MainAsync"/>.</param>
        static void Main(string[] args)
        {
            // GetAwaiter().GetResult() rethrows the original exception, whereas
            // Wait() would wrap it in an AggregateException.
            MainAsync(args).GetAwaiter().GetResult();
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment