Skip to content

Instantly share code, notes, and snippets.

@guitarrapc
Last active January 8, 2019 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guitarrapc/1b898b6677d7b74e3bb6757780bd62c7 to your computer and use it in GitHub Desktop.
Save guitarrapc/1b898b6677d7b74e3bb6757780bd62c7 to your computer and use it in GitHub Desktop.
retrieve sitemap
using System;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using System.Xml.Linq;
namespace Hatenablog_Sitemap
{
class Program
{
static async Task Main(string[] args)
{
if (args.Length == 0)
{
throw new ArgumentOutOfRangeException($"Usage: {System.Reflection.Assembly.GetExecutingAssembly().Location} URL");
}
var url = args[0] + "/sitemap.xml";
var urls = await GetHatenaBlogEntries(url);
foreach (var item in urls)
{
Console.WriteLine(item);
}
}
private static async Task<string[]> GetHatenaBlogEntries(string url)
{
var client = new HttpClient();
XNamespace ns = "http://www.sitemaps.org/schemas/sitemap/0.9";
async Task<string[]> FetchSitemapAsync(string dest)
{
var content = await client.GetStringAsync(dest);
return XElement.Parse(content).Descendants(ns + "loc").Select(x => x.Value).ToArray();
}
var sitemaps = await FetchSitemapAsync(url);
var urls = await Task.WhenAll(sitemaps.Select(x => FetchSitemapAsync(x)));
var result = urls.SelectMany(x => x).ToArray();
return result;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment