Skip to content

Instantly share code, notes, and snippets.

@joelverhagen
Created December 1, 2023 15:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joelverhagen/b50c44dcb6d1cbd5b6031773449630cd to your computer and use it in GitHub Desktop.
Save joelverhagen/b50c44dcb6d1cbd5b6031773449630cd to your computer and use it in GitHub Desktop.
// Validates the consistency of NuGet.org catalog page items and the catalog pages they reference.
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Text.Json;
using System.Text.Json.Serialization;
using Xunit;
// Start-up: download the catalog index, which lists every catalog page.
Console.WriteLine("Starting...");

var indexUrl = "https://api.nuget.org/v3/catalog0/index.json";
using var httpClient = new HttpClient();

Console.WriteLine($"Downloading {indexUrl}...");
using var indexStream = await httpClient.GetStreamAsync(indexUrl);
var index = (await JsonSerializer.DeserializeAsync<CatalogIndex>(indexStream))!;

// Pair every page item with its numeric page index and the file name used for the local cache
// (the last segment of the page URL).
var indexedItems = index.Items
    .Select(item =>
    {
        var cacheName = new Uri(item.Url).AbsolutePath.Split('/').Last();
        return (Index: GetPageIndex(item.Url), FileName: cacheName, Item: item);
    })
    .ToList();

// Look for pages already cached in the current directory and remember the one with the highest index.
var cachedFiles = Directory
    .GetFiles(Directory.GetCurrentDirectory(), "page*.json")
    .Select(fullPath => Path.GetFileName(fullPath));

// MaxBy yields null for an empty sequence of strings, so no separate emptiness check is needed.
string? latestFileName = cachedFiles.MaxBy(GetPageIndex);
if (latestFileName is not null)
{
    Console.WriteLine("Latest cached file name: " + latestFileName);
}

// Work queue shared by the download workers: pages in index order, capped at 100,000 entries.
var pageItems = new ConcurrentBag<(int Index, string FileName, CatalogPageItem Item)>(
    indexedItems.OrderBy(x => x.Index).Take(100_000));

var pageCount = pageItems.Count;
Console.WriteLine($"There are {pageCount} pages.");
// Load every catalog page, from the local cache when possible and from the network otherwise,
// using 16 concurrent workers that drain the shared pageItems bag.
var pages = new ConcurrentBag<(int Index, string FileName, CatalogPageItem Item, CatalogPage Page)>();

// Number of pages processed so far; incremented atomically because the workers run concurrently.
var fetched = 0;
Console.WriteLine();
await Task.WhenAll(Enumerable
    .Range(0, 16)
    .Select(async _ =>
    {
        while (pageItems.TryTake(out var pageItem))
        {
            var cached = true;

            // A page is downloaded when it is not cached yet, or when it is the highest-index cached page.
            // NOTE(review): presumably the newest cached page may have grown since it was stored - confirm.
            if (!File.Exists(pageItem.FileName) || pageItem.FileName == latestFileName)
            {
                // Write to a temporary file first and move it into place afterwards, so that an
                // interrupted download does not leave a truncated file under the final cache name.
                var tempFileName = pageItem.FileName + ".temp";
                await using (var pageStream = await httpClient.GetStreamAsync(pageItem.Item.Url))
                await using (var fileStream = new FileStream(
                    tempFileName,
                    FileMode.Create,
                    FileAccess.Write,
                    FileShare.None,
                    bufferSize: 4096,
                    FileOptions.Asynchronous))
                {
                    // Asynchronous copy: a blocking CopyTo here would tie up a thread-pool thread per worker.
                    await pageStream.CopyToAsync(fileStream);
                }

                File.Move(tempFileName, pageItem.FileName, overwrite: true);
                cached = false;
            }

            // The cached file is only read, so open it read-only.
            await using (var fileStream = new FileStream(
                pageItem.FileName,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                bufferSize: 4096,
                FileOptions.Asynchronous))
            {
                var page = (await JsonSerializer.DeserializeAsync<CatalogPage>(fileStream))!;
                pages.Add((pageItem.Index, pageItem.FileName, pageItem.Item, page));
            }

            Interlocked.Increment(ref fetched);

            // Progress marker: "." for a page served from the cache, "o" for a downloaded page.
            Console.Write(cached ? "." : "o");
        }
    }));
Console.WriteLine();
// Consistency rules, keyed by the message printed when a rule fails. Each rule receives a downloaded
// page together with the page item that referenced it, and throws (via Assert or otherwise) when the
// data is inconsistent. The page-level commit ID and timestamp are expected to match those of the
// leaf item with the greatest commit timestamp.
var rules = new Dictionary<string, Action<(int Index, string FileName, CatalogPageItem Item, CatalogPage Page)>>
{
    { "Item has unexpected @type", page => Assert.Equal("CatalogPage", page.Item.Type) },
    { "Item has unexpected count", page => Assert.Equal(page.Page.Items.Count, page.Item.Count) },
    { "Item has unexpected commitId", page => Assert.Equal(GetLatestLeaf(page.Page).CommitId, page.Item.CommitId) },
    { "Item has unexpected commitTimeStamp", page => Assert.Equal(GetLatestLeaf(page.Page).CommitTimestamp, page.Item.CommitTimestamp) },
    { "Page has unexpected @id", page => Assert.Equal(page.Item.Url, page.Page.Url) },
    { "Page has unexpected parent", page => Assert.Equal(indexUrl, page.Page.Parent) },
    { "Page has unexpected @type", page => Assert.Equal("CatalogPage", page.Page.Type) },
    { "Page has unexpected count", page => Assert.Equal(page.Page.Items.Count, page.Page.Count) },
    { "Page has unexpected commitId", page => Assert.Equal(GetLatestLeaf(page.Page).CommitId, page.Page.CommitId) },
    { "Page has unexpected commitTimestamp", page => Assert.Equal(GetLatestLeaf(page.Page).CommitTimestamp, page.Page.CommitTimestamp) },
};

// Returns the leaf item of the given page that has the greatest commit timestamp.
// Timestamps are machine-readable data, so they are parsed with the invariant culture rather than
// the culture of the machine running the check.
// Throws InvalidOperationException when the page has no items, which the caller reports as a
// rule failure with a meaningful message instead of a null dereference.
static CatalogLeafItem GetLatestLeaf(CatalogPage catalogPage)
{
    var latest = catalogPage.Items.MaxBy(
        leaf => DateTimeOffset.Parse(leaf.CommitTimestamp, System.Globalization.CultureInfo.InvariantCulture));

    return latest ?? throw new InvalidOperationException("The page contains no items.");
}
// Evaluate every rule (in rule-name order) against every page (in page-index order).
// Pages without failures print nothing; a failing page prints its file name, one line per
// failed rule, and a closing separator line.
var separator = new string('-', 40);
Console.WriteLine(separator);
foreach (var entry in pages.OrderBy(p => p.Index))
{
    var failures = new List<string>();
    foreach (var (ruleName, rule) in rules.OrderBy(r => r.Key))
    {
        try
        {
            rule(entry);
        }
        catch (Exception ex)
        {
            // Any exception thrown by a rule counts as a failure of that rule.
            failures.Add($"{ruleName}: " + ex.Message);
        }
    }

    if (failures.Count == 0)
    {
        continue;
    }

    Console.WriteLine(entry.FileName);
    foreach (var failure in failures)
    {
        Console.WriteLine(failure);
    }

    Console.WriteLine(separator);
}
// Extracts the numeric page index from a path or URL whose last segment looks like "page<N>.json".
// Both '/' and '\' are treated as segment separators.
int GetPageIndex(string path)
{
    var normalized = path.Replace('\\', '/');

    // Everything after the last separator (or the whole string when there is none).
    var name = normalized[(normalized.LastIndexOf('/') + 1)..];

    // Strip the "page" prefix and the ".json" suffix, leaving only the digits.
    var digits = name["page".Length..^".json".Length];
    return int.Parse(digits);
}
/// <summary>
/// The catalog index document: the JSON root that lists one <see cref="CatalogPageItem"/> per catalog page.
/// All properties have non-null defaults so that a JSON document missing a property does not
/// leave a non-nullable property set to <c>null</c>.
/// </summary>
public class CatalogIndex
{
    /// <summary>Value of the JSON <c>@id</c> property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitId</c> property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitTimeStamp</c> property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>count</c> property.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }

    /// <summary>The page items listed under the JSON <c>items</c> property; empty when absent.</summary>
    [JsonPropertyName("items")]
    public List<CatalogPageItem> Items { get; set; } = new();
}
/// <summary>
/// An entry in the catalog index that describes (and links to) a single catalog page.
/// String properties default to an empty string so that a JSON object missing a property does not
/// leave a non-nullable property set to <c>null</c>.
/// </summary>
public class CatalogPageItem
{
    /// <summary>Value of the JSON <c>@id</c> property: the URL the page is downloaded from.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>@type</c> property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitId</c> property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitTimeStamp</c> property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>count</c> property: the number of items the page is stated to contain.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }
}
/// <summary>
/// A downloaded catalog page: page-level metadata plus the list of leaf items it contains.
/// All reference-type properties have non-null defaults so that a JSON document missing a
/// property does not leave a non-nullable property set to <c>null</c>.
/// </summary>
public class CatalogPage
{
    /// <summary>Value of the JSON <c>@id</c> property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>@type</c> property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitId</c> property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitTimeStamp</c> property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>count</c> property: the number of items the page is stated to contain.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }

    /// <summary>The leaf items listed under the JSON <c>items</c> property; empty when absent.</summary>
    [JsonPropertyName("items")]
    public List<CatalogLeafItem> Items { get; set; } = new();

    /// <summary>Value of the JSON <c>parent</c> property.</summary>
    [JsonPropertyName("parent")]
    public string Parent { get; set; } = string.Empty;
}
/// <summary>
/// An entry inside a catalog page that describes a single catalog leaf.
/// String properties default to an empty string so that a JSON object missing a property does not
/// leave a non-nullable property set to <c>null</c>.
/// </summary>
public class CatalogLeafItem
{
    /// <summary>Value of the JSON <c>@id</c> property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>@type</c> property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitTimeStamp</c> property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>nuget:id</c> property.</summary>
    [JsonPropertyName("nuget:id")]
    public string PackageId { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>nuget:version</c> property.</summary>
    [JsonPropertyName("nuget:version")]
    public string PackageVersion { get; set; } = string.Empty;

    /// <summary>Value of the JSON <c>commitId</c> property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment