Skip to content

Instantly share code, notes, and snippets.

@leandromoh
Created September 23, 2023 16:34
Show Gist options
  • Save leandromoh/14c1ea328f80f092bee48c50fc9d87e2 to your computer and use it in GitHub Desktop.
Save leandromoh/14c1ea328f80f092bee48c50fc9d87e2 to your computer and use it in GitHub Desktop.
Downloads a JSON array file with paginated GET requests and joins multiple JSON array files into one
using ConsoleApp2;
using MoreLinq;
using System.Buffers;
using System.IO.Compression;
using System.Reflection;
using System.Text;
namespace ConsoleApp3
{
/// <summary>
/// Console utility with two jobs: downloading a paginated JSON-array endpoint page by page
/// (<see cref="DownloadJsonByHash"/>) and concatenating several JSON-array files into a
/// single JSON array (<see cref="JoinJsons"/>).
/// </summary>
public class Program
{
    // A single HttpClient is shared by every request issued by this instance.
    private readonly HttpClient _client = new HttpClient();

    // Directory where downloaded pages are stored (next to the executing assembly).
    private readonly string internalPath = ResolveDownloadDirectory();

    // API
    public const string hostAddress_Prd = "https://xxx.com.br";
    public const string hostAddress_Stg = "https://xxx.com.br";

    // NOTE(review): placeholder credential. A real token should come from configuration or
    // the environment rather than being committed to source.
    public const string token = "Bearer ???";

    /// <summary>
    /// Joins every file found in a folder into <c>full.json</c> inside that same folder.
    /// </summary>
    /// <param name="args">
    /// Optional command-line arguments; when present, the first one is the folder to process.
    /// Without arguments the original hard-coded folder is used.
    /// </param>
    public static async Task Main(string[] args)
    {
        var p = new Program();
        var pasta = args is { Length: > 0 } ? args[0] : "C:\\Users\\yyyy\\Desktop\\pasta";
        var files = Directory.GetFiles(pasta);
        var output = Path.Combine(pasta, "full.json");
        await p.JoinJsons(files, output);
    }

    /// <summary>
    /// Creates the program and registers the headers sent with every HTTP request.
    /// </summary>
    public Program()
    {
        _client.DefaultRequestHeaders.Add("Authorization", token);
        _client.DefaultRequestHeaders.Add("IdentityProvider", "AzureAd");
    }

    /// <summary>
    /// Concatenates the elements of several JSON-array files into one JSON array written to
    /// <paramref name="outputFile"/>. The inputs are streamed, never loaded fully in memory.
    /// </summary>
    /// <param name="files">Paths of the files to join; each is expected to hold one JSON array.</param>
    /// <param name="outputFile">Path of the file to create (overwritten if it exists).</param>
    /// <remarks>
    /// Only the outermost <c>[</c> and <c>]</c> of each input are removed, so nested arrays are
    /// preserved. Inputs holding an empty array contribute nothing (no dangling comma), and an
    /// empty input list yields <c>[]</c>. An input that resolves to the same path as
    /// <paramref name="outputFile"/> is skipped, so re-running over the same folder is safe.
    /// </remarks>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public async Task JoinJsons(IEnumerable<string> files, string outputFile)
    {
        ArgumentNullException.ThrowIfNull(files);
        ArgumentNullException.ThrowIfNull(outputFile);

        // Materialize once (the sequence may be lazy) and drop the output file itself: it
        // cannot be read while it is being rewritten.
        var outputFullPath = Path.GetFullPath(outputFile);
        var pathComparison = OperatingSystem.IsWindows()
            ? StringComparison.OrdinalIgnoreCase
            : StringComparison.Ordinal;
        var inputs = files
            .Where(f => !string.Equals(Path.GetFullPath(f), outputFullPath, pathComparison))
            .ToList();

        var buffer = ArrayPool<char>.Shared.Rent(1 << 18);
        try
        {
            using var full = new FileStream(outputFile, FileMode.Create, FileAccess.Write);
            using var writer = new StreamWriter(full);

            // The opening bracket is written unconditionally so that zero inputs give "[]".
            await writer.WriteAsync('[');

            var wroteElement = false;
            var done = 0;
            foreach (var file in inputs)
            {
                if (await AppendArrayBodyAsync(file, writer, buffer, wroteElement))
                {
                    wroteElement = true;
                }

                done++;
                Console.WriteLine($"{file} - {done}/{inputs.Count}");
            }

            await writer.WriteAsync(']');
            await writer.FlushAsync();
        }
        finally
        {
            ArrayPool<char>.Shared.Return(buffer);
        }
    }

    /// <summary>
    /// Streams the content of one JSON-array file, minus its outermost brackets, to
    /// <paramref name="writer"/>.
    /// </summary>
    /// <param name="file">Input file path.</param>
    /// <param name="writer">Destination writer.</param>
    /// <param name="buffer">Scratch buffer used for reading.</param>
    /// <param name="needsSeparator">
    /// True when something was already written to the output array, so a comma must precede
    /// this file's first element.
    /// </param>
    /// <returns>True when the file contributed any content to the output.</returns>
    private static async Task<bool> AppendArrayBodyAsync(
        string file, TextWriter writer, char[] buffer, bool needsSeparator)
    {
        using var reader = new StreamReader(file);

        // Trailing run of ']' / whitespace from the previous chunk. It is withheld because,
        // until more data arrives, it might contain the array's closing bracket.
        var held = new StringBuilder();
        var pastLeading = false;   // first non-whitespace character has been seen
        var strippedOpen = false;  // that character was '[' and has been dropped
        var wroteContent = false;

        async Task BeginContentAsync()
        {
            if (wroteContent)
            {
                return;
            }

            wroteContent = true;
            if (needsSeparator)
            {
                await writer.WriteAsync(',');
            }
        }

        int read;
        while ((read = await reader.ReadAsync(buffer, 0, buffer.Length)) != 0)
        {
            var start = 0;
            if (!pastLeading)
            {
                while (start < read && char.IsWhiteSpace(buffer[start]))
                {
                    start++;
                }

                if (start == read)
                {
                    continue; // nothing but leading whitespace so far
                }

                pastLeading = true;
                if (buffer[start] == '[')
                {
                    strippedOpen = true;
                    start++;
                }
            }

            var end = read;
            while (end > start && (buffer[end - 1] == ']' || char.IsWhiteSpace(buffer[end - 1])))
            {
                end--;
            }

            if (end > start)
            {
                await BeginContentAsync();

                // More content followed, so whatever was withheld was not the end of the file.
                if (held.Length > 0)
                {
                    await writer.WriteAsync(held.ToString());
                    held.Clear();
                }

                await writer.WriteAsync(buffer, start, end - start);
            }

            if (read > end)
            {
                held.Append(buffer, end, read - end);
            }
        }

        // End of file: the last ']' withheld is the array's closing bracket (only when an
        // opening one was removed); anything before it still belongs to the content.
        var tail = held.ToString();
        if (strippedOpen)
        {
            var close = tail.LastIndexOf(']');
            if (close >= 0)
            {
                tail = tail.Substring(0, close);
            }
        }

        tail = tail.TrimEnd();
        if (tail.Length > 0)
        {
            await BeginContentAsync();
            await writer.WriteAsync(tail);
        }

        return wroteContent;
    }

    /// <summary>
    /// Downloads every page of the documents endpoint for <paramref name="hash"/> and stores
    /// each page as <c>{hash}-{page}</c> in the download directory, up to 8 pages in parallel.
    /// </summary>
    /// <param name="hash">Value sent as the <c>hash</c> query parameter (URL-escaped here).</param>
    /// <exception cref="ArgumentNullException"><paramref name="hash"/> is null.</exception>
    /// <exception cref="HttpRequestException">A page request did not return 200 OK.</exception>
    /// <exception cref="InvalidOperationException">
    /// The count response carries no usable <c>x-count</c> header.
    /// </exception>
    private async Task DownloadJsonByHash(string hash)
    {
        ArgumentNullException.ThrowIfNull(hash);

        const int pageLimit = 1000;

        // Escape the hash so characters such as '+', '&' or '=' cannot corrupt the query.
        var requestUri = new Uri(
            new Uri(hostAddress_Prd),
            $"/my-api/documents?hash={Uri.EscapeDataString(hash)}");

        var maxPage = await CountPages();
        var opts = new ParallelOptions
        {
            MaxDegreeOfParallelism = 8
        };

        // The token handed to the body is forwarded so the remaining downloads stop promptly
        // once one of them fails.
        await Parallel.ForEachAsync(
            Enumerable.Range(1, maxPage),
            opts,
            (page, cancellationToken) => SaveFile(page, cancellationToken));

        // Fetches one page and writes its body to disk.
        async ValueTask SaveFile(int page, CancellationToken cancellationToken)
        {
            using var httpRequest = new HttpRequestMessage(HttpMethod.Get, requestUri);
            httpRequest.Headers.Add("x-page", page.ToString());
            httpRequest.Headers.Add("x-page-limit", pageLimit.ToString());

            using var response = await _client.SendAsync(httpRequest, cancellationToken);
            if (response.StatusCode != System.Net.HttpStatusCode.OK)
            {
                throw new HttpRequestException(
                    $"page {page} returns {response.StatusCode}", null, response.StatusCode);
            }

            var downloadPath = Path.Combine(internalPath, $"{hash}-{page}");
            await using var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken);
            await using var stream = new FileStream(downloadPath, FileMode.Create, FileAccess.Write);
            await contentStream.CopyToAsync(stream, cancellationToken);
            await stream.FlushAsync(cancellationToken);
            Console.WriteLine($"hash = {hash} page = {page}");
        }

        // Requests a single item with "x-count: true" and derives the number of pages from
        // the total reported in the "x-count" response header.
        async Task<int> CountPages()
        {
            using var httpRequest = new HttpRequestMessage(HttpMethod.Get, requestUri);
            httpRequest.Headers.Add("x-page", "1");
            httpRequest.Headers.Add("x-page-limit", "1");
            httpRequest.Headers.Add("x-count", "true");

            using var response = await _client.SendAsync(httpRequest);
            var raw = GetHeader(response, "x-count");
            var parsed = int.TryParse(
                raw,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var count);
            if (!parsed || count < 0)
            {
                throw new InvalidOperationException(
                    $"count request returned {response.StatusCode} with x-count header '{raw}'");
            }

            // Ceiling division: a partially filled last page still counts as a page.
            return count / pageLimit + (count % pageLimit == 0 ? 0 : 1);
        }

        // Returns the single value of a response header, or null when the header is absent.
        // TryGetValues matches header names case-insensitively, as HTTP requires.
        static string GetHeader(HttpResponseMessage response, string header)
        {
            return response.Headers.TryGetValues(header, out var values)
                ? values.SingleOrDefault()
                : null;
        }
    }

    // Directory of the executing assembly, falling back to the application base directory
    // when the assembly has no file location.
    private static string ResolveDownloadDirectory()
    {
        var directory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        return string.IsNullOrEmpty(directory) ? AppContext.BaseDirectory : directory;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment