Skip to content

Instantly share code, notes, and snippets.

@ianmcook
Last active March 18, 2024 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ianmcook/7c9e4033b59bf71245603b67e69e4235 to your computer and use it in GitHub Desktop.
Save ianmcook/7c9e4033b59bf71245603b67e69e4235 to your computer and use it in GitHub Desktop.
C# example to receive Arrow record batches over HTTP and write to file
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;
using Apache.Arrow;
using Apache.Arrow.Ipc;
namespace ArrowHttpClient
{
/// <summary>
/// Console example that downloads an Arrow IPC stream over HTTP, buffers the
/// received record batches in memory, prints basic statistics, and then writes
/// the batches to a local file in Arrow IPC stream format.
/// </summary>
public class Program
{
    /// <summary>
    /// Connects to the Arrow HTTP server, reads every record batch from the
    /// response body, reports row/batch counts and elapsed time, and writes the
    /// data to <c>received_data.arrows</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the reader exposes no schema after the stream has been consumed.
    /// </exception>
    public static async Task Main(string[] args)
    {
        string serverUri = "http://localhost:8008/";

        // Stopwatch is monotonic; subtracting DateTime values can be skewed by
        // system clock adjustments that happen during the transfer.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();

        List<RecordBatch> batches = new List<RecordBatch>();
        try
        {
            // The request uses an absolute URI, so no BaseAddress is needed.
            // The client is disposed together with the response stream.
            using (var httpClient = new HttpClient())
            using (var stream = await httpClient.GetStreamAsync(serverUri))
            using (var reader = new ArrowStreamReader(stream))
            {
                Console.WriteLine("Connected");

                int numRows = 0;
                RecordBatch batch;
                while ((batch = await reader.ReadNextRecordBatchAsync()) != null)
                {
                    numRows += batch.Length;
                    batches.Add(batch);
                }

                // Read the schema only after the loop: the reader populates it
                // while consuming the stream.
                Schema schema = reader.Schema;
                stopwatch.Stop();

                Console.WriteLine($"{numRows} records received");
                Console.WriteLine($"{batches.Count} record batches received");
                Console.WriteLine($"{stopwatch.Elapsed.TotalSeconds} seconds elapsed");

                if (schema == null)
                {
                    // Fail with a descriptive message instead of an opaque
                    // null-argument failure further down in the writer.
                    throw new InvalidOperationException(
                        "The server response did not contain an Arrow schema.");
                }

                WriteToArrowFile("received_data.arrows", schema, batches);
            }
        }
        finally
        {
            // Record batches are disposable; release them once the data has
            // been written (or when reading/writing failed part-way).
            foreach (var received in batches)
            {
                received.Dispose();
            }
        }
    }

    /// <summary>
    /// Writes the given record batches to <paramref name="filePath"/> in Arrow
    /// IPC stream format, creating or overwriting the file.
    /// </summary>
    /// <param name="filePath">Destination file path.</param>
    /// <param name="schema">Schema shared by all batches.</param>
    /// <param name="batches">Record batches to write, in order.</param>
    private static void WriteToArrowFile(string filePath, Schema schema, IEnumerable<RecordBatch> batches)
    {
        using (var fileStream = new FileStream(filePath, FileMode.Create))
        using (var writer = new ArrowStreamWriter(fileStream, schema))
        {
            // Write the schema up front so the output is a well-formed stream
            // even when there are no batches to write.
            writer.WriteStart();

            foreach (var batch in batches)
            {
                writer.WriteRecordBatch(batch);
            }

            // Emit the end-of-stream marker explicitly.
            // NOTE(review): assumes disposing the writer does not write it — confirm.
            writer.WriteEnd();
        }

        Console.WriteLine($"Data written to {filePath}");
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment