Skip to content

Instantly share code, notes, and snippets.

@dazfuller
Last active February 9, 2018 16:31
Show Gist options
  • Save dazfuller/ffedb2ed17a1977ce56c2aac13b7896c to your computer and use it in GitHub Desktop.
Save dazfuller/ffedb2ed17a1977ce56c2aac13b7896c to your computer and use it in GitHub Desktop.
Reading Parquet from Azure Blob
using System;
using System.IO;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Auth;
using Parquet;
namespace ParquetFromAzureBlob
{
    /// <summary>
    /// Console sample that downloads a single blob from Azure Blob Storage
    /// into memory and reads it as a Parquet data set.
    /// </summary>
    public static class Program
    {
        // Placeholder values: replace them before running. Real credentials
        // should come from configuration or a secret store, not source code.
        private const string AccountName = "<account name>";
        private const string AccountKey = "<account key>";
        private const string ContainerName = "<container name>";

        /// <summary>
        /// Downloads the configured blob, parses it as Parquet and prints the
        /// number of rows that were read.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            const string blobReference = "<blob reference>";

            var creds = new StorageCredentials(AccountName, AccountKey);

            // Second argument requests HTTPS endpoints for the storage account.
            var storageAccount = new CloudStorageAccount(creds, true);
            var client = storageAccount.CreateCloudBlobClient();
            var container = client.GetContainerReference(ContainerName);
            var blob = container.GetBlobReference(blobReference);

            using (var stream = new MemoryStream())
            {
                // Download in a single call straight into the stream. This avoids
                // a separate FetchAttributes round trip to pre-size a byte array,
                // and cannot end up with a wrongly sized buffer if the blob
                // changes between the two requests.
                blob.DownloadToStream(stream);

                // The download leaves the position at the end of the data;
                // rewind so the reader sees the content from the start.
                stream.Position = 0;

                var options = new ParquetOptions();
                var ds = ParquetReader.Read(stream, options);
                Console.WriteLine($"Successfully read in {ds.TotalRowCount} row(s)");
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment