Skip to content

Instantly share code, notes, and snippets.

@bohdanszymanik
Created July 16, 2013 04:41
Show Gist options
  • Save bohdanszymanik/6005813 to your computer and use it in GitHub Desktop.
Save bohdanszymanik/6005813 to your computer and use it in GitHub Desktop.
Sample using both Windows Azure Table Storage and a bit of simulation with MathNet.Numerics.Statistics
//
// We're going to simulate some financial transactions and experiment with storing them in azure table storage
//
//#r "..\packages\Fog.0.1.3.1\Lib\Net40\Fog.dll"
//#r "..\packages\FogMyBuild\Debug\Fog.dll"
//#r "Microsoft.WindowsAzure.Diagnostics"
#r @"C:\wd\AzureTxnUploader\packages\WindowsAzure.Storage.2.0.6.0\lib\net40\Microsoft.WindowsAzure.Storage.dll"
//#r @"C:\wd\AzureTxnUploader\packages\Microsoft.Data.Edm.5.5.0\lib\net40\Microsoft.Data.Edm.dll"
//#r @"C:\wd\AzureTxnUploader\packages\System.Spatial.5.5.0\lib\net40\System.Spatial.dll"
#r @"C:\wd\AzureTxnUploader\packages\Microsoft.Data.OData.5.2.0\lib\net40\Microsoft.Data.OData.dll"
#r @"C:\wd\AzureTxnUploader\packages\Microsoft.Data.Edm.5.2.0\lib\net40\Microsoft.Data.Edm.dll"
#r @"C:\wd\AzureTxnUploader\packages\System.Spatial.5.2.0\lib\net40\System.Spatial.dll"
#r "Microsoft.WindowsAzure.ServiceRuntime"
#r "System.Data.Services.Client"
#r "System.Linq"
open System
open Microsoft.WindowsAzure
open Microsoft.WindowsAzure.Storage
open System.Windows.Forms
// Account name and key. Modify for your account.
let accountName = "someAccountName"
let accountKey = "someAccountKey"
// Get a reference to the storage account, with authentication credentials.
// The (credentials, useHttps) constructor is used, so the blob/table/queue endpoints
// are the library defaults for `accountName` rather than anything configured here.
let credentials = new Auth.StorageCredentials(accountName, accountKey)
let storageAccount = new CloudStorageAccount(credentials, true)
// NOTE: this script used to follow the line above with three statements of the form
//     storageAccount.BlobEndpoint = Uri("http://<account>.blob.core.windows.net/")
// In F# a bare `a = b` is an equality test, not an assignment, so those lines only
// computed a bool and threw it away (an assignment would need `<-`, and the endpoint
// properties are not publicly settable anyway). They have been removed as dead code.
// If custom endpoints are really needed, build the account with the
// CloudStorageAccount(credentials, blobEndpoint, queueEndpoint, tableEndpoint) constructor instead.
// Create a table client from the storage account and get a reference to the table
// named "someTable"; the reference is reused by every insert further down the script.
let tableClient = storageAccount.CreateCloudTableClient()
let table = tableClient.GetTableReference("someTable")
// Create the table on the service if it is not there yet. The result is not bound,
// so when run interactively it is simply echoed.
table.CreateIfNotExists()
// quickly experiment putting some entities into the table
// for our test scenario let's consider fictional transactions
// we'll use the partition and row keys to identify an individual month's worth of transactions for a customer's account
//[<DataServiceKey("PartitionKey", "RowKey")>]
/// First experimental table entity: a single string payload stored under the
/// PartitionKey/RowKey inherited from TableEntity.
type YearMonthTxns() =
    inherit Table.TableEntity()

    /// Payload column; defaults to the empty string.
    member val txns : string = "" with get, set
// we have a few choices for how to identify a customer-account-year-month's worth of transactions
// we can make the partition key more or less fine grained
// that is, we could use cust-acct-year as a partition key, month as row key, or
// cust-year as partition key, account-month as row key, or even
// How to decide??? Lots of good advice here: http://msdn.microsoft.com/en-us/library/windowsazure/hh508997.aspx
// Partition key is for resiliency to enable partitioning and replication of data
// Entity Group Transactions requires the same partition key for the batch - but that's probably not important in this scenario
// For best performance you want items likely to be retrieved together within a single partition
// but partitions are local to servers - so retrieving too much, too often could impact performance (or hit partition rate limit of 500 requests/sec)
// So I think in real use you'd need to try a variety of strategies, also data is returned sorted by key so perhaps we need to decide on the order of the
// parts making up the keys
// How about we run with partition key being year-customer and use month-account for the row key
// note that a number of characters are not allowed within the key eg /\#? and it looks like all the control characters
// Smoke test: insert one hand-built entity (partition "2013-Fred", row "06-Savings").
let someYearMonthTxns = YearMonthTxns( PartitionKey = "2013-Fred", RowKey = "06-Savings", txns = "test" )
let cmd = Table.TableOperation.Insert(someYearMonthTxns)
let tableResult = table.Execute(cmd)
// Output the table URI to the debug window.
// (This previously referenced `container`, which is never defined in this script --
// a leftover from a blob-storage sample -- so the script could not be evaluated.)
System.Diagnostics.Debug.WriteLine(table.Uri)
// what I need to do now is generate some higher volume data to get a handle on pricing
// this means a bit of simulation
// let's create a set of ~1000 test customers
// Sequence of candidate customer ids, 1 through 1000 inclusive.
let cs = seq { for customerId in 1 .. 1000 -> customerId }
// with a random number of accounts centered around 2, log-normal like so there's a good tail to it
// and some random txns spread across months in a single year, maybe centered on 10, let's use MathNet.Numerics
#r @"C:\wd\AzureTxnUploader\packages\MathNet.Numerics.2.5.0\lib\net40\MathNet.Numerics.dll"
#r @"C:\wd\AzureTxnUploader\packages\MathNet.Numerics.FSharp.2.5.0\lib\net40\MathNet.Numerics.FSharp.dll"
open MathNet.Numerics.Distributions
open MathNet.Numerics.Random
open MathNet.Numerics.Statistics
#load @"C:\wd\AzureTxnUploader\packages\FSharp.Charting.0.83\FSharp.Charting.fsx"
open FSharp.Charting
// Distribution used for "number of accounts per customer".
let logNormal = new LogNormal(1.0, 0.5)
// Sanity-check the shape: histogram 10000 samples into 20 buckets spanning 0.0 .. 20.0.
let ha = MathNet.Numerics.Statistics.Histogram( (logNormal.Samples() |> Seq.take 10000), 20, 0.0, 20.0)
// Histogram buckets are indexed from 0, so start at 0; starting at 1 silently
// dropped the first bucket from the chart.
Chart.Column( seq { for b in 0 .. ha.BucketCount - 1 do yield (float)b, ha.Item(b).Count } )
// and for transactions we can try weibull, just to be different
// Distribution used for "number of transactions per account-month".
let weibull = new Weibull(4.0, 11.0)
// Sanity-check the shape: histogram 10000 samples into 20 buckets spanning 0.0 .. 20.0.
let ht = MathNet.Numerics.Statistics.Histogram( (weibull.Samples() |> Seq.take 10000), 20, 0.0, 20.0)
// Histogram buckets are indexed from 0, so start at 0; starting at 1 silently
// dropped the first bucket from the chart.
Chart.Column( seq { for b in 0 .. ht.BucketCount - 1 do yield (float)b, ht.Item(b).Count } )
// going to be generating a few random numbers ahead, for both performance and randomness better to have one instance which we use across future code
// creating System.Random() within various functions called repeatedly over short intervals just generates the same number sequence - and actually, I
// find on this laptop timing is so quick that the based seeding of the default constructor starts the sequence at the same value for most of the iterations
// Single shared generator, created once and reused by all the sampling code below.
let random = System.Random()
// clever little function from: http://stackoverflow.com/questions/1855150/randomly-choose-an-instance-from-union-in-f
/// Picks one case of the union type 't at random (using the shared `random`)
/// and constructs it with no field values.
let randInst<'t>() =
    let unionCases = Reflection.FSharpType.GetUnionCases(typeof<'t>)
    let chosen = unionCases.[random.Next(unionCases.Length)]
    // The empty argument array means no field values are supplied to the chosen case.
    Reflection.FSharpValue.MakeUnion(chosen, [||]) :?> 't
// do I need to make all these types classes?
/// Kind of transaction; every case is field-less.
type TxnType =
    | ATM
    | POS
    | BP
    | Cash
    | Deposit
/// One simulated transaction.
type Txn =
    { Id : Guid
      Posted : DateTime
      Value : decimal
      Type : TxnType
      Description : string }
// some imaginative sample text data
// Pool of descriptions that generated transactions draw from.
let txnDescLookup =
    [ "payment to fred"
      "money for me"
      "savings for house"
      "supermarket"
      "petrol"
      "loan repayment" ]
/// Generates a random list of transactions for the given year and month, sorted by
/// posting date. The number of transactions is one truncated sample from `weibull`.
let createRandMonthlyTxnList year month =
    let daysInMonth = DateTime.DaysInMonth(year, month)
    let descCount = List.length txnDescLookup
    // we use the sample from our distribution for txns/month to give the number of times we randomly choose a day in the month
    [ for i in 1 .. (int)(weibull.Sample()) do
        yield {
            Txn.Id = Guid.NewGuid()
            // Random.Next(min, max) excludes max, so add 1 or the last day of the month is never picked.
            Txn.Posted = DateTime(year, month, random.Next(1, daysInMonth + 1))
            // Whole cents in [0, 100000) converted to a two-decimal amount.
            Txn.Value = (decimal)(random.Next(0, 100000)) / 100M
            Txn.Type = randInst<TxnType>()
            // Next(n) yields 0 .. n-1, i.e. every valid index; the previous `Next(0, n - 1)`
            // could never select the final description.
            Txn.Description = txnDescLookup.[random.Next(descCount)]
        } ]
    |> List.sortBy (fun t -> t.Posted)
/// An account and all of its transactions.
type Account =
    { Id : int
      // kept as an F# list here; the upload code serialises each month's slice separately
      Txns : Txn list }
/// Builds a random number of accounts (one truncated `logNormal` sample), each filled
/// with twelve months of random 2012 transactions.
/// `dummy` is ignored; it exists only so this is a function that is re-evaluated on
/// every call rather than a value computed once.
let createRandomAccountList dummy =
    let accountCount = (int)(logNormal.Sample())
    [ for accountId in 1 .. accountCount ->
        { Account.Id = accountId
          Account.Txns = [1 .. 12] |> List.collect (fun m -> createRandMonthlyTxnList 2012 m) } ]
/// A customer and the accounts they hold.
type Customer =
    { Id : int
      Accounts : Account list }
/// The simulated data set: 100 customers (ids 1..100), each with a random number of
/// accounts holding twelve months of random 2012 transactions.
/// This is a value, not a function: it is generated once and then reused.
let createCustomers =
    // The account-building logic is written inline here rather than calling
    // createRandomAccountList with a dummy argument.
    List.init 100 (fun idx ->
        let accounts =
            [1 .. (int)(logNormal.Sample())]
            |> List.map (fun a ->
                { Account.Id = a
                  Account.Txns = [1 .. 12] |> List.collect (fun m -> createRandMonthlyTxnList 2012 m) })
        { Customer.Id = idx + 1; Customer.Accounts = accounts })
// so, now I have a whole lot of customers, accounts and transactions, time to put them into azure
// and was I intending year-customer for the partition key and month-account for the row key with transactions in an entity, doh! I'm going to have to reformat all this
// fortunately, I'm only storing data for one year - 2012
// so here's my customers for 2012 with their accounts, month and txns by posted date
// Print one line per transaction, walking customer -> account -> month group.
for c in createCustomers do
    for a in c.Accounts do
        for (m, ts) in a.Txns |> Seq.groupBy (fun t -> t.Posted.Month) do
            for t in ts do
                printfn "Cust %i, Account %i, Month %i, Txn %A" c.Id a.Id m t.Posted
// and looking at it as the number of txns in each cust acct month group
// Print one line per customer/account/month group with the number of transactions in it.
for c in createCustomers do
    for a in c.Accounts do
        for (m, ts) in a.Txns |> Seq.groupBy (fun t -> t.Posted.Month) do
            printfn "Cust %i, Account %i, Month %i, Txns %A" c.Id a.Id m (Seq.length(ts))
// so I think this is the entity type we need to use
// first attempt was to use <Txn list> but that wouldn't serialize and end up in azure
// then I tried Txn[] but that wouldn't work either
// then I figured why not serialise the txn list to a string first and put that in table - of course you can't then query it within azure either
/// Table entity holding one group of transactions as a single JSON string column,
/// stored under the PartitionKey/RowKey inherited from TableEntity.
type YearCustomerMonthAccountTxns() =
    inherit Table.TableEntity()

    /// Serialised transaction list; defaults to the empty string.
    member val txnJson = "" with get, set
#r "System.Xml"
#r "System.Runtime.Serialization"
open System.Text
open System.Runtime.Serialization.Json
open System.IO
/// Serialises a transaction list to a JSON string with DataContractJsonSerializer,
/// decoding the written bytes as UTF-8.
let serializeTxnListToJson (txnList: Txn list) =
    let serializer = new DataContractJsonSerializer( typeof<Txn list> )
    // `use` disposes the stream when the function returns; it was previously never disposed.
    use stream = new MemoryStream()
    // WriteObject returns unit, so there is nothing to bind (the old `data` local was unused).
    serializer.WriteObject(stream, txnList)
    Encoding.UTF8.GetString(stream.ToArray())
// let's populate a bunch of entities and send to azure
// Upload one entity per customer/account/month group.
//   PartitionKey = "<year>-<customer id>"
//   RowKey       = "<two-digit month>-<account id>"
// The year is taken from the transactions themselves; it used to be hard-coded as
// "2007" although the data is generated for 2012. The month is zero-padded so that
// keys sort correctly as strings ("02-..." before "10-..."), matching the
// "06-Savings" form used for the sample entity earlier in the script.
createCustomers
|> Seq.iter (fun c ->
    c.Accounts
    |> Seq.iter (fun a ->
        a.Txns
        |> Seq.groupBy (fun t -> t.Posted.Year, t.Posted.Month)
        |> Seq.iter (fun ((y, m), ts) ->
            // Materialise the group once; it is used for both the payload and the count.
            let monthTxns = List.ofSeq ts
            let entity =
                YearCustomerMonthAccountTxns(
                    PartitionKey = (sprintf "%i-%i" y c.Id),
                    RowKey = (sprintf "%02i-%i" m a.Id),
                    txnJson = (serializeTxnListToJson monthTxns))
            let cmd = Table.TableOperation.InsertOrMerge(entity)
            // The TableResult is not inspected; Execute is called for its effect.
            table.Execute(cmd) |> ignore
            printfn "Cust %i, Account %i, Month %i, Txns %A" c.Id a.Id m (List.length monthTxns))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment