Skip to content

Instantly share code, notes, and snippets.

@AzimUddin
Last active August 29, 2015 14:00
Show Gist options
  • Save AzimUddin/11025548 to your computer and use it in GitHub Desktop.
Save AzimUddin/11025548 to your computer and use it in GitHub Desktop.
An Example of HDInsight Cluster customization via HDInsight .Net SDK
/*1. Create a Visual Studio 2012 project.
2. Add the HDInsight SDK NuGet package to your project -
In Visual Studio 2012, click Tools -> Library Package Manager -> Package Manager Console
PM> Install-Package Microsoft.WindowsAzure.Management.HDInsight
3. Use the following code, fill in the relevant info, then build and run -
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
// For the management certificate
using System.Security.Cryptography.X509Certificates;
//HDInsight
using Microsoft.WindowsAzure.Management.HDInsight;
namespace ClusterProvisioningSample
{
/// <summary>
/// Sample console program that provisions a customized HDInsight cluster through the
/// HDInsight .NET SDK: additional storage account, Hive/Oozie metastores, Hadoop
/// configuration overrides and additional Hive libraries.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Fill in the placeholder values below, then build and run.
    /// On any failure an error is written to the console and the process exit code is set to 1.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        /*
        Pre-req steps:
        1. Install the HDInsight SDK NuGet package via Tools -> Library Package Manager -> Package Manager Console
           PM> Install-Package Microsoft.WindowsAzure.Management.HDInsight
        2. Get subscription related info handy, say via Windows Azure PowerShell or portal.
           The Get-AzureSubscription cmdlet should give us the subscription id and certificate thumbprint.
        3. Get Azure Storage Account(s) and keys handy via Windows Azure PowerShell or portal
           $key1 = Get-AzureStorageKey "StorageAcctName" | %{ $_.Primary }
        */

        // Subscription id and certificate info, obtained from the Get-AzureSubscription cmdlet
        string mySubscriptionId = "YourSubscriptionID"; // Your subscription ID
        string myCertThumbPrint = "YourThumbprintID";   // Thumbprint of your Azure management certificate

        X509Certificate2 cert = FindCertificateByThumbprint(myCertThumbPrint);
        if (cert == null)
        {
            // Fail with an actionable message instead of an opaque "Sequence contains no matching element".
            Console.WriteLine("\nNo certificate with thumbprint '{0}' was found in the certificate store.", myCertThumbPrint);
            Environment.ExitCode = 1;
            return;
        }

        // Create an instance of HDInsightCertificateCredential
        var creds = new HDInsightCertificateCredential(new Guid(mySubscriptionId), cert);

        // Specify the details of the cluster to be created
        string myClusterName = "YourClusterName";
        string myClusterLocation = "West US"; // Change region as needed
        int numClusterNodes = 4;              // Change number of nodes as needed
        string myClusterVersion = "3.0";      // Change cluster version as needed
        string MyHDInsightUserName = "YourClusterUid";
        string MyHDInsightPwd = "YourClusterPwd";
        string MySqlAzureUserName = "YourSqlAzureUid";
        string MySqlAzurePwd = "YourSqlazurePwd";
        string MySqlAzureServerName = "YourSqlAzureServerName";
        string MySqlAzureDbName = "AzureDB1";
        string MySqlAzureDbName2 = "AzureDB2";
        string DefaultStorageAccountFqdn = "asv1.blob.core.windows.net";
        string SecondStorageAccountFqdn = "asv2.blob.core.windows.net";
        string defaultContainerName = "yourDefaultContainerName";
        string primaryStorageAcctKey = "key1"; // for the default storage account
        string secondStorageAcctKey = "key2";  // for the secondary storage account

        var clusterInfo = new ClusterCreateParameters()
        {
            // Cluster DNS name
            Name = myClusterName,
            // Cluster region
            Location = myClusterLocation,
            // Cluster version
            Version = myClusterVersion,
            // Existing storage account
            DefaultStorageAccountName = DefaultStorageAccountFqdn,
            // Storage account key
            DefaultStorageAccountKey = primaryStorageAcctKey,
            // Storage container - if the container doesn't exist HDInsight creates one
            DefaultStorageContainer = defaultContainerName,
            // Cluster admin user name
            UserName = MyHDInsightUserName,
            // Password for the cluster admin user
            Password = MyHDInsightPwd,
            // Size of the cluster
            ClusterSizeInNodes = numClusterNodes
        };

        // Add an additional storage account for the cluster
        var secondaryStorage = new WabStorageAccountConfiguration(SecondStorageAccountFqdn, secondStorageAcctKey);
        clusterInfo.AdditionalStorageAccounts.Add(secondaryStorage);

        // Hive metastore
        clusterInfo.HiveMetastore = new Metastore(MySqlAzureServerName, MySqlAzureDbName, MySqlAzureUserName, MySqlAzurePwd);

        // Oozie metastore (same SQL Azure server, second database)
        clusterInfo.OozieMetastore = new Metastore(MySqlAzureServerName, MySqlAzureDbName2, MySqlAzureUserName, MySqlAzurePwd);

        /* Customize configuration values for the cluster
         * http://hadoopsdk.codeplex.com/wikipage?title=PowerShell%20Cmdlets%20for%20Cluster%20Management&referringTitle=Home
         * The following configuration files are supported:
         *   1. core-site.xml
         *   2. hdfs-site.xml
         *   3. mapred-site.xml
         *   4. capacity-scheduler.xml
         *   5. hive-site.xml
         *   6. oozie-site.xml
         *   7. (in HDI 3.0) yarn-site.xml
         */
        /* NOTE on capacity-scheduler.xml:
         * Capacity-scheduler is part of MapReduce in HDInsight 2.x and of Yarn in HDInsight 3.x.
         * Accordingly, in the HDInsight SDK, capacity-scheduler.xml configurations can be set via -
         *   1. clusterInfo.YarnConfiguration (for HDI 3.x clusters)
         *   2. clusterInfo.MapReduceConfiguration.CapacitySchedulerConfigurationCollection (for HDI 2.x clusters)
         */

        // hdfs-site.xml configuration
        clusterInfo.HdfsConfiguration.Add(new KeyValuePair<string, string>("dfs.blocksize", "64m")); // default is 128MB in HDI 3.0 and 256MB in HDI 2.1

        // core-site.xml configuration
        clusterInfo.CoreConfiguration.Add(new KeyValuePair<string, string>("ipc.client.connect.max.retries", "60")); // default is 50

        // mapred-site.xml configuration
        clusterInfo.MapReduceConfiguration.ConfigurationCollection.Add(new KeyValuePair<string, string>("mapreduce.task.timeout", "1200000")); // default 600000

        // capacity-scheduler.xml configuration in HDI 2.x
        //clusterInfo.MapReduceConfiguration.CapacitySchedulerConfigurationCollection.Add(new KeyValuePair<string, string>("mapred.capacity-scheduler.maximum-system-jobs", "3100")); //default 3000

        // hive-site.xml configuration
        clusterInfo.HiveConfiguration.ConfigurationCollection.Add(new KeyValuePair<string, string>("hive.metastore.client.socket.timeout", "90")); // default 60

        // oozie-site.xml configuration
        clusterInfo.OozieConfiguration.ConfigurationCollection.Add(new KeyValuePair<string, string>("oozie.service.coord.normal.default.timeout", "150")); // default 120

        // In HDI 3.x, yarn-site.xml and capacity-scheduler.xml configurations can be added to the YarnConfiguration collection.
        // yarn-site.xml configuration
        clusterInfo.YarnConfiguration.Add(new KeyValuePair<string, string>("yarn.nodemanager.resource.memory-mb", "6200")); // default 6144
        // capacity-scheduler.xml configuration in HDI 3.x
        clusterInfo.YarnConfiguration.Add(new KeyValuePair<string, string>("yarn.scheduler.capacity.root.joblauncher.maximum-capacity", "60")); // default 50

        // Additional libraries for Hive, read from a container in the default storage account
        string hiveAdditionalLibContainer = "azimhivelibs";
        clusterInfo.HiveConfiguration.AdditionalLibraries =
            new WabStorageAccountConfiguration(DefaultStorageAccountFqdn, primaryStorageAcctKey, hiveAdditionalLibContainer);

        // Additional libraries for Oozie can be set similarly to Hive:
        //clusterInfo.OozieConfiguration.AdditionalSharedLibraries
        //clusterInfo.OozieConfiguration.AdditionalActionExecutorLibraries

        // First authenticate to Windows Azure via the certificate
        IHDInsightClient client;
        try
        {
            client = HDInsightClient.Connect(creds);
        }
        catch (Exception ex1)
        {
            Console.WriteLine("\nError while authenticating to Windows Azure: \r\n");
            Console.WriteLine(ex1.Message);
            Console.WriteLine(ex1.StackTrace);
            Environment.ExitCode = 1;
            return;
        }

        // Create the cluster.
        // CreateCluster polls automatically for the cluster status and returns when the cluster has finished provisioning.
        // If you need the method to return immediately you can use the Async version.
        try
        {
            Console.WriteLine("\nCreating Cluster");
            var clusterDetails = client.CreateCluster(clusterInfo);
            Console.WriteLine("\nCreated cluster: {0}", clusterDetails.ConnectionUrl);
        }
        catch (Exception ex2)
        {
            Console.WriteLine("\nError while Creating Cluster, Error Message: \r\n");
            Console.WriteLine(ex2.Message);
            Console.WriteLine(ex2.StackTrace);
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Looks up a certificate by thumbprint in the store opened by the parameterless
    /// <see cref="X509Store"/> constructor, and always closes the store afterwards.
    /// </summary>
    /// <param name="thumbprint">
    /// Certificate thumbprint. Whitespace is ignored and the comparison is case-insensitive,
    /// so values pasted as "ab cd ..." or in lowercase still match.
    /// </param>
    /// <returns>The matching certificate, or <c>null</c> when none is found.</returns>
    private static X509Certificate2 FindCertificateByThumbprint(string thumbprint)
    {
        // Strip whitespace so thumbprints pasted with separators still match.
        string normalized = new string((thumbprint ?? string.Empty).Where(c => !char.IsWhiteSpace(c)).ToArray());

        var store = new X509Store();
        store.Open(OpenFlags.ReadOnly);
        try
        {
            return store.Certificates
                .Cast<X509Certificate2>()
                .FirstOrDefault(item => string.Equals(item.Thumbprint, normalized, StringComparison.OrdinalIgnoreCase));
        }
        finally
        {
            // Release the store handle whether or not the lookup succeeded.
            store.Close();
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment