Last active
August 29, 2015 14:00
-
-
Save AzimUddin/11025548 to your computer and use it in GitHub Desktop.
An Example of HDInsight Cluster customization via HDInsight .Net SDK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* 1. Create a Visual Studio 2012 project.
   2. Add the HDInsight SDK NuGet package to your project:
      In Visual Studio 2012, click Tools -> Library Package Manager -> Package Manager Console
      PM> Install-Package Microsoft.WindowsAzure.Management.HDInsight
   3. Use the following code, fill in the relevant info, then build and run.
*/
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
// For certificate lookup
using System.Security.Cryptography.X509Certificates; | |
//HDInsight | |
using Microsoft.WindowsAzure.Management.HDInsight; | |
namespace ClusterProvisioningSample
{
    /// <summary>
    /// Console sample that creates a customized HDInsight cluster through the HDInsight .NET SDK:
    /// an additional storage account, Hive/Oozie metastores, Hadoop configuration overrides and
    /// additional Hive libraries.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Looks up the management certificate, builds the cluster definition,
        /// connects to Windows Azure and requests cluster creation. Failures are reported on the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            /*
            Pre-req steps:
            1. Install the HDInsight SDK NuGet package via Tools -> Library Package Manager -> Package Manager Console
               PM> Install-Package Microsoft.WindowsAzure.Management.HDInsight
            2. Get subscription related info handy, say via Windows Azure PowerShell or the portal.
               The Get-AzureSubscription cmdlet should give us the subscription id and certificate thumbprint.
            3. Get Azure Storage account(s) and keys handy via Windows Azure PowerShell or the portal
               $key1 = Get-AzureStorageKey "StorageAcctName" | %{ $_.Primary }
            */

            // Subscription id and certificate info, obtained from the Get-AzureSubscription cmdlet.
            string mySubscriptionId = "YourSubscriptionID"; // Your subscription ID
            string myCertThumbPrint = "YourThumbprintID";   // Thumbprint of my Azure certificate

            // Validate up front so a placeholder / malformed ID gives a readable message
            // instead of an unhandled FormatException.
            Guid subscriptionGuid;
            if (!Guid.TryParse(mySubscriptionId, out subscriptionGuid))
            {
                Console.WriteLine("\nError: '" + mySubscriptionId + "' is not a valid subscription ID (GUID expected).");
                return;
            }

            X509Certificate2 cert = FindCertificateByThumbprint(myCertThumbPrint);
            if (cert == null)
            {
                Console.WriteLine("\nError: no certificate with thumbprint '" + myCertThumbPrint
                    + "' was found in the current user's personal certificate store.");
                return;
            }

            // Create an instance of HDInsightCertificateCredential
            var creds = new HDInsightCertificateCredential(subscriptionGuid, cert);

            ClusterCreateParameters clusterInfo = BuildClusterParameters();
            ApplyConfigurationOverrides(clusterInfo);

            // First authenticate to Windows Azure via certificate.
            // Broad catch is deliberate: this is the top level of a console sample and every
            // failure is simply reported to the user.
            IHDInsightClient client;
            try
            {
                client = HDInsightClient.Connect(creds);
            }
            catch (Exception ex)
            {
                ReportError("\nError while authenticating to Windows Azure: \r\n", ex);
                return;
            }

            // Create cluster.
            // CreateCluster will poll automatically for the cluster status and will return when the
            // cluster has finished provisioning. If you need the method to return immediately you can
            // use the Async version.
            try
            {
                Console.WriteLine("\nCreating Cluster");
                client.CreateCluster(clusterInfo);
                Console.WriteLine("\nCreateCluster call completed for: " + clusterInfo.Name);
            }
            catch (Exception ex)
            {
                ReportError("\nError while Creating Cluster, Error Message: \r\n", ex);
            }
        }

        /// <summary>
        /// Searches the current user's personal ("My") certificate store for a certificate
        /// with the given thumbprint.
        /// </summary>
        /// <param name="thumbprint">Hex thumbprint of the certificate; compared case-insensitively.</param>
        /// <returns>The matching certificate, or <c>null</c> when none is found.</returns>
        private static X509Certificate2 FindCertificateByThumbprint(string thumbprint)
        {
            // Same store the parameterless X509Store constructor selects, spelled out for clarity.
            var store = new X509Store(StoreName.My, StoreLocation.CurrentUser);
            store.Open(OpenFlags.ReadOnly);
            try
            {
                // Ordinal, case-insensitive: thumbprints are hex strings and are frequently
                // pasted in lower case.
                return store.Certificates
                    .Cast<X509Certificate2>()
                    .FirstOrDefault(item => string.Equals(item.Thumbprint, thumbprint, StringComparison.OrdinalIgnoreCase));
            }
            finally
            {
                // try/finally + Close() rather than a using block, so the sample also builds
                // against framework versions where X509Store is not IDisposable.
                store.Close();
            }
        }

        /// <summary>
        /// Builds the cluster definition: name, region, version, size, admin credentials,
        /// default and additional storage, Hive/Oozie metastores and additional Hive libraries.
        /// </summary>
        /// <returns>The populated <c>ClusterCreateParameters</c>.</returns>
        private static ClusterCreateParameters BuildClusterParameters()
        {
            // Specify the details of the cluster to be created.
            // NOTE: these are placeholders - do not commit real passwords or storage keys to source control.
            string myClusterName = "YourClusterName";
            string myClusterLocation = "West US"; // Change region as needed
            int numClusterNodes = 4;              // Change number of nodes as needed
            string myClusterVersion = "3.0";      // Change cluster version as needed
            string hdInsightUserName = "YourClusterUid";
            string hdInsightPwd = "YourClusterPwd";

            string sqlAzureUserName = "YourSqlAzureUid";
            string sqlAzurePwd = "YourSqlazurePwd";
            string sqlAzureServerName = "YourSqlAzureServerName";
            string hiveMetastoreDbName = "AzureDB1";
            string oozieMetastoreDbName = "AzureDB2";

            string defaultStorageAccountFqdn = "asv1.blob.core.windows.net";
            string secondStorageAccountFqdn = "asv2.blob.core.windows.net";
            string defaultContainerName = "yourDefaultContainerName";
            string primaryStorageAcctKey = "key1"; // for the default storage account
            string secondStorageAcctKey = "key2";  // for the secondary storage account

            var clusterInfo = new ClusterCreateParameters()
            {
                // Cluster DNS name
                Name = myClusterName,
                // Cluster region
                Location = myClusterLocation,
                // Cluster version
                Version = myClusterVersion,
                // Existing storage account
                DefaultStorageAccountName = defaultStorageAccountFqdn,
                // Storage account key
                DefaultStorageAccountKey = primaryStorageAcctKey,
                // Storage container - if the container doesn't exist HDInsight creates one
                DefaultStorageContainer = defaultContainerName,
                // Cluster admin user name
                UserName = hdInsightUserName,
                // Password for the cluster admin user
                Password = hdInsightPwd,
                // Size of the cluster
                ClusterSizeInNodes = numClusterNodes
            };

            // Add an additional storage account for the cluster
            clusterInfo.AdditionalStorageAccounts.Add(
                new WabStorageAccountConfiguration(secondStorageAccountFqdn, secondStorageAcctKey));

            // Hive metastore
            clusterInfo.HiveMetastore =
                new Metastore(sqlAzureServerName, hiveMetastoreDbName, sqlAzureUserName, sqlAzurePwd);

            // Oozie metastore
            clusterInfo.OozieMetastore =
                new Metastore(sqlAzureServerName, oozieMetastoreDbName, sqlAzureUserName, sqlAzurePwd);

            // Additional libraries for Hive
            string hiveAdditionalLibContainer = "azimhivelibs";
            clusterInfo.HiveConfiguration.AdditionalLibraries =
                new WabStorageAccountConfiguration(defaultStorageAccountFqdn, primaryStorageAcctKey, hiveAdditionalLibContainer);

            // Additional libraries for Oozie can be set similarly to Hive:
            //   clusterInfo.OozieConfiguration.AdditionalSharedLibraries
            //   clusterInfo.OozieConfiguration.AdditionalActionExecutorLibraries

            return clusterInfo;
        }

        /// <summary>
        /// Adds the sample Hadoop configuration overrides to the cluster definition.
        /// </summary>
        /// <param name="clusterInfo">Cluster definition to customize (modified in place).</param>
        private static void ApplyConfigurationOverrides(ClusterCreateParameters clusterInfo)
        {
            /* Customize configuration values for the cluster
             * http://hadoopsdk.codeplex.com/wikipage?title=PowerShell%20Cmdlets%20for%20Cluster%20Management&referringTitle=Home
             * The following configuration files are supported:
             *   1. core-site.xml
             *   2. hdfs-site.xml
             *   3. mapred-site.xml
             *   4. capacity-scheduler.xml
             *   5. hive-site.xml
             *   6. oozie-site.xml
             *   7. yarn-site.xml (in HDI 3.0)
             *
             * NOTE on capacity-scheduler.xml:
             * The capacity scheduler is part of MapReduce in HDInsight 2.x and of YARN in HDInsight 3.x.
             * Accordingly, in the HDInsight SDK, capacity-scheduler.xml configurations can be set via -
             *   1. clusterInfo.YarnConfiguration (for HDI 3.x clusters)
             *   2. clusterInfo.MapReduceConfiguration.CapacitySchedulerConfigurationCollection (for HDI 2.x clusters)
             */

            // hdfs-site.xml configuration
            clusterInfo.HdfsConfiguration.Add(Setting("dfs.blocksize", "64m")); // default is 128MB in HDI 3.0 and 256MB in HDI 2.1

            // core-site.xml configuration
            clusterInfo.CoreConfiguration.Add(Setting("ipc.client.connect.max.retries", "60")); // default is 50

            // mapred-site.xml configuration
            clusterInfo.MapReduceConfiguration.ConfigurationCollection.Add(Setting("mapreduce.task.timeout", "1200000")); // default 600000

            // capacity-scheduler.xml configuration in HDI 2.x (property "mapred.capacity-scheduler.maximum-system-jobs",
            // default 3000) would go into clusterInfo.MapReduceConfiguration.CapacitySchedulerConfigurationCollection.

            // hive-site.xml configuration
            clusterInfo.HiveConfiguration.ConfigurationCollection.Add(Setting("hive.metastore.client.socket.timeout", "90")); // default 60

            // oozie-site.xml configuration
            clusterInfo.OozieConfiguration.ConfigurationCollection.Add(Setting("oozie.service.coord.normal.default.timeout", "150")); // default 120

            // In HDI 3.x, yarn-site.xml and capacity-scheduler.xml configurations are both added to YarnConfiguration.
            // yarn-site.xml configuration
            clusterInfo.YarnConfiguration.Add(Setting("yarn.nodemanager.resource.memory-mb", "6200")); // default 6144

            // capacity-scheduler.xml configuration in HDI 3.x
            clusterInfo.YarnConfiguration.Add(Setting("yarn.scheduler.capacity.root.joblauncher.maximum-capacity", "60")); // default 50
        }

        /// <summary>
        /// Creates a configuration name/value entry.
        /// </summary>
        /// <param name="name">Configuration property name.</param>
        /// <param name="value">Configuration property value.</param>
        /// <returns>The pair to add to a configuration collection.</returns>
        private static KeyValuePair<string, string> Setting(string name, string value)
        {
            return new KeyValuePair<string, string>(name, value);
        }

        /// <summary>
        /// Writes an error header followed by the exception message and stack trace to the console.
        /// </summary>
        /// <param name="header">Text describing which step failed.</param>
        /// <param name="ex">The exception that was caught.</param>
        private static void ReportError(string header, Exception ex)
        {
            Console.WriteLine(header);
            Console.WriteLine(ex.Message);
            Console.WriteLine(ex.StackTrace);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment