package HiveUDF;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.UDF;

public final class FindPattern extends UDF
{
    // Hive resolves evaluate() through reflection; keep it a public method.
    public String evaluate(String targetString, String strPattern)
    {
        if (targetString == null || strPattern == null) { return null; }
        // Pattern matching on the input pattern; return the first match, or null if none.
        Pattern p = Pattern.compile(strPattern);
        Matcher m = p.matcher(targetString);
        return m.find() ? m.group() : null;
    }
}
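To call the UDF from Hive, the compiled jar has to be added to the session and the class registered as a temporary function. A minimal sketch using the classic Azure HDInsight cmdlets; the jar path and the function name findPattern are illustrative assumptions, not part of the original snippets.

# Sketch: register the UDF on the cluster; the wasb jar path below is a placeholder.
Use-AzureHDInsightCluster -Name "MyHDICluster"
Invoke-Hive -Query @"
ADD JAR wasb:///jars/FindPattern.jar;
CREATE TEMPORARY FUNCTION findPattern AS 'HiveUDF.FindPattern';
"@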
CREATE EXTERNAL TABLE IF NOT EXISTS WebFeeds
(
UserID INT,
Message STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'wasb://install@myprimarystorage.blob.core.windows.net/webfeeds';
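With the table in place, the UDF can be applied to its Message column. A hedged example; the regular expression here is illustrative only.

# Sketch: extract the first match of a pattern from each message; the regex is an assumption.
Invoke-Hive -Query "SELECT UserID, findPattern(Message, '[0-9]{3}-[0-9]{4}') FROM WebFeeds LIMIT 10;"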
param
(
    #############################################################################
    ### Please edit the values of the parameters below for your configuration ###
    #############################################################################
    [string]$PrimarySubscriptionName="<Your Subscription Name>", # Replace <Your Subscription Name> with your Azure subscription name
    [string]$HDInsightClusterLocation="West US", # The data center where the HDInsight cluster will be provisioned
    [string]$HDInsightClusterName="MyHDICluster" # The name you want for the HDInsight cluster
)
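These parameters typically feed a provisioning call. A minimal sketch using the classic service-management cmdlets; the storage account and container echo the WASB location used earlier, while the key and node count are placeholder assumptions, and all storage resources must already exist in the cluster's data center.

# Sketch only: provision the cluster using the parameters above.
Select-AzureSubscription -SubscriptionName $PrimarySubscriptionName
$clusterCred = Get-Credential -Message "Enter the admin credentials for the new cluster"
New-AzureHDInsightCluster -Name $HDInsightClusterName `
    -Location $HDInsightClusterLocation `
    -DefaultStorageAccountName "myprimarystorage.blob.core.windows.net" `
    -DefaultStorageAccountKey "<storage account key>" `
    -DefaultStorageContainerName "install" `
    -ClusterSizeInNodes 4 `
    -Credential $clusterCred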
#############################################################################
### Please edit the values of the parameters below for your configuration ###
#############################################################################
$subscriptionName = "<Your Azure Subscription Name>"
$clusterName = "MyHDICluster" # HDInsight Cluster Name
############################################################################
### End Edits ###
############################################################################
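After the edits, the script binds the session to the subscription and points the Hive cmdlets at the cluster. A short sketch of that step:

# Sketch: select the subscription and target the cluster named above.
Select-AzureSubscription -SubscriptionName $subscriptionName
Use-AzureHDInsightCluster -Name $clusterName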
<coordinator-app name="MY_APP" frequency="${coord:months(1)}" start="${jobStart}" end="${jobEnd}" timezone="UTC" xmlns="uri:oozie:coordinator:0.3">
    <datasets>
        <dataset name="input1" frequency="${coord:months(1)}" initial-instance="${initialInstance}" timezone="UTC">
            <uri-template>hcat://headnode0:9083/default/samplelog/dt=${YEAR}-${MONTH}</uri-template>
        </dataset>
    </datasets>
    <input-events>
        <data-in name="coordInput1" dataset="input1">
            <instance>${coord:current(1)}</instance>
        </data-in>
    </input-events>
    <action>
        <workflow>
            <!-- ${workflowAppUri} is a placeholder; point it at the workflow application below -->
            <app-path>${workflowAppUri}</app-path>
        </workflow>
    </action>
</coordinator-app>
<workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf">
    <start to="hive-node"/>
    <action name="hive-node">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>default</value>
                </property>
            </configuration>
            <!-- script.q is a placeholder for the Hive script shipped with the workflow -->
            <script>script.q</script>
        </hive>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail"><message>Hive action failed: ${wf:errorMessage(wf:lastErrorNode())}</message></kill>
    <end name="end"/>
</workflow-app>
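The coordinator and workflow reference variables that Oozie resolves from a job properties file. A sketch of such a file; the property names come from the XML above, but every value shown is a placeholder assumption.

# Oozie job properties (all values are placeholders)
nameNode=wasb://install@myprimarystorage.blob.core.windows.net
jobTracker=jobtrackerhost:9010
jobStart=2014-01-01T00:00Z
jobEnd=2014-12-31T00:00Z
initialInstance=2014-01-01T00:00Z
workflowAppUri=<path to the workflow application>
oozie.coord.application.path=<path to the coordinator application>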
ALTER TABLE samplelog DROP IF EXISTS PARTITION (dt<'${DROPDT}');
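The same cleanup statement can be issued directly from PowerShell by interpolating the cutoff date into the query. A sketch; the date value is an illustrative assumption.

# Sketch: drop partitions older than a cutoff; the date below is illustrative.
$dropDate = "2014-01"
Invoke-Hive -Query "ALTER TABLE samplelog DROP IF EXISTS PARTITION (dt < '$dropDate');"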
#region edits
# This is the name of the Azure HDInsight Subscription
[string]$SubscriptionName = "Contoso2014"
# This is the HDInsight Cluster Name that you want to work with
[string]$HDInsightClusterName = "ServerLogs"
# The table specified here will be dropped; make sure it is a test table
[string]$TableName = "samplelog"
# This is the location where the external table's data will be located on WASB
#region edits
# Your HDInsight Cluster Name
$HDIClusterName = "ServerLogs"
# Your HDInsight Cluster Admin User Name
$MyHDInsightUserName = "sysadmin"
# Your HDInsight Cluster Admin Password
$MyHdInsightPwd = "xxxx"
# Your HDInsight Metastore User Name
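The admin values above are typically folded into a PSCredential for use with the cluster cmdlets or the cluster's REST endpoints. A minimal sketch:

# Sketch: build a credential object from the admin values above.
$securePwd = ConvertTo-SecureString $MyHdInsightPwd -AsPlainText -Force
$clusterCred = New-Object System.Management.Automation.PSCredential($MyHDInsightUserName, $securePwd)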
##################### Begin Edits ####################
#region edits
param
(
# NOTE: All the storage accounts and containers must be created in the same data center as the HDInsight cluster, before this script is run
# They can be created from the Azure Management Portal
# This is the name of your Azure Subscription that will be used for provisioning Azure HDInsight
[string]$PrimarySubscriptionName="xxx",
# This is the primary storage account that needs to be created on the same data center as your HDInsight Cluster