Skip to content

Instantly share code, notes, and snippets.

# Select the HDInsight cluster "MyHDI30" as the target for subsequent Hive cmdlets.
Use-AzureHDInsightCluster MyHDI30
# Run the HiveQL script stored in the cluster's WASB storage container.
# NOTE(review): assumes complex_test.sql was already uploaded to this blob path — confirm.
Invoke-Hive -file "wasb://install@myhdi30primary.blob.core.windows.net/complex_test.sql"
{"country":"Switzerland","languages":["German","French","Italian"],"religions":{"catholic":"30","protestant":"60"}}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.openx.data</groupId>
<artifactId>json-serde</artifactId>
<version>1.1.9.3-SNAPSHOT</version>
<packaging>jar</packaging>
<name>openx-json-serde</name>
-- Recreate the JSON test table and exercise nested-type access (array element,
-- map lookup) through the OpenX JSON SerDe.
DROP TABLE IF EXISTS json_complex_test;
CREATE TABLE IF NOT EXISTS json_complex_test ( country string, languages array<string>, religions map<string,string>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
-- NOTE: LOAD DATA INPATH moves (not copies) the source blob into the table location.
LOAD DATA INPATH 'wasb://install@myhdi30primary.blob.core.windows.net/complexdata.txt' OVERWRITE INTO TABLE json_complex_test ;
select * from json_complex_test;
select languages[0] from json_complex_test;
-- Fix: the final statement was missing its ';' terminator, which fails when the
-- script is executed as a batch file (every sibling statement is terminated).
select religions['catholic'] from json_complex_test;
##################### Begin Edits ####################
#region edits
param
(
# NOTE: All the storage accounts and containers need to be created on the same data center as the HDInsight cluster and would need to be created prior to running the script
# They can be created from the Azure Management Portal
# This is the name of your Azure Subscription that will be used for provisioning Azure HDInsight
[string]$PrimarySubscriptionName="xxx",
# This is the primary storage account that needs to be created on the same data center as your HDInsight Cluster
#region edits
# Your HDInsight Cluster Name
$HDIClusterName = "ServerLogs"
# Your HDInsight Cluster Admin User Name
$MyHDInsightUserName = "sysadmin"
# Your HDInsight Cluster Admin Password
# SECURITY(review): password is hardcoded as a literal — prefer Get-Credential or a
# protected store so the secret is not committed with the script.
$MyHdInsightPwd = "xxxx"
# NOTE(review): dangling comment — no metastore user variable follows it; confirm
# whether an assignment was dropped from this fragment.
# Your HDInsight Metastore User Name
#region edits
# This is the name of the Azure HDInsight Subscription
[string]$SubscriptionName = "Contoso2014"
# This is the HDInsight Cluster Name that you want to work with
[string]$HDInsightClusterName = "ServerLogs"
# The table specified here will be dropped, please ensure that this is a test table
[string]$TableName = "samplelog"
# NOTE(review): dangling comment — the WASB location variable it describes is not
# visible in this fragment.
# This is the location where the external table's data will be located on WASB
-- Drop every partition of samplelog whose dt value sorts before the cutoff.
-- NOTE(review): ${DROPDT} is substituted externally (e.g. Oozie parameter or
-- hivevar) — confirm it is defined before this statement runs.
ALTER TABLE samplelog DROP IF EXISTS PARTITION (dt<'${DROPDT}');
<workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf">
<start to="hive-node"/>
<action name="hive-node">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapred.job.queue.name</name>
<value>default</value>
<coordinator-app name="MY_APP" frequency="${coord:months(1)}" start="${jobStart}" end="${jobEnd}" timezone="UTC" xmlns="uri:oozie:coordinator:0.3">
<datasets>
<dataset name="input1" frequency="${coord:months(1)}" initial-instance="${initialInstance}" timezone="UTC">
<uri-template>hcat://headnode0:9083/default/samplelog/dt=${YEAR}-${MONTH}</uri-template>
</dataset>
</datasets>
<input-events>
<data-in name="coordInput1" dataset="input1">
<instance>${coord:current(1)}</instance>
</data-in>