View TestJSON.ps1
| # Select MyHDI30 as the active HDInsight cluster, then submit the Hive script stored in the cluster's blob storage container. | |
| Use-AzureHDInsightCluster MyHDI30 | |
| Invoke-Hive -file "wasb://install@myhdi30primary.blob.core.windows.net/complex_test.sql" |
View complexdata.txt
| {"country":"Switzerland","languages":["German","French","Italian"],"religions":{"catholic":"30","protestant":"60"}} |
View pom.xml
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | |
| <modelVersion>4.0.0</modelVersion> | |
| <groupId>org.openx.data</groupId> | |
| <artifactId>json-serde</artifactId> | |
| <version>1.1.9.3-SNAPSHOT</version> | |
| <packaging>jar</packaging> | |
| <name>openx-json-serde</name> |
View complex_test.sql
| -- Recreate the demo table and verify that the OpenX JSON SerDe parses complex types (array, map). | |
| DROP TABLE IF EXISTS json_complex_test; | |
| CREATE TABLE IF NOT EXISTS json_complex_test ( country string, languages array<string>, religions map<string,string>) | |
| ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' | |
| STORED AS TEXTFILE; | |
| -- LOAD DATA INPATH moves (not copies) the source file from blob storage into the table location. | |
| LOAD DATA INPATH 'wasb://install@myhdi30primary.blob.core.windows.net/complexdata.txt' OVERWRITE INTO TABLE json_complex_test; | |
| select * from json_complex_test; | |
| select languages[0] from json_complex_test; | |
| -- Terminating semicolon added: without it the final statement is not executed when this file is run as a Hive script. | |
| select religions['catholic'] from json_complex_test; |
View CreateHDICluster.ps1
| ##################### Begin Edits #################### | |
| #region edits | |
| param | |
| ( | |
| # NOTE: All the storage accounts and containers need to be created on the same data center as the HDInsight cluster and would need to be created prior to running the script | |
| # They can be created from the Azure Management Portal | |
| # This is the name of your Azure Subscription that will be used for provisioning Azure HDInsight | |
| [string]$PrimarySubscriptionName="xxx", | |
| # This is the primary storage account that needs to be created on the same data center as your HDInsight Cluster |
View OozieCoordRESTCall.ps1
| #region edits | |
| # Your HDInsight Cluster Name | |
| $HDIClusterName = "ServerLogs" | |
| # Your HDInsight Cluster Admin User Name | |
| $MyHDInsightUserName = "sysadmin" | |
| # Your HDInsight Cluster Admin Password | |
| $MyHdInsightPwd = "xxxx" | |
| # Your HDInsight Metastore User Name |
View HiveDataPrep.ps1
| #region edits | |
| # This is the name of the Azure HDInsight Subscription | |
| [string]$SubscriptionName = "Contoso2014" | |
| # This is the HDInsight Cluster Name that you want to work with | |
| [string]$HDInsightClusterName = "ServerLogs" | |
| # The table specified here will be dropped, please ensure that this is a test table | |
| [string]$TableName = "samplelog" | |
| # This is the location where the external table's data will be located on WASB |
View samplelog.hiv
| -- Purge history: drop every samplelog partition whose dt value sorts before the supplied ${DROPDT} parameter (Hive allows comparison operators in DROP PARTITION specs). |
| ALTER TABLE samplelog DROP IF EXISTS PARTITION (dt<'${DROPDT}'); |
View workflow.xml
| <workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf"> | |
| <start to="hive-node"/> | |
| <action name="hive-node"> | |
| <hive xmlns="uri:oozie:hive-action:0.2"> | |
| <job-tracker>${jobTracker}</job-tracker> | |
| <name-node>${nameNode}</name-node> | |
| <configuration> | |
| <property> | |
| <name>mapred.job.queue.name</name> | |
| <value>default</value> |
View coordinator.xml
| <coordinator-app name="MY_APP" frequency="${coord:months(1)}" start="${jobStart}" end="${jobEnd}" timezone="UTC" xmlns="uri:oozie:coordinator:0.3"> | |
| <datasets> | |
| <dataset name="input1" frequency="${coord:months(1)}" initial-instance="${initialInstance}" timezone="UTC"> | |
| <uri-template>hcat://headnode0:9083/default/samplelog/dt=${YEAR}-${MONTH}</uri-template> | |
| </dataset> | |
| </datasets> | |
| <input-events> | |
| <data-in name="coordInput1" dataset="input1"> | |
| <instance>${coord:current(1)}</instance> | |
| </data-in> |
NewerOlder