View 00-LogParser-Hive-Regex
This gist includes HiveQL scripts to create an external partitioned table for Syslog-generated
log files using the regex SerDe.
Use case: Count the number of occurrences of processes that got logged, by year, month,
day and process.
Sample data and structure: 01-SampleDataAndStructure
Data download: 02-DataDownload
Data load commands: 03-DataLoadCommands

Setting up GitHub and Bitbucket on the same computer

GitHub will be the main account and Bitbucket the secondary.

Create SSH Keys

ssh-keygen -t rsa -C "github email"

Enter passphrase when prompted. If you see an option to save the passphrase in your keychain, do it for an easier life.

View beeline help
beeline --showDbInPrompt=true
!connect jdbc:hive2://,,;transportMode=http;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
-- To launch queries over a specific YARN queue, add ?[queue_name]
!connect jdbc:hive2://,,;transportMode=http;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2?[queue_name]
drop database if exists aravind_northwind cascade;
View SparkSQLJira.scala
package com.databricks.spark.jira
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.{TableScan, BaseRelation, RelationProvider}
View spark-duplicates.scala
val transactions =
.option("header", "true")
.option("inferSchema", "true")
transactions.groupBy("id", "organization").count.sort($"count".desc).show
View spark-compare-dataframes.scala
val today =
val todayTransactions =
.option("header", "true")
.option("inferSchema", "true")
val yesterdayTransactions =
.option("header", "true")
.option("inferSchema", "true")
curl -X POST -d http://master-host:6066/v1/submissions/create --header "Content-Type:application/json" --data '{
"action": "CreateSubmissionRequest",
"appResource": "hdfs://localhost:9000/user/spark-examples_2.11-2.0.0.jar",
"clientSparkVersion": "2.0.0",
"appArgs": [ "10" ],
"environmentVariables" : {
"mainClass": "org.apache.spark.examples.SparkPi",
"sparkProperties": {
View db-example.go
package main
import (
_ ""
func main() {
View account_main.go
func main() {
aliceAcct := OpenSavingsAccount("12345", "Alice", time.Date(1999, time.January, 03, 0, 0, 0, 0, time.UTC))
fmt.Println("Alice's account =", aliceAcct)
fmt.Println("Alice's account (after deposit) =", aliceAcct)
if err := aliceAcct.Withdraw(Money(10)); err != nil {
} else {
View account.go
package account
import (
//Account is an interface that wraps the common behavior for accounts.
type Account interface {