View beeline help
beeline --showDbInPrompt=true
!connect jdbc:hive2://zk1.com,zk2.com,zk3.com:2181/;transportMode=http;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
-- To launch queries on a specific YARN queue, append ?mapred.job.queue.name=[queue_name]
!connect jdbc:hive2://zk1.com,zk2.com,zk3.com:2181/;transportMode=http;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2?mapred.job.queue.name=[queue_name]
drop database if exists aravind_northwind cascade;
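The same HiveServer2 endpoint can also be reached from application code. A minimal Scala sketch, assuming the Hive JDBC driver (org.apache.hive:hive-jdbc) is on the classpath and reusing the ZooKeeper-discovery URL shown above; the credentials and the "show databases" query are placeholders, not part of the original gist:

import java.sql.DriverManager

object HiveJdbcExample {
  def main(args: Array[String]): Unit = {
    // Same ZooKeeper service-discovery URL used with beeline above.
    val url = "jdbc:hive2://zk1.com,zk2.com,zk3.com:2181/;transportMode=http;" +
      "serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2"
    val conn = DriverManager.getConnection(url, "user", "password") // placeholder credentials
    try {
      val rs = conn.createStatement().executeQuery("show databases")
      while (rs.next()) println(rs.getString(1))
    } finally {
      conn.close()
    }
  }
}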
View SparkSQLJira.scala
package com.databricks.spark.jira
import scala.io.Source
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.{TableScan, BaseRelation, RelationProvider}
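The preview stops at the imports. A hedged sketch of how the rest of such a Data Source V1 implementation typically looks, reusing the imports above; the url parameter, the two-column schema, and the parseIssues helper are illustrative assumptions, not the original gist's code:

import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Resolved via the package name, e.g. spark.read.format("com.databricks.spark.jira").
class DefaultSource extends RelationProvider {
  override def createRelation(sqlContext: SQLContext,
                              parameters: Map[String, String]): BaseRelation =
    new JiraRelation(parameters("url"))(sqlContext)
}

class JiraRelation(url: String)(@transient val sqlContext: SQLContext)
  extends BaseRelation with TableScan {

  // Fixed illustrative schema; a real implementation would derive it from the JIRA response.
  override def schema: StructType = StructType(Seq(
    StructField("key", StringType, nullable = false),
    StructField("summary", StringType, nullable = true)))

  override def buildScan(): RDD[Row] = {
    val json = Source.fromURL(url).mkString            // raw JIRA search response
    val issues = parseIssues(json)                     // hypothetical (key, summary) extraction
    sqlContext.sparkContext.parallelize(issues).map { case (k, s) => Row(k, s) }
  }

  // Placeholder parser; the original gist presumably maps JIRA's JSON into rows here.
  private def parseIssues(json: String): Seq[(String, String)] = Seq.empty
}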
View spark-duplicates.scala
val transactions = spark.read
  .option("header", "true")
  .option("inferSchema", "true")
  .json("s3n://bucket-name/transaction.json")

transactions.groupBy("id", "organization").count.sort($"count".desc).show
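A possible follow-up, sketched as an assumption rather than part of the gist: drop the duplicate keys, or join back on the over-represented keys to inspect the offending rows.

// Keep one arbitrary row per (id, organization):
val deduped = transactions.dropDuplicates("id", "organization")

// Or pull out only the rows whose key occurs more than once, for inspection:
val dupKeys = transactions.groupBy("id", "organization").count.filter($"count" > 1)
val dupRows = transactions.join(dupKeys.select("id", "organization"), Seq("id", "organization"))
dupRows.show()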
View spark-compare-dataframes.scala
import java.time.LocalDate

val today = LocalDate.now

val todayTransactions = spark.read
  .option("header", "true")
  .option("inferSchema", "true")
  .json(s"s3n://bucket-name/${today}/transaction.json")

val yesterdayTransactions = spark.read
  .option("header", "true")
  .option("inferSchema", "true")
View spark-rest-submit.sh
curl -X POST http://master-host:6066/v1/submissions/create --header "Content-Type:application/json" --data '{
"action": "CreateSubmissionRequest",
"appResource": "hdfs://localhost:9000/user/spark-examples_2.11-2.0.0.jar",
"clientSparkVersion": "2.0.0",
"appArgs": [ "10" ],
"environmentVariables" : {
"SPARK_ENV_LOADED" : "1"
},
"mainClass": "org.apache.spark.examples.SparkPi",
"sparkProperties": {
View db-example.go
package main
import (
    "database/sql"
    "fmt"

    _ "github.com/mattn/go-sqlite3"
)
func main() {
View account_main.go
func main() {
    aliceAcct := OpenSavingsAccount("12345", "Alice", time.Date(1999, time.January, 3, 0, 0, 0, 0, time.UTC))
    fmt.Println("Alice's account =", aliceAcct)

    aliceAcct.Deposit(Money(100.0))
    fmt.Println("Alice's account (after deposit) =", aliceAcct)

    if err := aliceAcct.Withdraw(Money(10)); err != nil {
        fmt.Println(err)
    } else {
View account.go
package account

import (
    "errors"
    "fmt"
    "time"
)

// Account is an interface that wraps the common behavior for accounts.
type Account interface {
View Unix Shell related commands
View start-spark-master-slave-history-servers.cmd
TITLE Launcher - Spark Master, 1 Worker and History Server
set SPARK_HOME=C:\aravind\sw\spark-2.0.2-bin-hadoop2.7
:: - Find the IP address, store it in the IP_ADDR env var, and reuse it when launching the Worker
for /f "tokens=1-2 delims=:" %%a in ('ipconfig^|find "IPv4"') do set ip=%%b
set IP_ADDR=%ip:~1%
echo %IP_ADDR%
:: - Start master
START "Spark Master" %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.master.Master