import os
import glob
from importlib.util import find_spec


def _contains_jar(path):
    # Look under the given directory for a jar matching the (placeholder) name pattern.
    jar_file = os.path.join(path, "dummy-*.jar")
    jar_file_matches = glob.glob(jar_file)
    if len(jar_file_matches) > 0:
        # Return the full path of the first matching jar.
        return jar_file_matches[0]
    else:
        # No matching jar found under this path.
        return None
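
A quick usage sketch; the plugin directory below is a made-up example, and the jar name pattern is the placeholder used above:

# Hypothetical directory; the helper returns the first matching jar path, or None.
plugin_dir = "/opt/pinot/plugins/some-plugin"
jar_path = _contains_jar(plugin_dir)
if jar_path is not None:
    print("Found jar:", jar_path)
else:
    print("No matching jar under", plugin_dir)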
import com.github.davidmoten.geo.GeoHash;

// Encode a latitude/longitude pair into a geohash of the requested length,
// so it can be exposed to Pinot queries as a scalar function.
public static String latLngToGeo(Double latitude, Double longitude, Integer geoLength) {
    return GeoHash.encodeHash(latitude, longitude, geoLength);
}
SELECT encode_geohash(28.7, 77.1, 4) FROM TABLE
-- Scalar Functions
SELECT UPPER(playerName) FROM baseballStats
SELECT SUBSTR(playerName, 2, 11) FROM baseballStats
SELECT CONCAT(firstName, lastName, '-') FROM baseballStats
-- Non-scalar Functions
SELECT groovy('{"returnType":"STRING","isSingleValue":true}', 'new Date().format( "yyyyMMdd" )')
FROM table
SELECT groovy('{"returnType":"INT","isSingleValue":true}', 'arg0 + arg1', colA, colB)
FROM table
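
These queries can also be submitted over HTTP. A minimal Python sketch, assuming a Pinot broker listening on localhost:8099 and a table named myTable (both placeholders), posting to the broker's /query/sql endpoint:

import requests  # assumes the requests library is installed

# Hypothetical broker address; adjust to your deployment.
BROKER_URL = "http://localhost:8099/query/sql"

query = (
    "SELECT groovy('{\"returnType\":\"INT\",\"isSingleValue\":true}', "
    "'arg0 + arg1', colA, colB) FROM myTable"
)

response = requests.post(BROKER_URL, json={"sql": query})
response.raise_for_status()

result = response.json()
# SQL responses carry the data under resultTable -> rows.
for row in result["resultTable"]["rows"]:
    print(row)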
export PINOT_VERSION=0.5.0
wget https://downloads.apache.org/incubator/pinot/apache-pinot-incubating-$PINOT_VERSION/apache-pinot-incubating-$PINOT_VERSION-bin.tar.gz
tar -zxvf apache-pinot-incubating-$PINOT_VERSION-bin.tar.gz
cd apache-pinot-incubating-$PINOT_VERSION-bin
timestampInEpoch  id  name      age  score
1597044264380     1   david     15   98
1597044264381     2   henry     16   97
1597044264382     3   katie     14   99
1597044264383     4   catelyn   15   96
1597044264384     5   emma      13   93
1597044264390     6   john      15   100
1597044264396     7   isabella  13   89
1597044264399     8   linda     17   91
1597044264502     9   mark      16   67
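
To reproduce this sample input locally, a small Python sketch that writes the same rows to a students.csv file (the file name is just a placeholder matching the Spark snippet below):

import csv

rows = [
    (1597044264380, 1, "david", 15, 98),
    (1597044264381, 2, "henry", 16, 97),
    (1597044264382, 3, "katie", 14, 99),
    (1597044264383, 4, "catelyn", 15, 96),
    (1597044264384, 5, "emma", 13, 93),
    (1597044264390, 6, "john", 15, 100),
    (1597044264396, 7, "isabella", 13, 89),
    (1597044264399, 8, "linda", 17, 91),
    (1597044264502, 9, "mark", 16, 67),
]

with open("students.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["timestampInEpoch", "id", "name", "age", "score"])
    writer.writerows(rows)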
val df = spark.read.format("csv").option("header", true).load("path/to/students.csv")
df.write.option("compression","none").mode("overwrite").parquet("/path/to/batch_input/")
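
For anyone doing the same step from PySpark, a roughly equivalent sketch; the paths are the same placeholders as above, and declaring the schema explicitly keeps the numeric columns typed instead of being read as strings:

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType, IntegerType, StringType

spark = SparkSession.builder.appName("students-to-parquet").getOrCreate()

# Explicit schema so numeric columns are not inferred as strings.
schema = StructType([
    StructField("timestampInEpoch", LongType()),
    StructField("id", IntegerType()),
    StructField("name", StringType()),
    StructField("age", IntegerType()),
    StructField("score", IntegerType()),
])

df = (spark.read.format("csv")
      .option("header", True)
      .schema(schema)
      .load("path/to/students.csv"))

(df.write
   .option("compression", "none")
   .mode("overwrite")
   .parquet("/path/to/batch_input/"))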
{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "age",
        "id",
        "name",
        "score",
        "timestampInEpoch"
      ],
export PINOT_VERSION=0.4.0
export PINOT_DISTRIBUTION_DIR=/path/to/apache-pinot-incubating-${PINOT_VERSION}-bin
spark-submit \
  --class org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand \
  --master local --deploy-mode client \
  --conf "spark.driver.extraJavaOptions=-Dplugins.dir=${PINOT_DISTRIBUTION_DIR}/plugins -Dplugins.include=pinot-s3,pinot-parquet -Dlog4j2.configurationFile=${PINOT_DISTRIBUTION_DIR}/conf/pinot-ingestion-job-log4j2.xml" \
  --conf "spark.driver.extraClassPath=${PINOT_DISTRIBUTION_DIR}/plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/pinot-batch-ingestion-spark-${PINOT_VERSION}-shaded.jar:${PINOT_DISTRIBUTION_DIR}/lib/pinot-all-${PINOT_VERSION}-jar-with-dependencies.jar:${PINOT_DISTRIBUTION_DIR}/plugins/pinot-file-system/pinot-s3/pinot-s3-${PINOT_VERSION}-shaded.jar:${PINOT_DISTRIBUTION_DIR}/plugins/pinot-input-format/pinot-parquet/pinot-parquet-${PINOT_VERSION}-shaded.jar" \
  local://${PINOT_DISTRIBUTION_DIR}/lib/pinot-all-${PINOT_VERSION}-jar-with-dependencies.jar -jobSpecFile /path/to/spark_jo