This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Round-trips a JSON dataset through Parquet with Spark and prints the schema
# of the Parquet copy.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("Mining App").getOrCreate()

# Load the JSON records and preview the first rows on stdout.
dataFrame = spark.read.json("/home/data/testData.json")
dataFrame.show()

# mode("overwrite") keeps the script re-runnable: with the default save mode
# the write raises as soon as the target path already exists.
dataFrame.write.mode("overwrite").parquet("/home/data/testData.parquet")

# Read the Parquet copy back and print its schema.
parquetData = spark.read.parquet("/home/data/testData.parquet")
parquetData.printSchema()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reads a JSON dataset with Spark, previews it, and stores it as Parquet.
from pyspark.sql import SparkSession

# Parenthesised chain instead of backslash continuations, so the optional
# setting below can be enabled by uncommenting it in place (it has to be
# applied before getOrCreate()).
spark = (
    SparkSession.builder
    .appName("Mining App")
    # .config("spark.driver.extraClassPath", "/home/data/drivers/postgresql-42.1.4.jar")
    .getOrCreate()
)

# Load the JSON records and preview the first rows on stdout.
dataFrame = spark.read.json("/home/data/testData.json")
dataFrame.show()

# mode("overwrite") keeps the script re-runnable: with the default save mode
# the write raises as soon as the target path already exists.
dataFrame.write.mode("overwrite").parquet("/home/data/testData.parquet")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
class BetEventPersistance: | |
def __init__(self): | |
self.out_file = open("data.json","w") | |
self.url = "http://localhost:10000/message/producer/betevent" | |
self.headers = { | |
'content-type': "application/json", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Work inside the retail_db database.
USE retail_db;

-- Avro-backed table. The statement declares no columns; the SerDe is pointed
-- at the schema file named in 'avro.schema.url'.
CREATE TABLE departments_avro
  ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
  STORED AS
    INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
  LOCATION 'hdfs:///user/cloudera/departments_avro'
  TBLPROPERTIES ('avro.schema.url'='hdfs:///user/cloudera/departments_schema.avsc');
# departments.hql |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import every table of the MySQL retail_db database into Hive, compressed
# with Snappy. -P prompts for the database password on the console.
# NOTE(review): --create-hive-table makes the job fail when a target Hive table
# already exists, which defeats --hive-overwrite on re-runs; confirm which
# behaviour is wanted.
# NOTE(review): --outdir is the directory for Sqoop's generated code, not an
# import target; confirm /user/cloudera/output is the intended location.
sqoop import-all-tables \
  --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
  --username root \
  -P \
  --compress \
  --compression-codec org.apache.hadoop.io.compress.SnappyCodec \
  --hive-import \
  --hive-overwrite \
  --create-hive-table \
  --outdir /user/cloudera/output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Sample Data: | |
-- Jun 30 2017 12:00:00 [INFO] kafkaproducer com.logger.info.nameoftheclasss.factory: this is the message coming out from the log | |
use extractions; | |
show tables; | |
create table if not exists logdata( | |
month string, | |
date int, | |
year int, | |
time string, | |
logtype string, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Two databases: one for raw imports, one for transformed data
-- (the latter with an explicit warehouse location).
CREATE DATABASE IF NOT EXISTS extractions COMMENT 'Raw imports';
CREATE DATABASE IF NOT EXISTS transformations
  COMMENT 'Transformed Data'
  LOCATION '/user/hive/warehouse/transformations';

USE extractions;

-- Staging table: each input line is kept as one unparsed string.
CREATE TABLE IF NOT EXISTS petextracts (
  data STRING
);

SHOW TABLES;

-- Load the CSV into the staging table, then inspect the raw rows.
LOAD DATA INPATH '/user/cloudera/case1.csv' INTO TABLE petextracts;
SELECT data FROM petextracts;
create table if not exists pets( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic sqoop commands
# Import the categories table as text with "|" between fields and a newline
# after every record. -P prompts for the database password on the console.
# The delimiter options are --lines-terminated-by / --fields-terminated-by;
# Sqoop has no --terminated-by or --fields-seperated-by option.
sqoop import \
  --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
  --username root \
  -P \
  --table categories \
  --target-dir /user/cloudera/categories_pipe \
  --lines-terminated-by "\n" \
  --fields-terminated-by "|"
sqoop import --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creating a sqoop job
# Saves an export of /user/cloudera/categories into the categories table of
# the sqoop_exports database under the job name categories_export.
# -P prompts for the password instead of passing it in plain text on the
# command line, where it ends up in shell history and the process list.
sqoop job --create categories_export -- export \
  --connect jdbc:mysql://quickstart:3306/sqoop_exports \
  --username root \
  -P \
  --table categories \
  --export-dir /user/cloudera/categories

# displaying all sqoop jobs
sqoop job --list
# Displaying a specific sqoop job |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch version 1 of the schema stored under the subject "TweetStructure".
# NOTE(review): the port and path look like a Confluent Schema Registry
# endpoint; confirm against the service actually running on localhost:8081.
curl --request GET "http://localhost:8081/subjects/TweetStructure/versions/1"

# Response
# {
#   "subject": "TweetStructure",
#   "version": 1,
#   "id": 1,
#   "schema": "{\"type\":\"record\",\"name\":\"TweetStructure\",\"fields\":[{\"name\":\"version\",\"type\":\"int\"},{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"lang\",\"type\":\"string\"},{\"name\":\"isRetweet\",\"type\":\"boolean\"}]}"
# }
OlderNewer