Skip to content

Instantly share code, notes, and snippets.

-- Provision the nyse schema together with a dedicated account for it.
CREATE DATABASE nyse;

-- The account name is written out in full; a name without a host part
-- resolves to the '%' (any host) wildcard.
CREATE USER 'nyse_user'@'%' IDENTIFIED BY 'itversity';

-- Full control over every object inside the nyse schema.
GRANT ALL PRIVILEGES ON nyse.* TO 'nyse_user'@'%';

-- Server-wide privileges; FILE and SUPER can only be granted at the
-- global (*.*) level.
-- NOTE(review): SUPER is a very broad privilege; confirm it is really needed.
GRANT FILE, SUPER ON *.* TO 'nyse_user'@'%';

-- Reload the grant tables.
FLUSH PRIVILEGES;
# ZooKeeper ensemble used by both commands below.
zookeeper_quorum=m01.itversity.com:2181,m02.itversity.com:2181,w01.itversity.com:2181

# Create the kafkadg topic with a single partition and a replication factor of 1.
kafka-topics.sh --create \
  --zookeeper "${zookeeper_quorum}" \
  --topic kafkadg \
  --partitions 1 \
  --replication-factor 1

# Print every topic registered in that ensemble.
kafka-topics.sh --list --zookeeper "${zookeeper_quorum}"
# Install the MariaDB server package without prompting for confirmation.
sudo yum install -y mariadb-server

# Register the service to start at boot, then bring it up right away.
sudo systemctl enable mariadb.service
sudo systemctl start mariadb.service

# Run the bundled hardening script; it asks its questions interactively.
sudo /usr/bin/mysql_secure_installation
# Import the order_items_nopk table of retail_db (MySQL on ms.itversity.com)
# into a sub-directory of the given warehouse directory.
# --autoreset-to-one-mapper makes Sqoop fall back to a single mapper when the
# table has no primary key and no --split-by column is supplied.
# NOTE(review): the password is passed in clear text on the command line;
# consider -P or --password-file outside of a demo.
sqoop import \
  --connect jdbc:mysql://ms.itversity.com:3306/retail_db \
  --username retail_user --password itversity \
  --table order_items_nopk \
  --autoreset-to-one-mapper \
  --warehouse-dir /user/dgadiraju/sqoop_import/retail_db
import pandas as pd
import json
# Load the addresses extract from S3 into a DataFrame.
df = pd.read_csv('s3://airetail/bronze/sfleads/addresses/Addresses.csv')

# The address column holds JSON text; decode every value into Python objects.
df['address'] = df['address'].apply(json.loads)

# phone_numbers may be missing: keep None for null cells, decode the rest.
df['phone_numbers'] = df['phone_numbers'].apply(
    lambda raw: None if pd.isnull(raw) else json.loads(raw)
)
# Build a local mirror of the Cloudera Manager 5 yum repository under the
# web root.
#
# Every step uses absolute paths instead of relying on `cd`: an unchecked
# `cd` that fails would make the following sudo commands run in whatever
# directory the shell happened to be in. Unlike the cd-based version, this
# leaves the caller's working directory unchanged.
web_root=/var/www/html
mirror_dir="${web_root}/cm5/redhat/7/x86_64/cm/5"

# Register the upstream repository definition with yum.
sudo wget -P /etc/yum.repos.d/ \
  https://archive.cloudera.com/cm5/redhat/7/x86_64/cm/cloudera-manager.repo

# Directory layout that the mirrored packages will be served from.
sudo mkdir -p "${mirror_dir}"

# Download the packages of the cloudera-manager repo; reposync stores them
# in a directory named after the repo id below the download path.
sudo reposync -r cloudera-manager -p "${web_root}"

# Move the downloaded RPMS directory into the mirror layout.
sudo mv "${web_root}/cloudera-manager/RPMS" "${mirror_dir}"

# Generate the repository metadata for the mirror.
sudo createrepo "${mirror_dir}"
import java.sql.Timestamp
import java.text.SimpleDateFormat
import java.util.Date
import com.typesafe.config.ConfigFactory
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Get, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.sql.functions._
// sbt build definition for the bcstructuredstreamingdemo project.
name := "bcstructuredstreamingdemo"
version := "1.0"
scalaVersion := "2.11.12"

// Both Spark modules are pinned to the same release.
val sparkVersion = "2.3.0"

// %% appends the Scala binary version to the artifact name; % uses it as is.
libraryDependencies ++= Seq(
  "com.typesafe" % "config" % "1.3.2",
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  "org.apache.hbase" % "hbase-client" % "1.1.8"
)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.