jayhuang75 / install.txt
Last active May 18, 2017 17:45
Install Python 2.7, pip, and PySpark on CentOS 7
## update packages
yum update
## install Software Collections tooling and the SCL repo
yum install scl-utils
yum install centos-release-scl-rh
## install the Python 2.7 collection
yum install python27
## check the active Python version
python -V
## open a shell with the Python 2.7 collection enabled
scl enable python27 bash
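The description also promises pip and PySpark; a hedged continuation, assuming the matching SCL pip package is available and the PySpark release you pick still supports Python 2.7:
## install pip for the SCL Python, then PySpark (inside the scl-enabled shell)
yum install python27-python-pip
pip install pyspark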
jayhuang75 / Kafka-Hortonworks-create-pub-sub.sh
Last active May 23, 2017 03:29
Create a Kafka topic, publish, and listen on Hortonworks
cd /usr/hdp/current/kafka-broker/bin/
## create topic
./kafka-topics.sh --create --zookeeper sandbox.hortonworks.com:2181 --replication-factor 1 --partitions 1 --topic TOPIC_NAME
## list topics
./kafka-topics.sh --list --zookeeper sandbox.hortonworks.com:2181
## pub
./kafka-console-producer.sh --broker-list sandbox.hortonworks.com:6667 --topic TOPIC_NAME
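The title also mentions listening; a hedged consumer command, assuming the Zookeeper-based console consumer that older HDP Kafka builds ship (newer builds take --bootstrap-server sandbox.hortonworks.com:6667 instead):
## sub
./kafka-console-consumer.sh --zookeeper sandbox.hortonworks.com:2181 --topic TOPIC_NAME --from-beginning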
[[bin]] # binary target for the gRPC client
name = "rust-grpc-kafka-pipeline-client"
path = "src/bin/client.rs"
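With this [[bin]] entry in Cargo.toml, the client compiles to its own binary and can be launched directly:
cargo run --bin rust-grpc-kafka-pipeline-client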
// Init logging
env_logger::init();
// connect to the server over loopback ([::1]; the server binds the wildcard [::0])
let channel = tonic::transport::Channel::from_static("http://[::1]:50051")
    .connect()
    .await?;
let mut client = PlayerServiceClient::new(channel);
let mut sent_data = Vec::new();
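A hedged sketch of streaming the collected sent_data to the server; the RPC name send_player and the tokio_stream dependency are illustrative assumptions, not taken from the gist:
// Hypothetical client-streaming call; substitute the RPC your .proto actually defines
let outbound = tokio_stream::iter(sent_data);
let response = client.send_player(tonic::Request::new(outbound)).await?;
info!("[client.rs] server replied: {:?}", response.into_inner());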
jayhuang75 / rust-gist-medium-grpc-server-start
Created September 13, 2020 16:14
Build a Realtime Data Pipeline During the Weekend in Rust
// Load Env Config
let config = config::Config::new().unwrap_or_else(|err| {
panic!("Load Environment variable failed: {}", err);
});
// Init the gRPC server
let addr = "[::0]:50051".parse().unwrap();
let mut player_service = MyPlayerService::default();
player_service.config = config;
info!("[server.rs] PlayerServer listening on {}", addr);
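To actually start listening, tonic's Server builder wires the service in; a minimal sketch, assuming PlayerServiceServer is the tonic-generated server type matching the PlayerServiceClient used above:
tonic::transport::Server::builder()
    .add_service(PlayerServiceServer::new(player_service))
    .serve(addr)
    .await?;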
# MapReduce job example
hadoop jar hadoop-mapreduce-examples-2.7.1-sources.jar org.apache.hadoop.examples.WordCount input output
# Hive job example
hive -f example_insert.hql
File System Counters
        FILE: Number of bytes read=52
        FILE: Number of bytes written=229301
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
        HDFS: Number of bytes read=0
        HDFS: Number of bytes written=25
        HDFS: Number of read operations=5
        HDFS: Number of large read operations=0
{"entity":"attempt_1598586421585_0003_1_00_000000_0","entitytype":"TEZ_TASK_ATTEMPT_ID","relatedEntities":[{"entity":"6c45e57db0e1:43133","entitytype":"nodeId"},{"entity":"container_e04_1598586421585_0003_01_000002","entitytype":"containerId"},{"entity":"task_1598586421585_0003_1_00_000000","entitytype":"TEZ_TASK_ID"}],"events":[{"ts":1598832652874,"eventtype":"TASK_ATTEMPT_STARTED"}],"otherinfo":{"inProgressLogsURL":"6c45e57db0e1:8042\/node\/containerlogs\/container_e04_1598586421585_0003_01_000002\/root","completedLogsURL":"http:\/\/historyserver:8188\/applicationhistory\/logs\/\/6c45e57db0e1:43133\/container_e04_1598586421585_0003_01_000002\/v_Map 1_attempt_1598586421585_0003_1_00_000000_0\/root"}}
jayhuang75 / rust-gist-medium-monitoring-hadoop-job-4
Created September 13, 2020 16:36
Get the logs from Hadoop for development
# copy from HDFS to the local container
hdfs dfs -copyToLocal <log-id>
# copy from the local container to the local machine
docker cp <container-id>:<log-id> <your-path-of-rust-project>
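Alternatively, assuming YARN log aggregation is enabled on the sandbox, the aggregated logs can be fetched in one step:
# fetch the aggregated logs of a finished application
yarn logs -applicationId <application-id>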
jayhuang75 / rust-gist-medium-monitoring-hadoop-job-4.rs
Last active September 13, 2020 22:26
Parsing MapReduce and Hive logs in Rust
lazy_static! {
    // matches MapReduce counter lines such as "FILE: Number of bytes read=52" (captures "read=52")
    static ref MAP_REDUCE: Regex = Regex::new(r"[a-zA-Z\(\)]+=[0-9]+").unwrap();
    // flags Tez/Hive event lines, which carry the "entity" field
    static ref HIVE: Regex = Regex::new(r"(?x)entity*").unwrap();
}
#[allow(dead_code)]
pub fn run(tx: channel::Sender<BatchCTL>, app: &NewApp) -> Result<(), Box<dyn Error>> {
    let output = Command::new("hdfs")
        .arg("dfs")
        .arg("-cat")