sudo vim /etc/yum.repos.d/cassandra.repo
[cassandra]
name=Apache Cassandra
baseurl=https://www.apache.org/dist/cassandra/redhat/311x/
INFO [2016-09-21 07:51:29,993] ({pool-2-thread-3} SchedulerFactory.java[jobStarted]:131) - Job remoteInterpretJob_1474444289992 started by scheduler org.apache.zeppelin.spark.SparkInterpreter1153170779
INFO [2016-09-21 07:51:30,682] ({pool-2-thread-3} Logging.scala[logInfo]:58) - Starting job: take at NativeMethodAccessorImpl.java:-2
INFO [2016-09-21 07:51:30,709] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Registering RDD 8 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Got job 0 (take at NativeMethodAccessorImpl.java:-2) with 200 output partitions
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Final stage: ResultStage 1 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,713] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Parents of final stage: List(ShuffleMapStage 0)
INFO [2016-09-21 07:51:30,714] ({dag-scheduler-event-loop} Logging.sca… [log line truncated in source]
# Build Apache Zeppelin for CDH 5.7.1 (Spark 1.6.2 / Hadoop 2.6.0-cdh5.7.1).
# -Pr and -Psparkr enable the R and SparkR interpreters; tests skipped.
# NOTE(review): the original command ended in a dangling '\' continuation —
# a trailing -Dhive.hadoop.version=2.6.0-cdh5.7.1 property (as in the Spark
# build below) may have been cut off; confirm against the Zeppelin build docs.
mvn clean package -DskipTests \
  -Pr \
  -Pspark-1.6 \
  -Psparkr \
  -Phadoop-2.6 \
  -Dspark.version=1.6.2 \
  -Dhadoop.version=2.6.0-cdh5.7.1 \
  -Dhbase.hbase.version=1.2.0-cdh5.7.1 \
  -Dhbase.hadoop.version=2.6.0-cdh5.7.1 \
  -Dhive.hive.version=1.1.0-cdh5.7.1
# Build Apache Spark with Hive, the Hive ThriftServer, YARN, and SparkR
# against the CDH 5.7.1 Hadoop/Hive versions; tests skipped.
mvn clean package -DskipTests \
  -Phive \
  -Phive-thriftserver \
  -Pyarn \
  -Psparkr \
  -Phadoop-2.6 \
  -Dhadoop.version=2.6.0-cdh5.7.1 \
  -Dhive.hive.version=1.1.0-cdh5.7.1 \
  -Dhive.hadoop.version=2.6.0-cdh5.7.1
Stream<Supplier<String>> calls = list.stream().map(value -> () -> callMe(value)); | |
List<String> results = calls | |
.map(CompletableFuture::supplyAsync).collect(Collectors.toList()) | |
.stream() | |
.map(CompletableFuture::join).collect(Collectors.toList()); |
/* | |
* PyroCollector : Clickstream Collector Client | |
* (c)2018 DataPyro.com | |
*/ | |
// set your endpoint | |
var endPoint = "https://d1q9rxqnug6ou7.cloudfront.net/public/blank.gif?"; | |
// collector | |
var pyroCollector = { | |
collect: function(obj) { | |
if (typeof(obj) !== undefined) { |
<!-- Maven dependency for sqlite4java (snippet truncated in source) -->
<properties>
  <sqlite4java.version>1.0.392</sqlite4java.version>
</properties>
<dependencies>
  <dependency>
    <groupId>com.almworks.sqlite4java</groupId>
    <artifactId>sqlite4java</artifactId>
    <version>${sqlite4java.version}</version>
// Spark context configured from external properties; the app name mirrors
// the job class so the job is easy to identify in the Spark UI.
SparkConf sparkConf = new SparkConf()
        .setAppName(JdbcDynamoDbExportJob.class.getSimpleName())
        .setMaster(config.getProperty("spark.master"));
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
SQLContext sqlContext = new SQLContext(jsc);

// read from database
// JDBC credentials come from the same config source.
// NOTE(review): credentials are held as plain-text properties — confirm
// this is acceptable for the target deployment environment.
Properties properties = new Properties();
properties.setProperty("user", config.getProperty("jdbc.user"));
properties.setProperty("password", config.getProperty("jdbc.pass"));
var elasticsearch = require('elasticsearch'); | |
var elastic = new elasticsearch.Client({ | |
host: 'localhost:9200', | |
log: 'info' | |
}); | |
var kafka = require('kafka-node'), | |
HighLevelConsumer = kafka.HighLevelConsumer, | |
client = new kafka.Client(), | |
consumer = new HighLevelConsumer( |
#!/bin/bash
# Bootstrap build tooling on an RPM-based host: git + Apache Maven
# (Maven comes from the Fedora People epel-apache-maven repo).

# install git (-y added so the script runs unattended, consistent with
# the maven install below — without it yum prompts and the script hangs)
sudo yum install -y git

# maven
sudo wget http://repos.fedorapeople.org/repos/dchen/apache-maven/epel-apache-maven.repo -O /etc/yum.repos.d/epel-apache-maven.repo
# Single-quote the sed script so the shell cannot expand or mangle
# $releasever before sed sees it (the original relied on an unquoted \$).
sudo sed -i 's/$releasever/6/g' /etc/yum.repos.d/epel-apache-maven.repo
sudo yum install -y apache-maven
mvn --version