sudo vim /etc/yum.repos.d/cassandra.repo
[cassandra]
name=Apache Cassandra
baseurl=https://www.apache.org/dist/cassandra/redhat/311x/
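The upstream repo definition (per the Apache Cassandra install docs, to the best of my knowledge) also enables GPG verification; the remaining lines and the install step itself look like this:

gpgcheck=1
repo_gpgcheck=1
gpgkey=https://www.apache.org/dist/cassandra/KEYS

sudo yum install cassandra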
INFO [2016-09-21 07:51:29,993] ({pool-2-thread-3} SchedulerFactory.java[jobStarted]:131) - Job remoteInterpretJob_1474444289992 started by scheduler org.apache.zeppelin.spark.SparkInterpreter1153170779
INFO [2016-09-21 07:51:30,682] ({pool-2-thread-3} Logging.scala[logInfo]:58) - Starting job: take at NativeMethodAccessorImpl.java:-2
INFO [2016-09-21 07:51:30,709] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Registering RDD 8 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Got job 0 (take at NativeMethodAccessorImpl.java:-2) with 200 output partitions
INFO [2016-09-21 07:51:30,712] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Final stage: ResultStage 1 (take at NativeMethodAccessorImpl.java:-2)
INFO [2016-09-21 07:51:30,713] ({dag-scheduler-event-loop} Logging.scala[logInfo]:58) - Parents of final stage: List(ShuffleMapStage 0)
INFO [2016-09-21 07:51:30,714] ({dag-scheduler-event-loop} Logging.sca…
# Build Zeppelin against CDH 5.7.1 (Spark 1.6, R/SparkR profiles, tests skipped)
mvn clean package -DskipTests \
-Pr \
-Pspark-1.6 \
-Psparkr \
-Phadoop-2.6 \
-Dspark.version=1.6.2 \
-Dhadoop.version=2.6.0-cdh5.7.1 \
-Dhbase.hbase.version=1.2.0-cdh5.7.1 \
-Dhbase.hadoop.version=2.6.0-cdh5.7.1 \
-Dhive.hive.version=1.1.0-cdh5.7.1 \
-Dhive.hadoop.version=2.6.0-cdh5.7.1
# Build a matching Spark 1.6 distribution with Hive/YARN support against the same CDH 5.7.1 artifacts
mvn clean package -DskipTests \
-Phive \
-Phive-thriftserver \
-Pyarn \
-Psparkr \
-Phadoop-2.6 \
-Dhadoop.version=2.6.0-cdh5.7.1 \
-Dhive.hive.version=1.1.0-cdh5.7.1 \
-Dhive.hadoop.version=2.6.0-cdh5.7.1
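Once both builds finish, Zeppelin is typically pointed at the resulting Spark distribution through conf/zeppelin-env.sh; a minimal sketch, where the paths are placeholders for your environment:

export SPARK_HOME=/opt/spark-1.6.2-cdh5.7.1   # placeholder path to the Spark build above
export HADOOP_CONF_DIR=/etc/hadoop/conf       # cluster config dir, may differ
export MASTER=yarn-client                     # run the Spark interpreter against YARN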
// Wrap each call in a Supplier so nothing executes yet
Stream<Supplier<String>> calls = list.stream().map(value -> () -> callMe(value));
// Collect into a List first so every future is started before any join() blocks;
// a single stream pipeline would start and join one future at a time
List<String> results = calls
.map(CompletableFuture::supplyAsync).collect(Collectors.toList())
.stream()
.map(CompletableFuture::join).collect(Collectors.toList());
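callMe is not shown in the excerpt; here is a self-contained sketch with a stand-in implementation, to make the two-phase start-then-join behavior runnable:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ParallelCallsExample {
    // hypothetical stand-in for the remote call used in the snippet above
    static String callMe(String value) {
        return "result-" + value;
    }

    public static void main(String[] args) {
        List<String> list = Arrays.asList("a", "b", "c");
        // same pattern: start all futures first, then join them
        Stream<Supplier<String>> calls = list.stream().map(value -> () -> callMe(value));
        List<String> results = calls
                .map(CompletableFuture::supplyAsync).collect(Collectors.toList())
                .stream()
                .map(CompletableFuture::join).collect(Collectors.toList());
        System.out.println(results); // prints [result-a, result-b, result-c]
    }
}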
<properties>
  <sqlite4java.version>1.0.392</sqlite4java.version>
</properties>
<dependencies>
  <dependency>
    <groupId>com.almworks.sqlite4java</groupId>
    <artifactId>sqlite4java</artifactId>
    <version>${sqlite4java.version}</version>
  </dependency>
</dependencies>
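sqlite4java loads a native library at runtime (the dependency is often pulled in for DynamoDB Local); if the natives are not on the default library path, their directory is commonly passed via the sqlite4java.library.path system property. The jar name and directory below are placeholders:

java -Dsqlite4java.library.path=native-libs -jar app.jar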
SparkConf sparkConf = new SparkConf()
.setAppName(JdbcDynamoDbExportJob.class.getSimpleName())
.setMaster(config.getProperty("spark.master"));
JavaSparkContext jsc = new JavaSparkContext(sparkConf);
SQLContext sqlContext = new SQLContext(jsc);
// read from database
Properties properties = new Properties();
properties.setProperty("user", config.getProperty("jdbc.user"));
properties.setProperty("password", config.getProperty("jdbc.pass"));
/*
 * PyroCollector : Clickstream Collector Client
 * (c)2018 DataPyro.com
 */
// set your endpoint
var endPoint = "https://d1q9rxqnug6ou7.cloudfront.net/public/blank.gif?";
// collector
var pyroCollector = {
  collect: function(obj) {
    // typeof yields a string, so compare against "undefined", not the undefined value
    if (typeof obj !== "undefined") {
      // completion sketch (the original listing is truncated here):
      // serialize the event object and fire it as a 1x1-gif beacon request
      var params = Object.keys(obj).map(function(k) {
        return encodeURIComponent(k) + "=" + encodeURIComponent(obj[k]);
      }).join("&");
      new Image().src = endPoint + params;
    }
  }
};
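A call site might look like this (the field names are illustrative, not part of the original client):

pyroCollector.collect({ event: "pageview", page: window.location.pathname });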
tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1
# sources
tier1.sources.source1.type = org.apache.flume.source.kafka.KafkaSource
tier1.sources.source1.zookeeperConnect = localhost:2181
tier1.sources.source1.topic = network-data
tier1.sources.source1.groupId = flume-kafka-test
tier1.sources.source1.channels = channel1
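The agent declares channel1 and sink1 but the excerpt stops at the source; a minimal sketch of the missing sections, using a memory channel and a logger sink (the types and capacities are assumptions for testing, not from the original config):

# channels (sketch: capacities are illustrative)
tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 10000
tier1.channels.channel1.transactionCapacity = 1000
# sinks (sketch: logger sink for testing; swap in hdfs/kafka as needed)
tier1.sinks.sink1.type = logger
tier1.sinks.sink1.channel = channel1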
# Script for generating csv partitions report for Impala
IMPALA_DAEMON=localhost
databases=$(impala-shell --quiet -i $IMPALA_DAEMON -d default --delimited -q "SHOW DATABASES" | cut -f1 | grep -e dl -e ods_)
for database in $databases
do
  echo $database
  directory="partitions/$database"
  mkdir -p $directory
  # completion sketch (the original listing is truncated here):
  # write SHOW PARTITIONS output to one csv per table in the database
  tables=$(impala-shell --quiet -i $IMPALA_DAEMON -d $database --delimited -q "SHOW TABLES" | cut -f1)
  for table in $tables
  do
    impala-shell --quiet -i $IMPALA_DAEMON -d $database --delimited -q "SHOW PARTITIONS $table" > "$directory/$table.csv"
  done
done