View punctuateMethodForProcessor.java
// process call left out for clarity; it places airline data
// in a list, storing in state store by airport code
@Override
public void punctuate(long timestamp) {
KeyValueIterator<String, List<String>> allFlights = flights.all();
while (allFlights.hasNext()) {
KeyValue<String, List<String>> kv = allFlights.next();
List<String> flightList = kv.value;
String key = kv.key;
if(flightList.size() >= 100) {
View predictionPredictMethod.java
public static String predict(DataRegression dataRegression) {
try (OnlineLogisticRegression logisticRegression = new OnlineLogisticRegression()) {
FlightData flightData = new FlightData(dataRegression.data);
logisticRegression.readFields(new DataInputStream(new ByteArrayInputStream(dataRegression.coefficients)));
double prediction = logisticRegression.classifyScalar(flightData.vector);
String arrivalPrediction = prediction > 0.5 ? "on-time" : "late";
return String.format("%s predicted to be %s", new Flight(dataRegression.data), arrivalPrediction);
} catch (Exception e) {
LOG.error("Problems with predicting " + dataRegression.data, e);
return null;
View mappingValues.java
dataByAirportStream.join(regressionsByAirPortTable,
(k, v) -> k,
DataRegression::new)
.mapValues(Predictor::predict)
View buildingKStreamAndGlobalKTable.java
// configuration and Serde creation left out for clarity
KStream<String, String> dataByAirportStream = builder.stream("raw-airline-data");
GlobalKTable<String, byte[]> regressionsByAirPortTable = builder.globalTable(Serdes.String(),
byteArraySerde,
"onlineRegression-by-airport");
// stream reads raw data joins with coefficients then makes prediction
dataByAirportStream.join(regressionsByAirPortTable,
(k, v) -> k,
DataRegression::new)
View dataByAirportStream.java
dataByAirportStream.join(regressionsByAirPortTable,(k, v) -> k, DataRegression::new)
.mapValues(Predictor::predict)
.filter((k, v) -> v != null)
.peek((k, v) -> System.out.println("Prediction " + v))
.to("predictions");
View tmux-kafka.sh
#!/bin/sh
KAFKA_DIR=/usr/local/kafka_2.11-0.9.0.0-SNAPSHOT
START_ZK="./bin/zookeeper-server-start.sh"
ZK_PROPS="config/zookeeper.properties"
START_KAFKA="./bin/kafka-server-start.sh"
KAFKA_PROPS="config/server.properties"
View CombineByKey.scala
package bbejeck.grouping
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
/**
* Created by bbejeck on 8/6/15.
* Example usage of combineByKey
View AggregateByKey.scala
package bbejeck.grouping
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
/**
* Created by bbejeck on 7/31/15.
*
View PartiallyAppliedFunctionsTest.java
package bbejeck.function;
import org.junit.Test;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import static org.junit.Assert.*;
import static org.hamcrest.CoreMatchers.*;
View ThrowingFunction.java
/*
* *
*
*
* Copyright 2015 Bill Bejeck
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*