Skip to content

Instantly share code, notes, and snippets.

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@bbejeck
bbejeck / CombineByKey.scala
Created August 7, 2015 14:52
Source code for CombineByKey blog post
package bbejeck.grouping
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
/**
* Created by bbejeck on 8/6/15.
* Example usage of combineByKey
@bbejeck
bbejeck / punctuateMethodForProcessor.java
Last active October 18, 2017 01:53
Updating the Prediction Model
// process call left out for clarity; it places airline data
// in a list, storing in state store by airport code
@Override
public void punctuate(long timestamp) {
KeyValueIterator<String, List<String>> allFlights = flights.all();
while (allFlights.hasNext()) {
KeyValue<String, List<String>> kv = allFlights.next();
List<String> flightList = kv.value;
String key = kv.key;
if(flightList.size() >= 100) {
@bbejeck
bbejeck / predictionPredictMethod.java
Last active September 15, 2017 14:39
Showing the Prediction Process
public static String predict(DataRegression dataRegression) {
try (OnlineLogisticRegression logisticRegression = new OnlineLogisticRegression()) {
FlightData flightData = new FlightData(dataRegression.data);
logisticRegression.readFields(new DataInputStream(new ByteArrayInputStream(dataRegression.coefficients)));
double prediction = logisticRegression.classifyScalar(flightData.vector);
String arrivalPrediction = prediction > 0.5 ? "on-time" : "late";
return String.format("%s predicted to be %s", new Flight(dataRegression.data), arrivalPrediction);
} catch (Exception e) {
LOG.error("Problems with predicting " + dataRegression.data, e);
return null;
@bbejeck
bbejeck / buildingKStreamAndGlobalKTable.java
Created September 15, 2017 14:32
Builds The KStream and GlobalKTable
// configuration and Serde creation left out for clarity
// Stream of String-keyed, String-valued records read from "raw-airline-data".
KStream<String, String> dataByAirportStream = builder.stream("raw-airline-data");
// Global table over "onlineRegression-by-airport" with String keys and byte[] values.
// NOTE(review): presumably the byte[] values are serialized regression coefficients
// keyed by airport code — confirm against whatever writes to that source.
GlobalKTable<String, byte[]> regressionsByAirPortTable = builder.globalTable(Serdes.String(),
byteArraySerde,
"onlineRegression-by-airport");
// stream reads raw data joins with coefficients then makes prediction
dataByAirportStream.join(regressionsByAirPortTable,
(k, v) -> k,
DataRegression::new)
@bbejeck
bbejeck / dataByAirportStream.java
Created September 15, 2017 14:00
Kafka Streams Topology for online predictions
// Join each stream record with the global-table entry looked up by the record's own key
// (the key mapper returns k unchanged); the two values are combined via DataRegression::new.
dataByAirportStream.join(regressionsByAirPortTable,(k, v) -> k, DataRegression::new)
// Replace each joined value with the result of Predictor.predict.
.mapValues(Predictor::predict)
// Drop records whose prediction value is null.
.filter((k, v) -> v != null)
// Print every remaining prediction to stdout as it passes through.
.peek((k, v) -> System.out.println("Prediction " + v))
// Send the surviving records to "predictions".
.to("predictions");
@bbejeck
bbejeck / ListenableFuturesTest.java
Created November 23, 2011 04:25
Unit test/sample code for ListenableFutures blog
import com.google.common.util.concurrent.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
@bbejeck
bbejeck / LambdaGuavaTest.java
Created December 23, 2011 05:25
Source for Guava Functions and Java 8 Lambdas
import bbejeck.guava.futures.SearchingTestBase;
import bbejeck.support.model.Person;
import com.google.common.base.Function;
import com.google.common.util.concurrent.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.lang.SuppressWarnings;
import java.util.List;
@bbejeck
bbejeck / backup_config.sh
Created December 7, 2011 04:32
Source for Simple WordPress backups Blog
#! /bin/sh
# Configuration variables for a backup script. The values below look like
# placeholders to be replaced with real settings — TODO confirm against the
# script that consumes this file.
# NOTE(review): the per-variable notes are inferred from names only; verify.

# presumably the name used for the backup artifact
BLOG=blog_backup
# NOTE(review): the literal <username> is unquoted and contains shell
# redirection characters; substitute a real user name before using this file.
BASE_DIR=/home/<username>/webapps/wp
# presumably the login on the remote host
USER=remote_username
# presumably the database password, database user and database name
PASS=database_password
DBUSER=database_user
DATABASE=database_name
# presumably where backups are written
DEST_DIR=destination_dir
# presumably the address of the host serving the blog
IP_ADDRESS=blog_ip_address
@bbejeck
bbejeck / MonitorExample.java
Created November 16, 2011 05:38
Sample Code for Guava Monitor Blog
import com.google.common.util.concurrent.Monitor;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Created by IntelliJ IDEA.
* User: bbejeck
* Date: 11/11/11
* Time: 10:01 PM
*/