John Rauser
Alice Goldfuss (GitHub)
Bryan Liles (CapitalOne)
/**
 * An observation with an optional target value and its features as a vector of Doubles.
 *
 * @param target   the target value for this observation, if there is one; defaults to `None`
 * @param features the feature values of this observation
 */
case class Example(target: Option[Double] = None, features: Vector[Double])
// Base model API looks something like: | |
abstract class BaseModel(val modelSettings: Settings) | |
extends Serializable | |
with Logging { | |
def fit(data: RDD[Example]) |
public class HCatInputFormat extends InputFormat<SerializableWritable<Writable>, HCatRecord> { | |
private final org.apache.hcatalog.mapreduce.HCatInputFormat input; | |
public HCatInputFormat() { | |
input = new org.apache.hcatalog.mapreduce.HCatInputFormat(); | |
} | |
@Override | |
public RecordReader<SerializableWritable<Writable>, HCatRecord> createRecordReader( |
// Python RDD creation functions // | |
// SequenceFile converted to Text and then to String | |
def sequenceFileAsText(path: String) = { | |
implicit val kcm = ClassManifest.fromClass(classOf[Text]) | |
implicit val fcm = ClassManifest.fromClass(classOf[SequenceFileAsTextInputFormat]) | |
new JavaPairRDD(sc | |
.newAPIHadoopFile[Text, Text, SequenceFileAsTextInputFormat](path) | |
.map{ case (k, v) => (k.toString, v.toString) } |
<style type="text/css"> | |
/* Dashboard list container: outer spacing, with overflowing content hidden. */
#dashboard_list ul {
  /* Margin shorthand values are space-separated (top right bottom left);
     comma separators make the whole declaration invalid, so it is ignored. */
  margin: 20px 40px 40px 10px;
  overflow: hidden;
}
/* Dashboard list items: floated to the left with a 1.5em line height. */
#dashboard_list li {
  line-height: 1.5em;
  float: left;
  display: inline;
}
John Rauser
Alice Goldfuss (GitHub)
Bryan Liles (CapitalOne)
Adrian -
I appreciate that you spent time writing this post. I know I've been up until 2am writing similarly long ones as well. I will take responsibility for having what is likely an irrational response (I blame Twitter for that) to the term "NoOps", but I invite you to investigate why that might be. I'm certainly not the only one who feels this way, apparently, and thus far I have decided this issue is easily the largest distraction in my field I've encountered in recent years. I have had the option to simply ignore my opposition to the term, and just let the chips fall where they may with how popular the term "NoOps" may or may not get. I have obviously not taken that option in the past, but I plan to in the future.
You're not an analyst saying "NoOps". Analysts are easy (for me) to ignore, because they're not practitioners. We have expectations of engineering maturity from practitioners in this field of web engineering, especially those we consider leaders. I don't have any expectations from analysts,
# Helper function to plot a decision boundary. | |
# If you don't fully understand this function don't worry, it just generates the contour plot below. | |
def plot_decision_boundary(pred_func): | |
# Set min and max values and give it some padding | |
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 | |
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 | |
h = 0.01 | |
# Generate a grid of points with distance h between them | |
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) | |
# Predict the function value for the whole grid |
import spark.streaming.{Seconds, StreamingContext} | |
import spark.storage.StorageLevel | |
import spark.streaming.examples.twitter.TwitterInputDStream | |
import com.twitter.algebird._ | |
import spark.streaming.StreamingContext._ | |
import spark.SparkContext._ | |
/** | |
* Example of using CountMinSketch monoid from Twitter's Algebird together with Spark Streaming's | |
* TwitterInputDStream |
require 'rubygems' | |
require 'mechanize' | |
# Placeholder contact details; replace each value with real data before running.
FIRST_NAME = 'FIRST_NAME'
LAST_NAME = 'LAST_NAME'
PHONE = 'PHONE'
EMAIL = 'EMAIL@provider.com'

# Number of people in the party.
PARTY_SIZE = 2

# Time window given as 'HH:MM' strings.
# NOTE(review): presumably the acceptable booking window; confirm against the rest of the script.
SCHEDULE_RANGE = { :start_time => '19:00', :end_time => '20:30' }