Hao Ren invkrh

@invkrh
invkrh / Cache.java
Last active November 2, 2015 22:36
Scala Type Parameter
import java.util.Map;

public interface Cache<K, V> {
    V get(K key);
    void put(K key, V value);
    Map<K, V> asMap();
}
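Since the gist is titled "Scala Type Parameter", the Java interface above maps naturally to a Scala trait with two type parameters. A minimal sketch with an in-memory implementation backed by a mutable `HashMap` (the `MapCache` name and the immutable-`Map` return type for `asMap` are illustrative choices, not from the gist):

```scala
import scala.collection.mutable

// Scala analogue of the Java Cache<K, V> interface above
trait Cache[K, V] {
  def get(key: K): V
  def put(key: K, value: V): Unit
  def asMap: Map[K, V]
}

// Illustrative in-memory implementation backed by a mutable HashMap
class MapCache[K, V] extends Cache[K, V] {
  private val store = mutable.HashMap.empty[K, V]
  def get(key: K): V = store(key)
  def put(key: K, value: V): Unit = store.update(key, value)
  def asMap: Map[K, V] = store.toMap
}

val c = new MapCache[String, Int]
c.put("a", 1)
println(c.get("a"))  // 1
```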
#!/bin/bash
# Emit one path glob per day/hour for a given year and month.
# (Loop bodies and closing keywords reconstructed; the truncated
# original stopped inside the inner loop.)
function gen {
  year=$1
  mon=$2
  days=$3
  for d in $(eval echo "{01..$days}"); do
    for h in {00..23}; do
      echo "$year/$mon/$d/*_$h.log"
    done
  done
}
// Note: the glob pattern must not contain spaces,
// otherwise only the part before the first space is matched.
sc.textFile("s3a://leboncoin.fr-datalake-dev" +
    "/test_cases/qa2/logs/http/ip/raw/txt/2013/{01,02,03}/*/*_{00,02}.log")
  .collect()
  .foreach(println)
trait DateRepr {
  def dt: DateTime
}

trait HasYear extends DateRepr {
  def year: Int = dt.getYear
}

trait HasMonth extends DateRepr {
  def month: Int = dt.getMonthOfYear
}
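A usage sketch of these stackable traits. To keep the snippet self-contained, `java.time.LocalDateTime` stands in for Joda-Time's `DateTime` (so `getMonthValue` replaces Joda's `getMonthOfYear`); the `LogEntry` case class is illustrative, not from the gist:

```scala
import java.time.LocalDateTime

// java.time stand-ins for the Joda-Time-based traits above
trait DateRepr { def dt: LocalDateTime }
trait HasYear extends DateRepr { def year: Int = dt.getYear }
trait HasMonth extends DateRepr { def month: Int = dt.getMonthValue }

// Hypothetical carrier type mixing in the date-accessor traits
case class LogEntry(dt: LocalDateTime) extends HasYear with HasMonth

val e = LogEntry(LocalDateTime.of(2015, 11, 2, 0, 0))
println(e.year)   // 2015
println(e.month)  // 11
```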
@invkrh
invkrh / KMeas.json
Last active December 9, 2015 14:28
{
  "cells": [{
    "cell_type": "markdown",
    "source": ["# ETL and K-Means\n \nThis lab will demonstrate loading data from a file, transforming that data into a form usable with the ML and MLlib libraries, and building a k-means clustering model using both ML and MLlib.\n \nUpon completing this lab you should understand how to read from and write to files in Spark, convert between `RDDs` and `DataFrames`, and build a model using both the ML and MLlib APIs."],
    "metadata": {}
  }, {
    "cell_type": "markdown",
    "source": ["#### Loading the data\n \nFirst, we need to load data into Spark. We'll use a built-in utility to load a [libSVM file](www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html), which is stored in an S3 bucket on AWS. We'll use `MLUtils.loadLibSVMFile` to load our file. Here are the [Python](http://spark.apache.org/docs/latest/api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils.loadLibSVMFile) and [Scala](https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.mllib.util.MLUtils$) APIs."],
    "metadata": {}
  }]
}
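The notebook above loads a libSVM-format file via `MLUtils.loadLibSVMFile`. Each line of that format is `<label> <index>:<value> ...` with 1-based, sorted feature indices. A minimal plain-Scala parser sketch of the format itself (an illustration only, not Spark's `MLUtils` implementation; the function name is made up):

```scala
// Parse one line of libSVM format: "<label> <index>:<value> ..."
def parseLibSVMLine(line: String): (Double, Map[Int, Double]) = {
  val tokens = line.trim.split("\\s+")
  val label = tokens.head.toDouble
  val features = tokens.tail.map { t =>
    val sep = t.indexOf(':')
    t.take(sep).toInt -> t.drop(sep + 1).toDouble
  }.toMap
  (label, features)
}

val (label, feats) = parseLibSVMLine("1.0 1:0.5 3:2.0")
println(label)    // 1.0
println(feats(3)) // 2.0
```

Spark's loader returns an `RDD[LabeledPoint]` with sparse feature vectors rather than a `Map`, but the on-disk format it consumes is the same.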
Atom settings' backup files
<html>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.1.0/styles/monokai.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.1.0/highlight.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>
<body>
<pre>
<code class="scala">/**
 * Loads Typesafe Config settings inside a Spark task closure,
 * so the config is resolved on the executors rather than the driver.
 */
object AppConf {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName(this.getClass.getName)
    val sc = new SparkContext(sparkConf)
    sc.makeRDD(1 to 20).map { x =>
      val conf = ConfigFactory.load()
      val host = conf.getString("settings.host")
      val port = conf.getInt("settings.port")
      // (closure completed: the truncated original ended here)
      (x, host, port)
    }.collect().foreach(println)
  }
}</code>
</pre>
</body>
</html>
package me.invkrh.spark

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkContext, SparkConf}

object Context {
  val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("Test")
  val ctx: SparkContext = new SparkContext(conf)
  val sqlc: SQLContext = new SQLContext(ctx)
}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}

object Test extends App {
  val sc = new SparkContext("local[2]", "test", new SparkConf)
  val hiveContext = new HiveContext(sc)
  val sqlContext = new SQLContext(sc)
}