Skip to content

Instantly share code, notes, and snippets.

View zsxwing's full-sized avatar
:octocat:

Shixiong Zhu zsxwing

:octocat:
  • Databricks, Inc.
  • San Francisco
View GitHub Profile
@zsxwing
zsxwing / create.sql
Created August 6, 2013 03:13
Hive - create external table
-- Declare the external Hive table `access` with a single STRING column `ip`;
-- IF NOT EXISTS makes the statement a no-op when the table is already defined.
-- Rows are delimited text with comma-separated fields, read from the
-- directory given in LOCATION.
CREATE EXTERNAL TABLE IF NOT EXISTS access (
ip STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/mstr/access';
@zsxwing
zsxwing / exit_when_error.sh
Created September 2, 2013 11:08
Append -e to the "#!/bin/bash" line, or call "set -e", to make the script exit as soon as a command fails
#!/bin/bash -e
# Two equivalent ways to enable exit-on-error are shown: the -e option on the
# shebang line above and the explicit `set -e` below. The shebang option only
# applies when the script is executed directly; `set -e` also takes effect when
# the script is started as `bash script.sh`.
set -e
@zsxwing
zsxwing / build.gradle
Created September 7, 2013 09:40
A build.gradle template for Scala projects
apply plugin: 'scala'
apply plugin: 'eclipse'
sourceCompatibility = JavaVersion.VERSION_1_6
targetCompatibility = JavaVersion.VERSION_1_6
eclipse {
classpath {
downloadSources = true
downloadJavadoc = false
@zsxwing
zsxwing / async.scala
Last active January 2, 2016 13:39
Async example
import rx.lang.scala.Observable._
import rx.lang.scala.JavaConversions._
import rx.lang.scala.ImplicitFunctionConversions._
import rx.util.async.Async
import java.util.concurrent.Executors
import rx.schedulers.Schedulers
object Test extends App {
val executor = Executors.newFixedThreadPool(10)
package rx.android.observables;
import rx.Observable;
import rx.Observable.OnSubscribe;
import rx.Subscriber;
import rx.android.subscriptions.AndroidSubscriptions;
import rx.functions.Action0;
import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.Intent;
/**
* Copyright 2013 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@zsxwing
zsxwing / hbase-spark.scala
Created August 11, 2014 08:29
hbase-spark.scala
import java.io.{DataOutputStream, ByteArrayOutputStream}
import java.lang.String
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Base64
def convertScanToString(scan: Scan): String = {
@zsxwing
zsxwing / test.scala
Created August 14, 2014 14:44
This example reports that Foo cannot be serialized.
scala> class Foo { def foo() = Array(1.0) }
defined class Foo
scala> val t = new Foo
t: Foo = $iwC$$iwC$$iwC$$iwC$Foo@5ef6a5b6
scala> val m = t.foo
m: Array[Double] = Array(1.0)
scala> val r1 = sc.parallelize(List(1, 2, 3))
@zsxwing
zsxwing / test2.scala
Last active August 29, 2015 14:05
This example works.
scala> class Foo { def foo() = Array(1.0) }
defined class Foo
scala> var m: Array[Double] = null
m: Array[Double] = null
scala> {
| val t = new Foo
| m = t.foo
| }
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.scheduler.*;
import scala.Tuple2;