Cheng Lian (liancheng)
@liancheng
liancheng / arrow-schema-dsl.scala
Last active January 8, 2018 06:45
Simple Scala DSL for constructing Apache Arrow schemas.
package example

import scala.collection.JavaConverters._
import scala.language.implicitConversions

import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}

trait FieldBuilder {
  def named(name: String): Field
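
The gist preview cuts off after the FieldBuilder trait. Below is a minimal sketch of how such a builder might be implemented and used; TypedFieldBuilder and the int32/utf8 helpers are assumptions for illustration, not the gist's actual DSL.

// Hypothetical builder: wraps an ArrowType and emits non-nullable fields.
case class TypedFieldBuilder(arrowType: ArrowType) extends FieldBuilder {
  def named(name: String): Field =
    new Field(name, FieldType.notNullable(arrowType), java.util.Collections.emptyList[Field]())
}

val int32 = TypedFieldBuilder(new ArrowType.Int(32, true))
val utf8  = TypedFieldBuilder(ArrowType.Utf8.INSTANCE)

// A two-column schema: (id: int32, name: utf8).
val schema = new Schema(Seq(int32.named("id"), utf8.named("name")).asJava)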

Keybase proof

I hereby claim:

  • I am liancheng on github.
  • I am liancheng (https://keybase.io/liancheng) on keybase.
  • I have a public key ASAVimRA8LFNh06-5t17L6yTHgQJp-j6gItxZLXhwVnD-Ao

To claim this, I am signing this object:

@liancheng
liancheng / scraper-repl.txt
Created March 14, 2016 04:02
Scraper REPL session example
$ ./build/sbt repl/run
...
@ context range 10 groupBy 'id agg (count('id), 'id + 1) having ('id > 0 and count('id) > 0) explain ()
# Logical plan
Filter: condition=$0 ==> [?output?]
├╴$0: ((`id` > 0:INT) AND (COUNT(`id`) > 0:INT))
╰╴UnresolvedAggregate: keys=[$0], projectList=[$1, $2] ==> [?output?]
├╴$0: `id`
@liancheng
liancheng / plan-tree.txt
Last active March 14, 2016 03:31
Scraper query plan explanation
@ context range 10 groupBy 'id agg count('id) having ('id > 0 and count('id) > 0) explain ()
# Logical plan
Filter: condition=$0 ==> [?output?]
├╴$0: ((`id` > 0:INT) AND (COUNT(`id`) > 0:INT))
╰╴UnresolvedAggregate: keys=[$0], projectList=[$1] ==> [?output?]
├╴$0: `id`
├╴$1: COUNT(`id`) AS ?alias?
╰╴LocalRelation: data=<local-data> ==> [`id`#0:BIGINT!]
# Analyzed plan
trait Expression

trait BinaryPredicate extends Expression {
  def left: Expression
  def right: Expression
}

case class Literal(value: Int) extends Expression

case class Lt(left: Expression, right: Expression) extends BinaryPredicate
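
A quick usage sketch (the eval function is an illustration added here, not from the gist): build a small predicate tree and fold it to a value.

// Hypothetical evaluator for this toy AST (not part of the gist):
// literals evaluate to their value, Lt to 1 or 0.
def eval(e: Expression): Int = e match {
  case Literal(v) => v
  case Lt(l, r)   => if (eval(l) < eval(r)) 1 else 0
}

assert(eval(Lt(Literal(1), Literal(2))) == 1)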
case class HiveSampleData(
    ClientID: String,
    QueryTime: String,
    Market: String,
    DevicePlatform: String,
    DeviceMake: String,
    DeviceModel: String,
    State: String,
    Country: String,
    SessionId: Long,
    SessionPageViewOrder: Long)

val mobiletxt = sc.textFile("file:///tmp/a.csv")
mobiletxt.count()

// Load the CSV with the SparkContext, parse each line into a HiveSampleData,
// and convert to a DataFrame via .toDF() (needs sqlContext.implicits._ in Spark 1.3+).
import sqlContext.implicits._
val mobile = sc.textFile("file:///tmp/a.csv")
  .map(_.split(","))
  .map(m => HiveSampleData(
    m(0), m(1), m(2), m(3), m(4), m(5), m(6), m(7),
    m(8).toLong, m(9).toLong))
  .toDF()
// Register table
mobile.registerTempTable("mobile")
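
Once registered, the table can be queried through the SQLContext; the aggregation below is an illustrative example, not from the gist.

// Example query against the registered temp table (Spark 1.x API,
// matching the registerTempTable call above).
val byMarket = sqlContext.sql(
  "SELECT Market, COUNT(*) AS views FROM mobile GROUP BY Market")
byMarket.show()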
[info] com.google.guava:guava:17.0
[info] +-com.fasterxml.jackson.module:jackson-module-scala_2.10:2.4.4 [S]
[info] | +-org.apache.spark:spark-core_2.10:1.3.0-SNAPSHOT [S]
[info] | +-org.apache.spark:spark-catalyst_2.10:1.3.0-SNAPSHOT [S]
[info] | | +-org.apache.spark:spark-sql_2.10:1.3.0-SNAPSHOT [S]
[info] | |
[info] | +-org.apache.spark:spark-sql_2.10:1.3.0-SNAPSHOT [S]
[info] |
[info] +-com.spotify:docker-client:2.7.5
[info] | +-org.apache.spark:spark-sql_2.10:1.3.0-SNAPSHOT [S]
test("save - append - ArrayType.containsNull") {
withTempPath { file =>
val df = Seq.empty[Tuple1[Seq[Int]]].toDF("arrayVal")
val nonNullSchema = StructType(df.schema.map {
case f @ StructField(_, a: ArrayType, _, _) =>
f.copy(dataType = a.copy(containsNull = false))
case f => f
})
sqlContext.createDataFrame(df.rdd, nonNullSchema).save(file.getCanonicalPath)
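    // (The gist preview truncates here; the lines below are a hedged guess at
    // the rest of the test. The read-back and assertion are assumptions, not
    // the gist's actual code.)
    val loaded = sqlContext.load(file.getCanonicalPath)
    assert(loaded.schema === nonNullSchema)
  }
}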
@liancheng
liancheng / data-sources-api.scala
Last active August 29, 2015 14:14
Data source API draft
/**
 * :: DeveloperApi ::
 * Base class for table scan operators.
 */
@DeveloperApi
abstract class Scan {
  def sqlContext: SQLContext

  /**
   * Returns an estimated size of the input of this scan operator in bytes.
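   * (Preview truncated here; the declaration below is a hedged guess based on
   * this doc comment. The name sizeInBytes is an assumption, not the gist's code.)
   */
  def sizeInBytes: Long
}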
@liancheng
liancheng / fn.scala
Created December 18, 2014 09:49
Scala function serialization
import java.io._

object Main {
  def main(args: Array[String]): Unit = {
    val stream = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(stream)

    def foo(): String => String = {
      val test = "hello"
      def bar(name: String): String = s"$test $name"
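      // (Preview truncated; a plausible completion follows, assuming the gist
      // goes on to serialize the returned closure. These lines are not the
      // gist's actual code.)
      bar
    }

    // foo() returns a closure capturing `test`; the Scala compiler emits a
    // serializable anonymous function class, so writeObject succeeds.
    out.writeObject(foo())
    out.close()
    println(s"Serialized closure: ${stream.size()} bytes")
  }
}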