Skip to content

Instantly share code, notes, and snippets.

@invkrh
Created July 5, 2016 22:17
Show Gist options
  • Save invkrh/f30712af40adf48bbdf385bd4bee5634 to your computer and use it in GitHub Desktop.
Save invkrh/f30712af40adf48bbdf385bd4bee5634 to your computer and use it in GitHub Desktop.
Spark SQL single quote escape problem
package me.invkrh.ad2vec
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Reproduction script for differences in how `HiveContext` and `SQLContext`
 * parse two kinds of SQL text: a cast to a complex type (`array<string>`) and
 * a string literal containing a backslash-escaped single quote.
 *
 * Each case runs in isolation, so a failure in one case is reported on stdout
 * and the remaining cases still execute. The outcome noted next to each case
 * is what the original author observed; it is not asserted by this program.
 *
 * An explicit `main` is used rather than `extends App`: Spark's documentation
 * advises against `scala.App` for applications because of its delayed
 * initialization of the object's fields.
 */
object Test {
  import scala.util.control.NonFatal

  /**
   * Builds a local SparkContext, registers a one-row table `tbl` with columns
   * `a` (array of int) and `b` (int), then runs the four parser cases.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "test", new SparkConf)
    try {
      val hiveContext = new HiveContext(sc)
      val sqlContext = new SQLContext(sc)

      // Switch the context under test by swapping which line is commented out.
      val context = hiveContext
      // val context = sqlContext
      import context.implicits._

      val df = Seq((Seq(1, 2), 2)).toDF("a", "b")
      df.registerTempTable("tbl")
      df.printSchema()

      // case 1
      // HiveContext => org.apache.spark.sql.AnalysisException: cannot recognize input near
      //                'array' '<' 'string' in primitive type specification; line 1 pos 17
      // SQLContext  => OK
      runCase("case 1") {
        context.sql("select cast(a as array<string>) from tbl").show()
      }

      // case 2
      // HiveContext => OK
      // SQLContext  => failure: ``union'' expected but ErrorToken(unclosed string literal) found
      runCase("case 2") {
        context.sql("select 'a\\'b'").show()
      }

      // case 3
      // OK with HiveContext and SQLContext
      runCase("case 3") {
        df.selectExpr("cast(a as array<string>)").show()
      }

      // case 4
      // HiveContext, SQLContext => failure: end of input expected
      runCase("case 4") {
        df.selectExpr("'a\\'b'").show()
      }
    } finally {
      // Always release the local Spark context, even when setup itself fails.
      sc.stop()
    }
  }

  /**
   * Runs one case and prints any non-fatal failure instead of propagating it,
   * so that later cases are still executed.
   *
   * @param label name printed in front of the failure message
   * @param body  the case to execute
   */
  private def runCase(label: String)(body: => Unit): Unit =
    try body
    catch {
      case NonFatal(e) =>
        println(s"$label failed: ${e.getClass.getName}: ${e.getMessage}")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment