SPARK-14492 test file
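A minimal, self-contained JUnit test for the SPARK-14492 scenario: it starts a local-mode HiveContext configured against an older Hive metastore (spark.sql.hive.metastore.version = 1.1.0), creates and later drops a scratch database, and runs a trivial line-filter count over the project's README.md to confirm the context works.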
import java.nio.file.Paths;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.hive.HiveContext;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestSparkHive {

    private static JavaSparkContext javaSparkContext;
    private static HiveContext hiveContext;
    @BeforeClass
    public static void setUpClass() {
        SparkConf conf = getDefaultSparkConfiguration();
        javaSparkContext = new JavaSparkContext(conf);
        hiveContext = new HiveContext(JavaSparkContext.toSparkContext(javaSparkContext));
        // Keep the Hive warehouse under target/ so the test writes nothing
        // outside the build directory.
        hiveContext.setConf("hive.metastore.warehouse.dir",
                Paths.get(".").toAbsolutePath().normalize().toString() + "/target/tmp");
        hiveContext.sql("create database spark_extract_job");
    }
    @AfterClass
    public static void tearDownClass() {
        hiveContext.sql("drop database spark_extract_job cascade");
        // close() simply delegates to stop(), so a single stop() is sufficient.
        javaSparkContext.stop();
    }
    private static SparkConf getDefaultSparkConfiguration() {
        /*
         * Setting the metastore version here will only work if the Hive 1.2.1
         * jars are not on the classpath. If the 1.2.1 jars exist, then the
         * metastore defaults to those.
         */
        SparkConf conf = new SparkConf()
                .setMaster("local[*]")
                .setAppName("test-verifier")
                .set("spark.ui.enabled", "false")
                .set("spark.testing", "true")
                .set("spark.sql.hive.metastore.version", "1.1.0");
        return conf;
    }
    @Test
    public void testSomething() {
        // NOTE: a README.md must exist at the project root for this test to run.
        String logFile = Paths.get(".").toAbsolutePath().normalize().toString() + "/README.md";
        System.out.println(logFile);
        JavaRDD<String> logData = javaSparkContext.textFile(logFile).cache();
        long numAs = logData.filter((s) -> s.contains("a")).count();
        long numBs = logData.filter((s) -> s.contains("b")).count();
        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
        System.out.println("Asserting the Hive Context works");
        // Use JUnit assertions rather than the assert keyword, which is a no-op
        // unless the JVM is started with -ea.
        Assert.assertEquals(10, numAs);
        Assert.assertEquals(10, numBs);
    }
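
    // As an aside: one way to sanity-check the classpath caveat noted in
    // getDefaultSparkConfiguration() is to read the setting back at runtime.
    // This is a minimal sketch, assuming the stock SQLContext.getConf(String)
    // accessor; it reports the configured value, not which Hive client jars
    // actually ended up on the classpath.
    @Test
    public void testMetastoreVersionConfigured() {
        Assert.assertEquals("1.1.0",
                hiveContext.getConf("spark.sql.hive.metastore.version"));
    }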
}