Skip to content

Instantly share code, notes, and snippets.

@NitinKumar94
Last active August 22, 2016 10:34
Show Gist options
  • Save NitinKumar94/df1dd0e2058816b7ed2872ac7564af11 to your computer and use it in GitHub Desktop.
-- Verification script: run AFTER the Spark job has inserted its records.
USE test_inserts;

-- NOTE(review): Hive may answer COUNT(*) from table statistics without
-- spawning a map-reduce job, which can return a stale (wrong) count for
-- rows written outside Hive (e.g. by Spark). Disable stats-based answers
-- so the count reflects the actual data files.
SET hive.compute.query.using.stats=false;
SELECT COUNT(*) FROM test_hive_inserts;

-- Should display all inserted records, from both Hive and Spark.
-- ORDER BY makes the verification output deterministic.
SELECT id, name, dept
FROM test_hive_inserts
ORDER BY id;
## Run: spark-submit insert_records.py
from pyspark import SparkConf, SparkContext
from pyspark.sql import HiveContext


def insert_records():
    """Insert three sample rows into test_inserts.test_hive_inserts via Spark.

    Builds a local SparkContext, creates a 3-row DataFrame, registers it as a
    temp table, and appends it to the Hive table with INSERT ... SELECT.
    """
    conf = SparkConf().setAppName("Testing Inserts").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        sqlContext = HiveContext(sc)
        data_points = [(3, "ghi", "xyz"), (4, "jkl", "xyz"), (5, "mno", "xyz")]
        # DataFrame column names need not match the Hive table's columns;
        # the INSERT ... SELECT below maps columns positionally.
        column_names = ["identity_id", "emp_name", "department_name"]
        df = sqlContext.createDataFrame(data_points, column_names)
        df.show()
        df.registerTempTable("temp_table")
        sqlContext.sql("insert into table test_inserts.test_hive_inserts select * from temp_table")
    finally:
        # Always release the SparkContext, even if the insert fails.
        sc.stop()


if __name__ == "__main__":
    insert_records()
-- Setup script: create the target schema and seed two rows from Hive itself.
CREATE DATABASE IF NOT EXISTS test_inserts;
USE test_inserts;

-- ORC-backed table that both Hive and the Spark job will insert into.
CREATE TABLE IF NOT EXISTS test_hive_inserts (id INT, name STRING, dept STRING) STORED AS ORC;

-- Seed rows inserted directly through Hive.
-- Hive's INSERT ... VALUES takes no column list; values map positionally
-- to (id, name, dept).
INSERT INTO TABLE test_hive_inserts VALUES (1, 'abc', 'xyz');
INSERT INTO TABLE test_hive_inserts VALUES (2, 'def', 'xyz');

-- Sanity check: expect 2 rows before the Spark job runs.
SELECT COUNT(*) FROM test_hive_inserts;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment