@whs2k
Created May 30, 2018 14:53
'''
For the Examples Below, Assume a table with the following format:
root
|-- jobgroup: string (nullable = true)
|-- jobname: string (nullable = true)
|-- starttime: timestamp (nullable = true)
|-- endtime: timestamp (nullable = true)
|-- status: string (nullable = true)
|-- comments: string (nullable = true)
|-- no_of_recs_processed: integer (nullable = true)
'''
'''
1. Insert Rows Into Hive Table in Bash
'''
hive -S -e 'use bdr; insert into table job_log SELECT "bdr-ptab", "ptab", CURRENT_TIMESTAMP(), CURRENT_TIMESTAMP(), "started", NULL, 0 FROM job_log LIMIT 1;'
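A small sketch of the same insert, with the query assembled in a shell variable first so quoting problems surface before hive runs. `JOB_GROUP` and `JOB_NAME` are assumed variables added here for illustration; they are not part of the original one-liner.

```shell
# Build the -e query in a variable; the job group/name are assumptions
JOB_GROUP="bdr-ptab"
JOB_NAME="ptab"
QUERY="use bdr; insert into table job_log SELECT \"$JOB_GROUP\", \"$JOB_NAME\", CURRENT_TIMESTAMP(), CURRENT_TIMESTAMP(), \"started\", NULL, 0 FROM job_log LIMIT 1;"
echo "$QUERY"
# then run it: hive -S -e "$QUERY"
```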
'''
2. Execute Hive Script in Bash
'''
hive -S -f "$ROOT_SCRIPT_PATH/CREATE_bdr-job_control.hql"
'''
3. Extract a Field from Hive into a Bash Variable
'''
JOB_CNTL_TIMESTAMP=$(hive -S -e 'SELECT max(last_mod_ts) FROM '"$JOB_CONTROL_TABLE"' WHERE job_name = "ptab";')
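One caveat worth guarding against: `hive -S -e` can print an empty string or the literal text `NULL` (e.g. on a first run with no matching rows). A minimal sketch of a guard follows; the fallback timestamp is an assumption for illustration, not from the original gist.

```shell
# Stand-in value; in practice this comes from: $(hive -S -e 'SELECT max(last_mod_ts) ...')
JOB_CNTL_TIMESTAMP="NULL"
# Fall back to an epoch timestamp when the result is empty or the string "NULL"
if [ -z "$JOB_CNTL_TIMESTAMP" ] || [ "$JOB_CNTL_TIMESTAMP" = "NULL" ]; then
    JOB_CNTL_TIMESTAMP="1970-01-01 00:00:00"
fi
echo "$JOB_CNTL_TIMESTAMP"
```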
'''
4. Overwrite the Table Without the 'XXX' Rows (Hive's substitute for a DELETE)
'''
-- hive>
insert overwrite table bdr.job_log select * from bdr.job_log where jobname <> 'ptab';
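One Hive subtlety: `jobname <> 'ptab'` is not true when `jobname` is NULL, so the overwrite above silently drops NULL-jobname rows as well. A hedged sketch of a variant that keeps them, built as a string here (run it with `hive -S -e "$QUERY"`):

```shell
# The "or jobname is null" clause preserves rows the <> predicate would drop
QUERY="insert overwrite table bdr.job_log select * from bdr.job_log where jobname <> 'ptab' or jobname is null;"
echo "$QUERY"
```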
'''
5. Insert Rows into HIVE Table in Python
'''
from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .appName("PTAB_automation") \
    .enableHiveSupport() \
    .getOrCreate()

# enableHiveSupport() lets spark.sql() run against the Hive metastore directly;
# a separate HiveContext is deprecated since Spark 2.0
insert_values = (job_group, job_name,
                 start_time, end_time,
                 'Completed', 'No. of PTAB documents pushed to elastic',
                 no_of_recs_processed)  # job_group, job_name, etc. are set earlier in the job
print('Values to Insert into bdr.job_log Table: ', insert_values)
spark.sql("INSERT INTO TABLE " + str(job_log_table) + " VALUES " + str(insert_values))