using ODBC #Connect to Hadoop cluster via Hive (pre-defined Windows DSN in ODBC Manager) hiveconn = ODBC.connect("Production hiveserver2"; usr="your-user-name", pwd="your-password-here") #Clean data, return results directly to file #Data returned with have origin of flight, flight takeoff, flight landing and elapsed time hive_query_string = "select origin, from_unixtime(flight_takeoff_datetime_origin) as flight_takeoff_datetime_origin, from_unixtime(flight_takeoff_datetime_origin + (actualelapsedtime * 60)) as flight_landing_datetime_origin, actualelapsedtime from (select origin, unix_timestamp(CONCAT(year,\"-\", month, \"-\", dayofmonth, \" \", SUBSTR(LPAD(deptime, 4, 0), 1, 2), \":\", SUBSTR(LPAD(deptime, 4, 0), 3, 4), \":\", \"00\")) as flight_takeoff_datetime_origin, actualelapsedtime from vw_airline where year = 1987 and actualelapsedtime > 0) inner_query;" #Run query, save results directly to file query(hive_query_string, hiveconn;output="C:\\airline_times.csv",delim=',')