Skip to content

Instantly share code, notes, and snippets.

@jeffwong
Created August 4, 2014 21:04
Show Gist options
  • Save jeffwong/38479cb22a5eec7522e5 to your computer and use it in GitHub Desktop.
Save jeffwong/38479cb22a5eec7522e5 to your computer and use it in GitHub Desktop.
R to Hive utils
#' Build a Hive `CREATE EXTERNAL TABLE` statement for a data frame
#'
#' Produces the DDL text for an external, delimited-text Hive table with one
#' column per entry of `field.types`. Nothing is executed; the statement is
#' only returned. All values are interpolated as-is, with no quoting or
#' escaping of identifiers or of the location string.
#'
#' @param databasename Database name placed before the dot in the table name.
#' @param tablename Table name placed after the dot.
#' @param df A data frame, or an object that `as.data.frame()` can coerce.
#' @param field.types Optional list or character vector of Hive type names,
#'   named by column. When `NULL`, every column of `df` is typed with
#'   `hiveDataType()`. When supplied without names, it must have one entry per
#'   column of `df` and the column names of `df` are used.
#' @param s3path Value placed inside the quotes of the `LOCATION` clause.
#' @return A character vector holding the statement (length one for scalar
#'   inputs).
dbBuildTableDefinition.hive <- function(databasename, tablename, df,
                                        field.types = NULL, s3path) {
  if (!is.data.frame(df)) {
    df <- as.data.frame(df)
  }

  if (is.null(field.types)) {
    field.types <- lapply(df, hiveDataType)
  } else if (is.null(names(field.types))) {
    # Without names, paste() below would emit bare types with no column
    # names; borrow the names from df when that is unambiguous.
    if (length(field.types) != ncol(df)) {
      stop(
        "unnamed 'field.types' must have one entry per column of 'df'",
        call. = FALSE
      )
    }
    names(field.types) <- names(df)
  }

  flds <- paste(names(field.types), field.types)

  # The doubled backslashes keep the delimiters as the literal text \u0001,
  # \u0004 and \u0002 in the generated statement.
  template <- paste(
    c(
      "CREATE EXTERNAL TABLE %s.%s",
      "(%s)",
      "ROW FORMAT DELIMITED",
      "FIELDS TERMINATED BY '\\u0001'",
      "COLLECTION ITEMS TERMINATED BY '\\u0004'",
      "MAP KEYS TERMINATED BY '\\u0002'",
      "STORED AS INPUTFORMAT",
      "'org.apache.hadoop.mapred.TextInputFormat'",
      "OUTPUTFORMAT",
      "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'",
      "LOCATION",
      "'%s'"
    ),
    collapse = "\n"
  )

  sprintf(
    template,
    databasename,
    tablename,
    paste(flds, collapse = ",\n"),
    s3path
  )
}
#' Map an R object to a Hive column type name
#'
#' The type is chosen from `data.class(obj)` and `storage.mode(obj)`:
#' integer-stored plain numerics become `"int"`, other numerics `"double"`,
#' logicals `"tinyint"`, `Date` values `"int"`, and character, factor,
#' `POSIXct` and anything unrecognised become `"string"`.
#'
#' @param obj The object (typically a data frame column) to classify.
#' @return A single character string naming the Hive type.
hiveDataType <- function(obj) {
  rs.class <- data.class(obj)
  rs.mode <- storage.mode(obj)

  # data.class() reports "numeric" for unclassed integer vectors, so the
  # storage mode is what tells integers apart from doubles.
  if (rs.class == "numeric" && rs.mode %in% c("integer", "int")) {
    return("int")
  }

  switch(rs.class,
    character = "string",
    logical = "tinyint",
    # data.class() yields lowercase "factor"; the previous "Factor" label
    # could never match and factors only reached "string" via the default.
    factor = "string",
    Date = "int",
    POSIXct = "string",
    numeric = "double",
    "string"
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment