Skip to content

Instantly share code, notes, and snippets.

@aczire
Created February 24, 2017 21:31
Show Gist options
  • Save aczire/a3adb0dd0b8be50c342037694fda3309 to your computer and use it in GitHub Desktop.
Save aczire/a3adb0dd0b8be50c342037694fda3309 to your computer and use it in GitHub Desktop.
CREATE TABLE ntsb_accidents (
EventId STRING,
InvestigationType STRING,
AccidentNumber STRING,
EventDate STRING,
Location STRING,
Country STRING,
Latitude STRING,
Longitude STRING,
AirportCode STRING,
AirportName STRING,
InjurySeverity STRING,
AircraftDamage STRING,
AircraftCategory STRING,
RegistrationNumber STRING,
Make STRING,
Model STRING,
AmateurBuilt STRING,
NumberOfEngines STRING,
EngineType STRING,
FARDescription STRING,
Schedule STRING,
PurposeOfFlight STRING,
AirCarrier STRING,
TotalFatalInjuries STRING,
TotalSeriousInjuries STRING,
TotalMinorInjuries STRING,
TotalUninjured STRING,
WeatherCondition STRING,
BroadPhaseOfFlight STRING,
ReportStatus STRING,
PublicationDate STRING
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
"column.xpath.EventId"="/ROW/@EventId",
"column.xpath.InvestigationType"="/ROW/@InvestigationType",
"column.xpath.AccidentNumber"="/ROW/@AccidentNumber",
"column.xpath.EventDate"="/ROW/@EventDate",
"column.xpath.Location"="/ROW/@Location",
"column.xpath.Country"="/ROW/@Country",
"column.xpath.Latitude"="/ROW/@Latitude",
"column.xpath.Longitude"="/ROW/@Longitude",
"column.xpath.AirportCode"="/ROW/@AirportCode",
"column.xpath.AirportName"="/ROW/@AirportName",
"column.xpath.InjurySeverity"="/ROW/@InjurySeverity",
"column.xpath.AircraftDamage"="/ROW/@AircraftDamage",
"column.xpath.AircraftCategory"="/ROW/@AircraftCategory",
"column.xpath.RegistrationNumber"="/ROW/@RegistrationNumber",
"column.xpath.Make"="/ROW/@Make",
"column.xpath.Model"="/ROW/@Model",
"column.xpath.AmateurBuilt"="/ROW/@AmateurBuilt",
"column.xpath.NumberOfEngines"="/ROW/@NumberOfEngines",
"column.xpath.EngineType"="/ROW/@EngineType",
"column.xpath.FARDescription"="/ROW/@FARDescription",
"column.xpath.Schedule"="/ROW/@Schedule",
"column.xpath.PurposeOfFlight"="/ROW/@PurposeOfFlight",
"column.xpath.AirCarrier"="/ROW/@AirCarrier",
"column.xpath.TotalFatalInjuries"="/ROW/@TotalFatalInjuries",
"column.xpath.TotalSeriousInjuries"="/ROW/@TotalSeriousInjuries",
"column.xpath.TotalMinorInjuries"="/ROW/@TotalMinorInjuries",
"column.xpath.TotalUninjured"="/ROW/@TotalUninjured",
"column.xpath.WeatherCondition"="/ROW/@WeatherCondition",
"column.xpath.BroadPhaseOfFlight"="/ROW/@BroadPhaseOfFlight",
"column.xpath.ReportStatus"="/ROW/@ReportStatus",
"column.xpath.PublicationDate"="/ROW/@PublicationDate"
)
STORED AS
INPUTFORMAT 'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
TBLPROPERTIES (
"xmlinput.start"="<DATA xmlns",
"xmlinput.end"="</DATA>"
);
LOAD DATA LOCAL INPATH 'AviationData.xml' OVERWRITE INTO TABLE ntsb_accidents;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment