Skip to content

Instantly share code, notes, and snippets.

@arpieb
Last active May 16, 2018 01:32
Show Gist options
  • Save arpieb/8d59ad7402d9e58b78417e7ca1db11e2 to your computer and use it in GitHub Desktop.
Save arpieb/8d59ad7402d9e58b78417e7ca1db11e2 to your computer and use it in GitHub Desktop.
MariaDB CONNECT table schemas for GDELT v2 events and mentions CSV files
create table events (
GlobalEventID bigint unsigned ,
Day date date_format='YYYYMMDD' ,
MonthYear date date_format='YYYYMM' ,
Year date date_format='YYYY' ,
FractionDate double ,
Actor1Code char(15) ,
Actor1Name varchar(255) ,
Actor1CountryCode char(3) ,
Actor1KnownGroupCode char(3) ,
Actor1EthnicCode char(3) ,
Actor1Religion1Code char(3) ,
Actor1Religion2Code char(3) ,
Actor1Type1Code char(3) ,
Actor1Type2Code char(3) ,
Actor1Type3Code char(3) ,
Actor2Code char(15) ,
Actor2Name varchar(255) ,
Actor2CountryCode char(3) ,
Actor2KnownGroupCode char(3) ,
Actor2EthnicCode char(3) ,
Actor2Religion1Code char(3) ,
Actor2Religion2Code char(3) ,
Actor2Type1Code char(3) ,
Actor2Type2Code char(3) ,
Actor2Type3Code char(3) ,
IsRootEvent boolean ,
EventCode char(4) ,
EventBaseCode char(4) ,
EventRootCode char(4) ,
QuadClass tinyint unsigned ,
GoldsteinScale double ,
NumMentions int unsigned ,
NumSources int unsigned ,
NumArticles int unsigned ,
AvgTone double ,
Actor1Geo_Type int unsigned ,
Actor1Geo_Fullname varchar(255) ,
Actor1Geo_CountryCode char(2) ,
Actor1Geo_ADM1Code char(16) ,
Actor1Geo_ADM2Code char(16) ,
Actor1Geo_Lat double ,
Actor1Geo_Long double ,
Actor1Geo_FeatureID char(16) ,
Actor2Geo_Type int unsigned ,
Actor2Geo_Fullname varchar(255) ,
Actor2Geo_CountryCode char(2) ,
Actor2Geo_ADM1Code char(16) ,
Actor2Geo_ADM2Code char(16) ,
Actor2Geo_Lat double ,
Actor2Geo_Long double ,
Actor2Geo_FeatureID char(16) ,
ActionGeo_Type int unsigned ,
ActionGeo_Fullname varchar(255) ,
ActionGeo_CountryCode char(2) ,
ActionGeo_ADM1Code char(16) ,
ActionGeo_ADM2Code char(16) ,
ActionGeo_Lat double ,
ActionGeo_Long double ,
ActionGeo_FeatureID char(16) ,
DATEADDED datetime date_format='YYYYMMDDHHMMSS' ,
SOURCEURL varchar(1024)
)
engine=CONNECT
table_type=CSV
sep_char='\t'
file_name='/data/rbates/gdelt/*export.CSV.zip'
zipped=1
multiple=1
option_list='Entry=*.CSV'
;
create table mentions (
GlobalEventID bigint unsigned ,
EventTimeDate date date_format='YYYYMMDDHHMMSS' ,
MentionTimeDate date date_format='YYYYMMDDHHMMSS' ,
MentionType tinyint unsigned ,
MentionSourceName varchar(255) ,
MentionIdentifier varchar(1024) ,
SentenceID int unsigned ,
Actor1CharOffset int unsigned ,
Actor2CharOffset int unsigned ,
ActionCharOffset int unsigned ,
InRawText boolean ,
Confidence double ,
MentionDocLen int unsigned ,
MentionDocTone double ,
MentionDocTranslationInfo varchar(1024) ,
Extras varchar(255)
)
engine=CONNECT
table_type=CSV
sep_char='\t'
file_name='/data/rbates/gdelt/*mentions.CSV.zip'
zipped=1
multiple=1
option_list='Entry=*.CSV'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment