Last active
May 16, 2018 01:32
-
-
Save arpieb/8d59ad7402d9e58b78417e7ca1db11e2 to your computer and use it in GitHub Desktop.
MariaDB CONNECT table schemas for GDELT v2 events and mentions CSV files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table events ( | |
GlobalEventID bigint unsigned , | |
Day date date_format='YYYYMMDD' , | |
MonthYear date date_format='YYYYMM' , | |
Year date date_format='YYYY' , | |
FractionDate double , | |
Actor1Code char(15) , | |
Actor1Name varchar(255) , | |
Actor1CountryCode char(3) , | |
Actor1KnownGroupCode char(3) , | |
Actor1EthnicCode char(3) , | |
Actor1Religion1Code char(3) , | |
Actor1Religion2Code char(3) , | |
Actor1Type1Code char(3) , | |
Actor1Type2Code char(3) , | |
Actor1Type3Code char(3) , | |
Actor2Code char(15) , | |
Actor2Name varchar(255) , | |
Actor2CountryCode char(3) , | |
Actor2KnownGroupCode char(3) , | |
Actor2EthnicCode char(3) , | |
Actor2Religion1Code char(3) , | |
Actor2Religion2Code char(3) , | |
Actor2Type1Code char(3) , | |
Actor2Type2Code char(3) , | |
Actor2Type3Code char(3) , | |
IsRootEvent boolean , | |
EventCode char(4) , | |
EventBaseCode char(4) , | |
EventRootCode char(4) , | |
QuadClass tinyint unsigned , | |
GoldsteinScale double , | |
NumMentions int unsigned , | |
NumSources int unsigned , | |
NumArticles int unsigned , | |
AvgTone double , | |
Actor1Geo_Type int unsigned , | |
Actor1Geo_Fullname varchar(255) , | |
Actor1Geo_CountryCode char(2) , | |
Actor1Geo_ADM1Code char(16) , | |
Actor1Geo_ADM2Code char(16) , | |
Actor1Geo_Lat double , | |
Actor1Geo_Long double , | |
Actor1Geo_FeatureID char(16) , | |
Actor2Geo_Type int unsigned , | |
Actor2Geo_Fullname varchar(255) , | |
Actor2Geo_CountryCode char(2) , | |
Actor2Geo_ADM1Code char(16) , | |
Actor2Geo_ADM2Code char(16) , | |
Actor2Geo_Lat double , | |
Actor2Geo_Long double , | |
Actor2Geo_FeatureID char(16) , | |
ActionGeo_Type int unsigned , | |
ActionGeo_Fullname varchar(255) , | |
ActionGeo_CountryCode char(2) , | |
ActionGeo_ADM1Code char(16) , | |
ActionGeo_ADM2Code char(16) , | |
ActionGeo_Lat double , | |
ActionGeo_Long double , | |
ActionGeo_FeatureID char(16) , | |
DATEADDED datetime date_format='YYYYMMDDHHMMSS' , | |
SOURCEURL varchar(1024) | |
) | |
engine=CONNECT | |
table_type=CSV | |
sep_char='\t' | |
file_name='/data/rbates/gdelt/*export.CSV.zip' | |
zipped=1 | |
multiple=1 | |
option_list='Entry=*.CSV' | |
; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table mentions ( | |
GlobalEventID bigint unsigned , | |
EventTimeDate date date_format='YYYYMMDDHHMMSS' , | |
MentionTimeDate date date_format='YYYYMMDDHHMMSS' , | |
MentionType tinyint unsigned , | |
MentionSourceName varchar(255) , | |
MentionIdentifier varchar(1024) , | |
SentenceID int unsigned , | |
Actor1CharOffset int unsigned , | |
Actor2CharOffset int unsigned , | |
ActionCharOffset int unsigned , | |
InRawText boolean , | |
Confidence double , | |
MentionDocLen int unsigned , | |
MentionDocTone double , | |
MentionDocTranslationInfo varchar(1024) , | |
Extras varchar(255) | |
) | |
engine=CONNECT | |
table_type=CSV | |
sep_char='\t' | |
file_name='/data/rbates/gdelt/*mentions.CSV.zip' | |
zipped=1 | |
multiple=1 | |
option_list='Entry=*.CSV' | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment