Skip to content

Instantly share code, notes, and snippets.

@antlauzon
Created March 16, 2021 03:47
Show Gist options
  • Save antlauzon/218a4c8f9eb7d4ed0313ec50bb776ece to your computer and use it in GitHub Desktop.
Save antlauzon/218a4c8f9eb7d4ed0313ec50bb776ece to your computer and use it in GitHub Desktop.
discogs_data.ddl
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.artist (
`id` int,
`name` string,
`realname` string,
`profile` string,
`data_quality` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/artist'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.artist_alias (
`artist_id` int,
`alias_name` string,
`alias_artist_id` int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/artist_alias'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.artist_image (
`artist_id` int,
`type` string,
`width` int,
`height` int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/artist_image'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.artist_namevariation (
`id` int,
`artist_id` int,
`name` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/artist_namevariation'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.artist_url (
`id` int,
`artist_id` int,
`url` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/artist_url'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.group_member (
`group_artist_id` int,
`member_artist_id` int,
`member_name` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/group_member'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.label (
`id` int,
`name` string,
`contact_info` string,
`profile` string,
`parent_id` int,
`parent_name` string,
`data_quality` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/label'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.label_image (
`label_id` int,
`type` string,
`width` int,
`height` int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/label_image'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.label_url (
`id` int,
`label_id` int,
`url` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/label_url'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master (
`id` int,
`title` string,
`year` int,
`main_release` int,
`data_quality` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master_artist (
`id` int,
`master_id` int,
`artist_id` int,
`artist_name` string,
`anv` string,
`position` int,
`join_string` string,
`role` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master_artist'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master_genre (
`id` int,
`master_id` int,
`genre` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master_genre'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master_image (
`master_id` int,
`type` string,
`width` int,
`height` int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master_image'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master_style (
`id` int,
`master_id` int,
`style` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master_style'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.master_video (
`id` int,
`master_id` int,
`duration` int,
`title` string,
`description` string,
`uri` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/master_video'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release (
`id` int,
`title` string,
`released` string,
`country` string,
`notes` string,
`data_quality` string,
`main` int,
`master_id` int,
`status` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_artist (
`id` int,
`release_id` int,
`artist_id` int,
`artist_name` string,
`extra` int,
`anv` string,
`position` int,
`join_string` string,
`role` string,
`tracks` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_artist'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_company (
`id` int,
`release_id` int,
`company_id` int,
`company_name` string,
`entity_type` string,
`entity_type_name` string,
`uri` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_company'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_format (
`id` int,
`release_id` int,
`name` string,
`qty` string,
`text_string` string,
`descriptions` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_format'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_genre (
`id` int,
`release_id` int,
`genre` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_genre'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_identifier (
`id` int,
`release_id` int,
`description` string,
`type` string,
`value` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_identifier'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_image (
`release_id` int,
`type` string,
`width` int,
`height` int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_image'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_label (
`id` int,
`release_id` int,
`label_id` int,
`label_name` string,
`catno` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_label'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_style (
`release_id` int,
`style` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_style'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_track (
`id` int,
`release_id` int,
`sequence` int,
`position` string,
`parent` int,
`title` string,
`duration` string,
`track_id` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_track'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_track_artist (
`id` int,
`track_id` string,
`release_id` int,
`track_sequence` int,
`artist_id` int,
`artist_name` string,
`extra` string,
`anv` string,
`position` int,
`join_string` string,
`role` string,
`tracks` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_track_artist'
CREATE EXTERNAL TABLE IF NOT EXISTS discogs.release_video (
`id` int,
`release_id` int,
`duration` int,
`title` string,
`description` string,
`uri` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('serialization.format' = '1', 'ignore.malformed.json' = 'true')
LOCATION 's3://{s3_bucket}/release_video'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment