Skip to content

Instantly share code, notes, and snippets.

@mafernando
Last active November 22, 2016 23:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mafernando/4e48c019b472638fa62f8a51a4df3523 to your computer and use it in GitHub Desktop.
Save mafernando/4e48c019b472638fa62f8a51a4df3523 to your computer and use it in GitHub Desktop.
CornCharts
##########################################################################################
;

# raw_corn table schema
# Staging table for the raw maize CSV import. No keys or constraints on
# purpose: rows are loaded as-is, so every column is nullable.
CREATE TABLE `raw_corn` (
  `domain_code` varchar(4) DEFAULT NULL,    # e.g. 'QC' (Crops), 'TP' (Crops and livestock products)
  `domain` varchar(30) DEFAULT NULL,
  `area_code` int(11) DEFAULT NULL,         # region code, e.g. 5101 = Eastern Africa
  `area_name` varchar(30) DEFAULT NULL,
  `element_code` int(11) DEFAULT NULL,      # e.g. 5510 Production, 5610 Import Quantity
  `element_name` varchar(20) DEFAULT NULL,
  `item_code` int(11) DEFAULT NULL,         # 56 = Maize (only item used by this script)
  `item_name` varchar(10) DEFAULT NULL,
  `year` varchar(4) DEFAULT NULL,           # stored as text; fixed-width 4-digit years compare safely as strings
  `value` int(11) DEFAULT NULL,             # tonnes or $1000 USD depending on element (see element comments below)
  `flag` varchar(1) DEFAULT NULL,           # presumably the source data-quality flag -- confirm against the export
  `flag_desc` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

# import is 1103 rows, drops metadata rows
# sanity check: confirm the CSV import landed the expected row count
select count(*) from raw_corn;

# get codes for dataset
# One lookup row per (code, name) pair, tagged with its dimension type.
# The four arms are mutually disjoint (each carries a different `type`
# literal) and each arm is already deduplicated by DISTINCT, so UNION ALL
# returns the same rows as UNION while skipping the redundant dedup pass.
# ORDER BY makes the output deterministic for eyeballing/diffing.
select distinct domain_code as code, domain as name, 'domain' as type from raw_corn
union all
select distinct area_code as code, area_name as name, 'area' as type from raw_corn
union all
select distinct element_code as code, element_name as name, 'element' as type from raw_corn
union all
select distinct item_code as code, item_name as name, 'item' as type from raw_corn
order by type, code;

# get years in range
# `year` is varchar(4); the string range comparison is safe because all
# values are fixed-width 4-digit years. ORDER BY added for deterministic output.
select distinct year
from raw_corn
where year >= '1961' and year <= '2014'
order by year;

# get regions - note: polynesia has no corn production
# Regions that have at least one row inside the year range.
# ORDER BY added for deterministic output.
select distinct area_name, area_code
from raw_corn
where year >= '1961' and year <= '2014'
order by area_code;

# QC    Crops   domain
# TP    Crops and livestock products    domain
# 5101  Eastern Africa  area
# 5102  Middle Africa   area
# 5103  Northern Africa area
# 5104  Southern Africa area
# 5105  Western Africa  area
# 5203  Northern America    area
# 5204  Central America area
# 5206  Caribbean   area
# 5207  South America   area
# 5301  Central Asia    area
# 5302  Eastern Asia    area
# 5303  Southern Asia   area
# 5304  South-Eastern Asia  area
# 5305  Western Asia    area
# 5401  Eastern Europe  area
# 5402  Northern Europe area
# 5403  Southern Europe area
# 5404  Western Europe  area
# 5501  Australia & New Zealand area
# 5502  Melanesia   area
# 5503  Micronesia  area
# 5504  Polynesia   area
# 5510  Production  element
# 5610  Import Quantity element
# 5622  Import Value    element
# 5910  Export Quantity element
# 5922  Export Value    element
# 56    Maize   item

# construct hfc view to derive variance for element dataset
# Densifies a single element series into one row per (area, year), emitting
# value 0 where no observation exists.  The element in scope is chosen by
# leaving exactly one literal-select line uncommented inside subquery `a`
# (currently: Export Quantity).  Uncomment the `create or replace view`
# line to materialize the result as `hfc`.
-- create or replace view hfc as
select
     a.area_code
    ,a.area_name
--  ,a.element_code
    ,a.element_name
    ,y.year 
--  ,r.domain_code
--  ,r.domain
--  ,r.area_code
--  ,r.area_name
--  ,r.element_code
--  ,r.element_name
--  ,r.item_code
--  ,r.item_name
--  ,r.year
    ,IF(r.value IS NULL, 0, r.value) as value
--  ,r.flag
--  ,r.flag_desc
from
    # get years in range of query
    # NOTE(review): the range ends at '2013' here but '2014' in the earlier
    # exploration queries -- confirm which bound is intended.
    (select distinct year from raw_corn where year >= '1961' and year <= '2013') as y
    # `on 1` below makes this a deliberate cross join: every year is paired
    # with every area so missing years can be filled with 0.
    left join 
    # bind domain and element against area to specify regions in scope
    # we're assuming that item is `Maize` all the way around
    (select 
--     	domain_code,domain,area_code,area_name,element_code,element_name	
--       'TP' as domain_code, 'Crops and livestock products' as domain, area_code as area_code, area_name as area_name,'5610' as element_code, 'Import Quantity' as element_name # Import Quantity (tonnes)
--       'TP' as domain_code, 'Crops and livestock products' as domain, area_code as area_code, area_name as area_name, '5622' as element_code, 'Import Value' as element_name # Import Value (USD $1000)
      'TP' as domain_code, 'Crops and livestock products' as domain, area_code as area_code, area_name as area_name, '5910' as element_code, 'Export Quantity' as element_name # Export Quantity (tonnes)
--       'TP' as domain_code, 'Crops and livestock products' as domain, area_code as area_code, area_name as area_name, '5922' as element_code, 'Export Value' as element_name # Export Value (USD $1000)
--          'QC' as domain_code, 'Crops' as domain, area_code as area_code, area_name as area_name, '5510' as element_code, 'Production' as element_name # Production      
    from 
        raw_corn 
    where 1 
-- 		and (domain_code = 'TP' and domain = 'Crops and livestock products' and element_code = '5610' and element_name = 'Import Quantity') # Import Quantity (tonnes)    
-- 		and (domain_code = 'TP' and domain = 'Crops and livestock products' and element_code = '5622' and element_name = 'Import Value') # Import Value ($1000 USD)    
-- 		and (domain_code = 'TP' and domain = 'Crops and livestock products' and element_code = '5910' and element_name = 'Export Quantity') # Export Quantity (tonnes)    
-- 		and (domain_code = 'TP' and domain = 'Crops and livestock products' and element_code = '5922' and element_name = 'Export Value') # Export Value ($1000 USD)    				
    # group by all six projected columns collapses raw_corn to one row per area
    group by 1,2,3,4,5,6) as a on 1
    # attach the actual observation for (area, element, year); r.* is NULL
    # when the year has no data, which the IF() above converts to 0
    left join raw_corn r on 1
        and r.domain_code = a.domain_code and r.domain = a.domain
        and r.area_code = a.area_code and r.area_name = a.area_name
        and r.element_code = a.element_code and r.element_name = a.element_name             
        and r.year = y.year         
where 1
--  and r.element_code = '5510' and r.element_name = 'Production'   
order by a.area_code, y.year
;

# build where clause by ranking areas in view
# Emits the body of a SQL CASE expression mapping each area name to a
# zero-based rank (used verbatim in the hard-coded CASE further below).
# @idx is a MySQL user variable acting as a row counter; the derived table
# is ordered by area_code so rank assignment matches the area_code order
# rather than whatever order DISTINCT happens to return.
# NOTE(review): relying on user-variable assignment order in a SELECT is
# not guaranteed by MySQL -- verify the output against the target server.
select
--   c.area_code, c.area_name, @idx := @idx + 1 as rank
    CONCAT('WHEN \'',c.area_name,'\' THEN ',@idx := @idx + 1) as clause
from
    (select distinct area_code, area_name from hfc c order by area_code) as c,
    (select @idx := -1) r   
;

# drop derivative tables (re-created by the commented-out CREATE TABLE
# selectors in the next statement)
drop table if exists hfc_deriv;
drop table if exists hfc_import;
drop table if exists hfc_export;

# create derivative table
# One row per (area, year) from hfc, enriched with chart coordinates and
# year-over-year stats:
#   x        - zero-based index of the year within the ordered result
#   y        - hard-coded zero-based rank of the area (generated by the
#              CASE-clause builder query above)
#   last_value / diff / variance - prior year's value, absolute change, and
#              percent change (0 when there is no prior value or it is 0)
# Uncomment exactly one `create table ... as` line to materialize the result
# under that name (deriv / import / export, matching the element built
# into hfc).
-- create table hfc_deriv as
-- create table hfc_import as
-- create table hfc_export as
select
     h.area_code
    ,h.area_name
    ,h.element_name
    ,h.year as year
    ,h.value as value
    # Year-index counter: @idx advances the first time each new h.year is
    # seen (rows arrive ordered by year -- see ORDER BY below); the
    # GREATEST(..., 1, @idx) - 1 wrapper yields the current @idx for every
    # row of that year.  The `(@idx := ...) AND (@curr_year := ...)` form
    # exists only to evaluate both assignments in one expression.
    # NOTE(review): this depends on user-variable evaluation order within a
    # SELECT, which MySQL does not guarantee -- verify on the target server.
    ,GREATEST( (CASE h.year WHEN @curr_year THEN @idx ELSE ( (@idx := @idx + 1) AND (@curr_year := h.year)) END), 1, @idx) - 1 AS x 
    # Fixed area -> row rank mapping (keep in sync with the builder output).
    ,(CASE h.area_name
        WHEN 'Eastern Africa' THEN 0
        WHEN 'Middle Africa' THEN 1
        WHEN 'Northern Africa' THEN 2
        WHEN 'Southern Africa' THEN 3
        WHEN 'Western Africa' THEN 4
        WHEN 'Northern America' THEN 5
        WHEN 'Central America' THEN 6
        WHEN 'Caribbean' THEN 7
        WHEN 'South America' THEN 8
        WHEN 'Central Asia' THEN 9
        WHEN 'Eastern Asia' THEN 10
        WHEN 'Southern Asia' THEN 11
        WHEN 'South-Eastern Asia' THEN 12
        WHEN 'Western Asia' THEN 13
        WHEN 'Eastern Europe' THEN 14
        WHEN 'Northern Europe' THEN 15
        WHEN 'Southern Europe' THEN 16
        WHEN 'Western Europe' THEN 17
        WHEN 'Australia & New Zealand' THEN 18
        WHEN 'Melanesia' THEN 19
        WHEN 'Micronesia' THEN 20
        WHEN 'Polynesia' THEN 21
        ELSE -1
    END) AS y   
--  ,h2.area_code
--  ,h2.area_name
--  ,h2.element_name
--  ,h2.year as last_year
    ,IF(h2.value IS NULL, 0, h2.value) as last_value
    ,IF(h.value - h2.value IS NULL, 0, h.value - h2.value) as diff
    ,IF(h2.value is null or h2.value = 0, 0, (h.value - h2.value) / h2.value * 100) as variance
from
    # self-join to the same area's prior-year row; `h.year - 1` coerces the
    # varchar year to a number for the match
    hfc h left join hfc h2 on h.area_code = h2.area_code and h.year - 1 = h2.year,
    # counter init: @idx starts at -1 so the first year gets x = 0
    (SELECT @idx := -1,  @curr_year := '') r
where 1
--  and domain_code = 'QC'
--  and year <= '2014'
--  and year >= '1961'
order by year asc, area_code asc    
;

# generate x-axis (year) and y-axis (area) categories
# Emits a single quoted, comma-separated list for a chart categories array
# (presumably Highcharts -- the series objects below use its linkedTo/stack
# options).  Toggle the commented select lines to emit years instead.
# NOTE(review): group_concat output is capped by group_concat_max_len
# (default 1024 bytes) -- raise the session variable if the list truncates.
select
--  group_concat(distinct ' \'',h.year,'\'' order by year) as value
    group_concat(distinct ' \'',h.area_name,'\'' order by area_code) as value
--  h.area_name, h.area_code
from
    hfc_deriv h 
--  group by h.area_name, h.area_code order by area_code
;

# generate bar & stacked data series
# Builds one JS series object per (area, stack) for a stacked import/export
# bar chart.  Import series carry `linkedTo: ':previous'` so each area shows
# as a single legend entry covering both stacks.
select
    concat('{ name: \'',h.area_name, IF(h.stack = 'import', '\', linkedTo: \':previous', ''), '\', stack: \'',h.stack,'\'', ', data: [', h.data, '] },') as object
from
    (select
         h.area_name
        # stack key derived from the element name
        ,(CASE h.element_name
        	WHEN 'Import Quantity' THEN 'import'
        	WHEN 'Export Quantity' THEN 'export'        	
        	ELSE 'unknown'        	
        END) AS stack
        # fixed area ordering so import/export series pairs line up
        # (keep in sync with the CASE in hfc_deriv)
        ,(CASE h.area_name
            WHEN 'Eastern Africa' THEN 0
            WHEN 'Middle Africa' THEN 1
            WHEN 'Northern Africa' THEN 2
            WHEN 'Southern Africa' THEN 3
            WHEN 'Western Africa' THEN 4
            WHEN 'Northern America' THEN 5
            WHEN 'Central America' THEN 6
            WHEN 'Caribbean' THEN 7
            WHEN 'South America' THEN 8
            WHEN 'Central Asia' THEN 9
            WHEN 'Eastern Asia' THEN 10
            WHEN 'Southern Asia' THEN 11
            WHEN 'South-Eastern Asia' THEN 12
            WHEN 'Western Asia' THEN 13
            WHEN 'Eastern Europe' THEN 14
            WHEN 'Northern Europe' THEN 15
            WHEN 'Southern Europe' THEN 16
            WHEN 'Western Europe' THEN 17
            WHEN 'Australia & New Zealand' THEN 18
            WHEN 'Melanesia' THEN 19
            WHEN 'Micronesia' THEN 20
            WHEN 'Polynesia' THEN 21
            ELSE -1
        END) AS rank         
        # per-series values, ordered chronologically
        ,group_concat(h.value order by year asc) as data
    from
--         hfc_deriv h
        # combine the import and export derivative tables.
        # NOTE(review): rows from the two tables differ in element_name, so
        # `union all` would return the same result without the dedup pass --
        # unless either table can itself contain duplicate rows; confirm
        # before changing.
        (select * from hfc_import union select * from hfc_export) as h
    group by 1,2  
    order by rank, stack asc) h
;

# generate heatmap data series
# Emits one JS point object per (area, year) cell: x/y grid coordinates,
# the value, and the year-over-year stats computed in hfc_deriv.  The
# commented lines toggle extra fields for debugging.
select
    concat(
         '{ code: ',  h.area_code
--      ,', area: \'', h.area_name,'\''
--      ,', year: ', h.year
        ,', value: ', h.value
        ,', x: ',h.x
        ,', y: ',h.y
        ,', last_value: ', h.last_value
        ,', diff: ', h.diff
        ,', variance: ', h.variance,' },'
    ) as value
from
    hfc_deriv h
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment