Created
November 11, 2011 21:01
-
-
Save nickstenning/1359244 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- model.json 2011-11-11 20:21:34.000000000 +0000 | |
+++ modelwc.json 2011-11-11 21:00:31.000000000 +0000 | |
@@ -8,7 +8,7 @@ | |
var model_proposal_fl2 = { | |
"dataset": { | |
/* Basic dataset metadata, largely unchanged. */ | |
- "schema_version": 3, | |
+ "schema_version": 3, // Not terribly important, but can't this just be "version"? | |
"name": "greece", | |
"label": "The Full Truth About The Economic Situation of Greece (abridged)", | |
"description": "This data has been collected by my homeboy Sarkoz, .... ", | |
@@ -17,6 +17,8 @@ | |
* Regarding the terminology, "natures" are used in Eclipse for tag-typing: | |
* http://www.eclipse.org/articles/Article-Builders/builders.html#2 | |
* */ | |
+ // "natures" is a little obscure. Why not "tags"? Or, if we want to dictate the | |
+ // possible values, maybe "classifications" or something? | |
"natures": ["government", "spending"], | |
/* Other options: party, funding, grants, budget */ | |
@@ -24,6 +26,7 @@ | |
* Allows for extensions such as "EU" (European Union). Sub-national granularity | |
* is not considered in this draft. | |
*/ | |
+ // Are we specifying defaults for some of these keys, or are they all mandatory? | |
"country": "GR", | |
/* Re #58: ISO 639-1 content language */ | |
@@ -37,28 +40,41 @@ | |
* The specification of a year of valuation is optional and will be passed into | |
* monetary transformations if needed. | |
*/ | |
- "currency": "EUR:2011" | |
+ "currency": "EUR:2011" // I like this format "<ISO>:<year>", but see my query below. | |
- /* Re #253 (unique_keys): not the absence of unique_keys here. */ | |
+ /* Re #253 (unique_keys): note the absence of unique_keys here. */ | |
}, | |
/* Re #208: Split of mapping and model core definition. */ | |
"dimensions": { | |
/* Not using 'model' as a name as it also applies to the whole. */ | |
+ // Agreed: this was confusing to many. | |
"amount": { | |
/* Q: do we still require default schema name "amount"? I'd say yes. */ | |
+ // Yes. | |
/* New type (supported since 0.10): */ | |
"type": "measure", | |
/* implied (do we even need it?): */ | |
"datatype": "decimal", /* not 'float' */ | |
+ // If "datatype": <anything-other-than-decimal> is a model validation | |
+ // error, then this should be a default, yes. | |
"label": "Amount granted.", | |
"description": "...", | |
/* Re #252: currencies; explicit definition for this dimension. */ | |
"currency": "EUR:2011" | |
+ // I'm not sure of the point of the "currency" key on "dataset" | |
+ // if we require a specification of currency on the (required) | |
+ // "amount" dimension. Or is the key on "amount" optional? If it | |
+ // is optional, then either: | |
+ // | |
+ // a) It may differ from the value of `dataset.currency`, in which | |
+ // case what does `dataset.currency` specify? | |
+ // b) It must be the same as `dataset.currency`, in which case why | |
+ // do we need `dataset.currency` at all? | |
/* Q: how does this imply the creation of normalized measures? | |
* | |
@@ -72,16 +88,26 @@ | |
* Option 3: a conversion factor is stored in dataset metadata and | |
* conversion is performed on all output at query time. | |
*/ | |
+ // As far as I can tell, all computations that we'd want to do on currency | |
+ // dimensions are linear, so I vote for option 4, in which a conversion | |
+ // factor is looked up in an independent database and any and all conversions | |
+ // are computed on-the-fly. I can't foresee this having any performance | |
+ // implications, but do correct me if you think that's wrong. | |
}, | |
"time": { | |
/* Q: do we still require default schema name "time"? I'd say yes. */ | |
+ // Yes. | |
/* New type (supported since 0.10): */ | |
"type": "date", | |
+ // Hmm. Maybe there's a good argument for this but why is this not | |
+ // "compound", with automagic subfields "year", "yearmonth", etc? | |
/* Q: do we want to support date format strings? */ | |
"datatype": "date", | |
+ // Again, this should be a default for a dimension named "time", and | |
+ // probably a model validation error if set and not "date". | |
"label": "Date of grant", | |
"description": "...", | |
@@ -116,7 +142,7 @@ | |
/* Using dictionary instead of "name" key: */ | |
"name": { | |
"datatype": "id", | |
- "default_value": "undefined", | |
+ "default_value": "undefined", // To clarify: this is presumably the *unparsed* default value? | |
/* There can be many of those: */ | |
"key": true | |
@@ -128,7 +154,7 @@ | |
}, | |
/* Include as facet in browser: */ | |
- "facet": true, | |
+ "facet": true, // Surely "browser_facet", then, with backwards compat for "facet"? | |
"browser_column": true | |
}, | |
@@ -156,6 +182,9 @@ | |
* an attribute to specify the name of the dimension name. | |
*/ | |
"parent": "cofog1", | |
+ // Dimension hierarchies make me happy, but I don't have any idea | |
+ // what you mean by "the name of the dimension name"... Can you | |
+ // clarify this for me? | |
"attributes": { | |
"name": {"datatype": "id", "key": true}, | |
@@ -168,8 +197,15 @@ | |
/* Q: do we really still need constant values? Don't see a use case | |
* any longer with bunkered datasets. | |
*/ | |
+ // We could perhaps replicate their behaviour pretty cheaply using | |
+ // "default_value" and then omitting the "mapping" entry for that | |
+ // dimension? | |
/* Note that we're not requiring "to" and "from" any longer. */ | |
+ // Whoa! We're not? Sorry if I've missed a whole discussion on this | |
+ // but what's the motivation? To allow simple visualisation of budgets | |
+ // and other datasets where the recipients are fuzzily specified? | |
+ // This is a discussion to be had on IRC, for sure. | |
}, | |
"mapping": { | |
@@ -211,16 +247,25 @@ | |
"name": "default", | |
"label": "By Beneficiary", | |
"dimension": "dataset", | |
- "dilldown": "beneficiary", | |
+ "drilldown": "beneficiary", | |
"cuts": {} | |
}, | |
+ // Only one comment here: any particular reason you chose not to move to | |
+ // a dictionary for these too? | |
+ // | |
+ // "default": {...} | |
+ // | |
+ // rather than | |
+ // | |
+ // {"name": "default", ...} | |
+ | |
/* Dataset for bubbletree (multiple drilldowns): */ | |
{ | |
"name": "cofog", | |
"label": "By Function", | |
"dimension": "dataset", | |
- "dilldown": ["cofog1", "cofog2"], | |
+ "drilldown": ["cofog1", "cofog2"], | |
"cuts": {} | |
}, | |
@@ -229,7 +274,7 @@ | |
"name": "default", | |
"label": "By Function", | |
"dimension": "beneficiary", | |
- "dilldown": "cofog1", | |
+ "drilldown": "cofog1", | |
"cuts": {} | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment