nickstenning/modelcomments.diff

## modelcomments.diff
--- model.json	2011-11-11 20:21:34.000000000 +0000
+++ modelwc.json	2011-11-11 21:00:31.000000000 +0000
@@ -8,7 +8,7 @@
 var model_proposal_fl2 = {
   "dataset": {
     /* Basic dataset metadata, largely unchanged. */
-    "schema_version": 3,
+    "schema_version": 3, // Not terribly important, but can't this just be "version"?
     "name": "greece",
     "label": "The Full Truth About The Economic Situation of Greece (abridged)",
     "description": "This data has been collected by my homeboy Sarkoz, .... ",
@@ -17,6 +17,8 @@
     * Regarding the terminology, "natures" are used in Eclipse for tag-typing:
     * http://www.eclipse.org/articles/Article-Builders/builders.html#2
     * */
+    // "natures" is a little obscure? Why not "tags"? Or, if we want to dictate the
+    // possible values, maybe "classifications" or something?
     "natures": ["government", "spending"],
     /* Other options: party, funding, grants, budget */

@@ -24,6 +26,7 @@
     * Allows for extensions such as "EU" (European Union). Sub-national granularity
     * is not considered in this draft.
     */
+    // Are we specifying defaults for some of these keys, or are they all mandatory?
     "country": "GR",

     /* Re #58: ISO 639-1 content language */
@@ -37,28 +40,41 @@
     * The specification of a year of valuation is optional and will be passed into
     * monetary transformations if needed.
     */
-    "currency": "EUR:2011"
+    "currency": "EUR:2011" // I like this "<ISO code>:<year>" format, but see my query below.

-    /* Re #253 (unique_keys): not the absence of unique_keys here. */
+    /* Re #253 (unique_keys): note the absence of unique_keys here. */
   },
   /* Re #208: Split of mapping and model core definition. */
   "dimensions": {
     /* Not using 'model' as a name as it also applies to the whole. */
+    // Agreed: this was confusing to many.

     "amount": {
       /* Q: do we still require default schema name "amount"? I'd say yes. */
+      // Yes.

       /* New type (supported since 0.10): */
       "type": "measure",

       /* implied (do we even need it?): */
       "datatype": "decimal", /* not 'float' */
+      // If "datatype": <anything-other-than-decimal> is a model validation
+      // error, then this should be a default, yes.

       "label": "Amount granted.",
       "description": "...",

       /* Re #252: currencies; explicit definition for this dimension. */
       "currency": "EUR:2011"
+      // I'm not sure of the point of the "currency" key on "dataset"
+      // if we require a specification of currency on the (required)
+      // "amount" dimension? Or is this optional? If it's optional,
+      // then either:
+      //
+      // a) It may differ from the value of `dataset.currency`, in which
+      //    case what does `dataset.currency` specify?
+      // b) It must be the same as `dataset.currency`, in which case why
+      //    do we need `dataset.currency` at all?

       /* Q: how does this imply the creation of normalized measures?
       *
@@ -72,16 +88,26 @@
       * Option 3: a conversion factor is stored in dataset metadata and
       *   conversion is performed on all output at query time.
       */
+      // As far as I can tell, all computations that we'd want to do on currency
+      // dimensions are linear, so I vote for option 4, in which a conversion
+      // factor is looked up in an independent database and any and all conversions
+      // are computed on-the-fly. I can't foresee this having any performance
+      // implications, but do correct me if you think that's wrong.
     },

     "time": {
       /* Q: do we still require default schema name "time"? I'd say yes. */
+      // Yes.

       /* New type (supported since 0.10): */
       "type": "date",
+      // Hmm. Maybe there's a good argument for this but why is this not
+      // "compound", with automagic subfields "year", "yearmonth", etc?

       /* Q: do we want to support date format strings? */
       "datatype": "date",
+      // Again, this should be a default for a dimension named "time", and
+      // probably a model validation error if set and not "date".

       "label": "Date of grant",
       "description": "...",
@@ -116,7 +142,7 @@
         /* Using dictionary instead of "name" key: */
         "name": {
           "datatype": "id",
-          "default_value": "undefined",
+          "default_value": "undefined", // To clarify: this is presumably the *unparsed* default value?

           /* There can be many of those: */
           "key": true
@@ -128,7 +154,7 @@
       },

       /* Include as facet in browser: */
-      "facet": true,
+      "facet": true, // Surely "browser_facet", then, with backwards compat for "facet"?

       "browser_column": true
     },
@@ -156,6 +182,9 @@
       * an attribute to specify the name of the dimension name.
       */
       "parent": "cofog1",
+      // Dimension hierarchies make me happy, but I don't have any idea
+      // what you mean by "the name of the dimension name"... Can you
+      // clarify this for me?

       "attributes": {
         "name": {"datatype": "id", "key": true},
@@ -168,8 +197,15 @@
     /* Q: do we really still need constant values? Don't see a use case
     * any longer with bunkered datasets.
     */
+    // We could perhaps replicate their behaviour pretty cheaply using
+    // "default_value" and then omitting the "mapping" entry for that
+    // dimension?

     /* Note that we're not requiring "to" and "from" any longer. */
+    // Whoa! We're not? Sorry if I've missed a whole discussion on this
+    // but what's the motivation? To allow simple visualisation of budgets
+    // and other datasets where the recipients are fuzzily specified?
+    // This is a discussion to be had on IRC, for sure.
   },

   "mapping": {
@@ -211,16 +247,25 @@
       "name": "default",
       "label": "By Beneficiary",
       "dimension": "dataset",
-      "dilldown": "beneficiary",
+      "drilldown": "beneficiary",
       "cuts": {}
     },

+    // Only one comment here: any particular reason you chose not to move to
+    // a dictionary for these too?
+    //
+    //     "default": {...}
+    //
+    // rather than
+    //
+    //     {"name": "default", ...}
+
     /* Dataset for bubbletree (multiple drilldowns): */
     {
       "name": "cofog",
       "label": "By Function",
       "dimension": "dataset",
-      "dilldown": ["cofog1", "cofog2"],
+      "drilldown": ["cofog1", "cofog2"],
       "cuts": {}
     },

@@ -229,7 +274,7 @@
       "name": "default",
       "label": "By Function",
       "dimension": "beneficiary",
-      "dilldown": "cofog1",
+      "drilldown": "cofog1",
       "cuts": {}
     }
   ]
	--- model.json 2011-11-11 20:21:34.000000000 +0000
	+++ modelwc.json 2011-11-11 21:00:31.000000000 +0000
	@@ -8,7 +8,7 @@
	var model_proposal_fl2 = {
	"dataset": {
	/* Basic dataset metadata, largely unchanged. */
	- "schema_version": 3,
	+ "schema_version": 3, // Not terribly important, but can't this just be "version"?
	"name": "greece",
	"label": "The Full Truth About The Economic Situation of Greece (abridged)",
	"description": "This data has been collected by my homeboy Sarkoz, .... ",
	@@ -17,6 +17,8 @@
	* Regarding the terminology, "natures" are used in Eclipse for tag-typing:
	* http://www.eclipse.org/articles/Article-Builders/builders.html#2
	* */
	+ // "natures" is a little obscure? Why not "tags"? Or, if we want to dictate the
	+ // possible values, maybe "classifications" or something?
	"natures": ["government", "spending"],
	/* Other options: party, funding, grants, budget */

	@@ -24,6 +26,7 @@
	* Allows for extensions such as "EU" (European Union). Sub-national granularity
	* is not considered in this draft.
	*/
	+ // Are we specifying defaults for some of these keys, or are they all mandatory?
	"country": "GR",

	/* Re #58: ISO 639-1 content language */
	@@ -37,28 +40,41 @@
	* The specification of a year of valuation is optional and will be passed into
	* monetary transformations if needed.
	*/
	- "currency": "EUR:2011"
	+ "currency": "EUR:2011" // I like this "<ISO code>:<year>" format, but see my query below.

	- /* Re #253 (unique_keys): not the absence of unique_keys here. */
	+ /* Re #253 (unique_keys): note the absence of unique_keys here. */
	},
	/* Re #208: Split of mapping and model core definition. */
	"dimensions": {
	/* Not using 'model' as a name as it also applies to the whole. */
	+ // Agreed: this was confusing to many.

	"amount": {
	/* Q: do we still require default schema name "amount"? I'd say yes. */
	+ // Yes.

	/* New type (supported since 0.10): */
	"type": "measure",

	/* implied (do we even need it?): */
	"datatype": "decimal", /* not 'float' */
	+ // If "datatype": <anything-other-than-decimal> is a model validation
	+ // error, then this should be a default, yes.

	"label": "Amount granted.",
	"description": "...",

	/* Re #252: currencies; explicit definition for this dimension. */
	"currency": "EUR:2011"
	+ // I'm not sure of the point of the "currency" key on "dataset"
	+ // if we require a specification of currency on the (required)
	+ // "amount" dimension? Or is this optional? If it's optional,
	+ // then either:
	+ //
	+ // a) It may differ from the value of `dataset.currency`, in which
	+ // case what does `dataset.currency` specify?
	+ // b) It must be the same as `dataset.currency`, in which case why
	+ // do we need `dataset.currency` at all?

	/* Q: how does this imply the creation of normalized measures?
	*
	@@ -72,16 +88,26 @@
	* Option 3: a conversion factor is stored in dataset metadata and
	* conversion is performed on all output at query time.
	*/
	+ // As far as I can tell, all computations that we'd want to do on currency
	+ // dimensions are linear, so I vote for option 4, in which a conversion
	+ // factor is looked up in an independent database and any and all conversions
	+ // are computed on-the-fly. I can't foresee this having any performance
	+ // implications, but do correct me if you think that's wrong.
	},

	"time": {
	/* Q: do we still require default schema name "time"? I'd say yes. */
	+ // Yes.

	/* New type (supported since 0.10): */
	"type": "date",
	+ // Hmm. Maybe there's a good argument for this but why is this not
	+ // "compound", with automagic subfields "year", "yearmonth", etc?

	/* Q: do we want to support date format strings? */
	"datatype": "date",
	+ // Again, this should be a default for a dimension named "time", and
	+ // probably a model validation error if set and not "date".

	"label": "Date of grant",
	"description": "...",
	@@ -116,7 +142,7 @@
	/* Using dictionary instead of "name" key: */
	"name": {
	"datatype": "id",
	- "default_value": "undefined",
	+ "default_value": "undefined", // To clarify: this is presumably the *unparsed* default value?

	/* There can be many of those: */
	"key": true
	@@ -128,7 +154,7 @@
	},

	/* Include as facet in browser: */
	- "facet": true,
	+ "facet": true, // Surely "browser_facet", then, with backwards compat for "facet"?

	"browser_column": true
	},
	@@ -156,6 +182,9 @@
	* an attribute to specify the name of the dimension name.
	*/
	"parent": "cofog1",
	+ // Dimension hierarchies make me happy, but I don't have any idea
	+ // what you mean by "the name of the dimension name"... Can you
	+ // clarify this for me?

	"attributes": {
	"name": {"datatype": "id", "key": true},
	@@ -168,8 +197,15 @@
	/* Q: do we really still need constant values? Don't see a use case
	* any longer with bunkered datasets.
	*/
	+ // We could perhaps replicate their behaviour pretty cheaply using
	+ // "default_value" and then omitting the "mapping" entry for that
	+ // dimension?

	/* Note that we're not requiring "to" and "from" any longer. */
	+ // Whoa! We're not? Sorry if I've missed a whole discussion on this
	+ // but what's the motivation? To allow simple visualisation of budgets
	+ // and other datasets where the recipients are fuzzily specified?
	+ // This is a discussion to be had on IRC, for sure.
	},

	"mapping": {
	@@ -211,16 +247,25 @@
	"name": "default",
	"label": "By Beneficiary",
	"dimension": "dataset",
	- "dilldown": "beneficiary",
	+ "drilldown": "beneficiary",
	"cuts": {}
	},

	+ // Only one comment here: any particular reason you chose not to move to
	+ // a dictionary for these too?
	+ //
	+ //     "default": {...}
	+ //
	+ // rather than
	+ //
	+ //     {"name": "default", ...}
	+
	/* Dataset for bubbletree (multiple drilldowns): */
	{
	"name": "cofog",
	"label": "By Function",
	"dimension": "dataset",
	- "dilldown": ["cofog1", "cofog2"],
	+ "drilldown": ["cofog1", "cofog2"],
	"cuts": {}
	},

	@@ -229,7 +274,7 @@
	"name": "default",
	"label": "By Function",
	"dimension": "beneficiary",
	- "dilldown": "cofog1",
	+ "drilldown": "cofog1",
	"cuts": {}
	}
	]