|
/******************************************************************** |
|
Table Definations to Store AI/ML Predictions |
|
|
|
A typical prediction consists of the following components - `train`, |
|
`test` and `validation` and the model has certain `accuracy` measured |
|
based on some metrics. Considering the following scenario, a JSON |
|
structure can be developed like: |
|
|
|
```json |
|
{ |
|
"accuracy" : { |
|
"_type" : "in-sample"/"validation"/etc. |
|
"metrics" : { |
|
# name of the metrics, for example `RMSE`/`MAPE`/etc. |
|
# and value as accuacy value, for example, |
|
"RMSE" : 10, |
|
"MAPE" : 2, |
|
} |
|
}, |
|
|
|
"train" : { |
|
# training dataset with in-sample/validation predictions |
|
<feature-1> : <values-1>, |
|
<feature-2> : <values-2>, |
|
... |
|
<feature-n> : <values-n>, |
|
|
|
# we can keep the y-values also, like |
|
<y> : <values> |
|
}, |
|
|
|
"test" : { |
|
# testing dataset with predictions |
|
<feature-1> : <values-1>, |
|
<feature-2> : <values-2>, |
|
... |
|
<feature-n> : <values-n>, |
|
|
|
# we can keep the y-values also, like |
|
# errors/accuracy can be dynamically calculated |
|
<y_actuals> : <values>, |
|
<y_predictions> : <values> |
|
}, |
|
} |
|
``` |
|
|
|
Like a typical NoSQL record, each type of key can have additional |
|
fields/values which can be controlled from an external script. Check |
|
the utility functions defined in `tblPredictions.py` for more |
|
information. |
|
|
|
|
|
Author : Debmalya Pramanik |
|
Version : v0.0.2-alpha |
|
|
|
Copywright © [2024] Debmalya Pramanik |
|
********************************************************************/ |
|
|
|
CREATE TABLE dbo.tblPredictions ( |
|
-- ? PK: `PrdictionUUID` - Recommended to use `UUID4()` as primary key, generate externally from py-script |
|
-- ! or, to reduce complexity (for single m/c application) set `PrdictionID IDENTITY(1, 1) PRIMARY KEY` |
|
PredictionUUID VARCHAR(36) PRIMARY KEY, |
|
|
|
-- ? Define the class/type of machine learning model, this is for information purpose |
|
ModelType VARCHAR(16) NULL DEFAULT NULL CHECK(ModelType IN ('REGRESSION', 'CLASSIFICATION')), |
|
AlgorithmType VARCHAR(16) NULL DEFAULT NULL CHECK(AlgorithmType IN ('SUPERVISED', 'SEMISUPERVISED', 'UNSUPERVISED', 'REINFORCEMENT')), |
|
|
|
-- ? The following parameters can be used to recreate the model from scratch |
|
-- recommended to keep model as `*.h5` file, thus all are set as optional, to reduce complexity |
|
framework VARCHAR(64) NULL DEFAULT NULL, -- sklearn/tensorflow/pytorch/etc. |
|
modelName VARCHAR(64) NULL DEFAULT NULL, -- LinearRegression()/DNN()/etc. |
|
modelParams NVARCHAR(MAX) NULL DEFAULT NULL, -- JSON with Model Parameters |
|
|
|
-- ? Optionally keep prediction feature label, generally `pd.DataFrame.columns[-1]` |
|
yFeatureName VARCHAR(64) NULL, |
|
|
|
-- ? Optionally keep information about when the prediction was created, and if updated |
|
CreatedOn DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- auto populated, no need to worry |
|
UpdatedOn DATETIME SPARSE NULL, -- only use on update query thus can be sparse |
|
|
|
-- ? NoSQL AI/ML Predictions as JSON Object in Table |
|
ModelPredictions NVARCHAR(MAX) NOT NULL, -- check structure from file documentation |
|
|
|
-- ? Added a remarks column for any optional remarks from user |
|
remarks VARCHAR(MAX) NULL, |
|
|
|
CONSTRAINT ck_model_params CHECK(ISJSON(modelParams) = 1), |
|
CONSTRAINT ck_model_predictions CHECK(ISJSON(ModelPredictions) = 1) |
|
); |