@ZenithClown
Last active January 5, 2024
A simplified approach to storing NoSQL objects in MS-SQL Server, instead of using a full-fledged NoSQL database like MongoDB
/********************************************************************
Insert Statement for `tblPredictions` on New Prediction
General insert statement to populate the bare minimum details about
the model, and insert the same as a record into the table. The table
defaults are controlled from an external insert script.
Author : Debmalya Pramanik
Version : v0.0.1
Copyright © [2023] Debmalya Pramanik
********************************************************************/
INSERT INTO dbo.tblPredictions (PredictionUUID, ModelType, AlgorithmType, framework, modelName, modelParams, yFeatureName, ModelPredictions, remarks)
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?);
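The statement above uses `qmark`-style (`?`) placeholders, the convention used by `pyodbc` against MS-SQL Server. As a rough, self-contained illustration of how a parameterized insert like this is executed, the same statement can be exercised against an in-memory SQLite database (SQLite is only a stand-in here, since it also accepts `?` placeholders; the schema and sample values below are simplified for the sketch):

```python
import json
import sqlite3
import uuid

# stand-in, simplified schema; the real table lives in MS-SQL Server
conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE tblPredictions ("
    " PredictionUUID TEXT PRIMARY KEY, ModelType TEXT, AlgorithmType TEXT,"
    " framework TEXT, modelName TEXT, modelParams TEXT, yFeatureName TEXT,"
    " ModelPredictions TEXT, remarks TEXT)"
)

# the parameterized insert statement, as in `insert.sql`
statement = (
    "INSERT INTO tblPredictions (PredictionUUID, ModelType, AlgorithmType,"
    " framework, modelName, modelParams, yFeatureName, ModelPredictions, remarks)"
    " VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?);"
)

uid = str(uuid.uuid4()).upper()  # generated externally, as the schema recommends
conn.execute(statement, (
    uid, "REGRESSION", "SUPERVISED", "sklearn", "LinearRegression()",
    json.dumps({"fit_intercept": True}),  # modelParams as a JSON string
    "y",
    json.dumps({"train": {}, "test": {}}),  # ModelPredictions as a JSON string
    None,  # remarks left empty
))
conn.commit()
```

With `pyodbc`, the parameters are passed the same way to `cursor.execute`, which is what the utility module below does.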

NoSQL in MS-SQL Server

Use case: saving AI/ML predictions without the overhead of a separate NoSQL database

Colab Notebook

⚠⚠⚠ THIS CODE IS NOT FINALIZED, AND IS SUBJECT TO CHANGE ⚠⚠⚠


Traditional companies often store data in a relational database (like MS-SQL Server or Oracle DB); adding a dedicated NoSQL database such as MongoDB brings benefits, but also extra infrastructure. A unique challenge was proposed in practice: store the results of a machine learning model in the existing relational database. Since MS-SQL Server was already in use, this gist is a simplified approach to store and process the data as JSON and do the analysis with Python.

Getting Started

The code is publicly available as a GitHub gist, a simple platform for sharing code snippets with the community. To use the code, simply clone it like:

git clone https://gist.github.com/ZenithClown/45f043b1897bf9dae9aa18b3b798a47d.git databases_
export PYTHONPATH="${PYTHONPATH}:databases_"
# -*- encoding: utf-8 -*-
"""
Utility Commands to Populate `tblPredictions` Table
A set of useful functions to insert/update/process records defined
under the `tblPredictions` table. Check the function/class definition for
more information.
@author: Debmalya Pramanik
@version: v0.0.2
"""
import os
import json
from typing import Iterable
from uuid import uuid4 as UUID
import pandas as pd
# lambda utility to rename columns, keeping only alphanumeric characters
strFormatter = lambda string : "".join([s for s in string if s.isalnum()])

def jsonify(train : pd.DataFrame, test : pd.DataFrame, y_predicted : Iterable, y_predicted_validation : Iterable = None, accuracy : dict = None, **kwargs) -> str:
    """
    Return a JSON Equivalent Output for `tblPredictions`

    As defined under the table schema, the function transforms each
    of the dataframes into an equivalent JSON string.
    """

    frame_ = dict(
        accuracy = accuracy or dict(), # avoid a mutable default argument
        train = {strFormatter(column) : list(train[column].values) for column in train.columns} | {"predictions" : y_predicted_validation},
        test = {strFormatter(column) : list(test[column].values) for column in test.columns} | {"predictions" : y_predicted}
    )

    if kwargs: frame_ |= kwargs # any extra keyword arguments are stored as-is
    return json.dumps(frame_, default = str)
def insert(train : pd.DataFrame, test : pd.DataFrame, y_predicted : Iterable, y_predicted_validation : Iterable, cursor : object, **kwargs) -> str:
    """
    Insert a New Record into the Table

    Insert a new record, using the bare minimum statement, passing
    values as query parameters and preserving the table defaults for
    any field not supplied via keyword arguments.

    :param train, test: Training and testing dataframe respectively
    """

    _unique_id = str(UUID()).upper() # return this for future reference

    # read the parameterized insert statement shipped alongside this module
    with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "insert.sql"), "r") as f:
        statement = f.read()

    # accuracy metrics are fetched from user input, and are directly added to the output
    accuracy = kwargs.get("accuracy", dict())

    # provision to insert any other parameters into `ModelPredictions` as a dictionary
    _already_used_keys = ["ModelType", "AlgorithmType", "framework", "modelName", "modelParams", "yFeatureName", "remarks", "accuracy"]
    _other_parameters = {k : v for k, v in kwargs.items() if k not in _already_used_keys}

    cursor.execute(
        statement, # parameterized insert statement read from file
        _unique_id, # each record must have a unique key, returned for reference
        kwargs.get("ModelType", None),
        kwargs.get("AlgorithmType", None),
        kwargs.get("framework", None),
        kwargs.get("modelName", None),
        kwargs.get("modelParams", None),
        kwargs.get("yFeatureName", None),
        jsonify(train, test, y_predicted, y_predicted_validation, accuracy, **_other_parameters),
        kwargs.get("remarks", None)
    )
    cursor.commit()

    return _unique_id
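To make the serialization concrete, here is a hedged, self-contained sketch of how the `ModelPredictions` payload round-trips from a dataframe to a JSON string and back (the column names and values are made up for illustration; `.tolist()` is used so that plain Python numbers survive `json.dumps`):

```python
import json

import pandas as pd

# hypothetical training frame; "feature 1" demonstrates the column-name cleaning
train = pd.DataFrame({"feature 1": [1, 2, 3], "y": [10, 20, 30]})

# same column formatter as the module above: keep only alphanumeric characters
strFormatter = lambda string: "".join([s for s in string if s.isalnum()])

# build the `train` section of the payload, mirroring what `jsonify()` produces
payload = {
    "accuracy": {"_type": "in-sample", "metrics": {"RMSE": 10}},
    "train": {strFormatter(c): train[c].tolist() for c in train.columns}
             | {"predictions": [11, 19, 31]},  # hypothetical in-sample predictions
}
serialized = json.dumps(payload, default=str)  # value stored in `ModelPredictions`

# reading it back: the stored JSON reconstructs directly into a DataFrame
restored = pd.DataFrame(json.loads(serialized)["train"])
```

Since every per-column list has the same length, the restored dictionary maps straight onto a `pd.DataFrame` for analysis.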
/********************************************************************
Table Definitions to Store AI/ML Predictions
A typical prediction consists of the following components - `train`,
`test` and `validation` and the model has certain `accuracy` measured
based on some metrics. Considering the following scenario, a JSON
structure can be developed like:
```json
{
    "accuracy" : {
        "_type" : "in-sample"/"validation"/etc.,
        "metrics" : {
            # name of the metric, for example `RMSE`/`MAPE`/etc.,
            # and value as the accuracy value, for example:
            "RMSE" : 10,
            "MAPE" : 2
        }
    },
    "train" : {
        # training dataset with in-sample/validation predictions
        <feature-1> : <values-1>,
        <feature-2> : <values-2>,
        ...
        <feature-n> : <values-n>,
        # we can keep the y-values also, like
        <y> : <values>
    },
    "test" : {
        # testing dataset with predictions
        <feature-1> : <values-1>,
        <feature-2> : <values-2>,
        ...
        <feature-n> : <values-n>,
        # we can keep the y-values also, like
        # errors/accuracy can be dynamically calculated
        <y_actuals> : <values>,
        <y_predictions> : <values>
    }
}
```
Like a typical NoSQL record, each type of key can have additional
fields/values which can be controlled from an external script. Check
the utility functions defined in `tblPredictions.py` for more
information.
Author : Debmalya Pramanik
Version : v0.0.2-alpha
Copyright © [2024] Debmalya Pramanik
********************************************************************/
CREATE TABLE dbo.tblPredictions (
    -- ? PK: `PredictionUUID` - Recommended to use `UUID4()` as primary key, generated externally from py-script
    -- ! or, to reduce complexity (for a single m/c application) set `PredictionID INT IDENTITY(1, 1) PRIMARY KEY`
    PredictionUUID VARCHAR(36) PRIMARY KEY,

    -- ? Define the class/type of machine learning model, this is for information purposes
    ModelType VARCHAR(16) NULL DEFAULT NULL CHECK(ModelType IN ('REGRESSION', 'CLASSIFICATION')),
    AlgorithmType VARCHAR(16) NULL DEFAULT NULL CHECK(AlgorithmType IN ('SUPERVISED', 'SEMISUPERVISED', 'UNSUPERVISED', 'REINFORCEMENT')),

    -- ? The following parameters can be used to recreate the model from scratch
    -- recommended to keep the model as an `*.h5` file, thus all are optional, to reduce complexity
    framework VARCHAR(64) NULL DEFAULT NULL, -- sklearn/tensorflow/pytorch/etc.
    modelName VARCHAR(64) NULL DEFAULT NULL, -- LinearRegression()/DNN()/etc.
    modelParams NVARCHAR(MAX) NULL DEFAULT NULL, -- JSON with model parameters

    -- ? Optionally keep the prediction feature label, generally `pd.DataFrame.columns[-1]`
    yFeatureName VARCHAR(64) NULL,

    -- ? Optionally keep information about when the prediction was created, and if updated
    CreatedOn DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- auto populated, no need to worry
    UpdatedOn DATETIME SPARSE NULL, -- only set on update queries, thus can be sparse

    -- ? NoSQL AI/ML predictions as a JSON object in the table
    ModelPredictions NVARCHAR(MAX) NOT NULL, -- check structure from file documentation

    -- ? Added a remarks column for any optional remarks from the user
    remarks VARCHAR(MAX) NULL,

    CONSTRAINT ck_model_params CHECK(ISJSON(modelParams) = 1),
    CONSTRAINT ck_model_predictions CHECK(ISJSON(ModelPredictions) = 1)
);
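Since both actuals and predictions live inside the `ModelPredictions` JSON, error metrics can be recomputed on read instead of being stored, as the schema comments suggest. A minimal, standard-library-only sketch (the key names follow the documented structure; the stored values are hypothetical):

```python
import json

# hypothetical value fetched from the `ModelPredictions` column of one row
row_model_predictions = json.dumps({
    "test": {"yactuals": [10.0, 20.0, 30.0], "predictions": [12.0, 18.0, 33.0]}
})

# parse the stored JSON and pull out the test-set section
test = json.loads(row_model_predictions)["test"]

# dynamically calculate errors and RMSE from actuals vs. predictions
errors = [a - p for a, p in zip(test["yactuals"], test["predictions"])]
rmse = (sum(e * e for e in errors) / len(errors)) ** 0.5
```

The same pattern works for any metric, which is why the table only persists the raw series plus whatever accuracy summary the user chooses to supply.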