Skip to content

Instantly share code, notes, and snippets.

@nxlogics
Last active January 29, 2024 19:53
Show Gist options
  • Save nxlogics/157cfcf13756b56d8e37081590a5d425 to your computer and use it in GitHub Desktop.
Save nxlogics/157cfcf13756b56d8e37081590a5d425 to your computer and use it in GitHub Desktop.
Keep track of all the open/proprietary data sources using the metadata table. The gist can also be used to create a new source, and query the table.
# -*- encoding: utf-8 -*-
"""
CLI Application for METADATA_DATASOURCE Queries
A simple terminal-based prompt to insert, update, or otherwise manipulate
the metadata table for data sources. The queries are fetched from the
`queries.py` file, and the execution is done from the `cli.py` based
application.
@author: nxlogics, Debmalya Pramanik
@version: v0.0.1
"""
import sqlalchemy as sa
from datasource import engine
class DataSource(object):
    """
    REST like API Services for METADATA_DATASOURCE

    The object works as a wrapper to enable REST-API like commits to
    fetch, update, or delete a data source. The service is configured
    w/o implementing a HTTP server or a full fledged flask/django
    like RESTful service, but by directly using SQL query.

    The optional columns are exposed as read-only properties which
    prompt the user on the terminal EVERY time they are accessed; the
    value is not cached on the instance.

    For more information on using SQL, Query Builder or ORM and its
    usage check: https://www.youtube.com/watch?v=x1fCJ7sUXCM
    """

    def __init__(self, DataSourceID : int = None, **kwargs) -> None:
        """
        :param DataSourceID: Primary key of an existing record, if known.
        :param kwargs: Optional ``DataSourceURI``, ``DataSourceName`` and
            ``DataSourceType``; any key not supplied defaults to ``None``.
        """
        self.DataSourceID = DataSourceID # if available, can be used to fetch record

        # ? may define the unique keys using the keyword arguments
        # ? the unique key can also be used for fetching records from db
        self.DataSourceURI = kwargs.get("DataSourceURI", None)
        self.DataSourceName = kwargs.get("DataSourceName", None)
        self.DataSourceType = kwargs.get("DataSourceType", None)

    @staticmethod
    def _parse_flag(answer):
        """
        Convert a raw Y/n answer into the integer flag stored in the table.

        :param answer: Text typed by the user (may be empty).
        :return: ``None`` for a blank answer, ``1`` for ``y``/``yes``
            (case-insensitive, surrounding whitespace ignored), else ``0``.
        """
        answer = answer.strip()
        if not answer:
            return None # blank > unknown, stored as NULL

        return 1 if answer.upper() in ("Y", "YES") else 0

    @classmethod
    def _ask_flag(cls, message):
        """Prompt with `message` and return the parsed Y/n flag."""
        return cls._parse_flag(input(message) or "")

    @staticmethod
    def _ask_text(message):
        """Prompt with `message`; a blank answer is returned as ``None``."""
        return input(message) or None

    @property
    def DataUpdateFrequency(self):
        """Prompt for the update frequency; ``None`` when left blank."""
        return self._ask_text("What is the Data Update Frequency? ")

    @property
    def DataAvblGranularity(self):
        """Prompt for the available granularity; ``None`` when left blank."""
        return self._ask_text("What is the Available Data Granularity? ")

    @property
    def APIAvailable(self):
        """Prompt whether an API is available; returns ``1``/``0``/``None``."""
        return self._ask_flag("API Available (Y/n)? ")

    @property
    def FreeAccessQuota(self):
        """Prompt for the free access quota; ``None`` when left blank."""
        return self._ask_text("Describe Free Acess Quota (eg. 1k API calls/month): ")

    @property
    def FreeOSSAvailable(self):
        """Prompt whether a free OSS license exists; returns ``1``/``0``/``None``."""
        return self._ask_flag("Free Open-Source Software License Available (Y/n)? ")

    @property
    def FreeStudentLicense(self):
        """Prompt whether a free student license exists; returns ``1``/``0``/``None``."""
        return self._ask_flag("Free Student License Available (Y/n)? ")

    @property
    def FreeDeveloperLicense(self):
        """Prompt whether a free developer license exists; returns ``1``/``0``/``None``."""
        return self._ask_flag("Free Developer License Available (Y/n)? ")

    @property
    def DataSourceDescription(self):
        """Prompt for a description; ``None`` when left blank."""
        return self._ask_text("Set a Data Source Description: ")

    @property
    def DataSourceUsageOptions(self):
        """Prompt for the tentative usage; ``None`` when left blank."""
        return self._ask_text("What is the Tentative use of the Data Source? ")

    def insert(self, engine : "sa.engine.base.Engine") -> bool:
        """
        Insert/Create/Register a New Data Source

        Register a new data source with an auto generated ID from
        the database, and return `bool` on insert success. All the
        column values are collected interactively from the terminal.

        :param engine: SQLAlchemy engine bound to the target database.
        :return: ``True`` once the statement has been executed and the
            transaction committed; database errors propagate to the caller.

        TODO: Create query to fetch and return the newly created ID
        """
        # named bind parameters keep each value tied to its column
        statement = sa.text("""
            INSERT INTO `meta.DATA_SOURCE` (
                DataSourceURI
                , DataSourceName
                , DataSourceType
                , DataUpdateFrequency
                , DataAvblGranularity
                , APIAvailable
                , FreeAccessQuota
                , FreeOSSAvailable
                , FreeStudentLicense
                , FreeDeveloperLicense
                , DataSourceDescription
                , DataSourceUsageOptions
            ) VALUES (
                :DataSourceURI
                , :DataSourceName
                , :DataSourceType
                , :DataUpdateFrequency
                , :DataAvblGranularity
                , :APIAvailable
                , :FreeAccessQuota
                , :FreeOSSAvailable
                , :FreeStudentLicense
                , :FreeDeveloperLicense
                , :DataSourceDescription
                , :DataSourceUsageOptions
            )
        """)

        self.DataSourceURI = input("URL/Documentation Link: ")
        self.DataSourceName = input("Set a Unique Data Source Name: ")
        self.DataSourceType = input("Set a Data Source Type (OSS/PROPRIETARY/OPEN DATA/etc.): ")

        # the dict literal is evaluated top to bottom, thus each property
        # below prompts the user in the same order as the column list
        values = {
            "DataSourceURI" : self.DataSourceURI
            , "DataSourceName" : self.DataSourceName
            , "DataSourceType" : self.DataSourceType
            , "DataUpdateFrequency" : self.DataUpdateFrequency
            , "DataAvblGranularity" : self.DataAvblGranularity
            , "APIAvailable" : self.APIAvailable
            , "FreeAccessQuota" : self.FreeAccessQuota
            , "FreeOSSAvailable" : self.FreeOSSAvailable
            , "FreeStudentLicense" : self.FreeStudentLicense
            , "FreeDeveloperLicense" : self.FreeDeveloperLicense
            , "DataSourceDescription" : self.DataSourceDescription
            , "DataSourceUsageOptions" : self.DataSourceUsageOptions
        }

        # `Engine.execute()` is removed in SQLAlchemy 2.0; `begin()` opens a
        # transaction that commits on success and rolls back on error
        with engine.begin() as connection:
            connection.execute(statement, values)

        return True
if __name__ == "__main__":
    # banner followed by the menu of supported operations; the trailing
    # newline on the separator produces the blank line below the title
    menu_lines = (
        "CLI Application for METADATA_DATASOURCE Queries",
        "===============================================\n",
        "Select ONE of the Choice from Below:",
        " 1. Create/Register a New Datasource: ",
    )
    print("\n".join(menu_lines))

    # a non-numeric answer raises `ValueError` from `int()`
    choice = int(input("Your Choice: "))
    data_source_ = DataSource()

    # guard clause: only option `1` is implemented as of now
    if choice != 1:
        raise ValueError(f"Choice `{choice}` is not yet implemented.")

    data_source_.insert(engine = engine) # run insert statement
# -*- encoding: utf-8 -*-
"""
A Metadata Table for Handling Multiple Data Sources
Multiple data sources (open source/proprietary/etc.) are required for the
`neuralNOD INC.` application, and they can be tracked or referenced
using the metadata table.
@author: nxlogics, neuralNOD INC.
@version: v0.0.1
"""
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# ? import and register https://github.com/neuralNOD/ndprdconfig
import ndprdconfig
# build the connection from the `ndprdconfig` package imported above; the
# meaning of `instance = "docker"` is defined by that package
db_config = ndprdconfig.DBConnection(instance = "docker")
# NOTE(review): presumably the `engine` that the CLI script imports via
# `from datasource import engine` - confirm this file is `datasource.py`
engine = db_config.connect()
# `sessionmaker()` returns a session *factory* bound to the engine, not a
# session; call `session()` to obtain an actual session object
session = sessionmaker(bind = engine)
# base class from which the ORM model(s) below are declared
declarative_base_ = declarative_base()
class METADATA_DATASOURCE(declarative_base_):
    """
    ORM Model for the `meta.DATA_SOURCE` Table

    One row is stored per registered data source. The URI, name and
    type are mandatory, while all the remaining columns are optional.
    """

    __tablename__ = "meta.DATA_SOURCE"

    # PK: Generate a New ID when a Source is Registered
    DataSourceID = sa.Column(
        sa.Integer,
        autoincrement=True,
        primary_key=True,
    )

    # UQ: Dataset URL Link for API/Documentation/etc.
    DataSourceURI = sa.Column(sa.String(256), nullable=False, unique=True)

    # UQ: Name of the Dataset, user-defined, set as unique
    DataSourceName = sa.Column(sa.String(64), nullable=False, unique=True)

    # Can be used for tracking type like open-source, proprietary, etc.
    DataSourceType = sa.Column(sa.String(32), nullable=False)

    # ? optional, the below information can be used for tracking site information
    # frequency of data update, `monthly on 15`, recommended to use cron styling
    DataUpdateFrequency = sa.Column(sa.String(32), nullable=True)

    # data granularity, eg. `city`, for demographic, or `minute` for time based, etc.
    DataAvblGranularity = sa.Column(sa.String(32), nullable=True)

    # ? optional, the below can be used for tracking payment/free usage information
    APIAvailable = sa.Column(sa.Integer, nullable=True)

    # eg. 1k API calls/month, etc. NULL > not available, undefined
    FreeAccessQuota = sa.Column(sa.String(128), nullable=True)

    # free open source software license available
    FreeOSSAvailable = sa.Column(sa.Integer, nullable=True)
    FreeStudentLicense = sa.Column(sa.Integer, nullable=True)
    FreeDeveloperLicense = sa.Column(sa.Integer, nullable=True)

    # ? optional, add description for the data source
    DataSourceDescription = sa.Column(sa.String(512), nullable=True)

    # linked modules/services with the data source
    DataSourceUsageOptions = sa.Column(sa.String(512), nullable=True)

    def __repr__(self) -> str:
        """Return a short representation with the ID, URI and source name."""
        summary = ", ".join((
            f"DataSourceID = {self.DataSourceID}",
            f"URI = {self.DataSourceURI}",
            f"Source Name = {self.DataSourceName}",
        ))
        return f"<{self.__tablename__}({summary})>"
if __name__ == "__main__":
    # running the module as a script issues CREATE TABLE for every model
    # registered on the declarative base, using the module-level engine
    declarative_base_.metadata.create_all(bind = engine)

METADATA - Data Source Table

A centralized metadata table for tracking and referencing all the different data sources.

Colab Notebook

⚠⚠⚠THIS CODE IS NOT FINALIZED, AND IS SUBJECT TO CHANGE⚠⚠⚠


The organization sources data from different open/proprietary data sources. The metadata table helps track these sources, and its primary key DataSourceID can be used globally to reference a source from sub-tables.

Getting Started

The code is publicly available as a GitHub gist, a simple platform for sharing code snippets with the community. To use the code, clone the gist and add it to the Python path:

git clone https://gist.github.com/ZenithClown/.git databases_
export PYTHONPATH="${PYTHONPATH}:databases_"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment