Johanan Ottensooser oatsandsugar

## Handset Telemetry pipeline JSON object
{'id': '0a275942-a069-48bd-9f70-07168334a149',
 'name': 'US Telemetry classification pipeline',
 'streams': [{'id': '35214455-ef86-48c6-8943-c1991ce34933',
   'isReady': True,
   'stream': {'source': {'type': 'GCS',
     'id': '4d034c34-4976-423e-b93a-379ec8645873',
     'bucket': 'dtl-handset-telemetry',
     'fileName': 'us_340M.csv',
     'format': 'Csv',
     'params': {}},

## Handset Telemetry pipeline definition
# Attach a datastore: European Sample Dataset

gcsdef_2 = GCSDatastoreDef(
    bucket = "dtl-handset-telemetry",
    file_name = "5K_eu_telemetry.csv",
    file_format = FileFormat.Csv,
)

EU_sample_handset_data = dtl.datastore.create(
    Datastore(

## Training data for handset telemetry ontology
# details about data store

us_telemetry_data_store_name = "US Telemetry 5k"
us_telemetry_data_store_id = GetDatastoreID(us_telemetry_data_store_name)
us_telemetry_data_path = "5K_us_telemetry.csv"

# ontology nodes, and training data columns

us_telemetry_training_col_dict = {
    "Packet Loss": "main_QOS_PacketLoss_LostPercentage",

## Handset Telemetry Ontology
wireless_ontology = Ontology(
    "Declassified Wireless Carrier Data",
    "This is for the purpose of cleaning and delivering safe data as a product.",
    [
        OntologyNode(
            "Sensitive Data",
            "This is data NOT to be distributed to 3rd parties.",
            [
                OntologyNode(
                    "Subscriber",

## Default arguments are your friend.py
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''):
    """
    Count variables of interest in one dataframe, write count into appropriate row of second dataframe.

    Keyword arguments:
    tally_dataset -- dataset in which the tally of the variable of interest is to be recorded
    tally_column -- column containing the variable that is the key to the tally (e.g. ZIP code)
    tally_count -- column in which the tally of the variable of interest is to be recorded
    feature_dataset -- dataset containing the variable whose occurrences are counted
    feature_column -- column containing the variable whose occurrence is the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column)

## No loops in loops.py
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''):
    """
    Count variables of interest in one dataframe, write count into appropriate row of second dataframe.

    Keyword arguments:
    tally_dataset -- dataset in which the tally of the variable of interest is to be recorded
    tally_column -- column containing the variable that is the key to the tally (e.g. ZIP code)
    tally_count -- column in which the tally of the variable of interest is to be recorded
    feature_dataset -- dataset containing the variable whose occurrences are counted
    feature_column -- column containing the variable whose occurrence is the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column)

## Loops of loops.py
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''):
    """
    Count variables of interest in one dataframe, write count into appropriate row of second dataframe.

    Keyword arguments:
    tally_dataset -- dataset in which the tally of the variable of interest is to be recorded
    tally_column -- column containing the variable that is the key to the tally (e.g. ZIP code)
    tally_count -- column in which the tally of the variable of interest is to be recorded
    feature_dataset -- dataset containing the variable whose occurrences are counted
    feature_column -- column containing the variable whose occurrence is the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column)

## Test 19: Structure by deduction |[BOSS LEVEL]| [2].JSON
{
    "1": {
        "A": 1,
        "": 2,
        "{2}": 97,
        "C": null,
        "D": null
    },
    "2": {
        "A": null,

## Test 19: Structure by deduction |[BOSS LEVEL]|.JSON
{
	"A": [1, null],
	"": [2, 98],
	"{2}": [null, 97],
	"C": [null, null],
	"D": [null, 96]
}

## Test 19: Structure by deduction |[BOSS LEVEL]|.scala
  // Test 19: Structure by deduction |[BOSS LEVEL]| no row or column is complete

  val BOSSA1: AbstractDataGraph = {
    val nodes = Map(
      0 -> DataNode(0, 0, None, None),
      1 -> DataNode(1, 0, Some("A"), Some(AbstractLong(1))),
      2 -> DataNode(2, 1, None, Some(AbstractLong(2))),
      3 -> DataNode(3, 2, None, None),
      4 -> DataNode(4, 3, Some("C"), None),
      5 -> DataNode(5, 4, Some("D"), None)
	{'id': '0a275942-a069-48bd-9f70-07168334a149',
	'name': 'US Telemetry classification pipeline',
	'streams': [{'id': '35214455-ef86-48c6-8943-c1991ce34933',
	'isReady': True,
	'stream': {'source': {'type': 'GCS',
	'id': '4d034c34-4976-423e-b93a-379ec8645873',
	'bucket': 'dtl-handset-telemetry',
	'fileName': 'us_340M.csv',
	'format': 'Csv',
	'params': {}},
	# Attach a datastore: European Sample Dataset

	gcsdef_2 = GCSDatastoreDef(
	bucket = "dtl-handset-telemetry",
	file_name = "5K_eu_telemetry.csv",
	file_format = FileFormat.Csv,
	)

	EU_sample_handset_data = dtl.datastore.create(
	Datastore(
	# details about data store

	us_telemetry_data_store_name = "US Telemetry 5k"
	us_telemetry_data_store_id = GetDatastoreID(us_telemetry_data_store_name)
	us_telemetry_data_path = "5K_us_telemetry.csv"

	# ontology nodes, and training data columns

	us_telemetry_training_col_dict = {
	"Packet Loss": "main_QOS_PacketLoss_LostPercentage",
	wireless_ontology = Ontology(
	"Declassified Wireless Carrier Data",
	"This is for the purpose of cleaning and delivering safe data as a product.",
	[
	OntologyNode(
	"Sensitive Data",
	"This is data NOT to be distributed to 3rd parties.",
	[
	OntologyNode(
	"Subscriber",
	def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''):
	"""
	Count variables of interest in one dataframe, write count into appropriate row of second dataframe.

	Keyword arguments:
	tally_dataset -- dataset in which the tally of the variable of interest is to be recorded
	tally_column -- column containing the variable that is the key to the tally (e.g. ZIP code)
	tally_count -- column in which the tally of the variable of interest is to be recorded
	feature_dataset -- dataset containing the variable whose occurrences are counted
	feature_column -- column containing the variable whose occurrence is the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column)
	{
	"1": {
	"A": 1,
	"": 2,
	"{2}": 97,
	"C": null,
	"D": null
	},
	"2": {
	"A": null,
	{
	"A": [1, null],
	"": [2, 98],
	"{2}": [null, 97],
	"C": [null, null],
	"D": [null, 96]
	}
	// Test 19: Structure by deduction \|[BOSS LEVEL]\| no row or column is complete

	val BOSSA1: AbstractDataGraph = {
	val nodes = Map(
	0 -> DataNode(0, 0, None, None),
	1 -> DataNode(1, 0, Some("A"), Some(AbstractLong(1))),
	2 -> DataNode(2, 1, None, Some(AbstractLong(2))),
	3 -> DataNode(3, 2, None, None),
	4 -> DataNode(4, 3, Some("C"), None),
	5 -> DataNode(5, 4, Some("D"), None)