I hereby claim:
- I am oatsandsugar on github.
- I am oatsandsugar (https://keybase.io/oatsandsugar) on keybase.
- I have a public key whose fingerprint is 2DE1 AD0C A8EB CCAA 4DDF 5296 681B DCAC 5475 A14F
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
// Test 19: Structure by deduction |[BOSS LEVEL]| no row or column is complete | |
val BOSSA1: AbstractDataGraph = { | |
val nodes = Map( | |
0 -> DataNode(0, 0, None, None), | |
1 -> DataNode(1, 0, Some("A"), Some(AbstractLong(1))), | |
2 -> DataNode(2, 1, None, Some(AbstractLong(2))), | |
3 -> DataNode(3, 2, None, None), | |
4 -> DataNode(4, 3, Some("C"), None), | |
5 -> DataNode(5, 4, Some("D"), None) |
{ | |
"A": [1, null], | |
"": [2, 98], | |
"{2}": [null, 97], | |
"C": [null, null], | |
"D": [null, 96] | |
} |
{ | |
"1": { | |
"A": 1, | |
"": 2, | |
"{2}": 97, | |
"C": null, | |
"D": null | |
}, | |
"2": { | |
"A": null, |
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''): | |
""" | |
Count variables of interest in one dataframe, write count into appropriate row of second dataframe. | |
Keyword arguments: | |
tally_dataset -- dataset in which the tally of the variable of interest is to be recorded | |
tally_column -- column containing the variable that is the key for the tally (e.g. ZIP code) | |
tally_count -- column in which the tally of the variable of interest is to be recorded | |
feature_dataset -- dataset containing the variable whose occurrences are counted | |
feature_column -- column containing the variable whose occurrences are the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column) |
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''): | |
""" | |
Count variables of interest in one dataframe, write count into appropriate row of second dataframe. | |
Keyword arguments: | |
tally_dataset -- dataset in which the tally of the variable of interest is to be recorded | |
tally_column -- column containing the variable that is the key for the tally (e.g. ZIP code) | |
tally_count -- column in which the tally of the variable of interest is to be recorded | |
feature_dataset -- dataset containing the variable whose occurrences are counted | |
feature_column -- column containing the variable whose occurrences are the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column) |
def count(tally_dataset, tally_column, tally_count, feature_dataset, feature_column, comparator = False, comparator_value = '', comparator_column = ''): | |
""" | |
Count variables of interest in one dataframe, write count into appropriate row of second dataframe. | |
Keyword arguments: | |
tally_dataset -- dataset in which the tally of the variable of interest is to be recorded | |
tally_column -- column containing the variable that is the key for the tally (e.g. ZIP code) | |
tally_count -- column in which the tally of the variable of interest is to be recorded | |
feature_dataset -- dataset containing the variable whose occurrences are counted | |
feature_column -- column containing the variable whose occurrences are the key in the count (e.g. each ZIP code in this column is recorded to tally_count according to row tally_column) |
wireless_ontology = Ontology( | |
"Declassified Wireless Carrier Data", | |
"This is for the purpose of cleaning and delivering safe data as a product.", | |
[ | |
OntologyNode( | |
"Sensitive Data", | |
"This is data NOT to be distributed to 3rd parties.", | |
[ | |
OntologyNode( | |
"Subscriber", |
# details about data store | |
us_telemetry_data_store_name = "US Telemetry 5k" | |
us_telemetry_data_store_id = GetDatastoreID(us_telemetry_data_store_name) | |
us_telemetry_data_path = "5K_us_telemetry.csv" | |
# ontology nodes, and training data columns | |
us_telemetry_training_col_dict = { | |
"Packet Loss": "main_QOS_PacketLoss_LostPercentage", |
# Attach a datastore: European Sample Dataset | |
gcsdef_2 = GCSDatastoreDef( | |
bucket = "dtl-handset-telemetry", | |
file_name = "5K_eu_telemetry.csv", | |
file_format = FileFormat.Csv, | |
) | |
EU_sample_handset_data = dtl.datastore.create( | |
Datastore( |