Skip to content

Instantly share code, notes, and snippets.

@cheesinglee
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cheesinglee/b4512a629bfbd94c7a14 to your computer and use it in GitHub Desktop.
Save cheesinglee/b4512a629bfbd94c7a14 to your computer and use it in GitHub Desktop.
BigML dataset scatter plot
{
"all_fields": true,
"category": 0,
"cluster": null,
"cluster_status": true,
"code": 200,
"columns": 15,
"created": "2013-05-07T20:30:05.554000",
"credits": 3.790340423584,
"description": "",
"download": {
"code": 0,
"excluded_input_fields": [
],
"header": true,
"input_fields": [
],
"message": "",
"preview": [
],
"separator": ","
},
"excluded_fields": [
],
"field_types": {
"categorical": 9,
"datetime": 0,
"numeric": 6,
"preferred": 14,
"text": 0,
"total": 15
},
"fields": {
"000000": {
"column_number": 0,
"datatype": "int8",
"name": "age",
"optype": "numeric",
"order": 0,
"preferred": true,
"summary": {
"bins": [
[
18.75643,
2410
],
[
21.51515,
1485
],
[
23.47642,
1675
],
[
25.48278,
1626
],
[
27.5094,
1702
],
[
29.51434,
1674
],
[
31.48252,
1716
],
[
33.50312,
1761
],
[
35.5062,
1774
],
[
37.4908,
1685
],
[
39.49317,
1610
],
[
41.49118,
1588
],
[
43.48461,
1494
],
[
46.38942,
2722
],
[
50.4325,
2252
],
[
53.47213,
879
],
[
55.46624,
785
],
[
57.50552,
724
],
[
59.46777,
667
],
[
61.46237,
558
],
[
63.47489,
438
],
[
65.45732,
328
],
[
67.4428,
271
],
[
69.45178,
197
],
[
71.48201,
139
],
[
73.44348,
115
],
[
75.50549,
91
],
[
77.44231,
52
],
[
80.28947,
76
],
[
83.95,
20
],
[
87.75,
4
],
[
90,
43
]
],
"maximum": 90,
"mean": 38.58165,
"median": 37.03324,
"minimum": 17,
"missing_count": 0,
"population": 32561,
"splits": [
18.56006,
20.0016,
21.38046,
22.68262,
23.90316,
25.14086,
26.39515,
27.6193,
28.82499,
30.03033,
31.21005,
32.396,
33.55405,
34.63274,
35.87617,
37.03324,
38.24651,
39.49294,
40.76573,
42.0444,
43.3639,
44.75256,
46.13703,
47.60107,
49.39145,
51.09725,
53.14627,
55.56526,
58.35547,
61.50785,
66.43583
],
"standard_deviation": 13.64043,
"sum": 1256257,
"sum_squares": 54526623,
"variance": 186.0614
}
},
"000001": {
"column_number": 1,
"datatype": "string",
"name": "workclass",
"optype": "categorical",
"order": 1,
"preferred": true,
"summary": {
"categories": [
[
"Private",
22696
],
[
"Self-emp-not-inc",
2541
],
[
"Local-gov",
2093
],
[
"State-gov",
1298
],
[
"Self-emp-inc",
1116
],
[
"Federal-gov",
960
],
[
"Without-pay",
14
],
[
"Never-worked",
7
]
],
"missing_count": 1836
}
},
"000002": {
"column_number": 2,
"datatype": "int32",
"name": "fnlwgt",
"optype": "numeric",
"order": 2,
"preferred": true,
"summary": {
"bins": [
[
40474.70552,
3046
],
[
71342.70027,
1091
],
[
107715.93908,
5368
],
[
156058.22275,
6743
],
[
198629.3617,
6945
],
[
236736.99872,
2347
],
[
277819.0126,
2937
],
[
320775.49275,
1035
],
[
355899.60679,
1503
],
[
406693.69448,
779
],
[
458207.99133,
346
],
[
507718.84314,
153
],
[
554716.51546,
97
],
[
600235.89286,
56
],
[
635092.07692,
13
],
[
663651.32143,
28
],
[
702975.69565,
23
],
[
748004,
19
],
[
803628.28571,
7
],
[
856317.25,
4
],
[
889965,
1
],
[
918003.25,
4
],
[
969190,
3
],
[
1034999,
3
],
[
1091484,
2
],
[
1125613,
1
],
[
1172992.5,
2
],
[
1226583,
1
],
[
1268339,
1
],
[
1366120,
1
],
[
1455435,
1
],
[
1484705,
1
]
],
"maximum": 1484705,
"mean": 189778.36651,
"median": 179997.99189,
"minimum": 12285,
"missing_count": 0,
"population": 32561,
"splits": [
34127.61977,
46774.92338,
61694.89321,
78542.8479,
94356.73663,
103498.24205,
111272.43926,
118129.41533,
125255.93694,
135159.04754,
144014.08077,
152056.8314,
159570.84405,
166333.59091,
173020.40619,
179997.99189,
185278.69,
189866.27029,
195682.90761,
201891.82105,
208585.02999,
215974.97018,
225143.72149,
236404.28237,
252204.79486,
266915.72372,
285830.24184,
307916.72651,
334341.70241,
364623.109,
414970.89241
],
"standard_deviation": 105549.9777,
"sum": 6179373392,
"sum_squares": 1.5354557645044e+15,
"variance": 11140797791.842
}
},
"000003": {
"column_number": 3,
"datatype": "string",
"name": "education",
"optype": "categorical",
"order": 3,
"preferred": true,
"summary": {
"categories": [
[
"HS-grad",
10501
],
[
"Some-college",
7291
],
[
"Bachelors",
5355
],
[
"Masters",
1723
],
[
"Assoc-voc",
1382
],
[
"11th",
1175
],
[
"Assoc-acdm",
1067
],
[
"10th",
933
],
[
"7th-8th",
646
],
[
"Prof-school",
576
],
[
"9th",
514
],
[
"12th",
433
],
[
"Doctorate",
413
],
[
"5th-6th",
333
],
[
"1st-4th",
168
],
[
"Preschool",
51
]
],
"missing_count": 0
}
},
"000004": {
"column_number": 4,
"datatype": "int8",
"name": "education-num",
"optype": "numeric",
"order": 4,
"preferred": true,
"summary": {
"counts": [
[
1,
51
],
[
2,
168
],
[
3,
333
],
[
4,
646
],
[
5,
514
],
[
6,
933
],
[
7,
1175
],
[
8,
433
],
[
9,
10501
],
[
10,
7291
],
[
11,
1382
],
[
12,
1067
],
[
13,
5355
],
[
14,
1723
],
[
15,
576
],
[
16,
413
]
],
"maximum": 16,
"mean": 10.08068,
"median": 9.72591,
"minimum": 1,
"missing_count": 0,
"population": 32561,
"standard_deviation": 2.57272,
"sum": 328237,
"sum_squares": 3524363,
"variance": 6.61889
}
},
"000005": {
"column_number": 5,
"datatype": "string",
"name": "marital-status",
"optype": "categorical",
"order": 5,
"preferred": true,
"summary": {
"categories": [
[
"Married-civ-spouse",
14976
],
[
"Never-married",
10683
],
[
"Divorced",
4443
],
[
"Separated",
1025
],
[
"Widowed",
993
],
[
"Married-spouse-absent",
418
],
[
"Married-AF-spouse",
23
]
],
"missing_count": 0
}
},
"000006": {
"column_number": 6,
"datatype": "string",
"name": "occupation",
"optype": "categorical",
"order": 6,
"preferred": true,
"summary": {
"categories": [
[
"Prof-specialty",
4140
],
[
"Craft-repair",
4099
],
[
"Exec-managerial",
4066
],
[
"Adm-clerical",
3770
],
[
"Sales",
3650
],
[
"Other-service",
3295
],
[
"Machine-op-inspct",
2002
],
[
"Transport-moving",
1597
],
[
"Handlers-cleaners",
1370
],
[
"Farming-fishing",
994
],
[
"Tech-support",
928
],
[
"Protective-serv",
649
],
[
"Priv-house-serv",
149
],
[
"Armed-Forces",
9
]
],
"missing_count": 1843
}
},
"000007": {
"column_number": 7,
"datatype": "string",
"name": "relationship",
"optype": "categorical",
"order": 7,
"preferred": true,
"summary": {
"categories": [
[
"Husband",
13193
],
[
"Not-in-family",
8305
],
[
"Own-child",
5068
],
[
"Unmarried",
3446
],
[
"Wife",
1568
],
[
"Other-relative",
981
]
],
"missing_count": 0
}
},
"000008": {
"column_number": 8,
"datatype": "string",
"name": "race",
"optype": "categorical",
"order": 8,
"preferred": true,
"summary": {
"categories": [
[
"White",
27816
],
[
"Black",
3124
],
[
"Asian-Pac-Islander",
1039
],
[
"Amer-Indian-Eskimo",
311
],
[
"Other",
271
]
],
"missing_count": 0
}
},
"000009": {
"column_number": 9,
"datatype": "string",
"name": "sex",
"optype": "categorical",
"order": 9,
"preferred": true,
"summary": {
"categories": [
[
"Male",
21790
],
[
"Female",
10771
]
],
"missing_count": 0
}
},
"00000a": {
"column_number": 10,
"datatype": "int32",
"name": "capital-gain",
"optype": "numeric",
"order": 10,
"preferred": true,
"summary": {
"bins": [
[
0.02291,
29855
],
[
583.27778,
36
],
[
1052.2963,
54
],
[
1603.7037,
54
],
[
2319.77922,
231
],
[
3022.86667,
225
],
[
3412.46763,
139
],
[
3972.7218,
133
],
[
4575.77419,
186
],
[
5109.11976,
167
],
[
5523.57895,
19
],
[
6097,
1
],
[
6459.96429,
28
],
[
6829.52941,
34
],
[
7305.35769,
260
],
[
7691.17361,
288
],
[
8614,
55
],
[
9413.07692,
26
],
[
10541.2459,
61
],
[
11678,
2
],
[
13550,
27
],
[
14184.89552,
67
],
[
15023.94318,
352
],
[
15831,
6
],
[
18481,
2
],
[
20051,
37
],
[
22040,
1
],
[
25206.13333,
15
],
[
27828,
34
],
[
34095,
5
],
[
41310,
2
],
[
99999,
159
]
],
"maximum": 99999,
"mean": 1077.64884,
"median": 5.30214,
"minimum": 0,
"missing_count": 0,
"population": 32561,
"splits": [
5.30214
],
"standard_deviation": 7385.29208,
"sum": 35089324,
"sum_squares": 1813719045084,
"variance": 54542539.17841
}
},
"00000b": {
"column_number": 11,
"datatype": "int16",
"name": "capital-loss",
"optype": "numeric",
"order": 11,
"preferred": true,
"summary": {
"bins": [
[
0,
31042
],
[
155,
1
],
[
213,
4
],
[
323,
3
],
[
419,
3
],
[
630.6,
15
],
[
810,
2
],
[
880,
6
],
[
974,
2
],
[
1102.22222,
9
],
[
1258,
4
],
[
1340,
7
],
[
1401.34483,
29
],
[
1490.65714,
70
],
[
1592.57059,
170
],
[
1667.54795,
73
],
[
1736.03125,
128
],
[
1887.39083,
458
],
[
1981.24706,
255
],
[
2050.56757,
37
],
[
2182.56818,
44
],
[
2254.45,
40
],
[
2340.36842,
19
],
[
2411.77083,
96
],
[
2567.19048,
21
],
[
2754,
2
],
[
2824,
10
],
[
3004,
2
],
[
3683,
2
],
[
3770,
2
],
[
3900,
2
],
[
4356,
3
]
],
"maximum": 4356,
"mean": 87.30383,
"median": 3.83993,
"minimum": 0,
"missing_count": 0,
"population": 32561,
"splits": [
3.83993
],
"standard_deviation": 402.96022,
"sum": 2842700,
"sum_squares": 5535171692,
"variance": 162376.93781
}
},
"00000c": {
"column_number": 12,
"datatype": "int8",
"name": "hours-per-week",
"optype": "numeric",
"order": 12,
"preferred": true,
"summary": {
"bins": [
[
1.54545,
44
],
[
3.97368,
152
],
[
6.17172,
99
],
[
9.34292,
452
],
[
12.11735,
196
],
[
15.34077,
672
],
[
19.89529,
1337
],
[
22.32308,
65
],
[
24.72786,
926
],
[
27.45752,
153
],
[
30.00433,
1154
],
[
32.12787,
305
],
[
35.28926,
1694
],
[
39.93949,
15767
],
[
42.40811,
370
],
[
44.98523,
2167
],
[
49.68917,
3378
],
[
52.15337,
163
],
[
55.10601,
849
],
[
59.96093,
1510
],
[
62.35714,
28
],
[
65.03943,
279
],
[
69.92079,
303
],
[
72.0274,
73
],
[
74.98507,
67
],
[
77.29412,
17
],
[
80.0365,
137
],
[
84.22414,
58
],
[
86.33333,
3
],
[
89.97297,
37
],
[
95.8,
10
],
[
98.88542,
96
]
],
"maximum": 99,
"mean": 40.43746,
"median": 40.14714,
"minimum": 1,
"missing_count": 0,
"population": 32561,
"splits": [
13.89277,
19.52004,
23.11502,
28.32244,
30.9092,
34.72876,
36.6596,
38.5764,
38.92213,
39.16923,
39.37233,
39.54891,
39.70725,
39.85206,
39.9863,
40.14714,
40.32441,
40.51773,
40.73245,
40.9778,
41.27248,
41.66936,
43.20011,
44.90808,
46.12464,
49.10874,
49.97571,
50.93479,
55.02331,
59.74827,
64.6363
],
"standard_deviation": 12.34743,
"sum": 1316684,
"sum_squares": 58207416,
"variance": 152.459
}
},
"00000d": {
"column_number": 13,
"datatype": "string",
"name": "native-country",
"optype": "categorical",
"order": 13,
"preferred": false,
"summary": {
"categories": [
[
"United-States",
29170
],
[
"Mexico",
643
],
[
"Philippines",
198
],
[
"Germany",
137
],
[
"Canada",
121
],
[
"Puerto-Rico",
114
],
[
"El-Salvador",
106
],
[
"India",
100
],
[
"Cuba",
95
],
[
"England",
90
],
[
"Jamaica",
81
],
[
"South",
80
],
[
"China",
75
],
[
"Italy",
73
],
[
"Dominican-Republic",
70
],
[
"Vietnam",
67
],
[
"Guatemala",
64
],
[
"Japan",
62
],
[
"Poland",
60
],
[
"Columbia",
59
],
[
"Taiwan",
51
],
[
"Haiti",
44
],
[
"Iran",
43
],
[
"Portugal",
37
],
[
"Nicaragua",
34
],
[
"Peru",
31
],
[
"France",
29
],
[
"Greece",
29
],
[
"Ecuador",
28
],
[
"Ireland",
24
],
[
"Hong",
20
],
[
"Trinadad&Tobago",
19
],
[
"Cambodia",
19
],
[
"Laos",
18
],
[
"Thailand",
18
],
[
"Yugoslavia",
16
],
[
"Outlying-US(Guam-USVI-etc)",
14
],
[
"Honduras",
13
],
[
"Hungary",
13
],
[
"Scotland",
12
],
[
"Holand-Netherlands",
1
]
],
"missing_count": 583
}
},
"00000e": {
"column_number": 14,
"datatype": "string",
"name": "income",
"optype": "categorical",
"order": 14,
"preferred": true,
"summary": {
"categories": [
[
"<=50K",
24720
],
[
">50K",
7841
]
],
"missing_count": 0
}
}
},
"fields_meta": {
"count": 15,
"limit": 1000,
"offset": 0,
"query_total": 15,
"total": 15
},
"locale": "en-US",
"missing_tokens": [
"",
"N\/A",
"n\/a",
"NULL",
"null",
"-",
"#DIV\/0",
"#REF!",
"#NAME?",
"NIL",
"nil",
"NA",
"na",
"#VALUE!",
"#NULL!",
"NaN",
"#N\/A",
"#NUM!",
"?"
],
"name": "adult's dataset",
"number_of_batchcentroids": 0,
"number_of_batchpredictions": 0,
"number_of_clusters": 0,
"number_of_ensembles": 1,
"number_of_evaluations": 2,
"number_of_models": 1,
"number_of_predictions": 0,
"objective_field": {
"column_number": 14,
"datatype": "string",
"id": "00000e",
"name": "income",
"optype": "categorical",
"order": 14
},
"price": 0,
"private": true,
"ranges": null,
"replacements": null,
"resource": "dataset\/5189644d925ded74ad00009f",
"rows": 32561,
"sample_rates": null,
"seeds": null,
"shared": false,
"size": 3030035,
"source": "source\/5189642d925ded74aa000082",
"source_status": true,
"status": {
"bytes": 3974460,
"code": 5,
"elapsed": 10447,
"field_errors": [
],
"message": "The dataset has been created",
"row_format_errors": [
],
"serialized_rows": 32561
},
"subscription": false,
"tags": [
],
"term_limit": 1000,
"updated": "2013-05-07T23:12:12.836000",
"user_metadata": {
}
}
<!DOCTYPE html>
<meta charset="utf-8">
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
X-Axis: <select class="axis" id="xaxis"></select>
Y-Axis: <select class="axis" id="yaxis"></select>
<script>
// Chart geometry using the D3 margin convention: `width` and `height` are the
// inner drawing area, and the margins are added back when sizing the <svg>.
var margin = {top: 40, right: 20, bottom: 50, left: 20},
    width = 960 - margin.left - margin.right,
    height = 500 - margin.bottom - margin.top;

// Create the outer <svg> at full size; `svg` ends up referring to the inner
// <g>, which is shifted right/down by the left/top margins.
var svg = d3.select("body").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Plot cell size and padding used for the scale ranges below.
// NOTE: the original re-declared `var width = 960` at this point, which
// silently replaced the inner width computed above with the outer width.
// Nothing in this script reads `width` afterwards, so the redeclaration was
// dropped and `width` keeps its margin-convention meaning.
var size = 150,
    padding = 19.5;

// Linear scales mapping onto a `size`-pixel cell with half a padding of inset
// on each side. No domain is set here, so D3's default domain applies until
// one is assigned.
var x = d3.scale.linear()
    .range([padding / 2, size - padding / 2]);

// The y range is reversed so larger values map to smaller pixel offsets.
var y = d3.scale.linear()
    .range([size - padding / 2, padding / 2]);

// Axis generators bound to the scales above, each asking for about 5 ticks.
var xAxis = d3.svg.axis()
    .scale(x)
    .orient("bottom")
    .ticks(5);

var yAxis = d3.svg.axis()
    .scale(y)
    .orient("left")
    .ticks(5);

// <defs> container appended to the first <svg> element in the document.
var defs = d3.select("svg").append("defs");
/**
 * Placeholder for histogram sampling.
 *
 * The `fields` argument is currently ignored; every call returns a new,
 * empty array.
 *
 * @param {*} fields - Unused at present.
 * @returns {Array} A fresh empty array.
 */
function sample_histogram(fields){
  // No sampling logic exists yet, so there are no points to return.
  return [];
}
// Load the dataset description and fill every ".axis" <select> with one
// <option> per numeric field name.
d3.json("adult.json", function(error, root){
  // d3.json passes an error as the first argument when the request or the
  // JSON parse fails; `root` is then undefined, so stop before touching it.
  if (error || !root || !root.fields) {
    console.error("Could not load field list from adult.json", error);
    return;
  }

  var fields = root.fields;
  var fieldnames = [];
  for (var f in fields){
    // Skip anything inherited through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(fields, f)) continue;
    var field = fields[f];
    // Only fields whose optype is "numeric" are offered as axis choices.
    if (field.optype === "numeric") fieldnames.push(field.name);
  }

  // Nested selection: for each ".axis" element, join the field names to its
  // <option> children and append one <option> per name not yet present.
  d3.select("body")
    .selectAll(".axis")
    .selectAll("option")
    .data(fieldnames)
    .enter()
    .append("option")
    .text(function(d){ return d; });
});
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment