Poul Petersen petersen-poul

@petersen-poul
petersen-poul / Active Learning - Diabetes Example
Last active August 29, 2015 14:02
A simplified example of Active Learning using clustering and decision trees on the diabetes dataset. This IPython notebook was used for a demonstration during the BigML Spring 2014 Webinar, which can be viewed here: http://youtu.be/uG-vXFyCcms
import bigml, sys, csv, time
from bigml.api import BigML

api = BigML(dev_mode=True)

# Create a source from the public sample CSV and wait for it to finish
source = api.create_source("s3://bigml-public/csv/lc_sample.csv.gz", {"name": "LC Source"})
api.ok(source)
source = api.get_source(source)

# Build a dataset from the finished source
lc_dataset = api.create_dataset(source, {"name": "LC Dataset"})
@petersen-poul
petersen-poul / highways.py
Created November 26, 2014 21:54
US Primary Interstates - Feature Engineering Example
#!/usr/bin/env python
import bigml
from bigml.api import BigML
# You need to define BIGML_USERNAME and BIGML_API_KEY in your environment, or
# add them here:
#api = BigML(username, api_key, dev_mode=True)
api = BigML(dev_mode=True)
@petersen-poul
petersen-poul / json-extract.json
Last active May 12, 2016 16:48
JSON key/val extraction
{
"name": "JSON key/val extraction",
"description": "Given a dataset field containing JSON documents and a key, this WhizzML script creates a new feature with the JSON values. This is a hack and *NOT* a valid JSON parser",
"inputs": [
{
"name": "dataset-in",
"type": "dataset-id",
"description": "Dataset to transform by extracting JSON values."
},
{
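The description above is explicit that this is a hack, not a valid JSON parser. In Python the same idea can be sketched with a regular expression; the function name is illustrative and this is not the WhizzML implementation:

```python
import re

def extract_json_value(doc, key):
    """Naively pull the string value for `key` out of a JSON document.

    Mirrors the "hack" approach: a regex match on "key": "value" pairs,
    not a real JSON parser, so nesting and escaped quotes are not handled.
    """
    match = re.search(r'"%s"\s*:\s*"([^"]*)"' % re.escape(key), doc)
    return match.group(1) if match else None

row = '{"city": "Corvallis", "state": "OR"}'
print(extract_json_value(row, "state"))  # -> OR
```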
{
"name": "Redfin Deals",
"description": "Given a source of sold homes and listed homes, builds a model to predict the price and then shows possible deals.",
"inputs": [
{
"name": "redfin-sold-source",
"type": "source-id",
"description": "Source of sold homes from Redfin."
},
{
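The "shows possible deals" step can be sketched as comparing each listing's price against the model's prediction; the toy model and field names below are illustrative assumptions, not the actual script:

```python
def find_deals(listings, predict_price, margin=0.10):
    """Return listings priced at least `margin` below the model's prediction.

    `predict_price` stands in for the trained price model (e.g. a BigML
    model's prediction call); each listing is a dict with a "price" field.
    """
    deals = []
    for home in listings:
        predicted = predict_price(home)
        if home["price"] <= predicted * (1 - margin):
            deals.append(home)
    return deals

# Toy model: price by square footage at $200/sqft (purely illustrative)
model = lambda h: h["sqft"] * 200
listings = [
    {"address": "12 Oak St", "sqft": 1500, "price": 240000},  # 20% under prediction
    {"address": "48 Elm St", "sqft": 1000, "price": 195000},  # close to prediction
]
print([h["address"] for h in find_deals(listings, model)])  # -> ['12 Oak St']
```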
{
"name": "Lat/Long Distance from a reference point",
"description": "Extends a dataset with the distance in meters between lat/long fields and a reference point.",
"inputs": [
{
"name": "dataset-in",
"type": "dataset-id",
"description": "Dataset for extending with distance calculation."
},
{
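The distance-in-meters calculation can be sketched with the haversine formula, a common choice for great-circle distance; the WhizzML script's exact formula is not shown here, so treat this as an assumption:

```python
from math import radians, sin, cos, asin, sqrt

def haversine_m(lat1, lon1, lat2, lon2):
    """Great-circle distance in meters between two lat/long points."""
    r = 6371000.0  # mean Earth radius in meters
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    return 2 * r * asin(sqrt(a))

# One degree of longitude at the equator is roughly 111.2 km
print(round(haversine_m(0, 0, 0, 1)))  # -> 111195
```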
{
"name": "Minimum Scale for Cluster Class Purity",
"description": "Given a dataset and a categorical field, finds the minimum scale required to create class purity in the cluster with k = number of classes.",
"inputs": [
{
"name": "dataset",
"type": "dataset-id",
"description": "Dataset to analyze."
},
{
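The "class purity" the script searches for can be sketched as a standard purity measure: the fraction of points whose cluster's majority class matches their own label. The scale search itself is omitted, and this is not the WhizzML implementation:

```python
from collections import Counter, defaultdict

def cluster_purity(assignments, labels):
    """Purity of a clustering: sum over clusters of the majority-class
    count, divided by the total number of points."""
    by_cluster = defaultdict(list)
    for cluster, label in zip(assignments, labels):
        by_cluster[cluster].append(label)
    majority = sum(Counter(members).most_common(1)[0][1]
                   for members in by_cluster.values())
    return majority / len(labels)

# Two clusters, one of them mixed: purity = (2 + 1) / 4
print(cluster_purity([0, 0, 1, 1], ["a", "a", "a", "b"]))  # -> 0.75
```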
{
"name": "1-Click Dataset Prefer-it-All",
"description": "Given a source, creates a 1-click dataset and then marks all non-preferred fields as preferred.",
"inputs": [
{
"name": "source",
"type": "source-id",
"description": "Source to process."
}
],
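Marking every non-preferred field as preferred amounts to building a fields-update payload over the dataset's fields map. A minimal Python sketch follows; the payload shape is an assumption modeled on BigML-style field resources, not taken from the script:

```python
def prefer_all_payload(fields):
    """Build an update payload flipping every non-preferred field to preferred.

    `fields` is a map of field id -> field info, as in a BigML dataset's
    "fields" resource (the exact payload shape here is an assumption).
    """
    return {
        "fields": {
            fid: {"preferred": True}
            for fid, info in fields.items()
            if not info.get("preferred", True)
        }
    }

fields = {
    "000000": {"name": "price", "preferred": True},
    "000001": {"name": "notes", "preferred": False},
}
print(prefer_all_payload(fields))  # -> {'fields': {'000001': {'preferred': True}}}
```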
{
"name": "Assign Field Types by Field Name",
"description": "Sometimes the automatic field detection does not assign field types correctly. This is especially a problem for fields with many missing values, since the detection process only takes a peek at the data to determine whether a field should be numeric, categorical, etc. This script lets you alter the field types for a source based on each field's name. Just put a partial match for the name in the list for the type you want to assign, and it will change all the fields whose names contain that string.",
"inputs": [
{
"name": "source",
"description": "Source to update.",
"type": "source-id"
},
{
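The substring-match idea in the description above can be sketched as a mapping from partial field names to optypes; the helper and payload names below are illustrative assumptions, not the script's actual code:

```python
def types_by_name(fields, patterns):
    """Assign a new optype to each field whose name contains a listed substring.

    `patterns` maps an optype (e.g. "categorical") to a list of partial
    field names; the returned dict sketches a source-update payload.
    """
    updates = {}
    for fid, info in fields.items():
        for optype, parts in patterns.items():
            if any(part in info["name"] for part in parts):
                updates[fid] = {"optype": optype}
    return {"fields": updates}

fields = {
    "000000": {"name": "zip_code"},
    "000001": {"name": "sale_price"},
}
patterns = {"categorical": ["zip", "id"]}
print(types_by_name(fields, patterns))  # -> {'fields': {'000000': {'optype': 'categorical'}}}
```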