Mause/gist:8349051

## gistfile1.txt
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Firstly, some imports for statistics manipulation;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function\n",
      "\n",
      "from csv import DictReader\n",
      "from operator import itemgetter\n",
      "from collections import Counter, defaultdict\n",
      "from itertools import chain"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We load in the csv data;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "with open('data.csv', 'r') as fh:\n",
      "    # pull every record into memory so the file can be closed right away\n",
      "    reader = DictReader(fh)\n",
      "    rows = list(reader)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We can then determine the numbers of each gender;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# tally how many class members are recorded under each gender\n",
      "gender_occurs = Counter(row['GENDER'] for row in rows)\n",
      "print(dict(gender_occurs))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "{'F': 14, 'M': 9}\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Voila, female and male numbers"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Now, we need to define the class ranges used to group the hand spans"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "classes = [\n",
      "    (160, 169),\n",
      "    (170, 179),\n",
      "    (180, 189),\n",
      "    (190, 199),\n",
      "    (200, 209),\n",
      "    (210, 219),\n",
      "    (220, 229),\n",
      "    (230, 239),\n",
      "    (240, 249),\n",
      "    (250, 259)\n",
      "]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Then we grab the hand widths of each participant, and recast 'em as floats;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# build a real list rather than chaining map(): on Python 3 map() is a\n",
      "# one-shot iterator, so re-running a later cell would see no widths at all\n",
      "hand_widths = [float(row['HANDSPAN (mm)']) for row in rows]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We'll store the outputs in a `defaultdict(list)`, and ones we cannot classify in a `set()`"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "output = defaultdict(list)\n",
      "unclassifiable = set()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Now we run through each width, and check if it is contained within a range;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for width in hand_widths:\n",
      "    for mini, maxi in classes:\n",
      "        # the bounds are listed as 160-169, 170-179, ... so compare against\n",
      "        # the start of the next class; `width < maxi` would drop 169.0 or 169.5\n",
      "        if mini <= width < maxi + 1:\n",
      "            output[mini, maxi].append(width)\n",
      "            break\n",
      "    else:\n",
      "        # the inner loop finished without `break`: no class contains this width\n",
      "        unclassifiable.add(width)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Finally, display the results;"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# sort the classes so the table reads in ascending order instead of dict order\n",
      "for bounds, widths in sorted(output.items()):\n",
      "    print('{}: {}'.format(bounds, len(widths)))\n",
      "\n",
      "# a set has no defined order either, so sort it for a stable listing\n",
      "print('Unclassifiable: {}'.format(sorted(unclassifiable)))\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(160, 169): 8\n",
        "(170, 179): 3\n",
        "(180, 189): 1\n",
        "(190, 199): 1\n",
        "(200, 209): 1\n",
        "(210, 219): 3\n",
        "(220, 229): 3\n",
        "(230, 239): 1\n",
        "Unclassifiable: [115.0, 260.0]\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 13
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Firstly, some imports for statistics manipulation;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"from __future__ import print_function\n",
	"\n",
	"from csv import DictReader\n",
	"from operator import itemgetter\n",
	"from collections import Counter, defaultdict\n",
	"from itertools import chain"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 6
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We load in the csv data;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"with open('data.csv', 'r') as fh:\n",
	"    # pull every record into memory so the file can be closed right away\n",
	"    reader = DictReader(fh)\n",
	"    rows = list(reader)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 7
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We can then determine the numbers of each gender;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# tally how many class members are recorded under each gender\n",
	"gender_occurs = Counter(row['GENDER'] for row in rows)\n",
	"print(dict(gender_occurs))"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"{'F': 14, 'M': 9}\n"
	]
	}
	],
	"prompt_number": 8
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Voila, female and male numbers"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Now, we need to define the class ranges used to group the hand spans"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"classes = [\n",
	" (160, 169),\n",
	" (170, 179),\n",
	" (180, 189),\n",
	" (190, 199),\n",
	" (200, 209),\n",
	" (210, 219),\n",
	" (220, 229),\n",
	" (230, 239),\n",
	" (240, 249),\n",
	" (250, 259)\n",
	"]"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 9
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Then we grab the hand widths of each participant, and recast 'em as floats;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# build a real list rather than chaining map(): on Python 3 map() is a\n",
	"# one-shot iterator, so re-running a later cell would see no widths at all\n",
	"hand_widths = [float(row['HANDSPAN (mm)']) for row in rows]"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 10
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We'll store the outputs in a `defaultdict(list)`, and ones we cannot classify in a `set()`"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"output = defaultdict(list)\n",
	"unclassifiable = set()"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 11
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Now we run through each width, and check if it is contained within a range;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"for width in hand_widths:\n",
	"    for mini, maxi in classes:\n",
	"        # the bounds are listed as 160-169, 170-179, ... so compare against\n",
	"        # the start of the next class; `width < maxi` would drop 169.0 or 169.5\n",
	"        if mini <= width < maxi + 1:\n",
	"            output[mini, maxi].append(width)\n",
	"            break\n",
	"    else:\n",
	"        # the inner loop finished without `break`: no class contains this width\n",
	"        unclassifiable.add(width)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 12
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Finally, display the results;"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# sort the classes so the table reads in ascending order instead of dict order\n",
	"for bounds, widths in sorted(output.items()):\n",
	"    print('{}: {}'.format(bounds, len(widths)))\n",
	"\n",
	"# a set has no defined order either, so sort it for a stable listing\n",
	"print('Unclassifiable: {}'.format(sorted(unclassifiable)))\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"(160, 169): 8\n",
	"(170, 179): 3\n",
	"(180, 189): 1\n",
	"(190, 199): 1\n",
	"(200, 209): 1\n",
	"(210, 219): 3\n",
	"(220, 229): 3\n",
	"(230, 239): 1\n",
	"Unclassifiable: [115.0, 260.0]\n"
	]
	}
	],
	"prompt_number": 13
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 13
	}
	],
	"metadata": {}
	}
	]
	}