Created
January 13, 2016 10:44
-
-
Save anshulkgupta93/a60b1b73ff2d17f997b4 to your computer and use it in GitHub Desktop.
Data analysis with MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"/home/anshul/DecisionStats/python-codes\n" | |
] | |
} | |
], | |
"source": [ | |
"cd /home/anshul/DecisionStats/python-codes/" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Import all Required Libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import csv\n", | |
"import json\n", | |
"import pprint\n", | |
"import sys\n", | |
"sys.path.append(\"/usr/local/lib/python2.7/dist-packages\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"DATAFILE = 'BigDiamonds.csv'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### This function will read the CSV file and convert it in Dictionary form so that can be stored in MongoDB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def process_file(filename):\n", | |
" data = []\n", | |
" with open(filename, \"r\") as f:\n", | |
" reader = csv.DictReader(f)\n", | |
" for line in reader:\n", | |
" line[\"\"]=int(line[\"\"])\n", | |
" if line[\"price\"]!=\"NA\":\n", | |
" line[\"price\"]=float(line[\"price\"])\n", | |
" line[\"carat\"]=float(line[\"carat\"])\n", | |
" if line[\"x\"]!=\"NA\":\n", | |
" line[\"x\"]=float(line[\"x\"])\n", | |
" if line[\"y\"]!=\"NA\": \n", | |
" line[\"y\"]=float(line[\"y\"])\n", | |
" if line[\"z\"]!=\"NA\": \n", | |
" line[\"z\"]=float(line[\"z\"])\n", | |
" line[\"depth\"]=float(line[\"depth\"])\n", | |
" data.append(line)\n", | |
" return data\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Function to store Documents in examples database and in collection BigDiamonds in MongoDB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def insert_data(data, db):\n", | |
"\n", | |
" db.BigDiamonds.insert_many(data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### This will store the csv data in json file and in MongoDB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def csvToMongo():\n", | |
" data = process_file(DATAFILE)\n", | |
" \n", | |
" with open('BigDiamonds.json',\"w\") as f:\n", | |
" json.dump(data,f)\n", | |
" \n", | |
" \n", | |
" from pymongo import MongoClient\n", | |
" client = MongoClient(\"mongodb://localhost:27017\")\n", | |
" db = client.examples\n", | |
" insert_data(data, db)\n", | |
" print db.BigDiamonds.find_one()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{u'': 1, u'cut': u'V.Good', u'color': u'K', u'clarity': u'I1', u'price': u'NA', u'measurements': u'3.96 x 3.95 x 2.52', u'carat': 0.25, u'depth': 63.7, u'cert': u'GIA', u'z': 2.52, u'table': u'59', u'y': 3.95, u'x': 3.96, u'_id': ObjectId('56962361ce42082cb0d619ef')}\n" | |
] | |
} | |
], | |
"source": [ | |
"csvToMongo()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### To run aggregate queries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def aggregate(db, pipeline):\n", | |
" return [doc for doc in db.BigDiamonds.aggregate(pipeline)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from pymongo import MongoClient\n", | |
"client = MongoClient('localhost:27017')\n", | |
"db = client.examples\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Viewing First 20 documents of the data in MongoDB" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{u'': 1,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619ef'),\n", | |
" u'carat': 0.25,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'K',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 63.7,\n", | |
" u'measurements': u'3.96 x 3.95 x 2.52',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'59',\n", | |
" u'x': 3.96,\n", | |
" u'y': 3.95,\n", | |
" u'z': 2.52}\n", | |
"{u'': 2,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f0'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 58.1,\n", | |
" u'measurements': u'4.00 x 4.05 x 2.30',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'61',\n", | |
" u'x': 4.0,\n", | |
" u'y': 4.05,\n", | |
" u'z': 2.3}\n", | |
"{u'': 3,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f1'),\n", | |
" u'carat': 0.34,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I2',\n", | |
" u'color': u'J',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 58.7,\n", | |
" u'measurements': u'4.56 x 4.53 x 2.67',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'58',\n", | |
" u'x': 4.56,\n", | |
" u'y': 4.53,\n", | |
" u'z': 2.67}\n", | |
"{u'': 4,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f2'),\n", | |
" u'carat': 0.21,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'D',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 60.6,\n", | |
" u'measurements': u'3.80 x 3.82 x 2.31',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'60',\n", | |
" u'x': 3.8,\n", | |
" u'y': 3.82,\n", | |
" u'z': 2.31}\n", | |
"{u'': 5,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f3'),\n", | |
" u'carat': 0.31,\n", | |
" u'cert': u'EGL',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'K',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 62.2,\n", | |
" u'measurements': u'4.35 x 4.26 x 2.68',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'59',\n", | |
" u'x': 4.35,\n", | |
" u'y': 4.26,\n", | |
" u'z': 2.68}\n", | |
"{u'': 6,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f4'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 64.4,\n", | |
" u'measurements': u'3.74 x 3.67 x 2.38',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'60',\n", | |
" u'x': 3.74,\n", | |
" u'y': 3.67,\n", | |
" u'z': 2.38}\n", | |
"{u'': 7,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f5'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 62.6,\n", | |
" u'measurements': u'3.72 x 3.65 x 2.31',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'63',\n", | |
" u'x': 3.72,\n", | |
" u'y': 3.65,\n", | |
" u'z': 2.31}\n", | |
"{u'': 8,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f6'),\n", | |
" u'carat': 0.22,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'D',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 59.2,\n", | |
" u'measurements': u'3.95 x 3.97 x 2.34',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'61',\n", | |
" u'x': 3.95,\n", | |
" u'y': 3.97,\n", | |
" u'z': 2.34}\n", | |
"{u'': 9,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f7'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'IGI',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'K',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 63.6,\n", | |
" u'measurements': u'3.87 x 3.90 x 2.47',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'57.5',\n", | |
" u'x': 3.87,\n", | |
" u'y': 3.9,\n", | |
" u'z': 2.47}\n", | |
"{u'': 10,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f8'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI1',\n", | |
" u'color': u'F',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 54.9,\n", | |
" u'measurements': u'3.83 x 4.00 x 2.14',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'65',\n", | |
" u'x': 3.83,\n", | |
" u'y': 4.0,\n", | |
" u'z': 2.14}\n", | |
"{u'': 11,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f9'),\n", | |
" u'carat': 0.24,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'F',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 62.5,\n", | |
" u'measurements': u'3.96 x 3.90 x 2.46',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'58',\n", | |
" u'x': 3.96,\n", | |
" u'y': 3.9,\n", | |
" u'z': 2.46}\n", | |
"{u'': 12,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619fa'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 60.2,\n", | |
" u'measurements': u'3.76 x 3.79 x 2.27',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'61',\n", | |
" u'x': 3.76,\n", | |
" u'y': 3.79,\n", | |
" u'z': 2.27}\n", | |
"{u'': 13,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619fb'),\n", | |
" u'carat': 0.22,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'D',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 59.7,\n", | |
" u'measurements': u'3.94 x 3.93 x 2.35',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'57',\n", | |
" u'x': 3.94,\n", | |
" u'y': 3.93,\n", | |
" u'z': 2.35}\n", | |
"{u'': 14,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619fc'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 55.5,\n", | |
" u'measurements': u'4.12 x 4.04 x 2.27',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'62',\n", | |
" u'x': 4.12,\n", | |
" u'y': 4.04,\n", | |
" u'z': 2.27}\n", | |
"{u'': 15,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619fd'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'F',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 62.5,\n", | |
" u'measurements': u'3.91 x 3.93 x 2.45',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'59',\n", | |
" u'x': 3.91,\n", | |
" u'y': 3.93,\n", | |
" u'z': 2.45}\n", | |
"{u'': 16,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619fe'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'IGI',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'J',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 63.7,\n", | |
" u'measurements': u'3.88 x 3.94 x 2.49',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'52.5',\n", | |
" u'x': 3.88,\n", | |
" u'y': 3.94,\n", | |
" u'z': 2.49}\n", | |
"{u'': 17,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619ff'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'VS2',\n", | |
" u'color': u'E',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 60.4,\n", | |
" u'measurements': u'3.81 x 3.74 x 2.28',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'62',\n", | |
" u'x': 3.81,\n", | |
" u'y': 3.74,\n", | |
" u'z': 2.28}\n", | |
"{u'': 18,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d61a00'),\n", | |
" u'carat': 0.29,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 64.0,\n", | |
" u'measurements': u'4.17 x 4.19 x 2.67',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'58',\n", | |
" u'x': 4.17,\n", | |
" u'y': 4.19,\n", | |
" u'z': 2.67}\n", | |
"{u'': 19,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d61a01'),\n", | |
" u'carat': 0.31,\n", | |
" u'cert': u'EGL',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'F',\n", | |
" u'cut': u'V.Good',\n", | |
" u'depth': 62.8,\n", | |
" u'measurements': u'4.33 x 4.27 x 2.67',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'59',\n", | |
" u'x': 4.33,\n", | |
" u'y': 4.27,\n", | |
" u'z': 2.67}\n", | |
"{u'': 20,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d61a02'),\n", | |
" u'carat': 0.21,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI1',\n", | |
" u'color': u'E',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 62.5,\n", | |
" u'measurements': u'3.81 x 3.73 x 2.35',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'66',\n", | |
" u'x': 3.81,\n", | |
" u'y': 3.73,\n", | |
" u'z': 2.35}\n" | |
] | |
} | |
], | |
"source": [ | |
"count=0\n", | |
"for doc in db.BigDiamonds.find():\n", | |
" if count==20:\n", | |
" break\n", | |
" pprint.pprint(doc)\n", | |
" count+=1\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Number of Documents" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"598024" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"db.BigDiamonds.find().count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Selection Query " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{u'': 2,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f0'),\n", | |
" u'carat': 0.23,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I1',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 58.1,\n", | |
" u'measurements': u'4.00 x 4.05 x 2.30',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'61',\n", | |
" u'x': 4.0,\n", | |
" u'y': 4.05,\n", | |
" u'z': 2.3}\n", | |
"{u'': 3,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f1'),\n", | |
" u'carat': 0.34,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'I2',\n", | |
" u'color': u'J',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 58.7,\n", | |
" u'measurements': u'4.56 x 4.53 x 2.67',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'58',\n", | |
" u'x': 4.56,\n", | |
" u'y': 4.53,\n", | |
" u'z': 2.67}\n", | |
"{u'': 6,\n", | |
" u'_id': ObjectId('56962361ce42082cb0d619f4'),\n", | |
" u'carat': 0.2,\n", | |
" u'cert': u'GIA',\n", | |
" u'clarity': u'SI2',\n", | |
" u'color': u'G',\n", | |
" u'cut': u'Good',\n", | |
" u'depth': 64.4,\n", | |
" u'measurements': u'3.74 x 3.67 x 2.38',\n", | |
" u'price': u'NA',\n", | |
" u'table': u'60',\n", | |
" u'x': 3.74,\n", | |
" u'y': 3.67,\n", | |
" u'z': 2.38}\n" | |
] | |
} | |
], | |
"source": [ | |
"for doc in db.BigDiamonds.find({\"cut\":\"Good\"})[:3]:\n", | |
" pprint.pprint(doc)\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Running aggregation queries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[{u'_id': u'null',\n", | |
" u'average_carat': 1.0722833163976402,\n", | |
" u'average_price': 8753.0179738863}]\n" | |
] | |
} | |
], | |
"source": [ | |
"def make_pipeline():\n", | |
" # complete the aggregation pipeline\n", | |
" pipeline = [{\"$match\":{\"price\": {\"$ne\":\"NA\"}\n", | |
" }\n", | |
" }, \n", | |
" {\"$group\":{\"_id\":\"null\",\n", | |
" \"average_price\":{\"$avg\":\"$price\"},\n", | |
" \"average_carat\":{\"$avg\":\"$carat\"}\n", | |
" }\n", | |
" }\n", | |
" ]\n", | |
" return pipeline\n", | |
"pipeline = make_pipeline()\n", | |
"result = aggregate(db, pipeline)\n", | |
"pprint.pprint(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[{u'_id': u'K', u'average_carat': 1.4950645585278706},\n", | |
" {u'_id': u'L', u'average_carat': 1.3632705053852925},\n", | |
" {u'_id': u'J', u'average_carat': 1.3475398796936426},\n", | |
" {u'_id': u'I', u'average_carat': 1.2712822628836897},\n", | |
" {u'_id': u'H', u'average_carat': 1.2099406596704285},\n", | |
" {u'_id': u'G', u'average_carat': 1.063840796640399},\n", | |
" {u'_id': u'F', u'average_carat': 0.9410531884197142},\n", | |
" {u'_id': u'E', u'average_carat': 0.8318823743354592},\n", | |
" {u'_id': u'D', u'average_carat': 0.8266182262664965}]\n" | |
] | |
} | |
], | |
"source": [ | |
"def make_pipeline():\n", | |
" # complete the aggregation pipeline\n", | |
" pipeline = [{\"$group\":{\"_id\":\"$color\",\n", | |
" \"average_carat\":{\"$avg\":\"$carat\"},\n", | |
" }\n", | |
" },\n", | |
" {\"$sort\":{\"average_carat\":-1}\n", | |
" }\n", | |
" ]\n", | |
" return pipeline\n", | |
"pipeline = make_pipeline()\n", | |
"result = aggregate(db, pipeline)\n", | |
"pprint.pprint(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[{u'_id': u'Ideal', u'average_carat': 1.120193559039977},\n", | |
" {u'_id': u'V.Good', u'average_carat': 1.0247596745924552},\n", | |
" {u'_id': u'Good', u'average_carat': 0.9003031166220351}]\n" | |
] | |
} | |
], | |
"source": [ | |
"def make_pipeline():\n", | |
" # complete the aggregation pipeline\n", | |
" pipeline = [{\"$group\":{\"_id\":\"$cut\",\n", | |
" \"average_carat\":{\"$avg\":\"$carat\"},\n", | |
" }\n", | |
" },\n", | |
" {\"$sort\":{\"average_carat\":-1}\n", | |
" }\n", | |
" ]\n", | |
" return pipeline\n", | |
"pipeline = make_pipeline()\n", | |
"result = aggregate(db, pipeline)\n", | |
"pprint.pprint(result)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment