Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created April 18, 2021 18:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ischurov/ebe633748900375348b301eb976d08b8 to your computer and use it in GitHub Desktop.
Save ischurov/ebe633748900375348b301eb976d08b8 to your computer and use it in GitHub Desktop.
lesson23.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## Язык запросов SQL и SQL-базы данных: часть 2"
},
{
"metadata": {
"trusted": false
},
"id": "integrated-empty",
"cell_type": "code",
"source": "import sqlite3",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "authorized-roulette",
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "consistent-andrews",
"cell_type": "code",
"source": "imdb = pd.read_csv(\"https://github.com/Godoy/imdb-5000-movie-dataset/raw/master/data/movie_metadata.csv\")",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "breeding-throat",
"cell_type": "code",
"source": "conn = sqlite3.connect(\"imdb.sqlite\")",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "surgical-gambling",
"cell_type": "code",
"source": "imdb.to_sql(\"movies\", conn)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "centered-technology",
"cell_type": "code",
"source": "c = conn.cursor()",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "suburban-component",
"cell_type": "code",
"source": "conn.commit()",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "identified-library",
"cell_type": "code",
"source": "c.execute(\"\"\"\nSELECT * FROM movies\nLIMIT 10\n\"\"\").fetchall()",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "[(0,\n 'Color',\n 'James Cameron',\n 723.0,\n 178.0,\n 0.0,\n 855.0,\n 'Joel David Moore',\n 1000.0,\n 760505847.0,\n 'Action|Adventure|Fantasy|Sci-Fi',\n 'CCH Pounder',\n 'Avatar\\xa0',\n 886204,\n 4834,\n 'Wes Studi',\n 0.0,\n 'avatar|future|marine|native|paraplegic',\n 'http://www.imdb.com/title/tt0499549/?ref_=fn_tt_tt_1',\n 3054.0,\n 'English',\n 'USA',\n 'PG-13',\n 237000000.0,\n 2009.0,\n 936.0,\n 7.9,\n 1.78,\n 33000),\n (1,\n 'Color',\n 'Gore Verbinski',\n 302.0,\n 169.0,\n 563.0,\n 1000.0,\n 'Orlando Bloom',\n 40000.0,\n 309404152.0,\n 'Action|Adventure|Fantasy',\n 'Johnny Depp',\n \"Pirates of the Caribbean: At World's End\\xa0\",\n 471220,\n 48350,\n 'Jack Davenport',\n 0.0,\n 'goddess|marriage ceremony|marriage proposal|pirate|singapore',\n 'http://www.imdb.com/title/tt0449088/?ref_=fn_tt_tt_1',\n 1238.0,\n 'English',\n 'USA',\n 'PG-13',\n 300000000.0,\n 2007.0,\n 5000.0,\n 7.1,\n 2.35,\n 0),\n (2,\n 'Color',\n 'Sam Mendes',\n 602.0,\n 148.0,\n 0.0,\n 161.0,\n 'Rory Kinnear',\n 11000.0,\n 200074175.0,\n 'Action|Adventure|Thriller',\n 'Christoph Waltz',\n 'Spectre\\xa0',\n 275868,\n 11700,\n 'Stephanie Sigman',\n 1.0,\n 'bomb|espionage|sequel|spy|terrorist',\n 'http://www.imdb.com/title/tt2379713/?ref_=fn_tt_tt_1',\n 994.0,\n 'English',\n 'UK',\n 'PG-13',\n 245000000.0,\n 2015.0,\n 393.0,\n 6.8,\n 2.35,\n 85000),\n (3,\n 'Color',\n 'Christopher Nolan',\n 813.0,\n 164.0,\n 22000.0,\n 23000.0,\n 'Christian Bale',\n 27000.0,\n 448130642.0,\n 'Action|Thriller',\n 'Tom Hardy',\n 'The Dark Knight Rises\\xa0',\n 1144337,\n 106759,\n 'Joseph Gordon-Levitt',\n 0.0,\n 'deception|imprisonment|lawlessness|police officer|terrorist plot',\n 'http://www.imdb.com/title/tt1345836/?ref_=fn_tt_tt_1',\n 2701.0,\n 'English',\n 'USA',\n 'PG-13',\n 250000000.0,\n 2012.0,\n 23000.0,\n 8.5,\n 2.35,\n 164000),\n (4,\n None,\n 'Doug Walker',\n None,\n None,\n 131.0,\n None,\n 'Rob Walker',\n 131.0,\n None,\n 'Documentary',\n 'Doug Walker',\n 'Star Wars: Episode VII - The Force Awakens\\xa0 ',\n 8,\n 143,\n None,\n 0.0,\n None,\n 'http://www.imdb.com/title/tt5289954/?ref_=fn_tt_tt_1',\n None,\n None,\n None,\n None,\n None,\n None,\n 12.0,\n 7.1,\n None,\n 0),\n (5,\n 'Color',\n 'Andrew Stanton',\n 462.0,\n 132.0,\n 475.0,\n 530.0,\n 'Samantha Morton',\n 640.0,\n 73058679.0,\n 'Action|Adventure|Sci-Fi',\n 'Daryl Sabara',\n 'John Carter\\xa0',\n 212204,\n 1873,\n 'Polly Walker',\n 1.0,\n 'alien|american civil war|male nipple|mars|princess',\n 'http://www.imdb.com/title/tt0401729/?ref_=fn_tt_tt_1',\n 738.0,\n 'English',\n 'USA',\n 'PG-13',\n 263700000.0,\n 2012.0,\n 632.0,\n 6.6,\n 2.35,\n 24000),\n (6,\n 'Color',\n 'Sam Raimi',\n 392.0,\n 156.0,\n 0.0,\n 4000.0,\n 'James Franco',\n 24000.0,\n 336530303.0,\n 'Action|Adventure|Romance',\n 'J.K. Simmons',\n 'Spider-Man 3\\xa0',\n 383056,\n 46055,\n 'Kirsten Dunst',\n 0.0,\n 'sandman|spider man|symbiote|venom|villain',\n 'http://www.imdb.com/title/tt0413300/?ref_=fn_tt_tt_1',\n 1902.0,\n 'English',\n 'USA',\n 'PG-13',\n 258000000.0,\n 2007.0,\n 11000.0,\n 6.2,\n 2.35,\n 0),\n (7,\n 'Color',\n 'Nathan Greno',\n 324.0,\n 100.0,\n 15.0,\n 284.0,\n 'Donna Murphy',\n 799.0,\n 200807262.0,\n 'Adventure|Animation|Comedy|Family|Fantasy|Musical|Romance',\n 'Brad Garrett',\n 'Tangled\\xa0',\n 294810,\n 2036,\n 'M.C. Gainey',\n 1.0,\n '17th century|based on fairy tale|disney|flower|tower',\n 'http://www.imdb.com/title/tt0398286/?ref_=fn_tt_tt_1',\n 387.0,\n 'English',\n 'USA',\n 'PG',\n 260000000.0,\n 2010.0,\n 553.0,\n 7.8,\n 1.85,\n 29000),\n (8,\n 'Color',\n 'Joss Whedon',\n 635.0,\n 141.0,\n 0.0,\n 19000.0,\n 'Robert Downey Jr.',\n 26000.0,\n 458991599.0,\n 'Action|Adventure|Sci-Fi',\n 'Chris Hemsworth',\n 'Avengers: Age of Ultron\\xa0',\n 462669,\n 92000,\n 'Scarlett Johansson',\n 4.0,\n 'artificial intelligence|based on comic book|captain america|marvel cinematic universe|superhero',\n 'http://www.imdb.com/title/tt2395427/?ref_=fn_tt_tt_1',\n 1117.0,\n 'English',\n 'USA',\n 'PG-13',\n 250000000.0,\n 2015.0,\n 21000.0,\n 7.5,\n 2.35,\n 118000),\n (9,\n 'Color',\n 'David Yates',\n 375.0,\n 153.0,\n 282.0,\n 10000.0,\n 'Daniel Radcliffe',\n 25000.0,\n 301956980.0,\n 'Adventure|Family|Fantasy|Mystery',\n 'Alan Rickman',\n 'Harry Potter and the Half-Blood Prince\\xa0',\n 321795,\n 58753,\n 'Rupert Grint',\n 3.0,\n 'blood|book|love|potion|professor',\n 'http://www.imdb.com/title/tt0417741/?ref_=fn_tt_tt_1',\n 973.0,\n 'English',\n 'UK',\n 'PG',\n 250000000.0,\n 2009.0,\n 11000.0,\n 7.5,\n 2.35,\n 10000)]"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "hybrid-cleaning",
"cell_type": "code",
"source": "pd.read_sql_query(\"\"\"\nSELECT * FROM movies\nLIMIT 10\n\"\"\", conn)",
"execution_count": 12,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>index</th>\n <th>color</th>\n <th>director_name</th>\n <th>num_critic_for_reviews</th>\n <th>duration</th>\n <th>director_facebook_likes</th>\n <th>actor_3_facebook_likes</th>\n <th>actor_2_name</th>\n <th>actor_1_facebook_likes</th>\n <th>gross</th>\n <th>...</th>\n <th>num_user_for_reviews</th>\n <th>language</th>\n <th>country</th>\n <th>content_rating</th>\n <th>budget</th>\n <th>title_year</th>\n <th>actor_2_facebook_likes</th>\n <th>imdb_score</th>\n <th>aspect_ratio</th>\n <th>movie_facebook_likes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>Color</td>\n <td>James Cameron</td>\n <td>723.0</td>\n <td>178.0</td>\n <td>0.0</td>\n <td>855.0</td>\n <td>Joel David Moore</td>\n <td>1000.0</td>\n <td>760505847.0</td>\n <td>...</td>\n <td>3054.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>237000000.0</td>\n <td>2009.0</td>\n <td>936.0</td>\n <td>7.9</td>\n <td>1.78</td>\n <td>33000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>Color</td>\n <td>Gore Verbinski</td>\n <td>302.0</td>\n <td>169.0</td>\n <td>563.0</td>\n <td>1000.0</td>\n <td>Orlando Bloom</td>\n <td>40000.0</td>\n <td>309404152.0</td>\n <td>...</td>\n <td>1238.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>300000000.0</td>\n <td>2007.0</td>\n <td>5000.0</td>\n <td>7.1</td>\n <td>2.35</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>Color</td>\n <td>Sam Mendes</td>\n <td>602.0</td>\n <td>148.0</td>\n <td>0.0</td>\n <td>161.0</td>\n <td>Rory Kinnear</td>\n <td>11000.0</td>\n <td>200074175.0</td>\n <td>...</td>\n <td>994.0</td>\n <td>English</td>\n <td>UK</td>\n <td>PG-13</td>\n <td>245000000.0</td>\n <td>2015.0</td>\n <td>393.0</td>\n <td>6.8</td>\n <td>2.35</td>\n <td>85000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>Color</td>\n <td>Christopher Nolan</td>\n <td>813.0</td>\n <td>164.0</td>\n <td>22000.0</td>\n <td>23000.0</td>\n <td>Christian Bale</td>\n <td>27000.0</td>\n <td>448130642.0</td>\n <td>...</td>\n <td>2701.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>250000000.0</td>\n <td>2012.0</td>\n <td>23000.0</td>\n <td>8.5</td>\n <td>2.35</td>\n <td>164000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>None</td>\n <td>Doug Walker</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>131.0</td>\n <td>NaN</td>\n <td>Rob Walker</td>\n <td>131.0</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>12.0</td>\n <td>7.1</td>\n <td>NaN</td>\n <td>0</td>\n </tr>\n <tr>\n <th>5</th>\n <td>5</td>\n <td>Color</td>\n <td>Andrew Stanton</td>\n <td>462.0</td>\n <td>132.0</td>\n <td>475.0</td>\n <td>530.0</td>\n <td>Samantha Morton</td>\n <td>640.0</td>\n <td>73058679.0</td>\n <td>...</td>\n <td>738.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>263700000.0</td>\n <td>2012.0</td>\n <td>632.0</td>\n <td>6.6</td>\n <td>2.35</td>\n <td>24000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>6</td>\n <td>Color</td>\n <td>Sam Raimi</td>\n <td>392.0</td>\n <td>156.0</td>\n <td>0.0</td>\n <td>4000.0</td>\n <td>James Franco</td>\n <td>24000.0</td>\n <td>336530303.0</td>\n <td>...</td>\n <td>1902.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>258000000.0</td>\n <td>2007.0</td>\n <td>11000.0</td>\n <td>6.2</td>\n <td>2.35</td>\n <td>0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>7</td>\n <td>Color</td>\n <td>Nathan Greno</td>\n <td>324.0</td>\n <td>100.0</td>\n <td>15.0</td>\n <td>284.0</td>\n <td>Donna Murphy</td>\n <td>799.0</td>\n <td>200807262.0</td>\n <td>...</td>\n <td>387.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG</td>\n <td>260000000.0</td>\n <td>2010.0</td>\n <td>553.0</td>\n <td>7.8</td>\n <td>1.85</td>\n <td>29000</td>\n </tr>\n <tr>\n <th>8</th>\n <td>8</td>\n <td>Color</td>\n <td>Joss Whedon</td>\n <td>635.0</td>\n <td>141.0</td>\n <td>0.0</td>\n <td>19000.0</td>\n <td>Robert Downey Jr.</td>\n <td>26000.0</td>\n <td>458991599.0</td>\n <td>...</td>\n <td>1117.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>250000000.0</td>\n <td>2015.0</td>\n <td>21000.0</td>\n <td>7.5</td>\n <td>2.35</td>\n <td>118000</td>\n </tr>\n <tr>\n <th>9</th>\n <td>9</td>\n <td>Color</td>\n <td>David Yates</td>\n <td>375.0</td>\n <td>153.0</td>\n <td>282.0</td>\n <td>10000.0</td>\n <td>Daniel Radcliffe</td>\n <td>25000.0</td>\n <td>301956980.0</td>\n <td>...</td>\n <td>973.0</td>\n <td>English</td>\n <td>UK</td>\n <td>PG</td>\n <td>250000000.0</td>\n <td>2009.0</td>\n <td>11000.0</td>\n <td>7.5</td>\n <td>2.35</td>\n <td>10000</td>\n </tr>\n </tbody>\n</table>\n<p>10 rows × 29 columns</p>\n</div>",
"text/plain": " index color director_name num_critic_for_reviews duration \\\n0 0 Color James Cameron 723.0 178.0 \n1 1 Color Gore Verbinski 302.0 169.0 \n2 2 Color Sam Mendes 602.0 148.0 \n3 3 Color Christopher Nolan 813.0 164.0 \n4 4 None Doug Walker NaN NaN \n5 5 Color Andrew Stanton 462.0 132.0 \n6 6 Color Sam Raimi 392.0 156.0 \n7 7 Color Nathan Greno 324.0 100.0 \n8 8 Color Joss Whedon 635.0 141.0 \n9 9 Color David Yates 375.0 153.0 \n\n director_facebook_likes actor_3_facebook_likes actor_2_name \\\n0 0.0 855.0 Joel David Moore \n1 563.0 1000.0 Orlando Bloom \n2 0.0 161.0 Rory Kinnear \n3 22000.0 23000.0 Christian Bale \n4 131.0 NaN Rob Walker \n5 475.0 530.0 Samantha Morton \n6 0.0 4000.0 James Franco \n7 15.0 284.0 Donna Murphy \n8 0.0 19000.0 Robert Downey Jr. \n9 282.0 10000.0 Daniel Radcliffe \n\n actor_1_facebook_likes gross ... num_user_for_reviews language \\\n0 1000.0 760505847.0 ... 3054.0 English \n1 40000.0 309404152.0 ... 1238.0 English \n2 11000.0 200074175.0 ... 994.0 English \n3 27000.0 448130642.0 ... 2701.0 English \n4 131.0 NaN ... NaN None \n5 640.0 73058679.0 ... 738.0 English \n6 24000.0 336530303.0 ... 1902.0 English \n7 799.0 200807262.0 ... 387.0 English \n8 26000.0 458991599.0 ... 1117.0 English \n9 25000.0 301956980.0 ... 973.0 English \n\n country content_rating budget title_year actor_2_facebook_likes \\\n0 USA PG-13 237000000.0 2009.0 936.0 \n1 USA PG-13 300000000.0 2007.0 5000.0 \n2 UK PG-13 245000000.0 2015.0 393.0 \n3 USA PG-13 250000000.0 2012.0 23000.0 \n4 None None NaN NaN 12.0 \n5 USA PG-13 263700000.0 2012.0 632.0 \n6 USA PG-13 258000000.0 2007.0 11000.0 \n7 USA PG 260000000.0 2010.0 553.0 \n8 USA PG-13 250000000.0 2015.0 21000.0 \n9 UK PG 250000000.0 2009.0 11000.0 \n\n imdb_score aspect_ratio movie_facebook_likes \n0 7.9 1.78 33000 \n1 7.1 2.35 0 \n2 6.8 2.35 85000 \n3 8.5 2.35 164000 \n4 7.1 NaN 0 \n5 6.6 2.35 24000 \n6 6.2 2.35 0 \n7 7.8 1.85 29000 \n8 7.5 2.35 118000 \n9 7.5 2.35 10000 \n\n[10 rows x 29 columns]"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "respiratory-update",
"cell_type": "code",
"source": "c.execute(\"\"\"\nUPDATE movies \nSET director_facebook_likes = 1\nWHERE [index] == 0\n\"\"\")",
"execution_count": 14,
"outputs": [
{
"data": {
"text/plain": "<sqlite3.Cursor at 0x7ffe317aff10>"
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "engaging-operation",
"cell_type": "code",
"source": "pd.read_sql_query(\"\"\"\nSELECT * FROM movies\nLIMIT 10\n\"\"\", conn)",
"execution_count": 15,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>index</th>\n <th>color</th>\n <th>director_name</th>\n <th>num_critic_for_reviews</th>\n <th>duration</th>\n <th>director_facebook_likes</th>\n <th>actor_3_facebook_likes</th>\n <th>actor_2_name</th>\n <th>actor_1_facebook_likes</th>\n <th>gross</th>\n <th>...</th>\n <th>num_user_for_reviews</th>\n <th>language</th>\n <th>country</th>\n <th>content_rating</th>\n <th>budget</th>\n <th>title_year</th>\n <th>actor_2_facebook_likes</th>\n <th>imdb_score</th>\n <th>aspect_ratio</th>\n <th>movie_facebook_likes</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>Color</td>\n <td>James Cameron</td>\n <td>723.0</td>\n <td>178.0</td>\n <td>1.0</td>\n <td>855.0</td>\n <td>Joel David Moore</td>\n <td>1000.0</td>\n <td>760505847.0</td>\n <td>...</td>\n <td>3054.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>237000000.0</td>\n <td>2009.0</td>\n <td>936.0</td>\n <td>7.9</td>\n <td>1.78</td>\n <td>33000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>Color</td>\n <td>Gore Verbinski</td>\n <td>302.0</td>\n <td>169.0</td>\n <td>563.0</td>\n <td>1000.0</td>\n <td>Orlando Bloom</td>\n <td>40000.0</td>\n <td>309404152.0</td>\n <td>...</td>\n <td>1238.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>300000000.0</td>\n <td>2007.0</td>\n <td>5000.0</td>\n <td>7.1</td>\n <td>2.35</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>Color</td>\n <td>Sam Mendes</td>\n <td>602.0</td>\n <td>148.0</td>\n <td>0.0</td>\n <td>161.0</td>\n <td>Rory Kinnear</td>\n <td>11000.0</td>\n <td>200074175.0</td>\n <td>...</td>\n <td>994.0</td>\n <td>English</td>\n <td>UK</td>\n <td>PG-13</td>\n <td>245000000.0</td>\n <td>2015.0</td>\n <td>393.0</td>\n <td>6.8</td>\n <td>2.35</td>\n <td>85000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>Color</td>\n <td>Christopher Nolan</td>\n <td>813.0</td>\n <td>164.0</td>\n <td>22000.0</td>\n <td>23000.0</td>\n <td>Christian Bale</td>\n <td>27000.0</td>\n <td>448130642.0</td>\n <td>...</td>\n <td>2701.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>250000000.0</td>\n <td>2012.0</td>\n <td>23000.0</td>\n <td>8.5</td>\n <td>2.35</td>\n <td>164000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>None</td>\n <td>Doug Walker</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>131.0</td>\n <td>NaN</td>\n <td>Rob Walker</td>\n <td>131.0</td>\n <td>NaN</td>\n <td>...</td>\n <td>NaN</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>12.0</td>\n <td>7.1</td>\n <td>NaN</td>\n <td>0</td>\n </tr>\n <tr>\n <th>5</th>\n <td>5</td>\n <td>Color</td>\n <td>Andrew Stanton</td>\n <td>462.0</td>\n <td>132.0</td>\n <td>475.0</td>\n <td>530.0</td>\n <td>Samantha Morton</td>\n <td>640.0</td>\n <td>73058679.0</td>\n <td>...</td>\n <td>738.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>263700000.0</td>\n <td>2012.0</td>\n <td>632.0</td>\n <td>6.6</td>\n <td>2.35</td>\n <td>24000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>6</td>\n <td>Color</td>\n <td>Sam Raimi</td>\n <td>392.0</td>\n <td>156.0</td>\n <td>0.0</td>\n <td>4000.0</td>\n <td>James Franco</td>\n <td>24000.0</td>\n <td>336530303.0</td>\n <td>...</td>\n <td>1902.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>258000000.0</td>\n <td>2007.0</td>\n <td>11000.0</td>\n <td>6.2</td>\n <td>2.35</td>\n <td>0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>7</td>\n <td>Color</td>\n <td>Nathan Greno</td>\n <td>324.0</td>\n <td>100.0</td>\n <td>15.0</td>\n <td>284.0</td>\n <td>Donna Murphy</td>\n <td>799.0</td>\n <td>200807262.0</td>\n <td>...</td>\n <td>387.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG</td>\n <td>260000000.0</td>\n <td>2010.0</td>\n <td>553.0</td>\n <td>7.8</td>\n <td>1.85</td>\n <td>29000</td>\n </tr>\n <tr>\n <th>8</th>\n <td>8</td>\n <td>Color</td>\n <td>Joss Whedon</td>\n <td>635.0</td>\n <td>141.0</td>\n <td>0.0</td>\n <td>19000.0</td>\n <td>Robert Downey Jr.</td>\n <td>26000.0</td>\n <td>458991599.0</td>\n <td>...</td>\n <td>1117.0</td>\n <td>English</td>\n <td>USA</td>\n <td>PG-13</td>\n <td>250000000.0</td>\n <td>2015.0</td>\n <td>21000.0</td>\n <td>7.5</td>\n <td>2.35</td>\n <td>118000</td>\n </tr>\n <tr>\n <th>9</th>\n <td>9</td>\n <td>Color</td>\n <td>David Yates</td>\n <td>375.0</td>\n <td>153.0</td>\n <td>282.0</td>\n <td>10000.0</td>\n <td>Daniel Radcliffe</td>\n <td>25000.0</td>\n <td>301956980.0</td>\n <td>...</td>\n <td>973.0</td>\n <td>English</td>\n <td>UK</td>\n <td>PG</td>\n <td>250000000.0</td>\n <td>2009.0</td>\n <td>11000.0</td>\n <td>7.5</td>\n <td>2.35</td>\n <td>10000</td>\n </tr>\n </tbody>\n</table>\n<p>10 rows × 29 columns</p>\n</div>",
"text/plain": " index color director_name num_critic_for_reviews duration \\\n0 0 Color James Cameron 723.0 178.0 \n1 1 Color Gore Verbinski 302.0 169.0 \n2 2 Color Sam Mendes 602.0 148.0 \n3 3 Color Christopher Nolan 813.0 164.0 \n4 4 None Doug Walker NaN NaN \n5 5 Color Andrew Stanton 462.0 132.0 \n6 6 Color Sam Raimi 392.0 156.0 \n7 7 Color Nathan Greno 324.0 100.0 \n8 8 Color Joss Whedon 635.0 141.0 \n9 9 Color David Yates 375.0 153.0 \n\n director_facebook_likes actor_3_facebook_likes actor_2_name \\\n0 1.0 855.0 Joel David Moore \n1 563.0 1000.0 Orlando Bloom \n2 0.0 161.0 Rory Kinnear \n3 22000.0 23000.0 Christian Bale \n4 131.0 NaN Rob Walker \n5 475.0 530.0 Samantha Morton \n6 0.0 4000.0 James Franco \n7 15.0 284.0 Donna Murphy \n8 0.0 19000.0 Robert Downey Jr. \n9 282.0 10000.0 Daniel Radcliffe \n\n actor_1_facebook_likes gross ... num_user_for_reviews language \\\n0 1000.0 760505847.0 ... 3054.0 English \n1 40000.0 309404152.0 ... 1238.0 English \n2 11000.0 200074175.0 ... 994.0 English \n3 27000.0 448130642.0 ... 2701.0 English \n4 131.0 NaN ... NaN None \n5 640.0 73058679.0 ... 738.0 English \n6 24000.0 336530303.0 ... 1902.0 English \n7 799.0 200807262.0 ... 387.0 English \n8 26000.0 458991599.0 ... 1117.0 English \n9 25000.0 301956980.0 ... 973.0 English \n\n country content_rating budget title_year actor_2_facebook_likes \\\n0 USA PG-13 237000000.0 2009.0 936.0 \n1 USA PG-13 300000000.0 2007.0 5000.0 \n2 UK PG-13 245000000.0 2015.0 393.0 \n3 USA PG-13 250000000.0 2012.0 23000.0 \n4 None None NaN NaN 12.0 \n5 USA PG-13 263700000.0 2012.0 632.0 \n6 USA PG-13 258000000.0 2007.0 11000.0 \n7 USA PG 260000000.0 2010.0 553.0 \n8 USA PG-13 250000000.0 2015.0 21000.0 \n9 UK PG 250000000.0 2009.0 11000.0 \n\n imdb_score aspect_ratio movie_facebook_likes \n0 7.9 1.78 33000 \n1 7.1 2.35 0 \n2 6.8 2.35 85000 \n3 8.5 2.35 164000 \n4 7.1 NaN 0 \n5 6.6 2.35 24000 \n6 6.2 2.35 0 \n7 7.8 1.85 29000 \n8 7.5 2.35 118000 \n9 7.5 2.35 10000 \n\n[10 rows x 29 columns]"
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "bored-absorption",
"cell_type": "code",
"source": "import requests\nr = requests.get(\"http://math-info.hse.ru/f/2018-19/nes-ds/gradebook.sqlite\")\nwith open('gradebook.sqlite', \"wb\") as f:\n f.write(r.content)",
"execution_count": 16,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "educational-france",
"cell_type": "code",
"source": "conn.close()",
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "pressed-premises",
"cell_type": "code",
"source": "conn = sqlite3.connect(\"gradebook.sqlite\")",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "innovative-garlic",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM sqlite_master where type = 'table' \n\"\"\", conn)",
"execution_count": 22,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>type</th>\n <th>name</th>\n <th>tbl_name</th>\n <th>rootpage</th>\n <th>sql</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>table</td>\n <td>courses</td>\n <td>courses</td>\n <td>2</td>\n <td>CREATE TABLE \"courses\" (\\n\"id\" INTEGER,\\n \"co...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>table</td>\n <td>submissions</td>\n <td>submissions</td>\n <td>4</td>\n <td>CREATE TABLE \"submissions\" (\\n\"id\" INTEGER,\\n ...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>table</td>\n <td>users</td>\n <td>users</td>\n <td>5</td>\n <td>CREATE TABLE \"users\" (\\n\"id\" INTEGER,\\n \"firs...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>table</td>\n <td>course_user</td>\n <td>course_user</td>\n <td>6</td>\n <td>CREATE TABLE \"course_user\" (\\n\"course_id\" INTE...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>table</td>\n <td>assignments</td>\n <td>assignments</td>\n <td>7</td>\n <td>CREATE TABLE \"assignments\" (\\n\"id\" INTEGER,\\n ...</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " type name tbl_name rootpage \\\n0 table courses courses 2 \n1 table submissions submissions 4 \n2 table users users 5 \n3 table course_user course_user 6 \n4 table assignments assignments 7 \n\n sql \n0 CREATE TABLE \"courses\" (\\n\"id\" INTEGER,\\n \"co... \n1 CREATE TABLE \"submissions\" (\\n\"id\" INTEGER,\\n ... \n2 CREATE TABLE \"users\" (\\n\"id\" INTEGER,\\n \"firs... \n3 CREATE TABLE \"course_user\" (\\n\"course_id\" INTE... \n4 CREATE TABLE \"assignments\" (\\n\"id\" INTEGER,\\n ... "
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "established-shame",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM courses LIMIT 10\n\"\"\", conn)",
"execution_count": 23,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>code</th>\n <th>year</th>\n <th>active</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>nes-ds</td>\n <td>2017</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>nes-ds</td>\n <td>2018</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>dj-prog</td>\n <td>2018</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>python-all</td>\n <td>2015</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>python-icef</td>\n <td>2018</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id code year active\n0 1 nes-ds 2017 0\n1 2 nes-ds 2018 1\n2 3 dj-prog 2018 1\n3 4 python-all 2015 0\n4 5 python-icef 2018 1"
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "studied-rental",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM submissions LIMIT 10\n\"\"\", conn)",
"execution_count": 24,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>assignment_id</th>\n <th>user_id</th>\n <th>grade</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>2</td>\n <td>1</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>3</td>\n <td>1</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>2</td>\n <td>2</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>3</td>\n <td>2</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>5</th>\n <td>6</td>\n <td>4</td>\n <td>3</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7</td>\n <td>2</td>\n <td>3</td>\n <td>23.0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>8</td>\n <td>4</td>\n <td>3</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>9</td>\n <td>5</td>\n <td>4</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>9</th>\n <td>10</td>\n <td>4</td>\n <td>4</td>\n <td>5.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id assignment_id user_id grade\n0 1 1 1 6.0\n1 2 2 1 5.0\n2 3 3 1 NaN\n3 4 2 2 3.0\n4 5 3 2 2.0\n5 6 4 3 10.0\n6 7 2 3 23.0\n7 8 4 3 15.0\n8 9 5 4 4.0\n9 10 4 4 5.0"
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "european-ability",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM assignments LIMIT 10\n\"\"\", conn)",
"execution_count": 25,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>course_id</th>\n <th>code</th>\n <th>name</th>\n <th>max_grade</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1</td>\n <td>ps01</td>\n <td>Intro to Python</td>\n <td>10</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>2</td>\n <td>ps01</td>\n <td>Intro</td>\n <td>12</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>2</td>\n <td>ps02</td>\n <td>Functions</td>\n <td>8</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>2</td>\n <td>ps03</td>\n <td>Pandas</td>\n <td>10</td>\n </tr>\n <tr>\n <th>5</th>\n <td>6</td>\n <td>3</td>\n <td>ps03</td>\n <td>Classes</td>\n <td>15</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7</td>\n <td>2</td>\n <td>ps04</td>\n <td>Advanced Python</td>\n <td>32</td>\n </tr>\n <tr>\n <th>7</th>\n <td>8</td>\n <td>3</td>\n <td>ps01</td>\n <td>Introduction</td>\n <td>10</td>\n </tr>\n <tr>\n <th>8</th>\n <td>9</td>\n <td>4</td>\n <td>ps01</td>\n <td>First class</td>\n <td>7</td>\n </tr>\n <tr>\n <th>9</th>\n <td>10</td>\n <td>5</td>\n <td>ps01</td>\n <td>Welcome to Python</td>\n <td>8</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id course_id code name max_grade\n0 1 1 ps01 Intro to Python 10\n1 2 2 ps01 Intro 12\n2 3 2 ps02 Functions 8\n3 4 1 ps01 Python, part 1 22\n4 5 2 ps03 Pandas 10\n5 6 3 ps03 Classes 15\n6 7 2 ps04 Advanced Python 32\n7 8 3 ps01 Introduction 10\n8 9 4 ps01 First class 7\n9 10 5 ps01 Welcome to Python 8"
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "rational-projector",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM users LIMIT 10\n\"\"\", conn)",
"execution_count": 26,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>first_name</th>\n <th>last_name</th>\n <th>email</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>Eddard</td>\n <td>Stark</td>\n <td>ned@winterfell.north.7k</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>Arya</td>\n <td>Stark</td>\n <td>arya@winterfell.north.7k</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>Tyrion</td>\n <td>Lannister</td>\n <td>hand-of-a-queen@dragons.rules</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>Cersei</td>\n <td>Lannister</td>\n <td>queen@iron-throne.7k</td>\n </tr>\n <tr>\n <th>5</th>\n <td>6</td>\n <td>Sansa</td>\n <td>Stark</td>\n <td>lady@winterfell.north.7k</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7</td>\n <td>Daenerys</td>\n <td>Targaryen</td>\n <td>queen@dragons.rules</td>\n </tr>\n <tr>\n <th>7</th>\n <td>8</td>\n <td>Samwell</td>\n <td>Tarly</td>\n <td>samwell@citadel.7k</td>\n </tr>\n <tr>\n <th>8</th>\n <td>9</td>\n <td>Brienne</td>\n <td>None</td>\n <td>brienne@tarth.7k</td>\n </tr>\n <tr>\n <th>9</th>\n <td>10</td>\n <td>Ygritte</td>\n <td>None</td>\n <td>ygritte@wildlings.north.7k</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id first_name last_name email\n0 1 Eddard Stark ned@winterfell.north.7k\n1 2 Jon Snow jsnow@winterfell.north.7k\n2 3 Arya Stark arya@winterfell.north.7k\n3 4 Tyrion Lannister hand-of-a-queen@dragons.rules\n4 5 Cersei Lannister queen@iron-throne.7k\n5 6 Sansa Stark lady@winterfell.north.7k\n6 7 Daenerys Targaryen queen@dragons.rules\n7 8 Samwell Tarly samwell@citadel.7k\n8 9 Brienne None brienne@tarth.7k\n9 10 Ygritte None ygritte@wildlings.north.7k"
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "powerful-favorite",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT * FROM course_user LIMIT 10\n\"\"\", conn)",
"execution_count": 38,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>course_id</th>\n <th>user_id</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2</td>\n <td>1</td>\n </tr>\n <tr>\n <th>5</th>\n <td>3</td>\n <td>2</td>\n </tr>\n <tr>\n <th>6</th>\n <td>3</td>\n <td>3</td>\n </tr>\n <tr>\n <th>7</th>\n <td>4</td>\n <td>2</td>\n </tr>\n <tr>\n <th>8</th>\n <td>5</td>\n <td>4</td>\n </tr>\n <tr>\n <th>9</th>\n <td>5</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " course_id user_id\n0 1 2\n1 2 1\n2 1 3\n3 1 4\n4 2 1\n5 3 2\n6 3 3\n7 4 2\n8 5 4\n9 5 4"
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "surprised-centre",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT *\nFROM assignments LEFT JOIN submissions \nON assignments.id == submissions.assignment_id\n\"\"\", conn)",
"execution_count": 34,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>course_id</th>\n <th>code</th>\n <th>name</th>\n <th>max_grade</th>\n <th>id</th>\n <th>assignment_id</th>\n <th>user_id</th>\n <th>grade</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1</td>\n <td>ps01</td>\n <td>Intro to Python</td>\n <td>10</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>6.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>1</td>\n <td>ps01</td>\n <td>Intro to Python</td>\n <td>10</td>\n <td>20</td>\n <td>1</td>\n <td>7</td>\n <td>16.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>2</td>\n <td>ps01</td>\n <td>Intro</td>\n <td>12</td>\n <td>2</td>\n <td>2</td>\n <td>1</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2</td>\n <td>2</td>\n <td>ps01</td>\n <td>Intro</td>\n <td>12</td>\n <td>4</td>\n <td>2</td>\n <td>2</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2</td>\n <td>2</td>\n <td>ps01</td>\n <td>Intro</td>\n <td>12</td>\n <td>7</td>\n <td>2</td>\n <td>3</td>\n <td>23.0</td>\n </tr>\n <tr>\n <th>5</th>\n <td>3</td>\n <td>2</td>\n <td>ps02</td>\n <td>Functions</td>\n <td>8</td>\n <td>3</td>\n <td>3</td>\n <td>1</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>6</th>\n <td>3</td>\n <td>2</td>\n <td>ps02</td>\n <td>Functions</td>\n <td>8</td>\n <td>5</td>\n <td>3</td>\n <td>2</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>7</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n <td>6</td>\n <td>4</td>\n <td>3</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>8</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n <td>8</td>\n <td>4</td>\n <td>3</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>9</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n <td>10</td>\n <td>4</td>\n <td>4</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>10</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n <td>11</td>\n <td>4</td>\n <td>4</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>11</th>\n <td>4</td>\n <td>1</td>\n <td>ps01</td>\n <td>Python, part 1</td>\n <td>22</td>\n <td>22</td>\n <td>4</td>\n <td>8</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>12</th>\n <td>5</td>\n <td>2</td>\n <td>ps03</td>\n <td>Pandas</td>\n <td>10</td>\n <td>9</td>\n <td>5</td>\n <td>4</td>\n <td>4.0</td>\n </tr>\n <tr>\n <th>13</th>\n <td>5</td>\n <td>2</td>\n <td>ps03</td>\n <td>Pandas</td>\n <td>10</td>\n <td>12</td>\n <td>5</td>\n <td>4</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>14</th>\n <td>5</td>\n <td>2</td>\n <td>ps03</td>\n <td>Pandas</td>\n <td>10</td>\n <td>21</td>\n <td>5</td>\n <td>8</td>\n <td>10.0</td>\n </tr>\n <tr>\n <th>15</th>\n <td>6</td>\n <td>3</td>\n <td>ps03</td>\n <td>Classes</td>\n <td>15</td>\n <td>13</td>\n <td>6</td>\n <td>5</td>\n <td>20.0</td>\n </tr>\n <tr>\n <th>16</th>\n <td>6</td>\n <td>3</td>\n <td>ps03</td>\n <td>Classes</td>\n <td>15</td>\n <td>15</td>\n <td>6</td>\n <td>5</td>\n <td>21.0</td>\n </tr>\n <tr>\n <th>17</th>\n <td>6</td>\n <td>3</td>\n <td>ps03</td>\n <td>Classes</td>\n <td>15</td>\n <td>23</td>\n <td>6</td>\n <td>10</td>\n <td>8.0</td>\n </tr>\n <tr>\n <th>18</th>\n <td>7</td>\n <td>2</td>\n <td>ps04</td>\n <td>Advanced Python</td>\n <td>32</td>\n <td>14</td>\n <td>7</td>\n <td>5</td>\n <td>14.0</td>\n </tr>\n <tr>\n <th>19</th>\n <td>7</td>\n <td>2</td>\n <td>ps04</td>\n <td>Advanced Python</td>\n <td>32</td>\n <td>16</td>\n <td>7</td>\n <td>6</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>20</th>\n <td>8</td>\n <td>3</td>\n <td>ps01</td>\n <td>Introduction</td>\n <td>10</td>\n <td>17</td>\n <td>8</td>\n <td>6</td>\n <td>32.0</td>\n </tr>\n <tr>\n <th>21</th>\n <td>9</td>\n <td>4</td>\n <td>ps01</td>\n <td>First class</td>\n <td>7</td>\n <td>18</td>\n <td>9</td>\n <td>7</td>\n <td>15.0</td>\n </tr>\n <tr>\n <th>22</th>\n <td>10</td>\n <td>5</td>\n <td>ps01</td>\n <td>Welcome to Python</td>\n <td>8</td>\n <td>19</td>\n <td>10</td>\n <td>7</td>\n <td>15.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id course_id code name max_grade id assignment_id \\\n0 1 1 ps01 Intro to Python 10 1 1 \n1 1 1 ps01 Intro to Python 10 20 1 \n2 2 2 ps01 Intro 12 2 2 \n3 2 2 ps01 Intro 12 4 2 \n4 2 2 ps01 Intro 12 7 2 \n5 3 2 ps02 Functions 8 3 3 \n6 3 2 ps02 Functions 8 5 3 \n7 4 1 ps01 Python, part 1 22 6 4 \n8 4 1 ps01 Python, part 1 22 8 4 \n9 4 1 ps01 Python, part 1 22 10 4 \n10 4 1 ps01 Python, part 1 22 11 4 \n11 4 1 ps01 Python, part 1 22 22 4 \n12 5 2 ps03 Pandas 10 9 5 \n13 5 2 ps03 Pandas 10 12 5 \n14 5 2 ps03 Pandas 10 21 5 \n15 6 3 ps03 Classes 15 13 6 \n16 6 3 ps03 Classes 15 15 6 \n17 6 3 ps03 Classes 15 23 6 \n18 7 2 ps04 Advanced Python 32 14 7 \n19 7 2 ps04 Advanced Python 32 16 7 \n20 8 3 ps01 Introduction 10 17 8 \n21 9 4 ps01 First class 7 18 9 \n22 10 5 ps01 Welcome to Python 8 19 10 \n\n user_id grade \n0 1 6.0 \n1 7 16.0 \n2 1 5.0 \n3 2 3.0 \n4 3 23.0 \n5 1 NaN \n6 2 2.0 \n7 3 10.0 \n8 3 15.0 \n9 4 5.0 \n10 4 1.0 \n11 8 10.0 \n12 4 4.0 \n13 4 10.0 \n14 8 10.0 \n15 5 20.0 \n16 5 21.0 \n17 10 8.0 \n18 5 14.0 \n19 6 NaN \n20 6 32.0 \n21 7 15.0 \n22 7 15.0 "
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "waiting-graphic",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT assignments.id, \n assignments.name, \n avg(min(submissions.grade, assignments.max_grade))\nFROM assignments LEFT JOIN submissions \nON assignments.id == submissions.assignment_id\nGROUP BY assignments.id\n\"\"\", conn)",
"execution_count": 37,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>name</th>\n <th>avg(min(submissions.grade, assignments.max_grade))</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>Intro to Python</td>\n <td>8.000000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>Intro</td>\n <td>6.666667</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>Functions</td>\n <td>2.000000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>Python, part 1</td>\n <td>8.200000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>Pandas</td>\n <td>8.000000</td>\n </tr>\n <tr>\n <th>5</th>\n <td>6</td>\n <td>Classes</td>\n <td>12.666667</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7</td>\n <td>Advanced Python</td>\n <td>14.000000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>8</td>\n <td>Introduction</td>\n <td>10.000000</td>\n </tr>\n <tr>\n <th>8</th>\n <td>9</td>\n <td>First class</td>\n <td>7.000000</td>\n </tr>\n <tr>\n <th>9</th>\n <td>10</td>\n <td>Welcome to Python</td>\n <td>8.000000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id name avg(min(submissions.grade, assignments.max_grade))\n0 1 Intro to Python 8.000000 \n1 2 Intro 6.666667 \n2 3 Functions 2.000000 \n3 4 Python, part 1 8.200000 \n4 5 Pandas 8.000000 \n5 6 Classes 12.666667 \n6 7 Advanced Python 14.000000 \n7 8 Introduction 10.000000 \n8 9 First class 7.000000 \n9 10 Welcome to Python 8.000000 "
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "treated-affiliate",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT *\nFROM users \nJOIN course_user ON users.id == course_user.user_id\nJOIN courses ON courses.id == course_user.course_id\nWHERE courses.id == 1\n\"\"\", conn)",
"execution_count": 44,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>first_name</th>\n <th>last_name</th>\n <th>email</th>\n <th>course_id</th>\n <th>user_id</th>\n <th>id</th>\n <th>code</th>\n <th>year</th>\n <th>active</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n <td>1</td>\n <td>2</td>\n <td>1</td>\n <td>nes-ds</td>\n <td>2017</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>3</td>\n <td>Arya</td>\n <td>Stark</td>\n <td>arya@winterfell.north.7k</td>\n <td>1</td>\n <td>3</td>\n <td>1</td>\n <td>nes-ds</td>\n <td>2017</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>4</td>\n <td>Tyrion</td>\n <td>Lannister</td>\n <td>hand-of-a-queen@dragons.rules</td>\n <td>1</td>\n <td>4</td>\n <td>1</td>\n <td>nes-ds</td>\n <td>2017</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id first_name last_name email course_id \\\n0 2 Jon Snow jsnow@winterfell.north.7k 1 \n1 3 Arya Stark arya@winterfell.north.7k 1 \n2 4 Tyrion Lannister hand-of-a-queen@dragons.rules 1 \n\n user_id id code year active \n0 2 1 nes-ds 2017 0 \n1 3 1 nes-ds 2017 0 \n2 4 1 nes-ds 2017 0 "
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "eight-recorder",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT *\nFROM users \nJOIN course_user ON users.id == course_user.user_id\nJOIN courses ON courses.id == course_user.course_id\nWHERE users.id == 2\n\"\"\", conn)",
"execution_count": 50,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>first_name</th>\n <th>last_name</th>\n <th>email</th>\n <th>course_id</th>\n <th>user_id</th>\n <th>id</th>\n <th>code</th>\n <th>year</th>\n <th>active</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n <td>1</td>\n <td>2</td>\n <td>1</td>\n <td>nes-ds</td>\n <td>2017</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n <td>3</td>\n <td>2</td>\n <td>3</td>\n <td>dj-prog</td>\n <td>2018</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n <td>4</td>\n <td>2</td>\n <td>4</td>\n <td>python-all</td>\n <td>2015</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2</td>\n <td>Jon</td>\n <td>Snow</td>\n <td>jsnow@winterfell.north.7k</td>\n <td>4</td>\n <td>2</td>\n <td>4</td>\n <td>python-all</td>\n <td>2015</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " id first_name last_name email course_id user_id id \\\n0 2 Jon Snow jsnow@winterfell.north.7k 1 2 1 \n1 2 Jon Snow jsnow@winterfell.north.7k 3 2 3 \n2 2 Jon Snow jsnow@winterfell.north.7k 4 2 4 \n3 2 Jon Snow jsnow@winterfell.north.7k 4 2 4 \n\n code year active \n0 nes-ds 2017 0 \n1 dj-prog 2018 1 \n2 python-all 2015 0 \n3 python-all 2015 0 "
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "weird-sunrise",
"cell_type": "code",
"source": "pd.read_sql(\"\"\"\nSELECT users.first_name, users.last_name, assignments.name, avg(grade)\nFROM users \nJOIN course_user ON users.id == course_user.user_id\nJOIN courses ON courses.id == course_user.course_id\nJOIN assignments ON assignments.course_id == courses.id\nJOIN submissions ON submissions.user_id == users.id and \n submissions.assignment_id == assignments.id\nGROUP BY users.id, assignments.id\n\"\"\", conn)",
"execution_count": 54,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>first_name</th>\n <th>last_name</th>\n <th>name</th>\n <th>avg(grade)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Eddard</td>\n <td>Stark</td>\n <td>Intro</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Eddard</td>\n <td>Stark</td>\n <td>Functions</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Arya</td>\n <td>Stark</td>\n <td>Python, part 1</td>\n <td>12.5</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Tyrion</td>\n <td>Lannister</td>\n <td>Python, part 1</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Cersei</td>\n <td>Lannister</td>\n <td>Classes</td>\n <td>20.5</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " first_name last_name name avg(grade)\n0 Eddard Stark Intro 5.0\n1 Eddard Stark Functions NaN\n2 Arya Stark Python, part 1 12.5\n3 Tyrion Lannister Python, part 1 3.0\n4 Cersei Lannister Classes 20.5"
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.10",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "ebe633748900375348b301eb976d08b8",
"data": {
"description": "lesson23.ipynb",
"public": false
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/ebe633748900375348b301eb976d08b8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment