/numpy + pandas.ipynb

## numpy + pandas.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd       # Import, manipulate, export data (DataFrames)\nimport numpy as np        # Mathematical and matrix operations\nimport os                 # Set working directory",
      "execution_count": 27,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Working directory\nos.chdir('/Users/bernardo/Dropbox (Personal)/Documentos/Python/Data Mining/')",
      "execution_count": 28,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Import CSV or Excel file"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df = pd.read_csv(\"05. Classification Concepts/5052_05_Code/anes_dataset.csv\")\n#df = pd.read_excel(path)\ndf.head(3)",
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 29,
          "data": {
            "text/plain": "   popul  TVnews  selfLR  ClinLR  DoleLR  PID  age  educ  income  vote\n0      0       7       7       1       6    6   36     3       1     1\n1    190       1       3       3       5    1   20     4       1     0\n2     31       7       2       2       6    1   24     6       1     0",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>popul</th>\n      <th>TVnews</th>\n      <th>selfLR</th>\n      <th>ClinLR</th>\n      <th>DoleLR</th>\n      <th>PID</th>\n      <th>age</th>\n      <th>educ</th>\n      <th>income</th>\n      <th>vote</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>7</td>\n      <td>7</td>\n      <td>1</td>\n      <td>6</td>\n      <td>6</td>\n      <td>36</td>\n      <td>3</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>190</td>\n      <td>1</td>\n      <td>3</td>\n      <td>3</td>\n      <td>5</td>\n      <td>1</td>\n      <td>20</td>\n      <td>4</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>31</td>\n      <td>7</td>\n      <td>2</td>\n      <td>2</td>\n      <td>6</td>\n      <td>1</td>\n      <td>24</td>\n      <td>6</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Matrix and Dictionary manual creation"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Create a manual dictionary\nd1 = {'Nombe': pd.Series(['Tomas','Jaime','Ricardo','Victor','Esteban','Susana','Jorge','Lili','David','Liliana','Beto','JJ']),\n     'Edad': pd.Series([25,26,25,23,30,29,23,34,40,30,51,46]),\n     'Rating': pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8,3.78,2.98,4.80,4.10,3.65])}",
      "execution_count": 30,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Create a manual matrix with lists\nnames = ['Tomas','Jaime','Ricardo','Victor','Esteban','Susana','Jorge','Lili','David','Liliana','Beto','JJ']\nage = [25,26,25,23,30,29,23,34,40,30,51,46]\nrating = [4.23,3.24,3.98,2.56,3.20,4.6,3.8,3.78,2.98,4.80,4.10,3.65]\nd2 = list(zip(names, age, rating))\nprint(d2)",
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "stream",
          "text": "[('Tomas', 25, 4.23), ('Jaime', 26, 3.24), ('Ricardo', 25, 3.98), ('Victor', 23, 2.56), ('Esteban', 30, 3.2), ('Susana', 29, 4.6), ('Jorge', 23, 3.8), ('Lili', 34, 3.78), ('David', 40, 2.98), ('Liliana', 30, 4.8), ('Beto', 51, 4.1), ('JJ', 46, 3.65)]\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Create a sequence with an array\nx = np.arange(1,15,1)\na = np.array([[1, 1, 2], [3, 5, 8], [13, 21, 34]])\nprint(\"Arange:\",x)\nprint(\"Array\",a)",
      "execution_count": 46,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Arange: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14]\nArray [[ 1  1  2]\n [ 3  5  8]\n [13 21 34]]\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# DataFrames for data manipulation"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Create a pandas DataFrame from the dictionary, then from the list of tuples (the second assignment replaces the first)\ndf = pd.DataFrame(data = d1)\ndf = pd.DataFrame(data = d2, columns = ['Nombre','Edad','Rating'])\ndf",
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 33,
          "data": {
            "text/plain": "    Edad    Nombe  Rating\n0     25    Tomas    4.23\n1     26    Jaime    3.24\n2     25  Ricardo    3.98\n3     23   Victor    2.56\n4     30  Esteban    3.20\n5     29   Susana    4.60\n6     23    Jorge    3.80\n7     34     Lili    3.78\n8     40    David    2.98\n9     30  Liliana    4.80\n10    51     Beto    4.10\n11    46       JJ    3.65",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Edad</th>\n      <th>Nombe</th>\n      <th>Rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>25</td>\n      <td>Tomas</td>\n      <td>4.23</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>26</td>\n      <td>Jaime</td>\n      <td>3.24</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>25</td>\n      <td>Ricardo</td>\n      <td>3.98</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>23</td>\n      <td>Victor</td>\n      <td>2.56</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>30</td>\n      <td>Esteban</td>\n      <td>3.20</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>29</td>\n      <td>Susana</td>\n      <td>4.60</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>23</td>\n      <td>Jorge</td>\n      <td>3.80</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>34</td>\n      <td>Lili</td>\n      <td>3.78</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>40</td>\n      <td>David</td>\n      <td>2.98</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>30</td>\n      <td>Liliana</td>\n      <td>4.80</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>51</td>\n      <td>Beto</td>\n      <td>4.10</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>46</td>\n      <td>JJ</td>\n      <td>3.65</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Data analysis and manipulation"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.info()",
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "stream",
          "text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 12 entries, 0 to 11\nData columns (total 3 columns):\nNombre    12 non-null object\nEdad      12 non-null int64\nRating    12 non-null float64\ndtypes: float64(1), int64(1), object(1)\nmemory usage: 368.0+ bytes\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# First rows\ndf.head(5)",
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 36,
          "data": {
            "text/plain": "    Nombre  Edad  Rating\n0    Tomas    25    4.23\n1    Jaime    26    3.24\n2  Ricardo    25    3.98\n3   Victor    23    2.56\n4  Esteban    30    3.20",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Nombre</th>\n      <th>Edad</th>\n      <th>Rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Tomas</td>\n      <td>25</td>\n      <td>4.23</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Jaime</td>\n      <td>26</td>\n      <td>3.24</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Ricardo</td>\n      <td>25</td>\n      <td>3.98</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Victor</td>\n      <td>23</td>\n      <td>2.56</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Esteban</td>\n      <td>30</td>\n      <td>3.20</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# DataFrame dimensions\nprint(\"Dim:\",df.shape)\nprint(\"Rows:\",len(df.index))\nprint(\"Columns:\",len(df.columns))",
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Dim: (12, 3)\nRows: 12\nColumns: 3\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Statistical description of the DataFrame, rounded to 2 decimals\nround(df.describe(),2)",
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 38,
          "data": {
            "text/plain": "        Edad  Rating\ncount  12.00   12.00\nmean   31.83    3.74\nstd     9.23    0.66\nmin    23.00    2.56\n25%    25.00    3.23\n50%    29.50    3.79\n75%    35.50    4.13\nmax    51.00    4.80",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Edad</th>\n      <th>Rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>12.00</td>\n      <td>12.00</td>\n    </tr>\n    <tr>\n      <th>mean</th>\n      <td>31.83</td>\n      <td>3.74</td>\n    </tr>\n    <tr>\n      <th>std</th>\n      <td>9.23</td>\n      <td>0.66</td>\n    </tr>\n    <tr>\n      <th>min</th>\n      <td>23.00</td>\n      <td>2.56</td>\n    </tr>\n    <tr>\n      <th>25%</th>\n      <td>25.00</td>\n      <td>3.23</td>\n    </tr>\n    <tr>\n      <th>50%</th>\n      <td>29.50</td>\n      <td>3.79</td>\n    </tr>\n    <tr>\n      <th>75%</th>\n      <td>35.50</td>\n      <td>4.13</td>\n    </tr>\n    <tr>\n      <th>max</th>\n      <td>51.00</td>\n      <td>4.80</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Mean on all numerical features\ndf.mean()",
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 39,
          "data": {
            "text/plain": "Edad      31.833333\nRating     3.743333\ndtype: float64"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Max value on a specific column\ndf[\"Edad\"].max()",
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 40,
          "data": {
            "text/plain": "51"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Sort rows by a specific column in descending order and show only the top 5\ndf.sort_values(by=\"Edad\", ascending = False).head(5)",
      "execution_count": 41,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 41,
          "data": {
            "text/plain": "     Nombre  Edad  Rating\n10     Beto    51    4.10\n11       JJ    46    3.65\n8     David    40    2.98\n7      Lili    34    3.78\n4   Esteban    30    3.20",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Nombre</th>\n      <th>Edad</th>\n      <th>Rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>10</th>\n      <td>Beto</td>\n      <td>51</td>\n      <td>4.10</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>JJ</td>\n      <td>46</td>\n      <td>3.65</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>David</td>\n      <td>40</td>\n      <td>2.98</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>Lili</td>\n      <td>34</td>\n      <td>3.78</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Esteban</td>\n      <td>30</td>\n      <td>3.20</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Filter rows given a condition\ndf[df.Rating >= 4]",
      "execution_count": 42,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 42,
          "data": {
            "text/plain": "     Nombre  Edad  Rating\n0     Tomas    25    4.23\n5    Susana    29    4.60\n9   Liliana    30    4.80\n10     Beto    51    4.10",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Nombre</th>\n      <th>Edad</th>\n      <th>Rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Tomas</td>\n      <td>25</td>\n      <td>4.23</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Susana</td>\n      <td>29</td>\n      <td>4.60</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>Liliana</td>\n      <td>30</td>\n      <td>4.80</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>Beto</td>\n      <td>51</td>\n      <td>4.10</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Group by a column, count the rows in each group, and sort the counts in descending order\ndf[['Edad']].groupby('Edad')['Edad'].count().sort_values(ascending=False)",
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 43,
          "data": {
            "text/plain": "Edad\n30    2\n25    2\n23    2\n51    1\n46    1\n40    1\n34    1\n29    1\n26    1\nName: Edad, dtype: int64"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Show all column names\nlist(df.columns.values)",
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 44,
          "data": {
            "text/plain": "['Nombre', 'Edad', 'Rating']"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Export to CSV files"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Export to a CSV file on a specific path\ndf.to_csv('02. Python and Packages/example.csv')",
      "execution_count": 45,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.3",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "b737bf065d2267636c25e888f8beca4b",
      "data": {
        "description": "numpy and pandas example",
        "public": true
      }
    },
    "_draft": {
      "nbviewer_url": "https://gist.github.com/b737bf065d2267636c25e888f8beca4b"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import pandas as pd # Import, manipulate, export data (DataFrames)\nimport numpy as np # Mathematical and matrix operations\nimport os # Set working directory",
	"execution_count": 27,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Working directory\nos.chdir('/Users/bernardo/Dropbox (Personal)/Documentos/Python/Data Mining/')",
	"execution_count": 28,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Import CSV or Excel file"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df = pd.read_csv(\"05. Classification Concepts/5052_05_Code/anes_dataset.csv\")\n#df = pd.read_excel(path)\ndf.head(3)",
	"execution_count": 29,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 29,
	"data": {
	"text/plain": " popul TVnews selfLR ClinLR DoleLR PID age educ income vote\n0 0 7 7 1 6 6 36 3 1 1\n1 190 1 3 3 5 1 20 4 1 0\n2 31 7 2 2 6 1 24 6 1 0",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>popul</th>\n <th>TVnews</th>\n <th>selfLR</th>\n <th>ClinLR</th>\n <th>DoleLR</th>\n <th>PID</th>\n <th>age</th>\n <th>educ</th>\n <th>income</th>\n <th>vote</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>7</td>\n <td>7</td>\n <td>1</td>\n <td>6</td>\n <td>6</td>\n <td>36</td>\n <td>3</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>190</td>\n <td>1</td>\n <td>3</td>\n <td>3</td>\n <td>5</td>\n <td>1</td>\n <td>20</td>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>31</td>\n <td>7</td>\n <td>2</td>\n <td>2</td>\n <td>6</td>\n <td>1</td>\n <td>24</td>\n <td>6</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Matrix and Dictionary manual creation"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Create a manual dictionary\nd1 = {'Nombe': pd.Series(['Tomas','Jaime','Ricardo','Victor','Esteban','Susana','Jorge','Lili','David','Liliana','Beto','JJ']),\n 'Edad': pd.Series([25,26,25,23,30,29,23,34,40,30,51,46]),\n 'Rating': pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8,3.78,2.98,4.80,4.10,3.65])}",
	"execution_count": 30,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Create a manual matrix with lists\nnames = ['Tomas','Jaime','Ricardo','Victor','Esteban','Susana','Jorge','Lili','David','Liliana','Beto','JJ']\nage = [25,26,25,23,30,29,23,34,40,30,51,46]\nrating = [4.23,3.24,3.98,2.56,3.20,4.6,3.8,3.78,2.98,4.80,4.10,3.65]\nd2 = list(zip(names, age, rating))\nprint(d2)",
	"execution_count": 31,
	"outputs": [
	{
	"output_type": "stream",
	"text": "[('Tomas', 25, 4.23), ('Jaime', 26, 3.24), ('Ricardo', 25, 3.98), ('Victor', 23, 2.56), ('Esteban', 30, 3.2), ('Susana', 29, 4.6), ('Jorge', 23, 3.8), ('Lili', 34, 3.78), ('David', 40, 2.98), ('Liliana', 30, 4.8), ('Beto', 51, 4.1), ('JJ', 46, 3.65)]\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Create a sequence with an array\nx = np.arange(1,15,1)\na = np.array([[1, 1, 2], [3, 5, 8], [13, 21, 34]])\nprint(\"Arange:\",x)\nprint(\"Array\",a)",
	"execution_count": 46,
	"outputs": [
	{
	"output_type": "stream",
	"text": "Arange: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14]\nArray [[ 1 1 2]\n [ 3 5 8]\n [13 21 34]]\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# DataFrames for data manipulation"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Create a pandas DataFrame from the dictionary, then from the list of tuples (the second assignment replaces the first)\ndf = pd.DataFrame(data = d1)\ndf = pd.DataFrame(data = d2, columns = ['Nombre','Edad','Rating'])\ndf",
	"execution_count": 33,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 33,
	"data": {
	"text/plain": " Edad Nombe Rating\n0 25 Tomas 4.23\n1 26 Jaime 3.24\n2 25 Ricardo 3.98\n3 23 Victor 2.56\n4 30 Esteban 3.20\n5 29 Susana 4.60\n6 23 Jorge 3.80\n7 34 Lili 3.78\n8 40 David 2.98\n9 30 Liliana 4.80\n10 51 Beto 4.10\n11 46 JJ 3.65",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Edad</th>\n <th>Nombe</th>\n <th>Rating</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>25</td>\n <td>Tomas</td>\n <td>4.23</td>\n </tr>\n <tr>\n <th>1</th>\n <td>26</td>\n <td>Jaime</td>\n <td>3.24</td>\n </tr>\n <tr>\n <th>2</th>\n <td>25</td>\n <td>Ricardo</td>\n <td>3.98</td>\n </tr>\n <tr>\n <th>3</th>\n <td>23</td>\n <td>Victor</td>\n <td>2.56</td>\n </tr>\n <tr>\n <th>4</th>\n <td>30</td>\n <td>Esteban</td>\n <td>3.20</td>\n </tr>\n <tr>\n <th>5</th>\n <td>29</td>\n <td>Susana</td>\n <td>4.60</td>\n </tr>\n <tr>\n <th>6</th>\n <td>23</td>\n <td>Jorge</td>\n <td>3.80</td>\n </tr>\n <tr>\n <th>7</th>\n <td>34</td>\n <td>Lili</td>\n <td>3.78</td>\n </tr>\n <tr>\n <th>8</th>\n <td>40</td>\n <td>David</td>\n <td>2.98</td>\n </tr>\n <tr>\n <th>9</th>\n <td>30</td>\n <td>Liliana</td>\n <td>4.80</td>\n </tr>\n <tr>\n <th>10</th>\n <td>51</td>\n <td>Beto</td>\n <td>4.10</td>\n </tr>\n <tr>\n <th>11</th>\n <td>46</td>\n <td>JJ</td>\n <td>3.65</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Data analysis and manipulation"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df.info()",
	"execution_count": 35,
	"outputs": [
	{
	"output_type": "stream",
	"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 12 entries, 0 to 11\nData columns (total 3 columns):\nNombre 12 non-null object\nEdad 12 non-null int64\nRating 12 non-null float64\ndtypes: float64(1), int64(1), object(1)\nmemory usage: 368.0+ bytes\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# First rows\ndf.head(5)",
	"execution_count": 36,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 36,
	"data": {
	"text/plain": " Nombre Edad Rating\n0 Tomas 25 4.23\n1 Jaime 26 3.24\n2 Ricardo 25 3.98\n3 Victor 23 2.56\n4 Esteban 30 3.20",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Nombre</th>\n <th>Edad</th>\n <th>Rating</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tomas</td>\n <td>25</td>\n <td>4.23</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Jaime</td>\n <td>26</td>\n <td>3.24</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Ricardo</td>\n <td>25</td>\n <td>3.98</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Victor</td>\n <td>23</td>\n <td>2.56</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Esteban</td>\n <td>30</td>\n <td>3.20</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# DataFrame dimensions\nprint(\"Dim:\",df.shape)\nprint(\"Rows:\",len(df.index))\nprint(\"Columns:\",len(df.columns))",
	"execution_count": 37,
	"outputs": [
	{
	"output_type": "stream",
	"text": "Dim: (12, 3)\nRows: 12\nColumns: 3\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Statistical description of the DataFrame, rounded to 2 decimals\nround(df.describe(),2)",
	"execution_count": 38,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 38,
	"data": {
	"text/plain": " Edad Rating\ncount 12.00 12.00\nmean 31.83 3.74\nstd 9.23 0.66\nmin 23.00 2.56\n25% 25.00 3.23\n50% 29.50 3.79\n75% 35.50 4.13\nmax 51.00 4.80",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Edad</th>\n <th>Rating</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>12.00</td>\n <td>12.00</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>31.83</td>\n <td>3.74</td>\n </tr>\n <tr>\n <th>std</th>\n <td>9.23</td>\n <td>0.66</td>\n </tr>\n <tr>\n <th>min</th>\n <td>23.00</td>\n <td>2.56</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>25.00</td>\n <td>3.23</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>29.50</td>\n <td>3.79</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>35.50</td>\n <td>4.13</td>\n </tr>\n <tr>\n <th>max</th>\n <td>51.00</td>\n <td>4.80</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Mean on all numerical features\ndf.mean()",
	"execution_count": 39,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 39,
	"data": {
	"text/plain": "Edad 31.833333\nRating 3.743333\ndtype: float64"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Max value on a specific column\ndf[\"Edad\"].max()",
	"execution_count": 40,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 40,
	"data": {
	"text/plain": "51"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Sort rows by a specific column in descending order and show only the top 5\ndf.sort_values(by=\"Edad\", ascending = False).head(5)",
	"execution_count": 41,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 41,
	"data": {
	"text/plain": " Nombre Edad Rating\n10 Beto 51 4.10\n11 JJ 46 3.65\n8 David 40 2.98\n7 Lili 34 3.78\n4 Esteban 30 3.20",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Nombre</th>\n <th>Edad</th>\n <th>Rating</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>10</th>\n <td>Beto</td>\n <td>51</td>\n <td>4.10</td>\n </tr>\n <tr>\n <th>11</th>\n <td>JJ</td>\n <td>46</td>\n <td>3.65</td>\n </tr>\n <tr>\n <th>8</th>\n <td>David</td>\n <td>40</td>\n <td>2.98</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Lili</td>\n <td>34</td>\n <td>3.78</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Esteban</td>\n <td>30</td>\n <td>3.20</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Filter rows given a condition\ndf[df.Rating >= 4]",
	"execution_count": 42,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 42,
	"data": {
	"text/plain": " Nombre Edad Rating\n0 Tomas 25 4.23\n5 Susana 29 4.60\n9 Liliana 30 4.80\n10 Beto 51 4.10",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Nombre</th>\n <th>Edad</th>\n <th>Rating</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tomas</td>\n <td>25</td>\n <td>4.23</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Susana</td>\n <td>29</td>\n <td>4.60</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Liliana</td>\n <td>30</td>\n <td>4.80</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Beto</td>\n <td>51</td>\n <td>4.10</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Group by a column, count the rows in each group, and sort the counts in descending order\ndf[['Edad']].groupby('Edad')['Edad'].count().sort_values(ascending=False)",
	"execution_count": 43,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 43,
	"data": {
	"text/plain": "Edad\n30 2\n25 2\n23 2\n51 1\n46 1\n40 1\n34 1\n29 1\n26 1\nName: Edad, dtype: int64"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Show all column names\nlist(df.columns.values)",
	"execution_count": 44,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 44,
	"data": {
	"text/plain": "['Nombre', 'Edad', 'Rating']"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Export to CSV files"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Export to a CSV file on a specific path\ndf.to_csv('02. Python and Packages/example.csv')",
	"execution_count": 45,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": false
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.6.3",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"gist": {
	"id": "b737bf065d2267636c25e888f8beca4b",
	"data": {
	"description": "numpy and pandas example",
	"public": true
	}
	},
	"_draft": {
	"nbviewer_url": "https://gist.github.com/b737bf065d2267636c25e888f8beca4b"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}