americofreitasjr/CargaBaseISP.ipynb

## CargaBaseISP.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Step 1 - Carregando a base do ISP.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "language_info": {
      "codemirror_mode": "r",
      "file_extension": ".r",
      "mimetype": "text/x-r-source",
      "name": "R",
      "pygments_lexer": "r",
      "version": "3.5.2"
    },
    "kernelspec": {
      "display_name": "R",
      "language": "R",
      "name": "ir"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/americofreitasjr/AnaliseSegurancaPublicaRJ/blob/master/Step_1_Carregando_a_base_do_ISP.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zrIEG3K-p-Sk",
        "colab_type": "text"
      },
      "source": [
        "# Carregando a base do ISP"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fn8qIDWGqoQL",
        "colab_type": "text"
      },
      "source": [
        "\n",
        "Importando e instalando os pacotes"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FnZchyNoql8x",
        "colab_type": "code",
        "outputId": "1f90e3ad-1f65-4fe3-c55c-ead84f0ec017",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Install (when missing) and attach every package this notebook relies on.\n",
        "# plyr is attached before tidyverse: attaching it after dplyr masks dplyr verbs\n",
        "# (mutate, summarise, arrange, ...), which plyr's own startup message warns about.\n",
        "requiredPackages = c('plyr','tidyverse','gdata','caret','rlang','digest','DataExplorer')\n",
        "for(p in requiredPackages){\n",
        "  # requireNamespace() only checks availability; it does not attach the package.\n",
        "  if(!requireNamespace(p, quietly = TRUE)) install.packages(p)\n",
        "  # Startup chatter is silenced per call, so warnings raised by later cells stay visible.\n",
        "  suppressMessages(suppressWarnings(library(p,character.only = TRUE)))\n",
        "}"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Loading required package: tidyverse\n",
            "── \u001b[1mAttaching packages\u001b[22m ─────────────────────────────────────── tidyverse 1.2.1 ──\n",
            "\u001b[32m✔\u001b[39m \u001b[34mggplot2\u001b[39m 3.2.0     \u001b[32m✔\u001b[39m \u001b[34mpurrr  \u001b[39m 0.3.2\n",
            "\u001b[32m✔\u001b[39m \u001b[34mtibble \u001b[39m 2.1.3     \u001b[32m✔\u001b[39m \u001b[34mdplyr  \u001b[39m 0.8.3\n",
            "\u001b[32m✔\u001b[39m \u001b[34mtidyr  \u001b[39m 0.8.3     \u001b[32m✔\u001b[39m \u001b[34mstringr\u001b[39m 1.4.0\n",
            "\u001b[32m✔\u001b[39m \u001b[34mreadr  \u001b[39m 1.3.1     \u001b[32m✔\u001b[39m \u001b[34mforcats\u001b[39m 0.4.0\n",
            "── \u001b[1mConflicts\u001b[22m ────────────────────────────────────────── tidyverse_conflicts() ──\n",
            "\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mfilter()\u001b[39m masks \u001b[34mstats\u001b[39m::filter()\n",
            "\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mlag()\u001b[39m    masks \u001b[34mstats\u001b[39m::lag()\n",
            "Loading required package: gdata\n",
            "gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.\n",
            "\n",
            "gdata: Unable to load perl libaries needed by read.xls()\n",
            "gdata: to support 'XLSX' (Excel 2007+) files.\n",
            "\n",
            "gdata: Run the function 'installXLSXsupport()'\n",
            "gdata: to automatically download and install the perl\n",
            "gdata: libaries needed to support Excel XLS and XLSX formats.\n",
            "\n",
            "Attaching package: ‘gdata’\n",
            "\n",
            "The following objects are masked from ‘package:dplyr’:\n",
            "\n",
            "    combine, first, last\n",
            "\n",
            "The following object is masked from ‘package:purrr’:\n",
            "\n",
            "    keep\n",
            "\n",
            "The following object is masked from ‘package:stats’:\n",
            "\n",
            "    nobs\n",
            "\n",
            "The following object is masked from ‘package:utils’:\n",
            "\n",
            "    object.size\n",
            "\n",
            "The following object is masked from ‘package:base’:\n",
            "\n",
            "    startsWith\n",
            "\n",
            "Loading required package: caret\n",
            "Loading required package: lattice\n",
            "\n",
            "Attaching package: ‘caret’\n",
            "\n",
            "The following object is masked from ‘package:purrr’:\n",
            "\n",
            "    lift\n",
            "\n",
            "Loading required package: plyr\n",
            "------------------------------------------------------------------------------\n",
            "You have loaded plyr after dplyr - this is likely to cause problems.\n",
            "If you need functions from both plyr and dplyr, please load plyr first, then dplyr:\n",
            "library(plyr); library(dplyr)\n",
            "------------------------------------------------------------------------------\n",
            "\n",
            "Attaching package: ‘plyr’\n",
            "\n",
            "The following objects are masked from ‘package:dplyr’:\n",
            "\n",
            "    arrange, count, desc, failwith, id, mutate, rename, summarise,\n",
            "    summarize\n",
            "\n",
            "The following object is masked from ‘package:purrr’:\n",
            "\n",
            "    compact\n",
            "\n",
            "Loading required package: rlang\n",
            "\n",
            "Attaching package: ‘rlang’\n",
            "\n",
            "The following objects are masked from ‘package:gdata’:\n",
            "\n",
            "    env, ll\n",
            "\n",
            "The following objects are masked from ‘package:purrr’:\n",
            "\n",
            "    %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,\n",
            "    flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,\n",
            "    splice\n",
            "\n",
            "Loading required package: digest\n",
            "Loading required package: DataExplorer\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Szh_1xe2qKgc",
        "colab_type": "text"
      },
      "source": [
        "Definindo o caminho dos arquivos para download e as credenciais de acesso"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9_LKETLKp0y6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Build the authenticated base URL used to download the CSV parts.\n",
        "# Credentials are read from the environment so no secret is stored in the notebook.\n",
        "# Define ISP_DATA_USER / ISP_DATA_PASS outside the notebook (e.g. in ~/.Renviron).\n",
        "user=Sys.getenv(\"ISP_DATA_USER\")\n",
        "pass=Sys.getenv(\"ISP_DATA_PASS\")\n",
        "if(!nzchar(user) || !nzchar(pass)) stop(\"Set ISP_DATA_USER and ISP_DATA_PASS before running this cell.\")\n",
        "url=\"data.cienciacomdados.com.br\"\n",
        "# Percent-encode both values because they are embedded in the URL's userinfo part.\n",
        "fullUrl=paste0(\"https://\",URLencode(user, reserved = TRUE),\":\",URLencode(pass, reserved = TRUE),\"@\",url,\"/\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "oLGIqWIkqfLn",
        "colab_type": "text"
      },
      "source": [
        "Carregando os arquivos para os data frames\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EG5TriGyp0zC",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Download the nine CSV parts into parte1 ... parte9 with one loop instead of nine\n",
        "# copy-pasted calls; the file names differ only in the trailing index.\n",
        "for(i in 1:9){\n",
        "  arquivo = paste0(fullUrl,\"pedido_022_2019_parte\",i,\".csv\")\n",
        "  # assign() creates the variable parte<i> in the environment where this cell runs.\n",
        "  assign(paste0(\"parte\",i), read.csv2(file = arquivo, header = TRUE, sep = \";\"))\n",
        "}"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "wastyNkEqwe5",
        "colab_type": "text"
      },
      "source": [
        "Juntando todos os data frames em apenas um"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zqa9d5Xep0zK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Row-bind the nine parts into a single data frame named df.\n",
        "partes = list(parte1, parte2, parte3, parte4, parte5, parte6, parte7, parte8, parte9)\n",
        "df = rbind.fill(partes)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zYWYwSKhky-c",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Persist the combined data frame to df.RData in the current working directory.\n",
        "save(list = \"df\", file = \"df.RData\")"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "Step 1 - Carregando a base do ISP.ipynb",
	"version": "0.3.2",
	"provenance": [],
	"collapsed_sections": [],
	"toc_visible": true,
	"include_colab_link": true
	},
	"language_info": {
	"codemirror_mode": "r",
	"file_extension": ".r",
	"mimetype": "text/x-r-source",
	"name": "R",
	"pygments_lexer": "r",
	"version": "3.5.2"
	},
	"kernelspec": {
	"display_name": "R",
	"language": "R",
	"name": "ir"
	},
	"accelerator": "GPU"
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/github/americofreitasjr/AnaliseSegurancaPublicaRJ/blob/master/Step_1_Carregando_a_base_do_ISP.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "zrIEG3K-p-Sk",
	"colab_type": "text"
	},
	"source": [
	"# Carregando a base do ISP"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "fn8qIDWGqoQL",
	"colab_type": "text"
	},
	"source": [
	"\n",
	"Importando e instalando os pacotes"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "FnZchyNoql8x",
	"colab_type": "code",
	"outputId": "1f90e3ad-1f65-4fe3-c55c-ead84f0ec017",
	"colab": {
	"base_uri": "https://localhost:8080/",
	"height": 1000
	}
	},
	"source": [
	"# Install (when missing) and attach every package this notebook relies on.\n",
	"# plyr is attached before tidyverse: attaching it after dplyr masks dplyr verbs\n",
	"# (mutate, summarise, arrange, ...), which plyr's own startup message warns about.\n",
	"requiredPackages = c('plyr','tidyverse','gdata','caret','rlang','digest','DataExplorer')\n",
	"for(p in requiredPackages){\n",
	"  # requireNamespace() only checks availability; it does not attach the package.\n",
	"  if(!requireNamespace(p, quietly = TRUE)) install.packages(p)\n",
	"  # Startup chatter is silenced per call, so warnings raised by later cells stay visible.\n",
	"  suppressMessages(suppressWarnings(library(p,character.only = TRUE)))\n",
	"}"
	],
	"execution_count": 1,
	"outputs": [
	{
	"output_type": "stream",
	"text": [
	"Loading required package: tidyverse\n",
	"── \u001b[1mAttaching packages\u001b[22m ─────────────────────────────────────── tidyverse 1.2.1 ──\n",
	"\u001b[32m✔\u001b[39m \u001b[34mggplot2\u001b[39m 3.2.0 \u001b[32m✔\u001b[39m \u001b[34mpurrr \u001b[39m 0.3.2\n",
	"\u001b[32m✔\u001b[39m \u001b[34mtibble \u001b[39m 2.1.3 \u001b[32m✔\u001b[39m \u001b[34mdplyr \u001b[39m 0.8.3\n",
	"\u001b[32m✔\u001b[39m \u001b[34mtidyr \u001b[39m 0.8.3 \u001b[32m✔\u001b[39m \u001b[34mstringr\u001b[39m 1.4.0\n",
	"\u001b[32m✔\u001b[39m \u001b[34mreadr \u001b[39m 1.3.1 \u001b[32m✔\u001b[39m \u001b[34mforcats\u001b[39m 0.4.0\n",
	"── \u001b[1mConflicts\u001b[22m ────────────────────────────────────────── tidyverse_conflicts() ──\n",
	"\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mfilter()\u001b[39m masks \u001b[34mstats\u001b[39m::filter()\n",
	"\u001b[31m✖\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mlag()\u001b[39m masks \u001b[34mstats\u001b[39m::lag()\n",
	"Loading required package: gdata\n",
	"gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.\n",
	"\n",
	"gdata: Unable to load perl libaries needed by read.xls()\n",
	"gdata: to support 'XLSX' (Excel 2007+) files.\n",
	"\n",
	"gdata: Run the function 'installXLSXsupport()'\n",
	"gdata: to automatically download and install the perl\n",
	"gdata: libaries needed to support Excel XLS and XLSX formats.\n",
	"\n",
	"Attaching package: ‘gdata’\n",
	"\n",
	"The following objects are masked from ‘package:dplyr’:\n",
	"\n",
	" combine, first, last\n",
	"\n",
	"The following object is masked from ‘package:purrr’:\n",
	"\n",
	" keep\n",
	"\n",
	"The following object is masked from ‘package:stats’:\n",
	"\n",
	" nobs\n",
	"\n",
	"The following object is masked from ‘package:utils’:\n",
	"\n",
	" object.size\n",
	"\n",
	"The following object is masked from ‘package:base’:\n",
	"\n",
	" startsWith\n",
	"\n",
	"Loading required package: caret\n",
	"Loading required package: lattice\n",
	"\n",
	"Attaching package: ‘caret’\n",
	"\n",
	"The following object is masked from ‘package:purrr’:\n",
	"\n",
	" lift\n",
	"\n",
	"Loading required package: plyr\n",
	"------------------------------------------------------------------------------\n",
	"You have loaded plyr after dplyr - this is likely to cause problems.\n",
	"If you need functions from both plyr and dplyr, please load plyr first, then dplyr:\n",
	"library(plyr); library(dplyr)\n",
	"------------------------------------------------------------------------------\n",
	"\n",
	"Attaching package: ‘plyr’\n",
	"\n",
	"The following objects are masked from ‘package:dplyr’:\n",
	"\n",
	" arrange, count, desc, failwith, id, mutate, rename, summarise,\n",
	" summarize\n",
	"\n",
	"The following object is masked from ‘package:purrr’:\n",
	"\n",
	" compact\n",
	"\n",
	"Loading required package: rlang\n",
	"\n",
	"Attaching package: ‘rlang’\n",
	"\n",
	"The following objects are masked from ‘package:gdata’:\n",
	"\n",
	" env, ll\n",
	"\n",
	"The following objects are masked from ‘package:purrr’:\n",
	"\n",
	" %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,\n",
	" flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,\n",
	" splice\n",
	"\n",
	"Loading required package: digest\n",
	"Loading required package: DataExplorer\n"
	],
	"name": "stderr"
	}
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "Szh_1xe2qKgc",
	"colab_type": "text"
	},
	"source": [
	"Definindo o caminho dos arquivos para download e as credenciais de acesso"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "9_LKETLKp0y6",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Build the authenticated base URL used to download the CSV parts.\n",
	"# Credentials are read from the environment so no secret is stored in the notebook.\n",
	"# Define ISP_DATA_USER / ISP_DATA_PASS outside the notebook (e.g. in ~/.Renviron).\n",
	"user=Sys.getenv(\"ISP_DATA_USER\")\n",
	"pass=Sys.getenv(\"ISP_DATA_PASS\")\n",
	"if(!nzchar(user) || !nzchar(pass)) stop(\"Set ISP_DATA_USER and ISP_DATA_PASS before running this cell.\")\n",
	"url=\"data.cienciacomdados.com.br\"\n",
	"# Percent-encode both values because they are embedded in the URL's userinfo part.\n",
	"fullUrl=paste0(\"https://\",URLencode(user, reserved = TRUE),\":\",URLencode(pass, reserved = TRUE),\"@\",url,\"/\")"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "oLGIqWIkqfLn",
	"colab_type": "text"
	},
	"source": [
	"Carregando os arquivos para os data frames\n"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "EG5TriGyp0zC",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Download the nine CSV parts into parte1 ... parte9 with one loop instead of nine\n",
	"# copy-pasted calls; the file names differ only in the trailing index.\n",
	"for(i in 1:9){\n",
	"  arquivo = paste0(fullUrl,\"pedido_022_2019_parte\",i,\".csv\")\n",
	"  # assign() creates the variable parte<i> in the environment where this cell runs.\n",
	"  assign(paste0(\"parte\",i), read.csv2(file = arquivo, header = TRUE, sep = \";\"))\n",
	"}"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "wastyNkEqwe5",
	"colab_type": "text"
	},
	"source": [
	"Juntando todos os data frames em apenas um"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "zqa9d5Xep0zK",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Row-bind the nine parts into a single data frame named df.\n",
	"partes = list(parte1, parte2, parte3, parte4, parte5, parte6, parte7, parte8, parte9)\n",
	"df = rbind.fill(partes)\n"
	],
	"execution_count": 0,
	"outputs": []
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "zYWYwSKhky-c",
	"colab_type": "code",
	"colab": {}
	},
	"source": [
	"# Persist the combined data frame to df.RData in the current working directory.\n",
	"save(list = \"df\", file = \"df.RData\")"
	],
	"execution_count": 0,
	"outputs": []
	}
	]
	}