Created
December 1, 2018 19:52
-
-
Save tresende/28ed94c1457db3f9409ba4a46b359fd1 to your computer and use it in GitHub Desktop.
Read IBM Cloud S3 files and use them with pandas in a Jupyter notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat_minor": 1, | |
"cells": [ | |
{ | |
"execution_count": 21, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [], | |
"source": "import pandas as pd" | |
}, | |
{ | |
"source": "## Connect to S3 file storage", | |
"cell_type": "markdown", | |
"metadata": {} | |
}, | |
{ | |
"execution_count": 22, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [], | |
"source": "from botocore.client import Config\nimport ibm_boto3\nimport types\n\n\ndef __iter__(self):\n    # Placeholder iterator; the read cell binds it onto the S3 body when that object has no __iter__.\n    return 0\n\n\n# Placeholder credentials: substitute real values (preferably read from the environment, not committed).\n# api_key must be a plain string -- a trailing comma after the literal would make it a 1-tuple.\napi_key = 'xxxxxx'\nbucket = 'yyyyyy'\n\n# Build the S3-compatible client used by the cells below.\nclient = ibm_boto3.client(\n    service_name='s3',\n    ibm_api_key_id=api_key,\n    ibm_auth_endpoint=\"https://iam.bluemix.net/oidc/token\",\n    config=Config(signature_version='oauth'),\n    endpoint_url='https://s3.eu-geo.objectstorage.service.networklayer.com')" | |
}, | |
{ | |
"source": "## Read file", | |
"cell_type": "markdown", | |
"metadata": {} | |
}, | |
{ | |
"execution_count": 23, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Object key of the CSV inside the bucket.\nfilename = 'aluguel.csv'\n\n# Fetch the object and keep only its 'Body' entry.\nbody = client.get_object(\n    Bucket=bucket,\n    Key=filename\n)['Body']\n\n# Bind the notebook-level __iter__ stub onto the body when it does not already have one.\nif not hasattr(body, \"__iter__\"):\n    body.__iter__ = types.MethodType(__iter__, body)\n\n# Parse the semicolon-separated content into a DataFrame.\ndados = pd.read_csv(body, sep=\";\")" | |
}, | |
{ | |
"execution_count": 24, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"execution_count": 24, | |
"metadata": {}, | |
"data": { | |
"text/plain": "pandas.core.frame.DataFrame" | |
}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": "type(dados)" | |
}, | |
{ | |
"execution_count": 25, | |
"cell_type": "code", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"execution_count": 25, | |
"metadata": {}, | |
"data": { | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Tipo</th>\n <th>Bairro</th>\n <th>Quartos</th>\n <th>Vagas</th>\n <th>Suites</th>\n <th>Area</th>\n <th>Valor</th>\n <th>Condominio</th>\n <th>IPTU</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Quitinete</td>\n <td>Copacabana</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>40</td>\n <td>1700.0</td>\n <td>500.0</td>\n <td>60.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Casa</td>\n <td>Jardim Bot\u00e2nico</td>\n <td>2</td>\n <td>0</td>\n <td>1</td>\n <td>100</td>\n <td>7000.0</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Conjunto Comercial/Sala</td>\n <td>Barra da Tijuca</td>\n <td>0</td>\n <td>4</td>\n <td>0</td>\n <td>150</td>\n <td>5200.0</td>\n <td>4020.0</td>\n <td>1111.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Apartamento</td>\n <td>Centro</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>15</td>\n <td>800.0</td>\n <td>390.0</td>\n <td>20.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Apartamento</td>\n <td>Higien\u00f3polis</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>48</td>\n <td>800.0</td>\n <td>230.0</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"text/plain": " Tipo Bairro Quartos Vagas Suites Area \\\n0 Quitinete Copacabana 1 0 0 40 \n1 Casa Jardim Bot\u00e2nico 2 0 1 100 \n2 Conjunto Comercial/Sala Barra da Tijuca 0 4 0 150 \n3 Apartamento Centro 1 0 0 15 \n4 Apartamento Higien\u00f3polis 1 0 0 48 \n\n Valor Condominio IPTU \n0 1700.0 500.0 60.0 \n1 7000.0 NaN NaN \n2 5200.0 4020.0 1111.0 \n3 800.0 390.0 20.0 \n4 800.0 230.0 NaN " | |
}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": "dados.head()" | |
}, | |
{ | |
"source": "## Informa\u00e7\u00f5es Gerais Sobre a Base de Dados", | |
"cell_type": "markdown", | |
"metadata": {} | |
}, | |
{ | |
"execution_count": 26, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [], | |
"source": "tipos_de_dados = pd.DataFrame(dados.dtypes, columns=['Tipos de Dados'])" | |
}, | |
{ | |
"execution_count": 27, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [], | |
"source": "tipos_de_dados.columns.name ='Vari\u00e1veis'" | |
}, | |
{ | |
"execution_count": 28, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": "A base de dados apresenta 32960 registros(im\u00f3veis) e 9 vari\u00e1veis\n" | |
} | |
], | |
"source": "# Summarize the dataset size: shape[0] is the row (record) count, shape[1] the column (variable) count.\nprint('A base de dados apresenta {} registros(im\u00f3veis) e {} vari\u00e1veis'.format(dados.shape[0], dados.shape[1]))" | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.5", | |
"name": "python3", | |
"language": "python" | |
}, | |
"language_info": { | |
"mimetype": "text/x-python", | |
"nbconvert_exporter": "python", | |
"version": "3.5.5", | |
"name": "python", | |
"file_extension": ".py", | |
"pygments_lexer": "ipython3", | |
"codemirror_mode": { | |
"version": 3, | |
"name": "ipython" | |
} | |
} | |
}, | |
"nbformat": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment