Skip to content

Instantly share code, notes, and snippets.

@tinproject
Created December 15, 2015 16:14
Show Gist options
  • Save tinproject/bb1538a61083d8e4cb21 to your computer and use it in GitHub Desktop.
Save tinproject/bb1538a61083d8e4cb21 to your computer and use it in GitHub Desktop.
Caliair, process air quality data from Ayto. Madrid.
from typing import Any, Callable, Iterable, Generator, List, Optional, Tuple, Dict
import datetime
from itertools import count
def identity(value: Any) -> Any:
    """
    No-op transform: hand back exactly the object that was passed in.

    :param value: any object
    :return: the very same object, untouched
    """
    unchanged = value
    return unchanged
def bool_test(something: Any) -> bool:
    """
    Report the truthiness of an object.

    :param something: any object
    :return: ``True`` when the object is truthy, ``False`` otherwise
    """
    return True if something else False
def remove_commas(line_gen: Iterable[str]) -> Generator[str, Any, Any]:
    """
    Lazily strip every comma from each string of an iterable.

    :param line_gen: iterable that yields strings
    :return: a generator yielding each input string with all ',' removed
    """
    for raw_line in line_gen:
        yield raw_line.replace(',', '')
def slice_str(start: int, stop: Optional[int]) -> Callable[[str], str]:
    """
    Build a function that extracts a fixed window from a string.

    :param start: index of the first character to keep
    :param stop: index one past the last character to keep, or ``None``
        to keep everything from ``start`` to the end of the string
    :return: a one-argument callable returning ``s[start:stop]``
    """
    window = slice(start, stop)

    def extract(s):
        return s[window]

    return extract
def strip(string: str, size: int) -> Generator[Tuple[int, str], None, None]:
    """
    Cut a string into consecutive chunks of exactly ``size`` characters.

    A trailing remainder shorter than ``size`` is dropped.

    :param string: the text to cut up
    :param size: length of every chunk; must be positive
    :return: a generator of ``(index, chunk)`` pairs, ``index`` starting at 0
    :raises ValueError: when iteration starts, if ``size`` is not positive
    """
    if size <= 0:
        # A non-positive size would never advance past the end of the string.
        raise ValueError("size must be a positive integer")
    # Last offset at which a full-size chunk still fits. Ending the loop
    # normally (instead of raising StopIteration, which PEP 479 turns into
    # RuntimeError inside a generator) finishes the iteration cleanly.
    last_start = len(string) - size
    for index, start in enumerate(range(0, last_start + 1, size)):
        yield index, string[start:start + size]
# Real-time hourly data uses a four-digit year; historical data uses a two-digit year.
def str_yymmdd_to_date(s):
    """
    Parse a ``YYMMDD`` string (two-digit year) into a ``datetime.date``.

    :param s: date text such as ``"151214"``
    :return: the corresponding ``datetime.date``
    """
    parsed = datetime.datetime.strptime(s, "%y%m%d")
    return parsed.date()
def str_yyyymmdd_to_date(s):
    """
    Parse a ``YYYYMMDD`` string (four-digit year) into a ``datetime.date``.

    :param s: date text such as ``"20151214"``
    :return: the corresponding ``datetime.date``
    """
    parsed = datetime.datetime.strptime(s, "%Y%m%d")
    return parsed.date()
################################################################################
# ---------- Classes for fields and registers
class Field:
    """A named value cut out of a record string, then validated and converted."""

    def __init__(self, name: str,
                 extract: Callable[[str], str],
                 validate: Callable[[str], bool] = bool_test,
                 transform: Callable[[str], Any] = identity) -> None:
        """
        Field object

        :param name: name of the field
        :param extract: extract the field from the original string
        :param validate: validates the value of the field, can be used for logging
        :param transform: transforms the string extracted to the correct type
        """
        self.name = name
        self.extract = extract
        self.validate = validate
        self.transform = transform

    def to_key_value(self, record: str) -> Optional[Tuple[str, Any]]:
        """
        Extract this field from a record as a ``(name, value)`` pair.

        :param record: the full record string
        :return: ``(name, transformed value)``, or ``None`` when the
            extracted text does not pass ``validate``
        """
        raw = self.extract(record)
        if not self.validate(raw):
            # Explicit so callers know they must cope with a missing pair.
            return None
        return self.name, self.transform(raw)
class RepeatableField:
    """A group of fields that occurs several times, back to back, in a record."""

    def __init__(self,
                 extract: Callable[[str], str],
                 size: int,
                 fields: List['Field'],
                 index_label: str,
                 index_transform: Callable[[int], str] = str) -> None:
        """
        Repeatable field object: some Fields repeated within a record

        :param extract: function to extract the repeatable part from a record (string)
        :param size: the size of the repeated part
        :param fields: list of fields that form the repeated part
        :param index_label: label for the index of the repeated portion;
            when falsy, no index entry is added to the results
        :param index_transform: function to adapt the index of the repeated part
        """
        self.extract = extract
        self.size = size
        self.fields = fields
        self.index_label = index_label
        self.index_transform = index_transform

    def to_key_value(self, record: str) -> Generator[Dict[str, Any], Any, Any]:
        """
        Yield one dict per repetition found in ``record``.

        :param record: the full record string
        :return: a generator of dicts mapping field names to values, plus
            ``index_label`` mapped to the transformed repetition index
        """
        repeated_part = self.extract(record)
        for index, chunk in strip(repeated_part, self.size):
            pairs = (field.to_key_value(chunk) for field in self.fields)
            # to_key_value() returns None for a field that fails validation;
            # dict() cannot consume None, so such fields are left out.
            result = dict(pair for pair in pairs if pair is not None)
            if self.index_label:
                result[self.index_label] = self.index_transform(index)
            yield result
class RepeatableRegister:
    """A record layout: fixed fields followed by one repeatable group."""

    def __init__(self, fixed_fields: List['Field'], repeatable_field: 'RepeatableField') -> None:
        """
        :param fixed_fields: fields that appear once per record
        :param repeatable_field: the group of fields repeated within a record
        """
        self.fixed_fields = fixed_fields
        self.repeatable_field = repeatable_field

    def str_to_dict_gen(self, gen: Iterable) -> Generator[Dict[str, Any], Any, Any]:
        """
        Turn record strings into flat dicts, one per repetition.

        :param gen: iterable of record strings
        :return: a generator of dicts; each one holds the values of one
            repetition updated with the record's fixed-field values
        """
        for record in gen:
            pairs = (field.to_key_value(record) for field in self.fixed_fields)
            # to_key_value() returns None for a field that fails validation
            # (e.g. on a blank or short line); dict() cannot consume None,
            # so such fields are left out instead of raising TypeError.
            fixed = dict(pair for pair in pairs if pair is not None)
            for repeated in self.repeatable_field.to_key_value(record):
                repeated.update(fixed)
                yield repeated
def filter_by(field_gen: Iterable[Dict[str, Any]], **kwargs) -> Generator[Dict[str, Any], Any, Any]:
    """
    Keep only the dicts that match every given keyword.

    A dict matches when, for each ``key=value`` keyword, it contains
    ``key`` and the stored value compares equal to ``value``.

    :param field_gen: Iterable of dicts
    :param kwargs: key=value pairs that a dict must match
    :return: a generator over the matching dicts, in their original order
    """
    for candidate in field_gen:
        for key, expected in kwargs.items():
            if key not in candidate or not candidate[key] == expected:
                break
        else:
            # No criterion rejected the dict (or no criteria were given).
            yield candidate
################################################################################
# ---------- Field definitions for the air-pollution records.
# Fields that occur once per record, located by character offsets.
fixed_fields = [
    Field(name='codigo_estacion', extract=slice_str(0, 8)),
    Field(name='magnitud_medida', extract=slice_str(8, 10)),
    Field(name='tecnica_analitica', extract=slice_str(10, 12)),
    Field(name='periodo', extract=slice_str(12, 14)),
    Field(name='fecha', extract=slice_str(14, 22), transform=str_yyyymmdd_to_date),
]
# Layout of one repeated chunk: 5 characters of value, then 1 of validity.
repeated_fields = [
    Field(name='valor', extract=slice_str(0, 5)),
    Field(name='validez', extract=slice_str(5, 6)),
]
# Everything from offset 22 onwards is cut into 6-character chunks; the
# chunk position is reported 1-based under the 'intervalo' key.
repeatable_field = RepeatableField(
    extract=slice_str(22, None),
    size=6,
    fields=repeated_fields,
    index_label='intervalo',
    index_transform=lambda position: str(position + 1),
)
tiempo_real = RepeatableRegister(fixed_fields=fixed_fields,
                                 repeatable_field=repeatable_field)
# Lookup table: two-character measured-quantity code -> Spanish display name.
# Presumably keyed by the values of the 'magnitud_medida' record field —
# TODO confirm against the data-format specification.
magnitud_medida = {
"01": "Dióxido de Azufre",
"06": "Monóxido de Carbono",
"07": "Monóxido de Nitrógeno",
"08": "Dióxido de Nitrógeno",
"09": "Partículas < 2.5 μm",
"10": "Partículas < 10 μm",
"12": "Óxidos de Nitrógeno",
"14": "Ozono",
"20": "Tolueno",
"30": "Benceno",
"35": "Etilbenceno",
"37": "Metaxileno",
"38": "Paraxileno",
"39": "Ortoxileno",
"42": "Hidrocarburos totales (hexano)",
"43": "Hidrocarburos (metano)",
"44": "Hidrocarburos no metánicos (hexano)",
"80": "Radiación ultravioleta",
"81": "Velocidad del viento",
"82": "Dirección del viento",
"83": "Temperatura",
"86": "Humedad relativa",
"87": "Presión",
"88": "Radiación solar",
"89": "Precipitación",
"92": "Lluvia ácida",
}
# Lookup table: two-character analytical-technique code -> Spanish display name.
# Presumably keyed by the values of the 'tecnica_analitica' record field —
# TODO confirm against the data-format specification.
tecnica_analitica = {
"38": "Fluorescencia ultravioleta",
"48": "Absorción infrarroja",
"08": "Quimioluminiscencia",
"47": "Microbalanza",
"06": "Absorción ultravioleta",
"59": "Cromatografía de gases",
"02": "Ionización de llama",
"98": "Sensores meteorológicos",
}
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Calidad del aire: Datos en tiempo real\n",
"\n",
"Página correspondiente al portal de datos abiertos del Ayuntamiento de Madrid:\n",
"\n",
"http://datos.madrid.es/portal/site/egob/menuitem.c05c1f754a33a9fbe4b2e4b284f1a5a0/?vgnextoid=41e01e007c9db410VgnVCM2000000c205a0aRCRD&vgnextchannel=374512b9ace9f310VgnVCM100000171f5a0aRCRD\n",
"\n",
"Fichero proporcionado que muestra la estructura de los datos:\n",
"\n",
"http://datos.madrid.es/FWProjects/egob/contenidos/datasets/ficheros/MedioAmbiente_CalidadAire/INTPHORA-DIA_V2.2.pdf\n",
"\n",
"url del fichero con los datos horarios:\n",
"\n",
"http://www.mambiente.munimadrid.es/opendata/horario.txt\n",
"\n",
"\n",
"## Calidad del aire: Estaciones de control\n",
"\n",
"http://datos.madrid.es/portal/site/egob/menuitem.c05c1f754a33a9fbe4b2e4b284f1a5a0/?vgnextoid=9e42c176313eb410VgnVCM1000000b205a0aRCRD&vgnextchannel=374512b9ace9f310VgnVCM100000171f5a0aRCRD"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"from caliair import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"url = \"http://www.mambiente.munimadrid.es/opendata/horario.txt\"\n",
"\n",
"response = requests.get(url)\n",
"raw_data = response.text"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"record_gen = remove_commas(line for line in raw_data.splitlines())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'280790990138022015121400010V00009V00008V00007V00007V00007V00007V00007V00009V00009V00010V00009V00009V00008V00008V00008V00008V00008V00008V00009V00008V00008V00007V00000N'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(record_gen)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data_gen = tiempo_real.str_to_dict_gen(record_gen)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'codigo_estacion': '28079099',\n",
" 'fecha': datetime.date(2015, 12, 14),\n",
" 'intervalo': '1',\n",
" 'magnitud_medida': '06',\n",
" 'periodo': '02',\n",
" 'tecnica_analitica': '48',\n",
" 'validez': 'V',\n",
" 'valor': '000.7'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"next(data_gen)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>codigo_estacion</th>\n",
" <th>fecha</th>\n",
" <th>intervalo</th>\n",
" <th>magnitud_medida</th>\n",
" <th>periodo</th>\n",
" <th>tecnica_analitica</th>\n",
" <th>validez</th>\n",
" <th>valor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>2</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>3</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>4</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>5</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>6</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>7</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>8</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>9</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>10</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>11</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>12</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>13</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>14</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>15</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>16</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>17</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>18</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>19</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>20</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>21</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>22</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>23</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>V</td>\n",
" <td>000.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>24</td>\n",
" <td>06</td>\n",
" <td>02</td>\n",
" <td>48</td>\n",
" <td>N</td>\n",
" <td>00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>1</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>2</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>3</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>4</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>5</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>6</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>28079099</td>\n",
" <td>2015-12-14</td>\n",
" <td>7</td>\n",
" <td>07</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5129</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>19</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00927</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5130</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>20</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5131</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>21</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5132</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>22</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5133</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>23</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5134</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>24</td>\n",
" <td>87</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>N</td>\n",
" <td>00000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5135</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>1</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5136</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>2</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5137</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>3</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5138</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>4</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5139</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>5</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5140</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>6</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5141</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>7</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5142</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>8</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5143</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>9</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5144</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>10</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5145</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>11</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5146</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>12</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5147</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>13</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5148</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>14</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5149</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>15</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5150</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>16</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5151</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>17</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5152</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>18</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5153</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>19</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5154</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>20</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5155</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>21</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5156</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>22</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5157</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>23</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5158</th>\n",
" <td>28079060</td>\n",
" <td>2015-12-14</td>\n",
" <td>24</td>\n",
" <td>89</td>\n",
" <td>02</td>\n",
" <td>98</td>\n",
" <td>N</td>\n",
" <td>00000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5159 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" codigo_estacion fecha intervalo magnitud_medida periodo \\\n",
"0 28079099 2015-12-14 2 06 02 \n",
"1 28079099 2015-12-14 3 06 02 \n",
"2 28079099 2015-12-14 4 06 02 \n",
"3 28079099 2015-12-14 5 06 02 \n",
"4 28079099 2015-12-14 6 06 02 \n",
"5 28079099 2015-12-14 7 06 02 \n",
"6 28079099 2015-12-14 8 06 02 \n",
"7 28079099 2015-12-14 9 06 02 \n",
"8 28079099 2015-12-14 10 06 02 \n",
"9 28079099 2015-12-14 11 06 02 \n",
"10 28079099 2015-12-14 12 06 02 \n",
"11 28079099 2015-12-14 13 06 02 \n",
"12 28079099 2015-12-14 14 06 02 \n",
"13 28079099 2015-12-14 15 06 02 \n",
"14 28079099 2015-12-14 16 06 02 \n",
"15 28079099 2015-12-14 17 06 02 \n",
"16 28079099 2015-12-14 18 06 02 \n",
"17 28079099 2015-12-14 19 06 02 \n",
"18 28079099 2015-12-14 20 06 02 \n",
"19 28079099 2015-12-14 21 06 02 \n",
"20 28079099 2015-12-14 22 06 02 \n",
"21 28079099 2015-12-14 23 06 02 \n",
"22 28079099 2015-12-14 24 06 02 \n",
"23 28079099 2015-12-14 1 07 02 \n",
"24 28079099 2015-12-14 2 07 02 \n",
"25 28079099 2015-12-14 3 07 02 \n",
"26 28079099 2015-12-14 4 07 02 \n",
"27 28079099 2015-12-14 5 07 02 \n",
"28 28079099 2015-12-14 6 07 02 \n",
"29 28079099 2015-12-14 7 07 02 \n",
"... ... ... ... ... ... \n",
"5129 28079060 2015-12-14 19 87 02 \n",
"5130 28079060 2015-12-14 20 87 02 \n",
"5131 28079060 2015-12-14 21 87 02 \n",
"5132 28079060 2015-12-14 22 87 02 \n",
"5133 28079060 2015-12-14 23 87 02 \n",
"5134 28079060 2015-12-14 24 87 02 \n",
"5135 28079060 2015-12-14 1 89 02 \n",
"5136 28079060 2015-12-14 2 89 02 \n",
"5137 28079060 2015-12-14 3 89 02 \n",
"5138 28079060 2015-12-14 4 89 02 \n",
"5139 28079060 2015-12-14 5 89 02 \n",
"5140 28079060 2015-12-14 6 89 02 \n",
"5141 28079060 2015-12-14 7 89 02 \n",
"5142 28079060 2015-12-14 8 89 02 \n",
"5143 28079060 2015-12-14 9 89 02 \n",
"5144 28079060 2015-12-14 10 89 02 \n",
"5145 28079060 2015-12-14 11 89 02 \n",
"5146 28079060 2015-12-14 12 89 02 \n",
"5147 28079060 2015-12-14 13 89 02 \n",
"5148 28079060 2015-12-14 14 89 02 \n",
"5149 28079060 2015-12-14 15 89 02 \n",
"5150 28079060 2015-12-14 16 89 02 \n",
"5151 28079060 2015-12-14 17 89 02 \n",
"5152 28079060 2015-12-14 18 89 02 \n",
"5153 28079060 2015-12-14 19 89 02 \n",
"5154 28079060 2015-12-14 20 89 02 \n",
"5155 28079060 2015-12-14 21 89 02 \n",
"5156 28079060 2015-12-14 22 89 02 \n",
"5157 28079060 2015-12-14 23 89 02 \n",
"5158 28079060 2015-12-14 24 89 02 \n",
"\n",
" tecnica_analitica validez valor \n",
"0 48 V 000.6 \n",
"1 48 V 000.4 \n",
"2 48 V 000.4 \n",
"3 48 V 000.3 \n",
"4 48 V 000.3 \n",
"5 48 V 000.3 \n",
"6 48 V 000.4 \n",
"7 48 V 000.6 \n",
"8 48 V 000.6 \n",
"9 48 V 000.5 \n",
"10 48 V 000.5 \n",
"11 48 V 000.4 \n",
"12 48 V 000.3 \n",
"13 48 V 000.3 \n",
"14 48 V 000.3 \n",
"15 48 V 000.3 \n",
"16 48 V 000.5 \n",
"17 48 V 000.5 \n",
"18 48 V 000.5 \n",
"19 48 V 000.5 \n",
"20 48 V 000.4 \n",
"21 48 V 000.4 \n",
"22 48 N 00000 \n",
"23 08 V 00074 \n",
"24 08 V 00053 \n",
"25 08 V 00030 \n",
"26 08 V 00022 \n",
"27 08 V 00016 \n",
"28 08 V 00017 \n",
"29 08 V 00024 \n",
"... ... ... ... \n",
"5129 98 V 00927 \n",
"5130 98 V 00928 \n",
"5131 98 V 00928 \n",
"5132 98 V 00929 \n",
"5133 98 V 00929 \n",
"5134 98 N 00000 \n",
"5135 98 V 00.00 \n",
"5136 98 V 00.00 \n",
"5137 98 V 00.00 \n",
"5138 98 V 00.00 \n",
"5139 98 V 00.00 \n",
"5140 98 V 00.00 \n",
"5141 98 V 00.00 \n",
"5142 98 V 00.00 \n",
"5143 98 V 00.00 \n",
"5144 98 V 00.00 \n",
"5145 98 V 00.00 \n",
"5146 98 V 00.00 \n",
"5147 98 V 00.00 \n",
"5148 98 V 00.00 \n",
"5149 98 V 00.00 \n",
"5150 98 V 00.00 \n",
"5151 98 V 00.00 \n",
"5152 98 V 00.00 \n",
"5153 98 V 00.00 \n",
"5154 98 V 00.00 \n",
"5155 98 V 00.00 \n",
"5156 98 V 00.50 \n",
"5157 98 V 00.00 \n",
"5158 98 N 00000 \n",
"\n",
"[5159 rows x 8 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame.from_records(data_gen)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>codigo_estacion</th>\n",
" <th>fecha</th>\n",
" <th>intervalo</th>\n",
" <th>magnitud_medida</th>\n",
" <th>periodo</th>\n",
" <th>tecnica_analitica</th>\n",
" <th>validez</th>\n",
" <th>valor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" <td>5159</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>25</td>\n",
" <td>1</td>\n",
" <td>24</td>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>623</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>28079024</td>\n",
" <td>2015-12-14</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>02</td>\n",
" <td>08</td>\n",
" <td>V</td>\n",
" <td>00.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>528</td>\n",
" <td>5159</td>\n",
" <td>215</td>\n",
" <td>600</td>\n",
" <td>5159</td>\n",
" <td>1800</td>\n",
" <td>4934</td>\n",
" <td>320</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" codigo_estacion fecha intervalo magnitud_medida periodo \\\n",
"count 5159 5159 5159 5159 5159 \n",
"unique 25 1 24 22 1 \n",
"top 28079024 2015-12-14 13 12 02 \n",
"freq 528 5159 215 600 5159 \n",
"\n",
" tecnica_analitica validez valor \n",
"count 5159 5159 5159 \n",
"unique 8 2 623 \n",
"top 08 V 00.00 \n",
"freq 1800 4934 320 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment