Skip to content

Instantly share code, notes, and snippets.

@tinproject
Created December 15, 2015 16:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tinproject/bb1538a61083d8e4cb21 to your computer and use it in GitHub Desktop.
Save tinproject/bb1538a61083d8e4cb21 to your computer and use it in GitHub Desktop.
Caliair, process air quality data from Ayto. Madrid.
from typing import Any, Callable, Iterable, Generator, List, Optional, Tuple, Dict
import datetime
from itertools import count
def identity(value: Any) -> Any:
    """
    No-op function: hand back exactly the object that was passed in.

    :param value: any object
    :return: the very same object, unchanged
    """
    return value
def bool_test(something: Any) -> bool:
    """
    Report the truthiness of a value.

    :param something: any object
    :return: True when the object is truthy, False otherwise
    """
    return True if something else False
def remove_commas(line_gen: Iterable[str]) -> Generator[str, Any, Any]:
    """
    Lazily strip every comma out of each string of an iterable.

    :param line_gen: iterable that yields strings
    :return: a generator yielding each string with all ',' characters removed
    """
    yield from (text.replace(',', '') for text in line_gen)
def slice_str(start: int, stop: Optional[int]) -> Callable[[str], str]:
    """
    Build a function that extracts a fixed slice from a string.

    :param start: index of the first character to keep
    :param stop: index one past the last character to keep, or None to
                 keep everything up to the end of the string
    :return: a callable that maps a string to ``string[start:stop]``
    """
    # slice(start, None) is the same as the open-ended ``[start:]``.
    window = slice(start, stop)

    def extractor(s):
        return s[window]

    return extractor
def strip(string: str, size: int) -> Generator[Tuple[int, str], None, None]:
    """
    Cut a string into consecutive, non-overlapping chunks of fixed size.

    Only complete chunks are produced; a trailing remainder shorter than
    ``size`` is silently dropped.

    :param string: the string to cut
    :param size: length of each chunk, must be a positive integer
    :return: a generator of ``(index, chunk)`` tuples, index starting at 0
    :raises ValueError: if ``size`` is not positive (raised on first iteration)
    """
    if size < 1:
        # A non-positive size could never reach the end of the string.
        raise ValueError("size must be a positive integer, got %r" % (size,))
    # Last offset at which a complete chunk still fits. The generator simply
    # ends afterwards: raising StopIteration inside a generator is turned
    # into RuntimeError by PEP 479.
    last_start = len(string) - size
    for index, start in enumerate(range(0, last_start + 1, size)):
        yield index, string[start:start + size]
# Real-time hourly data carries the year as four digits; historical data uses two.
def str_yymmdd_to_date(s):
    """
    Parse a date written as ``yymmdd`` (two-digit year).

    :param s: six-character string such as "151215"
    :return: the corresponding datetime.date
    """
    return datetime.datetime.strptime(s, "%y%m%d").date()
def str_yyyymmdd_to_date(s):
    """
    Parse a date written as ``yyyymmdd`` (four-digit year).

    :param s: eight-character string such as "20151215"
    :return: the corresponding datetime.date
    """
    return datetime.datetime.strptime(s, "%Y%m%d").date()
################################################################################
# ---------- Classes for fields and registers
class Field:
    """
    A single named value extracted from a record string.
    """

    def __init__(self, name: str,
                 extract: Callable[[str], str],
                 validate: Callable[[str], bool] = bool_test,
                 transform: Callable[[str], Any] = identity) -> None:
        """
        Field object
        :param name: name of the field
        :param extract: extract the field from the original string
        :param validate: validates the value of the field, can be used for logging
        :param transform: transforms the string extracted to the correct type
        """
        self.name = name
        self.extract = extract
        self.validate = validate
        self.transform = transform

    def to_key_value(self, record: str) -> Optional[Tuple[str, Any]]:
        """
        Extract, validate and transform this field from a record.

        :param record: the record string the field is extracted from
        :return: a ``(name, transformed_value)`` tuple, or ``None`` when the
                 extracted text does not pass ``validate``
        """
        raw = self.extract(record)
        if not self.validate(raw):
            # Invalid values produce no pair; callers must cope with None.
            return None
        return self.name, self.transform(raw)
class RepeatableField:
    """
    A group of fields that repeats several times inside one record.
    """

    def __init__(self,
                 extract: Callable[[str], str],
                 size: int,
                 fields: List[Field],
                 index_label: str,
                 index_transform: Callable[[int], str] = str) -> None:
        """
        Repeatable field object, holds some Fields repeated within a record
        :param extract: function to extract the repeated portion from a record (string)
        :param size: the size of the repeated part
        :param fields: list of fields that form the repeated part
        :param index_label: label for the index of the repeated portion;
                            when falsy the index is not added to the result
        :param index_transform: function to adapt the index of the repeated part
        """
        self.extract = extract
        self.size = size
        self.fields = fields
        self.index_label = index_label
        self.index_transform = index_transform

    def to_key_value(self, record: str) -> Generator[Dict[str, Any], None, None]:
        """
        Yield one dict per repetition found in the record.

        :param record: the full record string
        :return: a generator of dicts mapping field names to values, plus
                 ``index_label`` mapped to the transformed repetition index
        """
        repeated_part = self.extract(record)
        for index, chunk in strip(repeated_part, self.size):
            pairs = (field.to_key_value(chunk) for field in self.fields)
            # Field.to_key_value returns None when validation fails; feeding
            # None to dict() raises TypeError, so those fields are left out.
            result = dict(pair for pair in pairs if pair is not None)
            if self.index_label:
                result[self.index_label] = self.index_transform(index)
            yield result
class RepeatableRegister:
    """
    A record layout made of fixed fields followed by a repeatable field.
    """

    def __init__(self, fixed_fields: List[Field], repeatable_field: RepeatableField) -> None:
        """
        :param fixed_fields: fields that appear once per record
        :param repeatable_field: description of the repeated portion of the record
        """
        self.fixed_fields = fixed_fields
        self.repeatable_field = repeatable_field

    def str_to_dict_gen(self, gen: Iterable) -> Generator[Dict[str, Any], None, None]:
        """
        Turn an iterable of record strings into a flat stream of dicts.

        Every repetition found in a record yields one dict that also carries
        the record's fixed fields. On a key clash the fixed value wins.

        :param gen: iterable of record strings
        :return: a generator of dicts, one per repetition per record
        """
        for record in gen:
            pairs = (field.to_key_value(record) for field in self.fixed_fields)
            # Field.to_key_value returns None when validation fails; feeding
            # None to dict() raises TypeError, so those fields are left out.
            fixed = dict(pair for pair in pairs if pair is not None)
            for repeated in self.repeatable_field.to_key_value(record):
                repeated.update(fixed)
                yield repeated
def filter_by(field_gen: Iterable[Dict[str, Any]], **kwargs) -> Generator[Dict[str, Any], Any, Any]:
    """
    Keep only the dicts whose entries match every given keyword.

    A dict passes when, for each ``key=value`` keyword, the key is present
    and its value compares equal. With no keywords every dict passes.

    :param field_gen: Iterable of dicts
    :param kwargs: key=value pairs every yielded dict must contain
    :return: a generator over the matching dicts
    """
    for candidate in field_gen:
        for key, expected in kwargs.items():
            if key not in candidate or not (candidate[key] == expected):
                break
        else:
            # No criterion failed.
            yield candidate
################################################################################
# ---------- Definition of the air pollution fields.
# Fields that appear once per record: the first 22 characters.
fixed_fields = [
    Field(name='codigo_estacion', extract=slice_str(0, 8)),
    Field(name='magnitud_medida', extract=slice_str(8, 10)),
    Field(name='tecnica_analitica', extract=slice_str(10, 12)),
    Field(name='periodo', extract=slice_str(12, 14)),
    Field(name='fecha', extract=slice_str(14, 22), transform=str_yyyymmdd_to_date),
]

# Each repetition is 6 characters: a 5-character value and a 1-character flag.
repeated_fields = [
    Field(name='valor', extract=slice_str(0, 5)),
    Field(name='validez', extract=slice_str(5, 6)),
]

# Everything from character 22 onwards is the repeated portion; repetitions
# are labelled 'intervalo' and numbered from "1".
repeatable_field = RepeatableField(
    extract=slice_str(22, None),
    size=6,
    fields=repeated_fields,
    index_label='intervalo',
    index_transform=lambda position: str(position + 1),
)

tiempo_real = RepeatableRegister(fixed_fields=fixed_fields,
                                 repeatable_field=repeatable_field)
# Two-character measured-magnitude codes mapped to their Spanish names
# (presumably the values found in the 'magnitud_medida' field).
magnitud_medida = {
    "01": "Dióxido de Azufre",
    "06": "Monóxido de Carbono",
    "07": "Monóxido de Nitrógeno",
    "08": "Dióxido de Nitrógeno",
    "09": "Partículas < 2.5 μm",
    "10": "Partículas < 10 μm",
    "12": "Óxidos de Nitrógeno",
    "14": "Ozono",
    "20": "Tolueno",
    "30": "Benceno",
    "35": "Etilbenceno",
    "37": "Metaxileno",
    "38": "Paraxileno",
    "39": "Ortoxileno",
    "42": "Hidrocarburos totales (hexano)",
    "43": "Hidrocarburos (metano)",
    "44": "Hidrocarburos no metánicos (hexano)",
    "80": "Radiación ultravioleta",
    "81": "Velocidad del viento",
    "82": "Dirección del viento",
    "83": "Temperatura",
    "86": "Humedad relativa",
    "87": "Presión",
    "88": "Radiación solar",
    "89": "Precipitación",
    "92": "Lluvia ácida",
}
# Two-character analytical-technique codes mapped to their Spanish names
# (presumably the values found in the 'tecnica_analitica' field).
tecnica_analitica = {
    "38": "Fluorescencia ultravioleta",
    "48": "Absorción infrarroja",
    "08": "Quimioluminiscencia",
    "47": "Microbalanza",
    "06": "Absorción ultravioleta",
    "59": "Cromatografía de gases",
    "02": "Ionización de llama",
    "98": "Sensores meteorológicos",
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment