Creates time-based Glue partitions for a given time range.
Keep in mind that you don't need data to add partitions, so you can create partitions for a whole year and add the data to S3 later.
def open_yaml(path): | |
""" | |
Load yaml file. | |
Parameters | |
---------- | |
path: pathlib.PosixPath | |
Path to yaml file | |
Return | |
------ | |
Dictionary |
Creates time-based Glue partitions for a given time range.
Keep in mind that you don't need data to add partitions, so you can create partitions for a whole year and add the data to S3 later.
-- Counts GPS records per H3 grid cell (id_grid_h3) and hour (hora).
-- A GPS record is matched to a grid cell when the point built from
-- (longitude, latitude) intersects the cell geometry. Only rows whose
-- id_municipio belongs to the municipality named 'Rio de Janeiro' are kept.
-- NOTE(review): columns are unqualified; presumably geometria,
-- quantidade_pessoas and id_municipio come from t2 and the coordinates
-- from t1 -- confirm against the table schemas.
SELECT
    id_grid_h3,
    hora,
    ST_ASTEXT(ANY_VALUE(geometria)) AS wkt,           -- cell geometry as WKT text
    COUNT(*) AS n_registros,                          -- number of joined rows in the group
    ANY_VALUE(quantidade_pessoas) AS populacao
FROM `rj-smtr.br_rj_riodejaneiro_onibus_gps.registros_tratada` t1
JOIN `basedosdados.br_ipea_acesso_oportunidades.estatisticas_2019` t2
    ON ST_INTERSECTS(geometria, ST_GEOGPOINT(longitude, latitude))
WHERE id_municipio IN (
    SELECT id_municipio
    FROM `basedosdados.br_bd_diretorios_brasil.municipio`
    WHERE municipio = 'Rio de Janeiro'
)
GROUP BY id_grid_h3, hora
def line_polygon_intersection(line_df, poly_df): | |
""" | |
It cuts the line if it sits between polygons. | |
""" | |
column_geom_poly = poly_df._geometry_column_name | |
column_geom_line = line_df._geometry_column_name | |
spatial_index = line_df.sindex | |
bbox = poly_df.geometry.apply(lambda x: x.bounds) |
from github import Github | |
from time import sleep | |
g = Github(token) | |
search_str = 'basedosdados' | |
repo = [] | |
for i in g.search_code(search_str): | |
sleep(0.2) |
from shapely.geometry import box, Polygon, MultiPolygon, GeometryCollection | |
from shapely.wkt import loads | |
def threshold_func(geometry, threshold_value):
    """
    Tell whether a geometry's area is strictly below a threshold.

    Parameters
    ----------
    geometry:
        Object exposing a numeric ``area`` attribute
        (presumably a shapely geometry -- confirm against callers).
    threshold_value:
        Value that ``geometry.area`` is compared against.

    Return
    ------
    Result of ``geometry.area < threshold_value``
    """
    return geometry.area < threshold_value
def katana(geometry, threshold_func, threshold_value, number_tiles=0, max_number_tiles=250): | |
"""Splits a geometry in tiles forming a grid given a threshold function and | |
a maximum number of tiles. |
import xmltodict
import pandas as pd
import requests

# Download the XML document. 'url' is a placeholder for the real endpoint.
# The original called `request.get`, which is a NameError: the imported
# module is `requests`.
response = requests.get('url')
# Stop on HTTP errors rather than trying to parse an error page as XML.
response.raise_for_status()
xml = response.text

# Parse the XML into nested dictionaries and load them into a DataFrame.
df = pd.DataFrame(xmltodict.parse(xml))
# Drop the '@' that xmltodict puts in front of XML attribute names.
df.rename(columns=lambda x: x.replace('@', ''), inplace=True)
df.to_csv('data.csv')
# Download all AWS Lambda functions in parallel
# Assumes you have run `aws configure` and have the output format set to "text"
# Works with "aws-cli/1.16.72 Python/3.6.7 Linux/4.15.0-42-generic botocore/1.12.62"
# Download the deployment package of one Lambda function.
#   $1 - name of the Lambda function
# Writes ./lambda_functions/<name>.zip; the lambda_functions directory must
# already exist. Returns non-zero if the AWS call or the download fails.
download_code () {
    local OUTPUT=$1
    local code_url
    # Ask the CLI for the package URL directly instead of slicing the text
    # output with head/cut, so the result does not depend on field order or
    # on the configured output format.
    code_url=$(aws lambda get-function --function-name "$OUTPUT" \
        --query 'Code.Location' --output text) || return
    # Quote both arguments: the URL contains '&' and '?' characters, and the
    # function name must not be word-split.
    wget -O "./lambda_functions/${OUTPUT}.zip" "$code_url"
}
mkdir lambda_functions | |
for run in $(aws lambda list-functions | cut -f 6 | xargs); |
/**
 * Check that the first two characters of a coordinate are each between
 * 1 and 8 inclusive.
 *
 * The coordinate is converted to a string and its characters at index 0 and
 * 1 are compared numerically (JavaScript coerces each character to a
 * number). A value with fewer than two characters yields false because the
 * missing character compares as NaN. Characters after the second are ignored.
 *
 * @param {number|string} coord - coordinate such as 18 or "45"
 * @returns {boolean} true when both leading characters are within 1..8
 */
function coord_boundaries(coord) {
    const text = coord.toString()
    const withinRange = (ch) => ch >= 1 && ch <= 8
    return withinRange(text[0]) && withinRange(text[1])
}
function highlight_square(coord, color) { | |
board = document.getElementsByClassName('layout-board')[0] |
.PHONY: create-env update-env | |
# It creates an env. with the directory name | |
REPO=$(shell basename $(CURDIR)) | |
create-env: | |
python3 -m venv .$(REPO); | |
source .$(REPO)/bin/activate; \ | |
pip3 install --upgrade -r requirements.txt; \ |