Created
March 8, 2021 21:36
-
-
Save anna-anisienia/71d2c4970800a0a05689d89fb36b9fcd to your computer and use it in GitHub Desktop.
DynamoDB example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Dataset used: https://datasets.wri.org/dataset/globalpowerplantdatabase | |
""" | |
import os | |
import time | |
import pandas as pd | |
from datetime import datetime, timedelta | |
import awswrangler as wr | |
from boto3.dynamodb.conditions import Key | |
def extract_power_plant_data(csv_file: str = 'global_power_plant_database.csv',
                             nrows: int = 200) -> pd.DataFrame:
    """Load the first rows of the power plant CSV into a DataFrame.

    Args:
        csv_file: Path to the CSV file. A relative path is resolved against
            the current working directory; an absolute path is used as-is.
        nrows: Maximum number of data rows to read. Defaults to 200, the
            sample size this function previously had hard-coded.

    Returns:
        A DataFrame holding at most ``nrows`` rows of the file.
    """
    project_dir = os.getcwd()
    return pd.read_csv(os.path.join(project_dir, csv_file), nrows=nrows)
def transform_power_plant_data(df: pd.DataFrame) -> pd.DataFrame:
    """Select, format and de-duplicate the power plant columns for loading.

    Args:
        df: Raw power plant data. Must contain the columns ``country``,
            ``country_long``, ``name``, ``gppd_idnr``, ``capacity_mw`` and
            ``owner``. The input frame is not modified.

    Returns:
        A new DataFrame with the six selected columns, ``capacity_mw``
        rendered as a three-decimal string, exact duplicate rows removed,
        an ``insert_timestamp`` column (naive UTC, ISO 8601 string) and the
        pre-deduplication row labels kept in an ``index`` column.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    columns = ['country', 'country_long', 'name', 'gppd_idnr', 'capacity_mw', 'owner']
    selected = df[columns]

    # assign() builds a new frame instead of writing strings into the float
    # column of a slice, which avoids SettingWithCopyWarning and in-place
    # dtype conflicts.
    # NOTE(review): capacity is stringified presumably because the DynamoDB
    # client rejects Python floats -- confirm before changing the format.
    formatted = selected.assign(
        capacity_mw=[format(value, '.3f') for value in selected['capacity_mw']]
    )
    deduplicated = formatted.drop_duplicates()

    # One timestamp for the whole batch. Kept as naive UTC because the reader
    # compares it lexicographically against a "YYYY-MM-DD" cutoff.
    # NOTE: datetime.utcnow() is deprecated since Python 3.12; switching to an
    # aware datetime would append "+00:00" to the stored string.
    stamped = deduplicated.assign(insert_timestamp=datetime.utcnow().isoformat())

    # drop=False keeps the original row labels as an explicit "index" column.
    result = stamped.reset_index(drop=False)
    print(result.shape)
    return result
def write_to_dynamodb(data: pd.DataFrame, tbl_name: str = 'demo') -> None:
    """Write a DataFrame to a DynamoDB table and print how long it took.

    Args:
        data: Frame to write, passed unchanged to ``wr.dynamodb.put_df``.
        tbl_name: Name of the target DynamoDB table.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments.
    start = time.perf_counter()
    wr.dynamodb.put_df(df=data, table_name=tbl_name)
    elapsed = time.perf_counter() - start
    # print() does not apply %-formatting to its arguments; format explicitly.
    print(f"Write took {elapsed:.2f} seconds")
def read_data_back_from_dynamodb(tbl_name: str = 'demo') -> pd.DataFrame:
    """Scan a DynamoDB table for recently inserted items and print the elapsed time.

    Only items whose ``insert_timestamp`` is greater than or equal to
    yesterday's UTC date (``YYYY-MM-DD``) are returned.

    Args:
        tbl_name: Name of the DynamoDB table to scan.

    Returns:
        A DataFrame built from all matching items across every scan page;
        empty when nothing matches.
    """
    start = time.perf_counter()

    # insert_timestamp is written as a naive-UTC ISO 8601 string, so the
    # cutoff is computed in UTC too; ISO strings order lexicographically.
    yesterday = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
    tbl = wr.dynamodb.get_table(table_name=tbl_name)

    scan_kwargs = {'FilterExpression': Key('insert_timestamp').gte(yesterday)}
    items = []
    # A single scan call returns one page only; follow LastEvaluatedKey
    # until the service stops returning it so no items are dropped.
    while True:
        api_response = tbl.scan(**scan_kwargs)
        items.extend(api_response['Items'])
        last_key = api_response.get('LastEvaluatedKey')
        if not last_key:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key

    df_from_nosql_tbl = pd.DataFrame(items)
    elapsed = time.perf_counter() - start
    # print() does not apply %-formatting to its arguments; format explicitly.
    print(f"Read took {elapsed:.2f} seconds")
    return df_from_nosql_tbl
if __name__ == '__main__':
    # End-to-end demo: load the CSV sample, prepare it, write it to
    # DynamoDB, then read the freshly written items back.
    raw_plants = extract_power_plant_data()
    prepared_plants = transform_power_plant_data(raw_plants)
    write_to_dynamodb(data=prepared_plants)
    data_from_nosql = read_data_back_from_dynamodb()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment