Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@anna-anisienia
Created March 8, 2021 21:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anna-anisienia/71d2c4970800a0a05689d89fb36b9fcd to your computer and use it in GitHub Desktop.
Save anna-anisienia/71d2c4970800a0a05689d89fb36b9fcd to your computer and use it in GitHub Desktop.
DynamoDB example
"""
Dataset used: https://datasets.wri.org/dataset/globalpowerplantdatabase
"""
import os
import time
import pandas as pd
from datetime import datetime, timedelta
import awswrangler as wr
from boto3.dynamodb.conditions import Key
def extract_power_plant_data(csv_file: str = 'global_power_plant_database.csv',
                             nrows: int = 200) -> pd.DataFrame:
    """Load the first rows of the power plant CSV into a DataFrame.

    Args:
        csv_file: Path of the CSV file. A relative path is resolved against
            the current working directory; an absolute path is used as is.
        nrows: Maximum number of data rows to read. Defaults to 200, the
            limit this function previously hard-coded.

    Returns:
        A DataFrame holding at most ``nrows`` rows of the CSV file.
    """
    # os.path.join discards the cwd prefix when csv_file is already absolute.
    csv_path = os.path.join(os.getcwd(), csv_file)
    return pd.read_csv(csv_path, nrows=nrows)
def transform_power_plant_data(df) -> pd.DataFrame:
    """Select, format and timestamp the power plant records.

    The input frame is not modified; every step produces a new DataFrame.

    Args:
        df: DataFrame containing at least the columns 'country',
            'country_long', 'name', 'gppd_idnr', 'capacity_mw' and 'owner'.

    Returns:
        A DataFrame with the six selected columns, 'capacity_mw' rendered as
        strings with three decimals, duplicate rows removed, an
        'insert_timestamp' column (naive UTC time in ISO format, identical
        for every row), and the previous index exposed as an 'index' column.
    """
    columns = ['country', 'country_long', 'name', 'gppd_idnr', 'capacity_mw', 'owner']
    selected = df[columns]
    # assign() works on a copy and replaces the whole column, so swapping the
    # float column for strings neither writes into a slice of the caller's
    # frame nor attempts an in-place set of an incompatible dtype.
    formatted = selected.assign(
        capacity_mw=[format(value, '.3f') for value in selected['capacity_mw']]
    )
    deduplicated = formatted.drop_duplicates()
    stamped = deduplicated.assign(insert_timestamp=datetime.utcnow().isoformat())
    # drop=False keeps the original row labels as a regular 'index' column.
    result = stamped.reset_index(drop=False)
    print(result.shape)
    return result
def write_to_dynamodb(data: pd.DataFrame, tbl_name: str = 'demo') -> None:
    """Write a DataFrame to a DynamoDB table and print the elapsed time.

    Args:
        data: DataFrame passed to ``wr.dynamodb.put_df``.
        tbl_name: Name of the target DynamoDB table.
    """
    start = time.perf_counter()
    wr.dynamodb.put_df(df=data, table_name=tbl_name)
    elapsed = time.perf_counter() - start
    # The previous print passed the duration as a second argument, so the
    # "%d" placeholder was emitted literally instead of being formatted.
    print(f"Write took {elapsed:.2f} seconds")
def read_data_back_from_dynamodb(tbl_name: str = 'demo') -> pd.DataFrame:
    """Scan a DynamoDB table for recently inserted items.

    Items are kept when their 'insert_timestamp' attribute is greater than or
    equal to yesterday's date (``YYYY-MM-DD``), compared as strings.

    Args:
        tbl_name: Name of the DynamoDB table to scan.

    Returns:
        A DataFrame built from all matching items across every scan page.
    """
    start = time.perf_counter()
    # Rows are stamped with datetime.utcnow() when transformed, so the cutoff
    # is computed in UTC as well rather than in local time.
    yesterday = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
    tbl = wr.dynamodb.get_table(table_name=tbl_name)

    scan_kwargs = {'FilterExpression': Key('insert_timestamp').gte(yesterday)}
    items = []
    while True:
        api_response = tbl.scan(**scan_kwargs)
        items.extend(api_response.get('Items', []))
        # A scan response is paginated; LastEvaluatedKey is present while
        # more pages remain and must be fed back as ExclusiveStartKey.
        last_key = api_response.get('LastEvaluatedKey')
        if not last_key:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key

    df_from_nosql_tbl = pd.DataFrame(items)
    elapsed = time.perf_counter() - start
    # The previous print passed the duration as a second argument, so the
    # "%d" placeholder was emitted literally instead of being formatted.
    print(f"Read took {elapsed:.2f} seconds")
    return df_from_nosql_tbl
if __name__ == '__main__':
    # Extract the CSV sample and transform it in one step.
    energy_data = transform_power_plant_data(extract_power_plant_data())
    # Load the transformed records into the default DynamoDB table.
    write_to_dynamodb(data=energy_data)
    # Read the freshly written records back from the same table.
    data_from_nosql = read_data_back_from_dynamodb()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment