Gary A. Stafford (garystafford)
id,address,city,state,zip,country,property_type,assessed_value
1,1008 Walk Burg,Houston,TX,77002,United States,Multi-family,1122321
2,7088 Second Square,Oklahoma City,OK,73102,United States,Single-family,261940
3,1425 Ridge Terrace,Indianapolis,IN,46204,United States,Single-family,1030391
4,982 Way Lane,New York,NY,10007,United States,Multi-family,95499
5,9404 Port Court,Columbus,OH,43215,United States,Single-family,922404
6,7135 Crossing Trail,Virginia Beach,VA,23451,United States,Single-family,272910
7,9481 Harbor Brook,New York,NY,10007,United States,Multi-family,232795
8,8585 Manor Branch,Raleigh,NC,27601,United States,Single-family,701217
9,7703 Bluff Boulevard,Las Vegas,NV,89101,United States,Single-family,530581
# Purpose: Test coffee shop sales data generator
# Author: Gary A. Stafford and GitHub Copilot
# Date: 2023-04-13
# Usage: pytest coffee_shop_data_gen_tests.py -v
# write a python class that inherits from unittest.TestCase
# write a unit test for the get_product function
# write a unit test for the get_sales_record function
# write a unit test for the write_sales_records function
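Below is a minimal sketch of the kind of test class those prompts might yield; the module name (coffee_shop_data_gen_final) and the function signatures are assumptions for illustration, not code from the original gist.

import os
import unittest

import coffee_shop_data_gen_final as gen  # hypothetical module under test


class CoffeeShopDataGenTests(unittest.TestCase):
    def test_get_product(self):
        # assumes get_product(product_id) returns a dict describing one product
        product = gen.get_product(1)
        self.assertIn("product", product)

    def test_get_sales_record(self):
        # assumes get_sales_record() returns a single synthetic sales record
        record = gen.get_sales_record()
        self.assertIn("transaction_id", record)

    def test_write_sales_records(self):
        # assumes write_sales_records(records, file_name) writes the records to a CSV file
        gen.write_sales_records([gen.get_sales_record()], "test_sales.csv")
        self.assertTrue(os.path.exists("test_sales.csv"))


if __name__ == "__main__":
    unittest.main()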
"transaction_id","date","time","product_id","product","calories","price","type","quantity","amount","payment_type"
"47a157f84e727fe3335db1519ee736a6","06/27/2022","19:59:42",22,"Quiche",456,4.99,"Food",2,9.98,"Debit"
"1bf01013e699ca0f804650ea50826c82","11/20/2022","06:21:14",22,"Quiche",456,4.99,"Food",3,14.97,"Cash"
"84f41c15749090d1e79bf9a48a58d6c3","08/18/2022","11:50:22",14,"Chai Tea",200,3.5,"Drink",2,7.0,"Apple Pay"
"ef1845b8438bf3b5b99d2f4891a48f03","11/13/2022","17:20:51",12,"Lemonade",120,3.0,"Drink",2,6.0,"Debit"
"9863de11be3099d6361392584e30e624","06/03/2022","18:27:03",18,"Muffin",426,3.99,"Food",2,7.98,"Gift card"
"f50ed8878250bc06f66b97f5cd2f6df7","02/21/2022","17:02:18",7,"Hot Chocolate",300,3.5,"Drink",2,7.0,"Credit"
"1903169473f41a0275ee702f2c6b1dd6","05/24/2022","14:58:25",10,"Smoothie",200,4.0,"Drink",3,12.0,"Venmo"
"164a9519fd3db952e721e9f55dc1be74","01/07/2022","14:19:35",14,"Chai Tea",200,3.5,"Drink",2,7.0,"Debit"
"dc85a202143de48ad4646190cdc0bf5c","01/28/2022","08:52:38",20,"Wrap",388,5
# Purpose: Generate coffee shop sales data
# Author: Gary A. Stafford and GitHub Copilot
# Date: 2023-04-12
# Usage: python3 coffee_shop_data_gen_final.py 100
# Command-line argument(s): rec_count (number of records to generate as an integer)
import csv
import random
from datetime import datetime, timedelta
import argparse
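Continuing from the imports above, here is a minimal sketch (not the gist's actual implementation) of how the rec_count command-line argument described in the header might be handled; the generate_sales_records helper named in the comment is hypothetical.

def parse_args():
    parser = argparse.ArgumentParser(description="Generate coffee shop sales data")
    parser.add_argument("rec_count", type=int, help="number of records to generate")
    return parser.parse_args()


def main():
    args = parse_args()
    # a generate_sales_records(args.rec_count) call would build and write
    # records like those in the CSV sample above
    print(f"Generating {args.rec_count} sales records...")


if __name__ == "__main__":
    main()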
garystafford / github_copilot_test.py
Last active March 29, 2023 13:40
Example of Python script created with the assistance of GitHub Copilot.
"""
Purpose: Creates an Amazon DynamoDB table, adds an item to the table,
gets that item from the table, and finally deletes the table
Author(s): Gary A. Stafford and GitHub Copilot
Created: 2023-03-26
Usage: python3 github_copilot_test.py table_name
pytest github_copilot_test.py -v
"""
import boto3
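The docstring above outlines a create/put/get/delete round trip against DynamoDB; a condensed sketch of that flow with boto3 follows. The key schema, item payload, and function name are illustrative assumptions, not the gist's actual code.

def dynamodb_round_trip(table_name: str) -> None:
    dynamodb = boto3.resource("dynamodb")

    # create a table with a simple string hash key and on-demand capacity
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
        AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}],
        BillingMode="PAY_PER_REQUEST",
    )
    table.wait_until_exists()

    # add an item, then read it back by its key
    table.put_item(Item={"id": "1", "message": "Hello from GitHub Copilot"})
    item = table.get_item(Key={"id": "1"})["Item"]
    print(item)

    # delete the table to clean up
    table.delete()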
# config.providers.secretManager worker configuration
config.providers=secretManager
config.providers.secretManager.class=com.github.jcustenborder.kafka.config.aws.SecretsManagerConfigProvider
config.providers.secretManager.param.aws.region=us-east-1
config.providers.secretManager.param.secret.prefix=cdc
config.providers.secretManager.param.secret.ttl.ms=3600000
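With the provider registered above, a connector or worker property can reference a value stored in AWS Secrets Manager using Kafka's ${provider:path:key} interpolation syntax; the secret name and JSON key below are hypothetical examples, not values from the original configuration.

# example reference to a hypothetical secret and key
database.password=${secretManager:cdc/mysql/source:password}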
{
"_hoodie_commit_time": "20230227035213698",
"_hoodie_commit_seqno": "20230227035213698_0_2168",
"_hoodie_record_key": "200",
"_hoodie_partition_path": "__table=sale",
"_hoodie_file_name": "cce62f4e-a111-4aae-bfd1-2c5af1c6bdeb-0_0-129-115_20230227035213698.parquet",
"salesid": 200,
"listid": 214,
"sellerid": 31484,
"buyerid": 11694,
{
"_hoodie_commit_time": "20230227031713915",
"_hoodie_commit_seqno": "20230227031713915_0_2168",
"_hoodie_record_key": "200",
"_hoodie_partition_path": "__table=sale",
"_hoodie_file_name": "cce62f4e-a111-4aae-bfd1-2c5af1c6bdeb-0_0-82-84_20230227031713915.parquet",
"salesid": 200,
"listid": 214,
"sellerid": 31484,
"buyerid": 11694,
DATA_LAKE_BUCKET="<your_data_lake_s3_bucket>"
TARGET_TABLE="tickit.ecomm.sale"
spark-submit \
--name ${TARGET_TABLE} \
--jars /usr/lib/spark/jars/spark-avro.jar,/usr/lib/hudi/hudi-utilities-bundle.jar \
--conf spark.sql.catalogImplementation=hive \
--conf spark.yarn.submit.waitAppCompletion=false \
--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer `ls /usr/lib/hudi/hudi-utilities-bundle.jar` \
--props file://${PWD}/${TARGET_TABLE}.properties \
{
"salesid" : 200,
"listid" : 214,
"sellerid" : 31484,
"buyerid" : 11694,
"eventid" : 3272,
"dateid" : 1891,
"qtysold" : 4,
"pricepaid" : 600.0,
"commission" : 90.0,