Anh Dang anhdanggit

## Py-Athena-Conn.py
import pyathena
import pandas as pd

## Directly by panda
athena_conn = pyathena.connect(aws_access_key_id=os.environ['ATHENA_USER'], ##credentials of aws_access_key_id
                 aws_secret_access_key=os.environ['ATHENA_PASSWORD'], ##credentials of aws_secret_access_key
                 s3_staging_dir='s3://aws-athena-query-results-<your-details>', ##where the athena query result saved - checked in S3 ,
                 region_name='eu-west-1') ##the region you set for Athena

df = pd.read_sql("SELECT * FROM tutorial.wbcdata LIMIT 10", athena_conn)

## R-Athena-Conn.R
library(RJDBC)
library(rJava)
library(DBI)

# 1 - Set up driver connection to JDBC
fil = '~/<paht-to>/AthenaJDBC42_2.0.7.jar' ##path to the AthenaJDBC in your machine
drv <- JDBC(driverClass='com.simba.athena.jdbc.Driver',
            classPath = fil,
            identifier.quote="'")

## vscode-python-snippets.json
{
	"Header": {
		"prefix": "header",
		"body": [
			"'''",
			"#FILE: $1",
			"Project: $WORKSPACE_NAME",
			"-------------------",
			"By: Anh Dang",
			"Date: $CURRENT_YEAR-$CURRENT_MONTH-$CURRENT_DATE",

## SQL-explore.sql
-- Search for a columns in DB
SELECT      COLUMN_NAME AS "ColumnName",
            TABLE_NAME AS  "TableName",
            TABLE_SCHEMA
FROM        INFORMATION_SCHEMA.COLUMNS
WHERE       COLUMN_NAME LIKE 'cashfac_id'
ORDER BY    TABLE_NAME,
            COLUMN_NAME;

-- List all columns one table

## bankstat-parse.R

##-------------------------------------#
## BankStat Prep
## PARSE BANK ACCOUNT AND DOC DATE
## This script to parse the info of bank account iban and the doc date for bank statement
##-------------------------------------#


# --- FUNCTIONS ---------------

## R_GetoptLong_toQuery.r
library(DBI)
library(dplyr)
library(GetoptLong)
library(magrittr)

data_warehouse <- function(){DBI::dbConnect(RPostgres::Postgres()
                                               ,dbname="database"
                                            ## to hide the credentials (set it up in .Renv)
                                               ,host=Sys.getenv("HOST")
                                               ,user= Sys.getenv("USERNAME")

## json-extract-sql.R
## Code to extract all fields from json
sphonic_data <- tbl(connection_risk_house(), sql(
    "SELECT distinct on (application_id,jkey)
    application_id,
    jsonb_object_keys(section_payload) as jkey,
    jsonb_object_field_text(section_payload, jsonb_object_keys(section_payload)) as jvalue
    FROM fraud_detection.response_sections
    WHERE  sent_at >= '2017-10-01'
    AND sent_at < '2017-12-01'
    GROUP BY application_id,section_payload

## xml-extract-sql.R
data_xml <- dbGetQuery(conn = DB_conn,statement = qq('

                    SELECT decision_id, data,
                                         xpath(\'//application-uuid\',data) as application_uuid,
                                         xpath(\'//brokerage-id\',data) as brokerage_id,
                                         xpath(\'//Transformed\',data) as transformed,
                                         xpath(\'//pd_commercial\',data) as pd_commercial,

                                         xpath(\'//row_pd_comb\',data) as row_pd_comb,
                                         xpath(\'//marginal_group\',data) as marginal_group,

## Py-Zip-tricks.py
## Tuple:
# Capture arbitrarily long list
values = 1,2,3,4,5
a, b, *rest = values
# Discard the rest
a, b, *_ = values

## Zip:
# "pairs" up the elements of list
seq1 = ['foo','bar','baz']

## Py-Dict-tricks.py
# Categorizing to dicts
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letters = {}

for word in words:
    letter = word[0]
    by_letters[letter] = by_letters.get(letter, 0) + 1
    value = by_letters[letter]
    print('Letter: {} - Count: {}'.format(letter, value))
	import pyathena
	import pandas as pd

	## Directly by panda
	athena_conn = pyathena.connect(aws_access_key_id=os.environ['ATHENA_USER'], ##credentials of aws_access_key_id
	aws_secret_access_key=os.environ['ATHENA_PASSWORD'], ##credentials of aws_secret_access_key
	s3_staging_dir='s3://aws-athena-query-results-<your-details>', ##where the athena query result saved - checked in S3 ,
	region_name='eu-west-1') ##the region you set for Athena

	df = pd.read_sql("SELECT * FROM tutorial.wbcdata LIMIT 10", athena_conn)
	library(RJDBC)
	library(rJava)
	library(DBI)

	# 1 - Set up driver connection to JDBC
	fil = '~/<paht-to>/AthenaJDBC42_2.0.7.jar' ##path to the AthenaJDBC in your machine
	drv <- JDBC(driverClass='com.simba.athena.jdbc.Driver',
	classPath = fil,
	identifier.quote="'")
	{
	"Header": {
	"prefix": "header",
	"body": [
	"'''",
	"#FILE: $1",
	"Project: $WORKSPACE_NAME",
	"-------------------",
	"By: Anh Dang",
	"Date: $CURRENT_YEAR-$CURRENT_MONTH-$CURRENT_DATE",
	-- Search for a columns in DB
	SELECT COLUMN_NAME AS "ColumnName",
	TABLE_NAME AS "TableName",
	TABLE_SCHEMA
	FROM INFORMATION_SCHEMA.COLUMNS
	WHERE COLUMN_NAME LIKE 'cashfac_id'
	ORDER BY TABLE_NAME,
	COLUMN_NAME;

	-- List all columns one table

	##-------------------------------------#
	## BankStat Prep
	## PARSE BANK ACCOUNT AND DOC DATE
	## This script to parse the info of bank account iban and the doc date for bank statement
	##-------------------------------------#


	# --- FUNCTIONS ---------------
	library(DBI)
	library(dplyr)
	library(GetoptLong)
	library(magrittr)

	data_warehouse <- function(){DBI::dbConnect(RPostgres::Postgres()
	,dbname="database"
	## to hide the credentials (set it up in .Renv)
	,host=Sys.getenv("HOST")
	,user= Sys.getenv("USERNAME")
	## Code to extract all fields from json
	sphonic_data <- tbl(connection_risk_house(), sql(
	"SELECT distinct on (application_id,jkey)
	application_id,
	jsonb_object_keys(section_payload) as jkey,
	jsonb_object_field_text(section_payload, jsonb_object_keys(section_payload)) as jvalue
	FROM fraud_detection.response_sections
	WHERE sent_at >= '2017-10-01'
	AND sent_at < '2017-12-01'
	GROUP BY application_id,section_payload
	data_xml <- dbGetQuery(conn = DB_conn,statement = qq('

	SELECT decision_id, data,
	xpath(\'//application-uuid\',data) as application_uuid,
	xpath(\'//brokerage-id\',data) as brokerage_id,
	xpath(\'//Transformed\',data) as transformed,
	xpath(\'//pd_commercial\',data) as pd_commercial,

	xpath(\'//row_pd_comb\',data) as row_pd_comb,
	xpath(\'//marginal_group\',data) as marginal_group,
	## Tuple:
	# Capture arbitrarily long list
	values = 1,2,3,4,5
	a, b, *rest = values
	# Discard the rest
	a, b, *_ = values

	## Zip:
	# "pairs" up the elements of list
	seq1 = ['foo','bar','baz']
	# Categorizing to dicts
	words = ['apple', 'bat', 'bar', 'atom', 'book']
	by_letters = {}

	for word in words:
	letter = word[0]
	by_letters[letter] = by_letters.get(letter, 0) + 1
	value = by_letters[letter]
	print('Letter: {} - Count: {}'.format(letter, value))