use role sysadmin;
-- set up dev environment
create database if not exists my_test_db;
create schema if not exists my_test_db.fuzzy_match;
create warehouse if not exists dev_wh warehouse_size = 'small' auto_suspend = 300 initially_suspended=true;
use schema my_test_db.fuzzy_match;
use warehouse dev_wh;
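The statements above only provision the dev environment. Below is a minimal sketch, assuming the snowflake-connector-python package and placeholder credentials, of how the FUZZY_MATCH schema might be exercised from Python; the CUSTOMERS table and the EDITDISTANCE query are illustrative only, not part of the original gist.

import snowflake.connector

# Placeholder credentials -- replace with real account details.
conn = snowflake.connector.connect(
    account="my_account",
    user="my_user",
    password="my_password",
    role="SYSADMIN",
    warehouse="DEV_WH",
    database="MY_TEST_DB",
    schema="FUZZY_MATCH",
)

cur = conn.cursor()
# Compare names with Snowflake's built-in EDITDISTANCE function against a
# hypothetical CUSTOMERS table in the FUZZY_MATCH schema.
cur.execute(
    "select name, editdistance(name, 'Jon Smith') as dist "
    "from customers order by dist limit 10"
)
for row in cur.fetchall():
    print(row)
conn.close()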
@anilktechie
anilktechie / snowflake_database_ddl.py
Created December 6, 2021 00:32 — forked from rtempleton/snowflake_database_ddl.py
Sample Python script that loops through all the database objects and generates the DDL needed to replicate an environment, written before account replication was made available.
#!/usr/bin/env python
# Get all the database objects and permissions.
# Can be used after running snowflake_roles.py to create the required roles
# 2018-10-23 jfrink added ddl for roles, modified connection parameters
# 2019-01-15 jfrink added Roles and permissions report by object.
# 2019-03-07 jfrink added extract script to create a dump of all the tables to a stage
# and also the corresponding script to load all the data.
# Converted show tables over to using information schema for cases greater than 10k rows.
# Converted show views over to using information schema for cases greater than 10k rows.
import psycopg2
# Functions for reading scripts
class ScriptReader(object):
    @staticmethod
    def get_script(path):
        return open(path, 'r').read()
# Utils for messages
###############################################
# Script settings and constants.
###############################################
SCRIPT_PATH = 'script.sql'
DB_CONNECTION = {
    'db_host': 'myhost.redshift.amazonaws.com',
    'db_name': 'somedb',
    'db_username': 'user',
    'db_password': 'pA$$word'
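The preview cuts off inside DB_CONNECTION; the sketch below is an assumption about how ScriptReader and the connection settings are typically wired together with psycopg2, not the remainder of the original file.

# Sketch only: assumes DB_CONNECTION is completed as started above.
conn = psycopg2.connect(
    host=DB_CONNECTION['db_host'],
    dbname=DB_CONNECTION['db_name'],
    user=DB_CONNECTION['db_username'],
    password=DB_CONNECTION['db_password'],
)
with conn.cursor() as cur:
    # Run the SQL that ScriptReader reads from SCRIPT_PATH.
    cur.execute(ScriptReader.get_script(SCRIPT_PATH))
conn.commit()
conn.close()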
@anilktechie
anilktechie / 07b_get_cluster_parameters.py
Created July 16, 2021 19:00 — forked from lucadefra92/07b_get_cluster_parameters.py
#aws #copy_data_from_s3_to_redshift #redshift #s3
import configparser
# Read AWS credentials from the config file
cfg_data = configparser.ConfigParser()
cfg_data.read('dl.cfg')
# Save the Redshift cluster parameters read from the config file
cluster_identifier = cfg_data["Redshift"]["cluster_identifier"]
cluster_type = cfg_data["Redshift"]["cluster_type"]
node_type = cfg_data["Redshift"]["node_type"]
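Those values are typically handed to a boto3 Redshift client; the sketch below assumes an [AWS] section in dl.cfg for credentials and uses placeholder master-user settings, so it illustrates the pattern rather than the original script.

import boto3

redshift = boto3.client(
    "redshift",
    region_name="us-east-1",                          # placeholder region
    aws_access_key_id=cfg_data["AWS"]["key"],         # assumed config keys
    aws_secret_access_key=cfg_data["AWS"]["secret"],  # assumed config keys
)

# Create the cluster from the values read out of dl.cfg above.
response = redshift.create_cluster(
    ClusterIdentifier=cluster_identifier,
    ClusterType=cluster_type,      # NumberOfNodes is also required for "multi-node"
    NodeType=node_type,
    MasterUsername="awsuser",          # placeholder
    MasterUserPassword="Passw0rd123",  # placeholder
    DBName="dev",                      # placeholder
)
print(response["Cluster"]["ClusterStatus"])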
@anilktechie
anilktechie / 13b_explore_source_data.py
Created July 16, 2021 18:59 — forked from lucadefra92/13b_explore_source_data.py
#aws #copy_data_from_s3_to_redshift #redshift #s3
import boto3

# Define S3 client
# (access_key_id and secret_access_key are assumed to be defined earlier in the file)
s3 = boto3.client(
    "s3",
    aws_access_key_id = access_key_id,
    aws_secret_access_key = secret_access_key
)
# Get object containing file to be staged
obj = s3.get_object(
    Bucket = "data-to-migrate",
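The get_object call is truncated before its Key argument in this preview; a short sketch of how the retrieved object might be explored follows, with a hypothetical key and the assumption that the file is CSV.

import io
import pandas as pd

# Hypothetical key -- the real key is not visible in the truncated preview.
obj = s3.get_object(
    Bucket="data-to-migrate",
    Key="raw/events.csv",
)

# Load the object body into a DataFrame for a quick look at the source data.
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
print(df.head())
print(df.dtypes)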
@anilktechie
anilktechie / json-split.py
Created June 22, 2021 16:51 — forked from 97-109-107/json-split.py
A tiny Python script to split big JSON files into smaller chunks.
#!/usr/bin/env python
# based on http://stackoverflow.com/questions/7052947/split-95mb-json-array-into-smaller-chunks
# usage: python json-split.py filename.json
# produces multiple output files (filename_0.json, filename_1.json, ...) of about 1.49 MB each
import json
import sys
with open(sys.argv[1], 'r') as infile:
    o = json.load(infile)
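The preview stops right after loading the input, so the sketch below fills in the splitting step the comments describe; the chunk size and output naming are assumptions, since the original appears to target roughly 1.49 MB per file rather than a fixed element count.

import os

CHUNK_SIZE = 10000  # assumed number of elements per output file

base, ext = os.path.splitext(sys.argv[1])
# Write the loaded array back out in fixed-size chunks: filename_0.json, filename_1.json, ...
for i in range(0, len(o), CHUNK_SIZE):
    with open('{}_{}{}'.format(base, i // CHUNK_SIZE, ext), 'w') as outfile:
        json.dump(o[i:i + CHUNK_SIZE], outfile)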
{
    "StreamName": "$input.params('stream-name')"
}
@anilktechie
anilktechie / lambda_function.py
Created May 26, 2021 02:20 — forked from psa-jforestier/lambda_function.py
AWS Lambda function to gzip-compress a file when it is uploaded to S3 (replaces the original file with the .gz version)
###
### This gist contains 2 files: settings.json and lambda_function.py
###
### settings.json
{
"extensions" : ["*.hdr", "*.glb", "*.wasm"]
}
### lambda_function.py
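The preview ends just before the handler code; the sketch below shows the general shape such a handler can take (read the S3 object from the event, gzip it, write a .gz copy, delete the original). It follows the standard S3 event structure but is not the original gist's implementation.

import gzip
import json
import urllib.parse

import boto3

s3 = boto3.client("s3")

def lambda_handler(event, context):
    # Standard S3 put-event shape: bucket name and (URL-encoded) object key.
    record = event["Records"][0]["s3"]
    bucket = record["bucket"]["name"]
    key = urllib.parse.unquote_plus(record["object"]["key"])

    # Compress the original object in memory.
    body = s3.get_object(Bucket=bucket, Key=key)["Body"].read()
    compressed = gzip.compress(body)

    # Upload the .gz copy and remove the original, as the description above suggests.
    s3.put_object(
        Bucket=bucket,
        Key=key + ".gz",
        Body=compressed,
        ContentEncoding="gzip",
    )
    s3.delete_object(Bucket=bucket, Key=key)
    return {"statusCode": 200, "body": json.dumps({"compressed": key + ".gz"})}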
@anilktechie
anilktechie / list_objects_google_storage_boto3.py
Created March 26, 2021 08:07 — forked from gleicon/list_objects_google_storage_boto3.py
How to use boto3 with Google Cloud Storage and Python to emulate S3 access.
from boto3.session import Session
from botocore.client import Config
from botocore.handlers import set_list_objects_encoding_type_url
import boto3
ACCESS_KEY = "xx"
SECRET_KEY = "yy"
boto3.set_stream_logger('')
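The preview stops after turning on debug logging; the usual continuation of this pattern is sketched below. The endpoint URL is Google Cloud Storage's S3-compatible endpoint, the unregister call drops the ListObjects encoding handler that GCS does not accept, and the bucket name is a placeholder.

session = Session(
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
# GCS rejects the encoding-type parameter boto3 adds to ListObjects, so remove that handler.
session.events.unregister(
    "before-parameter-build.s3.ListObjects",
    set_list_objects_encoding_type_url,
)

# Point an S3 resource at Google Cloud Storage's S3-compatible endpoint.
gcs = session.resource(
    "s3",
    endpoint_url="https://storage.googleapis.com",
    config=Config(signature_version="s3v4"),
)

bucket = gcs.Bucket("my-example-bucket")  # placeholder bucket name
for obj in bucket.objects.all():
    print(obj.key)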