Han leehanchung

## factors_gen.txt
# Contains human-readable representation of NFactor::TCodeGenInput message (defined in factors_metadata.proto)

Group: [
    "Datetime",
    "Domain",
    "RapidClicks",
    "RegHostStatic",
    "RegDocStatic",
    "Regex",
    "LinkBM25",

## collisionLSH.py
import tensorflow as tf #We need tensorflow 2.x
import numpy as np

#The hashlength in bits
hashLength = 256

def buildModel():
    #we can set the seed to simulate the fact that this network is known and doesn't change between runs
    #tf.random.set_seed(42)
    model = tf.keras.Sequential()

## benfords_election.py
##############################################################################
# Author: Han-chung Lee
# Date: 2020-11-6
#
# Detecting election fraud activity at Milwaukee County using Benford's Law
#
# Statistical detection of systematic election irregularities:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3478593/
##############################################################################

## benfords_preproc
def preprocess(data: Dict[str, List[str]]) -> Tuple[Dict[str, List[int]], Dict[str, List[float]]]:
    """preprocess a dictionary of candidate name and votes and generate counts
    of the leading digits and the expected values using Benford's Law

    Args:
        data (Dict[str, List[str]]): candidate: vote counts for all precincts

    Returns:
        Tuple[Dict[str, List[int]], Dict[str, List[float]]]: observed and
            expected leading digit count for all candidates

## gist:a64146423f4d0b15de050e57811524db
def get_soup(url:str):
    """get html data from input url and load it into beautifulsoup parser

    Args:
        url (str): source url

    Returns:
        beautifulsoup parser with loaded data
    """
    headers = {

## benfords
def get_benfords() -> List[float]:
    """Return the Benford's-law probability of each leading digit 1 to 9.

    For a leading digit ``d`` the expected proportion is
    ``log10(1 + 1/d)``; the values are intended as the expected
    distribution for a chi-square test.

    Returns:
        List[float]: nine values, ordered by leading digit from 1 to 9
    """
    return [log10(1 + 1/digit) for digit in range(1, 10)]

## tf_text_classification.py
import tensorflow_datasets as tfds
import tensorflow as tf

import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    """Plot a metric and its ``val_``-prefixed counterpart against epochs.

    Args:
        history: object whose ``history`` attribute is indexable by metric
            name (presumably a Keras ``History`` -- confirm against caller).
        metric: key looked up in ``history.history``; ``'val_' + metric`` is
            looked up as well and drawn as a second line.
    """
    # First line: the values stored under the metric name itself.
    plt.plot(history.history[metric])
    # Second line: the values stored under the 'val_'-prefixed key, passed
    # with an empty format string.
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)

## test_sagemaker_cli.sh
#!/usr/bin/env bash
# Invoke a SageMaker endpoint once with a small JSON payload.
# Replace the <<...>> placeholder with the name of your endpoint before running.
ENDPOINT_NAME=<<name your endpoint>>

# The expansion is quoted so the name is always passed as a single argument.
# The trailing positional argument (response.json) is the output file.
# NOTE(review): AWS CLI v2 treats blob parameters as base64 by default; a raw
# JSON --body may need `--cli-binary-format raw-in-base64-out` -- confirm
# against the CLI version in use.
aws sagemaker-runtime invoke-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --body '{"instances": [1.0,2.0,3.0]}' response.json

## create_sagemaker_endpoint.sh
#!/usr/bin/env bash

MODEL_NAME=<<name your model>>

ENDPOINT_CONFIG_NAME=<<name your endpoint config>>

ENDPOINT_NAME=<<name your endpoint>>

# Using one ml.c4.large instance for the endpoint
PRODUCTION_VARIANTS="VariantName=Default,ModelName=${MODEL_NAME},"\

## create_sagemaker_model.sh
#!/usr/bin/env bash
# Collect the settings used to create a SageMaker model.
# Replace every <<...>> placeholder (and the XXXX suffix of the role name)
# before running.

MODEL_NAME=<<name your model>>
ROLE_NAME=AmazonSageMaker-ExecutionRole-XXXXXXXXXXXXXXXX

# the name of the container image to use for the model
ECS_IMAGE_NAME=<<your image name>>

# Look up the ARN of the role named above; jq extracts .Role.Arn from the
# JSON that `aws iam get-role` prints. The expansion and the jq filter are
# quoted so neither is subject to word splitting or globbing.
EXECUTION_ROLE_ARN=$(aws iam get-role --role-name "${ROLE_NAME}" | jq -r '.Role.Arn')
	# Contains human-readable representation of NFactor::TCodeGenInput message (defined in factors_metadata.proto)

	Group: [
	"Datetime",
	"Domain",
	"RapidClicks",
	"RegHostStatic",
	"RegDocStatic",
	"Regex",
	"LinkBM25",
	import tensorflow as tf #We need tensorflow 2.x
	import numpy as np

	#The hashlength in bits
	hashLength = 256

	def buildModel():
	#we can set the seed to simulate the fact that this network is known and doesn't change between runs
	#tf.random.set_seed(42)
	model = tf.keras.Sequential()
	##############################################################################
	# Author: Han-chung Lee
	# Date: 2020-11-6
	#
	# Detecting election fraud activity at Milwaukee County using Benford's Law
	#
	# Statistical detection of systematic election irregularities:
	# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3478593/
	##############################################################################
	def preprocess(data: Dict[str, List[str]]) -> Tuple[Dict[str, List[int]], Dict[str, List[float]]]:
	"""preprocess a dictionary of candidate name and votes and generate counts
	of the leading digits and the expected values using Benford's Law

	Args:
	data (Dict[str, List[str]]): candidate: vote counts for all precincts

	Returns:
	Tuple[Dict[str, List[int]], Dict[str, List[float]]]: observed and
	expected leading digit count for all candidates
	def get_soup(url:str):
	"""get html data from input url and load it into beautifulsoup parser

	Args:
	url (str): source url

	Returns:
	beautifulsoup parser with loaded data
	"""
	headers = {
	def get_benfords() -> List[float]:
	    """Return the Benford's-law probability of each leading digit 1 to 9.

	    For a leading digit ``d`` the expected proportion is
	    ``log10(1 + 1/d)``; the values are intended as the expected
	    distribution for a chi-square test.

	    Returns:
	        List[float]: nine values, ordered by leading digit from 1 to 9
	    """
	    # The body is indented under the def so the function has a valid suite.
	    benfords = [log10(1 + 1/d) for d in range(1, 10)]
	    return benfords
	import tensorflow_datasets as tfds
	import tensorflow as tf

	import matplotlib.pyplot as plt

	def plot_graphs(history, metric):
	    """Plot a metric and its ``val_``-prefixed counterpart against epochs.

	    Args:
	        history: object whose ``history`` attribute is indexable by metric
	            name (presumably a Keras ``History`` -- confirm against caller).
	        metric: key looked up in ``history.history``; ``'val_' + metric``
	            is looked up as well and drawn as a second line.
	    """
	    # The body is indented under the def so the function has a valid suite.
	    plt.plot(history.history[metric])
	    plt.plot(history.history['val_'+metric], '')
	    plt.xlabel("Epochs")
	    plt.ylabel(metric)
	#!/usr/bin/env bash
	# Invoke a SageMaker endpoint once with a small JSON payload.
	# Replace the <<...>> placeholder with the name of your endpoint before running.
	ENDPOINT_NAME=<<name your endpoint>>

	# The expansion is quoted so the name is always passed as a single argument.
	# The trailing positional argument (response.json) is the output file.
	aws sagemaker-runtime invoke-endpoint \
	    --endpoint-name "${ENDPOINT_NAME}" \
	    --body '{"instances": [1.0,2.0,3.0]}' response.json
	#!/usr/bin/env bash

	MODEL_NAME=<<name your model>>

	ENDPOINT_CONFIG_NAME=<<name your endpoint config>>

	ENDPOINT_NAME=<<name your endpoint>>

	# Using one ml.c4.large instance for the endpoint
	PRODUCTION_VARIANTS="VariantName=Default,ModelName=${MODEL_NAME},"\
	#!/usr/bin/env bash
	# Collect the settings used to create a SageMaker model.
	# Replace every <<...>> placeholder (and the XXXX suffix of the role name)
	# before running.

	MODEL_NAME=<<name your model>>
	ROLE_NAME=AmazonSageMaker-ExecutionRole-XXXXXXXXXXXXXXXX

	# the name of the container image to use for the model
	ECS_IMAGE_NAME=<<your image name>>

	# Look up the ARN of the role named above. The pipe must be unescaped:
	# a backslash-escaped bar would be handed to `aws` as a literal argument
	# instead of piping its JSON output into jq.
	EXECUTION_ROLE_ARN=$(aws iam get-role --role-name "${ROLE_NAME}" | jq -r '.Role.Arn')