GitHub Gists by Dave Ruijter (DaveRuijter)
@DaveRuijter
DaveRuijter / pipeline-backup-weekly.yml
Created October 21, 2021 20:30
This YAML is part of the Data Lake Backup Strategy
parameters:
  - name: backupStore
    displayName: 'Backup 05 store'
    type: boolean
    default: true
  - name: backupBronze
    displayName: 'Backup 10 bronze'
    type: boolean
    default: true
  - name: backupSilver
@DaveRuijter
DaveRuijter / pipeline-backup-daily.yml
Created October 21, 2021 20:32
This YAML pipeline is part of the Data Lake Backup Strategy
parameters:
  - name: backupStore
    displayName: 'Backup 05 store'
    type: boolean
    default: true
  - name: backupBronze
    displayName: 'Backup 10 bronze'
    type: boolean
    default: true
  - name: backupSilver
@DaveRuijter
DaveRuijter / is_pipeline_running.json
Created December 12, 2021 11:39
ADF/ASA pipeline to verify whether a pipeline is running / in progress
{
    "name": "00_is_pipeline_running",
    "properties": {
        "activities": [
            {
                "name": "Get Pipeline Runs",
                "type": "WebActivity",
                "dependsOn": [
                    {
                        "activity": "getSubscriptionID",
@DaveRuijter
DaveRuijter / multicolumn_expression_evaluation.py
Created January 2, 2022 09:04
Custom multi-column SQL expression evaluation expectation for the Great Expectations framework
from great_expectations.expectations.expectation import MulticolumnMapExpectation
from great_expectations.expectations.util import render_evaluation_parameter_string
from great_expectations.render.util import (
num_to_str,
substitute_none_for_missing,
parse_row_condition_string_pandas_engine,
)
from scipy import stats as stats
from great_expectations.execution_engine import (
PandasExecutionEngine,
@DaveRuijter
DaveRuijter / generate_hash.py
Created April 3, 2022 07:13
A couple of functions to easily create an integer-based hash. Use it for the key column of a dimension.
import hashlib

spark.udf.register("udf_removehtmltagsfromstring", udf_removehtmltagsfromstring, "string")

# This is the central hashing function, used by other functions. It uses the blake2b hashing algorithm. With a central function, we can adjust the hashing when needed.
def udf_centralhash(string: str) -> int:
    val = hashlib.blake2b(
        digest_size=6
    )  # Increase digest_size to make the hash bigger. 6 seems a good start for our use for dimensions.
    val.update(string.encode("utf-8"))  # feed the input string as utf-8 to the blake2b object
    intval = int(val.hexdigest(), 16)  # and convert it to an integer
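The gist is cut off after the integer conversion. A self-contained sketch of the same idea (plain Python, without the Spark UDF registration; the final return statement is an assumption, since the original is truncated):

```python
import hashlib

def centralhash(string: str) -> int:
    """Hash a string to an integer using blake2b with a 6-byte digest."""
    h = hashlib.blake2b(digest_size=6)  # 6 bytes -> integers below 2**48
    h.update(string.encode("utf-8"))
    return int(h.hexdigest(), 16)  # hex digest interpreted as an integer

# Deterministic: the same business key always yields the same surrogate key,
# so the dimension key is stable across reloads.
key = centralhash("Customer|12345")
```

Because the digest is only 6 bytes, the value fits comfortably in a BIGINT column, at the cost of a (small) collision probability that grows with the number of distinct keys.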