Gists by Piethein Strengholt (pietheinstrengholt)
JQFEED = {
    numNews: 5,
    appendTo: '#blogfeed',
    realUrl: 'http://www.perry.es',
    internalUrl: '/blog_feed',
    loadFeeds: function() {
        $(JQFEED.appendTo).html('Loading feed...');
        $.ajax({
            url: JQFEED.internalUrl,
            type: 'GET',
            // Success handler truncated in the gist; it would render up to
            // numNews feed items into the appendTo element.
            success: function(data) {}
        });
    }
};
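The internalUrl / realUrl pair hints at a same-origin proxy: the browser requests /blog_feed on its own host, and the server fetches the external feed on its behalf. A minimal sketch of such a proxy, assuming Flask and requests; only the /blog_feed path and the feed host come from the snippet, everything else is illustrative:

# Minimal same-origin proxy sketch (Flask assumed; not the gist's server code).
# The browser hits /blog_feed on its own host; the server fetches the real feed.
from flask import Flask, Response
import requests

app = Flask(__name__)
REAL_URL = 'http://www.perry.es'  # external feed host from the snippet

@app.route('/blog_feed')
def blog_feed():
    upstream = requests.get(REAL_URL, timeout=10)
    # Pass the feed through with its original content type
    return Response(upstream.content,
                    content_type=upstream.headers.get('Content-Type', 'text/xml'))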
#!/bin/sh
# Converts a mysqldump file into a Sqlite 3 compatible file. It also extracts the MySQL `KEY xxxxx` entries from the
# CREATE block and creates them as separate commands _after_ all the INSERTs.
# Awk is chosen because it's fast and portable. You can use gawk, original awk or even the lightning-fast mawk.
# The mysqldump file is traversed only once.
# Usage: $ ./mysql2sqlite mysqldump-opts db-name | sqlite3 database.sqlite
# Example: $ ./mysql2sqlite --no-data -u root -pMySecretPassWord myDbase | sqlite3 database.sqlite
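The KEY-extraction trick the header describes, stripping KEY lines out of each CREATE TABLE block and replaying them as CREATE INDEX statements after the data load, can be sketched in a few lines of Python. This is a simplified illustration of the idea, not the awk script itself; the regexes and index naming are assumptions:

# Simplified illustration of the KEY-extraction idea (not the awk script itself).
# Usage: python extract_keys.py < dump.sql > sqlite.sql
import re, sys

indexes = []
table = None
for line in sys.stdin:
    m = re.match(r'CREATE TABLE `(\w+)`', line)
    if m:
        table = m.group(1)
    m = re.match(r'\s*KEY `(\w+)` \((.+)\),?', line)
    if m and table:
        # Defer index creation until after all the INSERTs, as the script does
        indexes.append(f'CREATE INDEX {table}_{m.group(1)} ON {table} ({m.group(2)});')
        continue  # drop the KEY line from the CREATE block
    sys.stdout.write(line)

for stmt in indexes:
    print(stmt)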
<?php
/**
* ODBC for DBO
*
* PHP versions 4 and 5
*
* CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
* Copyright 2005-2009, Cake Software Foundation, Inc. (http://cakefoundation.org)
*
* Licensed under The MIT License
<?php
/**
* ODBC layer for DBO
* Helpful for connecting to MS SQL Server from Linux via FreeTDS
*
* PHP 5
*
* CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
* Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
*
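The header points at FreeTDS as the bridge from Linux to MS SQL Server. Outside CakePHP the same stack is commonly exercised as below; a minimal sketch assuming pyodbc with a registered FreeTDS ODBC driver, where the host, database, and credentials are placeholders:

# Minimal sketch: MS SQL Server from Linux through FreeTDS/ODBC.
# Assumes pyodbc plus a registered FreeTDS ODBC driver; host, port,
# database, and credentials below are placeholders.
import pyodbc

conn = pyodbc.connect(
    'DRIVER={FreeTDS};'
    'SERVER=sqlserver.example.com;'
    'PORT=1433;'
    'DATABASE=mydb;'
    'UID=myuser;PWD=mypassword;'
    'TDS_Version=7.3;'
)
cursor = conn.cursor()
cursor.execute('SELECT TOP 5 name FROM sys.tables')
for row in cursor.fetchall():
    print(row.name)
conn.close()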
%%pyspark
# Schema for the incoming source records of the SCD Type 2 merge
schema = StructType([
    StructField("src_Id", IntegerType(), True),
    StructField("src_Firstname", StringType(), True),
    StructField("src_Lastname", StringType(), True),
    StructField("src_CreatedAt", DateType(), True),
    StructField("src_Department", StringType(), True),
    StructField("src_Salary", StringType(), True)
])

current_date = datetime.today().date()
print(current_date)

# Prepare for merge - add the current flag plus effective and end dates (typed null)
df_source_new = dfDataChanged.withColumn('src_current', lit(True)) \
    .withColumn('src_effectiveDate', lit(current_date)) \
    .withColumn('src_endDate', lit(None).cast(DateType()))
df_source_new.show()

# Full outer merge on the key column; in a complete SCD2 flow you would also
# restrict the target side to current records so only the latest versions match
df_merge = dfDataOriginal.join(df_source_new, (df_source_new.src_Id == dfDataOriginal.Id), how='fullouter')
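After the full outer join, a typical SCD Type 2 flow derives an action per row: new keys are inserted, changed rows close the old record and insert a fresh current one, and unchanged rows are left alone. A hedged sketch of that step, continuing from df_merge; the compared column set is an assumption:

from pyspark.sql.functions import when, col, lit

# Derive an action per merged row: INSERT for new keys, UPSERT for changed
# attributes, NOACTION otherwise. The compared columns are an assumption.
df_actions = df_merge.withColumn('action',
    when(col('Id').isNull(), lit('INSERT'))
    .when(col('src_Id').isNull(), lit('NOACTION'))
    .when((col('Firstname') != col('src_Firstname')) |
          (col('Lastname') != col('src_Lastname')) |
          (col('Department') != col('src_Department')) |
          (col('Salary') != col('src_Salary')), lit('UPSERT'))
    .otherwise(lit('NOACTION')))

Rows flagged UPSERT then get the old version end-dated with src_current flipped to False, and the new version appended as the current record.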
{
  "enumDefs": [],
  "structDefs": [],
  "classificationDefs": [],
  "entityDefs": [
    {
      "name": "data_landing_zone",
      "description": "Data Landing Zone",
      "createdBy": "admin",
      "updatedBy": "admin",
{
  "entity": {
    "status": "ACTIVE",
    "createTime": 1592928847,
    "updateTime": 1594120997,
    "createdBy": "Superadmin",
    "version": 1,
    "attributes": {
      "name": "SalesLTAddress",
      "db": {
%%pyspark
# Imports shared by the SCD Type 2 notebook cells below
from pyspark import *
from pyspark.sql.window import Window
from pyspark.sql.functions import *
from pyspark.sql import Row
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType, BooleanType, DateType
from typing import List
from datetime import datetime
%%pyspark
from datetime import datetime

current_date = datetime.today().date()

try:
    # Read original data - this is your SCD Type 2 table holding all data
    dataOriginal = spark.read.load(dfDataOriginalPath + "/" + cw_database + "/" + cw_table, format='delta')
except Exception:
    # Use first load when no data exists yet (the rest of this branch is truncated in the gist)
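When the Delta table does not exist yet, the first load typically writes the source as the initial SCD2 table: every row marked current, the effective date stamped, and the end date left open. A hedged sketch of that truncated branch, reusing the gist's variable names; dfDataSourcePath and the CSV source format are assumptions:

from pyspark.sql.functions import lit
from pyspark.sql.types import DateType

# First load: every source row becomes a current record with an open end date.
# dfDataSourcePath and the CSV format are assumptions, not from the gist.
dataOriginal = spark.read.load(dfDataSourcePath, format='csv', header=True, schema=schema) \
    .withColumn('current', lit(True)) \
    .withColumn('effectiveDate', lit(current_date)) \
    .withColumn('endDate', lit(None).cast(DateType()))
dataOriginal.write.format('delta').mode('overwrite') \
    .save(dfDataOriginalPath + "/" + cw_database + "/" + cw_table)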