Skip to content

Instantly share code, notes, and snippets.

@justinnaldzin
justinnaldzin / database.py
Created February 17, 2017 20:40
python class for SQLAlchemy Databases
import sys
import logging
'''Usage:
from database import Database
db = Database(attributes)
db.connect()
sql = "SELECT * FROM TABLE"
dataframe = pandas.read_sql(sql, db.connection)
@justinnaldzin
justinnaldzin / sql_server_bulk_insert
Created February 20, 2017 17:45
Create tables and insert data into SQL Server 2014 from all CSV files in a directory
#!/usr/bin/env python3
# Create tables and insert data into SQL Server 2014 from all CSV files in a directory. The process involves:
# - Read all CSV files in a directory
# - Ensure all headers are the same
# - Generate DTS (Data Transformation Service) files for all CSV files
# - Execute all .dtsx files to bulk insert the CSV
import os
@justinnaldzin
justinnaldzin / install_apache_maven.sh
Created February 20, 2017 17:54
Install Apache Maven
# Install Apache Maven 3.3.9 under /opt and put it on PATH via /etc/profile.d.
# NOTE: writes to /opt and /etc/profile.d, so this typically needs to run as root.
cd /opt
# Old releases are removed from the rotating Apache mirrors; archive.apache.org
# keeps every release permanently and is served over HTTPS.
wget https://archive.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
tar xzf apache-maven-3.3.9-bin.tar.gz
# -sfn replaces an existing "maven" link on a re-run instead of failing.
ln -sfn apache-maven-3.3.9 maven
# Truncate (>) on the first write so re-running does not append duplicate lines.
echo 'export M2_HOME=/opt/maven' > /etc/profile.d/maven.sh
echo 'PATH=${M2_HOME}/bin:${PATH}' >> /etc/profile.d/maven.sh
# Load the new settings into the current shell as well.
source /etc/profile.d/maven.sh
@justinnaldzin
justinnaldzin / timeout_and_retry_function.py
Last active March 7, 2017 18:24
Python 'Timeout' class that raises an exception if the enclosed code does not finish within a specified number of seconds. Use the 'retry' decorator to retry the function a specified number of times after a timeout. NOTE: this works on Unix only, and because it relies on signal it only works in the main thread.
import time
import signal
from retrying import retry
class Timeout:
def __init__(self, seconds=1, error_message='Timeout'):
self.seconds = seconds
self.error_message = error_message
@justinnaldzin
justinnaldzin / bootstrap_social_buttons.html
Created April 4, 2017 14:57
HTML example using Bootstrap Social Buttons
<!DOCTYPE html>
<html>
<head>
<title>Bootstrap Social Buttons</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
<style>
/*
* Social Buttons for Bootstrap
@justinnaldzin
justinnaldzin / gcs_bucket.py
Created April 25, 2018 03:04
Define Google Cloud Storage bucket (create bucket if it doesn't exist)
from google.cloud import storage
# Define Google Cloud Storage bucket
storage_client = storage.Client()
bucket = storage_client.lookup_bucket(bucket_name)
if bucket:
print("Using bucket '{}'.".format(bucket.name))
else:
# Create Google Cloud Storage bucket if it doesn't exist
print("Bucket '{}' doesn't exist. Creating bucket...".format(bucket_name))
@justinnaldzin
justinnaldzin / replace_dictionary_key_chars.py
Last active April 27, 2018 02:25
Recursively replace characters from keys of a Python dictionary
################################################################################
# The following demonstrates how to recursively replace specific characters
# from within keys of a nested dictionary or JSON object, leaving values as is.
################################################################################
# Given the following example dictionary with characters needing to be replaced:
example = [
{
"_id": "5ae2821988fc6a16af73aeb0",
"index": 0,
@justinnaldzin
justinnaldzin / delete_tables_in_bigquery_dataset.py
Created July 13, 2018 05:01
Delete all tables within a BigQuery dataset
# Delete all tables within a BigQuery dataset
from google.cloud import bigquery

bigquery_client = bigquery.Client()
bq_dataset = 'my_dataset'
dataset_ref = bigquery_client.dataset(bq_dataset)

# `Client.list_dataset_tables` was renamed to `Client.list_tables` in
# google-cloud-bigquery; use the current name.
# The listing is materialized into a list before deleting so that removing
# tables cannot interfere with the paged iterator.
tables = list(bigquery_client.list_tables(dataset_ref))
for table in tables:
    bigquery_client.delete_table(table)
@justinnaldzin
justinnaldzin / spark_dataframe_size_estimator.py
Created July 18, 2018 19:29
Estimate size of Spark DataFrame in bytes
# Function to convert python object to Java objects
def _to_java_object_rdd(rdd):
    """Return a JavaRDD of Java objects built from an RDD of Python objects.

    The RDD is reserialized with an auto-batched pickle serializer and then
    passed to the JVM-side MLlib ``SerDe.pythonToJava`` helper. Per the
    original note, each Python object is converted to a Java object by
    Pyrolite, whether or not the RDD was serialized in batches.
    """
    pickled_rdd = rdd._reserialize(AutoBatchedSerializer(PickleSerializer()))
    serde = pickled_rdd.ctx._jvm.org.apache.spark.mllib.api.python.SerDe
    return serde.pythonToJava(pickled_rdd._jrdd, True)
# Convert DataFrame to an RDD
@justinnaldzin
justinnaldzin / gcloud_kms_iam.sh
Created August 6, 2018 12:58
Google Cloud KMS using IAM
# https://cloud.google.com/kms/docs/iam
# Add IAM policy binding to a specific KMS keyring with the cryptoKeyEncrypterDecrypter role
KEYRING=my_keyring_name
# USER_EMAIL already carries its member-type prefix ("serviceAccount:"), so it is
# passed to --member unchanged; prepending "user:" would produce the invalid
# member "user:serviceAccount:...".
USER_EMAIL=serviceAccount:example-compute@developer.gserviceaccount.com
gcloud kms keyrings add-iam-policy-binding "$KEYRING" --location global --member "$USER_EMAIL" --role roles/cloudkms.cryptoKeyEncrypterDecrypter
# Add IAM policy binding to a specific KMS key with the cryptoKeyEncrypterDecrypter role
KEY=my_key_name
KEYRING=my_keyring_name