Skip to content

Instantly share code, notes, and snippets.

@emerrf
emerrf / pyspark_df_sample.py
Created August 10, 2020 14:11
PySpark DataFrame sample
import pyspark.sql.functions as F
from pyspark.sql.types import (StructType, StructField, StringType, ShortType, BooleanType,
IntegerType, FloatType, DoubleType, DecimalType, LongType)
from decimal import Decimal
data_dict = [
{"id":1,"first_name":"Grace","last_name":"Balsellie","email":"gbalsellie0@nih.gov","gender":"Male","ip_address":"210.75.33.4","has_a_turtle":True,"age":31,"date_of_birth":"1982-06-13","last_seen_epoch":1579953816000,"height_m":1.85,"weight_kg":81.1,"longitude":Decimal(12.3750266),"latitude":Decimal(4.6740235),"creditcard_number":"5602241598652866450","description":"Supplement Descending Colon with Nonautologous Tissue Substitute, Percutaneous Endoscopic Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHOSURBVDjLpZOxa1RBEIe/d/e8FKeFEA2IGBWCICZYBSESBCFglUDSCJZaRBBbK1HQ0s4/QQlCgoKdoBA9sVBshCBETCNRiUUg5PDt7MxY7HuXdxgEycKwyzJ88/vN7Gbuzl5WDvDoz
# conda install pycrypto
# All credit to: https://www.it-swarm.dev/es/python/cifrar-y-descifrar-usando-pycrypto-aes-256/1069206762/
import base64
import hashlib
from Crypto import Random
from Crypto.Cipher import AES
class AESCipher(object):
from time import time
class TokenBucket(object):
"""An implementation of the token bucket algorithm.
Usage example:
from time import sleep
import random
t_0 = time()
@emerrf
emerrf / 48872315.R
Created February 21, 2018 11:00
Comparing R TukeyHSD results with those in Statistics by William L. Hays, pp. 581-583
# StackOverflow question:
# https://stackoverflow.com/questions/48872315/getting-wrong-p-values-for-tukey-test-for-one-way-mixed-effect-anova/
# Subject ids 1..10, each repeated four times in a row (40 values).
# as.numeric() keeps the vector double, as a c(1, 1, ...) literal would be.
subject <- rep(as.numeric(1:10), each = 4)

# Treatment labels a1..a4, cycled once per subject (40 values).
treatment <- rep(c("a1", "a2", "a3", "a4"), times = 10)

Keybase proof

I hereby claim:

  • I am emerrf on github.
  • I am emerrf (https://keybase.io/emerrf) on keybase.
  • I have a public key ASBtKB3f1__8CL8oGroT_o1upjJt_l_ZScNlIgFlXGAg3Ao

To claim this, I am signing this object: