Skip to content

Instantly share code, notes, and snippets.

View mkaranasou's full-sized avatar
🏠
Working from home

Maria Karanasou mkaranasou

🏠
Working from home
View GitHub Profile
@mkaranasou
mkaranasou / bbtree.py
Created March 17, 2017 10:29 — forked from olomix/bbtree.py
Balanced binary tree in Python
#!/usr/bin/env python2.7
import random
import subprocess
class Node(object):
# A single node of a balanced binary tree: `key` orders the node within
# the tree, `value` is the arbitrary payload associated with that key.
# NOTE(review): gist preview is truncated — the full class presumably also
# carries left/right child links; confirm against the original bbtree.py.
def __init__(self, key, value):
self.key = key
self.value = value
@mkaranasou
mkaranasou / relations.py
Created May 22, 2018 14:18 — forked from cyrexcyborg/relations.py
Flask-Admin-SQLAlchemy one-to-one, one-to-many between two tables
# -*- coding: utf-8 -*-
# Many thanks to http://stackoverflow.com/users/400617/davidism
# This code is under an "I don't care" license
# Take it, use it, learn from it, make it better.
# Start this from cmd or shell or whatever
# Go to favourite browser and type localhost:5000/admin
import sys
from flask import Flask
from flask.ext.sqlalchemy import SQLAlchemy
from flask.ext.admin import Admin
@mkaranasou
mkaranasou / pyspark_uneven_df_union.py
Last active November 2, 2018 11:57
Function to union pyspark data frames with different columns
def union_uneven(df_base, df_new, default=None):
"""
Union dfs with different columns
:param: pyspark.DataFrame df_base: the dataframe to join to
:param: pyspark.DataFrame df_new: the dataframe to be joined
:return: the union of the two dataframes, having the missing columns filled with the default value
:rtype: pyspark.DataFrame
"""
base_columns = set(df_base.columns)
df_new_columns = set(df_new.columns)
@mkaranasou
mkaranasou / pyspark_parse_json_and_expand_into_columns.py
Last active March 1, 2019 17:21
Parse a json column in pyspark and expand the dict into columns
# Parse a JSON-string column and access its fields as real columns.
# NOTE(review): snippet assumes surrounding context provides a SparkSession
# (`self.spark`), a dataframe `df`, and `from pyspark.sql import functions as F`
# — confirm against the caller.
json_col = 'json_col'
# either infer the features schema:
# (spark.read.json over the column's RDD derives a unified schema, at the cost
# of one extra full pass over the data)
schema = self.spark.read.json(df.select(json_col).rdd.map(lambda x: x[0])).schema
# parse the features string into a map
df = df.withColumn(json_col, (F.from_json(F.col(json_col), schema)))
# access the feature columns by name
df.select(F.col(json_col)['some_key']).show()
@mkaranasou
mkaranasou / ImageTools.js
Created April 6, 2019 08:08 — forked from SagiMedina/ImageTools.js
Resize and crop images in the Browser with orientation fix using exif
import EXIF from 'exif-js';
// Feature-detect a usable Blob constructor: some older browsers expose a
// `Blob` global whose constructor form throws, so actually invoking
// `new Blob()` inside try/catch is the only reliable check.
const hasBlobConstructor = typeof (Blob) !== 'undefined' && (function checkBlobConstructor() {
try {
return Boolean(new Blob());
} catch (error) {
return false;
}
}());
@mkaranasou
mkaranasou / pyspark_index_with_row_num_sortable_data.py
Last active October 4, 2019 09:39
Adding indexes to a dataframe with row_num if your data is sortable
>>> from pyspark.sql import Window
# the window is necessary here because row_number is a windowing function
# that means you can have row_number run over some amount of your data
# we'll be currently running it over the sorted by column1 data, row per row - our window will be of size 2 (rows),
# the whole dataframe that is.
>>> window = Window.orderBy(F.col('column1'))
>>> df_final = df_final.withColumn('row_number', F.row_number().over(window)
>>> df_final.select('index', 'row_number', 'column1', 'column2').show()
+-----+----------+-------+-------+
@mkaranasou
mkaranasou / pyspark_simple_file_read_short.py
Last active October 4, 2019 13:48
Read a txt file with pyspark
from pyspark import SparkConf
from pyspark.sql import SparkSession, functions as F
conf = SparkConf()
# optional but it would be good to set the amount of ram the driver can use to
# a reasonable (regarding the size of the file we want to read) amount, so that we don't get an OOM exception
conf.set('spark.driver.memory', '6G')
spark = SparkSession.builder \
.config(conf=conf) \
@mkaranasou
mkaranasou / pyspark_simple_read_text_file.py
Last active October 4, 2019 14:16
Use pyspark to read a text file and identify a term
from pyspark import SparkConf
from pyspark.sql import SparkSession, functions as F
conf = SparkConf()
# optional but it would be good to set the amount of ram the driver can use to
# a reasonable (regarding the size of the file we want to read) amount, so that we don't get an OOM exception
conf.set('spark.driver.memory', '6G')
# create a spark session - nothing can be done without this:
spark = SparkSession.builder \
@mkaranasou
mkaranasou / pyspark_feature_a_to_b_ratio_example.py
Last active October 13, 2019 11:09
An example feature class
from pyspark.sql import functions as F
class FeatureAToBRatio(object):
feature_name = 'a_to_b_ratio'
default_value = 0.
def calculate(self, df):
"""
Given a dataframe that contains columns a and b,
@mkaranasou
mkaranasou / test_feature_a_to_b_ratio.py
Last active October 13, 2019 13:07
Example of pyspark unittest test case for feature a to b ratio
from pyspark.sql.utils import AnalysisException
from pyspark_unittesting import SparkSQLTestCase
class TestFeatureAToBRatio(SparkSQLTestCase):
def setUp(self):
# Run the parent fixture first — presumably SparkSQLTestCase.setUp creates
# the shared spark session; confirm in pyspark_unittesting.
super(TestFeatureAToBRatio, self).setUp()
# Fresh feature instance per test so no state leaks between test cases.
self.feature = FeatureAToBRatio()