Skip to content

Instantly share code, notes, and snippets.

@lukleh
lukleh / spark_code.py
Last active July 29, 2019 16:23
spark use shuffle service
# Count how often each absolute difference |n1 - n2| occurs in the
# tempsampledata view, mark the result for persistence, and print the
# first 100 rows (highest counts first) without truncating cell values.
df = spark.sql(
    'select abs(n1 - n2) as rdiff, count(1) as cnt from tempsampledata group by rdiff order by cnt desc'
).persist()
df.show(100, False)
@lukleh
lukleh / sample_data.py
Last active July 29, 2019 16:23
Create a sample Spark DataFrame
import pyspark.sql.functions as F
# Build the sample table: ids from spark.range(1, 1000000) split into 2000
# partitions, plus two independent random integer columns n1 and n2, then
# expose it to SQL as the temporary view 'tempsampledata'.
df = (
    spark.range(1, 1000000, numPartitions=2000)
    .withColumn('n1', (F.rand() * 100000).cast('integer'))
    .withColumn('n2', (F.rand() * 100000).cast('integer'))
)
df.createOrReplaceTempView('tempsampledata')
@lukleh
lukleh / commit-msg
Last active May 24, 2016 16:05
Git commit-msg hook that checks that the branch name and the commit message both start with a number, and that the two numbers are equal
#!/usr/bin/env bash
# commit-msg hook, branch check: the current branch name must begin with a
# number followed by an underscore; that number is kept in branch_num.

current_branch="$(git rev-parse --abbrev-ref HEAD)"

# Anchored with ^ so the digits must open the branch name. Without the anchor,
# =~ matches anywhere in the string and a name such as "feature_12_x" would
# be accepted even though it does not start with a number.
branch_regex='^([0-9]+)_.*'

if [[ ! $current_branch =~ $branch_regex ]]; then
    echo "branch does not start with a number" >&2
    exit 1
fi

# First capture group of the match above: the leading branch number.
branch_num=${BASH_REMATCH[1]}
import asyncio, random
import threading
import time
# Module-level asyncio queue, created without a maxsize argument.
q = asyncio.Queue()
@asyncio.coroutine
def produce():
while True:
print("putting")
-- scanl: list of successive left-fold accumulators, starting with the seed.
-- The seed is emitted before the input list is inspected, which is what
-- lets the self-referential 'fibs' below be productive.
scanl :: (a -> b -> a) -> a -> [b] -> [a]
scanl step acc items = acc : rest
  where
    rest = case items of
      []       -> []
      (y : ys) -> scanl step (step acc y) ys

-- Fibonacci numbers defined in terms of their own running sums.
fibs = 1 : scanl (+) 1 fibs
# Dotted version string; increment_line() below splits it on '.' and parses
# each component as an int.
__version__ = "0.1.6.8"
if __name__ == "__main__":
import sys
import argparse
def increment_line(args):
vi = [int(i) for i in __version__.split('.')]
print('current version: %s' % __version__)
@lukleh
lukleh / elasticsearch_mapping.sh
Created March 2, 2011 18:58 — forked from vhyza/elasticsearch_mapping.sh
Elasticsearch mapping for language stemming
# Reset the two test indices on the local Elasticsearch node: delete both,
# then recreate the Czech and the English index.
# A bare newline is printed after a request, presumably so each response
# body ends its own line in the terminal.
printf '%s\n' "Delete DBs"
curl --silent --request DELETE 'http://localhost:9200/test_twitter_cs/'
printf '\n'
curl --silent --request DELETE 'http://localhost:9200/test_twitter_en/'
printf '\n'
printf '%s\n' "Create Czech index"
curl --silent --request PUT 'http://localhost:9200/test_twitter_cs/'
printf '\n'
printf '%s\n' "Create English index"
curl --silent --request PUT 'http://localhost:9200/test_twitter_en/'