Skip to content

Instantly share code, notes, and snippets.

View tromika's full-sized avatar

Tamas Szuromi tromika

View GitHub Profile
@tromika
tromika / test.py
Created December 18, 2017 11:00
Spark on Windows
from pyspark import SparkContext, SparkConf
import pyspark
import os
# Point PySpark at the local Java, Spark and Hadoop installations before the
# session is created. Raw strings keep the Windows backslashes literal instead
# of relying on unrecognised escape sequences (e.g. "\P", "\s", "\h"), which
# newer Python versions warn about.
os.environ["JAVA_HOME"] = r"C:\Program Files\Java\jre1.8.0_151"
os.environ["SPARK_HOME"] = r"C:\spark-2.2.1-bin-hadoop2.7"
os.environ["HADOOP_HOME"] = r"C:\hadoop-2.7.1"

# Run against the local master "local[8]" with executor memory set to 8g.
config = SparkConf().setMaster("local[8]").set("spark.executor.memory", "8g")

# Build (or reuse) the session named "test" from the configuration above.
spark = (
    pyspark.sql.SparkSession.builder
    .config(conf=config)
    .appName("test")
    .getOrCreate()
)
@tromika
tromika / zookeeperKafka.py
Created January 6, 2017 14:42
Python script to get Kafka Brokers from Zookeeper
from kazoo.client import KazooClient
import json
####
# A quick function to get Kafka brokers from Zookeeper
###
# You probably need only the first one, because that broker will advertise the other brokers
# This is needed only for producers, since there you can only use bootstrap servers
# Arguments
@tromika
tromika / gist:889d6d62175ccdaedc8fc3085442c146
Created May 23, 2016 09:36
Apache Spark SQL INET_NTOA function pyspark dummy version
%pyspark
import socket,struct
import re
def inetNtoa(ip,num):
ip = re.match("^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$",socket.inet_ntoa(struct.pack('!I', ip)))
str = ''
if(num==0 or num > 3):
return ip.group(0)
else:
for i in range(1,num+1):
@tromika
tromika / gist:6bc0ddb414b657b8cd1676a981e0d66d
Last active May 23, 2016 09:36
Apache Spark SQL INET_NTOA function pyspark dummy version
%pyspark
import socket,struct
import re
def inetNtoa(ip,num):
ip = re.match("^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$",socket.inet_ntoa(struct.pack('!I', ip)))
str = ''
if(num==0 or num > 3):
return ip.group(0)
else:
for i in range(1,num+1):
@tromika
tromika / twitter_con.py
Created December 18, 2014 14:43
Twitter connections
#install twitterapi
#pip install TwitterAPI
#Docs for TwitterAPI
#https://github.com/geduldig/TwitterAPI
#Twitter credentials
#https://apps.twitter.com/
#Twitter API docs
@tromika
tromika / gist:cbffaa666010dec651eb
Created June 4, 2014 10:05
MS SQL Server Corpus Tokenizer Function - Get the specific token from a corpus
--The function is based on http://ole.michelsen.dk/blog/split-string-to-table-using-transact-sql/
--Thx for the core BTW
IF OBJECT_ID('Tokenize') IS NOT NULL
DROP FUNCTION [dbo].[Tokenize]
GO
CREATE FUNCTION [dbo].[Tokenize]
(
@String NVARCHAR(4000),
@Delimiter NCHAR(1),
@tromika
tromika / sublime
Created December 30, 2013 11:23
On Windows, add an "Edit with Notepad++"-style context menu entry for Sublime Text 3. Based on http://sublimetext.userecho.com/topic/43345-windows-context-menu-right-click-edit-with-sublime-notepad-style/
@echo off
rem Registers an "Open with Sublime Text 3" entry in the Explorer context menu
rem for every file type. Writing under HKEY_CLASSES_ROOT normally requires an
rem elevated (administrator) command prompt.

rem Quoted assignment so stray trailing spaces never become part of the path.
SET "st2Path=C:\Program Files\Sublime Text 3\sublime_text.exe"

rem add it for all file types
rem Default value of the key: the caption shown in the context menu.
@reg add "HKEY_CLASSES_ROOT\*\shell\Open with Sublime Text 3" /t REG_SZ /v "" /d "Open with Sublime Text 3" /f
rem Icon value: the executable path followed by icon index 0.
@reg add "HKEY_CLASSES_ROOT\*\shell\Open with Sublime Text 3" /t REG_EXPAND_SZ /v "Icon" /d "%st2Path%,0" /f
rem Command to launch. The executable path contains spaces, so it is wrapped in
rem escaped quotes; the doubled percent sign produces the literal placeholder
rem for the clicked file.
@reg add "HKEY_CLASSES_ROOT\*\shell\Open with Sublime Text 3\command" /t REG_SZ /v "" /d "\"%st2Path%\" \"%%1\"" /f
pause