Ref: https://gist.github.com/vertexclique/9839383
VERSION | PLATFORM | OFFSET | ORIGINAL | CRACKED
--------|----------|--------|----------|--------
3126 | macOS | 0x6890 | F8 | E0
3126 | Win x86 | 0xBA11D | 1A | 2A
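One way to apply a row of this table is `dd`: seek OFFSET bytes into the binary and overwrite a single byte with the CRACKED value. A minimal sketch for the macOS row; `target_binary` is a placeholder name, and in practice you should confirm the ORIGINAL byte (F8) with `od`/`xxd` and keep a backup before patching:

```shell
# Write the cracked byte E0 at offset 0x6890 (macOS row of the table).
# bs=1 makes seek= count in bytes; conv=notrunc keeps the rest of the file.
offset=$((0x6890))
printf '\xe0' | dd of=target_binary bs=1 seek="$offset" conv=notrunc 2>/dev/null
```

Verify afterwards with `od -An -tx1 -j $((0x6890)) -N 1 target_binary`, which should show `e0`.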
-- DROP FUNCTION IF EXISTS public.parsel(db text, table_to_chunk text, pkey text, query text, output_table text, table_to_chunk_alias text, num_chunks integer);
CREATE OR REPLACE FUNCTION public.parsel(db text, table_to_chunk text, pkey text, query text, output_table text, table_to_chunk_alias text default '', num_chunks integer default 2)
RETURNS text AS
$BODY$
DECLARE
    sql TEXT;
    min_id integer;
    max_id integer;
    step_size integer;
    lbnd integer;
Install Python 3, Scala, and Apache Spark via Homebrew (http://brew.sh/)
brew update
brew install python3
brew install scala
brew install apache-spark
Set environment variables
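For example, point the Spark-related environment variables at the Homebrew install (e.g. in `~/.bash_profile`). The exact paths below are assumptions; confirm yours with `brew --prefix apache-spark`:

```shell
# Paths are assumptions -- confirm with `brew --prefix apache-spark`.
export SPARK_HOME="/usr/local/opt/apache-spark/libexec"
export PYSPARK_PYTHON=python3
export PATH="$SPARK_HOME/bin:$PATH"
```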
import pandas as pd

def parse(line):
    """Parse one line of tcpdump output, e.g.
    '12:00:00.000000 IP 10.0.0.1.443 > 10.0.0.2.51234: ...'"""
    words = line.split()
    time = words[0]
    protocol = words[1]
    if protocol == 'IP':
        src_ip, src_port = words[2].rsplit('.', 1)
        dst_ip, dst_port = words[4].strip(':').rsplit('.', 1)
        return time, src_ip, src_port, dst_ip, dst_port
# Makefile for converting the CSV files from http://cdn.gea.esac.esa.int/Gaia/gdr2/gaia_source/csv/
# to a single (vaex) hdf5 file
# * https://docs.vaex.io
# * https://github.com/maartenbreddels/vaex/
# It is multistage to work around opening 60 000 files at once.
# Strategy is:
# * stage1: convert each csv.gz to csv, then to hdf5
#   * do this via xargs calling make again, since gmake has trouble matching 60 000 rules
# * stage2: create part-<NUMBER>.txt files containing at most FILES_PER_PART files each
# * stage3: convert each list of hdf5 files to a single hdf5 file (part-<NUMBER>.hdf5)
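The stage-1 fan-out described above can be sketched with `xargs` driving one sub-make per file. The stage-1 makefile name is an assumption, so the runnable demo below substitutes `echo` for `make`:

```shell
# Real invocation would look something like:
#   ls csv/*.csv.gz | xargs -n 1 -P 8 make -f Makefile.stage1
# -n 1 gives each file its own invocation; -P 8 runs eight in parallel.
# The same fan-out, with echo standing in for make:
printf '%s\n' part-000.csv.gz part-001.csv.gz | xargs -n 1 echo converting
```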
#!/usr/bin/env python3
"""
Update Dask configuration based on the configuration
of the running Pod.

To be run at startup.
"""
import os
distributed:
  version: 2
  scheduler:
    bandwidth: 1000000000    # 1 GB/s estimated worker-worker bandwidth
  worker:
    memory:
      target: 0.90       # target fraction of memory to stay below
      spill: false       # spilling to disk disabled
      pause: 0.80        # fraction at which we pause worker threads
      terminate: 0.95    # fraction at which we terminate the worker
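Dask picks up YAML fragments like the one above from `~/.config/dask/` (or from the directory named in the `DASK_CONFIG` environment variable) at import time. A sketch of installing it without touching the home directory; the scratch directory name is arbitrary:

```shell
# Point DASK_CONFIG at a scratch directory and drop the fragment there.
export DASK_CONFIG="$PWD/dask-config-demo"
mkdir -p "$DASK_CONFIG"
cat > "$DASK_CONFIG/distributed.yaml" <<'EOF'
distributed:
  worker:
    memory:
      target: 0.90
      pause: 0.80
      terminate: 0.95
EOF
```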