Erik Gafni egafni

## run.py
"""
Single entry point for running a script locally inside docker, as a GKE job, or as an ai-platform job

    python myproject/run.py ai-platform \
        --master-accelerator type=nvidia-tesla-t4,count=1 \
        --master-machine-type n1-standard-8 \
        -n $JOBNAME \
        --polling-interval 10 --region us-central1 \
        --docker-uri gcr.io/xyz/myproject:$TAG -- \
        python3 myproject/train.py --train_config $CONFIG_PATH

## gist:3f912fb227d838035e0b3c835c998b43
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCeGnw3scJyJ/wUkOBexsUHcENJ5oIvPcAIYNYPebIIkE5ud3o6VDmmaOY6Ufnom1sPscvQiXprWmgu4tz/FzhOTh/6M8vmocwnggE6sf1gCosqoYxC9S+wwqgkbZP4U1JiDrL9og5oXiG2PcwhunK14BL/Jdw2mbO5aW/lZa0hEOCaLOGR9XVfV0LTgY5xAu7LA5b+wMSEiWWRojZOHmvAybcjGNT6mDNVm/7HRi8VZ/+6l4FjYjsmZYDbPiSzysgxEHTWEaN/XBmgmG6y+opPIap0oWlNyYjP6SivBsXYym8OMe5hypDwBXFb38xzdLwAygIq0RxUk4fnQXRaojef egafni@Eriks-MacBook-Pro.local

## gist:b3ffe9bb1c4986fd0a86afcbe6e10236
"""
barebones replacement module for pytorch_lightning which can't be installed with tensorflow1 because of a
tensorboard dependency
"""
import abc

import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

## make_job_template.py
from base64 import encodebytes
import json
name = "pipe-template_v4"
user_data=b"""MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

--==MYBOUNDARY==
Content-Type: text/x-shellscript; charset="us-ascii"

#cloud-boothook

## gist:d89888aa19a9d75d7104e9cdfb556d82
import numpy


def duplicated(arr, keep='first'):
    """
    Mimic pandas.Series.duplicated, but works with multi-dimensional numpy arrays
    """
    arr = numpy.asarray(arr)
    mask = numpy.ones(len(arr), dtype=bool)
    unique_ar, indices, inverse, counts = numpy.unique(arr, return_index=True, return_inverse=True, return_counts=True,

## netcdf4_multiindex_xarray.py
import xarray as xr
import pandas as pd


def encode_multiindices(dataset):
    """
    Provides a way to encode multiindices for saving to a netcdf file
    Adapted from https://github.com/pydata/xarray/issues/1077#issuecomment-436015893
    """
    for idx_name, index in list(dataset.indexes.items()):

## gist:a210df902500489c4264f2046dfbd8ab
Erik Gafni [3:27 PM]
Add this to your JupyterLab hotkey overrides to run all notebook cells above the current one (useful right after a restart...)
```{
   "notebook:run-all-above": {
      "command": "notebook:run-all-above",
      "keys": [
        "Shift D"
      ],
      "selector": ".jp-Notebook:focus",
      "title": "Run All Below",

## poisson_outliers.py
def poisson_outliers(arr, min_pval=.01, min_size_to_filter=5):
    """
    >>> poisson_outliers([1,1,1,1,1,2,3])
    array([False, False, False, False, False, False, False], dtype=bool)
    >>> poisson_outliers([1,1,1,1,1,2,10])
    array([False, False, False, False, False, False,  True], dtype=bool)
    >>> poisson_outliers([1,1,10])
    array([False, False, False], dtype=bool)
    """
    arr = np.asarray(arr)

## credible_coordinates
def credible_coordinates(arr, min_percentile=.99):
    """
    Calculates a type of credible "interval" by a simple greedy approach of the most likely coordinates of a joint
    density.

    >>> X = np.array([.1,.1,.5,.3])
    >>> Y = np.array([.2,.8])
    >>> Z = np.array([.5,.5])
    >>> R = np.array([[[x*y*z for z in Z] for y in Y] for x in X ])

## get_adjacent_cells.py
def get_adjacent_cells(arr, selected_idxs):
    """
    >>> arr = np.ones((3,))
    >>> get_adjacent_cells(arr, {(1,)})
    {(0,), (1,), (2,)}
    >>> arr = np.ones((3,2))
    >>> get_adjacent_cells(arr, {(1,1)})
    {(0, 1), (1, 0), (1, 1), (2, 1)}
    >>> arr = np.ones((3,2,3))
    >>> get_adjacent_cells(arr, {(1,1,0)})
    {(0, 1, 0), (1, 0, 0), (1, 1, 0), (1, 1, 1), (2, 1, 0)}
	"""
	Single entry point for running a script locally inside docker, as a GKE job, or as an ai-platform job

	python myproject/run.py ai-platform \
	--master-accelerator type=nvidia-tesla-t4,count=1 \
	--master-machine-type n1-standard-8 \
	-n $JOBNAME \
	--polling-interval 10 --region us-central1 \
	--docker-uri gcr.io/xyz/myproject:$TAG -- \
	python3 myproject/train.py --train_config $CONFIG_PATH
	"""
	barebones replacement module for pytorch_lightning which can't be installed with tensorflow1 because of a
	tensorboard dependency
	"""
	import abc

	import torch
	from torch.utils.tensorboard import SummaryWriter
	from tqdm import tqdm
	from base64 import encodebytes
	import json
	name = "pipe-template_v4"
	user_data=b"""MIME-Version: 1.0
	Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

	--==MYBOUNDARY==
	Content-Type: text/x-shellscript; charset="us-ascii"

	#cloud-boothook
	import numpy


	def duplicated(arr, keep='first'):
	"""
	Mimic pandas.Series.duplicated, but works with multi-dimensional numpy arrays
	"""
	arr = numpy.asarray(arr)
	mask = numpy.ones(len(arr), dtype=bool)
	unique_ar, indices, inverse, counts = numpy.unique(arr, return_index=True, return_inverse=True, return_counts=True,
	import xarray as xr
	import pandas as pd


	def encode_multiindices(dataset):
	"""
	Provides a way to encode multiindices for saving to a netcdf file
	Adapted from https://github.com/pydata/xarray/issues/1077#issuecomment-436015893
	"""
	for idx_name, index in list(dataset.indexes.items()):
	Erik Gafni [3:27 PM]
	Add this to your JupyterLab hotkey overrides to run all notebook cells above the current one (useful right after a restart...)
	```{
	"notebook:run-all-above": {
	"command": "notebook:run-all-above",
	"keys": [
	"Shift D"
	],
	"selector": ".jp-Notebook:focus",
	"title": "Run All Below",
	def poisson_outliers(arr, min_pval=.01, min_size_to_filter=5):
	"""
	>>> poisson_outliers([1,1,1,1,1,2,3])
	array([False, False, False, False, False, False, False], dtype=bool)
	>>> poisson_outliers([1,1,1,1,1,2,10])
	array([False, False, False, False, False, False,  True], dtype=bool)
	>>> poisson_outliers([1,1,10])
	array([False, False, False], dtype=bool)
	"""
	arr = np.asarray(arr)
	def credible_coordinates(arr, min_percentile=.99):
	"""
	Calculates a type of credible "interval" by a simple greedy approach of the most likely coordinates of a joint
	density.

	>>> X = np.array([.1,.1,.5,.3])
	>>> Y = np.array([.2,.8])
	>>> Z = np.array([.5,.5])
	>>> R = np.array([[[x*y*z for z in Z] for y in Y] for x in X ])
	def get_adjacent_cells(arr, selected_idxs):
	"""
	>>> arr = np.ones((3,))
	>>> get_adjacent_cells(arr, {(1,)})
	{(0,), (1,), (2,)}
	>>> arr = np.ones((3,2))
	>>> get_adjacent_cells(arr, {(1,1)})
	{(0, 1), (1, 0), (1, 1), (2, 1)}
	>>> arr = np.ones((3,2,3))
	>>> get_adjacent_cells(arr, {(1,1,0)})
	{(0, 1, 0), (1, 0, 0), (1, 1, 0), (1, 1, 1), (2, 1, 0)}