cupdike /
Created Dec 12, 2019
Helps debug connecting PyArrow to Kerberized HDFS. Getting this working took some doing, and the guidance found on the web isn't always helpful; useful error messages don't always bubble up from the driver. This lets you experiment with drivers, LIBJVM_PATH, LD_LIBRARY_PATH, CLASSPATH, and HADOOP_HOME.
import pyarrow
import os
import sh
# Get obscure error without this: pyarrow.lib.ArrowIOError: HDFS list directory failed, errno: 2 (No such file or directory)
os.environ['CLASSPATH'] = str(sh.hadoop('classpath','--glob'))
# Not needed
#os.environ['HADOOP_HOME'] = '/opt/cloudera/parcels/CDH-<your version>/'
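If you'd rather not depend on the third-party sh package, the same CLASSPATH setup can be sketched with the stdlib subprocess module (the function name is mine; hadoop must be on your PATH):

```python
import os
import subprocess

def set_hadoop_classpath(hadoop_cmd="hadoop"):
    """Set CLASSPATH from `hadoop classpath --glob` so libhdfs can locate the jars."""
    out = subprocess.run([hadoop_cmd, "classpath", "--glob"],
                         capture_output=True, text=True, check=True)
    # sh/subprocess output carries a trailing newline; strip it before exporting
    os.environ["CLASSPATH"] = out.stdout.strip()
    return os.environ["CLASSPATH"]
```

With CLASSPATH populated, a pyarrow.hdfs.connect(...) attempt is much more likely to surface a real Kerberos error instead of the errno 2 above.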
cupdike / CombiningPythonGenerators.txt
Created Oct 17, 2019
Combine Python Generators Into One Generator
>>> def genX():
...     for i in range(3):
...         yield i
...
>>> for i in genX(): print(i)
0
1
2
>>> def genY():
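The preview cuts off at genY. The usual ways to turn several generators into one are itertools.chain or a wrapper generator delegating with yield from; a sketch (genY's body here is my assumption, since the preview truncates before it):

```python
from itertools import chain

def genX():
    for i in range(3):
        yield i

def genY():  # assumed body -- the gist preview cuts off here
    for c in "ab":
        yield c

# Option 1: itertools.chain lazily yields from genX, then genY
print(list(chain(genX(), genY())))  # [0, 1, 2, 'a', 'b']

# Option 2: a wrapper generator delegating with `yield from`
def genXY():
    yield from genX()
    yield from genY()

print(list(genXY()))  # [0, 1, 2, 'a', 'b']
```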
cupdike / shErrorCode255Tip.txt
Created Mar 27, 2019
sh.ErrorReturnCode_255 using Python sh package
If you are trying to run a script like this:

    import sh
    myScriptCommand = sh.Command("/path/to/script")
    myScriptCommand("my arg")

and you see this error:
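The error in the title simply means the child process exited with status 255; sh wraps each nonzero exit in an ErrorReturnCode_<n> exception, and the real message is usually on the exception's .stderr. A stdlib reproduction of the exit-status part (assuming /bin/sh exists):

```python
import subprocess

# Mimic a script that fails the way sh reports as ErrorReturnCode_255
result = subprocess.run(["/bin/sh", "-c", "echo boom >&2; exit 255"],
                        capture_output=True, text=True)
print(result.returncode)      # 255
print(result.stderr.strip())  # boom
```

With sh itself, wrap the call in `try/except sh.ErrorReturnCode as e` and inspect `e.stderr` for the underlying cause.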
cupdike / gist:c5554233e1dd6b233a9b6ec6adb05c5a
Created Nov 1, 2018
Python function to round down minutes to a user specified resolution
from datetime import datetime, timedelta

def round_minutes(dt, resolutionInMinutes):
    """round_minutes(datetime, resolutionInMinutes) => datetime rounded to lower interval

    Works for minute resolution up to a day (e.g. cannot round to nearest week).
    """
    # First zero out seconds and micros
    dtTrunc = dt.replace(second=0, microsecond=0)
    # Then drop the minutes past the most recent resolution boundary
    minutesPastBoundary = (dtTrunc.hour * 60 + dtTrunc.minute) % resolutionInMinutes
    return dtTrunc - timedelta(minutes=minutesPastBoundary)
cupdike /
Created Sep 20, 2018
Use Airflow's ORM to delete all DagRuns. Could also use sqlalchemy filtering if desired. This was with Airflow 1.8.
from airflow.models import DagRun
from sqlalchemy import *  # only needed if you add filtering below
from airflow import settings

session = settings.Session()
# Bulk-delete every DagRun row; chain a .filter(...) before .delete() to narrow it
session.query(DagRun).delete()
session.commit()
session.close()
cupdike / ConnectionSetup.txt
Last active Oct 30, 2019
Airflow Connection to Remote Kerberized Hive Metastore
# Let's say this is your kerberos ticket (likely from a keytab used for the remote service):
Ticket cache: FILE:/tmp/airflow_krb5_ccache
Default principal: hive/myserver.myrealm@myrealm

Valid starting       Expires              Service principal
06/14/2018 17:52:05  06/15/2018 17:49:35  krbtgt/myrealm@myrealm
        renew until 06/17/2018 05:49:33
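The preview cuts off here. In Airflow 1.x the metastore hook reads authMechanism and kerberos_service_name from the connection's extra JSON, so the matching connection setup looks roughly like this (conn_id, host, and port are illustrative, not from the gist):

```shell
# Point Airflow at the ticket cache shown above, then register the connection
export KRB5CCNAME=/tmp/airflow_krb5_ccache
airflow connections --add \
    --conn_id hive_metastore_kerb \
    --conn_type hive_metastore \
    --conn_host myserver.myrealm \
    --conn_port 9083 \
    --conn_extra '{"authMechanism": "GSSAPI", "kerberos_service_name": "hive"}'
```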
cupdike / AirflowBeelineConnectionSample
Created Jun 13, 2018
Airflow Beeline Connection Using Kerberos via CLI
### There aren't many good examples of how to do this when also using kerberos
(venv) [airflow@cray01 dags]$ airflow connections --add \
--conn_id beeline_hive \
--conn_type 'beeline' \
--conn_host '' \
--conn_port 10000 \
--conn_extra '{"use_beeline": true, "auth":"kerberos;principal=mysvcname/myservicehost@MYDOMAIN.COM;"}'
### Then, a sample DAG to use it
cupdike / BeelineJarDependencyFinder
Created Jul 12, 2017
Bash commands that will provide the list of jars needed to run beeline without installing hive
# If you want to run Beeline without installing Hive...
# This will help you find the jars that you need:
# Ref:
# Turn on verbose classloading
$ export _JAVA_OPTIONS=-verbose:class
# Run beeline and process out the needed jars.
# Below assumes the hadoop jars are under a 'cloudera' path (adjust accordingly)
$ /usr/bin/beeline | tr '[' '\n' | tr ']' ' ' | grep jar | grep cloudera | grep -v checksum | awk '{last=split($0,a,"/"); print a[last]}' | sort | uniq
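The same extraction can be done in Python once the verbose-classloading output is captured; this helper assumes the classic `[Loaded <class> from <path>]` line format and the cloudera-path filter used above (the function name is mine):

```python
import re

def loaded_jars(log_text, path_filter="cloudera"):
    """Return sorted unique jar basenames from -verbose:class output."""
    jars = set()
    for m in re.finditer(r"\[Loaded \S+ from (\S+)\]", log_text):
        path = m.group(1)
        # Keep only jar paths matching the filter, dropping checksum entries
        if path.endswith(".jar") and path_filter in path and "checksum" not in path:
            jars.add(path.rsplit("/", 1)[-1])
    return sorted(jars)

sample = (
    "[Loaded org.apache.hive.jdbc.HiveDriver from file:/opt/cloudera/jars/hive-jdbc.jar]\n"
    "[Loaded java.lang.String from /usr/lib/jvm/jre/lib/rt.jar]\n"
)
print(loaded_jars(sample))  # ['hive-jdbc.jar']
```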
cupdike /
Created Oct 6, 2015
Polls a file hosted at a URL and downloads it initially and if it changes.
"""Polls a file hosted at a URL and downloads it initially and if it changes."""
# Should be fairly robust to web server issues (in fact, it would only
# be a handful of lines were it not for error handling)
import requests
import time
import sys
FILE_URL = "http://<mywebserver>/<myfile>"
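The preview stops before the polling loop. The "download again only if it changed" check can be sketched with a content hash; this is my sketch of that idea, not necessarily how the gist implements it (it may rely on HTTP headers instead):

```python
import hashlib

def content_changed(new_bytes, last_digest):
    """Compare a fresh download against the last-seen digest.

    Returns (changed, digest) so the caller can store the new digest.
    """
    digest = hashlib.sha256(new_bytes).hexdigest()
    return digest != last_digest, digest

changed, d = content_changed(b"version 1", None)     # first poll: always "changed"
changed_again, _ = content_changed(b"version 1", d)  # same content: unchanged
```

A polling loop would sleep, fetch FILE_URL with requests.get, call this helper, and write the file to disk only when changed is True.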
# Inspired by:
def quicksort(l):
    if len(l) < 2:
        return l
    iSwap = 1
    pivot = l[0]  # left most value is the pivot
    for i, val in enumerate(l[1:], start=1):  # Skip the pivot cell
        if val < pivot:
            # Swap smaller values into the left partition
            l[i], l[iSwap] = l[iSwap], l[i]
            iSwap += 1
    # Move the pivot between the partitions, then recurse on each side
    l[0], l[iSwap - 1] = l[iSwap - 1], l[0]
    return quicksort(l[:iSwap - 1]) + [l[iSwap - 1]] + quicksort(l[iSwap:])