Guangyang Li gyli

## vpn.sh
# vpn - drive the Cisco AnyConnect command-line client.
#
# Usage: vpn c|d|k
#   c  connect to vpn.example.com
#   d  disconnect, then kill the processes whose ps entry contains
#      "Cisco AnyConnect Secure Mobility Client"
#   k  kill those same processes, then force-kill (-9) any running
#      "/opt/cisco/anyconnect/bin/vpn connect" command
# Any other argument, or no argument, does nothing.
function vpn {
  # "$1" is quoted so that calling vpn without an argument compares an empty
  # string instead of making [ fail with "unary operator expected".
  if [ "$1" = "c" ]; then
    /opt/cisco/anyconnect/bin/vpn connect "vpn.example.com";
  elif [ "$1" = "d" ]; then
    /opt/cisco/anyconnect/bin/vpn disconnect;
    # The [C] bracket expression keeps grep from matching its own ps entry.
    # Errors from kill (e.g. nothing matched) are discarded.
    kill $(ps aux | grep "[C]isco AnyConnect Secure Mobility Client" | awk '{print $2}') 2>/dev/null;
  elif [ "$1" = "k" ]; then
    kill $(ps aux | grep "[C]isco AnyConnect Secure Mobility Client" | awk '{print $2}') 2>/dev/null;
    # Same self-match trick with [/]; -9 because a pending connect is force-killed.
    kill -9 $(ps aux | grep "[/]opt/cisco/anyconnect/bin/vpn connect" | awk '{print $2}') 2>/dev/null;
  fi

## nested_bracketed_values.py
template = """head{var1}middle{var2{nested}}end"""

def parenthetic_contents(string):
    """Generate bracketed contents in string as pairs (level, contents).

    The delimiters matched here are curly braces: ``{`` opens a group and
    ``}`` closes the most recently opened one.
    """
    # Indices of the '{' characters that have not been closed yet.
    stack = []
    for i, c in enumerate(string):
        if c == '{':
            stack.append(i)
        # Only treat '}' as a closer when there is an open '{' to pair it with.
        elif c == '}' and stack:
            # Index of the '{' that this '}' closes.
            start = stack.pop()

## fix-pipx-packages.sh
# After brew upgrades Python, packages installed through pipx might need to be reinstalled.
# Step 1: delete pipx's shared directory
# (presumably so it is rebuilt against the new Python -- confirm against the pipx docs).
rm -rf ~/.local/pipx/shared/
# Step 2: reinstall every package that pipx manages.
pipx reinstall-all

## split_string_but_ignore_separator_wrapped_in_brackets.py
import re

# Sample input: a comma-separated list where one item contains commas inside parentheses.
example = """Ann,Bob,Cat(Tom,Max),Dave"""

# Split on every comma except those inside parentheses.
# The negative lookahead (?![^\(\)]*\)) rejects a comma when the text after it
# reaches a ')' without passing any other '(' or ')' first, i.e. when the comma
# sits inside a group that is still open.
# NOTE(review): this appears to handle only one level of parentheses; commas in
# nested groups such as "a(b,(c),d)" would still be split -- confirm before reuse.
re.split(r',(?![^\(\)]*\))', example)

# Output:
# ['Ann', 'Bob', 'Cat(Tom,Max)', 'Dave']

## load_parameters_with_default_values_in_an_elegant_way_in_python.py
import csv


class CustomCSVReader:
    # Default CSV formatting parameters, keyed by parameter name.
    # NOTE(review): the keys and values look like the csv module's default
    # ("excel") dialect settings -- confirm against the csv documentation.
    _CSV_DEFAULT_PARS = {
        "delimiter": ",",
        "doublequote": True,
        "escapechar": None,
        "lineterminator": "\r\n",
        "quotechar": '"',

## CopyObjectRecursively.scala
  /**
    * NOTE(review): only the beginning of this method is visible, so this
    * description covers just that part: it parses `sourcePath` and `targetPath`
    * as S3 URIs and reads the bucket name of the source.
    *
    * @param s3client   S3 client (presumably used for the copy calls -- confirm)
    * @param sourcePath S3 URI string of the source, parsed with `AmazonS3URI`
    * @param targetPath S3 URI string of the target, parsed with `AmazonS3URI`
    * @param includeSourceBucketInTargetPath defaults to `false`; presumably controls
    *        whether the source bucket name becomes part of the target path --
    *        verify against the rest of the body
    */
  def CopyObjectRecursively(
      s3client: AmazonS3Client,
      sourcePath: String,
      targetPath: String,
      includeSourceBucketInTargetPath: Boolean = false): Unit = {

    // Parse both locations so bucket and key can be read separately.
    val sourceURI: AmazonS3URI = new AmazonS3URI(sourcePath)
    val targetURI: AmazonS3URI = new AmazonS3URI(targetPath)

    val sourceBucket = sourceURI.getBucket()

## find_number_of_routing_shards.py
# https://www.elastic.co/guide/en/elasticsearch/reference/7.0/indices-split-index.html
# Parameter number_of_routing_shards is used for splitting index in Elasticsearch
# Since ES 7.0, it has default value, which is designed to split by factors of 2 up to a maximum of 1024 shards.
# However, depending on the original number of primary shards, the default value might not be the best choice,
# since it might not offer the largest set of shard counts the index could be split into.

# For example, if the current primary shard number is 5, ES would give number_of_routing_shards a default value of 640,
# which allows the index to be split into 10, 20, 40, 80, 160, 320 or 640 shards.
# However, assuming the maximum shard number is still 1024, setting number_of_routing_shards to 900 would give the split
# more options: 5, 10, 15, 20, 25, 30, 45, 50, 60, 75, 90, 100, 150, 180, 225, 300, 450, 900

## pandas_multiprocessing_uneven.py
import multiprocessing
import time


class WorkerPool:
    def __init__(self, worker_number: int):
        """Create a pool with ``worker_number`` slots.

        :param worker_number: number of slots held in ``self.pool``
        """
        self.worker_number = worker_number
        # Fill every slot with a placeholder Process that has no target and is
        # not started here.
        # NOTE(review): list multiplication repeats ONE Process object, so all
        # slots initially reference the same instance. That is harmless only if
        # each slot is reassigned before use -- confirm in run().
        self.pool = [multiprocessing.Process()] * worker_number

    def run(self, target, args=None, sleep_time=1):

## pandas_multiprocessing.py
import pandas as pd
import multiprocessing as mp
import traceback


def processing_chunk(chunk):
    """Walk over every row of ``chunk`` without using it, then sleep 5 seconds.

    The row loop has no effect beyond consuming ``chunk.iterrows()``; the
    sleep presumably stands in for real per-chunk work.

    :param chunk: object exposing ``iterrows()`` (presumably a pandas
        DataFrame -- confirm against the caller)
    """
    for _index_and_row in chunk.iterrows():
        continue
    time.sleep(5)

## join_nullable.py
def join_nullable(l: List[str], sep: str) -> str:
    """
    Join the truthy entries of a list and skip everything else.

    An entry is kept only when it is truthy, so ``None`` and empty strings
    are both dropped before joining.
    Example:
        Input: l=['a', None, 'c'], sep=' '
        Output: 'a c'
    :param l: list of strings, possibly containing None
    :param sep: separator
    :return: the kept strings joined by ``sep``
    """
    kept = [item for item in l if item]
    return sep.join(kept)
	# vpn - drive the Cisco AnyConnect command-line client.
	#
	# Usage: vpn c|d|k
	#   c  connect to vpn.example.com
	#   d  disconnect, then kill the processes whose ps entry contains
	#      "Cisco AnyConnect Secure Mobility Client"
	#   k  kill those same processes, then force-kill (-9) any running
	#      "/opt/cisco/anyconnect/bin/vpn connect" command
	# Any other argument, or no argument, does nothing.
	function vpn {
	  # "$1" is quoted so that calling vpn without an argument compares an empty
	  # string instead of making [ fail with "unary operator expected".
	  if [ "$1" = "c" ]; then
	    /opt/cisco/anyconnect/bin/vpn connect "vpn.example.com";
	  elif [ "$1" = "d" ]; then
	    /opt/cisco/anyconnect/bin/vpn disconnect;
	    # Plain | is required here: an escaped \| is handed to ps as a literal
	    # argument and no pipeline is built.
	    # The [C] bracket expression keeps grep from matching its own ps entry.
	    kill $(ps aux | grep "[C]isco AnyConnect Secure Mobility Client" | awk '{print $2}') 2>/dev/null;
	  elif [ "$1" = "k" ]; then
	    kill $(ps aux | grep "[C]isco AnyConnect Secure Mobility Client" | awk '{print $2}') 2>/dev/null;
	    kill -9 $(ps aux | grep "[/]opt/cisco/anyconnect/bin/vpn connect" | awk '{print $2}') 2>/dev/null;
	  fi
	template = """head{var1}middle{var2{nested}}end"""

	def parenthetic_contents(string):
	"""Generate parenthesized contents in string as pairs (level, contents)."""
	stack = []
	for i, c in enumerate(string):
	if c == '{':
	stack.append(i)
	elif c == '}' and stack:
	start = stack.pop()
	# After brew upgrades Python, packages installed through pipx might need to be reinstalled.
	rm -rf ~/.local/pipx/shared/
	pipx reinstall-all
	import re

	example = """Ann,Bob,Cat(Tom,Max),Dave"""

	re.split(r',(?![^\(\)]*\))', example)

	# Output:
	# ['Ann', 'Bob', 'Cat(Tom,Max)', 'Dave']
	import csv


	class CustomCSVReader:
	_CSV_DEFAULT_PARS = {
	"delimiter": ",",
	"doublequote": True,
	"escapechar": None,
	"lineterminator": "\r\n",
	"quotechar": '"',
	def CopyObjectRecursively(
	s3client: AmazonS3Client,
	sourcePath: String,
	targetPath: String,
	includeSourceBucketInTargetPath: Boolean = false): Unit = {

	val sourceURI: AmazonS3URI = new AmazonS3URI(sourcePath)
	val targetURI: AmazonS3URI = new AmazonS3URI(targetPath)

	val sourceBucket = sourceURI.getBucket()
	# https://www.elastic.co/guide/en/elasticsearch/reference/7.0/indices-split-index.html
	# Parameter number_of_routing_shards is used for splitting index in Elasticsearch
	# Since ES 7.0, it has default value, which is designed to split by factors of 2 up to a maximum of 1024 shards.
	# However, depending on the original number of primary shards, the default value might not be the best choice,
	# since it might not offer the largest set of shard counts the index could be split into.

	# For example, if the current primary shard number is 5, ES would give number_of_routing_shards a default value of 640,
	# which allows the index to be split into 10, 20, 40, 80, 160, 320 or 640 shards.
	# However, assuming the maximum shard number is still 1024, setting number_of_routing_shards to 900 would give the split
	# more options: 5, 10, 15, 20, 25, 30, 45, 50, 60, 75, 90, 100, 150, 180, 225, 300, 450, 900
	import multiprocessing
	import time


	class WorkerPool:
	def __init__(self, worker_number):
	self.worker_number = worker_number
	self.pool = [multiprocessing.Process()] * worker_number

	def run(self, target, args=None, sleep_time=1):
	import pandas as pd
	import multiprocessing as mp
	import traceback


	def processing_chunk(chunk):
	    """Walk over every row of ``chunk`` without using it, then sleep 5 seconds.

	    :param chunk: object exposing ``iterrows()`` (presumably a pandas
	        DataFrame -- confirm against the caller)
	    """
	    # The loop only consumes the row iterator; nothing is done per row.
	    for _index_and_row in chunk.iterrows():
	        pass
	    # Runs once, after the loop has finished.
	    time.sleep(5)
	def join_nullable(l: List[str], sep: str) -> str:
	    """
	    Join the truthy entries of a list and skip everything else.

	    An entry is kept only when it is truthy, so ``None`` and empty strings
	    are both dropped before joining.
	    Example:
	        Input: l=['a', None, 'c'], sep=' '
	        Output: 'a c'
	    :param l: list of strings, possibly containing None
	    :param sep: separator
	    :return: the kept strings joined by ``sep``
	    """
	    return sep.join(item for item in l if item)