Zhuyi Xue zyxue

## lock_demo.py
import multiprocessing
import multiprocessing.synchronize
import random
import time


def print_func(process_index: int) -> None:
    """Write "hello world" to stdout one character at a time.

    Each character is printed without a trailing newline and stdout is
    flushed after every character.

    :param process_index: accepted but not used by the code shown here.
    """
    message = "hello world"
    for char in message:
        print(char, end='', flush=True)

## FlinkToy.java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0

## utils.py
import os
import time
import logging
from functools import update_wrapper


logging.basicConfig(
    level=logging.DEBUG, format='%(asctime)s|%(levelname)s|%(message)s')


## init_new_venv.sh
# Create a conda environment named "venv" with ipython installed.
# (The subcommand is `create`; `fgcreate` is not a conda command.)
conda create -n venv ipython

source activate venv

pip install \
    pandas \
    scipy \
    jupyter \
    jupyterlab \
    matplotlib \

## gpustat.sh
# https://github.com/wookayin/gpustat
watch --color -n1.0 gpustat -u -p -P

## SRNN-vs-LSTM
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, SimpleRNN


N = 10000
num_repeats = 30
num_epochs = 5
# sequence length options

## spark-custom-aggregator
import scala.collection.mutable.Map

import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.Encoders

import spark.implicits._
import org.apache.spark.sql.types._


## grid-cv
# related Stack Overflow question: https://stackoverflow.com/questions/46351157/why-gridsearchcv-in-scikit-learn-spawn-so-many-threads

import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV


Cs = 10 ** np.arange(-2, 2, 0.1)

## eda_imports.py
import os
import io
import gzip
import time
import sys
import glob
import json
import re
import csv
import datetime

## execute.py
def execute(cmd, flag_file=None, msg_id='', debug=False):
    """
    # http://stackoverflow.com/questions/1606795/catching-stdout-in-realtime-from-subprocess
    :param cmd: should never include a pipe or redirection, which would require
    a new shell process
    This execute logs all stdout and stderr, which could look funny, especially
    when it comes to tools like aspc and wget
    """
    logger.info('executing: {0}'.format(cmd))
    # todo: should check whether cmdsp includes pipe or redirection here
	import multiprocessing
	import multiprocessing.synchronize
	import random
	import time


	def print_func(process_index: int) -> None:
	for x in "hello world":
	print(x, end='', flush=True)
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	import os
	import time
	import logging
	from functools import update_wrapper


	logging.basicConfig(
	level=logging.DEBUG, format='%(asctime)s\|%(levelname)s\|%(message)s')
	conda fgcreate -n venv ipython

	source activate venv

	pip install \
	pandas \
	scipy \
	jupyter \
	jupyterlab \
	matplotlib \
	# https://github.com/wookayin/gpustat
	watch --color -n1.0 gpustat -u -p -P
	import matplotlib.pyplot as plt
	import numpy as np
	from keras.models import Model
	from keras.layers import Input, LSTM, Dense, SimpleRNN


	N = 10000
	num_repeats = 30
	num_epochs = 5
	# sequence length options
	import scala.collection.mutable.Map

	import org.apache.spark.sql.expressions.Aggregator
	import org.apache.spark.sql.Encoder
	import org.apache.spark.sql.Encoders

	import spark.implicits._
	import org.apache.spark.sql.types._
	# related Stack Overflow question: https://stackoverflow.com/questions/46351157/why-gridsearchcv-in-scikit-learn-spawn-so-many-threads

	import numpy as np

	from sklearn.linear_model import LogisticRegression
	from sklearn.model_selection import StratifiedKFold
	from sklearn.model_selection import GridSearchCV


	Cs = 10 ** np.arange(-2, 2, 0.1)
	import os
	import io
	import gzip
	import time
	import sys
	import glob
	import json
	import re
	import csv
	import datetime
	def execute(cmd, flag_file=None, msg_id='', debug=False):
	"""
	# http://stackoverflow.com/questions/1606795/catching-stdout-in-realtime-from-subprocess
	:param cmd: should never include a pipe or redirection, which would require
	a new shell process
	This execute logs all stdout and stderr, which could look funny, especially
	when it comes to tools like aspc and wget
	"""
	logger.info('executing: {0}'.format(cmd))
	# todo: should check whether cmdsp includes pipe or redirection here