  • Save raybellwaves/3033fe1679510ea4d5d64e0658225fc2 to your computer and use it in GitHub Desktop.
# Build notes:
# conda create --name djq_mem_issue2 python=3.6
# conda activate djq_mem_issue2
# conda install -c conda-forge psutil notebook ipywidgets bokeh pandas
# mkdir test
# cd test
# git clone https://github.com/dask/dask-jobqueue.git
# cd dask-jobqueue/
# git remote add upstream https://github.com/dask/dask-jobqueue.git
# git fetch -fu upstream refs/pull/138/head:pr/138 && git checkout pr/138
# pip install -e .
# cd ../..
# Tunnel the notebook to the laptop.
# Find the IP address by doing:
#>>> from dask_jobqueue import LSFCluster
#>>> from dask.distributed import Client
#>>> cluster = LSFCluster(cores=2, memory='4 GB')
# (captured warning from the import:)
# /nethome/rxb826/local/bin/miniconda3/envs/djq_mem_issue2/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
#   return f(*args, **kwds)
#>>> client = Client(cluster)
#>>> print(client)
#<Client: scheduler='tcp://xx.xxx.xxx.xxx:xxxxx' processes=0 cores=0>
# ssh -N -L 8888:xx.xxx.xxx.xxx:8888 USERNAME@pegasus.ccs.miami.edu
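The `memory='4 GB'` string above is parsed into a byte count (dask uses `dask.utils.parse_bytes` for this). The hand-rolled sketch below mimics that parsing for the decimal-prefix case; the `parse_memory` helper is my own illustration, not dask-jobqueue API:

```python
# Hand-rolled sketch of how a memory spec like '4 GB' becomes a byte
# count (dask itself uses dask.utils.parse_bytes; decimal prefixes shown).
PREFIXES = {'kB': 10**3, 'MB': 10**6, 'GB': 10**9, 'TB': 10**12}

def parse_memory(spec):
    """Turn a string like '4 GB' into an integer byte count."""
    number, unit = spec.split()
    return int(float(number) * PREFIXES[unit])

print(parse_memory('4 GB'))    # 4000000000
print(parse_memory('250 GB'))  # 250000000000
```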
# This is run in a notebook:
from dask_jobqueue import LSFCluster
from dask.distributed import Client
import dask.dataframe as dd
import dask.array as da
#cluster = LSFCluster(cores=8, memory='25 GB', queue='general', walltime='00:10')
cluster = LSFCluster(cores=8, memory='250 GB', queue='bigmem', walltime='00:10')
cluster
# Click on 'Manual Scaling' and choose 50 workers.
# On the HPC, type 'bjobs' to see how many jobs are running.
# 7 jobs running.
client = Client(cluster)
client
df = dd.demo.make_timeseries(start='2000-01-01',
                             end='2010-12-31',
                             dtypes={'x': float, 'y': float, 'id': int},
                             freq='10ms',
                             partition_freq='24h')
df
df.head()
df = df.persist()
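A back-of-envelope size check (my own sketch, not part of the original session) suggests the persisted timeseries is on the order of a terabyte, which would explain workers exceeding their memory limits; it assumes 8 bytes each for the datetime64 index, the two float64 columns, and the int64 column:

```python
from datetime import datetime

# freq='10ms' means 100 rows per second between start and end.
start = datetime(2000, 1, 1)
end = datetime(2010, 12, 31)
n_rows = int((end - start).total_seconds() * 100)
bytes_per_row = 8 * 4  # datetime64 index + x + y + id, 8 bytes each
est_bytes = n_rows * bytes_per_row
print(f"~{n_rows:,} rows, ~{est_bytes / 1e12:.2f} TB")  # roughly 1.1 TB
```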
# Some errors occurring:
Job 17752343 has got a restarting worker tcp://10.10.103.1:34877
Job 17752337 has got a restarting worker tcp://10.10.103.9:58938
Job 17752342 has got a restarting worker tcp://10.10.103.1:50572
Job 17752343 has got a restarting worker tcp://10.10.103.1:51945
Job 17752337 has got a restarting worker tcp://10.10.103.9:36510
Job 17752342 has got a restarting worker tcp://10.10.103.1:35777
Job 17752340 has got a restarting worker tcp://10.10.104.9:40764
Job 17752343 has got a restarting worker tcp://10.10.103.1:38282
Job 17752337 has got a restarting worker tcp://10.10.103.9:49435
Job 17752342 has got a restarting worker tcp://10.10.103.1:47680
Job 17752343 has got a restarting worker tcp://10.10.103.1:50480
Job 17752337 has got a restarting worker tcp://10.10.103.9:57997
Job 17752339 has got a restarting worker tcp://10.10.104.8:37845
Job 17752338 has got a restarting worker tcp://10.10.104.2:33663
Job 17752342 has got a restarting worker tcp://10.10.103.1:34988
Job 17752341 has got a restarting worker tcp://10.10.104.5:33372
Job 17752337 has got a restarting worker tcp://10.10.103.9:50979
Job 17752343 has got a restarting worker tcp://10.10.103.1:49825
Job 17752342 has got a restarting worker tcp://10.10.103.1:40381
Job 17752337 has got a restarting worker tcp://10.10.103.9:55247
Job 17752343 has got a restarting worker tcp://10.10.103.1:53597
Job 17752345 has got a restarting worker tcp://10.10.103.9:60780
Job 17752349 has got a restarting worker tcp://10.10.103.1:48362
Job 17752350 has got a restarting worker tcp://10.10.103.1:52849
Job 17752345 has got a restarting worker tcp://10.10.103.9:54050
Job 17752349 has got a restarting worker tcp://10.10.103.1:45576
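# "has got a restarting worker" messages like these typically mean the nanny
# killed a worker that exceeded its memory limit. In recent distributed
# versions the fractions at which a worker spills, pauses, and is terminated
# are configurable (the values below are the defaults as I understand them;
# check your installed version's config):

```yaml
# ~/.config/dask/distributed.yaml (sketch; defaults shown)
distributed:
  worker:
    memory:
      target: 0.60     # try to keep process memory below this fraction
      spill: 0.70      # start spilling to disk
      pause: 0.80      # pause worker threads
      terminate: 0.95  # nanny restarts the worker
```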