Skip to content

Instantly share code, notes, and snippets.

@epifanio
Created November 8, 2016 12:09
Show Gist options
  • Save epifanio/b675c596c3b717ff08bbfef4a36879b2 to your computer and use it in GitHub Desktop.
Save epifanio/b675c596c3b717ff08bbfef4a36879b2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import psutil"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"svmem(total=33418977280, available=32548933632, percent=2.6, used=476016640, free=21143883776, active=10692493312, inactive=494125056, buffers=2009366528, cached=9789710336, shared=163205120)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.virtual_memory()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"sswap(total=17171345408, used=10005352448, free=7165992960, percent=58.3, sin=6190977024, sout=47739355136)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.swap_memory()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 2.2G\r\n",
"drwxr-xr-x 3 epinux epinux 4.0K Nov 7 00:45 \u001b[0m\u001b[01;34m.\u001b[0m/\r\n",
"drwxr-xr-x 10 epinux epinux 4.0K Nov 7 23:07 \u001b[01;34m..\u001b[0m/\r\n",
"-rwxr-xr-x 1 epinux epinux 115M Sep 5 10:41 \u001b[01;32m170_001_0000_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 82M Sep 5 10:43 \u001b[01;32m170_001_1942_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 95M Sep 5 10:45 \u001b[01;32m170_001_2352_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 191M Sep 5 10:50 \u001b[01;32m170_002_2008_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 212M Sep 5 10:55 \u001b[01;32m170_002_2030_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 234M Sep 5 11:01 \u001b[01;32m170_003_2055_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 203M Sep 5 11:05 \u001b[01;32m170_003_2118_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 238M Sep 5 11:11 \u001b[01;32m170_004_2143_0001_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 197M Sep 5 11:16 \u001b[01;32m170_004_2210_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 38M Sep 5 11:16 \u001b[01;32m170_005_2233_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 197M Sep 5 11:21 \u001b[01;32m170_005_2236_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 197M Sep 5 11:26 \u001b[01;32m170_005_2257_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 183M Sep 5 11:30 \u001b[01;32m170_005_2322_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"-rwxr-xr-x 1 epinux epinux 24M Sep 5 11:31 \u001b[01;32m170_006_2320_ascii_ara_beam_detail.txt\u001b[0m*\r\n",
"drwxr-xr-x 2 epinux epinux 4.0K Sep 5 12:48 \u001b[01;34m.ipynb_checkpoints\u001b[0m/\r\n"
]
}
],
"source": [
"ls -lah ASCII/"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 2.8G\r\n",
"drwxr-xr-x 2 epinux epinux 4.0K Nov 7 01:07 \u001b[0m\u001b[01;34m.\u001b[0m/\r\n",
"drwxr-xr-x 10 epinux epinux 4.0K Nov 7 23:07 \u001b[01;34m..\u001b[0m/\r\n",
"-rw-r--r-- 1 epinux epinux 146M Nov 7 01:07 170_001_0000.ft\r\n",
"-rw-r--r-- 1 epinux epinux 105M Nov 7 01:06 170_001_1942.ft\r\n",
"-rw-r--r-- 1 epinux epinux 121M Nov 7 01:07 170_001_2352.ft\r\n",
"-rw-r--r-- 1 epinux epinux 244M Nov 7 01:07 170_002_2008.ft\r\n",
"-rw-r--r-- 1 epinux epinux 270M Nov 7 01:07 170_002_2030.ft\r\n",
"-rw-r--r-- 1 epinux epinux 298M Nov 7 01:07 170_003_2055.ft\r\n",
"-rw-r--r-- 1 epinux epinux 259M Nov 7 01:07 170_003_2118.ft\r\n",
"-rw-r--r-- 1 epinux epinux 314M Nov 7 01:07 170_004_2143_0001.ft\r\n",
"-rw-r--r-- 1 epinux epinux 251M Nov 7 01:07 170_004_2210.ft\r\n",
"-rw-r--r-- 1 epinux epinux 48M Nov 7 01:06 170_005_2233.ft\r\n",
"-rw-r--r-- 1 epinux epinux 251M Nov 7 01:07 170_005_2236.ft\r\n",
"-rw-r--r-- 1 epinux epinux 251M Nov 7 01:07 170_005_2257.ft\r\n",
"-rw-r--r-- 1 epinux epinux 233M Nov 7 01:07 170_005_2322.ft\r\n",
"-rw-r--r-- 1 epinux epinux 30M Nov 7 01:06 170_006_2320.ft\r\n"
]
}
],
"source": [
"ls -lah feather2/"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from multiprocessing import Process, Queue\n",
"\n",
"import multiprocessing\n",
"from glob import glob\n",
"import pandas as pd\n",
"import numpy as np\n",
"import feather"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def worker1(filename):\n",
" key=filename.split('/')[-1].split('.')[0][:-22]\n",
" names=['Ping Time',\n",
" 'Ping Number',\n",
" 'Beam Number',\n",
" 'Easting',\n",
" 'Northing', \n",
" 'Depth',\n",
" 'Longitude', \n",
" 'Latitude',\n",
" 'Backscatter Value',\n",
" 'Corrected Backscatter Value', \n",
" 'True Angle']\n",
" df = pd.read_csv(filename, \n",
" skiprows=16, \n",
" names=names, \n",
" delim_whitespace=True)\n",
" df['Geom']=makegeom(df=df,x='Easting',y='Northing')\n",
" df = df.assign(datetime=pd.to_datetime(df['Ping Time'],unit='s') - pd.Timedelta(16,unit='s'), line=key)\n",
" return df\n",
"\n",
"def makegeom(df, x, y):\n",
" geom = np.core.defchararray.add(\n",
" np.core.defchararray.add(\n",
" np.core.defchararray.add(\n",
" 'Point(', \n",
" df[x].values.astype(str)), \n",
" ' '), \n",
" np.core.defchararray.add(\n",
" df[y].values.astype(str), \n",
" ')')\n",
" )\n",
" return geom\n",
"\n",
"def worker2(filename):\n",
" df = feather.read_dataframe(filename)\n",
" return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test: \n",
"## **Processing ASCII files**"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def slave1(queue, filename):\n",
" print(filename)\n",
" #val = worker1(filename)\n",
" queue.put(worker1(filename))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ASCII/170_004_2210_ascii_ara_beam_detail.txt\n",
"ASCII/170_003_2118_ascii_ara_beam_detail.txt\n",
"ASCII/170_005_2322_ascii_ara_beam_detail.txt\n",
"ASCII/170_005_2236_ascii_ara_beam_detail.txt\n",
"ASCII/170_003_2055_ascii_ara_beam_detail.txt\n",
"ASCII/170_001_0000_ascii_ara_beam_detail.txt\n",
"ASCII/170_002_2008_ascii_ara_beam_detail.txt\n",
"ASCII/170_002_2030_ascii_ara_beam_detail.txt\n",
"ASCII/170_001_1942_ascii_ara_beam_detail.txt\n",
"ASCII/170_005_2257_ascii_ara_beam_detail.txt\n",
"ASCII/170_001_2352_ascii_ara_beam_detail.txt\n",
"ASCII/170_006_2320_ascii_ara_beam_detail.txt\n",
"ASCII/170_004_2143_0001_ascii_ara_beam_detail.txt\n",
"ASCII/170_005_2233_ascii_ara_beam_detail.txt\n",
"running ... 13 file left\n",
"running ... 12 file left\n",
"running ... 11 file left\n",
"running ... 10 file left\n",
"running ... 9 file left\n",
"running ... 8 file left\n",
"running ... 7 file left\n",
"running ... 6 file left\n",
"running ... 5 file left\n",
"running ... 4 file left\n",
"running ... 3 file left\n",
"running ... 2 file left\n",
"running ... 1 file left\n",
"running ... 0 file left\n",
"CPU times: user 5.78 s, sys: 3.36 s, total: 9.14 s\n",
"Wall time: 1min\n"
]
}
],
"source": [
"%%time\n",
"lista = []\n",
"queue = Queue()\n",
"\n",
"procs = [Process(target=slave1, args=(queue, i)) for i in glob('%s/*' % 'ASCII')]\n",
"for proc in procs:\n",
" proc.start()\n",
"finished = 0\n",
"\n",
"while finished < len(glob('%s/*' % 'ASCII')):\n",
" #item = queue.get()\n",
" lista.append(queue.get())\n",
" finished = finished+1\n",
" left = len(glob('%s/*' % 'ASCII')) - finished\n",
" print('running ... %s file left' % left)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"queue.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merge results in a single dataframe"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(20581319, 14)\n"
]
}
],
"source": [
"#%%time\n",
"acoustic = pd.concat(lista)\n",
"print(acoustic.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Write results to disk as feather binary file"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/epinux/.local/lib/python3.5/site-packages/ipykernel/__main__.py:2: DeprecationWarning: pandas.core.common.is_categorical_dtype is deprecated. import from the public API: pandas.api.types.is_categorical_dtype instead\n",
" from ipykernel import kernelapp as app\n",
"/home/epinux/.local/lib/python3.5/site-packages/ipykernel/__main__.py:2: DeprecationWarning: pandas.core.common.is_datetime64_any_dtype is deprecated. import from the public API: pandas.api.types.is_datetime64_any_dtype instead\n",
" from ipykernel import kernelapp as app\n"
]
}
],
"source": [
"#%%time\n",
"feather.write_dataframe(acoustic, 'acoustic.ft')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del lista, acoustic"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test:\n",
"## Read dataframe from feather files"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def slave2(queue, filename):\n",
" print(filename)\n",
" #val = worker2(filename)\n",
" queue.put(worker2(filename))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"feather2/170_005_2236.ft\n",
"feather2/170_005_2322.ft\n",
"feather2/170_001_0000.ft\n",
"feather2/170_002_2030.ft\n",
"feather2/170_005_2257.ft\n",
"feather2/170_003_2055.ft\n",
"feather2/170_004_2210.ft\n",
"feather2/170_003_2118.ft\n",
"feather2/170_006_2320.ft\n",
"feather2/170_005_2233.ft\n",
"feather2/170_001_1942.ft\n",
"feather2/170_004_2143_0001.ft\n",
"feather2/170_001_2352.ft\n",
"feather2/170_002_2008.ft\n",
"running ... 13 file left\n",
"running ... 12 file left\n",
"running ... 11 file left\n",
"running ... 10 file left\n",
"running ... 9 file left\n",
"running ... 8 file left\n",
"running ... 7 file left\n",
"running ... 6 file left\n",
"running ... 5 file left\n",
"running ... 4 file left\n",
"running ... 3 file left\n",
"running ... 2 file left\n",
"running ... 1 file left\n",
"running ... 0 file left\n",
"CPU times: user 5.68 s, sys: 5.01 s, total: 10.7 s\n",
"Wall time: 13.7 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"lista = []\n",
"queue = Queue()\n",
"\n",
"procs = [Process(target=slave2, args=(queue, i)) for i in glob('%s/*' % 'feather2')]\n",
"for proc in procs:\n",
" proc.start()\n",
"finished = 0\n",
"\n",
"while finished < len(glob('%s/*' % 'feather2')):\n",
" #item = queue.get()\n",
" lista.append(queue.get())\n",
" finished = finished+1\n",
" left = len(glob('%s/*' % 'feather2')) - finished\n",
" print('running ... %s file left' % left)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"queue.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merge results in a single dataframe"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(20581319, 14)\n",
"CPU times: user 1.59 s, sys: 788 ms, total: 2.38 s\n",
"Wall time: 2.37 s\n"
]
}
],
"source": [
"%%time\n",
"acoustic = pd.concat(lista)\n",
"print(acoustic.shape)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"!rm -rf acoustic.ft"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Write results to disk as feather binary file"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/epinux/.local/lib/python3.5/site-packages/ipykernel/__main__.py:1: DeprecationWarning: pandas.core.common.is_categorical_dtype is deprecated. import from the public API: pandas.api.types.is_categorical_dtype instead\n",
" if __name__ == '__main__':\n",
"/home/epinux/.local/lib/python3.5/site-packages/ipykernel/__main__.py:1: DeprecationWarning: pandas.core.common.is_datetime64_any_dtype is deprecated. import from the public API: pandas.api.types.is_datetime64_any_dtype instead\n",
" if __name__ == '__main__':\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.59 s, sys: 1.79 s, total: 3.38 s\n",
"Wall time: 3.53 s\n"
]
}
],
"source": [
"%%time\n",
"feather.write_dataframe(acoustic, 'acoustic.ft')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del lista, acoustic"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test\n",
"## **Read feather binary file**"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 4.72 s, sys: 1.57 s, total: 6.29 s\n",
"Wall time: 6.29 s\n"
]
}
],
"source": [
"%%time\n",
"df = feather.read_dataframe('acoustic.ft')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"svmem(total=33418977280, available=20894773248, percent=37.5, used=12130164736, free=9488871424, active=22290239488, inactive=494153728, buffers=2010030080, cached=9789911040, shared=163221504)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.virtual_memory()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"sswap(total=17171345408, used=10005352448, free=7165992960, percent=58.3, sin=6190977024, sout=47739355136)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.swap_memory()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"svmem(total=33418977280, available=20923850752, percent=37.4, used=12101091328, free=9517772800, active=22288998400, inactive=494153728, buffers=2010185728, cached=9789927424, shared=163217408)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.virtual_memory()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"sswap(total=17171345408, used=10005352448, free=7165992960, percent=58.3, sin=6190977024, sout=47739355136)"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.swap_memory()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del df"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"svmem(total=33418977280, available=26274242560, percent=21.4, used=6750691328, free=14868123648, active=16949641216, inactive=494153728, buffers=2010210304, cached=9789952000, shared=163217408)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.virtual_memory()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"sswap(total=17171345408, used=10005352448, free=7165992960, percent=58.3, sin=6190977024, sout=47739355136)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"psutil.swap_memory()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variable Type Data/Info\n",
"---------------------------------------\n",
"Process type <class 'multiprocessing.context.Process'>\n",
"Queue method <bound method BaseContext<...>bject at 0x7fd3d1e38828>>\n",
"feather module <module 'feather' from '/<...>ges/feather/__init__.py'>\n",
"finished int 14\n",
"glob function <function glob at 0x7fd3d746a488>\n",
"left int 0\n",
"makegeom function <function makegeom at 0x7fd39abea268>\n",
"multiprocessing module <module 'multiprocessing'<...>iprocessing/__init__.py'>\n",
"np module <module 'numpy' from '/us<...>kages/numpy/__init__.py'>\n",
"pd module <module 'pandas' from '/u<...>.egg/pandas/__init__.py'>\n",
"proc Process <Process(Process-28, stopped)>\n",
"procs list n=14\n",
"psutil module <module 'psutil' from '/u<...>ages/psutil/__init__.py'>\n",
"queue Queue <multiprocessing.queues.Q<...>object at 0x7fd39ae76d30>\n",
"slave1 function <function slave1 at 0x7fd3c8b8ae18>\n",
"slave2 function <function slave2 at 0x7fd39ac02d08>\n",
"worker1 function <function worker1 at 0x7fd39abea1e0>\n",
"worker2 function <function worker2 at 0x7fd39abea2f0>\n"
]
}
],
"source": [
"whos"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://gist.github.com/19072613ce3a831468a4bc4da44a9fb2\r\n"
]
}
],
"source": [
"!gist -p Test\\ with\\ Multiprocessing.ipynb"
]
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/381044acc84c0388b3e677dda03ff8a8"
},
"gist": {
"data": {
"description": "BS/Test with Multiprocessing.ipynb",
"public": false
},
"id": "381044acc84c0388b3e677dda03ff8a8"
},
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2+"
},
"latex_envs": {
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0
},
"toc": {
"toc_cell": false,
"toc_number_sections": true,
"toc_threshold": 6,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment