Skip to content

Instantly share code, notes, and snippets.

@nocarryr
Last active August 17, 2018 15:48
Show Gist options
  • Save nocarryr/4e55c117daf1ba57ceff1dcd2e5cdaab to your computer and use it in GitHub Desktop.
Save nocarryr/4e55c117daf1ba57ceff1dcd2e5cdaab to your computer and use it in GitHub Desktop.
import os
import argparse
import cProfile
import pstats
from itertools import zip_longest
import numpy as np
def build_data(num_bytes):
    """Build a deterministic uint8 test pattern holding ``num_bytes`` values.

    The base pattern is 512 values long: an ascending ramp (0..255) occupies
    the even positions and a descending ramp (255..0) the odd positions.
    The pattern is tiled often enough to cover ``num_bytes`` and the result
    is truncated to exactly that length.
    """
    ascending = np.arange(256, dtype=np.uint8)
    descending = np.arange(255, -1, -1, dtype=np.uint8)
    pattern = np.zeros(ascending.size + descending.size, dtype=np.uint8)
    pattern[0::2] = ascending
    pattern[1::2] = descending
    # One extra repetition guarantees the tiled array is never too short.
    repeats = 1 + num_bytes // pattern.size
    tiled = np.tile(pattern, repeats)
    return tiled[:num_bytes]
def data_to_buffer(data):
    """Return ``data`` as a ctypes object.

    ``data`` is coerced to a uint8 ndarray with ``np.asarray`` and then
    handed to ``np.ctypeslib.as_ctypes``.
    """
    as_uint8 = np.asarray(data, dtype=np.uint8)
    buf = np.ctypeslib.as_ctypes(as_uint8)
    return buf
# TEST FUNCTIONS
def original(raw_data):
    """Current implementation (baseline for the comparison).

    Slices ``raw_data`` directly: even-indexed values become the real part,
    odd-indexed values the imaginary part. Each component ``v`` is then
    mapped to ``v / (255/2) - 1``.

    Returns a complex ndarray with ``len(raw_data) // 2`` elements.
    """
    num_samples = len(raw_data) // 2
    iq = np.empty(num_samples, 'complex')
    evens = raw_data[::2]
    odds = raw_data[1::2]
    iq.real = evens
    iq.imag = odds
    iq /= (255/2)
    iq -= (1 + 1j)
    return iq
def original_hardcode(raw_data):
    """Current implementation, with the divisor written as the literal 127.5.

    Slices ``raw_data`` directly: even-indexed values become the real part,
    odd-indexed values the imaginary part. Each component ``v`` is then
    mapped to ``v / 127.5 - 1``.

    Returns a complex ndarray with ``len(raw_data) // 2`` elements.
    """
    num_samples = len(raw_data) // 2
    iq = np.empty(num_samples, 'complex')
    evens = raw_data[::2]
    odds = raw_data[1::2]
    iq.real = evens
    iq.imag = odds
    iq /= 127.5
    iq -= (1 + 1j)
    return iq
def frombuffer_complex_proc(raw_data):
    """Wrap the buffer with np.frombuffer; otherwise the same steps as ``original``.

    ``raw_data`` is interpreted as uint8 values. Even-indexed values fill the
    real part and odd-indexed values the imaginary part of a complex128
    array, which is then scaled in place by ``/ (255/2)`` and shifted by
    ``-(1 + 1j)``.
    """
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    iq = np.empty(samples.size // 2, dtype=np.complex128)
    evens = samples[::2]
    odds = samples[1::2]
    iq.real = evens
    iq.imag = odds
    iq /= (255/2)
    iq -= (1 + 1j)
    return iq
def frombuffer_complex_proc_hardcode(raw_data):
    """Wrap the buffer with np.frombuffer and scale by the literal 127.5.

    ``raw_data`` is interpreted as uint8 values. Even-indexed values fill the
    real part and odd-indexed values the imaginary part of a complex128
    array, which is then scaled in place by ``/ 127.5`` and shifted by
    ``-(1 + 1j)``.
    """
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    iq = np.empty(samples.size // 2, dtype=np.complex128)
    evens = samples[::2]
    odds = samples[1::2]
    iq.real = evens
    iq.imag = odds
    iq /= 127.5
    iq -= (1 + 1j)
    return iq
def frombuffer_int_proc(raw_data):
    """Wrap the buffer with np.frombuffer and scale before building the complex array.

    The uint8 values are mapped to ``(v - 127.5) / 127.5`` first; the
    even-indexed results then fill the real part and the odd-indexed results
    the imaginary part of a new complex128 array.
    """
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    scaled = (samples - 127.5) / 127.5
    iq = np.empty(scaled.size // 2, dtype=np.complex128)
    evens = scaled[::2]
    odds = scaled[1::2]
    iq.real = evens
    iq.imag = odds
    return iq
def frombuffer_view_cast(raw_data):
    """Wrap the buffer with np.frombuffer and pair values by view casting.

    The uint8 values are copied to float64 and that array is reinterpreted
    as complex128, so each consecutive pair of values becomes one complex
    element (no slicing). The result is scaled in place by ``/ 127.5`` and
    shifted by ``-(1 + 1j)``.
    """
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    as_float = samples.astype(np.float64)
    iq = as_float.view(np.complex128)
    iq /= 127.5
    iq -= (1 + 1j)
    return iq
def frombuffer_view_cast_int_proc(raw_data):
    """Wrap the buffer with np.frombuffer, scale first, then view-cast to complex.

    The uint8 values are mapped to ``(v - 127.5) / 127.5``; that arithmetic
    already yields a fresh float64 array, which is reinterpreted as
    complex128 so each consecutive pair of values becomes one complex
    element.

    Returns a complex128 ndarray with half as many elements as ``raw_data``
    has bytes.
    """
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    scaled = (samples - 127.5) / 127.5
    # No astype(np.float64) here: `scaled` is already float64, so the extra
    # conversion would only add a copy that ctypeslib_view_cast_int_proc
    # does not pay, skewing the comparison between the two.
    return scaled.view(np.complex128)
def ctypeslib_complex_proc(raw_data):
    """Wrap the buffer with np.ctypeslib.as_array rather than np.frombuffer.

    Even-indexed values fill the real part and odd-indexed values the
    imaginary part of a complex128 array, which is then scaled in place by
    ``/ 127.5`` and shifted by ``-(1 + 1j)``.
    """
    samples = np.ctypeslib.as_array(raw_data)
    iq = np.empty(samples.size // 2, dtype=np.complex128)
    evens = samples[::2]
    odds = samples[1::2]
    iq.real = evens
    iq.imag = odds
    iq /= 127.5
    iq -= (1 + 1j)
    return iq
def ctypeslib_int_proc(raw_data):
    """Wrap the buffer with np.ctypeslib.as_array and scale before building the complex array.

    Each value is mapped to ``(v - 127.5) / 127.5`` first; the even-indexed
    results then fill the real part and the odd-indexed results the
    imaginary part of a new complex128 array.
    """
    samples = np.ctypeslib.as_array(raw_data)
    scaled = (samples - 127.5) / 127.5
    iq = np.empty(scaled.size // 2, dtype=np.complex128)
    evens = scaled[::2]
    odds = scaled[1::2]
    iq.real = evens
    iq.imag = odds
    return iq
def ctypeslib_view_cast(raw_data):
    """Wrap the buffer with np.ctypeslib.as_array and pair values by view casting.

    The values are copied to float64 and that array is reinterpreted as
    complex128, so each consecutive pair of values becomes one complex
    element. The result is scaled in place by ``/ (255/2)`` and shifted by
    ``-(1 + 1j)``.
    """
    # https://github.com/roger-/pyrtlsdr/issues/15#issuecomment-71748977
    samples = np.ctypeslib.as_array(raw_data)
    as_float = samples.astype(np.float64)
    iq = as_float.view(np.complex128)
    iq /= (255/2)
    iq -= (1 + 1j)
    return iq
def ctypeslib_view_cast_int_proc(raw_data):
    """Wrap the buffer with np.ctypeslib.as_array, scale first, then view-cast to complex.

    Each value is mapped to ``(v - 127.5) / 127.5`` and the resulting array
    is reinterpreted as complex128, so each consecutive pair of values
    becomes one complex element.
    """
    samples = np.ctypeslib.as_array(raw_data)
    scaled = (samples - 127.5) / 127.5
    iq = scaled.view(np.complex128)
    return iq
def profile_run(func, data, num_runs=1, show_pstats=True, sdr=None, num_samples=4096):
    """Profile ``func`` over ``num_runs`` calls and collect its outputs.

    Only the call to ``func`` itself is inside the profiled region; input
    preparation happens with the profiler disabled.

    Args:
        func: Callable taking one raw buffer and returning an array that fits
            one row of the result matrix.
        data: ndarray used as input for every run (converted with
            ``data_to_buffer`` each time), or ``None`` to read from ``sdr``.
        num_runs: Number of profiled calls; must be at least 1.
        show_pstats: If true, print the function name and its pstats report.
        sdr: Object providing ``read_bytes(n)``; used only when ``data`` is
            ``None``.
        num_samples: Samples per run when reading from ``sdr``
            (``num_samples * 2`` bytes are requested per run).

    Returns:
        Tuple ``(ps, results)``: the ``pstats.Stats`` object sorted by
        cumulative time, and a complex128 array of shape
        ``(num_runs, samples_per_run)`` holding each run's output.

    Raises:
        ValueError: If neither ``data`` nor ``sdr`` is given, or if
            ``num_runs`` is less than 1.
    """
    if data is None and sdr is None:
        raise ValueError('either data or sdr must be provided')
    if num_runs < 1:
        # pstats.Stats refuses to wrap a profile with no recorded calls, so
        # fail early with a message that names the actual problem.
        raise ValueError('num_runs must be at least 1')

    pr = cProfile.Profile()
    data_size = num_samples if data is None else data.size // 2
    results = np.zeros((num_runs, data_size), dtype=np.complex128)
    for i in range(num_runs):
        if data is None:
            raw_data = sdr.read_bytes(num_samples * 2)
        else:
            raw_data = data_to_buffer(data)
        pr.enable()
        r = func(raw_data)
        pr.disable()
        # Row assignment also covers the single-run case (results has 1 row).
        results[i] = r

    ps = pstats.Stats(pr)
    ps.strip_dirs()
    ps.sort_stats('cumulative')
    if show_pstats:
        print(func.__name__)
        print('-'*80)
        ps.print_stats()
    return ps, results
# Conversion implementations to benchmark. test_all() and test_all_with_sdr()
# iterate over this list and profile each entry in the order given here.
TEST_FUNCTIONS = [
    original,
    original_hardcode,
    frombuffer_complex_proc,
    frombuffer_complex_proc_hardcode,
    frombuffer_int_proc,
    frombuffer_view_cast,
    frombuffer_view_cast_int_proc,
    ctypeslib_complex_proc,
    ctypeslib_int_proc,
    ctypeslib_view_cast,
    ctypeslib_view_cast_int_proc,
]
def test_all(num_bytes=8192, num_runs=256, show_pstats=True):
    """Profile every function in TEST_FUNCTIONS on generated data and cross-check outputs.

    Args:
        num_bytes: Size of the generated input passed to ``build_data``.
        num_runs: Number of profiled calls per function.
        show_pstats: Forwarded to ``profile_run``; prints each pstats report.

    Returns:
        Tuple ``(stats, results)`` of dicts keyed by function name, holding
        the ``pstats.Stats`` object and the stacked result array for each
        function.

    Raises:
        AssertionError: If any two functions produce results that are not
            ``np.allclose``; the message names the disagreeing pair.
    """
    stats = {}
    results = {}
    data = build_data(num_bytes)
    for func in TEST_FUNCTIONS:
        ps, r = profile_run(func, data, num_runs, show_pstats)
        stats[func.__name__] = ps
        results[func.__name__] = r

    # flatten() copies, so do it once per function instead of once per pair.
    flattened = {name: r.flatten() for name, r in results.items()}
    # np.allclose is not symmetric (the tolerance scales with its second
    # argument), so every ordered pair is checked.
    for name, iq in flattened.items():
        for other_name, other_iq in flattened.items():
            if other_name == name:
                continue
            assert np.allclose(iq, other_iq), (
                '{} and {} produced different results'.format(name, other_name))
    return stats, results
def test_all_with_sdr(sdr, num_samples=4096, num_runs=64, show_pstats=True):
    """Profile every function in TEST_FUNCTIONS on bytes read from ``sdr``.

    Each function is run through ``profile_run`` with ``data=None`` so the
    input is read from ``sdr`` for every run. No cross-checking of results
    is done here.

    Returns a tuple ``(stats, results)`` of dicts keyed by function name.
    """
    stats, results = {}, {}
    for func in TEST_FUNCTIONS:
        name = func.__name__
        profile, output = profile_run(func, None, num_runs, show_pstats, sdr, num_samples)
        stats[name] = profile
        results[name] = output
    return stats, results
class Stats(object):
    """Keyed collection of ``Stat`` objects that iterates in sorted order."""

    def __init__(self):
        # Maps key -> Stat.
        self.stat_objs = {}

    def __setitem__(self, key, item):
        """Store ``item`` under ``key``, wrapping it in a ``Stat`` if it is not one already."""
        wrapped = item if isinstance(item, Stat) else Stat(key, item)
        self.stat_objs[key] = wrapped

    def __getitem__(self, key):
        """Return the entry stored under ``key``."""
        return self.stat_objs[key]

    def __iter__(self):
        """Yield the stored entries in ascending sort order."""
        yield from sorted(self.stat_objs.values())

    def max_width(self):
        """Return the length of the longest ``str()`` of any entry (0 when empty)."""
        return max((len(str(entry)) for entry in self), default=0)
class Stat(object):
    """One named profiling result, ordered by total time and then by name.

    Args:
        name: Label for the result.
        pstat: Object exposing ``total_tt``; its value is stored as
            ``total_time``.

    Equality, ordering and hashing are all derived from the pair
    ``(total_time, name)``. Comparisons against objects that are not ``Stat``
    instances return ``NotImplemented``.
    """

    def __init__(self, name, pstat):
        self.name = name
        self.pstat = pstat
        self.total_time = pstat.total_tt

    def _key(self):
        """Return the tuple that defines identity and ordering."""
        return (self.total_time, self.name)

    def _cmp(self, other):
        """Return -1, 0 or 1 as ``self`` sorts before, equal to or after ``other``."""
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._key() >= other._key()

    def __eq__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same key that equality uses so the two stay consistent.
        return hash(self._key())

    def __str__(self):
        return '{self.total_time:f} seconds: {self.name}'.format(self=self)
def print_summary(stats):
    """Print a two-column table of total times, fastest first in each column.

    Entries whose key contains ``'int'`` go in the right-hand column
    ("Int calc methods"); all others go in the left-hand column
    ("Original calc methods").

    Args:
        stats: Mapping of test name to its profiling result, as returned by
            ``test_all`` / ``test_all_with_sdr``.
    """
    int_stats = Stats()
    norm_stats = Stats()
    for key, val in stats.items():
        target = int_stats if 'int' in key else norm_stats
        target[key] = val

    headers = ('Original calc methods', 'Int calc methods')
    # A column must fit its header as well as its entries; otherwise an empty
    # or short column would leave the table misaligned.
    col_widths = [
        max(len(header), column.max_width())
        for header, column in zip(headers, (norm_stats, int_stats))
    ]

    def print_line(col1, col2):
        cells = [cell.ljust(width) for cell, width in zip((col1, col2), col_widths)]
        print('| {0} | {1} |'.format(*cells))

    print('Summary:')
    print('')
    print_line(*headers)
    print_line('-' * col_widths[0], '-' * col_widths[1])
    # The columns may differ in length; blank cells pad the shorter one.
    for norm_stat, int_stat in zip_longest(norm_stats, int_stats, fillvalue=''):
        print_line(str(norm_stat), str(int_stat))
# Command-line entry point: run the benchmarks on generated data (default) or
# on bytes read from an RtlSdr device, print the summary table, and optionally
# dump each test's pstats data to a directory.
if __name__ == '__main__':
    p = argparse.ArgumentParser()
    p.add_argument('-d', dest='data_dir', help='Path for dumping profile results')
    p.add_argument('--use-sdr', dest='use_sdr', action='store_true', help='Use RtlSdr device')
    p.add_argument('--show-pstats', dest='show_pstats', action='store_true', help='Show pstats result for each test')
    p.add_argument('--num-bytes', dest='num_bytes', type=int, default=8192)
    p.add_argument('--num-runs', dest='num_runs', type=int, default=256)
    p.add_argument('--num-samples', dest='num_samples', type=int, default=4096, help='Number of samples (in sdr mode)')
    args = p.parse_args()
    if args.use_sdr:
        # Imported lazily so the generated-data mode works without rtlsdr installed.
        from rtlsdr import RtlSdr
        sdr = RtlSdr()
        try:
            print('starting sdr test: num_samples={}, num_runs={}'.format(args.num_samples, args.num_runs))
            stats, results = test_all_with_sdr(sdr, args.num_samples, args.num_runs, args.show_pstats)
        finally:
            # Release the device even if a test raises.
            sdr.close()
    else:
        print('starting test: num_bytes={}, num_runs={}'.format(args.num_bytes, args.num_runs))
        stats, results = test_all(args.num_bytes, args.num_runs, args.show_pstats)
    print_summary(stats)
    if args.data_dir:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(args.data_dir, exist_ok=True)
        for testname, ps in stats.items():
            fn = os.path.join(args.data_dir, '{}.pstats'.format(testname))
            print('dumping stats: {}'.format(fn))
            ps.dump_stats(fn)
@nocarryr
Copy link
Author

starting test: num_bytes=8192, num_runs=256
original
--------------------------------------------------------------------------------
         1024 function calls in 0.322 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      256    0.320    0.001    0.322    0.001 test_numpy_efficiency.py:25(original)
      256    0.002    0.000    0.002    0.000 {built-in method numpy.core.multiarray.empty}
      256    0.000    0.000    0.000    0.000 {built-in method builtins.len}
      256    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


original_hardcode
--------------------------------------------------------------------------------
         1024 function calls in 0.326 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      256    0.323    0.001    0.326    0.001 test_numpy_efficiency.py:34(original_hardcode)
      256    0.002    0.000    0.002    0.000 {built-in method numpy.core.multiarray.empty}
      256    0.000    0.000    0.000    0.000 {built-in method builtins.len}
      256    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


frombuffer_complex_proc
--------------------------------------------------------------------------------
         1024 function calls in 0.026 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      256    0.025    0.000    0.026    0.000 test_numpy_efficiency.py:43(frombuffer_complex_proc)
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.empty}
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.frombuffer}
      256    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


frombuffer_complex_proc_hardcode
--------------------------------------------------------------------------------
         1024 function calls in 0.026 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      256    0.025    0.000    0.026    0.000 test_numpy_efficiency.py:53(frombuffer_complex_proc_hardcode)
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.empty}
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.frombuffer}
      256    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


frombuffer_int_proc
--------------------------------------------------------------------------------
         1024 function calls in 0.024 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      256    0.022    0.000    0.024    0.000 test_numpy_efficiency.py:63(frombuffer_int_proc)
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.empty}
      256    0.001    0.000    0.001    0.000 {built-in method numpy.core.multiarray.frombuffer}
      256    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


@nocarryr
Copy link
Author

nocarryr commented Aug 16, 2018

starting test: num_bytes=16384, num_runs=1024
Summary:

| Original calc methods                              | Int calc methods                                |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.178670 seconds: frombuffer_view_cast             | 0.134537 seconds: ctypeslib_view_cast_int_proc  |
| 0.181929 seconds: frombuffer_complex_proc          | 0.144247 seconds: frombuffer_view_cast_int_proc |
| 0.185374 seconds: frombuffer_complex_proc_hardcode | 0.162626 seconds: frombuffer_int_proc           |
| 0.189538 seconds: ctypeslib_view_cast              | 0.168155 seconds: ctypeslib_int_proc            |
| 0.193424 seconds: ctypeslib_complex_proc           |                                                 |
| 2.480207 seconds: original                         |                                                 |
| 2.485661 seconds: original_hardcode                |                                                 |

@nocarryr
Copy link
Author

starting sdr test: num_samples=4096, num_runs=256
Summary:

| Original calc methods                              | Int calc methods                                |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.075824 seconds: frombuffer_view_cast             | 0.063961 seconds: ctypeslib_view_cast_int_proc  |
| 0.077738 seconds: ctypeslib_view_cast              | 0.066412 seconds: frombuffer_view_cast_int_proc |
| 0.078438 seconds: frombuffer_complex_proc          | 0.076373 seconds: frombuffer_int_proc           |
| 0.082369 seconds: ctypeslib_complex_proc           | 0.084129 seconds: ctypeslib_int_proc            |
| 0.082578 seconds: frombuffer_complex_proc_hardcode |                                                 |
| 0.515942 seconds: original                         |                                                 |
| 0.526086 seconds: original_hardcode                |                                                 |

@nocarryr
Copy link
Author

starting sdr test: num_samples=16384, num_runs=256
Summary:

| Original calc methods                              | Int calc methods                                |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.186796 seconds: ctypeslib_view_cast              | 0.181220 seconds: ctypeslib_view_cast_int_proc  |
| 0.201872 seconds: frombuffer_view_cast             | 0.188237 seconds: frombuffer_view_cast_int_proc |
| 0.204594 seconds: ctypeslib_complex_proc           | 0.206626 seconds: frombuffer_int_proc           |
| 0.210123 seconds: frombuffer_complex_proc_hardcode | 0.212620 seconds: ctypeslib_int_proc            |
| 0.215597 seconds: frombuffer_complex_proc          |                                                 |
| 2.063196 seconds: original                         |                                                 |
| 2.108378 seconds: original_hardcode                |                                                 |

@nocarryr
Copy link
Author

starting sdr test: num_samples=32768, num_runs=128
Summary:

| Original calc methods                              | Int calc methods                                |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.182868 seconds: ctypeslib_view_cast              | 0.147324 seconds: ctypeslib_view_cast_int_proc  |
| 0.186696 seconds: frombuffer_complex_proc          | 0.164336 seconds: frombuffer_view_cast_int_proc |
| 0.187644 seconds: frombuffer_complex_proc_hardcode | 0.177535 seconds: ctypeslib_int_proc            |
| 0.190678 seconds: ctypeslib_complex_proc           | 0.180414 seconds: frombuffer_int_proc           |
| 0.191777 seconds: frombuffer_view_cast             |                                                 |
| 1.904986 seconds: original_hardcode                |                                                 |
| 1.906985 seconds: original                         |                                                 |

@nocarryr
Copy link
Author

nocarryr commented Aug 17, 2018

From the last three result sets it seems that:

  • View casting is faster than slicing
  • The performance hit from scaling complex128 arrays versus float64 arrays has less of an impact as the number of samples increases
  • Larger arrays are handled more efficiently with np.ctypeslib.as_array() as opposed to np.frombuffer()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment