Last active
August 17, 2018 15:48
-
-
Save nocarryr/4e55c117daf1ba57ceff1dcd2e5cdaab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import argparse | |
import cProfile | |
import pstats | |
from itertools import zip_longest | |
import numpy as np | |
def build_data(num_bytes):
    """Build a deterministic ``uint8`` test pattern of ``num_bytes`` bytes.

    The base pattern is 512 bytes long: an ascending ramp (0..255) in the
    even positions interleaved with a descending ramp (255..0) in the odd
    positions.  It is repeated as often as necessary and then truncated to
    ``num_bytes``.
    """
    ascending = np.arange(256, dtype=np.uint8)
    pattern = np.zeros(2 * ascending.size, dtype=np.uint8)
    pattern[0::2] = ascending
    pattern[1::2] = ascending[::-1]
    # One repetition more than strictly needed so the slice is always full.
    repeats = num_bytes // pattern.size + 1
    return np.tile(pattern, repeats)[:num_bytes]
def data_to_buffer(data):
    """Return ``data`` as a ctypes array of unsigned bytes.

    The input is first coerced to a ``uint8`` ndarray (no copy is made when
    it already is one) and then exposed through ``np.ctypeslib.as_ctypes``.
    """
    return np.ctypeslib.as_ctypes(np.asarray(data, dtype=np.uint8))
# TEST FUNCTIONS | |
def original(raw_data):
    """Current implementation

    Slices ``raw_data`` directly (even items -> real part, odd items ->
    imaginary part), then divides by ``255/2`` and subtracts ``1 + 1j`` in
    place.
    """
    num_samples = len(raw_data) // 2
    samples = np.empty(num_samples, 'complex')
    samples.real = raw_data[::2]
    samples.imag = raw_data[1::2]
    samples /= (255/2)
    samples -= (1 + 1j)
    return samples
def original_hardcode(raw_data):
    """Current implementation with hardcoded scalars

    Same as ``original`` except that the divisor is the literal ``127.5``
    instead of the expression ``255/2``.
    """
    num_samples = len(raw_data) // 2
    samples = np.empty(num_samples, 'complex')
    samples.real = raw_data[::2]
    samples.imag = raw_data[1::2]
    samples /= 127.5
    samples -= (1 + 1j)
    return samples
def frombuffer_complex_proc(raw_data):
    """Using np.frombuffer, but other methods match original

    The buffer is wrapped as a ``uint8`` array, de-interleaved into a
    ``complex128`` array by slicing, and scaled in place with ``255/2``.
    """
    raw = np.frombuffer(raw_data, dtype=np.uint8)
    samples = np.empty(raw.size // 2, dtype=np.complex128)
    samples.real = raw[::2]
    samples.imag = raw[1::2]
    samples /= (255/2)
    samples -= (1 + 1j)
    return samples
def frombuffer_complex_proc_hardcode(raw_data):
    """Using np.frombuffer and hardcoded scalars

    Identical to ``frombuffer_complex_proc`` except that the divisor is the
    literal ``127.5``.
    """
    raw = np.frombuffer(raw_data, dtype=np.uint8)
    samples = np.empty(raw.size // 2, dtype=np.complex128)
    samples.real = raw[::2]
    samples.imag = raw[1::2]
    samples /= 127.5
    samples -= (1 + 1j)
    return samples
def frombuffer_int_proc(raw_data):
    """Using np.frombuffer and scaling the int array before converting to complex

    The byte array is shifted and scaled as a real-valued array first; the
    scaled values are then de-interleaved into a ``complex128`` array.
    """
    scaled = (np.frombuffer(raw_data, dtype=np.uint8) - 127.5) / 127.5
    samples = np.empty(scaled.size // 2, dtype=np.complex128)
    samples.real = scaled[::2]
    samples.imag = scaled[1::2]
    return samples
def frombuffer_view_cast(raw_data):
    """Using np.frombuffer and view casting instead of slicing

    The bytes are converted to ``float64`` and that array is reinterpreted
    as ``complex128`` (adjacent pairs become real/imag), then scaled in
    place.
    """
    as_float = np.frombuffer(raw_data, dtype=np.uint8).astype(np.float64)
    samples = as_float.view(np.complex128)
    samples /= 127.5
    samples -= (1 + 1j)
    return samples
def frombuffer_view_cast_int_proc(raw_data):
    """Using np.frombuffer, view casting and int processing

    The byte array is shifted and scaled as a real-valued array, and the
    result is reinterpreted as ``complex128`` so that adjacent pairs become
    the real and imaginary parts.
    """
    data = np.frombuffer(raw_data, dtype=np.uint8)
    # Arithmetic with Python floats already yields a fresh float64 array, so
    # it can be viewed as complex128 directly.  An additional
    # ``astype(np.float64)`` would only make a redundant copy, which the
    # ctypeslib counterpart of this function does not pay for.
    data = (data - 127.5) / 127.5
    return data.view(np.complex128)
def ctypeslib_complex_proc(raw_data):
    """Using np.ctypeslib.as_array instead of np.frombuffer

    The ctypes buffer is wrapped as an ndarray, de-interleaved into a
    ``complex128`` array by slicing, and scaled in place.
    """
    raw = np.ctypeslib.as_array(raw_data)
    samples = np.empty(raw.size // 2, dtype=np.complex128)
    samples.real = raw[::2]
    samples.imag = raw[1::2]
    samples /= 127.5
    samples -= (1 + 1j)
    return samples
def ctypeslib_int_proc(raw_data):
    """Using np.ctypeslib.as_array and int processing

    The wrapped byte array is shifted and scaled as a real-valued array
    first; the scaled values are then de-interleaved into a ``complex128``
    array.
    """
    scaled = (np.ctypeslib.as_array(raw_data) - 127.5) / 127.5
    samples = np.empty(scaled.size // 2, dtype=np.complex128)
    samples.real = scaled[::2]
    samples.imag = scaled[1::2]
    return samples
def ctypeslib_view_cast(raw_data):
    """Using np.ctypeslib.as_array and view casting

    The wrapped bytes are converted to ``float64`` and reinterpreted as
    ``complex128`` (adjacent pairs become real/imag), then scaled in place
    with ``255/2``.
    """
    # https://github.com/roger-/pyrtlsdr/issues/15#issuecomment-71748977
    as_float = np.ctypeslib.as_array(raw_data).astype(np.float64)
    samples = as_float.view(np.complex128)
    samples /= (255/2)
    samples -= (1 + 1j)
    return samples
def ctypeslib_view_cast_int_proc(raw_data):
    """Using np.ctypeslib.as_array, view casting and int processing

    The wrapped byte array is shifted and scaled as a real-valued array, and
    the result is reinterpreted as ``complex128``.
    """
    scaled = (np.ctypeslib.as_array(raw_data) - 127.5) / 127.5
    return scaled.view(np.complex128)
def profile_run(func, data, num_runs=1, show_pstats=True, sdr=None, num_samples=4096):
    """Profile ``func`` over ``num_runs`` calls and collect what it returns.

    Parameters:
        func: callable taking one raw byte buffer and returning an array
            that fits one row of the result matrix.
        data: array of raw bytes to feed to ``func`` on every run, or
            ``None`` to read fresh bytes from ``sdr`` on every run.
        num_runs: number of profiled calls.
        show_pstats: when true, print the function name and its profile.
        sdr: object with a ``read_bytes(n)`` method; only used when ``data``
            is ``None``.
        num_samples: number of complex samples per run when reading from
            ``sdr`` (``num_samples * 2`` bytes are requested).

    Returns:
        ``(ps, results)`` where ``ps`` is a ``pstats.Stats`` sorted by
        cumulative time and ``results`` is a ``(num_runs, samples)``
        ``complex128`` array holding the output of each run.

    Raises:
        ValueError: if both ``data`` and ``sdr`` are ``None``.
    """
    if data is None and sdr is None:
        # Fail before profiling starts instead of with an AttributeError on
        # ``None`` in the middle of the run loop.
        raise ValueError('either data or sdr must be provided')

    pr = cProfile.Profile()
    data_size = num_samples if data is None else data.size // 2
    results = np.zeros((num_runs, data_size), dtype=np.complex128)

    for i in range(num_runs):
        # Input preparation is kept outside the enable()/disable() window so
        # only ``func`` itself is measured.
        if data is None:
            raw_data = sdr.read_bytes(num_samples * 2)
        else:
            raw_data = data_to_buffer(data)
        pr.enable()
        r = func(raw_data)
        pr.disable()
        # Row assignment also covers the single-run case.
        results[i] = r

    ps = pstats.Stats(pr)
    ps.strip_dirs()
    ps.sort_stats('cumulative')
    if show_pstats:
        print(func.__name__)
        print('-'*80)
        ps.print_stats()
    return ps, results
# Conversion implementations under test.  test_all() and test_all_with_sdr()
# profile every entry, in this order, and key their results by __name__.
TEST_FUNCTIONS = [
    original,
    original_hardcode,
    frombuffer_complex_proc,
    frombuffer_complex_proc_hardcode,
    frombuffer_int_proc,
    frombuffer_view_cast,
    frombuffer_view_cast_int_proc,
    ctypeslib_complex_proc,
    ctypeslib_int_proc,
    ctypeslib_view_cast,
    ctypeslib_view_cast_int_proc,
]
def test_all(num_bytes=8192, num_runs=256, show_pstats=True):
    """Profile every entry of ``TEST_FUNCTIONS`` on generated data.

    All functions receive the same ``build_data(num_bytes)`` input, so their
    outputs are cross-checked against each other after profiling.

    Returns:
        ``(stats, results)``: two dicts keyed by function name holding the
        ``pstats.Stats`` object and the stacked output array respectively.

    Raises:
        AssertionError: if any two implementations produce results that are
            not ``np.allclose``; the message names the offending pair.
    """
    stats = {}
    results = {}
    data = build_data(num_bytes)
    for func in TEST_FUNCTIONS:
        ps, r = profile_run(func, data, num_runs, show_pstats)
        stats[func.__name__] = ps
        results[func.__name__] = r

    # Flatten each result once instead of once per comparison.
    flat = {name: r.flatten() for name, r in results.items()}
    for name, iq in flat.items():
        for other_name, other_iq in flat.items():
            if other_name == name:
                continue
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs under ``python -O`` and reports which pair differs.
            if not np.allclose(iq, other_iq):
                raise AssertionError(
                    'results of {!r} and {!r} differ'.format(name, other_name)
                )
    return stats, results
def test_all_with_sdr(sdr, num_samples=4096, num_runs=64, show_pstats=True):
    """Profile every entry of ``TEST_FUNCTIONS`` on bytes read from ``sdr``.

    Each run reads its own bytes from the device, so the outputs of the
    different functions are not compared with each other.

    Returns:
        ``(stats, results)``: two dicts keyed by function name.
    """
    stats, results = {}, {}
    for func in TEST_FUNCTIONS:
        name = func.__name__
        stats[name], results[name] = profile_run(
            func, None, num_runs, show_pstats, sdr, num_samples
        )
    return stats, results
class Stats(object):
    """Keyed collection of ``Stat`` objects that iterates in sorted order."""

    def __init__(self):
        self.stat_objs = {}

    def __setitem__(self, key, item):
        # Anything that is not already a Stat is wrapped in one.
        self.stat_objs[key] = item if isinstance(item, Stat) else Stat(key, item)

    def __getitem__(self, key):
        return self.stat_objs[key]

    def __iter__(self):
        yield from sorted(self.stat_objs.values())

    def max_width(self):
        """Return the length of the longest ``str()`` of any entry (0 if empty)."""
        return max((len(str(entry)) for entry in self), default=0)
class Stat(object):
    """Named profiling result, ordered by total time and then by name.

    Parameters:
        name: label of the profiled function.
        pstat: object exposing a ``total_tt`` attribute (as ``pstats.Stats``
            does); its value is stored as ``total_time``.
    """

    def __init__(self, name, pstat):
        self.name = name
        self.pstat = pstat
        self.total_time = pstat.total_tt

    def _key(self):
        # Sort key: fastest first, name as the tie-breaker.
        return (self.total_time, self.name)

    def _cmp(self, other):
        """Return -1, 0 or 1 as ``self`` sorts before, with or after ``other``."""
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    # Every rich comparison returns NotImplemented for foreign types so that
    # e.g. ``stat == ''`` evaluates to False instead of raising
    # AttributeError.

    def __lt__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._cmp(other) == -1

    def __le__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._cmp(other) <= 0

    def __gt__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._cmp(other) == 1

    def __ge__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._cmp(other) >= 0

    def __eq__(self, other):
        if not isinstance(other, Stat):
            return NotImplemented
        return self._cmp(other) == 0

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same key that equality compares.
        return hash(self._key())

    def __repr__(self):
        return '{}({!r}, total_time={!r})'.format(
            type(self).__name__, self.name, self.total_time
        )

    def __str__(self):
        return '{self.total_time:f} seconds: {self.name}'.format(self=self)
def print_summary(stats):
    """Print a two-column table of total times for the profiled functions.

    Entries of ``stats`` (a mapping of function name to ``pstats.Stats``)
    whose name contains ``'int'`` go into the right-hand column, all others
    into the left-hand column.  Within a column, rows follow the iteration
    order of ``Stats``.
    """
    headers = ('Original calc methods', 'Int calc methods')
    norm_stats = Stats()
    int_stats = Stats()
    for key, val in stats.items():
        column = int_stats if 'int' in key else norm_stats
        column[key] = val

    # A column is never narrower than its header, so the table stays aligned
    # even when a column is empty or holds only short entries.
    col_widths = [
        max(len(header), column.max_width())
        for header, column in zip(headers, (norm_stats, int_stats))
    ]

    def print_line(col1, col2):
        padded = [col.ljust(width) for col, width in zip((col1, col2), col_widths)]
        print('| {0} | {1} |'.format(*padded))

    print('Summary:')
    print('')
    print_line(*headers)
    print_line('-' * col_widths[0], '-' * col_widths[1])
    # The shorter column is padded with empty cells.
    for norm_stat, int_stat in zip_longest(norm_stats, int_stats, fillvalue=''):
        print_line(str(norm_stat), str(int_stat))
if __name__ == '__main__':
    # Command-line entry point: run the benchmark on generated data or on a
    # real RtlSdr device, print the summary and optionally dump the profiles.
    p = argparse.ArgumentParser()
    p.add_argument('-d', dest='data_dir', help='Path for dumping profile results')
    p.add_argument('--use-sdr', dest='use_sdr', action='store_true', help='Use RtlSdr device')
    p.add_argument('--show-pstats', dest='show_pstats', action='store_true', help='Show pstats result for each test')
    p.add_argument('--num-bytes', dest='num_bytes', type=int, default=8192)
    p.add_argument('--num-runs', dest='num_runs', type=int, default=256)
    p.add_argument('--num-samples', dest='num_samples', type=int, default=4096, help='Number of samples (in sdr mode)')
    args = p.parse_args()
    if args.use_sdr:
        # Imported lazily so the generated-data mode works without pyrtlsdr.
        from rtlsdr import RtlSdr
        sdr = RtlSdr()
        try:
            print('starting sdr test: num_samples={}, num_runs={}'.format(args.num_samples, args.num_runs))
            stats, results = test_all_with_sdr(sdr, args.num_samples, args.num_runs, args.show_pstats)
        finally:
            # Release the device even if a test function raises.
            sdr.close()
    else:
        print('starting test: num_bytes={}, num_runs={}'.format(args.num_bytes, args.num_runs))
        stats, results = test_all(args.num_bytes, args.num_runs, args.show_pstats)
    print_summary(stats)
    if args.data_dir:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(args.data_dir, exist_ok=True)
        for testname, ps in stats.items():
            fn = os.path.join(args.data_dir, '{}.pstats'.format(testname))
            print('dumping stats: {}'.format(fn))
            ps.dump_stats(fn)
Author
nocarryr
commented
Aug 16, 2018
•
starting sdr test: num_samples=4096, num_runs=256
Summary:
| Original calc methods | Int calc methods |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.075824 seconds: frombuffer_view_cast | 0.063961 seconds: ctypeslib_view_cast_int_proc |
| 0.077738 seconds: ctypeslib_view_cast | 0.066412 seconds: frombuffer_view_cast_int_proc |
| 0.078438 seconds: frombuffer_complex_proc | 0.076373 seconds: frombuffer_int_proc |
| 0.082369 seconds: ctypeslib_complex_proc | 0.084129 seconds: ctypeslib_int_proc |
| 0.082578 seconds: frombuffer_complex_proc_hardcode | |
| 0.515942 seconds: original | |
| 0.526086 seconds: original_hardcode | |
starting sdr test: num_samples=16384, num_runs=256
Summary:
| Original calc methods | Int calc methods |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.186796 seconds: ctypeslib_view_cast | 0.181220 seconds: ctypeslib_view_cast_int_proc |
| 0.201872 seconds: frombuffer_view_cast | 0.188237 seconds: frombuffer_view_cast_int_proc |
| 0.204594 seconds: ctypeslib_complex_proc | 0.206626 seconds: frombuffer_int_proc |
| 0.210123 seconds: frombuffer_complex_proc_hardcode | 0.212620 seconds: ctypeslib_int_proc |
| 0.215597 seconds: frombuffer_complex_proc | |
| 2.063196 seconds: original | |
| 2.108378 seconds: original_hardcode | |
starting sdr test: num_samples=32768, num_runs=128
Summary:
| Original calc methods | Int calc methods |
| -------------------------------------------------- | ----------------------------------------------- |
| 0.182868 seconds: ctypeslib_view_cast | 0.147324 seconds: ctypeslib_view_cast_int_proc |
| 0.186696 seconds: frombuffer_complex_proc | 0.164336 seconds: frombuffer_view_cast_int_proc |
| 0.187644 seconds: frombuffer_complex_proc_hardcode | 0.177535 seconds: ctypeslib_int_proc |
| 0.190678 seconds: ctypeslib_complex_proc | 0.180414 seconds: frombuffer_int_proc |
| 0.191777 seconds: frombuffer_view_cast | |
| 1.904986 seconds: original_hardcode | |
| 1.906985 seconds: original | |
From the last three result sets it seems that:
- View casting is faster than slicing
- The performance hit from scaling `complex128` arrays versus `float64` arrays has less of an impact as the number of samples increases
- Larger arrays are handled more efficiently with `np.ctypeslib.as_array()` as opposed to `np.frombuffer()`
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment