# dayjaby/merge_datasets.py

## merge_datasets.py
import numpy as np

# Sample inputs: every row is a [timestamp, payload] pair.
data1 = np.array(
    [
        [3, 0xF0A],
        [7, 0xF0B],
        [12, 0xF0C],
        [18, 0xF0D],
    ]
)

# Second series; the fractional timestamp makes this array floating point.
data2 = np.array(
    [
        [2, 0xEE0],
        [8, 0xEE1],
        [11.9, 0xEE2],
        [13, 0xEE3],
        [17, 0xEE4],
    ]
)

# Third series; its first timestamp (8) is later than the first two
# timestamps in data1.
data3 = np.array(
    [
        [8, 0xE1],
        [11.9, 0xE2],
        [13, 0xE3],
        [17, 0xE4],
    ]
)

def merge_datasets(*datasets):
    """Join several timestamped tables onto the rows of the first one.

    Every dataset is a 2-D array-like whose column 0 holds a timestamp and
    whose remaining columns hold data. The result has one row per row of
    the first dataset: all columns of the first dataset, followed by the
    data columns (everything but column 0) of each further dataset.

    For each result row and each further dataset, the data columns are
    taken from the last row (in array order) of that dataset whose
    timestamp is less than or equal to the row's timestamp. For a dataset
    sorted by ascending timestamp that is the most recent sample. If no row
    qualifies, the cells stay NaN.

    Args:
        *datasets: One or more 2-D array-likes (NumPy arrays or nested
            sequences). The first one defines the output rows.

    Returns:
        A new float ``numpy.ndarray`` holding the merged table.

    Raises:
        IndexError: If called without any dataset.
    """
    # Convert every input, not only the first, so nested lists/tuples are
    # accepted in any position.
    arrays = [np.asarray(dataset) for dataset in datasets]
    first, others = arrays[0], arrays[1:]
    n_rows, n_first_cols = first.shape[0], first.shape[1]

    extra_cols = sum(other.shape[1] - 1 for other in others)
    # Start from an all-NaN table so cells without a match need no further
    # handling.
    merged = np.full((n_rows, n_first_cols + extra_cols), np.nan)
    merged[:, :n_first_cols] = first

    col = n_first_cols
    for other in others:
        width = other.shape[1] - 1
        timestamps = other[:, 0]
        for row in range(n_rows):
            matches = np.flatnonzero(timestamps <= merged[row, 0])
            if matches.size:
                # flatnonzero returns ascending indices, so the last entry
                # is the highest matching row index.
                merged[row, col:col + width] = other[matches[-1], 1:]
        col += width
    return merged

# Demo: merge the three sample arrays defined above and print the result.
print(merge_datasets(data1, data2, data3))

# Prints:
# [[3.000e+00 3.850e+03 3.808e+03       nan]
#  [7.000e+00 3.851e+03 3.808e+03       nan]
#  [1.200e+01 3.852e+03 3.810e+03 2.260e+02]
#  [1.800e+01 3.853e+03 3.812e+03 2.280e+02]]
	import numpy as np

	# Sample inputs: every row is a [timestamp, payload] pair.
	data1 = np.array(
		[
			[3, 0xF0A],
			[7, 0xF0B],
			[12, 0xF0C],
			[18, 0xF0D],
		]
	)

	# Second series; the fractional timestamp makes this array floating point.
	data2 = np.array(
		[
			[2, 0xEE0],
			[8, 0xEE1],
			[11.9, 0xEE2],
			[13, 0xEE3],
			[17, 0xEE4],
		]
	)

	# Third series; its first timestamp (8) is later than the first two
	# timestamps in data1.
	data3 = np.array(
		[
			[8, 0xE1],
			[11.9, 0xE2],
			[13, 0xE3],
			[17, 0xE4],
		]
	)

	def merge_datasets(*datasets):
		"""Join several timestamped tables onto the rows of the first one.

		Every dataset is a 2-D array-like whose column 0 holds a timestamp and
		whose remaining columns hold data. The result has one row per row of
		the first dataset: all columns of the first dataset, followed by the
		data columns (everything but column 0) of each further dataset.

		For each result row and each further dataset, the data columns are
		taken from the last row (in array order) of that dataset whose
		timestamp is less than or equal to the row's timestamp. For a dataset
		sorted by ascending timestamp that is the most recent sample. If no row
		qualifies, the cells stay NaN.

		Args:
			*datasets: One or more 2-D array-likes (NumPy arrays or nested
				sequences). The first one defines the output rows.

		Returns:
			A new float ``numpy.ndarray`` holding the merged table.

		Raises:
			IndexError: If called without any dataset.
		"""
		# Convert every input, not only the first, so nested lists/tuples are
		# accepted in any position.
		arrays = [np.asarray(dataset) for dataset in datasets]
		first, others = arrays[0], arrays[1:]
		n_rows, n_first_cols = first.shape[0], first.shape[1]

		extra_cols = sum(other.shape[1] - 1 for other in others)
		# Start from an all-NaN table so cells without a match need no further
		# handling.
		merged = np.full((n_rows, n_first_cols + extra_cols), np.nan)
		merged[:, :n_first_cols] = first

		col = n_first_cols
		for other in others:
			width = other.shape[1] - 1
			timestamps = other[:, 0]
			for row in range(n_rows):
				matches = np.flatnonzero(timestamps <= merged[row, 0])
				if matches.size:
					# flatnonzero returns ascending indices, so the last entry
					# is the highest matching row index.
					merged[row, col:col + width] = other[matches[-1], 1:]
			col += width
		return merged

	# Demo: merge the three sample arrays defined above and print the result.
	print(merge_datasets(data1, data2, data3))

	# Prints:
	# [[3.000e+00 3.850e+03 3.808e+03       nan]
	#  [7.000e+00 3.851e+03 3.808e+03       nan]
	#  [1.200e+01 3.852e+03 3.810e+03 2.260e+02]
	#  [1.800e+01 3.853e+03 3.812e+03 2.280e+02]]