/// Transmit bit_cnt number of bits from buf (LSB format) starting at the bit offset.
/// "LSB format" here means bit number i is read from byte i/8 of buf using mask 1 << (i % 8).
/// The selected bits are pushed one at a time through push_lsb_buf() after lsb_buf has been
/// cleared, then zero bits are pushed until lsb_buf_bit_index is a multiple of 8, and finally
/// (bit_cnt + 7) / 8 bytes of lsb_buf are passed to spi_transmit().
/// Transmit to target is always byte-aligned with trailing bits=0, so will not cause overrun error.
/// \param buf      Source bits; bytes up to index (offset + bit_cnt - 1) / 8 are read.
/// \param offset   Index of the first bit of buf to transmit.
/// \param bit_cnt  Number of bits to transmit.
static void spi_exchange_transmit(uint8_t buf[], unsigned int offset, unsigned int bit_cnt)
{
	...
	//  Otherwise we must be transmitting the SWD Command Header, which is 8 bits and byte-aligned:
	//  ** host -> trgt offset 0 bits  8: 81
	//  Or JTAG-To-SWD, which is 136 bits and byte-aligned:
	//  ** host -> trgt offset 0 bits 136: ff ff ff ff ff ff ff 9e e7 ff ff ff ff ff ff ff 00
	//  NOTE(review): byte_cnt depends only on bit_cnt (not offset). No check against
	//  sizeof(lsb_buf) is visible in this excerpt - confirm callers keep bit_cnt in range.
	unsigned int byte_cnt = (bit_cnt + 7) / 8;  //  Round up to next byte count.
	//  Start from an all-zero staging buffer and reset the bit write position.
	memset(lsb_buf, 0, sizeof(lsb_buf));
	lsb_buf_bit_index = 0;

	//  Consolidate the bits into LSB buffer before transmitting.
	//  i is an absolute bit index into buf, running over [offset, offset + bit_cnt).
	for (unsigned int i = offset; i < bit_cnt + offset; i++) {
		int bytec = i/8;              //  Byte of buf that holds bit i.
		int bcval = 1 << (i % 8);     //  Mask for bit i within that byte (bit 0 = least significant).
		int next_bit = buf[bytec] & bcval;  //  Non-zero if bit i is set (value is the mask, not 1).
		//  If next_bit is true, push bit 1. Else push bit 0.
		//  push_lsb_buf() is defined elsewhere; presumably it stores the bit at
		//  lsb_buf_bit_index and advances that index - TODO confirm.
		if (next_bit) {
			push_lsb_buf(1);
		} else {
			push_lsb_buf(0);
		}
	}

	//  Pad with null bits until the whole byte is populated.  Should be 2 bits for SWD Write Command.
	//  This loop only terminates if push_lsb_buf() advances lsb_buf_bit_index.
	//  i counts the padding bits pushed; it is not read in the code visible here.
	int i = 0;
	while (lsb_buf_bit_index % 8 != 0) {        
		push_lsb_buf(0);
		i++;
	}
	...
	//  Transmit the consolidated LSB buffer to target.
	//  spi_transmit() receives the rounded-up byte count computed from bit_cnt above.
	spi_transmit(spi_fd, lsb_buf, byte_cnt);
}

/// Transmit len bytes of buf (assumed to be in LSB format) to the SPI device in MSB format
static void spi_transmit(int fd, const uint8_t *buf, unsigned int len) {
	//  Reverse LSB to MSB for LSB buf into MSB buffer.
	for (unsigned int i = 0; i < len; i++) {
		uint8_t b = buf[i];
		msb_buf[i] = reverse_byte[(uint8_t) b];
	}
	//  Transmit the MSB buffer to SPI device.
	struct spi_ioc_transfer tr = {
		.tx_buf = (unsigned long) msb_buf,
		.rx_buf = (unsigned long) NULL,
		.len = len,
		.delay_usecs = delay,
		.speed_hz = speed,
		.bits_per_word = bits,
	};
	int ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
	//  Check SPI result.
	if (ret < 1) { pabort("spi_transmit failed"); }
}