Skip to content

Instantly share code, notes, and snippets.

@BlakeRain
Created December 11, 2020 11:53
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save BlakeRain/354a21571fa9dfe432b46b833ccec595 to your computer and use it in GitHub Desktop.
Allocation of hugepages for DMA in Linux
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
#include <experimental/filesystem>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
namespace fs = std::experimental::filesystem;
// RAII wrapper around a single mapped hugepage: the mapping is released
// (munmap) when the object is destroyed. Instances are shared via
// HugePage::Ref; they must not be copied, since two owners of the same
// region would munmap it twice.
struct HugePage {
  using Ref = std::shared_ptr<HugePage>;

  void *virt;       // Virtual address of the mapping
  uintptr_t phy;    // Physical address backing the mapping
  std::size_t size; // Size of the mapping (in bytes)

  HugePage(void *v, uintptr_t p, std::size_t sz)
      : virt(v), phy(p), size(sz) {
  }

  // Non-copyable: the destructor unmaps 'virt', so a copy would lead to a
  // double munmap (and a dangling 'virt' in the survivor).
  HugePage(const HugePage &) = delete;
  HugePage &operator=(const HugePage &) = delete;

  ~HugePage() {
    int rc = munmap(virt, size);
    assert(rc != -1);
    (void)rc; // avoid an unused-variable warning when NDEBUG strips the assert
  }
};
// -------------------------------------------------------------------
// Describes one hugepage pool exposed by the kernel under
// /sys/kernel/mm/hugepages (e.g. "hugepages-2048kB"); one instance per
// supported hugepage size.
struct HugePageInfo {
std::size_t size; // The size of the hugepage (in bytes)
// Parse the pool's page size out of a directory entry's name; throws
// std::runtime_error when the name does not match the expected pattern.
HugePageInfo(const fs::directory_entry &);
// Allocate a hugepage in this pool
HugePage::Ref allocate() const;
// Load all the available hugepage pools
static std::vector<HugePageInfo> load();
};
// Parse a size string with an optional binary suffix, e.g. "2048k" ->
// 2048 * 1024, "1G" -> 1 GiB. Leading whitespace and a single space
// before the suffix are tolerated. Returns 0 for an empty (or all-space)
// string; any characters after the suffix are ignored.
std::size_t parse_suffixed_size(const std::string &str) {
  auto ptr = str.begin();
  // Skip any spaces. Cast to unsigned char first: passing a plain char
  // (which may be negative) to std::isspace is undefined behaviour.
  while (ptr != str.end() && std::isspace((unsigned char)*ptr)) {
    ++ptr;
  }
  // Make sure that there are still some characters left
  if (ptr == str.end()) {
    return 0;
  }
  // Accumulate the decimal digits
  std::size_t value = 0;
  while (ptr != str.end() && std::isdigit((unsigned char)*ptr)) {
    value = (value * 10) + (*ptr - '0');
    ++ptr;
  }
  if (ptr == str.end()) {
    return value;
  }
  // Allow a single space between the number and its suffix
  if (*ptr == ' ') {
    ++ptr;
  }
  if (ptr != str.end()) {
    // Deliberate cascade: each case falls through to multiply by a
    // further 1024 (G = 1024 * 1024 * 1024, M = 1024 * 1024, K = 1024)
    switch (*ptr) {
    case 'G':
    case 'g':
      value *= 1024;
      // fall through
    case 'M':
    case 'm':
      value *= 1024;
      // fall through
    case 'K':
    case 'k':
      value *= 1024;
      // fall through
    default:
      break;
    }
  }
  return value;
}
// Matches pool directory names such as "hugepages-2048kB", capturing the
// size-with-suffix part ("2048k")
static const std::regex HUGEPAGE_RE{"hugepages-([0-9]+[kKmMgG])[bB]"};

// Derive the pool's page size from its /sys directory name; throws when
// the name does not have the expected "hugepages-<size><suffix>B" shape.
HugePageInfo::HugePageInfo(const fs::directory_entry &entry) {
  const std::string name = entry.path().filename();
  std::smatch groups;
  if (!std::regex_match(name, groups, HUGEPAGE_RE)) {
    throw std::runtime_error("Unable to parse hugepage: " + name);
  }
  // The captured group is the size plus its k/m/g suffix
  size = parse_suffixed_size(groups[1].str());
}
static const fs::path SYS_HUGEPAGE_DIR = "/sys/kernel/mm/hugepages";
std::vector<HugePageInfo> HugePageInfo::load() {
std::vector<HugePageInfo> hugepages;
for (auto &entry : fs::directory_iterator(SYS_HUGEPAGE_DIR)) {
hugepages.emplace_back(entry);
}
return hugepages;
}
#define BIT(n) (1ULL << (n))

// Translate a virtual address in our own address space into a physical
// address by consulting /proc/self/pagemap. Each pagemap entry is a
// 64-bit word indexed by virtual page number: bits 0-54 hold the page
// frame number and bit 63 is the "page present" flag.
// NOTE(review): without CAP_SYS_ADMIN recent kernels report the PFN as
// zero, so the returned address is only meaningful when run privileged.
static uintptr_t virtual_to_physical(const void *vaddr) {
  auto page_size = sysconf(_SC_PAGESIZE);
  int fd = ::open("/proc/self/pagemap", O_RDONLY);
  assert(fd != -1);
  // Seek to this page's entry. Keep the full 64-bit result: for high
  // virtual addresses the byte offset overflows an 'int', which would
  // make the old truncated error check pass on a failed/garbled seek.
  off64_t seek_res = ::lseek64(fd,
      (uintptr_t)vaddr / page_size * sizeof(uintptr_t),
      SEEK_SET);
  assert(seek_res != -1);
  (void)seek_res;
  uintptr_t phy = 0;
  ssize_t read_res = ::read(fd, &phy, sizeof(uintptr_t));
  assert(read_res == sizeof(uintptr_t));
  (void)read_res;
  ::close(fd);
  // The page must be present in memory for the entry to be meaningful
  assert((phy & BIT(63)) != 0);
  // Physical page base (PFN * page size) plus the offset within the page
  return (phy & 0x7fffffffffffffULL) * page_size +
      (uintptr_t)vaddr % page_size;
}
// Map one hugepage from this pool and wrap it in a shared HugePage.
// Throws std::runtime_error when the kernel cannot satisfy the mapping
// (e.g. no hugepages of this size have been reserved).
HugePage::Ref HugePageInfo::allocate() const {
  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#ifdef MAP_HUGE_SHIFT
  // Encode log2(page size) into the flags so the kernel uses THIS pool's
  // page size; a bare MAP_HUGETLB always maps the system default hugepage
  // size, which is wrong for any other pool (e.g. 1 GiB pages).
  unsigned shift = 0;
  for (std::size_t sz = size; sz > 1; sz >>= 1) {
    ++shift;
  }
  flags |= (int)(shift << MAP_HUGE_SHIFT);
#endif
  void *vaddr = (void *)mmap(NULL, size,
      PROT_READ | PROT_WRITE,
      flags,
      -1, 0);
  // Check explicitly rather than assert: the assert vanishes under NDEBUG
  // and MAP_FAILED would then be handed to virtual_to_physical
  if (vaddr == MAP_FAILED) {
    throw std::runtime_error("Failed to map hugepage");
  }
  return std::make_shared<HugePage>(vaddr,
      virtual_to_physical(vaddr),
      size);
}
// -------------------------------------------------------------------
// Header describing one DMA buffer. The headers sit in an array at the
// front of each chunk (hugepage); 'address' points at the buffer's data
// region later in the same page.
struct Buffer {
void *address; // Virtual address of the buffer data
uintptr_t phy; // Corresponding physical memory addr
std::size_t size; // Size of the buffer (in bytes)
Buffer * next; // Next buffer (if in free list)
// Other data fields such as packet length, RSS hash and so on can
// be added here, so long as 'padding' is adjusted accordingly.
uint32_t padding[8];
};
// Make sure that the buffer header is a multiple of 64 bytes in size
// (so the layout computed by Layout::compute stays cache-line friendly)
static_assert(sizeof(Buffer) % 64 == 0,
"Buffer header is not a multiple of 64 bytes in size");
// Header placed at the start of each mapped hugepage: it owns the page
// (via 'dma'), precedes the array of Buffer headers, and links chunks
// together into the pool's chunk list.
struct Chunk {
HugePage::Ref dma; // Keeps the backing hugepage alive
std::size_t buf_size; // Size of each buffer in this chunk (bytes)
Buffer * first_buffer; // First of the Buffer headers that follow
Chunk * next; // Next chunk in the pool's chunk list
uint32_t padding[5]; // Pads the header to a 64-byte multiple
};
// Make sure that the chunk header is a multiple of 64 bytes in size
static_assert(sizeof(Chunk) % 64 == 0,
"Chunk header is not a multiple of 64 bytes in size");
// Round 'p' (an integer or pointer) up to the next multiple of 'a';
// values already on an 'a'-byte boundary are returned unchanged.
template<typename T>
inline T align_to(T p, std::size_t a) {
  const uintptr_t rem = (uintptr_t)p % a;
  if (rem == 0) {
    return p;
  }
  return p + (a - rem);
}
// Describes how buffers are packed into a single hugepage ("chunk"):
// header sizes, alignment padding ("slop") and the total space used.
// The constructor immediately searches for the largest buffer count
// that fits in one page of 'page_size' bytes.
struct Layout {
// Our arguments/input variables
std::size_t buffer_size; // Requested data size of each buffer (bytes)
std::size_t alignment; // Required alignment of each buffer's data
std::size_t nbuffers; // Number of buffers per chunk (computed)
// Computed layout information
uint64_t chunk_header_size{0}; // Chunk header plus all Buffer headers
uint64_t buffer0_offset{0}; // Offset of the first buffer's data
uint64_t chunk_slop{0}; // Padding between headers and first buffer
uint64_t buffer_slop{0}; // Padding between consecutive buffers
uint64_t chunk_space{0}; // Total bytes the chunk occupies
Layout(std::size_t size,
std::size_t align,
std::size_t page_size)
: buffer_size(size),
alignment(align),
nbuffers(1) {
optimize(page_size);
}
private:
// Compute the layout information for a given number of buffers
void compute(std::size_t n);
// Try and find a "best fit" buffer count
void optimize(std::size_t page_size);
};
// Compute the layout for 'n' buffers: header sizes, padding (slop) and
// the total space required ('chunk_space'). Expects n >= 1.
void Layout::compute(std::size_t n) {
  nbuffers = n;
  // Calculate the chunk header size (C + n * H)
  chunk_header_size = sizeof(Chunk) + sizeof(Buffer) * n;
  // Calculate the offset to the first buffer
  buffer0_offset = align_to(chunk_header_size, alignment);
  // Calculate the slop between the headers and first buffer
  chunk_slop = buffer0_offset - chunk_header_size;
  // Interstitial slop after each buffer so the NEXT buffer starts on an
  // alignment boundary: the distance from the end of a buffer up to the
  // next multiple of 'alignment' (zero when buffer_size already is one).
  // Using plain 'buffer_size % alignment' here would leave every buffer
  // after the first misaligned.
  buffer_slop = (alignment - buffer_size % alignment) % alignment;
  // Work out the total size
  // Start off with the headers and slop
  chunk_space = chunk_header_size + chunk_slop;
  // Add on the buffer data
  chunk_space += n * buffer_size;
  // Now accommodate the interstitial slop. Bear in mind that there
  // will be N-1 interstitials between N buffers.
  chunk_space += (n - 1) * buffer_slop;
}
// Grow the buffer count one at a time until the chunk no longer fits in a
// page, then settle on the last count that did fit. Throws when even a
// single buffer cannot be accommodated.
void Layout::optimize(std::size_t page_size) {
  std::size_t candidate = nbuffers;
  std::size_t best = candidate;
  while (true) {
    // Evaluate the layout for the candidate buffer count
    compute(candidate);
    if (chunk_space > page_size) {
      break;
    }
    // This count fits; remember it and try one more buffer
    best = candidate;
    ++candidate;
  }
  // Re-establish the layout for the best fitting count
  compute(best);
  if (chunk_space > page_size) {
    throw std::runtime_error("Page cannot accommodate buffers");
  }
}
// -------------------------------------------------------------------
// A pool of DMA-able buffers carved out of hugepages. Buffers are handed
// out from a free list; when the list is empty a new hugepage is mapped
// and subdivided according to '_layout'. No internal locking: callers
// must serialise access if sharing a pool across threads.
class DMAPool {
const HugePageInfo *_huge_page; // Pool to map hugepages from (not owned)
Layout _layout; // How buffers pack into one page
Chunk * _first_chunk{nullptr}; // Singly-linked list of mapped chunks
Buffer * _free_list{nullptr}; // Singly-linked list of free buffers
// Map a fresh hugepage and push its buffers onto the free list
void new_chunk();
public:
DMAPool(const HugePageInfo *hp,
std::size_t buffer_size,
std::size_t alignment)
: _huge_page(hp), _layout(buffer_size, alignment, hp->size) {
}
~DMAPool();
// Take a buffer from the free list, mapping a new chunk if required
Buffer *allocate();
// Return a buffer to the free list (never unmaps anything)
void free(Buffer *);
};
// Release every chunk. The chunk headers live INSIDE the hugepages they
// describe, so the owning reference is moved out of the header onto the
// stack before it is released; that way nothing reads or writes the
// header's memory at or after the point the underlying munmap runs.
// (Resetting 'chunk->dma' in place leans on shared_ptr::reset touching
// *this only before it destroys the old value — fragile to rely on.)
DMAPool::~DMAPool() {
  Chunk *chunk = _first_chunk;
  while (chunk) {
    Chunk *next = chunk->next;
    // Move the page reference out of the header before dropping it
    HugePage::Ref page = std::move(chunk->dma);
    chunk = next;
    page.reset(); // munmap happens here, after we stop using the header
  }
  _first_chunk = nullptr;
}
void DMAPool::new_chunk() {
// Allocate a new huge page
auto page = _huge_page->allocate();
// We need to move around by bytes from the start of the virtual
// address of the huge page. We'll create a 'start' pointer for
// this process.
uint8_t *start = (uint8_t *)page->virt;
// The chunk header starts at the beginning of the huge page
Chunk *chunk = (Chunk *)start;
// Populate some of the fields of the chunk header
chunk->dma = page;
chunk->buf_size = _layout.buffer_size;
// Get a pointer to the first buffer header in the page
Buffer *buffer = (Buffer *)(start + sizeof(Chunk));
// Set this as the first buffer header in the chunk
chunk->first_buffer = buffer;
// Get a pointer to the first buffer data block in the page
uint8_t *buffer_data = start + _layout.buffer0_offset;
for (std::size_t i = 0; i < _layout.nbuffers;
++i, ++buffer,
buffer_data += (_layout.buffer_size + _layout.buffer_slop)) {
// Set the fields of the buffer header
buffer->address = buffer_data;
buffer->phy = page->phy + (buffer_data - start);
buffer->size = _layout.buffer_size;
// Chain this buffer onto the free list
buffer->next = _free_list;
_free_list = buffer;
}
// Chain the chunk onto our chunk list
chunk->next = _first_chunk;
_first_chunk = chunk;
}
// Pop a buffer off the free list; if the list is empty, map a fresh
// chunk first. The returned buffer's 'next' link is cleared.
Buffer *DMAPool::allocate() {
  if (_free_list == nullptr) {
    // Replenish the free list from a new hugepage
    new_chunk();
  }
  Buffer *head = _free_list;
  assert(head != nullptr);
  // Unlink the head of the free list and hand it out
  _free_list = head->next;
  head->next = nullptr;
  return head;
}
// Push a buffer back onto the head of the free list; the memory stays
// mapped and is reused by later allocate() calls.
void DMAPool::free(Buffer *buffer) {
  Buffer *head = _free_list;
  _free_list = buffer;
  buffer->next = head;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment