Created
December 11, 2020 11:53
-
-
Save BlakeRain/354a21571fa9dfe432b46b833ccec595 to your computer and use it in GitHub Desktop.
Allocation of hugepages for DMA in Linux
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert>
#include <cctype>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <regex>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>
#include <experimental/filesystem>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
namespace fs = std::experimental::filesystem;
// A single mapped memory region together with the physical address that
// was recorded for it. The destructor releases the mapping with munmap(),
// so exactly one HugePage object may exist per mapping; share it through
// HugePage::Ref instead of copying it.
struct HugePage {
  using Ref = std::shared_ptr<HugePage>;

  void *virt;        // Virtual address
  uintptr_t phy;     // Physical address
  std::size_t size;  // Size (in bytes)

  // Takes over the mapping [v, v + sz); 'p' is stored as given.
  HugePage(void *v, uintptr_t p, std::size_t sz)
      : virt(v), phy(p), size(sz) {
  }

  // A copy would munmap() the same region a second time when destroyed,
  // so copying is disabled.
  HugePage(const HugePage &) = delete;
  HugePage &operator=(const HugePage &) = delete;

  ~HugePage() {
    int rc = munmap(virt, size);
    assert(rc != -1);
    (void)rc;  // keeps NDEBUG builds free of an unused-variable warning
  }
};
// ------------------------------------------------------------------- | |
struct HugePageInfo { | |
std::size_t size; // The size of the hugepage (in bytes) | |
HugePageInfo(const fs::directory_entry &); | |
// Allocate a hugepage in this pool | |
HugePage::Ref allocate() const; | |
// Load all the available hugepage pools | |
static std::vector<HugePageInfo> load(); | |
}; | |
// Parse a decimal size with an optional binary-unit suffix.
//
// Accepted form: optional leading whitespace, a run of decimal digits, at
// most one space, then an optional unit letter. 'K'/'k' multiplies the
// number by 1024, 'M'/'m' by 1024^2 and 'G'/'g' by 1024^3. Any other
// trailing character is ignored and the plain number is returned.
//
// Returns 0 for an empty or all-whitespace string, and also when no digit
// follows the leading whitespace. Overflow is not detected.
std::size_t parse_suffixed_size(const std::string &str) {
  // The <cctype> classifiers require a value representable as unsigned
  // char; passing a negative plain char is undefined behaviour, so every
  // character is converted first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  const auto is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };

  auto ptr = str.begin();
  const auto end = str.end();

  // Skip any leading whitespace
  while (ptr != end && is_space(*ptr)) {
    ++ptr;
  }
  if (ptr == end) {
    return 0;
  }

  // Accumulate the decimal digits
  std::size_t value = 0;
  while (ptr != end && is_digit(*ptr)) {
    value = (value * 10) + static_cast<std::size_t>(*ptr - '0');
    ++ptr;
  }

  // Allow a single space between the number and the unit
  if (ptr != end && *ptr == ' ') {
    ++ptr;
  }
  if (ptr == end) {
    return value;
  }

  // Each case deliberately falls through to the next so that a larger
  // unit picks up every smaller unit's factor of 1024.
  switch (*ptr) {
    case 'G':
    case 'g':
      value *= 1024;
      // fall through
    case 'M':
    case 'm':
      value *= 1024;
      // fall through
    case 'K':
    case 'k':
      value *= 1024;
      break;
    default:
      break;
  }
  return value;
}
static const std::regex HUGEPAGE_RE{"hugepages-([0-9]+[kKmMgG])[bB]"}; | |
HugePageInfo::HugePageInfo(const fs::directory_entry &entry) { | |
// Extract the size of the hugepage from the directory name | |
std::string filename = entry.path().filename(); | |
std::smatch match; | |
if (std::regex_match(filename, match, HUGEPAGE_RE)) { | |
size = parse_suffixed_size(match[1].str()); | |
} else { | |
throw std::runtime_error("Unable to parse hugepage: " + filename); | |
} | |
} | |
static const fs::path SYS_HUGEPAGE_DIR = "/sys/kernel/mm/hugepages"; | |
std::vector<HugePageInfo> HugePageInfo::load() { | |
std::vector<HugePageInfo> hugepages; | |
for (auto &entry : fs::directory_iterator(SYS_HUGEPAGE_DIR)) { | |
hugepages.emplace_back(entry); | |
} | |
return hugepages; | |
} | |
#define BIT(n) (1ULL << (n))

// Translate a virtual address of this process into a physical address by
// reading the matching 64-bit entry of /proc/self/pagemap.
//
// Throws std::system_error when a system call fails and
// std::runtime_error when the entry is truncated, the page is not
// present, or the kernel reports a zero page frame number. The kernel
// zeroes that field for callers without CAP_SYS_ADMIN, so a zero value
// cannot be turned into a usable physical address.
//
// Failures are reported with exceptions rather than assert() so that they
// are still detected when the program is built with NDEBUG.
static uintptr_t virtual_to_physical(const void *vaddr) {
  const long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0) {
    throw std::system_error(errno, std::generic_category(),
                            "sysconf(_SC_PAGESIZE)");
  }
  const uintptr_t page_bytes = static_cast<uintptr_t>(page_size);
  const uintptr_t address = reinterpret_cast<uintptr_t>(vaddr);

  const int fd = ::open("/proc/self/pagemap", O_RDONLY);
  if (fd == -1) {
    throw std::system_error(errno, std::generic_category(),
                            "open(/proc/self/pagemap)");
  }

  // pagemap holds one 64-bit entry per virtual page, regardless of the
  // pointer width of this process. The file offset is kept in a 64-bit
  // type so that high virtual addresses are not truncated.
  uint64_t entry = 0;
  const off64_t offset = static_cast<off64_t>(address / page_bytes) *
                         static_cast<off64_t>(sizeof(entry));
  const ssize_t nread = ::pread64(fd, &entry, sizeof(entry), offset);
  const int read_errno = errno;  // close() below may overwrite errno
  ::close(fd);

  if (nread == -1) {
    throw std::system_error(read_errno, std::generic_category(),
                            "pread(/proc/self/pagemap)");
  }
  if (static_cast<std::size_t>(nread) != sizeof(entry)) {
    throw std::runtime_error("Short read from /proc/self/pagemap");
  }

  // Bit 63 is set when the page is present in RAM
  if ((entry & BIT(63)) == 0) {
    throw std::runtime_error("Page is not present in physical memory");
  }

  // Bits 0-54 hold the page frame number
  const uint64_t pfn = entry & 0x7fffffffffffffULL;
  if (pfn == 0) {
    throw std::runtime_error(
        "Page frame number unavailable (CAP_SYS_ADMIN required)");
  }

  return static_cast<uintptr_t>(pfn * page_bytes + address % page_bytes);
}
// Map one hugepage of this pool's size and wrap it in a shared HugePage
// that records both its virtual and its physical address.
//
// Throws std::runtime_error when 'size' is not a non-zero power of two
// and std::system_error when mmap() fails. If anything fails after the
// mapping was created, the mapping is released before the exception is
// passed on.
HugePage::Ref HugePageInfo::allocate() const {
  if (size == 0 || (size & (size - 1)) != 0) {
    throw std::runtime_error("Hugepage size must be a non-zero power of two");
  }

  // MAP_HUGETLB alone always allocates from the default hugepage pool.
  // To get pages of this pool's size, log2(size) has to be encoded in the
  // flag bits starting at MAP_HUGE_SHIFT.
  unsigned size_log2 = 0;
  for (std::size_t s = size; s > 1; s >>= 1) {
    ++size_log2;
  }
#ifdef MAP_HUGE_SHIFT
  const unsigned huge_shift = MAP_HUGE_SHIFT;
#else
  const unsigned huge_shift = 26;  // value used by the Linux mmap ABI
#endif

  // MAP_POPULATE asks the kernel to fault the page in right away; the
  // pagemap lookup below needs the page to be present.
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE |
                    static_cast<int>(size_log2 << huge_shift);

  void *vaddr = ::mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (vaddr == MAP_FAILED) {
    throw std::system_error(errno, std::generic_category(),
                            "mmap(MAP_HUGETLB)");
  }

  try {
    return std::make_shared<HugePage>(vaddr,
                                      virtual_to_physical(vaddr),
                                      size);
  } catch (...) {
    // No HugePage took ownership of the mapping, so release it here
    ::munmap(vaddr, size);
    throw;
  }
}
// ------------------------------------------------------------------- | |
// Header describing one buffer. Further per-buffer fields (packet length,
// RSS hash, ...) may be added as long as 'padding' is shrunk to match, so
// that the size check below keeps passing.
struct Buffer {
  void *address;     // Virtual address of the buffer data
  uintptr_t phy;     // Physical address of the buffer data
  std::size_t size;  // Buffer size in bytes
  Buffer *next;      // Link to the next buffer while on a free list
  uint32_t padding[8];
};

// The header size has to be a multiple of 64 bytes
static_assert((sizeof(Buffer) % 64) == 0,
              "Buffer header is not a multiple of 64 bytes in size");
struct Chunk { | |
HugePage::Ref dma; | |
std::size_t buf_size; | |
Buffer * first_buffer; | |
Chunk * next; | |
uint32_t padding[5]; | |
}; | |
// Make sure that the chunk header is a multiple of 64 bytes in size | |
static_assert(sizeof(Chunk) % 64 == 0, | |
"Chunk header is not a multiple of 64 bytes in size"); | |
// Round 'p' up to the next multiple of 'a'. 'p' may be an integer or a
// pointer; a value that is already aligned is returned unchanged.
//
// An alignment of 0 or 1 places no constraint on 'p', so 'p' is returned
// as-is. This also avoids a division by zero for a == 0.
template<typename T>
inline T align_to(T p, std::size_t a) {
  if (a <= 1) {
    return p;
  }
  // A C-style cast is used on purpose: it is the only cast that converts
  // both integer and pointer arguments to uintptr_t.
  const uintptr_t offset = (uintptr_t)p % a;
  return offset > 0 ? p + (a - offset) : p;
}
// Describes how chunk and buffer headers and the buffer data are laid out
// inside one page. Constructing a Layout immediately runs optimize() for
// the given page size.
struct Layout {
  // Inputs
  std::size_t buffer_size;
  std::size_t alignment;
  std::size_t nbuffers;

  // Values filled in by compute()
  uint64_t chunk_header_size = 0;
  uint64_t buffer0_offset = 0;
  uint64_t chunk_slop = 0;
  uint64_t buffer_slop = 0;
  uint64_t chunk_space = 0;

  // Starts from a single buffer and lets optimize() settle on the final
  // buffer count for 'page_size'.
  Layout(std::size_t size, std::size_t align, std::size_t page_size)
      : buffer_size{size}, alignment{align}, nbuffers{1} {
    optimize(page_size);
  }

private:
  // Recomputes every derived field for 'n' buffers
  void compute(std::size_t n);

  // Searches for a "best fit" buffer count for the given page size
  void optimize(std::size_t page_size);
};
void Layout::compute(std::size_t n) { | |
nbuffers = n; | |
// Calculate the chunk header size (C + n * H) | |
chunk_header_size = sizeof(Chunk) + sizeof(Buffer) * n; | |
// Calculate the offset to the first buffer | |
buffer0_offset = align_to(chunk_header_size, alignment); | |
// Calculate the slop between the headers and first buffer | |
chunk_slop = buffer0_offset - chunk_header_size; | |
// Calculate the interstital buffer slop | |
buffer_slop = buffer_size % alignment; | |
// Work out the total size | |
// Start off with the headers and slop | |
chunk_space = chunk_header_size + chunk_slop; | |
// Add on the buffer data | |
chunk_space += n * buffer_size; | |
// Now accommodate the interstital slop. Bear in mind that there | |
// will be N-1 interstitals between N buffers. | |
chunk_space += (n - 1) * buffer_slop; | |
} | |
void Layout::optimize(std::size_t page_size) { | |
std::size_t current_nbuffers = nbuffers; | |
std::size_t last_nbuffers = current_nbuffers; | |
for (;;) { | |
// Compute the size of the chunk for the current buffer count | |
compute(current_nbuffers); | |
if (chunk_space > page_size) { | |
break; | |
} | |
last_nbuffers = current_nbuffers; | |
++current_nbuffers; | |
} | |
compute(last_nbuffers); | |
if (chunk_space > page_size) { | |
throw std::runtime_error("Page cannot accommodate buffers"); | |
} | |
} | |
// ------------------------------------------------------------------- | |
class DMAPool { | |
const HugePageInfo *_huge_page; | |
Layout _layout; | |
Chunk * _first_chunk{nullptr}; | |
Buffer * _free_list{nullptr}; | |
void new_chunk(); | |
public: | |
DMAPool(const HugePageInfo *hp, | |
std::size_t buffer_size, | |
std::size_t alignment) | |
: _huge_page(hp), _layout(buffer_size, alignment, hp->size) { | |
} | |
~DMAPool(); | |
Buffer *allocate(); | |
void free(Buffer *); | |
}; | |
// Walks the chunk list and drops each chunk's reference to its hugepage.
// The link to the following chunk is read before the reference is
// dropped.
DMAPool::~DMAPool() {
  for (Chunk *current = _first_chunk; current != nullptr;) {
    Chunk *const following = current->next;
    current->dma.reset();
    current = following;
  }
}
void DMAPool::new_chunk() { | |
// Allocate a new huge page | |
auto page = _huge_page->allocate(); | |
// We need to move around by bytes from the start of the virtual | |
// address of the huge page. We'll create a 'start' pointer for | |
// this process. | |
uint8_t *start = (uint8_t *)page->virt; | |
// The chunk header starts at the beginning of the huge page | |
Chunk *chunk = (Chunk *)start; | |
// Populate some of the fields of the chunk header | |
chunk->dma = page; | |
chunk->buf_size = _layout.buffer_size; | |
// Get a pointer to the first buffer header in the page | |
Buffer *buffer = (Buffer *)(start + sizeof(Chunk)); | |
// Set this as the first buffer header in the chunk | |
chunk->first_buffer = buffer; | |
// Get a pointer to the first buffer data block in the page | |
uint8_t *buffer_data = start + _layout.buffer0_offset; | |
for (std::size_t i = 0; i < _layout.nbuffers; | |
++i, ++buffer, | |
buffer_data += (_layout.buffer_size + _layout.buffer_slop)) { | |
// Set the fields of the buffer header | |
buffer->address = buffer_data; | |
buffer->phy = page->phy + (buffer_data - start); | |
buffer->size = _layout.buffer_size; | |
// Chain this buffer onto the free list | |
buffer->next = _free_list; | |
_free_list = buffer; | |
} | |
// Chain the chunk onto our chunk list | |
chunk->next = _first_chunk; | |
_first_chunk = chunk; | |
} | |
// Pops the buffer at the head of the free list and returns it with its
// 'next' link cleared. When the free list is empty, a new chunk is
// created first.
Buffer *DMAPool::allocate() {
  if (_free_list == nullptr) {
    new_chunk();
  }

  Buffer *const head = _free_list;
  assert(head != nullptr);

  _free_list = head->next;
  head->next = nullptr;
  return head;
}
// Return 'buffer' to the pool by pushing it onto the head of the free
// list. A null pointer is ignored instead of being dereferenced.
void DMAPool::free(Buffer *buffer) {
  if (buffer == nullptr) {
    return;
  }
  buffer->next = _free_list;
  _free_list = buffer;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment