Skip to content

Instantly share code, notes, and snippets.

@ned14
Last active September 5, 2017 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ned14/8e579df8e89fbec38b71a14a2604c07d to your computer and use it in GitHub Desktop.
Save ned14/8e579df8e89fbec38b71a14a2604c07d to your computer and use it in GitHub Desktop.
Demos the poor cached i/o performance on Windows
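/* Demonstrates the poor cached i/o performance on Windows compared to Linux.
Sample results (average time per 1Kb operation against a fully cached file):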
Windows x64 with NTFS:
memcpy: 91.7057ns per 1Kb
Write: 1258.34ns per 1Kb
Read: 1318ns per 1Kb
Linux x64 with ext4:
memcpy: 125.164ns per 1Kb
Write: 298.724ns per 1Kb
Read: 115.725ns per 1Kb
*/
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>
#ifdef _WIN32
#include <Windows.h>
// Windows benchmark entry point.
//
// Creates a 1Gb delete-on-close test file, then times three phases of
// 1024 * 1024 operations each and prints the average nanoseconds per
// operation:
//   1. memcpy() of 1Kb into a large in-memory buffer,
//   2. WriteFile() of 1Kb at a precomputed random file offset,
//   3. ReadFile() of 1Kb at the same offsets.
// Any failed or short i/o aborts the process. Returns 0 on success.
int main() {
  using clock_type = std::chrono::high_resolution_clock;

  // Busy-waits for three seconds, measured from the moment of the call, so
  // each timed phase starts with the CPU already clocked up (SpeedStep).
  const auto warm_up = [] {
    const auto start = clock_type::now();
    while (std::chrono::duration_cast<std::chrono::seconds>(
               clock_type::now() - start)
               .count() < 3)
      ;
  };
  // Converts an elapsed interval into nanoseconds per operation for a phase
  // of 1024 * 1024 operations.
  const auto ns_per_op = [](clock_type::time_point from,
                            clock_type::time_point to) {
    return std::chrono::duration_cast<std::chrono::nanoseconds>(to - from)
               .count() /
           1024.0 / 1024.0;
  };

  std::cout << "Preallocating the test file ..." << std::endl;
  // Get some aligned memory
  void *mem =
      VirtualAlloc(NULL, 1024, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
  if (mem == NULL)
    std::abort();
  memset(mem, '1', 1024);
  // Work around the scheduler bug which badly affects blocking i/o
  SetThreadAffinityMask(GetCurrentThread(), 1);
  // Standard fully cached file. The wide-character entry point is called
  // explicitly so the wide literal compiles whether or not UNICODE is defined.
  HANDLE h = CreateFileW(L"testfile", GENERIC_READ | GENERIC_WRITE, 0, NULL,
                         CREATE_ALWAYS, FILE_FLAG_DELETE_ON_CLOSE, NULL);
  if (h == INVALID_HANDLE_VALUE)
    std::abort();
  // Preallocate to ensure later i/o is exclusively to/from page cache only.
  // The result is deliberately not checked: the writes below extend the file
  // to the same length anyway.
  FILE_END_OF_FILE_INFO feofi;
  feofi.EndOfFile.QuadPart = 1024 * 1024 * 1024;
  SetFileInformationByHandle(h, FileEndOfFileInfo, &feofi, sizeof(feofi));
  for (size_t n = 0; n < 1024; n++) {
    static char buffer[1024 * 1024];
    DWORD bytes = 0;
    WriteFile(h, buffer, 1024 * 1024, &bytes, NULL);
    if (bytes != 1024 * 1024)
      std::abort();
  }
  SetFilePointer(h, 0, NULL, FILE_BEGIN);
  // Precalc randomised offsets: multiples of 64 below 1Mb. The multiply is
  // done in 64 bits so it cannot overflow int whatever RAND_MAX is.
  std::vector<uint64_t> offsets(1024 * 1024);
  for (size_t n = 0; n < 1024 * 1024; n++) {
    offsets[n] = (static_cast<uint64_t>(rand()) * 64) % (1024 * 1024);
  }
  static char buffer[1024 * 1024 * 1024];
  memset(buffer, 1, sizeof(buffer));
  // Ensure background writing doesn't interfere with results
  FlushFileBuffers(h);

  // Phase 1: memcpy
  warm_up();
  auto begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    memcpy(buffer + n * 1024, mem, 1024);
  }
  auto end = clock_type::now();
  auto diff = ns_per_op(begin, end);
  std::cout << "memcpy: " << diff << "ns per 1Kb" << std::endl;

  // Phase 2: cached writes at random offsets
  warm_up();
  begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    DWORD bytes = 0;
    OVERLAPPED ol;
    memset(&ol, 0, sizeof(ol));
    // The file position is carried in Offset/OffsetHigh; Internal is the
    // system's status field and must not be used for the offset.
    ol.Offset = static_cast<DWORD>(offsets[n] & 0xffffffffu);
    ol.OffsetHigh = static_cast<DWORD>(offsets[n] >> 32);
    WriteFile(h, mem, 1024, &bytes, &ol);
    if (bytes != 1024)
      std::abort();
  }
  end = clock_type::now();
  diff = ns_per_op(begin, end);
  std::cout << "Write: " << diff << "ns per 1Kb" << std::endl;

  // Phase 3: cached reads at the same offsets. The warm-up measures from its
  // own start, so it always spins the full three seconds regardless of how
  // long the write phase took.
  warm_up();
  begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    DWORD bytes = 0;
    OVERLAPPED ol;
    memset(&ol, 0, sizeof(ol));
    ol.Offset = static_cast<DWORD>(offsets[n] & 0xffffffffu);
    ol.OffsetHigh = static_cast<DWORD>(offsets[n] >> 32);
    ReadFile(h, mem, 1024, &bytes, &ol);
    if (bytes != 1024)
      std::abort();
  }
  end = clock_type::now();
  diff = ns_per_op(begin, end);
  std::cout << "Read: " << diff << "ns per 1Kb" << std::endl;

  // Closing the handle also deletes the file (FILE_FLAG_DELETE_ON_CLOSE).
  CloseHandle(h);
  VirtualFree(mem, 0, MEM_RELEASE);
  return 0;
}
#else
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
// POSIX benchmark entry point.
//
// Creates a 1Gb test file, then times three phases of 1024 * 1024 operations
// each and prints the average nanoseconds per operation:
//   1. memcpy() of 1Kb into a large in-memory buffer,
//   2. pwrite() of 1Kb at a precomputed random file offset,
//   3. pread() of 1Kb at the same offsets.
// Any failed or short i/o aborts the process, matching the Windows branch.
// Returns 0 on success.
int main() {
  using clock_type = std::chrono::high_resolution_clock;

  // Busy-waits for three seconds, measured from the moment of the call, so
  // each timed phase starts with the CPU already clocked up (SpeedStep).
  const auto warm_up = [] {
    const auto start = clock_type::now();
    while (std::chrono::duration_cast<std::chrono::seconds>(
               clock_type::now() - start)
               .count() < 3)
      ;
  };
  // Converts an elapsed interval into nanoseconds per operation for a phase
  // of 1024 * 1024 operations.
  const auto ns_per_op = [](clock_type::time_point from,
                            clock_type::time_point to) {
    return std::chrono::duration_cast<std::chrono::nanoseconds>(to - from)
               .count() /
           1024.0 / 1024.0;
  };

  std::cout << "Preallocating the test file ..." << std::endl;
  // Get some aligned memory
  void *mem = mmap(NULL, 1024, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED)
    std::abort();
  memset(mem, '1', 1024);
  // Standard fully cached file
  int h = open("testfile", O_CREAT | O_RDWR | O_TRUNC, 0700);
  if (h < 0)
    std::abort();
  // Drop the directory entry straight away so the 1Gb file is reclaimed when
  // the descriptor is closed, even if the benchmark aborts part-way. This
  // mirrors FILE_FLAG_DELETE_ON_CLOSE in the Windows branch.
  unlink("testfile");
  // Preallocate to ensure later i/o is exclusively to/from page cache only
  for (size_t n = 0; n < 1024; n++) {
    static char buffer[1024 * 1024];
    if (write(h, buffer, 1024 * 1024) != 1024 * 1024)
      std::abort();
  }
  lseek(h, 0, SEEK_SET);
  // Precalc randomised offsets: multiples of 64 below 1Mb. The multiply is
  // done in 64 bits because rand() * 64 overflows int when RAND_MAX is
  // 2^31 - 1, which is undefined behaviour and can produce invalid offsets.
  std::vector<uint64_t> offsets(1024 * 1024);
  for (size_t n = 0; n < 1024 * 1024; n++) {
    offsets[n] = (static_cast<uint64_t>(rand()) * 64) % (1024 * 1024);
  }
  static char buffer[1024 * 1024 * 1024];
  memset(buffer, 1, sizeof(buffer));
  // Ensure background writing doesn't interfere with results
  fsync(h);

  // Phase 1: memcpy
  warm_up();
  auto begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    memcpy(buffer + n * 1024, mem, 1024);
  }
  auto end = clock_type::now();
  auto diff = ns_per_op(begin, end);
  std::cout << "memcpy: " << diff << "ns per 1Kb" << std::endl;

  // Phase 2: cached writes at random offsets
  warm_up();
  begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    if (pwrite(h, mem, 1024, static_cast<off_t>(offsets[n])) != 1024)
      std::abort();
  }
  end = clock_type::now();
  diff = ns_per_op(begin, end);
  std::cout << "Write: " << diff << "ns per 1Kb" << std::endl;

  // Phase 3: cached reads at the same offsets. The warm-up measures from its
  // own start, so it always spins the full three seconds regardless of how
  // long the write phase took.
  warm_up();
  begin = clock_type::now();
  for (size_t n = 0; n < 1024 * 1024; n++) {
    if (pread(h, mem, 1024, static_cast<off_t>(offsets[n])) != 1024)
      std::abort();
  }
  end = clock_type::now();
  diff = ns_per_op(begin, end);
  std::cout << "Read: " << diff << "ns per 1Kb" << std::endl;

  close(h);
  munmap(mem, 1024);
  return 0;
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment