Memory-mapped i/o

Revision en1, by jamjury, 2024-06-12 17:57:47

Memory-mapped input file is the fastest possible input method I know of. I used it for last ICPC Challenge.

Unfortunately, it is not possible — without exploiting Windows kernel bugs — to also map output file into memory on CF, because the handle lacks GENERIC_READ, which is required for FILE_MAP_WRITE mapping. So the best we can do for output is to write the whole buffer at once (but it doesn't really speed things up).

#include <Windows.h> // GetStdHandle, GetFileSize, CreateFileMapping, MapViewOfFile, UnmapViewOfFile, WriteFile, CloseHandle
#include <charconv> // from_chars, to_chars
#include <cstring> // memcpy
#include <cstdlib> // malloc, free
#include <cstddef> // size_t

// #define FREE_IO_ON_EXIT
 
struct input {
    LPVOID view;
    const char *first, *last;
 
    input() noexcept {
        HANDLE input_handle = GetStdHandle(STD_INPUT_HANDLE);
        DWORD input_size = GetFileSize(
            input_handle, // [in]            HANDLE  hFile,
            NULL          // [out, optional] LPDWORD lpFileSizeHigh
        );
        HANDLE mapping_object = CreateFileMapping(
            input_handle,  // [in]           HANDLE                hFile
            NULL,          // [in, optional] LPSECURITY_ATTRIBUTES lpFileMappingAttributes
            PAGE_READONLY, // [in]           DWORD                 flProtect
            0, /* whole */ // [in]           DWORD                 dwMaximumSizeHigh
            0, /*  file */ // [in]           DWORD                 dwMaximumSizeLow
            NULL           // [in, optional] LPCSTR                lpName
        );
        view = MapViewOfFile(
            mapping_object,  // [in] HANDLE hFileMappingObject
            FILE_MAP_READ,   // [in] DWORD  dwDesiredAccess
            0,               // [in] DWORD  dwFileOffsetHigh
            0,               // [in] DWORD  dwFileOffsetLow
            0 /*whole file*/ // [in] SIZE_T dwNumberOfBytesToMap
        );

        first = (char*) view;
        last = first + input_size;
        #ifdef FREE_IO_ON_EXIT
            CloseHandle(input_handle);
            CloseHandle(mapping_object);
        #endif
    }
 
    int take_int() noexcept {
        int result;
        first = std::from_chars(first, last, result).ptr + 1;
        return result;
    }
 
    double take_double() noexcept {
        double result;
        first = std::from_chars(first, last, result).ptr + 1;
        return result;
    }
 
    #ifdef FREE_IO_ON_EXIT
        ~input() noexcept {
            UnmapViewOfFile(view);
        }
    #endif
} in;

struct output {
    constexpr static std::size_t buf_size = 32*1024*1024; // 32MB
    char *buf, *first, *last;
 
    output() noexcept
        : buf((char*) std::malloc(buf_size))
        , first(buf)
        , last(buf + buf_size)
    {}
 
    void put_int(int value) noexcept {
        first = std::to_chars(first, last, value).ptr;
    }
 
    template <std::size_t size>
    void put_str(const char (&str)[size]) noexcept {
        std::memcpy(first, str, size - 1);
        first += size - 1;
    }
 
    void put_char(char c) noexcept {
        *first++ = c;
    }
 
    ~output() noexcept {
        HANDLE output_handle = GetStdHandle(STD_OUTPUT_HANDLE);
        WriteFile(output_handle, buf, first - buf, NULL, NULL);
        #ifdef FREE_IO_ON_EXIT
            CloseHandle(output_handle);
            std::free(buf);
        #endif
    }
} out;

int main() {
    out.put_int(in.take_int());
}

The benchmark (webarchive) on my machine gcc 13.2.0 (MinGW-W64), compiled with -std=c++20 -O2

int, scanf         4.01   3.93   4.05
int, cin           9.31   0.64   9.21
int, mmap in       0.06   0.06   0.06

int, printf        2.25   2.27   2.22
int, cout          0.39   0.40   0.40
int, mmap out      0.39   0.38   0.39

*mmap in/out is this implementation

Tags input-output, performance, c++, memory manipulation

History

 
 
 
 
Revisions
 
 
  Rev. Lang. By When Δ Comment
en1 English jamjury 2024-06-12 17:57:47 4222 Initial revision (published)