/**
 * \file zstd_sampler.cpp
 * \brief Generate uncompressed chunks
 * \author Maelys Bois
 * \version 0.0.1
 *
 * Generate random uncompressed chunks for Zstd dictionary training.
 *
 * The program generates chunks at random coordinates, serialises each one
 * with world::Chunk::write, concatenates the serialised bytes into a single
 * sample buffer, trains a Zstd dictionary on it and writes the result to
 * "content/zstd.dict".
 */

#include "world/Chunk.hpp"

#include <chrono>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <vector>
#include <zdict.h>
#include <zstd.h>

/// Bytes per "kb" as used for sizing and for the final report (decimal kilobyte).
constexpr size_t KB = 1000;
/// Dictionary capacity, in kilobytes.
constexpr size_t COUNT = 100;
/// Dictionary capacity, in bytes.
constexpr size_t SIZE = COUNT * KB;
/// The sample buffer is filled until it holds at least SIZE * SAMPLES bytes.
constexpr size_t SAMPLES = 100;
/// Exclusive upper bound of each randomly drawn coordinate magnitude.
/// Kept as a signed int so that `-(std::rand() % RANGE)` stays signed arithmetic.
constexpr int RANGE = 1 << 18;

/// Entry point
///
/// \return 0 on success, 1 if dictionary training fails or the dictionary
///         file cannot be written.
int main(int /*unused*/, char * /*unused*/[]) {
  std::srand(static_cast<unsigned>(std::time(nullptr)));
  world::Generator generator(std::rand());

  // All serialised chunks back to back; `sizes` records where each one ends,
  // which is the layout ZDICT_trainFromBuffer expects.
  std::vector<char> samples;
  samples.reserve(SIZE * SAMPLES);
  std::vector<size_t> sizes;
  sizes.reserve(SAMPLES * 10);

  std::cout << "Generating..." << std::endl;
  std::chrono::nanoseconds gen_time(0);
  while (samples.size() < SIZE * SAMPLES) {
    // Only chunk construction is timed, not serialisation.
    const auto start = std::chrono::high_resolution_clock::now();
    world::Chunk chunk(chunk_pos(-(std::rand() % RANGE), -(std::rand() % RANGE), -(std::rand() % RANGE)), generator);
    gen_time += (std::chrono::high_resolution_clock::now() - start);

    std::ostringstream oss;
    chunk.write(oss);
    const auto str = oss.str();
    samples.insert(samples.end(), str.begin(), str.end());
    sizes.push_back(str.size());
  }
  std::cout << gen_time.count() / sizes.size() << "ns/chunk" << std::endl;

  // ZDICT_trainFromBuffer takes the sample count as `unsigned`; refuse to
  // truncate silently if the count ever exceeds that range.
  if (sizes.size() > std::numeric_limits<unsigned>::max()) {
    std::cerr << "Error: too many samples (" << sizes.size() << ")" << std::endl;
    return 1;
  }

  std::vector<char> dict(SIZE);
  std::cout << "Training on " << sizes.size() << " samples..." << std::endl;
  const size_t actualSize = ZDICT_trainFromBuffer(dict.data(), dict.size(), samples.data(), sizes.data(),
                                                  static_cast<unsigned>(sizes.size()));
  // Results of ZDICT_* functions are tested with the ZDICT_* error helpers.
  if (ZDICT_isError(actualSize)) {
    std::cerr << "Error: " << ZDICT_getErrorName(actualSize) << std::endl;
    return 1;
  }
  std::cout << "Dictionary of " << actualSize / KB << "kb" << std::endl;

  // The dictionary is raw binary data: open in binary mode so that no
  // newline translation is applied to it on platforms that perform one.
  std::ofstream out("content/zstd.dict", std::ios::binary | std::ios::trunc);
  if (!out) {
    std::cerr << "Error: cannot open content/zstd.dict for writing" << std::endl;
    return 1;
  }
  out.write(dict.data(), static_cast<std::streamsize>(actualSize));
  out.close();
  // close() flushes; a failed flush or write leaves the stream in a failed state.
  if (!out) {
    std::cerr << "Error: failed to write content/zstd.dict" << std::endl;
    return 1;
  }
  return 0;
}