1
0
Fork 0
Univerxel/src/zstd_sampler.cpp

63 lines
2.0 KiB
C++

/**
* \file zstd_sampler.cpp
* \brief Generate uncompressed chunks
* \author Maelys Bois
* \version 0.0.1
*
* Generate random uncompressed chunks for Zstd dictionary training.
*/
#include "world/Chunk.hpp"
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <zstd.h>
#include <zdict.h>
/// Bytes per "kb" as used by this tool (decimal kilobyte, not 1024).
constexpr int KB = 1000;
/// Dictionary capacity expressed in KB units.
constexpr int COUNT = 100;
/// Dictionary buffer capacity in bytes.
constexpr int SIZE = KB * COUNT;
/// Multiplier applied to SIZE to get the total amount of sample bytes to generate.
constexpr int SAMPLES = 100;
/// Exclusive upper bound on the magnitude of each random chunk coordinate.
constexpr int RANGE = 1 << 18;
/// Entry point
int main(int /*unused*/, char * /*unused*/[])
{
std::srand(std::time(nullptr));
auto generator = world::generator::load(world::generator::Cave::Params(std::rand()));
std::vector<char> samples;
samples.reserve(SIZE * SAMPLES);
std::vector<size_t> sizes;
sizes.reserve(SAMPLES * 10);
std::cout << "Generating..." << std::endl;
std::chrono::nanoseconds gen_time(0);
while(samples.size() < SIZE * SAMPLES) {
const auto start = std::chrono::high_resolution_clock::now();
world::Chunk chunk(chunk_pos(-(std::rand() % RANGE), -(std::rand() % RANGE), -(std::rand() % RANGE)), generator);
gen_time += (std::chrono::high_resolution_clock::now() - start);
std::ostringstream oss;
chunk.write(oss);
const auto str = oss.str();
samples.insert(samples.end(), str.begin(), str.end());
sizes.push_back(str.size());
}
std::cout << gen_time.count() / sizes.size() << "ns/chunk" << std::endl;
std::vector<char> dict(SIZE);
std::cout << "Training on " << sizes.size() << " samples..." << std::endl;
const auto actualSize = ZDICT_trainFromBuffer(dict.data(), dict.size(), samples.data(), sizes.data(), sizes.size());
if(ZSTD_isError(actualSize)) {
std::cout << "Error: " << ZSTD_getErrorName(actualSize) << std::endl;
return 1;
}
std::cout << "Dictionary of " << actualSize / KB << "kb" << std::endl;
std::ofstream out("content/zstd.dict");
out.write(dict.data(), actualSize);
out.close();
return 0;
}