2015-10-30 13:33:40 +02:00
|
|
|
#include "compression.hh"
|
2019-12-10 10:47:38 +02:00
|
|
|
#include "tarfile.hh"
|
2016-04-22 19:15:02 +03:00
|
|
|
#include "util.hh"
|
2016-04-29 18:43:37 +03:00
|
|
|
#include "finally.hh"
|
2018-02-11 20:47:42 +02:00
|
|
|
#include "logging.hh"
|
2015-10-30 13:33:40 +02:00
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
#include <archive.h>
|
|
|
|
#include <archive_entry.h>
|
2015-12-31 15:18:20 +02:00
|
|
|
#include <cstdio>
|
2016-04-29 18:43:37 +03:00
|
|
|
#include <cstring>
|
2015-10-30 13:33:40 +02:00
|
|
|
|
2017-12-29 22:42:14 +02:00
|
|
|
#include <brotli/decode.h>
|
|
|
|
#include <brotli/encode.h>
|
|
|
|
|
2016-05-04 16:46:25 +03:00
|
|
|
#include <iostream>
|
2016-02-15 22:45:56 +02:00
|
|
|
|
2016-05-04 16:46:25 +03:00
|
|
|
namespace nix {
|
2016-02-15 22:45:56 +02:00
|
|
|
|
2021-10-12 09:14:36 +03:00
|
|
|
/* Sentinel for "no explicit compression level requested". The sinks in
   this file compare the requested level against this value and only
   act on the level (set a filter option, or warn) when it differs. */
static const int COMPRESSION_LEVEL_DEFAULT = -1;
|
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
// Don't feed brotli too much at once.
|
|
|
|
struct ChunkedCompressionSink : CompressionSink
|
2018-03-16 17:59:31 +02:00
|
|
|
{
|
2018-08-21 16:20:23 +03:00
|
|
|
uint8_t outbuf[32 * 1024];
|
2018-08-06 16:40:29 +03:00
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
void write(std::string_view data) override
|
2018-08-06 16:40:29 +03:00
|
|
|
{
|
|
|
|
const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
|
2020-12-02 15:00:43 +02:00
|
|
|
while (!data.empty()) {
|
|
|
|
size_t n = std::min(CHUNK_SIZE, data.size());
|
2021-04-20 23:54:49 +03:00
|
|
|
writeInternal(data.substr(0, n));
|
2020-12-02 15:00:43 +02:00
|
|
|
data.remove_prefix(n);
|
2018-03-16 17:59:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
virtual void writeInternal(std::string_view data) = 0;
|
2018-08-06 16:40:29 +03:00
|
|
|
};
|
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
struct ArchiveDecompressionSource : Source
|
2019-12-13 10:29:33 +02:00
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
std::unique_ptr<TarArchive> archive = 0;
|
|
|
|
Source & src;
|
|
|
|
ArchiveDecompressionSource(Source & src) : src(src) {}
|
|
|
|
~ArchiveDecompressionSource() override {}
|
|
|
|
size_t read(char * data, size_t len) override {
|
2021-04-15 14:51:00 +03:00
|
|
|
struct archive_entry * ae;
|
2019-12-10 10:47:38 +02:00
|
|
|
if (!archive) {
|
|
|
|
archive = std::make_unique<TarArchive>(src, true);
|
2021-04-15 14:51:00 +03:00
|
|
|
this->archive->check(archive_read_next_header(this->archive->archive, &ae),
|
|
|
|
"failed to read header (%s)");
|
2019-12-10 10:47:38 +02:00
|
|
|
if (archive_filter_count(this->archive->archive) < 2) {
|
2021-04-15 14:51:00 +03:00
|
|
|
throw CompressionError("input compression not recognized");
|
2019-12-13 10:29:33 +02:00
|
|
|
}
|
|
|
|
}
|
2019-12-10 10:47:38 +02:00
|
|
|
ssize_t result = archive_read_data(this->archive->archive, data, len);
|
|
|
|
if (result > 0) return result;
|
|
|
|
if (result == 0) {
|
|
|
|
throw EndOfFile("reached end of compressed file");
|
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
this->archive->check(result, "failed to read compressed data (%s)");
|
2019-12-10 10:47:38 +02:00
|
|
|
return result;
|
2019-12-13 10:29:33 +02:00
|
|
|
}
|
|
|
|
};
|
2021-04-15 14:51:00 +03:00
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
struct ArchiveCompressionSink : CompressionSink
|
2018-08-06 16:40:29 +03:00
|
|
|
{
|
|
|
|
Sink & nextSink;
|
2021-04-15 14:51:00 +03:00
|
|
|
struct archive * archive;
|
|
|
|
|
2021-10-13 12:00:10 +03:00
|
|
|
ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink)
|
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
archive = archive_write_new();
|
|
|
|
if (!archive) throw Error("failed to initialize libarchive");
|
2021-04-15 14:51:00 +03:00
|
|
|
check(archive_write_add_filter_by_name(archive, format.c_str()), "couldn't initialize compression (%s)");
|
2019-12-10 10:47:38 +02:00
|
|
|
check(archive_write_set_format_raw(archive));
|
2021-10-13 12:00:10 +03:00
|
|
|
if (parallel)
|
2019-12-10 10:47:38 +02:00
|
|
|
check(archive_write_set_filter_option(archive, format.c_str(), "threads", "0"));
|
2021-10-13 12:00:10 +03:00
|
|
|
if (level != COMPRESSION_LEVEL_DEFAULT)
|
|
|
|
check(archive_write_set_filter_option(archive, format.c_str(), "compression-level", std::to_string(level).c_str()));
|
2019-12-10 10:47:38 +02:00
|
|
|
// disable internal buffering
|
|
|
|
check(archive_write_set_bytes_per_block(archive, 0));
|
|
|
|
// disable output padding
|
|
|
|
check(archive_write_set_bytes_in_last_block(archive, 1));
|
|
|
|
open();
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
|
|
|
~ArchiveCompressionSink() override
|
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
if (archive) archive_write_free(archive);
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
|
|
|
void finish() override
|
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
flush();
|
|
|
|
check(archive_write_close(archive));
|
2015-10-30 13:33:40 +02:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
|
|
|
void check(int err, const std::string & reason = "failed to compress (%s)")
|
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
if (err == ARCHIVE_EOF)
|
|
|
|
throw EndOfFile("reached end of archive");
|
|
|
|
else if (err != ARCHIVE_OK)
|
|
|
|
throw Error(reason, archive_error_string(this->archive));
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
|
|
|
void write(std::string_view data) override
|
|
|
|
{
|
2019-12-10 10:47:38 +02:00
|
|
|
ssize_t result = archive_write_data(archive, data.data(), data.length());
|
|
|
|
if (result <= 0) check(result);
|
2016-04-29 18:43:37 +03:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
private:
|
2021-04-15 14:51:00 +03:00
|
|
|
void open()
|
|
|
|
{
|
|
|
|
check(archive_write_open(archive, this, nullptr, ArchiveCompressionSink::callback_write, nullptr));
|
|
|
|
auto ae = archive_entry_new();
|
2019-12-10 10:47:38 +02:00
|
|
|
archive_entry_set_filetype(ae, AE_IFREG);
|
|
|
|
check(archive_write_header(archive, ae));
|
|
|
|
archive_entry_free(ae);
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
2021-04-15 14:51:00 +03:00
|
|
|
|
|
|
|
static ssize_t callback_write(struct archive * archive, void * _self, const void * buffer, size_t length)
|
|
|
|
{
|
|
|
|
auto self = (ArchiveCompressionSink *) _self;
|
|
|
|
self->nextSink({(const char *) buffer, length});
|
2019-12-10 10:47:38 +02:00
|
|
|
return length;
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
2019-12-10 10:47:38 +02:00
|
|
|
};
|
2018-08-06 16:40:29 +03:00
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
struct NoneSink : CompressionSink
|
|
|
|
{
|
|
|
|
Sink & nextSink;
|
2021-10-13 12:00:10 +03:00
|
|
|
NoneSink(Sink & nextSink, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink)
|
|
|
|
{
|
2021-10-12 09:14:36 +03:00
|
|
|
if (level != COMPRESSION_LEVEL_DEFAULT)
|
2021-10-13 12:00:10 +03:00
|
|
|
warn("requested compression level '%d' not supported by compression method 'none'", level);
|
2021-10-12 09:14:36 +03:00
|
|
|
}
|
2019-12-10 10:47:38 +02:00
|
|
|
void finish() override { flush(); }
|
|
|
|
void write(std::string_view data) override { nextSink(data); }
|
2018-08-06 16:40:29 +03:00
|
|
|
};
|
2018-03-16 17:59:31 +02:00
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
struct BrotliDecompressionSink : ChunkedCompressionSink
|
|
|
|
{
|
|
|
|
Sink & nextSink;
|
|
|
|
BrotliDecoderState * state;
|
|
|
|
bool finished = false;
|
2017-12-29 22:42:14 +02:00
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
|
|
|
|
{
|
|
|
|
state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
|
|
|
|
if (!state)
|
|
|
|
throw CompressionError("unable to initialize brotli decoder");
|
|
|
|
}
|
2017-12-29 22:42:14 +02:00
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
~BrotliDecompressionSink()
|
|
|
|
{
|
|
|
|
BrotliDecoderDestroyInstance(state);
|
2017-12-29 22:42:14 +02:00
|
|
|
}
|
2018-08-06 16:40:29 +03:00
|
|
|
|
|
|
|
void finish() override
|
|
|
|
{
|
|
|
|
flush();
|
2020-12-02 15:00:43 +02:00
|
|
|
writeInternal({});
|
2018-08-06 16:40:29 +03:00
|
|
|
}
|
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
void writeInternal(std::string_view data) override
|
2018-08-06 16:40:29 +03:00
|
|
|
{
|
2020-12-02 15:00:43 +02:00
|
|
|
auto next_in = (const uint8_t *) data.data();
|
|
|
|
size_t avail_in = data.size();
|
2018-08-06 16:40:29 +03:00
|
|
|
uint8_t * next_out = outbuf;
|
|
|
|
size_t avail_out = sizeof(outbuf);
|
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
while (!finished && (!data.data() || avail_in)) {
|
2018-08-06 16:40:29 +03:00
|
|
|
checkInterrupt();
|
|
|
|
|
|
|
|
if (!BrotliDecoderDecompressStream(state,
|
|
|
|
&avail_in, &next_in,
|
|
|
|
&avail_out, &next_out,
|
|
|
|
nullptr))
|
|
|
|
throw CompressionError("error while decompressing brotli file");
|
|
|
|
|
|
|
|
if (avail_out < sizeof(outbuf) || avail_in == 0) {
|
2020-12-02 15:00:43 +02:00
|
|
|
nextSink({(char *) outbuf, sizeof(outbuf) - avail_out});
|
2018-08-06 16:40:29 +03:00
|
|
|
next_out = outbuf;
|
|
|
|
avail_out = sizeof(outbuf);
|
|
|
|
}
|
|
|
|
|
|
|
|
finished = BrotliDecoderIsFinished(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
2017-03-13 15:40:15 +02:00
|
|
|
|
2018-03-16 17:59:31 +02:00
|
|
|
/* Decompress the string `in` with the decompression method `method`
   and return the decoded data. The work is done by the sink obtained
   from makeDecompressionSink(), whose output is collected in memory. */
ref<std::string> decompress(const std::string & method, const std::string & in)
{
    StringSink collected;

    auto decompressor = makeDecompressionSink(method, collected);
    (*decompressor)(in);
    decompressor->finish();

    return collected.s;
}
|
|
|
|
|
2019-12-10 10:47:38 +02:00
|
|
|
std::unique_ptr<FinishSink> makeDecompressionSink(const std::string & method, Sink & nextSink)
|
2016-04-29 18:02:57 +03:00
|
|
|
{
|
2018-08-06 16:40:29 +03:00
|
|
|
if (method == "none" || method == "")
|
2019-12-10 10:47:38 +02:00
|
|
|
return std::make_unique<NoneSink>(nextSink);
|
2017-03-13 15:40:15 +02:00
|
|
|
else if (method == "br")
|
2019-12-10 10:47:38 +02:00
|
|
|
return std::make_unique<BrotliDecompressionSink>(nextSink);
|
2016-04-29 18:02:57 +03:00
|
|
|
else
|
2021-04-15 14:51:00 +03:00
|
|
|
return sourceToSink([&](Source & source) {
|
2021-04-22 11:23:20 +03:00
|
|
|
auto decompressionSource = std::make_unique<ArchiveDecompressionSource>(source);
|
2021-04-15 14:51:00 +03:00
|
|
|
decompressionSource->drainInto(nextSink);
|
|
|
|
});
|
2016-04-29 18:02:57 +03:00
|
|
|
}
|
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
struct BrotliCompressionSink : ChunkedCompressionSink
|
2017-12-29 22:42:14 +02:00
|
|
|
{
|
|
|
|
Sink & nextSink;
|
|
|
|
uint8_t outbuf[BUFSIZ];
|
2021-04-15 14:51:00 +03:00
|
|
|
BrotliEncoderState * state;
|
2017-12-29 22:42:14 +02:00
|
|
|
bool finished = false;
|
2017-03-14 16:03:53 +02:00
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
|
2017-03-14 16:03:53 +02:00
|
|
|
{
|
2017-12-29 22:42:14 +02:00
|
|
|
state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
|
|
|
|
if (!state)
|
|
|
|
throw CompressionError("unable to initialise brotli encoder");
|
2017-03-14 16:03:53 +02:00
|
|
|
}
|
|
|
|
|
2018-08-06 16:40:29 +03:00
|
|
|
~BrotliCompressionSink()
|
2017-03-14 16:03:53 +02:00
|
|
|
{
|
2017-12-29 22:42:14 +02:00
|
|
|
BrotliEncoderDestroyInstance(state);
|
2017-03-14 16:03:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void finish() override
|
|
|
|
{
|
|
|
|
flush();
|
2020-12-02 15:00:43 +02:00
|
|
|
writeInternal({});
|
2017-12-29 22:42:14 +02:00
|
|
|
}
|
2018-05-02 13:54:30 +03:00
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
void writeInternal(std::string_view data) override
|
2017-12-29 22:42:14 +02:00
|
|
|
{
|
2020-12-02 15:00:43 +02:00
|
|
|
auto next_in = (const uint8_t *) data.data();
|
|
|
|
size_t avail_in = data.size();
|
2018-08-06 16:40:29 +03:00
|
|
|
uint8_t * next_out = outbuf;
|
2017-12-29 22:42:14 +02:00
|
|
|
size_t avail_out = sizeof(outbuf);
|
|
|
|
|
2020-12-02 15:00:43 +02:00
|
|
|
while (!finished && (!data.data() || avail_in)) {
|
2017-12-29 22:42:14 +02:00
|
|
|
checkInterrupt();
|
|
|
|
|
|
|
|
if (!BrotliEncoderCompressStream(state,
|
2020-12-02 15:00:43 +02:00
|
|
|
data.data() ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
|
2018-08-06 16:40:29 +03:00
|
|
|
&avail_in, &next_in,
|
|
|
|
&avail_out, &next_out,
|
|
|
|
nullptr))
|
|
|
|
throw CompressionError("error while compressing brotli compression");
|
2017-12-29 22:42:14 +02:00
|
|
|
|
|
|
|
if (avail_out < sizeof(outbuf) || avail_in == 0) {
|
2020-12-02 15:00:43 +02:00
|
|
|
nextSink({(const char *) outbuf, sizeof(outbuf) - avail_out});
|
2017-12-29 22:42:14 +02:00
|
|
|
next_out = outbuf;
|
|
|
|
avail_out = sizeof(outbuf);
|
|
|
|
}
|
2018-08-06 16:40:29 +03:00
|
|
|
|
|
|
|
finished = BrotliEncoderIsFinished(state);
|
2017-12-29 22:42:14 +02:00
|
|
|
}
|
2017-03-14 16:03:53 +02:00
|
|
|
}
|
|
|
|
};
|
2021-04-15 14:51:00 +03:00
|
|
|
|
2021-10-12 09:14:36 +03:00
|
|
|
/* Create a sink that compresses what is written to it with `method`
   and passes the result to `nextSink`.

   Methods in the libarchive list below are handled by
   ArchiveCompressionSink, which receives `parallel` and `level`.
   "none" gives a pass-through sink; `level` is forwarded to it so that
   a level requested for a method that cannot honour it produces
   NoneSink's warning instead of being dropped silently. "br" gives the
   brotli encoder, which takes no level.

   Throws UnknownCompressionMethod for any other method name. */
ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel, int level)
{
    // Built once rather than on every call.
    static const std::vector<std::string> la_supports = {
        "bzip2", "compress", "grzip", "gzip", "lrzip", "lz4", "lzip", "lzma", "lzop", "xz", "zstd"
    };

    if (std::find(la_supports.begin(), la_supports.end(), method) != la_supports.end())
        return make_ref<ArchiveCompressionSink>(nextSink, method, parallel, level);

    if (method == "none")
        return make_ref<NoneSink>(nextSink, level);
    else if (method == "br")
        return make_ref<BrotliCompressionSink>(nextSink);
    else
        throw UnknownCompressionMethod("unknown compression method '%s'", method);
}
|
|
|
|
|
2021-10-12 09:14:36 +03:00
|
|
|
/* Compress the string `in` with the compression method `method` and
   return the compressed data. `parallel` and `level` are passed on to
   makeCompressionSink(); the sink's output is collected in memory. */
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel, int level)
{
    StringSink collected;

    auto compressor = makeCompressionSink(method, collected, parallel, level);
    (*compressor)(in);
    compressor->finish();

    return collected.s;
}
|
|
|
|
|
2015-10-30 13:33:40 +02:00
|
|
|
}
|