2019-12-07 22:35:14 +07:00
|
|
|
#include <archive.h>
|
|
|
|
#include <archive_entry.h>
|
2019-12-09 17:21:46 +07:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
#include "finally.hh"
|
2019-12-09 17:21:46 +07:00
|
|
|
#include "serialise.hh"
|
2019-12-10 15:47:38 +07:00
|
|
|
#include "tarfile.hh"
|
2023-10-25 00:43:36 -04:00
|
|
|
#include "file-system.hh"
|
2019-03-27 14:12:20 +01:00
|
|
|
|
2019-09-11 13:10:46 +02:00
|
|
|
namespace nix {
|
2021-04-15 13:51:00 +02:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
int callback_open(struct archive *, void * self)
|
2021-04-15 13:51:00 +02:00
|
|
|
{
|
2019-12-10 15:47:38 +07:00
|
|
|
return ARCHIVE_OK;
|
|
|
|
}
|
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
ssize_t callback_read(struct archive * archive, void * _self, const void ** buffer)
|
2021-04-15 13:51:00 +02:00
|
|
|
{
|
|
|
|
auto self = (TarArchive *) _self;
|
2019-12-10 15:47:38 +07:00
|
|
|
*buffer = self->buffer.data();
|
|
|
|
|
|
|
|
try {
|
2023-01-31 23:14:48 +01:00
|
|
|
return self->source->read((char *) self->buffer.data(), self->buffer.size());
|
2019-12-10 15:47:38 +07:00
|
|
|
} catch (EndOfFile &) {
|
|
|
|
return 0;
|
2021-04-15 13:51:00 +02:00
|
|
|
} catch (std::exception & err) {
|
2019-12-10 15:47:38 +07:00
|
|
|
archive_set_error(archive, EIO, "Source threw exception: %s", err.what());
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2019-09-11 13:10:46 +02:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
int callback_close(struct archive *, void * self)
|
2021-04-15 13:51:00 +02:00
|
|
|
{
|
2019-12-10 15:47:38 +07:00
|
|
|
return ARCHIVE_OK;
|
|
|
|
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
/// Turn a libarchive status code into an exception.
///
/// @param archive Handle whose last error message is reported on failure.
/// @param err     Status code returned by a libarchive call.
/// @param reason  Format string for the Error; callers in this file pass a
///                string with one `%s` that receives the libarchive message.
/// @throws EndOfFile if `err` is ARCHIVE_EOF.
/// @throws Error for every other status that is not ARCHIVE_OK.
void checkLibArchive(archive * archive, int err, const std::string & reason)
{
    if (err == ARCHIVE_OK)
        return;

    if (err == ARCHIVE_EOF)
        throw EndOfFile("reached end of archive");

    // archive_error_string() may return NULL when libarchive recorded no
    // message; never hand a null pointer to the formatter.
    const char * message = archive_error_string(archive);
    throw Error(reason, message ? message : "unknown error");
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
/// Number of elements both TarArchive constructors allocate for `buffer`,
/// the scratch space callback_read fills from the source.
constexpr auto defaultBufferSize = std::size_t{65536};
|
|
|
|
}
|
|
|
|
|
|
|
|
void TarArchive::check(int err, const std::string & reason)
|
2019-12-10 15:47:38 +07:00
|
|
|
{
|
2024-03-30 01:29:29 +03:00
|
|
|
checkLibArchive(archive, err, reason);
|
|
|
|
}
|
2022-02-06 14:47:07 +01:00
|
|
|
|
2024-03-30 01:29:29 +03:00
|
|
|
/// @brief Get filter_code from its name.
|
|
|
|
///
|
|
|
|
/// libarchive does not provide a convenience function like archive_write_add_filter_by_name but for reading.
|
|
|
|
/// Instead it's necessary to use this kludge to convert method -> code and
|
|
|
|
/// then use archive_read_support_filter_by_code. Arguably this is better than
|
|
|
|
/// hand-rolling the equivalent function that is better implemented in libarchive.
|
|
|
|
int getArchiveFilterCodeByName(const std::string & method)
|
|
|
|
{
|
|
|
|
auto * ar = archive_write_new();
|
|
|
|
auto cleanup = Finally{[&ar]() { checkLibArchive(ar, archive_write_close(ar), "failed to close archive: %s"); }};
|
|
|
|
auto err = archive_write_add_filter_by_name(ar, method.c_str());
|
|
|
|
checkLibArchive(ar, err, "failed to get libarchive filter by name: %s");
|
|
|
|
auto code = archive_filter_code(ar, 0);
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
|
|
|
|
TarArchive::TarArchive(Source & source, bool raw, std::optional<std::string> compression_method)
|
|
|
|
: archive{archive_read_new()}
|
|
|
|
, source{&source}
|
|
|
|
, buffer(defaultBufferSize)
|
|
|
|
{
|
|
|
|
if (!compression_method) {
|
2022-02-06 14:47:07 +01:00
|
|
|
archive_read_support_filter_all(archive);
|
2024-03-30 01:29:29 +03:00
|
|
|
} else {
|
|
|
|
archive_read_support_filter_by_code(archive, getArchiveFilterCodeByName(*compression_method));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!raw) {
|
2019-12-07 18:08:33 +00:00
|
|
|
archive_read_support_format_all(archive);
|
2022-02-06 14:47:07 +01:00
|
|
|
} else {
|
2019-12-10 15:47:38 +07:00
|
|
|
archive_read_support_format_raw(archive);
|
2022-02-06 14:47:07 +01:00
|
|
|
archive_read_support_format_empty(archive);
|
|
|
|
}
|
2024-03-30 01:29:29 +03:00
|
|
|
|
2023-12-07 11:04:48 +01:00
|
|
|
archive_read_set_option(archive, NULL, "mac-ext", NULL);
|
2024-03-30 01:29:22 +03:00
|
|
|
check(
|
|
|
|
archive_read_open(archive, (void *) this, callback_open, callback_read, callback_close),
|
|
|
|
"Failed to open archive (%s)");
|
2019-12-10 15:47:38 +07:00
|
|
|
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2021-04-15 13:51:00 +02:00
|
|
|
TarArchive::TarArchive(const Path & path)
|
2024-03-30 01:29:29 +03:00
|
|
|
: archive{archive_read_new()}
|
|
|
|
, buffer(defaultBufferSize)
|
2019-12-10 15:47:38 +07:00
|
|
|
{
|
2021-08-30 15:41:19 +02:00
|
|
|
archive_read_support_filter_all(archive);
|
2022-02-06 14:47:07 +01:00
|
|
|
archive_read_support_format_all(archive);
|
2023-12-07 11:04:48 +01:00
|
|
|
archive_read_set_option(archive, NULL, "mac-ext", NULL);
|
2022-02-06 14:47:07 +01:00
|
|
|
check(archive_read_open_filename(archive, path.c_str(), 16384), "failed to open archive: %s");
|
2021-08-30 15:41:19 +02:00
|
|
|
}
|
|
|
|
|
2021-04-15 13:51:00 +02:00
|
|
|
void TarArchive::close()
|
|
|
|
{
|
2019-12-10 15:47:38 +07:00
|
|
|
check(archive_read_close(this->archive), "Failed to close archive (%s)");
|
|
|
|
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2021-04-15 13:51:00 +02:00
|
|
|
/// Release the libarchive handle, if there is one.
TarArchive::~TarArchive()
{
    if (!archive)
        return;

    archive_read_free(archive);
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2019-12-19 15:01:58 +01:00
|
|
|
/// Extract every member of `archive` to disk underneath `destDir`, then
/// close the archive.
///
/// Member path names and hard-link targets are prefixed with `destDir`.
/// Directories lacking owner read/execute bits get them added.
///
/// @throws Error if a member has no path name or libarchive reports a
///         failure; libarchive warnings from reading a header are logged
///         with warn() and do not stop extraction.
static void extract_archive(TarArchive & archive, const Path & destDir)
{
    const int flags = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_SECURE_SYMLINKS | ARCHIVE_EXTRACT_SECURE_NODOTDOT;

    // archive_error_string() may return NULL; never pass that to a formatter.
    auto lastError = [&]() -> const char * {
        const char * message = archive_error_string(archive.archive);
        return message ? message : "unknown error";
    };

    for (;;) {
        // Initialised so a header call that bails out early cannot leave
        // us with an indeterminate pointer.
        struct archive_entry * entry = nullptr;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF)
            break;

        auto name = entry ? archive_entry_pathname(entry) : nullptr;
        if (!name)
            throw Error("cannot get archive member name: %s", lastError());

        if (r == ARCHIVE_WARN)
            // The libarchive message is data, not a format string: it may
            // legitimately contain '%'.
            warn("%s", lastError());
        else
            archive.check(r);

        archive_entry_copy_pathname(entry, (destDir + "/" + name).c_str());

        // sources can and do contain dirs with no rx bits
        if (archive_entry_filetype(entry) == AE_IFDIR && (archive_entry_mode(entry) & 0500) != 0500)
            archive_entry_set_mode(entry, archive_entry_mode(entry) | 0500);

        // Patch hardlink path
        const char * original_hardlink = archive_entry_hardlink(entry);
        if (original_hardlink) {
            archive_entry_copy_hardlink(entry, (destDir + "/" + original_hardlink).c_str());
        }

        archive.check(archive_read_extract(archive.archive, entry, flags));
    }

    archive.close();
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2019-12-07 22:35:14 +07:00
|
|
|
/// Unpack the archive read from `source` into `destDir`.
///
/// The archive is opened first; only then is createDirs(destDir) called
/// and the contents extracted.
void unpackTarfile(Source & source, const Path & destDir)
{
    TarArchive archive(source);

    createDirs(destDir);
    extract_archive(archive, destDir);
}
|
2019-12-07 18:08:33 +00:00
|
|
|
|
2019-12-07 22:35:14 +07:00
|
|
|
void unpackTarfile(const Path & tarFile, const Path & destDir)
|
|
|
|
{
|
2019-12-07 18:08:33 +00:00
|
|
|
auto archive = TarArchive(tarFile);
|
2019-12-07 23:23:11 +07:00
|
|
|
|
2019-12-07 22:35:14 +07:00
|
|
|
createDirs(destDir);
|
2019-12-07 18:08:33 +00:00
|
|
|
extract_archive(archive, destDir);
|
2019-09-11 15:25:43 +02:00
|
|
|
}
|
|
|
|
|
2023-12-21 04:28:06 -05:00
|
|
|
/// Feed every member of `archive` into `parseSink`.
///
/// Directories, regular files (with their executable bit and contents) and
/// symlinks are forwarded to the sink; any other file type is an error.
///
/// @return The largest mtime among the members seen, or 0 if there were none.
/// @throws Error if a member has no path name, has an unsupported type, is
///         a symlink without a target, cannot be read, or libarchive
///         reports a failure. libarchive warnings from reading a header
///         are logged with warn() and do not stop processing.
time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink)
{
    time_t lastModified = 0;

    // One scratch buffer for the whole archive instead of allocating and
    // zero-filling 128 KiB for every chunk read.
    std::vector<unsigned char> buf(128 * 1024);

    // archive_error_string() may return NULL; never pass that to a formatter.
    auto lastError = [&]() -> const char * {
        const char * message = archive_error_string(archive.archive);
        return message ? message : "unknown error";
    };

    for (;;) {
        // FIXME: merge with extract_archive
        // Initialised so a header call that bails out early cannot leave
        // us with an indeterminate pointer.
        struct archive_entry * entry = nullptr;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF)
            break;

        auto path = entry ? archive_entry_pathname(entry) : nullptr;
        if (!path)
            throw Error("cannot get archive member name: %s", lastError());

        if (r == ARCHIVE_WARN)
            // The libarchive message is data, not a format string: it may
            // legitimately contain '%'.
            warn("%s", lastError());
        else
            archive.check(r);

        lastModified = std::max(lastModified, archive_entry_mtime(entry));

        switch (archive_entry_filetype(entry)) {

        case AE_IFDIR:
            parseSink.createDirectory(path);
            break;

        case AE_IFREG: {
            parseSink.createRegularFile(path, [&](auto & crf) {
                if (archive_entry_mode(entry) & S_IXUSR)
                    crf.isExecutable();

                while (true) {
                    auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                    if (n < 0)
                        throw Error("cannot read file '%s' from tarball", path);
                    if (n == 0)
                        break;
                    crf(std::string_view{
                        (const char *) buf.data(),
                        (size_t) n,
                    });
                }
            });

            break;
        }

        case AE_IFLNK: {
            auto target = archive_entry_symlink(entry);
            // A symlink entry without a recorded target yields NULL, which
            // must not be forwarded to the sink.
            if (!target)
                throw Error("cannot get target of symlink '%s' in tarball", path);

            parseSink.createSymlink(path, target);

            break;
        }

        default:
            throw Error("file '%s' in tarball has unsupported file type", path);
        }
    }

    return lastModified;
}
|
|
|
|
|
2019-09-11 13:10:46 +02:00
|
|
|
}
|