diff --git a/patches/base/s3ql/s3v4.patch b/patches/base/s3ql/s3v4.patch index 9fdaf71..baab9b3 100644 --- a/patches/base/s3ql/s3v4.patch +++ b/patches/base/s3ql/s3v4.patch @@ -1,19 +1,200 @@ +From 11e3a9cea77cd8498d874f7fd69a938af4da68cd Mon Sep 17 00:00:00 2001 +From: xeji <36407913+xeji@users.noreply.github.com> +Date: Thu, 28 Mar 2024 22:19:11 +0100 +Subject: [PATCH] new backend s3c4: s3c with V4 request signatures (#349) + +--- + rst/backends.rst | 15 ++++ + src/s3ql/backends/__init__.py | 3 +- + src/s3ql/backends/s3.py | 100 ++---------------------- + src/s3ql/backends/s3c4.py | 140 ++++++++++++++++++++++++++++++++++ + src/s3ql/parse_args.py | 2 +- + tests/mock_server.py | 11 +++ + 6 files changed, 174 insertions(+), 97 deletions(-) + create mode 100644 src/s3ql/backends/s3c4.py + +diff --git a/rst/backends.rst b/rst/backends.rst +index 7220ee96..4bc68387 100644 +--- a/rst/backends.rst ++++ b/rst/backends.rst +@@ -341,6 +341,14 @@ can be an arbitrary prefix that will be prepended to all object names + used by S3QL. This allows you to store several S3QL file systems in + the same bucket. + ++`s3c://` authenticates API requests using AWS V2 signatures, which are ++deprecated by AWS but still accepted by many S3 compatible services. ++ ++`s3c4://` denotes a variant of this backend that works the same ++but uses AWS V4 signatures for request authentication instead: :: ++ ++ s3c4://:// ++ + The S3 compatible backend accepts the following backend options: + + .. option:: no-ssl +@@ -385,6 +393,13 @@ The S3 compatible backend accepts the following backend options: + necessary if your storage server does not return a valid response + body for a successful copy operation. + ++.. option:: sig-region= ++ ++ For `s3c4://` variant only: Region to use for calculating V4 ++ request signatures. Contrary to S3, the region is not a defined ++ part of the storage URL and must be specified separately. ++ Defaults to `us-east-1`. ++ + .. _`S3 COPY API`: http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html + .. __: https://doc.s3.amazonaws.com/proposals/copy.html + +diff --git a/src/s3ql/backends/__init__.py b/src/s3ql/backends/__init__.py +index a1335762..442828cd 100644 +--- a/src/s3ql/backends/__init__.py ++++ b/src/s3ql/backends/__init__.py +@@ -6,7 +6,7 @@ + This work can be distributed under the terms of the GNU GPLv3. + ''' + +-from . import gs, local, rackspace, s3, s3c, swift, swiftks ++from . import gs, local, rackspace, s3, s3c, s3c4, swift, swiftks + from .b2.b2_backend import B2Backend + + #: Mapping from storage URL prefixes to backend classes +@@ -15,6 +15,7 @@ + 'local': local.Backend, + 'gs': gs.Backend, + 's3c': s3c.Backend, ++ 's3c4': s3c4.Backend, + 'swift': swift.Backend, + 'swiftks': swiftks.Backend, + 'rackspace': rackspace.Backend, diff --git a/src/s3ql/backends/s3.py b/src/s3ql/backends/s3.py -index d19b783..5b5831f 100644 +index e05a49ba..5548a855 100644 --- a/src/s3ql/backends/s3.py +++ b/src/s3ql/backends/s3.py -@@ -9,6 +9,7 @@ This work can be distributed under the terms of the GNU GPLv3. - from ..logging import logging, QuietError # Ensure use of custom logger class - from . import s3c - from .s3c import get_S3Error -+from .s3c import hmac_sha256 - from .common import NoSuchObject, retry - from ..inherit_docstrings import copy_ancestor_docstring +@@ -15,7 +15,7 @@ from xml.sax.saxutils import escape as xml_escape -@@ -236,10 +237,3 @@ class Backend(s3c.Backend): - signing_key = hmac_sha256(service_key, b'aws4_request') - self.signing_key = (signing_key, ymd) + from ..logging import QuietError +-from . import s3c ++from . import s3c4 + from .common import retry + from .s3c import get_S3Error + +@@ -28,22 +28,23 @@ + # pylint: disable=E1002,E1101 + + +-class Backend(s3c.Backend): ++class Backend(s3c4.Backend): + """A backend to store data in Amazon S3 + + This class uses standard HTTP connections to connect to S3. + """ + +- known_options = (s3c.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - { ++ known_options = (s3c4.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - { + 'dumb-copy', + 'disable-expect100', ++ 'sig-region', + } + + def __init__(self, options): + self.region = None +- self.signing_key = None + super().__init__(options) + self._set_storage_options(self._extra_put_headers) ++ self.sig_region = self.region + + def _parse_storage_url(self, storage_url, ssl_context): + hit = re.match(r'^s3s?://([^/]+)/([^/]+)(?:/(.*))?$', storage_url) +@@ -147,94 +148,3 @@ def _delete_multi(self, keys): + + except: + self.conn.discard() +- +- def _authorize_request(self, method, path, headers, subres, query_string): +- '''Add authorization information to *headers*''' +- +- # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html +- +- now = time.gmtime() +- # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT', +- # '%a, %d %b %Y %H:%M:%S GMT') +- +- ymd = time.strftime('%Y%m%d', now) +- ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now) +- +- headers['x-amz-date'] = ymdhms +- headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD' +- # headers['x-amz-content-sha256'] = hashlib.sha256(body).hexdigest() +- headers.pop('Authorization', None) +- +- auth_strs = [method] +- auth_strs.append(urllib.parse.quote(path)) +- +- if query_string: +- s = urllib.parse.urlencode( +- query_string, doseq=True, quote_via=urllib.parse.quote +- ).split('&') +- else: +- s = [] +- if subres: +- s.append(urllib.parse.quote(subres) + '=') +- if s: +- s = '&'.join(sorted(s)) +- else: +- s = '' +- auth_strs.append(s) +- +- # Headers +- sig_hdrs = sorted(x.lower() for x in headers.keys()) +- for hdr in sig_hdrs: +- auth_strs.append('%s:%s' % (hdr, headers[hdr].strip())) +- auth_strs.append('') +- auth_strs.append(';'.join(sig_hdrs)) +- auth_strs.append(headers['x-amz-content-sha256']) +- can_req = '\n'.join(auth_strs) +- # log.debug('canonical request: %s', can_req) +- +- can_req_hash = hashlib.sha256(can_req.encode()).hexdigest() +- str_to_sign = ( +- "AWS4-HMAC-SHA256\n" +- + ymdhms +- + '\n' +- + '%s/%s/s3/aws4_request\n' % (ymd, self.region) +- + can_req_hash +- ) +- # log.debug('string to sign: %s', str_to_sign) +- +- if self.signing_key is None or self.signing_key[1] != ymd: +- self.update_signing_key(ymd) +- signing_key = self.signing_key[0] +- +- sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True) +- +- cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % ( +- self.login, +- now.tm_year, +- now.tm_mon, +- now.tm_mday, +- self.region, +- ) +- +- headers['Authorization'] = ( +- 'AWS4-HMAC-SHA256 ' +- 'Credential=%s,' +- 'SignedHeaders=%s,' +- 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig) +- ) +- +- def update_signing_key(self, ymd): +- date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode()) +- region_key = hmac_sha256(date_key, self.region.encode()) +- service_key = hmac_sha256(region_key, b's3') +- signing_key = hmac_sha256(service_key, b'aws4_request') +- +- self.signing_key = (signing_key, ymd) +- - -def hmac_sha256(key, msg, hex=False): - d = hmac.new(key, msg, hashlib.sha256) @@ -21,76 +202,86 @@ index d19b783..5b5831f 100644 - return d.hexdigest() - else: - return d.digest() -diff --git a/src/s3ql/backends/s3c.py b/src/s3ql/backends/s3c.py -index 11687d5..05750b9 100644 ---- a/src/s3ql/backends/s3c.py -+++ b/src/s3ql/backends/s3c.py -@@ -78,6 +78,8 @@ class Backend(AbstractBackend, metaclass=ABCDocstMeta): - self.conn = self._get_conn() - self.password = options.backend_password - self.login = options.backend_login -+ self.region = "us-east-1" +diff --git a/src/s3ql/backends/s3c4.py b/src/s3ql/backends/s3c4.py +new file mode 100644 +index 00000000..37ff0b7a +--- /dev/null ++++ b/src/s3ql/backends/s3c4.py +@@ -0,0 +1,140 @@ ++''' ++s3c4.py - this file is part of S3QL. ++ ++Copyright © 2008 Nikolaus Rath ++ ++This work can be distributed under the terms of the GNU GPLv3. ++''' ++ ++import hashlib ++import hmac ++import logging ++import re ++import time ++import urllib.parse ++from xml.sax.saxutils import escape as xml_escape ++ ++from ..logging import QuietError ++from . import s3c ++from .common import retry ++from .s3c import get_S3Error ++ ++log = logging.getLogger(__name__) ++ ++# Maximum number of keys that can be deleted at once ++MAX_KEYS = 1000 ++ ++# Pylint goes berserk with false positives ++# pylint: disable=E1002,E1101 ++ ++ ++class Backend(s3c.Backend): ++ """A backend to stored data in some S3 compatible storage service. ++ ++ This classes uses AWS Signature V4 for authorization. ++ """ ++ ++ known_options = s3c.Backend.known_options | {'sig-region'} ++ ++ def __init__(self, options): ++ self.sig_region = options.backend_options.get('sig-region', 'us-east-1') + self.signing_key = None - - @property - @copy_ancestor_docstring -@@ -597,43 +599,76 @@ class Backend(AbstractBackend, metaclass=ABCDocstMeta): - def _authorize_request(self, method, path, headers, subres, query_string): - '''Add authorization information to *headers*''' - -- # See http://docs.amazonwebservices.com/AmazonS3/latest/dev/RESTAuthentication.html ++ super().__init__(options) ++ ++ def __str__(self): ++ return 's3c4://%s/%s/%s' % (self.hostname, self.bucket_name, self.prefix) ++ ++ def _authorize_request(self, method, path, headers, subres, query_string): ++ '''Add authorization information to *headers*''' ++ + # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html - -- # Date, can't use strftime because it's locale dependent - now = time.gmtime() -- headers['Date'] = ('%s, %02d %s %04d %02d:%02d:%02d GMT' -- % (C_DAY_NAMES[now.tm_wday], -- now.tm_mday, -- C_MONTH_NAMES[now.tm_mon - 1], -- now.tm_year, now.tm_hour, -- now.tm_min, now.tm_sec)) -- -- auth_strs = [method, '\n'] -- -- for hdr in ('Content-MD5', 'Content-Type', 'Date'): -- if hdr in headers: -- auth_strs.append(headers[hdr]) -- auth_strs.append('\n') -- -- for hdr in sorted(x for x in headers if x.lower().startswith('x-amz-')): -- val = ' '.join(re.split(r'\s*\n\s*', headers[hdr].strip())) -- auth_strs.append('%s:%s\n' % (hdr, val)) -- -- # Always include bucket name in path for signing -- if self.hostname.startswith(self.bucket_name): -- path = '/%s%s' % (self.bucket_name, path) -- sign_path = urllib.parse.quote(path) -- auth_strs.append(sign_path) -- if subres: -- auth_strs.append('?%s' % subres) -+ #now = time.strptime('Fri, 24 May 2013 00:00:00 GMT', ++ ++ now = time.gmtime() ++ # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT', + # '%a, %d %b %Y %H:%M:%S GMT') - -- # False positive, hashlib *does* have sha1 member -- #pylint: disable=E1101 -- auth_str = ''.join(auth_strs).encode() -- signature = b64encode(hmac.new(self.password.encode(), auth_str, -- hashlib.sha1).digest()).decode() ++ + ymd = time.strftime('%Y%m%d', now) + ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now) - -- headers['Authorization'] = 'AWS %s:%s' % (self.login, signature) ++ ++ # add non-standard port to host header, needed for correct signature ++ if self.port != 443: ++ headers['host'] = '%s:%s' % (self.hostname, self.port) ++ + headers['x-amz-date'] = ymdhms + headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD' -+ #headers['x-amz-content-sha256'] = hashlib.sha256(body).hexdigest() ++ + headers.pop('Authorization', None) + + auth_strs = [method] + auth_strs.append(urllib.parse.quote(path)) + + if query_string: -+ s = urllib.parse.urlencode(query_string, doseq=True, -+ quote_via=urllib.parse.quote).split('&') ++ s = urllib.parse.urlencode( ++ query_string, doseq=True, quote_via=urllib.parse.quote ++ ).split('&') + else: + s = [] + if subres: @@ -102,20 +293,24 @@ index 11687d5..05750b9 100644 + auth_strs.append(s) + + # Headers -+ sig_hdrs = sorted(x for x in (x.lower() for x in headers.keys()) if x == "host" or x == "content-type" or x.startswith("x-amz-")) ++ sig_hdrs = sorted(x.lower() for x in headers.keys()) + for hdr in sig_hdrs: + auth_strs.append('%s:%s' % (hdr, headers[hdr].strip())) + auth_strs.append('') + auth_strs.append(';'.join(sig_hdrs)) + auth_strs.append(headers['x-amz-content-sha256']) + can_req = '\n'.join(auth_strs) -+ #log.debug('canonical request: %s', can_req) ++ # log.debug('canonical request: %s', can_req) + + can_req_hash = hashlib.sha256(can_req.encode()).hexdigest() -+ str_to_sign = ("AWS4-HMAC-SHA256\n" + ymdhms + '\n' + -+ '%s/%s/s3/aws4_request\n' % (ymd, self.region) + -+ can_req_hash) -+ #log.debug('string to sign: %s', str_to_sign) ++ str_to_sign = ( ++ "AWS4-HMAC-SHA256\n" ++ + ymdhms ++ + '\n' ++ + '%s/%s/s3/aws4_request\n' % (ymd, self.sig_region) ++ + can_req_hash ++ ) ++ # log.debug('string to sign: %s', str_to_sign) + + if self.signing_key is None or self.signing_key[1] != ymd: + self.update_signing_key(ymd) @@ -123,47 +318,75 @@ index 11687d5..05750b9 100644 + + sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True) + -+ cred = ('%s/%04d%02d%02d/%s/s3/aws4_request' -+ % (self.login, now.tm_year, now.tm_mon, now.tm_mday, -+ self.region)) ++ cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % ( ++ self.login, ++ now.tm_year, ++ now.tm_mon, ++ now.tm_mday, ++ self.sig_region, ++ ) + + headers['Authorization'] = ( + 'AWS4-HMAC-SHA256 ' + 'Credential=%s,' + 'SignedHeaders=%s,' -+ 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig)) ++ 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig) ++ ) + + def update_signing_key(self, ymd): -+ date_key = hmac_sha256(("AWS4" + self.password).encode(), -+ ymd.encode()) -+ region_key = hmac_sha256(date_key, self.region.encode()) ++ date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode()) ++ region_key = hmac_sha256(date_key, self.sig_region.encode()) + service_key = hmac_sha256(region_key, b's3') + signing_key = hmac_sha256(service_key, b'aws4_request') + + self.signing_key = (signing_key, ymd) - - def _send_request(self, method, path, headers, subres=None, query_string=None, body=None): - '''Add authentication and send request -@@ -646,7 +681,7 @@ class Backend(AbstractBackend, metaclass=ABCDocstMeta): - - if not self.hostname.startswith(self.bucket_name): - path = '/%s%s' % (self.bucket_name, path) -- headers['host'] = self.hostname -+ headers['host'] = self.hostname if int(self.port) == 80 or int(self.port) == 443 else f"{self.hostname}:{self.port}" - - self._authorize_request(method, path, headers, subres, query_string) - -@@ -950,6 +985,13 @@ def md5sum_b64(buf): - - return b64encode(hashlib.md5(buf).digest()).decode('ascii') - ++ ++ +def hmac_sha256(key, msg, hex=False): + d = hmac.new(key, msg, hashlib.sha256) + if hex: + return d.hexdigest() + else: + return d.digest() -+ - def _parse_retry_after(header): - '''Parse headers for Retry-After value''' +diff --git a/src/s3ql/parse_args.py b/src/s3ql/parse_args.py +index 272e10c7..24ad50f4 100644 +--- a/src/s3ql/parse_args.py ++++ b/src/s3ql/parse_args.py +@@ -374,7 +374,7 @@ def storage_url_type(s): + # slash (even when using a prefix), but we can't do that now because it + # would make file systems created without trailing slash inaccessible. + if re.match(r'^(s3|gs)://[^/]+$', s) or re.match( +- r'^(s3c|swift(ks)?|rackspace)://[^/]+/[^/]+$', s ++ r'^(s3c|s3c4|swift(ks)?|rackspace)://[^/]+/[^/]+$', s + ): + s += '/' +diff --git a/tests/mock_server.py b/tests/mock_server.py +index b453e705..e3084065 100644 +--- a/tests/mock_server.py ++++ b/tests/mock_server.py +@@ -292,6 +292,16 @@ def send_error(self, status, message=None, code='', resource='', extra_headers=N + self.wfile.write(content) + + ++class S3C4RequestHandler(S3CRequestHandler): ++ '''Request Handler for s3c4 backend ++ ++ Currently identical to S3CRequestHandler since mock request handlers ++ do not check request signatures. ++ ''' ++ ++ pass ++ ++ + class BasicSwiftRequestHandler(S3CRequestHandler): + '''A request handler implementing a subset of the OpenStack Swift Interface + +@@ -569,6 +579,7 @@ def inline_error(http_status, body): + #: corresponding storage urls + handler_list = [ + (S3CRequestHandler, 's3c://%(host)s:%(port)d/s3ql_test'), ++ (S3C4RequestHandler, 's3c4://%(host)s:%(port)d/s3ql_test'), + # Special syntax only for testing against mock server + (BasicSwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'), + (CopySwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'),