From 11e3a9cea77cd8498d874f7fd69a938af4da68cd Mon Sep 17 00:00:00 2001 From: xeji <36407913+xeji@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:19:11 +0100 Subject: [PATCH] new backend s3c4: s3c with V4 request signatures (#349) --- rst/backends.rst | 15 ++++ src/s3ql/backends/__init__.py | 3 +- src/s3ql/backends/s3.py | 100 ++---------------------- src/s3ql/backends/s3c4.py | 140 ++++++++++++++++++++++++++++++++++ src/s3ql/parse_args.py | 2 +- tests/mock_server.py | 11 +++ 6 files changed, 174 insertions(+), 97 deletions(-) create mode 100644 src/s3ql/backends/s3c4.py diff --git a/rst/backends.rst b/rst/backends.rst index 7220ee96..4bc68387 100644 --- a/rst/backends.rst +++ b/rst/backends.rst @@ -341,6 +341,14 @@ can be an arbitrary prefix that will be prepended to all object names used by S3QL. This allows you to store several S3QL file systems in the same bucket. +`s3c://` authenticates API requests using AWS V2 signatures, which are +deprecated by AWS but still accepted by many S3 compatible services. + +`s3c4://` denotes a variant of this backend that works the same +but uses AWS V4 signatures for request authentication instead: :: + + s3c4://<hostname>:<port>/<bucketname>/<prefix> + The S3 compatible backend accepts the following backend options: .. option:: no-ssl @@ -385,6 +393,13 @@ The S3 compatible backend accepts the following backend options: necessary if your storage server does not return a valid response body for a successful copy operation. +.. option:: sig-region=<region> + + For `s3c4://` variant only: Region to use for calculating V4 + request signatures. Contrary to S3, the region is not a defined + part of the storage URL and must be specified separately. + Defaults to `us-east-1`. + .. _`S3 COPY API`: http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html .. 
__: https://doc.s3.amazonaws.com/proposals/copy.html diff --git a/src/s3ql/backends/__init__.py b/src/s3ql/backends/__init__.py index a1335762..442828cd 100644 --- a/src/s3ql/backends/__init__.py +++ b/src/s3ql/backends/__init__.py @@ -6,7 +6,7 @@ This work can be distributed under the terms of the GNU GPLv3. ''' -from . import gs, local, rackspace, s3, s3c, swift, swiftks +from . import gs, local, rackspace, s3, s3c, s3c4, swift, swiftks from .b2.b2_backend import B2Backend #: Mapping from storage URL prefixes to backend classes @@ -15,6 +15,7 @@ 'local': local.Backend, 'gs': gs.Backend, 's3c': s3c.Backend, + 's3c4': s3c4.Backend, 'swift': swift.Backend, 'swiftks': swiftks.Backend, 'rackspace': rackspace.Backend, diff --git a/src/s3ql/backends/s3.py b/src/s3ql/backends/s3.py index e05a49ba..5548a855 100644 --- a/src/s3ql/backends/s3.py +++ b/src/s3ql/backends/s3.py @@ -15,7 +15,7 @@ from xml.sax.saxutils import escape as xml_escape from ..logging import QuietError -from . import s3c +from . import s3c4 from .common import retry from .s3c import get_S3Error @@ -28,22 +28,23 @@ # pylint: disable=E1002,E1101 -class Backend(s3c.Backend): +class Backend(s3c4.Backend): """A backend to store data in Amazon S3 This class uses standard HTTP connections to connect to S3. 
""" - known_options = (s3c.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - { + known_options = (s3c4.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - { 'dumb-copy', 'disable-expect100', + 'sig-region', } def __init__(self, options): self.region = None - self.signing_key = None super().__init__(options) self._set_storage_options(self._extra_put_headers) + self.sig_region = self.region def _parse_storage_url(self, storage_url, ssl_context): hit = re.match(r'^s3s?://([^/]+)/([^/]+)(?:/(.*))?$', storage_url) @@ -147,94 +148,3 @@ def _delete_multi(self, keys): except: self.conn.discard() - - def _authorize_request(self, method, path, headers, subres, query_string): - '''Add authorization information to *headers*''' - - # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html - - now = time.gmtime() - # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT', - # '%a, %d %b %Y %H:%M:%S GMT') - - ymd = time.strftime('%Y%m%d', now) - ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now) - - headers['x-amz-date'] = ymdhms - headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD' - # headers['x-amz-content-sha256'] = hashlib.sha256(body).hexdigest() - headers.pop('Authorization', None) - - auth_strs = [method] - auth_strs.append(urllib.parse.quote(path)) - - if query_string: - s = urllib.parse.urlencode( - query_string, doseq=True, quote_via=urllib.parse.quote - ).split('&') - else: - s = [] - if subres: - s.append(urllib.parse.quote(subres) + '=') - if s: - s = '&'.join(sorted(s)) - else: - s = '' - auth_strs.append(s) - - # Headers - sig_hdrs = sorted(x.lower() for x in headers.keys()) - for hdr in sig_hdrs: - auth_strs.append('%s:%s' % (hdr, headers[hdr].strip())) - auth_strs.append('') - auth_strs.append(';'.join(sig_hdrs)) - auth_strs.append(headers['x-amz-content-sha256']) - can_req = '\n'.join(auth_strs) - # log.debug('canonical request: %s', can_req) - - can_req_hash = hashlib.sha256(can_req.encode()).hexdigest() - 
str_to_sign = ( - "AWS4-HMAC-SHA256\n" - + ymdhms - + '\n' - + '%s/%s/s3/aws4_request\n' % (ymd, self.region) - + can_req_hash - ) - # log.debug('string to sign: %s', str_to_sign) - - if self.signing_key is None or self.signing_key[1] != ymd: - self.update_signing_key(ymd) - signing_key = self.signing_key[0] - - sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True) - - cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % ( - self.login, - now.tm_year, - now.tm_mon, - now.tm_mday, - self.region, - ) - - headers['Authorization'] = ( - 'AWS4-HMAC-SHA256 ' - 'Credential=%s,' - 'SignedHeaders=%s,' - 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig) - ) - - def update_signing_key(self, ymd): - date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode()) - region_key = hmac_sha256(date_key, self.region.encode()) - service_key = hmac_sha256(region_key, b's3') - signing_key = hmac_sha256(service_key, b'aws4_request') - - self.signing_key = (signing_key, ymd) - - -def hmac_sha256(key, msg, hex=False): - d = hmac.new(key, msg, hashlib.sha256) - if hex: - return d.hexdigest() - else: - return d.digest() diff --git a/src/s3ql/backends/s3c4.py b/src/s3ql/backends/s3c4.py new file mode 100644 index 00000000..37ff0b7a --- /dev/null +++ b/src/s3ql/backends/s3c4.py @@ -0,0 +1,140 @@ +''' +s3c4.py - this file is part of S3QL. + +Copyright © 2008 Nikolaus Rath + +This work can be distributed under the terms of the GNU GPLv3. +''' + +import hashlib +import hmac +import logging +import re +import time +import urllib.parse +from xml.sax.saxutils import escape as xml_escape + +from ..logging import QuietError +from . import s3c +from .common import retry +from .s3c import get_S3Error + +log = logging.getLogger(__name__) + +# Maximum number of keys that can be deleted at once +MAX_KEYS = 1000 + +# Pylint goes berserk with false positives +# pylint: disable=E1002,E1101 + + +class Backend(s3c.Backend): + """A backend to store data in some S3 compatible storage service. 
+ + This class uses AWS Signature V4 for authorization. + """ + + known_options = s3c.Backend.known_options | {'sig-region'} + + def __init__(self, options): + self.sig_region = options.backend_options.get('sig-region', 'us-east-1') + self.signing_key = None + super().__init__(options) + + def __str__(self): + return 's3c4://%s/%s/%s' % (self.hostname, self.bucket_name, self.prefix) + + def _authorize_request(self, method, path, headers, subres, query_string): + '''Add authorization information to *headers*''' + + # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html + + now = time.gmtime() + # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT', + # '%a, %d %b %Y %H:%M:%S GMT') + + ymd = time.strftime('%Y%m%d', now) + ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now) + + # add non-standard port to host header, needed for correct signature + if self.port != 443: + headers['host'] = '%s:%s' % (self.hostname, self.port) + + headers['x-amz-date'] = ymdhms + headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD' + + headers.pop('Authorization', None) + + auth_strs = [method] + auth_strs.append(urllib.parse.quote(path)) + + if query_string: + s = urllib.parse.urlencode( + query_string, doseq=True, quote_via=urllib.parse.quote + ).split('&') + else: + s = [] + if subres: + s.append(urllib.parse.quote(subres) + '=') + if s: + s = '&'.join(sorted(s)) + else: + s = '' + auth_strs.append(s) + + # Headers + sig_hdrs = sorted(x.lower() for x in headers.keys()) + for hdr in sig_hdrs: + auth_strs.append('%s:%s' % (hdr, headers[hdr].strip())) + auth_strs.append('') + auth_strs.append(';'.join(sig_hdrs)) + auth_strs.append(headers['x-amz-content-sha256']) + can_req = '\n'.join(auth_strs) + # log.debug('canonical request: %s', can_req) + + can_req_hash = hashlib.sha256(can_req.encode()).hexdigest() + str_to_sign = ( + "AWS4-HMAC-SHA256\n" + + ymdhms + + '\n' + + '%s/%s/s3/aws4_request\n' % (ymd, self.sig_region) + + can_req_hash + ) + # 
log.debug('string to sign: %s', str_to_sign) + + if self.signing_key is None or self.signing_key[1] != ymd: + self.update_signing_key(ymd) + signing_key = self.signing_key[0] + + sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True) + + cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % ( + self.login, + now.tm_year, + now.tm_mon, + now.tm_mday, + self.sig_region, + ) + + headers['Authorization'] = ( + 'AWS4-HMAC-SHA256 ' + 'Credential=%s,' + 'SignedHeaders=%s,' + 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig) + ) + + def update_signing_key(self, ymd): + date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode()) + region_key = hmac_sha256(date_key, self.sig_region.encode()) + service_key = hmac_sha256(region_key, b's3') + signing_key = hmac_sha256(service_key, b'aws4_request') + + self.signing_key = (signing_key, ymd) + + +def hmac_sha256(key, msg, hex=False): + d = hmac.new(key, msg, hashlib.sha256) + if hex: + return d.hexdigest() + else: + return d.digest() diff --git a/src/s3ql/parse_args.py b/src/s3ql/parse_args.py index 272e10c7..24ad50f4 100644 --- a/src/s3ql/parse_args.py +++ b/src/s3ql/parse_args.py @@ -374,7 +374,7 @@ def storage_url_type(s): # slash (even when using a prefix), but we can't do that now because it # would make file systems created without trailing slash inaccessible. if re.match(r'^(s3|gs)://[^/]+$', s) or re.match( - r'^(s3c|swift(ks)?|rackspace)://[^/]+/[^/]+$', s + r'^(s3c|s3c4|swift(ks)?|rackspace)://[^/]+/[^/]+$', s ): s += '/' diff --git a/tests/mock_server.py b/tests/mock_server.py index b453e705..e3084065 100644 --- a/tests/mock_server.py +++ b/tests/mock_server.py @@ -292,6 +292,16 @@ def send_error(self, status, message=None, code='', resource='', extra_headers=N self.wfile.write(content) +class S3C4RequestHandler(S3CRequestHandler): + '''Request Handler for s3c4 backend + + Currently identical to S3CRequestHandler since mock request handlers + do not check request signatures. 
+ ''' + + pass + + class BasicSwiftRequestHandler(S3CRequestHandler): '''A request handler implementing a subset of the OpenStack Swift Interface @@ -569,6 +579,7 @@ def inline_error(http_status, body): #: corresponding storage urls handler_list = [ (S3CRequestHandler, 's3c://%(host)s:%(port)d/s3ql_test'), + (S3C4RequestHandler, 's3c4://%(host)s:%(port)d/s3ql_test'), # Special syntax only for testing against mock server (BasicSwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'), (CopySwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'),