depot/patches/base/s3ql/s3v4.patch

393 lines
13 KiB
Diff
Raw Normal View History

From 11e3a9cea77cd8498d874f7fd69a938af4da68cd Mon Sep 17 00:00:00 2001
From: xeji <36407913+xeji@users.noreply.github.com>
Date: Thu, 28 Mar 2024 22:19:11 +0100
Subject: [PATCH] new backend s3c4: s3c with V4 request signatures (#349)
---
rst/backends.rst | 15 ++++
src/s3ql/backends/__init__.py | 3 +-
src/s3ql/backends/s3.py | 100 ++----------------------
src/s3ql/backends/s3c4.py | 140 ++++++++++++++++++++++++++++++++++
src/s3ql/parse_args.py | 2 +-
tests/mock_server.py | 11 +++
6 files changed, 174 insertions(+), 97 deletions(-)
create mode 100644 src/s3ql/backends/s3c4.py
diff --git a/rst/backends.rst b/rst/backends.rst
index 7220ee96..4bc68387 100644
--- a/rst/backends.rst
+++ b/rst/backends.rst
@@ -341,6 +341,14 @@ can be an arbitrary prefix that will be prepended to all object names
used by S3QL. This allows you to store several S3QL file systems in
the same bucket.
+`s3c://` authenticates API requests using AWS V2 signatures, which are
+deprecated by AWS but still accepted by many S3 compatible services.
+
+`s3c4://` denotes a variant of this backend that works the same
+but uses AWS V4 signatures for request authentication instead: ::
+
+ s3c4://<hostname>:<port>/<bucketname>/<prefix>
+
The S3 compatible backend accepts the following backend options:
.. option:: no-ssl
@@ -385,6 +393,13 @@ The S3 compatible backend accepts the following backend options:
necessary if your storage server does not return a valid response
body for a successful copy operation.
+.. option:: sig-region=<region>
+
+ For `s3c4://` variant only: Region to use for calculating V4
+ request signatures. Contrary to S3, the region is not a defined
+ part of the storage URL and must be specified separately.
+ Defaults to `us-east-1`.
+
.. _`S3 COPY API`: http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
.. __: https://doc.s3.amazonaws.com/proposals/copy.html
diff --git a/src/s3ql/backends/__init__.py b/src/s3ql/backends/__init__.py
index a1335762..442828cd 100644
--- a/src/s3ql/backends/__init__.py
+++ b/src/s3ql/backends/__init__.py
@@ -6,7 +6,7 @@
This work can be distributed under the terms of the GNU GPLv3.
'''
-from . import gs, local, rackspace, s3, s3c, swift, swiftks
+from . import gs, local, rackspace, s3, s3c, s3c4, swift, swiftks
from .b2.b2_backend import B2Backend
#: Mapping from storage URL prefixes to backend classes
@@ -15,6 +15,7 @@
'local': local.Backend,
'gs': gs.Backend,
's3c': s3c.Backend,
+ 's3c4': s3c4.Backend,
'swift': swift.Backend,
'swiftks': swiftks.Backend,
'rackspace': rackspace.Backend,
diff --git a/src/s3ql/backends/s3.py b/src/s3ql/backends/s3.py
index e05a49ba..5548a855 100644
--- a/src/s3ql/backends/s3.py
+++ b/src/s3ql/backends/s3.py
@@ -15,7 +15,7 @@
from xml.sax.saxutils import escape as xml_escape
from ..logging import QuietError
-from . import s3c
+from . import s3c4
from .common import retry
from .s3c import get_S3Error
@@ -28,22 +28,23 @@
# pylint: disable=E1002,E1101
-class Backend(s3c.Backend):
+class Backend(s3c4.Backend):
"""A backend to store data in Amazon S3
This class uses standard HTTP connections to connect to S3.
"""
- known_options = (s3c.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - {
+ known_options = (s3c4.Backend.known_options | {'sse', 'rrs', 'ia', 'oia', 'it'}) - {
'dumb-copy',
'disable-expect100',
+ 'sig-region',
}
def __init__(self, options):
self.region = None
- self.signing_key = None
super().__init__(options)
self._set_storage_options(self._extra_put_headers)
+ self.sig_region = self.region
def _parse_storage_url(self, storage_url, ssl_context):
hit = re.match(r'^s3s?://([^/]+)/([^/]+)(?:/(.*))?$', storage_url)
@@ -147,94 +148,3 @@ def _delete_multi(self, keys):
except:
self.conn.discard()
-
- def _authorize_request(self, method, path, headers, subres, query_string):
- '''Add authorization information to *headers*'''
-
- # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html
-
- now = time.gmtime()
- # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT',
- # '%a, %d %b %Y %H:%M:%S GMT')
-
- ymd = time.strftime('%Y%m%d', now)
- ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now)
-
- headers['x-amz-date'] = ymdhms
- headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD'
- # headers['x-amz-content-sha256'] = hashlib.sha256(body).hexdigest()
- headers.pop('Authorization', None)
-
- auth_strs = [method]
- auth_strs.append(urllib.parse.quote(path))
-
- if query_string:
- s = urllib.parse.urlencode(
- query_string, doseq=True, quote_via=urllib.parse.quote
- ).split('&')
- else:
- s = []
- if subres:
- s.append(urllib.parse.quote(subres) + '=')
- if s:
- s = '&'.join(sorted(s))
- else:
- s = ''
- auth_strs.append(s)
-
- # Headers
- sig_hdrs = sorted(x.lower() for x in headers.keys())
- for hdr in sig_hdrs:
- auth_strs.append('%s:%s' % (hdr, headers[hdr].strip()))
- auth_strs.append('')
- auth_strs.append(';'.join(sig_hdrs))
- auth_strs.append(headers['x-amz-content-sha256'])
- can_req = '\n'.join(auth_strs)
- # log.debug('canonical request: %s', can_req)
-
- can_req_hash = hashlib.sha256(can_req.encode()).hexdigest()
- str_to_sign = (
- "AWS4-HMAC-SHA256\n"
- + ymdhms
- + '\n'
- + '%s/%s/s3/aws4_request\n' % (ymd, self.region)
- + can_req_hash
- )
- # log.debug('string to sign: %s', str_to_sign)
-
- if self.signing_key is None or self.signing_key[1] != ymd:
- self.update_signing_key(ymd)
- signing_key = self.signing_key[0]
-
- sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True)
-
- cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % (
- self.login,
- now.tm_year,
- now.tm_mon,
- now.tm_mday,
- self.region,
- )
-
- headers['Authorization'] = (
- 'AWS4-HMAC-SHA256 '
- 'Credential=%s,'
- 'SignedHeaders=%s,'
- 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig)
- )
-
- def update_signing_key(self, ymd):
- date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode())
- region_key = hmac_sha256(date_key, self.region.encode())
- service_key = hmac_sha256(region_key, b's3')
- signing_key = hmac_sha256(service_key, b'aws4_request')
-
- self.signing_key = (signing_key, ymd)
-
-
-def hmac_sha256(key, msg, hex=False):
- d = hmac.new(key, msg, hashlib.sha256)
- if hex:
- return d.hexdigest()
- else:
- return d.digest()
diff --git a/src/s3ql/backends/s3c4.py b/src/s3ql/backends/s3c4.py
new file mode 100644
index 00000000..37ff0b7a
--- /dev/null
+++ b/src/s3ql/backends/s3c4.py
@@ -0,0 +1,140 @@
+'''
+s3c4.py - this file is part of S3QL.
+
+Copyright © 2008 Nikolaus Rath <Nikolaus@rath.org>
+
+This work can be distributed under the terms of the GNU GPLv3.
+'''
+
+import hashlib
+import hmac
+import logging
+import re
+import time
+import urllib.parse
+from xml.sax.saxutils import escape as xml_escape
+
+from ..logging import QuietError
+from . import s3c
+from .common import retry
+from .s3c import get_S3Error
+
+log = logging.getLogger(__name__)
+
+# Maximum number of keys that can be deleted at once
+MAX_KEYS = 1000
+
+# Pylint goes berserk with false positives
+# pylint: disable=E1002,E1101
+
+
+class Backend(s3c.Backend):
+ """A backend to stored data in some S3 compatible storage service.
+
+ This classes uses AWS Signature V4 for authorization.
+ """
+
+ known_options = s3c.Backend.known_options | {'sig-region'}
+
+ def __init__(self, options):
+ self.sig_region = options.backend_options.get('sig-region', 'us-east-1')
+ self.signing_key = None
+ super().__init__(options)
+
+ def __str__(self):
+ return 's3c4://%s/%s/%s' % (self.hostname, self.bucket_name, self.prefix)
+
+ def _authorize_request(self, method, path, headers, subres, query_string):
+ '''Add authorization information to *headers*'''
+
+ # See http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html
+
+ now = time.gmtime()
+ # now = time.strptime('Fri, 24 May 2013 00:00:00 GMT',
+ # '%a, %d %b %Y %H:%M:%S GMT')
+
+ ymd = time.strftime('%Y%m%d', now)
+ ymdhms = time.strftime('%Y%m%dT%H%M%SZ', now)
+
+ # add non-standard port to host header, needed for correct signature
+ if self.port != 443:
+ headers['host'] = '%s:%s' % (self.hostname, self.port)
+
+ headers['x-amz-date'] = ymdhms
+ headers['x-amz-content-sha256'] = 'UNSIGNED-PAYLOAD'
+
+ headers.pop('Authorization', None)
+
+ auth_strs = [method]
+ auth_strs.append(urllib.parse.quote(path))
+
+ if query_string:
+ s = urllib.parse.urlencode(
+ query_string, doseq=True, quote_via=urllib.parse.quote
+ ).split('&')
+ else:
+ s = []
+ if subres:
+ s.append(urllib.parse.quote(subres) + '=')
+ if s:
+ s = '&'.join(sorted(s))
+ else:
+ s = ''
+ auth_strs.append(s)
+
+ # Headers
+ sig_hdrs = sorted(x.lower() for x in headers.keys())
+ for hdr in sig_hdrs:
+ auth_strs.append('%s:%s' % (hdr, headers[hdr].strip()))
+ auth_strs.append('')
+ auth_strs.append(';'.join(sig_hdrs))
+ auth_strs.append(headers['x-amz-content-sha256'])
+ can_req = '\n'.join(auth_strs)
+ # log.debug('canonical request: %s', can_req)
+
+ can_req_hash = hashlib.sha256(can_req.encode()).hexdigest()
+ str_to_sign = (
+ "AWS4-HMAC-SHA256\n"
+ + ymdhms
+ + '\n'
+ + '%s/%s/s3/aws4_request\n' % (ymd, self.sig_region)
+ + can_req_hash
+ )
+ # log.debug('string to sign: %s', str_to_sign)
+
+ if self.signing_key is None or self.signing_key[1] != ymd:
+ self.update_signing_key(ymd)
+ signing_key = self.signing_key[0]
+
+ sig = hmac_sha256(signing_key, str_to_sign.encode(), hex=True)
+
+ cred = '%s/%04d%02d%02d/%s/s3/aws4_request' % (
+ self.login,
+ now.tm_year,
+ now.tm_mon,
+ now.tm_mday,
+ self.sig_region,
+ )
+
+ headers['Authorization'] = (
+ 'AWS4-HMAC-SHA256 '
+ 'Credential=%s,'
+ 'SignedHeaders=%s,'
+ 'Signature=%s' % (cred, ';'.join(sig_hdrs), sig)
+ )
+
+ def update_signing_key(self, ymd):
+ date_key = hmac_sha256(("AWS4" + self.password).encode(), ymd.encode())
+ region_key = hmac_sha256(date_key, self.sig_region.encode())
+ service_key = hmac_sha256(region_key, b's3')
+ signing_key = hmac_sha256(service_key, b'aws4_request')
+
+ self.signing_key = (signing_key, ymd)
+
+
+def hmac_sha256(key, msg, hex=False):
+ d = hmac.new(key, msg, hashlib.sha256)
+ if hex:
+ return d.hexdigest()
+ else:
+ return d.digest()
diff --git a/src/s3ql/parse_args.py b/src/s3ql/parse_args.py
index 272e10c7..24ad50f4 100644
--- a/src/s3ql/parse_args.py
+++ b/src/s3ql/parse_args.py
@@ -374,7 +374,7 @@ def storage_url_type(s):
# slash (even when using a prefix), but we can't do that now because it
# would make file systems created without trailing slash inaccessible.
if re.match(r'^(s3|gs)://[^/]+$', s) or re.match(
- r'^(s3c|swift(ks)?|rackspace)://[^/]+/[^/]+$', s
+ r'^(s3c|s3c4|swift(ks)?|rackspace)://[^/]+/[^/]+$', s
):
s += '/'
diff --git a/tests/mock_server.py b/tests/mock_server.py
index b453e705..e3084065 100644
--- a/tests/mock_server.py
+++ b/tests/mock_server.py
@@ -292,6 +292,16 @@ def send_error(self, status, message=None, code='', resource='', extra_headers=N
self.wfile.write(content)
+class S3C4RequestHandler(S3CRequestHandler):
+ '''Request Handler for s3c4 backend
+
+ Currently identical to S3CRequestHandler since mock request handlers
+ do not check request signatures.
+ '''
+
+ pass
+
+
class BasicSwiftRequestHandler(S3CRequestHandler):
'''A request handler implementing a subset of the OpenStack Swift Interface
@@ -569,6 +579,7 @@ def inline_error(http_status, body):
#: corresponding storage urls
handler_list = [
(S3CRequestHandler, 's3c://%(host)s:%(port)d/s3ql_test'),
+ (S3C4RequestHandler, 's3c4://%(host)s:%(port)d/s3ql_test'),
# Special syntax only for testing against mock server
(BasicSwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'),
(CopySwiftRequestHandler, 'swift://%(host)s:%(port)d/s3ql_test'),