From f23f08e1550af9a3d32481d5d8c18d8fff59e0b3 Mon Sep 17 00:00:00 2001 From: Xavi Garcia Date: Thu, 21 Sep 2023 16:23:00 +0200 Subject: [PATCH] rgw/sfs: Calculate multipart etag based on its parts This fixes the issue with multipart etags by updating the multipart hash for every part. Now etags for multiparts should be following the `md5-num_parts` format. Fixes: https://github.com/aquarist-labs/s3gw/issues/721 Signed-off-by: Xavi Garcia --- qa/rgw/store/sfs/tests/test-sfs-multipart.py | 56 +++++++++++++++----- src/rgw/driver/sfs/multipart.cc | 1 + 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/qa/rgw/store/sfs/tests/test-sfs-multipart.py b/qa/rgw/store/sfs/tests/test-sfs-multipart.py index e9a6311300d0f..c65b2188337f8 100644 --- a/qa/rgw/store/sfs/tests/test-sfs-multipart.py +++ b/qa/rgw/store/sfs/tests/test-sfs-multipart.py @@ -25,6 +25,7 @@ from pydantic import BaseModel import hashlib import datetime +import re ACCESS_KEY = "test" SECRET_KEY = "test" @@ -154,6 +155,24 @@ def assert_bucket_exists(self, name: str) -> None: break self.assertTrue(found) + def verify_multipart_response(self, response, nb_parts, objsize) -> None: + self.assertTrue("ETag" in response) + # check that it is a md5 followed by - followed by number of parts + etag_multipart_re = re.compile('"[A-Fa-f0-9]{32}-[0-9]+"') + self.assertIsNotNone(etag_multipart_re.match(response["ETag"])) + # we uploaded 10 parts + self.assertTrue(response["ETag"].endswith('-{}"'.format(nb_parts))) + self.assertTrue("LastModified" in response) + now = datetime.datetime.now() + # greater just in case we're running this test right at new year's eve + self.assertGreaterEqual(now.year, response["LastModified"].year) + self.assertTrue("ContentType" in response) + self.assertEqual("binary/octet-stream", response["ContentType"]) + self.assertTrue("VersionId" in response) + self.assertNotEqual("", response["VersionId"]) + self.assertTrue("ContentLength" in response) + self.assertEqual(objsize, response["ContentLength"]) + def test_dne_upload_multipart(self): bucket_name = self.create_bucket() objname = self.get_random_object_name() @@ -466,18 +485,29 @@ def test_multipart_upload_download_response(self): obj.upload_file(objpath.as_posix(), Config=cfg) response = self.s3c.get_object(Bucket=bucket_name, Key=objname) - self.assertTrue("ETag" in response) - # we uploaded 10 parts - self.assertTrue(response["ETag"].endswith('e-10"')) - self.assertTrue("LastModified" in response) - now = datetime.datetime.now() - # greater just in case we're running this test right at new year's eve - self.assertGreaterEqual(now.year, response["LastModified"].year) - self.assertTrue("ContentType" in response) - self.assertEqual("binary/octet-stream", response["ContentType"]) - self.assertTrue("VersionId" in response) - self.assertNotEqual("", response["VersionId"]) - self.assertTrue("ContentLength" in response) - self.assertEqual(objsize, response["ContentLength"]) + self.verify_multipart_response(response, 10, objsize) + # store etag to compare with second upload + first_upload_etag = response["ETag"] + + # now upload a new multipart object + objname = self.get_random_object_name() + objsize = 110 * 1024**2 # 110 MB + objpath, md5 = self.gen_random_file(objname, objsize) + + cfg = boto3.s3.transfer.TransferConfig( + multipart_threshold=10 * 1024, # 10 MB + max_concurrency=11, + multipart_chunksize=10 * 1024**2, # 10 MB + use_threads=True, + ) + + obj = self.s3.Object(bucket_name, objname) + obj.upload_file(objpath.as_posix(), Config=cfg) + + response = self.s3c.get_object(Bucket=bucket_name, Key=objname) + self.verify_multipart_response(response, 11, objsize) + + # check that etags are different + self.assertNotEqual(response["ETag"], first_upload_etag) self.delete_bucket(bucket_name) diff --git a/src/rgw/driver/sfs/multipart.cc b/src/rgw/driver/sfs/multipart.cc index 240bd35b48bbd..94a021f43d4d5 100644 --- a/src/rgw/driver/sfs/multipart.cc +++ b/src/rgw/driver/sfs/multipart.cc @@ -282,6 +282,7 @@ int SFSMultipartUploadV2::complete( << dendl; return -ERR_INVALID_PART; } + hash.update(etag); if ((part.size < 5 * 1024 * 1024) && (std::distance(it, part_etags.cend()) > 1)) {