From b3e0672535cdbb769fa957b2c91be5c0d830b8ca Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 13 Feb 2023 10:31:31 +0000 Subject: [PATCH] s3: Check multipart upload ETag when --s3-no-head is in use Before this change if --s3-no-head was in use rclone didn't check the multipart upload ETag at all. However the ETag is returned in the final POST request when completing the object. This change uses that ETag from the final POST if --s3-no-head is in use, otherwise it uses the ETag from a fresh HEAD request. See: https://forum.rclone.org/t/in-some-cases-rclone-does-not-use-etag-to-verify-files/36095/ --- backend/s3/s3.go | 56 ++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 75a477a9e..1d9c281d6 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -4986,7 +4986,7 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read var warnStreamUpload sync.Once -func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, versionID *string, err error) { +func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (wantETag, gotETag string, versionID *string, err error) { f := o.fs // make concurrency machinery @@ -5030,7 +5030,7 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si return f.shouldRetry(ctx, err) }) if err != nil { - return etag, nil, fmt.Errorf("multipart upload failed to initialise: %w", err) + return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to initialise: %w", err) } uid := cout.UploadId @@ -5103,7 +5103,7 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si finished = true } else if err != nil { free() - return etag, nil, fmt.Errorf("multipart upload failed to read source: %w", err) + return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to read source: %w", err) } buf = buf[:n] @@ -5158,7 +5158,7 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si } err = g.Wait() if err != nil { - return etag, nil, err + return wantETag, gotETag, nil, err } // sort the completed parts by part number @@ -5180,14 +5180,17 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si return f.shouldRetry(ctx, err) }) if err != nil { - return etag, nil, fmt.Errorf("multipart upload failed to finalise: %w", err) + return wantETag, gotETag, nil, fmt.Errorf("multipart upload failed to finalise: %w", err) } hashOfHashes := md5.Sum(md5s) - etag = fmt.Sprintf("%s-%d", hex.EncodeToString(hashOfHashes[:]), len(parts)) + wantETag = fmt.Sprintf("%s-%d", hex.EncodeToString(hashOfHashes[:]), len(parts)) if resp != nil { + if resp.ETag != nil { + gotETag = *resp.ETag + } versionID = resp.VersionId } - return etag, versionID, nil + return wantETag, gotETag, versionID, nil } // unWrapAwsError unwraps AWS errors, looking for a non AWS error @@ -5487,16 +5490,16 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op } var wantETag string // Multipart upload Etag to check - var gotEtag string // Etag we got from the upload + var gotETag string // Etag we got from the upload var lastModified time.Time // Time we got from the upload var versionID *string // versionID we got from the upload if multipart { - wantETag, versionID, err = o.uploadMultipart(ctx, &req, size, in) + wantETag, gotETag, versionID, err = o.uploadMultipart(ctx, &req, size, in) } else { if o.fs.opt.UsePresignedRequest { - gotEtag, lastModified, versionID, err = o.uploadSinglepartPresignedRequest(ctx, &req, size, in) + gotETag, lastModified, versionID, err = o.uploadSinglepartPresignedRequest(ctx, &req, size, in) } else { - gotEtag, lastModified, versionID, err = o.uploadSinglepartPutObject(ctx, &req, size, in) + gotETag, lastModified, versionID, err = o.uploadSinglepartPutObject(ctx, &req, size, in) } } if err != nil { @@ -5512,32 +5515,33 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op // User requested we don't HEAD the object after uploading it // so make up the object as best we can assuming it got // uploaded properly. If size < 0 then we need to do the HEAD. + var head *s3.HeadObjectOutput if o.fs.opt.NoHead && size >= 0 { - var head s3.HeadObjectOutput - //structs.SetFrom(&head, &req) - setFrom_s3HeadObjectOutput_s3PutObjectInput(&head, &req) + head = new(s3.HeadObjectOutput) + //structs.SetFrom(head, &req) + setFrom_s3HeadObjectOutput_s3PutObjectInput(head, &req) head.ETag = &md5sumHex // doesn't matter quotes are missing head.ContentLength = &size - // If we have done a single part PUT request then we can read these - if gotEtag != "" { - head.ETag = &gotEtag + // We get etag back from single and multipart upload so fill it in here + if gotETag != "" { + head.ETag = &gotETag } if lastModified.IsZero() { lastModified = time.Now() } head.LastModified = &lastModified head.VersionId = versionID - o.setMetaData(&head) - return nil - } - - // Read the metadata from the newly created object - o.meta = nil // wipe old metadata - head, err := o.headObject(ctx) - if err != nil { - return err + } else { + // Read the metadata from the newly created object + o.meta = nil // wipe old metadata + head, err = o.headObject(ctx) + if err != nil { + return err + } } o.setMetaData(head) + + // Check multipart upload ETag if required if o.fs.opt.UseMultipartEtag.Value && !o.fs.etagIsNotMD5 && wantETag != "" && head.ETag != nil && *head.ETag != "" { gotETag := strings.Trim(strings.ToLower(*head.ETag), `"`) if wantETag != gotETag {