s3: add --s3-no-head parameter to minimise transactions on upload

See: https://forum.rclone.org/t/prevent-head-on-amazon-s3-family/21935
This commit is contained in:
Nick Craig-Wood 2021-01-29 11:13:42 +00:00
parent 0d8bcc08da
commit 8b41dfa50a
1 changed files with 57 additions and 1 deletions

View File

@ -1185,6 +1185,39 @@ rclone does if you know the bucket exists already.
It can also be needed if the user you are using does not have bucket
creation permissions. Before v1.52.0 this would have passed silently
due to a bug.
`,
Default: false,
Advanced: true,
}, {
Name: "no_head",
Help: `If set, don't HEAD uploaded objects to check integrity
This can be useful when trying to minimise the number of transactions
rclone does.
Setting it means that if rclone receives a 200 OK message after
uploading an object with PUT then it will assume that it got uploaded
properly.
In particular it will assume:
- the metadata, including modtime, storage class and content type was as uploaded
- the size was as uploaded
It reads the following items from the response for a single part PUT:
- the MD5SUM
- The uploaded date
For multipart uploads these items aren't read.
If an source object of unknown length is uploaded then rclone **will** do a
HEAD request.
Setting this flag increases the chance for undetected upload failures,
in particular an incorrect size, so it isn't recommended for normal
operation. In practice the chance of an undetected upload failure is
very small even with this flag.
`,
Default: false,
Advanced: true,
@ -1285,6 +1318,7 @@ type Options struct {
LeavePartsOnError bool `config:"leave_parts_on_error"`
ListChunk int64 `config:"list_chunk"`
NoCheckBucket bool `config:"no_check_bucket"`
NoHead bool `config:"no_head"`
Enc encoder.MultiEncoder `config:"encoding"`
MemoryPoolFlushTime fs.Duration `config:"memory_pool_flush_time"`
MemoryPoolUseMmap bool `config:"memory_pool_use_mmap"`
@ -3234,6 +3268,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
}
}
var resp *http.Response // response from PUT
if multipart {
err = o.uploadMultipart(ctx, &req, size, in)
if err != nil {
@ -3274,7 +3309,8 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
httpReq.ContentLength = size
err = o.fs.pacer.CallNoRetry(func() (bool, error) {
resp, err := o.fs.srv.Do(httpReq)
var err error
resp, err = o.fs.srv.Do(httpReq)
if err != nil {
return o.fs.shouldRetry(err)
}
@ -3293,6 +3329,26 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
}
}
// User requested we don't HEAD the object after uploading it
// so make up the object as best we can assuming it got
// uploaded properly. If size < 0 then we need to do the HEAD.
if o.fs.opt.NoHead && size >= 0 {
o.md5 = md5sum
o.bytes = size
o.lastModified = time.Now()
o.meta = req.Metadata
o.mimeType = aws.StringValue(req.ContentType)
o.storageClass = aws.StringValue(req.StorageClass)
// If we have done a single part PUT request then we can read these
if resp != nil {
if date, err := http.ParseTime(resp.Header.Get("Date")); err == nil {
o.lastModified = date
}
o.setMD5FromEtag(resp.Header.Get("Etag"))
}
return nil
}
// Read the metadata from the newly created object
o.meta = nil // wipe old metadata
err = o.readMetaData(ctx)