From 0501773db13221171e102781809378a6555dd312 Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood
Date: Tue, 9 Aug 2022 10:44:54 +0100
Subject: [PATCH] azureblob,b2,s3: fix chunksize calculations producing too many parts

Before this fix, the chunksize calculator was using the previous size
of the object, not the new size, to calculate the chunk sizes. This
meant that uploading a replacement object which needed a new chunk
size would fail because it used too many parts.

This fix changes the calculator to take the size explicitly.
---
 backend/azureblob/azureblob.go |   4 +-
 backend/b2/upload.go           |   2 +-
 backend/s3/s3.go               |   6 +-
 fs/chunksize/chunksize.go      |  32 ++++++----
 fs/chunksize/chunksize_test.go | 104 +++++++++++++++++++++++++++------
 5 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/backend/azureblob/azureblob.go b/backend/azureblob/azureblob.go
index 0814b0168..edb863070 100644
--- a/backend/azureblob/azureblob.go
+++ b/backend/azureblob/azureblob.go
@@ -1678,14 +1678,14 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 		}
 	}
 
-	uploadParts := int64(maxUploadParts)
+	uploadParts := maxUploadParts
 	if uploadParts < 1 {
 		uploadParts = 1
 	} else if uploadParts > maxUploadParts {
 		uploadParts = maxUploadParts
 	}
 	// calculate size of parts/blocks
-	partSize := chunksize.Calculator(o, int(uploadParts), o.fs.opt.ChunkSize)
+	partSize := chunksize.Calculator(o, src.Size(), uploadParts, o.fs.opt.ChunkSize)
 
 	putBlobOptions := azblob.UploadStreamToBlockBlobOptions{
 		BufferSize: int(partSize),
diff --git a/backend/b2/upload.go b/backend/b2/upload.go
index 8a1f67635..47ba53473 100644
--- a/backend/b2/upload.go
+++ b/backend/b2/upload.go
@@ -97,7 +97,7 @@ func (f *Fs) newLargeUpload(ctx context.Context, o *Object, in io.Reader, src fs
 	if size == -1 {
 		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", f.opt.ChunkSize, maxParts*f.opt.ChunkSize)
 	} else {
-		chunkSize = chunksize.Calculator(src, maxParts, defaultChunkSize)
+		chunkSize = chunksize.Calculator(o, size, maxParts, defaultChunkSize)
 		parts = size / int64(chunkSize)
 		if size%int64(chunkSize) != 0 {
 			parts++
diff --git a/backend/s3/s3.go b/backend/s3/s3.go
index f0a02b391..1f86b8592 100644
--- a/backend/s3/s3.go
+++ b/backend/s3/s3.go
@@ -2116,7 +2116,7 @@ type Options struct {
 	UploadCutoff          fs.SizeSuffix `config:"upload_cutoff"`
 	CopyCutoff            fs.SizeSuffix `config:"copy_cutoff"`
 	ChunkSize             fs.SizeSuffix `config:"chunk_size"`
-	MaxUploadParts        int64         `config:"max_upload_parts"`
+	MaxUploadParts        int           `config:"max_upload_parts"`
 	DisableChecksum       bool          `config:"disable_checksum"`
 	SharedCredentialsFile string        `config:"shared_credentials_file"`
 	Profile               string        `config:"profile"`
@@ -4718,10 +4718,10 @@ func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, si
 	if size == -1 {
 		warnStreamUpload.Do(func() {
 			fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
-				f.opt.ChunkSize, fs.SizeSuffix(int64(partSize)*uploadParts))
+				f.opt.ChunkSize, fs.SizeSuffix(int64(partSize)*int64(uploadParts)))
 		})
 	} else {
-		partSize = chunksize.Calculator(o, int(uploadParts), f.opt.ChunkSize)
+		partSize = chunksize.Calculator(o, size, uploadParts, f.opt.ChunkSize)
 	}
 
 	memPool := f.getMemoryPool(int64(partSize))
diff --git a/fs/chunksize/chunksize.go b/fs/chunksize/chunksize.go
index 5ecf1d5f3..1a60dda5f 100644
--- a/fs/chunksize/chunksize.go
+++ b/fs/chunksize/chunksize.go
@@ -5,18 +5,26 @@ import (
 	"github.com/rclone/rclone/fs"
 )
 
-/*
-Calculator calculates the minimum chunk size needed to fit within the maximum number of parts, rounded up to the nearest fs.Mebi
+// Calculator calculates the minimum chunk size needed to fit within
+// the maximum number of parts, rounded up to the nearest fs.Mebi.
+//
+// For most backends, (chunk_size) * (concurrent_upload_routines)
+// memory will be required so we want to use the smallest possible
+// chunk size that's going to allow the upload to proceed. Rounding up
+// to the nearest fs.Mebi on the assumption that some backends may
+// only allow integer type parameters when specifying the chunk size.
+//
+// Returns the default chunk size if it is sufficiently large enough
+// to support the given file size otherwise returns the smallest chunk
+// size necessary to allow the upload to proceed.
+func Calculator(o interface{}, size int64, maxParts int, defaultChunkSize fs.SizeSuffix) fs.SizeSuffix {
+	// If streaming then use default chunk size
+	if size < 0 {
+		fs.Debugf(o, "Streaming upload with chunk_size %s allows uploads of up to %s and will fail only when that limit is reached.", defaultChunkSize, fs.SizeSuffix(maxParts)*defaultChunkSize)
 
-For most backends, (chunk_size) * (concurrent_upload_routines) memory will be required so we want to use the smallest
-possible chunk size that's going to allow the upload to proceed. Rounding up to the nearest fs.Mebi on the assumption
-that some backends may only allow integer type parameters when specifying the chunk size.
-
-Returns the default chunk size if it is sufficiently large enough to support the given file size otherwise returns the
-smallest chunk size necessary to allow the upload to proceed.
-*/
-func Calculator(objInfo fs.ObjectInfo, maxParts int, defaultChunkSize fs.SizeSuffix) fs.SizeSuffix {
-	fileSize := fs.SizeSuffix(objInfo.Size())
+		return defaultChunkSize
+	}
+	fileSize := fs.SizeSuffix(size)
 	requiredChunks := fileSize / defaultChunkSize
 	if requiredChunks < fs.SizeSuffix(maxParts) || (requiredChunks == fs.SizeSuffix(maxParts) && fileSize%defaultChunkSize == 0) {
 		return defaultChunkSize
@@ -31,6 +39,6 @@ func Calculator(objInfo fs.ObjectInfo, maxParts int, defaultChunkSize fs.SizeSuf
 		minChunk += fs.Mebi
 	}
 
-	fs.Debugf(objInfo, "size: %v, parts: %v, default: %v, new: %v; default chunk size insufficient, returned new chunk size", fileSize, maxParts, defaultChunkSize, minChunk)
+	fs.Debugf(o, "size: %v, parts: %v, default: %v, new: %v; default chunk size insufficient, returned new chunk size", fileSize, maxParts, defaultChunkSize, minChunk)
 	return minChunk
 }
diff --git a/fs/chunksize/chunksize_test.go b/fs/chunksize/chunksize_test.go
index 9cdeb70d6..a1a778f94 100644
--- a/fs/chunksize/chunksize_test.go
+++ b/fs/chunksize/chunksize_test.go
@@ -2,34 +2,100 @@ package chunksize
 
 import (
 	"testing"
-	"time"
 
 	"github.com/rclone/rclone/fs"
-	"github.com/rclone/rclone/fs/object"
 )
 
 func TestComputeChunkSize(t *testing.T) {
-	tests := map[string]struct {
-		fileSize fs.SizeSuffix
+	for _, test := range []struct {
+		name string
+		size fs.SizeSuffix
 		maxParts int
 		defaultChunkSize fs.SizeSuffix
-		expected fs.SizeSuffix
+		want fs.SizeSuffix
 	}{
-		"default size returned when file size is small enough": {fileSize: 1000, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(10), expected: toSizeSuffixMiB(10)},
-		"default size returned when file size is just 1 byte small enough": {fileSize: toSizeSuffixMiB(100000) - 1, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(10), expected: toSizeSuffixMiB(10)},
-		"no rounding up when everything divides evenly": {fileSize: toSizeSuffixMiB(1000000), maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(100)},
-		"rounding up to nearest MiB when not quite enough parts": {fileSize: toSizeSuffixMiB(1000000), maxParts: 9999, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(101)},
-		"rounding up to nearest MiB when one extra byte": {fileSize: toSizeSuffixMiB(1000000) + 1, maxParts: 10000, defaultChunkSize: toSizeSuffixMiB(100), expected: toSizeSuffixMiB(101)},
-		"expected MiB value when rounding sets to absolute minimum": {fileSize: toSizeSuffixMiB(1) - 1, maxParts: 1, defaultChunkSize: toSizeSuffixMiB(1), expected: toSizeSuffixMiB(1)},
-		"expected MiB value when rounding to absolute min with extra": {fileSize: toSizeSuffixMiB(1) + 1, maxParts: 1, defaultChunkSize: toSizeSuffixMiB(1), expected: toSizeSuffixMiB(2)},
-	}
+		{
+			name: "streaming file",
+			size: -1,
+			maxParts: 10000,
+			defaultChunkSize: toSizeSuffixMiB(10),
+			want: toSizeSuffixMiB(10),
+		}, {
+			name: "default size returned when file size is small enough",
+			size: 1000,
+			maxParts: 10000,
+			defaultChunkSize: toSizeSuffixMiB(10),
+			want: toSizeSuffixMiB(10),
+		}, {
+			name: "default size returned when file size is just 1 byte small enough",
+			size: toSizeSuffixMiB(100000) - 1,
+			maxParts: 10000,
+			defaultChunkSize: toSizeSuffixMiB(10),
+			want: toSizeSuffixMiB(10),
+		}, {
+			name: "no rounding up when everything divides evenly",
+			size: toSizeSuffixMiB(1000000),
+			maxParts: 10000,
+			defaultChunkSize: toSizeSuffixMiB(100),
+			want: toSizeSuffixMiB(100),
+		}, {
+			name: "rounding up to nearest MiB when not quite enough parts",
+			size: toSizeSuffixMiB(1000000),
+			maxParts: 9999,
+			defaultChunkSize: toSizeSuffixMiB(100),
+			want: toSizeSuffixMiB(101),
+		}, {
+			name: "rounding up to nearest MiB when one extra byte",
+			size: toSizeSuffixMiB(1000000) + 1,
+			maxParts: 10000,
+			defaultChunkSize: toSizeSuffixMiB(100),
+			want: toSizeSuffixMiB(101),
+		}, {
+			name: "expected MiB value when rounding sets to absolute minimum",
+			size: toSizeSuffixMiB(1) - 1,
+			maxParts: 1,
+			defaultChunkSize: toSizeSuffixMiB(1),
+			want: toSizeSuffixMiB(1),
+		}, {
+			name: "expected MiB value when rounding to absolute min with extra",
+			size: toSizeSuffixMiB(1) + 1,
+			maxParts: 1,
+			defaultChunkSize: toSizeSuffixMiB(1),
+			want: toSizeSuffixMiB(2),
+		}, {
+			name: "issue from forum #1",
+			size: 120864818840,
+			maxParts: 10000,
+			defaultChunkSize: 5 * 1024 * 1024,
+			want: toSizeSuffixMiB(12),
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+			got := Calculator(test.name, int64(test.size), test.maxParts, test.defaultChunkSize)
+			if got != test.want {
+				t.Fatalf("expected: %v, got: %v", test.want, got)
+			}
+			if test.size < 0 {
+				return
+			}
+			parts := func(result fs.SizeSuffix) int {
+				n := test.size / result
+				r := test.size % result
+				if r != 0 {
+					n++
+				}
+				return int(n)
+			}
+			// Check this gives the parts in range
+			if parts(got) > test.maxParts {
+				t.Fatalf("too many parts %d", parts(got))
+			}
+			// Check that setting chunk size smaller gave too many parts
+			if got > test.defaultChunkSize {
+				if parts(got-toSizeSuffixMiB(1)) <= test.maxParts {
+					t.Fatalf("chunk size %v too big as %v only gives %d parts", got, got-toSizeSuffixMiB(1), parts(got-toSizeSuffixMiB(1)))
+				}
 
-	for name, tc := range tests {
-		t.Run(name, func(t *testing.T) {
-			src := object.NewStaticObjectInfo("mock", time.Now(), int64(tc.fileSize), true, nil, nil)
-			result := Calculator(src, tc.maxParts, tc.defaultChunkSize)
-			if result != tc.expected {
-				t.Fatalf("expected: %v, got: %v", tc.expected, result)
 			}
 		})
 	}
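
Usage note (not part of the patch): a minimal sketch of how a backend-style caller uses the new Calculator signature after this change. It assumes the rclone module at this commit is importable; the first argument is only used for logging, so a plain string stands in for the object here (the new tests do the same), and the numbers mirror the "issue from forum #1" test case above.

package main

import (
	"fmt"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/chunksize"
)

func main() {
	const maxParts = 10000          // hypothetical backend part limit, e.g. S3's 10,000 parts
	defaultChunkSize := 5 * fs.Mebi // hypothetical 5 MiB default chunk size

	// Known size: 120864818840 bytes cannot fit into 10,000 parts of 5 MiB,
	// so the calculator returns the smallest whole-MiB chunk that fits (12 MiB).
	fmt.Println(chunksize.Calculator("replacement-object", 120864818840, maxParts, defaultChunkSize))

	// Unknown size (streaming upload, size -1): the default chunk size is kept.
	fmt.Println(chunksize.Calculator("streamed-object", -1, maxParts, defaultChunkSize))
}

With the old signature the calculator read the size from the existing object, so replacing an object with a larger one could produce more than maxParts parts; passing the new size explicitly, as above, is what this patch fixes.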