From 2781f8e2f14f146d416ab57e92e2c82fc9eb7155 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 31 Mar 2022 15:41:08 +0100 Subject: [PATCH] gcs: Fix download of "Content-Encoding: gzip" compressed objects Before this change, if an object compressed with "Content-Encoding: gzip" was downloaded, a length and hash mismatch would occur since the Go runtime automatically decompressed the object on download. This change erases the length and hash on compressed objects so they can be downloaded successfully, at the cost of not being able to check the length or the hash of the downloaded object. This also adds the --gcs-download-compressed flag to allow the compressed files to be downloaded as-is, providing compressed objects with intact size and hash information. Fixes #2658 --- .../googlecloudstorage/googlecloudstorage.go | 58 +++++++++++++++---- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/backend/googlecloudstorage/googlecloudstorage.go b/backend/googlecloudstorage/googlecloudstorage.go index 9a121349d..2f8a88562 100644 --- a/backend/googlecloudstorage/googlecloudstorage.go +++ b/backend/googlecloudstorage/googlecloudstorage.go @@ -24,6 +24,7 @@ import ( "path" "strconv" "strings" + "sync" "time" "github.com/rclone/rclone/fs" @@ -304,6 +305,21 @@ rclone does if you know the bucket exists already. `, Default: false, Advanced: true, + }, { + Name: "download_compressed", + Help: `If set this will download compressed objects as-is. + +It is possible to upload objects to GCS with "Content-Encoding: gzip" +set. Normally rclone will transparently decompress these files on +download. This means that rclone can't check the hash or the size of +the file as both of these refer to the compressed object. + +If this flag is set then rclone will download files with +"Content-Encoding: gzip" as they are received. This means that rclone +can check the size and hash but the file contents will be compressed. 
+`, + Advanced: true, + Default: false, }, { Name: config.ConfigEncoding, Help: config.ConfigEncodingHelp, @@ -327,21 +343,23 @@ type Options struct { Location string `config:"location"` StorageClass string `config:"storage_class"` NoCheckBucket bool `config:"no_check_bucket"` + DownloadCompressed bool `config:"download_compressed"` Enc encoder.MultiEncoder `config:"encoding"` } // Fs represents a remote storage server type Fs struct { - name string // name of this remote - root string // the path we are working on if any - opt Options // parsed options - features *fs.Features // optional features - svc *storage.Service // the connection to the storage server - client *http.Client // authorized client - rootBucket string // bucket part of root (if any) - rootDirectory string // directory part of root (if any) - cache *bucket.Cache // cache of bucket status - pacer *fs.Pacer // To pace the API calls + name string // name of this remote + root string // the path we are working on if any + opt Options // parsed options + features *fs.Features // optional features + svc *storage.Service // the connection to the storage server + client *http.Client // authorized client + rootBucket string // bucket part of root (if any) + rootDirectory string // directory part of root (if any) + cache *bucket.Cache // cache of bucket status + pacer *fs.Pacer // To pace the API calls + warnCompressed sync.Once // warn once about compressed files } // Object describes a storage object @@ -355,6 +373,7 @@ type Object struct { bytes int64 // Bytes in the object modTime time.Time // Modified time of the object mimeType string + gzipped bool // set if object has Content-Encoding: gzip } // ------------------------------------------------------------ @@ -975,6 +994,7 @@ func (o *Object) setMetaData(info *storage.Object) { o.url = info.MediaLink o.bytes = int64(info.Size) o.mimeType = info.ContentType + o.gzipped = info.ContentEncoding == "gzip" // Read md5sum md5sumData, err := 
base64.StdEncoding.DecodeString(info.Md5Hash) @@ -1013,6 +1033,15 @@ func (o *Object) setMetaData(info *storage.Object) { } else { o.modTime = modTime } + + // If gunzipping then size and md5sum are unknown + if o.gzipped && !o.fs.opt.DownloadCompressed { + o.bytes = -1 + o.md5sum = "" + o.fs.warnCompressed.Do(func() { + fs.Logf(o.fs, "Decompressing 'Content-Encoding: gzip' compressed file. Use --gcs-download-compressed to override") + }) + } } // readObjectInfo reads the definition for an object @@ -1113,6 +1142,15 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read return nil, err } fs.FixRangeOption(options, o.bytes) + if o.gzipped && o.fs.opt.DownloadCompressed { + // Allow files which are stored on the cloud storage system + // compressed to be downloaded without being decompressed. Note + // that setting this here overrides the automatic decompression + // in the Transport. + // + // See: https://cloud.google.com/storage/docs/transcoding + req.Header.Set("Accept-Encoding", "gzip") + } fs.OpenOptionAddHTTPHeaders(req.Header, options) var res *http.Response err = o.fs.pacer.Call(func() (bool, error) {