From 0537791d14675e9c66a2a237680f6403cba4e3f1 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 5 Apr 2021 10:32:20 +0100 Subject: [PATCH] sftp: Fix performance regression by re-enabling concurrent writes #5197 Between rclone v1.54 and v1.55 there was an approx 3x performance regression when transferring to distant SFTP servers (in particular rsync.net). This turned out to be due to the library github.com/pkg/sftp rclone uses. Concurrent writes used to be enabled in this library by default (for v1.12.0 as used in rclone v1.54) but they are no longer enabled (for v1.13.0 as used in rclone v1.55) for safety reasons and it is necessary to enable them specifically. The safety concerns are due to the uncertainty as to whether writes come in order and whether a half completed file might have holes in it. This isn't a problem for rclone since a) it doesn't restart uploads and b) it has a post-transfer checksum test. This change introduces a new flag `--sftp-disable-concurrent-writes` to control the feature which defaults to false, meaning that concurrent writes are enabled as in v1.54. However this isn't quite enough to fix the problem as the sftp library needs to be able to sniff the size of the stream from the reader passed in, so this also adds a `Size` interface to the reader to enable this. This involved a patch to the library. The library was reverted to v1.12.0 for v1.55.1 - this patch installs v1.13.0+master to fix the Size interface problem. See: https://github.com/pkg/sftp/issues/426 --- backend/sftp/sftp.go | 77 +++++++++++++++++++++++++++++--------------- go.mod | 6 ++-- go.sum | 6 ++++ 3 files changed, 60 insertions(+), 29 deletions(-) diff --git a/backend/sftp/sftp.go b/backend/sftp/sftp.go index 5e6898949..e9f5a490a 100644 --- a/backend/sftp/sftp.go +++ b/backend/sftp/sftp.go @@ -224,6 +224,17 @@ have a server which returns Then you may need to enable this flag. If concurrent reads are disabled, the use_fstat option is ignored. 
+`, + Advanced: true, + }, { + Name: "disable_concurrent_writes", + Default: false, + Help: `If set don't use concurrent writes + +Normally rclone uses concurrent writes to upload files. This improves +the performance greatly, especially for distant servers. + +This option disables concurrent writes should that be necessary. `, Advanced: true, }, { @@ -244,29 +255,30 @@ Set to 0 to keep connections indefinitely. // Options defines the configuration for this backend type Options struct { - Host string `config:"host"` - User string `config:"user"` - Port string `config:"port"` - Pass string `config:"pass"` - KeyPem string `config:"key_pem"` - KeyFile string `config:"key_file"` - KeyFilePass string `config:"key_file_pass"` - PubKeyFile string `config:"pubkey_file"` - KnownHostsFile string `config:"known_hosts_file"` - KeyUseAgent bool `config:"key_use_agent"` - UseInsecureCipher bool `config:"use_insecure_cipher"` - DisableHashCheck bool `config:"disable_hashcheck"` - AskPassword bool `config:"ask_password"` - PathOverride string `config:"path_override"` - SetModTime bool `config:"set_modtime"` - Md5sumCommand string `config:"md5sum_command"` - Sha1sumCommand string `config:"sha1sum_command"` - SkipLinks bool `config:"skip_links"` - Subsystem string `config:"subsystem"` - ServerCommand string `config:"server_command"` - UseFstat bool `config:"use_fstat"` - DisableConcurrentReads bool `config:"disable_concurrent_reads"` - IdleTimeout fs.Duration `config:"idle_timeout"` + Host string `config:"host"` + User string `config:"user"` + Port string `config:"port"` + Pass string `config:"pass"` + KeyPem string `config:"key_pem"` + KeyFile string `config:"key_file"` + KeyFilePass string `config:"key_file_pass"` + PubKeyFile string `config:"pubkey_file"` + KnownHostsFile string `config:"known_hosts_file"` + KeyUseAgent bool `config:"key_use_agent"` + UseInsecureCipher bool `config:"use_insecure_cipher"` + DisableHashCheck bool `config:"disable_hashcheck"` + AskPassword bool 
`config:"ask_password"` + PathOverride string `config:"path_override"` + SetModTime bool `config:"set_modtime"` + Md5sumCommand string `config:"md5sum_command"` + Sha1sumCommand string `config:"sha1sum_command"` + SkipLinks bool `config:"skip_links"` + Subsystem string `config:"subsystem"` + ServerCommand string `config:"server_command"` + UseFstat bool `config:"use_fstat"` + DisableConcurrentReads bool `config:"disable_concurrent_reads"` + DisableConcurrentWrites bool `config:"disable_concurrent_writes"` + IdleTimeout fs.Duration `config:"idle_timeout"` } // Fs stores the interface to the remote SFTP files @@ -414,8 +426,8 @@ func (f *Fs) newSftpClient(conn *ssh.Client, opts ...sftp.ClientOption) (*sftp.C opts = opts[:len(opts):len(opts)] // make sure we don't overwrite the callers opts opts = append(opts, sftp.UseFstat(f.opt.UseFstat), - // FIXME disabled after library reversion - // sftp.UseConcurrentReads(!f.opt.DisableConcurrentReads), + sftp.UseConcurrentReads(!f.opt.DisableConcurrentReads), + sftp.UseConcurrentWrites(!f.opt.DisableConcurrentWrites), ) if f.opt.DisableConcurrentReads { // FIXME fs.Errorf(f, "Ignoring disable_concurrent_reads after library reversion - see #5197") @@ -1494,6 +1506,19 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read return in, nil } +type sizeReader struct { + io.Reader + size int64 +} + +// Size returns the expected size of the stream +// +// It is used in sftpFile.ReadFrom as a hint to work out the +// concurrency needed +func (sr *sizeReader) Size() int64 { + return sr.size +} + // Update a remote sftp file using the data and ModTime from func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { o.fs.addTransfer() // Show transfer in progress @@ -1525,7 +1550,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op fs.Debugf(src, "Removed after failed upload: %v", err) } } - _, err = file.ReadFrom(in) + _, err 
= file.ReadFrom(&sizeReader{Reader: in, size: src.Size()}) if err != nil { remove() return errors.Wrap(err, "Update ReadFrom failed") diff --git a/go.mod b/go.mod index 12aa3f736..ceefe21be 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( github.com/nsf/termbox-go v1.1.1-0.20210421210813-2ff630277754 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pkg/errors v0.9.1 - github.com/pkg/sftp v1.12.0 + github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8 github.com/prometheus/client_golang v1.10.0 github.com/prometheus/common v0.20.0 // indirect github.com/putdotio/go-putio/putio v0.0.0-20200123120452-16d982cac2b8 @@ -69,11 +69,11 @@ require ( go.etcd.io/bbolt v1.3.5 go.uber.org/zap v1.16.0 // indirect goftp.io/server v0.4.1 - golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc + golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d golang.org/x/oauth2 v0.0.0-20210413134643-5e61552d6c78 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c - golang.org/x/sys v0.0.0-20210419170143-37df388d1f33 + golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7 golang.org/x/term v0.0.0-20210406210042-72f3dc4e9b72 // indirect golang.org/x/text v0.3.6 golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba diff --git a/go.sum b/go.sum index 8ead45ce6..5c4bdf044 100644 --- a/go.sum +++ b/go.sum @@ -522,6 +522,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pkg/sftp v1.12.0 h1:/f3b24xrDhkhddlaobPe2JgBqfdt+gC/NYl0QY9IOuI= github.com/pkg/sftp v1.12.0/go.mod h1:fUqqXB5vEgVCZ131L+9say31RAri6aF6KDViawhxKK8= +github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8 h1:0sHotAvxm+h6PnYq2sz+xbbmPyzCMsubDzTMqT1Gbkw= +github.com/pkg/sftp v1.13.1-0.20210424083437-2b80967078b8/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= @@ -722,6 +724,8 @@ golang.org/x/crypto v0.0.0-20201112155050-0c6587e931a9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc h1:+q90ECDSAQirdykUN6sPEiBXBsp8Csjcca8Oy7bgLTA= golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= +golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b h1:7mWr3k41Qtv8XlltBkDkl8LoP3mpSgBW8BUoxtEdbXg= +golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -896,6 +900,8 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210419170143-37df388d1f33 h1:zah5VTTvBlVRELjcDwGLLaWRHZJQsBtplweVYCii0KM= golang.org/x/sys v0.0.0-20210419170143-37df388d1f33/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7 h1:iGu644GcxtEcrInvDsQRCwJjtCIOlT2V7IRt6ah2Whw= +golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210406210042-72f3dc4e9b72 h1:VqE9gduFZ4dbR7XoL77lHFp0/DyDUBKSXK7CMFkVcV0=