diff --git a/docs/content/overview.md b/docs/content/overview.md index 3aafb40fe..21cd0c98e 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -15,25 +15,25 @@ show through. Here is an overview of the major features of each cloud storage system. -| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | -| ---------------------------- |:-------:|:-------:|:----------------:|:---------------:|:---------:| -| Amazon Drive | MD5 | No | Yes | No | R | -| Amazon S3 | MD5 | Yes | No | No | R/W | -| Backblaze B2 | SHA1 | Yes | No | No | R/W | -| Box | SHA1 | Yes | Yes | No | - | -| Dropbox | DBHASH †| Yes | Yes | No | - | -| FTP | - | No | No | No | - | -| Google Cloud Storage | MD5 | Yes | No | No | R/W | -| Google Drive | MD5 | Yes | No | Yes | R/W | -| HTTP | - | No | No | No | R | -| Hubic | MD5 | Yes | No | No | R/W | -| Microsoft Azure Blob Storage | MD5 | Yes | No | No | R/W | -| Microsoft OneDrive | SHA1 | Yes | Yes | No | R | -| Openstack Swift | MD5 | Yes | No | No | R/W | -| QingStor | - | No | No | No | R/W | -| SFTP | - | Yes | Depends | No | - | -| Yandex Disk | MD5 | Yes | No | No | R/W | -| The local filesystem | All | Yes | Depends | No | - | +| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | +| ---------------------------- |:-----------:|:-------:|:----------------:|:---------------:|:---------:| +| Amazon Drive | MD5 | No | Yes | No | R | +| Amazon S3 | MD5 | Yes | No | No | R/W | +| Backblaze B2 | SHA1 | Yes | No | No | R/W | +| Box | SHA1 | Yes | Yes | No | - | +| Dropbox | DBHASH † | Yes | Yes | No | - | +| FTP | - | No | No | No | - | +| Google Cloud Storage | MD5 | Yes | No | No | R/W | +| Google Drive | MD5 | Yes | No | Yes | R/W | +| HTTP | - | No | No | No | R | +| Hubic | MD5 | Yes | No | No | R/W | +| Microsoft Azure Blob Storage | MD5 | Yes | No | No | R/W | +| Microsoft OneDrive | SHA1 | Yes | Yes | No | R | +| Openstack Swift | MD5 | Yes | No | No | R/W | +| QingStor | - | No | 
No | No | R/W | +| SFTP | MD5, SHA1 ‡ | Yes | Depends | No | - | +| Yandex Disk | MD5 | Yes | No | No | R/W | +| The local filesystem | All | Yes | Depends | No | - | ### Hash ### @@ -49,6 +49,9 @@ systems they must support a common hash type. hash](https://www.dropbox.com/developers/reference/content-hash). This is an SHA256 sum of all the 4MB block SHA256s. +‡ SFTP supports checksums if the same login has shell access and `md5sum` +or `sha1sum` as well as `echo` are in the remote's PATH. + ### ModTime ### The cloud storage system supports setting modification times on diff --git a/docs/content/sftp.md b/docs/content/sftp.md index 188a8b5fc..e5fa2662e 100644 --- a/docs/content/sftp.md +++ b/docs/content/sftp.md @@ -149,7 +149,8 @@ Modified times are used in syncing and are fully supported. ### Limitations ### -SFTP does not support any checksums. +SFTP supports checksums if the same login has shell access and `md5sum` +or `sha1sum` as well as `echo` are in the remote's PATH. The only ssh agent supported under Windows is Putty's pagent. 
diff --git a/sftp/sftp.go b/sftp/sftp.go
index b5acfa05f..966db9b12 100644
--- a/sftp/sftp.go
+++ b/sftp/sftp.go
@@ -14,8 +14,10 @@ import (
 	"github.com/ncw/rclone/fs"
 	"github.com/pkg/errors"
 	"github.com/pkg/sftp"
-	sshagent "github.com/xanzy/ssh-agent"
+	"github.com/xanzy/ssh-agent"
 	"golang.org/x/crypto/ssh"
+	"strings"
+	"regexp"
 )
 
 func init() {
@@ -55,13 +57,14 @@ func init() {
 
 // Fs stores the interface to the remote SFTP files
 type Fs struct {
-	name       string
-	root       string
-	features   *fs.Features // optional features
-	url        string
-	sshClient  *ssh.Client
-	sftpClient *sftp.Client
-	mkdirLock  *stringLock
+	name         string
+	root         string
+	features     *fs.Features // optional features
+	url          string
+	sshClient    *ssh.Client
+	sftpClient   *sftp.Client
+	mkdirLock    *stringLock
+	cachedHashes *fs.HashSet // hash types the remote supports, nil until probed by Hashes
 }
 
 // Object is a remote SFTP file that has been stat'd (so it exists, but is not necessarily open for reading)
@@ -71,6 +74,8 @@ type Object struct {
 	size    int64       // size of the object
 	modTime time.Time   // modification time of the object
 	mode    os.FileMode // mode bits from the file
+	md5sum  *string     // Cached MD5 checksum
+	sha1sum *string     // Cached SHA1 checksum
 }
 
 // ObjectReader holds the sftp.File interface to a remote SFTP file opened for reading
@@ -432,9 +437,58 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
 	return nil
 }
 
-// Hashes returns fs.HashNone to indicate remote hashing is unavailable
+// Hashes returns the set of hash types that can be computed on the remote.
+//
+// The SFTP protocol has no checksum support, so the remote shell is probed
+// instead: a known string is piped through sha1sum and md5sum and each
+// command counts as working only if it prints the expected digest. The
+// result is cached in f.cachedHashes. If an ssh session cannot be opened
+// fs.HashNone is returned and nothing is cached.
 func (f *Fs) Hashes() fs.HashSet {
-	return fs.HashSet(fs.HashNone)
+	if f.cachedHashes != nil {
+		return *f.cachedHashes
+	}
+
+	// Digests of "abc\n", which is what `echo 'abc'` writes
+	const (
+		expectedSha1 = "03cfd743661f07975fa2f1220c5194cbaff48451"
+		expectedMd5  = "0bee89b07a248e27c83fc3d5951213c1"
+	)
+	sha1Works, err := f.remoteHashWorks("sha1sum", expectedSha1)
+	if err != nil {
+		return fs.HashSet(fs.HashNone)
+	}
+	md5Works, err := f.remoteHashWorks("md5sum", expectedMd5)
+	if err != nil {
+		return fs.HashSet(fs.HashNone)
+	}
+
+	set := fs.NewHashSet()
+	if !sha1Works && !md5Works {
+		set.Add(fs.HashNone)
+	}
+	if sha1Works {
+		set.Add(fs.HashSHA1)
+	}
+	if md5Works {
+		set.Add(fs.HashMD5)
+	}
+	f.cachedHashes = &set
+	return set
+}
+
+// remoteHashWorks runs `echo 'abc' | command` in a fresh ssh session and
+// reports whether the first field of the output equals expected. The error
+// is non-nil only if the session could not be created.
+func (f *Fs) remoteHashWorks(command, expected string) (bool, error) {
+	session, err := f.sshClient.NewSession()
+	if err != nil {
+		return false, err
+	}
+	// A missing or failing command just produces output that does not match
+	output, _ := session.Output("echo 'abc' | " + command)
+	_ = session.Close()
+	return parseHash(output) == expected, nil
 }
 
 // Fs is the filesystem this remote sftp file object is located within
@@ -455,9 +509,66 @@ func (o *Object) Remote() string {
 	return o.remote
 }
 
-// Hash returns "" since SFTP (in Go or OpenSSH) doesn't support remote calculation of hashes
+// Hash returns the requested checksum of the object as a string.
+//
+// The checksum is computed on the remote host by running md5sum or sha1sum
+// on the file in a new ssh session, and is cached on the Object afterwards.
+// fs.ErrHashUnsupported is returned for any other hash type, and also when
+// the session or the remote command fails; in the latter case the cached
+// result of Fs.Hashes is dropped so the remote is probed again.
 func (o *Object) Hash(r fs.HashType) (string, error) {
-	return "", fs.ErrHashUnsupported
+	var command string
+	switch r {
+	case fs.HashMD5:
+		if o.md5sum != nil {
+			return *o.md5sum, nil
+		}
+		command = "md5sum"
+	case fs.HashSHA1:
+		if o.sha1sum != nil {
+			return *o.sha1sum, nil
+		}
+		command = "sha1sum"
+	default:
+		// Nothing is wrong with the remote, so keep the cached hash set
+		return "", fs.ErrHashUnsupported
+	}
+
+	session, err := o.fs.sshClient.NewSession()
+	if err != nil {
+		o.fs.cachedHashes = nil // Something has changed on the remote system
+		return "", fs.ErrHashUnsupported
+	}
+	outputBytes, err := session.Output(command + " " + shellEscape(o.path()))
+	_ = session.Close()
+	if err != nil {
+		o.fs.cachedHashes = nil // Something has changed on the remote system
+		return "", fs.ErrHashUnsupported
+	}
+
+	str := parseHash(outputBytes)
+	if r == fs.HashMD5 {
+		o.md5sum = &str
+	} else {
+		o.sha1sum = &str
+	}
+	return str, nil
+}
+
+var shellEscapeRegex = regexp.MustCompile(`[^A-Za-z0-9_.,:/@\n-]`)
+
+// shellEscape backslash-escapes every character of str outside a small safe
+// set and wraps each newline in single quotes, for safe use in a shell command.
+func shellEscape(str string) string {
+	safe := shellEscapeRegex.ReplaceAllString(str, `\$0`)
+	return strings.Replace(safe, "\n", "'\n'", -1)
+}
+
+// parseHash extracts the digest from md5sum/sha1sum output, which has the
+// form "<hash> <filename>". GNU coreutils starts the line with a backslash
+// when the filename had to be escaped, so that marker is stripped first.
+func parseHash(bytes []byte) string {
+	return strings.Split(strings.TrimPrefix(string(bytes), "\\"), " ")[0] // Split at hash / filename separator
 }
 
 // Size returns the size in bytes of the remote sftp file
diff --git a/sftp/sftp_internal_test.go b/sftp/sftp_internal_test.go
new file mode 100644
index 000000000..820c4e02f
--- /dev/null
+++ b/sftp/sftp_internal_test.go
@@ -0,0 +1,37 @@
+package sftp
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestShellEscape(t *testing.T) {
+	for i, test := range []struct {
+		unescaped, escaped string
+	}{
+		{"", ""},
+		{"/this/is/harmless", "/this/is/harmless"},
+		{"$(rm -rf /)", "\\$\\(rm\\ -rf\\ /\\)"},
+		{"/test/\n", "/test/'\n'"},
+		{":\"'", ":\\\"\\'"},
+	} {
+		got := shellEscape(test.unescaped)
+		assert.Equal(t, test.escaped, got, fmt.Sprintf("Test %d unescaped = %q", i, test.unescaped))
+	}
+}
+
+func TestParseHash(t *testing.T) {
+	for i, test := range []struct {
+		sshOutput, checksum string
+	}{
+		{"8dbc7733dbd10d2efc5c0a0d8dad90f958581821 RELEASE.md\n", "8dbc7733dbd10d2efc5c0a0d8dad90f958581821"},
+		{"03cfd743661f07975fa2f1220c5194cbaff48451 -\n", "03cfd743661f07975fa2f1220c5194cbaff48451"},
+		{"\\8dbc7733dbd10d2efc5c0a0d8dad90f958581821 RELEASE\\\\.md\n", "8dbc7733dbd10d2efc5c0a0d8dad90f958581821"},
+		{"", ""},
+	} {
+		got := parseHash([]byte(test.sshOutput))
+		assert.Equal(t, test.checksum, got, fmt.Sprintf("Test %d sshOutput = %q", i, test.sshOutput))
+	}
+}