lsjson: Add --hash-type parameter and use it in lsf to speed up hashing

Before this change, if you specified --hash MD5 in rclone lsf, it would
calculate all the hashes and return only the MD5 hash, which was very
slow on the local backend.

Likewise, specifying --hash on rclone lsjson was equally slow.

This change introduces the --hash-type flag (and corresponding
internal parameter) so that the hashes required can be selected in
lsjson.

This is used internally in lsf when the --hash parameter is selected,
to speed up the hashing by computing only the one hash specified.

Fixes #4181
This commit is contained in:
Nick Craig-Wood 2020-04-28 18:44:04 +01:00
parent a3f0992a22
commit f37af9afec
3 changed files with 41 additions and 15 deletions

View File

@ -185,6 +185,7 @@ func Lsf(ctx context.Context, fsrc fs.Fs, out io.Writer) error {
case 'h':
list.AddHash(hashType)
opt.ShowHash = true
opt.HashTypes = []string{hashType.String()}
case 'i':
list.AddID()
case 'm':

View File

@ -29,6 +29,7 @@ func init() {
flags.BoolVarP(cmdFlags, &opt.ShowOrigIDs, "original", "", false, "Show the ID of the underlying Object.")
flags.BoolVarP(cmdFlags, &opt.FilesOnly, "files-only", "", false, "Show only files in the listing.")
flags.BoolVarP(cmdFlags, &opt.DirsOnly, "dirs-only", "", false, "Show only directories in the listing.")
flags.StringArrayVarP(cmdFlags, &opt.HashTypes, "hash-type", "", nil, "Show only this hash type (may be repeated).")
}
var commandDefinition = &cobra.Command{
@ -58,17 +59,25 @@ The output is an array of Items, where each Item looks like this
"Tier" : "hot",
}
If --hash is not specified the Hashes property won't be emitted.
If --hash is not specified the Hashes property won't be emitted. The
types of hash can be specified with the --hash-type parameter (which
may be repeated). If --hash-type is set then it implies --hash.
If --no-modtime is specified then ModTime will be blank. This can speed things up on remotes where reading the ModTime takes an extra request (eg s3, swift).
If --no-modtime is specified then ModTime will be blank. This can
speed things up on remotes where reading the ModTime takes an extra
request (eg s3, swift).
If --no-mimetype is specified then MimeType will be blank. This can speed things up on remotes where reading the MimeType takes an extra request (eg s3, swift).
If --no-mimetype is specified then MimeType will be blank. This can
speed things up on remotes where reading the MimeType takes an extra
request (eg s3, swift).
If --encrypted is not specified the Encrypted won't be emitted.
If --dirs-only is not specified files in addition to directories are returned
If --dirs-only is not specified files in addition to directories are
returned
If --files-only is not specified directories in addition to the files will be returned.
If --files-only is not specified directories in addition to the files
will be returned.
The Path field will only show folders below the remote path being listed.
If "remote:path" contains the file "subfolder/file.txt", the Path for "file.txt"

View File

@ -8,6 +8,7 @@ import (
"github.com/pkg/errors"
"github.com/rclone/rclone/backend/crypt"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/walk"
)
@ -69,14 +70,15 @@ func formatForPrecision(precision time.Duration) string {
// ListJSONOpt describes the options for ListJSON
type ListJSONOpt struct {
Recurse bool `json:"recurse"`
NoModTime bool `json:"noModTime"`
NoMimeType bool `json:"noMimeType"`
ShowEncrypted bool `json:"showEncrypted"`
ShowOrigIDs bool `json:"showOrigIDs"`
ShowHash bool `json:"showHash"`
DirsOnly bool `json:"dirsOnly"`
FilesOnly bool `json:"filesOnly"`
Recurse bool `json:"recurse"`
NoModTime bool `json:"noModTime"`
NoMimeType bool `json:"noMimeType"`
ShowEncrypted bool `json:"showEncrypted"`
ShowOrigIDs bool `json:"showOrigIDs"`
ShowHash bool `json:"showHash"`
DirsOnly bool `json:"dirsOnly"`
FilesOnly bool `json:"filesOnly"`
HashTypes []string `json:"hashTypes"` // hash types to show if ShowHash is set, eg "MD5", "SHA-1"
}
// ListJSON lists fsrc using the options in opt calling callback for each item
@ -99,6 +101,20 @@ func ListJSON(ctx context.Context, fsrc fs.Fs, remote string, opt *ListJSONOpt,
canGetTier := features.GetTier
format := formatForPrecision(fsrc.Precision())
isBucket := features.BucketBased && remote == "" && fsrc.Root() == "" // if bucket based remote listing the root mark directories as buckets
showHash := opt.ShowHash
hashTypes := fsrc.Hashes().Array()
if len(opt.HashTypes) != 0 {
showHash = true
hashTypes = []hash.Type{}
for _, hashType := range opt.HashTypes {
var ht hash.Type
err := ht.Set(hashType)
if err != nil {
return err
}
hashTypes = append(hashTypes, ht)
}
}
err := walk.ListR(ctx, fsrc, remote, false, ConfigMaxDepth(opt.Recurse), walk.ListAll, func(entries fs.DirEntries) (err error) {
for _, entry := range entries {
switch entry.(type) {
@ -150,9 +166,9 @@ func ListJSON(ctx context.Context, fsrc fs.Fs, remote string, opt *ListJSONOpt,
item.IsBucket = isBucket
case fs.Object:
item.IsDir = false
if opt.ShowHash {
if showHash {
item.Hashes = make(map[string]string)
for _, hashType := range x.Fs().Hashes().Array() {
for _, hashType := range hashTypes {
hash, err := x.Hash(ctx, hashType)
if err != nil {
fs.Errorf(x, "Failed to read hash: %v", err)