chunker: prevent chunk corruption, survive meta-like input

2019-10-09 12:21:45 +03:00 · 2019-10-09 12:21:45 +03:00 · 9049bb62ca
parent 7aa2b4191c
commit 9049bb62ca
4 changed files with 506 additions and 79 deletions
--- a/backend/chunker/chunker.go
+++ b/backend/chunker/chunker.go
@ -98,6 +98,14 @@ const optimizeFirstChunk = false
 // revealHidden is a stub until chunker lands the `reveal hidden` option.
 const revealHidden = false

+// Prevent memory overflow due to specially crafted chunk name
+const maxSafeChunkNumber = 10000000
+
+// standard chunker errors
+var (
+	ErrChunkOverflow = errors.New("chunk number overflow")
+)
+
 // Note: metadata logic is tightly coupled with chunker code in many
 // places, eg. in checks whether a file should have meta object or is
 // eligible for chunking.
@ -176,18 +184,17 @@ falling back to SHA1 if unsupported. Requires "simplejson".`,
 				Help:  `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`,
 			}},
 		}, {
-			Name:     "fail_on_bad_chunks",
+			Name:     "fail_hard",
 			Advanced: true,
 			Default:  false,
-			Help: `The list command might encounter files with missinng or invalid chunks.
-This boolean flag tells what rclone should do in such cases.`,
+			Help:     `Choose how chunker should handle files with missing or invalid chunks.`,
 			Examples: []fs.OptionExample{
 				{
 					Value: "true",
-					Help:  "Fail with error.",
+					Help:  "Report errors and abort current command.",
 				}, {
 					Value: "false",
-					Help:  "Silently ignore invalid object.",
+					Help:  "Warn user, skip incomplete file and proceed.",
 				},
 			},
 		}},
@ -231,6 +238,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
 		root: rpath,
 		opt:  *opt,
 	}
+	f.dirSort = true // processEntries requires that meta objects precede their data chunks atm.

 	switch opt.MetaFormat {
 	case "none":
@ -298,13 +306,13 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {

 // Options defines the configuration for this backend
 type Options struct {
-	Remote          string        `config:"remote"`
-	ChunkSize       fs.SizeSuffix `config:"chunk_size"`
-	NameFormat      string        `config:"name_format"`
-	StartFrom       int           `config:"start_from"`
-	MetaFormat      string        `config:"meta_format"`
-	HashType        string        `config:"hash_type"`
-	FailOnBadChunks bool          `config:"fail_on_bad_chunks"`
+	Remote     string        `config:"remote"`
+	ChunkSize  fs.SizeSuffix `config:"chunk_size"`
+	NameFormat string        `config:"name_format"`
+	StartFrom  int           `config:"start_from"`
+	MetaFormat string        `config:"meta_format"`
+	HashType   string        `config:"hash_type"`
+	FailHard   bool          `config:"fail_hard"`
 }

 // Fs represents a wrapped fs.Fs
@ -322,6 +330,7 @@ type Fs struct {
 	nameRegexp  *regexp.Regexp // regular expression to match chunk names
 	opt         Options        // copy of Options
 	features    *fs.Features   // optional features
+	dirSort     bool           // if set, processEntries sorts listings so meta objects precede their chunks
 }

 // setChunkNameFormat converts pattern based chunk name format
@ -454,6 +463,20 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl
 	return
 }

+// forbidChunk guards against paths that parse as chunk names: it returns an
+// error in FailHard mode, else logs with `o` as prefix (`false` mutes) and returns nil.
+func (f *Fs) forbidChunk(o interface{}, filePath string) error {
+	mainPath, _, _, _ := f.parseChunkName(filePath)
+	switch verbose, isBool := o.(bool); {
+	case mainPath == "": // not a chunk name, nothing to forbid
+	case f.opt.FailHard:
+		return fmt.Errorf("chunk overlap with %q", mainPath)
+	case !isBool || verbose:
+		fs.Errorf(o, "chunk overlap with %q", mainPath)
+	}
+	return nil
+}
+
 // List the objects and directories in dir into entries.
 // The entries can be returned in any order but should be
 // for a complete directory.
@ -480,7 +503,7 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
 	if err != nil {
 		return nil, err
 	}
-	return f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks)
+	return f.processEntries(ctx, entries, dir)
 }

 // ListR lists the objects and directories of the Fs starting
@ -498,11 +521,11 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
 // immediately.
 //
 // Don't implement this unless you have a more efficient way
-// of listing recursively that doing a directory traversal.
+// of listing recursively than doing a directory traversal.
 func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
 	do := f.base.Features().ListR
 	return do(ctx, dir, func(entries fs.DirEntries) error {
-		newEntries, err := f.chunkEntries(ctx, entries, f.opt.FailOnBadChunks)
+		newEntries, err := f.processEntries(ctx, entries, dir)
 		if err != nil {
 			return err
 		}
@ -510,13 +533,15 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
 	})
 }

-// chunkEntries is called by List(R). It assembles chunk entries from
-// wrapped remote into composite directory entries.
-func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
+// processEntries assembles chunk entries into composite entries
+func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirPath string) (newEntries fs.DirEntries, err error) {
 	// sort entries, so that meta objects (if any) appear before their chunks
-	sortedEntries := make(fs.DirEntries, len(origEntries))
-	copy(sortedEntries, origEntries)
-	sort.Sort(sortedEntries)
+	sortedEntries := origEntries
+	if f.dirSort {
+		// Plain assignment on purpose: `:=` here would declare a new variable
+		// scoped to this block and the sorted copy would be thrown away.
+		sortedEntries = make(fs.DirEntries, len(origEntries))
+		copy(sortedEntries, origEntries)
+		sort.Sort(sortedEntries)
+	}

 	byRemote := make(map[string]*Object)
 	badEntry := make(map[string]bool)
@ -554,7 +579,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 					}
 				}
 				if err := mainObject.addChunk(entry, chunkNo); err != nil {
-					if hardErrors {
+					if f.opt.FailHard {
 						return nil, err
 					}
 					badEntry[mainRemote] = true
@ -570,7 +595,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 			wrapDir.SetRemote(entry.Remote())
 			tempEntries = append(tempEntries, wrapDir)
 		default:
-			if hardErrors {
+			if f.opt.FailHard {
 				return nil, fmt.Errorf("Unknown object type %T", entry)
 			}
 			fs.Debugf(f, "unknown object type %T", entry)
@ -581,7 +606,7 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 		if object, ok := entry.(*Object); ok {
 			remote := object.Remote()
 			if isSubdir[remote] {
-				if hardErrors {
+				if f.opt.FailHard {
 					return nil, fmt.Errorf("%q is both meta object and directory", remote)
 				}
 				badEntry[remote] = true // fall thru
@ -591,17 +616,20 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 				continue
 			}
 			if err := object.validate(); err != nil {
-				if hardErrors {
+				if f.opt.FailHard {
 					return nil, err
 				}
 				fs.Debugf(f, "invalid chunks in object %q", remote)
 				continue
 			}
 		}
-		chunkedEntries = append(chunkedEntries, entry)
+		newEntries = append(newEntries, entry)
 	}

-	return chunkedEntries, nil
+	if f.dirSort {
+		sort.Sort(newEntries)
+	}
+	return newEntries, nil
 }

 // NewObject finds the Object at remote.
@ -615,8 +643,8 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 // but opening even a small file can be slow on some backends.
 //
 func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
-	if mainRemote, _, _, _ := f.parseChunkName(remote); mainRemote != "" {
-		return nil, fmt.Errorf("%q should be meta object, not a chunk", remote)
+	if err := f.forbidChunk(false, remote); err != nil {
+		return nil, errors.Wrap(err, "can't access")
 	}

 	var (
@ -734,12 +762,12 @@ func (o *Object) readMetadata(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
+	_ = reader.Close() // ensure file handle is freed on windows

 	switch o.f.opt.MetaFormat {
 	case "simplejson":
-		metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata)
+		metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata, true)
 		if err != nil {
-			// TODO: in a rare case we might mistake a small file for metadata
 			return errors.Wrap(err, "invalid metadata")
 		}
 		if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
@ -775,8 +803,12 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
 	baseRemote := remote

 	// Transfer chunks data
-	for chunkNo := 0; !c.done; chunkNo++ {
-		tempRemote := f.makeChunkName(baseRemote, chunkNo, "", xactNo)
+	for c.chunkNo = 0; !c.done; c.chunkNo++ {
+		if c.chunkNo > maxSafeChunkNumber {
+			return nil, ErrChunkOverflow
+		}
+
+		tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo)
 		size := c.sizeLeft
 		if size > c.chunkSize {
 			size = c.chunkSize
@ -785,7 +817,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st

 		// If a single chunk is expected, avoid the extra rename operation
 		chunkRemote := tempRemote
-		if c.expectSingle && chunkNo == 0 && optimizeFirstChunk {
+		if c.expectSingle && c.chunkNo == 0 && optimizeFirstChunk {
 			chunkRemote = baseRemote
 		}
 		info := f.wrapInfo(src, chunkRemote, size)
@ -836,8 +868,17 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
 		return nil, fmt.Errorf("Incorrect upload size %d != %d", c.readCount, c.sizeTotal)
 	}

-	// Finalize the non-chunked object
-	if len(c.chunks) == 1 {
+	// Check for input that looks like valid metadata
+	needMeta := len(c.chunks) > 1
+	if c.readCount <= maxMetadataSize && len(c.chunks) == 1 {
+		_, err := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead, false)
+		needMeta = err == nil
+	}
+
+	// Finalize small object as non-chunked.
+	// This is bypassed for unsafe input, ie. contents that look like valid
+	// metadata: then a single chunk with metadata is created instead.
+	if !needMeta && f.useMeta {
 		// If previous object was chunked, remove its chunks
 		f.removeOldChunks(ctx, baseRemote)

@ -918,10 +959,12 @@ type chunkingReader struct {
 	readCount    int64
 	chunkSize    int64
 	chunkLimit   int64
+	chunkNo      int
 	err          error
 	done         bool
 	chunks       []fs.Object
 	expectSingle bool
+	smallHead    []byte
 	fs           *Fs
 	hasher       gohash.Hash
 	md5          string
@ -1001,6 +1044,9 @@ func (c *chunkingReader) Read(buf []byte) (bytesRead int, err error) {
 		return
 	}
 	c.accountBytes(int64(bytesRead))
+	if c.chunkNo == 0 && c.expectSingle && bytesRead > 0 && c.readCount <= maxMetadataSize {
+		c.smallHead = append(c.smallHead, buf[:bytesRead]...)
+	}
 	if bytesRead == 0 && c.sizeLeft == 0 {
 		err = io.EOF // Force EOF when no data left.
 	}
@ -1048,16 +1094,25 @@ func (f *Fs) removeOldChunks(ctx context.Context, remote string) {
 // will return the object and the error, otherwise will return
 // nil and the error
 func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
+	if err := f.forbidChunk(src, src.Remote()); err != nil {
+		return nil, errors.Wrap(err, "refusing to put")
+	}
 	return f.put(ctx, in, src, src.Remote(), options, f.base.Put)
 }

 // PutStream uploads to the remote path with the modTime given of indeterminate size
 func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
+	if err := f.forbidChunk(src, src.Remote()); err != nil {
+		return nil, errors.Wrap(err, "refusing to upload")
+	}
 	return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream)
 }

 // Update in to the object with the modTime given of the given size
 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
+	if err := o.f.forbidChunk(o, o.Remote()); err != nil {
+		return errors.Wrap(err, "update refused")
+	}
 	if err := o.readMetadata(ctx); err != nil {
 		// refuse to update a file of unsupported format
 		return errors.Wrap(err, "refusing to update")
@ -1080,13 +1135,12 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 //
 // This will create a duplicate if we upload a new file without
 // checking to see if there is one already - use Put() for that.
-// TODO: really split stream here
 func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
 	do := f.base.Features().PutUnchecked
 	if do == nil {
 		return nil, errors.New("can't PutUnchecked")
 	}
-	// TODO: handle options and chunking!
+	// TODO: handle range/limit options and really chunk stream here!
 	o, err := do(ctx, in, f.wrapInfo(src, "", -1))
 	if err != nil {
 		return nil, err
@ -1117,6 +1171,9 @@ func (f *Fs) Hashes() hash.Set {
 //
 // Shouldn't return an error if it already exists
 func (f *Fs) Mkdir(ctx context.Context, dir string) error {
+	if err := f.forbidChunk(dir, dir); err != nil {
+		return errors.Wrap(err, "can't mkdir")
+	}
 	return f.base.Mkdir(ctx, dir)
 }

@ -1181,6 +1238,11 @@ func (f *Fs) Purge(ctx context.Context) error {
 // the `delete hidden` flag above or at least the user has been warned.
 //
 func (o *Object) Remove(ctx context.Context) (err error) {
+	if err := o.f.forbidChunk(o, o.Remote()); err != nil {
+		// operations.Move can still call Remove if chunker's Move refuses
+		// to corrupt file in hard mode. Hence, refuse to Remove, too.
+		return errors.Wrap(err, "refuse to corrupt")
+	}
 	if err := o.readMetadata(ctx); err != nil {
 		// Proceed but warn user that unexpected things can happen.
 		fs.Errorf(o, "Removing a file with unsupported metadata: %v", err)
@ -1206,6 +1268,9 @@ func (o *Object) Remove(ctx context.Context) (err error) {

 // copyOrMove implements copy or move
 func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMoveFn, md5, sha1, opName string) (fs.Object, error) {
+	if err := f.forbidChunk(o, remote); err != nil {
+		return nil, errors.Wrapf(err, "can't %s", opName)
+	}
 	if !o.isComposite() {
 		fs.Debugf(o, "%s non-chunked object...", opName)
 		oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk
@ -1493,6 +1558,9 @@ func (o *Object) addChunk(chunk fs.Object, chunkNo int) error {
 		o.chunks = append(o.chunks, chunk)
 		return nil
 	}
+	if chunkNo > maxSafeChunkNumber {
+		return ErrChunkOverflow
+	}
 	if chunkNo > len(o.chunks) {
 		newChunks := make([]fs.Object, (chunkNo + 1), (chunkNo+1)*2)
 		copy(newChunks, o.chunks)
@ -1897,20 +1965,31 @@ func (o *Object) ID() string {

 // Meta format `simplejson`
 type metaSimpleJSON struct {
-	Version int    `json:"ver"`
-	Size    int64  `json:"size"`    // total size of data chunks
-	NChunks int    `json:"nchunks"` // number of data chunks
-	MD5     string `json:"md5"`
-	SHA1    string `json:"sha1"`
+	// required core fields
+	Version  *int   `json:"ver"`
+	Size     *int64 `json:"size"`    // total size of data chunks
+	ChunkNum *int   `json:"nchunks"` // number of data chunks
+	// optional extra fields
+	MD5  string `json:"md5,omitempty"`
+	SHA1 string `json:"sha1,omitempty"`
 }

+// marshalSimpleJSON encodes size, number of chunks and hashes as `simplejson` metadata.
+//
+// Current implementation creates metadata in two cases:
+// - for files larger than chunk size
+// - if file contents can be mistaken as meta object
+//
 func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) {
+	version := metadataVersion
 	metadata := metaSimpleJSON{
-		Version: metadataVersion,
-		Size:    size,
-		NChunks: nChunks,
-		MD5:     md5,
-		SHA1:    sha1,
+		// required core fields
+		Version:  &version,
+		Size:     &size,
+		ChunkNum: &nChunks,
+		// optional extra fields
+		MD5:  md5,
+		SHA1: sha1,
 	}
 	data, err := json.Marshal(&metadata)
 	if err == nil && data != nil && len(data) >= maxMetadataSize {
@ -1920,6 +1999,7 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
 	return data, err
 }

+// unmarshalSimpleJSON parses and validates metadata in the `simplejson` format.
 // Note: only metadata format version 1 is supported atm.
 //
 // Current implementation creates metadata only for files larger than
@ -1931,22 +2011,37 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
 // handled by current implementation.
 // The version check below will then explicitly ask user to upgrade rclone.
 //
-func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
-	if len(data) > maxMetadataSize {
+func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte, strictChecks bool) (info *ObjectInfo, err error) {
+	// Be strict about JSON format
+	// to reduce possibility that a random small file resembles metadata.
+	if data != nil && len(data) > maxMetadataSize {
 		return nil, errors.New("too big")
 	}
+	if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
+		return nil, errors.New("invalid json")
+	}
 	var metadata metaSimpleJSON
 	err = json.Unmarshal(data, &metadata)
 	if err != nil {
 		return nil, err
 	}
-
+	// Basic fields are strictly required
+	// to reduce possibility that a random small file resembles metadata.
+	if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
+		return nil, errors.New("missing required field")
+	}
 	// Perform strict checks, avoid corruption of future metadata formats.
-	if metadata.Size < 0 {
+	if *metadata.Version < 1 {
+		return nil, errors.New("wrong version")
+	}
+	if *metadata.Size < 0 {
 		return nil, errors.New("negative file size")
 	}
-	if metadata.NChunks <= 0 {
-		return nil, errors.New("wrong number of chunks")
+	if *metadata.ChunkNum < 0 {
+		return nil, errors.New("negative number of chunks")
+	}
+	if *metadata.ChunkNum > maxSafeChunkNumber {
+		return nil, ErrChunkOverflow
 	}
 	if metadata.MD5 != "" {
 		_, err = hex.DecodeString(metadata.MD5)
@ -1960,18 +2055,20 @@ func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte)
 			return nil, errors.New("wrong sha1 hash")
 		}
 	}
-	if metadata.Version <= 0 {
-		return nil, errors.New("wrong version number")
+	// ChunkNum is allowed to be 0 in future versions
+	if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
+		return nil, errors.New("wrong number of chunks")
 	}
-	if metadata.Version != metadataVersion {
-		return nil, errors.Errorf("version %d is not supported, please upgrade rclone", metadata.Version)
+	// Non-strict mode also accepts future metadata versions
+	if *metadata.Version > metadataVersion && strictChecks {
+		// Version is a *int: dereference it, otherwise %d prints the pointer address.
+		return nil, fmt.Errorf("version %d is not supported, please upgrade rclone", *metadata.Version)
 	}

 	var nilFs *Fs // nil object triggers appropriate type method
-	info = nilFs.wrapInfo(metaObject, "", metadata.Size)
+	info = nilFs.wrapInfo(metaObject, "", *metadata.Size)
+	info.nChunks = *metadata.ChunkNum
 	info.md5 = metadata.MD5
 	info.sha1 = metadata.SHA1
-	info.nChunks = metadata.NChunks
 	return info, nil
 }

--- a/backend/chunker/chunker_internal_test.go
+++ b/backend/chunker/chunker_internal_test.go
@ -1,15 +1,23 @@
 package chunker

 import (
+	"bytes"
 	"context"
 	"flag"
 	"fmt"
+	"io/ioutil"
+	"path"
+	"regexp"
+	"strings"
 	"testing"

 	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/operations"
 	"github.com/rclone/rclone/fstest"
 	"github.com/rclone/rclone/fstest/fstests"
+	"github.com/rclone/rclone/lib/random"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )

 // Command line flags
@ -240,6 +248,307 @@ func testChunkNameFormat(t *testing.T, f *Fs) {
 	assertMakeNamePanics("fish", -2, "bind.", 0)
 }

+// testSmallFileInternals uploads tiny files (an empty and a two-byte one) and
+// verifies that objects returned by Put and by NewObject share the same
+// internal layout and yield the uploaded contents.
+func testSmallFileInternals(t *testing.T, f *Fs) {
+	const dir = "small"
+	ctx := context.Background()
+	saveOpt := f.opt
+	defer func() {
+		f.opt.FailHard = false
+		_ = operations.Purge(ctx, f.base, dir)
+		f.opt = saveOpt
+	}()
+	f.opt.FailHard = false
+
+	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
+
+	// verifyLayout asserts the internal structure expected of a small file.
+	verifyLayout := func(obj fs.Object) {
+		assert.NotNil(t, obj)
+		o, ok := obj.(*Object)
+		assert.True(t, ok)
+		assert.NotNil(t, o)
+		if o == nil {
+			return
+		}
+		if f.useMeta {
+			// normally non-chunked file is kept in the Object's main field
+			assert.NotNil(t, o.main)
+			assert.False(t, o.isComposite())
+			assert.Equal(t, 0, len(o.chunks))
+			return
+		}
+		// If meta format is "none", non-chunked file (even empty)
+		// internally is a single chunk without meta object.
+		assert.Nil(t, o.main)
+		assert.True(t, o.isComposite()) // sorry, sometimes a name is misleading
+		assert.Equal(t, 1, len(o.chunks))
+	}
+
+	// verifyContents asserts both the reported size and the data read back.
+	verifyContents := func(obj fs.Object, want string) {
+		assert.NotNil(t, obj)
+		assert.Equal(t, int64(len(want)), obj.Size())
+
+		in, err := obj.Open(ctx)
+		assert.NoError(t, err)
+		assert.NotNil(t, in)
+		if in == nil {
+			return
+		}
+		got, err := ioutil.ReadAll(in)
+		assert.NoError(t, err)
+		assert.Equal(t, want, string(got))
+		_ = in.Close()
+	}
+
+	// roundTrip uploads one file, inspects it twice and removes it.
+	roundTrip := func(name, contents string) {
+		remote := path.Join(dir, name)
+		item := fstest.Item{Path: remote, ModTime: modTime}
+		_, uploaded := fstests.PutTestContents(ctx, t, f, &item, contents, false)
+		assert.NotNil(t, uploaded)
+		verifyLayout(uploaded)
+		verifyContents(uploaded, contents)
+
+		// objects returned by Put and NewObject must have similar structure
+		found, err := f.NewObject(ctx, remote)
+		assert.NoError(t, err)
+		assert.NotNil(t, found)
+		verifyLayout(found)
+		verifyContents(found, contents)
+
+		_ = found.Remove(ctx)
+		_ = uploaded.Remove(ctx) // for good
+	}
+
+	for _, tc := range []struct{ name, contents string }{
+		{"emptyfile", ""},
+		{"smallfile", "Ok"},
+	} {
+		roundTrip(tc.name, tc.contents)
+	}
+}
+
+// testPreventCorruption checks that in FailHard mode chunker refuses
+// operations whose target path parses as a chunk name of an existing file
+// (Mkdir, Move, Copy, NewObject, Put, Update, Remove), while reading a chunk
+// through an already obtained object keeps working.
+func testPreventCorruption(t *testing.T, f *Fs) {
+	if f.opt.ChunkSize > 50 {
+		t.Skip("this test requires small chunks")
+	}
+	const dir = "corrupted"
+	ctx := context.Background()
+	saveOpt := f.opt
+	defer func() {
+		f.opt.FailHard = false
+		_ = operations.Purge(ctx, f.base, dir)
+		f.opt = saveOpt
+	}()
+	f.opt.FailHard = true
+
+	contents := random.String(250)
+	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
+	const overlapMessage = "chunk overlap"
+
+	assertOverlapError := func(err error) {
+		assert.Error(t, err)
+		if err != nil {
+			assert.Contains(t, err.Error(), overlapMessage)
+		}
+	}
+
+	newFile := func(name string) fs.Object {
+		item := fstest.Item{Path: path.Join(dir, name), ModTime: modTime}
+		_, obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
+		require.NotNil(t, obj)
+		return obj
+	}
+	billyObj := newFile("billy")
+
+	billyChunkName := func(chunkNo int) string {
+		return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1)
+	}
+
+	err := f.Mkdir(ctx, billyChunkName(1))
+	assertOverlapError(err)
+
+	_, err = f.Move(ctx, newFile("silly1"), billyChunkName(2))
+	assert.Error(t, err)
+	assert.True(t, err == fs.ErrorCantMove || (err != nil && strings.Contains(err.Error(), overlapMessage)))
+
+	_, err = f.Copy(ctx, newFile("silly2"), billyChunkName(3))
+	assert.Error(t, err)
+	assert.True(t, err == fs.ErrorCantCopy || (err != nil && strings.Contains(err.Error(), overlapMessage)))
+
+	// accessing chunks in strict mode is prohibited
+	f.opt.FailHard = true
+	billyChunk4Name := billyChunkName(4)
+	billyChunk4, err := f.NewObject(ctx, billyChunk4Name)
+	assertOverlapError(err)
+
+	f.opt.FailHard = false
+	billyChunk4, err = f.NewObject(ctx, billyChunk4Name)
+	assert.NoError(t, err)
+	require.NotNil(t, billyChunk4)
+
+	f.opt.FailHard = true
+	_, err = f.Put(ctx, bytes.NewBufferString(contents), billyChunk4)
+	assertOverlapError(err)
+
+	// you can freely read chunks (if you have an object)
+	r, err := billyChunk4.Open(ctx)
+	assert.NoError(t, err)
+	var chunkContents []byte
+	assert.NotPanics(t, func() {
+		chunkContents, err = ioutil.ReadAll(r)
+		_ = r.Close() // free the file handle (matters on windows)
+	})
+	assert.NoError(t, err)
+	assert.NotEqual(t, contents, string(chunkContents))
+
+	// but you can't change them
+	err = billyChunk4.Update(ctx, bytes.NewBufferString(contents), newFile("silly3"))
+	assertOverlapError(err)
+
+	// Remove isn't special, you can't corrupt files even if you have an object
+	err = billyChunk4.Remove(ctx)
+	assertOverlapError(err)
+
+	// continue with a fresh file (willy) in case billy was somehow corrupted
+	willyObj := newFile("willy")
+	willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1)
+	f.opt.FailHard = false
+	willyChunk, err := f.NewObject(ctx, willyChunkName)
+	f.opt.FailHard = true
+	assert.NoError(t, err)
+	require.NotNil(t, willyChunk)
+
+	_, err = operations.Copy(ctx, f, willyChunk, willyChunkName, newFile("silly4"))
+	assertOverlapError(err)
+
+	// operations.Move will return error when chunker's Move refused
+	// to corrupt target file, but reverts to copy/delete method
+	// still trying to delete target chunk. Chunker must come to rescue.
+	_, err = operations.Move(ctx, f, willyChunk, willyChunkName, newFile("silly5"))
+	assertOverlapError(err)
+	r, err = willyChunk.Open(ctx)
+	assert.NoError(t, err)
+	assert.NotPanics(t, func() {
+		_, err = ioutil.ReadAll(r)
+		_ = r.Close() // free the file handle before the deferred Purge
+	})
+	assert.NoError(t, err)
+}
+
+// testChunkNumberOverflow plants, directly on the base remote, a chunk whose
+// number (wreakNumber) exceeds maxSafeChunkNumber and then checks how listing
+// and NewObject react with FailHard off and on.
+func testChunkNumberOverflow(t *testing.T, f *Fs) {
+	if f.opt.ChunkSize > 50 {
+		t.Skip("this test requires small chunks")
+	}
+	const dir = "wreaked"
+	const wreakNumber = 10200300
+	ctx := context.Background()
+	saveOpt := f.opt
+	defer func() {
+		// Purge goes through the base remote; options are restored last.
+		f.opt.FailHard = false
+		_ = operations.Purge(ctx, f.base, dir)
+		f.opt = saveOpt
+	}()
+
+	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
+	contents := random.String(100)
+
+	// newFile uploads `contents` under dir/name to the given fs (chunker
+	// itself or its base remote) and returns the object with its full path.
+	newFile := func(f fs.Fs, name string) (fs.Object, string) {
+		filename := path.Join(dir, name)
+		item := fstest.Item{Path: filename, ModTime: modTime}
+		_, obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
+		require.NotNil(t, obj)
+		return obj, filename
+	}
+
+	f.opt.FailHard = false
+	file, fileName := newFile(f, "wreaker")
+	// the rogue chunk is written via f.base, bypassing chunker
+	wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", -1))
+
+	// Soft mode: nil items and nil dirs are passed as the expected listing
+	// (presumably meaning "empty" - confirm against fstest), and direct
+	// access to the file must fail.
+	f.opt.FailHard = false
+	fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision())
+	_, err := f.NewObject(ctx, fileName)
+	assert.Error(t, err)
+
+	// Hard mode: both listing and direct access must report an error.
+	f.opt.FailHard = true
+	_, err = f.List(ctx, dir)
+	assert.Error(t, err)
+	_, err = f.NewObject(ctx, fileName)
+	assert.Error(t, err)
+
+	// best-effort cleanup, errors are deliberately ignored
+	f.opt.FailHard = false
+	_ = wreak.Remove(ctx)
+	_ = file.Remove(ctx)
+}
+
+// testMetadataInput uploads small files whose contents look like chunker
+// metadata (current, past and future versions) and checks that each one is
+// stored as a forced composite with exactly one chunk and reads back intact.
+func testMetadataInput(t *testing.T, f *Fs) {
+	const minChunkForTest = 50
+	if f.opt.ChunkSize < minChunkForTest {
+		t.Skip("this test requires chunks that fit metadata")
+	}
+
+	const dir = "usermeta"
+	ctx := context.Background()
+	saveOpt := f.opt
+	defer func() {
+		f.opt.FailHard = false
+		_ = operations.Purge(ctx, f.base, dir)
+		f.opt = saveOpt
+	}()
+	f.opt.FailHard = false
+
+	modTime := fstest.Time("2001-02-03T04:05:06.499999999Z")
+
+	// putFile uploads contents under name to the given fs and asserts
+	// that an object came back.
+	putFile := func(f fs.Fs, name, contents, message string, check bool) fs.Object {
+		item := fstest.Item{Path: name, ModTime: modTime}
+		_, obj := fstests.PutTestContents(ctx, t, f, &item, contents, check)
+		assert.NotNil(t, obj, message)
+		return obj
+	}
+
+	runSubtest := func(contents, name string) {
+		description := fmt.Sprintf("file with %s metadata", name)
+		filename := path.Join(dir, name)
+		require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct")
+
+		// plant a stray chunk #0 directly on the base remote, then upload through chunker
+		part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true)
+		_ = putFile(f, filename, contents, "upload "+description, false)
+
+		obj, err := f.NewObject(ctx, filename)
+		assert.NoError(t, err, "access "+description)
+		assert.NotNil(t, obj)
+		assert.Equal(t, int64(len(contents)), obj.Size(), "size "+description)
+
+		// ok is a bool, so it must be checked with True: NotNil would always pass
+		o, ok := obj.(*Object)
+		assert.True(t, ok)
+		if o != nil {
+			assert.True(t, o.isComposite() && len(o.chunks) == 1, description+" is forced composite")
+		}
+
+		defer func() {
+			_ = obj.Remove(ctx)
+			_ = part.Remove(ctx)
+		}()
+
+		r, err := obj.Open(ctx)
+		assert.NoError(t, err, "open "+description)
+		assert.NotNil(t, r, "open stream of "+description)
+		if err == nil && r != nil {
+			data, err := ioutil.ReadAll(r)
+			assert.NoError(t, err, "read all of "+description)
+			assert.Equal(t, contents, string(data), description+" contents is ok")
+			_ = r.Close()
+		}
+	}
+
+	metaData, err := marshalSimpleJSON(ctx, 3, 1, "", "")
+	require.NoError(t, err)
+	todaysMeta := string(metaData)
+	runSubtest(todaysMeta, "today")
+
+	pastMeta := regexp.MustCompile(`"ver":[0-9]+`).ReplaceAllLiteralString(todaysMeta, `"ver":1`)
+	pastMeta = regexp.MustCompile(`"size":[0-9]+`).ReplaceAllLiteralString(pastMeta, `"size":0`)
+	runSubtest(pastMeta, "past")
+
+	futureMeta := regexp.MustCompile(`"ver":[0-9]+`).ReplaceAllLiteralString(todaysMeta, `"ver":999`)
+	futureMeta = regexp.MustCompile(`"nchunks":[0-9]+`).ReplaceAllLiteralString(futureMeta, `"nchunks":0,"x":"y"`)
+	runSubtest(futureMeta, "future")
+}
+
 // InternalTest dispatches all internal tests
 func (f *Fs) InternalTest(t *testing.T) {
 	t.Run("PutLarge", func(t *testing.T) {
@ -251,6 +560,18 @@ func (f *Fs) InternalTest(t *testing.T) {
 	t.Run("ChunkNameFormat", func(t *testing.T) {
 		testChunkNameFormat(t, f)
 	})
+	t.Run("SmallFileInternals", func(t *testing.T) {
+		testSmallFileInternals(t, f)
+	})
+	t.Run("PreventCorruption", func(t *testing.T) {
+		testPreventCorruption(t, f)
+	})
+	t.Run("ChunkNumberOverflow", func(t *testing.T) {
+		testChunkNumberOverflow(t, f)
+	})
+	t.Run("MetadataInput", func(t *testing.T) {
+		testMetadataInput(t, f)
+	})
 }

 var _ fstests.InternalTester = (*Fs)(nil)
--- a/docs/content/chunker.md
+++ b/docs/content/chunker.md
@ -115,11 +115,16 @@ original content.
 When the `list` rclone command scans a directory on wrapped remote,
 the potential chunk files are accounted for, grouped and assembled into
 composite directory entries. Any temporary chunks are hidden.
-`list` can sometimes come across composite files with missing or invalid
-chunks, eg if wrapped file system has been tampered with or damaged.
-If chunker detects a missing chunk it will by default silently ignore
-the whole group. You can use the `--chunker-fail-on-bad-chunks`
-rclone option to make `list` fail with a loud error message.
+
+List and other commands can sometimes come across composite files with
+missing or invalid chunks, eg. shadowed by a like-named directory or
+another file. This usually means that the wrapped file system has been
+directly tampered with or damaged. If chunker detects a missing chunk it
+will by default print a warning, skip the whole incomplete group of chunks
+and proceed with the current command.
+You can set the `--chunker-fail-hard` flag to have commands abort with
+an error message in such cases.
+

 #### Chunk names

@ -368,19 +373,18 @@ Metadata is a small JSON file named after the composite file.
        - Simple JSON supports hash sums and chunk validation.
        - It has the following fields: ver, size, nchunks, md5, sha1.

-#### --chunker-fail-on-bad-chunks
+#### --chunker-fail-hard

-The list command might encounter files with missinng or invalid chunks.
-This boolean flag tells what rclone should do in such cases.
+Choose how chunker should handle files with missing or invalid chunks.

- Config:      fail_on_bad_chunks
- Env Var:     RCLONE_CHUNKER_FAIL_ON_BAD_CHUNKS
+- Config:      fail_hard
+- Env Var:     RCLONE_CHUNKER_FAIL_HARD
 - Type:        bool
 - Default:     false
 - Examples:
    - "true"
-        - Fail with error.
+        - Report errors and abort current command.
    - "false"
-        - Silently ignore invalid object.
+        - Warn user, skip incomplete file and proceed.

 <!--- autogenerated options stop -->
--- a/fstest/fstests/fstests.go
+++ b/fstest/fstests/fstests.go
@ -151,16 +151,19 @@ func retry(t *testing.T, what string, f func() error) {
 	require.NoError(t, err, what)
 }

-// testPut puts file to the remote
+// testPut puts file with random contents to the remote
 func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (string, fs.Object) {
+	return PutTestContents(ctx, t, f, file, random.String(100), true)
+}
+
+// PutTestContents puts file with given contents to the remote and, if check is true, verifies it; unlike TestPutLarge it doesn't remove the file.
+func PutTestContents(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool) (string, fs.Object) {
 	var (
 		err        error
 		obj        fs.Object
 		uploadHash *hash.MultiHasher
-		contents   string
 	)
 	retry(t, "Put", func() error {
-		contents = random.String(100)
 		buf := bytes.NewBufferString(contents)
 		uploadHash = hash.NewMultiHasher()
 		in := io.TeeReader(buf, uploadHash)
@ -171,10 +174,12 @@ func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (str
 		return err
 	})
 	file.Hashes = uploadHash.Sums()
-	file.Check(t, obj, f.Precision())
-	// Re-read the object and check again
-	obj = findObject(ctx, t, f, file.Path)
-	file.Check(t, obj, f.Precision())
+	if check {
+		file.Check(t, obj, f.Precision())
+		// Re-read the object and check again
+		obj = findObject(ctx, t, f, file.Path)
+		file.Check(t, obj, f.Precision())
+	}
 	return contents, obj
 }