From 6210e22ab585eccdf27c12642d342056459e2e6e Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Sun, 16 May 2021 19:39:33 +0300 Subject: [PATCH] bisync: implementation #5164 Fixes #118 Co-authored-by: Chris Nelson --- cmd/all/all.go | 1 + cmd/bisync/LICENSE.cjnaz | 21 + cmd/bisync/bilib/canonical.go | 42 ++ cmd/bisync/bilib/files.go | 137 ++++ cmd/bisync/bilib/names.go | 61 ++ cmd/bisync/bilib/output.go | 22 + cmd/bisync/bisync_test.go | 1249 +++++++++++++++++++++++++++++++++ cmd/bisync/cmd.go | 225 ++++++ cmd/bisync/deltas.go | 310 ++++++++ cmd/bisync/help.go | 26 + cmd/bisync/listing.go | 305 ++++++++ cmd/bisync/log.go | 49 ++ cmd/bisync/operations.go | 455 ++++++++++++ cmd/bisync/queue.go | 62 ++ cmd/bisync/rc.go | 91 +++ go.mod | 1 + 16 files changed, 3057 insertions(+) create mode 100644 cmd/bisync/LICENSE.cjnaz create mode 100644 cmd/bisync/bilib/canonical.go create mode 100644 cmd/bisync/bilib/files.go create mode 100644 cmd/bisync/bilib/names.go create mode 100644 cmd/bisync/bilib/output.go create mode 100644 cmd/bisync/bisync_test.go create mode 100644 cmd/bisync/cmd.go create mode 100644 cmd/bisync/deltas.go create mode 100644 cmd/bisync/help.go create mode 100644 cmd/bisync/listing.go create mode 100644 cmd/bisync/log.go create mode 100644 cmd/bisync/operations.go create mode 100644 cmd/bisync/queue.go create mode 100644 cmd/bisync/rc.go diff --git a/cmd/all/all.go b/cmd/all/all.go index dbf2147ad..569d1821e 100644 --- a/cmd/all/all.go +++ b/cmd/all/all.go @@ -7,6 +7,7 @@ import ( _ "github.com/rclone/rclone/cmd/about" _ "github.com/rclone/rclone/cmd/authorize" _ "github.com/rclone/rclone/cmd/backend" + _ "github.com/rclone/rclone/cmd/bisync" _ "github.com/rclone/rclone/cmd/cachestats" _ "github.com/rclone/rclone/cmd/cat" _ "github.com/rclone/rclone/cmd/check" diff --git a/cmd/bisync/LICENSE.cjnaz b/cmd/bisync/LICENSE.cjnaz new file mode 100644 index 000000000..d9d1ef438 --- /dev/null +++ b/cmd/bisync/LICENSE.cjnaz @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017-2020 Chris Nelson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
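The bilib package introduced in the next few files carries helpers shared by bisync and its test engine. Its canonical-path functions derive a session name for each pair of synced paths, which keys the listing and state files bisync keeps in its working directory. A minimal standalone sketch of the naming scheme, using hypothetical remotes (the real helpers are FsPath, CanonicalPath and SessionName in canonical.go below):

    package main

    import (
    	"fmt"
    	"regexp"
    	"strings"
    )

    // Mirrors bilib.CanonicalPath: trim leading/trailing slashes, then
    // replace whitespace and the characters \ / : ? * with underscores.
    var nonCanonicalChars = regexp.MustCompile(`[\s\\/:?*]`)

    func canonicalPath(remote string) string {
    	return nonCanonicalChars.ReplaceAllString(strings.Trim(remote, `\/`), "_")
    }

    func main() {
    	// A session name joins the canonical forms of both paths with "..".
    	fmt.Println(canonicalPath("gdrive:My Files/sync") + ".." + canonicalPath("/home/user/sync"))
    	// Output: gdrive_My_Files_sync..home_user_sync
    }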
diff --git a/cmd/bisync/bilib/canonical.go b/cmd/bisync/bilib/canonical.go
new file mode 100644
index 000000000..8dff41bff
--- /dev/null
+++ b/cmd/bisync/bilib/canonical.go
@@ -0,0 +1,42 @@
+// Package bilib provides common stuff for bisync and bisync_test
+package bilib
+
+import (
+	"os"
+	"regexp"
+	"runtime"
+	"strings"
+
+	"github.com/rclone/rclone/fs"
+)
+
+// FsPath converts Fs to a suitable rclone argument
+func FsPath(f fs.Fs) string {
+	name, path, slash := f.Name(), f.Root(), "/"
+	if name == "local" {
+		slash = string(os.PathSeparator)
+		if runtime.GOOS == "windows" {
+			path = strings.ReplaceAll(path, "/", slash)
+			path = strings.TrimPrefix(path, `\\?\`)
+		}
+	} else {
+		path = name + ":" + path
+	}
+	if !strings.HasSuffix(path, slash) {
+		path += slash
+	}
+	return path
+}
+
+// CanonicalPath converts a remote to a suitable base file name
+func CanonicalPath(remote string) string {
+	trimmed := strings.Trim(remote, `\/`)
+	return nonCanonicalChars.ReplaceAllString(trimmed, "_")
+}
+
+var nonCanonicalChars = regexp.MustCompile(`[\s\\/:?*]`)
+
+// SessionName makes a unique base name for the sync operation
+func SessionName(fs1, fs2 fs.Fs) string {
+	return CanonicalPath(FsPath(fs1)) + ".." + CanonicalPath(FsPath(fs2))
+}
diff --git a/cmd/bisync/bilib/files.go b/cmd/bisync/bilib/files.go
new file mode 100644
index 000000000..9171c9700
--- /dev/null
+++ b/cmd/bisync/bilib/files.go
@@ -0,0 +1,137 @@
+// Package bilib provides common stuff for bisync and bisync_test
+// This file holds local file/directory helpers (which would be nice to have in lib/file)
+package bilib
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"runtime"
+)
+
+// PermSecure is a Unix permission for a file accessible only by its owner
+const PermSecure = 0600
+
+var (
+	regexLocalPath   = regexp.MustCompile(`^[./\\]`)
+	regexWindowsPath = regexp.MustCompile(`^[a-zA-Z]:`)
+	regexRemotePath  = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_-]*:`)
+)
+
+// IsLocalPath returns true if its argument is a non-remote path.
+// Empty string or a relative path will be considered local.
+// Note: `c:dir` will be considered local on Windows but remote on Linux.
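+// Illustrative results (hypothetical examples, derived from the regexes above):
+//
+//	IsLocalPath("")            == true  (empty counts as local)
+//	IsLocalPath("./file")      == true  (leading . / \ means local)
+//	IsLocalPath(`C:\dir`)      == true  on Windows, false elsewhere
+//	IsLocalPath("remote:dir")  == false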
+func IsLocalPath(path string) bool {
+	if path == "" || regexLocalPath.MatchString(path) {
+		return true
+	}
+	if runtime.GOOS == "windows" && regexWindowsPath.MatchString(path) {
+		return true
+	}
+	return !regexRemotePath.MatchString(path)
+}
+
+// FileExists returns true if the local file exists
+func FileExists(file string) bool {
+	_, err := os.Stat(file)
+	return !os.IsNotExist(err)
+}
+
+// CopyFileIfExists is like CopyFile but does not fail if the source does not exist
+func CopyFileIfExists(srcFile, dstFile string) error {
+	if !FileExists(srcFile) {
+		return nil
+	}
+	return CopyFile(srcFile, dstFile)
+}
+
+// CopyFile copies a local file
+func CopyFile(src, dst string) (err error) {
+	var (
+		rd   io.ReadCloser
+		wr   io.WriteCloser
+		info os.FileInfo
+	)
+	if info, err = os.Stat(src); err != nil {
+		return
+	}
+	if rd, err = os.Open(src); err != nil {
+		return
+	}
+	defer func() {
+		_ = rd.Close()
+	}()
+	if wr, err = os.Create(dst); err != nil {
+		return
+	}
+	_, err = io.Copy(wr, rd)
+	if e := wr.Close(); err == nil {
+		err = e
+	}
+	if e := os.Chmod(dst, info.Mode()); err == nil {
+		err = e
+	}
+	if e := os.Chtimes(dst, info.ModTime(), info.ModTime()); err == nil {
+		err = e
+	}
+	return
+}
+
+// CopyDir copies a local directory
+func CopyDir(src string, dst string) (err error) {
+	src = filepath.Clean(src)
+	dst = filepath.Clean(dst)
+
+	si, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+	if !si.IsDir() {
+		return fmt.Errorf("source is not a directory")
+	}
+
+	_, err = os.Stat(dst)
+	if err != nil && !os.IsNotExist(err) {
+		return
+	}
+	if err == nil {
+		return fmt.Errorf("destination already exists")
+	}
+
+	err = os.MkdirAll(dst, si.Mode())
+	if err != nil {
+		return
+	}
+
+	entries, err := ioutil.ReadDir(src)
+	if err != nil {
+		return
+	}
+
+	for _, entry := range entries {
+		srcPath := filepath.Join(src, entry.Name())
+		dstPath := filepath.Join(dst, entry.Name())
+
+		if entry.IsDir() {
+			err = CopyDir(srcPath, dstPath)
+			if err != nil {
+				return
+			}
+		} else {
+			// Skip symlinks.
+ if entry.Mode()&os.ModeSymlink != 0 { + continue + } + + err = CopyFile(srcPath, dstPath) + if err != nil { + return + } + } + } + + return +} diff --git a/cmd/bisync/bilib/names.go b/cmd/bisync/bilib/names.go new file mode 100644 index 000000000..177a19f4a --- /dev/null +++ b/cmd/bisync/bilib/names.go @@ -0,0 +1,61 @@ +package bilib + +import ( + "bytes" + "io/ioutil" + "sort" + "strconv" +) + +// Names comprises a set of file names +type Names map[string]interface{} + +// ToNames converts string slice to a set of names +func ToNames(list []string) Names { + ns := Names{} + for _, f := range list { + ns.Add(f) + } + return ns +} + +// Add adds new file name to the set +func (ns Names) Add(name string) { + ns[name] = nil +} + +// Has checks whether given name is present in the set +func (ns Names) Has(name string) bool { + _, ok := ns[name] + return ok +} + +// NotEmpty checks whether set is not empty +func (ns Names) NotEmpty() bool { + return len(ns) > 0 +} + +// ToList converts name set to string slice +func (ns Names) ToList() []string { + list := []string{} + for file := range ns { + list = append(list, file) + } + sort.Strings(list) + return list +} + +// Save saves name set in a text file +func (ns Names) Save(path string) error { + return SaveList(ns.ToList(), path) +} + +// SaveList saves file name list in a text file +func SaveList(list []string, path string) error { + buf := &bytes.Buffer{} + for _, s := range list { + _, _ = buf.WriteString(strconv.Quote(s)) + _ = buf.WriteByte('\n') + } + return ioutil.WriteFile(path, buf.Bytes(), PermSecure) +} diff --git a/cmd/bisync/bilib/output.go b/cmd/bisync/bilib/output.go new file mode 100644 index 000000000..22712e93b --- /dev/null +++ b/cmd/bisync/bilib/output.go @@ -0,0 +1,22 @@ +// Package bilib provides common stuff for bisync and bisync_test +package bilib + +import ( + "bytes" + "log" + + "github.com/sirupsen/logrus" +) + +// CaptureOutput runs a function capturing its output. +func CaptureOutput(fun func()) []byte { + logSave := log.Writer() + logrusSave := logrus.StandardLogger().Writer() + buf := &bytes.Buffer{} + log.SetOutput(buf) + logrus.SetOutput(buf) + fun() + log.SetOutput(logSave) + logrus.SetOutput(logrusSave) + return buf.Bytes() +} diff --git a/cmd/bisync/bisync_test.go b/cmd/bisync/bisync_test.go new file mode 100644 index 000000000..9f88970ed --- /dev/null +++ b/cmd/bisync/bisync_test.go @@ -0,0 +1,1249 @@ +// TestBisync is a test engine for bisync test cases. +// See https://rclone.org/bisync/#testing for documentation. +// Test cases are organized in subdirs beneath ./testdata +// Results are compared against golden listings and log file. 
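+//
+// A typical local run of a single case might look like this (assuming the
+// standard fstest -remote flag and the -case flag defined below):
+//
+//	go test ./cmd/bisync -remote local -case basic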
+package bisync_test + +import ( + "bytes" + "context" + "flag" + "fmt" + "io/ioutil" + "log" + "os" + "path" + "path/filepath" + "regexp" + "runtime" + "sort" + "strconv" + "strings" + "testing" + "time" + + "github.com/rclone/rclone/cmd/bisync" + "github.com/rclone/rclone/cmd/bisync/bilib" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/cache" + "github.com/rclone/rclone/fs/filter" + "github.com/rclone/rclone/fs/fspath" + "github.com/rclone/rclone/fs/object" + "github.com/rclone/rclone/fs/operations" + "github.com/rclone/rclone/fs/sync" + "github.com/rclone/rclone/fstest" + "github.com/rclone/rclone/lib/atexit" + "github.com/rclone/rclone/lib/random" + + "github.com/pkg/errors" + "github.com/pmezard/go-difflib/difflib" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + _ "github.com/rclone/rclone/backend/all" // for integration tests +) + +const ( + touchDateFormat = "2006-01-02" + goldenCanonBase = "_testdir_" + logFileName = "test.log" + dropMe = "*** [DROP THIS LINE] ***" + eol = "\n" + slash = string(os.PathSeparator) + fixSlash = (runtime.GOOS == "windows") +) + +// logReplacements make modern test logs comparable with golden dir. +// It is a string slice of even length with this structure: +// {`matching regular expression`, "mangled result string", ...} +var logReplacements = []string{ + // skip syslog facility markers + `^(<[1-9]>)(INFO |ERROR |NOTICE|DEBUG ):(.*)$`, "$2:$3", + // skip log prefixes + `^\d+/\d\d/\d\d \d\d:\d\d:\d\d(?:\.\d{6})? `, "", + // ignore rclone info messages + `^INFO : .*?: (Deleted|Copied |Moved |Updated ).*$`, dropMe, + `^NOTICE: .*?: Replacing invalid UTF-8 characters in "[^"]*"$`, dropMe, + // ignore rclone debug messages + `^DEBUG : .*$`, dropMe, + // ignore dropbox info messages + `^NOTICE: too_many_(requests|write_operations)/\.*: Too many requests or write operations.*$`, dropMe, + `^NOTICE: Dropbox root .*?: Forced to upload files to set modification times on this backend.$`, dropMe, + `^INFO : .*?: src and dst identical but can't set mod time without deleting and re-uploading$`, dropMe, +} + +// Some dry-run messages differ depending on the particular remote. +var dryrunReplacements = []string{ + `^(NOTICE: file5.txt: Skipped) (copy|update modification time) (as --dry-run is set [(]size \d+[)])$`, + `$1 copy (or update modification time) $3`, +} + +// Some groups of log lines may appear unordered because rclone applies +// many operations in parallel to boost performance. +var logHoppers = []string{ + // Test case `dry-run` produced log mismatches due to non-deterministic + // order of captured dry-run info messages. + `NOTICE: \S+?: Skipped (?:copy|move|delete|copy \(or [^)]+\)|update modification time) as --dry-run is set \(size \d+\)`, + + // Test case `extended-filenames` detected difference in order of files + // with extended unicode names between Windows and Unix or GDrive, + // but the order is in fact not important for success. + `(?:INFO |NOTICE): - Path[12] +File (?:was deleted|is new|is newer|is OLDER) +- .*`, + + // Test case `check-access-filters` detected listing miscompares due + // to indeterminate order of rclone operations in presence of multiple + // subdirectories. The order inconsistency initially showed up in the + // listings and triggered reordering of log messages, but the actual + // files will in fact match. 
+ `ERROR : - +Access test failed: Path[12] file not found in Path[12] - .*`, + + // Test case `resync` suffered from the order of queued copies. + `(?:INFO |NOTICE): - Path2 Resync will copy to Path1 +- .*`, +} + +// Some log lines can contain Windows path separator that must be +// converted to "/" in every matching token to match golden logs. +var logLinesWithSlash = []string{ + `\(\d\d\) : (touch-glob|touch-copy|copy-file|copy-as|copy-dir|delete-file) `, + `INFO : - Path[12] +Queue copy to Path[12] `, + `INFO : Synching Path1 .*? with Path2 `, + `INFO : Validating listings for `, +} +var regexFixSlash = regexp.MustCompile("^(" + strings.Join(logLinesWithSlash, "|") + ")") + +// Command line flags for bisync test +var ( + argTestCase = flag.String("case", "", "Bisync test case to run") + argRemote2 = flag.String("remote2", "", "Path2 for bisync tests") + argNoCompare = flag.Bool("no-compare", false, "Do not compare test results with golden") + argNoCleanup = flag.Bool("no-cleanup", false, "Keep test files") + argGolden = flag.Bool("golden", false, "Store results as golden") + argDebug = flag.Bool("debug", false, "Print debug messages") + argStopAt = flag.Int("stop-at", 0, "Stop after given test step") + // Flag -refresh-times helps with Dropbox tests failing with message + // "src and dst identical but can't set mod time without deleting and re-uploading" + argRefreshTimes = flag.Bool("refresh-times", false, "Force refreshing the target modtime, useful for Dropbox (default: false)") +) + +// bisyncTest keeps all test data in a single place +type bisyncTest struct { + // per-test state + t *testing.T + step int + stopped bool + stepStr string + testCase string + sessionName string + // test dirs + testDir string + dataDir string + initDir string + goldenDir string + workDir string + fs1 fs.Fs + path1 string + canonPath1 string + fs2 fs.Fs + path2 string + canonPath2 string + // test log + logDir string + logPath string + logFile *os.File + // global state + dataRoot string + randName string + tempDir string + parent1 fs.Fs + parent2 fs.Fs + // global flags + argRemote1 string + argRemote2 string + noCompare bool + noCleanup bool + golden bool + debug bool + stopAt int +} + +// TestBisync is a test engine for bisync test cases. +func TestBisync(t *testing.T) { + ctx := context.Background() + fstest.Initialise() + + ci := fs.GetConfig(ctx) + ciSave := *ci + defer func() { + *ci = ciSave + }() + if *argRefreshTimes { + ci.RefreshTimes = true + } + + baseDir, err := os.Getwd() + require.NoError(t, err, "get current directory") + randName := "bisync." 
+ time.Now().Format("150405-") + random.String(5) + tempDir := filepath.Join(os.TempDir(), randName) + workDir := filepath.Join(tempDir, "workdir") + + b := &bisyncTest{ + // per-test state + t: t, + // global state + tempDir: tempDir, + randName: randName, + workDir: workDir, + dataRoot: filepath.Join(baseDir, "testdata"), + logDir: filepath.Join(tempDir, "logs"), + logPath: filepath.Join(workDir, logFileName), + // global flags + argRemote1: *fstest.RemoteName, + argRemote2: *argRemote2, + noCompare: *argNoCompare, + noCleanup: *argNoCleanup, + golden: *argGolden, + debug: *argDebug, + stopAt: *argStopAt, + } + + b.mkdir(b.tempDir) + b.mkdir(b.logDir) + + fnHandle := atexit.Register(func() { + if atexit.Signalled() { + b.cleanupAll() + } + }) + defer func() { + b.cleanupAll() + atexit.Unregister(fnHandle) + }() + + argCase := *argTestCase + if argCase == "" { + argCase = "all" + if testing.Short() { + // remote tests can be long, help with "go test -short" + argCase = "basic" + } + } + + testList := strings.Split(argCase, ",") + if strings.ToLower(argCase) == "all" { + testList = nil + for _, testCase := range b.listDir(b.dataRoot) { + if strings.HasPrefix(testCase, "test_") { + testList = append(testList, testCase) + } + } + } + require.False(t, b.stopAt > 0 && len(testList) > 1, "-stop-at is meaningful only for a single test") + + for _, testCase := range testList { + testCase = strings.ReplaceAll(testCase, "-", "_") + testCase = strings.TrimPrefix(testCase, "test_") + t.Run(testCase, func(childTest *testing.T) { + b.runTestCase(ctx, childTest, testCase) + }) + } +} + +func (b *bisyncTest) cleanupAll() { + if b.noCleanup { + return + } + ctx := context.Background() + if b.parent1 != nil { + _ = operations.Purge(ctx, b.parent1, "") + } + if b.parent2 != nil { + _ = operations.Purge(ctx, b.parent2, "") + } + _ = os.RemoveAll(b.tempDir) +} + +func (b *bisyncTest) runTestCase(ctx context.Context, t *testing.T, testCase string) { + b.t = t + b.testCase = testCase + var err error + + b.fs1, b.parent1, b.path1, b.canonPath1 = b.makeTempRemote(ctx, b.argRemote1, "path1") + b.fs2, b.parent2, b.path2, b.canonPath2 = b.makeTempRemote(ctx, b.argRemote2, "path2") + + b.sessionName = bilib.SessionName(b.fs1, b.fs2) + b.testDir = b.ensureDir(b.dataRoot, "test_"+b.testCase, false) + b.initDir = b.ensureDir(b.testDir, "initial", false) + b.goldenDir = b.ensureDir(b.testDir, "golden", false) + b.dataDir = b.ensureDir(b.testDir, "modfiles", true) // optional + + // For test stability, jam initial dates to a fixed past date. + // Test cases that change files will touch specific files to fixed new dates. 
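+	// (The fixed date is 2000-01-01 in the listing time zone; scenario
+	// steps such as touch-glob then move selected files to newer fixed dates.)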
+ initDate := time.Date(2000, time.January, 1, 0, 0, 0, 0, bisync.TZ) + err = filepath.Walk(b.initDir, func(path string, info os.FileInfo, err error) error { + if err == nil && !info.IsDir() { + return os.Chtimes(path, initDate, initDate) + } + return err + }) + require.NoError(b.t, err, "jamming initial dates") + + // Prepare initial content + b.cleanupCase(ctx) + initFs, err := fs.NewFs(ctx, b.initDir) + require.NoError(b.t, err) + require.NoError(b.t, sync.CopyDir(ctx, b.fs1, initFs, true), "setting up path1") + require.NoError(b.t, sync.CopyDir(ctx, b.fs2, initFs, true), "setting up path2") + + // Create log file + b.mkdir(b.workDir) + b.logFile, err = os.Create(b.logPath) + require.NoError(b.t, err, "creating log file") + + // Execute test scenario + scenFile := filepath.Join(b.testDir, "scenario.txt") + scenBuf, err := ioutil.ReadFile(scenFile) + scenReplacer := b.newReplacer(false) + require.NoError(b.t, err) + b.step = 0 + b.stopped = false + for _, line := range strings.Split(string(scenBuf), "\n") { + comment := strings.Index(line, "#") + if comment != -1 { + line = line[:comment] + } + line = strings.TrimSpace(line) + if line == "" { + if b.golden { + // Keep empty lines in golden logs + _, _ = b.logFile.WriteString("\n") + } + continue + } + + b.step++ + b.stepStr = fmt.Sprintf("(%02d) :", b.step) + line = scenReplacer.Replace(line) + if err = b.runTestStep(ctx, line); err != nil { + require.Failf(b.t, "test step failed", "step %d failed: %v", b.step, err) + return + } + if b.stopAt > 0 && b.step >= b.stopAt { + comment := "" + if b.golden { + comment = " (ignoring -golden)" + } + b.logPrintf("Stopping after step %d%s", b.step, comment) + b.stopped = true + b.noCleanup = true + b.noCompare = true + break + } + } + + // Perform post-run activities + require.NoError(b.t, b.logFile.Close(), "flushing test log") + b.logFile = nil + + savedLog := b.testCase + ".log" + err = bilib.CopyFile(b.logPath, filepath.Join(b.logDir, savedLog)) + require.NoError(b.t, err, "saving log file %s", savedLog) + + if b.golden && !b.stopped { + log.Printf("Store results to golden directory") + b.storeGolden() + return + } + + errorCount := 0 + if b.noCompare { + log.Printf("Skip comparing results with golden directory") + errorCount = -2 + } else { + errorCount = b.compareResults() + } + + if b.noCleanup { + log.Printf("Skip cleanup") + } else { + b.cleanupCase(ctx) + } + + var msg string + var passed bool + switch errorCount { + case 0: + msg = fmt.Sprintf("TEST %s PASSED", b.testCase) + passed = true + case -2: + msg = fmt.Sprintf("TEST %s SKIPPED", b.testCase) + passed = true + case -1: + msg = fmt.Sprintf("TEST %s FAILED - WRONG NUMBER OF FILES", b.testCase) + passed = false + default: + msg = fmt.Sprintf("TEST %s FAILED - %d MISCOMPARED FILES", b.testCase, errorCount) + buckets := b.fs1.Features().BucketBased || b.fs2.Features().BucketBased + passed = false + if b.testCase == "rmdirs" && buckets { + msg += " (expected failure on bucket remotes)" + passed = true + } + } + b.t.Log(msg) + if !passed { + b.t.FailNow() + } +} + +// makeTempRemote creates temporary folder and makes a filesystem +// if a local path is provided, it's ignored (the test will run under system temp) +func (b *bisyncTest) makeTempRemote(ctx context.Context, remote, subdir string) (f, parent fs.Fs, path, canon string) { + var err error + if bilib.IsLocalPath(remote) { + if remote != "" && remote != "local" { + b.t.Fatalf(`Missing ":" in remote %q. 
Use "local" to test with local filesystem.`, remote) + } + parent, err = fs.NewFs(ctx, b.tempDir) + require.NoError(b.t, err, "parsing %s", b.tempDir) + + path = filepath.Join(b.tempDir, b.testCase) + canon = bilib.CanonicalPath(path) + "_" + path = filepath.Join(path, subdir) + } else { + last := remote[len(remote)-1] + if last != ':' && last != '/' { + remote += "/" + } + remote += b.randName + parent, err = fs.NewFs(ctx, remote) + require.NoError(b.t, err, "parsing %s", remote) + + path = remote + "/" + b.testCase + canon = bilib.CanonicalPath(path) + "_" + path += "/" + subdir + } + + f, err = fs.NewFs(ctx, path) + require.NoError(b.t, err, "parsing %s/%s", remote, subdir) + path = bilib.FsPath(f) // Make it canonical + + if f.Precision() == fs.ModTimeNotSupported { + b.t.Skipf("modification time support is missing on %s", subdir) + } + return +} + +func (b *bisyncTest) cleanupCase(ctx context.Context) { + // Silence "directory not found" errors from the ftp backend + _ = bilib.CaptureOutput(func() { + _ = operations.Purge(ctx, b.fs1, "") + }) + _ = bilib.CaptureOutput(func() { + _ = operations.Purge(ctx, b.fs2, "") + }) + _ = os.RemoveAll(b.workDir) + accounting.Stats(ctx).ResetCounters() +} + +func (b *bisyncTest) runTestStep(ctx context.Context, line string) (err error) { + var fsrc, fdst fs.Fs + accounting.Stats(ctx).ResetErrors() + b.logPrintf("%s %s", b.stepStr, line) + + ci := fs.GetConfig(ctx) + ciSave := *ci + defer func() { + *ci = ciSave + }() + ci.LogLevel = fs.LogLevelInfo + if b.debug { + ci.LogLevel = fs.LogLevelDebug + } + + args := splitLine(line) + switch args[0] { + case "test": + b.checkArgs(args, 1, 0) + return nil + case "copy-listings": + b.checkArgs(args, 1, 1) + return b.saveTestListings(args[1], true) + case "move-listings": + b.checkArgs(args, 1, 1) + return b.saveTestListings(args[1], false) + case "purge-children": + b.checkArgs(args, 1, 1) + if fsrc, err = fs.NewFs(ctx, args[1]); err != nil { + return err + } + return purgeChildren(ctx, fsrc, "") + case "delete-file": + b.checkArgs(args, 1, 1) + dir, file := filepath.Split(args[1]) + if fsrc, err = fs.NewFs(ctx, dir); err != nil { + return err + } + var obj fs.Object + if obj, err = fsrc.NewObject(ctx, file); err != nil { + return err + } + return operations.DeleteFile(ctx, obj) + case "delete-glob": + b.checkArgs(args, 2, 2) + if fsrc, err = fs.NewFs(ctx, args[1]); err != nil { + return err + } + return deleteFiles(ctx, fsrc, args[2]) + case "touch-glob": + b.checkArgs(args, 3, 3) + date, src, glob := args[1], args[2], args[3] + if fsrc, err = fs.NewFs(ctx, src); err != nil { + return err + } + _, err = touchFiles(ctx, date, fsrc, src, glob) + return err + case "touch-copy": + b.checkArgs(args, 3, 3) + date, src, dst := args[1], args[2], args[3] + dir, file := filepath.Split(src) + if fsrc, err = fs.NewFs(ctx, dir); err != nil { + return err + } + if _, err = touchFiles(ctx, date, fsrc, dir, file); err != nil { + return err + } + return b.copyFile(ctx, src, dst, "") + case "copy-file": + b.checkArgs(args, 2, 2) + return b.copyFile(ctx, args[1], args[2], "") + case "copy-as": + b.checkArgs(args, 3, 3) + return b.copyFile(ctx, args[1], args[2], args[3]) + case "copy-dir", "sync-dir": + b.checkArgs(args, 2, 2) + if fsrc, err = cache.Get(ctx, args[1]); err != nil { + return err + } + if fdst, err = cache.Get(ctx, args[2]); err != nil { + return err + } + switch args[0] { + case "copy-dir": + err = sync.CopyDir(ctx, fdst, fsrc, true) + case "sync-dir": + err = sync.Sync(ctx, fdst, fsrc, true) + } + return err 
+ case "list-dirs": + b.checkArgs(args, 1, 1) + return b.listSubdirs(ctx, args[1]) + case "bisync": + return b.runBisync(ctx, args[1:]) + default: + return errors.Errorf("unknown command: %q", args[0]) + } +} + +// splitLine splits scenario line into tokens and performs +// substitutions that involve whitespace or control chars. +func splitLine(line string) (args []string) { + for _, s := range strings.Fields(line) { + b := []byte(whitespaceReplacer.Replace(s)) + b = regexChar.ReplaceAllFunc(b, func(b []byte) []byte { + c, _ := strconv.ParseUint(string(b[5:7]), 16, 8) + return []byte{byte(c)} + }) + args = append(args, string(b)) + } + return +} + +var whitespaceReplacer = strings.NewReplacer( + "{spc}", " ", + "{tab}", "\t", + "{eol}", eol, +) +var regexChar = regexp.MustCompile(`\{chr:([0-9a-f]{2})\}`) + +// checkArgs verifies the number of the test command arguments +func (b *bisyncTest) checkArgs(args []string, min, max int) { + cmd := args[0] + num := len(args) - 1 + if min == max && num != min { + b.t.Fatalf("%q must have strictly %d args", cmd, min) + } + if min > 0 && num < min { + b.t.Fatalf("%q must have at least %d args", cmd, min) + } + if max > 0 && num > max { + b.t.Fatalf("%q must have at most %d args", cmd, max) + } +} + +func (b *bisyncTest) runBisync(ctx context.Context, args []string) (err error) { + opt := &bisync.Options{ + Workdir: b.workDir, + NoCleanup: true, + SaveQueues: true, + MaxDelete: bisync.DefaultMaxDelete, + CheckFilename: bisync.DefaultCheckFilename, + CheckSync: bisync.CheckSyncTrue, + } + octx, ci := fs.AddConfig(ctx) + fs1, fs2 := b.fs1, b.fs2 + + addSubdir := func(path, subdir string) fs.Fs { + remote := path + subdir + f, err := fs.NewFs(ctx, remote) + require.NoError(b.t, err, "parsing remote %q", remote) + return f + } + + for _, arg := range args { + val := "" + pos := strings.Index(arg, "=") + if pos > 0 { + arg, val = arg[:pos], arg[pos+1:] + } + switch arg { + case "resync": + opt.Resync = true + case "dry-run": + ci.DryRun = true + opt.DryRun = true + case "force": + opt.Force = true + case "remove-empty-dirs": + opt.RemoveEmptyDirs = true + case "check-sync-only": + opt.CheckSync = bisync.CheckSyncOnly + case "no-check-sync": + opt.CheckSync = bisync.CheckSyncFalse + case "check-access": + opt.CheckAccess = true + case "check-filename": + opt.CheckFilename = val + case "filters-file": + opt.FiltersFile = val + case "max-delete": + opt.MaxDelete, err = strconv.Atoi(val) + require.NoError(b.t, err, "parsing max-delete=%q", val) + case "size-only": + ci.SizeOnly = true + case "subdir": + fs1 = addSubdir(b.path1, val) + fs2 = addSubdir(b.path2, val) + default: + return errors.Errorf("invalid bisync option %q", arg) + } + } + + output := bilib.CaptureOutput(func() { + err = bisync.Bisync(octx, fs1, fs2, opt) + }) + + _, _ = os.Stdout.Write(output) + _, _ = b.logFile.Write(output) + + if err != nil { + b.logPrintf("Bisync error: %v", err) + } + return nil +} + +// saveTestListings creates a copy of test artifacts with given prefix +// including listings (.lst*), queues (.que) and filters (.flt, .flt.md5) +func (b *bisyncTest) saveTestListings(prefix string, keepSource bool) (err error) { + count := 0 + for _, srcFile := range b.listDir(b.workDir) { + switch fileType(srcFile) { + case "listing", "queue", "filters": + // fall thru + default: + continue + } + count++ + dstFile := fmt.Sprintf("%s.%s.sav", prefix, b.toGolden(srcFile)) + src := filepath.Join(b.workDir, srcFile) + dst := filepath.Join(b.workDir, dstFile) + if err = bilib.CopyFile(src, 
dst); err != nil { + return + } + if keepSource { + continue + } + if err = os.Remove(src); err != nil { + return + } + } + if count == 0 { + err = errors.New("listings not found") + } + return +} + +func (b *bisyncTest) copyFile(ctx context.Context, src, dst, asName string) (err error) { + var fsrc, fdst fs.Fs + var srcPath, srcFile, dstPath, dstFile string + + switch fsrc, err = cache.Get(ctx, src); err { + case fs.ErrorIsFile: + // ok + case nil: + return errors.New("source must be a file") + default: + return err + } + + if _, srcPath, err = fspath.SplitFs(src); err != nil { + return err + } + srcFile = path.Base(srcPath) + + if dstPath, dstFile, err = fspath.Split(dst); err != nil { + return err + } + if dstPath == "" { + return errors.New("invalid destination") + } + if dstFile != "" { + dstPath = dst // force directory + } + if fdst, err = cache.Get(ctx, dstPath); err != nil { + return err + } + + if asName != "" { + dstFile = asName + } else { + dstFile = srcFile + } + + fctx, fi := filter.AddConfig(ctx) + if err := fi.AddFile(srcFile); err != nil { + return err + } + return operations.CopyFile(fctx, fdst, fsrc, dstFile, srcFile) +} + +// listSubdirs is equivalent to `rclone lsf -R --dirs-only` +func (b *bisyncTest) listSubdirs(ctx context.Context, remote string) error { + f, err := fs.NewFs(ctx, remote) + if err != nil { + return err + } + opt := operations.ListJSONOpt{ + NoModTime: true, + NoMimeType: true, + DirsOnly: true, + Recurse: true, + } + fmt := operations.ListFormat{} + fmt.SetDirSlash(true) + fmt.AddPath() + printItem := func(item *operations.ListJSONItem) error { + b.logPrintf("%s", fmt.Format(item)) + return nil + } + return operations.ListJSON(ctx, f, "", &opt, printItem) +} + +// purgeChildren deletes child files and purges subdirs under given path. +// Note: this cannot be done with filters. +func purgeChildren(ctx context.Context, f fs.Fs, dir string) error { + entries, firstErr := f.List(ctx, dir) + if firstErr != nil { + return firstErr + } + for _, entry := range entries { + var err error + switch dirObj := entry.(type) { + case fs.Object: + fs.Debugf(dirObj, "Remove file") + err = dirObj.Remove(ctx) + case fs.Directory: + fs.Debugf(dirObj, "Purge subdir") + err = operations.Purge(ctx, f, dirObj.Remote()) + } + if firstErr == nil { + firstErr = err + } + } + return firstErr +} + +// deleteFiles deletes a group of files by the name pattern. +func deleteFiles(ctx context.Context, f fs.Fs, glob string) error { + fctx, fi := filter.AddConfig(ctx) + if err := fi.Add(true, glob); err != nil { + return err + } + if err := fi.Add(false, "/**"); err != nil { + return err + } + return operations.Delete(fctx, f) +} + +// touchFiles sets modification time on a group of files. +// Returns names of touched files and/or error. +// Note: `rclone touch` can touch only single file, doesn't support filters. 
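+// Example (hypothetical): touchFiles(ctx, "2001-01-02", f, "path1/", "file*.txt")
+// sets every matching file's modtime to that date in the listing time zone
+// and returns the touched names prefixed with the dir argument.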
+func touchFiles(ctx context.Context, dateStr string, f fs.Fs, dir, glob string) ([]string, error) { + files := []string{} + + date, err := time.ParseInLocation(touchDateFormat, dateStr, bisync.TZ) + if err != nil { + return files, errors.Wrapf(err, "invalid date %q", dateStr) + } + + matcher, firstErr := filter.GlobToRegexp(glob, false) + if firstErr != nil { + return files, errors.Errorf("invalid glob %q", glob) + } + + entries, firstErr := f.List(ctx, "") + if firstErr != nil { + return files, firstErr + } + + for _, entry := range entries { + obj, isFile := entry.(fs.Object) + if !isFile { + continue + } + remote := obj.Remote() + if !matcher.MatchString(remote) { + continue + } + files = append(files, dir+remote) + + fs.Debugf(obj, "Set modification time %s", dateStr) + err := obj.SetModTime(ctx, date) + if err == fs.ErrorCantSetModTimeWithoutDelete { + // Workaround for dropbox, similar to --refresh-times + err = nil + buf := new(bytes.Buffer) + size := obj.Size() + if size > 0 { + err = operations.Cat(ctx, f, buf, 0, size) + } + info := object.NewStaticObjectInfo(remote, date, size, true, nil, f) + if err == nil { + _ = obj.Remove(ctx) + _, err = f.Put(ctx, buf, info) + } + } + if firstErr == nil { + firstErr = err + } + } + + return files, firstErr +} + +// compareResults validates scenario results against golden dir +func (b *bisyncTest) compareResults() int { + goldenFiles := b.listDir(b.goldenDir) + resultFiles := b.listDir(b.workDir) + + // Adapt test file names to their golden counterparts + renamed := false + for _, fileName := range resultFiles { + goldName := b.toGolden(fileName) + if goldName != fileName { + filePath := filepath.Join(b.workDir, fileName) + goldPath := filepath.Join(b.workDir, goldName) + require.NoError(b.t, os.Rename(filePath, goldPath)) + renamed = true + } + } + if renamed { + resultFiles = b.listDir(b.workDir) + } + + goldenSet := bilib.ToNames(goldenFiles) + resultSet := bilib.ToNames(resultFiles) + goldenNum := len(goldenFiles) + resultNum := len(resultFiles) + errorCount := 0 + const divider = "----------------------------------------------------------" + + if goldenNum != resultNum { + log.Print(divider) + log.Printf("MISCOMPARE - Number of Golden and Results files do not match:") + log.Printf(" Golden count: %d", goldenNum) + log.Printf(" Result count: %d", resultNum) + log.Printf(" Golden files: %s", strings.Join(goldenFiles, ", ")) + log.Printf(" Result files: %s", strings.Join(resultFiles, ", ")) + } + + for _, file := range goldenFiles { + if !resultSet.Has(file) { + errorCount++ + log.Printf(" File found in Golden but not in Results: %s", file) + } + } + for _, file := range resultFiles { + if !goldenSet.Has(file) { + errorCount++ + log.Printf(" File found in Results but not in Golden: %s", file) + } + } + + for _, file := range goldenFiles { + if !resultSet.Has(file) { + continue + } + + goldenText := b.mangleResult(b.goldenDir, file, false) + resultText := b.mangleResult(b.workDir, file, false) + + if fileType(file) == "log" { + // save mangled logs so difference is easier on eyes + goldenFile := filepath.Join(b.logDir, "mangled.golden.log") + resultFile := filepath.Join(b.logDir, "mangled.result.log") + require.NoError(b.t, ioutil.WriteFile(goldenFile, []byte(goldenText), bilib.PermSecure)) + require.NoError(b.t, ioutil.WriteFile(resultFile, []byte(resultText), bilib.PermSecure)) + } + + if goldenText == resultText { + continue + } + errorCount++ + + diff := difflib.UnifiedDiff{ + A: difflib.SplitLines(goldenText), + B: 
difflib.SplitLines(resultText), + Context: 0, + } + text, err := difflib.GetUnifiedDiffString(diff) + require.NoError(b.t, err, "diff failed") + + log.Print(divider) + log.Printf("| MISCOMPARE -Golden vs +Results for %s", file) + for _, line := range strings.Split(strings.TrimSpace(text), "\n") { + log.Printf("| %s", strings.TrimSpace(line)) + } + } + + if errorCount > 0 { + log.Print(divider) + } + if errorCount == 0 && goldenNum != resultNum { + return -1 + } + return errorCount +} + +// storeGolden will store workdir files to the golden directory. +// Golden results will have adapted file names and contain +// generic strings instead of local or cloud paths. +func (b *bisyncTest) storeGolden() { + // Perform consistency checks + files := b.listDir(b.workDir) + require.NotEmpty(b.t, files, "nothing to store in golden dir") + + // Pass 1: validate files before storing + for _, fileName := range files { + if fileType(fileName) == "lock" { + continue + } + goldName := b.toGolden(fileName) + if goldName != fileName { + targetPath := filepath.Join(b.workDir, goldName) + exists := bilib.FileExists(targetPath) + require.False(b.t, exists, "golden name overlap for file %s", fileName) + } + text := b.mangleResult(b.workDir, fileName, true) + if fileType(fileName) == "log" { + require.NotEmpty(b.t, text, "incorrect golden log %s", fileName) + } + } + + // Pass 2: perform a verbatim copy + _ = os.RemoveAll(b.goldenDir) + require.NoError(b.t, bilib.CopyDir(b.workDir, b.goldenDir)) + + // Pass 3: adapt file names and content + for _, fileName := range files { + if fileType(fileName) == "lock" { + continue + } + text := b.mangleResult(b.goldenDir, fileName, true) + + goldName := b.toGolden(fileName) + goldPath := filepath.Join(b.goldenDir, goldName) + err := ioutil.WriteFile(goldPath, []byte(text), bilib.PermSecure) + assert.NoError(b.t, err, "writing golden file %s", goldName) + + if goldName != fileName { + origPath := filepath.Join(b.goldenDir, fileName) + assert.NoError(b.t, os.Remove(origPath), "removing original file %s", fileName) + } + } +} + +// mangleResult prepares test logs or listings for comparison +func (b *bisyncTest) mangleResult(dir, file string, golden bool) string { + buf, err := ioutil.ReadFile(filepath.Join(dir, file)) + require.NoError(b.t, err) + text := string(buf) + + switch fileType(strings.TrimSuffix(file, ".sav")) { + case "queue": + lines := strings.Split(text, eol) + sort.Strings(lines) + return joinLines(lines) + case "listing": + return mangleListing(text, golden) + case "log": + // fall thru + default: + return text + } + + // Adapt log lines to the golden way. + lines := strings.Split(string(buf), eol) + pathReplacer := b.newReplacer(true) + + rep := logReplacements + if b.testCase == "dry_run" { + rep = append(rep, dryrunReplacements...) 
+ } + repFrom := make([]*regexp.Regexp, len(rep)/2) + repTo := make([]string, len(rep)/2) + for i := 0; i < len(rep); i += 2 { + repFrom[i/2] = regexp.MustCompile(rep[i]) + repTo[i/2] = rep[i+1] + } + + hoppers := make([]*regexp.Regexp, len(logHoppers)) + dampers := make([][]string, len(logHoppers)) + for i, regex := range logHoppers { + hoppers[i] = regexp.MustCompile("^" + regex + "$") + } + + // The %q format doubles backslashes, hence "{1,2}" + regexBackslash := regexp.MustCompile(`\\{1,2}`) + + emptyCount := 0 + maxEmpty := 0 + if b.golden { + maxEmpty = 2 + } + + result := make([]string, 0, len(lines)) + for _, s := range lines { + // Adapt file paths + s = pathReplacer.Replace(strings.TrimSpace(s)) + + // Apply regular expression replacements + for i := 0; i < len(repFrom); i++ { + s = repFrom[i].ReplaceAllString(s, repTo[i]) + } + s = strings.TrimSpace(s) + if s == dropMe { + continue + } + + if fixSlash && regexFixSlash.MatchString(s) { + s = regexBackslash.ReplaceAllString(s, "/") + } + + // Sort consecutive groups of naturally unordered lines. + // Any such group must end before the log ends or it might be lost. + absorbed := false + for i := 0; i < len(dampers); i++ { + match := false + if s != "" && !absorbed { + match = hoppers[i].MatchString(s) + } + if match { + dampers[i] = append(dampers[i], s) + absorbed = true + } else if len(dampers[i]) > 0 { + sort.Strings(dampers[i]) + result = append(result, dampers[i]...) + dampers[i] = nil + } + } + if absorbed { + continue + } + + // Skip empty lines unless storing to golden + if s == "" { + if emptyCount < maxEmpty { + result = append(result, "") + } + emptyCount++ + continue + } + result = append(result, s) + emptyCount = 0 + } + + return joinLines(result) +} + +// mangleListing sorts listing lines before comparing. +func mangleListing(text string, golden bool) string { + lines := strings.Split(text, eol) + + hasHeader := len(lines) > 0 && strings.HasPrefix(lines[0], bisync.ListingHeader) + if hasHeader { + lines = lines[1:] + } + + // Split line in 4 groups: (flag, size)(hash.)( .id., .......modtime....... )(name). + regex := regexp.MustCompile(`^([^ ] +\d+ )([^ ]+)( [^ ]+ [\d-]+T[\d:.]+[\d+-]+ )(".+")$`) + + getFile := func(s string) string { + if match := regex.FindStringSubmatch(strings.TrimSpace(s)); match != nil { + if name, err := strconv.Unquote(match[4]); err == nil { + return name + } + } + return s + } + + sort.SliceStable(lines, func(i, j int) bool { + return getFile(lines[i]) < getFile(lines[j]) + }) + + // Store hash as golden but ignore when comparing. + if !golden { + for i, s := range lines { + match := regex.FindStringSubmatch(strings.TrimSpace(s)) + if match != nil && match[2] != "-" { + lines[i] = match[1] + "-" + match[3] + match[4] + } + } + } + + text = joinLines(lines) + if hasHeader && golden { + text = bisync.ListingHeader + " test\n" + text + } + return text +} + +// joinLines joins text lines dropping empty lines at the beginning and at the end +func joinLines(lines []string) string { + text := strings.Join(lines, eol) + text = strings.TrimLeft(text, eol) + text = strings.TrimRight(text, eol) + if text != "" { + text += eol + } + return text +} + +// newReplacer can create two kinds of string replacers. +// If mangle is false, it will substitute macros in test scenario. +// If true then mangle paths in test log to match with golden log. 
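+// For example, the scenario token "{path1/}file1.txt" expands to the real
+// Path1 location when running steps, and that location is folded back to
+// the generic token when mangling logs, keeping them remote-independent.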
+func (b *bisyncTest) newReplacer(mangle bool) *strings.Replacer { + if !mangle { + rep := []string{ + "{datadir/}", b.dataDir + slash, + "{testdir/}", b.testDir + slash, + "{workdir/}", b.workDir + slash, + "{path1/}", b.path1, + "{path2/}", b.path2, + "{session}", b.sessionName, + "{/}", slash, + } + return strings.NewReplacer(rep...) + } + + rep := []string{ + b.dataDir + slash, "{datadir/}", + b.testDir + slash, "{testdir/}", + b.workDir + slash, "{workdir/}", + b.path1, "{path1/}", + b.path2, "{path2/}", + b.sessionName, "{session}", + } + if fixSlash { + prep := []string{} + for i := 0; i < len(rep); i += 2 { + // A hack for backslashes doubled by the go format "%q". + doubled := strings.ReplaceAll(rep[i], "\\", "\\\\") + if rep[i] != doubled { + prep = append(prep, doubled, rep[i+1]) + } + } + // Put longer patterns first to ensure correct translation. + rep = append(prep, rep...) + } + return strings.NewReplacer(rep...) +} + +// toGolden makes a result file name golden. +// It replaces each canonical path separately instead of using the +// session name to allow for subdirs in the extended-char-paths case. +func (b *bisyncTest) toGolden(name string) string { + name = strings.ReplaceAll(name, b.canonPath1, goldenCanonBase) + name = strings.ReplaceAll(name, b.canonPath2, goldenCanonBase) + name = strings.TrimSuffix(name, ".sav") + return name +} + +func (b *bisyncTest) mkdir(dir string) { + require.NoError(b.t, os.MkdirAll(dir, os.ModePerm)) +} + +func (b *bisyncTest) ensureDir(parent, dir string, optional bool) string { + path := filepath.Join(parent, dir) + if !optional { + info, err := os.Stat(path) + require.NoError(b.t, err, "%s must exist", path) + require.True(b.t, info.IsDir(), "%s must be a directory", path) + } + return path +} + +func (b *bisyncTest) listDir(dir string) (names []string) { + files, err := ioutil.ReadDir(dir) + require.NoError(b.t, err) + for _, file := range files { + names = append(names, filepath.Base(file.Name())) + } + // Sort files to ensure comparability. + sort.Strings(names) + return +} + +// fileType detects test artifact type. +// Notes: +// - "filtersfile.txt" will NOT be recognized as a filters file +// - only "test.log" will be recognized as a test log file +func fileType(fileName string) string { + if fileName == logFileName { + return "log" + } + switch filepath.Ext(fileName) { + case ".lst", ".lst-new", ".lst-err", ".lst-dry", ".lst-dry-new": + return "listing" + case ".que": + return "queue" + case ".lck": + return "lock" + case ".flt": + return "filters" + } + if strings.HasSuffix(fileName, ".flt.md5") { + return "filters" + } + return "other" +} + +// logPrintf prints a message to stdout and to the test log +func (b *bisyncTest) logPrintf(text string, args ...interface{}) { + line := fmt.Sprintf(text, args...) 
+ log.Print(line) + if b.logFile != nil { + _, err := fmt.Fprintln(b.logFile, line) + require.NoError(b.t, err, "writing log file") + } +} diff --git a/cmd/bisync/cmd.go b/cmd/bisync/cmd.go new file mode 100644 index 000000000..fbcb27d11 --- /dev/null +++ b/cmd/bisync/cmd.go @@ -0,0 +1,225 @@ +// Package bisync implements bisync +// Copyright (c) 2017-2020 Chris Nelson +package bisync + +import ( + "context" + "crypto/md5" + "encoding/hex" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + "time" + + "github.com/rclone/rclone/cmd" + "github.com/rclone/rclone/cmd/bisync/bilib" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/config" + "github.com/rclone/rclone/fs/config/flags" + "github.com/rclone/rclone/fs/filter" + "github.com/rclone/rclone/fs/hash" + + "github.com/pkg/errors" + "github.com/spf13/cobra" +) + +// Options keep bisync options +type Options struct { + Resync bool + CheckAccess bool + CheckFilename string + CheckSync CheckSyncMode + RemoveEmptyDirs bool + MaxDelete int // percentage from 0 to 100 + Force bool + FiltersFile string + Workdir string + DryRun bool + NoCleanup bool + SaveQueues bool // save extra debugging files (test only flag) +} + +// Default values +const ( + DefaultMaxDelete int = 50 + DefaultCheckFilename string = "RCLONE_TEST" +) + +// DefaultWorkdir is default working directory +var DefaultWorkdir = filepath.Join(config.GetCacheDir(), "bisync") + +// CheckSyncMode controls when to compare final listings +type CheckSyncMode int + +// CheckSync modes +const ( + CheckSyncTrue CheckSyncMode = iota // Compare final listings (default) + CheckSyncFalse // Disable comparison of final listings + CheckSyncOnly // Only compare listings from the last run, do not sync +) + +func (x CheckSyncMode) String() string { + switch x { + case CheckSyncTrue: + return "true" + case CheckSyncFalse: + return "false" + case CheckSyncOnly: + return "only" + } + return "unknown" +} + +// Set a CheckSync mode from a string +func (x *CheckSyncMode) Set(s string) error { + switch strings.ToLower(s) { + case "true": + *x = CheckSyncTrue + case "false": + *x = CheckSyncFalse + case "only": + *x = CheckSyncOnly + default: + return errors.Errorf("unknown check-sync mode for bisync: %q", s) + } + return nil +} + +// Type of the CheckSync value +func (x *CheckSyncMode) Type() string { + return "string" +} + +// Opt keeps command line options +var Opt Options + +func init() { + cmd.Root.AddCommand(commandDefinition) + cmdFlags := commandDefinition.Flags() + flags.BoolVarP(cmdFlags, &Opt.Resync, "resync", "1", Opt.Resync, "Performs the resync run. Path1 files may overwrite Path2 versions. Consider using --verbose or --dry-run first.") + flags.BoolVarP(cmdFlags, &Opt.CheckAccess, "check-access", "", Opt.CheckAccess, makeHelp("Ensure expected {CHECKFILE} files are found on both Path1 and Path2 filesystems, else abort.")) + flags.StringVarP(cmdFlags, &Opt.CheckFilename, "check-filename", "", Opt.CheckFilename, makeHelp("Filename for --check-access (default: {CHECKFILE})")) + flags.BoolVarP(cmdFlags, &Opt.Force, "force", "", Opt.Force, "Bypass --max-delete safety check and run the sync. 
Consider using with --verbose") + flags.FVarP(cmdFlags, &Opt.CheckSync, "check-sync", "", "Controls comparison of final listings: true|false|only (default: true)") + flags.BoolVarP(cmdFlags, &Opt.RemoveEmptyDirs, "remove-empty-dirs", "", Opt.RemoveEmptyDirs, "Remove empty directories at the final cleanup step.") + flags.StringVarP(cmdFlags, &Opt.FiltersFile, "filters-file", "", Opt.FiltersFile, "Read filtering patterns from a file") + flags.StringVarP(cmdFlags, &Opt.Workdir, "workdir", "", Opt.Workdir, makeHelp("Use custom working dir - useful for testing. (default: {WORKDIR})")) + flags.BoolVarP(cmdFlags, &tzLocal, "localtime", "", tzLocal, "Use local time in listings (default: UTC)") + flags.BoolVarP(cmdFlags, &Opt.NoCleanup, "no-cleanup", "", Opt.NoCleanup, "Retain working files (useful for troubleshooting and testing).") +} + +// bisync command definition +var commandDefinition = &cobra.Command{ + Use: "bisync remote1:path1 remote2:path2", + Short: shortHelp, + Long: longHelp, + RunE: func(command *cobra.Command, args []string) error { + cmd.CheckArgs(2, 2, command, args) + fs1, file1, fs2, file2 := cmd.NewFsSrcDstFiles(args) + if file1 != "" || file2 != "" { + return errors.New("paths must be existing directories") + } + + ctx := context.Background() + opt := Opt + opt.applyContext(ctx) + + if tzLocal { + TZ = time.Local + } + + commonHashes := fs1.Hashes().Overlap(fs2.Hashes()) + isDropbox1 := strings.HasPrefix(fs1.String(), "Dropbox") + isDropbox2 := strings.HasPrefix(fs2.String(), "Dropbox") + if commonHashes == hash.Set(0) && (isDropbox1 || isDropbox2) { + ci := fs.GetConfig(ctx) + if !ci.DryRun && !ci.RefreshTimes { + fs.Debugf(nil, "Using flag --refresh-times is recommended") + } + } + + fs.Logf(nil, "bisync is EXPERIMENTAL. Don't use in production!") + cmd.Run(false, true, command, func() error { + err := Bisync(ctx, fs1, fs2, &opt) + if err == ErrBisyncAborted { + os.Exit(2) + } + return err + }) + return nil + }, +} + +func (opt *Options) applyContext(ctx context.Context) { + maxDelete := DefaultMaxDelete + ci := fs.GetConfig(ctx) + if ci.MaxDelete >= 0 { + maxDelete = int(ci.MaxDelete) + } + if maxDelete < 0 { + maxDelete = 0 + } + if maxDelete > 100 { + maxDelete = 100 + } + opt.MaxDelete = maxDelete + // reset MaxDelete for fs/operations, bisync handles this parameter specially + ci.MaxDelete = -1 + opt.DryRun = ci.DryRun +} + +func (opt *Options) setDryRun(ctx context.Context) context.Context { + ctxNew, ci := fs.AddConfig(ctx) + ci.DryRun = opt.DryRun + return ctxNew +} + +func (opt *Options) applyFilters(ctx context.Context) (context.Context, error) { + filtersFile := opt.FiltersFile + if filtersFile == "" { + return ctx, nil + } + + f, err := os.Open(filtersFile) + if err != nil { + return ctx, errors.Errorf("specified filters file does not exist: %s", filtersFile) + } + + fs.Infof(nil, "Using filters file %s", filtersFile) + hasher := md5.New() + if _, err := io.Copy(hasher, f); err != nil { + _ = f.Close() + return ctx, err + } + gotHash := hex.EncodeToString(hasher.Sum(nil)) + _ = f.Close() + + hashFile := filtersFile + ".md5" + wantHash, err := ioutil.ReadFile(hashFile) + if err != nil && !opt.Resync { + return ctx, errors.Errorf("filters file md5 hash not found (must run --resync): %s", filtersFile) + } + + if gotHash != string(wantHash) && !opt.Resync { + return ctx, errors.Errorf("filters file has changed (must run --resync): %s", filtersFile) + } + + if opt.Resync { + fs.Infof(nil, "Storing filters file hash to %s", hashFile) + if err := 
ioutil.WriteFile(hashFile, []byte(gotHash), bilib.PermSecure); err != nil { + return ctx, err + } + } + + // Prepend our filter file first in the list + filterOpt := filter.GetConfig(ctx).Opt + filterOpt.FilterFrom = append([]string{filtersFile}, filterOpt.FilterFrom...) + newFilter, err := filter.NewFilter(&filterOpt) + if err != nil { + return ctx, errors.Wrapf(err, "invalid filters file: %s", filtersFile) + } + + return filter.ReplaceConfig(ctx, newFilter), nil +} diff --git a/cmd/bisync/deltas.go b/cmd/bisync/deltas.go new file mode 100644 index 000000000..0e09f15f2 --- /dev/null +++ b/cmd/bisync/deltas.go @@ -0,0 +1,310 @@ +// Package bisync implements bisync +// Copyright (c) 2017-2020 Chris Nelson +package bisync + +import ( + "context" + "path/filepath" + "sort" + + "github.com/pkg/errors" + "github.com/rclone/rclone/cmd/bisync/bilib" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/operations" +) + +// delta +type delta uint8 + +const ( + deltaZero delta = 0 + deltaNew delta = 1 << iota + deltaNewer + deltaOlder + deltaSize + deltaHash + deltaDeleted +) + +const ( + deltaModified delta = deltaNewer | deltaOlder | deltaSize | deltaHash | deltaDeleted + deltaOther delta = deltaNew | deltaNewer | deltaOlder +) + +func (d delta) is(cond delta) bool { + return d&cond != 0 +} + +// deltaSet +type deltaSet struct { + deltas map[string]delta + opt *Options + fs fs.Fs // base filesystem + msg string // filesystem name for logging + oldCount int // original number of files (for "excess deletes" check) + deleted int // number of deleted files (for "excess deletes" check) + foundSame bool // true if found at least one unchanged file + checkFiles bilib.Names +} + +func (ds *deltaSet) empty() bool { + return len(ds.deltas) == 0 +} + +func (ds *deltaSet) sort() (sorted []string) { + if ds.empty() { + return + } + sorted = make([]string, 0, len(ds.deltas)) + for file := range ds.deltas { + sorted = append(sorted, file) + } + sort.Strings(sorted) + return +} + +func (ds *deltaSet) printStats() { + if ds.empty() { + return + } + nAll := len(ds.deltas) + nNew := 0 + nNewer := 0 + nOlder := 0 + nDeleted := 0 + for _, d := range ds.deltas { + if d.is(deltaNew) { + nNew++ + } + if d.is(deltaNewer) { + nNewer++ + } + if d.is(deltaOlder) { + nOlder++ + } + if d.is(deltaDeleted) { + nDeleted++ + } + } + fs.Infof(nil, "%s: %4d changes: %4d new, %4d newer, %4d older, %4d deleted", + ds.msg, nAll, nNew, nNewer, nOlder, nDeleted) +} + +// findDeltas +func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing, newListing, msg string) (ds *deltaSet, err error) { + var old, now *fileList + + old, err = b.loadListing(oldListing) + if err != nil { + fs.Errorf(nil, "Failed loading prior %s listing: %s", msg, oldListing) + b.abort = true + return + } + if err = b.checkListing(old, oldListing, "prior "+msg); err != nil { + return + } + + now, err = b.makeListing(fctx, f, newListing) + if err == nil { + err = b.checkListing(now, newListing, "current "+msg) + } + if err != nil { + return + } + + ds = &deltaSet{ + deltas: map[string]delta{}, + fs: f, + msg: msg, + oldCount: len(old.list), + opt: b.opt, + checkFiles: bilib.Names{}, + } + + for _, file := range old.list { + d := deltaZero + if !now.has(file) { + b.indent(msg, file, "File was deleted") + ds.deleted++ + d |= deltaDeleted + } else { + if old.getTime(file) != now.getTime(file) { + if old.beforeOther(now, file) { + b.indent(msg, file, "File is newer") + d |= deltaNewer + } else { // Current version is older than prior sync. 
+ b.indent(msg, file, "File is OLDER") + d |= deltaOlder + } + } + // TODO Compare sizes and hashes + } + + if d.is(deltaModified) { + ds.deltas[file] = d + } else { + // Once we've found at least one unchanged file, + // we know that not everything has changed, + // as with a DST time change + ds.foundSame = true + } + } + + for _, file := range now.list { + if !old.has(file) { + b.indent(msg, file, "File is new") + ds.deltas[file] = deltaNew + } + } + + if b.opt.CheckAccess { + // checkFiles is a small structure compared with the `now`, so we + // return it alone and let the full delta map be garbage collected. + for _, file := range now.list { + if filepath.Base(file) == b.opt.CheckFilename { + ds.checkFiles.Add(file) + } + } + } + + return +} + +// applyDeltas +func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, err error) { + path1 := bilib.FsPath(b.fs1) + path2 := bilib.FsPath(b.fs2) + + copy1to2 := bilib.Names{} + copy2to1 := bilib.Names{} + delete1 := bilib.Names{} + delete2 := bilib.Names{} + handled := bilib.Names{} + + ctxMove := b.opt.setDryRun(ctx) + + for _, file := range ds1.sort() { + p1 := path1 + file + p2 := path2 + file + d1 := ds1.deltas[file] + + if d1.is(deltaOther) { + d2, in2 := ds2.deltas[file] + if !in2 { + b.indent("Path1", p2, "Queue copy to Path2") + copy1to2.Add(file) + } else if d2.is(deltaDeleted) { + b.indent("Path1", p2, "Queue copy to Path2") + copy1to2.Add(file) + handled.Add(file) + } else if d2.is(deltaOther) { + b.indent("!WARNING", file, "New or changed in both paths") + b.indent("!Path1", p1+"..path1", "Renaming Path1 copy") + if err = operations.MoveFile(ctxMove, b.fs1, b.fs1, file+"..path1", file); err != nil { + err = errors.Wrapf(err, "path1 rename failed for %s", p1) + b.critical = true + return + } + b.indent("!Path1", p2+"..path1", "Queue copy to Path2") + copy1to2.Add(file + "..path1") + + b.indent("!Path2", p2+"..path2", "Renaming Path2 copy") + if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, file+"..path2", file); err != nil { + err = errors.Wrapf(err, "path2 rename failed for %s", file) + return + } + b.indent("!Path2", p1+"..path2", "Queue copy to Path1") + copy2to1.Add(file + "..path2") + handled.Add(file) + } + } else { + // Path1 deleted + d2, in2 := ds2.deltas[file] + if !in2 { + b.indent("Path2", p2, "Queue delete") + delete2.Add(file) + } else if d2.is(deltaOther) { + b.indent("Path2", p1, "Queue copy to Path1") + copy2to1.Add(file) + handled.Add(file) + } else if d2.is(deltaDeleted) { + handled.Add(file) + } + } + } + + for _, file := range ds2.sort() { + p1 := path1 + file + d2 := ds2.deltas[file] + + if handled.Has(file) { + continue + } + if d2.is(deltaOther) { + b.indent("Path2", p1, "Queue copy to Path1") + copy2to1.Add(file) + } else { + // Deleted + b.indent("Path1", p1, "Queue delete") + delete1.Add(file) + } + } + + // Do the batch operation + if copy2to1.NotEmpty() { + changes1 = true + b.indent("Path2", "Path1", "Do queued copies to") + err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1") + if err != nil { + return + } + } + + if copy1to2.NotEmpty() { + changes2 = true + b.indent("Path1", "Path2", "Do queued copies to") + err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2") + if err != nil { + return + } + } + + if delete1.NotEmpty() { + changes1 = true + b.indent("", "Path1", "Do queued deletes on") + err = b.fastDelete(ctx, b.fs1, delete1, "delete1") + if err != nil { + return + } + } + + if delete2.NotEmpty() { + changes2 = true + b.indent("", "Path2", "Do 
queued deletes on") + err = b.fastDelete(ctx, b.fs2, delete2, "delete2") + if err != nil { + return + } + } + + return +} + +// exccessDeletes checks whether number of deletes is within allowed range +func (ds *deltaSet) excessDeletes() bool { + maxDelete := ds.opt.MaxDelete + maxRatio := float64(maxDelete) / 100.0 + curRatio := 0.0 + if ds.deleted > 0 && ds.oldCount > 0 { + curRatio = float64(ds.deleted) / float64(ds.oldCount) + } + + if curRatio <= maxRatio { + return false + } + + fs.Errorf("Safety abort", + "too many deletes (>%d%%, %d of %d) on %s %s. Run with --force if desired.", + maxDelete, ds.deleted, ds.oldCount, ds.msg, quotePath(bilib.FsPath(ds.fs))) + return true +} diff --git a/cmd/bisync/help.go b/cmd/bisync/help.go new file mode 100644 index 000000000..91c610314 --- /dev/null +++ b/cmd/bisync/help.go @@ -0,0 +1,26 @@ +package bisync + +import ( + "strconv" + "strings" +) + +func makeHelp(help string) string { + replacer := strings.NewReplacer( + "|", "`", + "{MAXDELETE}", strconv.Itoa(DefaultMaxDelete), + "{CHECKFILE}", DefaultCheckFilename, + "{WORKDIR}", DefaultWorkdir, + ) + return replacer.Replace(help) +} + +var shortHelp = `Perform bidirectonal synchronization between two paths.` + +var rcHelp = makeHelp(` +TODO +`) + +var longHelp = shortHelp + makeHelp(` +TODO +`) diff --git a/cmd/bisync/listing.go b/cmd/bisync/listing.go new file mode 100644 index 000000000..3b08bc482 --- /dev/null +++ b/cmd/bisync/listing.go @@ -0,0 +1,305 @@ +// Package bisync implements bisync +// Copyright (c) 2017-2020 Chris Nelson +package bisync + +import ( + "bufio" + "context" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/pkg/errors" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/fs/walk" +) + +// ListingHeader defines first line of a listing +const ListingHeader = "# bisync listing v1 from" + +// lineRegex and lineFormat define listing line format +// +// flags <- size -> <- hash -> id <------------ modtime -----------> "<----- remote" +// - 3009805 md5:xxxxxx - 2006-01-02T15:04:05.000000000-0700 "12 - Wait.mp3" +// +// flags: "-" for a file and "d" for a directory (reserved) +// hash: "type:value" or "-" (example: "md5:378840336ab14afa9c6b8d887e68a340") +// id: "-" (reserved) +const lineFormat = "%s %8d %s %s %s %q\n" + +var lineRegex = regexp.MustCompile(`^(\S) +(\d+) (\S+) (\S+) (\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d{9}[+-]\d{4}) (".+")$`) + +// timeFormat defines time format used in listings +const timeFormat = "2006-01-02T15:04:05.000000000-0700" + +// TZ defines time zone used in listings +var TZ = time.UTC +var tzLocal = false + +// fileInfo describes a file +type fileInfo struct { + size int64 + time time.Time + hash string + id string +} + +// fileList represents a listing +type fileList struct { + list []string + info map[string]*fileInfo + hash hash.Type +} + +func newFileList() *fileList { + return &fileList{ + info: map[string]*fileInfo{}, + list: []string{}, + } +} + +func (ls *fileList) empty() bool { + return len(ls.list) == 0 +} + +func (ls *fileList) has(file string) bool { + _, found := ls.info[file] + return found +} + +func (ls *fileList) get(file string) *fileInfo { + return ls.info[file] +} + +func (ls *fileList) put(file string, size int64, time time.Time, hash, id string) { + fi := ls.get(file) + if fi != nil { + fi.size = size + fi.time = time + } else { + fi = &fileInfo{ + size: size, + time: time, + hash: hash, + id: id, + } + ls.info[file] = fi + ls.list = 
+
+func (ls *fileList) getTime(file string) time.Time {
+	fi := ls.get(file)
+	if fi == nil {
+		return time.Time{}
+	}
+	return fi.time
+}
+
+func (ls *fileList) beforeOther(other *fileList, file string) bool {
+	thisTime := ls.getTime(file)
+	thatTime := other.getTime(file)
+	if thisTime.IsZero() || thatTime.IsZero() {
+		return false
+	}
+	return thisTime.Before(thatTime)
+}
+
+func (ls *fileList) afterTime(file string, time time.Time) bool {
+	fi := ls.get(file)
+	if fi == nil {
+		return false
+	}
+	return fi.time.After(time)
+}
+
+// save will save the listing to a file.
+func (ls *fileList) save(ctx context.Context, listing string) error {
+	file, err := os.Create(listing)
+	if err != nil {
+		return err
+	}
+
+	hashName := ""
+	if ls.hash != hash.None {
+		hashName = ls.hash.String()
+	}
+
+	_, err = fmt.Fprintf(file, "%s %s\n", ListingHeader, time.Now().In(TZ).Format(timeFormat))
+	if err != nil {
+		_ = file.Close()
+		_ = os.Remove(listing)
+		return err
+	}
+
+	for _, remote := range ls.list {
+		fi := ls.get(remote)
+
+		time := fi.time.In(TZ).Format(timeFormat)
+
+		hash := "-"
+		if hashName != "" && fi.hash != "" {
+			hash = hashName + ":" + fi.hash
+		}
+
+		id := fi.id
+		if id == "" {
+			id = "-"
+		}
+
+		flags := "-"
+		_, err = fmt.Fprintf(file, lineFormat, flags, fi.size, hash, id, time, remote)
+		if err != nil {
+			_ = file.Close()
+			_ = os.Remove(listing)
+			return err
+		}
+	}
+
+	return file.Close()
+}
+
+// loadListing will load a listing from a file.
+// The key is the path to the file relative to the Path1/Path2 base.
+// A file size of -1, as for Google Docs, does not match the line format,
+// so such lines are skipped with a warning rather than loaded.
+func (b *bisyncRun) loadListing(listing string) (*fileList, error) {
+	file, err := os.Open(listing)
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		_ = file.Close()
+	}()
+
+	reader := bufio.NewReader(file)
+	ls := newFileList()
+	lastHashName := ""
+
+	for {
+		line, err := reader.ReadString('\n')
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+
+		line = strings.TrimSuffix(line, "\n")
+		if line == "" || line[0] == '#' {
+			continue
+		}
+
+		match := lineRegex.FindStringSubmatch(line)
+		if match == nil {
+			fs.Logf(listing, "Ignoring incorrect line: %q", line)
+			continue
+		}
+		flags, sizeStr, hashStr := match[1], match[2], match[3]
+		id, timeStr, nameStr := match[4], match[5], match[6]
+
+		sizeVal, sizeErr := strconv.ParseInt(sizeStr, 10, 64)
+		timeVal, timeErr := time.ParseInLocation(timeFormat, timeStr, TZ)
+		nameVal, nameErr := strconv.Unquote(nameStr)
+
+		hashName, hashVal, hashErr := parseHash(hashStr)
+		if hashErr == nil && hashName != "" {
+			if lastHashName == "" {
+				lastHashName = hashName
+				hashErr = ls.hash.Set(hashName)
+			} else if hashName != lastHashName {
+				fs.Logf(listing, "Inconsistent hash type in line: %q", line)
+				continue
+			}
+		}
+
+		if flags != "-" || id != "-" || sizeErr != nil || timeErr != nil || hashErr != nil || nameErr != nil {
+			fs.Logf(listing, "Ignoring incorrect line: %q", line)
+			continue
+		}
+
+		if ls.has(nameVal) {
+			fs.Logf(listing, "Duplicate line (keeping latest): %q", line)
+			if ls.afterTime(nameVal, timeVal) {
+				continue
+			}
+		}
+
+		ls.put(nameVal, sizeVal, timeVal.In(TZ), hashVal, id)
+	}
+
+	return ls, nil
+}
+
+func parseHash(str string) (string, string, error) {
+	if str == "-" {
+		return "", "", nil
+	}
+	if pos := strings.Index(str, ":"); pos > 0 {
+		name, val := str[:pos], str[pos+1:]
+		if name != "" && val != "" {
+			return name, val, nil
+		}
+	}
+	return "", "", errors.Errorf("invalid hash %q", str)
+}
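The contract of parseHash, shown with a quick standalone demo (the function body is copied from above, with errors.Errorf swapped for fmt.Errorf so it runs without pkg/errors):

    package main

    import (
        "fmt"
        "strings"
    )

    func parseHash(str string) (string, string, error) {
        if str == "-" {
            return "", "", nil
        }
        if pos := strings.Index(str, ":"); pos > 0 {
            name, val := str[:pos], str[pos+1:]
            if name != "" && val != "" {
                return name, val, nil
            }
        }
        return "", "", fmt.Errorf("invalid hash %q", str)
    }

    func main() {
        fmt.Println(parseHash("md5:378840336ab14afa9c6b8d887e68a340")) // md5 378840... <nil>
        fmt.Println(parseHash("-"))                                    // empty: no hash recorded
        fmt.Println(parseHash("md5:"))                                 // error: invalid hash "md5:"
    }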
hash %q", str) +} + +// makeListing will produce listing from directory tree and write it to a file +func (b *bisyncRun) makeListing(ctx context.Context, f fs.Fs, listing string) (ls *fileList, err error) { + ci := fs.GetConfig(ctx) + depth := ci.MaxDepth + hashType := hash.None + if !ci.IgnoreChecksum { + // Currently bisync just honors --ignore-checksum + // TODO add full support for checksums and related flags + hashType = f.Hashes().GetOne() + } + ls = newFileList() + ls.hash = hashType + var lock sync.Mutex + err = walk.ListR(ctx, f, "", false, depth, walk.ListObjects, func(entries fs.DirEntries) error { + var firstErr error + entries.ForObject(func(o fs.Object) { + //tr := accounting.Stats(ctx).NewCheckingTransfer(o) // TODO + var ( + hashVal string + hashErr error + ) + if hashType != hash.None { + hashVal, hashErr = o.Hash(ctx, hashType) + if firstErr == nil { + firstErr = hashErr + } + } + time := o.ModTime(ctx).In(TZ) + id := "" // TODO + lock.Lock() + ls.put(o.Remote(), o.Size(), time, hashVal, id) + lock.Unlock() + //tr.Done(ctx, nil) // TODO + }) + return firstErr + }) + if err == nil { + err = ls.save(ctx, listing) + } + if err != nil { + b.abort = true + } + return +} + +// checkListing verifies that listing is not empty (unless resynching) +func (b *bisyncRun) checkListing(ls *fileList, listing, msg string) error { + if b.opt.Resync || !ls.empty() { + return nil + } + fs.Errorf(nil, "Empty %s listing. Cannot sync to an empty directory: %s", msg, listing) + b.critical = true + return errors.Errorf("empty %s listing: %s", msg, listing) +} diff --git a/cmd/bisync/log.go b/cmd/bisync/log.go new file mode 100644 index 000000000..6c4a7dfc2 --- /dev/null +++ b/cmd/bisync/log.go @@ -0,0 +1,49 @@ +package bisync + +import ( + "fmt" + "runtime" + "strconv" + "strings" + + "github.com/rclone/rclone/fs" +) + +func (b *bisyncRun) indentf(tag, file, format string, args ...interface{}) { + b.indent(tag, file, fmt.Sprintf(format, args...)) +} + +func (b *bisyncRun) indent(tag, file, msg string) { + logf := fs.Infof + switch { + case tag == "ERROR": + tag = "" + logf = fs.Errorf + case tag == "INFO": + tag = "" + case strings.HasPrefix(tag, "!"): + tag = tag[1:] + logf = fs.Logf + } + logf(nil, "- %-9s%-35s - %s", tag, msg, escapePath(file, false)) +} + +// escapePath will escape control characters in path. +// It won't quote just due to backslashes on Windows. +func escapePath(path string, forceQuotes bool) string { + test := path + if runtime.GOOS == "windows" { + test = strings.ReplaceAll(path, "\\", "/") + } + if strconv.Quote(test) != `"`+test+`"` { + return strconv.Quote(path) + } + if forceQuotes { + return `"` + path + `"` + } + return path +} + +func quotePath(path string) string { + return escapePath(path, true) +} diff --git a/cmd/bisync/operations.go b/cmd/bisync/operations.go new file mode 100644 index 000000000..e7d853cd1 --- /dev/null +++ b/cmd/bisync/operations.go @@ -0,0 +1,455 @@ +// Package bisync implements bisync +// Copyright (c) 2017-2020 Chris Nelson +// Contributions to original python version: Hildo G. 
diff --git a/cmd/bisync/operations.go b/cmd/bisync/operations.go
new file mode 100644
index 000000000..e7d853cd1
--- /dev/null
+++ b/cmd/bisync/operations.go
@@ -0,0 +1,455 @@
+// Package bisync implements bisync
+// Copyright (c) 2017-2020 Chris Nelson
+// Contributions to original python version: Hildo G. Jr., e2t, kalemas, silenceleaf
+package bisync
+
+import (
+	"context"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+	gosync "sync"
+
+	"github.com/pkg/errors"
+	"github.com/rclone/rclone/cmd/bisync/bilib"
+	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/filter"
+	"github.com/rclone/rclone/fs/operations"
+	"github.com/rclone/rclone/fs/sync"
+	"github.com/rclone/rclone/lib/atexit"
+)
+
+// ErrBisyncAborted signals that bisync is aborted and forces exit code 2
+var ErrBisyncAborted = errors.New("bisync aborted")
+
+// bisyncRun keeps bisync runtime state
+type bisyncRun struct {
+	fs1      fs.Fs
+	fs2      fs.Fs
+	abort    bool
+	critical bool
+	basePath string
+	workDir  string
+	opt      *Options
+}
+
+// Bisync handles lock file, performs bisync run and checks exit status
+func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
+	opt := *optArg // ensure that input is never changed
+	b := &bisyncRun{
+		fs1: fs1,
+		fs2: fs2,
+		opt: &opt,
+	}
+
+	if opt.CheckFilename == "" {
+		opt.CheckFilename = DefaultCheckFilename
+	}
+	if opt.Workdir == "" {
+		opt.Workdir = DefaultWorkdir
+	}
+
+	if !opt.DryRun && !opt.Force {
+		if fs1.Precision() == fs.ModTimeNotSupported {
+			return errors.New("modification time support is missing on path1")
+		}
+		if fs2.Precision() == fs.ModTimeNotSupported {
+			return errors.New("modification time support is missing on path2")
+		}
+	}
+
+	if b.workDir, err = filepath.Abs(opt.Workdir); err != nil {
+		return errors.Wrap(err, "failed to make workdir absolute")
+	}
+	if err = os.MkdirAll(b.workDir, os.ModePerm); err != nil {
+		return errors.Wrap(err, "failed to create workdir")
+	}
+
+	// Produce a unique name for the sync operation
+	b.basePath = filepath.Join(b.workDir, bilib.SessionName(b.fs1, b.fs2))
+	listing1 := b.basePath + ".path1.lst"
+	listing2 := b.basePath + ".path2.lst"
+
+	// Handle lock file
+	lockFile := ""
+	if !opt.DryRun {
+		lockFile = b.basePath + ".lck"
+		if bilib.FileExists(lockFile) {
+			return errors.Errorf("prior lock file found: %s", lockFile)
+		}
+
+		pidStr := []byte(strconv.Itoa(os.Getpid()))
+		if err = ioutil.WriteFile(lockFile, pidStr, bilib.PermSecure); err != nil {
+			return errors.Wrapf(err, "cannot create lock file: %s", lockFile)
+		}
+		fs.Debugf(nil, "Lock file created: %s", lockFile)
+	}
+
+	// Handle SIGINT
+	var finaliseOnce gosync.Once
+	markFailed := func(file string) {
+		failFile := file + "-err"
+		if bilib.FileExists(file) {
+			_ = os.Remove(failFile)
+			_ = os.Rename(file, failFile)
+		}
+	}
+	finalise := func() {
+		finaliseOnce.Do(func() {
+			if atexit.Signalled() {
+				fs.Logf(nil, "Bisync interrupted. Must run --resync to recover.")
+				markFailed(listing1)
+				markFailed(listing2)
+				_ = os.Remove(lockFile)
+			}
+		})
+	}
+	fnHandle := atexit.Register(finalise)
+	defer atexit.Unregister(fnHandle)
+
+	// run bisync
+	err = b.runLocked(ctx, listing1, listing2)
+
+	if lockFile != "" {
+		errUnlock := os.Remove(lockFile)
+		if errUnlock == nil {
+			fs.Debugf(nil, "Lock file removed: %s", lockFile)
+		} else if err == nil {
+			err = errUnlock
+		} else {
+			fs.Errorf(nil, "cannot remove lockfile %s: %v", lockFile, errUnlock)
+		}
+	}
+
+	if b.critical {
+		if bilib.FileExists(listing1) {
+			_ = os.Rename(listing1, listing1+"-err")
+		}
+		if bilib.FileExists(listing2) {
+			_ = os.Rename(listing2, listing2+"-err")
+		}
+		fs.Errorf(nil, "Bisync critical error: %v", err)
+		fs.Errorf(nil, "Bisync aborted. Must run --resync to recover.")
+		return ErrBisyncAborted
+	}
+	if b.abort {
+		fs.Logf(nil, "Bisync aborted. Please try again.")
+	}
+	if err == nil {
+		fs.Infof(nil, "Bisync successful")
+	}
+	return err
+}
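Concretely, a session between /home/user/data and remote:backup keeps its state under the work directory with names derived from bilib.SessionName, roughly like this (an illustrative layout; the suffixes come from the code above and from runLocked below):

    home_user_data..remote_backup.path1.lst      prior Path1 listing
    home_user_data..remote_backup.path2.lst      prior Path2 listing
    home_user_data..remote_backup.path1.lst-new  fresh listing built during a run
    home_user_data..remote_backup.lck            lock file holding the PID
    home_user_data..remote_backup.path1.lst-err  listing renamed aside after a critical abort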
Please try again.") + } + if err == nil { + fs.Infof(nil, "Bisync successful") + } + return err +} + +// runLocked performs a full bisync run +func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (err error) { + opt := b.opt + path1 := bilib.FsPath(b.fs1) + path2 := bilib.FsPath(b.fs2) + + if opt.CheckSync == CheckSyncOnly { + fs.Infof(nil, "Validating listings for Path1 %s vs Path2 %s", quotePath(path1), quotePath(path2)) + if err = b.checkSync(listing1, listing2); err != nil { + b.critical = true + } + return err + } + + fs.Infof(nil, "Synching Path1 %s with Path2 %s", quotePath(path1), quotePath(path2)) + + if opt.DryRun { + // In --dry-run mode, preserve original listings and save updates to the .lst-dry files + origListing1 := listing1 + origListing2 := listing2 + listing1 += "-dry" + listing2 += "-dry" + if err := bilib.CopyFileIfExists(origListing1, listing1); err != nil { + return err + } + if err := bilib.CopyFileIfExists(origListing2, listing2); err != nil { + return err + } + } + + // Create second context with filters + var fctx context.Context + if fctx, err = b.opt.applyFilters(octx); err != nil { + b.critical = true + return + } + + // Generate Path1 and Path2 listings and copy any unique Path2 files to Path1 + if opt.Resync { + return b.resync(octx, fctx, listing1, listing2) + } + + // Check for existence of prior Path1 and Path2 listings + if !bilib.FileExists(listing1) || !bilib.FileExists(listing2) { + // On prior critical error abort, the prior listings are renamed to .lst-err to lock out further runs + b.critical = true + return errors.New("cannot find prior Path1 or Path2 listings, likely due to critical error on prior run") + } + + // Check for Path1 deltas relative to the prior sync + fs.Infof(nil, "Path1 checking for diffs") + newListing1 := listing1 + "-new" + ds1, err := b.findDeltas(fctx, b.fs1, listing1, newListing1, "Path1") + if err != nil { + return err + } + ds1.printStats() + + // Check for Path2 deltas relative to the prior sync + fs.Infof(nil, "Path2 checking for diffs") + newListing2 := listing2 + "-new" + ds2, err := b.findDeltas(fctx, b.fs2, listing2, newListing2, "Path2") + if err != nil { + return err + } + ds2.printStats() + + // Check access health on the Path1 and Path2 filesystems + if opt.CheckAccess { + fs.Infof(nil, "Checking access health") + err = b.checkAccess(ds1.checkFiles, ds2.checkFiles) + if err != nil { + b.critical = true + return + } + } + + // Check for too many deleted files - possible error condition. + // Don't want to start deleting on the other side! + if !opt.Force { + if ds1.excessDeletes() || ds2.excessDeletes() { + b.abort = true + return errors.New("too many deletes") + } + } + + // Check for all files changed such as all dates changed due to DST change + // to avoid errant copy everything. + if !opt.Force { + msg := "Safety abort: all files were changed on %s %s. Run with --force if desired." 
+
+// resync implements the --resync mode.
+// It will generate path1 and path2 listings
+// and copy any unique path2 files to path1.
+func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string) error {
+	fs.Infof(nil, "Copying unique Path2 files to Path1")
+
+	newListing1 := listing1 + "-new"
+	filesNow1, err := b.makeListing(fctx, b.fs1, newListing1)
+	if err == nil {
+		err = b.checkListing(filesNow1, newListing1, "current Path1")
+	}
+	if err != nil {
+		return err
+	}
+
+	newListing2 := listing2 + "-new"
+	filesNow2, err := b.makeListing(fctx, b.fs2, newListing2)
+	if err == nil {
+		err = b.checkListing(filesNow2, newListing2, "current Path2")
+	}
+	if err != nil {
+		return err
+	}
+
+	copy2to1 := []string{}
+	for _, file := range filesNow2.list {
+		if !filesNow1.has(file) {
+			b.indent("Path2", file, "Resync will copy to Path1")
+			copy2to1 = append(copy2to1, file)
+		}
+	}
+
+	if len(copy2to1) > 0 {
+		b.indent("Path2", "Path1", "Resync is doing queued copies to")
+		// octx does not have extra filters!
+		err = b.fastCopy(octx, b.fs2, b.fs1, bilib.ToNames(copy2to1), "resync-copy2to1")
+		if err != nil {
+			b.critical = true
+			return err
+		}
+	}
+
+	fs.Infof(nil, "Resynching Path1 to Path2")
+	ctxRun := b.opt.setDryRun(fctx)
+	// fctx has our extra filters added!
+	ctxSync, filterSync := filter.AddConfig(ctxRun)
+	if filterSync.Opt.MinSize == -1 {
+		// prevent overwriting Google Doc files (their size is -1)
+		filterSync.Opt.MinSize = 0
+	}
+	if err = sync.Sync(ctxSync, b.fs2, b.fs1, false); err != nil { // args are (dst, src): make Path2 match Path1
+		b.critical = true
+		return err
+	}
+
+	fs.Infof(nil, "Resync updating listings")
+	if _, err = b.makeListing(fctx, b.fs1, listing1); err != nil {
+		b.critical = true
+		return err
+	}
+
+	if _, err = b.makeListing(fctx, b.fs2, listing2); err != nil {
+		b.critical = true
+		return err
+	}
+
+	if !b.opt.NoCleanup {
+		_ = os.Remove(newListing1)
+		_ = os.Remove(newListing2)
+	}
+	return nil
+}
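In practice that means the very first run of a pair, and any recovery after a critical abort, must be bootstrapped explicitly, e.g.:

    # first run, or recovery after the listings were renamed to .lst-err:
    rclone bisync /home/user/data remote:backup --resync
    # subsequent runs work from the listings the resync wrote:
    rclone bisync /home/user/data remote:backup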
+
+// checkSync validates listings
+func (b *bisyncRun) checkSync(listing1, listing2 string) error {
+	files1, err := b.loadListing(listing1)
+	if err != nil {
+		return errors.Wrap(err, "cannot read prior listing of Path1")
+	}
+	files2, err := b.loadListing(listing2)
+	if err != nil {
+		return errors.Wrap(err, "cannot read prior listing of Path2")
+	}
+
+	ok := true
+	for _, file := range files1.list {
+		if !files2.has(file) {
+			b.indent("ERROR", file, "Path1 file not found in Path2")
+			ok = false
+		}
+	}
+	for _, file := range files2.list {
+		if !files1.has(file) {
+			b.indent("ERROR", file, "Path2 file not found in Path1")
+			ok = false
+		}
+	}
+	if !ok {
+		return errors.New("path1 and path2 are out of sync, run --resync to recover")
+	}
+	return nil
+}
+
+// checkAccess validates access health
+func (b *bisyncRun) checkAccess(checkFiles1, checkFiles2 bilib.Names) error {
+	ok := true
+	opt := b.opt
+	prefix := "Access test failed:"
+
+	numChecks1 := len(checkFiles1)
+	numChecks2 := len(checkFiles2)
+	if numChecks1 == 0 || numChecks1 != numChecks2 {
+		fs.Errorf(nil, "%s Path1 count %d, Path2 count %d - %s", prefix, numChecks1, numChecks2, opt.CheckFilename)
+		ok = false
+	}
+
+	for file := range checkFiles1 {
+		if !checkFiles2.Has(file) {
+			b.indentf("ERROR", file, "%s Path1 file not found in Path2", prefix)
+			ok = false
+		}
+	}
+
+	for file := range checkFiles2 {
+		if !checkFiles1.Has(file) {
+			b.indentf("ERROR", file, "%s Path2 file not found in Path1", prefix)
+			ok = false
+		}
+	}
+
+	if !ok {
+		return errors.New("check file check failed")
+	}
+	fs.Infof(nil, "Found %d matching %q files on both paths", numChecks1, opt.CheckFilename)
+	return nil
+}
diff --git a/cmd/bisync/queue.go b/cmd/bisync/queue.go
new file mode 100644
index 000000000..08bb20862
--- /dev/null
+++ b/cmd/bisync/queue.go
@@ -0,0 +1,62 @@
+package bisync
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/rclone/rclone/cmd/bisync/bilib"
+	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/filter"
+	"github.com/rclone/rclone/fs/operations"
+	"github.com/rclone/rclone/fs/sync"
+)
+
+func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) error {
+	if err := b.saveQueue(files, queueName); err != nil {
+		return err
+	}
+
+	ctxCopy, filterCopy := filter.AddConfig(b.opt.setDryRun(ctx))
+	for _, file := range files.ToList() {
+		if err := filterCopy.AddFile(file); err != nil {
+			return err
+		}
+	}
+
+	return sync.CopyDir(ctxCopy, fdst, fsrc, false)
+}
+
+func (b *bisyncRun) fastDelete(ctx context.Context, f fs.Fs, files bilib.Names, queueName string) error {
+	if err := b.saveQueue(files, queueName); err != nil {
+		return err
+	}
+
+	transfers := fs.GetConfig(ctx).Transfers
+	ctxRun := b.opt.setDryRun(ctx)
+
+	objChan := make(fs.ObjectsChan, transfers)
+	errChan := make(chan error, 1)
+	go func() {
+		errChan <- operations.DeleteFiles(ctxRun, objChan)
+	}()
+	err := operations.ListFn(ctxRun, f, func(obj fs.Object) {
+		remote := obj.Remote()
+		if files.Has(remote) {
+			objChan <- obj
+		}
+	})
+	close(objChan)
+	opErr := <-errChan
+	if err == nil {
+		err = opErr
+	}
+	return err
+}
+
+func (b *bisyncRun) saveQueue(files bilib.Names, jobName string) error {
+	if !b.opt.SaveQueues {
+		return nil
+	}
+	queueFile := fmt.Sprintf("%s.%s.que", b.basePath, jobName)
+	return files.Save(queueFile)
+}
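When opt.SaveQueues is set (the bisync tests appear to use this to produce golden files), each batch leaves a trace next to the listings, named after the session and the job, e.g. (session name illustrative):

    home_user_data..remote_backup.copy1to2.que
    home_user_data..remote_backup.delete2.que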
diff --git a/cmd/bisync/rc.go b/cmd/bisync/rc.go
new file mode 100644
index 000000000..fe74af052
--- /dev/null
+++ b/cmd/bisync/rc.go
@@ -0,0 +1,91 @@
+package bisync
+
+import (
+	"context"
+	"log"
+
+	"github.com/pkg/errors"
+	"github.com/rclone/rclone/cmd/bisync/bilib"
+	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/rc"
+)
+
+func init() {
+	rc.Add(rc.Call{
+		Path:         "sync/bisync",
+		AuthRequired: true,
+		Fn:           rcBisync,
+		Title:        shortHelp,
+		Help:         rcHelp,
+	})
+}
+
+func rcBisync(ctx context.Context, in rc.Params) (out rc.Params, err error) {
+	opt := &Options{}
+	octx, ci := fs.AddConfig(ctx)
+
+	if dryRun, err := in.GetBool("dryRun"); err == nil {
+		ci.DryRun = dryRun
+	} else if rc.NotErrParamNotFound(err) {
+		return nil, err
+	}
+
+	if maxDelete, err := in.GetInt64("maxDelete"); err == nil {
+		if maxDelete < 0 || maxDelete > 100 {
+			return nil, rc.NewErrParamInvalid(errors.New("maxDelete must be a percentage between 0 and 100"))
+		}
+		ci.MaxDelete = maxDelete
+	} else if rc.NotErrParamNotFound(err) {
+		return nil, err
+	}
+
+	if opt.Resync, err = in.GetBool("resync"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.CheckAccess, err = in.GetBool("checkAccess"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.Force, err = in.GetBool("force"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.RemoveEmptyDirs, err = in.GetBool("removeEmptyDirs"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.NoCleanup, err = in.GetBool("noCleanup"); rc.NotErrParamNotFound(err) {
+		return
+	}
+
+	if opt.CheckFilename, err = in.GetString("checkFilename"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.FiltersFile, err = in.GetString("filtersFile"); rc.NotErrParamNotFound(err) {
+		return
+	}
+	if opt.Workdir, err = in.GetString("workdir"); rc.NotErrParamNotFound(err) {
+		return
+	}
+
+	if checkSync, err := in.GetString("checkSync"); err == nil {
+		if err := opt.CheckSync.Set(checkSync); err != nil {
+			return nil, err
+		}
+	} else if rc.NotErrParamNotFound(err) {
+		return nil, err
+	}
+
+	fs1, err := rc.GetFsNamed(octx, in, "path1")
+	if err != nil {
+		return nil, err
+	}
+
+	fs2, err := rc.GetFsNamed(octx, in, "path2")
+	if err != nil {
+		return nil, err
+	}
+
+	output := bilib.CaptureOutput(func() {
+		err = Bisync(octx, fs1, fs2, opt)
+	})
+	_, _ = log.Writer().Write(output)
+	return rc.Params{"output": string(output)}, err
+}
diff --git a/go.mod b/go.mod
index 19a2e23dc..b8c0219cf 100644
--- a/go.mod
+++ b/go.mod
@@ -52,6 +52,7 @@ require (
 	github.com/patrickmn/go-cache v2.1.0+incompatible
 	github.com/pkg/errors v0.9.1
 	github.com/pkg/sftp v1.13.2
+	github.com/pmezard/go-difflib v1.0.0
 	github.com/prometheus/client_golang v1.11.0
 	github.com/prometheus/common v0.30.0 // indirect
 	github.com/prometheus/procfs v0.7.3 // indirect
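With the remote control entry point above, a run can be driven over rc using the parameter names registered in rcBisync; paths and values here are illustrative:

    rclone rc sync/bisync path1=/home/user/data path2=remote:backup dryRun=true maxDelete=50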