From 9933d6c07135926d904ccc3aec10c21e356a9cdc Mon Sep 17 00:00:00 2001 From: nielash Date: Thu, 21 Sep 2023 12:35:40 -0400 Subject: [PATCH] check: respect --no-unicode-normalization and --ignore-case-sync for --checkfile Before this change, --no-unicode-normalization and --ignore-case-sync were respected for rclone check but not for rclone check --checkfile, causing them to give different results. This change adds support for --checkfile so that the behavior is consistent. --- fs/operations/check.go | 22 +++++++++++--- fs/operations/check_test.go | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/fs/operations/check.go b/fs/operations/check.go index 06d9f14ec..239b3a72f 100644 --- a/fs/operations/check.go +++ b/fs/operations/check.go @@ -20,6 +20,7 @@ import ( "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/march" "github.com/rclone/rclone/lib/readers" + "golang.org/x/text/unicode/norm" ) // checkFn is the type of the checking function used in CheckFn() @@ -375,6 +376,19 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error { return CheckFn(ctx, &optCopy) } +// ApplyTransforms handles --no-unicode-normalization and --ignore-case-sync for CheckSum +// so that it matches behavior of Check (where it's handled by March) +func ApplyTransforms(ctx context.Context, s string) string { + ci := fs.GetConfig(ctx) + if !ci.NoUnicodeNormalization { + s = norm.NFC.String(s) + } + if ci.IgnoreCaseSync { + s = strings.ToLower(s) + } + return s +} + // CheckSum checks filesystem hashes against a SUM file func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType hash.Type, opt *CheckOpt, download bool) error { var options CheckOpt @@ -440,10 +454,10 @@ func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType ha // checkSum checks single object against golden hashes func (c *checkMarch) checkSum(ctx context.Context, obj fs.Object, download bool, hashes HashSums, hashType hash.Type) { - remote := obj.Remote() + normalizedRemote := ApplyTransforms(ctx, obj.Remote()) c.ioMu.Lock() - sumHash, sumFound := hashes[remote] - hashes[remote] = "" // mark sum as consumed + sumHash, sumFound := hashes[normalizedRemote] + hashes[normalizedRemote] = "" // mark sum as consumed c.ioMu.Unlock() if !sumFound && c.opt.OneWay { @@ -563,7 +577,7 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) { continue } - fields := re.FindStringSubmatch(line) + fields := re.FindStringSubmatch(ApplyTransforms(ctx, line)) if fields == nil { numWarn++ if numWarn <= maxWarn { diff --git a/fs/operations/check_test.go b/fs/operations/check_test.go index 592485ed2..2f4213ad4 100644 --- a/fs/operations/check_test.go +++ b/fs/operations/check_test.go @@ -20,6 +20,7 @@ import ( "github.com/rclone/rclone/lib/readers" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/text/unicode/norm" ) func testCheck(t *testing.T, checkFunction func(ctx context.Context, opt *operations.CheckOpt) error) { @@ -544,3 +545,61 @@ func TestCheckSum(t *testing.T) { func TestCheckSumDownload(t *testing.T) { testCheckSum(t, true) } + +func TestApplyTransforms(t *testing.T) { + var ( + hashType = hash.MD5 + content = "Hello, World!" + hash = "65a8e27d8879283831b664bd8b7f0ad4" + nfc = norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ")) + nfd = norm.NFD.String(nfc) + nfcx2 = nfc + nfc + nfdx2 = nfd + nfd + both = nfc + nfd + upper = "HELLO, WORLD!" + lower = "hello, world!" + upperlowermixed = "HeLlO, wOrLd!" + ) + + testScenario := func(checkfileName, remotefileName, scenario string) { + r := fstest.NewRunIndividual(t) + if !r.Flocal.Hashes().Contains(hashType) || !r.Fremote.Hashes().Contains(hashType) { + t.Skipf("Fs lacks %s, skipping", hashType) + } + ctx := context.Background() + ci := fs.GetConfig(ctx) + opt := operations.CheckOpt{} + + remotefile := r.WriteObject(ctx, remotefileName, content, t2) + checkfile := r.WriteFile("test.sum", hash+" "+checkfileName, t2) + r.CheckLocalItems(t, checkfile) + assert.False(t, checkfileName == remotefile.Path, "Values match but should not: %s %s", checkfileName, remotefile.Path) + + testname := scenario + " (without normalization)" + println(testname) + ci.NoUnicodeNormalization = true + ci.IgnoreCaseSync = false + accounting.GlobalStats().ResetCounters() + err := operations.CheckSum(ctx, r.Fremote, r.Flocal, "test.sum", hashType, &opt, false) + assert.Error(t, err, "no expected error for %s %v %v", testname, checkfileName, remotefileName) + + testname = scenario + " (with normalization)" + println(testname) + ci.NoUnicodeNormalization = false + ci.IgnoreCaseSync = true + accounting.GlobalStats().ResetCounters() + err = operations.CheckSum(ctx, r.Fremote, r.Flocal, "test.sum", hashType, &opt, false) + assert.NoError(t, err, "unexpected error for %s %v %v", testname, checkfileName, remotefileName) + } + + testScenario(upper, lower, "upper checkfile vs. lower remote") + testScenario(lower, upper, "lower checkfile vs. upper remote") + testScenario(lower, upperlowermixed, "lower checkfile vs. upperlowermixed remote") + testScenario(upperlowermixed, upper, "upperlowermixed checkfile vs. upper remote") + testScenario(nfd, nfc, "NFD checkfile vs. NFC remote") + testScenario(nfc, nfd, "NFC checkfile vs. NFD remote") + testScenario(nfdx2, both, "NFDx2 checkfile vs. both remote") + testScenario(nfcx2, both, "NFCx2 checkfile vs. both remote") + testScenario(both, nfdx2, "both checkfile vs. NFDx2 remote") + testScenario(both, nfcx2, "both checkfile vs. NFCx2 remote") +}