From e64be7652a15a602316d30476eaa8aaf8edd2a77 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 28 Oct 2023 17:12:03 +0100 Subject: [PATCH] operations: fix invalid UTF-8 when truncating file names when not using --inplace Before this change, when not using --inplace, rclone could generate invalid file names when truncating file names to fit within the character size limits. This fixes it by taking care to truncate on UTF-8 character boundaries. See: https://forum.rclone.org/t/ssh-fx-failure-when-copying-file-with-nonstandard-characters-to-sftp-remote-with-ntfs-drive/42560/ --- fs/operations/copy.go | 23 ++++++++++- fs/operations/copy_test.go | 80 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/fs/operations/copy.go b/fs/operations/copy.go index b396b2d8e..a2a2ebebd 100644 --- a/fs/operations/copy.go +++ b/fs/operations/copy.go @@ -12,6 +12,7 @@ import ( "path" "strings" "time" + "unicode/utf8" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/accounting" @@ -65,6 +66,26 @@ func (c *copy) removeFailedPartialCopy(ctx context.Context, f fs.Fs, remote stri c.removeFailedCopy(ctx, o) } +// TruncateString s to n bytes. +// +// If s is valid UTF-8 then this may truncate to fewer than n bytes to +// make the returned string also valid UTF-8. +func TruncateString(s string, n int) string { + truncated := s[:n] + if !utf8.ValidString(s) { + // If input string wasn't valid UTF-8 then just return the truncation + return truncated + } + for len(truncated) > 0 { + if utf8.ValidString(truncated) { + return truncated + } + // Remove 1 byte until valid + truncated = truncated[:len(truncated)-1] + } + return truncated +} + // Check to see if we should be using a partial name and return the name for the copy and the inplace flag func (c *copy) checkPartial() (remoteForCopy string, inplace bool, err error) { remoteForCopy = c.remote @@ -79,7 +100,7 @@ func (c *copy) checkPartial() (remoteForCopy string, inplace bool, err error) { suffix := "." + random.String(8) + c.ci.PartialSuffix base := path.Base(c.remoteForCopy) if len(base) > 100 { - remoteForCopy = c.remoteForCopy[:len(c.remoteForCopy)-len(suffix)] + suffix + remoteForCopy = TruncateString(c.remoteForCopy, len(c.remoteForCopy)-len(suffix)) + suffix } else { remoteForCopy += suffix } diff --git a/fs/operations/copy_test.go b/fs/operations/copy_test.go index a2d7e5356..f309b0060 100644 --- a/fs/operations/copy_test.go +++ b/fs/operations/copy_test.go @@ -4,6 +4,7 @@ import ( "context" "crypto/rand" "errors" + "fmt" "strings" "testing" @@ -15,6 +16,85 @@ import ( "github.com/stretchr/testify/require" ) +func TestTruncateString(t *testing.T) { + for _, test := range []struct { + in string + n int + want string + }{ + { + in: "", + n: 0, + want: "", + }, { + in: "Hello World", + n: 5, + want: "Hello", + }, { + in: "ááááá", + n: 5, + want: "áá", + }, { + in: "ááááá\xFF\xFF", + n: 5, + want: "áá\xc3", + }, { + in: "世世世世世", + n: 7, + want: "世世", + }, { + in: "🙂🙂🙂🙂🙂", + n: 16, + want: "🙂🙂🙂🙂", + }, { + in: "🙂🙂🙂🙂🙂", + n: 15, + want: "🙂🙂🙂", + }, { + in: "🙂🙂🙂🙂🙂", + n: 14, + want: "🙂🙂🙂", + }, { + in: "🙂🙂🙂🙂🙂", + n: 13, + want: "🙂🙂🙂", + }, { + in: "🙂🙂🙂🙂🙂", + n: 12, + want: "🙂🙂🙂", + }, { + in: "🙂🙂🙂🙂🙂", + n: 11, + want: "🙂🙂", + }, { + in: "𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ", + n: 100, + want: "𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢ", + }, { + in: "a𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ", + n: 100, + want: "a𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢ", + }, { + in: "aa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ", + n: 100, + want: "aa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱ", + }, { + in: "aaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ", + n: 100, + want: "aaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱ", + }, { + in: "aaaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽⁱˢⁱᵒⁿᵃʳʸ", + n: 100, + want: "aaaa𝓝𝓸𝓫𝓸𝓭𝔂 𝓲𝓼 𝓱𝓸𝓶𝓮 ᴬ ⱽⁱˢⁱᵗ ᶠʳᵒᵐ ᵗʰᵉ ⱽ", + }, + } { + got := operations.TruncateString(test.in, test.n) + assert.Equal(t, test.want, got, fmt.Sprintf("In %q", test.in)) + assert.LessOrEqual(t, len(got), test.n) + assert.GreaterOrEqual(t, len(got), test.n-3) + } +} + func TestCopyFile(t *testing.T) { ctx := context.Background() r := fstest.NewRun(t)