diff --git a/backend/mailru/mrhash/mrhash.go b/backend/mailru/mrhash/mrhash.go
new file mode 100644
index 000000000..eb49c796f
--- /dev/null
+++ b/backend/mailru/mrhash/mrhash.go
@@ -0,0 +1,134 @@
+// Package mrhash implements the mailru hash, which is a modified SHA1.
+// If file size is less than or equal to the SHA1 checksum size (20 bytes),
+// its hash is simply its data right-padded with zero bytes.
+// Hash sum of a larger file is computed as a SHA1 sum of the "mrCloud" prefix,
+// the file data bytes and a decimal representation of the data length.
+package mrhash
+
+import (
+	"crypto/sha1"
+	"encoding"
+	"encoding/hex"
+	"errors"
+	"hash"
+	"strconv"
+)
+
+const (
+	// BlockSize is the block size of the underlying SHA1 in bytes.
+	BlockSize = sha1.BlockSize
+	// Size is the size of the checksum in bytes.
+	Size        = sha1.Size
+	startString = "mrCloud"                      // fed to the SHA1 ahead of any data
+	hashError   = "hash function returned error" // panic message for failed hash calls
+)
+
+// Global errors
+var (
+	ErrorInvalidHash = errors.New("invalid hash") // returned by DecodeString for malformed input
+)
+
+type digest struct {
+	total int       // bytes of data written so far; the startString prefix is not counted
+	sha   hash.Hash // underlying SHA1, primed with startString by Reset
+	small []byte    // copy of the data written while total <= Size
+}
+
+// New returns a new hash.Hash computing the Mailru checksum.
+func New() hash.Hash {
+	d := &digest{}
+	d.Reset()
+	return d
+}
+
+// Write adds the bytes of p to the hash. They are always passed to the
+// underlying SHA1 and, as long as the total number of bytes written does not
+// exceed Size, they are also appended to the small content buffer that Sum
+// uses for short data. It returns the byte count reported by the underlying
+// SHA1 and a nil error; it panics with hashError if that write fails.
+//
+// Implementations must not retain p.
+func (d *digest) Write(p []byte) (n int, err error) {
+	n, err = d.sha.Write(p)
+	if err != nil {
+		panic(hashError)
+	}
+	d.total += n
+	if d.total <= Size {
+		d.small = append(d.small, p...)
+	}
+	return n, nil
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// It does not change the underlying hash state.
+func (d *digest) Sum(b []byte) []byte {
+	// Content of at most Size bytes is returned as is, zero-padded to Size
+	if d.total <= Size {
+		padded := make([]byte, Size)
+		copy(padded, d.small)
+		return append(b, padded...)
+	}
+	clone, err := cloneSHA1(d.sha) // finish a clone so d.sha can keep accepting writes
+	if err == nil {
+		_, err = clone.Write([]byte(strconv.Itoa(d.total)))
+	}
+	if err != nil {
+		panic(hashError)
+	}
+	return clone.Sum(b)
+}
+
+// cloneSHA1 returns a new SHA1 hash restored from the marshaled state of orig.
+func cloneSHA1(orig hash.Hash) (clone hash.Hash, err error) {
+	state, err := orig.(encoding.BinaryMarshaler).MarshalBinary()
+	if err != nil {
+		return nil, err
+	}
+	clone = sha1.New()
+	err = clone.(encoding.BinaryUnmarshaler).UnmarshalBinary(state)
+	return clone, err
+}
+
+// Reset resets the Hash to its initial state, dropping buffered small content.
+func (d *digest) Reset() {
+	d.sha = sha1.New()
+	_, _ = d.sha.Write([]byte(startString)) // every sum is prefixed with startString
+	d.total = 0
+	d.small = nil // stale bytes must not leak into the next small-content sum
+}
+
+// Size returns the number of bytes Sum will return.
+func (d *digest) Size() int {
+	return Size
+}
+
+// BlockSize returns the hash's underlying block size.
+// The Write method must be able to accept any amount
+// of data, but it may operate more efficiently if all writes
+// are a multiple of the block size.
+func (d *digest) BlockSize() int {
+	return BlockSize
+}
+
+// Sum returns the Mailru checksum of the data.
+func Sum(data []byte) []byte {
+	var d digest
+	d.Reset()
+	_, _ = d.Write(data) // Write always returns a nil error
+	return d.Sum(nil)
+}
+
+// DecodeString decodes a hex string of exactly Size bytes, else ErrorInvalidHash.
+func DecodeString(s string) ([]byte, error) {
+	b, err := hex.DecodeString(s)
+	if err != nil || len(b) != Size {
+		return nil, ErrorInvalidHash
+	}
+	return b, nil
+}
+
+// must implement this interface
+var (
+	_ hash.Hash = (*digest)(nil)
+)
diff --git a/backend/mailru/mrhash/mrhash_test.go b/backend/mailru/mrhash/mrhash_test.go
new file mode 100644
index 000000000..3c5815652
--- /dev/null
+++ b/backend/mailru/mrhash/mrhash_test.go
@@ -0,0 +1,87 @@
+package mrhash_test
+
+import (
+	"encoding/hex"
+	"fmt"
+	"testing"
+
+	"github.com/rclone/rclone/backend/mailru/mrhash"
+	"github.com/stretchr/testify/assert"
+)
+
+// testChunk hashes runs of 'A' bytes of several lengths, feeding the data in
+// writes of at most chunk bytes, and compares each hex sum with the expected
+// value. Sum is called twice to check that it leaves the hash state intact.
+func testChunk(t *testing.T, chunk int) {
+	data := make([]byte, chunk)
+	for i := 0; i < chunk; i++ {
+		data[i] = 'A'
+	}
+	for _, test := range []struct {
+		n    int
+		want string
+	}{
+		{0, "0000000000000000000000000000000000000000"},
+		{1, "4100000000000000000000000000000000000000"},
+		{2, "4141000000000000000000000000000000000000"},
+		{19, "4141414141414141414141414141414141414100"},
+		{20, "4141414141414141414141414141414141414141"},
+		{21, "eb1d05e78a18691a5aa196a6c2b60cd40b5faafb"},
+		{22, "037e6d960601118a0639afbeff30fe716c66ed2d"},
+		{4096, "45a16aa192502b010280fb5b44274c601a91fd9f"},
+		{4194303, "fa019d5bd26498cf6abe35e0d61801bf19bf704b"},
+		{4194304, "5ed0e07aa6ea5c1beb9402b4d807258f27d40773"},
+		{4194305, "67bd0b9247db92e0e7d7e29a0947a50fedcb5452"},
+		{8388607, "41a8e2eb044c2e242971b5445d7be2a13fc0dd84"},
+		{8388608, "267a970917c624c11fe624276ec60233a66dc2c0"},
+		{8388609, "37b60b308d553d2732aefb62b3ea88f74acfa13f"},
+	} {
+		d := mrhash.New()
+		var toWrite int
+		for toWrite = test.n; toWrite >= chunk; toWrite -= chunk {
+			n, err := d.Write(data)
+			assert.Nil(t, err)
+			assert.Equal(t, chunk, n)
+		}
+		n, err := d.Write(data[:toWrite])
+		assert.Nil(t, err)
+		assert.Equal(t, toWrite, n)
+		// Label failures with test.n; n is only the size of the final partial write.
+		got1 := hex.EncodeToString(d.Sum(nil))
+		assert.Equal(t, test.want, got1, fmt.Sprintf("when testing length %d", test.n))
+		got2 := hex.EncodeToString(d.Sum(nil))
+		assert.Equal(t, test.want, got2, fmt.Sprintf("when testing length %d (2nd sum)", test.n))
+	}
+}
+
+func TestHashChunk16M(t *testing.T)  { testChunk(t, 16*1024*1024) }
+func TestHashChunk8M(t *testing.T)   { testChunk(t, 8*1024*1024) }
+func TestHashChunk4M(t *testing.T)   { testChunk(t, 4*1024*1024) }
+func TestHashChunk2M(t *testing.T)   { testChunk(t, 2*1024*1024) }
+func TestHashChunk1M(t *testing.T)   { testChunk(t, 1*1024*1024) }
+func TestHashChunk64k(t *testing.T)  { testChunk(t, 64*1024) }
+func TestHashChunk32k(t *testing.T)  { testChunk(t, 32*1024) }
+func TestHashChunk2048(t *testing.T) { testChunk(t, 2048) }
+func TestHashChunk2047(t *testing.T) { testChunk(t, 2047) }
+
+// TestSumCalledTwice checks that Sum does not panic on a new hash, after
+// Reset, when called twice in a row, or after a one-byte Write.
+func TestSumCalledTwice(t *testing.T) {
+	d := mrhash.New()
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	d.Reset()
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	assert.NotPanics(t, func() { d.Sum(nil) })
+	_, _ = d.Write([]byte{1})
+	assert.NotPanics(t, func() { d.Sum(nil) })
+}
+
+func TestSize(t *testing.T) {
+	d := mrhash.New()
+	assert.Equal(t, 20, d.Size())
+}
+
+func TestBlockSize(t *testing.T) {
+	d := mrhash.New()
+	assert.Equal(t, 64, d.BlockSize())
+}
diff --git a/fs/hash/hash.go b/fs/hash/hash.go index 54e638a3e..39c8abfeb 100644 --- a/fs/hash/hash.go +++ b/fs/hash/hash.go @@ -13,6 +13,7 @@ import ( "github.com/jzelinskie/whirlpool" "github.com/pkg/errors" "github.com/rclone/rclone/backend/dropbox/dbhash" + "github.com/rclone/rclone/backend/mailru/mrhash" "github.com/rclone/rclone/backend/onedrive/quickxorhash" ) @@ -44,13 +45,16 @@ const ( // CRC32 indicates CRC-32 support CRC32 + // Mailru indicates Mailru special hash + Mailru + // None indicates no hashes are supported None Type = 0 ) // Supported returns a set of all the supported hashes by // HashStream and MultiHasher. 
-var Supported = NewHashSet(MD5, SHA1, Dropbox, QuickXorHash, Whirlpool, CRC32) +var Supported = NewHashSet(MD5, SHA1, Dropbox, QuickXorHash, Whirlpool, CRC32, Mailru) // Width returns the width in characters for any HashType var Width = map[Type]int{ @@ -60,6 +64,7 @@ var Width = map[Type]int{ QuickXorHash: 40, Whirlpool: 128, CRC32: 8, + Mailru: 40, } // Stream will calculate hashes of all supported hash types. @@ -103,6 +108,8 @@ func (h Type) String() string { return "Whirlpool" case CRC32: return "CRC-32" + case Mailru: + return "MailruHash" default: err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h)) panic(err) @@ -126,6 +133,8 @@ func (h *Type) Set(s string) error { *h = Whirlpool case "CRC-32": *h = CRC32 + case "MailruHash": + *h = Mailru default: return errors.Errorf("Unknown hash type %q", s) } @@ -160,6 +169,8 @@ func fromTypes(set Set) (map[Type]hash.Hash, error) { hashers[t] = whirlpool.New() case CRC32: hashers[t] = crc32.NewIEEE() + case Mailru: + hashers[t] = mrhash.New() default: err := fmt.Sprintf("internal error: Unsupported hash type %v", t) panic(err) diff --git a/fs/hash/hash_test.go b/fs/hash/hash_test.go index cefcaffd5..8b149eed9 100644 --- a/fs/hash/hash_test.go +++ b/fs/hash/hash_test.go @@ -75,6 +75,7 @@ var hashTestSet = []hashTest{ hash.QuickXorHash: "0110c000085000031c0001095ec00218d0000700", hash.Whirlpool: "eddf52133d4566d763f716e853d6e4efbabd29e2c2e63f56747b1596172851d34c2df9944beb6640dbdbe3d9b4eb61180720a79e3d15baff31c91e43d63869a4", hash.CRC32: "a6041d7e", + hash.Mailru: "0102030405060708090a0b0c0d0e000000000000", }, }, // Empty data set @@ -87,6 +88,7 @@ var hashTestSet = []hashTest{ hash.QuickXorHash: "0000000000000000000000000000000000000000", hash.Whirlpool: "19fa61d75522a4669b44e39c1d2e1726c530232130d407f89afee0964997f7a73e83be698b288febcf88e3e03c4f0757ea8964e59b63d93708b138cc42a66eb3", hash.CRC32: "00000000", + hash.Mailru: "0000000000000000000000000000000000000000", }, }, }