From 8fd66daab671bb9830a6d906abf65d64cc94b990 Mon Sep 17 00:00:00 2001 From: rinsuki <428rinsuki+git@gmail.com> Date: Sun, 24 Sep 2023 18:07:47 +0900 Subject: [PATCH] drive: add support of SHA-1 and SHA-256 checksum --- backend/drive/drive.go | 26 ++++++++--- docs/content/drive.md | 8 +++- docs/content/overview.md | 94 ++++++++++++++++++++-------------------- 3 files changed, 72 insertions(+), 56 deletions(-) diff --git a/backend/drive/drive.go b/backend/drive/drive.go index c33a615ed..98eb9a0d5 100644 --- a/backend/drive/drive.go +++ b/backend/drive/drive.go @@ -71,7 +71,7 @@ const ( // 1<<18 is the minimum size supported by the Google uploader, and there is no maximum. minChunkSize = fs.SizeSuffix(googleapi.MinUploadChunkSize) defaultChunkSize = 8 * fs.Mebi - partialFields = "id,name,size,md5Checksum,trashed,explicitlyTrashed,modifiedTime,createdTime,mimeType,parents,webViewLink,shortcutDetails,exportLinks,resourceKey" + partialFields = "id,name,size,md5Checksum,sha1Checksum,sha256Checksum,trashed,explicitlyTrashed,modifiedTime,createdTime,mimeType,parents,webViewLink,shortcutDetails,exportLinks,resourceKey" listRGrouping = 50 // number of IDs to search at once when using ListR listRInputBuffer = 1000 // size of input buffer when using ListR defaultXDGIcon = "text-html" @@ -323,13 +323,13 @@ rather than shortcuts themselves when doing server side copies.`, }, { Name: "skip_checksum_gphotos", Default: false, - Help: `Skip MD5 checksum on Google photos and videos only. + Help: `Skip checksums on Google photos and videos only. Use this if you get checksum errors when transferring Google photos or videos. Setting this flag will cause Google photos and videos to return a -blank MD5 checksum. +blank checksum. Google photos are identified by being in the "photos" space. 
@@ -751,6 +751,8 @@ type Object struct { baseObject url string // Download URL of this object md5sum string // md5sum of the object + sha1sum string // sha1sum of the object + sha256sum string // sha256sum of the object v2Download bool // generate v2 download link ondemand } @@ -1414,6 +1416,8 @@ func (f *Fs) newRegularObject(remote string, info *drive.File) fs.Object { for _, space := range info.Spaces { if space == "photos" { info.Md5Checksum = "" + info.Sha1Checksum = "" + info.Sha256Checksum = "" break } } @@ -1422,6 +1426,8 @@ func (f *Fs) newRegularObject(remote string, info *drive.File) fs.Object { baseObject: f.newBaseObject(remote, info), url: fmt.Sprintf("%sfiles/%s?alt=media", f.svc.BasePath, actualID(info.Id)), md5sum: strings.ToLower(info.Md5Checksum), + sha1sum: strings.ToLower(info.Sha1Checksum), + sha256sum: strings.ToLower(info.Sha256Checksum), v2Download: f.opt.V2DownloadMinSize != -1 && info.Size >= int64(f.opt.V2DownloadMinSize), } if info.ResourceKey != "" { @@ -3007,7 +3013,7 @@ func (f *Fs) DirCacheFlush() { // Hashes returns the supported hash sets. 
func (f *Fs) Hashes() hash.Set { - return hash.Set(hash.MD5) + return hash.NewHashSet(hash.MD5, hash.SHA1, hash.SHA256) } func (f *Fs) changeChunkSize(chunkSizeString string) (err error) { @@ -3568,10 +3574,16 @@ func (o *baseObject) Remote() string { // Hash returns the Md5sum of an object returning a lowercase hex string func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) { - if t != hash.MD5 { - return "", hash.ErrUnsupported + if t == hash.MD5 { + return o.md5sum, nil } - return o.md5sum, nil + if t == hash.SHA1 { + return o.sha1sum, nil + } + if t == hash.SHA256 { + return o.sha256sum, nil + } + return "", hash.ErrUnsupported } func (o *baseObject) Hash(ctx context.Context, t hash.Type) (string, error) { if t != hash.MD5 { diff --git a/docs/content/drive.md b/docs/content/drive.md index 62ead868f..808968143 100644 --- a/docs/content/drive.md +++ b/docs/content/drive.md @@ -772,13 +772,13 @@ Properties: #### --drive-skip-checksum-gphotos -Skip MD5 checksum on Google photos and videos only. +Skip checksums on Google photos and videos only. Use this if you get checksum errors when transferring Google photos or videos. Setting this flag will cause Google photos and videos to return a -blank MD5 checksum. +blank checksum. Google photos are identified by being in the "photos" space. @@ -1526,6 +1526,10 @@ Waiting a moderate period of time between attempts (estimated to be approximately 1 hour) and/or not using --fast-list both seem to be effective in preventing the problem. +### Hashes + +All files have MD5 hashes, but a small fraction of uploaded files may not have SHA1 or SHA256 hashes, especially if they were uploaded before 2018. + ## Making your own client_id When you use rclone with Google drive in its default configuration you diff --git a/docs/content/overview.md b/docs/content/overview.md index 2bbe12f89..a0759e579 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -14,53 +14,53 @@ show through. 
Here is an overview of the major features of each cloud storage system. -| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | Metadata | -| ---------------------------- |:----------------:|:-------:|:----------------:|:---------------:|:---------:|:--------:| -| 1Fichier | Whirlpool | - | No | Yes | R | - | -| Akamai Netstorage | MD5, SHA256 | R/W | No | No | R | - | -| Amazon Drive | MD5 | - | Yes | No | R | - | -| Amazon S3 (or S3 compatible) | MD5 | R/W | No | No | R/W | RWU | -| Backblaze B2 | SHA1 | R/W | No | No | R/W | - | -| Box | SHA1 | R/W | Yes | No | - | - | -| Citrix ShareFile | MD5 | R/W | Yes | No | - | - | -| Dropbox | DBHASH ¹ | R | Yes | No | - | - | -| Enterprise File Fabric | - | R/W | Yes | No | R/W | - | -| FTP | - | R/W ¹⁰ | No | No | - | - | -| Google Cloud Storage | MD5 | R/W | No | No | R/W | - | -| Google Drive | MD5 | R/W | No | Yes | R/W | - | -| Google Photos | - | - | No | Yes | R | - | -| HDFS | - | R/W | No | No | - | - | -| HiDrive | HiDrive ¹² | R/W | No | No | - | - | -| HTTP | - | R | No | No | R | - | -| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | RWU | -| Jottacloud | MD5 | R/W | Yes | No | R | - | -| Koofr | MD5 | - | Yes | No | - | - | -| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - | -| Mega | - | - | No | Yes | - | - | -| Memory | MD5 | R/W | No | No | - | - | -| Microsoft Azure Blob Storage | MD5 | R/W | No | No | R/W | - | -| Microsoft OneDrive | QuickXorHash ⁵ | R/W | Yes | No | R | - | -| OpenDrive | MD5 | R/W | Yes | Partial ⁸ | - | - | -| OpenStack Swift | MD5 | R/W | No | No | R/W | - | -| Oracle Object Storage | MD5 | R/W | No | No | R/W | - | -| pCloud | MD5, SHA1 ⁷ | R | No | No | W | - | -| PikPak | MD5 | R | No | No | R | - | -| premiumize.me | - | - | Yes | No | R | - | -| put.io | CRC-32 | R/W | No | Yes | R | - | -| Proton Drive | SHA1 | R/W | No | No | R | - | -| QingStor | MD5 | - ⁹ | No | No | R/W | - | -| Quatrix by Maytech | - | R/W | No | No | - | - | -| 
Seafile | - | - | No | No | - | - | -| SFTP | MD5, SHA1 ² | R/W | Depends | No | - | - | -| Sia | - | - | No | No | - | - | -| SMB | - | - | Yes | No | - | - | -| SugarSync | - | - | No | No | - | - | -| Storj | - | R | No | No | - | - | -| Uptobox | - | - | No | Yes | - | - | -| WebDAV | MD5, SHA1 ³ | R ⁴ | Depends | No | - | - | -| Yandex Disk | MD5 | R/W | No | No | R | - | -| Zoho WorkDrive | - | - | No | No | - | - | -| The local filesystem | All | R/W | Depends | No | - | RWU | +| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | Metadata | +| ---------------------------- |:-----------------:|:-------:|:----------------:|:---------------:|:---------:|:--------:| +| 1Fichier | Whirlpool | - | No | Yes | R | - | +| Akamai Netstorage | MD5, SHA256 | R/W | No | No | R | - | +| Amazon Drive | MD5 | - | Yes | No | R | - | +| Amazon S3 (or S3 compatible) | MD5 | R/W | No | No | R/W | RWU | +| Backblaze B2 | SHA1 | R/W | No | No | R/W | - | +| Box | SHA1 | R/W | Yes | No | - | - | +| Citrix ShareFile | MD5 | R/W | Yes | No | - | - | +| Dropbox | DBHASH ¹ | R | Yes | No | - | - | +| Enterprise File Fabric | - | R/W | Yes | No | R/W | - | +| FTP | - | R/W ¹⁰ | No | No | - | - | +| Google Cloud Storage | MD5 | R/W | No | No | R/W | - | +| Google Drive | MD5, SHA1, SHA256 | R/W | No | Yes | R/W | - | +| Google Photos | - | - | No | Yes | R | - | +| HDFS | - | R/W | No | No | - | - | +| HiDrive | HiDrive ¹² | R/W | No | No | - | - | +| HTTP | - | R | No | No | R | - | +| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | RWU | +| Jottacloud | MD5 | R/W | Yes | No | R | - | +| Koofr | MD5 | - | Yes | No | - | - | +| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - | +| Mega | - | - | No | Yes | - | - | +| Memory | MD5 | R/W | No | No | - | - | +| Microsoft Azure Blob Storage | MD5 | R/W | No | No | R/W | - | +| Microsoft OneDrive | QuickXorHash ⁵ | R/W | Yes | No | R | - | +| OpenDrive | MD5 | R/W | Yes | Partial ⁸ | - | - | +| OpenStack 
Swift | MD5 | R/W | No | No | R/W | - | +| Oracle Object Storage | MD5 | R/W | No | No | R/W | - | +| pCloud | MD5, SHA1 ⁷ | R | No | No | W | - | +| PikPak | MD5 | R | No | No | R | - | +| premiumize.me | - | - | Yes | No | R | - | +| put.io | CRC-32 | R/W | No | Yes | R | - | +| Proton Drive | SHA1 | R/W | No | No | R | - | +| QingStor | MD5 | - ⁹ | No | No | R/W | - | +| Quatrix by Maytech | - | R/W | No | No | - | - | +| Seafile | - | - | No | No | - | - | +| SFTP | MD5, SHA1 ² | R/W | Depends | No | - | - | +| Sia | - | - | No | No | - | - | +| SMB | - | - | Yes | No | - | - | +| SugarSync | - | - | No | No | - | - | +| Storj | - | R | No | No | - | - | +| Uptobox | - | - | No | Yes | - | - | +| WebDAV | MD5, SHA1 ³ | R ⁴ | Depends | No | - | - | +| Yandex Disk | MD5 | R/W | No | No | R | - | +| Zoho WorkDrive | - | - | No | No | - | - | +| The local filesystem | All | R/W | Depends | No | - | RWU | ### Notes