diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1aedf92ef..b56b560e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -244,7 +244,7 @@ Getting going * onedrive is a good one to start from if you have a directory based remote * b2 is a good one to start from if you have a bucket based remote * Add your remote to the imports in `fs/all/all.go` - * If web based remotes are easiest to maintain if they use rclone's rest module, but if there is a really good go SDK then use that instead. + * HTTP based remotes are easiest to maintain if they use rclone's rest module, but if there is a really good go SDK then use that instead. Unit tests diff --git a/README.md b/README.md index bfc4d574e..1544d0db1 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Rclone is a command line program to sync files and directories to and from * Google Drive * HTTP * Hubic + * Microsoft Azure Blob Storage * Microsoft OneDrive * Openstack Swift / Rackspace cloud files / Memset Memstore * QingStor diff --git a/azure/azure.go b/azure/azure.go deleted file mode 100644 index 3cac36ebe..000000000 --- a/azure/azure.go +++ /dev/null @@ -1,468 +0,0 @@ -package azure - -import ( - "github.com/Azure/azure-sdk-for-go/storage" - "github.com/ncw/rclone/fs" - "time" - "fmt" - "encoding/base64" - "encoding/hex" - "io" - "os" - "path" -) - -const ( - listChunkSize = 5000 // number of items to read at once -) - -// Fs represents a local filesystem rooted at root -type Fs struct { - name string // the name of the remote - account string // name of the storage Account - container string // name of the Storage Account Container - root string - features *fs.Features // optional features - bc *storage.BlobStorageClient - cc *storage.Container -} - -type Object struct { - fs *Fs - remote string - blob *storage.Blob -} - -// Register with Fs -func init() { - fsi := &fs.RegInfo{ - Name: "azure", - Description: "Azure Blob Storage", - NewFs: NewFs, - Options: []fs.Option{{ - Name: "azure_account", - Help: "Azure Storage Account Name", - }, { - Name: "azure_account_key", - Help: "Azure Storage Account Key", - }, { - Name: "azure_container", - Help: "Azure Storage Account Blob Container", - }}, - } - fs.Register(fsi) -} - -//func azureParseUri(uri string) (account, container, root string, err error) { -// //https://hl37iyhcj646wshrd0.blob.core.windows.net/shared -// parts := matcher.FindStringSubmatch(uri) -// if parts == nil { -// err = errors.Errorf("couldn't parse account / continer out of azure path %q", uri) -// } else { -// account, container, root = parts[1], parts[2], parts[3] -// root = strings.Trim(root, "/") -// } -// return -//} - -func azureConnection(name, account, accountKey, container string) (*storage.BlobStorageClient, *storage.Container, error) { - client, err := storage.NewClient(account, accountKey, storage.DefaultBaseURL, "2016-05-31", true) - if err != nil { - return nil, nil, err - } - tmp_bc := client.GetBlobService() - bc := &tmp_bc - tmp_cc := bc.GetContainerReference(container) - cc := &tmp_cc - return bc, cc, nil -} - -func sl(path string) string { - if path[len(path)-1:] != "/" { - return path + "/" - } else { - return path - } -} - -func unsl(path string) string { - if path[len(path)-1:] == "/" { - return path[:len(path)-1] - } else { - return path - } -} - - -func NewFs(name, root string) (fs.Fs, error) { - account := fs.ConfigFileGet(name, "azure_account", os.Getenv("AZURE_ACCOUNT")) - accountKey := fs.ConfigFileGet(name, "azure_account_key", os.Getenv("AZURE_ACCOUNT_KEY")) - container := 
fs.ConfigFileGet(name, "azure_container", os.Getenv("AZURE_CONTAINER")) - bc, cc, err := azureConnection(name, account, accountKey, container) - if err != nil { - return nil, err - } - f := &Fs{ - name: name, - account: account, - container: container, - root: root, - bc: bc, - cc: cc, - } - if f.root != "" { - f.root = sl(f.root) - _, err := bc.GetBlobProperties(container, root) - if err == nil { - // exists ! - f.root = path.Dir(root) - if f.root == "." { - f.root = "" - } else { - f.root += "/" - } - return f, fs.ErrorIsFile - } - } - f.features = (&fs.Features{}).Fill(f) - return f, nil -} - -// Name of the remote (as passed into NewFs) -func (f *Fs) Name() string { - return f.name -} - -// Root of the remote (as passed into NewFs) -func (f *Fs) Root() string { - return f.root -} - -// String converts this Fs to a string -func (f *Fs) String() string { - return fmt.Sprintf("Azure Blob Account %s container %s, directory %s", f.account, f.container, f.root) -} - -// Precision of the remote -func (f *Fs) Precision() time.Duration { - return time.Millisecond -} - -func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) { - srcObj, ok := src.(*Object) - if !ok { - fs.Debugf(src, "Can't copy - not same remote type") - return nil, fs.ErrorCantCopy - } - err := f.bc.CopyBlob(f.container, f.root + remote, f.bc.GetBlobURL(f.container, srcObj.blob.Name)) - if err != nil { - return nil, err - } - return f.NewObject(remote) -} - -// Hashes returns the supported hash sets. -func (f *Fs) Hashes() fs.HashSet { - return fs.HashSet(fs.HashMD5) -} - -// Features returns the optional features of this Fs -func (f *Fs) Features() *fs.Features { - return f.features -} - -type visitFunc func(remote string, blob *storage.Blob, isDirectory bool) error - -func listInnerRecurse(f *Fs, out *fs.ListOpts, dir string, level int, visitor visitFunc) error { - dirWithRoot := f.root - if dir != "" { - dirWithRoot += dir + "/" - } - - maxresults := uint(listChunkSize) - delimiter := "/" - if level == fs.MaxLevel { - return fs.ErrorLevelNotSupported - } - - marker := "" - for { - resp, err := f.cc.ListBlobs(storage.ListBlobsParameters{ - Prefix: dirWithRoot, - Delimiter: delimiter, - Marker: marker, - Include: "metadata", - MaxResults: maxresults, - Timeout: 100, - }) - if err != nil { - return err - } - rootLength := len(f.root) - for _, blob := range resp.Blobs { - err := visitor(blob.Name[rootLength:], &blob, false) - if err != nil { - return err - } - } - for _, blobPrefix := range resp.BlobPrefixes { - strippedDir := unsl(blobPrefix[rootLength:]) - err := visitor(strippedDir, nil, true) - if err != nil { - return err - } - if err == nil && level < (*out).Level() { - err := listInnerRecurse(f, out, strippedDir, level+1, visitor) - if err != nil { - return err - } - } - } - if resp.NextMarker != "" { - marker = resp.NextMarker - } else { - break - } - } - return nil -} - -// List lists files and directories to out -func (f *Fs) List(out fs.ListOpts, dir string) { - defer out.Finished() - - // List the objects and directories - listInnerRecurse(f, &out, dir, 1, func(remote string, blob *storage.Blob, isDirectory bool) error { - if isDirectory { - dir := &fs.Dir{ - Name: remote, - Bytes: int64(0), - Count: 0, - } - if out.AddDir(dir) { - return fs.ErrorListAborted - } - } else { - newBlob := blob - o, err := f.newObjectWithInfo(remote, newBlob) - if err != nil { - return err - } - if out.Add(o) { - return fs.ErrorListAborted - } - } - return nil - }) - return -} - -// NewObject finds the Object at remote. 
If it can't be found -// it returns the error fs.ErrorObjectNotFound. -func (f *Fs) NewObject(remote string) (fs.Object, error) { - return f.newObjectWithInfo(remote, nil) -} - -func copyBlob(blob *storage.Blob) *storage.Blob { - var tmp storage.Blob = storage.Blob{} - tmp.Name = blob.Name - tmp.Properties.LastModified = blob.Properties.LastModified - tmp.Properties.Etag = blob.Properties.Etag - tmp.Properties.ContentMD5 = blob.Properties.ContentMD5 - tmp.Properties.ContentLength = blob.Properties.ContentLength - tmp.Properties.ContentType = blob.Properties.ContentType - tmp.Properties.ContentEncoding = blob.Properties.ContentEncoding - tmp.Properties.CacheControl = blob.Properties.CacheControl - tmp.Properties.ContentLanguage = blob.Properties.ContentLanguage - tmp.Properties.BlobType = blob.Properties.BlobType - tmp.Properties.SequenceNumber = blob.Properties.SequenceNumber - tmp.Properties.CopyID = blob.Properties.CopyID - tmp.Properties.CopyStatus = blob.Properties.CopyStatus - tmp.Properties.CopySource = blob.Properties.CopySource - tmp.Properties.CopyProgress = blob.Properties.CopyProgress - tmp.Properties.CopyCompletionTime = blob.Properties.CopyCompletionTime - tmp.Properties.CopyStatusDescription = blob.Properties.CopyStatusDescription - tmp.Properties.LeaseStatus = blob.Properties.LeaseStatus - tmp.Properties.LeaseState = blob.Properties.LeaseState - for k,v := range blob.Metadata { - tmp.Metadata[k] = v - } - return &tmp -} - -//If it can't be found it returns the error ErrorObjectNotFound. -func (f *Fs) newObjectWithInfo(remote string, blob *storage.Blob) (fs.Object, error) { - o := &Object{ - fs: f, - remote: remote, - } - if blob != nil { - o.blob = copyBlob(blob) - } else { - err := o.readMetaData() // reads info and meta, returning an error - if err != nil { - return nil, err - } - } - return o, nil -} - -// Put the Object into the bucket -func (f *Fs) Put(in io.Reader, src fs.ObjectInfo) (fs.Object, error) { - // Temporary Object under construction - fso := &Object{ - fs: f, - remote: src.Remote(), - } - return fso, fso.Update(in, src) -} - -// Mkdir creates the bucket if it doesn't exist -func (f *Fs) Mkdir(dir string) error { - return nil -} - -// Rmdir deletes the bucket if the fs is at the root -// Returns an error if it isn't empty -func (f *Fs) Rmdir(dir string) error { - return nil -} - -// Fs returns the parent Fs -func (o *Object) Fs() fs.Info { - return o.fs -} - -// Return a string version -func (o *Object) String() string { - if o == nil { - return "" - } - return o.remote -} - -// Remote returns the remote path -func (o *Object) Remote() string { - return o.remote -} - -// Hash returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Hash(t fs.HashType) (string, error) { - if t != fs.HashMD5 { - return "", fs.ErrHashUnsupported - } - dc, err := base64.StdEncoding.DecodeString(o.blob.Properties.ContentMD5) - if err != nil { - fs.Logf(o, "Cannot decode string: %s", err) - return "", err - } - return hex.EncodeToString(dc), nil -} - -// Size returns the size of an object in bytes -func (o *Object) Size() int64 { - return o.blob.Properties.ContentLength -} - -// readMetaData gets the metadata if it hasn't already been fetched -// -// it also sets the info -func (o *Object) readMetaData() (err error) { - if o.blob != nil { - return nil - } - meta, err := o.fs.bc.GetBlobMetadata(o.fs.container, o.fs.root + o.remote) - if err != nil { - return err - } - props, err := o.fs.bc.GetBlobProperties(o.fs.container, o.fs.root + o.remote) - if err != 
nil { - return err - } - o.blob = copyBlob(&storage.Blob{Name: o.remote, Properties: *props, Metadata: meta}) - return nil -} - -func (o *Object) ModTime() time.Time { - err := o.readMetaData() - t, _ := time.Parse(time.RFC1123, o.blob.Properties.LastModified) - if err != nil { - fs.Logf(o, "Failed to read LastModified: %v", err) - return time.Now() - } - return t -} - -// SetModTime sets the modification time of the local fs object -func (o *Object) SetModTime(modTime time.Time) error { - return nil -} - -// Storable raturns a boolean indicating if this object is storable -func (o *Object) Storable() bool { - return true -} - -// Open an object for read -func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) { - var readRange *string = nil - for _, option := range options { - switch option.(type) { - case *fs.RangeOption, *fs.SeekOption: - _, value := option.Header() - readRange = &value - default: - if option.Mandatory() { - fs.Logf(o, "Unsupported mandatory option: %v", option) - } - } - } - if readRange != nil { - return o.fs.bc.GetBlobRange(o.fs.container, o.fs.root + o.remote, *readRange, map[string]string{}) - } else { - return o.fs.bc.GetBlob(o.fs.container, o.fs.root + o.remote) - } -} - -// Update the Object from in with modTime and size -func (o *Object) Update(in io.Reader, src fs.ObjectInfo) error { - size := src.Size() - if size <= 64 * 1000 * 1000 { - err := o.fs.bc.CreateBlockBlobFromReader(o.fs.container, o.fs.root + o.remote, uint64(size), in, map[string]string{}) - if err != nil { - return err - } - } else { - // create block, put block, put block list - return fs.ErrorCantCopy - } - - - // Read the metadata from the newly created object - o.blob = nil // wipe old metadata - err := o.readMetaData() - return err -} - -// Remove an object -func (o *Object) Remove() error { - return o.fs.bc.DeleteBlob(o.fs.container, o.fs.root + o.remote, map[string]string{}) -} - -// MimeType of an Object if known, "" otherwise -func (o *Object) MimeType() string { - err := o.readMetaData() - if err != nil { - fs.Logf(o, "Failed to read metadata: %v", err) - return "" - } - return o.blob.Properties.ContentType -} - -// Check the interfaces are satisfied -var ( - _ fs.Fs = &Fs{} - _ fs.Copier = &Fs{} - _ fs.Object = &Object{} - _ fs.MimeTyper = &Object{} -) diff --git a/azureblob/azureblob.go b/azureblob/azureblob.go new file mode 100644 index 000000000..5a303b5e0 --- /dev/null +++ b/azureblob/azureblob.go @@ -0,0 +1,1107 @@ +// Package azureblob provides an interface to the Microsoft Azure blob object storage system + +// +build go1.7 + +package azureblob + +import ( + "bytes" + "crypto/md5" + "encoding/base64" + "encoding/binary" + "encoding/hex" + "fmt" + "hash" + "io" + "net/http" + "path" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/Azure/azure-sdk-for-go/storage" + "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/pacer" + "github.com/pkg/errors" +) + +const ( + apiVersion = "2017-04-17" + minSleep = 10 * time.Millisecond + maxSleep = 10 * time.Second + decayConstant = 1 // bigger for slower decay, exponential + listChunkSize = 5000 // number of items to read at once + modTimeKey = "mtime" + timeFormatIn = time.RFC3339 + timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00" + maxTotalParts = 50000 // in multipart upload + maxUncommittedSize = 9 << 30 // can't upload bigger than this +) + +// Globals +var ( + maxChunkSize = fs.SizeSuffix(100 * 1024 * 1024) + chunkSize = fs.SizeSuffix(4 * 1024 * 1024) + uploadCutoff = fs.SizeSuffix(256 * 
1024 * 1024) + maxUploadCutoff = fs.SizeSuffix(256 * 1024 * 1024) +) + +// Register with Fs +func init() { + fs.Register(&fs.RegInfo{ + Name: "azureblob", + Description: "Microsoft Azure Blob Storage", + NewFs: NewFs, + Options: []fs.Option{{ + Name: "account", + Help: "Storage Account Name", + }, { + Name: "key", + Help: "Storage Account Key", + }, { + Name: "endpoint", + Help: "Endpoint for the service - leave blank normally.", + }, + }, + }) + fs.VarP(&uploadCutoff, "azureblob-upload-cutoff", "", "Cutoff for switching to chunked upload") + fs.VarP(&chunkSize, "azureblob-chunk-size", "", "Upload chunk size. Must fit in memory.") +} + +// Fs represents a remote azure server +type Fs struct { + name string // name of this remote + root string // the path we are working on if any + features *fs.Features // optional features + account string // account name + key []byte // auth key + endpoint string // name of the starting api endpoint + bc *storage.BlobStorageClient + cc *storage.Container + container string // the container we are working on + containerOKMu sync.Mutex // mutex to protect container OK + containerOK bool // true if we have created the container + containerDeleted bool // true if we have deleted the container + pacer *pacer.Pacer // To pace and retry the API calls + uploadToken *pacer.TokenDispenser // control concurrency +} + +// Object describes a azure object +type Object struct { + fs *Fs // what this object is part of + remote string // The remote path + id string // azure id of the file + modTime time.Time // The modified time of the object if known + md5 string // MD5 hash if known + size int64 // Size of the object + mimeType string // Content-Type of the object + meta map[string]string // blob metadata +} + +// ------------------------------------------------------------ + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name +} + +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + if f.root == "" { + return f.container + } + return f.container + "/" + f.root +} + +// String converts this Fs to a string +func (f *Fs) String() string { + if f.root == "" { + return fmt.Sprintf("Azure container %s", f.container) + } + return fmt.Sprintf("Azure container %s path %s", f.container, f.root) +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// Pattern to match a azure path +var matcher = regexp.MustCompile(`^([^/]*)(.*)$`) + +// parseParse parses a azure 'url' +func parsePath(path string) (container, directory string, err error) { + parts := matcher.FindStringSubmatch(path) + if parts == nil { + err = errors.Errorf("couldn't find container in azure path %q", path) + } else { + container, directory = parts[1], parts[2] + directory = strings.Trim(directory, "/") + } + return +} + +// retryErrorCodes is a slice of error codes that we will retry +var retryErrorCodes = []int{ + 401, // Unauthorized (eg "Token has expired") + 408, // Request Timeout + 429, // Rate exceeded. + 500, // Get occasional 500 Internal Server Error + 503, // Service Unavailable + 504, // Gateway Time-out +} + +// shouldRetry returns a boolean as to whether this resp and err +// deserve to be retried. 
It returns the err as a convenience +func (f *Fs) shouldRetry(err error) (bool, error) { + // FIXME interpret special errors - more to do here + if storageErr, ok := err.(storage.AzureStorageServiceError); ok { + statusCode := storageErr.StatusCode + for _, e := range retryErrorCodes { + if statusCode == e { + return true, err + } + } + } + return fs.ShouldRetry(err), err +} + +// NewFs contstructs an Fs from the path, container:path +func NewFs(name, root string) (fs.Fs, error) { + if uploadCutoff > maxUploadCutoff { + return nil, errors.Errorf("azure: upload cutoff (%v) must be less than or equal to %v", uploadCutoff, maxUploadCutoff) + } + if chunkSize > maxChunkSize { + return nil, errors.Errorf("azure: chunk size can't be greater than %v - was %v", maxChunkSize, chunkSize) + } + container, directory, err := parsePath(root) + if err != nil { + return nil, err + } + account := fs.ConfigFileGet(name, "account") + if account == "" { + return nil, errors.New("account not found") + } + key := fs.ConfigFileGet(name, "key") + if key == "" { + return nil, errors.New("key not found") + } + keyBytes, err := base64.StdEncoding.DecodeString(key) + if err != nil { + return nil, errors.Errorf("malformed storage account key: %v", err) + } + + endpoint := fs.ConfigFileGet(name, "endpoint", storage.DefaultBaseURL) + + client, err := storage.NewClient(account, key, endpoint, apiVersion, true) + if err != nil { + return nil, errors.Wrap(err, "failed to make azure storage client") + } + client.HTTPClient = fs.Config.Client() + bc := client.GetBlobService() + + f := &Fs{ + name: name, + container: container, + root: directory, + account: account, + key: keyBytes, + endpoint: endpoint, + bc: &bc, + cc: bc.GetContainerReference(container), + pacer: pacer.New().SetMinSleep(minSleep).SetMaxSleep(maxSleep).SetDecayConstant(decayConstant), + uploadToken: pacer.NewTokenDispenser(fs.Config.Transfers), + } + f.features = (&fs.Features{ReadMimeType: true, WriteMimeType: true}).Fill(f) + if f.root != "" { + f.root += "/" + // Check to see if the (container,directory) is actually an existing file + oldRoot := f.root + remote := path.Base(directory) + f.root = path.Dir(directory) + if f.root == "." { + f.root = "" + } else { + f.root += "/" + } + _, err := f.NewObject(remote) + if err != nil { + if err == fs.ErrorObjectNotFound { + // File doesn't exist so return old f + f.root = oldRoot + return f, nil + } + return nil, err + } + // return an error with an fs which points to the parent + return f, fs.ErrorIsFile + } + return f, nil +} + +// Return an Object from a path +// +// If it can't be found it returns the error fs.ErrorObjectNotFound. +func (f *Fs) newObjectWithInfo(remote string, info *storage.Blob) (fs.Object, error) { + o := &Object{ + fs: f, + remote: remote, + } + if info != nil { + err := o.decodeMetaData(info) + if err != nil { + return nil, err + } + } else { + err := o.readMetaData() // reads info and headers, returning an error + if err != nil { + return nil, err + } + } + return o, nil +} + +// NewObject finds the Object at remote. If it can't be found +// it returns the error fs.ErrorObjectNotFound. +func (f *Fs) NewObject(remote string) (fs.Object, error) { + return f.newObjectWithInfo(remote, nil) +} + +// getBlobReference creates an empty blob reference with no metadata +func (f *Fs) getBlobReference(remote string) *storage.Blob { + return f.cc.GetBlobReference(f.root + remote) +} + +// getBlobWithModTime adds the modTime passed in to o.meta and creates +// a Blob from it. 
+func (o *Object) getBlobWithModTime(modTime time.Time) *storage.Blob { + // Make sure o.meta is not nil + if o.meta == nil { + o.meta = make(map[string]string, 1) + } + + // Set modTimeKey in it + o.meta[modTimeKey] = modTime.Format(timeFormatOut) + + blob := o.getBlobReference() + blob.Metadata = o.meta + return blob +} + +// listFn is called from list to handle an object +type listFn func(remote string, object *storage.Blob, isDirectory bool) error + +// list lists the objects into the function supplied from +// the container and root supplied +// +// dir is the starting directory, "" for root +func (f *Fs) list(dir string, recurse bool, maxResults uint, fn listFn) error { + f.containerOKMu.Lock() + deleted := f.containerDeleted + f.containerOKMu.Unlock() + if deleted { + return fs.ErrorDirNotFound + } + root := f.root + if dir != "" { + root += dir + "/" + } + delimiter := "" + if !recurse { + delimiter = "/" + } + params := storage.ListBlobsParameters{ + MaxResults: maxResults, + Prefix: root, + Delimiter: delimiter, + Include: &storage.IncludeBlobDataset{ + Snapshots: false, + Metadata: true, + UncommittedBlobs: false, + Copy: false, + }, + } + for { + var response storage.BlobListResponse + err := f.pacer.Call(func() (bool, error) { + var err error + response, err = f.cc.ListBlobs(params) + return f.shouldRetry(err) + }) + if err != nil { + if storageErr, ok := err.(storage.AzureStorageServiceError); ok && storageErr.StatusCode == http.StatusNotFound { + return fs.ErrorDirNotFound + } + return err + } + for i := range response.Blobs { + file := &response.Blobs[i] + // Finish if file name no longer has prefix + // if prefix != "" && !strings.HasPrefix(file.Name, prefix) { + // return nil + // } + if !strings.HasPrefix(file.Name, f.root) { + fs.Debugf(f, "Odd name received %q", file.Name) + continue + } + remote := file.Name[len(f.root):] + // Check for directory + isDirectory := strings.HasSuffix(remote, "/") + if isDirectory { + remote = remote[:len(remote)-1] + } + // Send object + err = fn(remote, file, isDirectory) + if err != nil { + return err + } + } + // Send the subdirectories + for _, remote := range response.BlobPrefixes { + remote := strings.TrimRight(remote, "/") + if !strings.HasPrefix(remote, f.root) { + fs.Debugf(f, "Odd directory name received %q", remote) + continue + } + remote = remote[len(f.root):] + // Send object + err = fn(remote, nil, true) + if err != nil { + return err + } + } + // end if no NextFileName + if response.NextMarker == "" { + break + } + params.Marker = response.NextMarker + } + return nil +} + +// Convert a list item into a DirEntry +func (f *Fs) itemToDirEntry(remote string, object *storage.Blob, isDirectory bool) (fs.DirEntry, error) { + if isDirectory { + d := fs.NewDir(remote, time.Time{}) + return d, nil + } + o, err := f.newObjectWithInfo(remote, object) + if err != nil { + return nil, err + } + return o, nil +} + +// listDir lists a single directory +func (f *Fs) listDir(dir string) (entries fs.DirEntries, err error) { + err = f.list(dir, false, listChunkSize, func(remote string, object *storage.Blob, isDirectory bool) error { + entry, err := f.itemToDirEntry(remote, object, isDirectory) + if err != nil { + return err + } + if entry != nil { + entries = append(entries, entry) + } + return nil + }) + if err != nil { + return nil, err + } + return entries, nil +} + +// listContainers returns all the containers to out +func (f *Fs) listContainers(dir string) (entries fs.DirEntries, err error) { + if dir != "" { + return nil, 
fs.ErrorListBucketRequired + } + err = f.listContainersToFn(func(container *storage.Container) error { + d := fs.NewDir(container.Name, time.Time{}) + entries = append(entries, d) + return nil + }) + if err != nil { + return nil, err + } + return entries, nil +} + +// List the objects and directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +func (f *Fs) List(dir string) (entries fs.DirEntries, err error) { + if f.container == "" { + return f.listContainers(dir) + } + return f.listDir(dir) +} + +// ListR lists the objects and directories of the Fs starting +// from dir recursively into out. +// +// dir should be "" to start from the root, and should not +// have trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +// +// It should call callback for each tranche of entries read. +// These need not be returned in any particular order. If +// callback returns an error then the listing will stop +// immediately. +// +// Don't implement this unless you have a more efficient way +// of listing recursively that doing a directory traversal. +func (f *Fs) ListR(dir string, callback fs.ListRCallback) (err error) { + if f.container == "" { + return fs.ErrorListBucketRequired + } + list := fs.NewListRHelper(callback) + err = f.list(dir, true, listChunkSize, func(remote string, object *storage.Blob, isDirectory bool) error { + entry, err := f.itemToDirEntry(remote, object, isDirectory) + if err != nil { + return err + } + return list.Add(entry) + }) + if err != nil { + return err + } + return list.Flush() +} + +// listContainerFn is called from listContainersToFn to handle a container +type listContainerFn func(*storage.Container) error + +// listContainersToFn lists the containers to the function supplied +func (f *Fs) listContainersToFn(fn listContainerFn) error { + // FIXME page the containers if necessary? + params := storage.ListContainersParameters{} + var response *storage.ContainerListResponse + err := f.pacer.Call(func() (bool, error) { + var err error + response, err = f.bc.ListContainers(params) + return f.shouldRetry(err) + }) + if err != nil { + return err + } + for i := range response.Containers { + err = fn(&response.Containers[i]) + if err != nil { + return err + } + } + return nil +} + +// Put the object into the container +// +// Copy the reader in to the new object which is returned +// +// The new object may have been created if an error is returned +func (f *Fs) Put(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + // Temporary Object under construction + fs := &Object{ + fs: f, + remote: src.Remote(), + } + return fs, fs.Update(in, src, options...) 
+} + +// Mkdir creates the container if it doesn't exist +func (f *Fs) Mkdir(dir string) error { + f.containerOKMu.Lock() + defer f.containerOKMu.Unlock() + if f.containerOK { + return nil + } + options := storage.CreateContainerOptions{ + Access: storage.ContainerAccessTypePrivate, + } + err := f.pacer.Call(func() (bool, error) { + err := f.cc.Create(&options) + if err != nil { + if storageErr, ok := err.(storage.AzureStorageServiceError); ok { + switch storageErr.StatusCode { + case http.StatusConflict: + switch storageErr.Code { + case "ContainerAlreadyExists": + f.containerOK = true + return false, nil + case "ContainerBeingDeleted": + f.containerDeleted = true + return true, err + } + } + } + } + return f.shouldRetry(err) + }) + if err == nil { + f.containerOK = true + f.containerDeleted = false + } + return errors.Wrap(err, "failed to make container") +} + +// isEmpty checks to see if a given directory is empty and returns an error if not +func (f *Fs) isEmpty(dir string) (err error) { + empty := true + err = f.list("", true, 1, func(remote string, object *storage.Blob, isDirectory bool) error { + empty = false + return nil + }) + if err != nil { + return err + } + if !empty { + return fs.ErrorDirectoryNotEmpty + } + return nil +} + +// deleteContainer deletes the container. It can delete a full +// container so use isEmpty if you don't want that. +func (f *Fs) deleteContainer() error { + f.containerOKMu.Lock() + defer f.containerOKMu.Unlock() + options := storage.DeleteContainerOptions{} + err := f.pacer.Call(func() (bool, error) { + exists, err := f.cc.Exists() + if err != nil { + return f.shouldRetry(err) + } + if !exists { + return false, fs.ErrorDirNotFound + } + err = f.cc.Delete(&options) + return f.shouldRetry(err) + }) + if err == nil { + f.containerOK = false + f.containerDeleted = true + } + return errors.Wrap(err, "failed to delete container") +} + +// Rmdir deletes the container if the fs is at the root +// +// Returns an error if it isn't empty +func (f *Fs) Rmdir(dir string) error { + err := f.isEmpty(dir) + if err != nil { + return err + } + if f.root != "" || dir != "" { + return nil + } + return f.deleteContainer() +} + +// Precision of the remote +func (f *Fs) Precision() time.Duration { + return time.Nanosecond +} + +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + +// Purge deletes all the files and directories including the old versions. +func (f *Fs) Purge() error { + dir := "" // forward compat! + if f.root != "" || dir != "" { + // Delegate to caller if not root container + return fs.ErrorCantPurge + } + return f.deleteContainer() +} + +// Copy src to this remote using server side copy operations. 
+// +// This is stored with the remote path given +// +// It returns the destination Object and a possible error +// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantCopy +func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) { + err := f.Mkdir("") + if err != nil { + return nil, err + } + srcObj, ok := src.(*Object) + if !ok { + fs.Debugf(src, "Can't copy - not same remote type") + return nil, fs.ErrorCantCopy + } + dstBlob := f.getBlobReference(remote) + srcBlob := srcObj.getBlobReference() + options := storage.CopyOptions{} + sourceBlobURL := srcBlob.GetURL() + err = f.pacer.Call(func() (bool, error) { + err = dstBlob.Copy(sourceBlobURL, &options) + return f.shouldRetry(err) + }) + if err != nil { + return nil, err + } + return f.NewObject(remote) +} + +// ------------------------------------------------------------ + +// Fs returns the parent Fs +func (o *Object) Fs() fs.Info { + return o.fs +} + +// Return a string version +func (o *Object) String() string { + if o == nil { + return "" + } + return o.remote +} + +// Remote returns the remote path +func (o *Object) Remote() string { + return o.remote +} + +// Hash returns the MD5 of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } + // Convert base64 encoded md5 into lower case hex + if o.md5 == "" { + return "", nil + } + data, err := base64.StdEncoding.DecodeString(o.md5) + if err != nil { + return "", errors.Wrapf(err, "Failed to decode Content-MD5: %q", o.md5) + } + return hex.EncodeToString(data), nil +} + +// Size returns the size of an object in bytes +func (o *Object) Size() int64 { + return o.size +} + +// decodeMetaData sets the metadata from the data passed in +// +// Sets +// o.id +// o.modTime +// o.size +// o.md5 +// o.meta +func (o *Object) decodeMetaData(info *storage.Blob) (err error) { + o.md5 = info.Properties.ContentMD5 + o.mimeType = info.Properties.ContentType + o.size = info.Properties.ContentLength + o.modTime = time.Time(info.Properties.LastModified) + if len(info.Metadata) > 0 { + o.meta = info.Metadata + if modTime, ok := info.Metadata[modTimeKey]; ok { + when, err := time.Parse(timeFormatIn, modTime) + if err != nil { + fs.Debugf(o, "Couldn't parse %v = %q: %v", modTimeKey, modTime, err) + } + o.modTime = when + } + } else { + o.meta = nil + } + return nil +} + +// getBlobReference creates an empty blob reference with no metadata +func (o *Object) getBlobReference() *storage.Blob { + return o.fs.getBlobReference(o.remote) +} + +// clearMetaData clears enough metadata so readMetaData will re-read it +func (o *Object) clearMetaData() { + o.modTime = time.Time{} +} + +// readMetaData gets the metadata if it hasn't already been fetched +// +// Sets +// o.id +// o.modTime +// o.size +// o.md5 +func (o *Object) readMetaData() (err error) { + if !o.modTime.IsZero() { + return nil + } + blob := o.getBlobReference() + + // Read metadata (this includes metadata) + getPropertiesOptions := storage.GetBlobPropertiesOptions{} + err = o.fs.pacer.Call(func() (bool, error) { + err = blob.GetProperties(&getPropertiesOptions) + return o.fs.shouldRetry(err) + }) + if err != nil { + if storageErr, ok := err.(storage.AzureStorageServiceError); ok && storageErr.StatusCode == http.StatusNotFound { + return fs.ErrorObjectNotFound + } + return err + } + + return o.decodeMetaData(blob) +} + +// timeString returns modTime as the number of milliseconds +// 
elapsed since January 1, 1970 UTC as a decimal string. +func timeString(modTime time.Time) string { + return strconv.FormatInt(modTime.UnixNano()/1E6, 10) +} + +// parseTimeString converts a decimal string number of milliseconds +// elapsed since January 1, 1970 UTC into a time.Time and stores it in +// the modTime variable. +func (o *Object) parseTimeString(timeString string) (err error) { + if timeString == "" { + return nil + } + unixMilliseconds, err := strconv.ParseInt(timeString, 10, 64) + if err != nil { + fs.Debugf(o, "Failed to parse mod time string %q: %v", timeString, err) + return err + } + o.modTime = time.Unix(unixMilliseconds/1E3, (unixMilliseconds%1E3)*1E6).UTC() + return nil +} + +// ModTime returns the modification time of the object +// +// It attempts to read the objects mtime and if that isn't present the +// LastModified returned in the http headers +func (o *Object) ModTime() (result time.Time) { + // The error is logged in readMetaData + _ = o.readMetaData() + return o.modTime +} + +// SetModTime sets the modification time of the local fs object +func (o *Object) SetModTime(modTime time.Time) error { + blob := o.getBlobWithModTime(modTime) + options := storage.SetBlobMetadataOptions{} + err := o.fs.pacer.Call(func() (bool, error) { + err := blob.SetMetadata(&options) + return o.fs.shouldRetry(err) + }) + if err != nil { + return err + } + o.modTime = modTime + return nil +} + +// Storable returns if this object is storable +func (o *Object) Storable() bool { + return true +} + +// openFile represents an Object open for reading +type openFile struct { + o *Object // Object we are reading for + resp *http.Response // response of the GET + body io.Reader // reading from here + hash hash.Hash // currently accumulating MD5 + bytes int64 // number of bytes read on this connection + eof bool // whether we have read end of file +} + +// Open an object for read +func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) { + getBlobOptions := storage.GetBlobOptions{} + getBlobRangeOptions := storage.GetBlobRangeOptions{ + GetBlobOptions: &getBlobOptions, + } + for _, option := range options { + switch x := option.(type) { + case *fs.RangeOption: + getBlobRangeOptions.Range = &storage.BlobRange{ + Start: uint64(x.Start), + End: uint64(x.End), + } + case *fs.SeekOption: + getBlobRangeOptions.Range = &storage.BlobRange{ + Start: uint64(x.Offset), + End: uint64(o.size), + } + default: + if option.Mandatory() { + fs.Logf(o, "Unsupported mandatory option: %v", option) + } + } + } + blob := o.getBlobReference() + err = o.fs.pacer.Call(func() (bool, error) { + if getBlobRangeOptions.Range == nil { + in, err = blob.Get(&getBlobOptions) + } else { + in, err = blob.GetRange(&getBlobRangeOptions) + } + return o.fs.shouldRetry(err) + }) + if err != nil { + return nil, errors.Wrap(err, "failed to open for download") + } + return in, nil +} + +// dontEncode is the characters that do not need percent-encoding +// +// The characters that do not need percent-encoding are a subset of +// the printable ASCII characters: upper-case letters, lower-case +// letters, digits, ".", "_", "-", "/", "~", "!", "$", "'", "(", ")", +// "*", ";", "=", ":", and "@". All other byte values in a UTF-8 must +// be replaced with "%" and the two-digit hex value of the byte. 
+const dontEncode = (`abcdefghijklmnopqrstuvwxyz` + + `ABCDEFGHIJKLMNOPQRSTUVWXYZ` + + `0123456789` + + `._-/~!$'()*;=:@`) + +// noNeedToEncode is a bitmap of characters which don't need % encoding +var noNeedToEncode [256]bool + +func init() { + for _, c := range dontEncode { + noNeedToEncode[c] = true + } +} + +// urlEncode encodes in with % encoding +func urlEncode(in string) string { + var out bytes.Buffer + for i := 0; i < len(in); i++ { + c := in[i] + if noNeedToEncode[c] { + _ = out.WriteByte(c) + } else { + _, _ = out.WriteString(fmt.Sprintf("%%%2X", c)) + } + } + return out.String() +} + +// uploadMultipart uploads a file using multipart upload +// +// Write a larger blob, using CreateBlockBlob, PutBlock, and PutBlockList. +func (o *Object) uploadMultipart(in io.Reader, size int64, blob *storage.Blob, putBlobOptions *storage.PutBlobOptions) (err error) { + // Calculate correct chunkSize + chunkSize := int64(chunkSize) + var totalParts int64 + for { + // Calculate number of parts + var remainder int64 + totalParts, remainder = size/chunkSize, size%chunkSize + if remainder != 0 { + totalParts++ + } + if totalParts < maxTotalParts { + break + } + // Double chunk size if the number of parts is too big + chunkSize *= 2 + if chunkSize > int64(maxChunkSize) { + return errors.Errorf("can't upload as it is too big %v - takes more than %d chunks of %v", fs.SizeSuffix(size), totalParts, fs.SizeSuffix(chunkSize/2)) + } + } + fs.Debugf(o, "Multipart upload session started for %d parts of size %v", totalParts, fs.SizeSuffix(chunkSize)) + + // Create an empty blob + err = o.fs.pacer.Call(func() (bool, error) { + err := blob.CreateBlockBlob(putBlobOptions) + return o.fs.shouldRetry(err) + }) + + // block ID variables + var ( + rawID uint64 + bytesID = make([]byte, 8) + blockID = "" // id in base64 encoded form + blocks = make([]storage.Block, 0, totalParts) + ) + + // increment the blockID + nextID := func() { + rawID++ + binary.LittleEndian.PutUint64(bytesID, rawID) + blockID = base64.StdEncoding.EncodeToString(bytesID) + blocks = append(blocks, storage.Block{ + ID: blockID, + Status: storage.BlockStatusLatest, + }) + } + + // Upload the chunks + remaining := size + position := int64(0) + errs := make(chan error, 1) + var wg sync.WaitGroup +outer: + for part := 0; part < int(totalParts); part++ { + // Check any errors + select { + case err = <-errs: + break outer + default: + } + + reqSize := remaining + if reqSize >= chunkSize { + reqSize = chunkSize + } + + // Make a block of memory + buf := make([]byte, reqSize) + + // Read the chunk + _, err = io.ReadFull(in, buf) + if err != nil { + err = errors.Wrap(err, "multipart upload failed to read source") + break outer + } + + // Transfer the chunk + nextID() + wg.Add(1) + o.fs.uploadToken.Get() + go func(part int, position int64, blockID string) { + defer wg.Done() + defer o.fs.uploadToken.Put() + fs.Debugf(o, "Uploading part %d/%d offset %v/%v part size %v", part+1, totalParts, fs.SizeSuffix(position), fs.SizeSuffix(size), fs.SizeSuffix(chunkSize)) + + // Upload the block, with MD5 for check + md5sum := md5.Sum(buf) + putBlockOptions := storage.PutBlockOptions{ + ContentMD5: base64.StdEncoding.EncodeToString(md5sum[:]), + } + err = o.fs.pacer.Call(func() (bool, error) { + err = blob.PutBlockWithLength(blockID, uint64(len(buf)), bytes.NewBuffer(buf), &putBlockOptions) + return o.fs.shouldRetry(err) + }) + + if err != nil { + err = errors.Wrap(err, "multipart upload failed to upload part") + select { + case errs <- err: + default: + } + return + } + 
}(part, position, blockID) + + // ready for next block + remaining -= chunkSize + position += chunkSize + } + wg.Wait() + if err == nil { + select { + case err = <-errs: + default: + } + } + if err != nil { + return err + } + + // Finalise the upload session + putBlockListOptions := storage.PutBlockListOptions{} + err = o.fs.pacer.Call(func() (bool, error) { + err := blob.PutBlockList(blocks, &putBlockListOptions) + return o.fs.shouldRetry(err) + }) + if err != nil { + return errors.Wrap(err, "multipart upload failed to finalize") + } + return nil +} + +// Update the object with the contents of the io.Reader, modTime and size +// +// The new object may have been created if an error is returned +func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) { + err = o.fs.Mkdir("") + if err != nil { + return err + } + size := src.Size() + blob := o.getBlobWithModTime(src.ModTime()) + blob.Properties.ContentType = fs.MimeType(o) + if sourceMD5, _ := src.Hash(fs.HashMD5); sourceMD5 != "" { + sourceMD5bytes, err := hex.DecodeString(sourceMD5) + if err == nil { + blob.Properties.ContentMD5 = base64.StdEncoding.EncodeToString(sourceMD5bytes) + } else { + fs.Debugf(o, "Failed to decode %q as MD5: %v", sourceMD5, err) + } + } + putBlobOptions := storage.PutBlobOptions{} + + // Don't retry, return a retry error instead + err = o.fs.pacer.CallNoRetry(func() (bool, error) { + if size >= int64(uploadCutoff) { + // If a large file upload in chunks + err = o.uploadMultipart(in, size, blob, &putBlobOptions) + } else { + // Write a small blob in one transaction + if size == 0 { + in = nil + } + err = blob.CreateBlockBlobFromReader(in, &putBlobOptions) + } + return o.fs.shouldRetry(err) + }) + if err != nil { + return err + } + o.clearMetaData() + return o.readMetaData() +} + +// Remove an object +func (o *Object) Remove() error { + blob := o.getBlobReference() + options := storage.DeleteBlobOptions{} + return o.fs.pacer.Call(func() (bool, error) { + err := blob.Delete(&options) + return o.fs.shouldRetry(err) + }) +} + +// MimeType of an Object if known, "" otherwise +func (o *Object) MimeType() string { + return o.mimeType +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = &Fs{} + _ fs.Copier = &Fs{} + _ fs.Purger = &Fs{} + _ fs.ListRer = &Fs{} + _ fs.Object = &Object{} + _ fs.MimeTyper = &Object{} +) diff --git a/azureblob/azureblob_test.go b/azureblob/azureblob_test.go new file mode 100644 index 000000000..ae36dc5ca --- /dev/null +++ b/azureblob/azureblob_test.go @@ -0,0 +1,76 @@ +// Test AzureBlob filesystem interface +// +// Automatically generated - DO NOT EDIT +// Regenerate with: make gen_tests + +// +build go1.7 + +package azureblob_test + +import ( + "testing" + + "github.com/ncw/rclone/azureblob" + "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fstest/fstests" +) + +func TestSetup(t *testing.T) { + fstests.NilObject = fs.Object((*azureblob.Object)(nil)) + fstests.RemoteName = "TestAzureBlob:" +} + +// Generic tests for the Fs +func TestInit(t *testing.T) { fstests.TestInit(t) } +func TestFsString(t *testing.T) { fstests.TestFsString(t) } +func TestFsName(t *testing.T) { fstests.TestFsName(t) } +func TestFsRoot(t *testing.T) { fstests.TestFsRoot(t) } +func TestFsRmdirEmpty(t *testing.T) { fstests.TestFsRmdirEmpty(t) } +func TestFsRmdirNotFound(t *testing.T) { fstests.TestFsRmdirNotFound(t) } +func TestFsMkdir(t *testing.T) { fstests.TestFsMkdir(t) } +func TestFsMkdirRmdirSubdir(t *testing.T) { fstests.TestFsMkdirRmdirSubdir(t) } +func 
TestFsListEmpty(t *testing.T) { fstests.TestFsListEmpty(t) } +func TestFsListDirEmpty(t *testing.T) { fstests.TestFsListDirEmpty(t) } +func TestFsListRDirEmpty(t *testing.T) { fstests.TestFsListRDirEmpty(t) } +func TestFsNewObjectNotFound(t *testing.T) { fstests.TestFsNewObjectNotFound(t) } +func TestFsPutFile1(t *testing.T) { fstests.TestFsPutFile1(t) } +func TestFsPutError(t *testing.T) { fstests.TestFsPutError(t) } +func TestFsPutFile2(t *testing.T) { fstests.TestFsPutFile2(t) } +func TestFsUpdateFile1(t *testing.T) { fstests.TestFsUpdateFile1(t) } +func TestFsListDirFile2(t *testing.T) { fstests.TestFsListDirFile2(t) } +func TestFsListRDirFile2(t *testing.T) { fstests.TestFsListRDirFile2(t) } +func TestFsListDirRoot(t *testing.T) { fstests.TestFsListDirRoot(t) } +func TestFsListRDirRoot(t *testing.T) { fstests.TestFsListRDirRoot(t) } +func TestFsListSubdir(t *testing.T) { fstests.TestFsListSubdir(t) } +func TestFsListRSubdir(t *testing.T) { fstests.TestFsListRSubdir(t) } +func TestFsListLevel2(t *testing.T) { fstests.TestFsListLevel2(t) } +func TestFsListRLevel2(t *testing.T) { fstests.TestFsListRLevel2(t) } +func TestFsListFile1(t *testing.T) { fstests.TestFsListFile1(t) } +func TestFsNewObject(t *testing.T) { fstests.TestFsNewObject(t) } +func TestFsListFile1and2(t *testing.T) { fstests.TestFsListFile1and2(t) } +func TestFsNewObjectDir(t *testing.T) { fstests.TestFsNewObjectDir(t) } +func TestFsCopy(t *testing.T) { fstests.TestFsCopy(t) } +func TestFsMove(t *testing.T) { fstests.TestFsMove(t) } +func TestFsDirMove(t *testing.T) { fstests.TestFsDirMove(t) } +func TestFsRmdirFull(t *testing.T) { fstests.TestFsRmdirFull(t) } +func TestFsPrecision(t *testing.T) { fstests.TestFsPrecision(t) } +func TestFsDirChangeNotify(t *testing.T) { fstests.TestFsDirChangeNotify(t) } +func TestObjectString(t *testing.T) { fstests.TestObjectString(t) } +func TestObjectFs(t *testing.T) { fstests.TestObjectFs(t) } +func TestObjectRemote(t *testing.T) { fstests.TestObjectRemote(t) } +func TestObjectHashes(t *testing.T) { fstests.TestObjectHashes(t) } +func TestObjectModTime(t *testing.T) { fstests.TestObjectModTime(t) } +func TestObjectMimeType(t *testing.T) { fstests.TestObjectMimeType(t) } +func TestObjectSetModTime(t *testing.T) { fstests.TestObjectSetModTime(t) } +func TestObjectSize(t *testing.T) { fstests.TestObjectSize(t) } +func TestObjectOpen(t *testing.T) { fstests.TestObjectOpen(t) } +func TestObjectOpenSeek(t *testing.T) { fstests.TestObjectOpenSeek(t) } +func TestObjectPartialRead(t *testing.T) { fstests.TestObjectPartialRead(t) } +func TestObjectUpdate(t *testing.T) { fstests.TestObjectUpdate(t) } +func TestObjectStorable(t *testing.T) { fstests.TestObjectStorable(t) } +func TestFsIsFile(t *testing.T) { fstests.TestFsIsFile(t) } +func TestFsIsFileNotFound(t *testing.T) { fstests.TestFsIsFileNotFound(t) } +func TestObjectRemove(t *testing.T) { fstests.TestObjectRemove(t) } +func TestFsPutUnknownLengthFile(t *testing.T) { fstests.TestFsPutUnknownLengthFile(t) } +func TestObjectPurge(t *testing.T) { fstests.TestObjectPurge(t) } +func TestFinalise(t *testing.T) { fstests.TestFinalise(t) } diff --git a/azureblob/azureblob_unsupported.go b/azureblob/azureblob_unsupported.go new file mode 100644 index 000000000..178345ca3 --- /dev/null +++ b/azureblob/azureblob_unsupported.go @@ -0,0 +1,6 @@ +// Build for unsupported platforms to stop go complaining +// about "no buildable Go source files " + +// +build !go1.7 + +package azureblob diff --git a/bin/make_manual.py b/bin/make_manual.py index 
f9e8f5877..9435bfed0 100755 --- a/bin/make_manual.py +++ b/bin/make_manual.py @@ -32,6 +32,7 @@ docs = [ "drive.md", "http.md", "hubic.md", + "azureblob.md", "onedrive.md", "qingstor.md", "swift.md", diff --git a/cmd/cmd.go b/cmd/cmd.go index 22be617f1..1fa9734ab 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -51,6 +51,7 @@ from various cloud storage systems and using file transfer services, such as: * Google Drive * HTTP * Hubic + * Microsoft Azure Blob Storage * Microsoft OneDrive * Openstack Swift / Rackspace cloud files / Memset Memstore * QingStor diff --git a/docs/content/about.md b/docs/content/about.md index edef66a43..545a76494 100644 --- a/docs/content/about.md +++ b/docs/content/about.md @@ -23,6 +23,7 @@ Rclone is a command line program to sync files and directories to and from * Google Drive * HTTP * Hubic + * Microsoft Azure Blob Storage * Microsoft OneDrive * Openstack Swift / Rackspace cloud files / Memset Memstore * QingStor diff --git a/docs/content/azureblob.md b/docs/content/azureblob.md new file mode 100644 index 000000000..f2315884c --- /dev/null +++ b/docs/content/azureblob.md @@ -0,0 +1,159 @@ +--- +title: "Microsoft Azure Blob Storage" +description: "Rclone docs for Microsoft Azure Blob Storage" +date: "2017-07-30" +--- + + Microsoft Azure Blob Storage +----------------------------------------- + +Paths are specified as `remote:container` (or `remote:` for the `lsd` +command.) You may put subdirectories in too, eg +`remote:container/path/to/dir`. + +Here is an example of making a Microsoft Azure Blob Storage +configuration. For a remote called `remote`. First run: + + rclone config + +This will guide you through an interactive setup process: + +``` +No remotes found - make a new one +n) New remote +s) Set configuration password +q) Quit config +n/s/q> n +name> remote +Type of storage to configure. +Choose a number from below, or type in your own value + 1 / Amazon Drive + \ "amazon cloud drive" + 2 / Amazon S3 (also Dreamhost, Ceph, Minio) + \ "s3" + 3 / Backblaze B2 + \ "b2" + 4 / Box + \ "box" + 5 / Dropbox + \ "dropbox" + 6 / Encrypt/Decrypt a remote + \ "crypt" + 7 / FTP Connection + \ "ftp" + 8 / Google Cloud Storage (this is not Google Drive) + \ "google cloud storage" + 9 / Google Drive + \ "drive" +10 / Hubic + \ "hubic" +11 / Local Disk + \ "local" +12 / Microsoft Azure Blob Storage + \ "azureblob" +13 / Microsoft OneDrive + \ "onedrive" +14 / Openstack Swift (Rackspace Cloud Files, Memset Memstore, OVH) + \ "swift" +15 / SSH/SFTP Connection + \ "sftp" +16 / Yandex Disk + \ "yandex" +17 / http Connection + \ "http" +Storage> azureblob +Storage Account Name +account> account_name +Storage Account Key +key> base64encodedkey== +Endpoint for the service - leave blank normally. +endpoint> +Remote config +-------------------- +[remote] +account = account_name +key = base64encodedkey== +endpoint = +-------------------- +y) Yes this is OK +e) Edit this remote +d) Delete this remote +y/e/d> y +``` + +See all containers + + rclone lsd remote: + +Make a new container + + rclone mkdir remote:container + +List the contents of a container + + rclone ls remote:container + +Sync `/home/local/directory` to the remote container, deleting any excess +files in the container. + + rclone sync /home/local/directory remote:container + +### --fast-list ### + +This remote supports `--fast-list` which allows you to use fewer +transactions in exchange for more memory. See the [rclone +docs](/docs/#fast-list) for more details. 
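+
+For example, assuming the same local directory and container as the sync example above:
+
+    rclone --fast-list sync /home/local/directory remote:container
+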
+ +### Modified time ### + +The modified time is stored as metadata on the object with the `mtime` +key. It is stored using RFC3339 Format time with nanosecond +precision. The metadata is supplied during directory listings so +there is no overhead to using it. + +### Hashes ### + +MD5 hashes are stored with small blobs. However blobs that were +uploaded in chunks don't have MD5 hashes. + +### Multipart uploads ### + +Rclone supports multipart uploads with Azure Blob storage. Files +bigger than 256MB will be uploaded using chunked upload by default. + +The files will be uploaded in parallel in 4MB chunks (by default). +Note that these chunks are buffered in memory and there may be up to +`--transfers` of them being uploaded at once. + +Files can't be split into more than 50,000 chunks so by default, so +the largest file that can be uploaded with 4MB chunk size is 195GB. +Above this rclone will double the chunk size until it creates less +than 50,000 chunks. By default this will mean a maximum file size of +3.2TB can be uploaded. This can be raised to 5TB using +`--azureblob-chunk-size 100M`. + +Note that rclone doesn't commit the block list until the end of the +upload which means that there is a limit of 9.5TB of multipart uploads +in progress as Azure won't allow more than that amount of uncommitted +blocks. + +### Specific options ### + +Here are the command line options specific to this cloud storage +system. + +#### --azureblob-upload-cutoff=SIZE #### + +Cutoff for switching to chunked upload - must be <= 256MB. The default +is 256MB. + +#### --azureblob-chunk-size=SIZE #### + +Upload chunk size. Default 4MB. Note that this is stored in memory +and there may be up to `--transfers` chunks stored at once in memory. +This can be at most 100MB. + +### Limitations ### + +MD5 sums are only uploaded with chunked files if the source has an MD5 +sum. This will always be the case for a local to azure copy. diff --git a/docs/content/docs.md b/docs/content/docs.md index 780db1ce6..5d998cf58 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -30,6 +30,7 @@ See the following for detailed instructions for * [Google Drive](/drive/) * [HTTP](/http/) * [Hubic](/hubic/) + * [Microsoft Azure Blob Storage](/azureblob/) * [Microsoft OneDrive](/onedrive/) * [Openstack Swift / Rackspace Cloudfiles / Memset Memstore](/swift/) * [QingStor](/qingstor/) diff --git a/docs/content/overview.md b/docs/content/overview.md index 76589cc4d..3aafb40fe 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -15,24 +15,25 @@ show through. Here is an overview of the major features of each cloud storage system. 
-| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | -| ---------------------- |:-------:|:-------:|:----------------:|:---------------:|:---------:| -| Amazon Drive | MD5 | No | Yes | No | R | -| Amazon S3 | MD5 | Yes | No | No | R/W | -| Backblaze B2 | SHA1 | Yes | No | No | R/W | -| Box | SHA1 | Yes | Yes | No | - | -| Dropbox | DBHASH †| Yes | Yes | No | - | -| FTP | - | No | No | No | - | -| Google Cloud Storage | MD5 | Yes | No | No | R/W | -| Google Drive | MD5 | Yes | No | Yes | R/W | -| HTTP | - | No | No | No | R | -| Hubic | MD5 | Yes | No | No | R/W | -| Microsoft OneDrive | SHA1 | Yes | Yes | No | R | -| Openstack Swift | MD5 | Yes | No | No | R/W | -| QingStor | - | No | No | No | R/W | -| SFTP | - | Yes | Depends | No | - | -| Yandex Disk | MD5 | Yes | No | No | R/W | -| The local filesystem | All | Yes | Depends | No | - | +| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | +| ---------------------------- |:-------:|:-------:|:----------------:|:---------------:|:---------:| +| Amazon Drive | MD5 | No | Yes | No | R | +| Amazon S3 | MD5 | Yes | No | No | R/W | +| Backblaze B2 | SHA1 | Yes | No | No | R/W | +| Box | SHA1 | Yes | Yes | No | - | +| Dropbox | DBHASH †| Yes | Yes | No | - | +| FTP | - | No | No | No | - | +| Google Cloud Storage | MD5 | Yes | No | No | R/W | +| Google Drive | MD5 | Yes | No | Yes | R/W | +| HTTP | - | No | No | No | R | +| Hubic | MD5 | Yes | No | No | R/W | +| Microsoft Azure Blob Storage | MD5 | Yes | No | No | R/W | +| Microsoft OneDrive | SHA1 | Yes | Yes | No | R | +| Openstack Swift | MD5 | Yes | No | No | R/W | +| QingStor | - | No | No | No | R/W | +| SFTP | - | Yes | Depends | No | - | +| Yandex Disk | MD5 | Yes | No | No | R/W | +| The local filesystem | All | Yes | Depends | No | - | ### Hash ### @@ -111,24 +112,25 @@ All the remotes support a basic set of features, but there are some optional features supported by some remotes used to make some operations more efficient. 
-| Name | Purge | Copy | Move | DirMove | CleanUp | ListR | -| ---------------------- |:-----:|:----:|:----:|:-------:|:-------:|:-----:| -| Amazon Drive | Yes | No | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | -| Amazon S3 | No | Yes | No | No | No | Yes | -| Backblaze B2 | No | No | No | No | Yes | Yes | -| Box | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | -| Dropbox | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | -| FTP | No | No | Yes | Yes | No | No | -| Google Cloud Storage | Yes | Yes | No | No | No | Yes | -| Google Drive | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | -| HTTP | No | No | No | No | No | No | -| Hubic | Yes † | Yes | No | No | No | Yes | -| Microsoft OneDrive | Yes | Yes | Yes | No [#197](https://github.com/ncw/rclone/issues/197) | No [#575](https://github.com/ncw/rclone/issues/575) | No | -| Openstack Swift | Yes † | Yes | No | No | No | Yes | -| QingStor | No | Yes | No | No | No | Yes | -| SFTP | No | No | Yes | Yes | No | No | -| Yandex Disk | Yes | No | No | No | No [#575](https://github.com/ncw/rclone/issues/575) | Yes | -| The local filesystem | Yes | No | Yes | Yes | No | No | +| Name | Purge | Copy | Move | DirMove | CleanUp | ListR | +| ---------------------------- |:-----:|:----:|:----:|:-------:|:-------:|:-----:| +| Amazon Drive | Yes | No | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | +| Amazon S3 | No | Yes | No | No | No | Yes | +| Backblaze B2 | No | No | No | No | Yes | Yes | +| Box | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | +| Dropbox | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | +| FTP | No | No | Yes | Yes | No | No | +| Google Cloud Storage | Yes | Yes | No | No | No | Yes | +| Google Drive | Yes | Yes | Yes | Yes | No [#575](https://github.com/ncw/rclone/issues/575) | No | +| HTTP | No | No | No | No | No | No | +| Hubic | Yes † | Yes | No | No | No | Yes | +| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | +| Microsoft OneDrive | Yes | Yes | Yes | No [#197](https://github.com/ncw/rclone/issues/197) | No [#575](https://github.com/ncw/rclone/issues/575) | No | +| Openstack Swift | Yes † | Yes | No | No | No | Yes | +| QingStor | No | Yes | No | No | No | Yes | +| SFTP | No | No | Yes | Yes | No | No | +| Yandex Disk | Yes | No | No | No | No [#575](https://github.com/ncw/rclone/issues/575) | Yes | +| The local filesystem | Yes | No | Yes | Yes | No | No | ### Purge ### diff --git a/docs/layouts/chrome/navbar.html b/docs/layouts/chrome/navbar.html index 68d97d2b2..572d06943 100644 --- a/docs/layouts/chrome/navbar.html +++ b/docs/layouts/chrome/navbar.html @@ -60,6 +60,7 @@
 Google Drive
 HTTP
 Hubic
+Microsoft Azure Blob Storage
 Microsoft OneDrive
 QingStor
 Openstack Swift
  • diff --git a/fs/all/all.go b/fs/all/all.go index 08018827b..51921b7b5 100644 --- a/fs/all/all.go +++ b/fs/all/all.go @@ -3,7 +3,7 @@ package all import ( // Active file systems _ "github.com/ncw/rclone/amazonclouddrive" - _ "github.com/ncw/rclone/azure" + _ "github.com/ncw/rclone/azureblob" _ "github.com/ncw/rclone/b2" _ "github.com/ncw/rclone/box" _ "github.com/ncw/rclone/crypt" diff --git a/fs/fs.go b/fs/fs.go index 42a8f6b11..c3500edc0 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -48,6 +48,7 @@ var ( ErrorNotAFile = errors.New("is a not a regular file") ErrorNotDeleting = errors.New("not deleting files as there were IO errors") ErrorCantMoveOverlapping = errors.New("can't move files on overlapping remotes") + ErrorDirectoryNotEmpty = errors.New("directory not empty") ) // RegInfo provides information about a filesystem diff --git a/fs/test_all.go b/fs/test_all.go index ae13c9ee4..409fa175d 100644 --- a/fs/test_all.go +++ b/fs/test_all.go @@ -108,6 +108,11 @@ var ( SubDir: false, FastList: false, }, + { + Name: "TestAzureBlob:", + SubDir: true, + FastList: true, + }, } binary = "fs.test" // Flags diff --git a/fstest/fstests/gen_tests.go b/fstest/fstests/gen_tests.go index 59fc48ba5..4aa3565f0 100644 --- a/fstest/fstests/gen_tests.go +++ b/fstest/fstests/gen_tests.go @@ -163,5 +163,6 @@ func main() { generateTestProgram(t, fns, "FTP") generateTestProgram(t, fns, "Box") generateTestProgram(t, fns, "QingStor", buildConstraint("!plan9")) + generateTestProgram(t, fns, "AzureBlob", buildConstraint("go1.7")) log.Printf("Done") }
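
The generated tests register the remote name `TestAzureBlob:` (see `TestSetup` above). Assuming such a remote has been configured against a test storage account, the new backend's tests can be run on their own from the repository root with something like:

    go test -v ./azureblob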