diff --git a/changelog/unreleased/issue-3697 b/changelog/unreleased/issue-3697
new file mode 100644
index 00000000000..514f9d70864
--- /dev/null
+++ b/changelog/unreleased/issue-3697
@@ -0,0 +1,12 @@
+Enhancement: Allow excluding online-only cloud files (e.g. OneDrive)
+
+Restic treated OneDrive Files On-Demand as though they were regular files
+for the purpose of backup, which caused issues with VSS, could make backup
+incredibly slow (as OneDrive attempted to download files), or could fill
+the source disk (e.g. 1TB of files in OneDrive on a 500GB disk).
+Restic now allows the user to exclude these files when backing up with
+the `--exclude-cloud-files` switch.
+
+https://github.com/restic/restic/issues/3697
+https://github.com/restic/restic/issues/4935
+https://github.com/restic/restic/pull/4990
\ No newline at end of file
diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go
index b7eed13184c..42908557ed9 100644
--- a/cmd/restic/cmd_backup.go
+++ b/cmd/restic/cmd_backup.go
@@ -77,6 +77,7 @@ type BackupOptions struct {
 	ExcludeIfPresent  []string
 	ExcludeCaches     bool
 	ExcludeLargerThan string
+	ExcludeCloudFiles bool
 	Stdin             bool
 	StdinFilename     string
 	StdinCommand      bool
@@ -140,6 +141,7 @@ func init() {
 	f.BoolVar(&backupOptions.NoScan, "no-scan", false, "do not run scanner to estimate size of backup")
 	if runtime.GOOS == "windows" {
 		f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
+		f.BoolVar(&backupOptions.ExcludeCloudFiles, "exclude-cloud-files", false, "excludes online-only cloud files (such as OneDrive Files On-Demand)")
 	}
 	f.BoolVar(&backupOptions.SkipIfUnchanged, "skip-if-unchanged", false, "skip snapshot creation if identical to parent snapshot")
 
@@ -347,6 +349,17 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [
 		funcs = append(funcs, f)
 	}
 
+	if opts.ExcludeCloudFiles && !opts.Stdin && !opts.StdinCommand {
+		if runtime.GOOS != "windows" {
+			return nil, errors.Fatalf("exclude-cloud-files is only supported on Windows")
+		}
+		f, err := archiver.RejectCloudFiles(Warnf)
+		if err != nil {
+			return nil, err
+		}
+		funcs = append(funcs, f)
+	}
+
 	if opts.ExcludeCaches {
 		opts.ExcludeIfPresent = append(opts.ExcludeIfPresent, "CACHEDIR.TAG:Signature: 8a477f597d28d172789f06886806bc55")
 	}
diff --git a/doc/040_backup.rst b/doc/040_backup.rst
index 696b235cce6..a30d80402be 100644
--- a/doc/040_backup.rst
+++ b/doc/040_backup.rst
@@ -297,7 +297,8 @@ the exclude options are:
 - ``--exclude-file`` Specified one or more times to exclude items listed in a given file
 - ``--iexclude-file`` Same as ``exclude-file`` but ignores cases like in ``--iexclude``
 - ``--exclude-if-present foo`` Specified one or more times to exclude a folder's content if it contains a file called ``foo`` (optionally having a given header, no wildcards for the file name supported)
-- ``--exclude-larger-than size`` Specified once to excludes files larger than the given size
+- ``--exclude-larger-than size`` Specified once to exclude files larger than the given size
+- ``--exclude-cloud-files`` Specified once to exclude online-only cloud files (such as OneDrive Files On-Demand), currently only supported on Windows
 
 Please see ``restic help backup`` for more specific information about each exclude option.
 
diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go
index 6db62aa2025..c7dff0acb1a 100644
--- a/internal/archiver/exclude.go
+++ b/internal/archiver/exclude.go
@@ -316,3 +316,23 @@ func RejectBySize(maxSize int64) (RejectFunc, error) {
 		return false
 	}, nil
 }
+
+// RejectCloudFiles returns a RejectFunc that rejects online-only cloud files,
+// i.e. items for which RecallOnDataAccess reports true. If the status of an
+// item cannot be determined, warnf is called and the item is not rejected.
+func RejectCloudFiles(warnf func(msg string, args ...interface{})) (RejectFunc, error) {
+	return func(item string, fi *fs.ExtendedFileInfo, _ fs.FS) bool {
+		recall, err := fi.RecallOnDataAccess()
+		if err != nil {
+			warnf("item %v: error checking online-only status: %v", item, err)
+			return false
+		}
+
+		if recall {
+			debug.Log("rejecting online-only cloud file %s", item)
+			return true
+		}
+
+		return false
+	}, nil
+}
diff --git a/internal/fs/stat_bsd.go b/internal/fs/stat_bsd.go
index 16506415306..95238be777c 100644
--- a/internal/fs/stat_bsd.go
+++ b/internal/fs/stat_bsd.go
@@ -32,3 +32,8 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
 		ChangeTime: time.Unix(s.Ctimespec.Unix()),
 	}
 }
+
+// RecallOnDataAccess always reports false here; cloud-only placeholder detection relies on Windows-specific attributes.
+func (*ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
+	return false, nil
+}
diff --git a/internal/fs/stat_unix.go b/internal/fs/stat_unix.go
index 723ac8b1978..70124658f44 100644
--- a/internal/fs/stat_unix.go
+++ b/internal/fs/stat_unix.go
@@ -32,3 +32,8 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
 		ChangeTime: time.Unix(s.Ctim.Unix()),
 	}
 }
+
+// RecallOnDataAccess always reports false here; cloud-only placeholder detection relies on Windows-specific attributes.
+func (*ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
+	return false, nil
+}
diff --git a/internal/fs/stat_windows.go b/internal/fs/stat_windows.go
index a2dfa5f6d5d..a62ddf87fd5 100644
--- a/internal/fs/stat_windows.go
+++ b/internal/fs/stat_windows.go
@@ -8,6 +8,8 @@ import (
 	"os"
 	"syscall"
 	"time"
+
+	"golang.org/x/sys/windows"
 )
 
 // extendedStat extracts info into an ExtendedFileInfo for Windows.
@@ -36,3 +38,20 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
 
 	return &extFI
 }
+
+// RecallOnDataAccess checks if a file is available locally on the disk or if the file is
+// just a placeholder which must be downloaded from a remote server. This is typically used
+// in cloud syncing services (e.g. OneDrive) to prevent downloading files from cloud storage
+// until they are accessed.
+func (fi *ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
+	attrs, ok := fi.sys.(*syscall.Win32FileAttributeData)
+	if !ok {
+		return false, fmt.Errorf("could not determine file attributes: %s", fi.Name)
+	}
+
+	if attrs.FileAttributes&windows.FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS != 0 {
+		return true, nil
+	}
+
+	return false, nil
+}
diff --git a/internal/fs/stat_windows_test.go b/internal/fs/stat_windows_test.go
new file mode 100644
index 00000000000..4f258d836c1
--- /dev/null
+++ b/internal/fs/stat_windows_test.go
@@ -0,0 +1,80 @@
+package fs_test
+
+import (
+	iofs "io/fs"
+	"os"
+	"path/filepath"
+	"syscall"
+	"testing"
+	"time"
+
+	"github.com/restic/restic/internal/fs"
+	rtest "github.com/restic/restic/internal/test"
+	"golang.org/x/sys/windows"
+)
+
+func TestRecallOnDataAccessRealFile(t *testing.T) {
+	// create a temp file for testing
+	tempdir := rtest.TempDir(t)
+	filename := filepath.Join(tempdir, "regular-file")
+	err := os.WriteFile(filename, []byte("foobar"), 0640)
+	rtest.OK(t, err)
+
+	fi, err := os.Stat(filename)
+	rtest.OK(t, err)
+
+	xs := fs.ExtendedStat(fi)
+
+	// a freshly written local file must not be reported as a cloud placeholder
+	recall, err := xs.RecallOnDataAccess()
+	rtest.OK(t, err)
+	rtest.Assert(t, !recall, "RecallOnDataAccess should be false")
+}
+
+// mockFileInfo implements os.FileInfo; Sys() returns a Win32FileAttributeData carrying FileAttributes
+type mockFileInfo struct {
+	FileAttributes uint32
+}
+
+func (m mockFileInfo) IsDir() bool {
+	return false
+}
+func (m mockFileInfo) ModTime() time.Time {
+	return time.Now()
+}
+func (m mockFileInfo) Mode() iofs.FileMode {
+	return 0
+}
+func (m mockFileInfo) Name() string {
+	return "test"
+}
+func (m mockFileInfo) Size() int64 {
+	return 0
+}
+func (m mockFileInfo) Sys() any {
+	return &syscall.Win32FileAttributeData{
+		FileAttributes: m.FileAttributes,
+	}
+}
+
+func TestRecallOnDataAccessMockCloudFile(t *testing.T) {
+	fi := mockFileInfo{
+		FileAttributes: windows.FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS,
+	}
+	xs := fs.ExtendedStat(fi)
+
+	recall, err := xs.RecallOnDataAccess()
+	rtest.OK(t, err)
+	rtest.Assert(t, recall, "RecallOnDataAccess should be true")
+}
+
+func TestRecallOnDataAccessMockRegularFile(t *testing.T) {
+	fi := mockFileInfo{
+		FileAttributes: windows.FILE_ATTRIBUTE_ARCHIVE,
+	}
+	xs := fs.ExtendedStat(fi)
+
+	recall, err := xs.RecallOnDataAccess()
+	rtest.OK(t, err)
+	rtest.Assert(t, !recall, "RecallOnDataAccess should be false")
+}