From ffb0b7c6481ff63c927e697ec05afded284cd80f Mon Sep 17 00:00:00 2001 From: Christian Simon Date: Wed, 22 Feb 2023 10:07:38 +0000 Subject: [PATCH] WIP: Allow pure prefix listing This change allows listing buckets by pure prefix with the parameter `IterParams.WithoutAppendDirDelim`. This allows listing huge "directories" more efficiently and in parallel. Together with the lexicographical ordering of ULIDs, certain timestamps can be listed. Outstanding tasks: - [ ] Implement this for filesystem - [ ] Test coverage - [ ] Implement for other providers than azure/gcs/s3 Signed-off-by: Christian Simon --- objstore.go | 7 ++++++- providers/azure/azure.go | 7 +++++-- providers/gcs/gcs.go | 10 ++++++---- providers/s3/s3.go | 10 ++++++---- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/objstore.go b/objstore.go index cc6034d5..207fdb5d 100644 --- a/objstore.go +++ b/objstore.go @@ -107,9 +107,14 @@ func WithRecursiveIter(params *IterParams) { params.Recursive = true } +func WithoutApendingDirDelim(params *IterParams) { + params.WithoutAppendDirDelim = true +} + // IterParams holds the Iter() parameters and is used by objstore clients implementations. type IterParams struct { - Recursive bool + Recursive bool + WithoutAppendDirDelim bool } func ApplyIterOptions(options ...IterOption) IterParams { diff --git a/providers/azure/azure.go b/providers/azure/azure.go index 23e66169..b43ecb57 100644 --- a/providers/azure/azure.go +++ b/providers/azure/azure.go @@ -183,12 +183,15 @@ func NewBucketWithConfig(logger log.Logger, conf Config, component string) (*Buc // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { + params := objstore.ApplyIterOptions(options...) 
+ + // Ensure the object name actually ends with a dir suffix, as long as this + // is not explicitly disabled by the WithoutAppendDirDelim. prefix := dir - if prefix != "" && !strings.HasSuffix(prefix, DirDelim) { + if prefix != "" && !strings.HasSuffix(prefix, DirDelim) && !params.WithoutAppendDirDelim { prefix += DirDelim } - params := objstore.ApplyIterOptions(options...) if params.Recursive { opt := &container.ListBlobsFlatOptions{Prefix: &prefix} pager := b.containerClient.NewListBlobsFlatPager(opt) diff --git a/providers/gcs/gcs.go b/providers/gcs/gcs.go index 947e641a..56a9196e 100644 --- a/providers/gcs/gcs.go +++ b/providers/gcs/gcs.go @@ -94,15 +94,17 @@ func (b *Bucket) Name() string { // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { - // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the - // object itself as one prefix item. - if dir != "" { + params := objstore.ApplyIterOptions(options...) + + // Ensure the object name actually ends with a dir suffix, as long as this + // is not explicitly disabled by the WithoutAppendDirDelim. + if dir != "" && !params.WithoutAppendDirDelim { dir = strings.TrimSuffix(dir, DirDelim) + DirDelim } // If recursive iteration is enabled we should pass an empty delimiter. delimiter := DirDelim - if objstore.ApplyIterOptions(options...).Recursive { + if params.Recursive { delimiter = "" } diff --git a/providers/s3/s3.go b/providers/s3/s3.go index 729ee7eb..3667e68a 100644 --- a/providers/s3/s3.go +++ b/providers/s3/s3.go @@ -385,15 +385,17 @@ func ValidateForTests(conf Config) error { // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. 
func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { - // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the - // object itself as one prefix item. - if dir != "" { + params := objstore.ApplyIterOptions(options...) + + // Ensure the object name actually ends with a dir suffix, as long as this + // is not explicitly disabled by the WithoutAppendDirDelim. + if dir != "" && !params.WithoutAppendDirDelim { dir = strings.TrimSuffix(dir, DirDelim) + DirDelim } opts := minio.ListObjectsOptions{ Prefix: dir, - Recursive: objstore.ApplyIterOptions(options...).Recursive, + Recursive: params.Recursive, UseV1: b.listObjectsV1, }