From 9477b026ccd2a3b5fa11d8b4aa49ecaf3f3caf84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Sat, 7 Oct 2023 21:58:09 +0100 Subject: [PATCH] interp,expand: add support for fs.DirEntry That is, all of our APIs mirroring the signature of ioutil.ReadDir now gain a "2" sibling with whose signature mirrors os.ReadDir. The original APIs are deprecated, and where both can be set together, the new API takes precedence over the old one. This should lead to minor overhead reductions by default, as well as for any users of the old APIs who switch to the new ones. See the godoc explaining why ioutil.ReadDir, as well as related APIs like filepath.Walk, are deprecated. --- expand/expand.go | 51 +++++++++++++++++++++++++++++++++-------------- interp/api.go | 25 +++++++++++++++++++++-- interp/handler.go | 10 ++++++++++ interp/runner.go | 4 ++-- 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/expand/expand.go b/expand/expand.go index d3061f56..bef99a5c 100644 --- a/expand/expand.go +++ b/expand/expand.go @@ -52,14 +52,18 @@ type Config struct { // this field might change until #451 is completely fixed. ProcSubst func(*syntax.ProcSubst) (string, error) - // TODO(v4): update to os.Readdir with fs.DirEntry. - // We could possibly expose that as a preferred ReadDir2 before then, - // to allow users to opt into better performance in v3. + // TODO(v4): replace ReadDir with ReadDir2. - // ReadDir is used for file path globbing. If nil, globbing is disabled. - // Use ioutil.ReadDir to use the filesystem directly. + // ReadDir is the older form of [ReadDir2], before io/fs. + // + // Deprecated: use ReadDir2 instead. ReadDir func(string) ([]fs.FileInfo, error) + // ReadDir is used for file path globbing. + // If nil, and ReadDir is nil as well, globbing is disabled. + // Use os.ReadDir to use the filesystem directly. + ReadDir2 func(string) ([]fs.DirEntry, error) + // GlobStar corresponds to the shell option that allows globbing with // "**". GlobStar bool @@ -94,6 +98,9 @@ func (u UnexpectedCommandError) Error() string { var zeroConfig = &Config{} +// TODO: note that prepareConfig is modifying the user's config in place, +// which doesn't feel right - we should make a copy. + func prepareConfig(cfg *Config) *Config { if cfg == nil { cfg = zeroConfig @@ -106,6 +113,20 @@ func prepareConfig(cfg *Config) *Config { if vr := cfg.Env.Get("IFS"); vr.IsSet() { cfg.ifs = vr.String() } + + if cfg.ReadDir != nil && cfg.ReadDir2 == nil { + cfg.ReadDir2 = func(path string) ([]fs.DirEntry, error) { + infos, err := cfg.ReadDir(path) + if err != nil { + return nil, err + } + entries := make([]fs.DirEntry, len(infos)) + for i, info := range infos { + entries[i] = fs.FileInfoToDirEntry(info) + } + return entries, nil + } + } return cfg } @@ -441,7 +462,7 @@ func Fields(cfg *Config, words ...*syntax.Word) ([]string, error) { path, doGlob := cfg.escapedGlobField(field) var matches []string var syntaxError *pattern.SyntaxError - if doGlob && cfg.ReadDir != nil { + if doGlob && cfg.ReadDir2 != nil { matches, err = cfg.glob(dir, path) if !errors.As(err, &syntaxError) { if err != nil { @@ -839,11 +860,11 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { // TODO: as an optimization, we could do chunks of the path all at once, // like doing a single stat for "/foo/bar" in "/foo/bar/*". - // TODO: Another optimization would be to reduce the number of ReadDir calls. + // TODO: Another optimization would be to reduce the number of ReadDir2 calls. // For example, /foo/* can end up doing one duplicate call: // - // ReadDir("/foo") to ensure that "/foo/" exists and only matches a directory - // ReadDir("/foo") glob "*" + // ReadDir2("/foo") to ensure that "/foo/" exists and only matches a directory + // ReadDir2("/foo") glob "*" for i, part := range parts { // Keep around for debugging. @@ -864,12 +885,12 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { match = filepath.Join(base, match) } match = pathJoin2(match, part) - // We can't use ReadDir on the parent and match the directory + // We can't use ReadDir2 on the parent and match the directory // entry by name, because short paths on Windows break that. - // Our only option is to ReadDir on the directory entry itself, + // Our only option is to ReadDir2 on the directory entry itself, // which can be wasteful if we only want to see if it exists, // but at least it's correct in all scenarios. - if _, err := cfg.ReadDir(match); err != nil { + if _, err := cfg.ReadDir2(match); err != nil { const errPathNotFound = syscall.Errno(3) // from syscall/types_windows.go, to avoid a build tag var pathErr *os.PathError if runtime.GOOS == "windows" && errors.As(err, &pathErr) && pathErr.Err == errPathNotFound { @@ -945,7 +966,7 @@ func (cfg *Config) globDir(base, dir string, rx *regexp.Regexp, matchHidden bool if !filepath.IsAbs(dir) { fullDir = filepath.Join(base, dir) } - infos, err := cfg.ReadDir(fullDir) + infos, err := cfg.ReadDir2(fullDir) if err != nil { // We still want to return matches, for the sake of reusing slices. return matches, err @@ -954,13 +975,13 @@ func (cfg *Config) globDir(base, dir string, rx *regexp.Regexp, matchHidden bool name := info.Name() if !wantDir { // No filtering. - } else if mode := info.Mode(); mode&os.ModeSymlink != 0 { + } else if mode := info.Type(); mode&os.ModeSymlink != 0 { // We need to know if the symlink points to a directory. // This requires an extra syscall, as ReadDir on the parent directory // does not follow symlinks for each of the directory entries. // ReadDir is somewhat wasteful here, as we only want its error result, // but we could try to reuse its result as per the TODO in Config.glob. - if _, err := cfg.ReadDir(filepath.Join(fullDir, info.Name())); err != nil { + if _, err := cfg.ReadDir2(filepath.Join(fullDir, info.Name())); err != nil { continue } } else if !mode.IsDir() { diff --git a/interp/api.go b/interp/api.go index 27067960..9492a781 100644 --- a/interp/api.go +++ b/interp/api.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "io/fs" "math/rand" "os" "path/filepath" @@ -83,7 +84,7 @@ type Runner struct { // readDirHandler is a function responsible for reading directories during // glob expansion. It must be non-nil. - readDirHandler ReadDirHandlerFunc + readDirHandler ReadDirHandlerFunc2 // statHandler is a function responsible for getting file stat. It must be non-nil. statHandler StatHandlerFunc @@ -186,7 +187,7 @@ func New(opts ...RunnerOption) (*Runner, error) { r := &Runner{ usedNew: true, openHandler: DefaultOpenHandler(), - readDirHandler: DefaultReadDirHandler(), + readDirHandler: DefaultReadDirHandler2(), statHandler: DefaultStatHandler(), } r.dirStack = r.dirBootstrap[:0] @@ -383,7 +384,27 @@ func OpenHandler(f OpenHandlerFunc) RunnerOption { } // ReadDirHandler sets the read directory handler. See [ReadDirHandlerFunc] for more info. +// +// Deprecated: use [ReadDirHandler2]. func ReadDirHandler(f ReadDirHandlerFunc) RunnerOption { + return func(r *Runner) error { + r.readDirHandler = func(ctx context.Context, path string) ([]fs.DirEntry, error) { + infos, err := f(ctx, path) + if err != nil { + return nil, err + } + entries := make([]fs.DirEntry, len(infos)) + for i, info := range infos { + entries[i] = fs.FileInfoToDirEntry(info) + } + return entries, nil + } + return nil + } +} + +// ReadDirHandler2 sets the read directory handler. See [ReadDirHandlerFunc2] for more info. +func ReadDirHandler2(f ReadDirHandlerFunc2) RunnerOption { return func(r *Runner) error { r.readDirHandler = f return nil diff --git a/interp/handler.go b/interp/handler.go index 25ebc6c8..226d70f5 100644 --- a/interp/handler.go +++ b/interp/handler.go @@ -315,6 +315,8 @@ func DefaultOpenHandler() OpenHandlerFunc { // TODO(v4): if this is kept in v4, it most likely needs to use [io/fs.DirEntry] for efficiency type ReadDirHandlerFunc func(ctx context.Context, path string) ([]fs.FileInfo, error) +type ReadDirHandlerFunc2 func(ctx context.Context, path string) ([]fs.DirEntry, error) + // DefaultReadDirHandler returns the [ReadDirHandlerFunc] used by default. // It makes use of [ioutil.ReadDir]. func DefaultReadDirHandler() ReadDirHandlerFunc { @@ -323,6 +325,14 @@ func DefaultReadDirHandler() ReadDirHandlerFunc { } } +// DefaultReadDirHandler2 returns the [ReadDirHandlerFunc2] used by default. +// It uses [os.ReadDir]. +func DefaultReadDirHandler2() ReadDirHandlerFunc2 { + return func(ctx context.Context, path string) ([]fs.DirEntry, error) { + return os.ReadDir(path) + } +} + // StatHandlerFunc is a handler which gets a file's information. type StatHandlerFunc func(ctx context.Context, name string, followSymlinks bool) (fs.FileInfo, error) diff --git a/interp/runner.go b/interp/runner.go index baf3bada..79bf0fd4 100644 --- a/interp/runner.go +++ b/interp/runner.go @@ -156,9 +156,9 @@ func catShortcutArg(stmt *syntax.Stmt) *syntax.Word { func (r *Runner) updateExpandOpts() { if r.opts[optNoGlob] { - r.ecfg.ReadDir = nil + r.ecfg.ReadDir2 = nil } else { - r.ecfg.ReadDir = func(s string) ([]fs.FileInfo, error) { + r.ecfg.ReadDir2 = func(s string) ([]fs.DirEntry, error) { return r.readDirHandler(r.handlerCtx(context.Background()), s) } }