Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up fetch when there are many tags which haven't changed #8770

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions go/libraries/doltcore/doltdb/doltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,56 @@ func (ddb *DoltDB) ResolveTag(ctx context.Context, tagRef ref.TagRef) (*Tag, err
return NewTag(ctx, tagRef.GetPath(), ds, ddb.vrw, ddb.ns)
}

// TagResolver defers loading of a tag's metadata. There are situations where we need to list all the tags but
// don't necessarily need their metadata: such callers can read the tag's address via Addr and call Resolve
// only when the full Tag is required. See GetTagResolvers.
type TagResolver struct {
// ddb is the database Resolve uses to load the tag.
ddb *DoltDB
// ref identifies the tag; Resolve passes it to ResolveTag.
ref ref.TagRef
// h is the hash returned by Addr.
h hash.Hash
}

// Addr reports the hash of the object that stores the Tag data. The object itself is not loaded here; it is
// loaded and deserialized by the Resolve method.
func (tr *TagResolver) Addr() hash.Hash {
	return tr.h
}

// Resolve loads the *Tag this resolver refers to, complete with its metadata, by delegating to
// DoltDB.ResolveTag with the stored tag reference. Any error from ResolveTag is returned unchanged.
func (tr *TagResolver) Resolve(ctx context.Context) (*Tag, error) {
	return tr.ddb.ResolveTag(ctx, tr.ref)
}

// GetTagResolvers returns a TagResolver for every ref in tagRefs that has a matching entry in the database's
// datasets. Each resolver records the address reported for that dataset without loading the tag itself, so
// callers that only need the address avoid deserializing tag metadata; call TagResolver.Resolve to load the
// full Tag.
//
// Every element of tagRefs must be a ref.TagRef; otherwise an error is returned. Results are produced in
// dataset iteration order, which is not necessarily the order of tagRefs, and refs with no matching dataset
// are omitted from the result.
func (ddb *DoltDB) GetTagResolvers(ctx context.Context, tagRefs []ref.DoltRef) ([]TagResolver, error) {
	// Validate and index the requested refs by their string form before touching storage, so that bad
	// input fails fast and is reported as an error rather than a panic.
	tagMap := make(map[string]ref.TagRef, len(tagRefs))
	for _, tagRef := range tagRefs {
		tr, ok := tagRef.(ref.TagRef)
		if !ok {
			return nil, fmt.Errorf("expected TagRef, got %T", tagRef)
		}
		tagMap[tagRef.String()] = tr
	}

	datasets, err := ddb.db.Datasets(ctx)
	if err != nil {
		return nil, err
	}

	results := make([]TagResolver, 0, len(tagRefs))

	// A single pass over all datasets picks up the address of each requested tag.
	err = datasets.IterAll(ctx, func(id string, addr hash.Hash) error {
		if tr, ok := tagMap[id]; ok {
			results = append(results, TagResolver{ddb: ddb, ref: tr, h: addr})
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return results, nil
}

// ResolveWorkingSet takes a WorkingSetRef and returns the corresponding WorkingSet object.
func (ddb *DoltDB) ResolveWorkingSet(ctx context.Context, workingSetRef ref.WorkingSetRef) (*WorkingSet, error) {
ds, err := ddb.db.GetDataset(ctx, workingSetRef.String())
Expand Down
18 changes: 10 additions & 8 deletions go/libraries/doltcore/env/actions/remotes.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,8 @@ func Clone(ctx context.Context, srcDB, destDB *doltdb.DoltDB, eventCh chan<- pul
// been fetched into the destination DB.
// todo: potentially too expensive to iterate over all srcDB tags
func FetchFollowTags(ctx context.Context, tempTableDir string, srcDB, destDB *doltdb.DoltDB, progStarter ProgStarter, progStopper ProgStopper) error {
err := IterResolvedTags(ctx, srcDB, func(tag *doltdb.Tag) (stop bool, err error) {
tagHash, err := tag.GetAddr()
if err != nil {
return true, err
}
err := IterUnresolvedTags(ctx, srcDB, func(tag *doltdb.TagResolver) (stop bool, err error) {
tagHash := tag.Addr()

has, err := destDB.Has(ctx, tagHash)
if err != nil {
Expand All @@ -353,7 +350,12 @@ func FetchFollowTags(ctx context.Context, tempTableDir string, srcDB, destDB *do
return false, nil
}

cmHash, err := tag.Commit.HashOf()
t, err := tag.Resolve(ctx)
if err != nil {
return true, err
}

cmHash, err := t.Commit.HashOf()
if err != nil {
return true, err
}
Expand All @@ -378,7 +380,7 @@ func FetchFollowTags(ctx context.Context, tempTableDir string, srcDB, destDB *do

newCtx, cancelFunc := context.WithCancel(ctx)
wg, statsCh := progStarter(newCtx)
err = FetchTag(ctx, tempTableDir, srcDB, destDB, tag, statsCh)
err = FetchTag(ctx, tempTableDir, srcDB, destDB, t, statsCh)
progStopper(cancelFunc, wg, statsCh)
if err == nil {
cli.Println()
Expand All @@ -390,7 +392,7 @@ func FetchFollowTags(ctx context.Context, tempTableDir string, srcDB, destDB *do
return true, err
}

err = destDB.SetHead(ctx, tag.GetDoltRef(), tagHash)
err = destDB.SetHead(ctx, t.GetDoltRef(), tagHash)

return false, err
})
Expand Down
23 changes: 23 additions & 0 deletions go/libraries/doltcore/env/actions/tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,29 @@ func IterResolvedTags(ctx context.Context, ddb *doltdb.DoltDB, cb func(tag *dolt
break
}
}
return nil
}

// IterUnresolvedTags iterates over the tags in ddb and calls cb for each one with an unresolved tag: a
// *doltdb.TagResolver rather than a fully loaded Tag. The callback can call Resolve on the resolver if it
// needs the tag's metadata.
//
// Iteration stops early, returning nil, when cb returns stop == true. If cb returns a non-nil error,
// iteration stops and that error is returned. Errors from listing the tags or building the resolvers are
// returned before cb is ever called.
func IterUnresolvedTags(ctx context.Context, ddb *doltdb.DoltDB, cb func(tag *doltdb.TagResolver) (stop bool, err error)) error {
	tagRefs, err := ddb.GetTags(ctx)
	if err != nil {
		return err
	}

	tagResolvers, err := ddb.GetTagResolvers(ctx, tagRefs)
	if err != nil {
		return err
	}

	// Index into the slice so each callback receives a pointer to its own element. Taking the address of
	// a range variable would hand every callback the same pointer on Go versions before 1.22, and would
	// copy the struct on every iteration.
	for i := range tagResolvers {
		stop, err := cb(&tagResolvers[i])
		if err != nil {
			return err
		}
		if stop {
			break
		}
	}
	return nil
}
Loading