Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[statspro] Bootstrap database statistics once on startup #8036

Merged
merged 12 commits into from
Jun 24, 2024
2 changes: 2 additions & 0 deletions go/libraries/doltcore/env/actions/branch.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ func RenameBranch(ctx context.Context, dbData env.DbData, oldBranch, newBranch s
}
}

// todo: update default branch variable

return DeleteBranch(ctx, dbData, oldBranch, DeleteOptions{Force: true, AllowDeletingCurrentBranch: true}, remoteDbPro, rsc)
}

Expand Down
1 change: 1 addition & 0 deletions go/libraries/doltcore/sqle/dsess/variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const (
DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs"

DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled"
DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled"
DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
DoltStatsMemoryOnly = "dolt_stats_memory_only"
Expand Down
1 change: 1 addition & 0 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ func TestQueryPlans(t *testing.T) {
}

defer harness.Close()
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0)
enginetest.TestQueryPlans(t, harness, queries.PlanTests)
}

Expand Down
5 changes: 2 additions & 3 deletions go/libraries/doltcore/sqle/statsnoms/iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/planbuilder"
"github.com/dolthub/go-mysql-server/sql/stats"
"gopkg.in/errgo.v2/errors"

"github.com/dolthub/dolt/go/libraries/doltcore/schema"
Expand Down Expand Up @@ -114,15 +113,15 @@ func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) {
upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64)
createdAt := row[schema.StatsCreatedAtTag].(time.Time)

typs := strings.Split(typesStr, ",")
typs := strings.Split(typesStr, "\n")
for i, t := range typs {
typs[i] = strings.TrimSpace(t)
}

qual := sql.NewStatQualifier(dbName, tableName, indexName)
if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) {
s.currentQual = curQual
s.currentTypes, err = stats.ParseTypeStrings(typs)
s.currentTypes, err = parseTypeStrings(typs)
if err != nil {
return nil, err
}
Expand Down
21 changes: 17 additions & 4 deletions go/libraries/doltcore/sqle/statsnoms/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/planbuilder"
"github.com/dolthub/go-mysql-server/sql/stats"

"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
Expand Down Expand Up @@ -68,7 +69,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
createdAt := row[schema.StatsCreatedAtTag].(time.Time)

typs := strings.Split(typesStr, ",")
typs := strings.Split(typesStr, "\n")
for i, t := range typs {
typs[i] = strings.TrimSpace(t)
}
Expand All @@ -87,7 +88,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St

mcvs := make([]sql.Row, numMcvs)
for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
if v != nil {
if v != nil && v != "" {
row, err := iter.ParseRow(v.(string))
if err != nil {
return nil, err
Expand Down Expand Up @@ -133,7 +134,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
}

if currentStat.Statistic.Hist == nil {
currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs)
currentStat.Statistic.Typs, err = parseTypeStrings(typs)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -177,6 +178,18 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St
return qualToStats, nil
}

func parseTypeStrings(typs []string) ([]sql.Type, error) {
var ret []sql.Type
for _, typ := range typs {
ct, err := planbuilder.ParseColumnTypeString(typ)
if err != nil {
return nil, err
}
ret = append(ret, ct)
}
return ret, nil
}

func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
dSess := dsess.DSessFromSess(ctx.Session)
roots, ok := dSess.GetRoots(ctx, qual.Db())
Expand Down Expand Up @@ -213,7 +226,7 @@ func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) {
}

firstKey := keyBuilder.Build(buffPool)
var firstRow sql.Row
firstRow := make(sql.Row, keyBuilder.Desc.Count())
for i := 0; i < keyBuilder.Desc.Count(); i++ {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/statsnoms/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat
sep := ""
for _, t := range dStats.Statistic.Typs {
typesB.WriteString(sep + t.String())
sep = ","
sep = "\n"
}
typesStr := typesB.String()

Expand Down
48 changes: 47 additions & 1 deletion go/libraries/doltcore/sqle/statspro/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,40 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin
if err != nil {
return err
}
return p.RefreshTableStatsWithBranch(ctx, table, db, branch)
}

func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
dSess := dsess.DSessFromSess(ctx.Session)
branches := p.getStatsBranches(ctx)
for _, branch := range branches {
sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
if err != nil {
if sql.ErrDatabaseNotFound.Is(err) {
// default branch is not valid
continue
}
return err
}
tables, err := sqlDb.GetTableNames(ctx)
if err != nil {
return err
}
for _, table := range tables {
sqlTable, _, err := GetLatestTable(ctx, table, sqlDb)
if err != nil {
return err
}
if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil {
return err
}
}
}
return nil
}

func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error {
dSess := dsess.DSessFromSess(ctx.Session)

sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch))
if err != nil {
Expand Down Expand Up @@ -134,6 +168,9 @@ func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db strin
// branchQualifiedDatabase returns a branch qualified database. If the database
// is already branch suffixed no duplication is applied.
func (p *Provider) branchQualifiedDatabase(db, branch string) string {
if branch == "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably better to do this at the call site

return db
}
suffix := fmt.Sprintf("/%s", branch)
if !strings.HasSuffix(db, suffix) {
return fmt.Sprintf("%s%s", db, suffix)
Expand All @@ -143,7 +180,16 @@ func (p *Provider) branchQualifiedDatabase(db, branch string) string {

// GetLatestTable will get the WORKING root table for the current database/branch
func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
sqlTable, ok, err := sqlDb.(sqle.Database).GetTableInsensitive(ctx, tableName)
var db sqle.Database
switch d := sqlDb.(type) {
case sqle.Database:
db = d
case sqle.ReadReplicaDatabase:
db = d.Database
default:
return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
}
sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
if err != nil {
return nil, nil, err
}
Expand Down
10 changes: 8 additions & 2 deletions go/libraries/doltcore/sqle/statspro/configure.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co
branches := p.getStatsBranches(loadCtx)

var autoEnabled bool
var startupEnabled bool
var intervalSec time.Duration
var thresholdf64 float64
if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) {
Expand All @@ -55,6 +56,8 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co

p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
p.pro.DropDatabaseHooks = append(p.pro.DropDatabaseHooks, NewStatsDropDatabaseHook(p))
} else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) {
startupEnabled = true
}

eg, ctx := loadCtx.NewErrgroup()
Expand All @@ -69,7 +72,6 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co
} else {
err = fmt.Errorf("%w: %v", ErrFailedToLoad, r)
}

return
}
}()
Expand All @@ -84,6 +86,10 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co
}
if autoEnabled {
return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches)
} else if startupEnabled {
if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil {
return err
}
}
return nil
})
Expand All @@ -109,7 +115,7 @@ func (p *Provider) getStatsBranches(ctx *sql.Context) []string {
}

if branches == nil {
branches = []string{p.pro.DefaultBranch()}
branches = append(branches, p.pro.DefaultBranch())
}
return branches
}
Expand Down
7 changes: 7 additions & 0 deletions go/libraries/doltcore/sqle/system_variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,13 @@ func AddDoltSystemVariables() {
Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBootstrapEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
Default: int8(1),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsMemoryOnly,
Dynamic: true,
Expand Down
22 changes: 15 additions & 7 deletions integration-tests/bats/stats.bats
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ teardown() {
@test "stats: empty initial stats" {
cd repo2

# disable bootstrap, can only make stats with ANALYZE or background thread
dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"

dolt sql -q "insert into xy values (0,0), (1,1)"

start_sql_server
Expand Down Expand Up @@ -88,6 +91,16 @@ teardown() {
[ "${lines[1]}" = "8" ]
}

@test "stats: bootrap on engine startup" {
cd repo2

dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;"
dolt sql -q "insert into xy values (0,0), (1,1)"
run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "2" ]
}

@test "stats: deletes refresh" {
cd repo2

Expand Down Expand Up @@ -239,20 +252,15 @@ teardown() {

@test "stats: multi db" {
cd repo1

dolt sql -q "insert into ab values (0,0), (1,1)"

cd ../repo2

dolt sql -q "insert into ab values (0,0), (1,1)"
dolt sql -q "insert into xy values (0,0), (1,1)"

cd ..
start_sql_server
sleep 1
stop_sql_server

run dolt sql -r csv -q "select count(*) from dolt_statistics"
[ "$status" -eq 0 ]
[ "${lines[1]}" = "0" ]

dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;"
dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = 0.5"
Expand Down
Loading