diff --git a/go/libraries/doltcore/env/actions/branch.go b/go/libraries/doltcore/env/actions/branch.go index 52f461ff0e4..68897cb15a6 100644 --- a/go/libraries/doltcore/env/actions/branch.go +++ b/go/libraries/doltcore/env/actions/branch.go @@ -63,6 +63,8 @@ func RenameBranch(ctx context.Context, dbData env.DbData, oldBranch, newBranch s } } + // todo: update default branch variable + return DeleteBranch(ctx, dbData, oldBranch, DeleteOptions{Force: true, AllowDeletingCurrentBranch: true}, remoteDbPro, rsc) } diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go index 74e6db00b1c..848ed2218ec 100644 --- a/go/libraries/doltcore/sqle/dsess/variables.go +++ b/go/libraries/doltcore/sqle/dsess/variables.go @@ -60,6 +60,7 @@ const ( DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs" DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled" + DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled" DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold" DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval" DoltStatsMemoryOnly = "dolt_stats_memory_only" diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go index 1d6cfeba360..b05a000115a 100755 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go @@ -266,6 +266,7 @@ func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) { } defer harness.Close() + sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0) enginetest.TestQueryPlans(t, harness, queries.PlanTests) } diff --git a/go/libraries/doltcore/sqle/statsnoms/iter.go b/go/libraries/doltcore/sqle/statsnoms/iter.go index 08074a3b124..8dc9b8161da 100644 --- a/go/libraries/doltcore/sqle/statsnoms/iter.go +++ b/go/libraries/doltcore/sqle/statsnoms/iter.go @@ -21,7 +21,6 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/planbuilder" - "github.com/dolthub/go-mysql-server/sql/stats" "gopkg.in/errgo.v2/errors" "github.com/dolthub/dolt/go/libraries/doltcore/schema" @@ -114,7 +113,7 @@ func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64) createdAt := row[schema.StatsCreatedAtTag].(time.Time) - typs := strings.Split(typesStr, ",") + typs := strings.Split(typesStr, "\n") for i, t := range typs { typs[i] = strings.TrimSpace(t) } @@ -122,7 +121,7 @@ func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { qual := sql.NewStatQualifier(dbName, tableName, indexName) if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) { s.currentQual = curQual - s.currentTypes, err = stats.ParseTypeStrings(typs) + s.currentTypes, err = parseTypeStrings(typs) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go index b2ec5d9606d..ea8e6b10698 100644 --- a/go/libraries/doltcore/sqle/statsnoms/load.go +++ b/go/libraries/doltcore/sqle/statsnoms/load.go @@ -23,6 +23,7 @@ import ( "time" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/planbuilder" "github.com/dolthub/go-mysql-server/sql/stats" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" @@ -68,7 +69,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64) createdAt := row[schema.StatsCreatedAtTag].(time.Time) - typs := strings.Split(typesStr, ",") + typs := strings.Split(typesStr, "\n") for i, t := range typs { typs[i] = strings.TrimSpace(t) } @@ -90,7 +91,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St mcvs := make([]sql.Row, numMcvs) for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { - if v != nil { + if v != nil && v != "" { row, err := iter.ParseRow(v.(string)) if err != nil { return nil, err @@ -136,7 +137,7 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St } if currentStat.Statistic.Hist == nil { - currentStat.Statistic.Typs, err = stats.ParseTypeStrings(typs) + currentStat.Statistic.Typs, err = parseTypeStrings(typs) if err != nil { return nil, err } @@ -180,6 +181,18 @@ func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.St return qualToStats, nil } +func parseTypeStrings(typs []string) ([]sql.Type, error) { + var ret []sql.Type + for _, typ := range typs { + ct, err := planbuilder.ParseColumnTypeString(typ) + if err != nil { + return nil, err + } + ret = append(ret, ct) + } + return ret, nil +} + func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) { dSess := dsess.DSessFromSess(ctx.Session) roots, ok := dSess.GetRoots(ctx, qual.Db()) @@ -216,7 +229,7 @@ func loadLowerBound(ctx *sql.Context, qual sql.StatQualifier) (sql.Row, error) { } firstKey := keyBuilder.Build(buffPool) - var firstRow sql.Row + firstRow := make(sql.Row, keyBuilder.Desc.Count()) for i := 0; i < keyBuilder.Desc.Count(); i++ { firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) if err != nil { diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go index 3e87ed0104d..e5719002be8 100644 --- a/go/libraries/doltcore/sqle/statsnoms/write.go +++ b/go/libraries/doltcore/sqle/statsnoms/write.go @@ -100,7 +100,7 @@ func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *stat sep := "" for _, t := range dStats.Statistic.Typs { typesB.WriteString(sep + t.String()) - sep = "," + sep = "\n" } typesStr := typesB.String() diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go index ccbf60bc0c8..1c8f2969010 100644 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ b/go/libraries/doltcore/sqle/statspro/analyze.go @@ -29,12 +29,62 @@ import ( "github.com/dolthub/dolt/go/store/prolly/tree" ) +const ( + boostrapRowLimit = 2e6 +) + func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error { dSess := dsess.DSessFromSess(ctx.Session) branch, err := dSess.GetBranch() if err != nil { return err } + return p.RefreshTableStatsWithBranch(ctx, table, db, branch) +} + +func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error { + dSess := dsess.DSessFromSess(ctx.Session) + branches := p.getStatsBranches(ctx) + var rows uint64 + for _, branch := range branches { + sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch)) + if err != nil { + if sql.ErrDatabaseNotFound.Is(err) { + // default branch is not valid + continue + } + return err + } + tables, err := sqlDb.GetTableNames(ctx) + if err != nil { + return err + } + for _, table := range tables { + sqlTable, _, err := GetLatestTable(ctx, table, sqlDb) + if err != nil { + return err + } + + if st, ok := sqlTable.(sql.StatisticsTable); ok { + cnt, ok, err := st.RowCount(ctx) + if ok && err == nil { + rows += cnt + } + } + if rows >= boostrapRowLimit { + return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE \" or \"call dolt_stats_restart()\" to collect statistics", db) + } + + if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil { + return err + } + } + } + return nil +} + +func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error { + dSess := dsess.DSessFromSess(ctx.Session) sqlDb, err := dSess.Provider().Database(ctx, p.branchQualifiedDatabase(db, branch)) if err != nil { @@ -143,7 +193,16 @@ func (p *Provider) branchQualifiedDatabase(db, branch string) string { // GetLatestTable will get the WORKING root table for the current database/branch func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) { - sqlTable, ok, err := sqlDb.(sqle.Database).GetTableInsensitive(ctx, tableName) + var db sqle.Database + switch d := sqlDb.(type) { + case sqle.Database: + db = d + case sqle.ReadReplicaDatabase: + db = d.Database + default: + return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb) + } + sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName) if err != nil { return nil, nil, err } diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go index 248f4d62c1f..e03cc19a29a 100644 --- a/go/libraries/doltcore/sqle/statspro/configure.go +++ b/go/libraries/doltcore/sqle/statspro/configure.go @@ -43,6 +43,7 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co branches := p.getStatsBranches(loadCtx) var autoEnabled bool + var startupEnabled bool var intervalSec time.Duration var thresholdf64 float64 if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) { @@ -55,6 +56,8 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads)) p.pro.DropDatabaseHooks = append(p.pro.DropDatabaseHooks, NewStatsDropDatabaseHook(p)) + } else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) { + startupEnabled = true } eg, ctx := loadCtx.NewErrgroup() @@ -69,7 +72,6 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co } else { err = fmt.Errorf("%w: %v", ErrFailedToLoad, r) } - return } }() @@ -84,6 +86,10 @@ func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Co } if autoEnabled { return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches) + } else if startupEnabled { + if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil { + return err + } } return nil }) @@ -109,7 +115,7 @@ func (p *Provider) getStatsBranches(ctx *sql.Context) []string { } if branches == nil { - branches = []string{p.pro.DefaultBranch()} + branches = append(branches, p.pro.DefaultBranch()) } return branches } diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go index 5df7d441a58..5f069857f95 100644 --- a/go/libraries/doltcore/sqle/system_variables.go +++ b/go/libraries/doltcore/sqle/system_variables.go @@ -219,6 +219,13 @@ func AddDoltSystemVariables() { Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled), Default: int8(0), }, + &sql.MysqlSystemVariable{ + Name: dsess.DoltStatsBootstrapEnabled, + Dynamic: true, + Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), + Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled), + Default: int8(1), + }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsMemoryOnly, Dynamic: true, diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats index 31e2f42c6f9..182a19b7d9d 100644 --- a/integration-tests/bats/stats.bats +++ b/integration-tests/bats/stats.bats @@ -41,6 +41,9 @@ teardown() { @test "stats: empty initial stats" { cd repo2 + # disable bootstrap, can only make stats with ANALYZE or background thread + dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" + dolt sql -q "insert into xy values (0,0), (1,1)" start_sql_server @@ -88,6 +91,16 @@ teardown() { [ "${lines[1]}" = "8" ] } +@test "stats: bootrap on engine startup" { + cd repo2 + + dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;" + dolt sql -q "insert into xy values (0,0), (1,1)" + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [ "${lines[1]}" = "2" ] +} + @test "stats: deletes refresh" { cd repo2 @@ -219,20 +232,15 @@ teardown() { @test "stats: multi db" { cd repo1 + dolt sql -q "insert into ab values (0,0), (1,1)" cd ../repo2 + dolt sql -q "insert into ab values (0,0), (1,1)" dolt sql -q "insert into xy values (0,0), (1,1)" cd .. - start_sql_server - sleep 1 - stop_sql_server - - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "0" ] dolt sql -q "SET @@persist.dolt_stats_auto_refresh_enabled = 1;" dolt sql -q "SET @@persist.dolt_stats_auto_refresh_threshold = 0.5" @@ -327,3 +335,40 @@ SQL [ "${lines[2]}" = "2" ] } +@test "stats: boostrap abort over 1mm rows" { + cat < data.py +import random +import os + +rows = 2*1000*1000+1 + +def main(): + f = open("data.csv","w+") + f.write("id,hostname\n") + + for i in range(rows): + hostname = random.getrandbits(100) + f.write(f"{i},{hostname}\n") + if i % (500*1000) == 0: + print("row :", i) + f.flush() + + f.close() + +if __name__ == "__main__": + main() +EOF + + mkdir repo3 + cd repo3 + python3 ../data.py + + dolt init + dolt sql -q "create table f (id int primary key, hostname int)" + dolt table import -u --continue f data.csv + + run dolt sql -r csv -q "select count(*) from dolt_statistics" + [ "$status" -eq 0 ] + [[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false + [ "${lines[2]}" = "0" ] +}