diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go
index 8f702fd98a0..2260cbfeedf 100644
--- a/go/cmd/dolt/commands/engine/sqlengine.go
+++ b/go/cmd/dolt/commands/engine/sqlengine.go
@@ -16,11 +16,6 @@ package engine
import (
"context"
- "fmt"
- "os"
- "strconv"
- "strings"
-
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/eventscheduler"
"github.com/dolthub/go-mysql-server/sql"
@@ -31,6 +26,9 @@ import (
_ "github.com/dolthub/go-mysql-server/sql/variables"
"github.com/dolthub/vitess/go/vt/sqlparser"
"github.com/sirupsen/logrus"
+ "os"
+ "strconv"
+ "strings"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
@@ -43,7 +41,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/config"
@@ -189,7 +186,13 @@ func NewSqlEngine(
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
})
- statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
+ var statsPro sql.StatsProvider
+ _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
+ if on, ok := enabled.(int8); ok && on == 1 {
+ statsPro = statspro.NewStatsCoord(pro, sqlEngine.NewDefaultContext, logrus.StandardLogger(), bThreads, mrEnv.GetEnv(mrEnv.GetFirstDatabase()))
+ } else {
+ statsPro = statspro.StatsNoop{}
+ }
engine.Analyzer.Catalog.StatsProvider = statsPro
engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
@@ -202,8 +205,10 @@
// configuring stats depends on sessionBuilder
// sessionBuilder needs ref to statsProv
- if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
- fmt.Fprintln(cli.CliErr, err)
+ if sc, ok := statsPro.(*statspro.StatsCoord); ok {
+ if err := sc.Init(ctx, dbs); err != nil {
+ logrus.Errorln(err)
+ }
}
// Load MySQL Db information
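
Note: with this change the engine selects its statistics provider at startup from the dolt_stats_enabled global and otherwise falls back to a no-op implementation. This is the null-object pattern: the rest of the engine never has to nil-check its provider. A minimal sketch of the pattern, with illustrative names (StatsProvider, liveStats, and noopStats are not Dolt's actual types):

    package main

    import "fmt"

    // StatsProvider stands in for sql.StatsProvider.
    type StatsProvider interface {
    	RowCount(table string) uint64
    }

    // liveStats is a hypothetical working provider.
    type liveStats struct{ counts map[string]uint64 }

    func (l liveStats) RowCount(table string) uint64 { return l.counts[table] }

    // noopStats mirrors the StatsNoop idea: it satisfies the interface
    // but reports nothing, so callers never branch on enablement.
    type noopStats struct{}

    func (noopStats) RowCount(string) uint64 { return 0 }

    func newStatsProvider(enabled bool) StatsProvider {
    	if enabled {
    		return liveStats{counts: map[string]uint64{"xy": 1000}}
    	}
    	return noopStats{}
    }

    func main() {
    	for _, on := range []bool{true, false} {
    		fmt.Println(on, newStatsProvider(on).RowCount("xy"))
    	}
    }
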
diff --git a/go/cmd/dolt/commands/sqlserver/server.go b/go/cmd/dolt/commands/sqlserver/server.go
index 33d253a377a..5844926d0f8 100644
--- a/go/cmd/dolt/commands/sqlserver/server.go
+++ b/go/cmd/dolt/commands/sqlserver/server.go
@@ -19,6 +19,7 @@ import (
"crypto/tls"
"errors"
"fmt"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"net"
"net/http"
"os"
@@ -260,23 +261,26 @@ func ConfigureServices(
var sqlEngine *engine.SqlEngine
InitSqlEngine := &svcs.AnonService{
InitF: func(ctx context.Context) (err error) {
- if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil {
- // Auto-stats is off by default for every command except
- // sql-server. Unless the config specifies a specific
- // behavior, enable server stats collection.
- sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1)
- } else if statsOn != "0" {
- // do not bootstrap if auto-stats enabled
- } else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil {
- // If we've disabled stats collection and config does not
- // specify bootstrap behavior, enable bootstrapping.
- sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1)
- }
sqlEngine, err = engine.NewSqlEngine(
ctx,
mrEnv,
config,
)
+ if err != nil {
+ return err
+ }
+ if sc, ok := sqlEngine.GetUnderlyingEngine().Analyzer.Catalog.StatsProvider.(*statspro.StatsCoord); ok {
+ sqlCtx, err := sqlEngine.NewDefaultContext(ctx)
+ if err != nil {
+ return err
+ }
+ if sc == nil {
+ return fmt.Errorf("unexpected nil stats coord")
+ }
+ if err = sc.Restart(sqlCtx); err != nil {
+ return err
+ }
+ }
return err
},
StopF: func() error {
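
Note: the sc == nil guard above is not dead code. In Go, an interface holding a typed nil pointer is itself non-nil, so the type assertion can succeed while the concrete pointer is nil. A self-contained illustration:

    package main

    import "fmt"

    type provider interface{ name() string }

    type coord struct{}

    func (c *coord) name() string { return "coord" }

    func main() {
    	var c *coord // a typed nil pointer
    	var p provider = c

    	fmt.Println(p == nil) // false: the interface carries a type, so it is non-nil

    	if sc, ok := p.(*coord); ok {
    		fmt.Println(sc == nil) // true: the assertion succeeded, but the pointer is nil
    	}
    }
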
diff --git a/go/libraries/doltcore/schema/statistic.go b/go/libraries/doltcore/schema/statistic.go
index 1879951e10b..88215a7443a 100644
--- a/go/libraries/doltcore/schema/statistic.go
+++ b/go/libraries/doltcore/schema/statistic.go
@@ -24,12 +24,12 @@ import (
const StatsVersion int64 = 1
const (
- StatsQualifierColName = "qualifier"
StatsDbColName = "database_name"
StatsTableColName = "table_name"
StatsIndexColName = "index_name"
- StatsPositionColName = "position"
+ StatsBranchName = "branch"
StatsCommitHashColName = "commit_hash"
+ StatsPrefixLenName = "prefix_len"
StatsRowCountColName = "row_count"
StatsDistinctCountColName = "distinct_count"
StatsNullCountColName = "null_count"
@@ -42,7 +42,7 @@ const (
StatsMcv2ColName = "mcv2"
StatsMcv3ColName = "mcv3"
StatsMcv4ColName = "mcv4"
- StatsMcvCountsColName = "mcvCounts"
+ StatsMcvCountsColName = "mcv_counts"
StatsVersionColName = "version"
)
@@ -52,6 +52,7 @@ const (
StatsIndexTag
StatsPositionTag
StatsVersionTag
+ StatsPrefixLenTag
StatsCommitHashTag
StatsRowCountTag
StatsDistinctCountTag
@@ -71,9 +72,9 @@ const (
func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
return sql.PrimaryKeySchema{
Schema: sql.Schema{
- &sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
- &sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
- &sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
+ &sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName},
+ &sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName},
+ &sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
@@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
&sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName},
},
- PkOrdinals: []int{0, 1},
}
}
@@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen()
func StatsTableDoltSchemaGen() Schema {
colColl := NewColCollection(
- NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}),
+ NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}),
+ NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}),
- NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}),
NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false),
NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false),
NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false),
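
Note: the Dolt-side schema's primary key is now (prefix_len, commit_hash) rather than (database, table, index, position), which makes stored buckets content-addressed by the chunk they summarize. A sketch of the idea with illustrative names (statsKey is not Dolt's actual storage type):

    package main

    import "fmt"

    // statsKey sketches the new bucket address: the commit hash of the
    // chunk a histogram bucket summarizes, plus the index prefix length
    // it was built for.
    type statsKey struct {
    	prefixLen  int64  // prefix_len column
    	commitHash string // commit_hash column
    }

    func main() {
    	seen := map[statsKey]int{}
    	// Identical chunks referenced from several branches collapse to a
    	// single entry, which is presumably what lets branches share buckets.
    	seen[statsKey{2, "abc123"}]++
    	seen[statsKey{2, "abc123"}]++
    	fmt.Println(seen) // map[{2 abc123}:2]
    }
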
diff --git a/go/libraries/doltcore/sqle/clusterdb/database.go b/go/libraries/doltcore/sqle/clusterdb/database.go
index dd741a9a205..4577d2f3c4d 100644
--- a/go/libraries/doltcore/sqle/clusterdb/database.go
+++ b/go/libraries/doltcore/sqle/clusterdb/database.go
@@ -162,6 +162,10 @@ func (db database) RequestedName() string {
return db.Name()
}
+func (db database) AliasedName() string {
+ return db.Name()
+}
+
type noopRepoStateWriter struct{}
var _ env.RepoStateWriter = noopRepoStateWriter{}
diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go
index f75e5f52997..10c5e154999 100644
--- a/go/libraries/doltcore/sqle/database.go
+++ b/go/libraries/doltcore/sqle/database.go
@@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
if err != nil {
return nil, false, err
}
+ if branch == "" {
+ branch = db.Revision()
+ }
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true
case doltdb.ProceduresTableName:
found = true
diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go
index 293e9d7be00..bea3f7fa059 100644
--- a/go/libraries/doltcore/sqle/database_provider.go
+++ b/go/libraries/doltcore/sqle/database_provider.go
@@ -966,7 +966,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua
}
}
- db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
+ db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
// preserve original user case in the case of not found
if sql.ErrDatabaseNotFound.Is(err) {
return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName)
@@ -1507,8 +1507,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b
return "", false, nil
}
-// revisionDbForBranch returns a new database that is tied to the branch named by revSpec
-func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
+// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec
+func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
static := staticRepoState{
branch: ref.NewBranchRef(revSpec),
RepoStateWriter: srcDb.DbData().Rsw,
diff --git a/go/libraries/doltcore/sqle/dprocedures/init.go b/go/libraries/doltcore/sqle/dprocedures/init.go
index 499d4209886..7603093e3ba 100644
--- a/go/libraries/doltcore/sqle/dprocedures/init.go
+++ b/go/libraries/doltcore/sqle/dprocedures/init.go
@@ -47,12 +47,14 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
{Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag},
{Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints},
- {Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)},
{Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)},
{Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)},
- {Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)},
- {Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)},
+ {Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)},
{Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)},
+ {Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)},
+ {Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
+ {Name: "dolt_stats_sync", Schema: statsFuncSchema, Function: statsFunc(statsBranchSync)},
+ {Name: "dolt_stats_validate", Schema: statsFuncSchema, Function: statsFunc(statsValidate)},
}
// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
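
Note: the renamed procedures are called like any other Dolt stored procedure. A hedged sketch of driving them from a Go client with database/sql (the DSN is illustrative, and this assumes each procedure returns its single message column as a result set):

    package main

    import (
    	"database/sql"
    	"fmt"
    	"log"

    	_ "github.com/go-sql-driver/mysql"
    )

    func main() {
    	// The DSN is illustrative; point it at a running dolt sql-server.
    	db, err := sql.Open("mysql", "root@tcp(127.0.0.1:3306)/mydb")
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer db.Close()

    	// Block until the stats queue has fully cycled, then inspect state.
    	if _, err := db.Exec("call dolt_stats_wait()"); err != nil {
    		log.Fatal(err)
    	}
    	var info string
    	if err := db.QueryRow("call dolt_stats_info()").Scan(&info); err != nil {
    		log.Fatal(err)
    	}
    	fmt.Println(info) // JSON: dbCnt, cachedBucketCnt, gcCounter, ...
    }
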
diff --git a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
index 139bec5e5d2..18ea0fe6cd1 100644
--- a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
+++ b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
@@ -15,13 +15,14 @@
package dprocedures
import (
+ "context"
+ "encoding/json"
"fmt"
"strings"
"github.com/dolthub/go-mysql-server/sql"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
@@ -35,7 +36,12 @@ var statsFuncSchema = []*sql.Column{
}
func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
- return func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
+ return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ err = fmt.Errorf("stats function unexpectedly panicked: %v", r)
+ }
+ }()
res, err := fn(ctx)
if err != nil {
return nil, err
@@ -44,124 +50,184 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con
}
}
-// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for
+type StatsInfo struct {
+ DbCnt int `json:"dbCnt"`
+ ReadCnt int `json:"readCnt"`
+ Active bool `json:"active"`
+ DbSeedCnt int `json:"dbSeedCnt"`
+ EstBucketCnt int `json:"estBucketCnt"`
+ CachedBucketCnt int `json:"cachedBucketCnt"`
+ StatCnt int `json:"statCnt"`
+ GcCounter int `json:"gcCounter"`
+ BranchCounter int `json:"branchCounter"`
+}
+
+func (si StatsInfo) ToJson() string {
+ jsonData, err := json.Marshal(si)
+ if err != nil {
+ return ""
+ }
+ return string(jsonData)
+}
+
+// ToggableStats is a sql.StatsProvider that exposes hooks for
// observing and manipulating background database auto refresh threads.
-type AutoRefreshStatsProvider interface {
+type ToggableStats interface {
sql.StatsProvider
- CancelRefreshThread(string)
- StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error
- ThreadStatus(string) string
- Prune(ctx *sql.Context) error
+ FlushQueue(ctx context.Context) error
+ Restart(context.Context) error
+ Info() StatsInfo
Purge(ctx *sql.Context) error
+ WaitForDbSync(ctx *sql.Context) error
+ Gc(ctx *sql.Context) error
+ BranchSync(ctx *sql.Context) error
+ ValidateState(ctx context.Context) error
+ Init(context.Context, []dsess.SqlDatabase) error
}
type BranchStatsProvider interface {
DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error
}
-// statsRestart tries to stop and then start a refresh thread
+// statsRestart flushes the current job queue and re-initializes all
+// statistics databases.
func statsRestart(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
- if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
- pro := dSess.Provider()
- newFs, err := pro.FileSystemForDatabase(dbName)
+ if afp, ok := statsPro.(ToggableStats); ok {
+ err := afp.FlushQueue(ctx)
if err != nil {
- return nil, fmt.Errorf("failed to restart stats collection: %w", err)
+ return nil, fmt.Errorf("failed to restart collection: %w", err)
}
- dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO")
-
- sqlDb, ok := pro.BaseDatabase(ctx, dbName)
- if !ok {
- return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName)
+ dbs := dSess.Provider().AllDatabases(ctx)
+ var sqlDbs []dsess.SqlDatabase
+ for _, db := range dbs {
+ sqlDb, ok := db.(dsess.SqlDatabase)
+ if ok {
+ sqlDbs = append(sqlDbs, sqlDb)
+ }
}
-
- afp.CancelRefreshThread(dbName)
-
- err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb)
- if err != nil {
- return nil, fmt.Errorf("failed to restart collection: %w", err)
+ if err := afp.Init(ctx, sqlDbs); err != nil {
+ return nil, err
}
+ if err := afp.Restart(ctx); err != nil {
+ return nil, err
+ }
+
return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsStatus returns the last update for a stats thread
-func statsStatus(ctx *sql.Context) (interface{}, error) {
+// statsInfo returns a JSON summary of the stats provider's current state
+func statsInfo(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
pro := dSess.StatsProvider()
- if afp, ok := pro.(AutoRefreshStatsProvider); ok {
- return afp.ThreadStatus(dbName), nil
+ if afp, ok := pro.(ToggableStats); ok {
+ info := afp.Info()
+ return info.ToJson(), nil
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsStop cancels a refresh thread
-func statsStop(ctx *sql.Context) (interface{}, error) {
+// statsWait blocks until the job queue executes two full loops
+// of instructions, which will (1) pick up and (2) commit new
+// sets of index-bucket dependencies.
+func statsWait(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- statsPro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
-
- if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
- afp.CancelRefreshThread(dbName)
- return fmt.Sprintf("stopped thread: %s", dbName), nil
+ pro := dSess.StatsProvider()
+ if afp, ok := pro.(ToggableStats); ok {
+ err := afp.WaitForDbSync(ctx)
+ return nil, err
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsDrop deletes the stats ref
-func statsDrop(ctx *sql.Context) (interface{}, error) {
+// statsGc rewrites the cache to include only objects reachable
+// from the current root value.
+func statsGc(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
pro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
-
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil, fmt.Errorf("failed to drop stats: %w", err)
+ if afp, ok := pro.(ToggableStats); ok {
+ return nil, afp.Gc(ctx)
}
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
+}
- if afp, ok := pro.(AutoRefreshStatsProvider); ok {
- // currently unsafe to drop stats while running refresh
- afp.CancelRefreshThread(dbName)
- }
- if bsp, ok := pro.(BranchStatsProvider); ok {
- err := bsp.DropBranchDbStats(ctx, branch, dbName, true)
- if err != nil {
- return nil, fmt.Errorf("failed to drop stats: %w", err)
- }
+// statsBranchSync updates database branch tracking based on the
+// most recent session state.
+func statsBranchSync(ctx *sql.Context) (interface{}, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ pro := dSess.StatsProvider()
+ if afp, ok := pro.(ToggableStats); ok {
+ return nil, afp.BranchSync(ctx)
}
-
- return fmt.Sprintf("deleted stats ref for %s", dbName), nil
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsPrune replaces the current disk contents with only the currently
-// tracked in memory statistics.
-func statsPrune(ctx *sql.Context) (interface{}, error) {
+// statsValidate returns inconsistencies if the kv cache is out of date
+func statsValidate(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
- if !ok {
- return nil, fmt.Errorf("stats not persisted, cannot purge")
+ pro := dSess.StatsProvider()
+ if afp, ok := pro.(ToggableStats); ok {
+ if err := afp.ValidateState(ctx); err != nil {
+ return err.Error(), nil
+ }
+ return "no inconsistencies found", nil
}
- if err := pro.Prune(ctx); err != nil {
- return "failed to prune stats databases", err
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
+}
+
+// statsStop flushes the job queue and leaves the stats provider
+// in a paused state.
+func statsStop(ctx *sql.Context) (interface{}, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ statsPro := dSess.StatsProvider()
+ dbName := strings.ToLower(ctx.GetCurrentDatabase())
+
+ if afp, ok := statsPro.(ToggableStats); ok {
+ if err := afp.FlushQueue(ctx); err != nil {
+ return nil, err
+ }
+ return fmt.Sprintf("stopped thread: %s", dbName), nil
}
- return "pruned all stats databases", nil
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsPurge removes the stats database from disk
+// statsPurge flushes the job queue, deletes the current caches
+// and storage targets, re-initializes the tracked database
+// states, and returns with stats collection paused.
func statsPurge(ctx *sql.Context) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
+ pro, ok := dSess.StatsProvider().(ToggableStats)
if !ok {
return nil, fmt.Errorf("stats not persisted, cannot purge")
}
+
+ err := pro.FlushQueue(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("failed to flush queue: %w", err)
+ }
+
if err := pro.Purge(ctx); err != nil {
- return "failed to purged databases", err
+ return "failed to purge stats", err
+ }
+
+ dbs := dSess.Provider().AllDatabases(ctx)
+ var sqlDbs []dsess.SqlDatabase
+ for _, db := range dbs {
+ sqlDb, ok := db.(dsess.SqlDatabase)
+ if ok {
+ sqlDbs = append(sqlDbs, sqlDb)
+ }
}
+
+ // init is currently the safest way to reset state
+ if err := pro.Init(ctx, sqlDbs); err != nil {
+ return "failed to purge stats", err
+ }
+
return "purged all database stats", nil
}
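
Note: statsFunc above converts panics inside stats procedures into ordinary errors by assigning to the named return from a deferred recover. The pattern in isolation:

    package main

    import "fmt"

    // safeCall demonstrates the wrapper used by statsFunc: a deferred
    // recover assigns to the named return, turning a panic into an error.
    func safeCall(fn func() (string, error)) (res string, err error) {
    	defer func() {
    		if r := recover(); r != nil {
    			err = fmt.Errorf("unexpected panic: %v", r)
    		}
    	}()
    	return fn()
    }

    func main() {
    	_, err := safeCall(func() (string, error) {
    		var m map[string]int
    		m["boom"] = 1 // assignment to a nil map panics
    		return "ok", nil
    	})
    	fmt.Println(err) // unexpected panic: assignment to entry in nil map
    }
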
diff --git a/go/libraries/doltcore/sqle/dsess/session_db_provider.go b/go/libraries/doltcore/sqle/dsess/session_db_provider.go
index 3d4969bb114..05e72971747 100644
--- a/go/libraries/doltcore/sqle/dsess/session_db_provider.go
+++ b/go/libraries/doltcore/sqle/dsess/session_db_provider.go
@@ -122,6 +122,7 @@ type SqlDatabase interface {
sql.Database
sql.SchemaDatabase
sql.DatabaseSchema
+ sql.AliasedDatabase
SessionDatabase
RevisionDatabase
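
Note: SqlDatabase now embeds sql.AliasedDatabase so callers can recover the base (alias) name from a revision-qualified database; the new harness code relies on this when resolving a database's filesystem. A sketch of the idea with hypothetical types:

    package main

    import "fmt"

    // revisionDb models a database pinned to a branch: Name() is
    // revision-qualified, while AliasedName() returns the base name that
    // on-disk resources are keyed by (mirroring sql.AliasedDatabase).
    type revisionDb struct{ base, branch string }

    func (d revisionDb) Name() string        { return d.base + "/" + d.branch }
    func (d revisionDb) AliasedName() string { return d.base }

    func main() {
    	db := revisionDb{base: "mydb", branch: "main"}
    	fmt.Println(db.Name())        // mydb/main
    	fmt.Println(db.AliasedName()) // mydb
    }
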
diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go
index 848ed2218ec..0d8e0fd4edb 100644
--- a/go/libraries/doltcore/sqle/dsess/variables.go
+++ b/go/libraries/doltcore/sqle/dsess/variables.go
@@ -59,12 +59,12 @@ const (
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs"
- DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled"
- DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled"
- DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
- DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
- DoltStatsMemoryOnly = "dolt_stats_memory_only"
- DoltStatsBranches = "dolt_stats_branches"
+ DoltStatsEnabled = "dolt_stats_enabled"
+ DoltStatsMemoryOnly = "dolt_stats_memory_only"
+ DoltStatsBranches = "dolt_stats_branches"
+ DoltStatsJobInterval = "dolt_stats_job_interval"
+ DoltStatsBranchInterval = "dolt_stats_branch_interval"
+ DoltStatsGCInterval = "dolt_stats_gc_interval"
)
const URLTemplateDatabasePlaceholder = "{database}"
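
Note: the auto-refresh variable family is replaced by the variables above. A minimal sketch of toggling them programmatically, mirroring the SetGlobal call this PR uses in dolt_harness.go's Close (the int8 value for DoltStatsMemoryOnly is an assumption here):

    package example

    import (
    	"github.com/dolthub/go-mysql-server/sql"

    	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    )

    // enableStats turns stats collection on and keeps it persistent,
    // using the same SetGlobal pattern as elsewhere in this PR.
    func enableStats() {
    	sql.SystemVariables.SetGlobal(dsess.DoltStatsEnabled, int8(1))
    	sql.SystemVariables.SetGlobal(dsess.DoltStatsMemoryOnly, int8(0))
    }
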
diff --git a/go/libraries/doltcore/sqle/dtables/statistics_table.go b/go/libraries/doltcore/sqle/dtables/statistics_table.go
index fda463e7e49..f73cfaf192b 100644
--- a/go/libraries/doltcore/sqle/dtables/statistics_table.go
+++ b/go/libraries/doltcore/sqle/dtables/statistics_table.go
@@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) {
}
type BranchStatsProvider interface {
- GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error)
+ GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error)
}
// RowCount implements sql.StatisticsTable
@@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) {
// PartitionRows is a sql.Table interface function that gets a row iterator for a partition
func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- statsPro := dSess.StatsProvider().(BranchStatsProvider)
+ statsPro, ok := dSess.StatsProvider().(BranchStatsProvider)
+ if !ok {
+ return sql.RowsToRowIter(), nil
+ }
var dStats []sql.Statistic
for _, table := range st.tableNames {
dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table)
if err != nil {
return nil, err
}
- dStats = append(dStats, dbStats...)
+ for _, s := range dbStats {
+ dStats = append(dStats, s)
+ }
}
return stats.NewStatsIter(ctx, dStats...)
}
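
Note: the element-wise append loop above is required because Go slices are not covariant: a []*stats.Statistic cannot be assigned or appended wholesale to []sql.Statistic, even though *stats.Statistic implements the interface. A minimal illustration:

    package main

    import "fmt"

    type animal interface{ sound() string }

    type dog struct{}

    func (dog) sound() string { return "woof" }

    func main() {
    	dogs := []dog{{}, {}}

    	var animals []animal
    	// animals = dogs // compile error: []dog is not []animal
    	for _, d := range dogs {
    		animals = append(animals, d) // element-wise conversion is required
    	}
    	fmt.Println(len(animals), animals[0].sound())
    }
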
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
index ac958a8084e..d1591f58636 100644
--- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
@@ -17,6 +17,7 @@ package enginetest
import (
"context"
"fmt"
+ "github.com/dolthub/dolt/go/libraries/doltcore/ref"
"os"
"runtime"
"sync"
@@ -1681,11 +1682,6 @@ func TestStatsStorage(t *testing.T) {
RunStatsStorageTests(t, h)
}
-func TestStatsIOWithoutReload(t *testing.T) {
- h := newDoltEnginetestHarness(t)
- RunStatsIOTestsWithoutReload(t, h)
-}
-
func TestJoinStats(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunJoinStatsTests(t, h)
@@ -1971,22 +1967,19 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) {
// Setting an interval of 0 and a threshold of 0 will result
// in the stats being updated after every operation
- intervalSec := time.Duration(0)
- thresholdf64 := 0.
- bThreads := sql.NewBackgroundThreads()
- branches := []string{"main"}
- statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider)
+ statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsCoord)
// it is important to use new sessions for this test, to avoid working root conflicts
readCtx := enginetest.NewSession(harness)
writeCtx := enginetest.NewSession(harness)
refreshCtx := enginetest.NewSession(harness)
- newCtx := func(context.Context) (*sql.Context, error) {
- return refreshCtx, nil
- }
- err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches)
+ fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName())
+ require.NoError(t, err)
+
+ done, err := statsProv.Add(refreshCtx, sqlDb, ref.NewBranchRef("main"), fs)
require.NoError(t, err)
+ <-done
execQ := func(ctx *sql.Context, q string, id int, tag string) {
_, iter, _, err := engine.Query(ctx, q)
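
Note: the test now synchronizes with the coordinator through the done channel returned by Add rather than driving its own refresh loop. A generic sketch of that hand-off (the worker here is illustrative, not the StatsCoord implementation):

    package main

    import "fmt"

    // add enqueues work and returns a channel that is closed once the
    // work has been fully processed, mirroring the done channel that
    // StatsCoord.Add returns in this PR.
    func add(jobs chan<- func(), work func()) <-chan struct{} {
    	done := make(chan struct{})
    	jobs <- func() { work(); close(done) }
    	return done
    }

    func main() {
    	jobs := make(chan func(), 1)
    	go func() { // single background worker
    		for j := range jobs {
    			j()
    		}
    	}()

    	done := add(jobs, func() { fmt.Println("stats collected") })
    	<-done // block until the background pass completes
    	fmt.Println("safe to assert on stats now")
    }
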
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
index efd221635f4..d53dc74921a 100755
--- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
@@ -268,7 +268,6 @@ func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) {
}
defer harness.Close()
- sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0)
enginetest.TestQueryPlans(t, harness, queries.PlanTests)
}
@@ -1562,27 +1561,12 @@ func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
- defer h.Close()
e := mustNewEngine(t, h)
if enginetest.IsServerEngine(e) {
return
}
defer e.Close()
- TestProviderReloadScriptWithEngine(t, e, h, script)
- }()
- }
-}
-
-func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
- for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
- func() {
- h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
- e := mustNewEngine(t, h)
- if enginetest.IsServerEngine(e) {
- return
- }
- defer e.Close()
enginetest.TestScriptWithEngine(t, e, h, script)
}()
}
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
index c599c61da79..7a4f9cec641 100644
--- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
@@ -17,10 +17,7 @@ package enginetest
import (
"context"
"fmt"
- "runtime"
- "strings"
- "testing"
-
+ "github.com/dolthub/dolt/go/libraries/doltcore/ref"
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/enginetest"
"github.com/dolthub/go-mysql-server/enginetest/scriptgen/setup"
@@ -29,6 +26,9 @@ import (
"github.com/dolthub/go-mysql-server/sql/mysql_db"
"github.com/dolthub/go-mysql-server/sql/rowexec"
"github.com/stretchr/testify/require"
+ "runtime"
+ "strings"
+ "testing"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
@@ -36,7 +36,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -46,7 +45,7 @@ import (
type DoltHarness struct {
t *testing.T
provider dsess.DoltDatabaseProvider
- statsPro sql.StatsProvider
+ statsPro *statspro.StatsCoord
multiRepoEnv *env.MultiRepoEnv
session *dsess.DoltSession
branchControl *branch_control.Controller
@@ -246,13 +245,23 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
d.gcSafepointController = dsess.NewGCSafepointController()
- statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
- d.statsPro = statsProv
-
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController)
require.NoError(t, err)
+ sqlCtx := enginetest.NewContext(d)
+ bThreads := sql.NewBackgroundThreads()
+
+ ctxGen := func(ctx context.Context) (*sql.Context, error) {
+ return d.NewContext(), nil
+ }
+ statsPro := statspro.NewStatsCoord(doltProvider, ctxGen, sqlCtx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
+ err = statsPro.Restart(ctx)
+ if err != nil {
+ return nil, err
+ }
+ d.statsPro = statsPro
+
e, err := enginetest.NewEngine(t, d, d.provider, d.setupData, d.statsPro)
if err != nil {
return nil, err
@@ -260,8 +269,8 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
e.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
d.engine = e
- sqlCtx := enginetest.NewContext(d)
databases := pro.AllDatabases(sqlCtx)
+
d.setupDbs = make(map[string]struct{})
var dbs []string
for _, db := range databases {
@@ -281,24 +290,23 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
require.NoError(t, err)
}
- if d.configureStats {
- bThreads := sql.NewBackgroundThreads()
- e = e.WithBackgroundThreads(bThreads)
+ e = e.WithBackgroundThreads(bThreads)
+ if d.configureStats {
dSess := dsess.DSessFromSess(sqlCtx.Session)
dbCache := dSess.DatabaseCache(sqlCtx)
-
dsessDbs := make([]dsess.SqlDatabase, len(dbs))
for i, dbName := range dbs {
dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName)
- }
-
- ctxFact := func(context.Context) (*sql.Context, error) {
- sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
- return sql.NewContext(context.Background(), sql.WithSession(sess)), nil
- }
- if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil {
- return nil, err
+ fs, err := doltProvider.FileSystemForDatabase(dsessDbs[i].AliasedName())
+ if err != nil {
+ return nil, err
+ }
+ done, err := statsPro.Add(sqlCtx, dsessDbs[i], ref.NewBranchRef("main"), fs)
+ if err != nil {
+ return nil, err
+ }
+ <-done
}
statsOnlyQueries := filterStatsOnlyQueries(d.setupData)
@@ -309,13 +317,20 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
}
// Reset the mysql DB table to a clean state for this new engine
+ ctx := enginetest.NewContext(d)
+
d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb()
d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount()
- d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
- var err error
- sqlCtx := enginetest.NewContext(d)
- e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
+ ctxGen := func(ctx context.Context) (*sql.Context, error) {
+ return d.NewContext(), nil
+ }
+ bThreads := sql.NewBackgroundThreads()
+ statsPro := statspro.NewStatsCoord(d.provider.(*sqle.DoltDatabaseProvider), ctxGen, ctx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
+ require.NoError(t, statsPro.Restart(ctx))
+ d.engine.Analyzer.Catalog.StatsProvider = statsPro
+
+ e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
// Get a fresh session after running setup scripts, since some setup scripts can change the session state
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -430,7 +445,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database {
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
require.True(d.t, ok)
d.provider = doltProvider
- d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -502,7 +516,10 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider {
func (d *DoltHarness) Close() {
d.closeProvider()
- sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0))
+ if d.statsPro != nil {
+ d.statsPro.Close()
+ }
+ sql.SystemVariables.SetGlobal(dsess.DoltStatsEnabled, int8(0))
}
func (d *DoltHarness) closeProvider() {
diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
index fedb7297d5f..d3c737619cb 100644
--- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go
+++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
@@ -16,18 +16,11 @@ package enginetest
import (
"fmt"
- "strings"
- "testing"
-
- gms "github.com/dolthub/go-mysql-server"
- "github.com/dolthub/go-mysql-server/enginetest"
+ "github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/go-mysql-server/enginetest/queries"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
- "github.com/stretchr/testify/require"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
+ "strings"
)
// fillerVarchar pushes the tree into level 3
@@ -510,8 +503,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "incremental stats deletes auto",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
"analyze table xy",
@@ -525,10 +516,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "delete from xy where x > 500",
},
{
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.1)",
+ Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -540,8 +528,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
// https://github.com/dolthub/dolt/issues/8504
Name: "alter index column type",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -569,78 +555,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{
},
},
},
- {
- Name: "differentiate table cases",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main'",
- "CREATE table XY (x bigint primary key, y varchar(16))",
- "insert into XY values (0,'0'), (1,'1'), (2,'2')",
- "analyze table XY",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- },
- },
- {
- Name: "deleted table loads OK",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main'",
- "CREATE table xy (x bigint primary key, y varchar(16))",
- "insert into xy values (0,'0'), (1,'1'), (2,'2')",
- "analyze table xy",
- "CREATE table uv (u bigint primary key, v varchar(16))",
- "insert into uv values (0,'0'), (1,'1'), (2,'2')",
- "analyze table uv",
- "drop table uv",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- },
- },
- {
- Name: "differentiate branch names",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main,feat'",
- "CREATE table xy (x bigint primary key, y varchar(16))",
- "insert into xy values (0,'0'), (1,'1'), (2,'2')",
- "analyze table xy",
- "call dolt_checkout('-b', 'feat')",
- "CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
- "insert into xy values (3,'3',3)",
- "analyze table xy",
- "call dolt_checkout('main')",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- {
- Query: "call dolt_checkout('feat')",
- },
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "3"}},
- },
- },
- },
{
Name: "drop primary key",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -657,10 +574,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "insert into xy values ('3', '3')",
},
{
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.2)",
+ Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -994,94 +908,6 @@ var StatProcTests = []queries.ScriptTest{
},
}
-// TestProviderReloadScriptWithEngine runs the test script given with the engine provided.
-func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) {
- ctx := enginetest.NewContext(harness)
- err := enginetest.CreateNewConnectionForServerEngine(ctx, e)
- require.NoError(t, err, nil)
-
- t.Run(script.Name, func(t *testing.T) {
- for _, statement := range script.SetUpScript {
- if sh, ok := harness.(enginetest.SkippingHarness); ok {
- if sh.SkipQueryTest(statement) {
- t.Skip()
- }
- }
- ctx = ctx.WithQuery(statement)
- enginetest.RunQueryWithContext(t, e, harness, ctx, statement)
- }
-
- assertions := script.Assertions
- if len(assertions) == 0 {
- assertions = []queries.ScriptTestAssertion{
- {
- Query: script.Query,
- Expected: script.Expected,
- ExpectedErr: script.ExpectedErr,
- ExpectedIndexes: script.ExpectedIndexes,
- },
- }
- }
-
- {
- // reload provider, get disk stats
- eng, ok := e.(*gms.Engine)
- if !ok {
- t.Errorf("expected *gms.Engine but found: %T", e)
- }
-
- branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
- brCopy := make([]string, len(branches))
- copy(brCopy, branches)
- err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
- require.NoError(t, err)
- for _, branch := range brCopy {
- err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
- require.NoError(t, err)
- }
- }
-
- for _, assertion := range assertions {
- t.Run(assertion.Query, func(t *testing.T) {
- if assertion.NewSession {
- th, ok := harness.(enginetest.TransactionHarness)
- require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+
- "but harness doesn't implement TransactionHarness")
- ctx = th.NewSession()
- }
-
- if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) {
- t.Skip()
- }
- if assertion.Skip {
- t.Skip()
- }
-
- if assertion.ExpectedErr != nil {
- enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr)
- } else if assertion.ExpectedErrStr != "" {
- enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr)
- } else if assertion.ExpectedWarning != 0 {
- enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query,
- assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount,
- assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
- } else if assertion.SkipResultsCheck {
- enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query)
- } else if assertion.CheckIndexedAccess {
- enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
- } else {
- var expected = assertion.Expected
- if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine {
- // TODO: remove this check in the future
- expected = nil
- }
- enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil)
- }
- })
- }
- })
-}
-
func mustNewStatQual(s string) sql.StatQualifier {
qual, _ := sql.NewQualifierFromString(s)
return qual
diff --git a/go/libraries/doltcore/sqle/sqlddl_test.go b/go/libraries/doltcore/sqle/sqlddl_test.go
index 7e50899d881..5fee7a12c19 100644
--- a/go/libraries/doltcore/sqle/sqlddl_test.go
+++ b/go/libraries/doltcore/sqle/sqlddl_test.go
@@ -1128,6 +1128,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co
IsServerLocked: false,
}), sqlCtx
}
+
func TestIndexOverwrite(t *testing.T) {
ctx := context.Background()
dEnv := dtestutils.CreateTestEnv()
diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go
deleted file mode 100644
index 6a972a3b103..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/database.go
+++ /dev/null
@@ -1,488 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "context"
- "errors"
- "fmt"
- "path"
- "strings"
- "sync"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
- "github.com/dolthub/dolt/go/libraries/utils/earl"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
- "github.com/dolthub/dolt/go/store/datas"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/types"
-)
-
-func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory {
- return &NomsStatsFactory{dialPro: dialPro}
-}
-
-type NomsStatsFactory struct {
- dialPro dbfactory.GRPCDialProvider
-}
-
-var _ statspro.StatsFactory = NomsStatsFactory{}
-
-func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) {
- params := make(map[string]interface{})
- params[dbfactory.GRPCDialProviderParam] = sf.dialPro
-
- var urlPath string
- u, err := earl.Parse(prov.DbFactoryUrl())
- if u.Scheme == dbfactory.MemScheme {
- urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir)
- } else if u.Scheme == dbfactory.FileScheme {
- urlPath = doltdb.LocalDirDoltDB
- }
-
- statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
- if err != nil {
- return nil, err
- }
-
- var dEnv *env.DoltEnv
- exists, isDir := statsFs.Exists("")
- if !exists {
- err := statsFs.MkDirs("")
- if err != nil {
- return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
- }
-
- dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test")
- sess := dsess.DSessFromSess(ctx.Session)
- err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch())
- if err != nil {
- return nil, err
- }
- } else if !isDir {
- return nil, fmt.Errorf("file exists where the dolt stats directory should be")
- } else {
- dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "")
- }
-
- dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params)
-
- deaf := dEnv.DbEaFactory(ctx)
-
- tmpDir, err := dEnv.TempTableFilesDir()
- if err != nil {
- return nil, err
- }
- opts := editor.Options{
- Deaf: deaf,
- Tempdir: tmpDir,
- }
- statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
- if err != nil {
- return nil, err
- }
- return NewNomsStats(sourceDb, statsDb), nil
-}
-
-func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
- return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb}
-}
-
-type dbStats map[sql.StatQualifier]*statspro.DoltStats
-
-type NomsStatsDatabase struct {
- mu *sync.Mutex
- destDb dsess.SqlDatabase
- sourceDb dsess.SqlDatabase
- stats []dbStats
- branches []string
- tableHashes []map[string]hash.Hash
- schemaHashes []map[string]hash.Hash
- dirty []*prolly.MutableMap
-}
-
-var _ statspro.Database = (*NomsStatsDatabase)(nil)
-
-func (n *NomsStatsDatabase) Close() error {
- return n.destDb.DbData().Ddb.Close()
-}
-
-func (n *NomsStatsDatabase) Branches() []string {
- return n.branches
-}
-
-func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
- branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)
-
- dSess := dsess.DSessFromSess(ctx.Session)
- sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
- if err != nil {
- ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
- return nil
- }
- branchQDb, ok := sqlDb.(dsess.SqlDatabase)
- if !ok {
- return fmt.Errorf("branch/database not found: %s", branchQDbName)
- }
-
- if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
- return err
- } else if ok {
- ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
- if err := n.DeleteBranchStats(ctx, branch, true); err != nil {
- return err
- }
- }
-
- statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch)
- if errors.Is(err, doltdb.ErrNoStatistics) {
- return n.trackBranch(ctx, branch)
- } else if errors.Is(err, datas.ErrNoBranchStats) {
- return n.trackBranch(ctx, branch)
- } else if err != nil {
- return err
- }
- if cnt, err := statsMap.Count(); err != nil {
- return err
- } else if cnt == 0 {
- return n.trackBranch(ctx, branch)
- }
-
- doltStats, err := loadStats(ctx, branchQDb, statsMap)
- if err != nil {
- return err
- }
- n.branches = append(n.branches, branch)
- n.stats = append(n.stats, doltStats)
- n.dirty = append(n.dirty, nil)
- n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
- n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
- return nil
-}
-
-func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
- root, err := branchQDb.GetRoot(ctx)
- if err != nil {
- return false, err
- }
- tables, err := branchQDb.GetTableNames(ctx)
- if err != nil {
- return false, err
- }
-
- var keys []string
- var schHashes []hash.Hash
- for _, tableName := range tables {
- table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName})
- if err != nil {
- return false, err
- }
- if !ok {
- return false, nil
- }
- curHash, err := table.GetSchemaHash(ctx)
- if err != nil {
- return false, err
- }
-
- keys = append(keys, n.schemaTupleKey(branch, tableName))
- schHashes = append(schHashes, curHash)
- }
-
- ddb := n.destDb.DbData().Ddb
- var schemaChange bool
- for i, key := range keys {
- curHash := schHashes[i]
- if val, ok, err := ddb.GetTuple(ctx, key); err != nil {
- return false, err
- } else if ok {
- oldHash := hash.Parse(string(val))
- if !ok || !oldHash.Equal(curHash) {
- schemaChange = true
- break
- }
- }
- }
- if schemaChange {
- for _, key := range keys {
- ddb.DeleteTuple(ctx, key)
- }
- return true, nil
- }
- return false, nil
-}
-
-func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats {
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- return n.stats[i]
- }
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) {
- n.mu.Lock()
- defer n.mu.Unlock()
- stats := n.getBranchStats(branch)
- ret, ok := stats[qual]
- return ret, ok
-}
-
-func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier {
- n.mu.Lock()
- defer n.mu.Unlock()
- stats := n.getBranchStats(branch)
- var ret []sql.StatQualifier
- for qual, _ := range stats {
- ret = append(ret, qual)
- }
- return ret
-}
-
-func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
- var statsMap *prolly.MutableMap
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- n.stats[i][qual] = stats
- if n.dirty[i] == nil {
- if err := n.initMutable(ctx, i); err != nil {
- return err
- }
- }
- statsMap = n.dirty[i]
- }
- }
- if statsMap == nil {
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- statsMap = n.dirty[len(n.branches)-1]
- n.stats[len(n.branches)-1][qual] = stats
- }
-
- return n.replaceStats(ctx, statsMap, stats)
-}
-func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- return n.setStat(ctx, branch, qual, stats)
-}
-
-func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
- n.branches = append(n.branches, branch)
- n.stats = append(n.stats, make(dbStats))
- n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
- n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
-
- kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
- newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd)
- if err != nil {
- return err
- }
- n.dirty = append(n.dirty, newMap.Mutate())
- return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf())
-}
-
-func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error {
- statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i])
- if err != nil {
- return err
- }
- n.dirty[i] = statsMap.Mutate()
- return nil
-}
-
-func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- for _, qual := range quals {
- ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual)
- delete(n.stats[i], qual)
- }
- }
- }
-}
-
-func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch)
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- n.branches = append(n.branches[:i], n.branches[i+1:]...)
- n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
- n.stats = append(n.stats[:i], n.stats[i+1:]...)
- n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...)
- n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...)
- }
- }
- if flush {
- return n.destDb.DbData().Ddb.DropStatisics(ctx, branch)
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- var dbStat dbStats
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- // naive merge the new with old
- dbStat = n.stats[i]
- }
- }
-
- if dbStat == nil {
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- dbStat = n.stats[len(n.branches)-1]
- }
-
- if _, ok := dbStat[qual]; ok {
- oldChunks := dbStat[qual].Hist
- targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
- if err != nil {
- return err
- }
- newStat, err := dbStat[qual].WithHistogram(targetBuckets)
- if err != nil {
- return err
- }
- dbStat[qual] = newStat.(*statspro.DoltStats)
- } else {
- dbStat[qual] = statspro.NewDoltStats()
- }
- dbStat[qual].Chunks = targetHashes
- dbStat[qual].UpdateActive()
-
- // let |n.SetStats| update memory and disk
- return n.setStat(ctx, branch, qual, dbStat[qual])
-}
-
-func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- if n.dirty[i] != nil {
- flushedMap, err := n.dirty[i].Map(ctx)
- if err != nil {
- return err
- }
- n.dirty[i] = nil
- if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil {
- return err
- }
- return nil
- }
- }
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- return n.tableHashes[i][tableName]
- }
- }
- return hash.Hash{}
-}
-
-func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- n.tableHashes[i][tableName] = h
- break
- }
- }
-}
-
-func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- return n.schemaHashes[i][tableName], nil
- }
- if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
- if err != nil {
- return hash.Hash{}, err
- }
- h := hash.Parse(string(val))
- n.schemaHashes[i][tableName] = h
- return h, nil
- } else if err != nil {
- return hash.Hash{}, err
- }
- break
- }
- return hash.Hash{}, nil
-}
-
-func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
- return n.sourceDb.Name() + "/" + branch + "/" + tableName
-}
-
-func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
- n.mu.Lock()
- defer n.mu.Unlock()
- branchIdx := -1
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- branchIdx = i
- break
- }
- }
- if branchIdx < 0 {
- branchIdx = len(n.branches)
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- }
-
- n.schemaHashes[branchIdx][tableName] = h
- key := n.schemaTupleKey(branch, tableName)
- if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
- return err
- }
-
- return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String()))
-}
diff --git a/go/libraries/doltcore/sqle/statsnoms/iter.go b/go/libraries/doltcore/sqle/statsnoms/iter.go
deleted file mode 100644
index 59b9456eed6..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/iter.go
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/planbuilder"
- "gopkg.in/errgo.v2/errors"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-var ErrIncompatibleVersion = errors.New("client stats version mismatch")
-
-func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) {
- iter, err := m.IterAll(ctx)
- if err != nil {
- return nil, err
- }
- kd, vd := m.Descriptors()
- keyBuilder := val.NewTupleBuilder(kd)
- valueBuilder := val.NewTupleBuilder(vd)
- ns := m.NodeStore()
-
- return &statsIter{
- iter: iter,
- kb: keyBuilder,
- vb: valueBuilder,
- ns: ns,
- schemaName: schemaName,
- planb: planbuilder.New(ctx, nil, nil, nil),
- }, nil
-}
-
-// statsIter reads histogram buckets into string-compatible types.
-// Values that are SQL rows should be converted with statsIter.ParseRow.
-// todo: make a JSON compatible container for sql.Row w/ types so that we
-// can eagerly convert to sql.Row without sacrificing string printing.
-type statsIter struct {
- iter prolly.MapIter
- kb, vb *val.TupleBuilder
- ns tree.NodeStore
- planb *planbuilder.Builder
- currentQual string
- schemaName string
- currentTypes []sql.Type
-}
-
-var _ sql.RowIter = (*statsIter)(nil)
-
-func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) {
- k, v, err := s.iter.Next(ctx)
- if err != nil {
- return nil, err
- }
-
- // deserialize K, V
- version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns)
- if err != nil {
- return nil, err
- }
- if version != schema.StatsVersion {
- return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
- }
-
- var row sql.Row
- for i := 0; i < s.kb.Desc.Count(); i++ {
- f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns)
- if err != nil {
- return nil, err
- }
- row = append(row, f)
- }
-
- for i := 0; i < s.vb.Desc.Count(); i++ {
- f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns)
- if err != nil {
- return nil, err
- }
- row = append(row, f)
- }
-
- dbName := row[schema.StatsDbTag].(string)
- tableName := row[schema.StatsTableTag].(string)
- indexName := row[schema.StatsIndexTag].(string)
- position := row[schema.StatsPositionTag].(int64)
- _ = row[schema.StatsVersionTag]
- commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
- rowCount := row[schema.StatsRowCountTag].(int64)
- distinctCount := row[schema.StatsDistinctCountTag].(int64)
- nullCount := row[schema.StatsNullCountTag].(int64)
- columnsStr := row[schema.StatsColumnsTag].(string)
- typesStr := row[schema.StatsTypesTag].(string)
- upperBoundStr := row[schema.StatsUpperBoundTag].(string)
- upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64)
- createdAt := row[schema.StatsCreatedAtTag].(time.Time)
-
- typs := strings.Split(typesStr, "\n")
- for i, t := range typs {
- typs[i] = strings.TrimSpace(t)
- }
-
- qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName)
- if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) {
- s.currentQual = curQual
- s.currentTypes, err = parseTypeStrings(typs)
- if err != nil {
- return nil, err
- }
- }
-
- mcvCountsStr := row[schema.StatsMcvCountsTag].(string)
-
- numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
- mcvs := make([]string, numMcvs)
- for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
- if v != nil {
- mcvs[i] = v.(string)
- }
- }
-
- return sql.Row{
- dbName,
- tableName,
- indexName,
- int(position),
- version,
- commit.String(),
- uint64(rowCount),
- uint64(distinctCount),
- uint64(nullCount),
- columnsStr,
- typesStr,
- upperBoundStr,
- uint64(upperBoundCnt),
- createdAt,
- mcvs[0], mcvs[1], mcvs[2], mcvs[3],
- mcvCountsStr,
- }, nil
-}
-
-func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) {
- var row sql.Row
- for i, v := range strings.Split(rowStr, ",") {
- val, _, err := s.currentTypes[i].Convert(v)
- if err != nil {
- return nil, err
- }
- row = append(row, val)
- }
- return row, nil
-}
-
-func (s *statsIter) Close(context *sql.Context) error {
- return nil
-}
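Editorial note, not part of the diff: the deleted statsIter followed the standard sql.RowIter drain pattern. A minimal sketch, assuming the imports of the removed file and a stats prolly.Map from the surrounding load code (the schema name here is illustrative):

```go
func drainStats(ctx *sql.Context, m prolly.Map) error {
	iter, err := NewStatsIter(ctx, "public", m)
	if err != nil {
		return err
	}
	defer iter.Close(ctx)
	for {
		row, err := iter.Next(ctx)
		if errors.Is(err, io.EOF) {
			return nil
		} else if err != nil {
			return err
		}
		_ = row // one histogram bucket per row, in string-compatible form
	}
}
```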
diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go
deleted file mode 100644
index 72051260260..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/load.go
+++ /dev/null
@@ -1,308 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "errors"
- "fmt"
- "io"
- "strconv"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/planbuilder"
- "github.com/dolthub/go-mysql-server/sql/stats"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
- qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)
- schemaName := db.SchemaName()
- iter, err := NewStatsIter(ctx, schemaName, m)
- if err != nil {
- return nil, err
- }
- currentStat := statspro.NewDoltStats()
- invalidTables := make(map[string]bool)
- for {
- row, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return nil, err
- }
-
- // deserialize K, V
- dbName := row[schema.StatsDbTag].(string)
- tableName := row[schema.StatsTableTag].(string)
- indexName := row[schema.StatsIndexTag].(string)
- _ = row[schema.StatsVersionTag]
- commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
- rowCount := row[schema.StatsRowCountTag].(uint64)
- distinctCount := row[schema.StatsDistinctCountTag].(uint64)
- nullCount := row[schema.StatsNullCountTag].(uint64)
- columns := strings.Split(row[schema.StatsColumnsTag].(string), ",")
- typesStr := row[schema.StatsTypesTag].(string)
- boundRowStr := row[schema.StatsUpperBoundTag].(string)
- upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
- createdAt := row[schema.StatsCreatedAtTag].(time.Time)
-
- typs := strings.Split(typesStr, "\n")
- for i, t := range typs {
- typs[i] = strings.TrimSpace(t)
- }
-
- qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
- if _, ok := invalidTables[tableName]; ok {
- continue
- }
-
- if currentStat.Statistic.Qual.String() != qual.String() {
- if !currentStat.Statistic.Qual.Empty() {
- currentStat.UpdateActive()
- qualToStats[currentStat.Statistic.Qual] = currentStat
- }
-
- currentStat = statspro.NewDoltStats()
-
- tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
- if ok {
- currentStat.Statistic.Qual = qual
- currentStat.Statistic.Cols = columns
- currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
- if err != nil {
- return nil, err
- }
- } else if !ok {
- ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
- invalidTables[qual.Table()] = true
- continue
- } else if err != nil {
- return nil, err
- }
- }
-
- numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
-
- mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",")
- mcvCnts := make([]uint64, numMcvs)
- for i, v := range mcvCountsStr {
- if v == "" {
- continue
- }
- val, err := strconv.Atoi(v)
- if err != nil {
- return nil, err
- }
- mcvCnts[i] = uint64(val)
- }
-
- mcvs := make([]sql.Row, numMcvs)
- for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
- if v != nil && v != "" {
- row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb)
- if err != nil {
- return nil, err
- }
- mcvs[i] = row
- }
- }
-
- for i, v := range mcvCnts {
- if v == 0 {
- mcvs = mcvs[:i]
- mcvCnts = mcvCnts[:i]
- break
- }
- }
-
- if currentStat.Statistic.Hist == nil {
- currentStat.Statistic.Typs, err = parseTypeStrings(typs)
- if err != nil {
- return nil, err
- }
- currentStat.Statistic.Qual = qual
- }
-
- boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb)
- if err != nil {
- return nil, err
- }
-
- bucket := statspro.DoltBucket{
- Chunk: commit,
- Created: createdAt,
- Bucket: &stats.Bucket{
- RowCnt: uint64(rowCount),
- DistinctCnt: uint64(distinctCount),
- NullCnt: uint64(nullCount),
- McvVals: mcvs,
- McvsCnt: mcvCnts,
- BoundCnt: upperBoundCnt,
- BoundVal: boundRow,
- },
- }
-
- currentStat.Hist = append(currentStat.Hist, bucket)
- currentStat.Statistic.RowCnt += uint64(rowCount)
- currentStat.Statistic.DistinctCnt += uint64(distinctCount)
- currentStat.Statistic.NullCnt += uint64(rowCount)
- if currentStat.Statistic.Created.Before(createdAt) {
- currentStat.Statistic.Created = createdAt
- }
- }
- if !currentStat.Qualifier().Empty() {
- currentStat.UpdateActive()
- qualToStats[currentStat.Statistic.Qual] = currentStat
- }
- return qualToStats, nil
-}
-
-func parseTypeStrings(typs []string) ([]sql.Type, error) {
- var ret []sql.Type
- for _, typ := range typs {
- ct, err := planbuilder.ParseColumnTypeString(typ)
- if err != nil {
- return nil, err
- }
- ret = append(ret, ct)
- }
- return ret, nil
-}
-
-func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
- root, err := db.GetRoot(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- iat, ok := sqlTable.(sql.IndexAddressable)
- if !ok {
- return nil, nil, nil, sql.ColSet{}, nil
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- var sqlIdx sql.Index
- for _, i := range indexes {
- if strings.EqualFold(i.ID(), qual.Index()) {
- sqlIdx = i
- break
- }
- }
-
- if sqlIdx == nil {
- return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
- }
-
- fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
- if !ok {
- return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
- }
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- var idx durable.Index
- if qual.Index() == "primary" {
- idx, err = table.GetRowData(ctx)
- } else {
- idx, err = table.GetIndexRowData(ctx, qual.Index())
- }
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
- keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
- buffPool := prollyMap.NodeStore().Pool()
-
- if cnt, err := prollyMap.Count(); err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- } else if cnt == 0 {
- return nil, keyBuilder, nil, sql.ColSet{}, nil
- }
- firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- keyBytes, _, err := firstIter.Next(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- for i := range keyBuilder.Desc.Types {
- keyBuilder.PutRaw(i, keyBytes.GetField(i))
- }
-
- firstKey := keyBuilder.Build(buffPool)
- firstRow := make(sql.Row, keyBuilder.Desc.Count())
- for i := 0; i < keyBuilder.Desc.Count(); i++ {
- firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- }
- return firstRow, keyBuilder, fds, colset, nil
-}
-
-func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
- tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
- if err != nil {
- return nil, sql.ColSet{}, err
- } else if !ok {
- return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table())
- }
-
- iat, ok := tab.(sql.IndexAddressable)
- if !ok {
- return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table())
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return nil, sql.ColSet{}, err
- }
-
- var idx sql.Index
- for _, i := range indexes {
- if strings.EqualFold(i.ID(), qual.Index()) {
- idx = i
- break
- }
- }
-
- if idx == nil {
- return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
- }
-
- return stats.IndexFds(qual.Table(), tab.Schema(), idx)
-}
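Editorial note, not part of the diff: the deleted load path persisted index column types as a newline-separated string and re-parsed them on load through parseTypeStrings. A small sketch under that assumption (the type strings are illustrative MySQL column types):

```go
func exampleParseTypes() ([]sql.Type, error) {
	// returns a []sql.Type in the persisted column order
	return parseTypeStrings([]string{"int", "varchar(10)"})
}
```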
diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go
deleted file mode 100644
index c23e1d93dc8..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/write.go
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "context"
- "errors"
- "io"
- "strings"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/stats"
- "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-// About 200 20-byte addresses fit in a ~4k chunk. Chunk sizes
-// are approximate, but certainly shouldn't reach the square
-// of the expected size.
-const maxBucketFanout = 200 * 200
-
-var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64}
-
-func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if err := deleteIndexRows(ctx, statsMap, dStats); err != nil {
- return err
- }
- return putIndexRows(ctx, statsMap, dStats)
-}
-
-func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- sch := schema.StatsTableDoltSchema
- kd, _ := sch.GetMapDescriptors()
-
- keyBuilder := val.NewTupleBuilder(kd)
-
- qual := dStats.Qualifier()
- pool := statsMap.NodeStore().Pool()
-
- // delete previous entries for this index -> (db, table, index, pos)
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Table())
- keyBuilder.PutString(2, qual.Index())
- keyBuilder.PutInt64(3, 0)
- firstKey := keyBuilder.Build(pool)
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Table())
- keyBuilder.PutString(2, qual.Index())
- keyBuilder.PutInt64(3, maxBucketFanout+1)
- maxKey := keyBuilder.Build(pool)
-
-	// there is a limit on the number of buckets for a given index; iter
-	// will terminate before maxBucketFanout
- iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey)
- if err != nil {
- return err
- }
-
- for {
- k, _, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return err
- }
- err = statsMap.Put(ctx, k, nil)
- if err != nil {
- return err
- }
- }
- return nil
-}
-
-func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- sch := schema.StatsTableDoltSchema
- kd, vd := sch.GetMapDescriptors()
-
- keyBuilder := val.NewTupleBuilder(kd)
- valueBuilder := val.NewTupleBuilder(vd)
-
- qual := dStats.Qualifier()
- pool := statsMap.NodeStore().Pool()
-
- // now add new buckets
- typesB := strings.Builder{}
- sep := ""
- for _, t := range dStats.Statistic.Typs {
- typesB.WriteString(sep + t.String())
- sep = "\n"
- }
- typesStr := typesB.String()
-
- var pos int64
- for _, h := range dStats.Hist {
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Tab)
- keyBuilder.PutString(2, qual.Idx)
- keyBuilder.PutInt64(3, pos)
-
- valueBuilder.PutInt64(0, schema.StatsVersion)
- valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
- valueBuilder.PutInt64(2, int64(h.RowCount()))
- valueBuilder.PutInt64(3, int64(h.DistinctCount()))
- valueBuilder.PutInt64(4, int64(h.NullCount()))
- valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
- valueBuilder.PutString(6, typesStr)
- boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb)
- if err != nil {
- return err
- }
- valueBuilder.PutString(7, string(boundRow))
- valueBuilder.PutInt64(8, int64(h.BoundCount()))
- valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
- for i, r := range h.Mcvs() {
- mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb)
- if err != nil {
- return err
- }
- valueBuilder.PutString(10+i, string(mcvRow))
- }
- var mcvCntsRow sql.Row
- for _, v := range h.McvCounts() {
- mcvCntsRow = append(mcvCntsRow, int(v))
- }
- valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes))
-
- key := keyBuilder.Build(pool)
- value := valueBuilder.Build(pool)
- statsMap.Put(ctx, key, value)
- pos++
- }
- return nil
-}
-
-func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
- for i, v := range r {
- if v == nil {
- continue
- }
- if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
- return nil, err
- }
- }
- return tb.Build(ns.Pool()), nil
-}
-
-func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
- tup := []byte(s)
- r := make(sql.Row, tb.Desc.Count())
- var err error
- for i, _ := range r {
- r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
- if err != nil {
- return nil, err
- }
- }
- return r, nil
-}
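Editorial note, not part of the diff: EncodeRow and DecodeRow above round-trip a sql.Row through a serialized tuple. A minimal sketch, assuming a NodeStore and a TupleBuilder whose descriptor matches the row:

```go
func roundTrip(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder) (sql.Row, error) {
	enc, err := EncodeRow(ctx, ns, sql.Row{int64(1), "a"}, tb)
	if err != nil {
		return nil, err
	}
	// the decoded row holds the same field values as the input
	return DecodeRow(ctx, ns, string(enc), tb)
}
```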
diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go
deleted file mode 100644
index faa1869315c..00000000000
--- a/go/libraries/doltcore/sqle/statspro/analyze.go
+++ /dev/null
@@ -1,343 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "fmt"
- "strings"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly/tree"
-)
-
-const (
- boostrapRowLimit = 2e6
-)
-
-func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return err
- }
- return p.RefreshTableStatsWithBranch(ctx, table, db, branch)
-}
-
-func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
- dSess := dsess.DSessFromSess(ctx.Session)
- branches := p.getStatsBranches(ctx)
- var rows uint64
- for _, branch := range branches {
- sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
- if err != nil {
- if sql.ErrDatabaseNotFound.Is(err) {
- // default branch is not valid
- continue
- }
- return err
- }
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
- for _, table := range tables {
- sqlTable, _, err := GetLatestTable(ctx, table, sqlDb)
- if err != nil {
- return err
- }
-
- if st, ok := sqlTable.(sql.StatisticsTable); ok {
- cnt, ok, err := st.RowCount(ctx)
- if ok && err == nil {
- rows += cnt
- }
- }
- if rows >= boostrapRowLimit {
- return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE
\" or \"call dolt_stats_restart()\" to collect statistics", db)
- }
-
- if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error {
- if !p.TryLockForUpdate(branch, db, table.Name()) {
- return fmt.Errorf("already updating statistics")
- }
- defer p.UnlockTable(branch, db, table.Name())
-
- dSess := dsess.DSessFromSess(ctx.Session)
-
- sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
- if err != nil {
- return err
- }
-
- // lock only after accessing DatabaseProvider
-
- tableName := strings.ToLower(table.Name())
- dbName := strings.ToLower(db)
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- iat, ok := table.(sql.IndexAddressableTable)
- if !ok {
- return nil
- }
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return err
- }
-
-	// it's important to update WORKING session references on every call
- sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
- if err != nil {
- return err
- }
-
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- // if the stats database does not exist, initialize one
- fs, err := p.pro.FileSystemForDatabase(dbName)
- if err != nil {
- return err
- }
- sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
- if !ok {
- return sql.ErrDatabaseNotFound.New(dbName)
- }
- statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.Warn(0, err.Error())
- return nil
- }
- p.setStatDb(dbName, statDb)
- }
-
- schHash, err := dTab.GetSchemaHash(ctx)
- if err != nil {
- return err
- }
-
- if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() {
- if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
- return fmt.Errorf("set schema hash error: %w", err)
- }
- } else if oldSchHash != schHash {
- ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
- if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
- return err
- }
-
- stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName)
- if err != nil {
- return err
- }
- for _, stat := range stats {
- statDb.DeleteStats(ctx, branch, stat.Qualifier())
- }
- } else if err != nil {
- return err
- }
-
- tablePrefix := fmt.Sprintf("%s.", tableName)
- var idxMetas []indexMeta
- for _, idx := range indexes {
- cols := make([]string, len(idx.Expressions()))
- for i, c := range idx.Expressions() {
- cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
- }
-
- qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID()))
- curStat, ok := statDb.GetStat(branch, qual)
- if !ok {
- curStat = NewDoltStats()
- curStat.Statistic.Qual = qual
- }
- idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
- if err != nil {
- return err
- }
- idxMetas = append(idxMetas, idxMeta)
- }
-
- newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
- if err != nil {
- return err
- }
-
- // merge new chunks with preexisting chunks
- for _, idxMeta := range idxMetas {
- stat := newTableStats[idxMeta.qual]
- targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
- if err != nil {
- return err
- }
- if targetChunks == nil {
- // empty table
- continue
- }
- stat.SetChunks(idxMeta.allAddrs)
- stat.Hist = targetChunks
- stat.UpdateActive()
- if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
- return err
- }
- }
-
- p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
- return statDb.Flush(ctx, branch)
-}
-
-// BranchQualifiedDatabase returns a branch-qualified database name, e.g.
-// ("mydb", "main") -> "mydb/main". If the database name is already
-// branch-suffixed, no duplication is applied.
-func BranchQualifiedDatabase(db, branch string) string {
- suffix := fmt.Sprintf("/%s", branch)
- if !strings.HasSuffix(db, suffix) {
- return fmt.Sprintf("%s%s", db, suffix)
- }
- return db
-}
-
-// GetLatestTable will get the WORKING root table for the current database/branch
-func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
- var db sqle.Database
- switch d := sqlDb.(type) {
- case sqle.Database:
- db = d
- case sqle.ReadReplicaDatabase:
- db = d.Database
- default:
- return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
- }
- sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
- if err != nil {
- return nil, nil, err
- }
- if !ok {
- return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
- }
-
- var dTab *doltdb.Table
- switch t := sqlTable.(type) {
- case *sqle.AlterableDoltTable:
- dTab, err = t.DoltTable.DoltTable(ctx)
- case *sqle.WritableDoltTable:
- dTab, err = t.DoltTable.DoltTable(ctx)
- case *sqle.DoltTable:
- dTab, err = t.DoltTable(ctx)
- default:
- err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
- }
- if err != nil {
- return nil, nil, err
- }
- return sqlTable, dTab, nil
-}
-
-func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) {
- var idx durable.Index
- var err error
- if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
- idx, err = doltTable.GetRowData(ctx)
- } else {
- idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
- }
- if err != nil {
- return indexMeta{}, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
-
- if cnt, err := prollyMap.Count(); err != nil {
- return indexMeta{}, err
- } else if cnt == 0 {
- return indexMeta{
- qual: curStats.Statistic.Qual,
- cols: cols,
- }, nil
- }
-
- // get newest histogram target level hashes
- levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
- if err != nil {
- return indexMeta{}, err
- }
-
- var addrs []hash.Hash
- var keepChunks []sql.HistogramBucket
- var missingAddrs float64
- var missingChunks []tree.Node
- var missingOffsets []updateOrdinal
- var offset uint64
-
- for _, n := range levelNodes {
- // Compare the previous histogram chunks to the newest tree chunks.
- // Partition the newest chunks into 1) preserved or 2) missing.
- // Missing chunks will need to be scanned on a stats update, so
- // track the (start, end) ordinal offsets to simplify the read iter.
- treeCnt, err := n.TreeCount()
- if err != nil {
- return indexMeta{}, err
- }
-
- addrs = append(addrs, n.HashOf())
- if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
- missingChunks = append(missingChunks, n)
- missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
- missingAddrs++
- } else {
- keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
- }
- offset += uint64(treeCnt)
- }
-
- var dropChunks []sql.HistogramBucket
- for _, h := range curStats.Chunks {
- var match bool
- for _, b := range keepChunks {
- if DoltBucketChunk(b) == h {
- match = true
- break
- }
- }
- if !match {
- dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
- }
- }
-
- return indexMeta{
- qual: curStats.Statistic.Qual,
- cols: cols,
- newNodes: missingChunks,
- updateOrdinals: missingOffsets,
- keepChunks: keepChunks,
- dropChunks: dropChunks,
- allAddrs: addrs,
- }, nil
-}
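A worked example of the chunk partitioning in newIdxMeta (editorial): if the new histogram level holds chunks [A, B, C] with tree counts 10, 12, and 9, and the previous stats only cover A and C, then B is the lone missing chunk, recorded with the half-open ordinal range [10, 22); A and C are carried over through keepChunks, and any previously tracked chunk hash that no longer appears falls into dropChunks.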
diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go
deleted file mode 100644
index 3322065f809..00000000000
--- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- types2 "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
-)
-
-const asyncAutoRefreshStats = "async_auto_refresh_stats"
-
-func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error {
- _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
- _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
- interval64, _, _ := types2.Int64.Convert(interval)
- intervalSec := time.Second * time.Duration(interval64.(int64))
- thresholdf64 := threshold.(float64)
-
- ctx, err := ctxFactory(context.Background())
- if err != nil {
- return err
- }
-
- branches := p.getStatsBranches(ctx)
-
- return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches)
-}
-
-func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error {
-	// this is only called after initial statistics have finished loading;
-	// launch a thread that periodically checks freshness
-
- p.mu.Lock()
- defer p.mu.Unlock()
-
- dropDbCtx, dbStatsCancel := context.WithCancel(context.Background())
- p.autoCtxCancelers[dbName] = dbStatsCancel
-
- return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) {
- ticker := time.NewTicker(checkInterval + time.Nanosecond)
- for {
- select {
- case <-ctx.Done():
- ticker.Stop()
- return
- case <-ticker.C:
- select {
- case <-dropDbCtx.Done():
- ticker.Stop()
- return
- default:
- }
-
- sqlCtx, err := ctxFactory(ctx)
- if err != nil {
- return
- }
-
- dSess := dsess.DSessFromSess(sqlCtx.Session)
- ddb, ok := dSess.GetDoltDB(sqlCtx, dbName)
- if !ok {
- sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName)
- return
- }
- for _, branch := range branches {
- if br, ok, err := ddb.HasBranch(ctx, branch); ok {
- sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
- // update WORKING session references
- sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch))
- if err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- return
- }
-
- if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- return
- }
- } else if err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error())
- } else {
- sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br)
- }
- }
- }
- }
- })
-}
-
-func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error {
- if !p.TryLockForUpdate(branch, dbName, "") {
- return fmt.Errorf("database already being updated: %s/%s", branch, dbName)
- }
- defer p.UnlockTable(branch, dbName, "")
-
- // Iterate all dbs, tables, indexes. Each db will collect
- // []indexMeta above refresh threshold. We read and process those
- // chunks' statistics. We merge updated chunks with precomputed
- // chunks. The full set of statistics for each database lands
- // 1) in the provider's most recent set of database statistics, and
- // 2) on disk in the database's statistics ref'd prolly.Map.
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- return sql.ErrDatabaseNotFound.New(dbName)
- }
-
- var deletedStats []sql.StatQualifier
- qualExists := make(map[sql.StatQualifier]bool)
- tableExistsAndSkipped := make(map[string]bool)
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- for _, table := range tables {
- if !p.TryLockForUpdate(branch, dbName, table) {
- ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table)
- return fmt.Errorf("table already being updated: %s", table)
- }
- defer p.UnlockTable(branch, dbName, table)
-
- sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb)
- if err != nil {
- return err
- }
-
- tableHash, err := dTab.GetRowDataHash(ctx)
- if err != nil {
- return err
- }
-
- if statDb.GetTableHash(branch, table) == tableHash {
- // no data changes since last check
- tableExistsAndSkipped[table] = true
- ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
- continue
- } else {
- ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
- }
-
- schHash, err := dTab.GetSchemaHash(ctx)
- if err != nil {
- return err
- }
-
- var schemaName string
- if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() {
- if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
- return err
- }
- } else if oldSchHash != schHash {
- ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
- if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
- return err
- }
- stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table)
- if err != nil {
- return err
- }
- for _, stat := range stats {
- statDb.DeleteStats(ctx, branch, stat.Qualifier())
- }
- } else if err != nil {
- return err
- }
-
- iat, ok := sqlTable.(sql.IndexAddressableTable)
- if !ok {
- return fmt.Errorf("table does not support indexes %s", table)
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return err
- }
-
- // collect indexes and ranges to be updated
- var idxMetas []indexMeta
- for _, index := range indexes {
- qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID()))
- qualExists[qual] = true
- curStat, ok := statDb.GetStat(branch, qual)
- if !ok {
- curStat = NewDoltStats()
- curStat.Statistic.Qual = qual
-
- cols := make([]string, len(index.Expressions()))
- tablePrefix := fmt.Sprintf("%s.", table)
- for i, c := range index.Expressions() {
- cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
- }
- curStat.Statistic.Cols = cols
- }
- ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
-
- updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
- if err != nil {
- ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- continue
- }
- curCnt := float64(len(curStat.Active))
- updateCnt := float64(len(updateMeta.newNodes))
- deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))
- ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt))
-
- if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh {
- if curCnt == 0 && updateCnt == 0 {
- continue
- }
- ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual)
- // mark index for updating
- idxMetas = append(idxMetas, updateMeta)
- // update latest hash if we haven't already
- statDb.SetTableHash(branch, table, tableHash)
- }
- }
-
- // get new buckets for index chunks to update
- newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
- if err != nil {
- return err
- }
-
- // merge new chunks with preexisting chunks
- for _, updateMeta := range idxMetas {
- stat := newTableStats[updateMeta.qual]
- if stat != nil {
- var err error
- if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
- err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
- } else {
- err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
- }
- if err != nil {
- return err
- }
- p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
- }
- }
- }
-
- for _, q := range statDb.ListStatQuals(branch) {
-		// a table or index delete leaves a hole in the stats;
-		// this is separate from the threshold check
- if !tableExistsAndSkipped[q.Table()] && !qualExists[q] {
- // only delete stats we've verified are deleted
- deletedStats = append(deletedStats, q)
- }
- }
-
- statDb.DeleteStats(ctx, branch, deletedStats...)
-
- if err := statDb.Flush(ctx, branch); err != nil {
- return err
- }
-
- return nil
-}
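A worked example of the refresh condition in checkRefresh (editorial): with curCnt = 10 existing chunks, updateCnt = 4 new chunks, and deleteCnt = 2 removed chunks, the change ratio is (2+4)/10 = 0.6, so any updateThresh below 0.6 marks the index for update; an index with curCnt == 0 always qualifies unless updateCnt is also zero.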
diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/bucket_builder.go
similarity index 52%
rename from go/libraries/doltcore/sqle/statspro/update.go
rename to go/libraries/doltcore/sqle/statspro/bucket_builder.go
index 562e82c5679..f521ebe83bd 100644
--- a/go/libraries/doltcore/sqle/statspro/update.go
+++ b/go/libraries/doltcore/sqle/statspro/bucket_builder.go
@@ -17,19 +17,10 @@ package statspro
import (
"container/heap"
"context"
- "errors"
- "fmt"
- "io"
- "sort"
- "strings"
- "time"
-
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
+ "sort"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
@@ -40,153 +31,7 @@ const (
mcvCnt = 3
)
-// createNewStatsBuckets builds histograms for a list of index statistic metadata.
-// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If
-// the returned buckets are a subset of the index, the caller is responsible
-// for reconciling the difference.
-func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) {
- nameToIdx := make(map[string]sql.Index)
- for _, idx := range indexes {
- nameToIdx[strings.ToLower(idx.ID())] = idx
- }
-
- ret := make(map[sql.StatQualifier]*DoltStats)
-
- for _, meta := range idxMetas {
- var idx durable.Index
- var err error
- if strings.EqualFold(meta.qual.Index(), "PRIMARY") {
- idx, err = dTab.GetRowData(ctx)
- } else {
- idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index())
- }
- if err != nil {
- return nil, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
- keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
-
- sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
- fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx)
- if err != nil {
- return nil, err
- }
-
- var types []sql.Type
- for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() {
- types = append(types, cet.Type)
- }
-
- if cnt, err := prollyMap.Count(); err != nil {
- return nil, err
- } else if cnt == 0 {
- // table is empty
- ret[meta.qual] = NewDoltStats()
- ret[meta.qual].Statistic.Created = time.Now()
- ret[meta.qual].Statistic.Cols = meta.cols
- ret[meta.qual].Statistic.Typs = types
- ret[meta.qual].Statistic.Qual = meta.qual
-
- ret[meta.qual].Statistic.Fds = fds
- ret[meta.qual].Statistic.Colset = colSet
- ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
-
- continue
- }
-
- firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols))
- if err != nil {
- return nil, err
- }
-
- updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc())
- ret[meta.qual] = NewDoltStats()
- ret[meta.qual].Chunks = meta.allAddrs
- ret[meta.qual].Statistic.Created = time.Now()
- ret[meta.qual].Statistic.Cols = meta.cols
- ret[meta.qual].Statistic.Typs = types
- ret[meta.qual].Statistic.Qual = meta.qual
- ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
-
- var start, stop uint64
- // read leaf rows for each bucket
- for i, chunk := range meta.newNodes {
- // each node is a bucket
- updater.newBucket()
-
-			// we read the half-open range [node first key, next node first key)
- start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop
- iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
- if err != nil {
- return nil, err
- }
- for {
- // stats key will be a prefix of the index key
- keyBytes, _, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return nil, err
- }
- // build full key
- for i := range keyBuilder.Desc.Types {
- keyBuilder.PutRaw(i, keyBytes.GetField(i))
- }
-
- updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
- keyBuilder.Recycle()
- }
-
- // finalize the aggregation
- bucket, err := updater.finalize(ctx, prollyMap.NodeStore())
- if err != nil {
- return nil, err
- }
- bucket.Chunk = chunk.HashOf()
- ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket)
- }
-
- ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct)
- ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount)
- ret[updater.qual].Statistic.LowerBnd = firstRow
- ret[updater.qual].Statistic.Fds = fds
- ret[updater.qual].Statistic.Colset = colSet
- ret[updater.qual].UpdateActive()
- }
- return ret, nil
-}
-
-// MergeNewChunks combines a set of old and new chunks to create
-// the desired target histogram. It returns an error if an |inputHashes|
-// entry is missing from both |oldChunks| and |newChunks|.
-func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) {
- hashToPos := make(map[hash.Hash]int, len(inputHashes))
- for i, h := range inputHashes {
- hashToPos[h] = i
- }
-
- var cnt int
- targetBuckets := make([]sql.HistogramBucket, len(inputHashes))
- for _, c := range oldChunks {
- if idx, ok := hashToPos[DoltBucketChunk(c)]; ok {
- cnt++
- targetBuckets[idx] = c
- }
- }
- for _, c := range newChunks {
- if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil {
- cnt++
- targetBuckets[idx] = c
- }
- }
- if cnt != len(inputHashes) {
- return nil, fmt.Errorf("encountered invalid statistic chunks")
- }
- return targetBuckets, nil
-}
-
-func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) {
+func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) {
if cnt, err := prollyMap.Count(); err != nil {
return nil, err
} else if cnt == 0 {
@@ -208,9 +53,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
- firstKey := keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen)
- firstRow := make(sql.Row, prefixLen)
- for i := 0; i < prefixLen; i++ {
+ firstKey := keyBuilder.Build(buffPool)
+ firstRow := make(sql.Row, firstKey.Count())
+ for i := range firstRow {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, err
@@ -266,7 +111,7 @@ func (u *bucketBuilder) newBucket() {
// finalize converts the current aggregation stats into a histogram bucket,
// which includes deserializing most common value tuples into sql.Rows.
-func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) {
+func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) {
// update MCV in case we've ended on a run of many identical keys
u.updateMcv()
@@ -276,27 +121,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu
// convert the MCV tuples into SQL rows (most efficient to only do this once)
mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen)
if err != nil {
- return DoltBucket{}, err
+ return nil, err
}
upperBound := make(sql.Row, u.prefixLen)
if u.currentKey != nil {
for i := 0; i < u.prefixLen; i++ {
upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns)
if err != nil {
- return DoltBucket{}, err
+ return nil, err
}
}
}
- return DoltBucket{
- Bucket: &stats.Bucket{
- RowCnt: uint64(u.count),
- DistinctCnt: uint64(u.distinct),
- BoundCnt: uint64(u.currentCnt),
- McvVals: mcvRows,
- McvsCnt: u.mcvs.Counts(),
- BoundVal: upperBound,
- NullCnt: uint64(u.nulls),
- },
+ return &stats.Bucket{
+ RowCnt: uint64(u.count),
+ DistinctCnt: uint64(u.distinct),
+ BoundCnt: uint64(u.currentCnt),
+ McvVals: mcvRows,
+ McvsCnt: u.mcvs.Counts(),
+ BoundVal: upperBound,
+ NullCnt: uint64(u.nulls),
}, nil
}
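Editorial note, not part of the diff: a usage sketch of the bucket builder with its new *stats.Bucket return type. The qualifier, key descriptor, key tuples, and node store are assumed to come from the surrounding collection code, as createNewStatsBuckets did in the removed version:

```go
func buildOneBucket(ctx context.Context, qual sql.StatQualifier, keyDesc val.TupleDesc, keys []val.Tuple, ns tree.NodeStore) (*stats.Bucket, error) {
	updater := newBucketBuilder(qual, keyDesc.Count(), keyDesc)
	updater.newBucket()
	for _, k := range keys {
		updater.add(k) // accumulates row, distinct, null, and MCV counters
	}
	// finalize now returns a plain *stats.Bucket, no DoltBucket wrapper
	return updater.finalize(ctx, ns)
}
```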
diff --git a/go/libraries/doltcore/sqle/statspro/update_test.go b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
similarity index 92%
rename from go/libraries/doltcore/sqle/statspro/update_test.go
rename to go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
index ef670e19c8b..e97ad343755 100644
--- a/go/libraries/doltcore/sqle/statspro/update_test.go
+++ b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
@@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) {
name string
keys []sql.Row
keyDesc val.TupleDesc
- bucket DoltBucket
+ bucket *stats.Bucket
}{
{
name: "ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 5,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
// technically nulls should be at the beginning
name: "ints with middle nulls",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 16,
DistinctCnt: 6,
NullCnt: 3,
@@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
name: "ints with beginning nulls",
keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 6,
NullCnt: 2,
@@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
name: "more ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 22,
DistinctCnt: 7,
BoundCnt: 1,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(7)},
- }},
+ },
},
{
name: "2-ints",
keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 11,
McvVals: []sql.Row{{int64(4), int64(1)}},
McvsCnt: []uint64{3},
BoundVal: sql.Row{int64(5), int64(2)},
BoundCnt: 1,
- }},
+ },
},
{
name: "2-ints with nulls",
keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 5,
DistinctCnt: 5,
NullCnt: 3,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(2), int64(2)},
- BoundCnt: 1},
+ BoundCnt: 1,
},
},
{
name: "varchars",
keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 9,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i"},
BoundCnt: 2,
- }},
+ },
},
{
name: "varchar-ints",
keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 12,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i", int64(1)},
BoundCnt: 2,
- }},
+ },
},
{
name: "mcvs",
keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 23,
DistinctCnt: 18,
McvVals: []sql.Row{{int64(10)}, {int64(7)}},
McvsCnt: []uint64{3, 4},
BoundVal: sql.Row{int64(22)},
BoundCnt: 1,
- }},
+ },
},
}
diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go
deleted file mode 100644
index f8492a08b61..00000000000
--- a/go/libraries/doltcore/sqle/statspro/configure.go
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- types2 "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
-)
-
-var helpMsg = "call dolt_stats_purge() to reset statistics"
-
-func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error {
- p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
-
- if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) {
- return nil
- }
-
- loadCtx, err := ctxFactory(ctx)
- if err != nil {
- return err
- }
-
- branches := p.getStatsBranches(loadCtx)
-
- var autoEnabled bool
- var startupEnabled bool
- var intervalSec time.Duration
- var thresholdf64 float64
- if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) {
- autoEnabled = true
- _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
- _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
- interval64, _, _ := types2.Int64.Convert(interval)
- intervalSec = time.Second * time.Duration(interval64.(int64))
- thresholdf64 = threshold.(float64)
-
- p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
- p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...)
- } else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) {
- startupEnabled = true
- }
-
- eg, ctx := loadCtx.NewErrgroup()
- for _, db := range dbs {
- // copy closure variables
- db := db
- eg.Go(func() (err error) {
- defer func() {
- if r := recover(); r != nil {
- if str, ok := r.(fmt.Stringer); ok {
- err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String())
- } else {
- err = fmt.Errorf("%w: %v", ErrFailedToLoad, r)
- }
- return
- }
- }()
-
- fs, err := p.pro.FileSystemForDatabase(db.Name())
- if err != nil {
- return err
- }
-
- if p.Load(loadCtx, fs, db, branches); err != nil {
- return err
- }
- if autoEnabled {
- return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches)
- } else if startupEnabled {
- if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil {
- return err
- }
- }
- return nil
- })
- }
- return eg.Wait()
-}
-
-// getStatsBranches returns the set of branches whose statistics are tracked.
-// The order of precedence is (1) global variable, (2) session current branch,
-// (3) engine default branch.
-func (p *Provider) getStatsBranches(ctx *sql.Context) []string {
- dSess := dsess.DSessFromSess(ctx.Session)
- var branches []string
- if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" {
- defaultBranch, _ := dSess.GetBranch()
- if defaultBranch != "" {
- branches = append(branches, defaultBranch)
- }
- } else {
- for _, branch := range strings.Split(bs.(string), ",") {
- branches = append(branches, strings.TrimSpace(branch))
- }
- }
-
- if branches == nil {
- branches = append(branches, p.pro.DefaultBranch())
- }
- return branches
-}
-
-func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error {
- if statDb, ok := p.getStatDb(db); ok {
- return statDb.LoadBranchStats(ctx, branch)
- }
- return nil
-}
-
-// Load scans the statistics tables, populating the |stats| attribute.
-// Statistics are not available for reading until we've finished loading.
-func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) {
- // |statPath| is either file://./stat or mem://stat
- statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- return
- }
-
- for _, branch := range branches {
- if err = statsDb.LoadBranchStats(ctx, branch); err != nil {
- // if branch name is invalid, continue loading rest
- // TODO: differentiate bad branch name from other errors
- ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- continue
- }
- if err := statsDb.Flush(ctx, branch); err != nil {
- ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- continue
- }
- }
-
- p.setStatDb(strings.ToLower(db.Name()), statsDb)
- return
-}
diff --git a/go/libraries/doltcore/sqle/statspro/doc.go b/go/libraries/doltcore/sqle/statspro/doc.go
new file mode 100644
index 00000000000..281ae80f16e
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/doc.go
@@ -0,0 +1,79 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package statspro provides an event loop that manages table statistics
+// collection and access.
+//
+// At any given time there is one thread responsible for pulling work
+// from the job queue to execute. The thread has exclusive ownership
+// over the job channel.
+//
+// All stats are persisted within a single database. If there are multiple
+// databases, one is selected at random as the storage target. If during
+// initialization multiple databases have stats, one of them is chosen at
+// random as the target. If a database changes between server restarts,
+// the stored stats will be useless but will not impair operations, because
+// storage is only ever a best-effort content-addressed persistence layer;
+// buckets will be regenerated if they are missing. If the database acting
+// as a storage target is deleted, we swap the cache to write to a new storage
+// target that still exists.
+//
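+// For example, dropping the current backing database triggers a storage
+// swap before that database's statistics are cleared (sketch of the
+// check in DropDbStats):
+//
+//	if strings.EqualFold(sc.statsBackingDb, dbName) {
+//		err = sc.rotateStorage(ctx)
+//	}
+//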
+// The main data structures:
+// - Table statistics map, which returns a list of table index statistics
+// for a specific branch, database, and table name.
+// - Object caches:
+// - Bucket cache: Chunk-addressed histogram buckets. All provider
+// histogram references should be in the bucket cache. This is an LRU
+// that is sized to always fit the current active set, and doubles
+// when the provider bucket counter reaches the threshold. It is backed
+// by a best-effort on-disk prolly.Map to make restarts faster.
+// - Template cache: Table-schema/index-addressed stats.Statistic
+// object for a specific index.
+// - Bound cache: Chunk-addressed first row for an index histogram.
+//
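+// A histogram bucket is cached by the chunk address of its tree node;
+// a read roughly does (sketch):
+//
+//	if _, ok, err := sc.kv.GetBucket(ctx, n.HashOf(), keyBuilder); !ok {
+//		// scan the node's ordinal range and PutBucket the result
+//	}
+//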
+// Work is broken down into:
+// - A basic update cycle of (1) seed database tables, (2) create or pull
+// buckets from disk, (3) commit statistics accessed by the provider
+// (see the job-chain sketch below).
+// - GC cycle: Mark and sweep the most recent context's active set into
+// new cache/prolly.Map objects.
+// - Branch sync: Update the tracked set of branch-qualified databases.
+//
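+// (sketch) A basic update cycle corresponds to the job chain
+//
+//	SeedDbTablesJob -> ReadJob(s) -> FinalizeJob
+//
+// where each job may enqueue its successors on the Jobs channel.
+//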
+// Regular jobs, GC, and branch-sync are all controlled by top-level
+// tickers that cap the rate at which each is run. GC and branch-sync
+// are prioritized before jobs, and are therefore rate-limited to allow
+// the job queue to flush in-between calls.
+//
+// DDL operations and branch create/delete are concurrent to the event
+// loop. We require an extra fixed-size queue as an intermediary to the
+// job queue to protect the main thread's ownership. DDL acquiring the
+// provider lock is a deadlock risk -- we cannot do any provider checks
+// while holding the db lock. Lastly, the way update jobs are split
+// up over time means we need to do special checks when finalizing a set
+// of database stats. A race between deleting a database and finalizing
+// statistics needs to end with no statistics, which requires a delete check
+// after finalize.
+//
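+// Concurrent callers therefore route work through the fixed-size
+// interrupt queue instead of writing to |Jobs| directly (sketch):
+//
+//	select {
+//	case sc.Interrupts <- NewControl(desc, cb):
+//	default:
+//		// queue full; report backpressure to the caller
+//	}
+//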
+// The stats lifecycle can be controlled with:
+// - dolt_stats_stop: clear queue and disable thread
+// - dolt_stats_restart: clear queue, refresh queue, start thread
+// - dolt_stats_purge: clear queue, clear cache, refresh queue,
+// disable thread
+// - dolt_stats_validate: return report of cache misses for current
+// root value.
+//
+// `dolt_stats_wait` is additionally useful for blocking on a full
+// queue cycle and then validating whether the session head is caught up.
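+//
+// For example, a session can flush and then verify statistics from SQL
+// using the procedures listed above:
+//
+//	CALL dolt_stats_wait();
+//	CALL dolt_stats_validate();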
+package statspro
diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go b/go/libraries/doltcore/sqle/statspro/dolt_stats.go
deleted file mode 100644
index 4c5d43250c9..00000000000
--- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "sync"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/stats"
-
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-type DoltStats struct {
- Statistic *stats.Statistic
- mu *sync.Mutex
- // Chunks is a list of addresses for the histogram fanout level
- Chunks []hash.Hash
- // Active maps a chunk/bucket address to its position in
- // the histogram. 1-indexed to differentiate from an empty
- // field on disk
- Active map[hash.Hash]int
- Hist sql.Histogram
- Tb *val.TupleBuilder
-}
-
-func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper {
- return s
-}
-
-var _ sql.Statistic = (*DoltStats)(nil)
-
-func (s *DoltStats) SetChunks(h []hash.Hash) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.Chunks = h
-}
-
-func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) RowCount() uint64 {
- return s.Statistic.RowCount()
-}
-
-func (s *DoltStats) DistinctCount() uint64 {
- return s.Statistic.DistinctCount()
-}
-
-func (s *DoltStats) NullCount() uint64 {
- return s.Statistic.NullCount()
-
-}
-
-func (s *DoltStats) AvgSize() uint64 {
- return s.Statistic.AvgSize()
-
-}
-
-func (s *DoltStats) CreatedAt() time.Time {
- return s.Statistic.CreatedAt()
-
-}
-
-func (s *DoltStats) Columns() []string {
- return s.Statistic.Columns()
-}
-
-func (s *DoltStats) Types() []sql.Type {
- return s.Statistic.Types()
-}
-
-func (s *DoltStats) Qualifier() sql.StatQualifier {
- return s.Statistic.Qualifier()
-}
-
-func (s *DoltStats) IndexClass() sql.IndexClass {
- return s.Statistic.IndexClass()
-}
-
-func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
- return s.Statistic.FuncDeps()
-}
-
-func (s *DoltStats) ColSet() sql.ColSet {
- return s.Statistic.ColSet()
-}
-
-func (s *DoltStats) LowerBound() sql.Row {
- return s.Statistic.LowerBound()
-}
-
-func NewDoltStats() *DoltStats {
- return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
-}
-
-func (s *DoltStats) ToInterface() (interface{}, error) {
- statVal, err := s.Statistic.ToInterface()
- if err != nil {
- return nil, err
- }
- ret := statVal.(map[string]interface{})
-
- var hist sql.Histogram
- for _, b := range s.Hist {
- hist = append(hist, b)
- }
- histVal, err := hist.ToInterface()
- if err != nil {
- return nil, err
- }
- ret["statistic"].(map[string]interface{})["buckets"] = histVal
- return ret, nil
-}
-
-func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
- s.mu.Lock()
- defer s.mu.Unlock()
- ret := *s
- ret.Hist = nil
- for _, b := range h {
- doltB, ok := b.(DoltBucket)
- if !ok {
- return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString())
- }
- ret.Hist = append(ret.Hist, doltB)
- }
- return &ret, nil
-}
-
-func (s *DoltStats) Histogram() sql.Histogram {
- s.mu.Lock()
- defer s.mu.Unlock()
- return s.Hist
-}
-
-func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
- hist, err := DoltHistFromSql(stat.Histogram(), stat.Types())
- if err != nil {
- return nil, err
- }
- ret := &DoltStats{
- mu: &sync.Mutex{},
- Hist: hist,
- Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
- Active: make(map[hash.Hash]int),
- }
- ret.Statistic.Fds = stat.FuncDeps()
- ret.Statistic.Colset = stat.ColSet()
- return ret, nil
-}
-
-func (s *DoltStats) UpdateActive() {
- s.mu.Lock()
- defer s.mu.Unlock()
- newActive := make(map[hash.Hash]int)
- for i, hash := range s.Chunks {
- newActive[hash] = i
- }
- s.Active = newActive
-}
-
-type DoltHistogram []DoltBucket
-
-type DoltBucket struct {
- Bucket *stats.Bucket
- Chunk hash.Hash
- Created time.Time
-}
-
-func (d DoltBucket) RowCount() uint64 {
- return d.Bucket.RowCount()
-}
-
-func (d DoltBucket) DistinctCount() uint64 {
- return d.Bucket.DistinctCount()
-}
-
-func (d DoltBucket) NullCount() uint64 {
- return d.Bucket.NullCount()
-}
-
-func (d DoltBucket) BoundCount() uint64 {
- return d.Bucket.BoundCount()
-}
-
-func (d DoltBucket) UpperBound() sql.Row {
- return d.Bucket.UpperBound()
-}
-
-func (d DoltBucket) McvCounts() []uint64 {
- return d.Bucket.McvCounts()
-}
-
-func (d DoltBucket) Mcvs() []sql.Row {
- return d.Bucket.Mcvs()
-}
-
-func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
- return b.(DoltBucket).Chunk
-}
-
-func DoltBucketCreated(b sql.HistogramBucket) time.Time {
- return b.(DoltBucket).Created
-}
-
-var _ sql.HistogramBucket = (*DoltBucket)(nil)
-
-func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
- ret := make(sql.Histogram, len(hist))
- var err error
- for i, b := range hist {
- upperBound := make(sql.Row, len(b.UpperBound()))
- for i, v := range b.UpperBound() {
- upperBound[i], _, err = types[i].Convert(v)
- if err != nil {
- return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
- }
- }
- mcvs := make([]sql.Row, len(b.Mcvs()))
- for i, mcv := range b.Mcvs() {
- for _, v := range mcv {
- conv, _, err := types[i].Convert(v)
- if err != nil {
- return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
- }
- mcvs[i] = append(mcvs[i], conv)
- }
- }
- ret[i] = DoltBucket{
- Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket),
- }
- }
- return ret, nil
-}
diff --git a/go/libraries/doltcore/sqle/statspro/gc.go b/go/libraries/doltcore/sqle/statspro/gc.go
new file mode 100644
index 00000000000..6d476e37d06
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/gc.go
@@ -0,0 +1,196 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"errors"
+	"log"
+	"strconv"
+	"strings"
+
+	"github.com/dolthub/go-mysql-server/sql"
+
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+	"github.com/dolthub/dolt/go/store/val"
+)
+
+type GcMarkJob struct {
+ sqlDb dsess.SqlDatabase
+ done chan struct{}
+}
+
+func NewGcMarkJob(sqlDb dsess.SqlDatabase) GcMarkJob {
+ return GcMarkJob{
+ sqlDb: sqlDb,
+ done: make(chan struct{}),
+ }
+}
+
+func (j GcMarkJob) Finish() {
+ close(j.done)
+}
+
+func (j GcMarkJob) String() string {
+ b := strings.Builder{}
+ b.WriteString("gcMark: ")
+ b.WriteString(j.sqlDb.RevisionQualifiedName())
+ return b.String()
+}
+
+func (sc *StatsCoord) runGc(ctx context.Context, done chan struct{}) (err error) {
+ defer func() {
+ if err != nil {
+ sc.enableGc.Store(true)
+ close(done)
+ }
+ }()
+
+	if !sc.enableGc.Swap(false) {
+		// GC already disabled or in flight; close |done| so callers
+		// waiting on it are not stranded.
+		close(done)
+		return nil
+	}
+
+ if sc.Debug {
+ log.Println("stats gc number: ", strconv.Itoa(int(sc.gcCounter.Load())))
+ }
+
+ sc.gcCounter.Add(1)
+
+ sc.gcMu.Lock()
+ defer sc.gcMu.Unlock()
+
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return err
+ }
+
+ if err := sc.kv.StartGc(ctx, int(sc.bucketCap)); err != nil {
+ return err
+ }
+
+ // Can't take |dbMu| and provider lock, so copy dbs out.
+ // Unlike branch updates, it is OK if GC misses databases
+ // added in-between GC start and end because stats collection
+ // is paused for the duration.
+ sc.dbMu.Lock()
+ dbs := make([]dsess.SqlDatabase, len(sc.dbs))
+ copy(dbs, sc.dbs)
+ sc.ddlGuard = true
+ sc.dbMu.Unlock()
+
+ var bucketCnt int
+ for _, db := range dbs {
+ j := NewGcMarkJob(db)
+ cnt, err := sc.gcMark(sqlCtx, j)
+ if sql.ErrDatabaseNotFound.Is(err) {
+ // concurrent delete
+ continue
+ } else if errors.Is(err, doltdb.ErrWorkingSetNotFound) {
+ // branch registered but no data
+ continue
+ } else if err != nil {
+ return err
+ }
+ bucketCnt += cnt
+ }
+
+ //sc.bucketCnt.Store(int64(bucketCnt))
+ sc.bucketCap = sc.kv.Cap()
+ sc.kv.FinishGc()
+
+ // Avoid GC starving the loop, only re-enable after
+ // letting a block of other work through.
+ if err := sc.unsafeAsyncSend(ctx, NewControl("re-enable GC", func(sc *StatsCoord) error {
+ sc.enableGc.Store(true)
+ close(done)
+ return nil
+ })); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (sc *StatsCoord) gcMark(sqlCtx *sql.Context, j GcMarkJob) (int, error) {
+ dSess := dsess.DSessFromSess(sqlCtx.Session)
+ db, err := dSess.Provider().Database(sqlCtx, j.sqlDb.AliasedName())
+ if err != nil {
+ return 0, err
+ }
+ sqlDb, err := sqle.RevisionDbForBranch(sqlCtx, db.(dsess.SqlDatabase), j.sqlDb.Revision(), j.sqlDb.Revision()+"/"+j.sqlDb.AliasedName())
+ if err != nil {
+ return 0, err
+ }
+ tableNames, err := sqlDb.GetTableNames(sqlCtx)
+ if err != nil {
+ return 0, err
+ }
+
+ var bucketCnt int
+ for _, tableName := range tableNames {
+ sqlTable, dTab, err := GetLatestTable(sqlCtx, tableName, j.sqlDb)
+ if err != nil {
+ return 0, err
+ }
+ indexes, err := sqlTable.GetIndexes(sqlCtx)
+ if err != nil {
+ return 0, err
+ }
+
+ for _, sqlIdx := range indexes {
+ var idx durable.Index
+ var err error
+ if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
+ idx, err = dTab.GetRowData(sqlCtx)
+ } else {
+ idx, err = dTab.GetIndexRowData(sqlCtx, sqlIdx.ID())
+ }
+ if err != nil {
+ return 0, err
+ }
+
+			schHash, _, err := sqlTable.IndexCacheKey(sqlCtx)
+			if err != nil {
+				return 0, err
+			}
+			key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
+			sc.kv.GetTemplate(key)
+
+ idxLen := len(sqlIdx.Expressions())
+
+ prollyMap := durable.ProllyMapFromIndex(idx)
+ levelNodes, err := tree.GetHistogramLevel(sqlCtx, prollyMap.Tuples(), bucketLowCnt)
+ if err != nil {
+ return 0, err
+ }
+
+ if len(levelNodes) == 0 {
+ continue
+ }
+
+ bucketCnt += len(levelNodes)
+
+ firstNodeHash := levelNodes[0].HashOf()
+ sc.kv.GetBound(firstNodeHash, idxLen)
+
+ for _, n := range levelNodes {
+ err = sc.kv.MarkBucket(sqlCtx, n.HashOf(), val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen)))
+ if err != nil {
+ return 0, err
+ }
+ }
+ }
+ }
+ return bucketCnt, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/initdbhook.go b/go/libraries/doltcore/sqle/statspro/initdbhook.go
index 8e11408ea59..d0b11604254 100644
--- a/go/libraries/doltcore/sqle/statspro/initdbhook.go
+++ b/go/libraries/doltcore/sqle/statspro/initdbhook.go
@@ -15,10 +15,6 @@
package statspro
import (
- "context"
- "fmt"
- "strings"
-
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
@@ -26,67 +22,35 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
-func NewStatsInitDatabaseHook(
- statsProv *Provider,
- ctxFactory func(ctx context.Context) (*sql.Context, error),
- bThreads *sql.BackgroundThreads,
-) sqle.InitDatabaseHook {
+func NewInitDatabaseHook(sc *StatsCoord) sqle.InitDatabaseHook {
return func(
ctx *sql.Context,
- pro *sqle.DoltDatabaseProvider,
+ _ *sqle.DoltDatabaseProvider,
name string,
denv *env.DoltEnv,
db dsess.SqlDatabase,
) error {
- dbName := strings.ToLower(db.Name())
- if statsDb, ok := statsProv.getStatDb(dbName); !ok {
- statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.GetLogger().Debugf("statistics load error: %s", err.Error())
- return nil
- }
- statsProv.setStatDb(dbName, statsDb)
- } else {
- dSess := dsess.DSessFromSess(ctx.Session)
- for _, br := range statsDb.Branches() {
- branchQDbName := BranchQualifiedDatabase(dbName, br)
- sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
- if err != nil {
- ctx.GetLogger().Logger.Errorf("branch not found: %s", br)
- continue
- }
- branchQDb, ok := sqlDb.(dsess.SqlDatabase)
- if !ok {
- return fmt.Errorf("branch/database not found: %s", branchQDbName)
- }
+ head := denv.RepoState.Head
+
+ sqlDb, ok := db.(sqle.Database)
+ if !ok {
+ sc.logger.Debugf("stats initialize db failed, expected *sqle.Database, found %T", db)
+ return nil
+ }
- if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil {
- return err
- } else if ok {
- if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil {
- return err
- }
- }
- }
- ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName)
+		// the call should only fail if there is backpressure in the secondary queue
+ _, err := sc.Add(ctx, sqlDb, head.Ref, denv.FS)
+ if err != nil {
+ sc.logger.Debugf("cannot initialize db stats for %s; queue is closed", sqlDb.AliasedName())
}
- ctx.GetLogger().Debugf("statistics refresh: initialize %s", name)
- return statsProv.InitAutoRefresh(ctxFactory, name, bThreads)
+ return nil
}
}
-func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook {
+func NewDropDatabaseHook(sc *StatsCoord) sqle.DropDatabaseHook {
return func(ctx *sql.Context, name string) {
- statsProv.CancelRefreshThread(name)
- if err := statsProv.DropDbStats(ctx, name, false); err != nil {
+ if err := sc.DropDbStats(ctx, name, false); err != nil {
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
}
-
- if db, ok := statsProv.getStatDb(name); ok {
- if err := db.Close(); err != nil {
- ctx.GetLogger().Debugf("failed to close stats database: %s", err)
- }
- delete(statsProv.statDbs, name)
- }
}
}
diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go
deleted file mode 100644
index 5a423466f91..00000000000
--- a/go/libraries/doltcore/sqle/statspro/interface.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
- "github.com/dolthub/dolt/go/store/hash"
-)
-
-// Database is a backing store for a collection of DoltStats.
-// Each stats database tracks a user database, with multiple
-// branches potentially each having their own statistics.
-type Database interface {
- // ListStatQuals returns the list of index statistics for a branch.
- ListStatQuals(branch string) []sql.StatQualifier
- // LoadBranchStats starts tracking a specific branch's statistics.
- LoadBranchStats(ctx *sql.Context, branch string) error
- // DeleteBranchStats removes references to in memory index statistics.
- // If |flush| is true delete the data from storage.
- DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error
- // GetStat returns a branch's index statistics.
- GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool)
- //SetStat bulk replaces the statistic, deleting any previous version
- SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error
- //DeleteStats deletes a list of index statistics.
- DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier)
- // ReplaceChunks is an update interface that lets a stats implementation
- // decide how to edit stats for a stats refresh.
- ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error
- // Flush instructs the database to sync any partial state to disk
- Flush(ctx context.Context, branch string) error
- // Close finalizes any file references.
- Close() error
- // SetTableHash updates the most recently tracked table stats table hash
- SetTableHash(branch, tableName string, h hash.Hash)
- // GetTableHash returns the most recently tracked table stats table hash
- GetTableHash(branch, tableName string) hash.Hash
- // SetSchemaHash updates the most recently stored table stat's schema hash
- SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error
- // GetSchemaHash returns the schema hash for the latest stored statistics
- GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error)
- // Branches returns the set of branches with tracked statistics databases
- Branches() []string
- // SchemaChange returns false if any table schema in the session
- // root is incompatible with the latest schema used to create a stored
- // set of statistics.
- SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error)
-}
-
-// StatsFactory instances construct statistic databases.
-type StatsFactory interface {
- // Init gets a reference to the stats database for a dolt database
- // rooted at the given filesystem. It will create the database if
- // it does not exist.
- Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error)
-}
diff --git a/go/libraries/doltcore/sqle/statspro/noop_provider.go b/go/libraries/doltcore/sqle/statspro/noop_provider.go
new file mode 100644
index 00000000000..f54e84d51b3
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/noop_provider.go
@@ -0,0 +1,67 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "github.com/dolthub/dolt/go/libraries/doltcore/env"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/go-mysql-server/sql"
+)
+
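+// StatsNoop is a no-op sql.StatsProvider used when statistics
+// collection is disabled.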
+type StatsNoop struct{}
+
+func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
+ return nil, nil
+}
+
+func (s StatsNoop) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
+ return nil
+}
+
+func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error {
+ return nil
+}
+
+func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
+ return nil, false
+}
+
+func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
+ return nil
+}
+
+func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error {
+ return nil
+}
+
+func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
+ return 0, nil
+}
+
+func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
+ return 0, nil
+}
+
+func (s StatsNoop) CancelRefreshThread(string) {
+}
+
+func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error {
+ return nil
+}
+
+func (s StatsNoop) ThreadStatus(string) string {
+ return "stats disabled"
+}
+
+func (s StatsNoop) Prune(ctx *sql.Context) error {
+ return nil
+}
+
+func (s StatsNoop) Purge(ctx *sql.Context) error {
+ return nil
+}
+
+func (s StatsNoop) WaitForDbSync(ctx *sql.Context) error {
+ return nil
+}
+
+var _ sql.StatsProvider = StatsNoop{}
diff --git a/go/libraries/doltcore/sqle/statspro/provider.go b/go/libraries/doltcore/sqle/statspro/provider.go
new file mode 100644
index 00000000000..ea79b20c8a2
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/provider.go
@@ -0,0 +1,582 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+	"golang.org/x/sync/errgroup"
+
+	"github.com/dolthub/dolt/go/cmd/dolt/doltversion"
+	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
+	"github.com/dolthub/dolt/go/libraries/utils/earl"
+	"github.com/dolthub/dolt/go/libraries/utils/filesys"
+	"github.com/dolthub/dolt/go/store/types"
+)
+
+var _ sql.StatsProvider = (*StatsCoord)(nil)
+
+func (sc *StatsCoord) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ branch, err := dSess.GetBranch()
+ if err != nil {
+ return nil, err
+ }
+ key := tableIndexesKey{
+ db: db,
+ branch: branch,
+ table: table.Name(),
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ st := sc.Stats[key]
+ var ret []sql.Statistic
+ for _, s := range st {
+ ret = append(ret, s)
+ }
+ return ret, nil
+}
+
+func (sc *StatsCoord) RefreshTableStats(ctx *sql.Context, table sql.Table, dbName string) error {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ branch, err := dSess.GetBranch()
+ if err != nil {
+ return err
+ }
+
+ if branch == "" {
+ branch = "main"
+ }
+
+ var sqlDb dsess.SqlDatabase
+ func() {
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ for _, db := range sc.dbs {
+ if db.AliasedName() == dbName && db.Revision() == branch {
+ sqlDb = db
+ return
+ }
+ }
+ }()
+
+ if sqlDb == nil {
+ return fmt.Errorf("qualified database not found: %s/%s", branch, dbName)
+ }
+
+ after := NewControl("finish analyze", func(sc *StatsCoord) error { return nil })
+ analyze := NewAnalyzeJob(ctx, sqlDb, []string{table.String()}, after)
+
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-sc.Done:
+ return fmt.Errorf("stat queue was interrupted")
+	case sc.Jobs <- analyze: // TODO: route through sendJobs/the interrupt queue for thread safety
+ }
+
+ // wait for finalize to finish before returning
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-sc.Done:
+ return fmt.Errorf("stat queue was interrupted")
+ case <-after.done:
+ return nil
+ }
+}
+
+func (sc *StatsCoord) SetStats(ctx *sql.Context, s sql.Statistic) error {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ ss, ok := s.(*stats.Statistic)
+ if !ok {
+		return fmt.Errorf("expected *stats.Statistic, found %T", s)
+ }
+ key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table())
+ if err != nil {
+ return err
+ }
+ sc.Stats[key] = sc.Stats[key][:0]
+ sc.Stats[key] = append(sc.Stats[key], ss)
+ return nil
+}
+
+func (sc *StatsCoord) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ key, err := sc.statsKey(ctx, qual.Database, qual.Table())
+ if err != nil {
+ return nil, false
+ }
+ for _, s := range sc.Stats[key] {
+ if strings.EqualFold(s.Qualifier().Index(), qual.Index()) {
+ return s, true
+ }
+ }
+ return nil, false
+}
+
+func (sc *StatsCoord) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+	if sc.Debug {
+		log.Printf("get stat: %s/%s/%s\n", branch, db, table)
+	}
+ key := tableIndexesKey{
+ db: db,
+ branch: branch,
+ table: table,
+ schema: schema,
+ }
+	if sc.Debug {
+		for k, ss := range sc.Stats {
+			log.Println(" stats exist " + k.String() + " " + strconv.Itoa(len(ss)))
+		}
+	}
+ return sc.Stats[key], nil
+}
+
+func (sc *StatsCoord) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
+ key, err := sc.statsKey(ctx, qual.Database, qual.Table())
+ if err != nil {
+ return err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ delete(sc.Stats, key)
+ return nil
+}
+
+func (sc *StatsCoord) DropDbStats(ctx *sql.Context, dbName string, flush bool) error {
+ var doSwap bool
+ func() {
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ sc.ddlGuard = true
+
+ doSwap = strings.EqualFold(sc.statsBackingDb, dbName)
+ for i := 0; i < len(sc.dbs); i++ {
+ db := sc.dbs[i]
+ if strings.EqualFold(db.AliasedName(), dbName) {
+ sc.dbs = append(sc.dbs[:i], sc.dbs[i+1:]...)
+ i--
+ }
+ }
+ delete(sc.Branches, dbName)
+ }()
+
+ if doSwap {
+ if err := sc.rotateStorage(ctx); err != nil {
+ return err
+ }
+ }
+
+ sc.setGc()
+
+ // stats lock is more contentious, do last
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ var deleteKeys []tableIndexesKey
+	for k := range sc.Stats {
+ if strings.EqualFold(dbName, k.db) {
+ deleteKeys = append(deleteKeys, k)
+ }
+ }
+ for _, k := range deleteKeys {
+ delete(sc.Stats, k)
+ }
+
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ delete(sc.dbFs, dbName)
+
+ return nil
+}
+
+func (sc *StatsCoord) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ branch, err := dSess.GetBranch()
+ if err != nil {
+ return tableIndexesKey{}, err
+ }
+ key := tableIndexesKey{
+ db: dbName,
+ branch: branch,
+ table: table,
+ }
+ return key, nil
+}
+
+func (sc *StatsCoord) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
+ key, err := sc.statsKey(ctx, dbName, table.Name())
+ if err != nil {
+ return 0, err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ for _, s := range sc.Stats[key] {
+ if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
+ return s.RowCnt, nil
+ }
+ }
+ return 0, nil
+}
+
+func (sc *StatsCoord) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
+ key, err := sc.statsKey(ctx, dbName, table.Name())
+ if err != nil {
+ return 0, err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+	for _, s := range sc.Stats[key] {
+		if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
+			// estimate data length as average row size times row count
+			return s.AvgSz * s.RowCnt, nil
+		}
+	}
+	return 0, nil
+}
+
+func (sc *StatsCoord) FlushQueue(ctx context.Context) error {
+ sc.Stop()
+ select {
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-sc.Done:
+ }
+ oldCap := cap(sc.Jobs)
+ close(sc.Jobs)
+	for range sc.Jobs {
+	}
+ sc.Jobs = make(chan StatsJob, oldCap)
+ sc.seedCnt.Store(0)
+ sc.readCounter.Store(0)
+ return nil
+}
+
+func (sc *StatsCoord) StartRefreshThread(ctx *sql.Context, sqlDb dsess.SqlDatabase, branch ref.DoltRef) error {
+ fs, err := sc.pro.FileSystemForDatabase(sqlDb.AliasedName())
+ if err != nil {
+ return err
+ }
+
+ done, err := sc.Add(ctx, sqlDb, branch, fs)
+ if err != nil {
+ return err
+ }
+ <-done
+ return nil
+}
+
+func (sc *StatsCoord) Init(ctx context.Context, dbs []dsess.SqlDatabase) error {
+ sc.dbMu.Lock()
+ sc.statsMu.Lock()
+
+ sc.dbs = sc.dbs[:0]
+ sc.Stats = make(map[tableIndexesKey][]*stats.Statistic)
+ sc.Branches = make(map[string][]ref.DoltRef)
+ sc.dbFs = make(map[string]filesys.Filesys)
+ sc.dbMu.Unlock()
+ sc.statsMu.Unlock()
+
+ sc.bucketCnt.Store(0)
+
+ _, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
+ sc.SetMemOnly(memOnly.(int8) == 1)
+
+ typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval)
+ _, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval)
+ _, brI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranchInterval)
+
+ jobInterval, _, _ := typ.GetType().Convert(jobI)
+ gcInterval, _, _ := typ.GetType().Convert(gcI)
+ brInterval, _, _ := typ.GetType().Convert(brI)
+
+ sc.SetEnableGc(false)
+ sc.enableBrSync.Store(false)
+	sc.JobInterval = 1 // effectively no delay while draining init jobs
+ defer sc.SetTimers(jobInterval.(int64), gcInterval.(int64), brInterval.(int64))
+ defer sc.SetEnableGc(true)
+ defer sc.enableBrSync.Store(true)
+
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return err
+ }
+
+ if err := sc.Restart(sqlCtx); err != nil {
+ return err
+ }
+ eg := errgroup.Group{}
+ for _, db := range dbs {
+ if db, ok := db.(dsess.SqlDatabase); ok {
+ br, err := db.DbData().Ddb.GetBranches(ctx)
+ if err != nil {
+ return err
+ }
+ fs, err := sc.pro.FileSystemForDatabase(db.AliasedName())
+ if err != nil {
+ return err
+ }
+ for _, b := range br {
+ eg.Go(func() error {
+ done, err := sc.Add(sqlCtx, db, b, fs)
+ if err != nil {
+ return err
+ }
+ <-done
+ return nil
+ })
+ }
+ }
+ }
+	if err := eg.Wait(); err != nil {
+		return err
+	}
+ eg.Go(func() error {
+ done, err := sc.Control(ctx, "enable gc", func(sc *StatsCoord) error {
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ <-done
+ sc.Stop()
+ return nil
+ })
+	if err := eg.Wait(); err != nil {
+		return err
+	}
+ <-sc.Done
+ return nil
+}
+
+func (sc *StatsCoord) Purge(ctx *sql.Context) error {
+ if err := sc.rotateStorage(ctx); err != nil {
+ return err
+ }
+ if err := sc.kv.StartGc(ctx, 0); err != nil {
+ return err
+ }
+ sc.kv.FinishGc()
+ sc.bucketCnt.Store(0)
+
+ return nil
+}
+
+func (sc *StatsCoord) rotateStorage(ctx *sql.Context) error {
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ if sc.statsBackingDb != "" {
+ if err := sc.rm(sc.statsBackingDb); err != nil {
+ return err
+ }
+ }
+
+ var mem *memStats
+ switch kv := sc.kv.(type) {
+ case *prollyStats:
+ mem = kv.mem
+ case *memStats:
+ mem = kv
+ default:
+ mem = NewMemStats()
+ }
+
+ if len(sc.dbs) == 0 {
+ sc.kv = mem
+ sc.statsBackingDb = ""
+ return nil
+ }
+
+ newStorageTarget := sc.dbs[0]
+ if err := sc.rm(newStorageTarget.AliasedName()); err != nil {
+ return err
+ }
+
+ newKv, err := sc.initStorage(ctx, newStorageTarget)
+ if err != nil {
+ return err
+ }
+
+ newKv.mem = mem
+ sc.kv = newKv
+ sc.statsBackingDb = newStorageTarget.AliasedName()
+ return nil
+}
+
+func (sc *StatsCoord) rm(db string) error {
+ fs, ok := sc.dbFs[db]
+ if !ok {
+ return fmt.Errorf("failed to remove stats db: %s filesys not found", db)
+ }
+
+ statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
+ if err != nil {
+ return err
+ }
+
+ if ok, _ := statsFs.Exists(""); ok {
+ if err := statsFs.Delete("", true); err != nil {
+ return err
+ }
+ }
+
+ dropDbLoc, err := statsFs.Abs("")
+ if err != nil {
+ return err
+ }
+
+ if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
+ return err
+ }
+ return nil
+}
+
+func (sc *StatsCoord) initStorage(ctx *sql.Context, storageTarget dsess.SqlDatabase) (*prollyStats, error) {
+ fs, ok := sc.dbFs[strings.ToLower(storageTarget.AliasedName())]
+ if !ok {
+		return nil, fmt.Errorf("failed to init stats db: %s filesys not found", storageTarget.AliasedName())
+ }
+
+ params := make(map[string]interface{})
+ params[dbfactory.GRPCDialProviderParam] = sc.dialPro
+
+	var urlPath string
+	u, err := earl.Parse(sc.pro.DbFactoryUrl())
+	if err != nil {
+		return nil, err
+	}
+	if u.Scheme == dbfactory.MemScheme {
+		urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir)
+	} else if u.Scheme == dbfactory.FileScheme {
+		urlPath = doltdb.LocalDirDoltDB
+	}
+
+ statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
+ if err != nil {
+ return nil, err
+ }
+
+ var dEnv *env.DoltEnv
+ exists, isDir := statsFs.Exists("")
+ if !exists {
+ err := statsFs.MkDirs("")
+ if err != nil {
+ return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
+ }
+
+		dEnv = env.Load(ctx, sc.hdp, statsFs, urlPath, doltversion.Version)
+ sess := dsess.DSessFromSess(ctx.Session)
+ err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), storageTarget.AliasedName())
+ if err != nil {
+ return nil, err
+ }
+ } else if !isDir {
+ return nil, fmt.Errorf("file exists where the dolt stats directory should be")
+ } else {
+ dEnv = env.LoadWithoutDB(ctx, sc.hdp, statsFs, "", doltversion.Version)
+ }
+
+ if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil {
+ return nil, err
+ }
+
+ deaf := dEnv.DbEaFactory(ctx)
+
+ tmpDir, err := dEnv.TempTableFilesDir()
+ if err != nil {
+ return nil, err
+ }
+ opts := editor.Options{
+ Deaf: deaf,
+ Tempdir: tmpDir,
+ }
+ statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
+ if err != nil {
+ return nil, err
+ }
+ return NewProllyStats(ctx, statsDb)
+}
+
+func (sc *StatsCoord) unsafeAsyncSend(ctx context.Context, j StatsJob) error {
+ // The |Jobs| queue can change, the interrupts queue
+ // does not and is safe to send a blocking write to.
+ ji := NewControl("interrupt: '"+j.String()+"'", func(sc *StatsCoord) error {
+ return sc.sendJobs(ctx, j)
+ })
+
+ select {
+ case sc.Interrupts <- ji:
+ return nil
+ default:
+		return fmt.Errorf("async queue overflowed, failed to put job %s", j.String())
+ }
+}
+
+func (sc *StatsCoord) WaitForDbSync(ctx *sql.Context) error {
+ // Wait until the control job finishes before returning.
+ // We want to do two cycles -- to pick up new seeds and
+ // execute the finalize jobs that update statistics.
+	for range 2 {
+ j := NewControl("wait for sync", func(sc *StatsCoord) error { return nil })
+ if err := sc.unsafeAsyncSend(ctx, j); err != nil {
+ return err
+ }
+
+ select {
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-sc.Done:
+ return fmt.Errorf("stats queue closed")
+ case <-j.done:
+ }
+ }
+
+ return sc.ValidateState(ctx)
+}
+
+func (sc *StatsCoord) Gc(ctx *sql.Context) error {
+ done := make(chan struct{})
+ if err := sc.runGc(ctx, done); err != nil {
+ return err
+ }
+ select {
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-done:
+ return nil
+ }
+}
+
+func (sc *StatsCoord) BranchSync(ctx *sql.Context) error {
+ done := make(chan struct{})
+ newJobs, err := sc.runBranchSync(ctx, done)
+ if err != nil {
+ return err
+ }
+ for _, j := range newJobs {
+ // have to go through interrupts queue for thread safety
+ sc.Interrupts <- j
+ }
+ select {
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-done:
+ return nil
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/scheduler.go b/go/libraries/doltcore/sqle/statspro/scheduler.go
new file mode 100644
index 00000000000..4e971fdd48a
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/scheduler.go
@@ -0,0 +1,1037 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+	"github.com/sirupsen/logrus"
+
+	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/libraries/utils/filesys"
+	"github.com/dolthub/dolt/go/store/hash"
+	"github.com/dolthub/dolt/go/store/prolly"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+	"github.com/dolthub/dolt/go/store/val"
+)
+
+type StatsJob interface {
+ Finish()
+ String() string
+}
+
+var _ StatsJob = (*ReadJob)(nil)
+var _ StatsJob = (*SeedDbTablesJob)(nil)
+var _ StatsJob = (*ControlJob)(nil)
+var _ StatsJob = (*FinalizeJob)(nil)
+
+func NewSeedJob(sqlDb dsess.SqlDatabase) SeedDbTablesJob {
+ return SeedDbTablesJob{
+ sqlDb: sqlDb,
+ tables: nil,
+ done: make(chan struct{}),
+ }
+}
+
+// todo refactor so we can count buckets globally
+type tableStatsInfo struct {
+ name string
+ schHash hash.Hash
+ idxRoots []hash.Hash
+ bucketCount int
+}
+
+type SeedDbTablesJob struct {
+ sqlDb dsess.SqlDatabase
+ tables []tableStatsInfo
+ done chan struct{}
+}
+
+func (j SeedDbTablesJob) Finish() {
+ close(j.done)
+}
+
+func (j SeedDbTablesJob) String() string {
+ b := strings.Builder{}
+ b.WriteString("seed db: ")
+ b.WriteString(j.sqlDb.RevisionQualifiedName())
+ b.WriteString("[")
+
+	var sep = ""
+	for _, ti := range j.tables {
+		b.WriteString(sep)
+		b.WriteString("(" + ti.name + ": " + ti.schHash.String()[:5] + ")")
+		sep = ", "
+	}
+ b.WriteString("]")
+
+ return b.String()
+}
+
+func NewAnalyzeJob(ctx *sql.Context, sqlDb dsess.SqlDatabase, tables []string, after ControlJob) AnalyzeJob {
+ return AnalyzeJob{ctx: ctx, sqlDb: sqlDb, tables: tables, after: after, done: make(chan struct{})}
+}
+
+type AnalyzeJob struct {
+ ctx *sql.Context
+ sqlDb dsess.SqlDatabase
+ tables []string
+ after ControlJob
+ done chan struct{}
+}
+
+func (j AnalyzeJob) String() string {
+ return "analyze: [" + strings.Join(j.tables, ", ") + "]"
+}
+
+func (j AnalyzeJob) Finish() {
+	close(j.done)
+}
+
+type ReadJob struct {
+ // |ctx|/|db| track a specific working set
+ ctx *sql.Context
+ db dsess.SqlDatabase
+ table string
+ key templateCacheKey
+ template stats.Statistic
+ m prolly.Map
+ first bool
+ nodes []tree.Node
+ ordinals []updateOrdinal
+ idxLen int
+ done chan struct{}
+}
+
+func (j ReadJob) Finish() {
+ close(j.done)
+}
+
+func (j ReadJob) String() string {
+ b := strings.Builder{}
+ b.WriteString("read: " + j.db.RevisionQualifiedName() + "/" + j.table + ": ")
+ sep := ""
+ for i, o := range j.ordinals {
+ b.WriteString(fmt.Sprintf("%s[%s:%d-%d]", sep, j.nodes[i].HashOf().String()[:5], o.start, o.stop))
+ sep = ", "
+ }
+ return b.String()
+}
+
+type finalizeStruct struct {
+ buckets []hash.Hash
+ tupB *val.TupleBuilder
+}
+
+type FinalizeJob struct {
+ sqlDb dsess.SqlDatabase
+ tableKey tableIndexesKey
+ keepIndexes map[sql.StatQualifier]bool
+ editIndexes map[templateCacheKey]finalizeStruct
+ done chan struct{}
+}
+
+func (j FinalizeJob) Finish() {
+ close(j.done)
+}
+
+func (j FinalizeJob) String() string {
+ b := strings.Builder{}
+ b.WriteString("finalize " + j.tableKey.String())
+ b.WriteString(": ")
+ sep := ""
+ for idx, fs := range j.editIndexes {
+ b.WriteString(fmt.Sprintf("%s(%s: ", sep, idx.idxName))
+ sep = ""
+ for _, h := range fs.buckets {
+ b.WriteString(fmt.Sprintf("%s%s", sep, h.String()[:5]))
+ sep = ", "
+ }
+ b.WriteString(")")
+ sep = ", "
+ }
+ return b.String()
+}
+
+func NewControl(desc string, cb func(sc *StatsCoord) error) ControlJob {
+ return ControlJob{cb: cb, desc: desc, done: make(chan struct{})}
+}
+
+type ControlJob struct {
+ cb func(sc *StatsCoord) error
+ desc string
+ done chan struct{}
+}
+
+func (j ControlJob) Finish() {
+ close(j.done)
+}
+
+func (j ControlJob) String() string {
+ return "ControlJob: " + j.desc
+}
+
+type ctxFactory func(ctx context.Context) (*sql.Context, error)
+
+func NewStatsCoord(pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, logger *logrus.Logger, threads *sql.BackgroundThreads, dEnv *env.DoltEnv) *StatsCoord {
+ done := make(chan struct{})
+ close(done)
+ kv := NewMemStats()
+ return &StatsCoord{
+ dbMu: &sync.Mutex{},
+ statsMu: &sync.Mutex{},
+ logger: logger,
+ Jobs: make(chan StatsJob, 1024),
+ Done: done,
+ Interrupts: make(chan StatsJob, 1024),
+ JobInterval: 50 * time.Millisecond,
+ gcInterval: 24 * time.Hour,
+ branchInterval: 24 * time.Hour,
+ enableGc: atomic.Bool{},
+ bucketCap: kv.Cap(),
+ Stats: make(map[tableIndexesKey][]*stats.Statistic),
+ Branches: make(map[string][]ref.DoltRef),
+ dbFs: make(map[string]filesys.Filesys),
+ threads: threads,
+ kv: kv,
+ pro: pro,
+ hdp: dEnv.GetUserHomeDir,
+ dialPro: env.NewGRPCDialProviderFromDoltEnv(dEnv),
+ ctxGen: ctxGen,
+ }
+}
+
+func (sc *StatsCoord) SetMemOnly(v bool) {
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ sc.memOnly = v
+}
+
+func (sc *StatsCoord) SetEnableGc(v bool) {
+ sc.enableGc.Store(v)
+}
+
+func (sc *StatsCoord) SetTimers(job, gc, branch int64) {
+ sc.JobInterval = time.Duration(job) * time.Millisecond
+ sc.gcInterval = time.Duration(gc) * time.Millisecond
+ sc.branchInterval = time.Duration(branch) * time.Millisecond
+}
+
+type tableIndexesKey struct {
+ db string
+ branch string
+ table string
+ schema string
+}
+
+func (k tableIndexesKey) String() string {
+ return k.db + "/" + k.branch + "/" + k.table
+}
+
+type StatsCoord struct {
+ logger *logrus.Logger
+ threads *sql.BackgroundThreads
+ pro *sqle.DoltDatabaseProvider
+ statsBackingDb string
+ dialPro dbfactory.GRPCDialProvider
+ hdp env.HomeDirProvider
+ // ctxGen lets us fetch the most recent working root
+ ctxGen ctxFactory
+
+ JobInterval time.Duration
+ gcInterval time.Duration
+ branchInterval time.Duration
+ memOnly bool
+ Debug bool
+
+ Jobs chan StatsJob
+	// Interrupts skip the job queue and are processed first,
+	// but the channel has a fixed size and sends may block.
+	Interrupts chan StatsJob
+ Done chan struct{}
+
+ // XXX: do not hold the |dbMu| while accessing |pro|
+ dbMu *sync.Mutex
+ // dbs is a list of branch-qualified databases.
+ dbs []dsess.SqlDatabase
+ dbFs map[string]filesys.Filesys
+ // Branches lists the branches tracked for each database.
+ // Should track |dbs|.
+ Branches map[string][]ref.DoltRef
+
+ // kv is a content-addressed cache of histogram objects:
+ // buckets, first bounds, and schema-specific statistic
+ // templates.
+ kv StatsKv
+
+ // Stats tracks table statistics accessible to sessions.
+ Stats map[tableIndexesKey][]*stats.Statistic
+ statsMu *sync.Mutex
+
+ branchCounter atomic.Uint64
+ gcCounter atomic.Uint64
+
+ readCounter atomic.Int32
+
+ doGc atomic.Bool
+ enableGc atomic.Bool
+ enableBrSync atomic.Bool
+ gcMu sync.Mutex
+
+	// ddlGuard is a compare-and-swap flag that keeps |updateBranches|
+	// safe and nonblocking
+	ddlGuard bool
+ doBranchSync atomic.Bool
+ doCapCheck atomic.Bool
+ bucketCnt atomic.Int64
+ seedCnt atomic.Int64
+ bucketCap int64
+}
+
+func (sc *StatsCoord) Stop() {
+ select {
+ case <-sc.Done:
+ default:
+ close(sc.Done)
+ }
+}
+
+func (sc *StatsCoord) Restart(ctx context.Context) error {
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-sc.Done:
+ default:
+ // have loop stop itself to avoid accidentally closing
+ // channel twice
+ j := NewControl("stop thread", func(sc *StatsCoord) error {
+ sc.Stop()
+ return nil
+ })
+ if err := sc.unsafeAsyncSend(ctx, j); err != nil {
+ return err
+ }
+ select {
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-j.done:
+ case <-sc.Done:
+ }
+ }
+
+ sc.Done = make(chan struct{})
+ return sc.threads.Add("stats", func(ctx context.Context) {
+ sc.run(ctx)
+ })
+}
+
+func (sc *StatsCoord) Close() {
+	sc.Stop()
+}
+
+func (sc *StatsCoord) Add(ctx *sql.Context, db dsess.SqlDatabase, branch ref.DoltRef, fs filesys.Filesys) (chan struct{}, error) {
+ db, err := sqle.RevisionDbForBranch(ctx, db, branch.GetPath(), branch.GetPath()+"/"+db.AliasedName())
+ if err != nil {
+ sc.error(ControlJob{desc: "add db"}, err)
+ ret := make(chan struct{})
+ close(ret)
+ return ret, nil
+ }
+
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+ sc.ddlGuard = true
+
+ sc.Branches[db.AliasedName()] = append(sc.Branches[db.AliasedName()], ref.NewBranchRef(db.Revision()))
+ sc.dbs = append(sc.dbs, db)
+ sc.dbFs[db.AliasedName()] = fs
+ ret, err := sc.Seed(ctx, db)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(sc.dbs) == 1 {
+ sc.statsBackingDb = db.AliasedName()
+ var mem *memStats
+ switch kv := sc.kv.(type) {
+ case *memStats:
+ mem = kv
+ case *prollyStats:
+ mem = kv.mem
+ default:
+ mem = NewMemStats()
+ return ret, nil
+ }
+ if sc.memOnly {
+ return ret, nil
+ }
+ newKv, err := sc.initStorage(ctx, db)
+ if err != nil {
+ sc.error(ControlJob{desc: "add db"}, err)
+ close(ret)
+ return ret, nil
+ }
+ newKv.mem = mem
+ sc.kv = newKv
+ }
+
+ return ret, nil
+}
+
+func (sc *StatsCoord) Info() dprocedures.StatsInfo {
+ sc.dbMu.Lock()
+ dbCnt := len(sc.dbs)
+ cachedBucketCnt := sc.kv.Len()
+ defer sc.dbMu.Unlock()
+
+ sc.statsMu.Lock()
+ statCnt := len(sc.Stats)
+ defer sc.statsMu.Unlock()
+
+ var active bool
+ select {
+ case <-sc.Done:
+ default:
+ active = true
+ }
+
+ return dprocedures.StatsInfo{
+ DbCnt: dbCnt,
+ ReadCnt: int(sc.readCounter.Load()),
+ Active: active,
+ DbSeedCnt: int(sc.seedCnt.Load()),
+ EstBucketCnt: int(sc.bucketCnt.Load()),
+ CachedBucketCnt: cachedBucketCnt,
+ StatCnt: statCnt,
+ GcCounter: int(sc.gcCounter.Load()),
+ BranchCounter: int(sc.branchCounter.Load()),
+ }
+}
+
+// captureFlushQueue is a debug method that lets us inspect and
+// restore the job queue
+func (sc *StatsCoord) captureFlushQueue(ctx context.Context) ([]StatsJob, error) {
+	select {
+	case <-sc.Done:
+		// inactive event loop cannot be interrupted; safe to drain
+	default:
+		return nil, fmt.Errorf("cannot read queue while event loop is active")
+	}
+ var ret []StatsJob
+	for range len(sc.Jobs) {
+ select {
+ case <-ctx.Done():
+ return nil, nil
+ case j, ok := <-sc.Jobs:
+ if !ok {
+ return nil, nil
+ }
+ ret = append(ret, j)
+ }
+ }
+ return ret, nil
+}
+
+func (sc *StatsCoord) Seed(ctx context.Context, sqlDb dsess.SqlDatabase) (chan struct{}, error) {
+ j := NewSeedJob(sqlDb)
+ if err := sc.unsafeAsyncSend(ctx, j); err != nil {
+ return nil, err
+ }
+ sc.seedCnt.Add(1)
+ return j.done, nil
+}
+
+func (sc *StatsCoord) Control(ctx context.Context, desc string, cb func(sc *StatsCoord) error) (chan struct{}, error) {
+ j := NewControl(desc, cb)
+ if err := sc.unsafeAsyncSend(ctx, j); err != nil {
+ return nil, err
+ }
+ return j.done, nil
+}
+
+func (sc *StatsCoord) Interrupt(desc string, cb func(sc *StatsCoord) error) chan struct{} {
+ j := NewControl(desc, cb)
+ sc.Interrupts <- j
+ return j.done
+}
+
+func (sc *StatsCoord) error(j StatsJob, err error) {
+ if sc.Debug {
+ log.Println("stats error: ", err.Error())
+ }
+ sc.logger.Errorf("stats error; job detail: %s; verbose: %s", j.String(), err)
+}
+
+// run is the stats event loop: it drains interrupts first, then
+// executes queued stats jobs on each tick
+func (sc *StatsCoord) run(ctx context.Context) error {
+ jobTimer := time.NewTimer(0)
+ gcTicker := time.NewTicker(sc.gcInterval)
+ branchTicker := time.NewTicker(sc.branchInterval)
+
+ for {
+ // sequentially test:
+ // (1) ctx done/thread canceled
+ // (2) GC check
+ // (3) branch check
+ // (4) interrupt queue
+ // (5) job and other tickers
+ select {
+ case <-sc.Done:
+ return nil
+ case <-ctx.Done():
+ return ctx.Err()
+ default:
+ }
+
+		if sc.doGc.Swap(false) {
+			if err := sc.runGc(ctx, make(chan struct{})); err != nil {
+				sc.error(ControlJob{desc: "gc"}, err)
+			}
+		}
+
+ if sc.doBranchSync.Swap(false) {
+ j := ControlJob{desc: "branches update"}
+ newJobs, err := sc.runBranchSync(ctx, make(chan struct{}))
+ if err != nil {
+ sc.error(j, err)
+ }
+ err = sc.sendJobs(ctx, newJobs...)
+ if err != nil {
+ sc.error(j, err)
+ }
+ }
+
+ select {
+ case <-sc.Done:
+ return nil
+ case <-ctx.Done():
+ return ctx.Err()
+ case j, ok := <-sc.Interrupts:
+ if !ok {
+ return nil
+ }
+ if sc.Debug {
+ log.Println("stats interrupt job: ", j.String())
+ }
+ err := sc.executeJob(ctx, j)
+ if err != nil {
+ sc.error(j, err)
+ }
+ default:
+ }
+
+ select {
+ case <-sc.Done:
+ return nil
+ case <-ctx.Done():
+ return ctx.Err()
+ case j, ok := <-sc.Interrupts:
+ if !ok {
+ return nil
+ }
+ if sc.Debug {
+ log.Println("stats interrupt job: ", j.String())
+ }
+ err := sc.executeJob(ctx, j)
+ if err != nil {
+ sc.error(j, err)
+ }
+ case <-jobTimer.C:
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case j, ok := <-sc.Jobs:
+ if !ok {
+ return nil
+ }
+ if sc.Debug {
+ log.Println("stats execute job: ", j.String())
+ }
+ err := sc.executeJob(ctx, j)
+ if err != nil {
+ sc.error(j, err)
+ }
+ default:
+ }
+ case <-gcTicker.C:
+ sc.setGc()
+ case <-branchTicker.C:
+ sc.doBranchSync.Store(true)
+ }
+ jobTimer.Reset(sc.JobInterval)
+ }
+}
+
+func (sc *StatsCoord) sendJobs(ctx context.Context, jobs ...StatsJob) error {
+	// |Jobs| can be resized (doubled) mid-send and access is
+	// concurrent, so hold |dbMu| for the duration
+ sc.dbMu.Lock()
+ defer sc.dbMu.Unlock()
+
+ for i := 0; i < len(jobs); i++ {
+ j := jobs[i]
+ if j == nil {
+ continue
+ }
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case sc.Jobs <- j:
+ if _, ok := j.(ReadJob); ok {
+ sc.readCounter.Add(1)
+ }
+ default:
+ sc.doubleChannelSize(ctx)
+ i--
+ }
+ }
+ return nil
+}
+
+func (sc *StatsCoord) executeJob(ctx context.Context, j StatsJob) (err error) {
+ var newJobs []StatsJob
+ switch j := j.(type) {
+ case SeedDbTablesJob:
+ newJobs, err = sc.seedDbTables(ctx, j)
+ case ReadJob:
+ sc.readCounter.Add(-1)
+ newJobs, err = sc.readChunks(ctx, j)
+ case FinalizeJob:
+ newJobs, err = sc.finalizeUpdate(ctx, j)
+ case ControlJob:
+ if err := j.cb(sc); err != nil {
+ sc.error(j, err)
+ }
+ case AnalyzeJob:
+ newJobs, err = sc.runAnalyze(ctx, j)
+ default:
+ return fmt.Errorf("unknown job type: %T", j)
+ }
+ if err != nil {
+ return err
+ }
+ err = sc.sendJobs(ctx, newJobs...)
+ if err != nil {
+ sc.error(j, err)
+ }
+ j.Finish()
+ return nil
+}
+
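+// doubleChannelSize swaps sc.Jobs for a channel with twice the
+// capacity, draining the old channel into the new one. Callers must
+// hold |dbMu|; closing the old channel assumes no concurrent senders
+// outside that lock.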
+func (sc *StatsCoord) doubleChannelSize(ctx context.Context) {
+ close(sc.Jobs)
+ ch := make(chan StatsJob, cap(sc.Jobs)*2)
+ for j := range sc.Jobs {
+ ch <- j
+ }
+ sc.Jobs = ch
+}
+
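+// dropTableJob returns a FinalizeJob with no edit indexes, which
+// finalizeUpdate interprets as a table deletion.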
+func (sc *StatsCoord) dropTableJob(sqlDb dsess.SqlDatabase, tableName string) StatsJob {
+ return FinalizeJob{
+ tableKey: tableIndexesKey{
+ db: sqlDb.AliasedName(),
+ branch: sqlDb.Revision(),
+ table: tableName,
+ },
+ editIndexes: nil,
+ done: make(chan struct{}),
+ }
+}
+
+func (sc *StatsCoord) readChunks(ctx context.Context, j ReadJob) ([]StatsJob, error) {
+ // check if chunk already in cache
+ // if no, see if on disk and we just need to load
+ // otherwise perform read to create the bucket, write to disk, update mem ref
+
+ prollyMap := j.m
+ updater := newBucketBuilder(sql.StatQualifier{}, j.idxLen, prollyMap.KeyDesc())
+ keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(j.idxLen))
+
+ // all kv puts are guarded by |gcMu| to avoid concurrent
+ // GC with stale data discarding some or all state
+ sc.gcMu.Lock()
+ defer sc.gcMu.Unlock()
+
+ if j.first {
+ sc.kv.PutTemplate(j.key, j.template)
+
+ firstNodeHash := j.nodes[0].HashOf()
+ if _, ok := sc.kv.GetBound(firstNodeHash, j.idxLen); !ok {
+ firstRow, err := firstRowForIndex(j.ctx, prollyMap, keyBuilder)
+			if err != nil {
+				return nil, err
+			}
+ if sc.Debug {
+ log.Printf("put bound: %s | %s: %v\n", j.table, firstNodeHash.String()[:5], firstRow)
+ }
+ sc.kv.PutBound(firstNodeHash, firstRow)
+ }
+ }
+
+ for i, n := range j.nodes {
+ if _, ok, err := sc.kv.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil {
+ return nil, err
+ } else if ok {
+ // concurrent reads overestimate shared buckets
+ sc.bucketCnt.Add(-1)
+ continue
+ }
+ // each node is a bucket
+ updater.newBucket()
+
+		// we read the half-open range [node first key, next node first key)
+ start, stop := j.ordinals[i].start, j.ordinals[i].stop
+ iter, err := j.m.IterOrdinalRange(ctx, start, stop)
+ if err != nil {
+ return nil, err
+ }
+ for {
+ // stats key will be a prefix of the index key
+ keyBytes, _, err := iter.Next(ctx)
+ if errors.Is(err, io.EOF) {
+ break
+ } else if err != nil {
+ return nil, err
+ }
+ // build full key
+ for i := range keyBuilder.Desc.Types {
+ keyBuilder.PutRaw(i, keyBytes.GetField(i))
+ }
+
+ updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
+ keyBuilder.Recycle()
+ }
+
+ // finalize the aggregation
+ bucket, err := updater.finalize(ctx, prollyMap.NodeStore())
+ if err != nil {
+ return nil, err
+ }
+ err = sc.kv.PutBucket(ctx, n.HashOf(), bucket, keyBuilder)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return nil, nil
+}
+
+func (sc *StatsCoord) runAnalyze(_ context.Context, j AnalyzeJob) ([]StatsJob, error) {
+ var ret []StatsJob
+ for _, tableName := range j.tables {
+ readJobs, _, err := sc.readJobsForTable(j.ctx, j.sqlDb, tableStatsInfo{name: tableName})
+ if err != nil {
+ return nil, err
+ }
+ ret = append(ret, readJobs...)
+ }
+ if j.after.done != nil {
+ ret = append(ret, j.after)
+ }
+ return ret, nil
+}
+
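+// finalizeUpdate stitches cached buckets into full table statistics
+// and swaps them into sc.Stats. Because finalization cannot lock out
+// concurrent database drops, it re-checks the database root afterward
+// and discards the new stats if the database disappeared.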
+func (sc *StatsCoord) finalizeUpdate(ctx context.Context, j FinalizeJob) ([]StatsJob, error) {
+ if len(j.editIndexes) == 0 {
+ // delete table
+ sc.statsMu.Lock()
+ delete(sc.Stats, j.tableKey)
+ sc.statsMu.Unlock()
+ return nil, nil
+ }
+
+ var newStats []*stats.Statistic
+ for _, s := range sc.Stats[j.tableKey] {
+ if ok := j.keepIndexes[s.Qual]; ok {
+ newStats = append(newStats, s)
+ }
+ }
+ for key, fs := range j.editIndexes {
+ if len(fs.buckets) == 0 {
+ continue
+ }
+
+ template, ok := sc.kv.GetTemplate(key)
+ if !ok {
+			return nil, fmt.Errorf("missing template dependency for table: %s", key)
+ }
+ template.Qual = sql.NewStatQualifier(j.tableKey.db, "", j.tableKey.table, key.idxName)
+
+ for i, bh := range fs.buckets {
+ if i == 0 {
+ bnd, ok := sc.kv.GetBound(bh, fs.tupB.Desc.Count())
+ if !ok {
+ log.Println("chunks: ", fs.buckets)
+ return nil, fmt.Errorf("missing read job bound dependency for chunk %s: %s", key, bh)
+ }
+ template.LowerBnd = bnd[:fs.tupB.Desc.Count()]
+ }
+ // accumulate counts
+ if b, ok, err := sc.kv.GetBucket(ctx, bh, fs.tupB); err != nil {
+ return nil, err
+ } else if !ok {
+ log.Println("need chunks: ", fs.buckets)
+ return nil, fmt.Errorf("missing read job bucket dependency for chunk: %s", bh)
+ } else {
+ template.RowCnt += b.RowCnt
+ template.DistinctCnt += b.DistinctCnt
+ template.NullCnt += b.NullCnt
+ template.Hist = append(template.Hist, b)
+ }
+ }
+ newStats = append(newStats, &template)
+ }
+
+	// We cannot mutex-protect against concurrent db drops during
+	// finalization, so we check afterward whether a db/stats race
+	// occurred. Database and branch deletes are checked separately.
+
+ sc.dbMu.Lock()
+ sc.ddlGuard = false
+ sc.dbMu.Unlock()
+
+ sc.statsMu.Lock()
+ sc.Stats[j.tableKey] = newStats
+ sc.statsMu.Unlock()
+
+ sc.dbMu.Lock()
+ if sc.ddlGuard {
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ if _, err := j.sqlDb.GetRoot(sqlCtx); err != nil {
+ sc.statsMu.Lock()
+ delete(sc.Stats, j.tableKey)
+ sc.statsMu.Unlock()
+ }
+ }
+ sc.dbMu.Unlock()
+
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return nil, err
+ }
+ if _, err := j.sqlDb.GetRoot(sqlCtx); err != nil {
+ sc.statsMu.Lock()
+ delete(sc.Stats, j.tableKey)
+ sc.statsMu.Unlock()
+ }
+
+ return nil, nil
+}
+
+type dbBranchKey struct {
+ db string
+ branch string
+}
+
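+// runBranchSync reconciles the tracked branch list with each
+// database's current branches using a sorted merge (both lists are
+// assumed ordered by path), seeding stats jobs for new branches and
+// dropping stats for deleted ones. If a concurrent DDL operation
+// flips |ddlGuard|, the sync restarts from scratch.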
+func (sc *StatsCoord) runBranchSync(ctx context.Context, done chan struct{}) ([]StatsJob, error) {
+ if !sc.enableBrSync.Swap(false) {
+ close(done)
+ return nil, nil
+ }
+
+ if sc.Debug {
+ log.Println("stats branch check number: ", strconv.Itoa(int(sc.branchCounter.Load())))
+ }
+ sc.branchCounter.Add(1)
+
+ j := ControlJob{desc: "branch update"}
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ newBranches := make(map[string][]ref.DoltRef)
+ var newDbs []dsess.SqlDatabase
+
+	// Currently, updateBranches is sensitive to concurrent
+	// add/drop database. We use |ddlGuard| as a compare-and-swap
+	// check after collecting the new dbs, branches, and stats;
+	// a failed guard check retries. Incremental add/delete would
+	// make |ddlGuard| unnecessary, but would be more complex and
+	// potentially more blocking.
+ sc.dbMu.Lock()
+ sc.ddlGuard = false
+ dbBranches := make(map[string][]ref.DoltRef)
+ for k, v := range sc.Branches {
+ dbBranches[k] = v
+ }
+ dbs := make([]dsess.SqlDatabase, len(sc.dbs))
+ copy(dbs, sc.dbs)
+ sc.dbMu.Unlock()
+
+ {
+ // filter for branches that haven't been deleted
+ var w int
+ for i := 0; i < len(dbs); i++ {
+ if _, err := dbs[i].GetRoot(sqlCtx); err != nil {
+ continue
+ }
+ dbs[w] = dbs[i]
+ w++
+ }
+
+ dbs = dbs[:w]
+ }
+
+ var ret []StatsJob
+ for dbName, branches := range dbBranches {
+ var sqlDb dsess.SqlDatabase
+ for _, db := range dbs {
+ if strings.EqualFold(db.AliasedName(), dbName) {
+ sqlDb = db
+ break
+ }
+ }
+
+ if sqlDb == nil {
+ sc.error(j, fmt.Errorf("database in branches list is not tracked: %s", dbName))
+ continue
+ }
+
+ // check if db still valid
+ dSess := dsess.DSessFromSess(sqlCtx.Session)
+ dbd, ok := dSess.GetDbData(sqlCtx, sqlDb.AliasedName())
+ if !ok {
+ sc.error(j, fmt.Errorf("database in branches list does not exist: %s", dbName))
+ continue
+ }
+ curBranches, err := dbd.Ddb.GetBranches(sqlCtx)
+ if err != nil {
+ sc.error(j, err)
+ continue
+ }
+
+ newBranches[sqlDb.AliasedName()] = curBranches
+
+ i := 0
+ k := 0
+ for i < len(branches) && k < len(curBranches) {
+ br := curBranches[k]
+ switch strings.Compare(branches[i].GetPath(), curBranches[k].GetPath()) {
+ case 0:
+ i++
+ k++
+ sqlDb, err := sqle.RevisionDbForBranch(sqlCtx, sqlDb, br.GetPath(), br.GetPath()+"/"+dbName)
+ if err != nil {
+ sc.error(j, err)
+ continue
+ }
+ newDbs = append(newDbs, sqlDb)
+ case -1:
+ i++
+ case +1:
+ k++
+ sqlDb, err := sqle.RevisionDbForBranch(sqlCtx, sqlDb, br.GetPath(), br.GetPath()+"/"+dbName)
+ if err != nil {
+ sc.error(j, err)
+ continue
+ }
+ _, err = sqlDb.GetRoot(sqlCtx)
+ if err != nil {
+ continue
+ }
+
+ newDbs = append(newDbs, sqlDb)
+ ret = append(ret, NewSeedJob(sqlDb))
+ sc.seedCnt.Add(1)
+ }
+ }
+ for k < len(curBranches) {
+ br := curBranches[k]
+ k++
+ sqlDb, err := sqle.RevisionDbForBranch(sqlCtx, sqlDb, br.GetPath(), br.GetPath()+"/"+dbName)
+ if err != nil {
+ sc.error(j, err)
+ continue
+ }
+
+ newDbs = append(newDbs, sqlDb)
+ ret = append(ret, NewSeedJob(sqlDb))
+ sc.seedCnt.Add(1)
+ }
+ }
+
+ sc.dbMu.Lock()
+
+ if sc.ddlGuard {
+ // ddl interrupted branch refresh
+ sc.dbMu.Unlock()
+ return sc.runBranchSync(ctx, done)
+ }
+
+ sc.Branches = newBranches
+ sc.dbs = newDbs
+
+ var statKeys = make(map[dbBranchKey]bool)
+ for _, db := range sc.dbs {
+ statKeys[dbBranchKey{db.AliasedName(), db.Revision()}] = true
+ }
+ sc.dbMu.Unlock()
+
+ newStats := make(map[tableIndexesKey][]*stats.Statistic)
+ sc.statsMu.Lock()
+ for k, s := range sc.Stats {
+ if statKeys[dbBranchKey{db: k.db, branch: k.branch}] {
+ newStats[k] = s
+ }
+ }
+ sc.Stats = newStats
+ sc.statsMu.Unlock()
+
+	// To keep branch checks from starving the loop, only re-enable
+	// them after a block of other work has gone through.
+ ret = append(ret, NewControl("re-enable branch check", func(sc *StatsCoord) error {
+ sc.enableBrSync.Store(true)
+ close(done)
+ return nil
+ }))
+
+ return ret, nil
+}
+
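+// setGc requests a GC pass on the next loop iteration, unless GC is
+// disabled.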
+func (sc *StatsCoord) setGc() {
+ if sc.enableGc.Load() {
+ sc.doGc.Store(true)
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/scheduler_test.go b/go/libraries/doltcore/sqle/statspro/scheduler_test.go
new file mode 100644
index 00000000000..a376febdfbe
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/scheduler_test.go
@@ -0,0 +1,1498 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	gms "github.com/dolthub/go-mysql-server"
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/analyzer"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+	lru "github.com/hashicorp/golang-lru/v2"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/require"
+
+	"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
+	"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+)
+
+func TestScheduleLoop(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ {
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ // run two cycles -> (1) seed, (2) populate
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{
+ db: sqlDbs[0], table: "ab",
+ ordinals: []updateOrdinal{{0, 47}, {47, 59}, {59, 94}, {94, 125}, {125, 159}, {159, 191}, {191, 200}},
+ },
+ ReadJob{
+ db: sqlDbs[0], table: "ab",
+ ordinals: []updateOrdinal{{0, 26}, {26, 55}, {55, 92}, {92, 110}, {110, 147}, {147, 189}, {189, 200}},
+ },
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "ab"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ templateCacheKey{idxName: "b"}: {},
+ }},
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "ab"}, {name: "xy"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "ab"}, {name: "xy"}}},
+ })
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, kv.buckets.Len())
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+ }
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy"))
+ runAndPause(t, ctx, sc, &wg)
+ runAndPause(t, ctx, sc, &wg)
+
+ doGcCycle(t, ctx, sc)
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 14, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 2, len(stat))
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+}
+
+func TestAnalyze(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+
+ sc.captureFlushQueue(ctx)
+
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (-1,-1)"))
+
+ analyze := NewAnalyzeJob(ctx, sqlDbs[0], []string{"xy"}, ControlJob{})
+ sc.Jobs <- analyze
+
+ validateJobState(t, ctx, sc, []StatsJob{
+ AnalyzeJob{
+ sqlDb: sqlDbs[0],
+ tables: []string{"xy"},
+ },
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{db: sqlDbs[0], table: "xy", nodes: []tree.Node{{}}, ordinals: []updateOrdinal{{0, 416}}},
+ ReadJob{db: sqlDbs[0], table: "xy", nodes: []tree.Node{{}}, ordinals: []updateOrdinal{{0, 241}}},
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ templateCacheKey{idxName: "y"}: {},
+ }},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{})
+ kv := sc.kv.(*memStats)
+ require.Equal(t, uint64(0), sc.gcCounter.Load())
+ require.Equal(t, 6, kv.buckets.Len())
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ for _, tableStats := range sc.Stats {
+ require.Equal(t, 2, len(tableStats))
+ }
+}
+
+func TestModifyColumn(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+ sc.enableGc.Store(false)
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy modify column y bigint"))
+
+ // expect finalize, no GC
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{db: sqlDbs[0], table: "xy", ordinals: []updateOrdinal{{0, 210}, {210, 415}, {415, 470}, {470, 500}}},
+ ReadJob{db: sqlDbs[0], table: "xy", ordinals: []updateOrdinal{{0, 267}, {267, 500}}},
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ templateCacheKey{idxName: "y"}: {},
+ }},
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 10, kv.buckets.Len())
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 4, len(stat[0].Hist))
+ require.Equal(t, 2, len(stat[1].Hist))
+ require.Equal(t, int64(6), sc.bucketCnt.Load())
+
+ doGcCycle(t, ctx, sc)
+ require.Equal(t, int64(6), sc.bucketCnt.Load())
+ require.Equal(t, 6, kv.buckets.Len())
+ }
+}
+
+func TestAddColumn(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+ sc.enableGc.Store(false)
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy add column z int"))
+
+ // schema but no data change
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ },
+ },
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates)) // +2 for new schema
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 2, len(stat[0].Hist))
+ require.Equal(t, 2, len(stat[1].Hist))
+ require.Equal(t, int64(4), sc.bucketCnt.Load())
+ }
+}
+
+func TestDropIndex(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ },
+ },
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 2, len(stat[0].Hist))
+ require.Equal(t, int64(2), sc.bucketCnt.Load())
+
+ doGcCycle(t, ctx, sc)
+
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 2, kv.buckets.Len())
+ require.Equal(t, 1, len(kv.bounds))
+ require.Equal(t, 1, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 2, len(stat[0].Hist))
+ require.Equal(t, int64(2), sc.bucketCnt.Load())
+ }
+}
+
+func TestDropTable(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy"))
+
+ runAndPause(t, ctx, sc, &wg)
+
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{db: sqlDbs[0], table: "ab", ordinals: []updateOrdinal{{0, 1}}},
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "ab"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ },
+ },
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: nil,
+ },
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "ab"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, kv.buckets.Len())
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 1, len(stat[0].Hist))
+
+ doGcCycle(t, ctx, sc)
+
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 1, kv.buckets.Len())
+ require.Equal(t, 1, len(kv.bounds))
+ require.Equal(t, 1, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat = sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 1, len(stat[0].Hist))
+ require.Equal(t, int64(1), sc.bucketCnt.Load())
+ }
+}
+
+func TestDeleteAboveBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 498"))
+
+ runAndPause(t, ctx, sc, &wg) // seed
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, kv.buckets.Len()) // 1 for new chunk
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates)) // +1 for schema change
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 2, len(stat[0].Hist))
+ require.Equal(t, int64(2), sc.bucketCnt.Load())
+
+ doGcCycle(t, ctx, sc)
+ require.Equal(t, 2, kv.buckets.Len())
+ require.Equal(t, int64(2), sc.bucketCnt.Load())
+ }
+}
+
+func TestDeleteBelowBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 410"))
+
+ runAndPause(t, ctx, sc, &wg) // seed
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ kv := sc.kv.(*memStats)
+
+ require.Equal(t, 5, kv.buckets.Len()) // +1 rewrite partial chunk
+ require.Equal(t, 3, len(kv.bounds)) // +1 rewrite first chunk
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 1, len(stat[0].Hist))
+ require.Equal(t, int64(1), sc.bucketCnt.Load())
+
+ doGcCycle(t, ctx, sc)
+ require.Equal(t, 1, kv.buckets.Len())
+ require.Equal(t, int64(1), sc.bucketCnt.Load())
+ }
+}
+
+func TestDeleteOnBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ // PRIMARY boundary chunk -> rewrite y_idx's second
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 414"))
+
+ runAndPause(t, ctx, sc, &wg) // seed
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates)) // +1 schema change
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 1, len(stat[0].Hist))
+ require.Equal(t, int64(1), sc.bucketCnt.Load())
+
+ doGcCycle(t, ctx, sc)
+ require.Equal(t, 1, kv.buckets.Len())
+ require.Equal(t, int64(1), sc.bucketCnt.Load())
+ }
+}
+
+func TestAddDropDatabases(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := defaultSetup(t, threads, true)
+ sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+
+ var otherDb sqle.Database
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+
+ for _, db := range sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) {
+ if db.Name() == "otherdb" {
+ dsessDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), "main", "main/"+db.Name())
+ require.NoError(t, err)
+ otherDb = dsessDb.(sqle.Database)
+ }
+ }
+
+ // finish queue of read/finalize
+ runAndPause(t, ctx, sc, &wg) // pull seeds out of interrupt
+ runAndPause(t, ctx, sc, &wg)
+
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{db: otherDb, table: "t", ordinals: []updateOrdinal{{0, 2}}},
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "otherdb", branch: "main", table: "t"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ }},
+ SeedDbTablesJob{sqlDb: otherDb, tables: []tableStatsInfo{{name: "t"}}},
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ runAndPause(t, ctx, sc, &wg)
+
+ // xy and t
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, kv.buckets.Len())
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}]
+ require.Equal(t, 1, len(stat))
+ }
+
+ dropHook := NewDropDatabaseHook(sc)
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+ dropHook(ctx, "otherdb")
+
+ _, ok := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}]
+ require.False(t, ok)
+ }
+}
+
+func TestGC(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)"))
+
+ runAndPause(t, ctx, sc, &wg) // seed interrupt
+ runAndPause(t, ctx, sc, &wg) // read jobs
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ dropHook := NewDropDatabaseHook(sc)
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+ dropHook(ctx, "otherdb")
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j"))
+
+ runAndPause(t, ctx, sc, &wg) // pick up table drop
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ doGcCycle(t, ctx, sc)
+
+ // test for cleanup
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, kv.buckets.Len())
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ }
+}
+
+func TestBranches(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+ sc.enableGc.Store(true)
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add xy')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add t')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add s')"))
+
+ runAndPause(t, ctx, sc, &wg) // seed interrupt
+ runAndPause(t, ctx, sc, &wg) // read jobs
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat2')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (2), (3)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'insert into t')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat3')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table t"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop t')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop index j')"))
+
+ runAndPause(t, ctx, sc, &wg) // pick up table changes
+ runAndPause(t, ctx, sc, &wg) // finalize
+
+ sc.doBranchSync.Store(true)
+ runAndPause(t, ctx, sc, &wg) // new branches
+
+ require.Equal(t, 7, len(sc.dbs))
+ stat, ok := sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}]
+ require.Equal(t, 1, len(stat))
+ stat = sc.Stats[tableIndexesKey{"thirddb", "main", "s", ""}]
+ require.Equal(t, 2, len(stat))
+
+ runAndPause(t, ctx, sc, &wg) // seed new branches
+ runAndPause(t, ctx, sc, &wg) // finalize branches
+
+ require.Equal(t, 7, len(sc.dbs))
+
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}]
+ require.True(t, ok)
+ require.Equal(t, 2, len(stat))
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.True(t, ok)
+ require.Equal(t, 1, len(stat))
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}]
+ require.True(t, ok)
+ require.Equal(t, 1, len(stat))
+
+ // mydb: 4 shared
+ // otherdb: 1 + 1
+ // thirddb: 2 + shared
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4+2+2, kv.buckets.Len())
+ require.Equal(t, 2+(1+1)+2, len(kv.bounds))
+ require.Equal(t, 2+1+(2+1), len(kv.templates))
+ require.Equal(t, 7-1, len(sc.Stats))
+
+ dropHook := NewDropDatabaseHook(sc)
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+ dropHook(ctx, "otherdb")
+
+ runAndPause(t, ctx, sc, &wg) // finalize drop otherdb
+
+ require.Equal(t, 4, len(sc.dbs))
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}]
+ require.False(t, ok)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_branch('-D', 'feat1')"))
+
+ sc.doBranchSync.Store(true)
+ runAndPause(t, ctx, sc, &wg) // detect deleted branch
+ runAndPause(t, ctx, sc, &wg) // finalize branch delete
+
+ require.Equal(t, 3, len(sc.dbs))
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.True(t, ok)
+
+ doGcCycle(t, ctx, sc)
+
+ // 3 dbs remaining, mydb/main, thirddb/feat1, thirddb/main
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 4+2, kv.buckets.Len())
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 5, len(kv.templates))
+ require.Equal(t, 3, len(sc.Stats))
+ }
+}
+
+func TestBucketDoubling(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ cur := sc.kv.(*memStats).buckets
+ newB, _ := lru.New[bucketKey, *stats.Bucket](4)
+ for _, k := range cur.Keys() {
+ v, _ := cur.Get(k)
+ newB.Add(k, v)
+ }
+ sc.kv.(*memStats).buckets = newB
+ sc.bucketCap = 4
+
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ sc.enableGc.Store(true)
+
+ runAndPause(t, ctx, sc, &wg) // track ab
+ runAndPause(t, ctx, sc, &wg) // finalize ab
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, kv.buckets.Len())
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+}
+
+func TestBucketCounting(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ sc.enableGc.Store(false)
+
+ runAndPause(t, ctx, sc, &wg) // track ab
+ runAndPause(t, ctx, sc, &wg) // finalize ab
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, kv.buckets.Len())
+ require.Equal(t, 2, len(sc.Stats))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table cd (c int primary key, d varchar(200), key (d,c))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into cd select a,b from ab"))
+
+ runAndPause(t, ctx, sc, &wg) // track ab
+ runAndPause(t, ctx, sc, &wg) // finalize ab
+
+ // no new buckets
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 18, kv.buckets.Len())
+ require.Equal(t, 3, len(sc.Stats))
+}
+
+func TestDropOnlyDb(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, false)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ _, ok := sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "mydb", sc.statsBackingDb)
+
+	// dropping the only database swaps the backing store to memory;
+	// adding a new database switches back to prolly
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ sc.Stop()
+
+ // empty memory KV
+ _, ok = sc.kv.(*memStats)
+ require.True(t, ok)
+ require.Equal(t, "", sc.statsBackingDb)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+
+ // empty prollyKv
+ _, ok = sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "otherdb", sc.statsBackingDb)
+}
+
+func TestRotateBackingDb(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, startDbs := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ prollyKv, err := NewProllyStats(ctx, startDbs[0])
+ require.NoError(t, err)
+ prollyKv.mem = sc.kv.(*memStats)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database backupdb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use backupdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)"))
+
+ runAndPause(t, ctx, sc, &wg) // seed
+ runAndPause(t, ctx, sc, &wg) // track xy
+ runAndPause(t, ctx, sc, &wg) // finalize xy
+
+ require.Equal(t, 5, sc.kv.Len())
+ require.Equal(t, 2, len(sc.Stats))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb"))
+
+ prollyKv, ok := sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "backupdb", sc.statsBackingDb)
+
+	// we lost the backing storage; the previous in-memory contents move into the new kv
+ require.Equal(t, 5, sc.kv.Len())
+ require.Equal(t, 1, len(sc.Stats))
+}
+
+func TestReadCounter(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := defaultSetup(t, threads, true)
+ wg := sync.WaitGroup{}
+
+ {
+ require.Equal(t, 0, sc.Info().ReadCnt)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (501, 0)"))
+ runAndPause(t, ctx, sc, &wg)
+
+ require.Equal(t, 2, sc.Info().ReadCnt)
+ }
+}
+
+func TestJobQueueDoubling(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ dEnv := dtestutils.CreateTestEnv()
+ sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
+ defer sqlEng.Close()
+
+ sc := sqlEng.Analyzer.Catalog.StatsProvider.(*StatsCoord)
+ sc.Jobs = make(chan StatsJob, 1)
+
+ var jobs []StatsJob
+	for range 1025 {
+ jobs = append(jobs, ControlJob{})
+ }
+ require.NoError(t, sc.sendJobs(ctx, jobs...))
+ require.Equal(t, 1025, len(sc.Jobs))
+ require.Equal(t, 2048, cap(sc.Jobs))
+}
+
+func TestEmptyTable(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, sqlDbs := emptySetup(t, threads, false)
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y varchar(10), key (y,x))"))
+
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ templateCacheKey{idxName: "y"}: {},
+ }},
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+}
+
+func TestPanic(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ sc.Control(ctx, "panic", func(sc *StatsCoord) error {
+ panic("test panic")
+ })
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+}
+
+func TestValidate(t *testing.T) {
+	threads := sql.NewBackgroundThreads()
+	defer threads.Shutdown()
+	ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+	sc.SetEnableGc(true)
+
+	require.NoError(t, sc.Restart(ctx))
+
+	sc.Control(ctx, "panic", func(sc *StatsCoord) error {
+		panic("test panic")
+	})
+
+	require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+	require.NoError(t, sc.ValidateState(ctx))
+}
+
+func TestPurge(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y varchar(10), key (y,x))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database other"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use other"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(10), key (b,a))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0), (1,1), (2,2)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ sc.Stop()
+
+ kv := sc.kv.(*prollyStats)
+ require.Equal(t, 2, kv.Len())
+ require.Equal(t, 4, len(kv.mem.templates))
+ require.Equal(t, 2, len(kv.mem.bounds))
+ m, err := kv.m.Map(ctx)
+ require.NoError(t, err)
+ cmpCnt, err := m.Count()
+ require.NoError(t, err)
+ require.Equal(t, 2, cmpCnt)
+
+ require.NoError(t, sc.Purge(ctx))
+
+ kv = sc.kv.(*prollyStats)
+ require.Equal(t, 0, kv.Len())
+ require.Equal(t, 0, len(kv.mem.templates))
+ require.Equal(t, 0, len(kv.mem.bounds))
+ m, err = kv.m.Map(ctx)
+ require.NoError(t, err)
+ cmpCnt, err = m.Count()
+ require.NoError(t, err)
+ require.Equal(t, 0, cmpCnt)
+}
+
+func emptySetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord, []sqle.Database) {
+ dEnv := dtestutils.CreateTestEnv()
+ sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+
+ sc := sqlEng.Analyzer.Catalog.StatsProvider.(*StatsCoord)
+ sc.SetEnableGc(false)
+ sc.enableBrSync.Store(false)
+ require.NoError(t, sc.Restart(ctx))
+
+ ctx, _ = sc.ctxGen(ctx)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.Stop()
+
+ var sqlDbs []sqle.Database
+ for _, db := range sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) {
+ if sqlDb, ok := db.(sqle.Database); ok {
+ branch := ref.NewBranchRef("main")
+ db, err := sqle.RevisionDbForBranch(ctx, sqlDb, branch.GetPath(), branch.GetPath()+"/"+sqlDb.AliasedName())
+ require.NoError(t, err)
+ sqlDbs = append(sqlDbs, db.(sqle.Database))
+ }
+ }
+
+ if memOnly {
+ statsKv := NewMemStats()
+ sc.kv = statsKv
+ }
+
+ sc.enableBrSync.Store(true)
+
+ return ctx, sqlEng, sc, sqlDbs
+}
+
+func defaultSetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord, []sqle.Database) {
+ ctx, sqlEng, sc, sqlDbs := emptySetup(t, threads, memOnly)
+ //sc.Debug = true
+
+ wg := sync.WaitGroup{}
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int, key (y,x))"))
+
+ xyIns := strings.Builder{}
+ xyIns.WriteString("insert into xy values")
+ for i := range 500 {
+ if i > 0 {
+ xyIns.WriteString(", ")
+ }
+ xyIns.WriteString(fmt.Sprintf("(%d, %d)", i, i%25))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, xyIns.String()))
+
+ {
+ // seed creates read jobs
+ runAndPause(t, ctx, sc, &wg)
+ validateJobState(t, ctx, sc, []StatsJob{
+ ReadJob{db: sqlDbs[0], table: "xy", ordinals: []updateOrdinal{{0, 415}, {415, 500}}},
+ ReadJob{db: sqlDbs[0], table: "xy", ordinals: []updateOrdinal{{0, 240}, {240, 500}}},
+ FinalizeJob{
+ tableKey: tableIndexesKey{db: "mydb", branch: "main", table: "xy"},
+ editIndexes: map[templateCacheKey]finalizeStruct{
+ templateCacheKey{idxName: "PRIMARY"}: {},
+ templateCacheKey{idxName: "y"}: {},
+ }},
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+ }
+
+ {
+ // read jobs populate cache
+ runAndPause(t, ctx, sc, &wg)
+
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ var kv *memStats
+ switch s := sc.kv.(type) {
+ case *memStats:
+ kv = s
+ case *prollyStats:
+ kv = s.mem
+ }
+ require.Equal(t, 4, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ for _, tableStats := range sc.Stats {
+ require.Equal(t, 2, len(tableStats))
+ }
+ }
+
+ {
+ // seed with no changes yields no new jobs
+ runAndPause(t, ctx, sc, &wg)
+
+ validateJobState(t, ctx, sc, []StatsJob{
+ SeedDbTablesJob{sqlDb: sqlDbs[0], tables: []tableStatsInfo{{name: "xy"}}},
+ })
+
+ var kv *memStats
+ switch s := sc.kv.(type) {
+ case *memStats:
+ kv = s
+ case *prollyStats:
+ kv = s.mem
+ }
+ require.Equal(t, 4, kv.buckets.Len())
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ for _, tableStats := range sc.Stats {
+ require.Equal(t, 2, len(tableStats))
+ }
+ }
+ return ctx, sqlEng, sc, sqlDbs
+}
+
+// validateJobState drains the job queue, compares the jobs against
+// |expected|, and then pushes them back onto the queue in order
+func validateJobState(t *testing.T, ctx context.Context, sc *StatsCoord, expected []StatsJob) {
+ jobs, err := sc.captureFlushQueue(ctx)
+ require.NoError(t, err)
+
+ require.Equal(t, len(expected), len(jobs), fmt.Sprintf("expected: %s; found: %s", expected, jobs))
+ for i, j := range jobs {
+ switch j := j.(type) {
+ case SeedDbTablesJob:
+ ej, ok := expected[i].(SeedDbTablesJob)
+ require.True(t, ok)
+ for i := range ej.tables {
+ require.Equal(t, ej.tables[i].name, j.tables[i].name)
+ }
+ require.Equal(t, ej.sqlDb.AliasedName(), j.sqlDb.AliasedName())
+ require.Equal(t, ej.sqlDb.Revision(), j.sqlDb.Revision())
+ case ReadJob:
+ ej, ok := expected[i].(ReadJob)
+ require.True(t, ok)
+ require.Equal(t, ej.table, j.table)
+ require.Equal(t, ej.ordinals, j.ordinals)
+ require.Equal(t, ej.db.AliasedName(), j.db.AliasedName())
+ require.Equal(t, ej.db.Revision(), j.db.Revision())
+ case FinalizeJob:
+ ej, ok := expected[i].(FinalizeJob)
+ require.True(t, ok)
+ require.Equal(t, ej.tableKey, j.tableKey)
+ idx := make(map[string]bool)
+			for k := range j.editIndexes {
+ idx[k.idxName] = true
+ }
+			for k := range ej.editIndexes {
+ if _, ok := idx[k.idxName]; !ok {
+ require.Fail(t, "missing index: "+k.idxName)
+ }
+ }
+ case ControlJob:
+ ej, ok := expected[i].(ControlJob)
+ require.True(t, ok)
+ require.Equal(t, ej.desc, j.desc)
+ case AnalyzeJob:
+ ej, ok := expected[i].(AnalyzeJob)
+ require.True(t, ok)
+ require.Equal(t, ej.tables, j.tables)
+ require.Equal(t, ej.sqlDb, j.sqlDb)
+ }
+ }
+
+	// The queue must have room for all jobs, otherwise this deadlocks.
+	// Since the loop was stopped before we drained it, the jobs simply
+	// round-trip to/from the same buffer.
+ for _, j := range jobs {
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ sc.Jobs <- j
+ }
+ }
+}
+
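+// waitOnJob adds a wait-group entry that is released when |done| is
+// closed.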
+func waitOnJob(wg *sync.WaitGroup, done chan struct{}) {
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		<-done
+	}()
+}
+
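+// doGcCycle forces a GC pass through the run loop and verifies that
+// the GC flag was consumed.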
+func doGcCycle(t *testing.T, ctx *sql.Context, sc *StatsCoord) {
+ sc.enableGc.Store(true)
+ sc.doGc.Store(true)
+ defer sc.enableGc.Store(false)
+
+ wg := sync.WaitGroup{}
+ runAndPause(t, ctx, sc, &wg) // do GC
+ runAndPause(t, ctx, sc, &wg) // pick up finish GC job
+
+ sc.gcMu.Lock()
+ defer sc.gcMu.Unlock()
+ require.False(t, sc.doGc.Load())
+}
+
+func runAndPause(t *testing.T, ctx *sql.Context, sc *StatsCoord, wg *sync.WaitGroup) {
+	// Push a pause control job and wait for the loop to consume it.
+	// The pause job stops the coordinator, whose done channel is
+	// closed before the next run-loop iteration, so the loop is
+	// effectively inactive even if the goroutine is still winding
+	// down by the time we flush and validate the queue.
+ j := NewControl("pause", func(sc *StatsCoord) error {
+ sc.Stop()
+ return nil
+ })
+ sc.Jobs <- j
+ waitOnJob(wg, j.done)
+ require.NoError(t, sc.Restart(ctx))
+ wg.Wait()
+}
+
+func executeQuery(ctx *sql.Context, eng *gms.Engine, query string) error {
+ _, iter, _, err := eng.Query(ctx, query)
+ if err != nil {
+ return err
+ }
+ for {
+ _, err = iter.Next(ctx)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return err
+ }
+ }
+ return iter.Close(ctx) // tx commit
+}
+
+func executeQueryResults(ctx *sql.Context, eng *gms.Engine, query string) ([]sql.Row, error) {
+ _, iter, _, err := eng.Query(ctx, query)
+ if err != nil {
+ return nil, err
+ }
+ var ret []sql.Row
+ for {
+ r, err := iter.Next(ctx)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ ret = append(ret, r)
+ }
+ return ret, iter.Close(ctx) // tx commit
+}
+
+func newTestEngine(ctx context.Context, dEnv *env.DoltEnv, threads *sql.BackgroundThreads) (*gms.Engine, *sql.Context) {
+ pro, err := sqle.NewDoltDatabaseProviderWithDatabases("main", dEnv.FS, nil, nil)
+ if err != nil {
+ panic(err)
+ }
+
+ mrEnv, err := env.MultiEnvForDirectory(ctx, dEnv.Config.WriteableConfig(), dEnv.FS, dEnv.Version, dEnv)
+ if err != nil {
+ panic(err)
+ }
+
+ sc := NewStatsCoord(pro, nil, logrus.StandardLogger(), threads, dEnv)
+
+ gcSafepointController := dsess.NewGCSafepointController()
+
+ doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController)
+ if err != nil {
+ panic(err)
+ }
+
+ sqlCtx := sql.NewContext(ctx, sql.WithSession(doltSession))
+ sqlCtx.SetCurrentDatabase(mrEnv.GetFirstDatabase())
+
+ sc.ctxGen = func(ctx context.Context) (*sql.Context, error) {
+ doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController)
+ if err != nil {
+ return nil, err
+ }
+ return sql.NewContext(ctx, sql.WithSession(doltSession)), nil
+ }
+
+ pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, NewInitDatabaseHook(sc))
+ pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, NewDropDatabaseHook(sc))
+
+ sqlEng := gms.New(analyzer.NewBuilder(pro).Build(), &gms.Config{
+ IsReadOnly: false,
+ IsServerLocked: false,
+ })
+ sqlEng.Analyzer.Catalog.StatsProvider = sc
+ return sqlEng, sqlCtx
+}
+
+func TestStatsGcConcurrency(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+ sc.JobInterval = 1 * time.Nanosecond
+ sc.gcInterval = 100 * time.Nanosecond
+ sc.branchInterval = 50 * time.Nanosecond
+ require.NoError(t, sc.Restart(ctx))
+
+ addDb := func(ctx *sql.Context, dbName string) {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database "+dbName))
+ }
+
+ addData := func(ctx *sql.Context, dbName string, i int) {
+ require.NoError(t, executeQuery(ctx, sqlEng, "use "+dbName))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+ }
+
+	dropDb := func(dropCtx *sql.Context, dbName string) {
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "use mydb"))
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "drop database "+dbName))
+	}
+
+ // it is important to use new sessions for this test, to avoid working root conflicts
+ addCtx, _ := sc.ctxGen(context.Background())
+ writeCtx, _ := sc.ctxGen(context.Background())
+ dropCtx, _ := sc.ctxGen(context.Background())
+
+ iters := 200
+ dbs := make(chan string, iters)
+
+ {
+ wg := sync.WaitGroup{}
+ wg.Add(2)
+
+ addCnt := 0
+ go func() {
+ for i := range iters {
+ addCnt++
+ dbName := "db" + strconv.Itoa(i)
+ addDb(addCtx, dbName)
+ addData(writeCtx, dbName, i)
+ dbs <- dbName
+ }
+ close(dbs)
+ wg.Done()
+ }()
+
+ dropCnt := 0
+ go func() {
+ i := 0
+ for db := range dbs {
+ if i%2 == 0 {
+ time.Sleep(50 * time.Millisecond)
+ dropCnt++
+ dropDb(dropCtx, db)
+ }
+ i++
+ }
+ wg.Done()
+ }()
+
+ wg.Wait()
+
+ sc.doBranchSync.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.doGc.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.Stop()
+
+ // 101 dbs, 100 with stats (not main)
+ require.Equal(t, iters/2+1, len(sc.dbs))
+ require.Equal(t, iters/2, len(sc.Stats))
+ require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters/2, sc.kv.Len())
+ }
+}
+
+func TestStatsBranchConcurrency(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ sc.JobInterval = 10
+ sc.gcInterval = 100
+ sc.branchInterval = 100
+ require.NoError(t, sc.Restart(ctx))
+
+ addBranch := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')"))
+ }
+
+ addData := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ }
+
+	dropBranch := func(dropCtx *sql.Context, branchName string) {
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "use mydb"))
+		del := "call dolt_branch('-d', '" + branchName + "')"
+		require.NoError(t, executeQuery(dropCtx, sqlEng, del))
+	}
+
+ // it is important to use new sessions for this test, to avoid working root conflicts
+ addCtx, _ := sc.ctxGen(context.Background())
+ dropCtx, _ := sc.ctxGen(context.Background())
+
+ iters := 100
+ {
+ branches := make(chan string, iters)
+
+ wg := sync.WaitGroup{}
+ wg.Add(2)
+
+ go func() {
+ for i := range iters {
+ addBranch(addCtx, i)
+ addData(addCtx, i)
+ branches <- "branch" + strconv.Itoa(i)
+ }
+ close(branches)
+ wg.Done()
+ }()
+
+ go func() {
+ i := 0
+ for br := range branches {
+ if i%2 == 0 {
+ dropBranch(dropCtx, br)
+ time.Sleep(50 * time.Millisecond)
+ }
+ i++
+ }
+ wg.Done()
+ }()
+
+ wg.Wait()
+
+ sc.doBranchSync.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.doGc.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.Stop()
+
+		// at the end we should still have |iters/2| branches with stats
+ require.Equal(t, iters/2, len(sc.Stats))
+ require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters/2, sc.kv.Len())
+ }
+}
+
+func TestStatsCacheGrowth(t *testing.T) {
+ //t.Skip("expensive test")
+
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ sc.JobInterval = 10
+ sc.gcInterval = 100
+ sc.branchInterval = 100
+ require.NoError(t, sc.Restart(ctx))
+
+ addBranch := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')"))
+ }
+
+ addData := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+
+ }
+
+ // it is important to use new sessions for this test, to avoid working root conflicts
+ iters := 2000
+ if os.Getenv("CI") != "" {
+ iters = 1025
+ }
+ {
+ branches := make(chan string, iters)
+
+ go func() {
+ addCtx, _ := sc.ctxGen(context.Background())
+ for i := range iters {
+ addBranch(addCtx, i)
+ addData(addCtx, i)
+ branches <- "branch" + strconv.Itoa(i)
+ if i%500 == 0 {
+ log.Println("branches: ", strconv.Itoa(i))
+ require.NoError(t, executeQuery(addCtx, sqlEng, "call dolt_stats_wait()"))
+ }
+ }
+ close(branches)
+ }()
+
+ // drain the channel so the producer goroutine can finish
+ for range branches {
+ }
+
+ sc.doBranchSync.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.doGc.Store(true)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ sc.Stop()
+
+ // no branches were dropped, so all |iters| branch stats should remain
+ require.Equal(t, iters, len(sc.Stats))
+ require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters, sc.kv.Len())
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/script_test.go b/go/libraries/doltcore/sqle/statspro/script_test.go
new file mode 100644
index 00000000000..f5ceace6f44
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/script_test.go
@@ -0,0 +1,532 @@
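+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+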
+package statspro
+
+import (
+ "testing"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/stretchr/testify/require"
+)
+
+type scriptTest struct {
+ name string
+ setup []string
+ assertions []assertion
+}
+
+type assertion struct {
+ query string
+ res []sql.Row
+}
+
+func TestStatScripts(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+
+ scripts := []scriptTest{
+ {
+ name: "track updates",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'zero'), (1, 'one')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ {
+ query: "update xy set y = 2 where x between 100 and 800",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ },
+ },
+ {
+ name: "track deletes",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'zero'), (1, 'one')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ {
+ query: "delete from xy where x > 600",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(4)}},
+ },
+ },
+ },
+ {
+ name: "ddl table",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "truncate table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(0)}},
+ },
+ {
+ query: "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(0)}},
+ },
+ },
+ },
+ {
+ name: "ddl index",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "alter table xy drop index y",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(1)}},
+ },
+ {
+ query: "alter table xy add index yx (y,x)",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
+ res: []sql.Row{{"varchar(16),int", "0,2"}},
+ },
+ {
+ query: "alter table xy modify column y int",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
+ res: []sql.Row{{"int,int", "0,2"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ },
+ },
+ {
+ name: "mcv counts",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "alter table xy add index y2 (y)",
+ "alter table xy add index x2 (x,y)",
+ "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)",
+ },
+ assertions: []assertion{
+ {
+ query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'",
+ res: []sql.Row{{"1", "0", "4,6"}},
+ },
+ {
+ query: "select mcv_counts from dolt_statistics where index_name = 'y'",
+ res: []sql.Row{{""}},
+ },
+ {
+ query: "select mcv_counts from dolt_statistics where index_name = 'x2'",
+ res: []sql.Row{{""}},
+ },
+ },
+ },
+ {
+ name: "caps testing",
+ setup: []string{
+ "create table XY (x int primary key, Y int, key Yx (Y,x))",
+ "alter table xy add index y2 (y)",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(3)}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(12)}},
+ },
+ {
+ query: "delete from xy where x > 500",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(6)}},
+ },
+ },
+ },
+ {
+ name: "database ddl",
+ setup: []string{
+ "create table mydb.xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "create database repo2",
+ "create table repo2.xy (x int primary key, y int, key (y,x))",
+ "insert into repo2.xy values (0,0), (1,0), (2,0)",
+ "create table repo2.ab (a int primary key, b int, key (b,a))",
+ "insert into repo2.ab values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"mydb", "xy", "primary"}, {"mydb", "xy", "y"},
+ },
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
+ {"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
+ },
+ },
+ {
+ query: "use repo2",
+ },
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
+ {"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
+ },
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(4)}},
+ },
+ {
+ query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(10)}},
+ },
+ {
+ query: "drop database repo2",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "use mydb",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ },
+ },
+ {
+ name: "recreate table without index",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "create table xy (x int primary key, y int)",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(1)}},
+ },
+ },
+ },
+ {
+ name: "stats info",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_checkout('feat')",
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_sync()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":1,"gcCounter":3,"branchCounter":2}`}},
+ },
+ {
+ query: "call dolt_checkout('main')",
+ },
+ {
+ query: "call dolt_branch('-D', 'feat')",
+ },
+ {
+ query: "call dolt_stats_sync()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":1,"readCnt":0,"active":true,"dbSeedCnt":1,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":1,"gcCounter":4,"branchCounter":3}`}},
+ },
+ },
+ },
+ {
+ name: "stats stop/start",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_stats_stop()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":false,"dbSeedCnt":0,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_stats_restart()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ },
+ },
+ {
+ name: "stats purge",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_stats_purge()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":false,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_stats_restart()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ },
+ },
+ {
+ name: "stats validate",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{`{"dbCnt":2,"readCnt":0,"active":true,"dbSeedCnt":2,"estBucketCnt":2,"cachedBucketCnt":2,"statCnt":2,"gcCounter":1,"branchCounter":1}`}},
+ },
+ {
+ query: "call dolt_stats_stop()",
+ },
+ {
+ query: "create table ab (a int primary key, b int)",
+ },
+ {
+ query: "insert into ab values (0,0), (1,1), (2,2)",
+ },
+ {
+ query: "call dolt_stats_validate()",
+ res: []sql.Row{{"(mydb/main) missing template (PRIMARY/e29in)\n(mydb/main) missing bound (d9aov)\n(mydb/main) missing chunk (d9aov)\n"}},
+ },
+ },
+ },
+ }
+
+ for _, tt := range scripts {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx, sqlEng, sc, _ := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ sc.Debug = true
+
+ for _, s := range tt.setup {
+ require.NoError(t, executeQuery(ctx, sqlEng, s))
+ }
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_sync()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ for _, a := range tt.assertions {
+ rows, err := executeQueryResults(ctx, sqlEng, a.query)
+ require.NoError(t, err)
+ if a.res != nil {
+ require.Equal(t, a.res, rows)
+ }
+ }
+ })
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/seed_job.go b/go/libraries/doltcore/sqle/statspro/seed_job.go
new file mode 100644
index 00000000000..fab444c936d
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/seed_job.go
@@ -0,0 +1,382 @@
+// Copyright 2023 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+ "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/prolly"
+ "github.com/dolthub/dolt/go/store/prolly/tree"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
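+// seedDbTables merges a branch's current table list with the table info from
+// the previous seed pass (both sorted by name), scheduling read jobs for new
+// or changed tables and drop jobs for tables that no longer exist, then
+// re-enqueues itself so seeding runs continuously.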
+func (sc *StatsCoord) seedDbTables(ctx context.Context, j SeedDbTablesJob) (ret []StatsJob, err error) {
+ // get list of tables, get list of indexes, partition index ranges into ordinal blocks
+ // return list of IO jobs for table/index/ordinal blocks
+ defer func() {
+ if errors.Is(err, doltdb.ErrWorkingSetNotFound) {
+ err = nil
+ ret = []StatsJob{NewSeedJob(j.sqlDb)}
+ } else if err != nil {
+ sc.seedCnt.Add(-1)
+ }
+ }()
+
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return nil, err
+ }
+ dSess := dsess.DSessFromSess(sqlCtx.Session)
+ db, err := dSess.Provider().Database(sqlCtx, j.sqlDb.AliasedName())
+ if err != nil {
+ return nil, err
+ }
+ sqlDb, err := sqle.RevisionDbForBranch(sqlCtx, db.(dsess.SqlDatabase), j.sqlDb.Revision(), j.sqlDb.Revision()+"/"+j.sqlDb.AliasedName())
+ if err != nil {
+ return nil, err
+ }
+ tableNames, err := sqlDb.GetTableNames(sqlCtx)
+ if err != nil {
+ return nil, err
+ }
+
+ var newTableInfo []tableStatsInfo
+ var bucketDiff int
+
+ i := 0
+ k := 0
+ for i < len(tableNames) && k < len(j.tables) {
+ var jobs []StatsJob
+ var ti tableStatsInfo
+ switch strings.Compare(tableNames[i], j.tables[k].name) {
+ case 0:
+ // continue
+ jobs, ti, err = sc.readJobsForTable(sqlCtx, sqlDb, j.tables[k])
+ bucketDiff += ti.bucketCount - j.tables[k].bucketCount
+ i++
+ k++
+ case -1:
+ // new table
+ jobs, ti, err = sc.readJobsForTable(sqlCtx, sqlDb, tableStatsInfo{name: tableNames[i]})
+ bucketDiff += ti.bucketCount
+ i++
+ case +1:
+ // dropped table
+ jobs = append(jobs, sc.dropTableJob(sqlDb, j.tables[k].name))
+ bucketDiff -= j.tables[k].bucketCount
+ k++
+ }
+ if err != nil {
+ return nil, err
+ }
+ if ti.name != "" {
+ newTableInfo = append(newTableInfo, ti)
+ }
+ ret = append(ret, jobs...)
+ }
+ for i < len(tableNames) {
+ jobs, ti, err := sc.readJobsForTable(sqlCtx, sqlDb, tableStatsInfo{name: tableNames[i]})
+ if err != nil {
+ return nil, err
+ }
+ bucketDiff += ti.bucketCount
+ newTableInfo = append(newTableInfo, ti)
+ ret = append(ret, jobs...)
+ i++
+ }
+
+ for k < len(j.tables) {
+ ret = append(ret, sc.dropTableJob(sqlDb, j.tables[k].name))
+ bucketDiff -= j.tables[k].bucketCount
+ k++
+ }
+
+ sc.bucketCnt.Add(int64(bucketDiff))
+
+ for sc.bucketCnt.Load() > sc.bucketCap {
+ sc.bucketCap *= 2
+ sc.doGc.Store(true)
+ }
+
+ // retry again after finishing planned work
+ ret = append(ret, SeedDbTablesJob{tables: newTableInfo, sqlDb: sqlDb, done: make(chan struct{})})
+ return ret, nil
+}
+
+// GetLatestTable will get the WORKING root table for the current database/branch
+func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) {
+ var db sqle.Database
+ switch d := sqlDb.(type) {
+ case sqle.Database:
+ db = d
+ case sqle.ReadReplicaDatabase:
+ db = d.Database
+ default:
+ return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
+ }
+ sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
+ if err != nil {
+ return nil, nil, err
+ }
+ if !ok {
+ return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
+ }
+
+ var dTab *doltdb.Table
+ var sqleTable *sqle.DoltTable
+ switch t := sqlTable.(type) {
+ case *sqle.AlterableDoltTable:
+ sqleTable = t.DoltTable
+ dTab, err = t.DoltTable.DoltTable(ctx)
+ case *sqle.WritableDoltTable:
+ sqleTable = t.DoltTable
+ dTab, err = t.DoltTable.DoltTable(ctx)
+ case *sqle.DoltTable:
+ sqleTable = t
+ dTab, err = t.DoltTable(ctx)
+ default:
+ err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
+ }
+ if err != nil {
+ return nil, nil, err
+ }
+ return sqleTable, dTab, nil
+}
+
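+// readJobsForTable returns the read jobs needed to bring one table's
+// statistics up to date. Indexes whose root hash and schema are unchanged
+// are kept as-is; anything else is re-read, and a trailing FinalizeJob
+// installs the table's new bucket set.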
+func (sc *StatsCoord) readJobsForTable(ctx *sql.Context, sqlDb dsess.SqlDatabase, tableInfo tableStatsInfo) ([]StatsJob, tableStatsInfo, error) {
+ var ret []StatsJob
+ var bucketCnt int
+ sqlTable, dTab, err := GetLatestTable(ctx, tableInfo.name, sqlDb)
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+ indexes, err := sqlTable.GetIndexes(ctx)
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+
+ schHashKey, _, err := sqlTable.IndexCacheKey(ctx)
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+
+ schemaChanged := !tableInfo.schHash.Equal(schHashKey.Hash)
+ if !tableInfo.schHash.IsEmpty() && schemaChanged {
+ sc.setGc()
+ }
+
+ var dataChanged bool
+ var isNewData bool
+ var newIdxRoots []hash.Hash
+
+ keepIndexes := make(map[sql.StatQualifier]bool)
+ fullIndexBuckets := make(map[templateCacheKey]finalizeStruct)
+ for i, sqlIdx := range indexes {
+ var idx durable.Index
+ var err error
+ if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
+ idx, err = dTab.GetRowData(ctx)
+ } else {
+ idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID())
+ }
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+
+ prollyMap := durable.ProllyMapFromIndex(idx)
+
+ idxRoot := prollyMap.Node().HashOf()
+ newIdxRoots = append(newIdxRoots, idxRoot)
+
+ levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+
+ bucketCnt += len(levelNodes)
+
+ indexKey := templateCacheKey{h: schHashKey.Hash, idxName: sqlIdx.ID()}
+
+ if i < len(tableInfo.idxRoots) && idxRoot.Equal(tableInfo.idxRoots[i]) && !schemaChanged {
+ qual := sql.StatQualifier{
+ Tab: tableInfo.name,
+ Database: strings.ToLower(sqlDb.AliasedName()),
+ Idx: strings.ToLower(sqlIdx.ID()),
+ }
+ keepIndexes[qual] = true
+ continue
+ }
+ dataChanged = true
+
+ var buckets []hash.Hash
+ for _, n := range levelNodes {
+ buckets = append(buckets, n.HashOf())
+ }
+ fullIndexBuckets[indexKey] = finalizeStruct{
+ buckets: buckets,
+ tupB: val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(sqlIdx.Expressions()))),
+ }
+
+ key, template, err := sc.getTemplate(ctx, sqlTable, sqlIdx)
+ if err != nil {
+ sc.logger.Errorf("stats collection failed to generate a statistic template: %s.%s.%s (%T); %s", sqlDb.RevisionQualifiedName(), tableInfo.name, sqlIdx.ID(), sqlIdx, err)
+ continue
+ }
+
+ readJobs, err := sc.partitionStatReadJobs(ctx, sqlDb, tableInfo.name, key, template, levelNodes, prollyMap, len(sqlIdx.Expressions()))
+ if err != nil {
+ return nil, tableStatsInfo{}, err
+ }
+ ret = append(ret, readJobs...)
+ isNewData = isNewData || dataChanged
+ }
+ if len(ret) > 0 || isNewData || schemaChanged {
+ // if there are any reads to perform, we follow those reads with a table finalize
+ ret = append(ret, FinalizeJob{
+ sqlDb: sqlDb,
+ tableKey: tableIndexesKey{
+ db: sqlDb.AliasedName(),
+ branch: sqlDb.Revision(),
+ table: tableInfo.name,
+ },
+ keepIndexes: keepIndexes,
+ editIndexes: fullIndexBuckets,
+ done: make(chan struct{}),
+ })
+ }
+
+ return ret, tableStatsInfo{name: tableInfo.name, schHash: schHashKey.Hash, idxRoots: newIdxRoots, bucketCount: bucketCnt}, nil
+}
+
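+// updateOrdinal is the half-open [start, stop) row-offset range covered by
+// one histogram chunk.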
+type updateOrdinal struct {
+ start, stop uint64
+}
+
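+// partitionStatReadJobs groups histogram-level nodes into ReadJobs of roughly
+// |jobSize| rows each, skipping nodes whose buckets are already cached in the
+// stats KV.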
+func (sc *StatsCoord) partitionStatReadJobs(ctx *sql.Context, sqlDb dsess.SqlDatabase, tableName string, key templateCacheKey, template stats.Statistic, levelNodes []tree.Node, prollyMap prolly.Map, idxCnt int) ([]StatsJob, error) {
+ if cnt, err := prollyMap.Count(); err != nil {
+ return nil, err
+ } else if cnt == 0 {
+ return nil, nil
+ }
+
+ curCnt := 0
+ jobSize := 100_000
+ var jobs []StatsJob
+ var batchOrdinals []updateOrdinal
+ var nodes []tree.Node
+ var offset uint64
+ for _, n := range levelNodes {
+ treeCnt, err := n.TreeCount()
+ if err != nil {
+ return nil, err
+ }
+ ord := updateOrdinal{
+ start: offset,
+ stop: offset + uint64(treeCnt),
+ }
+ offset += uint64(treeCnt)
+
+ if _, ok, err := sc.kv.GetBucket(ctx, n.HashOf(), val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxCnt))); err != nil {
+ return nil, err
+ } else if ok {
+ // skip redundant work
+ continue
+ }
+
+ curCnt += treeCnt
+ batchOrdinals = append(batchOrdinals, ord)
+ nodes = append(nodes, n)
+
+ if curCnt > jobSize {
+ first := batchOrdinals[0].start == 0
+ jobs = append(jobs, ReadJob{ctx: ctx, db: sqlDb, first: first, table: tableName, key: key, template: template, m: prollyMap, nodes: nodes, ordinals: batchOrdinals, idxLen: idxCnt, done: make(chan struct{})})
+ curCnt = 0
+ batchOrdinals = batchOrdinals[:0]
+ nodes = nodes[:0]
+ }
+ }
+ if curCnt > 0 {
+ first := batchOrdinals[0].start == 0
+ jobs = append(jobs, ReadJob{ctx: ctx, db: sqlDb, first: first, table: tableName, key: key, template: template, m: prollyMap, nodes: nodes, ordinals: batchOrdinals, idxLen: idxCnt, done: make(chan struct{})})
+ }
+
+ return jobs, nil
+}
+
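+// templateCacheKey identifies a statistic template by schema hash and index
+// name; any schema change yields a new key.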
+type templateCacheKey struct {
+ h hash.Hash
+ idxName string
+}
+
+func (k templateCacheKey) String() string {
+ return k.idxName + "/" + k.h.String()[:5]
+}
+
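+// getTemplate returns the cached statistic template for an index, or builds
+// one from the index metadata (columns, types, functional dependencies, and
+// index class) and caches it.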
+func (sc *StatsCoord) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, stats.Statistic, error) {
+ schHash, _, err := sqlTable.IndexCacheKey(ctx)
+ if err != nil {
+ return templateCacheKey{}, stats.Statistic{}, err
+ }
+ key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
+ if template, ok := sc.kv.GetTemplate(key); ok {
+ return key, template, nil
+ }
+ fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx)
+ if err != nil {
+ return templateCacheKey{}, stats.Statistic{}, err
+ }
+
+ var class sql.IndexClass
+ switch {
+ case sqlIdx.IsSpatial():
+ class = sql.IndexClassSpatial
+ case sqlIdx.IsFullText():
+ class = sql.IndexClassFulltext
+ default:
+ class = sql.IndexClassDefault
+ }
+
+ var types []sql.Type
+ for _, cet := range sqlIdx.ColumnExpressionTypes() {
+ types = append(types, cet.Type)
+ }
+
+ tablePrefix := sqlTable.Name() + "."
+ cols := make([]string, len(sqlIdx.Expressions()))
+ for i, c := range sqlIdx.Expressions() {
+ cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
+ }
+
+ template := stats.Statistic{
+ Cols: cols,
+ Typs: types,
+ IdxClass: uint8(class),
+ Fds: fds,
+ Colset: colset,
+ }
+
+ // We put template twice, once for schema changes with no data
+ // changes (here), and once when we put chunks to avoid GC dropping
+ // templates before the finalize job.
+ sc.kv.PutTemplate(key, template)
+
+ return key, template, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv.go b/go/libraries/doltcore/sqle/statspro/stats_kv.go
new file mode 100644
index 00000000000..87bddef7cb9
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/stats_kv.go
@@ -0,0 +1,551 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+ "github.com/dolthub/go-mysql-server/sql/types"
+ lru "github.com/hashicorp/golang-lru/v2"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/schema"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/prolly"
+ "github.com/dolthub/dolt/go/store/prolly/tree"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
+var ErrIncompatibleVersion = errors.New("client stats version mismatch")
+
+const defaultBucketSize = 1024 // must be > 0; lru.New rejects non-positive sizes
+
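+// StatsKv stores histogram buckets, statistic templates, and histogram bound
+// rows. GC is a mark-and-swap protocol: StartGc allocates fresh "next" maps,
+// MarkBucket and reads performed during GC copy live entries forward, and
+// FinishGc swaps the next maps in, dropping anything unmarked.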
+type StatsKv interface {
+ PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error
+ GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error)
+ GetTemplate(key templateCacheKey) (stats.Statistic, bool)
+ PutTemplate(key templateCacheKey, stat stats.Statistic)
+ GetBound(h hash.Hash, len int) (sql.Row, bool)
+ PutBound(h hash.Hash, r sql.Row)
+ Flush(ctx context.Context) error
+ StartGc(ctx context.Context, sz int) error
+ MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error
+ FinishGc()
+ Len() int
+ Cap() int64
+}
+
+var _ StatsKv = (*prollyStats)(nil)
+var _ StatsKv = (*memStats)(nil)
+
+func NewMemStats() *memStats {
+ buckets, _ := lru.New[bucketKey, *stats.Bucket](defaultBucketSize)
+ gcCap := atomic.Int64{}
+ gcCap.Store(defaultBucketSize)
+ return &memStats{
+ mu: sync.Mutex{},
+ buckets: buckets,
+ templates: make(map[templateCacheKey]stats.Statistic),
+ bounds: make(map[bucketKey]sql.Row),
+ gcCap: gcCap,
+ }
+}
+
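+// memStats is an in-memory StatsKv backed by an LRU bucket cache. The next*
+// fields are only non-nil while a GC pass is in flight.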
+type memStats struct {
+ mu sync.Mutex
+ doGc bool
+ gcCap atomic.Int64
+
+ buckets *lru.Cache[bucketKey, *stats.Bucket]
+ nextBuckets *lru.Cache[bucketKey, *stats.Bucket]
+
+ templates map[templateCacheKey]stats.Statistic
+ nextTemplates map[templateCacheKey]stats.Statistic
+
+ bounds map[bucketKey]sql.Row
+ nextBounds map[bucketKey]sql.Row
+}
+
+func (m *memStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ t, ok := m.templates[key]
+ if !ok {
+ return stats.Statistic{}, false
+ }
+ if m.doGc {
+ m.nextTemplates[key] = t
+ }
+ return t, true
+}
+
+func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.templates[key] = stat
+ if m.doGc {
+ m.nextTemplates[key] = stat
+ }
+}
+
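+// bucketKey is a chunk hash (hash.ByteLen bytes) followed by the big-endian
+// tuple prefix length, so the same chunk read at different key widths does
+// not collide.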
+type bucketKey [22]byte
+
+func getBucketKey(h hash.Hash, l int) bucketKey {
+ var k bucketKey
+ copy(k[:hash.ByteLen], h[:])
+ binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l))
+ return k
+}
+
+func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, l)
+ r, ok := m.bounds[k]
+ if !ok {
+ return nil, false
+ }
+ if m.doGc {
+ m.nextBounds[k] = r
+ }
+ return r, true
+}
+
+func (m *memStats) PutBound(h hash.Hash, r sql.Row) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, len(r))
+ m.bounds[k] = r
+ if m.doGc {
+ m.nextBounds[k] = r
+ }
+}
+
+func (m *memStats) StartGc(ctx context.Context, sz int) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.doGc = true
+ if sz == 0 {
+ sz = m.buckets.Len() * 2
+ }
+ if sz == 0 {
+ // empty cache and no requested size; fall back to the default so
+ // lru.New is not given a non-positive size
+ sz = defaultBucketSize
+ }
+ m.gcCap.Store(int64(sz))
+ var err error
+ m.nextBuckets, err = lru.New[bucketKey, *stats.Bucket](sz)
+ if err != nil {
+ return err
+ }
+ m.nextBounds = make(map[bucketKey]sql.Row)
+ m.nextTemplates = make(map[templateCacheKey]stats.Statistic)
+ return nil
+}
+
+func (m *memStats) FinishGc() {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.buckets = m.nextBuckets
+ m.templates = m.nextTemplates
+ m.bounds = m.nextBounds
+ m.nextBuckets = nil
+ m.nextTemplates = nil
+ m.nextBounds = nil
+ m.doGc = false
+}
+
+func (m *memStats) Len() int {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ return m.buckets.Len()
+}
+
+func (m *memStats) Cap() int64 {
+ return m.gcCap.Load()
+}
+
+func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, len(b.BoundVal))
+ m.buckets.Add(k, b)
+ return nil
+}
+
+func (m *memStats) MarkBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, tupB.Desc.Count())
+ b, ok := m.buckets.Get(k)
+ if ok {
+ m.nextBuckets.Add(k, b)
+ gcCap := int(m.gcCap.Load())
+ if m.nextBuckets.Len() >= gcCap {
+ // the mark set reached the target capacity; double it so marking
+ // never evicts live buckets mid-GC
+ m.gcCap.Store(int64(gcCap) * 2)
+ m.nextBuckets.Resize(gcCap * 2)
+ }
+ }
+ return nil
+}
+
+func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if h.IsEmpty() {
+ return nil, false, nil
+ }
+ k := getBucketKey(h, tupB.Desc.Count())
+ b, ok := m.buckets.Get(k)
+ return b, ok, nil
+}
+
+func (m *memStats) Flush(_ context.Context) error {
+ return nil
+}
+
+func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) {
+ sch := schema.StatsTableDoltSchema
+ kd, vd := sch.GetMapDescriptors()
+
+ keyBuilder := val.NewTupleBuilder(kd)
+ valueBuilder := val.NewTupleBuilder(vd)
+ newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return nil, err
+ }
+
+ return &prollyStats{
+ mu: sync.Mutex{},
+ destDb: destDb,
+ kb: keyBuilder,
+ vb: valueBuilder,
+ m: newMap.Mutate(),
+ mem: NewMemStats(),
+ }, nil
+}
+
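+// prollyStats layers a durable prolly map over memStats: bucket reads fall
+// back to disk and repopulate the memory cache, and Flush persists the
+// accumulated map.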
+type prollyStats struct {
+ mu sync.Mutex
+ destDb dsess.SqlDatabase
+ kb, vb *val.TupleBuilder
+ m *prolly.MutableMap
+ newM *prolly.MutableMap
+ mem *memStats
+}
+
+func (p *prollyStats) Len() int {
+ return p.mem.Len()
+}
+
+func (p *prollyStats) Cap() int64 {
+ return p.mem.Cap()
+}
+
+func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
+ return p.mem.GetTemplate(key)
+}
+
+func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
+ p.mem.PutTemplate(key, stat)
+}
+
+func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
+ return p.mem.GetBound(h, l)
+}
+
+func (p *prollyStats) PutBound(h hash.Hash, r sql.Row) {
+ p.mem.PutBound(h, r)
+}
+
+func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
+ if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil {
+ return err
+ }
+
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return err
+ }
+ v, err := p.encodeBucket(ctx, b, tupB)
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ return p.m.Put(ctx, k, v)
+}
+
+func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
+ if h.IsEmpty() {
+ return nil, false, nil
+ }
+ b, ok, err := p.mem.GetBucket(ctx, h, tupB)
+
+ if err != nil {
+ return nil, false, err
+ }
+ if ok {
+ return b, true, nil
+ }
+
+ // missing bucket and not GC'ing, try disk
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return nil, false, err
+ }
+
+ var v val.Tuple
+ err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
+ if key != nil {
+ ok = true
+ v = value
+ }
+ return nil
+ })
+ if !ok || err != nil {
+ return nil, false, err
+ }
+
+ if tupB == nil {
+ // no tuple builder to decode with; report existence only, like memStats
+ return nil, true, nil
+ }
+
+ b, err = p.decodeBucketTuple(ctx, v, tupB)
+ if err != nil {
+ return nil, false, err
+ }
+
+ p.mem.PutBucket(ctx, h, b, tupB)
+ return b, true, nil
+}
+
+func (p *prollyStats) StartGc(ctx context.Context, sz int) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ if err := p.mem.StartGc(ctx, sz); err != nil {
+ return err
+ }
+ kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
+ newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return err
+ }
+ p.newM = newMap.Mutate()
+
+ return nil
+}
+
+func (p *prollyStats) MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error {
+ if err := p.mem.MarkBucket(ctx, h, tupB); err != nil {
+ return err
+ }
+
+ // try disk
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ var v val.Tuple
+ var ok bool
+ err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
+ if key != nil {
+ ok = true
+ v = value
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ if !ok {
+ return nil
+ }
+
+ return p.newM.Put(ctx, k, v)
+}
+
+func (p *prollyStats) FinishGc() {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.mem.FinishGc()
+ p.m = p.newM
+ p.newM = nil
+}
+
+func (p *prollyStats) encodeHash(h hash.Hash, l int) (val.Tuple, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.kb.PutInt64(0, int64(l))
+ if err := p.kb.PutString(1, h.String()); err != nil {
+ return nil, err
+ }
+ return p.kb.Build(p.m.NodeStore().Pool()), nil
+}
+
+func (p *prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) {
+ l, ok := p.kb.Desc.GetInt64(0, v)
+ if !ok {
+ return 0, hash.Hash{}, fmt.Errorf("unexpected null length")
+ }
+ hStr, ok := p.kb.Desc.GetString(1, v)
+ if !ok {
+ return 0, hash.Hash{}, fmt.Errorf("unexpected null hash")
+ }
+ return int(l), hash.Parse(hStr), nil
+}
+
+func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) {
+ var row []interface{}
+ for i := 0; i < p.vb.Desc.Count(); i++ {
+ f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore())
+ if err != nil {
+ return nil, err
+ }
+ row = append(row, f)
+ }
+
+ version := row[0]
+ if version != schema.StatsVersion {
+ return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
+ }
+ rowCount := row[1].(int64)
+ distinctCount := row[2].(int64)
+ nullCount := row[3].(int64)
+ boundRowStr := row[4].(string)
+ upperBoundCnt := row[5].(int64)
+ mcvCountsStr := row[10].(string)
+
+ boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB)
+ if err != nil {
+ return nil, err
+ }
+
+ var mcvCnts []uint64
+ if len(mcvCountsStr) > 0 {
+ for _, c := range strings.Split(mcvCountsStr, ",") {
+ cnt, err := strconv.ParseInt(c, 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ mcvCnts = append(mcvCnts, uint64(cnt))
+ }
+ }
+
+ mcvs := make([]sql.Row, 4)
+ for i, v := range row[6:10] {
+ if v != nil && v != "" {
+ row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB)
+ if err != nil {
+ return nil, err
+ }
+ mcvs[i] = row
+ }
+ }
+
+ return &stats.Bucket{
+ RowCnt: uint64(rowCount),
+ DistinctCnt: uint64(distinctCount),
+ NullCnt: uint64(nullCount),
+ McvsCnt: mcvCnts,
+ BoundCnt: uint64(upperBoundCnt),
+ BoundVal: boundRow,
+ McvVals: mcvs,
+ }, nil
+}
+
+var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16}
+
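+// encodeBucket serializes a bucket into the stats value layout: version,
+// row/distinct/null counts, the encoded upper bound and bound count, up to
+// four encoded MCV rows, and a comma-separated MCV count string.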
+func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ p.vb.PutInt64(0, schema.StatsVersion)
+ p.vb.PutInt64(1, int64(b.RowCount()))
+ p.vb.PutInt64(2, int64(b.DistinctCount()))
+ p.vb.PutInt64(3, int64(b.NullCount()))
+ boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB)
+ if err != nil {
+ return nil, err
+ }
+ p.vb.PutString(4, string(boundRow))
+ p.vb.PutInt64(5, int64(b.BoundCount()))
+ for i, r := range b.Mcvs() {
+ mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB)
+ if err != nil {
+ return nil, err
+ }
+ p.vb.PutString(6+i, string(mcvRow))
+ }
+ var mcvCntsRow sql.Row
+ for _, v := range b.McvCounts() {
+ mcvCntsRow = append(mcvCntsRow, int(v))
+ }
+ p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)]))
+
+ return p.vb.Build(p.m.NodeStore().Pool()), nil
+}
+
+func (p *prollyStats) Flush(ctx context.Context) error {
+ flushedMap, err := p.m.Map(ctx)
+ if err != nil {
+ return err
+ }
+ return p.destDb.DbData().Ddb.SetStatisics(ctx, "main", flushedMap.HashOf())
+}
+
+func (p *prollyStats) NewEmpty(ctx *sql.Context) (StatsKv, error) {
+ kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
+ newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return nil, err
+ }
+ m := newMap.Mutate()
+ return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb}, nil
+}
+
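+// EncodeRow serializes |r| with |tb|'s tuple descriptor; nil fields are left
+// unset so they round-trip as NULL. DecodeRow reverses the encoding.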
+func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
+ for i := range tb.Desc.Count() {
+ v := r[i]
+ if v == nil {
+ continue
+ }
+ if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
+ return nil, err
+ }
+ }
+ return tb.Build(ns.Pool()), nil
+}
+
+func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
+ tup := []byte(s)
+ r := make(sql.Row, tb.Desc.Count())
+ var err error
+ for i := range r {
+ r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return r, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv_test.go b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go
new file mode 100644
index 00000000000..7c44f7f5cb8
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go
@@ -0,0 +1,215 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "strconv"
+ "strings"
+ "testing"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+ "github.com/stretchr/testify/require"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
+func TestProllyKv(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ prollyKv := newTestProllyKv(t, threads)
+
+ h := hash.Parse(strings.Repeat("a", hash.StringLen))
+ h2 := hash.Parse(strings.Repeat("b", hash.StringLen))
+ k := getBucketKey(h, 2)
+
+ tupB := val.NewTupleBuilder(val.NewTupleDescriptor(
+ val.Type{Enc: val.Int64Enc, Nullable: true},
+ val.Type{Enc: val.StringEnc, Nullable: true},
+ ))
+
+ t.Run("test bounds", func(t *testing.T) {
+ exp := sql.Row{1, 1}
+ prollyKv.PutBound(h, exp)
+ cmp, ok := prollyKv.GetBound(h, 2)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ _, ok = prollyKv.GetBound(h2, 2)
+ require.False(t, ok)
+ })
+
+ t.Run("test templates", func(t *testing.T) {
+ exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
+ key := templateCacheKey{
+ h: h,
+ idxName: "PRIMARY",
+ }
+ prollyKv.PutTemplate(key, exp)
+ cmp, ok := prollyKv.GetTemplate(key)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ key2 := templateCacheKey{
+ h: h2,
+ idxName: "PRIMARY",
+ }
+ _, ok = prollyKv.GetTemplate(key2)
+ require.False(t, ok)
+ })
+
+ t.Run("test buckets", func(t *testing.T) {
+ exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
+ require.NoError(t, err)
+ cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ _, ok, err = prollyKv.GetBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+ require.False(t, ok)
+
+ // delete from memory, should pull from disk when |tupB| supplied
+ prollyKv.mem.buckets.Remove(k)
+
+ cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, (*stats.Bucket)(nil), cmp)
+
+ cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp.RowCnt, cmp.RowCnt)
+ require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
+ require.Equal(t, exp.NullCnt, cmp.NullCnt)
+ require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
+ require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
+ require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
+ require.Equal(t, exp.McvVals[2], cmp.McvVals[2])
+ require.Equal(t, exp.McvVals[3], cmp.McvVals[3])
+ require.Equal(t, exp.BoundVal, cmp.BoundVal)
+ require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
+ })
+
+ t.Run("test bucket GC", func(t *testing.T) {
+ require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+
+ // if we delete from memory, no more fallback to disk
+ prollyKv.mem.buckets.Remove(k)
+ _, ok, err := prollyKv.GetBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+ require.False(t, ok)
+
+ exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err = prollyKv.PutBucket(context.Background(), h, exp, tupB)
+ require.NoError(t, err)
+
+ exp2 := stats.NewHistogramBucket(10, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err = prollyKv.PutBucket(context.Background(), h2, exp2, tupB)
+ require.NoError(t, err)
+
+ prollyKv.FinishGc()
+
+ require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+ cmp2, ok, err := prollyKv.GetBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp2.BoundCount(), cmp2.BoundCnt)
+ prollyKv.FinishGc()
+ // only tagged one bucket
+ require.Equal(t, 1, prollyKv.Len())
+ })
+
+ t.Run("test GC overflow", func(t *testing.T) {
+ require.NoError(t, prollyKv.StartGc(context.Background(), 8))
+ expLen := 1024
+ var expected []hash.Hash
+ for i := range expLen {
+ exp := stats.NewHistogramBucket(uint64(i), 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ nh := strconv.AppendInt(nil, int64(i), 10)
+ nh = append(nh, h[:hash.ByteLen-len(nh)]...)
+ newH := hash.New(nh)
+ expected = append(expected, newH)
+ err := prollyKv.PutBucket(context.Background(), newH, exp, tupB)
+ require.NoError(t, err)
+ }
+ prollyKv.FinishGc()
+
+ for _, h := range expected {
+ _, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ }
+
+ require.Equal(t, 1024, prollyKv.Len())
+ require.Equal(t, int64(2048), prollyKv.Cap())
+ })
+
+ t.Run("test bounds GC", func(t *testing.T) {
+ exp := sql.Row{1, 1}
+ prollyKv.PutBound(h, exp)
+ prollyKv.PutBound(h2, exp)
+
+ require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+ prollyKv.GetBound(h2, 2)
+ prollyKv.FinishGc()
+
+ require.Equal(t, 1, len(prollyKv.mem.bounds))
+ })
+
+ t.Run("test templates GC", func(t *testing.T) {
+ exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
+ key := templateCacheKey{
+ h: h,
+ idxName: "PRIMARY",
+ }
+ key2 := templateCacheKey{
+ h: h2,
+ idxName: "PRIMARY",
+ }
+ prollyKv.PutTemplate(key, exp)
+ prollyKv.PutTemplate(key2, exp)
+
+ require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+ prollyKv.GetTemplate(key2)
+ prollyKv.FinishGc()
+
+ require.Equal(t, 1, len(prollyKv.mem.templates))
+ })
+
+}
+
+func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats {
+ dEnv := dtestutils.CreateTestEnv()
+
+ sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+
+ startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx)
+
+ kv, err := NewProllyStats(ctx, startDbs[0].(dsess.SqlDatabase))
+ require.NoError(t, err)
+
+ return kv
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_provider.go b/go/libraries/doltcore/sqle/statspro/stats_provider.go
deleted file mode 100644
index 573e20b638a..00000000000
--- a/go/libraries/doltcore/sqle/statspro/stats_provider.go
+++ /dev/null
@@ -1,535 +0,0 @@
-// Copyright 2023 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "errors"
- "fmt"
- "path/filepath"
- "strings"
- "sync"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly/tree"
-)
-
-var ErrFailedToLoad = errors.New("failed to load statistics")
-
-type indexMeta struct {
- qual sql.StatQualifier
- cols []string
- newNodes []tree.Node
- // updateOrdinals are [start, stop] tuples for each update chunk
- updateOrdinals []updateOrdinal
- keepChunks []sql.HistogramBucket
- dropChunks []sql.HistogramBucket
- allAddrs []hash.Hash
-}
-
-type updateOrdinal struct {
- start, stop uint64
-}
-
-func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider {
- return &Provider{
- pro: pro,
- sf: sf,
- mu: &sync.Mutex{},
- statDbs: make(map[string]Database),
- autoCtxCancelers: make(map[string]context.CancelFunc),
- analyzeCtxCancelers: make(map[string]context.CancelFunc),
- status: make(map[string]string),
- lockedTables: make(map[string]bool),
- }
-}
-
-// Provider is the engine interface for reading and writing index statistics.
-// Each database has its own statistics table that all tables/indexes in a db
-// share.
-type Provider struct {
- mu *sync.Mutex
- pro *sqle.DoltDatabaseProvider
- sf StatsFactory
- statDbs map[string]Database
- autoCtxCancelers map[string]context.CancelFunc
- analyzeCtxCancelers map[string]context.CancelFunc
- starter sqle.InitDatabaseHook
- status map[string]string
- lockedTables map[string]bool
-}
-
-// each database has one statistics table that is a collection of the
-// table stats in the database
-type dbToStats struct {
- mu *sync.Mutex
- dbName string
- stats map[sql.StatQualifier]*DoltStats
- statsDatabase Database
- latestTableHashes map[string]hash.Hash
-}
-
-func newDbStats(dbName string) *dbToStats {
- return &dbToStats{
- mu: &sync.Mutex{},
- dbName: dbName,
- stats: make(map[sql.StatQualifier]*DoltStats),
- latestTableHashes: make(map[string]hash.Hash),
- }
-}
-
-var _ sql.StatsProvider = (*Provider)(nil)
-
-func (p *Provider) Close() error {
- var lastErr error
- for _, db := range p.statDbs {
- if err := db.Close(); err != nil {
- lastErr = err
- }
- }
- return lastErr
-}
-
-func (p *Provider) TryLockForUpdate(branch, db, table string) bool {
- p.mu.Lock()
- defer p.mu.Unlock()
- lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
- if ok := p.lockedTables[lockId]; ok {
- return false
- }
- p.lockedTables[lockId] = true
- return true
-}
-
-func (p *Provider) UnlockTable(branch, db, table string) {
- p.mu.Lock()
- defer p.mu.Unlock()
- lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
- p.lockedTables[lockId] = false
- return
-}
-
-func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error {
- err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db)
-
- if err != nil {
- p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error()))
- return err
- }
- p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name))
- return nil
-}
-
-func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) {
- p.starter = hook
-}
-
-func (p *Provider) CancelRefreshThread(dbName string) {
- p.mu.Lock()
- if cancel, ok := p.autoCtxCancelers[dbName]; ok {
- cancel()
- }
- p.mu.Unlock()
- p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName))
-
-}
-
-func (p *Provider) ThreadStatus(dbName string) string {
- p.mu.Lock()
- defer p.mu.Unlock()
-
- if msg, ok := p.status[dbName]; ok {
- return msg
- }
- return "no active stats thread"
-}
-
-func (p *Provider) TrackedBranches(dbName string) []string {
- db, ok := p.getStatDb(dbName)
- if !ok {
- return nil
- }
- return db.Branches()
-
-}
-
-func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil, nil
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name())
-}
-
-func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) {
- statDb, ok := p.getStatDb(db)
- if !ok || statDb == nil {
- return nil, nil
- }
-
- if branch == "" {
- dSess := dsess.DSessFromSess(ctx.Session)
- var err error
- branch, err = dSess.GetBranch()
- if err != nil {
- return nil, nil
- }
- }
-
- var ret []sql.Statistic
- for _, qual := range statDb.ListStatQuals(branch) {
- if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) {
- stat, _ := statDb.GetStat(branch, qual)
- ret = append(ret, stat)
- }
- }
-
- return ret, nil
-}
-
-func (p *Provider) setStatDb(name string, db Database) {
- p.mu.Lock()
- defer p.mu.Unlock()
- p.statDbs[name] = db
-}
-
-func (p *Provider) getStatDb(name string) (Database, bool) {
- p.mu.Lock()
- defer p.mu.Unlock()
- statDb, ok := p.statDbs[strings.ToLower(name)]
- return statDb, ok
-}
-
-func (p *Provider) deleteStatDb(name string) {
- p.mu.Lock()
- defer p.mu.Unlock()
- delete(p.statDbs, strings.ToLower(name))
-}
-
-func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error {
- statDb, ok := p.getStatDb(s.Qualifier().Db())
- if !ok {
- return nil
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil
- }
-
- doltStat, err := DoltStatsFromSql(s)
- if err != nil {
- return err
- }
-
- p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db()))
-
- return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat)
-}
-
-func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) {
- statDb, ok := p.getStatDb(qual.Db())
- if !ok {
- return nil, false
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil, false
- }
-
- return statDb.GetStat(branch, qual)
-}
-
-func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) {
- stat, ok := p.getQualStats(ctx, qual)
- if !ok {
- return nil, false
- }
- return stat, true
-}
-
-func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return nil
- }
-
- p.mu.Lock()
- defer p.mu.Unlock()
-
- p.status[db] = "dropped"
-
- return statDb.DeleteBranchStats(ctx, branch, flush)
-}
-
-func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return nil
- }
- for _, branch := range statDb.Branches() {
- // remove provider access
- p.DropBranchDbStats(ctx, branch, db, flush)
- }
-
- if flush {
- p.deleteStatDb(db)
- }
-
- return nil
-}
-
-func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error {
- statDb, ok := p.getStatDb(qual.Db())
- if !ok {
- return nil
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil
- }
-
- if _, ok := statDb.GetStat(branch, qual); ok {
- statDb.DeleteStats(ctx, branch, qual)
- p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String()))
- }
-
- return nil
-}
-
-func (p *Provider) UpdateStatus(db string, msg string) {
- p.mu.Lock()
- defer p.mu.Unlock()
-
- p.status[db] = msg
-}
-
-func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return 0, sql.ErrDatabaseNotFound.New(db)
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return 0, err
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
- if !ok {
- return 0, nil
- }
-
- return priStats.RowCount(), nil
-}
-
-func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return 0, sql.ErrDatabaseNotFound.New(db)
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return 0, err
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
- if !ok {
- return 0, nil
- }
-
- return priStats.AvgSize(), nil
-}
-
-func (p *Provider) Prune(ctx *sql.Context) error {
- dSess := dsess.DSessFromSess(ctx.Session)
-
- for _, sqlDb := range p.pro.DoltDatabases() {
- dbName := strings.ToLower(sqlDb.Name())
- sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName)
- if err != nil {
- return err
- }
- if !ok {
- continue
- }
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- continue
- }
-
- // Canceling refresh thread prevents background thread from
- // making progress. Prune should succeed.
- p.CancelRefreshThread(dbName)
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- for _, branch := range statDb.Branches() {
- err := func() error {
- // function closure ensures safe defers
- var stats []sql.Statistic
- for _, t := range tables {
- // XXX: avoid races with ANALYZE with the table locks.
- // Either concurrent purge or analyze (or both) will fail.
- if !p.TryLockForUpdate(branch, dbName, t) {
- p.mu.Lock()
- fmt.Println(p.lockedTables)
- p.mu.Unlock()
- return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished")
- }
- defer p.UnlockTable(branch, dbName, t)
-
- tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t)
- if err != nil {
- return err
- }
- stats = append(stats, tableStats...)
- }
-
- if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil {
- return err
- }
-
- for _, s := range stats {
- ds, ok := s.(*DoltStats)
- if !ok {
- return fmt.Errorf("unexpected statistics type found: %T", s)
- }
- if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil {
- return err
- }
- }
- if err := statDb.Flush(ctx, branch); err != nil {
- return err
- }
- return nil
- }()
- if err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func (p *Provider) Purge(ctx *sql.Context) error {
- for _, sqlDb := range p.pro.DoltDatabases() {
- dbName := strings.ToLower(sqlDb.Name())
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- var branches []string
- db, ok := p.getStatDb(dbName)
- if ok {
- // Canceling refresh thread prevents background thread from
- // making progress. Purge should succeed.
- p.CancelRefreshThread(dbName)
-
- branches = db.Branches()
- for _, branch := range branches {
- err := func() error {
- for _, t := range tables {
- // XXX: avoid races with ANALYZE with the table locks.
- // Either concurrent purge or analyze (or both) will fail.
- if !p.TryLockForUpdate(branch, dbName, t) {
- return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished")
- }
- defer p.UnlockTable(branch, dbName, t)
- }
-
- err := p.DropBranchDbStats(ctx, branch, dbName, true)
- if err != nil {
- return fmt.Errorf("failed to drop stats: %w", err)
- }
- return nil
- }()
- if err != nil {
- return err
- }
- }
- }
-
- // if the database failed to load, we still want to delete the folder
-
- fs, err := p.pro.FileSystemForDatabase(dbName)
- if err != nil {
- return err
- }
-
- // remove from filesystem
- statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
- if err != nil {
- return err
- }
-
- if ok, _ := statsFs.Exists(""); ok {
- if err := statsFs.Delete("", true); err != nil {
- return err
- }
- }
-
- dropDbLoc, err := statsFs.Abs("")
- if err != nil {
- return err
- }
-
- if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
- return err
- }
- if len(branches) == 0 {
- // if stats db was invalid on startup, recreate from baseline
- branches = p.getStatsBranches(ctx)
- }
- p.Load(ctx, fs, sqlDb, branches)
- }
- return nil
-}
diff --git a/go/libraries/doltcore/sqle/statspro/validate.go b/go/libraries/doltcore/sqle/statspro/validate.go
new file mode 100644
index 00000000000..65ffda6bbc9
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/validate.go
@@ -0,0 +1,155 @@
+// Copyright 2023 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "strings"
+
+ "github.com/dolthub/go-mysql-server/sql"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/prolly/tree"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
+func generateDeps(
+ sqlCtx *sql.Context,
+ sqlDb dsess.SqlDatabase,
+ tCb func(key templateCacheKey),
+ bCb func(h hash.Hash, cnt int),
+ hCb func(h hash.Hash, tupB *val.TupleBuilder) error,
+) error {
+ dSess := dsess.DSessFromSess(sqlCtx.Session)
+ db, err := dSess.Provider().Database(sqlCtx, sqlDb.AliasedName())
+ if err != nil {
+ return err
+ }
+ sqlDb, err = sqle.RevisionDbForBranch(sqlCtx, db.(dsess.SqlDatabase), sqlDb.Revision(), sqlDb.AliasedName()+"/"+sqlDb.Revision())
+ if err != nil {
+ return err
+ }
+ tableNames, err := sqlDb.GetTableNames(sqlCtx)
+ if err != nil {
+ return err
+ }
+
+ for _, tableName := range tableNames {
+ sqlTable, dTab, err := GetLatestTable(sqlCtx, tableName, sqlDb)
+ if err != nil {
+ return err
+ }
+ indexes, err := sqlTable.GetIndexes(sqlCtx)
+ if err != nil {
+ return err
+ }
+
+ for _, sqlIdx := range indexes {
+ var idx durable.Index
+ var err error
+ if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
+ idx, err = dTab.GetRowData(sqlCtx)
+ } else {
+ idx, err = dTab.GetIndexRowData(sqlCtx, sqlIdx.ID())
+ }
+ if err != nil {
+ return err
+ }
+
+ schHash, _, err := sqlTable.IndexCacheKey(sqlCtx)
+ if err != nil {
+ return err
+ }
+ key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
+ tCb(key)
+
+ idxLen := len(sqlIdx.Expressions())
+
+ prollyMap := durable.ProllyMapFromIndex(idx)
+ levelNodes, err := tree.GetHistogramLevel(sqlCtx, prollyMap.Tuples(), bucketLowCnt)
+ if err != nil {
+ return err
+ }
+
+ if len(levelNodes) == 0 {
+ log.Println("db-table has no hashes: ", sqlDb.AliasedName())
+ continue
+ }
+
+ firstNodeHash := levelNodes[0].HashOf()
+ bCb(firstNodeHash, idxLen)
+
+ for _, n := range levelNodes {
+ err = hCb(n.HashOf(), val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen)))
+ if err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
+
+// ValidateState expects all tracked databases to be fully cached,
+// and returns an error listing any gaps.
+func (sc *StatsCoord) ValidateState(ctx context.Context) error {
+ sc.dbMu.Lock()
+ dbs := make([]dsess.SqlDatabase, len(sc.dbs))
+ copy(dbs, sc.dbs)
+ sc.dbMu.Unlock()
+
+ sc.gcMu.Lock()
+ defer sc.gcMu.Unlock()
+
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return err
+ }
+
+ var b strings.Builder
+ for _, db := range dbs {
+ err = generateDeps(sqlCtx, db, func(key templateCacheKey) {
+ _, ok := sc.kv.GetTemplate(key)
+ if !ok {
+ fmt.Fprintf(&b, "(%s) missing template (%s)\n", db.RevisionQualifiedName(), key.String())
+ }
+ }, func(h hash.Hash, cnt int) {
+ _, ok := sc.kv.GetBound(h, cnt)
+ if !ok {
+ fmt.Fprintf(&b, "(%s) missing bound (%s)\n", db.RevisionQualifiedName(), h.String()[:5])
+ }
+ }, func(h hash.Hash, tupB *val.TupleBuilder) error {
+ _, ok, err := sc.kv.GetBucket(ctx, h, tupB)
+ if err != nil {
+ return err
+ }
+ if !ok {
+ fmt.Fprintf(&b, "(%s) missing chunk (%s)\n", db.RevisionQualifiedName(), h.String()[:5])
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ }
+ if b.Len() > 0 {
+ return fmt.Errorf("%s", b.String())
+ }
+ return nil
+}
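
ValidateState is a read-only invariant check, so the natural way to exercise it is a test that performs a refresh and then asserts the cache has no gaps. A minimal sketch under that assumption; newTestStatsCoord is a hypothetical harness, not part of this diff:

```go
package statspro

import (
	"context"
	"testing"
)

// TestStatsCacheIsComplete sketches how ValidateState can gate a test:
// newTestStatsCoord (hypothetical) builds a StatsCoord over a scratch
// database and completes one full stats refresh before validation.
func TestStatsCacheIsComplete(t *testing.T) {
	sc := newTestStatsCoord(t)

	// ValidateState re-walks every index via generateDeps and reports
	// each template, bound, and bucket missing from the kv cache.
	if err := sc.ValidateState(context.Background()); err != nil {
		t.Fatalf("stats cache has gaps:\n%v", err)
	}
}
```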
diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go
index 99e6c2f5a9b..0e3ff291a72 100644
--- a/go/libraries/doltcore/sqle/system_variables.go
+++ b/go/libraries/doltcore/sqle/system_variables.go
@@ -219,39 +219,39 @@ var DoltSystemVariables = []sql.SystemVariable{
Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshEnabled,
+ Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
- Default: int8(0),
+ Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
+ Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsBootstrapEnabled,
+ Name: dsess.DoltStatsMemoryOnly,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
+ Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
Default: int8(0),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsMemoryOnly,
+ Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
+ Default: 100,
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshThreshold,
+ Name: dsess.DoltStatsBranchInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
- Default: float64(.5),
+ Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false),
+ Default: 60 * 60 * 24,
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshInterval,
+ Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
- Default: 600,
+ Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
+ Default: 60 * 60 * 24,
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
@@ -446,39 +446,39 @@ func AddDoltSystemVariables() {
Default: int8(0),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshEnabled,
+ Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
- Default: int8(0),
+ Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
+ Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsBootstrapEnabled,
+ Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
+ Default: 60 * 60 * 24,
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsMemoryOnly,
+ Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
+ Default: 100,
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshThreshold,
+ Name: dsess.DoltStatsBranchInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
- Default: float64(.5),
+ Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false),
+ Default: 60 * 60 * 24,
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshInterval,
+ Name: dsess.DoltStatsMemoryOnly,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
- Default: 120,
+ Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
+ Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
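
With the auto-refresh and bootstrap switches collapsed into a single enabled toggle, call sites only need one global lookup. A sketch of the comma-ok pattern for reading it; this guards against a non-int8 value rather than type-asserting unconditionally, and statsEnabled is an illustrative helper, not part of this diff:

```go
package statspro

import (
	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)

// statsEnabled reports whether the consolidated stats toggle is on.
func statsEnabled() bool {
	_, v, ok := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
	if !ok {
		return false // variable not registered; treat stats as disabled
	}
	enabled, isInt8 := v.(int8)
	return isInt8 && enabled == 1
}
```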
diff --git a/go/libraries/doltcore/sqle/tables.go b/go/libraries/doltcore/sqle/tables.go
index e8fb46ea5d1..06765360bff 100644
--- a/go/libraries/doltcore/sqle/tables.go
+++ b/go/libraries/doltcore/sqle/tables.go
@@ -127,12 +127,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi
return sql.IndexLookup{}, nil, nil, false, nil
}
- dbState, ok, err := sess.LookupDbState(ctx, t.db.Name())
+ dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName())
if err != nil {
return sql.IndexLookup{}, nil, nil, false, nil
}
if !ok {
- return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name())
+ return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName())
}
var lookupCols []expression.LookupColumn
diff --git a/go/libraries/doltcore/sqle/user_space_database.go b/go/libraries/doltcore/sqle/user_space_database.go
index e54c03b7eb3..c3689e13a61 100644
--- a/go/libraries/doltcore/sqle/user_space_database.go
+++ b/go/libraries/doltcore/sqle/user_space_database.go
@@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string {
return db.Name()
}
+func (db *UserSpaceDatabase) AliasedName() string {
+ return db.Name()
+}
+
func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) {
panic(fmt.Sprintf("GetSchema is not implemented for database %T", db))
}
diff --git a/go/store/prolly/tree/mutator.go b/go/store/prolly/tree/mutator.go
index e6474e16cbf..b65fdf8f101 100644
--- a/go/store/prolly/tree/mutator.go
+++ b/go/store/prolly/tree/mutator.go
@@ -17,7 +17,7 @@ package tree
import (
"bytes"
"context"
+	"fmt"

"github.com/dolthub/dolt/go/store/prolly/message"
)
@@ -132,7 +132,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer](
prev := newKey
newKey, newValue = edits.NextMutation(ctx)
if newKey != nil {
- assertTrue(order.Compare(K(newKey), K(prev)) > 0, "expected sorted edits")
+ assertTrue(order.Compare(K(newKey), K(prev)) > 0, fmt.Sprintf("expected sorted edits: %v, %v", prev, newKey))
}
}
diff --git a/go/store/prolly/tree/stats.go b/go/store/prolly/tree/stats.go
index 1573d01893d..9611f3b583d 100644
--- a/go/store/prolly/tree/stats.go
+++ b/go/store/prolly/tree/stats.go
@@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static
// GetHistogramLevel returns the highest internal level of the tree that has
// more than |low| addresses.
func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) {
+ if cnt, err := m.Count(); err != nil {
+ return nil, err
+ } else if cnt == 0 {
+ return nil, nil
+ }
currentLevel := []Node{m.Root}
level := m.Root.Level()
for len(currentLevel) < low && level > 0 {
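
The new guard gives GetHistogramLevel an explicit empty-table contract: zero rows yields a nil node slice and no error, so callers can skip histogram construction instead of descending an empty root. A caller-side sketch of that contract (histogramNodes is illustrative only):

```go
package statspro

import (
	"context"

	"github.com/dolthub/dolt/go/store/prolly/tree"
)

// histogramNodes wraps GetHistogramLevel with its empty-table contract:
// (nil, nil) means a zero-row index with nothing to bucket.
func histogramNodes[K, V ~[]byte, O tree.Ordering[K]](
	ctx context.Context, m tree.StaticMap[K, V, O], low int,
) ([]tree.Node, error) {
	nodes, err := tree.GetHistogramLevel(ctx, m, low)
	if err != nil {
		return nil, err
	}
	if len(nodes) == 0 {
		return nil, nil // zero-row index: skip stats for it
	}
	return nodes, nil
}
```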
diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go
index f92bc8ce1cb..9b3a50ea139 100644
--- a/go/store/val/tuple_builder.go
+++ b/go/store/val/tuple_builder.go
@@ -15,6 +15,8 @@
package val
import (
+ "log"
+ "strconv"
"time"
"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
@@ -77,7 +79,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder {
func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) {
for i, typ := range tb.Desc.Types {
if !typ.Nullable && tb.fields[i] == nil {
- panic("cannot write NULL to non-NULL field")
+ log.Println("cannot write NULL to non-NULL field: " + strconv.Itoa(i) + " " + string(tb.fields[i]))
}
}
return tb.BuildPermissive(pool)
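
Build's failure mode is now relaxed: a nil non-nullable field logs the offending field index and still falls through to BuildPermissive instead of panicking. A sketch of what a caller observes under the new contract; the descriptor and pool constructors are the existing val/pool APIs, used here purely for illustration:

```go
package val_test

import (
	"github.com/dolthub/dolt/go/store/pool"
	"github.com/dolthub/dolt/go/store/val"
)

// buildWithMissingField never sets field 0, so Build logs
// "cannot write NULL to non-NULL field: 0" and returns a
// permissively-built tuple rather than panicking.
func buildWithMissingField() val.Tuple {
	desc := val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false})
	tb := val.NewTupleBuilder(desc)
	return tb.Build(pool.NewBuffPool())
}
```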
diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go
index bd55519ab35..188c1f98829 100644
--- a/go/store/val/tuple_descriptor.go
+++ b/go/store/val/tuple_descriptor.go
@@ -639,7 +639,7 @@ func (td TupleDesc) formatValue(enc Encoding, i int, value []byte) string {
case StringAddrEnc:
return hex.EncodeToString(value)
case CommitAddrEnc:
- return hex.EncodeToString(value)
+ return hash.New(value).String()[:5]
case CellEnc:
return hex.EncodeToString(value)
case ExtendedEnc: