Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stats] Rewrite stat management to use single threaded event loop #8815

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
796564b
[stats] event loop
max-hoffman Dec 30, 2024
5034635
more progress
max-hoffman Dec 31, 2024
8de66e4
basic scheduler test working
max-hoffman Jan 2, 2025
d6882e4
analyze
max-hoffman Jan 3, 2025
4c5bd3f
add/drop hooks
max-hoffman Jan 3, 2025
c2876de
gc
max-hoffman Jan 6, 2025
4d8d8f0
delete an alter
max-hoffman Jan 6, 2025
bb6ab3c
drop index and table
max-hoffman Jan 6, 2025
542bc40
fix other tests
max-hoffman Jan 6, 2025
14cf9fd
branch management
max-hoffman Jan 8, 2025
d949b3d
starter for kv
max-hoffman Jan 9, 2025
397aaa9
gc and refactor maintenance
max-hoffman Jan 14, 2025
16ff4ff
fix bucket doubling
max-hoffman Jan 15, 2025
1d04f74
delete log
max-hoffman Jan 15, 2025
2be37c1
better bucket counting
max-hoffman Jan 21, 2025
ee16cf1
test for disk round trip
max-hoffman Jan 21, 2025
d18b524
more prolly stats gc tests
max-hoffman Jan 21, 2025
ee2286b
rotate backing stats db
max-hoffman Jan 22, 2025
4a91332
progress towards swapping old for new, deleting old code
max-hoffman Jan 22, 2025
128efd5
fix gc bucket overflow
max-hoffman Jan 23, 2025
347d3f5
test for gc overflow
max-hoffman Jan 23, 2025
9bdb958
org and closers
max-hoffman Jan 23, 2025
d503c4e
save progress update
max-hoffman Jan 24, 2025
76a45ff
finally get first two bats running
max-hoffman Jan 25, 2025
373aa9a
startup bound hash issue
max-hoffman Jan 27, 2025
c56dd06
rewrite GC to be synchronous, fix more bugs
max-hoffman Jan 28, 2025
14eae29
fix session freshness
max-hoffman Jan 28, 2025
6ab5193
fix branch gc
max-hoffman Jan 29, 2025
474a85f
cache writes and gc are serialized
max-hoffman Jan 30, 2025
d8e6c09
fix gc/branch update dropped hashes
max-hoffman Jan 30, 2025
31d3780
fix gc race, doubling race, jobs race
max-hoffman Jan 31, 2025
c2c4f05
fix more races
max-hoffman Feb 3, 2025
e23cf1f
docs
max-hoffman Feb 4, 2025
4da767b
convert bats to script tests
max-hoffman Feb 4, 2025
8d3c07f
more tests, purge/stop
max-hoffman Feb 5, 2025
6578011
validate
max-hoffman Feb 6, 2025
aafeec7
docs
max-hoffman Feb 6, 2025
6dd1fb4
some PR cleanup
max-hoffman Feb 6, 2025
4635cfa
more cleanup
max-hoffman Feb 6, 2025
ec8ed11
stash for pull
max-hoffman Feb 6, 2025
c95fcda
merge
max-hoffman Feb 6, 2025
2e424eb
fix bucket hash conflicts
max-hoffman Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions go/cmd/dolt/commands/engine/sqlengine.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ package engine

import (
"context"
"fmt"
"os"
"strconv"
"strings"

gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/eventscheduler"
"github.com/dolthub/go-mysql-server/sql"
Expand All @@ -31,6 +26,9 @@ import (
_ "github.com/dolthub/go-mysql-server/sql/variables"
"github.com/dolthub/vitess/go/vt/sqlparser"
"github.com/sirupsen/logrus"
"os"
"strconv"
"strings"

"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
Expand All @@ -43,7 +41,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/config"
Expand Down Expand Up @@ -189,7 +186,13 @@ func NewSqlEngine(
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
})

statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
var statsPro sql.StatsProvider
_, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
if enabled.(int8) == 1 {
statsPro = statspro.NewStatsCoord(pro, sqlEngine.NewDefaultContext, logrus.StandardLogger(), bThreads, mrEnv.GetEnv(mrEnv.GetFirstDatabase()))
} else {
statsPro = statspro.StatsNoop{}
}
engine.Analyzer.Catalog.StatsProvider = statsPro

engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
Expand All @@ -202,8 +205,8 @@ func NewSqlEngine(

// configuring stats depends on sessionBuilder
// sessionBuilder needs ref to statsProv
if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
fmt.Fprintln(cli.CliErr, err)
if sc, ok := statsPro.(*statspro.StatsCoord); ok {
sc.Init(ctx, dbs)
}

// Load MySQL Db information
Expand Down
25 changes: 13 additions & 12 deletions go/cmd/dolt/commands/sqlserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"crypto/tls"
"errors"
"fmt"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"net"
"net/http"
"os"
Expand Down Expand Up @@ -260,23 +261,23 @@ func ConfigureServices(
var sqlEngine *engine.SqlEngine
InitSqlEngine := &svcs.AnonService{
InitF: func(ctx context.Context) (err error) {
if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil {
// Auto-stats is off by default for every command except
// sql-server. Unless the config specifies a specific
// behavior, enable server stats collection.
sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1)
} else if statsOn != "0" {
// do not bootstrap if auto-stats enabled
} else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil {
// If we've disabled stats collection and config does not
// specify bootstrap behavior, enable bootstrapping.
sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1)
}
sqlEngine, err = engine.NewSqlEngine(
ctx,
mrEnv,
config,
)
if sc, ok := sqlEngine.GetUnderlyingEngine().Analyzer.Catalog.StatsProvider.(*statspro.StatsCoord); ok {
sqlCtx, err := sqlEngine.NewDefaultContext(ctx)
if err != nil {
return err
}
if sc == nil {
return fmt.Errorf("unexpected nil stats coord")
}
if err = sc.Restart(sqlCtx); err != nil {
return err
}
}
return err
},
StopF: func() error {
Expand Down
24 changes: 9 additions & 15 deletions go/libraries/doltcore/schema/statistic.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ import (
const StatsVersion int64 = 1

const (
StatsQualifierColName = "qualifier"
StatsDbColName = "database_name"
StatsTableColName = "table_name"
StatsIndexColName = "index_name"
StatsPositionColName = "position"
StatsBranchName = "branch"
StatsCommitHashColName = "commit_hash"
StatsPrefixLenName = "prefix_len"
StatsRowCountColName = "row_count"
StatsDistinctCountColName = "distinct_count"
StatsNullCountColName = "null_count"
Expand All @@ -42,7 +42,7 @@ const (
StatsMcv2ColName = "mcv2"
StatsMcv3ColName = "mcv3"
StatsMcv4ColName = "mcv4"
StatsMcvCountsColName = "mcvCounts"
StatsMcvCountsColName = "mcv_counts"
StatsVersionColName = "version"
)

Expand All @@ -52,6 +52,7 @@ const (
StatsIndexTag
StatsPositionTag
StatsVersionTag
StatsPrefixLenTag
StatsCommitHashTag
StatsRowCountTag
StatsDistinctCountTag
Expand All @@ -71,9 +72,9 @@ const (
func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
return sql.PrimaryKeySchema{
Schema: sql.Schema{
&sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
&sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
Expand All @@ -88,28 +89,21 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
&sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName},
},
PkOrdinals: []int{0, 1},
}
}

var StatsTableDoltSchema = StatsTableDoltSchemaGen()

func StatsTableDoltSchemaGen() Schema {
colColl := NewColCollection(
NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}),
NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}),
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}),
NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false),
NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false),
NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false),
Expand Down
4 changes: 4 additions & 0 deletions go/libraries/doltcore/sqle/clusterdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ func (db database) RequestedName() string {
return db.Name()
}

// AliasedName returns the alias for this database, which is the same value
// reported by Name.
func (db database) AliasedName() string {
	name := db.Name()
	return name
}

type noopRepoStateWriter struct{}

var _ env.RepoStateWriter = noopRepoStateWriter{}
Expand Down
3 changes: 3 additions & 0 deletions go/libraries/doltcore/sqle/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
if err != nil {
return nil, false, err
}
if branch == "" {
branch = db.Revision()
}
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true
case doltdb.ProceduresTableName:
found = true
Expand Down
6 changes: 3 additions & 3 deletions go/libraries/doltcore/sqle/database_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua
}
}

db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
// preserve original user case in the case of not found
if sql.ErrDatabaseNotFound.Is(err) {
return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName)
Expand Down Expand Up @@ -1507,8 +1507,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b
return "", false, nil
}

// revisionDbForBranch returns a new database that is tied to the branch named by revSpec
func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec
func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
static := staticRepoState{
branch: ref.NewBranchRef(revSpec),
RepoStateWriter: srcDb.DbData().Rsw,
Expand Down
8 changes: 5 additions & 3 deletions go/libraries/doltcore/sqle/dprocedures/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
{Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag},
{Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints},

{Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)},
{Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)},
{Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)},
{Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)},
{Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)},
{Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)},
{Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)},
{Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)},
{Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
{Name: "dolt_stats_sync", Schema: statsFuncSchema, Function: statsFunc(statsBranchSync)},
{Name: "dolt_stats_validate", Schema: statsFuncSchema, Function: statsFunc(statsValidate)},
}

// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
Expand Down
Loading
Loading