From 1144c7f53159d41a794f9a39c8577bea90969b54 Mon Sep 17 00:00:00 2001
From: Maximilian Hoffman <max@dolthub.com>
Date: Wed, 30 Oct 2024 13:59:43 -0700
Subject: [PATCH] Fix panics on schema changes (#8505)

* Fix panics on schema changes

* more time for stats auto-update test

* debug output

* more slep

* more sleep

* maybe null hash

* revert test
---
 .../doltcore/sqle/enginetest/stats_queries.go | 65 +++++++++++++++++++
 .../doltcore/sqle/statsnoms/database.go       | 53 +++++++++++----
 .../doltcore/sqle/statspro/analyze.go         | 20 ++++++
 .../doltcore/sqle/statspro/auto_refresh.go    | 24 ++++++-
 .../doltcore/sqle/statspro/interface.go       |  6 +-
 integration-tests/bats/stats.bats             |  2 +-
 6 files changed, 151 insertions(+), 19 deletions(-)

diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
index 4ddee622d37..a844607e71b 100644
--- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go
+++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
@@ -536,6 +536,71 @@ var DoltStatsIOTests = []queries.ScriptTest{
 			},
 		},
 	},
+	{
+		// https://github.com/dolthub/dolt/issues/8504
+		Name: "alter index column type",
+		SetUpScript: []string{
+			"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
+			"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
+			"CREATE table xy (x bigint primary key, y varchar(16))",
+			"insert into xy values (0,'0'), (1,'1'), (2,'2')",
+			"analyze table xy",
+		},
+		Assertions: []queries.ScriptTestAssertion{
+			{
+				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
+				Expected: []sql.Row{{1}},
+			},
+			{
+				Query: "alter table xy modify column x varchar(16);",
+			},
+			{
+				Query: "insert into xy values ('3', '3')",
+			},
+			{
+				Query: "call dolt_stats_restart()",
+			},
+			{
+				Query: "select sleep(.2)",
+			},
+			{
+				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
+				Expected: []sql.Row{{1}},
+			},
+		},
+	},
+	{
+		Name: "drop primary key",
+		SetUpScript: []string{
+			"set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
+			"set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
+			"CREATE table xy (x bigint primary key, y varchar(16))",
+			"insert into xy values (0,'0'), (1,'1'), (2,'2')",
+			"analyze table xy",
+		},
+		Assertions: []queries.ScriptTestAssertion{
+			{
+				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
+				Expected: []sql.Row{{1}},
+			},
+			{
+				Query: "alter table xy drop primary key",
+			},
+			{
+				Query: "insert into xy values ('3', '3')",
+			},
+			{
+				Query: "call dolt_stats_restart()",
+			},
+			{
+				Query: "select sleep(.2)",
+			},
+			{
+				Query:    "select count(*) from dolt_statistics group by table_name, index_name",
+				Expected: []sql.Row{},
+			},
+		},
+	},
 }
 
 var StatBranchTests = []queries.ScriptTest{
diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go
index 47b9029d467..2c1491e18fa 100644
--- a/go/libraries/doltcore/sqle/statsnoms/database.go
+++ b/go/libraries/doltcore/sqle/statsnoms/database.go
@@ -118,13 +118,14 @@ func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
 type dbStats map[sql.StatQualifier]*statspro.DoltStats
 
 type NomsStatsDatabase struct {
-	mu               *sync.Mutex
-	destDb           dsess.SqlDatabase
-	sourceDb         dsess.SqlDatabase
-	stats            []dbStats
-	branches         []string
-	latestTableRoots []map[string]hash.Hash
-	dirty            []*prolly.MutableMap
+	mu           *sync.Mutex
+	destDb       dsess.SqlDatabase
+	sourceDb     dsess.SqlDatabase
+	stats        []dbStats
+	branches     []string
+	tableHashes  []map[string]hash.Hash
+	schemaHashes []map[string]hash.Hash
+	dirty        []*prolly.MutableMap
 }
 
 var _ statspro.Database = (*NomsStatsDatabase)(nil)
@@ -158,7 +159,8 @@ func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) err
 	n.branches = append(n.branches, branch)
 	n.stats = append(n.stats, doltStats)
 	n.dirty = append(n.dirty, nil)
-	n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash))
+	n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
+	n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
 	return nil
 }
 
@@ -223,7 +225,8 @@ func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql
 func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
 	n.branches = append(n.branches, branch)
 	n.stats = append(n.stats, make(dbStats))
-	n.latestTableRoots = append(n.latestTableRoots, make(map[string]hash.Hash))
+	n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
+	n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
 
 	kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
 	newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd)
@@ -268,7 +271,7 @@ func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, f
 			n.branches = append(n.branches[:i], n.branches[i+1:]...)
 			n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
 			n.stats = append(n.stats[:i], n.stats[i+1:]...)
-			n.latestTableRoots = append(n.latestTableRoots[:i], n.latestTableRoots[i+1:]...)
+			n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...)
 		}
 	}
 	if flush {
@@ -339,23 +342,45 @@ func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
 	return nil
 }
 
-func (n *NomsStatsDatabase) GetLatestHash(branch, tableName string) hash.Hash {
+func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash {
 	n.mu.Lock()
 	defer n.mu.Unlock()
 	for i, b := range n.branches {
 		if strings.EqualFold(branch, b) {
-			return n.latestTableRoots[i][tableName]
+			return n.tableHashes[i][tableName]
 		}
 	}
 	return hash.Hash{}
 }
 
-func (n *NomsStatsDatabase) SetLatestHash(branch, tableName string, h hash.Hash) {
+func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) {
 	n.mu.Lock()
 	defer n.mu.Unlock()
 	for i, b := range n.branches {
 		if strings.EqualFold(branch, b) {
-			n.latestTableRoots[i][tableName] = h
+			n.tableHashes[i][tableName] = h
+			break
+		}
+	}
+}
+
+func (n *NomsStatsDatabase) GetSchemaHash(branch, tableName string) hash.Hash {
+	n.mu.Lock()
+	defer n.mu.Unlock()
+	for i, b := range n.branches {
+		if strings.EqualFold(branch, b) {
+			return n.schemaHashes[i][tableName]
+		}
+	}
+	return hash.Hash{}
+}
+
+func (n *NomsStatsDatabase) SetSchemaHash(branch, tableName string, h hash.Hash) {
+	n.mu.Lock()
+	defer n.mu.Unlock()
+	for i, b := range n.branches {
+		if strings.EqualFold(branch, b) {
+			n.schemaHashes[i][tableName] = h
 			break
 		}
 	}
diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go
index 7da369e13a9..ecd470b0d42 100644
--- a/go/libraries/doltcore/sqle/statspro/analyze.go
+++ b/go/libraries/doltcore/sqle/statspro/analyze.go
@@ -135,6 +135,26 @@ func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table
 		p.setStatDb(dbName, statDb)
 	}
 
+	schHash, err := dTab.GetSchemaHash(ctx)
+	if err != nil {
+		return err
+	}
+
+	if oldSchHash := statDb.GetSchemaHash(branch, tableName); oldSchHash.IsEmpty() {
+		statDb.SetSchemaHash(branch, tableName, schHash)
+	} else if oldSchHash != schHash {
+		ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
+		statDb.SetSchemaHash(branch, tableName, schHash)
+
+		stats, err := p.GetTableDoltStats(ctx, branch, dbName, tableName)
+		if err != nil {
+			return err
+		}
+		for _, stat := range stats {
+			statDb.DeleteStats(ctx, branch, stat.Qualifier())
+		}
+	}
+
 	tablePrefix := fmt.Sprintf("%s.", tableName)
 	var idxMetas []indexMeta
 	for _, idx := range indexes {
diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go
index 7e61b1ded1e..6bc92380fae 100644
--- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go
+++ b/go/libraries/doltcore/sqle/statspro/auto_refresh.go
@@ -149,7 +149,7 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
 			return err
 		}
 
-		if statDb.GetLatestHash(branch, table) == tableHash {
+		if statDb.GetTableHash(branch, table) == tableHash {
 			// no data changes since last check
 			tableExistsAndSkipped[table] = true
 			ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
@@ -158,6 +158,26 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
 			ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
 		}
 
+		schHash, err := dTab.GetSchemaHash(ctx)
+		if err != nil {
+			return err
+		}
+
+		if oldSchHash := statDb.GetSchemaHash(branch, table); oldSchHash.IsEmpty() {
+			statDb.SetSchemaHash(branch, table, schHash)
+		} else if oldSchHash != schHash {
+			ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
+			statDb.SetSchemaHash(branch, table, schHash)
+
+			stats, err := p.GetTableDoltStats(ctx, branch, dbName, table)
+			if err != nil {
+				return err
+			}
+			for _, stat := range stats {
+				statDb.DeleteStats(ctx, branch, stat.Qualifier())
+			}
+		}
+
 		iat, ok := sqlTable.(sql.IndexAddressableTable)
 		if !ok {
 			return fmt.Errorf("table does not support indexes %s", table)
@@ -205,7 +225,7 @@ func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, br
 				// mark index for updating
 				idxMetas = append(idxMetas, updateMeta)
 				// update latest hash if we haven't already
-				statDb.SetLatestHash(branch, table, tableHash)
+				statDb.SetTableHash(branch, table, tableHash)
 			}
 		}
 
diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go
index d8bf5d4894c..987de042069 100644
--- a/go/libraries/doltcore/sqle/statspro/interface.go
+++ b/go/libraries/doltcore/sqle/statspro/interface.go
@@ -50,8 +50,10 @@ type Database interface {
 	Flush(ctx context.Context, branch string) error
 	// Close finalizes any file references.
 	Close() error
-	SetLatestHash(branch, tableName string, h hash.Hash)
-	GetLatestHash(branch, tableName string) hash.Hash
+	SetTableHash(branch, tableName string, h hash.Hash)
+	GetTableHash(branch, tableName string) hash.Hash
+	SetSchemaHash(branch, tableName string, h hash.Hash)
+	GetSchemaHash(branch, tableName string) hash.Hash
 	Branches() []string
 }
 
diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats
index 90efd42c06e..0138ccf2112 100644
--- a/integration-tests/bats/stats.bats
+++ b/integration-tests/bats/stats.bats
@@ -119,7 +119,7 @@ teardown() {
     [ "${lines[1]}" = "0" ]
 
     start_sql_server
-    dolt sql -q "insert into xy values (0,0), (1,1)"
+    run dolt sql -q "insert into xy values (0,0), (1,1)"
     sleep 1
     stop_sql_server