Skip to content

Commit

Permalink
Merge pull request #6980 from dolthub/nicktobey/schemamerge
Browse files Browse the repository at this point in the history
Correctly identify and report data conflicts when deleting rows and/or columns during a three-way merge.
  • Loading branch information
nicktobey authored Nov 21, 2023
2 parents dbb5ce3 + 6527d9d commit 9bc32d9
Show file tree
Hide file tree
Showing 15 changed files with 540 additions and 265 deletions.
416 changes: 258 additions & 158 deletions go/libraries/doltcore/merge/merge_prolly_rows.go

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions go/libraries/doltcore/merge/mutable_secondary_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ func GetMutableSecondaryIdxsWithPending(ctx *sql.Context, sch schema.Schema, tab
return nil, err
}
m := durable.ProllyMapFromIndex(idx)

// If the schema has changed, don't reuse the index.
// TODO: This isn't technically required, but correctly handling updating secondary indexes when only some
// of the table's rows have been updated is difficult to get right.
// Dropping the index is potentially slower but guarenteed to be correct.
if !m.KeyDesc().Equals(index.Schema().GetKeyDescriptorWithNoConversion()) {
continue
}

if !m.ValDesc().Equals(index.Schema().GetValueDescriptor()) {
continue
}

if schema.IsKeyless(sch) {
m = prolly.ConvertToSecondaryKeylessIndex(m)
}
Expand Down
10 changes: 0 additions & 10 deletions go/libraries/doltcore/merge/row_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,16 +173,6 @@ var testCases = []testCase{
true,
false,
},
{
"dropping a column should be equivalent to setting a column to null",
build(1, 2, 0),
build(2, 1),
build(1, 1, 1),
3, 2, 3,
build(2, 2),
true,
false,
},
// TODO (dhruv): need to fix this test case for new storage format
//{
// "add rows but one holds a new column",
Expand Down
90 changes: 61 additions & 29 deletions go/libraries/doltcore/merge/schema_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,24 +268,25 @@ var columnAddDropTests = []schemaMergeTest{
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to non-NULL, other drops NULL, plus data change",
ancestor: singleRow(1, 2, nil),
left: singleRow(1, 3),
right: singleRow(1, 2, 3),
dataConflict: true,
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to non-NULL, other drops non-NULL",
ancestor: singleRow(1, 2, 3),
left: singleRow(1, 2),
right: singleRow(1, 2, 4),
dataConflict: true,
skip: true,
},
{
name: "one side drops column, other deletes row",
ancestor: []sql.Row{row(1, 2, 3), row(4, 5, 6)},
left: []sql.Row{row(1, 2), row(4, 5)},
right: []sql.Row{row(1, 2, 3)},
merged: []sql.Row{row(1, 2)},
},
},
},
Expand All @@ -304,54 +305,39 @@ var columnAddDropTests = []schemaMergeTest{
merged: singleRow(1, 3),
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to NULL, other drops non-NULL",
ancestor: singleRow(1, 2, 3),
left: singleRow(1, 3),
right: singleRow(1, nil, 3),
dataConflict: true,
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to NULL, other drops non-NULL, plus data change",
ancestor: singleRow(1, 2, 4),
left: singleRow(1, 3),
right: singleRow(1, nil, 4),
dataConflict: true,
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to non-NULL, other drops NULL, plus data change",
ancestor: singleRow(1, nil, 3),
left: singleRow(1, 3),
right: singleRow(1, 2, 3),
dataConflict: true,
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to non-NULL, other drops NULL, plus data change",
ancestor: singleRow(1, nil, 3),
left: singleRow(1, 4),
right: singleRow(1, 2, 3),
dataConflict: true,
skip: true,
},
{
// Skipped because the differ currently doesn't try to merge the dropped column.
// (https://github.com/dolthub/dolt/issues/6747)
name: "one side sets to non-NULL, other drops non-NULL",
ancestor: singleRow(1, 2, 3),
left: singleRow(1, 3),
right: singleRow(1, 4, 3),
dataConflict: true,
skip: true,
},
},
},
Expand Down Expand Up @@ -547,6 +533,29 @@ var columnAddDropTests = []schemaMergeTest{
skipNewFmt: true,
skipOldFmt: true,
},
{
name: "right side drops and adds column of same type",
ancestor: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, a int)")),
left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, a int)")),
right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, b int)")),
merged: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, b int)")),
dataTests: []dataTest{
{
name: "left side modifies dropped column",
ancestor: singleRow(1, 1, 2),
left: singleRow(1, 1, 3),
right: singleRow(1, 2, 2),
dataConflict: true,
},
},
},
{
name: "right side drops and adds column of different type",
ancestor: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, a int)")),
left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, a int)")),
right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, b text)")),
merged: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, c int, b text)")),
},
}

var columnDefaultTests = []schemaMergeTest{
Expand Down Expand Up @@ -735,6 +744,27 @@ var typeChangeTests = []schemaMergeTest{
right: singleRow(1, "hello world", 1, "hello world"),
dataConflict: true,
},
{
name: "delete and schema change on left",
ancestor: singleRow(1, "test", 1, "test"),
left: nil,
right: singleRow(1, "test", 1, "test"),
merged: nil,
},
{
name: "schema change on left, delete on right",
ancestor: singleRow(1, "test", 1, "test"),
left: singleRow(1, "test", 1, "test"),
right: nil,
merged: nil,
},
{
name: "schema and value change on left, delete on right",
ancestor: singleRow(1, "test", 1, "test"),
left: singleRow(1, "hello", 1, "hello"),
right: nil,
dataConflict: true,
},
},
},
}
Expand Down Expand Up @@ -952,7 +982,7 @@ func testSchemaMergeHelper(t *testing.T, tests []schemaMergeTest, flipSides bool
require.NoError(t, err)
foundDataConflict = foundDataConflict || hasConflict
}
require.Equal(t, expectDataConflict, foundDataConflict)
require.True(t, foundDataConflict, "Expected data conflict, but didn't find one.")
} else {
for name, addr := range exp {
a, ok := act[name]
Expand All @@ -978,17 +1008,19 @@ func testSchemaMergeHelper(t *testing.T, tests []schemaMergeTest, flipSides bool
runTest(t, test, false)
})
for _, data := range test.dataTests {
test.ancestor.rows = data.ancestor
test.left.rows = data.left
test.right.rows = data.right
test.merged.rows = data.merged
test.skipNewFmt = test.skipNewFmt || data.skip
test.skipFlipOnNewFormat = test.skipFlipOnNewFormat || data.skipFlip
// Copy the test so that the values from one data test don't affect subsequent data tests.
dataDest := test
dataDest.ancestor.rows = data.ancestor
dataDest.left.rows = data.left
dataDest.right.rows = data.right
dataDest.merged.rows = data.merged
dataDest.skipNewFmt = dataDest.skipNewFmt || data.skip
dataDest.skipFlipOnNewFormat = dataDest.skipFlipOnNewFormat || data.skipFlip
t.Run(data.name, func(t *testing.T) {
if data.skip {
t.Skip()
}
runTest(t, test, data.dataConflict)
runTest(t, dataDest, data.dataConflict)
})
}
})
Expand Down
7 changes: 7 additions & 0 deletions go/libraries/doltcore/schema/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,15 @@ type Schema interface {
GetMapDescriptors() (keyDesc, valueDesc val.TupleDesc)

// GetKeyDescriptor returns the key tuple descriptor for this schema.
// If a column has a type that can't appear in a key (such as "address" columns),
// that column will get converted to equivalent types that can. (Example: text -> varchar)
GetKeyDescriptor() val.TupleDesc

// GetKeyDescriptorWithNoConversion returns the a descriptor for the columns used in the key.
// Unlike `GetKeyDescriptor`, it doesn't attempt to convert columns if they can't appear in a key,
// and returns them as they are.
GetKeyDescriptorWithNoConversion() val.TupleDesc

// GetValueDescriptor returns the value tuple descriptor for this schema.
GetValueDescriptor() val.TupleDesc

Expand Down
15 changes: 12 additions & 3 deletions go/libraries/doltcore/schema/schema_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,15 @@ func (si *schemaImpl) GetMapDescriptors() (keyDesc, valueDesc val.TupleDesc) {

// GetKeyDescriptor implements the Schema interface.
func (si *schemaImpl) GetKeyDescriptor() val.TupleDesc {
return si.getKeyColumnsDescriptor(true)
}

// GetKeyDescriptorWithNoConversion implements the Schema interface.
func (si *schemaImpl) GetKeyDescriptorWithNoConversion() val.TupleDesc {
return si.getKeyColumnsDescriptor(false)
}

func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.TupleDesc {
if IsKeyless(si) {
return val.KeylessTupleDesc
}
Expand All @@ -420,17 +429,17 @@ func (si *schemaImpl) GetKeyDescriptor() val.TupleDesc {
sqlType := col.TypeInfo.ToSqlType()
queryType := sqlType.Type()
var t val.Type
if queryType == query.Type_BLOB {
if convertAddressColumns && queryType == query.Type_BLOB {
t = val.Type{
Enc: val.Encoding(EncodingFromSqlType(query.Type_VARBINARY)),
Nullable: columnMissingNotNullConstraint(col),
}
} else if queryType == query.Type_TEXT {
} else if convertAddressColumns && queryType == query.Type_TEXT {
t = val.Type{
Enc: val.Encoding(EncodingFromSqlType(query.Type_VARCHAR)),
Nullable: columnMissingNotNullConstraint(col),
}
} else if queryType == query.Type_GEOMETRY {
} else if convertAddressColumns && queryType == query.Type_GEOMETRY {
t = val.Type{
Enc: val.Encoding(serial.EncodingCell),
Nullable: columnMissingNotNullConstraint(col),
Expand Down
Loading

0 comments on commit 9bc32d9

Please sign in to comment.