protocol/state: replace Outpoint with OutputID

**Problem:** Outpoint is a variable-length structure `<txid>:<index>` which is 33-40 bytes long (33 bytes for most transactions). It is used by transaction inputs to identify the exact output in the UTXO set ("Assets Merkle Tree"). The tree leaves contain `SHA3(output)`, which saves space but requires transactions to carry redundant copies of spent outputs to perform validation (otherwise nodes would have to store the entire outputs instead of their hashes — over 2x more data, and the ratio is much bigger in protocol 2). Also, for HSM-friendliness the TXSIGHASH must contain a redundant output's hash: `SHA3(txid || input index || SHA3(output))`. **Solution:** We define two new terms: * `OutputID = SHA3(TxHash || OutputIndex)` * `UnspentID = SHA3(OutputID || SHA3(OutputCommitment))` How these are used: 1. A transaction input contains the **OutputID** to identify the output being spent. This is a unique identifier of the output. 2. A transaction input uses a **second serialization flag** to indicate whether it contains the entire previous Output Commitment, or its hash (instead of an empty place). 3. The UTXO set becomes a **proper set** containing **UnspentIDs** instead of `{Outpoint -> SHA3(OutputCommitment)}`. When a node validates a transaction, it computes the `UnspentID` using the provided `OutputID` and the previous `OutputCommitment`. If the resulting unspent ID is present in the UTXO set, then the previous output is proved to be both authentic and available for spending. **Upsides:** 1. The outputID is constant-size and shorter: 32 bytes instead of 33-40 bytes. This simplifies the merkle tree design, the transaction data structure and all pieces of software that need to handle outpoints. 2. All outputs (via unspentIDs) in the transaction are randomized across the Assets Merkle Tree instead of being crammed inside a common subpath `<txid>||...`. 3. Inputs automatically commit directly to the spent outputs, so TXSIGHASH does not need to do that and can be simplified to `SHA3(txid || input index)`. 
An HSM is able to verify which output this input commits to without having access to the entire parent transaction. 4. We keep the term _outpoint_ to mean a pair `(txid, index)`, but it is internal to Chain Core to support random access to UTXOs. The validation protocol no longer uses outpoints. 5. The UTXO set takes 2x less RAM because it only contains unspent IDs (32 bytes) instead of a key-value pair (64+ bytes). 6. When we get to the _tx entries_ design, we'll generalize the idea of OutputID to EntryID, so that any entry can have a unique identifier. **Downsides:** 1. OutputID no longer indicates the transaction ID, which makes it impossible to navigate the chain of transactions without also having a mapping `outputID -> txid:index`. The UTXO tree is not enough, as it only reflects the latest state of the chain and throws away spent outputs. Note that in order to navigate the transactions in practice one still needs the mapping `txid -> tx`, so maintaining one more index might not be a significant increase in complexity. Chain is doing this indexing already and we keep that mapping. 2. Chain Core no longer returns the (txid,position) pair for annotated txinputs (called `spent_output:{transaction_id:String,position:Int}`), but instead returns the output_id (`spent_output_id:String`). To maintain full compatibility, we'd need to make an additional request to locate the previous output's txid and position, but I'm not sure any application actually relies on such historical data. For spending (locating unspents), we fully maintain compatibility with clients using (txid,position) pairs. This is a part of a package of breaking changes in P1: #239 See previous reviews: #417 Closes #421
chain · Jan 26, 2017 · 0536e8a · 0536e8a
1 parent b5d0ff7
commit 0536e8a
Show file tree

Hide file tree

Showing 57 changed files with 564 additions and 431 deletions.
diff --git a/core/account/builder.go b/core/account/builder.go
@@ -93,11 +93,10 @@ func (a *spendAction) Build(ctx context.Context, b *txbuilder.TemplateBuilder) e
 	return nil
 }
 
-func (m *Manager) NewSpendUTXOAction(outpoint bc.Outpoint) txbuilder.Action {
+func (m *Manager) NewSpendUTXOAction(outputID bc.OutputID) txbuilder.Action {
 	return &spendUTXOAction{
 		accounts: m,
-		TxHash:   &outpoint.Hash,
-		TxOut:    &outpoint.Index,
+		OutputID: &outputID,
 	}
 }
 
@@ -109,27 +108,29 @@ func (m *Manager) DecodeSpendUTXOAction(data []byte) (txbuilder.Action, error) {
 
 type spendUTXOAction struct {
 	accounts *Manager
-	TxHash   *bc.Hash `json:"transaction_id"`
-	TxOut    *uint32  `json:"position"`
+	OutputID *bc.OutputID `json:"output_id"`
+	TxHash   *bc.Hash     `json:"transaction_id"`
+	TxOut    *uint32      `json:"position"`
 
 	ReferenceData chainjson.Map `json:"reference_data"`
 	ClientToken   *string       `json:"client_token"`
 }
 
 func (a *spendUTXOAction) Build(ctx context.Context, b *txbuilder.TemplateBuilder) error {
-	var missing []string
-	if a.TxHash == nil {
-		missing = append(missing, "transaction_id")
-	}
-	if a.TxOut == nil {
-		missing = append(missing, "position")
-	}
-	if len(missing) > 0 {
-		return txbuilder.MissingFieldsError(missing...)
+	var outid bc.OutputID
+
+	if a.OutputID != nil {
+		outid = *a.OutputID
+	} else if a.TxHash != nil && a.TxOut != nil {
+		// This is a compatibility layer - legacy apps can spend outputs via the raw <txid:index> pair.
+		outid = bc.ComputeOutputID(*a.TxHash, *a.TxOut)
+	} else {
+		// Note: here we do not attempt to check if txid is present, but position is missing, or vice versa.
+		// Instead, the user has to update their code to use the new API anyway.
+		return txbuilder.MissingFieldsError("output_id")
 	}
 
-	out := bc.Outpoint{Hash: *a.TxHash, Index: *a.TxOut}
-	res, err := a.accounts.utxoDB.ReserveUTXO(ctx, out, a.ClientToken, b.MaxTime())
+	res, err := a.accounts.utxoDB.ReserveUTXO(ctx, outid, a.ClientToken, b.MaxTime())
 	if err != nil {
 		return err
 	}
@@ -161,7 +162,7 @@ func utxoToInputs(ctx context.Context, account *signers.Signer, u *utxo, refData
 	*txbuilder.SigningInstruction,
 	error,
 ) {
-	txInput := bc.NewSpendInput(u.Hash, u.Index, nil, u.AssetID, u.Amount, u.ControlProgram, refData)
+	txInput := bc.NewSpendInput(u.OutputID, nil, u.AssetID, u.Amount, u.ControlProgram, refData)
 
 	sigInst := &txbuilder.SigningInstruction{
 		AssetAmount: u.AssetAmount,

diff --git a/core/account/builder_test.go b/core/account/builder_test.go
@@ -59,7 +59,7 @@ func TestAccountSourceReserve(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	wantTxIns := []*bc.TxInput{bc.NewSpendInput(out.Hash, out.Index, nil, out.AssetID, out.Amount, out.ControlProgram, nil)}
+	wantTxIns := []*bc.TxInput{bc.NewSpendInput(out.OutputID, nil, out.AssetID, out.Amount, out.ControlProgram, nil)}
 	if !testutil.DeepEqual(tx.Inputs, wantTxIns) {
 		t.Errorf("build txins\ngot:\n\t%+v\nwant:\n\t%+v", tx.Inputs, wantTxIns)
 	}
@@ -98,7 +98,7 @@ func TestAccountSourceUTXOReserve(t *testing.T) {
 	prottest.MakeBlock(t, c, g.PendingTxs())
 	<-pinStore.PinWaiter(account.PinName, c.Height())
 
-	source := accounts.NewSpendUTXOAction(out.Outpoint)
+	source := accounts.NewSpendUTXOAction(out.OutputID)
 
 	var builder txbuilder.TemplateBuilder
 	err := source.Build(ctx, &builder)
@@ -110,7 +110,7 @@ func TestAccountSourceUTXOReserve(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	wantTxIns := []*bc.TxInput{bc.NewSpendInput(out.Hash, out.Index, nil, out.AssetID, out.Amount, out.ControlProgram, nil)}
+	wantTxIns := []*bc.TxInput{bc.NewSpendInput(out.OutputID, nil, out.AssetID, out.Amount, out.ControlProgram, nil)}
 
 	if !testutil.DeepEqual(tx.Inputs, wantTxIns) {
 		t.Errorf("build txins\ngot:\n\t%+v\nwant:\n\t%+v", tx.Inputs, wantTxIns)

diff --git a/core/account/indexer.go b/core/account/indexer.go
@@ -51,8 +51,14 @@ func (m *Manager) indexAnnotatedAccount(ctx context.Context, a *Account) error {
 	})
 }
 
-type output struct {
+type rawOutput struct {
 	state.Output
+	txHash      bc.Hash
+	outputIndex uint32
+}
+
+type accountOutput struct {
+	rawOutput
 	AccountID string
 	keyIndex  uint64
 }
@@ -66,16 +72,20 @@ func (m *Manager) ProcessBlocks(ctx context.Context) {
 
 func (m *Manager) indexAccountUTXOs(ctx context.Context, b *bc.Block) error {
 	// Upsert any UTXOs belonging to accounts managed by this Core.
-	outs := make([]*state.Output, 0, len(b.Transactions))
+	outs := make([]*rawOutput, 0, len(b.Transactions))
 	blockPositions := make(map[bc.Hash]uint32, len(b.Transactions))
 	for i, tx := range b.Transactions {
 		blockPositions[tx.Hash] = uint32(i)
 		for j, out := range tx.Outputs {
-			stateOutput := &state.Output{
-				TxOutput: *out,
-				Outpoint: bc.Outpoint{Hash: tx.Hash, Index: uint32(j)},
+			out := &rawOutput{
+				Output: state.Output{
+					TxOutput: *out,
+					OutputID: tx.OutputID(uint32(j)),
+				},
+				txHash:      tx.Hash,
+				outputIndex: uint32(j),
 			}
-			outs = append(outs, stateOutput)
+			outs = append(outs, out)
 		}
 	}
 	accOuts, err := m.loadAccountInfo(ctx, outs)
@@ -89,24 +99,23 @@ func (m *Manager) indexAccountUTXOs(ctx context.Context, b *bc.Block) error {
 	}
 
 	// Delete consumed account UTXOs.
-	deltxhash, delindex := prevoutDBKeys(b.Transactions...)
+	delOutputIDs := prevoutDBKeys(b.Transactions...)
 	const delQ = `
 		DELETE FROM account_utxos
-		WHERE (tx_hash, index) IN (SELECT unnest($1::bytea[]), unnest($2::integer[]))
+		WHERE output_id IN (SELECT unnest($1::bytea[]))
 	`
-	_, err = m.db.Exec(ctx, delQ, deltxhash, delindex)
+	_, err = m.db.Exec(ctx, delQ, delOutputIDs)
 	return errors.Wrap(err, "deleting spent account utxos")
 }
 
-func prevoutDBKeys(txs ...*bc.Tx) (txhash pq.ByteaArray, index pg.Uint32s) {
+func prevoutDBKeys(txs ...*bc.Tx) (outputIDs pq.ByteaArray) {
 	for _, tx := range txs {
 		for _, in := range tx.Inputs {
 			if in.IsIssuance() {
 				continue
 			}
-			o := in.Outpoint()
-			txhash = append(txhash, o.Hash[:])
-			index = append(index, o.Index)
+			o := in.SpentOutputID()
+			outputIDs = append(outputIDs, o.Bytes())
 		}
 	}
 	return
@@ -115,8 +124,8 @@ func prevoutDBKeys(txs ...*bc.Tx) (txhash pq.ByteaArray, index pg.Uint32s) {
 // loadAccountInfo turns a set of state.Outputs into a set of
 // outputs by adding account annotations.  Outputs that can't be
 // annotated are excluded from the result.
-func (m *Manager) loadAccountInfo(ctx context.Context, outs []*state.Output) ([]*output, error) {
-	outsByScript := make(map[string][]*state.Output, len(outs))
+func (m *Manager) loadAccountInfo(ctx context.Context, outs []*rawOutput) ([]*accountOutput, error) {
+	outsByScript := make(map[string][]*rawOutput, len(outs))
 	for _, out := range outs {
 		scriptStr := string(out.ControlProgram)
 		outsByScript[scriptStr] = append(outsByScript[scriptStr], out)
@@ -127,7 +136,7 @@ func (m *Manager) loadAccountInfo(ctx context.Context, outs []*state.Output) ([]
 		scripts = append(scripts, []byte(s))
 	}
 
-	result := make([]*output, 0, len(outs))
+	result := make([]*accountOutput, 0, len(outs))
 
 	const q = `
 		SELECT signer_id, key_index, control_program
@@ -136,8 +145,8 @@ func (m *Manager) loadAccountInfo(ctx context.Context, outs []*state.Output) ([]
 	`
 	err := pg.ForQueryRows(ctx, m.db, q, scripts, func(accountID string, keyIndex uint64, program []byte) {
 		for _, out := range outsByScript[string(program)] {
-			newOut := &output{
-				Output:    *out,
+			newOut := &accountOutput{
+				rawOutput: *out,
 				AccountID: accountID,
 				keyIndex:  keyIndex,
 			}
@@ -154,19 +163,23 @@ func (m *Manager) loadAccountInfo(ctx context.Context, outs []*state.Output) ([]
 // upsertConfirmedAccountOutputs records the account data for confirmed utxos.
 // If the account utxo already exists (because it's from a local tx), the
 // block confirmation data will in the row will be updated.
-func (m *Manager) upsertConfirmedAccountOutputs(ctx context.Context, outs []*output, pos map[bc.Hash]uint32, block *bc.Block) error {
+func (m *Manager) upsertConfirmedAccountOutputs(ctx context.Context, outs []*accountOutput, pos map[bc.Hash]uint32, block *bc.Block) error {
 	var (
 		txHash    pq.ByteaArray
 		index     pg.Uint32s
+		outputID  pq.ByteaArray
+		unspentID pq.ByteaArray
 		assetID   pq.ByteaArray
 		amount    pq.Int64Array
 		accountID pq.StringArray
 		cpIndex   pq.Int64Array
 		program   pq.ByteaArray
 	)
 	for _, out := range outs {
-		txHash = append(txHash, out.Outpoint.Hash[:])
-		index = append(index, out.Outpoint.Index)
+		txHash = append(txHash, out.txHash[:])
+		index = append(index, out.outputIndex)
+		outputID = append(outputID, out.OutputID.Bytes())
+		unspentID = append(unspentID, out.UnspentID().Bytes())
 		assetID = append(assetID, out.AssetID[:])
 		amount = append(amount, int64(out.Amount))
 		accountID = append(accountID, out.AccountID)
@@ -175,15 +188,17 @@ func (m *Manager) upsertConfirmedAccountOutputs(ctx context.Context, outs []*out
 	}
 
 	const q = `
-		INSERT INTO account_utxos (tx_hash, index, asset_id, amount, account_id, control_program_index,
+		INSERT INTO account_utxos (tx_hash, index, output_id, unspent_id, asset_id, amount, account_id, control_program_index,
 			control_program, confirmed_in)
-		SELECT unnest($1::bytea[]), unnest($2::bigint[]), unnest($3::bytea[]),  unnest($4::bigint[]),
-			   unnest($5::text[]), unnest($6::bigint[]), unnest($7::bytea[]), $8
+		SELECT unnest($1::bytea[]), unnest($2::bigint[]), unnest($3::bytea[]), unnest($4::bytea[]), unnest($5::bytea[]),  unnest($6::bigint[]),
+			   unnest($7::text[]), unnest($8::bigint[]), unnest($9::bytea[]), $10
 		ON CONFLICT (tx_hash, index) DO NOTHING
 	`
 	_, err := m.db.Exec(ctx, q,
 		txHash,
 		index,
+		outputID,
+		unspentID,
 		assetID,
 		amount,
 		accountID,

diff --git a/core/account/indexer_test.go b/core/account/indexer_test.go
@@ -22,10 +22,14 @@ func TestLoadAccountInfo(t *testing.T) {
 	to1 := bc.NewTxOutput(bc.AssetID{}, 0, acp, nil)
 	to2 := bc.NewTxOutput(bc.AssetID{}, 0, []byte("notfound"), nil)
 
-	outs := []*state.Output{{
-		TxOutput: *to1,
+	outs := []*rawOutput{{
+		Output: state.Output{
+			TxOutput: *to1,
+		},
 	}, {
-		TxOutput: *to2,
+		Output: state.Output{
+			TxOutput: *to2,
+		},
 	}}
 
 	got, err := m.loadAccountInfo(ctx, outs)
@@ -61,7 +65,7 @@ func TestDeleteUTXOs(t *testing.T) {
 	block2 := &bc.Block{Transactions: []*bc.Tx{
 		bc.NewTx(bc.TxData{
 			Inputs: []*bc.TxInput{
-				bc.NewSpendInput(block1.Transactions[0].Hash, 0, nil, assetID, 1, nil, nil),
+				bc.NewSpendInput(bc.ComputeOutputID(block1.Transactions[0].Hash, 0), nil, assetID, 1, nil, nil),
 			},
 		}),
 	}}