Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin_public/main' into partial_restor…
Browse files Browse the repository at this point in the history
…ation_fixes
  • Loading branch information
wwoytenko committed Mar 15, 2024
2 parents 8040c38 + e549611 commit 52a675e
Show file tree
Hide file tree
Showing 33 changed files with 1,487 additions and 838 deletions.
4 changes: 2 additions & 2 deletions cmd/greenmask/cmd/dump/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"

"github.com/greenmaskio/greenmask/internal/db/postgres"
cmdInternals "github.com/greenmaskio/greenmask/internal/db/postgres/cmd"
"github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils"
pgDomains "github.com/greenmaskio/greenmask/internal/domains"
"github.com/greenmaskio/greenmask/internal/storages/builder"
Expand Down Expand Up @@ -52,7 +52,7 @@ var (
log.Fatal().Msg("common.tmp_dir cannot be empty")
}

dump := postgres.NewDump(Config, st, utils.DefaultTransformerRegistry)
dump := cmdInternals.NewDump(Config, st, utils.DefaultTransformerRegistry)

if err := dump.Run(ctx); err != nil {
log.Fatal().Err(err).Msg("cannot make a backup")
Expand Down
4 changes: 2 additions & 2 deletions cmd/greenmask/cmd/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"

"github.com/greenmaskio/greenmask/internal/db/postgres"
cmdInternals "github.com/greenmaskio/greenmask/internal/db/postgres/cmd"
pgDomains "github.com/greenmaskio/greenmask/internal/domains"
"github.com/greenmaskio/greenmask/internal/storages/builder"
"github.com/greenmaskio/greenmask/internal/utils/logger"
Expand Down Expand Up @@ -56,7 +56,7 @@ var (

st = st.SubStorage(dumpId, true)

restore := postgres.NewRestore(
restore := cmdInternals.NewRestore(
Config.Common.PgBinPath, st, &Config.Restore.PgRestoreOptions, Config.Restore.Scripts,
Config.Common.TempDirectory,
)
Expand Down
4 changes: 2 additions & 2 deletions cmd/greenmask/cmd/show_dump/show_dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"

"github.com/greenmaskio/greenmask/internal/db/postgres"
cmdInternals "github.com/greenmaskio/greenmask/internal/db/postgres/cmd"
pgDomains "github.com/greenmaskio/greenmask/internal/domains"
"github.com/greenmaskio/greenmask/internal/storages/builder"
"github.com/greenmaskio/greenmask/internal/utils/logger"
Expand Down Expand Up @@ -87,7 +87,7 @@ var (
}
}

if err := postgres.ShowDump(ctx, st, dumpId, format); err != nil {
if err := cmdInternals.ShowDump(ctx, st, dumpId, format); err != nil {
log.Fatal().Err(err).Msg("")
}
},
Expand Down
99 changes: 71 additions & 28 deletions cmd/greenmask/cmd/validate/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"

"github.com/greenmaskio/greenmask/internal/db/postgres"
cmdInternals "github.com/greenmaskio/greenmask/internal/db/postgres/cmd"
"github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils"
"github.com/greenmaskio/greenmask/internal/domains"
"github.com/greenmaskio/greenmask/internal/utils/logger"
Expand All @@ -31,40 +31,56 @@ var (
Cmd = &cobra.Command{
Use: "validate",
Short: "perform validation procedure and data diff of transformation",
Run: func(cmd *cobra.Command, args []string) {
if err := logger.SetLogLevel(Config.Log.Level, Config.Log.Format); err != nil {
log.Err(err).Msg("")
}
Run: run,
}
Config = domains.NewConfig()
)

if Config.Common.TempDirectory == "" {
log.Fatal().Msg("common.tmp_dir cannot be empty")
}
func run(cmd *cobra.Command, args []string) {
if err := logger.SetLogLevel(Config.Log.Level, Config.Log.Format); err != nil {
log.Err(err).Msg("")
}

if Config.Validate.RowsLimit == 0 {
log.Fatal().Msgf("--rows-limit must be greater than 0 got %d", Config.Validate.RowsLimit)
}
if Config.Common.TempDirectory == "" {
log.Fatal().Msg("common.tmp_dir cannot be empty")
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
if Config.Validate.RowsLimit <= 0 {
log.Fatal().
Msgf("--rows-limit must be greater than 0 got %d", Config.Validate.RowsLimit)
}

validate, err := postgres.NewValidate(Config, utils.DefaultTransformerRegistry)
if err != nil {
log.Fatal().Err(err).Msg("")
}
if Config.Validate.Format != cmdInternals.JsonFormat &&
Config.Validate.Format != cmdInternals.TextFormat {
log.Fatal().
Str("RequestedFormat", Config.Validate.Format).
Msg("unknown --format value")
}

if err := validate.Run(ctx); err != nil {
log.Fatal().Err(err).Msg("")
}
if Config.Validate.TableFormat != cmdInternals.VerticalTableFormat &&
Config.Validate.TableFormat != cmdInternals.HorizontalTableFormat {
log.Fatal().
Str("RequestedTableFormat", Config.Validate.TableFormat).
Msg("unknown --table-format value")
}

},
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

validate, err := cmdInternals.NewValidate(Config, utils.DefaultTransformerRegistry)
if err != nil {
log.Fatal().Err(err).Msg("")
}
Config = domains.NewConfig()
)

if err := validate.Run(ctx); err != nil {
log.Fatal().Err(err).Msg("")
}
}

func init() {
tableFlagName := "table"
Cmd.Flags().StringSlice(
tableFlagName, nil, "check tables dump only for specific tables",
tableFlagName, nil, "Check tables dump only for specific tables",
)
flag := Cmd.Flags().Lookup(tableFlagName)
if err := viper.BindPFlag("validate.tables", flag); err != nil {
Expand All @@ -73,7 +89,7 @@ func init() {

dataFlagName := "data"
Cmd.Flags().Bool(
dataFlagName, false, "perform test dump for --rows-limit rows and print it pretty",
dataFlagName, false, "Perform test dump for --rows-limit rows and print it pretty",
)
flag = Cmd.Flags().Lookup(dataFlagName)
if err := viper.BindPFlag("validate.data", flag); err != nil {
Expand All @@ -82,7 +98,7 @@ func init() {

rowsLimitFlagName := "rows-limit"
Cmd.Flags().Uint64(
rowsLimitFlagName, 10, "check tables dump only for specific tables",
rowsLimitFlagName, 10, "Check tables dump only for specific tables",
)
flag = Cmd.Flags().Lookup(rowsLimitFlagName)
if err := viper.BindPFlag("validate.rows_limit", flag); err != nil {
Expand All @@ -91,7 +107,7 @@ func init() {

diffFlagName := "diff"
Cmd.Flags().Bool(
diffFlagName, false, "find difference between original and transformed data",
diffFlagName, false, "Find difference between original and transformed data",
)
flag = Cmd.Flags().Lookup(diffFlagName)
if err := viper.BindPFlag("validate.diff", flag); err != nil {
Expand All @@ -100,11 +116,38 @@ func init() {

formatFlagName := "format"
Cmd.Flags().String(
formatFlagName, "horizontal", "format of table output. possible values [horizontal|vertical]",
formatFlagName, "text", "Format of output. possible values [text|json]",
)
flag = Cmd.Flags().Lookup(formatFlagName)
if err := viper.BindPFlag("validate.format", flag); err != nil {
log.Fatal().Err(err).Msg("fatal")
}

tableFormatFlagName := "table-format"
Cmd.Flags().String(
tableFormatFlagName, cmdInternals.VerticalTableFormat, "Format of table output (only for --format=text). Possible values [vertical|horizontal]",
)
flag = Cmd.Flags().Lookup(tableFormatFlagName)
if err := viper.BindPFlag("validate.table_format", flag); err != nil {
log.Fatal().Err(err).Msg("fatal")
}

onlyTransformedFlagName := "transformed-only"
Cmd.Flags().Bool(
onlyTransformedFlagName, false, "Print only transformed column and primary key",
)
flag = Cmd.Flags().Lookup(onlyTransformedFlagName)
if err := viper.BindPFlag("validate.transformed_only", flag); err != nil {
log.Fatal().Err(err).Msg("fatal")
}

warningsFlagName := "warnings"
Cmd.Flags().Bool(
warningsFlagName, false, "Print warnings",
)
flag = Cmd.Flags().Lookup(warningsFlagName)
if err := viper.BindPFlag("validate.warnings", flag); err != nil {
log.Fatal().Err(err).Msg("fatal")
}

}
15 changes: 11 additions & 4 deletions docs/built_in_transformers/standard_transformers/hash.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@ Generate a hash of the text value using the `Scrypt` hash function under the hoo
| Name | Description | Default | Required | Supported DB types |
|------------|---------------------------------------------------------------------------------------------------------------------------------------|---------|----------|--------------------|
| column | The name of the column to be affected | | Yes | text, varchar |
| function | Hash algorithm to obfuscate data. Can be any of `md5`, `sha1`, `sha256`, `sha512`. | `sha1` | No | - |
| salt | Hex encoded salt string. This value may be provided via environment variable `GREENMASK_GLOBAL_SALT` | | Yes | text, varchar |
| function | Hash algorithm to obfuscate data. Can be any of `md5`, `sha1`, `sha256`, `sha512`, `sha3-224`, `sha3-254`, `sha3-384`, `sha3-512`. | `sha1` | No | - |
| max_length | Indicates whether to truncate the hash tail and specifies at what length. Can be any integer number, where `0` means "no truncation". | `0` | No | - |

## Example: Generate hash from job title

The following example hashes the `jobtitle` column using the `sha1` function and truncates the result to 10 characters.

We can set the salt via the environment variable `GREENMASK_GLOBAL_SALT`:

```shell
export GREENMASK_GLOBAL_SALT="12343567baaa"
```

```yaml title="Hash transformer example"
- schema: "humanresources"
name: "employee"
Expand All @@ -25,8 +32,8 @@ The following example generates a hash from the `jobtitle` into sha1 and truncat
```bash title="Expected result"

| column name | original value | transformed |
|-------------|--------------------------|-------------|
| jobtitle | Research and Development | Zpmfe8F+LV |
| column name | original value | transformed |
|-------------|----------------------------------|-------------|
| jobtitle | Research and Development Manager | 3a456da5c5 |

```
30 changes: 15 additions & 15 deletions internal/db/postgres/dump.go → internal/db/postgres/cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package postgres
package cmd

import (
"bytes"
Expand All @@ -29,8 +29,8 @@ import (
"golang.org/x/sync/errgroup"

runtimeContext "github.com/greenmaskio/greenmask/internal/db/postgres/context"
"github.com/greenmaskio/greenmask/internal/db/postgres/dump"
"github.com/greenmaskio/greenmask/internal/db/postgres/dumpers"
"github.com/greenmaskio/greenmask/internal/db/postgres/entries"
"github.com/greenmaskio/greenmask/internal/db/postgres/pgdump"
storageDto "github.com/greenmaskio/greenmask/internal/db/postgres/storage"
"github.com/greenmaskio/greenmask/internal/db/postgres/toc"
Expand Down Expand Up @@ -59,7 +59,7 @@ type Dump struct {
dumpedObjectSizes map[int32]storageDto.ObjectSizeStat
tocFileSize int64
version int
blobs *dump.Blobs
blobs *entries.Blobs
// validate shows that dump worker must be in validation mode
validate bool
}
Expand Down Expand Up @@ -221,7 +221,7 @@ func (d *Dump) schemaOnlyDump(ctx context.Context, tx pgx.Tx) error {
func (d *Dump) dataDump(ctx context.Context) error {
// TODO: You should use pointer to dumpers.DumpTask instead
tasks := make(chan dumpers.DumpTask, d.pgDumpOptions.Jobs)
result := make(chan dump.Entry, d.pgDumpOptions.Jobs)
result := make(chan entries.Entry, d.pgDumpOptions.Jobs)

log.Debug().Msgf("planned %d workers", d.pgDumpOptions.Jobs)
eg, gtx := errgroup.WithContext(ctx)
Expand Down Expand Up @@ -256,11 +256,11 @@ func (d *Dump) dataDump(ctx context.Context) error {
dumpObj.SetDumpId(d.dumpIdSequence)
var task dumpers.DumpTask
switch v := dumpObj.(type) {
case *dump.Table:
task = dumpers.NewTableDumper(v, d.validate, d.config.Validate.Diff)
case *dump.Sequence:
case *entries.Table:
task = dumpers.NewTableDumper(v, d.validate)
case *entries.Sequence:
task = dumpers.NewSequenceDumper(v)
case *dump.Blobs:
case *entries.Blobs:
d.blobs = v
task = dumpers.NewLargeObjectDumper(v)
default:
Expand All @@ -280,7 +280,7 @@ func (d *Dump) dataDump(ctx context.Context) error {
func() error {
var tables, sequences, largeObjects []*toc.Entry
for {
var entry dump.Entry
var entry entries.Entry
var ok bool
select {
case <-gtx.Done():
Expand All @@ -299,15 +299,15 @@ func (d *Dump) dataDump(ctx context.Context) error {
return fmt.Errorf("error producing toc entry: %w", err)
}
switch v := entry.(type) {
case *dump.Table:
case *entries.Table:
d.dumpedObjectSizes[e.DumpId] = storageDto.ObjectSizeStat{
Original: v.OriginalSize,
Compressed: v.CompressedSize,
}
tables = append(tables, e)
case *dump.Sequence:
case *entries.Sequence:
sequences = append(sequences, e)
case *dump.Blobs:
case *entries.Blobs:
d.dumpedObjectSizes[e.DumpId] = storageDto.ObjectSizeStat{
Original: v.OriginalSize,
Compressed: v.CompressedSize,
Expand Down Expand Up @@ -506,7 +506,7 @@ func (d *Dump) getWorkerTransaction(ctx context.Context) (*pgx.Conn, pgx.Tx, err
}

func (d *Dump) dumpWorker(
ctx context.Context, tasks <-chan dumpers.DumpTask, result chan<- dump.Entry, id int,
ctx context.Context, tasks <-chan dumpers.DumpTask, result chan<- entries.Entry, id int,
) error {

conn, tx, err := d.getWorkerTransaction(ctx)
Expand Down Expand Up @@ -574,7 +574,7 @@ func (d *Dump) dumpWorker(
}

func (d *Dump) validateDumpWorker(
ctx context.Context, tasks <-chan dumpers.DumpTask, result chan<- dump.Entry, id int,
ctx context.Context, tasks <-chan dumpers.DumpTask, result chan<- entries.Entry, id int,
) error {
for {

Expand All @@ -601,7 +601,7 @@ func (d *Dump) validateDumpWorker(
Str("ObjectName", task.DebugInfo()).
Msgf("dumping started")

entry, err := func() (dump.Entry, error) {
entry, err := func() (entries.Entry, error) {
conn, tx, err := d.getWorkerTransaction(ctx)

if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package postgres
package cmd

import (
"bufio"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package postgres
package cmd

import (
"context"
Expand All @@ -38,12 +38,12 @@ const (
)

var templateString = `;
; Archive created at {{ .Header.CreationDate.Format "2006-01-02 15:04:05 UTC" }}
; Archive created at {{ .Header.CreationDate.TableFormat "2006-01-02 15:04:05 UTC" }}
; dbname: {{ .Header.DbName }}
; TOC Entries: {{ .Header.TocEntriesCount }}
; Compression: {{ .Header.Compression }}
; Dump Version: {{ .Header.DumpVersion }}
; Format: DIRECTORY
; TableFormat: DIRECTORY
; Integer: {{ .Header.Integer }} bytes
; Offset: {{ .Header.Offset }} bytes
; Dumped from database version: {{ .Header.DumpedFrom }}
Expand Down
Loading

0 comments on commit 52a675e

Please sign in to comment.