Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use multi-core optimized pgzip package in tarball compressor #98

Merged
Merged 4 commits on Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 121 additions & 26 deletions fileutil/tarball_compressor.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
package fileutil

import (
"os"
"io"
"fmt"
"runtime"
"io/fs"
"strings"
"path/filepath"

"archive/tar"
"github.com/klauspost/pgzip"

bosherr "github.com/cloudfoundry/bosh-utils/errors"
boshsys "github.com/cloudfoundry/bosh-utils/system"
)

// tarballCompressor is the Compressor implementation returned by
// NewTarballCompressor. It holds the collaborators its methods use to build
// and unpack gzip-compressed tar archives.
type tarballCompressor struct {
// cmdRunner runs external commands; the visible methods use it to invoke "tar".
// NOTE(review): this text comes from a rendered diff whose +/- markers are
// missing, so this field may be a removed line — confirm against the real file.
cmdRunner boshsys.CmdRunner
// fs is the filesystem abstraction used for walking, opening, creating and
// chown-ing files.
fs boshsys.FileSystem
}

// NewTarballCompressor builds a tarballCompressor and returns it as a
// Compressor.
//
// NOTE(review): two return statements appear back to back below. This text
// looks like a rendered diff with its +/- markers stripped, so presumably one
// is the removed line and the other its replacement — confirm which one is
// live. Read literally, the first return executes and the second is
// unreachable.
func NewTarballCompressor(
cmdRunner boshsys.CmdRunner,
fs boshsys.FileSystem,
) Compressor {
// Populates both cmdRunner and fs.
return tarballCompressor{cmdRunner: cmdRunner, fs: fs}
// Populates fs only; cmdRunner would be left at its zero value.
return tarballCompressor{fs: fs}
}

func (c tarballCompressor) CompressFilesInDir(dir string) (string, error) {
Expand All @@ -32,44 +37,134 @@ func (c tarballCompressor) CompressSpecificFilesInDir(dir string, files []string

defer tarball.Close()

tarballPath := tarball.Name()
zw := pgzip.NewWriter(tarball)
tw := tar.NewWriter(zw)

for _, file := range files {
err = c.fs.Walk(filepath.Join(dir, file), func(f string, fi os.FileInfo, err error) error {
if err != nil {
return err
}

if filepath.Base(f) == ".DS_Store" {
return nil
}

header, err := tar.FileInfoHeader(fi, f)
if err != nil {
return bosherr.WrapError(err, "Reading tar header")
}

relPath, err := filepath.Rel(dir, filepath.ToSlash(f))
if err != nil {
return bosherr.WrapError(err, "Resovling relative tar path")
}
header.Name = relPath

if err := tw.WriteHeader(header); err != nil {
return bosherr.WrapError(err, "Writing tar header")
}

if fi.Mode().IsRegular() {
data, err := c.fs.OpenFile(f, os.O_RDONLY, 0)
if err != nil {
return bosherr.WrapError(err, "Reading tar source file")
}
defer data.Close()

if _, err := io.Copy(tw, data); err != nil {
return bosherr.WrapError(err, "Copying data into tar")
}
}
return nil
})
}

args := []string{"-czf", tarballPath, "-C", dir}
if runtime.GOOS == "darwin" {
args = append([]string{"--no-mac-metadata"}, args...)
if err != nil {
return "", bosherr.WrapError(err, "Creating tgz")
}

for _, file := range files { //nolint:gosimple
args = append(args, file)
if err = tw.Close(); err != nil {
return "", bosherr.WrapError(err, "Closing tar writer")
}

_, _, _, err = c.cmdRunner.RunCommand("tar", args...)
if err != nil {
return "", bosherr.WrapError(err, "Shelling out to tar")
if err = zw.Close(); err != nil {
return "", bosherr.WrapError(err, "Closing gzip writer")
}

return tarballPath, nil
return tarball.Name(), nil
}

// DecompressFileToDir unpacks the gzip-compressed tar archive at tarballPath
// into dir. The options consulted are SameOwner (chown extracted entries),
// StripComponents (drop leading path components) and PathInArchive (filter
// entries). It returns a wrapped error from the first step that fails.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers are
// missing. The lines that build `args` and call c.cmdRunner.RunCommand("tar")
// appear to be the removed shell-out implementation, interleaved with the
// in-process pgzip + archive/tar implementation; the braces do not balance as
// shown. Confirm against the real file before relying on any single line.
func (c tarballCompressor) DecompressFileToDir(tarballPath string, dir string, options CompressorOptions) error {
// Shell-out variant: choose the tar ownership flag from options.SameOwner.
sameOwnerOption := "--no-same-owner"
if options.SameOwner {
sameOwnerOption = "--same-owner"
// In-process variant: only a "does not exist" error from Stat is reported
// here; any other Stat error is not checked by this condition.
if _, err := c.fs.Stat(dir); os.IsNotExist(err) {
return bosherr.WrapError(err, "Determine target dir")
}

// Shell-out variant: base tar arguments (extract, gunzip, change to dir).
args := []string{sameOwnerOption, "-xzf", tarballPath, "-C", dir}
if options.StripComponents != 0 {
args = append(args, fmt.Sprintf("--strip-components=%d", options.StripComponents))
// In-process variant: open the archive read-only; closed when the function returns.
tarball, err := c.fs.OpenFile(tarballPath, os.O_RDONLY, 0)
if err != nil {
return bosherr.WrapError(err, "Opening tarball")
}
defer tarball.Close()

if options.PathInArchive != "" {
args = append(args, options.PathInArchive)
}
_, _, _, err := c.cmdRunner.RunCommand("tar", args...)
// In-process variant: wrap the file in a pgzip reader to decompress the stream.
zr, err := pgzip.NewReader(tarball)
if err != nil {
return bosherr.WrapError(err, "Shelling out to tar")
return bosherr.WrapError(err, "Creating gzip reader")
}
defer zr.Close()

tr := tar.NewReader(zr)

// Iterate over archive entries until the tar reader reports io.EOF.
for {
header, err := tr.Next()
if err == io.EOF {
break
}

if err != nil {
return bosherr.WrapError(err, "Loading next file header")
}

// Skip entries that do not match PathInArchive.
// NOTE(review): the test is HasPrefix(PathInArchive, header.Name), i.e. the
// entry name must be a prefix of PathInArchive. An entry located below
// PathInArchive (e.g. "a/b/c" with PathInArchive "a/b") is skipped by this
// condition — confirm whether the arguments are intentionally in this order.
if options.PathInArchive != "" && !strings.HasPrefix(
filepath.Clean(options.PathInArchive), filepath.Clean(header.Name)) {
continue
}

// NOTE(review): header.Name is joined onto dir with no visible check that
// the result stays inside dir (names containing ".." are not rejected here).
fullName := filepath.Join(dir, header.Name)

// Drop the first StripComponents path components; entries that have no
// components left after stripping are skipped entirely.
if options.StripComponents > 0 {
components := strings.Split(filepath.Clean(header.Name), string(filepath.Separator))
if len(components) <= options.StripComponents {
continue
}

fullName = filepath.Join(append([]string{dir}, components[options.StripComponents:]...)...)
}

// Only directories and regular files are handled; any other entry type
// (symlinks, for example) falls to the default case and aborts with an error.
switch header.Typeflag {
case tar.TypeDir:
if err := c.fs.MkdirAll(fullName, fs.FileMode(header.Mode)); err != nil {
return bosherr.WrapError(err, "Decompressing directory")
}
case tar.TypeReg:
// NOTE(review): opened with O_CREATE|O_WRONLY and no O_TRUNC; presumably an
// existing, longer file at fullName would keep its trailing bytes — verify
// against the FileSystem implementation.
outFile, err := c.fs.OpenFile(fullName, os.O_CREATE|os.O_WRONLY, fs.FileMode(header.Mode))
if err != nil {
return bosherr.WrapError(err, "Creating decompressed file")
}
// This defer sits inside the for loop, so it runs when the function returns,
// not at the end of the iteration: every extracted file stays open until then.
defer outFile.Close()
// Stream the current entry's contents from the tar reader into the file.
if _, err := io.Copy(outFile, tr); err != nil {
return bosherr.WrapError(err, "Decompressing file contents")
}
default:
return fmt.Errorf("unknown type: %v in %s for tar: %s",
header.Typeflag, header.Name, tarballPath)
}

// When SameOwner is set, chown the extracted entry using the user and group
// names recorded in the tar header, formatted as "uname:gname".
if options.SameOwner {
if err := c.fs.Chown(fullName, fmt.Sprintf("%s:%s", header.Uname, header.Gname)); err != nil {
return bosherr.WrapError(err, "Updating ownership")
}
}
}

return nil
}

Expand Down
Loading
Loading