Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for gzip transcoding #924

Merged
merged 11 commits into from
Sep 21, 2022
Merged
45 changes: 42 additions & 3 deletions fakestorage/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fakestorage

import (
"bytes"
"compress/gzip"
"encoding/json"
"errors"
"fmt"
Expand Down Expand Up @@ -763,8 +764,45 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) {
var content io.Reader
content = obj.Content
status := http.StatusOK
ranged, start, lastByte, satisfiable := s.handleRange(obj, r)
contentLength := lastByte - start + 1

transcoded := false
ranged := false
start := int64(0)
lastByte := int64(0)
satisfiable := true
contentLength := int64(0)

handledTranscoding := func() bool {
// This should also be false if the Cache-Control metadata field == "no-transform",
// but we don't currently support that field.
// See https://cloud.google.com/storage/docs/transcoding

if obj.ContentEncoding == "gzip" && !strings.Contains(r.Header.Get("accept-encoding"), "gzip") {
// GCS will transparently decompress gzipped content, see
// https://cloud.google.com/storage/docs/transcoding
// In this case, any Range header is ignored and the full content is returned.

// If the content is not a valid gzip file, ignore errors and continue
// without transcoding. Otherwise, return decompressed content.
gzipReader, err := gzip.NewReader(content)
if err == nil {
rawContent, err := io.ReadAll(gzipReader)
if err == nil {
transcoded = true
content = bytes.NewReader(rawContent)
contentLength = int64(len(rawContent))
obj.Size = contentLength
return true
}
}
}
return false
}

if !handledTranscoding() {
ranged, start, lastByte, satisfiable = s.handleRange(obj, r)
contentLength = lastByte - start + 1
}

if ranged && satisfiable {
_, err = obj.Content.Seek(start, io.SeekStart)
Expand Down Expand Up @@ -793,7 +831,8 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) {
if obj.ContentType != "" {
w.Header().Set(contentTypeHeader, obj.ContentType)
}
if obj.ContentEncoding != "" {
// If content was transcoded, the underlying encoding was removed so we shouldn't report it.
if obj.ContentEncoding != "" && !transcoded {
w.Header().Set("Content-Encoding", obj.ContentEncoding)
}
}
Expand Down
123 changes: 123 additions & 0 deletions fakestorage/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fakestorage

import (
"bytes"
"compress/gzip"
"context"
"encoding/binary"
"errors"
Expand Down Expand Up @@ -361,6 +362,128 @@ func TestServerClientObjectReader(t *testing.T) {
})
}

func TestServerClientObjectTranscoding(t *testing.T) {
const (
bucketName = "some-bucket"
objectName = "items/data.txt"
content = "some nice content, which will be gziped"
contentType = "text/plain; charset=utf-8"
contentEncoding = "gzip"
)

var b bytes.Buffer
gz := gzip.NewWriter(&b)
if _, err := gz.Write([]byte(content)); err != nil {
t.Fatal(err)
}
if err := gz.Flush(); err != nil {
t.Fatal(err)
}
if err := gz.Close(); err != nil {
t.Fatal(err)
}

objs := []Object{
{
ObjectAttrs: ObjectAttrs{
BucketName: bucketName,
Name: objectName,
ContentType: contentType,
ContentEncoding: contentEncoding,
},
Content: b.Bytes(),
},
}

runServersTest(t, runServersOptions{objs: objs}, func(t *testing.T, server *Server) {
client := server.Client()
objHandle := client.Bucket(bucketName).Object(objectName)
reader, err := objHandle.NewReader(context.TODO())
if err != nil {
t.Fatal(err)
}
defer reader.Close()
data, err := io.ReadAll(reader)
if err != nil {
t.Fatal(err)
}
if string(data) != content {
t.Errorf("wrong data returned\nwant %q\ngot %q", content, string(data))
}
if ct := reader.Attrs.ContentType; ct != contentType {
t.Errorf("wrong content type\nwant %q\ngot %q", contentType, ct)
}
})
}

// TestServerClientObjectSkipTranscoding stores a gzip-compressed object whose
// ContentEncoding is "gzip" and reads it back with ReadCompressed(true). The
// test decompresses the returned stream itself and expects the original text,
// the stored content type, and the stored content encoding to be reported.
func TestServerClientObjectSkipTranscoding(t *testing.T) {
	const (
		bucketName      = "some-bucket"
		objectName      = "items/data.txt"
		content         = "some nice content, which will be gziped"
		contentType     = "text/plain; charset=utf-8"
		contentEncoding = "gzip"
	)

	// Build the compressed fixture that the fake server will hold.
	var zipped bytes.Buffer
	zw := gzip.NewWriter(&zipped)
	_, writeErr := zw.Write([]byte(content))
	if writeErr != nil {
		t.Fatal(writeErr)
	}
	flushErr := zw.Flush()
	if flushErr != nil {
		t.Fatal(flushErr)
	}
	closeErr := zw.Close()
	if closeErr != nil {
		t.Fatal(closeErr)
	}

	fixture := Object{
		ObjectAttrs: ObjectAttrs{
			BucketName:      bucketName,
			Name:            objectName,
			ContentType:     contentType,
			ContentEncoding: contentEncoding,
		},
		Content: zipped.Bytes(),
	}

	runServersTest(t, runServersOptions{objs: []Object{fixture}}, func(t *testing.T, server *Server) {
		// we skip transcoding by `Accept-Encoding: gzip`
		handle := server.Client().Bucket(bucketName).Object(objectName).ReadCompressed(true)
		rd, err := handle.NewReader(context.TODO())
		if err != nil {
			t.Fatal(err)
		}
		defer rd.Close()

		// The stream is still compressed here, so it has to be unzipped manually.
		zr, err := gzip.NewReader(rd)
		if err != nil {
			t.Fatal(err)
		}
		defer zr.Close()

		var plain bytes.Buffer
		if _, err := plain.ReadFrom(zr); err != nil {
			t.Fatal(err)
		}

		if got := string(plain.Bytes()); got != content {
			t.Errorf("wrong data returned\nwant %q\ngot %q", content, got)
		}
		if gotType := rd.Attrs.ContentType; gotType != contentType {
			t.Errorf("wrong content type\nwant %q\ngot %q", contentType, gotType)
		}
		if gotEncoding := rd.Attrs.ContentEncoding; gotEncoding != contentEncoding {
			t.Errorf("wrong content encoding\nwant %q\ngot %q", contentEncoding, gotEncoding)
		}
	})
}

func TestServerClientObjectRangeReader(t *testing.T) {
const (
bucketName = "some-bucket"
Expand Down
4 changes: 4 additions & 0 deletions fakestorage/upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,10 +380,14 @@ func (s *Server) resumableUpload(bucketName string, r *http.Request) jsonRespons
if objName == "" {
objName = metadata.Name
}
if contentEncoding == "" {
contentEncoding = metadata.ContentEncoding
}
obj := Object{
ObjectAttrs: ObjectAttrs{
BucketName: bucketName,
Name: objName,
ContentType: metadata.ContentType,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this the fix for #532?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hope so :)

ContentEncoding: contentEncoding,
ACL: getObjectACL(predefinedACL),
Metadata: metadata.Metadata,
Expand Down
3 changes: 2 additions & 1 deletion internal/backend/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ func (s *storageFS) PatchObject(bucketName, objectName string, metadata map[stri
for k, v := range metadata {
obj.Metadata[k] = v
}
obj.Generation = 0 // reset generation id
return s.CreateObject(obj) // recreate object
}

Expand All @@ -356,7 +357,7 @@ func (s *storageFS) UpdateObject(bucketName, objectName string, metadata map[str
for k, v := range metadata {
obj.Metadata[k] = v
}
obj.Generation = 0
obj.Generation = 0 // reset generation id
return s.CreateObject(obj) // recreate object
}

Expand Down