Skip to content

Commit

Permalink
Add support for gzip transcoding (#924)
Browse files Browse the repository at this point in the history
* reset generation for PatchObject (same as doing UpdateObject)

* add gzip transcoding

* add tests for transcoding

* add tests for transcoding

* add tests for transcoding

* add tests for transcoding

* trying to fix CI

* back CI

* fix contain

* more tests

* rename to handledTranscoding
  • Loading branch information
le0pard authored Sep 21, 2022
1 parent c69b953 commit 5ab869f
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 4 deletions.
45 changes: 42 additions & 3 deletions fakestorage/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fakestorage

import (
"bytes"
"compress/gzip"
"encoding/json"
"errors"
"fmt"
Expand Down Expand Up @@ -763,8 +764,45 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) {
var content io.Reader
content = obj.Content
status := http.StatusOK
ranged, start, lastByte, satisfiable := s.handleRange(obj, r)
contentLength := lastByte - start + 1

transcoded := false
ranged := false
start := int64(0)
lastByte := int64(0)
satisfiable := true
contentLength := int64(0)

// handledTranscoding reports whether the response was switched to
// decompressive transcoding. On success it replaces content with the
// decompressed bytes, sets contentLength and obj.Size to the decompressed
// length, and marks transcoded. On failure it leaves those untouched and
// rewinds obj.Content so the untranscoded path reads from the beginning.
handledTranscoding := func() bool {
	// This should also be false if the Cache-Control metadata field == "no-transform",
	// but we don't currently support that field.
	// See https://cloud.google.com/storage/docs/transcoding
	if obj.ContentEncoding != "gzip" || strings.Contains(r.Header.Get("accept-encoding"), "gzip") {
		return false
	}

	// GCS will transparently decompress gzipped content, see
	// https://cloud.google.com/storage/docs/transcoding
	// In this case, any Range header is ignored and the full content is returned.
	rawContent, err := func() ([]byte, error) {
		gzipReader, err := gzip.NewReader(content)
		if err != nil {
			return nil, err
		}
		defer gzipReader.Close()
		return io.ReadAll(gzipReader)
	}()
	if err != nil {
		// The content is not a valid gzip stream: ignore the error and continue
		// without transcoding. The decompression attempt has already consumed
		// bytes from obj.Content (gzip.NewReader reads the header, possibly
		// through an internal buffer), so rewind before the stored bytes are
		// served as-is. The rewind is best effort: this closure can only
		// report a bool, so a failed Seek cannot be surfaced from here.
		_, _ = obj.Content.Seek(0, io.SeekStart)
		return false
	}

	transcoded = true
	content = bytes.NewReader(rawContent)
	contentLength = int64(len(rawContent))
	obj.Size = contentLength
	return true
}

if !handledTranscoding() {
ranged, start, lastByte, satisfiable = s.handleRange(obj, r)
contentLength = lastByte - start + 1
}

if ranged && satisfiable {
_, err = obj.Content.Seek(start, io.SeekStart)
Expand Down Expand Up @@ -793,7 +831,8 @@ func (s *Server) downloadObject(w http.ResponseWriter, r *http.Request) {
if obj.ContentType != "" {
w.Header().Set(contentTypeHeader, obj.ContentType)
}
if obj.ContentEncoding != "" {
// If content was transcoded, the underlying encoding was removed so we shouldn't report it.
if obj.ContentEncoding != "" && !transcoded {
w.Header().Set("Content-Encoding", obj.ContentEncoding)
}
}
Expand Down
123 changes: 123 additions & 0 deletions fakestorage/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fakestorage

import (
"bytes"
"compress/gzip"
"context"
"encoding/binary"
"errors"
Expand Down Expand Up @@ -361,6 +362,128 @@ func TestServerClientObjectReader(t *testing.T) {
})
}

func TestServerClientObjectTranscoding(t *testing.T) {
const (
bucketName = "some-bucket"
objectName = "items/data.txt"
content = "some nice content, which will be gziped"
contentType = "text/plain; charset=utf-8"
contentEncoding = "gzip"
)

var b bytes.Buffer
gz := gzip.NewWriter(&b)
if _, err := gz.Write([]byte(content)); err != nil {
t.Fatal(err)
}
if err := gz.Flush(); err != nil {
t.Fatal(err)
}
if err := gz.Close(); err != nil {
t.Fatal(err)
}

objs := []Object{
{
ObjectAttrs: ObjectAttrs{
BucketName: bucketName,
Name: objectName,
ContentType: contentType,
ContentEncoding: contentEncoding,
},
Content: b.Bytes(),
},
}

runServersTest(t, runServersOptions{objs: objs}, func(t *testing.T, server *Server) {
client := server.Client()
objHandle := client.Bucket(bucketName).Object(objectName)
reader, err := objHandle.NewReader(context.TODO())
if err != nil {
t.Fatal(err)
}
defer reader.Close()
data, err := io.ReadAll(reader)
if err != nil {
t.Fatal(err)
}
if string(data) != content {
t.Errorf("wrong data returned\nwant %q\ngot %q", content, string(data))
}
if ct := reader.Attrs.ContentType; ct != contentType {
t.Errorf("wrong content type\nwant %q\ngot %q", contentType, ct)
}
})
}

func TestServerClientObjectSkipTranscoding(t *testing.T) {
const (
bucketName = "some-bucket"
objectName = "items/data.txt"
content = "some nice content, which will be gziped"
contentType = "text/plain; charset=utf-8"
contentEncoding = "gzip"
)

var b bytes.Buffer
gz := gzip.NewWriter(&b)
if _, err := gz.Write([]byte(content)); err != nil {
t.Fatal(err)
}
if err := gz.Flush(); err != nil {
t.Fatal(err)
}
if err := gz.Close(); err != nil {
t.Fatal(err)
}

objs := []Object{
{
ObjectAttrs: ObjectAttrs{
BucketName: bucketName,
Name: objectName,
ContentType: contentType,
ContentEncoding: contentEncoding,
},
Content: b.Bytes(),
},
}

runServersTest(t, runServersOptions{objs: objs}, func(t *testing.T, server *Server) {
client := server.Client()
objHandle := client.Bucket(bucketName).Object(objectName).ReadCompressed(true) // we skip transcoding by `Accept-Encoding: gzip`
reader, err := objHandle.NewReader(context.TODO())
if err != nil {
t.Fatal(err)
}
defer reader.Close()
// need to unzip manually
gzr, err := gzip.NewReader(reader)
if err != nil {
t.Fatal(err)
}
defer gzr.Close()

var rawBytes bytes.Buffer
_, err = rawBytes.ReadFrom(gzr)
if err != nil {
t.Fatal(err)
}

data := rawBytes.Bytes()

if string(data) != content {
t.Errorf("wrong data returned\nwant %q\ngot %q", content, string(data))
}
if ct := reader.Attrs.ContentType; ct != contentType {
t.Errorf("wrong content type\nwant %q\ngot %q", contentType, ct)
}
if ct := reader.Attrs.ContentEncoding; ct != contentEncoding {
t.Errorf("wrong content encoding\nwant %q\ngot %q", contentEncoding, ct)
}
})
}

func TestServerClientObjectRangeReader(t *testing.T) {
const (
bucketName = "some-bucket"
Expand Down
4 changes: 4 additions & 0 deletions fakestorage/upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,10 +380,14 @@ func (s *Server) resumableUpload(bucketName string, r *http.Request) jsonRespons
if objName == "" {
objName = metadata.Name
}
if contentEncoding == "" {
contentEncoding = metadata.ContentEncoding
}
obj := Object{
ObjectAttrs: ObjectAttrs{
BucketName: bucketName,
Name: objName,
ContentType: metadata.ContentType,
ContentEncoding: contentEncoding,
ACL: getObjectACL(predefinedACL),
Metadata: metadata.Metadata,
Expand Down
3 changes: 2 additions & 1 deletion internal/backend/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ func (s *storageFS) PatchObject(bucketName, objectName string, metadata map[stri
for k, v := range metadata {
obj.Metadata[k] = v
}
obj.Generation = 0 // reset generation id
return s.CreateObject(obj) // recreate object
}

Expand All @@ -356,7 +357,7 @@ func (s *storageFS) UpdateObject(bucketName, objectName string, metadata map[str
for k, v := range metadata {
obj.Metadata[k] = v
}
obj.Generation = 0
obj.Generation = 0 // reset generation id
return s.CreateObject(obj) // recreate object
}

Expand Down

0 comments on commit 5ab869f

Please sign in to comment.