Skip to content

Commit

Permalink
nlnwa#12: tests for cmd functionality
Browse files Browse the repository at this point in the history
Got some help from @maeb to resolve test data issues :)
  • Loading branch information
Avokadoen committed Mar 22, 2021
1 parent 32d2e26 commit f3425e0
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 33 deletions.
34 changes: 1 addition & 33 deletions cmd/warcserver/cmd/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@ package index
import (
"errors"
"fmt"
"io"
"os"
"strconv"

"github.com/nlnwa/gowarc/warcoptions"
"github.com/nlnwa/gowarc/warcreader"
"github.com/nlnwa/gowarcserver/pkg/index"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -84,33 +79,6 @@ func runE(c *conf) error {
}
defer c.writer.Close()

readFile(c)
ReadFile(c)
return nil
}

// readFile walks the WARC file named by c.fileName and hands every record,
// together with the file name and the record's offset, to c.writer.
//
// The function has no error return, so failures are reported on stderr and
// end the run early. When the file could be opened, the number of records
// processed is printed to stderr after the loop.
//
// TODO: return error
func readFile(c *conf) {
	opts := &warcoptions.WarcOptions{Strict: false}
	wf, err := warcreader.NewWarcFilename(c.fileName, 0, opts)
	if err != nil {
		// An open failure used to be dropped without any trace; report it the
		// same way read failures are reported below.
		_, _ = fmt.Fprintf(os.Stderr, "Error: %v, file: %v\n", err, c.fileName)
		return
	}
	defer wf.Close()

	count := 0

	for {
		wr, currentOffset, err := wf.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			_, _ = fmt.Fprintf(os.Stderr, "Error: %v, rec num: %v, Offset %v\n", err.Error(), strconv.Itoa(count), currentOffset)
			break
		}
		count++

		c.writer.Write(wr, c.fileName, currentOffset)
	}
	fmt.Fprintln(os.Stderr, "Count: ", count)
}
57 changes: 57 additions & 0 deletions cmd/warcserver/cmd/index/io.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package index

import (
"fmt"
"io"
"strconv"

"github.com/nlnwa/gowarc/warcoptions"
"github.com/nlnwa/gowarc/warcreader"
"github.com/nlnwa/gowarcserver/pkg/index"
logrus "github.com/sirupsen/logrus"
)

// ParseFormat maps an index format name to a freshly allocated writer for
// that format.
//
// Recognised names are "cdx", "cdxj", "cdxpb" and "db". Any other name yields
// a nil writer and an error that lists the valid names.
func ParseFormat(format string) (index.CdxWriter, error) {
	switch format {
	case "cdx":
		return &index.CdxLegacy{}, nil
	case "cdxj":
		return &index.CdxJ{}, nil
	case "cdxpb":
		return &index.CdxPb{}, nil
	case "db":
		return &index.CdxDb{}, nil
	default:
		return nil, fmt.Errorf("unknown format %v, valid formats are: 'cdx', 'cdxj', 'cdxpb', 'db'", format)
	}
}

// ReadFile indexes the WARC file named by c.fileName: every record read from
// the file is passed to c.writer together with the file name and the offset
// reported by the reader.
//
// It returns an error when the file cannot be opened or when reading a record
// fails with anything other than io.EOF. The returned error wraps the
// underlying cause, so callers can inspect it with errors.Is / errors.As.
// Once the file has been opened, the number of records read is logged when
// the function returns, whether or not an error occurred.
func ReadFile(c *conf) error {
	opts := &warcoptions.WarcOptions{Strict: false}
	wf, err := warcreader.NewWarcFilename(c.fileName, 0, opts)
	if err != nil {
		return fmt.Errorf("opening warc file %v: %w", c.fileName, err)
	}
	defer wf.Close()

	count := 0

	// Deferred arguments are evaluated at the defer statement, so logging
	// count directly would always print 0. The closure reads count when it
	// runs, which gives the final value even on the error path.
	defer func() {
		logrus.Printf("Count: %d", count)
	}()

	for {
		wr, currentOffset, err := wf.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("reading record number %v at offset %v: %w", strconv.Itoa(count), currentOffset, err)
		}
		count++

		// NOTE(review): whatever Write returns is discarded here, as before —
		// confirm whether it reports an error that should stop indexing.
		c.writer.Write(wr, c.fileName, currentOffset)
	}
	return nil
}
139 changes: 139 additions & 0 deletions cmd/warcserver/cmd/index/io_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package index

import (
"fmt"
"os"
"path"
"reflect"
"testing"

"github.com/nlnwa/gowarcserver/pkg/index"
log "github.com/sirupsen/logrus"
)

// TestParseFormat checks that every supported format name yields a writer of
// the matching concrete type, and that an unsupported name yields an error
// together with a nil writer.
func TestParseFormat(t *testing.T) {
	tests := []struct {
		name       string
		format     string
		expected   reflect.Type
		errorState bool
	}{
		{
			"'cdx' results in CdxLegacy writer",
			"cdx",
			reflect.TypeOf((*index.CdxLegacy)(nil)),
			false,
		},
		{
			"'cdxj' results in CdxJ writer",
			"cdxj",
			reflect.TypeOf((*index.CdxJ)(nil)),
			false,
		},
		{
			"'db' results in CdxDb writer",
			"db",
			reflect.TypeOf((*index.CdxDb)(nil)),
			false,
		},
		{
			"'cdxpb' results in CdxPb writer",
			"cdxpb",
			reflect.TypeOf((*index.CdxPb)(nil)),
			false,
		},
		{
			"'cd' results in error",
			"cd",
			nil,
			true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ParseFormat(tt.format)
			switch {
			case err != nil && !tt.errorState:
				t.Errorf("Unexpected failure: %v", err)
			case err == nil && tt.errorState:
				t.Errorf("Expected error parsing '%v', got type %T", tt.format, got)
			}

			// Compare dynamic types only; in the error case both sides are nil.
			if reflect.TypeOf(got) != tt.expected {
				t.Errorf("Expected %v, got %T", tt.expected, got)
			}
		})
	}
}

// TestReadFile writes a minimal single-record WARC file into a temporary
// directory and checks that ReadFile indexes it without error for each of the
// available writers.
//
// TODO: this was hard to write tests for and therefore ReadFile
// should probably be refactored
func TestReadFile(t *testing.T) {
	log.SetLevel(log.WarnLevel)
	// same as testdata/example.warc except removed gzip content because of illegal go str characters
	testFileContent := []byte(`WARC/1.0
WARC-Date: 2017-03-06T04:03:53Z
WARC-Record-ID: <urn:uuid:e9a0cecc-0221-11e7-adb1-0242ac120008>
WARC-Type: warcinfo
Content-Length: 0`)

	filepath := path.Join(t.TempDir(), "test.warc")
	file, err := os.Create(filepath)
	if err != nil {
		t.Fatalf("Failed to create testfile at '%s': %v", filepath, err)
	}
	// This is not strictly needed because of tmp, but to be platform agnostic it might be a good idea
	defer file.Close()

	if _, err = file.Write(testFileContent); err != nil {
		t.Fatalf("Failed to write to testfile at '%s': %v", filepath, err)
	}

	// Flush to disk before ReadFile opens the same path for reading.
	if err = file.Sync(); err != nil {
		t.Fatalf("Failed to sync testfile at '%s': %v", filepath, err)
	}

	tests := []struct {
		writerFormat string
		writer       index.CdxWriter
	}{
		{
			"cdx",
			&index.CdxLegacy{},
		},
		{
			"cdxj",
			&index.CdxJ{},
		},
		{
			// Must match the name ParseFormat accepts for the CdxPb writer.
			"cdxpb",
			&index.CdxPb{},
		},
		{
			"db",
			&index.CdxDb{},
		},
	}

	for _, tt := range tests {
		testName := fmt.Sprintf("Readfile: %T successfully indexes", tt.writer)
		t.Run(testName, func(t *testing.T) {
			c := &conf{
				filepath,
				tt.writerFormat,
				tt.writer,
			}
			// NOTE(review): any result of Init is discarded, as before —
			// confirm whether it can fail and should abort the subtest.
			c.writer.Init()
			defer c.writer.Close()

			if err := ReadFile(c); err != nil {
				t.Errorf("Unexpected failure: %v", err)
			}
		})
	}
}
Binary file modified testdata/example-trunc.warc
Binary file not shown.
Binary file modified testdata/example.warc
Binary file not shown.

0 comments on commit f3425e0

Please sign in to comment.