forked from nlnwa/gowarcserver
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nlnwa#12: tests for cmd functionality
Got some help from @maeb to resolve test data issues :)
- Loading branch information
Showing
5 changed files
with
197 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package index | ||
|
||
import ( | ||
"fmt" | ||
"io" | ||
"strconv" | ||
|
||
"github.com/nlnwa/gowarc/warcoptions" | ||
"github.com/nlnwa/gowarc/warcreader" | ||
"github.com/nlnwa/gowarcserver/pkg/index" | ||
logrus "github.com/sirupsen/logrus" | ||
) | ||
|
||
func ParseFormat(format string) (index.CdxWriter, error) { | ||
switch format { | ||
case "cdx": | ||
return &index.CdxLegacy{}, nil | ||
case "cdxj": | ||
return &index.CdxJ{}, nil | ||
case "cdxpb": | ||
return &index.CdxPb{}, nil | ||
case "db": | ||
return &index.CdxDb{}, nil | ||
} | ||
return nil, fmt.Errorf("unknwon format %v, valid formats are: 'cdx', 'cdxj', 'cdxpb', 'db'", format) | ||
} | ||
|
||
func ReadFile(c *conf) error { | ||
opts := &warcoptions.WarcOptions{Strict: false} | ||
wf, err := warcreader.NewWarcFilename(c.fileName, 0, opts) | ||
if err != nil { | ||
return err | ||
} | ||
defer wf.Close() | ||
|
||
count := 0 | ||
|
||
// avoid defer copy value by using a anonymous function | ||
// At the end, print count even if an error occurs | ||
defer func() { | ||
logrus.Printf("Count: %d", count) | ||
}() | ||
|
||
for { | ||
wr, currentOffset, err := wf.Next() | ||
if err == io.EOF { | ||
break | ||
} | ||
if err != nil { | ||
return fmt.Errorf("Error: %v, rec num: %v, Offset %v\n", err.Error(), strconv.Itoa(count), currentOffset) | ||
} | ||
count++ | ||
|
||
c.writer.Write(wr, c.fileName, currentOffset) | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
package index | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"path" | ||
"reflect" | ||
"testing" | ||
|
||
"github.com/nlnwa/gowarcserver/pkg/index" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
func TestParseFormat(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
format string | ||
expected reflect.Type | ||
errorState bool | ||
}{ | ||
{ | ||
"'cdx' results in CdxLegacy writer", | ||
"cdx", | ||
reflect.TypeOf((*index.CdxLegacy)(nil)), | ||
false, | ||
}, | ||
{ | ||
"'cdxj' results in CdxJ writer", | ||
"cdxj", | ||
reflect.TypeOf((*index.CdxJ)(nil)), | ||
false, | ||
}, | ||
{ | ||
"'db' results in CdxDb writer", | ||
"db", | ||
reflect.TypeOf((*index.CdxDb)(nil)), | ||
false, | ||
}, | ||
{ | ||
"'cdxpb' results in CdxPd writer", | ||
"cdxpb", | ||
reflect.TypeOf((*index.CdxPb)(nil)), | ||
false, | ||
}, | ||
{ | ||
"'cd' results in error", | ||
"cd", | ||
nil, | ||
true, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
got, err := ParseFormat(tt.format) | ||
if err != nil && !tt.errorState { | ||
t.Errorf("Unexpected failure: %v", err) | ||
} else if err == nil && tt.errorState { | ||
t.Errorf("Expected error parsing '%v', got type %T", tt.format, got) | ||
} | ||
|
||
if reflect.TypeOf(got) != tt.expected { | ||
t.Errorf("Expected %v, got %v", tt.expected, got) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
// TODO: this was hard to write tests for and therefore ReadFile | ||
// should probably be refactored | ||
func TestReadFile(t *testing.T) { | ||
log.SetLevel(log.WarnLevel) | ||
// same as testdata/example.warc except removed gzip content because of illegal go str characters | ||
testFileContent := []byte(`WARC/1.0 | ||
WARC-Date: 2017-03-06T04:03:53Z | ||
WARC-Record-ID: <urn:uuid:e9a0cecc-0221-11e7-adb1-0242ac120008> | ||
WARC-Type: warcinfo | ||
Content-Length: 0`) | ||
|
||
filepath := path.Join(t.TempDir(), "test.warc") | ||
file, err := os.Create(filepath) | ||
if err != nil { | ||
t.Fatalf("Failed to create testfile at '%s'", filepath) | ||
} | ||
// This is not strictly needed because of tmp, but to be platform agnostic it might be a good idea | ||
defer file.Close() | ||
|
||
_, err = file.Write(testFileContent) | ||
if err != nil { | ||
t.Fatalf("Failed to write to testfile at '%s'", filepath) | ||
} | ||
|
||
err = file.Sync() | ||
if err != nil { | ||
t.Fatalf("Failed to sync testfile at '%s'", filepath) | ||
} | ||
|
||
tests := []struct { | ||
writerFormat string | ||
writer index.CdxWriter | ||
}{ | ||
{ | ||
"cdx", | ||
&index.CdxLegacy{}, | ||
}, | ||
{ | ||
"cdxj", | ||
&index.CdxJ{}, | ||
}, | ||
{ | ||
|
||
"cdxpd", | ||
&index.CdxPb{}, | ||
}, | ||
{ | ||
"db", | ||
&index.CdxDb{}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
testName := fmt.Sprintf("Readfile: %T successfully indexes", tt.writer) | ||
t.Run(testName, func(t *testing.T) { | ||
c := &conf{ | ||
filepath, | ||
tt.writerFormat, | ||
tt.writer, | ||
} | ||
c.writer.Init() | ||
defer c.writer.Close() | ||
|
||
err := ReadFile(c) | ||
if err != nil { | ||
t.Errorf("Unexpected failure: %v", err) | ||
} | ||
|
||
}) | ||
} | ||
} |
Binary file not shown.
Binary file not shown.