Skip to content

Commit

Permalink
Merge pull request #19 from nlnwa/fix-warc-ip-address
Browse files Browse the repository at this point in the history
Fix writing of IP address to contentwriter
  • Loading branch information
johnerikhalse authored Aug 19, 2021
2 parents 7186921 + d5ca391 commit d131b78
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 6 deletions.
25 changes: 23 additions & 2 deletions plugin/archivingcache/archivingcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/nlnwa/veidemann-dns-resolver/plugin/forward"
"github.com/nlnwa/veidemann-dns-resolver/plugin/resolve"
"golang.org/x/sync/singleflight"
"net"
"strings"
"time"
)
Expand Down Expand Up @@ -106,11 +107,15 @@ func (a *ArchivingCache) serveDNS(ctx context.Context, w dns.ResponseWriter, r *
msg = nw.Msg

if hasCollectionId {
proxyIpAddr, err := parseHostPortOrIP(proxyAddr)
if err != nil {
log.Errorf("failed to parse proxy address \"%s\" as host:port pair or IP address: %v", proxyAddr, err)
}
mt, _ := response.Typify(msg, a.now)
if err := a.set(key, mt, msg, collectionId, proxyAddr, server); err != nil {
if err := a.set(key, mt, msg, collectionId, proxyIpAddr, server); err != nil {
log.Errorf("%v: %v", key, err)
}
if err = a.archive(state, mt, msg, executionId, collectionId, proxyAddr, fetchStart); err != nil {
if err = a.archive(state, mt, msg, executionId, collectionId, proxyIpAddr, fetchStart); err != nil {
log.Errorf("%v: %v", key, err)
}
}
Expand Down Expand Up @@ -212,6 +217,22 @@ func (a *ArchivingCache) archive(state *request.Request, t response.Type, msg *d
// Name reports the name of this handler, as required by the Handler interface.
func (a *ArchivingCache) Name() string {
	return "archivingcache"
}

// parseHostPortOrIP extracts the IP address from addr, which must be either a
// host:port pair whose host is an IP literal (e.g. "8.8.8.8:53", "[::1]:53")
// or a bare IP address (e.g. "8.8.8.8", "::1").
//
// The returned string is the textual form produced by net.IP.String. If addr
// does not contain a valid IP address, an empty string and a non-nil error
// are returned.
func parseHostPortOrIP(addr string) (string, error) {
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		// Not a host:port pair; fall back to treating addr as a bare IP
		// address. If that fails too, report the SplitHostPort error.
		if ip := net.ParseIP(addr); ip != nil {
			return ip.String(), nil
		}
		return "", err
	}

	// SplitHostPort accepts any host, including hostnames and the empty
	// string, so verify that it really is an IP address before returning it.
	ip := net.ParseIP(host)
	if ip == nil {
		return "", &net.AddrError{Err: "host is not an IP address", Addr: addr}
	}
	return ip.String(), nil
}

type Recorder struct {
dns.ResponseWriter
Rcode int
Expand Down
29 changes: 29 additions & 0 deletions plugin/archivingcache/archivingcache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,32 @@ func codeHandler(rcode int) test.HandlerFunc {
return 0, nil
}
}


// TestParseProxyAddress checks that parseHostPortOrIP returns the IP address
// for host:port and bare-IP inputs, and reports an error (with an empty
// result) for input that is neither.
func TestParseProxyAddress(t *testing.T) {
	tests := []struct {
		addr    string
		want    string
		wantErr bool
	}{
		{addr: "8.8.8.8:53", want: "8.8.8.8"},
		{addr: "8.8.8.8", want: "8.8.8.8"},
		{addr: "njet", want: "", wantErr: true},
	}
	for _, tt := range tests {
		t.Run(tt.addr, func(t *testing.T) {
			got, err := parseHostPortOrIP(tt.addr)
			// Check the error explicitly: an empty result alone does not
			// prove that the failure was actually reported to the caller.
			if (err != nil) != tt.wantErr {
				t.Fatalf("parseHostPortOrIP(%q) error = %v, wantErr %v", tt.addr, err, tt.wantErr)
			}
			if got != tt.want {
				t.Errorf("Expected \"%s\" but got \"%s\"", tt.want, got)
			}
		})
	}
}
4 changes: 2 additions & 2 deletions plugin/archivingcache/contentwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func (c *ContentWriterClient) Connect() error {
}

// WriteRecord writes a WARC record.
func (c *ContentWriterClient) WriteRecord(payload []byte, fetchStart time.Time, requestedHost string, proxyAddr string, executionId string, collectionId string) (*contentwriterV1.WriteReply, error) {
func (c *ContentWriterClient) WriteRecord(payload []byte, fetchStart time.Time, requestedHost string, ipAddress string, executionId string, collectionId string) (*contentwriterV1.WriteReply, error) {
d := sha1.New()
d.Write(payload)
digest := fmt.Sprintf("sha1:%x", d.Sum(nil))
Expand All @@ -55,7 +55,7 @@ func (c *ContentWriterClient) WriteRecord(payload []byte, fetchStart time.Time,
},
TargetUri: "dns:" + requestedHost,
FetchTimeStamp: timestamppb.New(fetchStart),
IpAddress: proxyAddr,
IpAddress: ipAddress,
ExecutionId: executionId,
CollectionRef: &configV1.ConfigRef{
Kind: configV1.Kind_collection,
Expand Down
4 changes: 2 additions & 2 deletions plugin/archivingcache/logwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (l *LogWriterClient) Connect() error {
}

// WriteCrawlLog stores a crawl log of a dns request/response.
func (l *LogWriterClient) WriteCrawlLog(record *contentwriterV1.WriteResponseMeta_RecordMeta, size int, requestedHost string, fetchStart time.Time, fetchDurationMs int64, proxyAddr string, executionId string) error {
func (l *LogWriterClient) WriteCrawlLog(record *contentwriterV1.WriteResponseMeta_RecordMeta, size int, requestedHost string, fetchStart time.Time, fetchDurationMs int64, ipAddress string, executionId string) error {
crawlLog := &logV1.CrawlLog{
ExecutionId: executionId,
RecordType: "resource",
Expand All @@ -58,7 +58,7 @@ func (l *LogWriterClient) WriteCrawlLog(record *contentwriterV1.WriteResponseMet
TimeStamp: timestamppb.New(time.Now().UTC()),
FetchTimeStamp: timestamppb.New(fetchStart),
FetchTimeMs: fetchDurationMs,
IpAddress: proxyAddr,
IpAddress: ipAddress,
ContentType: "text/dns",
Size: int64(size),
WarcId: record.GetWarcId(),
Expand Down

0 comments on commit d131b78

Please sign in to comment.