From 6b80b485253c9f57b9706aeac392aa91f522f35a Mon Sep 17 00:00:00 2001 From: Tamal Saha Date: Thu, 31 Oct 2019 00:07:33 -0700 Subject: [PATCH 1/2] Add --exclude-private-hosts Signed-off-by: Tamal Saha --- README.md | 3 ++- arguments.go | 19 +++++++++++-------- arguments_test.go | 34 +++++++++++++++++++++------------- file_checker.go | 4 ++-- file_checker_test.go | 8 ++++---- go.sum | 2 +- main.go | 1 + url_checker.go | 42 ++++++++++++++++++++++++++++++++++++------ url_checker_test.go | 30 +++++++++++++++++++++++------- 9 files changed, 101 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 6f0dfc7..ca821b6 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ go get -u github.com/raviqqe/liche Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-x ] [-v] ... + liche [-c ] [-d ] [-r] [-t ] [-x ] [-p] [-v] ... Options: -c, --concurrency Set max number of concurrent HTTP requests. [default: 512] @@ -31,6 +31,7 @@ Options: -r, --recursive Search Markdown and HTML files recursively -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. -x, --exclude Regex of links to exclude from checking. + -p, --exclude-private-hosts Exclude private domains and ip addresses. -v, --verbose Be verbose. ``` diff --git a/arguments.go b/arguments.go index a97bb99..ae9b67e 100644 --- a/arguments.go +++ b/arguments.go @@ -14,7 +14,7 @@ const defaultConcurrency = maxOpenFiles / 2 const usage = `Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-x ] [-v] ... + liche [-c ] [-d ] [-r] [-t ] [-x ] [-p] [-v] ... Options: -c, --concurrency Set max number of concurrent HTTP requests. [default: %v] @@ -22,16 +22,18 @@ Options: -r, --recursive Search Markdown and HTML files recursively -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. -x, --exclude Regex of links to exclude from checking. + -p, --exclude-private-hosts Exclude private domains and ip addresses. 
-v, --verbose Be verbose.` type arguments struct { - filenames []string - documentRoot string - concurrency int - timeout time.Duration - excludedPattern *regexp.Regexp - recursive bool - verbose bool + filenames []string + documentRoot string + concurrency int + timeout time.Duration + excludedPattern *regexp.Regexp + excludePrivateHosts bool + recursive bool + verbose bool } func getArguments(argv []string) (arguments, error) { @@ -77,6 +79,7 @@ func getArguments(argv []string) (arguments, error) { int(c), time.Duration(t) * time.Second, r, + args["--exclude-private-hosts"].(bool), args["--recursive"].(bool), args["--verbose"].(bool), }, nil diff --git a/arguments_test.go b/arguments_test.go index d56fcce..b2d99cd 100644 --- a/arguments_test.go +++ b/arguments_test.go @@ -15,55 +15,63 @@ func TestGetArguments(t *testing.T) { }{ { argv: []string{"file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"-c", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, }, { argv: []string{"--concurrency", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, }, { argv: []string{"-d", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"--document-root", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, }, { argv: []string{"-r", "file"}, - args: arguments{[]string{"file"}, "", 
defaultConcurrency, 0, nil, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false}, }, { argv: []string{"--recursive", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false}, }, { argv: []string{"-t", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, }, { argv: []string{"--timeout", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, }, { argv: []string{"-x", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, }, { argv: []string{"--exclude", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, + }, + { + argv: []string{"-p", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, + }, + { + argv: []string{"--exclude-private-hosts", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, }, { argv: []string{"-v", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, }, { argv: []string{"--verbose", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true}, + args: 
arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, }, } { args, err := getArguments(c.argv) diff --git a/file_checker.go b/file_checker.go index b60db75..bbe199e 100644 --- a/file_checker.go +++ b/file_checker.go @@ -18,8 +18,8 @@ type fileChecker struct { semaphore semaphore } -func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, s semaphore) fileChecker { - return fileChecker{newURLChecker(timeout, d, r, s), s} +func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, excludePrivateHosts bool, s semaphore) fileChecker { + return fileChecker{newURLChecker(timeout, d, r, excludePrivateHosts, s), s} } func (c fileChecker) Check(f string) ([]urlResult, error) { diff --git a/file_checker_test.go b/file_checker_test.go index 06a70c5..4d477e9 100644 --- a/file_checker_test.go +++ b/file_checker_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileCheckerCheck(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(1024)) + c := newFileChecker(0, "", nil, false, newSemaphore(1024)) for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} { rs, err := c.Check(f) @@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) { } func TestFileCheckerCheckMany(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, false, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"README.md"}, @@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) { } func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, false, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"test/absolute_path.md"}, @@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { } func TestFileCheckerExtractURLs(t *testing.T) { - c := newFileChecker(0, "", nil, newSemaphore(42)) + c := newFileChecker(0, "", nil, false, newSemaphore(42)) 
for _, x := range []struct { html string diff --git a/go.sum b/go.sum index eaa27ff..03bf842 100644 --- a/go.sum +++ b/go.sum @@ -20,7 +20,6 @@ github.com/mattn/go-isatty v0.0.9 h1:d5US/mDsogSGW37IV293h//ZFaeajb69h+EHFsv2xGg github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/russross/blackfriday v2.0.0+incompatible h1:cBXrhZNUf9C+La9/YpS+UHpUT8YD6Td9ZMSU9APFcsk= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= @@ -44,6 +43,7 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24 h1:R8bzl0244nw47n1xKs1MUMAaTNgjavKcN/aX2Ss3+Fo= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/main.go b/main.go index 920f23f..cb090f8 100644 --- a/main.go +++ b/main.go @@ -34,6 +34,7 @@ func main() { args.timeout, args.documentRoot, args.excludedPattern, + args.excludePrivateHosts, newSemaphore(args.concurrency)) go c.CheckMany(m.Filenames(), rc) diff --git a/url_checker.go b/url_checker.go index f3895a0..bc4a4fb 100644 --- a/url_checker.go +++ 
b/url_checker.go @@ -2,6 +2,7 @@ package main import ( "errors" + "net" "net/url" "os" "path" @@ -11,17 +12,19 @@ import ( "time" "github.com/valyala/fasthttp" + "golang.org/x/net/publicsuffix" ) type urlChecker struct { - timeout time.Duration - documentRoot string - excludedPattern *regexp.Regexp - semaphore semaphore + timeout time.Duration + documentRoot string + excludedPattern *regexp.Regexp + excludePrivateHosts bool + semaphore semaphore } -func newURLChecker(t time.Duration, d string, r *regexp.Regexp, s semaphore) urlChecker { - return urlChecker{t, d, r, s} +func newURLChecker(t time.Duration, d string, r *regexp.Regexp, excludePrivateHosts bool, s semaphore) urlChecker { + return urlChecker{t, d, r, excludePrivateHosts, s} } func (c urlChecker) Check(u string, f string) error { @@ -30,6 +33,18 @@ func (c urlChecker) Check(u string, f string) error { return err } + if !local && c.excludePrivateHosts { + uu, _ := url.Parse(u) + host := uu.Hostname() + if ip := net.ParseIP(host); ip != nil { + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || isPrivate(ip) { + return nil + } + } else if _, icann := publicsuffix.PublicSuffix(host); !icann { + return nil // private domain + } + } + if c.excludedPattern != nil && c.excludedPattern.MatchString(u) { return nil } @@ -88,3 +103,18 @@ func (c urlChecker) resolveURL(u string, f string) (string, bool, error) { return path.Join(c.documentRoot, uu.Path), true, nil } + +// isPrivate reports whether `ip' is a local address, according to +// RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses). 
+// xref: https://go-review.googlesource.com/c/go/+/162998/ +// xref: https://github.com/golang/go/issues/29146 +func isPrivate(ip net.IP) bool { + if ip4 := ip.To4(); ip4 != nil { + // Local IPv4 addresses are defined in https://tools.ietf.org/html/rfc1918 + return ip4[0] == 10 || + (ip4[0] == 172 && ip4[1]&0xf0 == 16) || + (ip4[0] == 192 && ip4[1] == 168) + } + // Local IPv6 addresses are defined in https://tools.ietf.org/html/rfc4193 + return len(ip) == net.IPv6len && ip[0]&0xfe == 0xfc +} diff --git a/url_checker_test.go b/url_checker_test.go index 435976b..997d00f 100644 --- a/url_checker_test.go +++ b/url_checker_test.go @@ -9,7 +9,7 @@ import ( ) func TestURLCheckerCheck(t *testing.T) { - c := newURLChecker(0, "", nil, newSemaphore(1024)) + c := newURLChecker(0, "", nil, false, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -21,7 +21,7 @@ func TestURLCheckerCheck(t *testing.T) { } func TestURLCheckerCheckWithExclude(t *testing.T) { - c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), newSemaphore(1024)) + c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), false, newSemaphore(1024)) for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -32,8 +32,24 @@ func TestURLCheckerCheckWithExclude(t *testing.T) { } } +func TestURLCheckerCheckWithExcludePrivateHosts(t *testing.T) { + c := newURLChecker(0, "", nil, true, newSemaphore(1024)) + + for _, u := range []string{ + "http://localhost:1", + "http://localhost:3", + "http://127.0.0.1:1", + "http://169.254.169.254:1", + "http://192.168.99.100", + "http://example.test", + "http://example.abcdxyz", + } { + assert.Equal(t, nil, c.Check(u, "README.md")) + } +} + func TestURLCheckerCheckWithTimeout(t *testing.T) { - c := newURLChecker(30*time.Second, "", nil, newSemaphore(1024)) + c := 
newURLChecker(30*time.Second, "", nil, false, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -45,7 +61,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) { } func TestURLCheckerCheckMany(t *testing.T) { - c := newURLChecker(0, "", nil, newSemaphore(1024)) + c := newURLChecker(0, "", nil, false, newSemaphore(1024)) for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { rc := make(chan urlResult, 1024) @@ -58,7 +74,7 @@ func TestURLCheckerCheckMany(t *testing.T) { } } func TestURLCheckerResolveURL(t *testing.T) { - f := newURLChecker(0, "", nil, newSemaphore(1024)) + f := newURLChecker(0, "", nil, false, newSemaphore(1024)) for _, c := range []struct { source, target string @@ -76,7 +92,7 @@ func TestURLCheckerResolveURL(t *testing.T) { } func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { - f := newURLChecker(0, "", nil, newSemaphore(1024)) + f := newURLChecker(0, "", nil, false, newSemaphore(1024)) u, _, err := f.resolveURL("/foo", "foo.md") @@ -85,7 +101,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { } func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) { - f := newURLChecker(0, "foo", nil, newSemaphore(1024)) + f := newURLChecker(0, "foo", nil, false, newSemaphore(1024)) for _, c := range []struct { source, target string From a5102b0bf90203b467a4f3b4597d22cd83d94f99 Mon Sep 17 00:00:00 2001 From: Tamal Saha Date: Fri, 1 Nov 2019 15:48:07 -0700 Subject: [PATCH 2/2] Add --exclude-localhost & --exclude-link-local Signed-off-by: Tamal Saha --- README.md | 6 ++++-- arguments.go | 8 +++++++- arguments_test.go | 46 +++++++++++++++++++++++++++++--------------- file_checker.go | 4 ++-- file_checker_test.go | 8 ++++---- main.go | 2 ++ url_checker.go | 26 +++++++++++++++++++------ url_checker_test.go | 40 +++++++++++++++++++++++++++----------- 8 files changed, 99 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md 
index ca821b6..b125dde 100644 --- a/README.md +++ b/README.md @@ -23,15 +23,17 @@ go get -u github.com/raviqqe/liche Link checker for Markdown and HTML Usage: - liche [-c ] [-d ] [-r] [-t ] [-x ] [-p] [-v] ... + liche [-c ] [-d ] [-r] [-t ] [-x ] [-p] [-h] [-l] [-v] ... Options: - -c, --concurrency Set max number of concurrent HTTP requests. [default: 512] + -c, --concurrency Set max number of concurrent HTTP requests. [default: 512] -d, --document-root Set document root directory for absolute paths. -r, --recursive Search Markdown and HTML files recursively -t, --timeout Set timeout for HTTP requests in seconds. Disabled by default. -x, --exclude Regex of links to exclude from checking. -p, --exclude-private-hosts Exclude private domains and ip addresses. + -h, --exclude-localhost Exclude localhost addresses. + -l, --exclude-link-local Exclude link local addresses. 
-v, --verbose Be verbose.` type arguments struct { @@ -32,6 +34,8 @@ type arguments struct { timeout time.Duration excludedPattern *regexp.Regexp excludePrivateHosts bool + excludeLocalhost bool + excludeLinkLocal bool recursive bool verbose bool } @@ -80,6 +84,8 @@ func getArguments(argv []string) (arguments, error) { time.Duration(t) * time.Second, r, args["--exclude-private-hosts"].(bool), + args["--exclude-localhost"].(bool), + args["--exclude-link-local"].(bool), args["--recursive"].(bool), args["--verbose"].(bool), }, nil diff --git a/arguments_test.go b/arguments_test.go index b2d99cd..b0eee1d 100644 --- a/arguments_test.go +++ b/arguments_test.go @@ -15,63 +15,79 @@ func TestGetArguments(t *testing.T) { }{ { argv: []string{"file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, false}, }, { argv: []string{"-c", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false, false, false}, }, { argv: []string{"--concurrency", "42", "file"}, - args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false}, + args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false, false, false}, }, { argv: []string{"-d", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false, false, false}, }, { argv: []string{"--document-root", "directory", "file"}, - args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false}, + args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false, false, false}, }, { argv: []string{"-r", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, 
false, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, true, false}, }, { argv: []string{"--recursive", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, true, false}, }, { argv: []string{"-t", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false, false, false}, }, { argv: []string{"--timeout", "42", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false, false, false}, }, { argv: []string{"-x", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false, false, false}, }, { argv: []string{"--exclude", "^.*$", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false, false, false}, }, { argv: []string{"-p", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false, false, false}, }, { argv: []string{"--exclude-private-hosts", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false, false, false}, + }, + { + argv: []string{"-h", "file"}, + args: 
arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false, false, false}, + }, + { + argv: []string{"--exclude-localhost", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false, false, false}, + }, + { + argv: []string{"-l", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true, false, false}, + }, + { + argv: []string{"--exclude-link-local", "file"}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true, false, false}, }, { argv: []string{"-v", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, true}, }, { argv: []string{"--verbose", "file"}, - args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true}, + args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, true}, }, } { args, err := getArguments(c.argv) diff --git a/file_checker.go b/file_checker.go index bbe199e..6f9971f 100644 --- a/file_checker.go +++ b/file_checker.go @@ -18,8 +18,8 @@ type fileChecker struct { semaphore semaphore } -func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, excludePrivateHosts bool, s semaphore) fileChecker { - return fileChecker{newURLChecker(timeout, d, r, excludePrivateHosts, s), s} +func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, excludePrivateHosts, excludeLocalhost, excludeLinkLocal bool, s semaphore) fileChecker { + return fileChecker{newURLChecker(timeout, d, r, excludePrivateHosts, excludeLocalhost, excludeLinkLocal, s), s} } func (c fileChecker) Check(f string) ([]urlResult, error) { diff --git a/file_checker_test.go b/file_checker_test.go index 4d477e9..8c59913 100644 --- a/file_checker_test.go +++ b/file_checker_test.go @@ -10,7 +10,7 @@ import ( ) func TestFileCheckerCheck(t 
*testing.T) { - c := newFileChecker(0, "", nil, false, newSemaphore(1024)) + c := newFileChecker(0, "", nil, false, false, false, newSemaphore(1024)) for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} { rs, err := c.Check(f) @@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) { } func TestFileCheckerCheckMany(t *testing.T) { - c := newFileChecker(0, "", nil, false, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, false, false, false, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"README.md"}, @@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) { } func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { - c := newFileChecker(0, "", nil, false, newSemaphore(maxOpenFiles)) + c := newFileChecker(0, "", nil, false, false, false, newSemaphore(maxOpenFiles)) for _, fs := range [][]string{ {"test/absolute_path.md"}, @@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) { } func TestFileCheckerExtractURLs(t *testing.T) { - c := newFileChecker(0, "", nil, false, newSemaphore(42)) + c := newFileChecker(0, "", nil, false, false, false, newSemaphore(42)) for _, x := range []struct { html string diff --git a/main.go b/main.go index cb090f8..32f2134 100644 --- a/main.go +++ b/main.go @@ -35,6 +35,8 @@ func main() { args.documentRoot, args.excludedPattern, args.excludePrivateHosts, + args.excludeLocalhost, + args.excludeLinkLocal, newSemaphore(args.concurrency)) go c.CheckMany(m.Filenames(), rc) diff --git a/url_checker.go b/url_checker.go index bc4a4fb..2d70916 100644 --- a/url_checker.go +++ b/url_checker.go @@ -20,11 +20,13 @@ type urlChecker struct { documentRoot string excludedPattern *regexp.Regexp excludePrivateHosts bool + excludeLocalhost bool + excludeLinkLocal bool semaphore semaphore } -func newURLChecker(t time.Duration, d string, r *regexp.Regexp, excludePrivateHosts bool, s semaphore) urlChecker { - return urlChecker{t, d, r, excludePrivateHosts, s} +func newURLChecker(t 
time.Duration, d string, r *regexp.Regexp, excludePrivateHosts, excludeLocalhost, excludeLinkLocal bool, s semaphore) urlChecker { + return urlChecker{t, d, r, excludePrivateHosts, excludeLocalhost, excludeLinkLocal, s} } func (c urlChecker) Check(u string, f string) error { @@ -33,15 +35,27 @@ func (c urlChecker) Check(u string, f string) error { return err } - if !local && c.excludePrivateHosts { + if !local { uu, _ := url.Parse(u) host := uu.Hostname() if ip := net.ParseIP(host); ip != nil { - if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || isPrivate(ip) { + if c.excludePrivateHosts && isPrivate(ip) { return nil } - } else if _, icann := publicsuffix.PublicSuffix(host); !icann { - return nil // private domain + if c.excludeLocalhost && ip.IsLoopback() { + return nil + } + if c.excludeLinkLocal && (ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast()) { + return nil + } + } else { + if host == "localhost" { + if c.excludeLocalhost { + return nil + } + } else if _, icann := publicsuffix.PublicSuffix(host); !icann && c.excludePrivateHosts { + return nil // private domain + } } } diff --git a/url_checker_test.go b/url_checker_test.go index 997d00f..2283ede 100644 --- a/url_checker_test.go +++ b/url_checker_test.go @@ -9,7 +9,7 @@ import ( ) func TestURLCheckerCheck(t *testing.T) { - c := newURLChecker(0, "", nil, false, newSemaphore(1024)) + c := newURLChecker(0, "", nil, false, false, false, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -21,7 +21,7 @@ func TestURLCheckerCheck(t *testing.T) { } func TestURLCheckerCheckWithExclude(t *testing.T) { - c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), false, newSemaphore(1024)) + c := newURLChecker(0, "", regexp.MustCompile(`^http:\/\/localhost:[13]$`), false, false, false, newSemaphore(1024)) for _, u := range []string{"http://localhost:1", "http://localhost:3", "README.md"} { 
assert.Equal(t, nil, c.Check(u, "README.md")) @@ -33,23 +33,41 @@ func TestURLCheckerCheckWithExclude(t *testing.T) { } func TestURLCheckerCheckWithExcludePrivateHosts(t *testing.T) { - c := newURLChecker(0, "", nil, true, newSemaphore(1024)) + c := newURLChecker(0, "", nil, true, false, false, newSemaphore(1024)) + + for _, u := range []string{ + "http://192.168.99.100", + "http://example.test", + "http://example.abcdxyz", + } { + assert.Equal(t, nil, c.Check(u, "README.md")) + } +} + +func TestURLCheckerCheckWithExcludeLocalhost(t *testing.T) { + c := newURLChecker(0, "", nil, false, true, false, newSemaphore(1024)) for _, u := range []string{ "http://localhost:1", "http://localhost:3", "http://127.0.0.1:1", + } { + assert.Equal(t, nil, c.Check(u, "README.md")) + } +} + +func TestURLCheckerCheckWithExcludeLinkLocal(t *testing.T) { + c := newURLChecker(0, "", nil, false, false, true, newSemaphore(1024)) + + for _, u := range []string{ "http://169.254.169.254:1", - "http://192.168.99.100", - "http://example.test", - "http://example.abcdxyz", } { assert.Equal(t, nil, c.Check(u, "README.md")) } } func TestURLCheckerCheckWithTimeout(t *testing.T) { - c := newURLChecker(30*time.Second, "", nil, false, newSemaphore(1024)) + c := newURLChecker(30*time.Second, "", nil, false, false, false, newSemaphore(1024)) for _, u := range []string{"https://google.com", "README.md"} { assert.Equal(t, nil, c.Check(u, "README.md")) @@ -61,7 +79,7 @@ func TestURLCheckerCheckWithTimeout(t *testing.T) { } func TestURLCheckerCheckMany(t *testing.T) { - c := newURLChecker(0, "", nil, false, newSemaphore(1024)) + c := newURLChecker(0, "", nil, false, false, false, newSemaphore(1024)) for _, us := range [][]string{{}, {"https://google.com", "README.md"}} { rc := make(chan urlResult, 1024) @@ -74,7 +92,7 @@ func TestURLCheckerCheckMany(t *testing.T) { } } func TestURLCheckerResolveURL(t *testing.T) { - f := newURLChecker(0, "", nil, false, newSemaphore(1024)) + f := newURLChecker(0, "", nil, 
false, false, false, newSemaphore(1024)) for _, c := range []struct { source, target string @@ -92,7 +110,7 @@ func TestURLCheckerResolveURL(t *testing.T) { } func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { - f := newURLChecker(0, "", nil, false, newSemaphore(1024)) + f := newURLChecker(0, "", nil, false, false, false, newSemaphore(1024)) u, _, err := f.resolveURL("/foo", "foo.md") @@ -101,7 +119,7 @@ func TestURLCheckerResolveURLWithAbsolutePath(t *testing.T) { } func TestURLCheckerResolveURLWithDocumentRoot(t *testing.T) { - f := newURLChecker(0, "foo", nil, false, newSemaphore(1024)) + f := newURLChecker(0, "foo", nil, false, false, false, newSemaphore(1024)) for _, c := range []struct { source, target string