Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --exclude-private-hosts, --exclude-localhost & --exclude-link-local #29

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ go get -u github.com/raviqqe/liche
Link checker for Markdown and HTML

Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-p] [-h] [-l] [-v] <filenames>...

Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: 512]
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: 512]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-p, --exclude-private-hosts Exclude private domains and ip addresses.
-h, --exclude-localhost Exclude localhost addresses.
-l, --exclude-link-local Exclude link local addresses.
-v, --verbose Be verbose.
```

Expand Down
25 changes: 17 additions & 8 deletions arguments.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,30 @@ const defaultConcurrency = maxOpenFiles / 2
const usage = `Link checker for Markdown and HTML

Usage:
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-v] <filenames>...
liche [-c <num-requests>] [-d <directory>] [-r] [-t <timeout>] [-x <regex>] [-p] [-h] [-l] [-v] <filenames>...

Options:
-c, --concurrency <num-requests> Set max number of concurrent HTTP requests. [default: %v]
-d, --document-root <directory> Set document root directory for absolute paths.
-r, --recursive Search Markdown and HTML files recursively
-t, --timeout <timeout> Set timeout for HTTP requests in seconds. Disabled by default.
-x, --exclude <regex> Regex of links to exclude from checking.
-p, --exclude-private-hosts Exclude private domains and ip addresses.
-h, --exclude-localhost Exclude localhost addresses.
-l, --exclude-link-local Exclude link local addresses.
-v, --verbose Be verbose.`

type arguments struct {
filenames []string
documentRoot string
concurrency int
timeout time.Duration
excludedPattern *regexp.Regexp
recursive bool
verbose bool
filenames []string
documentRoot string
concurrency int
timeout time.Duration
excludedPattern *regexp.Regexp
excludePrivateHosts bool
excludeLocalhost bool
excludeLinkLocal bool
recursive bool
verbose bool
}

func getArguments(argv []string) (arguments, error) {
Expand Down Expand Up @@ -77,6 +83,9 @@ func getArguments(argv []string) (arguments, error) {
int(c),
time.Duration(t) * time.Second,
r,
args["--exclude-private-hosts"].(bool),
args["--exclude-localhost"].(bool),
args["--exclude-link-local"].(bool),
args["--recursive"].(bool),
args["--verbose"].(bool),
}, nil
Expand Down
50 changes: 37 additions & 13 deletions arguments_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,55 +15,79 @@ func TestGetArguments(t *testing.T) {
}{
{
argv: []string{"file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, false},
},
{
argv: []string{"-c", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, nil, false, false},
args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false, false, false},
},
{
argv: []string{"--concurrency", "42", "file"},
args: arguments{[]string{"file"}, "", 42, 0, nil, false, false},
args: arguments{[]string{"file"}, "", 42, 0, nil, false, false, false, false, false},
},
{
argv: []string{"-d", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false, false, false},
},
{
argv: []string{"--document-root", "directory", "file"},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false},
args: arguments{[]string{"file"}, "directory", defaultConcurrency, 0, nil, false, false, false, false, false},
},
{
argv: []string{"-r", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, true, false},
},
{
argv: []string{"--recursive", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, true, false},
},
{
argv: []string{"-t", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false, false, false},
},
{
argv: []string{"--timeout", "42", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 42 * time.Second, nil, false, false, false, false, false},
},
{
argv: []string{"-x", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false, false, false},
},
{
argv: []string{"--exclude", "^.*$", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, regexp.MustCompile(`^.*$`), false, false, false, false, false},
},
{
argv: []string{"-p", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false, false, false},
},
{
argv: []string{"--exclude-private-hosts", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, true, false, false, false, false},
},
{
argv: []string{"-h", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false, false, false},
},
{
argv: []string{"--exclude-localhost", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true, false, false, false},
},
{
argv: []string{"-l", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true, false, false},
},
{
argv: []string{"--exclude-link-local", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, true, false, false},
},
{
argv: []string{"-v", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, true},
},
{
argv: []string{"--verbose", "file"},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, true},
args: arguments{[]string{"file"}, "", defaultConcurrency, 0, nil, false, false, false, false, true},
},
} {
args, err := getArguments(c.argv)
Expand Down
4 changes: 2 additions & 2 deletions file_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ type fileChecker struct {
semaphore semaphore
}

func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, r, s), s}
func newFileChecker(timeout time.Duration, d string, r *regexp.Regexp, excludePrivateHosts, excludeLocalhost, excludeLinkLocal bool, s semaphore) fileChecker {
return fileChecker{newURLChecker(timeout, d, r, excludePrivateHosts, excludeLocalhost, excludeLinkLocal, s), s}
}

func (c fileChecker) Check(f string) ([]urlResult, error) {
Expand Down
8 changes: 4 additions & 4 deletions file_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

func TestFileCheckerCheck(t *testing.T) {
c := newFileChecker(0, "", nil, newSemaphore(1024))
c := newFileChecker(0, "", nil, false, false, false, newSemaphore(1024))

for _, f := range []string{"README.md", "test/foo.md", "test/foo.html"} {
rs, err := c.Check(f)
Expand Down Expand Up @@ -48,7 +48,7 @@ func TestFileCheckerCheck(t *testing.T) {
}

func TestFileCheckerCheckMany(t *testing.T) {
c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
c := newFileChecker(0, "", nil, false, false, false, newSemaphore(maxOpenFiles))

for _, fs := range [][]string{
{"README.md"},
Expand Down Expand Up @@ -77,7 +77,7 @@ func TestFileCheckerCheckMany(t *testing.T) {
}

func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
c := newFileChecker(0, "", nil, newSemaphore(maxOpenFiles))
c := newFileChecker(0, "", nil, false, false, false, newSemaphore(maxOpenFiles))

for _, fs := range [][]string{
{"test/absolute_path.md"},
Expand Down Expand Up @@ -107,7 +107,7 @@ func TestFileCheckerCheckManyWithInvalidFiles(t *testing.T) {
}

func TestFileCheckerExtractURLs(t *testing.T) {
c := newFileChecker(0, "", nil, newSemaphore(42))
c := newFileChecker(0, "", nil, false, false, false, newSemaphore(42))

for _, x := range []struct {
html string
Expand Down
2 changes: 1 addition & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ github.com/mattn/go-isatty v0.0.9 h1:d5US/mDsogSGW37IV293h//ZFaeajb69h+EHFsv2xGg
github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v2.0.0+incompatible h1:cBXrhZNUf9C+La9/YpS+UHpUT8YD6Td9ZMSU9APFcsk=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
Expand All @@ -44,6 +43,7 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24 h1:R8bzl0244nw47n1xKs1MUMAaTNgjavKcN/aX2Ss3+Fo=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ func main() {
args.timeout,
args.documentRoot,
args.excludedPattern,
args.excludePrivateHosts,
args.excludeLocalhost,
args.excludeLinkLocal,
newSemaphore(args.concurrency))

go c.CheckMany(m.Filenames(), rc)
Expand Down
56 changes: 50 additions & 6 deletions url_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"errors"
"net"
"net/url"
"os"
"path"
Expand All @@ -11,17 +12,21 @@ import (
"time"

"github.com/valyala/fasthttp"
"golang.org/x/net/publicsuffix"
)

type urlChecker struct {
timeout time.Duration
documentRoot string
excludedPattern *regexp.Regexp
semaphore semaphore
timeout time.Duration
documentRoot string
excludedPattern *regexp.Regexp
excludePrivateHosts bool
excludeLocalhost bool
excludeLinkLocal bool
semaphore semaphore
}

func newURLChecker(t time.Duration, d string, r *regexp.Regexp, s semaphore) urlChecker {
return urlChecker{t, d, r, s}
func newURLChecker(t time.Duration, d string, r *regexp.Regexp, excludePrivateHosts, excludeLocalhost, excludeLinkLocal bool, s semaphore) urlChecker {
return urlChecker{t, d, r, excludePrivateHosts, excludeLocalhost, excludeLinkLocal, s}
}

func (c urlChecker) Check(u string, f string) error {
Expand All @@ -30,6 +35,30 @@ func (c urlChecker) Check(u string, f string) error {
return err
}

if !local {
uu, _ := url.Parse(u)
host := uu.Hostname()
if ip := net.ParseIP(host); ip != nil {
if c.excludePrivateHosts && isPrivate(ip) {
return nil
}
if c.excludeLocalhost && ip.IsLoopback() {
return nil
}
if c.excludeLinkLocal && (ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast()) {
return nil
}
} else {
if host == "localhost" {
if c.excludeLocalhost {
return nil
}
} else if _, icann := publicsuffix.PublicSuffix(host); !icann && c.excludePrivateHosts {
return nil // private domain
}
}
}

if c.excludedPattern != nil && c.excludedPattern.MatchString(u) {
return nil
}
Expand Down Expand Up @@ -88,3 +117,18 @@ func (c urlChecker) resolveURL(u string, f string) (string, bool, error) {

return path.Join(c.documentRoot, uu.Path), true, nil
}

// isPrivate reports whether ip lies in a private-use address range:
// 10.0.0.0/8, 172.16.0.0/12 or 192.168.0.0/16 for IPv4 (RFC 1918),
// or fc00::/7 for IPv6 (RFC 4193). Anything that is neither a valid
// IPv4 nor a 16-byte IPv6 address yields false.
// xref: https://go-review.googlesource.com/c/go/+/162998/
// xref: https://github.com/golang/go/issues/29146
func isPrivate(ip net.IP) bool {
	v4 := ip.To4()
	if v4 == nil {
		// Not representable as IPv4: only a full-length IPv6 address whose
		// leading seven bits are 1111110 (fc00::/7) counts as private.
		if len(ip) != net.IPv6len {
			return false
		}
		return ip[0]&0xfe == 0xfc
	}

	// IPv4 private blocks, see https://tools.ietf.org/html/rfc1918
	switch v4[0] {
	case 10:
		return true
	case 172:
		// Second octet 16..31 shares the high nibble 0x1.
		return v4[1]&0xf0 == 16
	case 192:
		return v4[1] == 168
	default:
		return false
	}
}
Loading