Add provider field in json output #99

Open · wants to merge 1 commit into base: master
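The practical effect of the change: each JSON line now records which provider returned the URL. A hypothetical before/after of the JSON output mode (the host is made up; the url and provider field names come from the new output.Result struct in this diff):

Before: {"url":"https://example.com/login"}
After:  {"url":"https://example.com/login","provider":"wayback"}

Plain (non-JSON) output is unchanged: WriteURLs still prints only the URL, so the provider only surfaces in JSON mode.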
cmd/gau/main.go: 9 changes (5 additions & 4 deletions)
@@ -2,13 +2,14 @@ package main

 import (
     "bufio"
+    "io"
+    "os"
+    "sync"
+
     "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/runner"
     "github.com/lc/gau/v2/runner/flags"
     log "github.com/sirupsen/logrus"
-    "io"
-    "os"
-    "sync"
 )

 func main() {
@@ -36,7 +37,7 @@ func main() {
         log.Warn(err)
     }

-    results := make(chan string)
+    results := make(chan output.Result)

     var out io.Writer
     // Handle results in background
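To make the new channel type concrete, here is a minimal, self-contained sketch of the pattern main.go relies on: a background goroutine drains the chan output.Result while the rest of the program feeds it. The hard-coded results and the use of os.Stdout are illustrative assumptions, not code from this PR, and it only compiles with this branch checked out.

package main

import (
    "os"
    "sync"

    "github.com/lc/gau/v2/pkg/output"
)

func main() {
    results := make(chan output.Result)

    // Handle results in the background, the way main.go does.
    var writeWg sync.WaitGroup
    writeWg.Add(1)
    go func() {
        defer writeWg.Done()
        // nil blacklist and RemoveParameters=false: every result becomes one JSON line.
        output.WriteURLsJSON(os.Stdout, results, nil, false)
    }()

    // In gau these come from the providers; here they are hard-coded stand-ins.
    results <- output.Result{URL: "https://example.com/robots.txt", Provider: "commoncrawl"}
    results <- output.Result{URL: "https://example.com/app.js", Provider: "wayback"}
    close(results)

    writeWg.Wait()
}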
pkg/output/output.go: 29 changes (15 additions & 14 deletions)
@@ -1,24 +1,27 @@
 package output

 import (
-    jsoniter "github.com/json-iterator/go"
-    "github.com/valyala/bytebufferpool"
     "io"
     "net/url"
     "path"
     "strings"
+
+    jsoniter "github.com/json-iterator/go"
+    "github.com/valyala/bytebufferpool"
 )

-type JSONResult struct {
-    Url string `json:"url"`
+// Result of lookup from providers.
+type Result struct {
+    URL      string `json:"url,omitempty"`
+    Provider string `json:"provider,omitempty"`
 }

-func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) error {
+func WriteURLs(writer io.Writer, results <-chan Result, blacklistMap map[string]struct{}, RemoveParameters bool) error {
     lastURL := make(map[string]struct{})
     for result := range results {
         buf := bytebufferpool.Get()
         if len(blacklistMap) != 0 {
-            u, err := url.Parse(result)
+            u, err := url.Parse(result.URL)
             if err != nil {
                 continue
             }
@@ -32,19 +35,19 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
             }
         }
         if RemoveParameters {
-            u, err := url.Parse(result)
+            u, err := url.Parse(result.URL)
             if err != nil {
                 continue
             }
             if _, ok := lastURL[u.Host+u.Path]; ok {
                 continue
             } else {
-                lastURL[u.Host+u.Path] = struct{}{} ;
+                lastURL[u.Host+u.Path] = struct{}{}
             }

         }

-        buf.B = append(buf.B, []byte(result)...)
+        buf.B = append(buf.B, []byte(result.URL)...)
         buf.B = append(buf.B, "\n"...)
         _, err := writer.Write(buf.B)
         if err != nil {
@@ -55,12 +58,11 @@ func WriteURLs(writer io.Writer, results <-chan string, blacklistMap map[string]
     return nil
 }

-func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[string]struct{}, RemoveParameters bool) {
-    var jr JSONResult
+func WriteURLsJSON(writer io.Writer, results <-chan Result, blacklistMap map[string]struct{}, RemoveParameters bool) {
     enc := jsoniter.NewEncoder(writer)
     for result := range results {
         if len(blacklistMap) != 0 {
-            u, err := url.Parse(result)
+            u, err := url.Parse(result.URL)
             if err != nil {
                 continue
             }
@@ -73,8 +75,7 @@ func WriteURLsJSON(writer io.Writer, results <-chan string, blacklistMap map[str
                 }
             }
         }
-        jr.Url = result
-        if err := enc.Encode(jr); err != nil {
+        if err := enc.Encode(result); err != nil {
            // todo: handle this error
            continue
        }
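Since WriteURLsJSON now hands the Result straight to the encoder, the JSON shape is exactly what the struct tags say. A small sketch of that encoding (the URLs are invented; jsoniter.NewEncoder is the same call the diff uses):

package main

import (
    "log"
    "os"

    jsoniter "github.com/json-iterator/go"
    "github.com/lc/gau/v2/pkg/output"
)

func main() {
    enc := jsoniter.NewEncoder(os.Stdout)

    // Prints: {"url":"https://example.com/app.js","provider":"wayback"}
    if err := enc.Encode(output.Result{URL: "https://example.com/app.js", Provider: "wayback"}); err != nil {
        log.Fatal(err)
    }

    // Both fields are tagged omitempty, so an unset provider simply disappears:
    // {"url":"https://example.com/login"}
    if err := enc.Encode(output.Result{URL: "https://example.com/login"}); err != nil {
        log.Fatal(err)
    }
}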
pkg/providers/commoncrawl/commoncrawl.go: 8 changes (6 additions & 2 deletions)
@@ -9,6 +9,7 @@ import (

     jsoniter "github.com/json-iterator/go"
     "github.com/lc/gau/v2/pkg/httpclient"
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/pkg/providers"
     "github.com/sirupsen/logrus"
 )
@@ -55,7 +56,7 @@ func (c *Client) Name() string {

 // Fetch fetches all urls for a given domain and sends them to a channel.
 // It returns an error should one occur.
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
     p, err := c.getPagination(domain)
     if err != nil {
         return err
@@ -93,7 +94,10 @@ paginate:
                     return fmt.Errorf("received an error from commoncrawl: %s", res.Error)
                 }

-                results <- res.URL
+                results <- output.Result{
+                    URL:      res.URL,
+                    Provider: Name,
+                }
             }
         }
     }
pkg/providers/otx/otx.go: 9 changes (7 additions & 2 deletions)
@@ -3,9 +3,11 @@ package otx
 import (
     "context"
     "fmt"
+
     "github.com/bobesa/go-domain-util/domainutil"
     jsoniter "github.com/json-iterator/go"
     "github.com/lc/gau/v2/pkg/httpclient"
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/pkg/providers"
     "github.com/sirupsen/logrus"
 )
@@ -45,7 +47,7 @@ func (c *Client) Name() string {
     return Name
 }

-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
 paginate:
     for page := 1; ; page++ {
         select {
@@ -66,7 +68,10 @@ paginate:
             }

             for _, entry := range result.URLList {
-                results <- entry.URL
+                results <- output.Result{
+                    URL:      entry.URL,
+                    Provider: Name,
+                }
             }

             if !result.HasNext {
pkg/providers/providers.go: 4 changes (3 additions & 1 deletion)
@@ -2,14 +2,16 @@ package providers

 import (
     "context"
+
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/valyala/fasthttp"
 )

 const Version = `2.1.2`

 // Provider is a generic interface for all archive fetchers
 type Provider interface {
-    Fetch(ctx context.Context, domain string, results chan string) error
+    Fetch(ctx context.Context, domain string, results chan output.Result) error
     Name() string
 }

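For anyone adding a new source against the updated interface, this is roughly what a provider now has to look like. The package below and its single canned URL are hypothetical; only the Fetch and Name signatures come from the diff.

package example

import (
    "context"

    "github.com/lc/gau/v2/pkg/output"
    "github.com/lc/gau/v2/pkg/providers"
)

const Name = "example"

// Client is a toy provider; the real ones (wayback, otx, ...) page through an API.
type Client struct{}

// Compile-time check that the updated interface is satisfied.
var _ providers.Provider = (*Client)(nil)

func (c *Client) Name() string { return Name }

func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
    select {
    case <-ctx.Done():
        return ctx.Err()
    case results <- output.Result{URL: "https://" + domain + "/", Provider: Name}:
    }
    return nil
}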
pkg/providers/urlscan/urlscan.go: 11 changes (8 additions & 3 deletions)
@@ -4,11 +4,13 @@ import (
     "bytes"
     "context"
     "fmt"
+    "strings"
+
     jsoniter "github.com/json-iterator/go"
     "github.com/lc/gau/v2/pkg/httpclient"
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/pkg/providers"
     "github.com/sirupsen/logrus"
-    "strings"
 )

 const (
@@ -32,7 +34,7 @@ func New(c *providers.Config) *Client {
 func (c *Client) Name() string {
     return Name
 }
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
     var searchAfter string
     var header httpclient.Header

@@ -73,7 +75,10 @@ paginate:
             total := len(result.Results)
             for i, res := range result.Results {
                 if res.Page.Domain == domain || (c.config.IncludeSubdomains && strings.HasSuffix(res.Page.Domain, domain)) {
-                    results <- res.Page.URL
+                    results <- output.Result{
+                        URL:      res.Page.URL,
+                        Provider: Name,
+                    }
                 }

                 if i == total-1 {
pkg/providers/wayback/wayback.go: 9 changes (7 additions & 2 deletions)
@@ -3,8 +3,10 @@ package wayback
 import (
     "context"
     "fmt"
+
     jsoniter "github.com/json-iterator/go"
     "github.com/lc/gau/v2/pkg/httpclient"
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/pkg/providers"
     "github.com/sirupsen/logrus"
 )
@@ -38,7 +40,7 @@ type waybackResult [][]string

 // Fetch fetches all urls for a given domain and sends them to a channel.
 // It returns an error should one occur.
-func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
+func (c *Client) Fetch(ctx context.Context, domain string, results chan output.Result) error {
     pages, err := c.getPagination(domain)
     if err != nil {
         return fmt.Errorf("failed to fetch wayback pagination: %s", err)
@@ -73,7 +75,10 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
             for i, entry := range result {
                 // Skip first result by default
                 if i != 0 {
-                    results <- entry[0]
+                    results <- output.Result{
+                        URL:      entry[0],
+                        Provider: Name,
+                    }
                 }
             }
         }
runner/flags/flags.go: 37 changes (27 additions & 10 deletions)
@@ -4,17 +4,22 @@ import (
     "crypto/tls"
     "flag"
     "fmt"
+    "net/url"
+    "os"
+    "path/filepath"
+    "strings"
+    "time"
+
     "github.com/lc/gau/v2/pkg/providers"
+    "github.com/lc/gau/v2/pkg/providers/commoncrawl"
+    "github.com/lc/gau/v2/pkg/providers/otx"
+    "github.com/lc/gau/v2/pkg/providers/urlscan"
+    "github.com/lc/gau/v2/pkg/providers/wayback"
     "github.com/lynxsecurity/pflag"
     "github.com/lynxsecurity/viper"
     log "github.com/sirupsen/logrus"
     "github.com/valyala/fasthttp"
     "github.com/valyala/fasthttp/fasthttpproxy"
-    "net/url"
-    "os"
-    "path/filepath"
-    "strings"
-    "time"
 )

 type URLScanConfig struct {
@@ -101,7 +106,14 @@ func New() *Options {
     pflag.Uint("retries", 0, "retries for HTTP client")
     pflag.String("proxy", "", "http proxy to use")
     pflag.StringSlice("blacklist", []string{}, "list of extensions to skip")
-    pflag.StringSlice("providers", []string{}, "list of providers to use (wayback,commoncrawl,otx,urlscan)")
+    pflag.StringSlice(
+        "providers",
+        []string{},
+        fmt.Sprintf(
+            "list of providers to use (%s,%s,%s,%s)",
+            wayback.Name, commoncrawl.Name, otx.Name, urlscan.Name,
+        ),
+    )
     pflag.Bool("subs", false, "include subdomains of target domain")
     pflag.Bool("fp", false, "remove different parameters of the same endpoint")
     pflag.Bool("verbose", false, "show verbose output")
@@ -168,10 +180,15 @@ func (o *Options) DefaultConfig() *Config {
         MaxRetries:        5,
         IncludeSubdomains: false,
         RemoveParameters:  false,
-        Providers:         []string{"wayback", "commoncrawl", "otx", "urlscan"},
-        Blacklist:         []string{},
-        JSON:              false,
-        Outfile:           "",
+        Providers: []string{
+            commoncrawl.Name,
+            otx.Name,
+            urlscan.Name,
+            wayback.Name,
+        },
+        Blacklist: []string{},
+        JSON:      false,
+        Outfile:   "",
     }

     o.getFlagValues(c)
runner/runner.go: 16 changes (9 additions & 7 deletions)
@@ -3,13 +3,15 @@ package runner
 import (
     "context"
     "fmt"
+    "sync"
+
+    "github.com/lc/gau/v2/pkg/output"
     "github.com/lc/gau/v2/pkg/providers"
     "github.com/lc/gau/v2/pkg/providers/commoncrawl"
     "github.com/lc/gau/v2/pkg/providers/otx"
     "github.com/lc/gau/v2/pkg/providers/urlscan"
     "github.com/lc/gau/v2/pkg/providers/wayback"
     "github.com/sirupsen/logrus"
-    "sync"
 )

 type Runner struct {
@@ -30,14 +32,14 @@ func (r *Runner) Init(c *providers.Config, providerMap ProvidersMap) error {

     for name, filters := range providerMap {
         switch name {
-        case "urlscan":
+        case urlscan.Name:
             r.providers = append(r.providers, urlscan.New(c))
-        case "otx":
+        case otx.Name:
             o := otx.New(c)
             r.providers = append(r.providers, o)
-        case "wayback":
+        case wayback.Name:
             r.providers = append(r.providers, wayback.New(c, filters))
-        case "commoncrawl":
+        case commoncrawl.Name:
             cc, err := commoncrawl.New(c, filters)
             if err != nil {
                 return fmt.Errorf("error instantiating commoncrawl: %v\n", err)
@@ -50,7 +52,7 @@ func (r *Runner) Init(c *providers.Config, providerMap ProvidersMap) error {
 }

 // Starts starts the worker
-func (r *Runner) Start(domains chan string, results chan string) {
+func (r *Runner) Start(domains chan string, results chan output.Result) {
     for i := uint(0); i < r.config.Threads; i++ {
         r.wg.Add(1)
         go func() {
@@ -66,7 +68,7 @@ func (r *Runner) Wait() {
 }

 // worker checks to see if the context is finished and executes the fetching process for each provider
-func (r *Runner) worker(ctx context.Context, domains chan string, results chan string) {
+func (r *Runner) worker(ctx context.Context, domains chan string, results chan output.Result) {
 work:
     for {
         select {
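Finally, a sketch of what driving the runner looks like from the caller's side once results carry their provider. Runner construction (Init and its ProvidersMap argument) is outside this diff, so the function below takes an already-initialized *runner.Runner and assumes it was configured with at least one worker thread; it is an illustration, not code from this PR.

package example

import (
    "fmt"

    "github.com/lc/gau/v2/pkg/output"
    "github.com/lc/gau/v2/runner"
)

// drain feeds one domain to the workers and prints every result with its source.
func drain(r *runner.Runner, domain string) {
    domains := make(chan string)
    results := make(chan output.Result)

    r.Start(domains, results)

    go func() {
        domains <- domain
        close(domains)
    }()

    // Close results once every worker has returned, then range over it.
    go func() {
        r.Wait()
        close(results)
    }()

    for res := range results {
        fmt.Printf("%s\t%s\n", res.Provider, res.URL)
    }
}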