Skip to content

Commit

Permalink
fix spider
Browse files Browse the repository at this point in the history
  • Loading branch information
markus621 committed Dec 29, 2024
1 parent 690f55a commit 94f7461
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 26 deletions.
12 changes: 6 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@ go 1.22.5

require (
go.osspkg.com/console v0.3.3
go.osspkg.com/do v0.1.2
go.osspkg.com/do v0.1.3
go.osspkg.com/events v0.3.0
go.osspkg.com/goppy/v2 v2.1.5
go.osspkg.com/ioutils v0.4.7
go.osspkg.com/goppy/v2 v2.1.7
go.osspkg.com/ioutils v0.4.8
go.osspkg.com/logx v0.4.2
go.osspkg.com/static v1.4.0
)

require (
github.com/josharian/intern v1.0.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/rogpeppe/go-internal v1.12.0 // indirect
go.osspkg.com/algorithms v1.4.1 // indirect
go.osspkg.com/config v0.1.3 // indirect
go.osspkg.com/errors v0.3.1 // indirect
go.osspkg.com/grape v1.2.3 // indirect
go.osspkg.com/network v0.4.5 // indirect
go.osspkg.com/network v0.5.0 // indirect
go.osspkg.com/syncing v0.3.0 // indirect
go.osspkg.com/xc v0.3.1 // indirect
go.osspkg.com/xc v0.4.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
24 changes: 12 additions & 12 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
Expand All @@ -28,28 +28,28 @@ go.osspkg.com/config v0.1.3 h1:LAgTTYFzoIf4VNBIpFtjG81rR0/3SG29r+1D8I/YBzs=
go.osspkg.com/config v0.1.3/go.mod h1:MyqvXDgpHLZlO2SM/rqf1zogpUZiXvkZlc/Z+0L7alA=
go.osspkg.com/console v0.3.3 h1:UB/pPoPsgWbyNFix8pEMQHbsXdMv/UK/dgsbRknCH2A=
go.osspkg.com/console v0.3.3/go.mod h1:IknBCliH6mX/ogHa6wbycnGDFYixCGH3WuNc5W5tQe8=
go.osspkg.com/do v0.1.2 h1:e7J/R+vMTpF1NL2wF301rBu72DQ1dlhd/IXtjWYFreQ=
go.osspkg.com/do v0.1.2/go.mod h1:hOQEum85f8Kc4m8PWUAECDQ/mTtQ4362ABLD+KW5/vk=
go.osspkg.com/do v0.1.3 h1:oNXeWZOQUv73LQLawKxfAyN1wacTtRZQVlrkN8nj+sw=
go.osspkg.com/do v0.1.3/go.mod h1:hOQEum85f8Kc4m8PWUAECDQ/mTtQ4362ABLD+KW5/vk=
go.osspkg.com/errors v0.3.1 h1:F9m/EEd/Ot2jba/TV7tvVRIpWXzIpNLc7vRJKcBD86A=
go.osspkg.com/errors v0.3.1/go.mod h1:dKXe6Rt07nzY7OyKQNZ8HGBicZ2uQ5TKEoVFnVFOK44=
go.osspkg.com/events v0.3.0 h1:W2IngTsKs0BKYIglqhrETwtpo6uNSZXWRIt0/l7c6dY=
go.osspkg.com/events v0.3.0/go.mod h1:Cjpx+qNM1y2MIAygFyZWYagTuRiYirmKppZQdaZumd4=
go.osspkg.com/goppy/v2 v2.1.5 h1:kW12lqxdyjx2u1TLPVqnOrIZo3rRHJ4kcDFzZU10iok=
go.osspkg.com/goppy/v2 v2.1.5/go.mod h1:7VDQjqkD1bBUyl6kkJCcfG/5Flk/EuVS7qZaB05FpJA=
go.osspkg.com/goppy/v2 v2.1.7 h1:MgLtJLUX7YCll1nKYV20eJZ5xuNknYDS+Sc1JUWKTIM=
go.osspkg.com/goppy/v2 v2.1.7/go.mod h1:WbvqyI3HYFrnpJL6vysKPbn6k2BY4qg3/dCqdRmDfBc=
go.osspkg.com/grape v1.2.3 h1:3umuC4AV8foY4rGz3xoUdtJ7iG8STTLjqSNZyDygc/o=
go.osspkg.com/grape v1.2.3/go.mod h1:lg0K0VqCQE1/o4c2xM4b/wL5ZKG2NkqqCCF16ZjEJSI=
go.osspkg.com/ioutils v0.4.7 h1:ERr37BhApkVH34Ebq2yPY+50p38bElSns6pX64wVsyw=
go.osspkg.com/ioutils v0.4.7/go.mod h1:58HhG2NHf9JUtixAH3R2XISlUmJruwVIUZ3039QVjOY=
go.osspkg.com/ioutils v0.4.8 h1:7o7n6eypWdu3EF8i/ocnuyqDtYXJUqds+Chd1XmZp5s=
go.osspkg.com/ioutils v0.4.8/go.mod h1:58HhG2NHf9JUtixAH3R2XISlUmJruwVIUZ3039QVjOY=
go.osspkg.com/logx v0.4.2 h1:3kqG7EaaT/DxpHytQm4MfcrmDhYf8ha9/iRpVjpRt88=
go.osspkg.com/logx v0.4.2/go.mod h1:mGbH9hdkeC0h9Gw1uWgQfi9MmlANcqNLffB0wxIDpsQ=
go.osspkg.com/network v0.4.5 h1:1vNL62jIY6TF2wLxb2tEM7bDuCy2him5mk4WKj6+wnQ=
go.osspkg.com/network v0.4.5/go.mod h1:OKBbbdb/+Y7U6lArKjFEc0+drK45sKfc3IZ+B7ZfVO8=
go.osspkg.com/network v0.5.0 h1:xTrfsX1FtpIGNi69HPllDtUSSH/lb1R2sZBUdEI4MAg=
go.osspkg.com/network v0.5.0/go.mod h1:wIUrmBmkq7IcqS9pLlRmQuyxeOuAc7NR6aUle8Bvryk=
go.osspkg.com/static v1.4.0 h1:2uy4/11c0QP+QLMucKQZbAU+e6lhVHKw5dWJPTk/DBg=
go.osspkg.com/static v1.4.0/go.mod h1:94YydVU3qUtb1J534486lpm+qg6CviQjqtxKlkpSppM=
go.osspkg.com/syncing v0.3.0 h1:yBkCsDPEt12a+qagInFFt7+ZongfT+GjSQl7nBmcybI=
go.osspkg.com/syncing v0.3.0/go.mod h1:Dpe0ljlEG6cI2Y9PxEjKiYEX2sgs1eUjWNVjFu4/iB0=
go.osspkg.com/xc v0.3.1 h1:6De75eXdP9CVXqgQOcCWLPyAqFw9zP5lM6rV9MLGiCE=
go.osspkg.com/xc v0.3.1/go.mod h1:6dUG4Y/Q2NMhc5vYrNy0ehWIaHQtAi+MFfc22onQHEs=
go.osspkg.com/xc v0.4.0 h1:MGntRGa3EPCpfrTbWEN7x475BAsAtRYGpYEYJ5mE0I8=
go.osspkg.com/xc v0.4.0/go.mod h1:HWDrUQOKMkQser1teXqnFNMB1WVD0YsyIuM1vIKny7U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
Expand Down
6 changes: 4 additions & 2 deletions internal/jasta/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package jasta
import (
"fmt"
"path/filepath"
"strings"

"go.osspkg.com/goppy/v2/plugins"
"go.osspkg.com/ioutils/codec"
Expand Down Expand Up @@ -44,7 +43,10 @@ func WebsiteConfigDecode(c *Config) (WebsiteConfigs, error) {
if err0 != nil {
return nil, fmt.Errorf("validate root path for [%s]: %w", filename, err0)
}
wc.Root = filepath.Dir(filenameFull) + "/" + strings.TrimLeft(wc.Root, "./")
wc.Root, err0 = filepath.Abs(filepath.Dir(filenameFull) + "/" + wc.Root)
if err0 != nil {
return nil, fmt.Errorf("validate root path for [%s]: %w", filename, err0)
}
}
if err = wc.Validate(); err != nil {
return nil, err
Expand Down
2 changes: 0 additions & 2 deletions internal/spiderweb/chromium.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,12 @@ const runChromium = `DISPLAY=:0 chromium \
--disable-notifications \
--mute-audio \
--disable-audio-support-for-desktop-share \
--no-startup-window \
--no-default-browser-check \
--no-service-autorun \
--no-first-run \
--no-experiments \
--no-managed-user-acknowledgment-check \
--no-network-profile-warning \
--no-startup-window \
--no-use-mus-in-renderer \
--noerrdialogs \
--non-material \
Expand Down
17 changes: 13 additions & 4 deletions internal/spiderweb/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ func (v *Spider) grab(ctx context.Context) ([]string, error) {
}
}

// htmlStart is the byte sequence getHtml searches for (first occurrence)
// to find where the HTML document begins.
var htmlStart = []byte("<!DOCTYPE")

// htmlEnd is the byte sequence getHtml searches for (last occurrence)
// to find where the HTML document ends; the returned slice includes it.
var htmlEnd = []byte("</html>")

func (v *Spider) getHtml(ctx context.Context, uri string) ([]byte, error) {
tmpDir, err := os.MkdirTemp(os.TempDir(), "jasta-prerend-*")
if err != nil {
Expand All @@ -130,11 +135,15 @@ func (v *Spider) getHtml(ctx context.Context, uri string) ([]byte, error) {
if err != nil {
return nil, err
}
index := bytes.Index(b, []byte("<!DOCTYPE"))
if index == -1 {
return nil, fmt.Errorf("html is empty")
indexStart := bytes.Index(b, htmlStart)
if indexStart == -1 {
return nil, fmt.Errorf("fail get start HTML document")
}
indexEnd := bytes.LastIndex(b, htmlEnd)
if indexEnd == -1 {
return nil, fmt.Errorf("fail get end HTML document")
}
return b[index:], nil
return b[indexStart : indexEnd+len(htmlEnd)], nil
}

func (v *Spider) buildSitemap(data []string) error {
Expand Down

0 comments on commit 94f7461

Please sign in to comment.