Skip to content

Commit

Permalink
Support publish message to channel with Tor entry
Browse files Browse the repository at this point in the history
Bump version 0.5.5
  • Loading branch information
web-flow committed Jan 15, 2021
1 parent 7e12d86 commit 855d6ea
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 105 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.5.5] - 2021-01-15

### Added
- Support publishing messages to the channel from the Tor entry.

### Fixed
- Minor bugfix.

## [0.5.4] - 2020-12-08

### Fixed
Expand Down
20 changes: 12 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@ go 1.15
require (
github.com/btcsuite/btcd v0.21.0-beta // indirect
github.com/cretz/bine v0.1.0
github.com/go-shiori/dom v0.0.0-20200611094855-2cf8a4b8b9eb // indirect
github.com/go-shiori/obelisk v0.0.0-20201115143556-8de0d40b0a9b // indirect
github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible
// github.com/ipsn/go-libtor v1.0.329
github.com/libp2p/go-libp2p-core v0.7.0 // indirect
github.com/libp2p/go-libp2p-core v0.8.0 // indirect
github.com/multiformats/go-multiaddr v0.3.1 // indirect
github.com/sirupsen/logrus v1.6.0 // indirect
github.com/spf13/cobra v1.1.1
github.com/tdewolff/parse/v2 v2.5.7 // indirect
github.com/technoweenie/multipartstreamer v1.0.1 // indirect
github.com/wabarc/archive.is v1.0.2
github.com/wabarc/archive.org v0.1.1
github.com/wabarc/archive.is v1.0.3
github.com/wabarc/archive.org v1.0.3
github.com/wabarc/wbipfs v0.1.2-0.20201111165742-67df7bfbefe6
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb // indirect
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
github.com/whyrusleeping/tar-utils v0.0.0-20201201191210-20a61371de5b // indirect
go.opencensus.io v0.22.5 // indirect
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad // indirect
golang.org/x/net v0.0.0-20201224014010-6772e930b67b // indirect
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad // indirect
)

replace github.com/go-shiori/obelisk => github.com/wabarc/obelisk v0.0.0-20201111154435-da60cc3d3acd
replace github.com/go-shiori/obelisk => github.com/wabarc/obelisk v0.0.0-20201203134337-61a4e7973e4c
81 changes: 34 additions & 47 deletions go.sum

Large diffs are not rendered by default.

35 changes: 33 additions & 2 deletions publish/channel.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,25 @@
package publish // import "github.com/wabarc/wayback/publish"

import (
"bytes"
"text/template"

"github.com/go-telegram-bot-api/telegram-bot-api"
"github.com/wabarc/wayback/config"
"github.com/wabarc/wayback/logger"
)

// Collect is one entry of the data passed to Render: a heading and
// the values listed beneath it.
type Collect struct {
// Arc is the heading of the entry; Render writes it between <b> tags.
Arc string
// Dst holds the values Render lists as bullet items, one per map entry.
Dst map[string]string
}

// ToChannel publishes a message to a Telegram channel and
// returns a boolean indicating the result.
func ToChannel(opts *config.Options, bot *tgbotapi.BotAPI, text string) bool {
if bot == nil {
var err error
bot, err = tgbotapi.NewBotAPI(opts.TelegramToken())
if err != nil {
if bot, err = tgbotapi.NewBotAPI(opts.TelegramToken()); err != nil {
logger.Error("Publish to Telegram Channel failed, %v", err)
return false
}
Expand All @@ -31,3 +38,27 @@ func ToChannel(opts *config.Options, bot *tgbotapi.BotAPI, text string) bool {

return true
}

// Render formats vars as message text: for every entry it writes the
// entry's Arc between <b> tags, followed by one bullet line per value
// in Dst. The text is produced with text/template, so field values are
// written as-is without HTML escaping.
//
// It returns an empty string when the template cannot be parsed or
// executed; the failure is only reported through the debug log.
func Render(vars []*Collect) string {
	const layout = `{{range $ := .}}<b>{{ $.Arc }}</b>:
{{ range $url := $.Dst -}}
• {{ $url }}
{{end}}
{{end}}`

	message, err := template.New("message").Parse(layout)
	if err != nil {
		logger.Debug("Telegram: parse template failed, %v", err)
		return ""
	}

	var out bytes.Buffer
	if err := message.Execute(&out, vars); err != nil {
		logger.Debug("Telegram: execute template failed, %v", err)
		return ""
	}

	return out.String()
}
51 changes: 40 additions & 11 deletions service/anonymity/tor.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/wabarc/wayback"
"github.com/wabarc/wayback/config"
"github.com/wabarc/wayback/logger"
"github.com/wabarc/wayback/publish"
"github.com/wabarc/wayback/template"
"github.com/wabarc/wayback/utils"
)
Expand Down Expand Up @@ -126,14 +127,15 @@ func (t *tor) process(w http.ResponseWriter, r *http.Request, ctx context.Contex

logger.Debug("Web: text: %s", text)

collector, _ := t.archive(ctx, text)
collector, col := t.archive(ctx, text)
switch r.PostFormValue("data-type") {
case "json":
w.Header().Set("Content-Type", "application/json")

if data, err := json.Marshal(collector); err != nil {
logger.Error("Web: encode for response failed, %v", err)
} else {
go publish.ToChannel(t.opts, nil, publish.Render(col))
w.Write(data)
}

Expand All @@ -142,6 +144,7 @@ func (t *tor) process(w http.ResponseWriter, r *http.Request, ctx context.Contex
w.Header().Set("Content-Type", "text/html; charset=utf-8")

if html, ok := collector.Render(); ok {
go publish.ToChannel(t.opts, nil, publish.Render(col))
w.Write(html)
} else {
logger.Error("Web: render template for response failed")
Expand All @@ -151,15 +154,15 @@ func (t *tor) process(w http.ResponseWriter, r *http.Request, ctx context.Contex
}
}

func (t *tor) archive(ctx context.Context, text string) (c *template.Collector, err error) {
func (t *tor) archive(ctx context.Context, text string) (tc *template.Collector, col []*publish.Collect) {
logger.Debug("Web: archives start...")
c = &template.Collector{}
tc = &template.Collector{}

urls := utils.MatchURL(text)
if len(urls) == 0 {
transform(c, "", map[string]string{text: "URL no found"})
transform(tc, "", map[string]string{text: "URL no found"})
logger.Info("Web: archives failure, URL no found.")
return c, fmt.Errorf("Length Required")
return tc, []*publish.Collect{}
}

wg := sync.WaitGroup{}
Expand All @@ -169,24 +172,50 @@ func (t *tor) archive(ctx context.Context, text string) (c *template.Collector,
continue
}
wg.Add(1)
go func(slot string, c *template.Collector) {
go func(slot string, tc *template.Collector) {
defer wg.Done()
c := &publish.Collect{}
switch slot {
case config.SLOT_IA:
logger.Debug("Web: archiving slot: %s", slot)
transform(c, config.SlotName(slot), wbrc.IA())
ia := wbrc.IA()
slotName := config.SlotName(slot)

// Data for response
transform(tc, slotName, ia)

// Data for publish
c.Arc = fmt.Sprintf("<a href='https://web.archive.org/'>%s</a>", slotName)
c.Dst = ia
case config.SLOT_IS:
logger.Debug("Web: archiving slot: %s", slot)
transform(c, config.SlotName(slot), wbrc.IS())
is := wbrc.IS()
slotName := config.SlotName(slot)

// Data for response
transform(tc, slotName, is)

// Data for publish
c.Arc = fmt.Sprintf("<a href='https://archive.today/'>%s</a>", slotName)
c.Dst = is
case config.SLOT_IP:
logger.Debug("Web: archiving slot: %s", slot)
transform(c, config.SlotName(slot), wbrc.IP())
ip := wbrc.IP()
slotName := config.SlotName(slot)

// Data for response
transform(tc, slotName, ip)

// Data for publish
c.Arc = fmt.Sprintf("<a href='https://ipfs.github.io/public-gateway-checker/'>%s</a>", slotName)
c.Dst = ip
}
}(slot, c)
col = append(col, c)
}(slot, tc)
}
wg.Wait()

return c, nil
return tc, col
}

func transform(c *template.Collector, slot string, arc map[string]string) {
Expand Down
37 changes: 3 additions & 34 deletions service/telegram/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
package telegram // import "github.com/wabarc/wayback/service/telegram"

import (
"bytes"
"context"
"fmt"
"sync"
"text/template"

"github.com/go-telegram-bot-api/telegram-bot-api"
"github.com/wabarc/wayback"
Expand Down Expand Up @@ -79,7 +77,7 @@ func (t *telegram) Serve(_ context.Context) error {
return err
}

replyText := render(col)
replyText := publish.Render(col)
logger.Debug("Telegram: reply text, %s", replyText)
msg := tgbotapi.NewMessage(update.Message.Chat.ID, replyText)
msg.ReplyToMessageID = msgID
Expand All @@ -98,12 +96,7 @@ func (t *telegram) Serve(_ context.Context) error {
return nil
}

// collect is one entry of the data passed to render: a heading and
// the values listed beneath it.
type collect struct {
// Arc is the heading of the entry; render writes it between <b> tags.
Arc string
// Dst holds the values render lists as bullet items, one per map entry.
Dst map[string]string
}

func archive(t *telegram, msgid int, urls []string) (col []*collect, id int, err error) {
func archive(t *telegram, msgid int, urls []string) (col []*publish.Collect, id int, err error) {
logger.Debug("Telegram: archives start...")

wg := sync.WaitGroup{}
Expand All @@ -115,7 +108,7 @@ func archive(t *telegram, msgid int, urls []string) (col []*collect, id int, err
wg.Add(1)
go func(slot string) {
defer wg.Done()
c := &collect{}
c := &publish.Collect{}
switch slot {
case config.SLOT_IA:
logger.Debug("Telegram: archiving slot: %s", slot)
Expand All @@ -137,27 +130,3 @@ func archive(t *telegram, msgid int, urls []string) (col []*collect, id int, err

return col, msgid, nil
}

// render builds the reply text for a set of archive results. Each
// entry contributes its Arc between <b> tags and then one bullet line
// for every value stored in Dst. Values are emitted verbatim because
// the text is generated with text/template.
//
// An empty string is returned if parsing or executing the template
// fails; the error is written to the debug log only.
func render(vars []*collect) string {
	const body = `{{range $ := .}}<b>{{ $.Arc }}</b>:
{{ range $url := $.Dst -}}
• {{ $url }}
{{end}}
{{end}}`

	parsed, parseErr := template.New("message").Parse(body)
	if parseErr != nil {
		logger.Debug("Telegram: parse template failed, %v", parseErr)
		return ""
	}

	rendered := new(bytes.Buffer)
	if execErr := parsed.Execute(rendered, vars); execErr != nil {
		logger.Debug("Telegram: execute template failed, %v", execErr)
		return ""
	}

	return rendered.String()
}
9 changes: 9 additions & 0 deletions utils/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright 2020 Wayback Archiver. All rights reserved.
// Use of this source code is governed by the GNU GPL v3
// license that can be found in the LICENSE file.

/*
Package utils provides utility helpers for the application.
*/
package utils // import "github.com/wabarc/wayback/utils"
28 changes: 25 additions & 3 deletions utils/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,38 @@

package utils // import "github.com/wabarc/wayback/utils"

import "regexp"
import (
"net/url"
"regexp"
"strings"
)

// MatchURL extracts URLs from text; it always returns a non-nil []string.
func MatchURL(text string) []string {
re := regexp.MustCompile(`https?://(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,255}\.[a-z]{0,63}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)`)
re := regexp.MustCompile(`https?://?[-a-zA-Z0-9@:%._\+~#=]{1,255}\.[a-z]{0,63}\b(?:[-a-zA-Z0-9@:%_\+.~#?&//=]*)`)
urls := []string{}
match := re.FindAllString(text, -1)
for _, el := range match {
urls = append(urls, el)
urls = append(urls, strip(el))
}

return urls
}

// isTrackingParam reports whether key names a query parameter that
// strip removes: anything starting with "utm_", "at_custom" or
// "at_medium", and "weibo_id" in any letter case.
func isTrackingParam(key string) bool {
	return strings.HasPrefix(key, "utm_") ||
		strings.HasPrefix(key, "at_custom") ||
		strings.HasPrefix(key, "at_medium") ||
		strings.EqualFold(key, "weibo_id")
}

// strip removes tracking parameters from the query string of link and
// returns the resulting URL.
//
// The query is filtered pair by pair on its raw form instead of being
// decoded into url.Values and re-encoded, so the parameters that are
// kept retain their original order and encoding ("?b=1&a=2" is not
// sorted, "?flag" does not become "?flag=", "%20" does not become "+").
// Empty pairs (as in "a=1&&b=2") are dropped.
//
// If link cannot be parsed it is returned unchanged rather than as an
// empty string, so callers never receive an empty URL for a link they
// passed in.
func strip(link string) string {
	u, err := url.Parse(link)
	if err != nil {
		return link
	}
	if u.RawQuery == "" {
		return u.String()
	}

	pairs := strings.Split(u.RawQuery, "&")
	kept := pairs[:0] // filter in place; kept never outruns the read index
	for _, pair := range pairs {
		if pair == "" {
			continue
		}

		rawKey := pair
		if i := strings.IndexByte(pair, '='); i >= 0 {
			rawKey = pair[:i]
		}
		// Compare against the decoded key so that an escaped name such
		// as "utm%5Fsource" is still recognised; fall back to the raw
		// text when the escape sequence is malformed.
		key, err := url.QueryUnescape(rawKey)
		if err != nil {
			key = rawKey
		}
		if isTrackingParam(key) {
			continue
		}

		kept = append(kept, pair)
	}

	u.RawQuery = strings.Join(kept, "&")

	return u.String()
}
31 changes: 31 additions & 0 deletions utils/url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2020 Wayback Archiver. All rights reserved.
// Use of this source code is governed by the GNU GPL v3
// license that can be found in the LICENSE file.

/*
Package utils provides utility helpers for the application.
*/
package utils // import "github.com/wabarc/wayback/utils"

import (
"strings"
"testing"
)

// TestStrip checks that strip removes the tracking query parameters it
// targets (utm_*, at_custom*, at_medium*, weibo_id) and leaves an
// unrelated parameter in place. Every failure reports the input and the
// value strip returned.
func TestStrip(t *testing.T) {
	link := "https://example.com/?utm_source=wabarc&utm_medium=cpc"
	if got := strip(link); strings.Contains(got, "utm") {
		t.Errorf("strip(%q) = %q, utm_* parameters were not removed", link, got)
	}

	link = "https://example.com/t-55534999?at_custom1=link&at_campaign=64&at_custom3=Regional+East&at_custom2=twitter&at_medium=custom7&at_custom=691F31DA-4E9E-11EB-A68F-435816F31EAE"
	got := strip(link)
	for _, param := range []string{"at_custom", "at_medium"} {
		if strings.Contains(got, param) {
			t.Errorf("strip(%q) = %q, %s parameters were not removed", link, got, param)
		}
	}
	// at_campaign matches none of the stripped prefixes and must survive.
	if !strings.Contains(got, "at_campaign=64") {
		t.Errorf("strip(%q) = %q, unrelated parameter at_campaign was dropped", link, got)
	}

	link = "https://weibointl.api.weibo.cn/share/123456.html?weibo_id=101341001431"
	want := "https://weibointl.api.weibo.cn/share/123456.html"
	if got := strip(link); got != want {
		t.Errorf("strip(%q) = %q, want %q", link, got, want)
	}
}

0 comments on commit 855d6ea

Please sign in to comment.